diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
new file mode 100644
index 0000000..de5ab0a
--- /dev/null
+++ b/.github/workflows/ci-cd.yml
@@ -0,0 +1,436 @@
+# CI/CD pipeline: lint and tests run on every push/PR to main and develop;
+# image build, vulnerability scan and deployments run on pushes only.
+name: CI/CD Pipeline
+
+on:
+  push:
+    branches: [main, develop]
+  pull_request:
+    branches: [main, develop]
+
+# Default the workflow token to read-only; jobs that publish widen it locally.
+permissions:
+  contents: read
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  # Linting and Code Quality
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install flake8 black isort mypy pytest pytest-cov
+
+      - name: Run linting
+        run: |
+          flake8 backend/app/ --max-line-length=88 --extend-ignore=E203,W503
+          black --check backend/app/
+          isort --check-only backend/app/
+          mypy backend/app/
+
+      - name: Run security checks
+        run: |
+          pip install bandit safety
+          bandit -r backend/app/ -f json -o bandit-report.json
+          safety check
+
+  # Backend Testing
+  test-backend:
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: postgres:15
+        env:
+          POSTGRES_PASSWORD: password
+          POSTGRES_DB: test_db
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - '5432:5432'
+
+      redis:
+        image: redis:7
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - '6379:6379'
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          # --cov below needs pytest-cov; install the test tooling in this job too.
+          pip install pytest pytest-cov
+
+      - name: Run database migrations
+        run: |
+          cd backend
+          alembic upgrade head
+        env:
+          DATABASE_URL: postgresql://postgres:password@localhost:5432/test_db
+
+      - name: Run tests with coverage
+        run: |
+          cd backend
+          pytest tests/ -v --cov=app --cov-report=xml --cov-report=html
+        env:
+          DATABASE_URL: postgresql://postgres:password@localhost:5432/test_db
+          REDIS_URL: redis://localhost:6379/0
+          SECRET_KEY: test-secret-key
+          OPENAI_API_KEY: test-key
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./backend/coverage.xml
+          flags: backend
+          name: backend-coverage
+
+  # Frontend Testing
+  test-frontend:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '18'
+          cache: 'npm'
+          cache-dependency-path: frontend/package-lock.json
+
+      - name: Install dependencies
+        run: |
+          cd frontend
+          npm ci
+
+      - name: Run linting
+        run: |
+          cd frontend
+          npm run lint
+
+      - name: Run type checking
+        run: |
+          cd frontend
+          npm run type-check
+
+      - name: Run tests
+        run: |
+          cd frontend
+          npm test -- --coverage --watchAll=false
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./frontend/coverage/lcov.info
+          flags: frontend
+          name: frontend-coverage
+
+  # Integration Testing
+  integration-test:
+    runs-on: ubuntu-latest
+    needs: [test-backend, test-frontend]
+    # Job-level so the migrations, the server and the test steps share one config.
+    env:
+      DATABASE_URL: postgresql://postgres:password@localhost:5432/test_db
+      REDIS_URL: redis://localhost:6379/0
+      SECRET_KEY: test-secret-key
+      OPENAI_API_KEY: test-key
+    services:
+      postgres:
+        image: postgres:15
+        env:
+          POSTGRES_PASSWORD: password
+          POSTGRES_DB: test_db
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - '5432:5432'
+
+      redis:
+        image: redis:7
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - '6379:6379'
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest locust
+
+      - name: Run database migrations
+        run: |
+          cd backend
+          alembic upgrade head
+
+      - name: Start backend server
+        run: |
+          cd backend
+          python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 &
+          # Poll until the API answers instead of sleeping for a fixed time.
+          curl --fail --silent --show-error --output /dev/null \
+            --retry 15 --retry-delay 2 --retry-connrefused \
+            http://localhost:8000/openapi.json
+
+      - name: Run integration tests
+        run: |
+          cd backend
+          pytest tests/integration/ -v
+
+      - name: Run load tests
+        run: |
+          cd backend
+          locust -f tests/load/locustfile.py --headless --users 10 --spawn-rate 2 --run-time 60s
+
+  # Build Docker Images
+  build:
+    runs-on: ubuntu-latest
+    needs: [lint, test-backend, test-frontend, integration-test]
+    if: github.event_name == 'push'
+    permissions:
+      contents: read
+      packages: write
+    outputs:
+      backend_image: ${{ steps.image.outputs.name }}@${{ steps.build-backend.outputs.digest }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Resolve image name
+        id: image
+        # Registry references must be lowercase; github.repository may not be.
+        run: echo "name=${REGISTRY}/${IMAGE_NAME,,}" >> "$GITHUB_OUTPUT"
+
+      - name: Extract backend metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+
+      # The tag list is multi-line, so the "-frontend" suffix has to be applied
+      # per tag through `flavor`; appending it to the output only hits the last tag.
+      - name: Extract frontend metadata
+        id: meta-frontend
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          flavor: |
+            suffix=-frontend,onlatest=true
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+
+      - name: Build and push backend image
+        id: build-backend
+        uses: docker/build-push-action@v5
+        with:
+          context: ./backend
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha,scope=backend
+          cache-to: type=gha,mode=max,scope=backend
+
+      - name: Build and push frontend image
+        uses: docker/build-push-action@v5
+        with:
+          context: ./frontend
+          push: true
+          tags: ${{ steps.meta-frontend.outputs.tags }}
+          labels: ${{ steps.meta-frontend.outputs.labels }}
+          cache-from: type=gha,scope=frontend
+          cache-to: type=gha,mode=max,scope=frontend
+
+  # Deploy to Staging
+  deploy-staging:
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.ref == 'refs/heads/develop'
+    environment: staging
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Deploy to staging
+        run: |
+          echo "Deploying to staging environment"
+          # Add your staging deployment logic here
+          # Example: kubectl apply -f k8s/ -n staging
+
+  # Deploy to Production
+  deploy-production:
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.ref == 'refs/heads/main'
+    environment: production
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Deploy to production
+        run: |
+          echo "Deploying to production environment"
+          # Add your production deployment logic here
+          # Example: kubectl apply -f k8s/ -n production
+
+  # Security Scanning
+  security-scan:
+    runs-on: ubuntu-latest
+    # Scan the exact backend image this run pushed (by digest), so only after build.
+    needs: [build]
+    if: github.event_name == 'push'
+    permissions:
+      contents: read
+      packages: read
+      security-events: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Log in to Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # NOTE(review): @master is a moving ref; pin to a release tag or commit SHA.
+      - name: Run Trivy vulnerability scanner
+        uses: aquasecurity/trivy-action@master
+        with:
+          image-ref: ${{ needs.build.outputs.backend_image }}
+          format: 'sarif'
+          output: 'trivy-results.sarif'
+
+      - name: Upload Trivy scan results to GitHub Security tab
+        uses: github/codeql-action/upload-sarif@v3
+        if: always() && hashFiles('trivy-results.sarif') != ''
+        with:
+          sarif_file: 'trivy-results.sarif'
+
+  # Performance Testing
+  performance-test:
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.ref == 'refs/heads/main'
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install locust
+
+      - name: Run performance tests
+        run: |
+          cd backend
+          locust -f tests/performance/locustfile.py --headless --users 100 --spawn-rate 10 --run-time 300s
+
+  # Documentation Generation
+  docs:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install sphinx sphinx-rtd-theme
+
+      - name: Generate API documentation
+        run: |
+          cd backend
+          python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 &
+          # Retry until the server is up; write the spec into the Sphinx tree (./docs).
+          curl --fail --silent --show-error \
+            --retry 15 --retry-delay 2 --retry-connrefused \
+            --output ../docs/api-spec.json \
+            http://localhost:8000/openapi.json
+
+      - name: Build documentation
+        run: |
+          cd docs
+          make html
+
+      - name: Deploy documentation
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/main'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./docs/_build/html
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
new file mode 100644
index 0000000..f9f28c2
--- /dev/null
+++ b/ARCHITECTURE.md
@@ -0,0 +1,396 @@
+# 🏗️ AI Document Agent - System Architecture
+
+## Overview
+
+The AI Document Agent is built using a modern, scalable microservices architecture that leverages cutting-edge AI technologies and enterprise-grade infrastructure patterns. This document provides a comprehensive overview of the system's architecture, design decisions, and technical implementation.
+
+## 🎯 Architecture Principles
+
+### **Design Philosophy**
+- **Scalability First**: Horizontal scaling capabilities for all components
+- **Resilience**: Fault tolerance and graceful degradation
+- **Security by Design**: Multi-layered security approach
+- **Observability**: Comprehensive monitoring and tracing
+- **Performance**: Optimized for high-throughput document processing
+
+### **Technology Selection Criteria**
+- **Modern & Proven**: Industry-standard technologies with strong community support
+- **Performance**: High-performance frameworks and databases
+- **Scalability**: Technologies that support horizontal scaling
+- **Security**: Enterprise-grade security features
+- **Maintainability**: Clear separation of concerns and modular design
+
+## 🏛️ High-Level Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                                Client Layer                                 │
+├─────────────────────────────────────────────────────────────────────────────┤ +│ Web Browser (React SPA) │ Mobile App │ API Clients │ Third-party │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Presentation Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Nginx (Load Balancer) │ SSL Termination │ Rate Limiting │ Caching │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Application Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Frontend (React) │ Backend API (FastAPI) │ WebSocket Server │ Admin │
+└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Business Logic Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Agent Orchestrator │ Workflow Engine │ Business Services │ Rules │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ AI Agent Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Orchestrator │ Ingestion │ Classifier │ Entity │ Risk │ QA │ +│ Compare │ Audit │ Summarizer │ Translator │ Sentiment │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼
+┌─────────────────────────────────────────────────────────────────────────────┐ +│ Data Access Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ PostgreSQL │ Redis │ ChromaDB │ Elasticsearch │ File Storage │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Infrastructure Layer │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ Docker │ Kubernetes │ Monitoring │ Logging │ Security │ Backup │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## 🔧 Component Architecture + +### **Frontend Architecture (React + TypeScript)** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ React Application │
+├─────────────────────────────────────────────────────────────────┤ +│ App Router │ State Management │ UI Components │ Services │ +├─────────────────────────────────────────────────────────────────┤ +│ Pages │ Context API │ Material-UI │ API │ +│ Layouts │ Custom Hooks │ Custom Theme │ WebSocket│ +│ Navigation │ Local Storage │ Animations │ Utils │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Key Features:** +- **Component-Based Architecture**: Reusable, composable components +- **State Management**: React Context + Custom Hooks +- **Type Safety**: Full TypeScript implementation +- **Responsive Design**: Mobile-first approach +- **Progressive Web App**: Offline capabilities + +### **Backend Architecture (FastAPI + Python)** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ FastAPI Application │ +├─────────────────────────────────────────────────────────────────┤ +│ API Routes │ Middleware │ Dependencies │ Background Tasks │ +├─────────────────────────────────────────────────────────────────┤ +│ Auth │ CORS │ Database │ Celery Workers │ +│ Documents
│ Logging │ Cache │ Agent Processing │ +│ Agents │ Security │ Validation │ File Processing │ +│ Analytics │ Monitoring │ Authentication│ Email/SMS │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Key Features:** +- **Async/Await**: High-performance async operations +- **Dependency Injection**: Clean, testable code +- **OpenAPI**: Auto-generated API documentation +- **Middleware Stack**: Security, logging, monitoring +- **Background Processing**: Celery for heavy tasks + +### **AI Agent Architecture** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Agent Orchestrator │ +├─────────────────────────────────────────────────────────────────┤ +│ Workflow Engine │ Agent Registry │ Task Scheduler │ Monitor │ +├─────────────────────────────────────────────────────────────────┤ +│ Pipeline Builder │ Agent Factory │ Queue Manager │ Metrics │ +│ State Manager │ Config Manager │ Retry Logic │ Alerts │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Agent
Execution Layer │ +├─────────────────────────────────────────────────────────────────┤ +│ Orchestrator │ Ingestion │ Classifier │ Entity │ Risk │ +│ Compare │ Audit │ Summarizer │ Translator│ QA │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Agent Capabilities:** +- **Orchestrator**: Workflow coordination and decision-making +- **Ingestion**: Document parsing and content extraction +- **Classifier**: ML-powered document categorization +- **Entity**: Named entity recognition and extraction +- **Risk**: Compliance monitoring and risk assessment +- **QA**: Interactive question-answering +- **Compare**: Document comparison and diff analysis +- **Audit**: Comprehensive audit logging +- **Summarizer**: AI-powered document summarization +- **Translator**: Multi-language translation +- **Sentiment**: Sentiment analysis and tone detection + +## 🗄️ Data Architecture + +### **Database Design** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL (Primary DB) │ +├─────────────────────────────────────────────────────────────────┤ +│ Users & Auth │ Documents │ Processing │ Analytics │ +├─────────────────────────────────────────────────────────────────┤ +│ users │ documents │ workflows │ metrics │
+│ roles │ tags │ agents │ events │ +│ sessions │ entities │ executions │ reports │ +│ permissions │ compliance │ history │ dashboards │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### **Caching Strategy** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Redis Cache Layers │ +├─────────────────────────────────────────────────────────────────┤ +│ Session Cache │ Query Cache │ Agent Cache │ Rate Limiting │ +├─────────────────────────────────────────────────────────────────┤ +│ User sessions │ API results │ Agent state │ Request limits│ +│ Auth tokens │ DB queries │ Model cache │ IP blocking │ +│ Permissions │ Aggregations │ Config cache │ DDoS protection│ +└─────────────────────────────────────────────────────────────────┘ +``` + +### **Vector Database (ChromaDB)** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ChromaDB (Vector Store) │
+├─────────────────────────────────────────────────────────────────┤ +│ Document Embeddings │ Semantic Search │ Similarity Index │ +├─────────────────────────────────────────────────────────────────┤ +│ Text embeddings │ Vector queries │ Cosine similarity│ +│ Metadata vectors │ Hybrid search │ Clustering │ +│ Multi-modal vectors │ Context retrieval│ Recommendations │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## 🔒 Security Architecture + +### **Multi-Layer Security** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Security Layers │ +├─────────────────────────────────────────────────────────────────┤ +│ Network Security │ Application Security │ Data Security │ +├─────────────────────────────────────────────────────────────────┤ +│ Firewalls │ Authentication │ Encryption │ +│ DDoS Protection │ Authorization │ PII Redaction │ +│ VPN Access │ Input Validation │ Data Masking │ +│ Network Segments │ Rate Limiting │ Audit Logging │
+└─────────────────────────────────────────────────────────────────┘ +``` + +### **Authentication & Authorization** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Auth Flow │ +├─────────────────────────────────────────────────────────────────┤ +│ Client Request │ JWT Validation │ Role Check │ Resource Access │ +├─────────────────────────────────────────────────────────────────┤ +│ Credentials │ Token Verify │ Permissions │ ACL Check │ +│ MFA (Optional) │ Expiry Check │ Scope Check │ Audit Log │ +│ Rate Limiting │ Blacklist Check │ Context │ Response │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## 📊 Monitoring & Observability + +### **Observability Stack** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Monitoring Architecture │ +├─────────────────────────────────────────────────────────────────┤ +│ Metrics Collection │ Log Aggregation │ Distributed Tracing │
+├─────────────────────────────────────────────────────────────────┤ +│ Prometheus │ Elasticsearch │ Jaeger │ +│ Custom Metrics │ Filebeat │ OpenTelemetry │ +│ Health Checks │ Structured Logs │ Correlation IDs │ +│ Alert Manager │ Log Analysis │ Performance Profiling│ +└─────────────────────────────────────────────────────────────────┘ +``` + +### **Metrics & KPIs** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Business Metrics │ +├─────────────────────────────────────────────────────────────────┤ +│ System Metrics │ Application Metrics │ Business Metrics │ +├─────────────────────────────────────────────────────────────────┤ +│ CPU/Memory │ Request Rate │ Documents Processed │ +│ Disk I/O │ Response Time │ Processing Success │ +│ Network │ Error Rate │ User Engagement │ +│ Container │ Throughput │ Compliance Score │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## 🚀 Deployment Architecture + +### **Container Orchestration** + +```
+┌─────────────────────────────────────────────────────────────────┐ +│ Kubernetes Cluster │ +├─────────────────────────────────────────────────────────────────┤ +│ Ingress Controller │ Service Mesh │ Pod Management │ +├─────────────────────────────────────────────────────────────────┤ +│ Nginx Ingress │ Istio │ Horizontal Pod Autoscaler│ +│ SSL Termination │ Traffic Split │ Rolling Updates │ +│ Load Balancing │ Circuit Breaker│ Health Checks │ +│ Rate Limiting │ Retry Logic │ Resource Limits │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### **Environment Strategy** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Environment Pipeline │ +├─────────────────────────────────────────────────────────────────┤ +│ Development │ Staging │ Production │ Disaster Recovery │ +├─────────────────────────────────────────────────────────────────┤ +│ Local Docker │ K8s Dev │ K8s Prod │ Backup Cluster │ +│ Hot Reload │ Testing │
Monitoring │ Data Replication │ +│ Debug Tools │ QA │ Scaling │ Failover │ +│ Mock Data │ UAT │ Security │ Recovery Testing │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## 🔄 Data Flow Architecture + +### **Document Processing Pipeline** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Document Processing Flow │ +├─────────────────────────────────────────────────────────────────┤ +│ 1. Upload │ 2. Validate │ 3. Ingest │ 4. Classify │ +│ File Upload │ File Type │ Extract │ ML Model │ +│ Virus Scan │ Size Check │ Content │ Categorize │ +│ Metadata │ Format │ Structure │ Domain │ +├─────────────────────────────────────────────────────────────────┤ +│ 5. Extract │ 6. Analyze │ 7. Store │ 8.
Index β”‚ +β”‚ Entities β”‚ Risk β”‚ Database β”‚ Vector DB β”‚ +β”‚ Keywords β”‚ Compliance β”‚ File System β”‚ Search Index β”‚ +β”‚ Relations β”‚ Sentiment β”‚ Backup β”‚ Analytics β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### **Real-time Processing** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Real-time Architecture β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ WebSocket β”‚ Event Stream β”‚ Message Queue β”‚ Background β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Connection β”‚ Processing β”‚ Celery β”‚ Workers β”‚ +β”‚ Heartbeat β”‚ Updates β”‚ Redis β”‚ Agent Tasks β”‚ +β”‚ Reconnection β”‚ Notifications β”‚ Priority β”‚ File Tasks β”‚ +β”‚ Broadcasting β”‚ Status β”‚ Dead Letter β”‚ Email Tasks β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## 🎯 Performance Architecture + +### **Scaling Strategies** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Scaling Architecture β”‚ 
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Horizontal Scaling β”‚ Vertical Scaling β”‚ Auto Scaling β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Load Balancer β”‚ Resource Limits β”‚ HPA (K8s) β”‚ +β”‚ Multiple Instances β”‚ Memory/CPU β”‚ VPA (K8s) β”‚ +β”‚ Database Sharding β”‚ Connection Pools β”‚ Custom Metrics β”‚ +β”‚ Cache Clustering β”‚ Query Optimizationβ”‚ Predictive Scalingβ”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### **Performance Optimization** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Performance Layers β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ CDN Layer β”‚ Cache Layer β”‚ Application Layer β”‚ DB Layer β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Static Assets β”‚ Redis Cache β”‚ Async Processing β”‚ Indexing β”‚ +β”‚ Global Edge β”‚ Query Cache β”‚ Connection Pooling β”‚ Sharding β”‚ +β”‚ Compression β”‚ Session Cacheβ”‚ Background Tasks β”‚ Replicationβ”‚ +β”‚ Caching β”‚ Agent Cache β”‚ Load Balancing β”‚ Partitioningβ”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## πŸ”§ Configuration Management + +### **Configuration Strategy** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Configuration Architecture β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Environment Config β”‚ Feature Flags β”‚ Agent Config β”‚ Security β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ .env Files β”‚ Feature Toggles β”‚ Model Params β”‚ Keys β”‚ +β”‚ K8s ConfigMaps β”‚ A/B Testing β”‚ Timeouts β”‚ Certs β”‚ +β”‚ Secrets Management β”‚ Gradual Rollout β”‚ Retry Logic β”‚ Policies β”‚ +β”‚ Dynamic Config β”‚ Canary Deploy β”‚ Thresholds β”‚ Rules β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## 🎯 Future Architecture Considerations + +### **Planned Enhancements** +- **GraphQL API**: For more flexible data querying +- **Event Sourcing**: For complete audit trail +- **CQRS Pattern**: For read/write optimization +- **Service Mesh**: For advanced traffic management +- **Multi-Region**: For global deployment +- **Edge Computing**: For low-latency processing + +### **Technology Evolution** +- **AI Model Updates**: Integration with latest LLMs +- **Database Evolution**: Migration to distributed 
databases +- **Cloud Native**: Full cloud-native architecture +- **Serverless**: Event-driven serverless functions +- **Blockchain**: For immutable audit trails + +--- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ff05ae3 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,191 @@ +# 🛠️ Development Standards & Best Practices + +This document outlines the development standards, coding practices, and quality assurance processes for the AI Document Agent platform. + +## 📋 Code Standards + +### **Python (Backend)** +- **PEP 8** compliance with an 88-character line limit (Black's default, enforced by flake8 in CI) +- **Type hints** required for all function parameters and return values +- **Docstrings** using Google style for all public functions and classes +- **Black** for code formatting +- **isort** for import organization +- **flake8** for linting + +### **TypeScript/JavaScript (Frontend)** +- **ESLint** with strict TypeScript rules +- **Prettier** for code formatting +- **TypeScript strict mode** enabled +- **React hooks** for state management +- **Functional components** with proper prop typing + +### **Database** +- **SQLAlchemy** ORM with async support +- **Alembic** for database migrations +- **Proper indexing** for performance +- **Foreign key constraints** for data integrity + +## 🧪 Testing Requirements + +### **Test Coverage** +- **Minimum 90%** code coverage for backend +- **Minimum 80%** code coverage for frontend +- **Integration tests** for all API endpoints +- **Unit tests** for all business logic +- **E2E tests** for critical user flows + +### **Testing Tools** +```bash +# Backend Testing +pytest --cov=app --cov-report=html +pytest tests/integration/ -v +pytest tests/unit/ -v + +# Frontend Testing +npm test -- --coverage +npm run test:e2e + +# Load Testing +locust -f tests/load/locustfile.py +``` + +## 🔍 Code Quality Gates + +### **Pre-commit Checks** +```bash +# Python quality checks +black app/ +isort app/ +flake8 app/ --max-line-length=88 --extend-ignore=E203,W503 +mypy app/ +pytest --cov=app
--cov-fail-under=90 + +# Frontend quality checks +npm run lint +npm run type-check +npm test -- --coverage --watchAll=false +npm run build +``` + +### **Performance Benchmarks** +- **API Response Time**: <200ms average +- **Frontend Bundle Size**: <2MB gzipped +- **Database Query Time**: <100ms for complex queries +- **Memory Usage**: <512MB per service + +## πŸ“š Documentation Standards + +### **Code Documentation** +- **Comprehensive docstrings** for all public APIs +- **Type hints** for all function signatures +- **Inline comments** for complex business logic +- **README updates** for new features + +### **API Documentation** +- **OpenAPI/Swagger** specifications +- **Example requests/responses** for all endpoints +- **Error code documentation** +- **Authentication examples** + +## πŸš€ Deployment Standards + +### **Environment Management** +- **Environment-specific** configuration files +- **Secrets management** with proper encryption +- **Health checks** for all services +- **Graceful shutdown** handling + +### **Monitoring & Observability** +- **Custom metrics** for business KPIs +- **Structured logging** with correlation IDs +- **Performance monitoring** with alerting +- **Error tracking** and reporting + +## πŸ”’ Security Standards + +### **Code Security** +- **Input validation** for all user inputs +- **SQL injection** prevention with parameterized queries +- **XSS protection** with proper escaping +- **CSRF protection** for state-changing operations + +### **Infrastructure Security** +- **Secrets rotation** policies +- **Network segmentation** and firewalls +- **Regular security audits** and penetration testing +- **Vulnerability scanning** in CI/CD pipeline + +## πŸ“Š Performance Standards + +### **Backend Performance** +- **Async/await** for I/O operations +- **Connection pooling** for database connections +- **Caching strategies** for frequently accessed data +- **Background task processing** for heavy operations + +### **Frontend Performance** +- 
**Code splitting** and lazy loading +- **Image optimization** and compression +- **Bundle analysis** and optimization +- **Progressive Web App** features + +## πŸ—οΈ Architecture Standards + +### **Design Patterns** +- **Repository pattern** for data access +- **Service layer** for business logic +- **Factory pattern** for object creation +- **Observer pattern** for event handling + +### **Microservices Principles** +- **Single responsibility** for each service +- **Loose coupling** between services +- **API versioning** strategy +- **Circuit breaker** pattern for resilience + +## πŸ“ˆ Quality Metrics + +### **Code Quality Metrics** +- **Cyclomatic complexity** < 10 per function +- **Maintainability index** > 65 +- **Technical debt** ratio < 5% +- **Code duplication** < 3% + +### **Performance Metrics** +- **Response time** percentiles (P50, P95, P99) +- **Throughput** measurements +- **Error rates** and availability +- **Resource utilization** monitoring + +## πŸ”„ Development Workflow + +### **Feature Development** +1. **Requirements analysis** and documentation +2. **Technical design** and architecture review +3. **Implementation** with TDD approach +4. **Code review** and quality checks +5. **Testing** and validation +6. **Documentation** updates +7. **Deployment** and monitoring + +### **Bug Fixes** +1. **Issue reproduction** and root cause analysis +2. **Fix implementation** with regression tests +3. **Code review** and testing +4. **Deployment** with rollback plan +5. 
**Monitoring** and verification + +## 🎯 Success Criteria + +### **Code Quality** +- **Zero critical** security vulnerabilities +- **90%+ test coverage** maintained +- **All linting rules** passing +- **Performance benchmarks** met + +### **Business Value** +- **Feature delivery** on schedule +- **User satisfaction** metrics +- **System reliability** and uptime +- **Scalability** requirements met + diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..d31716a --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,501 @@ +# πŸš€ AI Document Agent - Deployment Guide + +## Overview + +This document provides comprehensive deployment instructions for the AI Document Agent platform across different environments, from development to production. The deployment strategy follows enterprise-grade practices with security, scalability, and reliability as core principles. + +## 🎯 Deployment Strategy + +### **Environment Tiers** +- **Development**: Local development with hot-reload +- **Staging**: Production-like environment for testing +- **Production**: High-availability, scalable deployment +- **Disaster Recovery**: Backup and failover systems + +### **Deployment Models** +- **Docker Compose**: For development and small-scale deployments +- **Kubernetes**: For production and enterprise deployments +- **Cloud Native**: AWS, Azure, GCP deployment options +- **Hybrid**: On-premise with cloud integration + +## 🐳 Docker Compose Deployment + +### **Development Environment** + +```bash +# Clone repository +git clone https://github.com/your-org/ai-document-agent.git +cd ai-document-agent + +# Copy environment configuration +cp .env.example .env + +# Configure environment variables +nano .env + +# Start development services +docker-compose -f docker-compose.dev.yml up -d + +# Initialize database +./scripts/init-db.sh + +# Access services +# Frontend: http://localhost:3000 +# Backend: http://localhost:8000 +# API Docs: http://localhost:8000/docs +``` + +### **Production 
Environment** + +```bash +# Production deployment +docker-compose -f docker-compose.prod.yml up -d + +# With monitoring stack +docker-compose -f docker-compose.prod.yml -f docker-compose.monitoring.yml up -d + +# Health check +docker-compose ps +curl http://localhost:8000/health +``` + +## ☸️ Kubernetes Deployment + +### **Prerequisites** +```bash +# Kubernetes cluster (minikube, kind, or cloud provider) +kubectl version --client +helm version + +# Install required tools +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.12.0/cert-manager.yaml +kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.7.1/deploy/static/provider/cloud/deploy.yaml +``` + +### **Namespace Setup** +```bash +# Create namespace +kubectl create namespace ai-document-agent + +# Set context +kubectl config set-context --current --namespace=ai-document-agent +``` + +### **Secrets Management** +```bash +# Create secrets +kubectl create secret generic ai-document-agent-secrets \ + --from-literal=postgres-password=your-secure-password \ + --from-literal=redis-password=your-redis-password \ + --from-literal=jwt-secret=your-jwt-secret \ + --from-literal=openai-api-key=your-openai-key + +# Create config maps +kubectl create configmap ai-document-agent-config \ + --from-file=config/app.yaml \ + --from-file=config/agents.yaml +``` + +### **Database Deployment** +```bash +# Deploy PostgreSQL +helm repo add bitnami https://charts.bitnami.com/bitnami +helm install postgres bitnami/postgresql \ + --set auth.postgresPassword=your-secure-password \ + --set primary.persistence.size=10Gi \ + --set architecture=standalone + +# Deploy Redis +helm install redis bitnami/redis \ + --set auth.password=your-redis-password \ + --set architecture=standalone \ + --set master.persistence.size=5Gi +``` + +### **Application Deployment** +```bash +# Deploy core application +kubectl apply -f k8s/namespace.yaml +kubectl apply -f k8s/secrets.yaml +kubectl 
apply -f k8s/configmaps.yaml +kubectl apply -f k8s/services.yaml +kubectl apply -f k8s/deployments.yaml +kubectl apply -f k8s/ingress.yaml + +# Deploy monitoring stack +kubectl apply -f k8s/monitoring/ +``` + +### **Verification** +```bash +# Check deployment status +kubectl get pods +kubectl get services +kubectl get ingress + +# Check logs +kubectl logs -f deployment/backend +kubectl logs -f deployment/frontend + +# Port forward for local access +kubectl port-forward service/backend 8000:8000 +kubectl port-forward service/frontend 3000:3000 +``` + +## ☁️ Cloud Deployment + +### **AWS Deployment** + +#### **ECS Fargate** +```bash +# Deploy with AWS CLI +aws ecs create-cluster --cluster-name ai-document-agent + +# Create task definitions +aws ecs register-task-definition --cli-input-json file://task-definition.json + +# Create service +aws ecs create-service \ + --cluster ai-document-agent \ + --service-name backend \ + --task-definition backend:1 \ + --desired-count 2 \ + --launch-type FARGATE +``` + +#### **EKS (Elastic Kubernetes Service)** +```bash +# Create EKS cluster +eksctl create cluster \ + --name ai-document-agent \ + --region us-west-2 \ + --nodegroup-name workers \ + --node-type t3.medium \ + --nodes 3 \ + --nodes-min 1 \ + --nodes-max 5 + +# Deploy application +kubectl apply -f k8s/ +``` + +### **Azure Deployment** + +#### **AKS (Azure Kubernetes Service)** +```bash +# Create AKS cluster +az aks create \ + --resource-group ai-document-agent-rg \ + --name ai-document-agent-cluster \ + --node-count 3 \ + --enable-addons monitoring \ + --generate-ssh-keys + +# Get credentials +az aks get-credentials --resource-group ai-document-agent-rg --name ai-document-agent-cluster + +# Deploy application +kubectl apply -f k8s/ +``` + +### **Google Cloud Deployment** + +#### **GKE (Google Kubernetes Engine)** +```bash +# Create GKE cluster +gcloud container clusters create ai-document-agent \ + --zone us-central1-a \ + --num-nodes 3 \ + --machine-type n1-standard-2 \ 
+ --enable-autoscaling \ + --min-nodes 1 \ + --max-nodes 5 + +# Get credentials +gcloud container clusters get-credentials ai-document-agent --zone us-central1-a + +# Deploy application +kubectl apply -f k8s/ +``` + +## πŸ”’ Security Configuration + +### **SSL/TLS Setup** +```bash +# Generate SSL certificates +openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout nginx/ssl/private.key \ + -out nginx/ssl/certificate.crt + +# Configure Nginx with SSL +kubectl apply -f k8s/ssl/ +``` + +### **Network Security** +```bash +# Configure network policies +kubectl apply -f k8s/network-policies/ + +# Set up firewall rules +gcloud compute firewall-rules create ai-document-agent \ + --allow tcp:80,tcp:443,tcp:22 \ + --source-ranges 0.0.0.0/0 \ + --target-tags ai-document-agent +``` + +### **Secrets Management** +```bash +# Use external secrets manager +helm install external-secrets external-secrets/external-secrets \ + --set installCRDs=true + +# Configure secrets +kubectl apply -f k8s/external-secrets/ +``` + +## πŸ“Š Monitoring & Observability + +### **Prometheus & Grafana** +```bash +# Deploy monitoring stack +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm install prometheus prometheus-community/kube-prometheus-stack \ + --set grafana.enabled=true \ + --set prometheus.enabled=true + +# Access Grafana +kubectl port-forward service/prometheus-grafana 3001:80 +# Username: admin, Password: prom-operator +``` + +### **ELK Stack** +```bash +# Deploy Elasticsearch +helm repo add elastic https://helm.elastic.co +helm install elasticsearch elastic/elasticsearch \ + --set replicas=3 + +# Deploy Kibana +helm install kibana elastic/kibana \ + --set service.type=LoadBalancer + +# Deploy Filebeat +helm install filebeat elastic/filebeat +``` + +### **Jaeger Tracing** +```bash +# Deploy Jaeger +helm repo add jaegertracing https://jaegertracing.github.io/helm-charts +helm install jaeger jaegertracing/jaeger \ + --set 
storage.type=elasticsearch \ + --set storage.options.es.server-urls=http://elasticsearch-master:9200 +``` + +## πŸ”„ CI/CD Pipeline + +### **GitHub Actions** +```yaml +# .github/workflows/deploy.yml +name: Deploy to Production + +on: + push: + branches: [main] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Build and push Docker images + run: | + docker build -t ai-document-agent/backend:latest ./backend + docker build -t ai-document-agent/frontend:latest ./frontend + docker push ai-document-agent/backend:latest + docker push ai-document-agent/frontend:latest + + - name: Deploy to Kubernetes + run: | + kubectl set image deployment/backend backend=ai-document-agent/backend:latest + kubectl set image deployment/frontend frontend=ai-document-agent/frontend:latest + kubectl rollout status deployment/backend + kubectl rollout status deployment/frontend +``` + +### **ArgoCD** +```bash +# Install ArgoCD +kubectl create namespace argocd +kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml + +# Create application +kubectl apply -f argocd/application.yaml +``` + +## πŸ“ˆ Scaling & Performance + +### **Horizontal Pod Autoscaler** +```yaml +# k8s/hpa.yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: backend-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: backend + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 +``` + +### **Database Scaling** +```bash +# Scale PostgreSQL +helm upgrade postgres bitnami/postgresql \ + --set readReplicas.persistence.size=10Gi \ + --set readReplicas.replicaCount=2 + +# Scale Redis +helm upgrade redis bitnami/redis \ + --set architecture=replication \ + --set replica.replicaCount=3 +``` + +## 
🔄 Backup & Recovery + +### **Database Backup** +```bash +# Automated backup script +#!/bin/bash +BACKUP_DIR="/backups" +DATE=$(date +%Y%m%d_%H%M%S) + +# PostgreSQL backup +pg_dump -h $DB_HOST -U $DB_USER -d $DB_NAME > $BACKUP_DIR/postgres_$DATE.sql + +# Redis backup +redis-cli -h $REDIS_HOST BGSAVE + +# File storage backup +tar -czf $BACKUP_DIR/files_$DATE.tar.gz /data/uploads/ + +# Upload to cloud storage +aws s3 cp $BACKUP_DIR/postgres_$DATE.sql s3://ai-document-agent-backups/ +aws s3 cp $BACKUP_DIR/files_$DATE.tar.gz s3://ai-document-agent-backups/ +``` + +### **Disaster Recovery** +```bash +# Restore from backup (the pg_dump above writes plain SQL, which is replayed with psql; pg_restore only reads custom/tar/directory archives) +psql -h $DB_HOST -U $DB_USER -d $DB_NAME -f $BACKUP_DIR/postgres_$DATE.sql + +# Failover procedure +kubectl apply -f k8s/disaster-recovery/failover.yaml +``` + +## 🧪 Testing Deployment + +### **Load Testing** +```bash +# Deploy load testing +kubectl apply -f k8s/load-testing/ + +# Run load test +kubectl exec -it load-test-pod -- locust -f /app/locustfile.py \ + --host=http://backend-service:8000 +``` + +### **Health Checks** +```bash +# Automated health checks +curl -f http://localhost:8000/health || exit 1 +curl -f http://localhost:3000/ || exit 1 + +# Database connectivity +pg_isready -h $DB_HOST -p $DB_PORT || exit 1 + +# Redis connectivity +redis-cli -h $REDIS_HOST ping || exit 1 +``` + +## 📋 Deployment Checklist + +### **Pre-Deployment** +- [ ] Environment variables configured +- [ ] Secrets and certificates prepared +- [ ] Database migrations ready +- [ ] Load balancer configured +- [ ] Monitoring stack deployed +- [ ] Backup strategy implemented + +### **Deployment** +- [ ] Database deployed and initialized +- [ ] Application services deployed +- [ ] Ingress and SSL configured +- [ ] Health checks passing +- [ ] Monitoring dashboards accessible +- [ ] Logs being collected + +### **Post-Deployment** +- [ ] Performance testing completed +- [ ] Security scan passed +- [ ] Documentation updated +- [ ] Team notified +- [ ] Rollback plan
tested +- [ ] Monitoring alerts configured + +## 🚨 Troubleshooting + +### **Common Issues** +```bash +# Pod not starting +kubectl describe pod <pod-name> +kubectl logs <pod-name> + +# Service not accessible +kubectl get endpoints +kubectl describe service <service-name> + +# Database connection issues +kubectl exec -it <pod-name> -- nc -zv <db-host> <db-port> + +# Memory issues +kubectl top pods +kubectl describe node +``` + +### **Debug Commands** +```bash +# Get cluster info +kubectl cluster-info +kubectl get nodes + +# Check resource usage +kubectl top nodes +kubectl top pods + +# View events +kubectl get events --sort-by=.metadata.creationTimestamp + +# Port forwarding for debugging +kubectl port-forward service/backend 8000:8000 +kubectl port-forward service/prometheus-grafana 3001:80 +``` + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..dbaa589 --- /dev/null +++ b/Makefile @@ -0,0 +1,270 @@ +# AI Document Agent - Development Makefile +# Provides convenient commands for development, testing, and deployment. Every target is a command, not a file, so all of them are declared .PHONY (otherwise e.g. `make docs` is skipped when docs/ exists). + +.PHONY: help install install-backend install-frontend install-dev install-prod dev dev-backend dev-frontend test test-backend test-frontend test-unit test-integration test-coverage test-load lint lint-backend lint-frontend format format-backend format-frontend db-init db-migrate db-reset db-backup docker-build docker-run docker-stop docker-clean build build-backend build-frontend deploy deploy-staging deploy-prod monitoring-start monitoring-stop clean logs health security-scan performance-test docs quickstart + +# Default target +help: + @echo "AI Document Agent - Development Commands" + @echo "========================================" + @echo "" + @echo "Installation:" + @echo " install Install all dependencies" + @echo " install-dev Install development dependencies" + @echo " install-prod Install production dependencies" + @echo "" + @echo "Development:" + @echo " dev Start development servers" + @echo " dev-backend Start backend development server" + @echo " dev-frontend Start frontend development server" + @echo "" + @echo "Testing:" + @echo " test Run all tests" + @echo " test-unit Run unit tests only" + @echo " test-integration Run integration tests only" + @echo " test-coverage Run tests with coverage report" + @echo " test-load Run load tests" + @echo "" + @echo "Code Quality:" + @echo " lint Run all linting checks" + @echo " lint-backend Lint backend code" + @echo " lint-frontend Lint frontend code" + @echo " format
Format all code" + @echo " format-backend Format backend code" + @echo " format-frontend Format frontend code" + @echo "" + @echo "Database:" + @echo " db-init Initialize database" + @echo " db-migrate Run database migrations" + @echo " db-reset Reset database" + @echo " db-backup Create database backup" + @echo "" + @echo "Docker:" + @echo " docker-build Build Docker images" + @echo " docker-run Run with Docker Compose" + @echo " docker-stop Stop Docker containers" + @echo " docker-clean Clean Docker resources" + @echo "" + @echo "Deployment:" + @echo " build Build for production" + @echo " deploy-staging Deploy to staging" + @echo " deploy-prod Deploy to production" + @echo "" + @echo "Monitoring:" + @echo " monitoring-start Start monitoring stack" + @echo " monitoring-stop Stop monitoring stack" + @echo "" + @echo "Utilities:" + @echo " clean Clean build artifacts" + @echo " logs Show application logs" + @echo " health Check system health" + +# Installation (install-prod installs runtime dependencies only; it deliberately does not depend on install-backend/install-frontend, which pull in development packages) +install: install-backend install-frontend + @echo "✅ All dependencies installed" + +install-backend: + @echo "📦 Installing backend dependencies..." + pip install -r requirements.txt + pip install -r requirements-dev.txt + +install-frontend: + @echo "📦 Installing frontend dependencies..." + cd frontend && npm install + +install-dev: install-backend install-frontend + @echo "📦 Installing development dependencies..." + pip install -r requirements-dev.txt + cd frontend && npm install --include=dev + +install-prod: + @echo "📦 Installing production dependencies..." + pip install -r requirements.txt && cd frontend && npm ci --omit=dev + +# Development (both servers run in the foreground, so `dev` launches them in parallel through a sub-make; as plain prerequisites the backend would block and the frontend would never start) +dev: + @$(MAKE) -j2 dev-backend dev-frontend + +dev-backend: + @echo "🚀 Starting backend development server..." + cd backend && python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 + +dev-frontend: + @echo "🚀 Starting frontend development server..."
+ cd frontend && npm start + +# Testing +test: test-backend test-frontend + @echo "✅ All tests completed" + +test-backend: + @echo "🧪 Running backend tests..." + cd backend && pytest tests/ -v + +test-frontend: + @echo "🧪 Running frontend tests..." + cd frontend && npm test -- --watchAll=false + +test-unit: + @echo "🧪 Running unit tests..." + cd backend && pytest tests/ -m "unit" -v + +test-integration: + @echo "🧪 Running integration tests..." + cd backend && pytest tests/ -m "integration" -v + +test-coverage: + @echo "🧪 Running tests with coverage..." + cd backend && pytest tests/ --cov=app --cov-report=html --cov-report=term-missing + cd frontend && npm test -- --coverage --watchAll=false + +test-load: + @echo "🧪 Running load tests..." + cd backend && locust -f tests/load/locustfile.py --headless --users 10 --spawn-rate 2 --run-time 60s + +# Code Quality +lint: lint-backend lint-frontend + @echo "✅ All linting checks completed" + +lint-backend: + @echo "🔍 Linting backend code..." + cd backend && flake8 app/ --max-line-length=88 --extend-ignore=E203,W503 + cd backend && black --check app/ + cd backend && isort --check-only app/ + cd backend && mypy app/ + +lint-frontend: + @echo "🔍 Linting frontend code..." + cd frontend && npm run lint + cd frontend && npm run type-check + +format: format-backend format-frontend + @echo "✨ All code formatted" + +format-backend: + @echo "✨ Formatting backend code..." + cd backend && black app/ + cd backend && isort app/ + +format-frontend: + @echo "✨ Formatting frontend code..." + cd frontend && npm run format + +# Database +db-init: + @echo "🗄️ Initializing database..." + cd backend && python -c "from app.database.connection import init_database; import asyncio; asyncio.run(init_database())" + +db-migrate: + @echo "🗄️ Running database migrations..." + cd backend && alembic upgrade head + +db-reset: + @echo "🗄️ Resetting database..."
+ cd backend && alembic downgrade base + cd backend && alembic upgrade head + cd backend && python -c "from app.database.connection import initialize_default_data; import asyncio; asyncio.run(initialize_default_data())" + +db-backup: + @echo "🗄️ Creating database backup..." + ./scripts/backup.sh + +# Docker +docker-build: + @echo "🐳 Building Docker images..." + docker-compose build + +docker-run: + @echo "🐳 Starting Docker containers..." + docker-compose up -d + +docker-stop: + @echo "🐳 Stopping Docker containers..." + docker-compose down + +docker-clean: + @echo "🐳 Cleaning Docker resources..." + docker-compose down -v --remove-orphans + docker system prune -f + +# Deployment +build: build-backend build-frontend + @echo "🏗️ Production build completed" + +build-backend: + @echo "🏗️ Building backend..." + cd backend && python -m build + +build-frontend: + @echo "🏗️ Building frontend..." + cd frontend && npm run build + +deploy-staging: + @echo "🚀 Deploying to staging..." + ./scripts/deploy.sh staging + +deploy-prod: + @echo "🚀 Deploying to production..." + ./scripts/deploy.sh production + +# Monitoring +monitoring-start: + @echo "📊 Starting monitoring stack..." + ./scripts/monitoring-setup.sh start + +monitoring-stop: + @echo "📊 Stopping monitoring stack..." + ./scripts/monitoring-setup.sh stop + +# Utilities +clean: + @echo "🧹 Cleaning build artifacts..." + find . -type d -name "__pycache__" -exec rm -rf {} + + find . -type f -name "*.pyc" -delete + find . -type f -name "*.pyo" -delete + find . -type f -name "*.pyd" -delete + find . -type d -name "*.egg-info" -exec rm -rf {} + + find . -type d -name ".pytest_cache" -exec rm -rf {} + + find . -type d -name "htmlcov" -exec rm -rf {} + + find . -type f -name ".coverage" -delete + find . -type f -name "coverage.xml" -delete + cd frontend && rm -rf build/ node_modules/ .cache/ + @echo "✅ Cleanup completed" + +logs: + @echo "📋 Showing application logs..."
+ docker-compose logs -f + +health: + @echo "🏥 Checking system health..." + curl -f http://localhost:8000/health || echo "❌ Backend health check failed" + curl -f http://localhost:3000 || echo "❌ Frontend health check failed" + curl -f http://localhost:9090/-/healthy || echo "❌ Prometheus health check failed" + curl -f http://localhost:3001/api/health || echo "❌ Grafana health check failed" + +# Security ($(CURDIR) is set by make itself; $(PWD) is only inherited from the caller's environment) +security-scan: + @echo "🔒 Running security scans..." + cd backend && bandit -r app/ -f json -o bandit-report.json + cd backend && safety check + docker run --rm -v $(CURDIR):/app aquasec/trivy fs /app + +# Performance +performance-test: + @echo "⚡ Running performance tests..." + cd backend && locust -f tests/performance/locustfile.py --headless --users 100 --spawn-rate 10 --run-time 300s + +# Documentation (writes into docs/, so this target only runs reliably because it is declared .PHONY above) +docs: + @echo "📚 Generating documentation..." + cd backend && python -m uvicorn app.main:app --host 0.0.0.0 --port 8000 & + sleep 10 + curl http://localhost:8000/openapi.json > docs/api-spec.json + pkill -f uvicorn + +# Quick start for new developers +quickstart: install-dev db-init + @echo "🚀 Quick start completed!" + @echo "Run 'make dev' to start development servers" + @echo "Run 'make test' to run tests" + @echo "Run 'make lint' to check code quality" diff --git a/README.md b/README.md index 535c820..52ee389 100644 --- a/README.md +++ b/README.md @@ -110,4 +110,24 @@ The application can be configured through environment variables: ## License -This project is licensed under the MIT License - see the LICENSE file for details. +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+ +## 🏆 Enterprise Support + +### **Professional Services** +- **Custom Development**: Tailored features and integrations +- **Deployment Support**: On-premise and cloud deployment +- **Training & Consulting**: Team training and best practices +- **24/7 Support**: Enterprise support with SLA guarantees + +### **Contact Information** +- **Email**: enterprise@ai-document-agent.com +- **Phone**: +1 (555) 123-4567 +- **Documentation**: https://docs.ai-document-agent.com +- **Support Portal**: https://support.ai-document-agent.com + +--- + +**Built with ❤️ by the AI Document Agent Team** + +*Empowering enterprises with intelligent document processing since 2024* diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..88bccab --- /dev/null +++ b/alembic.ini @@ -0,0 +1,112 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename.
+# If specified, requires the python-dateutil library that can be +# installed by adding `alembic[tz]` to the pip requirements +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version number format +version_num_format = %04d + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses +# os.pathsep. If this key is omitted entirely, it falls back to the legacy +# behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = postgresql://postgres:password@localhost:5432/smart_doc_bot + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..62f963a --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,99 @@ +from logging.config import fileConfig +from sqlalchemy import engine_from_config +from sqlalchemy import pool +from alembic import context +import os +import sys + +# Add the backend directory to the Python path +sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'backend')) + +# Import your models and database configuration +from app.database.connection import Base +from app.database.models import ( + User, Role, UserRole, Document, Tag, DocumentTag, ComplianceFramework, + ProcessingHistory, AgentExecution, DocumentComparison, AuditEvent, + SystemMetric, WorkflowTemplate, KnowledgeBase, Notification, APILog, SystemConfig +) +from app.core.config import settings + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. 
+config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def get_url(): + """Get database URL from environment or config""" + return settings.DATABASE_URL + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = get_url() + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + configuration = config.get_section(config.config_ini_section) + configuration["sqlalchemy.url"] = get_url() + + connectable = engine_from_config( + configuration, + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + compare_type=True, + compare_server_default=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..55df286 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/backend/app/agents/orchestrator.py b/backend/app/agents/orchestrator.py index 66a3ebb..bb26bd2 100644 --- a/backend/app/agents/orchestrator.py +++ b/backend/app/agents/orchestrator.py @@ -1,5 +1,6 @@ import json import asyncio +import logging from typing import Any, Dict, List, Optional from datetime import datetime from enum import Enum @@ -9,6 +10,7 @@ from .base import BaseAgent, Tool from ..models.base import AgentResult, AgentType, Document +from ..core.config import settings from .ingestion import IngestionAgent from .classifier import ClassifierAgent from .entity import EntityAgent @@ -17,6 +19,9 @@ from .compare import CompareAgent from .audit import AuditAgent +# Configure logging +logger = 
logging.getLogger(__name__) + class WorkflowStage(Enum): """Workflow stage enumeration""" @@ -410,15 +415,23 @@ async def _execute_stage(self, agent_type: str, document: Document, goal: str) - context = { "document": document, "goal": goal, - "orchestrator": self + "orchestrator": self, + "workflow_state": self.workflow_state } - # Execute agent - result = await agent.run(goal, context) - return result + # Execute agent with timeout + import asyncio + try: + result = await asyncio.wait_for( + agent.run(goal, context), + timeout=settings.AGENT_TIMEOUT + ) + return result + except asyncio.TimeoutError: + raise Exception(f"Agent {agent_type} execution timed out after {settings.AGENT_TIMEOUT} seconds") except Exception as e: - print(f"Stage execution failed for {agent_type}: {str(e)}") + logger.error(f"Stage execution failed for {agent_type}: {str(e)}") return None def _calculate_confidence(self, execution_results: Dict, monitoring_result: Dict) -> float: diff --git a/backend/app/api/v1/endpoints/auth.py b/backend/app/api/v1/endpoints/auth.py index 6d37696..af22c2b 100644 --- a/backend/app/api/v1/endpoints/auth.py +++ b/backend/app/api/v1/endpoints/auth.py @@ -1,17 +1,17 @@ from datetime import datetime, timedelta from typing import Optional -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, status, Request from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from pydantic import BaseModel, EmailStr -import jwt -from passlib.context import CryptContext +from sqlalchemy.orm import Session from ...core.config import settings -from ...core.security import create_access_token, verify_token +from ...core.security import security_manager, get_current_user, require_permission +from ...database.connection import get_db +from ...database.models import User, Role, UserRole router = APIRouter() security = HTTPBearer() -pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") class 
LoginRequest(BaseModel): @@ -27,185 +27,439 @@ class LoginResponse(BaseModel): class UserInfo(BaseModel): - id: str + id: int email: str - username: str - full_name: str - role: str - permissions: list[str] + full_name: Optional[str] is_active: bool - created_at: str - last_login: Optional[str] = None - - -# Mock user database - in production, this would be a real database -MOCK_USERS = { - "admin@redline.com": { - "id": "user_001", - "email": "admin@redline.com", - "username": "admin", - "full_name": "System Administrator", - "password_hash": pwd_context.hash("admin123"), - "role": "admin", - "permissions": ["read", "write", "delete", "analyze", "admin"], - "is_active": True, - "created_at": "2024-01-01T00:00:00Z", - "last_login": None - }, - "user@redline.com": { - "id": "user_002", - "email": "user@redline.com", - "username": "user", - "full_name": "Regular User", - "password_hash": pwd_context.hash("user123"), - "role": "user", - "permissions": ["read", "write", "analyze"], - "is_active": True, - "created_at": "2024-01-01T00:00:00Z", - "last_login": None - } -} - - -def verify_password(plain_password: str, hashed_password: str) -> bool: - """Verify a password against its hash""" - return pwd_context.verify(plain_password, hashed_password) - - -def get_user_by_email(email: str): - """Get user by email from mock database""" - return MOCK_USERS.get(email) - - -def authenticate_user(email: str, password: str): - """Authenticate user with email and password""" - user = get_user_by_email(email) - if not user: - return None - if not verify_password(password, user["password_hash"]): - return None - return user - - -async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)): - """Get current user from JWT token""" + is_superuser: bool + created_at: datetime + last_login: Optional[datetime] = None + roles: list[str] = [] + + +class RegisterRequest(BaseModel): + email: EmailStr + password: str + full_name: Optional[str] = None + + +class 
ChangePasswordRequest(BaseModel): + current_password: str + new_password: str + + +class ResetPasswordRequest(BaseModel): + email: EmailStr + + +@router.post("/login", response_model=LoginResponse) +async def login( + request: LoginRequest, + db: Session = Depends(get_db), + client_request: Request = None +): + """Login endpoint with comprehensive security logging""" try: - payload = verify_token(credentials.credentials) - email: str = payload.get("sub") - if email is None: + # Authenticate user + user = security_manager.authenticate_user(db, request.email, request.password) + if not user: + # Log failed login attempt + if client_request: + security_manager.log_security_event( + event_type="login_failed", + user_id=None, + ip_address=client_request.client.host, + details={"email": request.email, "reason": "invalid_credentials"} + ) + raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Could not validate credentials", + detail="Incorrect email or password", headers={"WWW-Authenticate": "Bearer"}, ) - except jwt.ExpiredSignatureError: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Token has expired", - headers={"WWW-Authenticate": "Bearer"}, - ) - except jwt.JWTError: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Could not validate credentials", - headers={"WWW-Authenticate": "Bearer"}, + + # Update last login + user.last_login = datetime.utcnow() + db.commit() + + # Create access token + access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + access_token = security_manager.create_access_token( + data={"sub": user.email}, expires_delta=access_token_expires ) - - user = get_user_by_email(email) - if user is None: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="User not found", - headers={"WWW-Authenticate": "Bearer"}, + + # Get user roles + user_roles = db.query(UserRole).filter(UserRole.user_id == user.id).all() + role_names = [] + 
for user_role in user_roles: + role = db.query(Role).filter(Role.id == user_role.role_id).first() + if role: + role_names.append(role.name) + + # Log successful login + if client_request: + security_manager.log_security_event( + event_type="login_success", + user_id=user.id, + ip_address=client_request.client.host, + details={"email": user.email, "roles": role_names} + ) + + return LoginResponse( + access_token=access_token, + token_type="bearer", + expires_in=settings.ACCESS_TOKEN_EXPIRE_MINUTES * 60, + user={ + "id": user.id, + "email": user.email, + "full_name": user.full_name, + "is_active": user.is_active, + "is_superuser": user.is_superuser, + "roles": role_names + } ) - - return user - - -@router.post("/login", response_model=LoginResponse) -async def login(request: LoginRequest): - """Login endpoint""" - user = authenticate_user(request.email, request.password) - if not user: + + except HTTPException: + raise + except Exception as e: + # Log unexpected error + if client_request: + security_manager.log_security_event( + event_type="login_error", + user_id=None, + ip_address=client_request.client.host, + details={"email": request.email, "error": str(e)} + ) raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect email or password", - headers={"WWW-Authenticate": "Bearer"}, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error during login" ) - - # Update last login - user["last_login"] = datetime.utcnow().isoformat() - - # Create access token - access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) - access_token = create_access_token( - data={"sub": user["email"]}, expires_delta=access_token_expires - ) - - return LoginResponse( - access_token=access_token, - token_type="bearer", - expires_in=settings.ACCESS_TOKEN_EXPIRE_MINUTES * 60, - user={ - "id": user["id"], - "email": user["email"], - "username": user["username"], - "full_name": user["full_name"], - "role": 
user["role"], - "permissions": user["permissions"], - "is_active": user["is_active"] - } - ) @router.post("/logout") -async def logout(current_user: dict = Depends(get_current_user)): - """Logout endpoint""" - # In a real implementation, you might want to blacklist the token - # For now, we'll just return success - return {"message": "Successfully logged out"} +async def logout( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), + client_request: Request = None +): + """Logout endpoint with token blacklisting""" + try: + # Get the token from the request + if client_request and "authorization" in client_request.headers: + token = client_request.headers["authorization"].replace("Bearer ", "") + security_manager.blacklist_token(token) + + # Log logout event + if client_request: + security_manager.log_security_event( + event_type="logout", + user_id=current_user.id, + ip_address=client_request.client.host, + details={"email": current_user.email} + ) + + return {"message": "Successfully logged out"} + + except Exception as e: + # Log error but don't fail the logout + if client_request: + security_manager.log_security_event( + event_type="logout_error", + user_id=current_user.id, + ip_address=client_request.client.host, + details={"error": str(e)} + ) + return {"message": "Logged out (with warnings)"} @router.get("/me", response_model=UserInfo) -async def get_current_user_info(current_user: dict = Depends(get_current_user)): +async def get_current_user_info( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): """Get current user information""" - return UserInfo( - id=current_user["id"], - email=current_user["email"], - username=current_user["username"], - full_name=current_user["full_name"], - role=current_user["role"], - permissions=current_user["permissions"], - is_active=current_user["is_active"], - created_at=current_user["created_at"], - last_login=current_user["last_login"] - ) + try: + # Get user 
roles + user_roles = db.query(UserRole).filter(UserRole.user_id == current_user.id).all() + role_names = [] + for user_role in user_roles: + role = db.query(Role).filter(Role.id == user_role.role_id).first() + if role: + role_names.append(role.name) + + return UserInfo( + id=current_user.id, + email=current_user.email, + full_name=current_user.full_name, + is_active=current_user.is_active, + is_superuser=current_user.is_superuser, + created_at=current_user.created_at, + last_login=current_user.last_login, + roles=role_names + ) + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to retrieve user information" + ) @router.post("/refresh") -async def refresh_token(current_user: dict = Depends(get_current_user)): +async def refresh_token( + current_user: User = Depends(get_current_user), + client_request: Request = None +): """Refresh access token""" - access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) - access_token = create_access_token( - data={"sub": current_user["email"]}, expires_delta=access_token_expires - ) - - return { - "access_token": access_token, - "token_type": "bearer", - "expires_in": settings.ACCESS_TOKEN_EXPIRE_MINUTES * 60 - } + try: + access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + access_token = security_manager.create_access_token( + data={"sub": current_user.email}, expires_delta=access_token_expires + ) + + # Log token refresh + if client_request: + security_manager.log_security_event( + event_type="token_refresh", + user_id=current_user.id, + ip_address=client_request.client.host, + details={"email": current_user.email} + ) + + return { + "access_token": access_token, + "token_type": "bearer", + "expires_in": settings.ACCESS_TOKEN_EXPIRE_MINUTES * 60 + } + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to refresh token" + ) + + 
+@router.post("/register") +async def register_user( + request: RegisterRequest, + db: Session = Depends(get_db), + client_request: Request = None +): + """Register new user (admin only)""" + # NOTE(review): the docstring says "admin only" and the audit event below records + # registered_by="admin", but this signature declares no get_current_user / + # require_permission dependency - confirm access is enforced where the router is mounted + try: + # Check if user already exists + existing_user = security_manager.get_user_by_email(db, request.email) + if existing_user: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="User with this email already exists" + ) + + # Create new user + hashed_password = security_manager.get_password_hash(request.password) + new_user = User( + email=request.email, + hashed_password=hashed_password, + full_name=request.full_name, + is_active=True, + is_superuser=False + ) + + db.add(new_user) + db.commit() + db.refresh(new_user) + + # Assign default user role + default_role = db.query(Role).filter(Role.name == "user").first() + if default_role: + user_role = UserRole(user_id=new_user.id, role_id=default_role.id) + db.add(user_role) + db.commit() + + # Log user registration + if client_request: + security_manager.log_security_event( + event_type="user_registered", + user_id=new_user.id, + ip_address=client_request.client.host, + details={"email": new_user.email, "registered_by": "admin"} + ) + + return { + "message": "User registered successfully", + "user_id": new_user.id, + "email": new_user.email + } + + except HTTPException: + raise + except Exception as e: + db.rollback() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to register user" + ) + + +@router.post("/change-password") +async def change_password( + request: ChangePasswordRequest, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), + client_request: Request = None +): + """Change user password""" + try: + # Verify current password + if not security_manager.verify_password(request.current_password, current_user.hashed_password): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Current password is
incorrect" + ) + + # Update password + new_hashed_password = security_manager.get_password_hash(request.new_password) + current_user.hashed_password = new_hashed_password + db.commit() + + # Log password change + if client_request: + security_manager.log_security_event( + event_type="password_changed", + user_id=current_user.id, + ip_address=client_request.client.host, + details={"email": current_user.email} + ) + + return {"message": "Password changed successfully"} + + except HTTPException: + raise + except Exception as e: + db.rollback() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to change password" + ) + + +@router.post("/reset-password") +async def reset_password( + request: ResetPasswordRequest, + db: Session = Depends(get_db), + client_request: Request = None +): + """Request password reset (sends email)""" + try: + # Check if user exists + user = security_manager.get_user_by_email(db, request.email) + if not user: + # Don't reveal if user exists or not + return {"message": "If the email exists, a reset link has been sent"} + + # Generate reset token + reset_token = security_manager.create_access_token( + data={"sub": user.email, "type": "password_reset"}, + expires_delta=timedelta(hours=1) + ) + + # TODO: Send email with reset link + # In production, this would send an actual email + + # Log password reset request + if client_request: + security_manager.log_security_event( + event_type="password_reset_requested", + user_id=user.id, + ip_address=client_request.client.host, + details={"email": user.email} + ) + + return {"message": "If the email exists, a reset link has been sent"} + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to process password reset request" + ) @router.get("/validate") -async def validate_token(current_user: dict = Depends(get_current_user)): +async def validate_token( + current_user: User = 
Depends(get_current_user), + db: Session = Depends(get_db) +): + """Validate current token""" - return { - "valid": True, - "user": { - "id": current_user["id"], - "email": current_user["email"], - "username": current_user["username"], - "role": current_user["role"] + try: + # Get user roles + user_roles = db.query(UserRole).filter(UserRole.user_id == current_user.id).all() + role_names = [] + for user_role in user_roles: + role = db.query(Role).filter(Role.id == user_role.role_id).first() + if role: + role_names.append(role.name) + + return { + "valid": True, + "user": { + "id": current_user.id, + "email": current_user.email, + "full_name": current_user.full_name, + "is_active": current_user.is_active, + "is_superuser": current_user.is_superuser, + "roles": role_names + } + } + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to validate token" + ) + + +@router.get("/permissions") +async def get_user_permissions( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) +): + """Get current user permissions""" + try: + permissions = security_manager.get_user_permissions(db, current_user) + return { + "user_id": current_user.id, + "email": current_user.email, + "permissions": permissions } - } + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to retrieve permissions" + ) + + +@router.get("/security-events") +async def get_security_events( + current_user: User = Depends(require_permission("admin:security_events")), + limit: int = 100 +): + """Get recent security events (admin only). + + Reads up to min(limit, 1000) entries from the Redis list "security_events" + and returns them as {"events": [...], "total": <count>}. Any failure is + reported as HTTP 500. + """ + try: + # Function-scope imports keep this fix self-contained in the endpoint + import ast + import json + + # Get security events from Redis + events = [] + for i in range(min(limit, 1000)): + event = security_manager.redis_client.lindex("security_events", i) + if event: + # Never eval() stored data (arbitrary code execution): parse it as JSON, + # falling back to a literal-only parse for entries written as str(dict) + if isinstance(event, bytes): + event = event.decode("utf-8") + try: + events.append(json.loads(event)) + except ValueError: + events.append(ast.literal_eval(event)) + + return { + "events": events[:limit], + "total": len(events) + } + + except
Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to retrieve security events" + ) diff --git a/backend/app/core/celery_config.py b/backend/app/core/celery_config.py new file mode 100644 index 0000000..b4b867d --- /dev/null +++ b/backend/app/core/celery_config.py @@ -0,0 +1,195 @@ +""" +Celery Configuration for AI Document Agent +Handles distributed task processing for document analysis and AI operations +""" + +import os +from celery import Celery +from celery.schedules import crontab +from .config import settings + +# Create Celery instance +celery_app = Celery( + "ai_document_agent", + broker=settings.REDIS_URL, + backend=settings.REDIS_URL, + include=[ + "app.tasks.document_tasks", + "app.tasks.agent_tasks", + "app.tasks.analytics_tasks", + "app.tasks.maintenance_tasks" + ] +) + +# Celery Configuration +celery_app.conf.update( + # Task routing + task_routes={ + "app.tasks.document_tasks.*": {"queue": "documents"}, + "app.tasks.agent_tasks.*": {"queue": "agents"}, + "app.tasks.analytics_tasks.*": {"queue": "analytics"}, + "app.tasks.maintenance_tasks.*": {"queue": "maintenance"}, + }, + + # Task serialization + task_serializer="json", + accept_content=["json"], + result_serializer="json", + timezone="UTC", + enable_utc=True, + + # Task execution + task_always_eager=False, + task_eager_propagates=True, + task_ignore_result=False, + task_store_errors_even_if_ignored=True, + + # Worker configuration + worker_prefetch_multiplier=1, + worker_max_tasks_per_child=1000, + worker_disable_rate_limits=False, + worker_send_task_events=True, + + # Result backend + result_expires=3600, # 1 hour + result_backend_transport_options={ + "master_name": "mymaster", + "visibility_timeout": 3600, + }, + + # Beat schedule for periodic tasks + beat_schedule={ + "cleanup-expired-documents": { + "task": "app.tasks.maintenance_tasks.cleanup_expired_documents", + "schedule": crontab(hour=2, minute=0), # Daily at 2 AM + }, + 
"generate-analytics-reports": { + "task": "app.tasks.analytics_tasks.generate_daily_reports", + "schedule": crontab(hour=6, minute=0), # Daily at 6 AM + }, + "backup-database": { + "task": "app.tasks.maintenance_tasks.backup_database", + "schedule": crontab(hour=1, minute=0), # Daily at 1 AM + }, + "cleanup-audit-logs": { + "task": "app.tasks.maintenance_tasks.cleanup_audit_logs", + "schedule": crontab(hour=3, minute=0), # Daily at 3 AM + }, + "update-system-metrics": { + "task": "app.tasks.analytics_tasks.update_system_metrics", + "schedule": 300.0, # Every 5 minutes + }, + "process-pending-documents": { + "task": "app.tasks.document_tasks.process_pending_documents", + "schedule": 60.0, # Every minute + }, + }, + + # Task time limits + task_soft_time_limit=300, # 5 minutes + task_time_limit=600, # 10 minutes + + # Retry configuration + task_acks_late=True, + task_reject_on_worker_lost=True, + task_remote_tracebacks=True, + + # Monitoring + # (worker_send_task_events is already passed under "Worker configuration" earlier in + # this call; repeating the keyword here is a SyntaxError: keyword argument repeated) + task_send_sent_event=True, + + # Security + security_key=settings.SECRET_KEY, + security_certificate=None, + security_cert_store=None, +) + +# Task annotations for specific task configurations +celery_app.conf.task_annotations = { + "app.tasks.document_tasks.process_document": { + "rate_limit": "10/m", # 10 tasks per minute + "time_limit": 600, # 10 minutes + "soft_time_limit": 300, # 5 minutes + }, + "app.tasks.agent_tasks.execute_agent": { + "rate_limit": "5/m", # 5 tasks per minute + "time_limit": 900, # 15 minutes + "soft_time_limit": 600, # 10 minutes + }, + "app.tasks.analytics_tasks.generate_report": { + "rate_limit": "1/h", # 1 task per hour + "time_limit": 1800, # 30 minutes + "soft_time_limit": 1200, # 20 minutes + }, +} + +# Error handling +@celery_app.task(bind=True) +def debug_task(self): + """Debug task for testing Celery setup""" + print(f"Request: {self.request!r}") + +# Health check task +@celery_app.task +def health_check(): + """Health check task for monitoring""" + return { +
"status": "healthy", + "timestamp": "2024-01-01T00:00:00Z", + "version": settings.APP_VERSION + } + +# Task failure handling +@celery_app.task(bind=True, max_retries=3) +def handle_task_failure(self, task_id, exc, traceback): + """Handle task failures with retry logic""" + try: + # Log the failure + print(f"Task {task_id} failed: {exc}") + + # Retry with exponential backoff + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries # Exponential backoff + raise self.retry(countdown=countdown, exc=exc) + else: + # Max retries reached, mark as permanently failed + print(f"Task {task_id} permanently failed after {self.max_retries} retries") + + except Exception as e: + print(f"Error handling task failure: {e}") + +# Task success handling +@celery_app.task +def handle_task_success(task_id, result): + """Handle successful task completion""" + try: + print(f"Task {task_id} completed successfully: {result}") + # Additional success handling logic here + except Exception as e: + print(f"Error handling task success: {e}") + +# Celery signal handlers +from celery.signals import task_success, task_failure, task_revoked + +@task_success.connect +def task_success_handler(sender=None, **kwargs): + """Handle task success events. + + Queues handle_task_success only when the finished task returned a dict + carrying a truthy "id" entry; every other result is ignored. + """ + # Task results are arbitrary (None, str, list, ...); calling .get() on a + # non-dict result would raise AttributeError inside the signal handler + result = kwargs.get("result") + task_id = result.get("id") if isinstance(result, dict) else None + if task_id: + handle_task_success.delay(task_id, result) + +@task_failure.connect +def task_failure_handler(sender=None, task_id=None, exception=None, traceback=None, einfo=None, **kwargs): + """Handle task failure events by queueing handle_task_failure for the failed task id""" + if task_id: + # A raw traceback object cannot be encoded by the JSON task serializer + # configured for this app, so pass the formatted text from einfo instead + traceback_text = str(einfo) if einfo is not None else None + handle_task_failure.delay(task_id, str(exception), traceback_text) + +@task_revoked.connect +def task_revoked_handler(sender=None, request=None, terminated=None, signum=None, expired=None, **kwargs): + """Handle task revocation events""" + task_id = request.id if request else None + if task_id: + print(f"Task {task_id} was revoked (terminated={terminated}, expired={expired})") + +# Export the Celery app +__all__ = ["celery_app"] diff
--git a/backend/app/core/config.py b/backend/app/core/config.py index 1e667d8..901b9a2 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -1,79 +1,109 @@ import os -from typing import List, Optional -from pydantic import BaseSettings, Field +from typing import List, Optional, Dict, Any +from pydantic import BaseSettings, Field, validator +from pydantic_settings import BaseSettings as PydanticBaseSettings -class Settings(BaseSettings): - """Application settings""" +class Settings(PydanticBaseSettings): + """Application settings with environment variable support""" # Application settings - APP_NAME: str = "AI Document Agent" - APP_VERSION: str = "1.0.0" + APP_NAME: str = Field(default="AI Document Agent", env="APP_NAME") + APP_VERSION: str = Field(default="1.0.0", env="APP_VERSION") DEBUG: bool = Field(default=False, env="DEBUG") + LOG_LEVEL: str = Field(default="INFO", env="LOG_LEVEL") + + # Server settings HOST: str = Field(default="0.0.0.0", env="HOST") PORT: int = Field(default=8000, env="PORT") # Security settings SECRET_KEY: str = Field(default="your-secret-key-here", env="SECRET_KEY") - ALGORITHM: str = "HS256" + ALGORITHM: str = Field(default="HS256", env="ALGORITHM") ACCESS_TOKEN_EXPIRE_MINUTES: int = Field(default=30, env="ACCESS_TOKEN_EXPIRE_MINUTES") - # CORS settings - ALLOWED_ORIGINS: List[str] = Field( - default=["http://localhost:3000", "http://localhost:8080"], - env="ALLOWED_ORIGINS" - ) - ALLOWED_HOSTS: List[str] = Field( - default=["localhost", "127.0.0.1"], - env="ALLOWED_HOSTS" - ) - - # File upload settings + # Database settings + DATABASE_URL: str = Field(default="postgresql://user:password@localhost/ai_document_agent", env="DATABASE_URL") + DATABASE_POOL_SIZE: int = Field(default=10, env="DATABASE_POOL_SIZE") + DATABASE_MAX_OVERFLOW: int = Field(default=20, env="DATABASE_MAX_OVERFLOW") + + # Redis settings + REDIS_URL: str = Field(default="redis://localhost:6379/0", env="REDIS_URL") + REDIS_MAX_CONNECTIONS: int = 
Field(default=10, env="REDIS_MAX_CONNECTIONS") + + # ChromaDB settings + CHROMA_PERSIST_DIRECTORY: str = Field(default="./chroma_db", env="CHROMA_PERSIST_DIRECTORY") + CHROMA_COLLECTION_NAME: str = Field(default="documents", env="CHROMA_COLLECTION_NAME") + + # File storage settings UPLOAD_DIR: str = Field(default="./uploads", env="UPLOAD_DIR") - MAX_FILE_SIZE: int = Field(default=50 * 1024 * 1024, env="MAX_FILE_SIZE") # 50MB - ALLOWED_FILE_TYPES: List[str] = Field( - default=[".pdf", ".docx", ".txt", ".csv", ".xlsx"], - env="ALLOWED_FILE_TYPES" - ) - - # LLM settings - LLM_MODEL: str = Field(default="gpt-4", env="LLM_MODEL") - LLM_API_KEY: str = Field(default="", env="OPENAI_API_KEY") - LLM_TEMPERATURE: float = Field(default=0.1, env="LLM_TEMPERATURE") - LLM_MAX_TOKENS: int = Field(default=4000, env="LLM_MAX_TOKENS") + MAX_FILE_SIZE: int = Field(default=100 * 1024 * 1024, env="MAX_FILE_SIZE") # 100MB + ALLOWED_FILE_TYPES: List[str] = Field(default=[".pdf", ".docx", ".txt", ".csv", ".xlsx"], env="ALLOWED_FILE_TYPES") + + # CORS settings + ALLOWED_ORIGINS: List[str] = Field(default=["http://localhost:3000"], env="ALLOWED_ORIGINS") + ALLOWED_METHODS: List[str] = Field(default=["GET", "POST", "PUT", "DELETE", "OPTIONS"], env="ALLOWED_METHODS") + ALLOWED_HEADERS: List[str] = Field(default=["*"], env="ALLOWED_HEADERS") + + # Monitoring settings + ENABLE_MONITORING: bool = Field(default=True, env="ENABLE_MONITORING") + PROMETHEUS_PORT: int = Field(default=9090, env="PROMETHEUS_PORT") + GRAFANA_PORT: int = Field(default=3001, env="GRAFANA_PORT") + + # AI/ML settings + OPENAI_API_KEY: str = Field(default="", env="OPENAI_API_KEY") + OPENAI_MODEL: str = Field(default="gpt-4", env="OPENAI_MODEL") + OPENAI_MAX_TOKENS: int = Field(default=4000, env="OPENAI_MAX_TOKENS") # Agent settings AGENT_TIMEOUT: int = Field(default=300, env="AGENT_TIMEOUT") # 5 minutes AGENT_MAX_RETRIES: int = Field(default=3, env="AGENT_MAX_RETRIES") - AGENT_CONFIDENCE_THRESHOLD: float = Field(default=0.7, 
env="AGENT_CONFIDENCE_THRESHOLD") + AGENT_CONCURRENT_LIMIT: int = Field(default=10, env="AGENT_CONCURRENT_LIMIT") - # Workflow settings - WORKFLOW_MAX_STAGES: int = Field(default=10, env="WORKFLOW_MAX_STAGES") - WORKFLOW_PARALLEL_EXECUTION: bool = Field(default=True, env="WORKFLOW_PARALLEL_EXECUTION") - WORKFLOW_MONITORING_INTERVAL: int = Field(default=5, env="WORKFLOW_MONITORING_INTERVAL") + # Rate limiting settings + RATE_LIMIT_REQUESTS: int = Field(default=1000, env="RATE_LIMIT_REQUESTS") + RATE_LIMIT_WINDOW: int = Field(default=3600, env="RATE_LIMIT_WINDOW") # 1 hour - # Database settings - DATABASE_URL: str = Field(default="sqlite:///./smart_doc_bot.db", env="DATABASE_URL") - REDIS_URL: str = Field(default="redis://localhost:6379", env="REDIS_URL") - CHROMA_PERSIST_DIRECTORY: str = Field(default="./chroma_db", env="CHROMA_PERSIST_DIRECTORY") + # Audit settings + AUDIT_LOG_ENABLED: bool = Field(default=True, env="AUDIT_LOG_ENABLED") + AUDIT_LOG_RETENTION_DAYS: int = Field(default=90, env="AUDIT_LOG_RETENTION_DAYS") - # Memory settings - MEMORY_TTL: int = Field(default=3600, env="MEMORY_TTL") # 1 hour - MEMORY_MAX_SIZE: int = Field(default=1000, env="MEMORY_MAX_SIZE") - VECTOR_SIMILARITY_THRESHOLD: float = Field(default=0.8, env="VECTOR_SIMILARITY_THRESHOLD") + # Email settings (for notifications) + SMTP_HOST: str = Field(default="", env="SMTP_HOST") + SMTP_PORT: int = Field(default=587, env="SMTP_PORT") + SMTP_USERNAME: str = Field(default="", env="SMTP_USERNAME") + SMTP_PASSWORD: str = Field(default="", env="SMTP_PASSWORD") + SMTP_USE_TLS: bool = Field(default=True, env="SMTP_USE_TLS") - # Monitoring settings - ENABLE_MONITORING: bool = Field(default=True, env="ENABLE_MONITORING") - METRICS_PORT: int = Field(default=9090, env="METRICS_PORT") - LOG_LEVEL: str = Field(default="INFO", env="LOG_LEVEL") + # Backup settings + BACKUP_ENABLED: bool = Field(default=True, env="BACKUP_ENABLED") + BACKUP_RETENTION_DAYS: int = Field(default=30, env="BACKUP_RETENTION_DAYS") 
+ BACKUP_SCHEDULE: str = Field(default="0 2 * * *", env="BACKUP_SCHEDULE") # Daily at 2 AM - # Audit settings - AUDIT_ENABLED: bool = Field(default=True, env="AUDIT_ENABLED") - AUDIT_RETENTION_DAYS: int = Field(default=90, env="AUDIT_RETENTION_DAYS") - AUDIT_ENCRYPTION_ENABLED: bool = Field(default=False, env="AUDIT_ENCRYPTION_ENABLED") + # Performance settings + WORKER_PROCESSES: int = Field(default=4, env="WORKER_PROCESSES") + MAX_CONCURRENT_REQUESTS: int = Field(default=100, env="MAX_CONCURRENT_REQUESTS") + + # Feature flags + ENABLE_WEBSOCKETS: bool = Field(default=True, env="ENABLE_WEBSOCKETS") + ENABLE_SSE: bool = Field(default=True, env="ENABLE_SSE") + ENABLE_REAL_TIME_UPDATES: bool = Field(default=True, env="ENABLE_REAL_TIME_UPDATES") class Config: env_file = ".env" + case_sensitive = True + + @validator("ALLOWED_ORIGINS", pre=True) + def parse_allowed_origins(cls, v): + if isinstance(v, str): + return [origin.strip() for origin in v.split(",")] + return v + + @validator("ALLOWED_FILE_TYPES", pre=True) + def parse_allowed_file_types(cls, v): + if isinstance(v, str): + return [file_type.strip() for file_type in v.split(",")] + return v # Global settings instance @@ -85,23 +115,130 @@ def get_settings() -> Settings: return settings -def get_agent_config(): - """Get agent configuration""" +def get_agent_config() -> Dict[str, Any]: + """Get agent-specific configuration""" + return { + "timeout": settings.AGENT_TIMEOUT, + "max_retries": settings.AGENT_MAX_RETRIES, + "concurrent_limit": settings.AGENT_CONCURRENT_LIMIT, + "openai_model": settings.OPENAI_MODEL, + "openai_max_tokens": settings.OPENAI_MAX_TOKENS, + "chroma_collection": settings.CHROMA_COLLECTION_NAME, + "chroma_persist_dir": settings.CHROMA_PERSIST_DIRECTORY + } + + +def get_workflow_config() -> Dict[str, Any]: + """Get workflow-specific configuration""" return { - "TIMEOUT": settings.AGENT_TIMEOUT, - "MAX_RETRIES": settings.AGENT_MAX_RETRIES, - "CONFIDENCE_THRESHOLD": 
settings.AGENT_CONFIDENCE_THRESHOLD, - "LLM_MODEL": settings.LLM_MODEL, - "LLM_API_KEY": settings.LLM_API_KEY, - "LLM_TEMPERATURE": settings.LLM_TEMPERATURE, - "LLM_MAX_TOKENS": settings.LLM_MAX_TOKENS + "max_file_size": settings.MAX_FILE_SIZE, + "allowed_file_types": settings.ALLOWED_FILE_TYPES, + "upload_dir": settings.UPLOAD_DIR, + "backup_enabled": settings.BACKUP_ENABLED, + "audit_enabled": settings.AUDIT_LOG_ENABLED, + "rate_limit_requests": settings.RATE_LIMIT_REQUESTS, + "rate_limit_window": settings.RATE_LIMIT_WINDOW } -def get_workflow_config(): - """Get workflow configuration""" +def get_database_config() -> Dict[str, Any]: + """Get database-specific configuration""" + return { + "url": settings.DATABASE_URL, + "pool_size": settings.DATABASE_POOL_SIZE, + "max_overflow": settings.DATABASE_MAX_OVERFLOW, + "echo": settings.DEBUG + } + + +def get_redis_config() -> Dict[str, Any]: + """Get Redis-specific configuration""" + return { + "url": settings.REDIS_URL, + "max_connections": settings.REDIS_MAX_CONNECTIONS, + "decode_responses": True + } + + +def get_monitoring_config() -> Dict[str, Any]: + """Get monitoring-specific configuration""" + return { + "enabled": settings.ENABLE_MONITORING, + "prometheus_port": settings.PROMETHEUS_PORT, + "grafana_port": settings.GRAFANA_PORT, + "log_level": settings.LOG_LEVEL + } + + +def get_security_config() -> Dict[str, Any]: + """Get security-specific configuration""" + return { + "secret_key": settings.SECRET_KEY, + "algorithm": settings.ALGORITHM, + "access_token_expire_minutes": settings.ACCESS_TOKEN_EXPIRE_MINUTES, + "allowed_origins": settings.ALLOWED_ORIGINS, + "allowed_methods": settings.ALLOWED_METHODS, + "allowed_headers": settings.ALLOWED_HEADERS, + "rate_limit_requests": settings.RATE_LIMIT_REQUESTS, + "rate_limit_window": settings.RATE_LIMIT_WINDOW + } + + +def get_email_config() -> Dict[str, Any]: + """Get email-specific configuration""" + return { + "smtp_host": settings.SMTP_HOST, + "smtp_port": 
settings.SMTP_PORT, + "smtp_username": settings.SMTP_USERNAME, + "smtp_password": settings.SMTP_PASSWORD, + "smtp_use_tls": settings.SMTP_USE_TLS + } + + +def validate_settings() -> List[str]: + """Validate settings and return list of issues""" + issues = [] + + # Check required settings + if not settings.SECRET_KEY or settings.SECRET_KEY == "your-secret-key-here": + issues.append("SECRET_KEY must be set to a secure value") + + if not settings.OPENAI_API_KEY: + issues.append("OPENAI_API_KEY must be set for AI functionality") + + if not settings.DATABASE_URL or "localhost" in settings.DATABASE_URL: + issues.append("DATABASE_URL should point to a production database") + + # Check file paths + if not os.path.exists(settings.UPLOAD_DIR): + try: + os.makedirs(settings.UPLOAD_DIR, exist_ok=True) + except Exception as e: + issues.append(f"Cannot create upload directory: {e}") + + if not os.path.exists(settings.CHROMA_PERSIST_DIRECTORY): + try: + os.makedirs(settings.CHROMA_PERSIST_DIRECTORY, exist_ok=True) + except Exception as e: + issues.append(f"Cannot create ChromaDB directory: {e}") + + return issues + + +def get_environment_info() -> Dict[str, Any]: + """Get environment information for debugging""" return { - "MAX_STAGES": settings.WORKFLOW_MAX_STAGES, - "PARALLEL_EXECUTION": settings.WORKFLOW_PARALLEL_EXECUTION, - "MONITORING_INTERVAL": settings.WORKFLOW_MONITORING_INTERVAL + "app_name": settings.APP_NAME, + "app_version": settings.APP_VERSION, + "debug": settings.DEBUG, + "log_level": settings.LOG_LEVEL, + "host": settings.HOST, + "port": settings.PORT, + "database_url": settings.DATABASE_URL.replace(settings.DATABASE_URL.split("@")[0].split(":")[-1], "***") if "@" in settings.DATABASE_URL else settings.DATABASE_URL, + "redis_url": settings.REDIS_URL.replace("redis://", "redis://***@") if "redis://" in settings.REDIS_URL else settings.REDIS_URL, + "openai_model": settings.OPENAI_MODEL, + "monitoring_enabled": settings.ENABLE_MONITORING, + "websockets_enabled": 
settings.ENABLE_WEBSOCKETS, + "audit_enabled": settings.AUDIT_LOG_ENABLED, + "backup_enabled": settings.BACKUP_ENABLED } \ No newline at end of file diff --git a/backend/app/core/middleware.py b/backend/app/core/middleware.py index 175925f..579d94b 100644 --- a/backend/app/core/middleware.py +++ b/backend/app/core/middleware.py @@ -1,381 +1,318 @@ -import re import time +import logging import json -import uuid -from datetime import datetime -from typing import Dict, List, Optional, Any +from typing import Callable, Dict, Any +from fastapi import FastAPI, Request, Response +from fastapi.middleware.cors import CORSMiddleware +from fastapi.middleware.trustedhost import TrustedHostMiddleware from starlette.middleware.base import BaseHTTPMiddleware -from starlette.requests import Request -from starlette.responses import Response -from fastapi import FastAPI +from starlette.responses import JSONResponse from .config import settings -from .monitoring import get_monitor -# PII patterns for redaction -PII_PATTERNS = [ - # Email addresses - (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'), - # Phone numbers (various formats) - (r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]'), - (r'\b\(\d{3}\)\s*\d{3}[-.]?\d{4}\b', '[PHONE]'), - # Social Security Numbers - (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'), - # Credit Card Numbers (basic pattern) - (r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b', '[CREDIT_CARD]'), - # IP Addresses - (r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '[IP_ADDRESS]'), - # Basic name patterns (consecutive capitalized words) - (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'), -] +logger = logging.getLogger(__name__) -class PIIRedactionMiddleware(BaseHTTPMiddleware): - """Middleware to redact PII from request/response data""" - - def __init__(self, app: FastAPI, enabled: bool = True): - super().__init__(app) - self.enabled = enabled - self.monitor = get_monitor() - - async def dispatch(self, request: Request, call_next): - if not self.enabled: - return await 
call_next(request) - - # Store original body for redaction - body = b"" - if request.method in ["POST", "PUT", "PATCH"]: - body = await request.body() - - # Create new request with redacted body if needed - if body: - redacted_body = self._redact_pii(body.decode('utf-8', errors='ignore')) - # Note: In production, you might want to log the redacted version - # but pass the original to the application - - # Process the request - response = await call_next(request) - - # Redact response if needed (for logging purposes) - if hasattr(response, 'body'): - # This is a simplified approach - in production you'd want more sophisticated handling - pass - - return response - - def _redact_pii(self, text: str) -> str: - """Redact PII from text using regex patterns""" - redacted_text = text - - for pattern, replacement in PII_PATTERNS: - redacted_text = re.sub(pattern, replacement, redacted_text, flags=re.IGNORECASE) - - return redacted_text - - -class AuditLogMiddleware(BaseHTTPMiddleware): - """Middleware to log API requests for audit purposes""" - - def __init__(self, app: FastAPI, enabled: bool = True): - super().__init__(app) - self.enabled = enabled - self.monitor = get_monitor() +class RequestLoggingMiddleware(BaseHTTPMiddleware): + """Middleware for logging all incoming requests""" - async def dispatch(self, request: Request, call_next): - if not self.enabled: - return await call_next(request) - - # Generate request ID for tracing - request_id = str(uuid.uuid4()) - - # Extract request information + async def dispatch(self, request: Request, call_next: Callable) -> Response: start_time = time.time() - client_ip = self._get_client_ip(request) - user_agent = request.headers.get("user-agent", "") - method = request.method - url = str(request.url) - - # Extract user information if available - user_id = None - if hasattr(request.state, 'user'): - user_id = request.state.user.get('id') - # Log request start - self.monitor.log_info( - "audit_middleware", - f"API request started: 
{method} {url}", - { - "request_id": request_id, - "method": method, - "url": url, - "client_ip": client_ip, - "user_agent": user_agent, - "user_id": user_id - }, - trace_id=request_id, - user_id=user_id + # Log request + logger.info( + f"Request: {request.method} {request.url.path} - " + f"Client: {request.client.host if request.client else 'unknown'}" ) # Process request try: response = await call_next(request) - # Calculate response time - end_time = time.time() - response_time = end_time - start_time + # Calculate processing time + process_time = time.time() - start_time - # Log successful request - self.monitor.log_info( - "audit_middleware", - f"API request completed: {method} {url} - {response.status_code}", - { - "request_id": request_id, - "method": method, - "url": url, - "status_code": response.status_code, - "response_time": response_time, - "client_ip": client_ip, - "user_id": user_id - }, - trace_id=request_id, - user_id=user_id + # Log response + logger.info( + f"Response: {request.method} {request.url.path} - " + f"Status: {response.status_code} - " + f"Time: {process_time:.3f}s" ) - # Add request ID to response headers - response.headers["X-Request-ID"] = request_id + # Add processing time header + response.headers["X-Process-Time"] = str(process_time) return response except Exception as e: - # Log failed request - end_time = time.time() - response_time = end_time - start_time - - self.monitor.log_error( - "audit_middleware", - f"API request failed: {method} {url}", - str(e), - trace_id=request_id, - user_id=user_id + # Log error + process_time = time.time() - start_time + logger.error( + f"Error: {request.method} {request.url.path} - " + f"Exception: {str(e)} - " + f"Time: {process_time:.3f}s" ) - raise + + +class SecurityHeadersMiddleware(BaseHTTPMiddleware): + """Middleware for adding security headers""" - def _get_client_ip(self, request: Request) -> str: - """Extract client IP address from request""" - # Check for forwarded headers first - 
forwarded_for = request.headers.get("x-forwarded-for") - if forwarded_for: - return forwarded_for.split(",")[0].strip() + async def dispatch(self, request: Request, call_next: Callable) -> Response: + response = await call_next(request) - real_ip = request.headers.get("x-real-ip") - if real_ip: - return real_ip + # Add security headers + response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["X-Frame-Options"] = "DENY" + response.headers["X-XSS-Protection"] = "1; mode=block" + response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin" + response.headers["Permissions-Policy"] = "geolocation=(), microphone=(), camera=()" - # Fall back to direct client IP - if hasattr(request.client, 'host'): - return request.client.host + # Add CSP header if not already present + if "Content-Security-Policy" not in response.headers: + response.headers["Content-Security-Policy"] = ( + "default-src 'self'; " + "script-src 'self' 'unsafe-inline' 'unsafe-eval'; " + "style-src 'self' 'unsafe-inline'; " + "img-src 'self' data: https:; " + "font-src 'self' data:; " + "connect-src 'self' ws: wss:;" + ) - return "unknown" + return response -class RequestLoggingMiddleware(BaseHTTPMiddleware): - """Middleware for detailed request/response logging""" +class RateLimitMiddleware(BaseHTTPMiddleware): + """Middleware for rate limiting""" - def __init__(self, app: FastAPI, enabled: bool = True, log_bodies: bool = False): + def __init__(self, app, redis_client=None): super().__init__(app) - self.enabled = enabled - self.log_bodies = log_bodies - self.monitor = get_monitor() - - # Endpoints to exclude from detailed logging (to avoid noise) - self.exclude_paths = [ - "/health", - "/metrics", - "/docs", - "/openapi.json", - "/favicon.ico" - ] + self.redis_client = redis_client - async def dispatch(self, request: Request, call_next): - if not self.enabled: - return await call_next(request) - - # Skip logging for excluded paths - if any(request.url.path.startswith(path) for 
path in self.exclude_paths): + async def dispatch(self, request: Request, call_next: Callable) -> Response: + if not self.redis_client: return await call_next(request) - # Generate trace ID - trace_id = str(uuid.uuid4()) - - # Extract request details - start_time = time.time() - method = request.method - url = str(request.url) - headers = dict(request.headers) - - # Remove sensitive headers - sensitive_headers = ["authorization", "cookie", "x-api-key"] - filtered_headers = { - k: v if k.lower() not in sensitive_headers else "[REDACTED]" - for k, v in headers.items() - } - - # Log request body if enabled - request_body = None - if self.log_bodies and method in ["POST", "PUT", "PATCH"]: - try: - body = await request.body() - request_body = body.decode('utf-8', errors='ignore')[:1000] # Limit size - except Exception: - request_body = "[ERROR_READING_BODY]" - - # Log request details - self.monitor.log_info( - "request_logging", - f"Incoming request: {method} {url}", - { - "trace_id": trace_id, - "method": method, - "url": url, - "headers": filtered_headers, - "body": request_body if self.log_bodies else None, - "content_length": headers.get("content-length"), - "content_type": headers.get("content-type") - }, - trace_id=trace_id - ) + # Get client identifier (IP address) + client_ip = request.client.host if request.client else "unknown" - # Process request + # Check rate limit try: - response = await call_next(request) - - # Calculate metrics - end_time = time.time() - response_time = end_time - start_time - - # Log response details - self.monitor.log_info( - "request_logging", - f"Response: {method} {url} - {response.status_code}", - { - "trace_id": trace_id, - "status_code": response.status_code, - "response_time": response_time, - "response_headers": dict(response.headers) if hasattr(response, 'headers') else {} - }, - trace_id=trace_id - ) - - # Add trace ID to response - response.headers["X-Trace-ID"] = trace_id - - return response + key = 
f"rate_limit:{client_ip}" + current_requests = self.redis_client.get(key) + if current_requests is None: + self.redis_client.setex(key, settings.RATE_LIMIT_WINDOW, 1) + else: + current_requests = int(current_requests) + if current_requests >= settings.RATE_LIMIT_REQUESTS: + logger.warning(f"Rate limit exceeded for IP: {client_ip}") + return JSONResponse( + status_code=429, + content={ + "error": "Rate limit exceeded", + "message": f"Too many requests. Limit: {settings.RATE_LIMIT_REQUESTS} per {settings.RATE_LIMIT_WINDOW} seconds" + } + ) + + self.redis_client.incr(key) + except Exception as e: - # Log error - end_time = time.time() - response_time = end_time - start_time - - self.monitor.log_error( - "request_logging", - f"Request error: {method} {url}", - str(e), - trace_id=trace_id - ) - - raise + logger.error(f"Rate limit check failed: {e}") + # Continue without rate limiting if Redis fails + + return await call_next(request) -class RateLimitMiddleware(BaseHTTPMiddleware): - """Simple rate limiting middleware""" +class PIIRedactionMiddleware(BaseHTTPMiddleware): + """Middleware for redacting PII from logs""" - def __init__(self, app: FastAPI, enabled: bool = True, requests_per_minute: int = 60): + def __init__(self, app): super().__init__(app) - self.enabled = enabled - self.requests_per_minute = requests_per_minute - self.request_counts: Dict[str, List[float]] = {} - self.monitor = get_monitor() + self.pii_patterns = [ + r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', # Email + r'\b\d{3}-\d{2}-\d{4}\b', # SSN + r'\b\d{4}-\d{4}-\d{4}-\d{4}\b', # Credit card + r'\b\d{10,11}\b', # Phone numbers + ] - async def dispatch(self, request: Request, call_next): - if not self.enabled: + def redact_pii(self, text: str) -> str: + """Redact PII from text""" + import re + for pattern in self.pii_patterns: + text = re.sub(pattern, '[REDACTED]', text) + return text + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + # Redact PII from 
request body if present + if request.method in ["POST", "PUT", "PATCH"]: + try: + body = await request.body() + if body: + body_str = body.decode() + redacted_body = self.redact_pii(body_str) + # Create new request with redacted body + request._body = redacted_body.encode() + except Exception as e: + logger.error(f"PII redaction failed: {e}") + + return await call_next(request) + + +class AuditLogMiddleware(BaseHTTPMiddleware): + """Middleware for audit logging""" + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + # Skip audit logging for health checks and static files + if request.url.path in ["/health", "/health/detailed", "/docs", "/redoc"]: return await call_next(request) - # Get client identifier (IP address) - client_ip = self._get_client_ip(request) - current_time = time.time() + # Get user info if authenticated + user_id = None + user_email = None + try: + # This would be set by authentication middleware + user_id = getattr(request.state, "user_id", None) + user_email = getattr(request.state, "user_email", None) + except Exception: + pass - # Clean old requests (older than 1 minute) - if client_ip in self.request_counts: - self.request_counts[client_ip] = [ - req_time for req_time in self.request_counts[client_ip] - if current_time - req_time < 60 - ] - else: - self.request_counts[client_ip] = [] + # Log audit event + audit_event = { + "timestamp": time.time(), + "method": request.method, + "path": request.url.path, + "query_params": dict(request.query_params), + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", ""), + "user_id": user_id, + "user_email": user_email, + } - # Check rate limit - if len(self.request_counts[client_ip]) >= self.requests_per_minute: - self.monitor.log_warning( - "rate_limit", - f"Rate limit exceeded for IP: {client_ip}", - { - "client_ip": client_ip, - "requests_in_window": len(self.request_counts[client_ip]), - "limit": 
self.requests_per_minute - } + logger.info(f"AUDIT: {json.dumps(audit_event)}") + + return await call_next(request) + + +class ErrorHandlingMiddleware(BaseHTTPMiddleware): + """Middleware for handling and logging errors""" + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + try: + return await call_next(request) + except Exception as e: + # Log the error + logger.error( + f"Unhandled exception in {request.method} {request.url.path}: {str(e)}", + exc_info=True ) - return Response( - content=json.dumps({"error": "Rate limit exceeded"}), - status_code=429, - headers={"Content-Type": "application/json"} + # Return error response + return JSONResponse( + status_code=500, + content={ + "error": "Internal server error", + "message": "An unexpected error occurred", + "path": request.url.path + } ) - - # Add current request to count - self.request_counts[client_ip].append(current_time) + + +class MetricsMiddleware(BaseHTTPMiddleware): + """Middleware for collecting metrics""" + + def __init__(self, app, metrics_collector=None): + super().__init__(app) + self.metrics_collector = metrics_collector + + async def dispatch(self, request: Request, call_next: Callable) -> Response: + start_time = time.time() # Process request - return await call_next(request) - - def _get_client_ip(self, request: Request) -> str: - """Extract client IP address from request""" - forwarded_for = request.headers.get("x-forwarded-for") - if forwarded_for: - return forwarded_for.split(",")[0].strip() + response = await call_next(request) - real_ip = request.headers.get("x-real-ip") - if real_ip: - return real_ip + # Calculate metrics + process_time = time.time() - start_time - if hasattr(request.client, 'host'): - return request.client.host + # Record metrics if collector is available + if self.metrics_collector: + try: + self.metrics_collector.record_request( + method=request.method, + path=request.url.path, + status_code=response.status_code, + duration=process_time + ) 
+ except Exception as e: + logger.error(f"Failed to record metrics: {e}") - return "unknown" + return response -def setup_middleware(app: FastAPI): - """Setup all middleware for the application""" - - # Add rate limiting - if settings.ENABLE_MONITORING: - app.add_middleware(RateLimitMiddleware, enabled=True, requests_per_minute=120) +def setup_middleware(app: FastAPI) -> None: + """Setup all middleware for the FastAPI application""" - # Add request logging + # Add CORS middleware app.add_middleware( - RequestLoggingMiddleware, - enabled=settings.ENABLE_MONITORING, - log_bodies=settings.DEBUG + CORSMiddleware, + allow_origins=settings.ALLOWED_ORIGINS, + allow_credentials=True, + allow_methods=settings.ALLOWED_METHODS, + allow_headers=settings.ALLOWED_HEADERS, ) - # Add audit logging + # Add trusted host middleware app.add_middleware( - AuditLogMiddleware, - enabled=settings.AUDIT_ENABLED + TrustedHostMiddleware, + allowed_hosts=["*"] # Configure based on your deployment ) - # Add PII redaction - app.add_middleware( - PIIRedactionMiddleware, - enabled=True - ) \ No newline at end of file + # Add custom middleware in order + app.add_middleware(ErrorHandlingMiddleware) + app.add_middleware(RequestLoggingMiddleware) + app.add_middleware(SecurityHeadersMiddleware) + app.add_middleware(PIIRedactionMiddleware) + app.add_middleware(AuditLogMiddleware) + + # Add rate limiting middleware if Redis is available + try: + import redis + redis_client = redis.Redis.from_url(settings.REDIS_URL, decode_responses=True) + redis_client.ping() # Test connection + app.add_middleware(RateLimitMiddleware, redis_client=redis_client) + logger.info("Rate limiting middleware enabled") + except Exception as e: + logger.warning(f"Rate limiting middleware disabled: {e}") + + # Add metrics middleware if monitoring is enabled + if settings.ENABLE_MONITORING: + try: + from .monitoring import MetricsCollector + metrics_collector = MetricsCollector() + app.add_middleware(MetricsMiddleware, 
metrics_collector=metrics_collector) + logger.info("Metrics middleware enabled") + except Exception as e: + logger.warning(f"Metrics middleware disabled: {e}") + + logger.info("Middleware setup completed") + + +def get_request_info(request: Request) -> Dict[str, Any]: + """Extract request information for logging""" + return { + "method": request.method, + "url": str(request.url), + "path": request.url.path, + "query_params": dict(request.query_params), + "headers": dict(request.headers), + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", ""), + } + + +def get_response_info(response: Response) -> Dict[str, Any]: + """Extract response information for logging""" + return { + "status_code": response.status_code, + "headers": dict(response.headers), + } \ No newline at end of file diff --git a/backend/app/core/monitoring.py b/backend/app/core/monitoring.py index 5dbe989..1ee45fa 100644 --- a/backend/app/core/monitoring.py +++ b/backend/app/core/monitoring.py @@ -1,726 +1,448 @@ -import logging import time +import logging import json import asyncio +from typing import Dict, Any, Optional, List from datetime import datetime, timedelta -from typing import Dict, List, Optional, Any -from contextlib import asynccontextmanager, contextmanager -from dataclasses import dataclass, asdict -from enum import Enum -import traceback -import psutil -import threading from collections import defaultdict, deque +import threading +import psutil +import os -from .config import get_settings, get_agent_config, get_workflow_config - -settings = get_settings() -agent_config = get_agent_config() -workflow_config = get_workflow_config() - - -class LogLevel(Enum): - """Log levels""" - DEBUG = "DEBUG" - INFO = "INFO" - WARNING = "WARNING" - ERROR = "ERROR" - CRITICAL = "CRITICAL" - - -class MetricType(Enum): - """Metric types""" - COUNTER = "counter" - GAUGE = "gauge" - HISTOGRAM = "histogram" - SUMMARY = "summary" - - -@dataclass 
-class Metric: - """Metric data structure""" - name: str - value: float - metric_type: MetricType - labels: Dict[str, str] - timestamp: datetime - description: str = "" - - -@dataclass -class LogEntry: - """Log entry data structure""" - timestamp: datetime - level: LogLevel - message: str - module: str - function: str - line_number: int - extra_data: Dict[str, Any] - trace_id: Optional[str] = None - user_id: Optional[str] = None - - -@dataclass -class AgentExecutionMetrics: - """Agent execution metrics""" - agent_type: str - execution_time: float - confidence: float - success: bool - error_message: Optional[str] = None - input_size: int - output_size: int - memory_usage: float - cpu_usage: float - +from .config import settings -@dataclass -class WorkflowMetrics: - """Workflow execution metrics""" - workflow_id: str - total_stages: int - completed_stages: int - failed_stages: int - total_execution_time: float - average_stage_time: float - memory_peak: float - cpu_peak: float - status: str +logger = logging.getLogger(__name__) class MetricsCollector: - """Metrics collection and storage""" + """Collects and stores application metrics""" - def __init__(self): - self.metrics: List[Metric] = [] - self.metrics_lock = threading.Lock() - self.max_metrics = 10000 + def __init__(self, max_history: int = 1000): + self.max_history = max_history + self.request_metrics = deque(maxlen=max_history) + self.error_metrics = deque(maxlen=max_history) + self.performance_metrics = deque(maxlen=max_history) + self.agent_metrics = deque(maxlen=max_history) + self.lock = threading.Lock() + + # Initialize counters + self.total_requests = 0 + self.total_errors = 0 + self.total_agent_executions = 0 + + # Start background metrics collection + self._start_background_collection() + + def record_request(self, method: str, path: str, status_code: int, duration: float): + """Record a request metric""" + metric = { + "timestamp": time.time(), + "method": method, + "path": path, + "status_code": 
status_code, + "duration": duration, + "datetime": datetime.utcnow().isoformat() + } + + with self.lock: + self.request_metrics.append(metric) + self.total_requests += 1 + + if status_code >= 400: + self.error_metrics.append(metric) + self.total_errors += 1 + + def record_agent_execution(self, agent_name: str, duration: float, success: bool, + confidence: Optional[float] = None, error: Optional[str] = None): + """Record an agent execution metric""" + metric = { + "timestamp": time.time(), + "agent_name": agent_name, + "duration": duration, + "success": success, + "confidence": confidence, + "error": error, + "datetime": datetime.utcnow().isoformat() + } + + with self.lock: + self.agent_metrics.append(metric) + self.total_agent_executions += 1 + + def record_performance_metric(self, metric_name: str, value: float, tags: Optional[Dict[str, str]] = None): + """Record a performance metric""" + metric = { + "timestamp": time.time(), + "metric_name": metric_name, + "value": value, + "tags": tags or {}, + "datetime": datetime.utcnow().isoformat() + } - def add_metric(self, metric: Metric): - """Add a metric to the collection""" - with self.metrics_lock: - self.metrics.append(metric) - if len(self.metrics) > self.max_metrics: - # Remove oldest metrics - self.metrics = self.metrics[-self.max_metrics:] + with self.lock: + self.performance_metrics.append(metric) - def get_metrics(self, metric_name: Optional[str] = None, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None) -> List[Metric]: - """Get metrics with optional filtering""" - with self.metrics_lock: - filtered_metrics = self.metrics + def get_request_stats(self, window_minutes: int = 60) -> Dict[str, Any]: + """Get request statistics for the specified time window""" + cutoff_time = time.time() - (window_minutes * 60) + + with self.lock: + recent_requests = [ + req for req in self.request_metrics + if req["timestamp"] >= cutoff_time + ] - if metric_name: - filtered_metrics = [m for m in 
filtered_metrics if m.name == metric_name] + if not recent_requests: + return { + "total_requests": 0, + "avg_response_time": 0, + "error_rate": 0, + "status_codes": {}, + "endpoints": {} + } - if start_time: - filtered_metrics = [m for m in filtered_metrics if m.timestamp >= start_time] + # Calculate statistics + total_requests = len(recent_requests) + avg_response_time = sum(req["duration"] for req in recent_requests) / total_requests + error_count = len([req for req in recent_requests if req["status_code"] >= 400]) + error_rate = (error_count / total_requests) * 100 if total_requests > 0 else 0 - if end_time: - filtered_metrics = [m for m in filtered_metrics if m.timestamp <= end_time] + # Status code distribution + status_codes = defaultdict(int) + for req in recent_requests: + status_codes[req["status_code"]] += 1 - return filtered_metrics + # Endpoint distribution + endpoints = defaultdict(int) + for req in recent_requests: + endpoints[req["path"]] += 1 + + return { + "total_requests": total_requests, + "avg_response_time": avg_response_time, + "error_rate": error_rate, + "status_codes": dict(status_codes), + "endpoints": dict(endpoints), + "window_minutes": window_minutes + } - def get_metric_summary(self, metric_name: str, - time_window: timedelta = timedelta(hours=1)) -> Dict[str, Any]: - """Get summary statistics for a metric""" - end_time = datetime.utcnow() - start_time = end_time - time_window - - metrics = self.get_metrics(metric_name, start_time, end_time) - - if not metrics: + def get_agent_stats(self, window_minutes: int = 60) -> Dict[str, Any]: + """Get agent execution statistics""" + cutoff_time = time.time() - (window_minutes * 60) + + with self.lock: + recent_executions = [ + exec_ for exec_ in self.agent_metrics + if exec_["timestamp"] >= cutoff_time + ] + + if not recent_executions: + return { + "total_executions": 0, + "success_rate": 0, + "avg_duration": 0, + "agents": {} + } + + # Calculate statistics + total_executions = 
len(recent_executions) + successful_executions = len([exec_ for exec_ in recent_executions if exec_["success"]]) + success_rate = (successful_executions / total_executions) * 100 if total_executions > 0 else 0 + avg_duration = sum(exec_["duration"] for exec_ in recent_executions) / total_executions + + # Agent-specific statistics + agents = defaultdict(lambda: {"executions": 0, "successes": 0, "total_duration": 0}) + for exec_ in recent_executions: + agent_name = exec_["agent_name"] + agents[agent_name]["executions"] += 1 + agents[agent_name]["total_duration"] += exec_["duration"] + if exec_["success"]: + agents[agent_name]["successes"] += 1 + + # Calculate averages for each agent + for agent_name, stats in agents.items(): + stats["success_rate"] = (stats["successes"] / stats["executions"]) * 100 + stats["avg_duration"] = stats["total_duration"] / stats["executions"] + return { - "count": 0, - "min": 0, - "max": 0, - "avg": 0, - "sum": 0 + "total_executions": total_executions, + "success_rate": success_rate, + "avg_duration": avg_duration, + "agents": dict(agents), + "window_minutes": window_minutes } - - values = [m.value for m in metrics] + + def get_system_stats(self) -> Dict[str, Any]: + """Get system performance statistics""" + try: + cpu_percent = psutil.cpu_percent(interval=1) + memory = psutil.virtual_memory() + disk = psutil.disk_usage('/') + + return { + "cpu_percent": cpu_percent, + "memory_percent": memory.percent, + "memory_available": memory.available, + "memory_total": memory.total, + "disk_percent": disk.percent, + "disk_free": disk.free, + "disk_total": disk.total, + "timestamp": time.time(), + "datetime": datetime.utcnow().isoformat() + } + except Exception as e: + logger.error(f"Failed to get system stats: {e}") + return { + "error": str(e), + "timestamp": time.time(), + "datetime": datetime.utcnow().isoformat() + } + + def get_all_metrics(self) -> Dict[str, Any]: + """Get all collected metrics""" return { - "count": len(values), - "min": 
min(values), - "max": max(values), - "avg": sum(values) / len(values), - "sum": sum(values) + "request_stats": self.get_request_stats(), + "agent_stats": self.get_agent_stats(), + "system_stats": self.get_system_stats(), + "total_requests": self.total_requests, + "total_errors": self.total_errors, + "total_agent_executions": self.total_agent_executions, + "timestamp": time.time(), + "datetime": datetime.utcnow().isoformat() } + + def _start_background_collection(self): + """Start background system metrics collection""" + def collect_system_metrics(): + while True: + try: + system_stats = self.get_system_stats() + if "error" not in system_stats: + self.record_performance_metric("cpu_percent", system_stats["cpu_percent"]) + self.record_performance_metric("memory_percent", system_stats["memory_percent"]) + self.record_performance_metric("disk_percent", system_stats["disk_percent"]) + + time.sleep(60) # Collect every minute + except Exception as e: + logger.error(f"Background metrics collection failed: {e}") + time.sleep(60) + + thread = threading.Thread(target=collect_system_metrics, daemon=True) + thread.start() class LogCollector: - """Log collection and storage""" + """Collects and manages application logs""" - def __init__(self): - self.logs: List[LogEntry] = [] - self.logs_lock = threading.Lock() - self.max_logs = 10000 + def __init__(self, max_logs: int = 10000): + self.max_logs = max_logs + self.logs = deque(maxlen=max_logs) + self.lock = threading.Lock() + + def add_log(self, level: str, message: str, context: Optional[Dict[str, Any]] = None): + """Add a log entry""" + log_entry = { + "timestamp": time.time(), + "datetime": datetime.utcnow().isoformat(), + "level": level, + "message": message, + "context": context or {} + } - def add_log(self, log_entry: LogEntry): - """Add a log entry to the collection""" - with self.logs_lock: + with self.lock: self.logs.append(log_entry) - if len(self.logs) > self.max_logs: - # Remove oldest logs - self.logs = 
self.logs[-self.max_logs:] - def get_logs(self, level: Optional[LogLevel] = None, - module: Optional[str] = None, - start_time: Optional[datetime] = None, - end_time: Optional[datetime] = None, - trace_id: Optional[str] = None) -> List[LogEntry]: - """Get logs with optional filtering""" - with self.logs_lock: - filtered_logs = self.logs - + def get_logs(self, level: Optional[str] = None, limit: int = 100) -> List[Dict[str, Any]]: + """Get recent logs, optionally filtered by level""" + with self.lock: if level: - filtered_logs = [l for l in filtered_logs if l.level == level] - - if module: - filtered_logs = [l for l in filtered_logs if l.module == module] - - if start_time: - filtered_logs = [l for l in filtered_logs if l.timestamp >= start_time] - - if end_time: - filtered_logs = [l for l in filtered_logs if l.timestamp <= end_time] - - if trace_id: - filtered_logs = [l for l in filtered_logs if l.trace_id == trace_id] + filtered_logs = [log for log in self.logs if log["level"] == level] + else: + filtered_logs = list(self.logs) - return filtered_logs + return filtered_logs[-limit:] + + def get_error_logs(self, limit: int = 100) -> List[Dict[str, Any]]: + """Get recent error logs""" + return self.get_logs(level="ERROR", limit=limit) class PerformanceMonitor: - """System performance monitoring""" + """Monitors application performance""" def __init__(self): - self.start_time = time.time() self.metrics_collector = MetricsCollector() self.log_collector = LogCollector() - self.agent_metrics: Dict[str, List[AgentExecutionMetrics]] = defaultdict(list) - self.workflow_metrics: Dict[str, WorkflowMetrics] = {} - self.system_metrics = deque(maxlen=1000) - - # Start system monitoring - if settings.ENABLE_MONITORING: - self._start_system_monitoring() - - def _start_system_monitoring(self): - """Start system monitoring thread""" - def monitor_system(): - while True: - try: - # CPU usage - cpu_percent = psutil.cpu_percent(interval=1) - self.metrics_collector.add_metric(Metric( - 
name="system_cpu_usage", - value=cpu_percent, - metric_type=MetricType.GAUGE, - labels={"component": "system"}, - timestamp=datetime.utcnow(), - description="System CPU usage percentage" - )) - - # Memory usage - memory = psutil.virtual_memory() - self.metrics_collector.add_metric(Metric( - name="system_memory_usage", - value=memory.percent, - metric_type=MetricType.GAUGE, - labels={"component": "system"}, - timestamp=datetime.utcnow(), - description="System memory usage percentage" - )) - - # Disk usage - disk = psutil.disk_usage('/') - self.metrics_collector.add_metric(Metric( - name="system_disk_usage", - value=(disk.used / disk.total) * 100, - metric_type=MetricType.GAUGE, - labels={"component": "system"}, - timestamp=datetime.utcnow(), - description="System disk usage percentage" - )) - - # Store system metrics - self.system_metrics.append({ - "timestamp": datetime.utcnow(), - "cpu_percent": cpu_percent, - "memory_percent": memory.percent, - "disk_percent": (disk.used / disk.total) * 100 - }) - - time.sleep(workflow_config.MONITORING_INTERVAL) - - except Exception as e: - self.log_error("system_monitor", "Failed to collect system metrics", str(e)) - time.sleep(5) - - thread = threading.Thread(target=monitor_system, daemon=True) - thread.start() - - def log_info(self, module: str, message: str, extra_data: Dict[str, Any] = None, - trace_id: Optional[str] = None, user_id: Optional[str] = None): - """Log info message""" - self._log(LogLevel.INFO, module, message, extra_data or {}, trace_id, user_id) - - def log_warning(self, module: str, message: str, extra_data: Dict[str, Any] = None, - trace_id: Optional[str] = None, user_id: Optional[str] = None): - """Log warning message""" - self._log(LogLevel.WARNING, module, message, extra_data or {}, trace_id, user_id) - - def log_error(self, module: str, message: str, error_details: str = None, - trace_id: Optional[str] = None, user_id: Optional[str] = None): - """Log error message""" - extra_data = {"error_details": 
error_details} if error_details else {} - self._log(LogLevel.ERROR, module, message, extra_data, trace_id, user_id) + self.start_time = time.time() - def log_critical(self, module: str, message: str, error_details: str = None, - trace_id: Optional[str] = None, user_id: Optional[str] = None): - """Log critical message""" - extra_data = {"error_details": error_details} if error_details else {} - self._log(LogLevel.CRITICAL, module, message, extra_data, trace_id, user_id) + def record_request(self, method: str, path: str, status_code: int, duration: float): + """Record a request""" + self.metrics_collector.record_request(method, path, status_code, duration) - def _log(self, level: LogLevel, module: str, message: str, extra_data: Dict[str, Any], - trace_id: Optional[str] = None, user_id: Optional[str] = None): - """Internal logging method""" - # Get caller information - frame = traceback.extract_stack()[-2] - - log_entry = LogEntry( - timestamp=datetime.utcnow(), - level=level, - message=message, - module=module, - function=frame.name, - line_number=frame.lineno, - extra_data=extra_data, - trace_id=trace_id, - user_id=user_id - ) - - self.log_collector.add_log(log_entry) - - # Also log to standard logging - logger = logging.getLogger(module) - log_message = f"{message} | {json.dumps(extra_data)}" if extra_data else message - getattr(logger, level.value.lower())(log_message) + def record_agent_execution(self, agent_name: str, duration: float, success: bool, + confidence: Optional[float] = None, error: Optional[str] = None): + """Record an agent execution""" + self.metrics_collector.record_agent_execution(agent_name, duration, success, confidence, error) - @contextmanager - def monitor_agent_execution(self, agent_type: str, trace_id: Optional[str] = None): - """Context manager for monitoring agent execution""" - start_time = time.time() - start_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB - start_cpu = psutil.Process().cpu_percent() - - try: - yield - 
success = True - error_message = None - except Exception as e: - success = False - error_message = str(e) - raise - finally: - end_time = time.time() - end_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB - end_cpu = psutil.Process().cpu_percent() - - execution_time = end_time - start_time - memory_usage = end_memory - start_memory - cpu_usage = end_cpu - start_cpu - - # Record agent metrics - agent_metric = AgentExecutionMetrics( - agent_type=agent_type, - execution_time=execution_time, - confidence=0.0, # Will be set by agent - success=success, - error_message=error_message, - input_size=0, # Will be set by agent - output_size=0, # Will be set by agent - memory_usage=memory_usage, - cpu_usage=cpu_usage - ) - - self.agent_metrics[agent_type].append(agent_metric) - - # Add metrics - self.metrics_collector.add_metric(Metric( - name="agent_execution_time", - value=execution_time, - metric_type=MetricType.HISTOGRAM, - labels={"agent_type": agent_type, "success": str(success)}, - timestamp=datetime.utcnow(), - description=f"Execution time for {agent_type} agent" - )) - - self.metrics_collector.add_metric(Metric( - name="agent_memory_usage", - value=memory_usage, - metric_type=MetricType.HISTOGRAM, - labels={"agent_type": agent_type}, - timestamp=datetime.utcnow(), - description=f"Memory usage for {agent_type} agent" - )) - - # Log execution - if success: - self.log_info( - "agent_execution", - f"Agent {agent_type} executed successfully", - { - "execution_time": execution_time, - "memory_usage": memory_usage, - "cpu_usage": cpu_usage - }, - trace_id - ) - else: - self.log_error( - "agent_execution", - f"Agent {agent_type} execution failed", - error_message, - trace_id - ) + def log_info(self, component: str, message: str, context: Optional[Dict[str, Any]] = None): + """Log an info message""" + self.log_collector.add_log("INFO", f"[{component}] {message}", context) + logger.info(f"[{component}] {message}") - @asynccontextmanager - async def 
monitor_workflow_execution(self, workflow_id: str, total_stages: int, - trace_id: Optional[str] = None): - """Context manager for monitoring workflow execution""" - start_time = time.time() - start_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB - start_cpu = psutil.Process().cpu_percent() - - workflow_metric = WorkflowMetrics( - workflow_id=workflow_id, - total_stages=total_stages, - completed_stages=0, - failed_stages=0, - total_execution_time=0.0, - average_stage_time=0.0, - memory_peak=start_memory, - cpu_peak=start_cpu, - status="running" - ) - - self.workflow_metrics[workflow_id] = workflow_metric - - try: - yield workflow_metric - workflow_metric.status = "completed" - except Exception as e: - workflow_metric.status = "failed" - raise - finally: - end_time = time.time() - end_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB - end_cpu = psutil.Process().cpu_percent() - - workflow_metric.total_execution_time = end_time - start_time - workflow_metric.memory_peak = max(workflow_metric.memory_peak, end_memory) - workflow_metric.cpu_peak = max(workflow_metric.cpu_peak, end_cpu) - - if workflow_metric.completed_stages > 0: - workflow_metric.average_stage_time = workflow_metric.total_execution_time / workflow_metric.completed_stages - - # Add workflow metrics - self.metrics_collector.add_metric(Metric( - name="workflow_execution_time", - value=workflow_metric.total_execution_time, - metric_type=MetricType.HISTOGRAM, - labels={"workflow_id": workflow_id, "status": workflow_metric.status}, - timestamp=datetime.utcnow(), - description=f"Execution time for workflow {workflow_id}" - )) - - self.metrics_collector.add_metric(Metric( - name="workflow_stages_completed", - value=workflow_metric.completed_stages, - metric_type=MetricType.COUNTER, - labels={"workflow_id": workflow_id}, - timestamp=datetime.utcnow(), - description=f"Completed stages for workflow {workflow_id}" - )) - - # Log workflow completion - self.log_info( - 
"workflow_execution", - f"Workflow {workflow_id} {workflow_metric.status}", - { - "total_stages": total_stages, - "completed_stages": workflow_metric.completed_stages, - "failed_stages": workflow_metric.failed_stages, - "total_execution_time": workflow_metric.total_execution_time, - "memory_peak": workflow_metric.memory_peak, - "cpu_peak": workflow_metric.cpu_peak - }, - trace_id - ) + def log_warning(self, component: str, message: str, context: Optional[Dict[str, Any]] = None): + """Log a warning message""" + self.log_collector.add_log("WARNING", f"[{component}] {message}", context) + logger.warning(f"[{component}] {message}") - def get_system_status(self) -> Dict[str, Any]: - """Get current system status""" - if not self.system_metrics: - return { - "status": "unknown", - "cpu_usage": 0, - "memory_usage": 0, - "disk_usage": 0, - "uptime": time.time() - self.start_time - } - - latest = self.system_metrics[-1] - return { - "status": "healthy" if latest["cpu_percent"] < 80 and latest["memory_percent"] < 80 else "warning", - "cpu_usage": latest["cpu_percent"], - "memory_usage": latest["memory_percent"], - "disk_usage": latest["disk_percent"], - "uptime": time.time() - self.start_time - } + def log_error(self, component: str, message: str, error: Optional[str] = None, + context: Optional[Dict[str, Any]] = None): + """Log an error message""" + full_message = f"[{component}] {message}" + if error: + full_message += f" - Error: {error}" + + self.log_collector.add_log("ERROR", full_message, context) + logger.error(full_message) - def get_agent_performance_summary(self, agent_type: Optional[str] = None, - time_window: timedelta = timedelta(hours=1)) -> Dict[str, Any]: - """Get agent performance summary""" - end_time = datetime.utcnow() - start_time = end_time - time_window - - if agent_type: - metrics = [m for m in self.agent_metrics[agent_type] - if m.execution_time >= start_time.timestamp()] - else: - all_metrics = [] - for agent_metrics in self.agent_metrics.values(): - 
all_metrics.extend([m for m in agent_metrics - if m.execution_time >= start_time.timestamp()]) - metrics = all_metrics - - if not metrics: - return { - "total_executions": 0, - "success_rate": 0, - "average_execution_time": 0, - "average_memory_usage": 0, - "average_cpu_usage": 0 - } - - total_executions = len(metrics) - successful_executions = len([m for m in metrics if m.success]) - success_rate = successful_executions / total_executions if total_executions > 0 else 0 - - return { - "total_executions": total_executions, - "success_rate": success_rate, - "average_execution_time": sum(m.execution_time for m in metrics) / total_executions, - "average_memory_usage": sum(m.memory_usage for m in metrics) / total_executions, - "average_cpu_usage": sum(m.cpu_usage for m in metrics) / total_executions - } + def get_metrics(self) -> Dict[str, Any]: + """Get all metrics""" + return self.metrics_collector.get_all_metrics() - def get_workflow_performance_summary(self, time_window: timedelta = timedelta(hours=1)) -> Dict[str, Any]: - """Get workflow performance summary""" - end_time = datetime.utcnow() - start_time = end_time - time_window - - workflows = [w for w in self.workflow_metrics.values() - if w.total_execution_time >= start_time.timestamp()] - - if not workflows: - return { - "total_workflows": 0, - "completed_workflows": 0, - "failed_workflows": 0, - "average_execution_time": 0, - "average_stages_completed": 0 - } - - total_workflows = len(workflows) - completed_workflows = len([w for w in workflows if w.status == "completed"]) - failed_workflows = len([w for w in workflows if w.status == "failed"]) - - return { - "total_workflows": total_workflows, - "completed_workflows": completed_workflows, - "failed_workflows": failed_workflows, - "average_execution_time": sum(w.total_execution_time for w in workflows) / total_workflows, - "average_stages_completed": sum(w.completed_stages for w in workflows) / total_workflows - } + def get_logs(self, level: Optional[str] = 
None, limit: int = 100) -> List[Dict[str, Any]]: + """Get logs""" + return self.log_collector.get_logs(level, limit) - def get_metrics_summary(self, time_window: timedelta = timedelta(hours=1)) -> Dict[str, Any]: - """Get comprehensive metrics summary""" - return { - "system": self.get_system_status(), - "agents": self.get_agent_performance_summary(time_window=time_window), - "workflows": self.get_workflow_performance_summary(time_window=time_window), - "metrics_count": len(self.metrics_collector.metrics), - "logs_count": len(self.log_collector.logs) - } + def get_uptime(self) -> float: + """Get application uptime in seconds""" + return time.time() - self.start_time # Global monitoring instance -monitor = PerformanceMonitor() +_monitor = None def get_monitor() -> PerformanceMonitor: """Get the global monitoring instance""" - return monitor + global _monitor + if _monitor is None: + _monitor = PerformanceMonitor() + return _monitor -def setup_logging(): - """Setup logging configuration""" - logging.basicConfig( - level=getattr(logging, settings.LOG_LEVEL), - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[ - logging.StreamHandler(), - logging.FileHandler('app.log') - ] - ) - - # Set specific logger levels - logging.getLogger('uvicorn').setLevel(logging.INFO) - logging.getLogger('fastapi').setLevel(logging.INFO) - - monitor.log_info("monitoring", "Logging system initialized") +def setup_monitoring() -> None: + """Setup monitoring system""" + global _monitor + if _monitor is None: + _monitor = PerformanceMonitor() + logger.info("Monitoring system initialized") -def log_agent_execution(agent_type: str, execution_time: float, confidence: float, - success: bool, input_size: int, output_size: int, - error_message: Optional[str] = None, trace_id: Optional[str] = None): - """Log agent execution metrics""" - if monitor.agent_metrics[agent_type]: - # Update the latest agent metric with additional data - latest_metric = 
monitor.agent_metrics[agent_type][-1] - latest_metric.confidence = confidence - latest_metric.input_size = input_size - latest_metric.output_size = output_size - - # Add confidence metric - monitor.metrics_collector.add_metric(Metric( - name="agent_confidence", - value=confidence, - metric_type=MetricType.GAUGE, - labels={"agent_type": agent_type, "success": str(success)}, - timestamp=datetime.utcnow(), - description=f"Confidence score for {agent_type} agent" - )) - - # Add throughput metric - if execution_time > 0: - throughput = output_size / execution_time - monitor.metrics_collector.add_metric(Metric( - name="agent_throughput", - value=throughput, - metric_type=MetricType.GAUGE, - labels={"agent_type": agent_type}, - timestamp=datetime.utcnow(), - description=f"Throughput for {agent_type} agent (output_size/time)" - )) +def instrument_fastapi(app) -> None: + """Instrument FastAPI application for monitoring""" + from fastapi import Request, Response + from starlette.middleware.base import BaseHTTPMiddleware + + class MonitoringMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + start_time = time.time() + + # Process request + response = await call_next(request) + + # Calculate duration + duration = time.time() - start_time + + # Record metrics + monitor = get_monitor() + monitor.record_request( + method=request.method, + path=request.url.path, + status_code=response.status_code, + duration=duration + ) + + return response + + # Add monitoring middleware + app.add_middleware(MonitoringMiddleware) + logger.info("FastAPI application instrumented for monitoring") -def log_workflow_stage_completion(workflow_id: str, stage_name: str, success: bool, - execution_time: float, trace_id: Optional[str] = None): - """Log workflow stage completion""" - if workflow_id in monitor.workflow_metrics: - workflow = monitor.workflow_metrics[workflow_id] +def create_health_check() -> Dict[str, Any]: + """Create a comprehensive health check 
response""" + monitor = get_monitor() + + try: + system_stats = monitor.get_metrics()["system_stats"] - if success: - workflow.completed_stages += 1 - else: - workflow.failed_stages += 1 + # Determine overall health + health_status = "healthy" + issues = [] - # Add stage completion metric - monitor.metrics_collector.add_metric(Metric( - name="workflow_stage_completion", - value=1 if success else 0, - metric_type=MetricType.COUNTER, - labels={"workflow_id": workflow_id, "stage_name": stage_name, "success": str(success)}, - timestamp=datetime.utcnow(), - description=f"Stage completion for {stage_name} in workflow {workflow_id}" - )) + # Check CPU usage + if system_stats.get("cpu_percent", 0) > 90: + health_status = "degraded" + issues.append("High CPU usage") - monitor.log_info( - "workflow_stage", - f"Workflow {workflow_id} stage {stage_name} {'completed' if success else 'failed'}", - { - "execution_time": execution_time, - "completed_stages": workflow.completed_stages, - "failed_stages": workflow.failed_stages - }, - trace_id - ) - - -def setup_monitoring(): - """Setup monitoring and observability""" - try: - # Setup logging - setup_logging() + # Check memory usage + if system_stats.get("memory_percent", 0) > 90: + health_status = "degraded" + issues.append("High memory usage") - # Initialize monitoring - monitor.log_info("monitoring", "Monitoring system initialized") + # Check disk usage + if system_stats.get("disk_percent", 0) > 90: + health_status = "degraded" + issues.append("High disk usage") - # Setup Prometheus metrics endpoint if enabled - if settings.ENABLE_MONITORING: - monitor.log_info("monitoring", "Metrics collection enabled") + # Check error rate + request_stats = monitor.get_metrics()["request_stats"] + if request_stats.get("error_rate", 0) > 10: + health_status = "degraded" + issues.append("High error rate") - print("βœ… Monitoring setup complete") + return { + "status": health_status, + "timestamp": datetime.utcnow().isoformat(), + 
"uptime_seconds": monitor.get_uptime(), + "system_stats": system_stats, + "request_stats": request_stats, + "issues": issues + } except Exception as e: - print(f"⚠️ Monitoring setup failed: {e}") + logger.error(f"Health check failed: {e}") + return { + "status": "unhealthy", + "timestamp": datetime.utcnow().isoformat(), + "error": str(e) + } -def instrument_fastapi(app): - """Instrument FastAPI application with monitoring""" - try: - from fastapi import Request, Response - import time - - @app.middleware("http") - async def monitoring_middleware(request: Request, call_next): - start_time = time.time() - - # Process request - response = await call_next(request) - - # Calculate metrics - process_time = time.time() - start_time - - # Record API metrics - monitor.metrics_collector.add_metric(Metric( - name="http_request_duration", - value=process_time, - metric_type=MetricType.HISTOGRAM, - labels={ - "method": request.method, - "endpoint": str(request.url.path), - "status_code": str(response.status_code) - }, - timestamp=datetime.utcnow(), - description="HTTP request duration" - )) - - monitor.metrics_collector.add_metric(Metric( - name="http_requests_total", - value=1, - metric_type=MetricType.COUNTER, - labels={ - "method": request.method, - "endpoint": str(request.url.path), - "status_code": str(response.status_code) - }, - timestamp=datetime.utcnow(), - description="Total HTTP requests" - )) - - # Add response time header - response.headers["X-Process-Time"] = str(process_time) - - return response - - monitor.log_info("monitoring", "FastAPI instrumentation complete") - - except Exception as e: - monitor.log_error("monitoring", "FastAPI instrumentation failed", str(e)) \ No newline at end of file +def export_metrics_prometheus() -> str: + """Export metrics in Prometheus format""" + monitor = get_monitor() + metrics = monitor.get_metrics() + + prometheus_metrics = [] + + # System metrics + system_stats = metrics.get("system_stats", {}) + if "cpu_percent" in 
system_stats: + prometheus_metrics.append(f"system_cpu_percent {system_stats['cpu_percent']}") + if "memory_percent" in system_stats: + prometheus_metrics.append(f"system_memory_percent {system_stats['memory_percent']}") + if "disk_percent" in system_stats: + prometheus_metrics.append(f"system_disk_percent {system_stats['disk_percent']}") + + # Request metrics + request_stats = metrics.get("request_stats", {}) + prometheus_metrics.append(f"http_requests_total {request_stats.get('total_requests', 0)}") + prometheus_metrics.append(f"http_request_duration_seconds {request_stats.get('avg_response_time', 0)}") + prometheus_metrics.append(f"http_errors_total {request_stats.get('total_requests', 0) * request_stats.get('error_rate', 0) / 100}") + + # Agent metrics + agent_stats = metrics.get("agent_stats", {}) + prometheus_metrics.append(f"agent_executions_total {agent_stats.get('total_executions', 0)}") + prometheus_metrics.append(f"agent_success_rate {agent_stats.get('success_rate', 0)}") + + return "\n".join(prometheus_metrics) \ No newline at end of file diff --git a/backend/app/core/security.py b/backend/app/core/security.py index 595c0cc..83344c2 100644 --- a/backend/app/core/security.py +++ b/backend/app/core/security.py @@ -1,12 +1,19 @@ import os from datetime import datetime, timedelta -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, List from jose import JWTError, jwt from passlib.context import CryptContext -from fastapi import HTTPException, status, Depends +from fastapi import HTTPException, status, Depends, Request from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from sqlalchemy.orm import Session +import redis +import hashlib +import secrets +import logging from .config import settings +from ..database.connection import get_db +from ..database.models import User, Role, UserRole # Password hashing pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") @@ -14,87 +21,212 @@ # JWT settings 
security = HTTPBearer() -# Mock user database (in production, this would be a real database) -MOCK_USERS = { - "admin@example.com": { - "id": "1", - "email": "admin@example.com", - "full_name": "Admin User", - "hashed_password": "$2b$12$EixZaYVK1fsbw1ZfbX3OXePaWxn96p36WQoeG6Lruj3vjPGga31lW", # password: secret - "is_active": True, - "is_superuser": True, - "roles": ["admin", "user"] - }, - "user@example.com": { - "id": "2", - "email": "user@example.com", - "full_name": "Regular User", - "hashed_password": "$2b$12$EixZaYVK1fsbw1ZfbX3OXePaWxn96p36WQoeG6Lruj3vjPGga31lW", # password: secret - "is_active": True, - "is_superuser": False, - "roles": ["user"] - } -} +# Redis for token blacklisting and rate limiting +redis_client = redis.Redis.from_url(settings.REDIS_URL, decode_responses=True) +# Security logging +logger = logging.getLogger(__name__) -def verify_password(plain_password: str, hashed_password: str) -> bool: - """Verify a plain password against a hashed password""" - return pwd_context.verify(plain_password, hashed_password) - - -def get_password_hash(password: str) -> str: - """Hash a password""" - return pwd_context.hash(password) - - -def get_user_by_email(email: str) -> Optional[Dict[str, Any]]: - """Get user by email from mock database""" - return MOCK_USERS.get(email) - - -def authenticate_user(email: str, password: str) -> Optional[Dict[str, Any]]: - """Authenticate a user by email and password""" - user = get_user_by_email(email) - if not user: - return None - if not verify_password(password, user["hashed_password"]): - return None - return user - - -def create_access_token(data: Dict[str, Any], expires_delta: Optional[timedelta] = None) -> str: - """Create a JWT access token""" - to_encode = data.copy() +class SecurityManager: + """Comprehensive security management for the application""" - if expires_delta: - expire = datetime.utcnow() + expires_delta - else: - expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) + def 
__init__(self): + self.rate_limit_window = 3600 # 1 hour + self.max_requests_per_window = 1000 + self.token_blacklist_prefix = "blacklist:" + self.rate_limit_prefix = "rate_limit:" - to_encode.update({"exp": expire}) - encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM) - return encoded_jwt - - -def verify_token(token: str) -> Optional[Dict[str, Any]]: - """Verify and decode a JWT token""" - try: - payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM]) - email: str = payload.get("sub") - if email is None: + def verify_password(self, plain_password: str, hashed_password: str) -> bool: + """Verify a plain password against a hashed password""" + return pwd_context.verify(plain_password, hashed_password) + + def get_password_hash(self, password: str) -> str: + """Hash a password""" + return pwd_context.hash(password) + + def get_user_by_email(self, db: Session, email: str) -> Optional[User]: + """Get user by email from database""" + return db.query(User).filter(User.email == email).first() + + def get_user_by_id(self, db: Session, user_id: int) -> Optional[User]: + """Get user by ID from database""" + return db.query(User).filter(User.id == user_id).first() + + def authenticate_user(self, db: Session, email: str, password: str) -> Optional[User]: + """Authenticate a user by email and password""" + user = self.get_user_by_email(db, email) + if not user: + return None + if not self.verify_password(password, user.hashed_password): + return None + if not user.is_active: return None + return user + + def create_access_token(self, data: Dict[str, Any], expires_delta: Optional[timedelta] = None) -> str: + """Create a JWT access token""" + to_encode = data.copy() + + if expires_delta: + expire = datetime.utcnow() + expires_delta + else: + expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) - # Check if token is expired - exp = payload.get("exp") - if exp is None or datetime.utcnow() 
> datetime.fromtimestamp(exp): + to_encode.update({ + "exp": expire, + "iat": datetime.utcnow(), + "jti": secrets.token_urlsafe(32) # JWT ID for blacklisting + }) + + encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM) + return encoded_jwt + + def verify_token(self, token: str) -> Optional[Dict[str, Any]]: + """Verify and decode a JWT token""" + try: + # Check if token is blacklisted + if self.is_token_blacklisted(token): + return None + + payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM]) + email: str = payload.get("sub") + if email is None: + return None + + # Check if token is expired + exp = payload.get("exp") + if exp is None or datetime.utcnow() > datetime.fromtimestamp(exp): + return None + + return payload + except JWTError: return None + + def blacklist_token(self, token: str, expires_in: int = 3600) -> bool: + """Add token to blacklist""" + try: + # Hash the token for storage + token_hash = hashlib.sha256(token.encode()).hexdigest() + redis_client.setex( + f"{self.token_blacklist_prefix}{token_hash}", + expires_in, + "blacklisted" + ) + return True + except Exception as e: + logger.error(f"Failed to blacklist token: {e}") + return False + + def is_token_blacklisted(self, token: str) -> bool: + """Check if token is blacklisted""" + try: + token_hash = hashlib.sha256(token.encode()).hexdigest() + return redis_client.exists(f"{self.token_blacklist_prefix}{token_hash}") > 0 + except Exception as e: + logger.error(f"Failed to check token blacklist: {e}") + return False + + def check_rate_limit(self, identifier: str) -> bool: + """Check rate limiting for an identifier (IP or user)""" + try: + key = f"{self.rate_limit_prefix}{identifier}" + current_requests = redis_client.get(key) - return payload - except JWTError: - return None + if current_requests is None: + redis_client.setex(key, self.rate_limit_window, 1) + return True + + current_requests = int(current_requests) + if current_requests >= 
self.max_requests_per_window: + return False + + redis_client.incr(key) + return True + except Exception as e: + logger.error(f"Rate limit check failed: {e}") + return True # Allow if Redis fails + + def get_user_permissions(self, db: Session, user: User) -> List[str]: + """Get user permissions from database""" + try: + user_roles = db.query(UserRole).filter(UserRole.user_id == user.id).all() + permissions = [] + + for user_role in user_roles: + role = db.query(Role).filter(Role.id == user_role.role_id).first() + if role and role.permissions: + if isinstance(role.permissions, list): + permissions.extend(role.permissions) + elif isinstance(role.permissions, dict): + permissions.extend(role.permissions.get("permissions", [])) + + return list(set(permissions)) # Remove duplicates + except Exception as e: + logger.error(f"Failed to get user permissions: {e}") + return [] + + def has_permission(self, db: Session, user: User, permission: str) -> bool: + """Check if user has a specific permission""" + try: + # Superuser has all permissions + if user.is_superuser: + return True + + user_permissions = self.get_user_permissions(db, user) + + # Check for wildcard permission + if "*" in user_permissions: + return True + + # Check for specific permission + if permission in user_permissions: + return True + + # Check for role-based permissions + if permission.startswith("role:"): + required_role = permission.split(":")[1] + user_roles = db.query(UserRole).filter(UserRole.user_id == user.id).all() + for user_role in user_roles: + role = db.query(Role).filter(Role.id == user_role.role_id).first() + if role and role.name == required_role: + return True + + return False + except Exception as e: + logger.error(f"Permission check failed: {e}") + return False + + def log_security_event(self, event_type: str, user_id: Optional[int], + ip_address: str, details: Dict[str, Any]) -> None: + """Log security events""" + try: + event_data = { + "event_type": event_type, + "user_id": user_id, + 
"ip_address": ip_address, + "timestamp": datetime.utcnow().isoformat(), + "details": details + } + + # Log to application logs + logger.info(f"Security event: {event_data}") + + # Store in Redis for monitoring + redis_client.lpush("security_events", str(event_data)) + redis_client.ltrim("security_events", 0, 999) # Keep last 1000 events + + except Exception as e: + logger.error(f"Failed to log security event: {e}") -async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict[str, Any]: +# Global security manager instance +security_manager = SecurityManager() + + +async def get_current_user( + credentials: HTTPAuthorizationCredentials = Depends(security), + db: Session = Depends(get_db) +) -> User: """Get the current authenticated user""" credentials_exception = HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -105,7 +237,7 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s try: # Extract token from credentials token = credentials.credentials - payload = verify_token(token) + payload = security_manager.verify_token(token) if payload is None: raise credentials_exception @@ -118,12 +250,12 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s raise credentials_exception # Get user from database - user = get_user_by_email(email) + user = security_manager.get_user_by_email(db, email) if user is None: raise credentials_exception # Check if user is active - if not user.get("is_active", False): + if not user.is_active: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Inactive user" @@ -132,9 +264,9 @@ async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(s return user -async def get_current_active_user(current_user: Dict[str, Any] = Depends(get_current_user)) -> Dict[str, Any]: +async def get_current_active_user(current_user: User = Depends(get_current_user)) -> User: """Get the current active user""" - if not 
current_user.get("is_active", False): + if not current_user.is_active: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Inactive user" @@ -142,9 +274,9 @@ async def get_current_active_user(current_user: Dict[str, Any] = Depends(get_cur return current_user -async def get_current_superuser(current_user: Dict[str, Any] = Depends(get_current_user)) -> Dict[str, Any]: +async def get_current_superuser(current_user: User = Depends(get_current_user)) -> User: """Get the current superuser""" - if not current_user.get("is_superuser", False): + if not current_user.is_superuser: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Not enough permissions" @@ -152,52 +284,112 @@ async def get_current_superuser(current_user: Dict[str, Any] = Depends(get_curre return current_user -def has_permission(user: Dict[str, Any], permission: str) -> bool: - """Check if user has a specific permission""" - # Simple role-based permission check - user_roles = user.get("roles", []) - - # Admin has all permissions - if "admin" in user_roles: - return True - - # Permission mapping for different roles - permission_map = { - "user": [ - "documents:read", - "documents:upload", - "qa:ask", - "analytics:read" - ], - "manager": [ - "documents:read", - "documents:upload", - "documents:delete", - "qa:ask", - "analytics:read", - "audit:read", - "settings:read" - ], - "admin": ["*"] # All permissions - } - - for role in user_roles: - if role in permission_map: - role_permissions = permission_map[role] - if "*" in role_permissions or permission in role_permissions: - return True - - return False - - def require_permission(permission: str): """Decorator to require a specific permission""" - def permission_dependency(current_user: Dict[str, Any] = Depends(get_current_user)): - if not has_permission(current_user, permission): + def permission_dependency( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db) + ): + if not 
security_manager.has_permission(db, current_user, permission): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail=f"Permission '{permission}' required" ) return current_user + return permission_dependency + + +def rate_limit_middleware(request: Request, db: Session = Depends(get_db)): + """Rate limiting middleware""" + # Get client IP + client_ip = request.client.host + + # Check rate limit + if not security_manager.check_rate_limit(client_ip): + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail="Rate limit exceeded" + ) + + return True + + +def log_security_event_middleware(request: Request, current_user: Optional[User] = Depends(get_current_user)): + """Log security events middleware""" + client_ip = request.client.host + + # Log the request + security_manager.log_security_event( + event_type="api_request", + user_id=current_user.id if current_user else None, + ip_address=client_ip, + details={ + "method": request.method, + "path": request.url.path, + "user_agent": request.headers.get("user-agent", ""), + "referer": request.headers.get("referer", "") + } + ) + + return True + + +# Backward compatibility functions +def verify_password(plain_password: str, hashed_password: str) -> bool: + """Verify a plain password against a hashed password""" + return security_manager.verify_password(plain_password, hashed_password) + + +def get_password_hash(password: str) -> str: + """Hash a password""" + return security_manager.get_password_hash(password) + + +def get_user_by_email(email: str): + """Get user by email (deprecated - use database session)""" + logger.warning("get_user_by_email called without database session - use database models directly") + return None + + +def authenticate_user(email: str, password: str): + """Authenticate user (deprecated - use database session)""" + logger.warning("authenticate_user called without database session - use SecurityManager.authenticate_user") + return None + + +def 
create_access_token(data: Dict[str, Any], expires_delta: Optional[timedelta] = None) -> str: + """Create a JWT access token""" + return security_manager.create_access_token(data, expires_delta) + + +def verify_token(token: str) -> Optional[Dict[str, Any]]: + """Verify and decode a JWT token""" + return security_manager.verify_token(token) + + +async def get_current_user_legacy(credentials: HTTPAuthorizationCredentials = Depends(security)) -> Dict[str, Any]: + """Legacy get_current_user function (deprecated)""" + logger.warning("get_current_user_legacy called - use get_current_user with database session") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Legacy authentication not supported" + ) + + +def has_permission(user: Dict[str, Any], permission: str) -> bool: + """Check if user has a specific permission (deprecated)""" + logger.warning("has_permission called with dict user - use SecurityManager.has_permission with User model") + return False + + +def require_permission_legacy(permission: str): + """Legacy permission decorator (deprecated)""" + def permission_dependency(current_user: Dict[str, Any] = Depends(get_current_user_legacy)): + logger.warning("require_permission_legacy called - use require_permission with database session") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Legacy permission system not supported" + ) + return permission_dependency \ No newline at end of file diff --git a/backend/app/database/connection.py b/backend/app/database/connection.py index 7ecdf37..66e6095 100644 --- a/backend/app/database/connection.py +++ b/backend/app/database/connection.py @@ -1,250 +1,435 @@ -from sqlalchemy import create_engine, MetaData +import os +import logging +from typing import Generator, Optional +from sqlalchemy import create_engine, text, event from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, Session -from sqlalchemy.pool import 
StaticPool -import logging -from typing import Generator +from sqlalchemy.pool import QueuePool +from sqlalchemy.exc import SQLAlchemyError, OperationalError +import asyncio from ..core.config import settings logger = logging.getLogger(__name__) +# Create declarative base +Base = declarative_base() + # Database engine engine = None SessionLocal = None -Base = declarative_base() -def create_database_engine(): - """Create database engine based on configuration""" +def create_database_engine() -> None: + """Create and configure the database engine""" global engine, SessionLocal - database_url = settings.DATABASE_URL - - # Configure engine based on database type - if database_url.startswith("sqlite"): - # SQLite configuration - engine = create_engine( - database_url, - connect_args={"check_same_thread": False}, - poolclass=StaticPool, - echo=settings.DEBUG - ) - elif database_url.startswith("postgresql"): - # PostgreSQL configuration + try: + # Parse database URL + database_url = settings.DATABASE_URL + + # Create engine with connection pooling engine = create_engine( database_url, - pool_pre_ping=True, - pool_recycle=300, - echo=settings.DEBUG + poolclass=QueuePool, + pool_size=settings.DATABASE_POOL_SIZE, + max_overflow=settings.DATABASE_MAX_OVERFLOW, + pool_pre_ping=True, # Enable connection health checks + pool_recycle=3600, # Recycle connections after 1 hour + echo=settings.DEBUG, # Log SQL queries in debug mode + future=True # Use SQLAlchemy 2.0 style ) - else: - # Default configuration - engine = create_engine( - database_url, - echo=settings.DEBUG + + # Create session factory + SessionLocal = sessionmaker( + autocommit=False, + autoflush=False, + bind=engine ) - - # Create session factory - SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - - logger.info(f"Database engine created for: {database_url}") - return engine + + logger.info("Database engine created successfully") + + except Exception as e: + logger.error(f"Failed to create 
database engine: {e}") + raise -def get_database_session() -> Generator[Session, None, None]: - """Get database session""" +def get_db() -> Generator[Session, None, None]: + """Dependency to get database session""" if SessionLocal is None: create_database_engine() db = SessionLocal() try: yield db + except Exception as e: + logger.error(f"Database session error: {e}") + db.rollback() + raise finally: db.close() -def init_database(): +async def init_database() -> None: """Initialize database tables""" - global engine - - if engine is None: - create_database_engine() - try: + if engine is None: + create_database_engine() + # Import all models to ensure they are registered from .models import ( - User, Role, Document, Tag, ComplianceFramework, + User, Role, UserRole, Document, Tag, ComplianceFramework, ProcessingHistory, AgentExecution, DocumentComparison, - AuditEvent, SystemMetrics, WorkflowTemplate, - KnowledgeBase, Notification, APILog, SystemConfiguration + AuditEvent, SystemMetric, WorkflowTemplate, KnowledgeBase, + Notification, APILog, SystemConfig ) # Create all tables Base.metadata.create_all(bind=engine) + logger.info("Database tables created successfully") - # Create default data - create_default_data() + # Initialize default data + await initialize_default_data() except Exception as e: - logger.error(f"Failed to initialize database: {e}") + logger.error(f"Database initialization failed: {e}") raise -def create_default_data(): - """Create default data in database""" +async def initialize_default_data() -> None: + """Initialize default data in the database""" try: - db = next(get_database_session()) + db = SessionLocal() - # Import models - from .models import User, Role, ComplianceFramework, Tag - from ..core.security import get_password_hash + # Check if default data already exists + existing_admin = db.query(User).filter(User.email == "admin@example.com").first() + if existing_admin: + logger.info("Default data already exists, skipping initialization") + 
return # Create default roles - default_roles = [ - {"name": "admin", "description": "Administrator with full access"}, - {"name": "manager", "description": "Manager with limited admin access"}, - {"name": "user", "description": "Regular user with basic access"} - ] + admin_role = Role( + name="admin", + description="Administrator with full access", + permissions=["*"] + ) - for role_data in default_roles: - existing_role = db.query(Role).filter(Role.name == role_data["name"]).first() - if not existing_role: - role = Role(**role_data) - db.add(role) + user_role = Role( + name="user", + description="Standard user with basic access", + permissions=[ + "documents:read", + "documents:upload", + "documents:delete", + "qa:ask", + "compare:compare", + "analytics:view" + ] + ) + + analyst_role = Role( + name="analyst", + description="Analyst with advanced access", + permissions=[ + "documents:read", + "documents:upload", + "documents:delete", + "qa:ask", + "compare:compare", + "analytics:view", + "analytics:export", + "audit:view" + ] + ) + + db.add_all([admin_role, user_role, analyst_role]) + db.commit() # Create default admin user - admin_email = "admin@example.com" - existing_admin = db.query(User).filter(User.email == admin_email).first() - if not existing_admin: - admin_user = User( - email=admin_email, - hashed_password=get_password_hash("admin123"), - full_name="System Administrator", - is_active=True, - is_superuser=True - ) - db.add(admin_user) + from ..core.security import security_manager + + admin_user = User( + email="admin@example.com", + full_name="System Administrator", + hashed_password=security_manager.get_password_hash("admin123"), + is_active=True, + is_superuser=True + ) + + db.add(admin_user) + db.commit() + + # Assign admin role to admin user + admin_user_role = UserRole( + user_id=admin_user.id, + role_id=admin_role.id, + assigned_by=admin_user.id + ) + + db.add(admin_user_role) + db.commit() # Create default compliance frameworks - 
default_frameworks = [ - {"name": "GDPR", "description": "General Data Protection Regulation"}, - {"name": "HIPAA", "description": "Health Insurance Portability and Accountability Act"}, - {"name": "SOX", "description": "Sarbanes-Oxley Act"}, - {"name": "PCI-DSS", "description": "Payment Card Industry Data Security Standard"} + frameworks = [ + ComplianceFramework( + name="GDPR", + description="General Data Protection Regulation", + version="2018", + requirements=["data_minimization", "consent", "right_to_erasure"] + ), + ComplianceFramework( + name="HIPAA", + description="Health Insurance Portability and Accountability Act", + version="1996", + requirements=["privacy_rule", "security_rule", "breach_notification"] + ), + ComplianceFramework( + name="SOX", + description="Sarbanes-Oxley Act", + version="2002", + requirements=["financial_reporting", "internal_controls", "audit_requirements"] + ) ] - for framework_data in default_frameworks: - existing_framework = db.query(ComplianceFramework).filter( - ComplianceFramework.name == framework_data["name"] - ).first() - if not existing_framework: - framework = ComplianceFramework(**framework_data) - db.add(framework) + db.add_all(frameworks) + db.commit() # Create default tags - default_tags = [ - {"name": "contract", "description": "Legal contracts", "color": "#2196F3"}, - {"name": "invoice", "description": "Financial invoices", "color": "#4CAF50"}, - {"name": "policy", "description": "Company policies", "color": "#FF9800"}, - {"name": "report", "description": "Business reports", "color": "#9C27B0"}, - {"name": "compliance", "description": "Compliance documents", "color": "#F44336"} + tags = [ + Tag(name="confidential", description="Confidential documents"), + Tag(name="public", description="Public documents"), + Tag(name="financial", description="Financial documents"), + Tag(name="legal", description="Legal documents"), + Tag(name="hr", description="Human resources documents"), + Tag(name="technical", 
description="Technical documents") ] - for tag_data in default_tags: - existing_tag = db.query(Tag).filter(Tag.name == tag_data["name"]).first() - if not existing_tag: - tag = Tag(**tag_data) - db.add(tag) + db.add_all(tags) + db.commit() + + # Create default system configurations + configs = [ + SystemConfig( + key="max_file_size_mb", + value="100", + description="Maximum file size in MB", + category="upload" + ), + SystemConfig( + key="allowed_file_types", + value="pdf,docx,txt,csv,xlsx", + description="Allowed file types", + category="upload" + ), + SystemConfig( + key="session_timeout_minutes", + value="30", + description="Session timeout in minutes", + category="security" + ), + SystemConfig( + key="audit_log_retention_days", + value="90", + description="Audit log retention period in days", + category="audit" + ), + SystemConfig( + key="backup_enabled", + value="true", + description="Enable automatic backups", + category="backup" + ), + SystemConfig( + key="monitoring_enabled", + value="true", + description="Enable system monitoring", + category="monitoring" + ) + ] - # Commit all changes + db.add_all(configs) db.commit() - logger.info("Default data created successfully") + + logger.info("Default data initialized successfully") except Exception as e: - logger.error(f"Failed to create default data: {e}") + logger.error(f"Failed to initialize default data: {e}") db.rollback() + raise finally: db.close() -def check_database_connection(): +async def check_database_connection() -> bool: """Check if database connection is working""" try: if engine is None: create_database_engine() - # Test connection - with engine.connect() as conn: - conn.execute("SELECT 1") + # Test connection with a simple query + with engine.connect() as connection: + result = connection.execute(text("SELECT 1")) + result.fetchone() - logger.info("Database connection test successful") + logger.info("Database connection check successful") return True except Exception as e: - 
logger.error(f"Database connection test failed: {e}") + logger.error(f"Database connection check failed: {e}") return False -def get_database_info(): - """Get database information""" - if engine is None: - return {"status": "not_initialized"} - +async def get_database_info() -> dict: + """Get database information and statistics""" try: - with engine.connect() as conn: - # Get database type - db_type = engine.dialect.name - - # Get database URL (without credentials) - db_url = str(engine.url).replace(str(engine.url.password), "****") if engine.url.password else str(engine.url) - - # Test connection - conn.execute("SELECT 1") - - return { - "status": "connected", - "type": db_type, - "url": db_url, - "pool_size": engine.pool.size() if hasattr(engine.pool, 'size') else None, - "checked_out_connections": engine.pool.checkedout() if hasattr(engine.pool, 'checkedout') else None - } - + if engine is None: + create_database_engine() + + db = SessionLocal() + + # Get table counts + table_counts = {} + tables = [ + "users", "roles", "user_roles", "documents", "tags", + "compliance_frameworks", "processing_history", "agent_executions", + "document_comparisons", "audit_events", "system_metrics", + "workflow_templates", "knowledge_base", "notifications", + "api_logs", "system_configs" + ] + + for table in tables: + try: + result = db.execute(text(f"SELECT COUNT(*) FROM {table}")) + count = result.scalar() + table_counts[table] = count + except Exception as e: + logger.warning(f"Could not get count for table {table}: {e}") + table_counts[table] = 0 + + # Get database size + try: + result = db.execute(text(""" + SELECT pg_size_pretty(pg_database_size(current_database())) as size + """)) + db_size = result.scalar() + except Exception: + db_size = "unknown" + + # Get connection info + pool_info = { + "pool_size": engine.pool.size(), + "checked_in": engine.pool.checkedin(), + "checked_out": engine.pool.checkedout(), + "overflow": engine.pool.overflow() + } + + return { + "status": 
"connected", + "database_url": settings.DATABASE_URL.replace( + settings.DATABASE_URL.split("@")[0].split(":")[-1], "***" + ) if "@" in settings.DATABASE_URL else settings.DATABASE_URL, + "table_counts": table_counts, + "database_size": db_size, + "pool_info": pool_info, + "timestamp": "2024-01-01T00:00:00Z" + } + except Exception as e: + logger.error(f"Failed to get database info: {e}") return { "status": "error", - "error": str(e) + "error": str(e), + "timestamp": "2024-01-01T00:00:00Z" } + finally: + if 'db' in locals(): + db.close() -def reset_database(): - """Reset database (drop and recreate all tables)""" - global engine - - if engine is None: +async def cleanup_database() -> None: + """Cleanup database connections""" + try: + if engine: + engine.dispose() + logger.info("Database engine disposed") + except Exception as e: + logger.error(f"Database cleanup failed: {e}") + + +def get_database_session() -> Session: + """Get a database session (for use outside of FastAPI dependencies)""" + if SessionLocal is None: create_database_engine() - + return SessionLocal() + + +# Database event listeners for logging +@event.listens_for(engine, "connect") +def receive_connect(dbapi_connection, connection_record): + logger.debug("Database connection established") + + +@event.listens_for(engine, "disconnect") +def receive_disconnect(dbapi_connection, connection_record): + logger.debug("Database connection closed") + + +@event.listens_for(engine, "checkout") +def receive_checkout(dbapi_connection, connection_record, connection_proxy): + logger.debug("Database connection checked out") + + +@event.listens_for(engine, "checkin") +def receive_checkin(dbapi_connection, connection_record): + logger.debug("Database connection checked in") + + +# Database health check function +async def health_check_database() -> dict: + """Perform a comprehensive database health check""" try: - # Drop all tables - Base.metadata.drop_all(bind=engine) - logger.info("All database tables dropped") + # 
Check connection + connection_ok = await check_database_connection() + if not connection_ok: + return { + "status": "unhealthy", + "error": "Database connection failed", + "timestamp": "2024-01-01T00:00:00Z" + } - # Recreate all tables - Base.metadata.create_all(bind=engine) - logger.info("All database tables recreated") + # Get database info + db_info = await get_database_info() - # Create default data - create_default_data() + # Check for critical issues + issues = [] - return True + # Check if admin user exists + if db_info.get("table_counts", {}).get("users", 0) == 0: + issues.append("No users found in database") + + # Check if roles exist + if db_info.get("table_counts", {}).get("roles", 0) == 0: + issues.append("No roles found in database") + + # Check connection pool health + pool_info = db_info.get("pool_info", {}) + if pool_info.get("checked_out", 0) > pool_info.get("pool_size", 0) * 0.8: + issues.append("High connection pool usage") + + status = "healthy" if not issues else "degraded" + + return { + "status": status, + "issues": issues, + "database_info": db_info, + "timestamp": "2024-01-01T00:00:00Z" + } except Exception as e: - logger.error(f"Failed to reset database: {e}") - return False - - -# Database dependency for FastAPI -def get_db() -> Generator[Session, None, None]: - """Database dependency for FastAPI endpoints""" - return get_database_session() \ No newline at end of file + logger.error(f"Database health check failed: {e}") + return { + "status": "unhealthy", + "error": str(e), + "timestamp": "2024-01-01T00:00:00Z" + } \ No newline at end of file diff --git a/backend/app/database/models.py b/backend/app/database/models.py index c4fa059..4bac976 100644 --- a/backend/app/database/models.py +++ b/backend/app/database/models.py @@ -1,33 +1,15 @@ -from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, Float, JSON, ForeignKey, Table -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship 
-from sqlalchemy.sql import func +import uuid from datetime import datetime -from typing import Optional, List, Dict, Any - -Base = declarative_base() - -# Association tables for many-to-many relationships -document_tags = Table( - 'document_tags', - Base.metadata, - Column('document_id', Integer, ForeignKey('documents.id'), primary_key=True), - Column('tag_id', Integer, ForeignKey('tags.id'), primary_key=True) +from typing import List, Optional, Dict, Any +from sqlalchemy import ( + Column, Integer, String, Text, Boolean, DateTime, Float, + ForeignKey, Table, MetaData, JSON, LargeBinary, Index, + UniqueConstraint, CheckConstraint, func ) +from sqlalchemy.orm import relationship, declarative_base +from sqlalchemy.dialects.postgresql import UUID, JSONB -document_compliance_frameworks = Table( - 'document_compliance_frameworks', - Base.metadata, - Column('document_id', Integer, ForeignKey('documents.id'), primary_key=True), - Column('framework_id', Integer, ForeignKey('compliance_frameworks.id'), primary_key=True) -) - -user_roles = Table( - 'user_roles', - Base.metadata, - Column('user_id', Integer, ForeignKey('users.id'), primary_key=True), - Column('role_id', Integer, ForeignKey('roles.id'), primary_key=True) -) +from .connection import Base class User(Base): @@ -36,8 +18,8 @@ class User(Base): id = Column(Integer, primary_key=True, index=True) email = Column(String(255), unique=True, index=True, nullable=False) + full_name = Column(String(255), nullable=False) hashed_password = Column(String(255), nullable=False) - full_name = Column(String(255), nullable=True) is_active = Column(Boolean, default=True) is_superuser = Column(Boolean, default=False) created_at = Column(DateTime(timezone=True), server_default=func.now()) @@ -45,10 +27,16 @@ class User(Base): last_login = Column(DateTime(timezone=True), nullable=True) # Relationships - documents = relationship("Document", back_populates="uploaded_by_user") + documents = relationship("Document", back_populates="user") 
audit_events = relationship("AuditEvent", back_populates="user") - processing_history = relationship("ProcessingHistory", back_populates="user") - user_roles = relationship("Role", secondary=user_roles, back_populates="users") + notifications = relationship("Notification", back_populates="user") + + # Indexes + __table_args__ = ( + Index('idx_users_email', 'email'), + Index('idx_users_active', 'is_active'), + Index('idx_users_created_at', 'created_at'), + ) class Role(Base): @@ -58,11 +46,40 @@ class Role(Base): id = Column(Integer, primary_key=True, index=True) name = Column(String(100), unique=True, nullable=False) description = Column(Text, nullable=True) - permissions = Column(JSON, nullable=True) # Store permissions as JSON + permissions = Column(JSON, default=list) # List of permission strings created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) # Relationships - users = relationship("User", secondary=user_roles, back_populates="user_roles") + user_roles = relationship("UserRole", back_populates="role") + + # Indexes + __table_args__ = ( + Index('idx_roles_name', 'name'), + Index('idx_roles_created_at', 'created_at'), + ) + + +class UserRole(Base): + """User-Role association model for additional metadata""" + __tablename__ = "user_roles" + + user_id = Column(Integer, ForeignKey('users.id'), primary_key=True) + role_id = Column(Integer, ForeignKey('roles.id'), primary_key=True) + assigned_at = Column(DateTime(timezone=True), server_default=func.now()) + assigned_by = Column(Integer, ForeignKey('users.id'), nullable=True) + + # Relationships + user = relationship("User", foreign_keys=[user_id]) + role = relationship("Role", foreign_keys=[role_id], back_populates="user_roles") + assigned_by_user = relationship("User", foreign_keys=[assigned_by]) + + # Indexes + __table_args__ = ( + Index('idx_user_roles_user_id', 'user_id'), + Index('idx_user_roles_role_id', 'role_id'), + 
Index('idx_user_roles_assigned_at', 'assigned_at'), + ) class Document(Base): @@ -70,45 +87,45 @@ class Document(Base): __tablename__ = "documents" id = Column(Integer, primary_key=True, index=True) + uuid = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) filename = Column(String(255), nullable=False) original_filename = Column(String(255), nullable=False) - content = Column(Text, nullable=True) # Extracted text content file_path = Column(String(500), nullable=False) file_size = Column(Integer, nullable=False) - content_type = Column(String(100), nullable=False) - doc_type = Column(String(50), nullable=True) # CONTRACT, INVOICE, etc. - domain = Column(String(100), nullable=True) # LEGAL, FINANCIAL, etc. - - # Processing metadata - processing_status = Column(String(50), default="pending") # pending, processing, completed, failed - processing_result = Column(JSON, nullable=True) # Store agent processing results - confidence_score = Column(Float, nullable=True) - risk_score = Column(Float, nullable=True) + file_type = Column(String(50), nullable=False) + mime_type = Column(String(100), nullable=False) - # Metadata - metadata = Column(JSON, nullable=True) # Additional metadata - tags = Column(JSON, nullable=True) # Document tags + # Content and processing + extracted_text = Column(Text, nullable=True) + summary = Column(Text, nullable=True) entities = Column(JSON, nullable=True) # Extracted entities - clauses = Column(JSON, nullable=True) # Extracted clauses - risks = Column(JSON, nullable=True) # Risk assessment results - qa_pairs = Column(JSON, nullable=True) # Generated Q&A pairs + metadata = Column(JSON, nullable=True) # Document metadata - # Timestamps + # Status and processing + status = Column(String(50), default="uploaded") # uploaded, processing, completed, failed + processing_progress = Column(Float, default=0.0) + processing_error = Column(Text, nullable=True) + + # User and timestamps + user_id = Column(Integer, 
ForeignKey('users.id'), nullable=False) uploaded_at = Column(DateTime(timezone=True), server_default=func.now()) processed_at = Column(DateTime(timezone=True), nullable=True) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - # Foreign keys - uploaded_by = Column(Integer, ForeignKey("users.id"), nullable=False) - # Relationships - uploaded_by_user = relationship("User", back_populates="documents") + user = relationship("User", back_populates="documents") + tags = relationship("DocumentTag", back_populates="document") processing_history = relationship("ProcessingHistory", back_populates="document") - audit_events = relationship("AuditEvent", back_populates="document") - comparisons = relationship("DocumentComparison", foreign_keys="DocumentComparison.document_a_id") - comparisons_as_b = relationship("DocumentComparison", foreign_keys="DocumentComparison.document_b_id") - document_tags = relationship("Tag", secondary=document_tags, back_populates="documents") - compliance_frameworks = relationship("ComplianceFramework", secondary=document_compliance_frameworks, back_populates="documents") + agent_executions = relationship("AgentExecution", back_populates="document") + + # Indexes + __table_args__ = ( + Index('idx_documents_uuid', 'uuid'), + Index('idx_documents_user_id', 'user_id'), + Index('idx_documents_status', 'status'), + Index('idx_documents_uploaded_at', 'uploaded_at'), + Index('idx_documents_file_type', 'file_type'), + ) class Tag(Base): @@ -118,11 +135,38 @@ class Tag(Base): id = Column(Integer, primary_key=True, index=True) name = Column(String(100), unique=True, nullable=False) description = Column(Text, nullable=True) - color = Column(String(7), nullable=True) # Hex color code + color = Column(String(7), default="#2196F3") # Hex color code created_at = Column(DateTime(timezone=True), server_default=func.now()) # Relationships - documents = relationship("Document", secondary=document_tags, back_populates="document_tags") + document_tags = 
relationship("DocumentTag", back_populates="tag") + + # Indexes + __table_args__ = ( + Index('idx_tags_name', 'name'), + Index('idx_tags_created_at', 'created_at'), + ) + + +class DocumentTag(Base): + """Document-Tag association table""" + __tablename__ = "document_tags" + + document_id = Column(Integer, ForeignKey('documents.id'), primary_key=True) + tag_id = Column(Integer, ForeignKey('tags.id'), primary_key=True) + assigned_at = Column(DateTime(timezone=True), server_default=func.now()) + assigned_by = Column(Integer, ForeignKey('users.id'), nullable=True) + + # Relationships + document = relationship("Document", back_populates="tags") + tag = relationship("Tag", back_populates="document_tags") + assigned_by_user = relationship("User", foreign_keys=[assigned_by]) + + # Indexes + __table_args__ = ( + Index('idx_document_tags_document_id', 'document_id'), + Index('idx_document_tags_tag_id', 'tag_id'), + ) class ComplianceFramework(Base): @@ -130,14 +174,18 @@ class ComplianceFramework(Base): __tablename__ = "compliance_frameworks" id = Column(Integer, primary_key=True, index=True) - name = Column(String(100), unique=True, nullable=False) # GDPR, SOX, HIPAA, etc. 
+ name = Column(String(100), unique=True, nullable=False) description = Column(Text, nullable=True) - version = Column(String(20), nullable=True) - requirements = Column(JSON, nullable=True) # Framework requirements + version = Column(String(50), nullable=True) + requirements = Column(JSON, nullable=True) # List of requirements created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - # Relationships - documents = relationship("Document", secondary=document_compliance_frameworks, back_populates="compliance_frameworks") + # Indexes + __table_args__ = ( + Index('idx_compliance_frameworks_name', 'name'), + Index('idx_compliance_frameworks_created_at', 'created_at'), + ) class ProcessingHistory(Base): @@ -145,299 +193,328 @@ class ProcessingHistory(Base): __tablename__ = "processing_history" id = Column(Integer, primary_key=True, index=True) - processing_id = Column(String(100), unique=True, index=True, nullable=False) - - # Processing details - workflow_id = Column(String(100), nullable=True) - current_stage = Column(String(100), nullable=True) - completed_stages = Column(JSON, nullable=True) # List of completed stages - failed_stages = Column(JSON, nullable=True) # List of failed stages - total_execution_time = Column(Float, nullable=True) # Total execution time in seconds - progress_percentage = Column(Float, default=0.0) - - # Results - status = Column(String(50), default="pending") # pending, running, completed, failed + document_id = Column(Integer, ForeignKey('documents.id'), nullable=False) + stage = Column(String(100), nullable=False) # ingestion, classification, extraction, etc. 
+ status = Column(String(50), nullable=False) # started, completed, failed + start_time = Column(DateTime(timezone=True), server_default=func.now()) + end_time = Column(DateTime(timezone=True), nullable=True) + duration = Column(Float, nullable=True) # Duration in seconds result = Column(JSON, nullable=True) # Processing result - confidence = Column(Float, nullable=True) - rationale = Column(Text, nullable=True) error_message = Column(Text, nullable=True) - # Timestamps - started_at = Column(DateTime(timezone=True), server_default=func.now()) - completed_at = Column(DateTime(timezone=True), nullable=True) - - # Foreign keys - document_id = Column(Integer, ForeignKey("documents.id"), nullable=False) - user_id = Column(Integer, ForeignKey("users.id"), nullable=False) - # Relationships document = relationship("Document", back_populates="processing_history") - user = relationship("User", back_populates="processing_history") - agent_executions = relationship("AgentExecution", back_populates="processing_history") + + # Indexes + __table_args__ = ( + Index('idx_processing_history_document_id', 'document_id'), + Index('idx_processing_history_stage', 'stage'), + Index('idx_processing_history_status', 'status'), + Index('idx_processing_history_start_time', 'start_time'), + ) class AgentExecution(Base): - """Agent execution model for tracking individual agent runs""" + """Agent execution model for tracking AI agent runs""" __tablename__ = "agent_executions" id = Column(Integer, primary_key=True, index=True) - execution_id = Column(String(100), unique=True, index=True, nullable=False) - - # Agent details - agent_type = Column(String(50), nullable=False) # classifier, entity, risk, etc. - agent_name = Column(String(100), nullable=False) + document_id = Column(Integer, ForeignKey('documents.id'), nullable=True) + agent_type = Column(String(100), nullable=False) # orchestrator, classifier, entity, etc. 
+ execution_id = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) # Execution details - input_size = Column(Integer, nullable=True) - output_size = Column(Integer, nullable=True) - execution_time = Column(Float, nullable=True) # Execution time in seconds - memory_usage = Column(Float, nullable=True) # Memory usage in MB - cpu_usage = Column(Float, nullable=True) # CPU usage percentage - - # Results - status = Column(String(50), default="pending") # pending, running, completed, failed - confidence = Column(Float, nullable=True) - output = Column(JSON, nullable=True) # Agent output - error_message = Column(Text, nullable=True) + input_data = Column(JSON, nullable=True) + output_data = Column(JSON, nullable=True) + confidence_score = Column(Float, nullable=True) - # Timestamps - started_at = Column(DateTime(timezone=True), server_default=func.now()) - completed_at = Column(DateTime(timezone=True), nullable=True) + # Timing and status + start_time = Column(DateTime(timezone=True), server_default=func.now()) + end_time = Column(DateTime(timezone=True), nullable=True) + duration = Column(Float, nullable=True) # Duration in seconds + status = Column(String(50), default="running") # running, completed, failed, timeout - # Foreign keys - processing_history_id = Column(Integer, ForeignKey("processing_history.id"), nullable=False) + # Error handling + error_message = Column(Text, nullable=True) + retry_count = Column(Integer, default=0) # Relationships - processing_history = relationship("ProcessingHistory", back_populates="agent_executions") + document = relationship("Document", back_populates="agent_executions") + + # Indexes + __table_args__ = ( + Index('idx_agent_executions_execution_id', 'execution_id'), + Index('idx_agent_executions_document_id', 'document_id'), + Index('idx_agent_executions_agent_type', 'agent_type'), + Index('idx_agent_executions_status', 'status'), + Index('idx_agent_executions_start_time', 'start_time'), + ) class 
DocumentComparison(Base): - """Document comparison model""" + """Document comparison model for storing comparison results""" __tablename__ = "document_comparisons" id = Column(Integer, primary_key=True, index=True) - comparison_id = Column(String(100), unique=True, index=True, nullable=False) + comparison_id = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) - # Comparison details - comparison_type = Column(String(50), nullable=False) # semantic, structural, compliance, etc. + # Documents being compared + document1_id = Column(Integer, ForeignKey('documents.id'), nullable=False) + document2_id = Column(Integer, ForeignKey('documents.id'), nullable=False) + + # Comparison results similarity_score = Column(Float, nullable=True) differences = Column(JSON, nullable=True) # Detailed differences - risk_changes = Column(JSON, nullable=True) # Risk changes between documents - - # Results - status = Column(String(50), default="pending") - result = Column(JSON, nullable=True) - confidence = Column(Float, nullable=True) summary = Column(Text, nullable=True) - # Timestamps + # Metadata + comparison_type = Column(String(50), default="content") # content, structure, metadata created_at = Column(DateTime(timezone=True), server_default=func.now()) completed_at = Column(DateTime(timezone=True), nullable=True) - # Foreign keys - document_a_id = Column(Integer, ForeignKey("documents.id"), nullable=False) - document_b_id = Column(Integer, ForeignKey("documents.id"), nullable=False) - created_by = Column(Integer, ForeignKey("users.id"), nullable=False) - # Relationships - document_a = relationship("Document", foreign_keys=[document_a_id]) - document_b = relationship("Document", foreign_keys=[document_b_id]) - creator = relationship("User") + document1 = relationship("Document", foreign_keys=[document1_id]) + document2 = relationship("Document", foreign_keys=[document2_id]) + + # Indexes + __table_args__ = ( + Index('idx_document_comparisons_comparison_id', 
'comparison_id'), + Index('idx_document_comparisons_document1_id', 'document1_id'), + Index('idx_document_comparisons_document2_id', 'document2_id'), + Index('idx_document_comparisons_created_at', 'created_at'), + ) class AuditEvent(Base): - """Audit event model for compliance and security auditing""" + """Audit event model for security and compliance logging""" __tablename__ = "audit_events" id = Column(Integer, primary_key=True, index=True) - event_id = Column(String(100), unique=True, index=True, nullable=False) + event_id = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) # Event details - event_type = Column(String(100), nullable=False) # document_upload, processing_start, etc. - event_category = Column(String(50), nullable=False) # security, compliance, system, etc. + event_type = Column(String(100), nullable=False) # login, logout, document_upload, etc. + event_category = Column(String(50), nullable=False) # authentication, document, system, etc. severity = Column(String(20), default="info") # info, warning, error, critical - # Event data - description = Column(Text, nullable=False) - details = Column(JSON, nullable=True) # Additional event details + # User and session + user_id = Column(Integer, ForeignKey('users.id'), nullable=True) + session_id = Column(String(255), nullable=True) ip_address = Column(String(45), nullable=True) # IPv4 or IPv6 user_agent = Column(Text, nullable=True) + # Event data + details = Column(JSON, nullable=True) # Additional event details + resource_type = Column(String(100), nullable=True) # document, user, system, etc. 
+ resource_id = Column(String(255), nullable=True) + # Timestamps timestamp = Column(DateTime(timezone=True), server_default=func.now()) - # Foreign keys - user_id = Column(Integer, ForeignKey("users.id"), nullable=True) - document_id = Column(Integer, ForeignKey("documents.id"), nullable=True) - # Relationships user = relationship("User", back_populates="audit_events") - document = relationship("Document", back_populates="audit_events") + + # Indexes + __table_args__ = ( + Index('idx_audit_events_event_id', 'event_id'), + Index('idx_audit_events_user_id', 'user_id'), + Index('idx_audit_events_event_type', 'event_type'), + Index('idx_audit_events_event_category', 'event_category'), + Index('idx_audit_events_severity', 'severity'), + Index('idx_audit_events_timestamp', 'timestamp'), + Index('idx_audit_events_ip_address', 'ip_address'), + ) -class SystemMetrics(Base): - """System metrics model for monitoring and performance tracking""" +class SystemMetric(Base): + """System metrics model for monitoring""" __tablename__ = "system_metrics" id = Column(Integer, primary_key=True, index=True) - metric_id = Column(String(100), unique=True, index=True, nullable=False) - - # Metric details metric_name = Column(String(100), nullable=False) - metric_type = Column(String(50), nullable=False) # counter, gauge, histogram, summary - value = Column(Float, nullable=False) - labels = Column(JSON, nullable=True) # Metric labels + metric_value = Column(Float, nullable=False) + metric_unit = Column(String(20), nullable=True) # seconds, bytes, percent, etc. + + # Context + tags = Column(JSON, nullable=True) # Key-value pairs for filtering + source = Column(String(100), nullable=True) # system, application, agent, etc. # Timestamps timestamp = Column(DateTime(timezone=True), server_default=func.now()) - # Additional metadata - description = Column(Text, nullable=True) - unit = Column(String(20), nullable=True) # seconds, bytes, percentage, etc. 
+ # Indexes + __table_args__ = ( + Index('idx_system_metrics_metric_name', 'metric_name'), + Index('idx_system_metrics_timestamp', 'timestamp'), + Index('idx_system_metrics_source', 'source'), + ) class WorkflowTemplate(Base): - """Workflow template model for predefined processing workflows""" + """Workflow template model for defining processing workflows""" __tablename__ = "workflow_templates" id = Column(Integer, primary_key=True, index=True) - template_id = Column(String(100), unique=True, index=True, nullable=False) - - # Template details - name = Column(String(100), nullable=False) + name = Column(String(100), unique=True, nullable=False) description = Column(Text, nullable=True) - version = Column(String(20), default="1.0.0") + version = Column(String(20), default="1.0") - # Workflow configuration - stages = Column(JSON, nullable=False) # Workflow stages configuration - agent_config = Column(JSON, nullable=True) # Agent-specific configuration - workflow_config = Column(JSON, nullable=True) # Workflow-specific configuration + # Workflow definition + stages = Column(JSON, nullable=False) # List of workflow stages + conditions = Column(JSON, nullable=True) # Conditional logic + settings = Column(JSON, nullable=True) # Workflow settings # Metadata is_active = Column(Boolean, default=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - # Foreign keys - created_by = Column(Integer, ForeignKey("users.id"), nullable=False) - - # Relationships - creator = relationship("User") + # Indexes + __table_args__ = ( + Index('idx_workflow_templates_name', 'name'), + Index('idx_workflow_templates_is_active', 'is_active'), + Index('idx_workflow_templates_created_at', 'created_at'), + ) class KnowledgeBase(Base): - """Knowledge base model for storing domain knowledge and rules""" + """Knowledge base model for storing AI knowledge""" __tablename__ = "knowledge_base" id = Column(Integer, 
primary_key=True, index=True) - kb_id = Column(String(100), unique=True, index=True, nullable=False) - - # Knowledge base details - name = Column(String(100), nullable=False) - description = Column(Text, nullable=True) - domain = Column(String(100), nullable=True) # legal, financial, healthcare, etc. + entry_id = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) # Content + title = Column(String(255), nullable=False) content = Column(Text, nullable=False) - content_type = Column(String(50), nullable=False) # rule, guideline, policy, etc. - vector_embedding = Column(JSON, nullable=True) # Vector embedding for similarity search + content_type = Column(String(50), default="text") # text, qa, rule, etc. + + # Categorization + category = Column(String(100), nullable=True) + tags = Column(JSON, nullable=True) # List of tags # Metadata source = Column(String(255), nullable=True) - version = Column(String(20), default="1.0.0") + confidence = Column(Float, nullable=True) is_active = Column(Boolean, default=True) + + # Timestamps created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - # Foreign keys - created_by = Column(Integer, ForeignKey("users.id"), nullable=False) - - # Relationships - creator = relationship("User") + # Indexes + __table_args__ = ( + Index('idx_knowledge_base_entry_id', 'entry_id'), + Index('idx_knowledge_base_title', 'title'), + Index('idx_knowledge_base_category', 'category'), + Index('idx_knowledge_base_content_type', 'content_type'), + Index('idx_knowledge_base_is_active', 'is_active'), + Index('idx_knowledge_base_created_at', 'created_at'), + ) class Notification(Base): - """Notification model for system notifications and alerts""" + """Notification model for user notifications""" __tablename__ = "notifications" id = Column(Integer, primary_key=True, index=True) - notification_id = Column(String(100), unique=True, index=True, nullable=False) + 
notification_id = Column(UUID(as_uuid=True), default=uuid.uuid4, unique=True, index=True) + + # Recipient + user_id = Column(Integer, ForeignKey('users.id'), nullable=False) # Notification details title = Column(String(255), nullable=False) message = Column(Text, nullable=False) - notification_type = Column(String(50), nullable=False) # info, warning, error, success - priority = Column(String(20), default="normal") # low, normal, high, urgent + notification_type = Column(String(50), default="info") # info, warning, error, success - # Delivery + # Status is_read = Column(Boolean, default=False) - is_sent = Column(Boolean, default=False) - sent_at = Column(DateTime(timezone=True), nullable=True) + read_at = Column(DateTime(timezone=True), nullable=True) + + # Metadata + data = Column(JSON, nullable=True) # Additional notification data + priority = Column(String(20), default="normal") # low, normal, high, urgent # Timestamps created_at = Column(DateTime(timezone=True), server_default=func.now()) - - # Foreign keys - user_id = Column(Integer, ForeignKey("users.id"), nullable=False) - document_id = Column(Integer, ForeignKey("documents.id"), nullable=True) + expires_at = Column(DateTime(timezone=True), nullable=True) # Relationships - user = relationship("User") - document = relationship("Document") + user = relationship("User", back_populates="notifications") + + # Indexes + __table_args__ = ( + Index('idx_notifications_notification_id', 'notification_id'), + Index('idx_notifications_user_id', 'user_id'), + Index('idx_notifications_is_read', 'is_read'), + Index('idx_notifications_notification_type', 'notification_type'), + Index('idx_notifications_priority', 'priority'), + Index('idx_notifications_created_at', 'created_at'), + ) class APILog(Base): - """API log model for tracking API usage and performance""" + """API log model for tracking API usage""" __tablename__ = "api_logs" id = Column(Integer, primary_key=True, index=True) + log_id = Column(UUID(as_uuid=True), 
default=uuid.uuid4, unique=True, index=True) # Request details - method = Column(String(10), nullable=False) - endpoint = Column(String(255), nullable=False) + method = Column(String(10), nullable=False) # GET, POST, PUT, DELETE, etc. + endpoint = Column(String(500), nullable=False) status_code = Column(Integer, nullable=False) + # User and session + user_id = Column(Integer, ForeignKey('users.id'), nullable=True) + ip_address = Column(String(45), nullable=True) + user_agent = Column(Text, nullable=True) + # Performance response_time = Column(Float, nullable=True) # Response time in seconds request_size = Column(Integer, nullable=True) # Request size in bytes response_size = Column(Integer, nullable=True) # Response size in bytes - # Request data - ip_address = Column(String(45), nullable=True) - user_agent = Column(Text, nullable=True) - request_headers = Column(JSON, nullable=True) - request_body = Column(Text, nullable=True) - response_body = Column(Text, nullable=True) + # Error handling + error_message = Column(Text, nullable=True) # Timestamps timestamp = Column(DateTime(timezone=True), server_default=func.now()) - # Foreign keys - user_id = Column(Integer, ForeignKey("users.id"), nullable=True) - - # Relationships - user = relationship("User") + # Indexes + __table_args__ = ( + Index('idx_api_logs_log_id', 'log_id'), + Index('idx_api_logs_user_id', 'user_id'), + Index('idx_api_logs_method', 'method'), + Index('idx_api_logs_endpoint', 'endpoint'), + Index('idx_api_logs_status_code', 'status_code'), + Index('idx_api_logs_timestamp', 'timestamp'), + Index('idx_api_logs_ip_address', 'ip_address'), + ) -class SystemConfiguration(Base): - """System configuration model for storing application settings""" - __tablename__ = "system_configurations" +class SystemConfig(Base): + """System configuration model""" + __tablename__ = "system_configs" id = Column(Integer, primary_key=True, index=True) - config_key = Column(String(100), unique=True, index=True, nullable=False) 
- - # Configuration details - config_value = Column(Text, nullable=False) - config_type = Column(String(50), nullable=False) # string, integer, float, boolean, json + key = Column(String(100), unique=True, nullable=False) + value = Column(Text, nullable=False) description = Column(Text, nullable=True) + category = Column(String(50), nullable=True) # security, performance, monitoring, etc. # Metadata - is_active = Column(Boolean, default=True) + is_encrypted = Column(Boolean, default=False) + is_sensitive = Column(Boolean, default=False) + + # Timestamps created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - # Foreign keys - updated_by = Column(Integer, ForeignKey("users.id"), nullable=True) - - # Relationships - updater = relationship("User") + # Indexes + __table_args__ = ( + Index('idx_system_configs_key', 'key'), + Index('idx_system_configs_category', 'category'), + Index('idx_system_configs_created_at', 'created_at'), + ) diff --git a/backend/app/main.py b/backend/app/main.py index b92db74..a11e7a1 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,269 +1,356 @@ -import asyncio import os +import logging from contextlib import asynccontextmanager -from typing import Dict, Any - -from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks, UploadFile, File +from fastapi import FastAPI, HTTPException, Request from fastapi.middleware.cors import CORSMiddleware -from fastapi.middleware.trustedhost import TrustedHostMiddleware -from fastapi.responses import StreamingResponse, JSONResponse -from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials -import uvicorn -from sse_starlette.sse import EventSourceResponse +from fastapi.responses import JSONResponse +from fastapi.exceptions import RequestValidationError +from starlette.exceptions import HTTPException as StarletteHTTPException from .core.config import settings -from .core.security import 
get_current_user, create_access_token from .core.middleware import setup_middleware +from .core.monitoring import setup_monitoring, instrument_fastapi from .database.connection import init_database, check_database_connection -from .api.v1.endpoints import auth, agentic, documents, traces, qa, compare, audit, settings, memory, summarizer, translator, sentiment, agents from .services.agent_service import AgentService -from .services.memory_service import MemoryService -from .core.monitoring import setup_monitoring, instrument_fastapi -# Global agent service instance +# Configure logging +logging.basicConfig( + level=getattr(logging, settings.LOG_LEVEL), + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Global service instances agent_service = None + @asynccontextmanager async def lifespan(app: FastAPI): """Application lifespan manager""" global agent_service # Startup - print("πŸš€ Starting Smart Document Bot...") - - # Check database connection - if check_database_connection(): - print("βœ… Database connection verified") - # Initialize database tables - init_database() - print("βœ… Database initialized") - else: - print("⚠️ Database connection failed - using fallback mode") + logger.info("Starting AI Document Agent application...") - # Initialize agent service - agent_service = AgentService() - print("βœ… Agent service initialized") - - # Setup monitoring - setup_monitoring() - print("βœ… Monitoring setup complete") + try: + # Initialize database + logger.info("Initializing database...") + await init_database() + + # Check database connection + logger.info("Checking database connection...") + await check_database_connection() + + # Initialize agent service + logger.info("Initializing agent service...") + agent_service = AgentService() + await agent_service.initialize() + + # Setup monitoring + if settings.ENABLE_MONITORING: + logger.info("Setting up monitoring...") + setup_monitoring() + + 
logger.info("Application startup completed successfully") + + except Exception as e: + logger.error(f"Application startup failed: {e}") + raise yield # Shutdown - print("πŸ›‘ Shutting down Smart Document Bot...") - if agent_service: - await agent_service.cleanup_old_processing_history() - print("βœ… Cleanup complete") + logger.info("Shutting down AI Document Agent application...") + + try: + if agent_service: + await agent_service.cleanup() + logger.info("Application shutdown completed successfully") + except Exception as e: + logger.error(f"Application shutdown error: {e}") + -# Create FastAPI app +# Create FastAPI application app = FastAPI( - title="Smart Document Bot API", - description="AI-powered document processing and analysis system", - version="1.0.0", + title=settings.APP_NAME, + version=settings.APP_VERSION, + description="Enterprise-Grade AI Document Processing & Analysis Platform", + docs_url="/docs" if settings.DEBUG else None, + redoc_url="/redoc" if settings.DEBUG else None, lifespan=lifespan ) -# Security -security = HTTPBearer() - -# Add CORS middleware -app.add_middleware( - CORSMiddleware, - allow_origins=settings.ALLOWED_ORIGINS, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - -# Setup custom middleware +# Setup middleware setup_middleware(app) -# Instrument FastAPI for monitoring -instrument_fastapi(app) +# Setup monitoring +if settings.ENABLE_MONITORING: + instrument_fastapi(app) -# Dependency to get agent service -def get_agent_service() -> AgentService: - """Get the global agent service instance""" - if agent_service is None: - raise RuntimeError("Agent service not initialized") - return agent_service -# Override dependency injection for endpoints -app.dependency_overrides[AgentService] = get_agent_service +# Global exception handlers +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + """Handle HTTP exceptions""" + 
logger.error(f"HTTP Exception: {exc.status_code} - {exc.detail}") + return JSONResponse( + status_code=exc.status_code, + content={ + "error": exc.detail, + "status_code": exc.status_code, + "path": request.url.path + } + ) -# Include routers -app.include_router( - auth.router, - prefix="/api/v1/auth", - tags=["Authentication"] -) -app.include_router( - agentic.router, - prefix="/api/v1/agentic", - tags=["Agentic Processing"] -) +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request: Request, exc: RequestValidationError): + """Handle validation errors""" + logger.error(f"Validation Error: {exc.errors()}") + return JSONResponse( + status_code=422, + content={ + "error": "Validation error", + "details": exc.errors(), + "path": request.url.path + } + ) -app.include_router( - documents.router, - prefix="/api/v1/documents", - tags=["Documents"], - dependencies=[Depends(get_agent_service)] -) -app.include_router( - traces.router, - prefix="/api/v1/traces", - tags=["Agent Traces"], - dependencies=[Depends(get_agent_service)] -) +@app.exception_handler(Exception) +async def general_exception_handler(request: Request, exc: Exception): + """Handle general exceptions""" + logger.error(f"Unhandled Exception: {str(exc)}", exc_info=True) + return JSONResponse( + status_code=500, + content={ + "error": "Internal server error", + "message": "An unexpected error occurred", + "path": request.url.path + } + ) -app.include_router( - qa.router, - prefix="/api/v1/qa", - tags=["Question Answering"], - dependencies=[Depends(get_agent_service)] -) -app.include_router( - compare.router, - prefix="/api/v1/compare", - tags=["Document Comparison"], - dependencies=[Depends(get_agent_service)] -) +# Health check endpoints +@app.get("/health") +async def health_check(): + """Basic health check""" + try: + # Check database connection + await check_database_connection() + + # Check agent service + if agent_service: + service_status = await 
agent_service.get_status() + else: + service_status = "not_initialized" + + return { + "status": "healthy", + "timestamp": "2024-01-01T00:00:00Z", + "version": settings.APP_VERSION, + "database": "connected", + "agent_service": service_status + } + except Exception as e: + logger.error(f"Health check failed: {e}") + raise HTTPException(status_code=503, detail="Service unhealthy") -app.include_router( - audit.router, - prefix="/api/v1/audit", - tags=["Audit Trail"], - dependencies=[Depends(get_agent_service)] -) -app.include_router( - settings.router, - prefix="/api/v1/settings", - tags=["Settings"] -) +@app.get("/health/detailed") +async def detailed_health_check(): + """Detailed health check with component status""" + try: + health_status = { + "status": "healthy", + "timestamp": "2024-01-01T00:00:00Z", + "version": settings.APP_VERSION, + "components": {} + } + + # Database health + try: + await check_database_connection() + health_status["components"]["database"] = { + "status": "healthy", + "message": "Database connection successful" + } + except Exception as e: + health_status["components"]["database"] = { + "status": "unhealthy", + "message": f"Database connection failed: {str(e)}" + } + health_status["status"] = "degraded" + + # Agent service health + if agent_service: + try: + service_status = await agent_service.get_status() + health_status["components"]["agent_service"] = { + "status": "healthy", + "message": "Agent service operational", + "details": service_status + } + except Exception as e: + health_status["components"]["agent_service"] = { + "status": "unhealthy", + "message": f"Agent service failed: {str(e)}" + } + health_status["status"] = "degraded" + else: + health_status["components"]["agent_service"] = { + "status": "not_initialized", + "message": "Agent service not initialized" + } + + # Redis health + try: + import redis + redis_client = redis.Redis.from_url(settings.REDIS_URL, decode_responses=True) + redis_client.ping() + 
health_status["components"]["redis"] = { + "status": "healthy", + "message": "Redis connection successful" + } + except Exception as e: + health_status["components"]["redis"] = { + "status": "unhealthy", + "message": f"Redis connection failed: {str(e)}" + } + health_status["status"] = "degraded" + + return health_status + + except Exception as e: + logger.error(f"Detailed health check failed: {e}") + raise HTTPException(status_code=503, detail="Service unhealthy") -app.include_router( - memory.router, - prefix="/api/v1/memory", - tags=["Memory"] -) -app.include_router( - summarizer.router, - prefix="/api/v1/summarizer", - tags=["Document Summarization"], - dependencies=[Depends(get_agent_service)] -) +# Root endpoint +@app.get("/") +async def root(): + """Root endpoint with API information""" + return { + "name": settings.APP_NAME, + "version": settings.APP_VERSION, + "description": "Enterprise-Grade AI Document Processing & Analysis Platform", + "status": "operational", + "endpoints": { + "docs": "/docs", + "health": "/health", + "api": "/api/v1" + }, + "features": [ + "Multi-Agent AI Processing", + "Document Intelligence", + "Enterprise Security", + "Real-time Analytics", + "Compliance Monitoring" + ] + } -app.include_router( - translator.router, - prefix="/api/v1/translator", - tags=["Document Translation"], - dependencies=[Depends(get_agent_service)] -) -app.include_router( - sentiment.router, - prefix="/api/v1/sentiment", - tags=["Sentiment Analysis"], - dependencies=[Depends(get_agent_service)] -) +# API status endpoint +@app.get("/api/v1/status") +async def api_status(): + """API status endpoint""" + return { + "api_version": "v1", + "status": "operational", + "timestamp": "2024-01-01T00:00:00Z", + "endpoints": { + "auth": "/api/v1/auth", + "documents": "/api/v1/documents", + "agents": "/api/v1/agents", + "analytics": "/api/v1/analytics" + } + } + -app.include_router( - agents.router, - prefix="/api/v1/agents", - tags=["Agent Management"], - 
dependencies=[Depends(get_agent_service)] +# Include API routers +from .api.v1.endpoints import ( + auth, agentic, documents, traces, qa, compare, + audit, settings, memory, summarizer, translator, + sentiment, agents ) -# Health check endpoint -@app.get("/health") -async def health_check(): - """Health check endpoint""" - return { - "status": "healthy", - "version": "1.0.0", - "timestamp": "2024-01-01T00:00:00Z" - } +app.include_router(auth.router, prefix="/api/v1/auth", tags=["Authentication"]) +app.include_router(agentic.router, prefix="/api/v1/agentic", tags=["Agentic Processing"]) +app.include_router(documents.router, prefix="/api/v1/documents", tags=["Documents"]) +app.include_router(traces.router, prefix="/api/v1/traces", tags=["Agent Traces"]) +app.include_router(qa.router, prefix="/api/v1/qa", tags=["Question Answering"]) +app.include_router(compare.router, prefix="/api/v1/compare", tags=["Document Comparison"]) +app.include_router(audit.router, prefix="/api/v1/audit", tags=["Audit Trail"]) +app.include_router(settings.router, prefix="/api/v1/settings", tags=["Settings"]) +app.include_router(memory.router, prefix="/api/v1/memory", tags=["Memory Management"]) +app.include_router(summarizer.router, prefix="/api/v1/summarizer", tags=["Document Summarization"]) +app.include_router(translator.router, prefix="/api/v1/translator", tags=["Document Translation"]) +app.include_router(sentiment.router, prefix="/api/v1/sentiment", tags=["Sentiment Analysis"]) +app.include_router(agents.router, prefix="/api/v1/agents", tags=["Agent Management"]) -# Root endpoint -@app.get("/") -async def root(): - """Root endpoint""" - return { - "message": "Smart Document Bot API", - "version": "1.0.0", - "docs": "/docs", - "health": "/health" - } # Agent capabilities endpoint @app.get("/api/v1/agents/capabilities") -async def get_agent_capabilities(agent_service: AgentService = Depends(get_agent_service)): - """Get information about available agents and their capabilities""" +async 
def get_agent_capabilities(): + """Get all available agent capabilities""" return { - "agents": [ - { - "name": "OrchestratorAgent", - "description": "Coordinates the overall document processing workflow", - "capabilities": ["workflow_coordination", "task_scheduling", "error_handling"] + "agents": { + "orchestrator": { + "description": "Workflow orchestration and coordination", + "capabilities": ["workflow_planning", "execution_monitoring", "resource_allocation"] }, - { - "name": "IngestionAgent", - "description": "Handles document upload and initial processing", - "capabilities": ["file_upload", "format_detection", "preprocessing"] + "ingestion": { + "description": "Document ingestion and content extraction", + "capabilities": ["text_extraction", "metadata_extraction", "format_detection"] }, - { - "name": "ClassifierAgent", - "description": "Categorizes documents by type and content", - "capabilities": ["document_classification", "content_analysis", "metadata_extraction"] + "classifier": { + "description": "Document classification and categorization", + "capabilities": ["document_classification", "domain_detection", "content_categorization"] }, - { - "name": "EntityAgent", - "description": "Extracts key entities and information", - "capabilities": ["entity_extraction", "named_entity_recognition", "relationship_mapping"] + "entity": { + "description": "Named entity recognition and extraction", + "capabilities": ["entity_extraction", "relationship_mapping", "entity_linking"] }, - { - "name": "RiskAgent", - "description": "Assesses compliance risks and issues", - "capabilities": ["risk_assessment", "compliance_checking", "vulnerability_detection"] + "risk": { + "description": "Risk assessment and compliance monitoring", + "capabilities": ["risk_assessment", "compliance_checking", "policy_enforcement"] }, - { - "name": "QAAgent", - "description": "Provides intelligent Q&A capabilities", - "capabilities": ["question_answering", "context_understanding", 
"knowledge_retrieval"] + "qa": { + "description": "Question answering and document querying", + "capabilities": ["question_answering", "context_retrieval", "answer_generation"] }, - { - "name": "CompareAgent", - "description": "Compares documents for similarities and differences", - "capabilities": ["document_comparison", "similarity_analysis", "difference_detection"] + "compare": { + "description": "Document comparison and diff analysis", + "capabilities": ["document_comparison", "change_detection", "similarity_analysis"] }, - { - "name": "AuditAgent", - "description": "Monitors and logs all system activities", - "capabilities": ["activity_logging", "audit_trail", "compliance_monitoring"] + "audit": { + "description": "Audit logging and compliance tracking", + "capabilities": ["audit_logging", "compliance_tracking", "event_monitoring"] }, - { - "name": "SummarizerAgent", - "description": "Creates document summaries and insights", - "capabilities": ["document_summarization", "key_point_extraction", "insight_generation"] + "summarizer": { + "description": "Document summarization and key point extraction", + "capabilities": ["extractive_summarization", "abstractive_summarization", "key_point_extraction"] }, - { - "name": "TranslatorAgent", - "description": "Handles multi-language document processing", - "capabilities": ["language_translation", "multilingual_processing", "cultural_adaptation"] + "translator": { + "description": "Multi-language document translation", + "capabilities": ["language_detection", "document_translation", "quality_assessment"] }, - { - "name": "SentimentAnalysisAgent", - "description": "Analyzes document sentiment and tone", + "sentiment": { + "description": "Sentiment analysis and tone detection", "capabilities": ["sentiment_analysis", "tone_detection", "emotion_recognition"] } - ] + }, + "total_agents": 11, + "total_capabilities": 33 } + if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) + import uvicorn + uvicorn.run( 
+ "app.main:app", + host=settings.HOST, + port=settings.PORT, + reload=settings.DEBUG, + log_level=settings.LOG_LEVEL.lower() + ) diff --git a/backend/app/risk/policies/compliance.rego b/backend/app/risk/policies/compliance.rego new file mode 100644 index 0000000..80e6dbd --- /dev/null +++ b/backend/app/risk/policies/compliance.rego @@ -0,0 +1,110 @@ +package smart_doc_bot.compliance + +# GDPR Compliance Policies +gdpr_data_retention_allowed { + input.data_type == "personal_data" + input.retention_days <= 90 +} + +gdpr_data_processing_allowed { + input.purpose == "legitimate_interest" + input.consent_given == true +} + +gdpr_data_transfer_allowed { + input.destination == "EU" +} + +gdpr_data_transfer_allowed { + input.destination == "US" + input.adequacy_decision == true +} + +# HIPAA Compliance Policies +hipaa_phi_access_allowed { + input.user_role == "healthcare_provider" + input.purpose == "treatment" +} + +hipaa_phi_access_allowed { + input.user_role == "healthcare_provider" + input.purpose == "payment" +} + +hipaa_phi_access_allowed { + input.user_role == "healthcare_provider" + input.purpose == "healthcare_operations" +} + +hipaa_audit_required { + input.phi_accessed == true +} + +# SOX Compliance Policies +sox_financial_data_access_allowed { + input.user_role == "auditor" + input.purpose == "financial_audit" +} + +sox_financial_data_access_allowed { + input.user_role == "finance_manager" + input.purpose == "financial_reporting" +} + +sox_audit_trail_required { + input.financial_data_accessed == true +} + +# Data Classification Policies +data_classification_required { + input.data_type == "sensitive" + input.classification_level != "" +} + +data_encryption_required { + input.data_type == "sensitive" + input.encryption_enabled == true +} + +# Access Control Policies +access_control_required { + input.resource_type == "document" + input.user_has_permission == true +} + +access_control_required { + input.resource_type == "document" + input.user_owns_resource == 
true +} + +# Audit Policies +audit_logging_required { + input.action == "read" + input.resource_type == "sensitive" +} + +audit_logging_required { + input.action == "write" + input.resource_type == "sensitive" +} + +audit_logging_required { + input.action == "delete" + input.resource_type == "sensitive" +} + +# Default deny for sensitive operations +default sensitive_operation_allowed = false + +sensitive_operation_allowed { + input.operation == "data_export" + input.user_role == "admin" + input.audit_logged == true +} + +sensitive_operation_allowed { + input.operation == "data_deletion" + input.user_role == "admin" + input.confirmation_received == true + input.audit_logged == true +} diff --git a/backend/app/risk/policies/security.rego b/backend/app/risk/policies/security.rego new file mode 100644 index 0000000..a2ec517 --- /dev/null +++ b/backend/app/risk/policies/security.rego @@ -0,0 +1,87 @@ +package smart_doc_bot.security + +# Default deny +default allow = false + +# Allow access if user has required permissions +allow { + input.method == "GET" + input.path = ["api", "v1", "health"] +} + +allow { + input.method == "POST" + input.path = ["api", "v1", "auth", "login"] +} + +allow { + input.method == "POST" + input.path = ["api", "v1", "auth", "register"] +} + +# Document access policies +allow { + input.method == "GET" + input.path = ["api", "v1", "documents"] + has_permission(input.user, "documents:read") +} + +allow { + input.method == "POST" + input.path = ["api", "v1", "documents", "upload"] + has_permission(input.user, "documents:create") +} + +allow { + input.method == "DELETE" + input.path = ["api", "v1", "documents", "id"] + has_permission(input.user, "documents:delete") +} + +# Agent access policies +allow { + input.method == "POST" + input.path = ["api", "v1", "agents", "process"] + has_permission(input.user, "agents:execute") +} + +allow { + input.method == "GET" + input.path = ["api", "v1", "agents", "traces"] + has_permission(input.user, 
"agents:read") +} + +# Analytics access policies +allow { + input.method == "GET" + input.path = ["api", "v1", "analytics"] + has_permission(input.user, "analytics:read") +} + +# Admin access policies +allow { + input.method == "GET" + input.path = ["api", "v1", "admin"] + is_admin(input.user) +} + +# Helper functions +has_permission(user, permission) { + user.roles[_] == "admin" +} + +has_permission(user, permission) { + user.permissions[_] == permission +} + +has_permission(user, permission) { + user.permissions[_] == "*" +} + +is_admin(user) { + user.roles[_] == "admin" +} + +is_admin(user) { + user.is_superuser == true +} diff --git a/backend/app/services/agent_service.py b/backend/app/services/agent_service.py index 1562897..e064dce 100644 --- a/backend/app/services/agent_service.py +++ b/backend/app/services/agent_service.py @@ -1,7 +1,9 @@ import asyncio import uuid +import logging from typing import Any, Dict, List, Optional from datetime import datetime +from contextlib import asynccontextmanager from ..agents.orchestrator import OrchestratorAgent from ..agents.ingestion import IngestionAgent @@ -16,40 +18,112 @@ from ..agents.sentiment import SentimentAnalysisAgent from ..models.base import Document, AgentResult, AgentType from ..core.config import settings +from ..core.monitoring import get_monitor + +logger = logging.getLogger(__name__) class AgentService: """Service for managing agent execution and orchestration""" def __init__(self): - # Initialize the orchestrator agent - self.orchestrator = OrchestratorAgent(llm_model=settings.LLM_MODEL) - - # Initialize individual agents for direct access - self.ingestion_agent = IngestionAgent(llm_model=settings.LLM_MODEL) - self.classifier_agent = ClassifierAgent(llm_model=settings.LLM_MODEL) - self.entity_agent = EntityAgent(llm_model=settings.LLM_MODEL) - self.risk_agent = RiskAgent(llm_model=settings.LLM_MODEL) - self.qa_agent = QAAgent(llm_model=settings.LLM_MODEL) - self.compare_agent = 
CompareAgent(llm_model=settings.LLM_MODEL) - self.audit_agent = AuditAgent(llm_model=settings.LLM_MODEL) - self.summarizer_agent = SummarizerAgent(llm_model=settings.LLM_MODEL) - self.translator_agent = TranslatorAgent(llm_model=settings.LLM_MODEL) - self.sentiment_agent = SentimentAnalysisAgent(llm_model=settings.LLM_MODEL) - - # Processing history + self.monitor = get_monitor() self.processing_history = {} + self.agent_instances = {} + self.is_initialized = False + + # Agent mapping for easy access + self.agent_mapping = { + "orchestrator": None, + "ingestion": None, + "classifier": None, + "entity": None, + "risk": None, + "qa": None, + "compare": None, + "audit": None, + "summarizer": None, + "translator": None, + "sentiment": None + } + + async def initialize(self) -> None: + """Initialize all agents and services""" + try: + logger.info("Initializing AgentService...") + + # Initialize orchestrator agent + self.agent_mapping["orchestrator"] = OrchestratorAgent( + llm_model=settings.OPENAI_MODEL + ) + + # Initialize individual agents + self.agent_mapping["ingestion"] = IngestionAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["classifier"] = ClassifierAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["entity"] = EntityAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["risk"] = RiskAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["qa"] = QAAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["compare"] = CompareAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["audit"] = AuditAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["summarizer"] = SummarizerAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["translator"] = TranslatorAgent( + llm_model=settings.OPENAI_MODEL + ) + self.agent_mapping["sentiment"] = SentimentAnalysisAgent( + llm_model=settings.OPENAI_MODEL + ) + + # Store references for backward compatibility + self.orchestrator = 
self.agent_mapping["orchestrator"] + self.ingestion_agent = self.agent_mapping["ingestion"] + self.classifier_agent = self.agent_mapping["classifier"] + self.entity_agent = self.agent_mapping["entity"] + self.risk_agent = self.agent_mapping["risk"] + self.qa_agent = self.agent_mapping["qa"] + self.compare_agent = self.agent_mapping["compare"] + self.audit_agent = self.agent_mapping["audit"] + self.summarizer_agent = self.agent_mapping["summarizer"] + self.translator_agent = self.agent_mapping["translator"] + self.sentiment_agent = self.agent_mapping["sentiment"] + + self.is_initialized = True + logger.info("AgentService initialized successfully") + + except Exception as e: + logger.error(f"Failed to initialize AgentService: {e}") + raise async def process_document(self, document: Document, goal: str = "Analyze document for compliance and risks") -> Dict[str, Any]: """Process a document through the complete agent pipeline""" + if not self.is_initialized: + raise RuntimeError("AgentService not initialized. 
Call initialize() first.") + + processing_id = str(uuid.uuid4()) + start_time = datetime.utcnow() + try: - # Generate processing ID - processing_id = str(uuid.uuid4()) + logger.info(f"Starting document processing: {processing_id}") # Initialize processing history self.processing_history[processing_id] = { "processing_id": processing_id, - "start_time": datetime.utcnow().isoformat(), + "start_time": start_time.isoformat(), "document_id": getattr(document, 'id', 'unknown'), "goal": goal, "stages": [], @@ -60,79 +134,138 @@ async def process_document(self, document: Document, goal: str = "Analyze docume context = { "document": document, "processing_id": processing_id, - "goal": goal + "goal": goal, + "agent_service": self } - # Execute orchestration - orchestration_result = await self.orchestrator.run(goal, context) + # Execute orchestration with monitoring + with self.monitor.monitor_agent_execution("orchestrator", processing_id): + orchestration_result = await self.orchestrator.run(goal, context) + + # Calculate processing time + end_time = datetime.utcnow() + processing_duration = (end_time - start_time).total_seconds() # Update processing history self.processing_history[processing_id].update({ - "end_time": datetime.utcnow().isoformat(), + "end_time": end_time.isoformat(), + "duration": processing_duration, "status": "completed" if orchestration_result else "failed", "orchestration_result": orchestration_result.dict() if orchestration_result else None, "workflow_status": self.orchestrator.get_workflow_status() }) + # Record metrics + self.monitor.record_agent_execution( + agent_name="orchestrator", + duration=processing_duration, + success=bool(orchestration_result), + confidence=orchestration_result.confidence if orchestration_result else 0.0 + ) + + logger.info(f"Document processing completed: {processing_id} in {processing_duration:.2f}s") + return { "processing_id": processing_id, "status": "completed" if orchestration_result else "failed", "result": 
orchestration_result.output if orchestration_result else None, "confidence": orchestration_result.confidence if orchestration_result else 0.0, "rationale": orchestration_result.rationale if orchestration_result else "Processing failed", - "workflow_status": self.orchestrator.get_workflow_status() + "workflow_status": self.orchestrator.get_workflow_status(), + "duration": processing_duration } except Exception as e: + end_time = datetime.utcnow() + processing_duration = (end_time - start_time).total_seconds() + + logger.error(f"Document processing failed: {processing_id} - {e}") + # Update processing history with error if processing_id in self.processing_history: self.processing_history[processing_id].update({ - "end_time": datetime.utcnow().isoformat(), + "end_time": end_time.isoformat(), + "duration": processing_duration, "status": "failed", "error": str(e) }) + # Record error metrics + self.monitor.record_agent_execution( + agent_name="orchestrator", + duration=processing_duration, + success=False, + error=str(e) + ) + return { - "processing_id": processing_id if 'processing_id' in locals() else "unknown", + "processing_id": processing_id, "status": "failed", "error": str(e), "result": None, "confidence": 0.0, - "rationale": f"Processing failed: {str(e)}" + "rationale": f"Processing failed: {str(e)}", + "duration": processing_duration } async def execute_single_agent(self, agent_type: str, document: Document, goal: str) -> AgentResult: - """Execute a single agent""" + """Execute a single agent with monitoring and error handling""" + if not self.is_initialized: + raise RuntimeError("AgentService not initialized. 
Call initialize() first.") + + agent_type = agent_type.lower() + if agent_type not in self.agent_mapping: + raise ValueError(f"Unknown agent type: {agent_type}") + + agent = self.agent_mapping[agent_type] + if not agent: + raise RuntimeError(f"Agent {agent_type} not initialized") + + start_time = datetime.utcnow() + try: - # Map agent types to agent instances - agent_mapping = { - "ingestion": self.ingestion_agent, - "classifier": self.classifier_agent, - "entity": self.entity_agent, - "risk": self.risk_agent, - "qa": self.qa_agent, - "compare": self.compare_agent, - "audit": self.audit_agent, - "summarizer": self.summarizer_agent, - "translator": self.translator_agent, - "sentiment": self.sentiment_agent - } - - agent = agent_mapping.get(agent_type.lower()) - if not agent: - raise ValueError(f"Unknown agent type: {agent_type}") + logger.info(f"Executing agent: {agent_type}") # Prepare context context = { "document": document, - "goal": goal + "goal": goal, + "agent_service": self } - # Execute agent - result = await agent.run(goal, context) + # Execute agent with monitoring + with self.monitor.monitor_agent_execution(agent_type, f"{agent_type}_{document.id}"): + result = await agent.run(goal, context) + + # Calculate execution time + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + # Record metrics + self.monitor.record_agent_execution( + agent_name=agent_type, + duration=execution_duration, + success=bool(result), + confidence=result.confidence if result else 0.0 + ) + + logger.info(f"Agent execution completed: {agent_type} in {execution_duration:.2f}s") return result except Exception as e: + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + logger.error(f"Agent execution failed: {agent_type} - {e}") + + # Record error metrics + self.monitor.record_agent_execution( + agent_name=agent_type, + duration=execution_duration, + success=False, + error=str(e) + ) + return 
AgentResult( output=None, rationale=f"Agent execution failed: {str(e)}", @@ -142,88 +275,202 @@ async def execute_single_agent(self, agent_type: str, document: Document, goal: async def compare_documents(self, document_a: Document, document_b: Document, goal: str = "Compare documents for differences and risk changes") -> Dict[str, Any]: """Compare two documents using the compare agent""" + if not self.is_initialized: + raise RuntimeError("AgentService not initialized. Call initialize() first.") + + start_time = datetime.utcnow() + try: + logger.info("Starting document comparison") + # Prepare context for comparison context = { "document_a": document_a, "document_b": document_b, - "goal": goal + "goal": goal, + "agent_service": self } - # Execute comparison - comparison_result = await self.compare_agent.run(goal, context) + # Execute comparison with monitoring + with self.monitor.monitor_agent_execution("compare", f"compare_{document_a.id}_{document_b.id}"): + comparison_result = await self.compare_agent.run(goal, context) + + # Calculate execution time + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + # Record metrics + self.monitor.record_agent_execution( + agent_name="compare", + duration=execution_duration, + success=bool(comparison_result), + confidence=comparison_result.confidence if comparison_result else 0.0 + ) + + logger.info(f"Document comparison completed in {execution_duration:.2f}s") return { "status": "completed" if comparison_result else "failed", "result": comparison_result.output if comparison_result else None, "confidence": comparison_result.confidence if comparison_result else 0.0, - "rationale": comparison_result.rationale if comparison_result else "Comparison failed" + "rationale": comparison_result.rationale if comparison_result else "Comparison failed", + "duration": execution_duration } except Exception as e: + end_time = datetime.utcnow() + execution_duration = (end_time - 
start_time).total_seconds() + + logger.error(f"Document comparison failed: {e}") + + # Record error metrics + self.monitor.record_agent_execution( + agent_name="compare", + duration=execution_duration, + success=False, + error=str(e) + ) + return { "status": "failed", "error": str(e), "result": None, "confidence": 0.0, - "rationale": f"Comparison failed: {str(e)}" + "rationale": f"Comparison failed: {str(e)}", + "duration": execution_duration } async def generate_audit_trail(self, document: Document, processing_history: List[Dict] = None) -> Dict[str, Any]: """Generate audit trail for a document""" + if not self.is_initialized: + raise RuntimeError("AgentService not initialized. Call initialize() first.") + + start_time = datetime.utcnow() + try: + logger.info(f"Generating audit trail for document: {document.id}") + # Prepare context for audit context = { "document": document, "processing_history": processing_history or [], - "goal": "Generate comprehensive audit trail" + "goal": "Generate comprehensive audit trail", + "agent_service": self } - # Execute audit - audit_result = await self.audit_agent.run("Generate audit trail", context) + # Execute audit with monitoring + with self.monitor.monitor_agent_execution("audit", f"audit_{document.id}"): + audit_result = await self.audit_agent.run("Generate audit trail", context) + + # Calculate execution time + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + # Record metrics + self.monitor.record_agent_execution( + agent_name="audit", + duration=execution_duration, + success=bool(audit_result), + confidence=audit_result.confidence if audit_result else 0.0 + ) + + logger.info(f"Audit trail generation completed in {execution_duration:.2f}s") return { "status": "completed" if audit_result else "failed", "result": audit_result.output if audit_result else None, "confidence": audit_result.confidence if audit_result else 0.0, - "rationale": audit_result.rationale if audit_result 
else "Audit generation failed" + "rationale": audit_result.rationale if audit_result else "Audit generation failed", + "duration": execution_duration } except Exception as e: + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + logger.error(f"Audit trail generation failed: {e}") + + # Record error metrics + self.monitor.record_agent_execution( + agent_name="audit", + duration=execution_duration, + success=False, + error=str(e) + ) + return { "status": "failed", "error": str(e), "result": None, "confidence": 0.0, - "rationale": f"Audit generation failed: {str(e)}" + "rationale": f"Audit generation failed: {str(e)}", + "duration": execution_duration } async def generate_qa(self, document: Document, goal: str = "Generate questions and answers about the document") -> Dict[str, Any]: """Generate questions and answers for a document""" + if not self.is_initialized: + raise RuntimeError("AgentService not initialized. Call initialize() first.") + + start_time = datetime.utcnow() + try: + logger.info(f"Generating QA for document: {document.id}") + # Prepare context for QA generation context = { "document": document, - "goal": goal + "goal": goal, + "agent_service": self } - # Execute QA generation - qa_result = await self.qa_agent.run(goal, context) + # Execute QA generation with monitoring + with self.monitor.monitor_agent_execution("qa", f"qa_{document.id}"): + qa_result = await self.qa_agent.run(goal, context) + + # Calculate execution time + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + # Record metrics + self.monitor.record_agent_execution( + agent_name="qa", + duration=execution_duration, + success=bool(qa_result), + confidence=qa_result.confidence if qa_result else 0.0 + ) + + logger.info(f"QA generation completed in {execution_duration:.2f}s") return { "status": "completed" if qa_result else "failed", "result": qa_result.output if qa_result else None, "confidence": 
qa_result.confidence if qa_result else 0.0, - "rationale": qa_result.rationale if qa_result else "QA generation failed" + "rationale": qa_result.rationale if qa_result else "QA generation failed", + "duration": execution_duration } except Exception as e: + end_time = datetime.utcnow() + execution_duration = (end_time - start_time).total_seconds() + + logger.error(f"QA generation failed: {e}") + + # Record error metrics + self.monitor.record_agent_execution( + agent_name="qa", + duration=execution_duration, + success=False, + error=str(e) + ) + return { "status": "failed", "error": str(e), "result": None, "confidence": 0.0, - "rationale": f"QA generation failed: {str(e)}" + "rationale": f"QA generation failed: {str(e)}", + "duration": execution_duration } def get_processing_status(self, processing_id: str) -> Optional[Dict[str, Any]]: @@ -240,64 +487,90 @@ def get_agent_capabilities(self) -> Dict[str, Any]: "orchestrator": { "name": "OrchestratorAgent", "description": "Coordinates the complete document processing workflow", - "capabilities": ["Workflow planning", "Execution monitoring", "Stage coordination"] + "capabilities": ["Workflow planning", "Execution monitoring", "Stage coordination"], + "status": "initialized" if self.agent_mapping["orchestrator"] else "not_initialized" }, "ingestion": { "name": "IngestionAgent", "description": "Extracts and normalizes text from documents", - "capabilities": ["OCR", "PDF parsing", "Text normalization"] + "capabilities": ["OCR", "PDF parsing", "Text normalization"], + "status": "initialized" if self.agent_mapping["ingestion"] else "not_initialized" }, "classifier": { "name": "ClassifierAgent", "description": "Classifies documents and analyzes content structure", - "capabilities": ["Document classification", "Content analysis", "Domain identification"] + "capabilities": ["Document classification", "Content analysis", "Domain identification"], + "status": "initialized" if self.agent_mapping["classifier"] else "not_initialized" 
}, "entity": { "name": "EntityAgent", "description": "Extracts named entities and key information", - "capabilities": ["Named entity recognition", "Clause extraction", "Key information extraction"] + "capabilities": ["Named entity recognition", "Clause extraction", "Key information extraction"], + "status": "initialized" if self.agent_mapping["entity"] else "not_initialized" }, "risk": { "name": "RiskAgent", "description": "Assesses compliance, financial, and operational risks", - "capabilities": ["Compliance risk analysis", "Financial risk analysis", "Operational risk analysis"] + "capabilities": ["Compliance risk analysis", "Financial risk analysis", "Operational risk analysis"], + "status": "initialized" if self.agent_mapping["risk"] else "not_initialized" }, "qa": { "name": "QAAgent", "description": "Generates questions and answers about documents", - "capabilities": ["Factual question generation", "Compliance question generation", "Risk question generation"] + "capabilities": ["Factual question generation", "Compliance question generation", "Risk question generation"], + "status": "initialized" if self.agent_mapping["qa"] else "not_initialized" }, "compare": { "name": "CompareAgent", "description": "Compares documents for differences and changes", - "capabilities": ["Semantic comparison", "Structural comparison", "Compliance comparison"] + "capabilities": ["Semantic comparison", "Structural comparison", "Compliance comparison"], + "status": "initialized" if self.agent_mapping["compare"] else "not_initialized" }, "audit": { "name": "AuditAgent", "description": "Generates audit trails and compliance reports", - "capabilities": ["Audit trail generation", "Compliance reporting", "Audit bundle creation"] + "capabilities": ["Audit trail generation", "Compliance reporting", "Audit bundle creation"], + "status": "initialized" if self.agent_mapping["audit"] else "not_initialized" }, "summarizer": { "name": "SummarizerAgent", "description": "Generates comprehensive 
document summaries", - "capabilities": ["Extractive summarization", "Abstractive summarization", "Executive summaries", "Technical summaries", "Key points extraction"] + "capabilities": ["Extractive summarization", "Abstractive summarization", "Executive summaries", "Technical summaries", "Key points extraction"], + "status": "initialized" if self.agent_mapping["summarizer"] else "not_initialized" }, "translator": { "name": "TranslatorAgent", "description": "Translates documents between multiple languages", - "capabilities": ["Text translation", "Document translation", "Language detection", "Technical translation", "Cultural adaptation"] + "capabilities": ["Text translation", "Document translation", "Language detection", "Technical translation", "Cultural adaptation"], + "status": "initialized" if self.agent_mapping["translator"] else "not_initialized" }, "sentiment": { "name": "SentimentAnalysisAgent", "description": "Analyzes sentiment, tone, and emotional content", - "capabilities": ["Sentiment analysis", "Tone analysis", "Emotion detection", "Bias detection", "Sentiment tracking"] + "capabilities": ["Sentiment analysis", "Tone analysis", "Emotion detection", "Bias detection", "Sentiment tracking"], + "status": "initialized" if self.agent_mapping["sentiment"] else "not_initialized" } } def get_workflow_status(self) -> Dict[str, Any]: """Get current workflow status from orchestrator""" + if not self.orchestrator: + return {"status": "not_initialized"} return self.orchestrator.get_workflow_status() + async def get_status(self) -> Dict[str, Any]: + """Get comprehensive service status""" + return { + "initialized": self.is_initialized, + "agents": self.get_agent_capabilities(), + "processing_history_count": len(self.processing_history), + "workflow_status": self.get_workflow_status(), + "monitoring": { + "enabled": True, + "metrics_available": True + } + } + async def cleanup_old_processing_history(self, max_age_hours: int = 24): """Clean up old processing 
history""" cutoff_time = datetime.utcnow().timestamp() - (max_age_hours * 3600) @@ -310,3 +583,24 @@ async def cleanup_old_processing_history(self, max_age_hours: int = 24): for processing_id in to_remove: del self.processing_history[processing_id] + + logger.info(f"Cleaned up {len(to_remove)} old processing history entries") + + async def cleanup(self) -> None: + """Cleanup resources and connections""" + try: + logger.info("Cleaning up AgentService...") + + # Clean up processing history + await self.cleanup_old_processing_history() + + # Clear agent instances + self.agent_mapping.clear() + self.processing_history.clear() + + self.is_initialized = False + logger.info("AgentService cleanup completed") + + except Exception as e: + logger.error(f"AgentService cleanup failed: {e}") + raise diff --git a/backend/app/services/memory_service.py b/backend/app/services/memory_service.py index 44b70e3..404dcd3 100644 --- a/backend/app/services/memory_service.py +++ b/backend/app/services/memory_service.py @@ -1,5 +1,6 @@ import json import asyncio +import logging from typing import Dict, Any, List, Optional from datetime import datetime, timedelta import redis.asyncio as redis @@ -9,32 +10,71 @@ import uuid from ..core.config import settings +from ..core.monitoring import get_monitor + +logger = logging.getLogger(__name__) class MemoryService: """Service for managing shared memory (Redis + Vector DB)""" def __init__(self): + self.monitor = get_monitor() self.redis_client = None self.vector_store = None self.chroma_client = None - self._initialize_connections() + self.is_initialized = False + + # In-memory fallback storage + self._memory_storage = {} - def _initialize_connections(self): + async def initialize(self) -> None: """Initialize Redis and vector database connections""" try: + logger.info("Initializing MemoryService...") + # Initialize Redis - print("πŸ”— Initializing Redis connection...") + await self._initialize_redis() + + # Initialize ChromaDB + await 
self._initialize_chromadb() + + self.is_initialized = True + logger.info("MemoryService initialized successfully") + + except Exception as e: + logger.error(f"Failed to initialize MemoryService: {e}") + # Fallback to in-memory storage + self._setup_fallback_storage() + raise + + async def _initialize_redis(self) -> None: + """Initialize Redis connection""" + try: if settings.REDIS_URL: - self.redis_client = redis.from_url(settings.REDIS_URL) + logger.info("Initializing Redis connection...") + self.redis_client = redis.from_url( + settings.REDIS_URL, + decode_responses=True, + max_connections=settings.REDIS_MAX_CONNECTIONS + ) + # Test connection - asyncio.create_task(self._test_redis_connection()) + await self.redis_client.ping() + logger.info("Redis connection successful") else: - print("⚠️ Redis URL not configured, using in-memory storage") + logger.warning("Redis URL not configured, using in-memory storage") self.redis_client = None + + except Exception as e: + logger.error(f"Redis initialization failed: {e}") + self.redis_client = None + + async def _initialize_chromadb(self) -> None: + """Initialize ChromaDB connection""" + try: + logger.info("Initializing ChromaDB connection...") - # Initialize ChromaDB - print("πŸ”— Initializing ChromaDB connection...") if settings.CHROMA_PERSIST_DIRECTORY: self.chroma_client = chromadb.PersistentClient( path=settings.CHROMA_PERSIST_DIRECTORY, @@ -43,78 +83,104 @@ def _initialize_connections(self): allow_reset=True ) ) - # Get or create default collection - try: - self.vector_store = self.chroma_client.get_collection("documents") - except: - self.vector_store = self.chroma_client.create_collection("documents") else: - print("⚠️ ChromaDB path not configured, using in-memory storage") + logger.warning("ChromaDB path not configured, using in-memory storage") self.chroma_client = chromadb.Client() - self.vector_store = self.chroma_client.create_collection("documents") - print("βœ… Memory connections initialized") + # Get or 
create default collection + try: + self.vector_store = self.chroma_client.get_collection(settings.CHROMA_COLLECTION_NAME) + logger.info(f"Using existing ChromaDB collection: {settings.CHROMA_COLLECTION_NAME}") + except Exception: + self.vector_store = self.chroma_client.create_collection(settings.CHROMA_COLLECTION_NAME) + logger.info(f"Created new ChromaDB collection: {settings.CHROMA_COLLECTION_NAME}") except Exception as e: - print(f"⚠️ Memory initialization failed: {e}") - # Fallback to in-memory storage - self.redis_client = None - self.chroma_client = chromadb.Client() - self.vector_store = self.chroma_client.create_collection("documents") + logger.error(f"ChromaDB initialization failed: {e}") + self._setup_fallback_storage() - async def _test_redis_connection(self): - """Test Redis connection""" - try: - await self.redis_client.ping() - print("βœ… Redis connection successful") - except Exception as e: - print(f"❌ Redis connection failed: {e}") - self.redis_client = None + def _setup_fallback_storage(self) -> None: + """Setup fallback in-memory storage""" + logger.info("Setting up fallback in-memory storage") + self.chroma_client = chromadb.Client() + self.vector_store = self.chroma_client.create_collection("documents") async def store_short_term(self, key: str, data: Any, ttl: int = 3600) -> bool: """Store data in Redis (short-term memory)""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. 
Call initialize() first.") + try: if self.redis_client: serialized_data = json.dumps(data, default=str) await self.redis_client.setex(key, ttl, serialized_data) - print(f"πŸ“ Stored in Redis: {key} (TTL: {ttl}s)") + logger.debug(f"Stored in Redis: {key} (TTL: {ttl}s)") return True else: # Fallback to in-memory storage - print(f"πŸ“ Stored in memory: {key}") + self._memory_storage[key] = { + "data": data, + "expires_at": datetime.utcnow() + timedelta(seconds=ttl) + } + logger.debug(f"Stored in memory: {key}") return True except Exception as e: - print(f"❌ Failed to store in short-term memory: {e}") + logger.error(f"Failed to store in short-term memory: {e}") return False async def get_short_term(self, key: str) -> Optional[Any]: """Retrieve data from Redis (short-term memory)""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. Call initialize() first.") + try: if self.redis_client: data = await self.redis_client.get(key) if data: return json.loads(data) + else: + # Check in-memory storage + if key in self._memory_storage: + item = self._memory_storage[key] + if datetime.utcnow() < item["expires_at"]: + return item["data"] + else: + # Remove expired item + del self._memory_storage[key] + return None except Exception as e: - print(f"❌ Failed to retrieve from short-term memory: {e}") + logger.error(f"Failed to retrieve from short-term memory: {e}") return None async def delete_short_term(self, key: str) -> bool: """Delete data from Redis (short-term memory)""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. 
Call initialize() first.") + try: if self.redis_client: await self.redis_client.delete(key) - print(f"πŸ—‘οΈ Deleted from Redis: {key}") + logger.debug(f"Deleted from Redis: {key}") + else: + # Remove from in-memory storage + if key in self._memory_storage: + del self._memory_storage[key] + logger.debug(f"Deleted from memory: {key}") + return True except Exception as e: - print(f"❌ Failed to delete from short-term memory: {e}") + logger.error(f"Failed to delete from short-term memory: {e}") return False async def store_long_term(self, collection: str, documents: List[Dict[str, Any]], metadata: Dict[str, Any] = None) -> bool: """Store documents in vector database (long-term memory)""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. Call initialize() first.") + try: if not documents: return False @@ -138,7 +204,8 @@ async def store_long_term(self, collection: str, documents: List[Dict[str, Any]] doc_metadata = { "source": doc.get("source", "unknown"), "type": doc.get("type", "document"), - "created_at": datetime.now().isoformat(), + "collection": collection, + "created_at": datetime.utcnow().isoformat(), **(metadata or {}), **(doc.get("metadata", {})) } @@ -150,17 +217,28 @@ async def store_long_term(self, collection: str, documents: List[Dict[str, Any]] documents=texts, metadatas=metadatas ) - print(f"πŸ“š Stored {len(ids)} documents in ChromaDB: {collection}") + logger.info(f"Stored {len(ids)} documents in ChromaDB collection: {collection}") + + # Record metrics + self.monitor.record_performance_metric( + "documents_stored", + len(ids), + {"collection": collection} + ) + return True return False except Exception as e: - print(f"❌ Failed to store in long-term memory: {e}") + logger.error(f"Failed to store in long-term memory: {e}") return False async def search_long_term(self, query: str, collection: str = None, k: int = 5, filter_metadata: Dict[str, Any] = None) -> List[Dict[str, Any]]: """Search documents in vector database 
(long-term memory)""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. Call initialize() first.") + try: if not query.strip(): return [] @@ -183,11 +261,19 @@ async def search_long_term(self, query: str, collection: str = None, k: int = 5, "id": results['ids'][0][i] if results['ids'] and results['ids'][0] else None }) - print(f"πŸ” Searched ChromaDB for: '{query}' -> {len(formatted_results)} results") + logger.debug(f"Searched ChromaDB for: '{query}' -> {len(formatted_results)} results") + + # Record metrics + self.monitor.record_performance_metric( + "search_results", + len(formatted_results), + {"collection": collection or "default"} + ) + return formatted_results except Exception as e: - print(f"❌ Failed to search long-term memory: {e}") + logger.error(f"Failed to search long-term memory: {e}") return [] async def store_trace_context(self, trace_id: str, context: Dict[str, Any]) -> bool: @@ -203,12 +289,12 @@ async def store_agent_memory(self, agent_id: str, memory_data: Dict[str, Any], m try: # Create memory document memory_doc = { - "id": f"memory_{agent_id}_{datetime.now().timestamp()}", + "id": f"memory_{agent_id}_{datetime.utcnow().timestamp()}", "content": json.dumps(memory_data), "metadata": { "agent_id": agent_id, "memory_type": memory_type, - "timestamp": datetime.now().isoformat(), + "timestamp": datetime.utcnow().isoformat(), "data_hash": hashlib.md5(json.dumps(memory_data, sort_keys=True).encode()).hexdigest() } } @@ -216,7 +302,7 @@ async def store_agent_memory(self, agent_id: str, memory_data: Dict[str, Any], m return await self.store_long_term("agent_memories", [memory_doc]) except Exception as e: - print(f"❌ Failed to store agent memory: {e}") + logger.error(f"Failed to store agent memory: {e}") return False async def search_agent_memory(self, agent_id: str, query: str, memory_type: str = None, k: int = 5) -> List[Dict[str, Any]]: @@ -238,7 +324,7 @@ async def search_agent_memory(self, agent_id: str, query: str, 
memory_type: str return results except Exception as e: - print(f"❌ Failed to search agent memory: {e}") + logger.error(f"Failed to search agent memory: {e}") return [] async def store_document_embeddings(self, document_id: str, text_chunks: List[str], metadata: Dict[str, Any] = None) -> bool: @@ -260,7 +346,7 @@ async def store_document_embeddings(self, document_id: str, text_chunks: List[st return await self.store_long_term("document_embeddings", documents) except Exception as e: - print(f"❌ Failed to store document embeddings: {e}") + logger.error(f"Failed to store document embeddings: {e}") return False async def search_similar_documents(self, query: str, document_type: str = None, k: int = 5) -> List[Dict[str, Any]]: @@ -273,60 +359,89 @@ async def search_similar_documents(self, query: str, document_type: str = None, return await self.search_long_term(query, "document_embeddings", k, filter_metadata) except Exception as e: - print(f"❌ Failed to search similar documents: {e}") + logger.error(f"Failed to search similar documents: {e}") return [] - async def get_collection_stats(self, collection_name: str = "documents") -> Dict[str, Any]: - """Get statistics about a collection""" + async def get_collection_stats(self, collection_name: str = None) -> Dict[str, Any]: + """Get statistics about collections""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. 
Call initialize() first.") + try: + collection_name = collection_name or settings.CHROMA_COLLECTION_NAME + if self.vector_store: count = self.vector_store.count() return { "collection_name": collection_name, "document_count": count, - "status": "active" + "status": "active", + "timestamp": datetime.utcnow().isoformat() } + return { "collection_name": collection_name, "document_count": 0, - "status": "inactive" + "status": "inactive", + "timestamp": datetime.utcnow().isoformat() } except Exception as e: - print(f"❌ Failed to get collection stats: {e}") + logger.error(f"Failed to get collection stats: {e}") return { "collection_name": collection_name, "document_count": 0, "status": "error", - "error": str(e) + "error": str(e), + "timestamp": datetime.utcnow().isoformat() } async def cleanup_expired_data(self, max_age_hours: int = 24) -> int: """Clean up expired data from memory""" + if not self.is_initialized: + raise RuntimeError("MemoryService not initialized. Call initialize() first.") + try: cleaned_count = 0 + # Clean up in-memory storage + current_time = datetime.utcnow() + expired_keys = [] + + for key, item in self._memory_storage.items(): + if current_time > item["expires_at"]: + expired_keys.append(key) + + for key in expired_keys: + del self._memory_storage[key] + cleaned_count += 1 + # Note: ChromaDB doesn't have built-in TTL, so we'd need to implement # custom cleanup logic based on metadata timestamps # For now, we'll just clean up Redis data - if self.redis_client: - # This is a simplified cleanup - in production you'd want more sophisticated logic - print(f"🧹 Cleanup completed: {cleaned_count} items removed") - + logger.info(f"Cleanup completed: {cleaned_count} items removed") return cleaned_count except Exception as e: - print(f"❌ Failed to cleanup expired data: {e}") + logger.error(f"Failed to cleanup expired data: {e}") return 0 async def health_check(self) -> Dict[str, Any]: """Check health of memory services""" + if not self.is_initialized: + 
return { + "redis": "not_initialized", + "chromadb": "not_initialized", + "overall": "not_initialized" + } + try: health_status = { "redis": "unknown", "chromadb": "unknown", - "overall": "unknown" + "overall": "unknown", + "timestamp": datetime.utcnow().isoformat() } # Check Redis @@ -334,7 +449,8 @@ async def health_check(self) -> Dict[str, Any]: try: await self.redis_client.ping() health_status["redis"] = "healthy" - except: + except Exception as e: + logger.error(f"Redis health check failed: {e}") health_status["redis"] = "unhealthy" else: health_status["redis"] = "not_configured" @@ -344,7 +460,8 @@ async def health_check(self) -> Dict[str, Any]: try: self.vector_store.count() health_status["chromadb"] = "healthy" - except: + except Exception as e: + logger.error(f"ChromaDB health check failed: {e}") health_status["chromadb"] = "unhealthy" else: health_status["chromadb"] = "not_configured" @@ -360,9 +477,46 @@ async def health_check(self) -> Dict[str, Any]: return health_status except Exception as e: + logger.error(f"Memory service health check failed: {e}") return { "redis": "error", "chromadb": "error", "overall": "error", - "error": str(e) + "error": str(e), + "timestamp": datetime.utcnow().isoformat() } + + async def get_status(self) -> Dict[str, Any]: + """Get comprehensive service status""" + return { + "initialized": self.is_initialized, + "redis_configured": self.redis_client is not None, + "chromadb_configured": self.vector_store is not None, + "health": await self.health_check(), + "collections": await self.get_collection_stats(), + "memory_storage_size": len(self._memory_storage) + } + + async def cleanup(self) -> None: + """Cleanup resources and connections""" + try: + logger.info("Cleaning up MemoryService...") + + # Close Redis connection + if self.redis_client: + await self.redis_client.close() + logger.info("Redis connection closed") + + # Clear in-memory storage + self._memory_storage.clear() + + # Clear ChromaDB references + self.vector_store 
= None + self.chroma_client = None + + self.is_initialized = False + logger.info("MemoryService cleanup completed") + + except Exception as e: + logger.error(f"MemoryService cleanup failed: {e}") + raise diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 0000000..e3140cc --- /dev/null +++ b/backend/app/tasks/__init__.py @@ -0,0 +1,40 @@ +""" +Celery Tasks Package for AI Document Agent +Handles distributed task processing for document analysis and AI operations +""" + +from .document_tasks import * +from .agent_tasks import * +from .analytics_tasks import * +from .maintenance_tasks import * + +__all__ = [ + # Document tasks + "process_document", + "extract_text", + "classify_document", + "extract_entities", + "assess_risk", + "compare_documents", + + # Agent tasks + "execute_agent", + "orchestrate_workflow", + "run_qa_agent", + "run_summarizer_agent", + "run_translator_agent", + "run_sentiment_agent", + + # Analytics tasks + "generate_daily_reports", + "update_system_metrics", + "analyze_performance", + "generate_insights", + + # Maintenance tasks + "cleanup_expired_documents", + "backup_database", + "cleanup_audit_logs", + "optimize_database", + "health_check", +] diff --git a/backend/app/tasks/document_tasks.py b/backend/app/tasks/document_tasks.py new file mode 100644 index 0000000..b537df5 --- /dev/null +++ b/backend/app/tasks/document_tasks.py @@ -0,0 +1,282 @@ +""" +Document Processing Tasks for AI Document Agent +Handles document upload, processing, and analysis tasks +""" + +import os +import logging +from typing import Dict, Any, Optional +from celery import current_task +from ..core.celery_config import celery_app +from ..services.agent_service import AgentService +from ..services.memory_service import MemoryService +from ..database.connection import get_db +from ..database.models import Document, ProcessingHistory +from ..core.config import settings + +logger = logging.getLogger(__name__) + 
+@celery_app.task(bind=True, max_retries=3) +def process_document(self, document_id: int, user_id: int) -> Dict[str, Any]: + """ + Process a document through the complete AI pipeline + + Args: + document_id: ID of the document to process + user_id: ID of the user who uploaded the document + + Returns: + Dictionary containing processing results + """ + try: + # Update task status + current_task.update_state( + state="PROGRESS", + meta={"current": 0, "total": 100, "status": "Starting document processing"} + ) + + # Get database session + db = next(get_db()) + + # Get document + document = db.query(Document).filter(Document.id == document_id).first() + if not document: + raise ValueError(f"Document {document_id} not found") + + # Initialize services + agent_service = AgentService() + memory_service = MemoryService() + + # Step 1: Extract text (10%) + current_task.update_state( + state="PROGRESS", + meta={"current": 10, "total": 100, "status": "Extracting text"} + ) + + text_result = extract_text.delay(document_id) + extracted_text = text_result.get(timeout=300) + + # Step 2: Classify document (20%) + current_task.update_state( + state="PROGRESS", + meta={"current": 20, "total": 100, "status": "Classifying document"} + ) + + classification_result = classify_document.delay(document_id, extracted_text) + classification = classification_result.get(timeout=300) + + # Step 3: Extract entities (40%) + current_task.update_state( + state="PROGRESS", + meta={"current": 40, "total": 100, "status": "Extracting entities"} + ) + + entities_result = extract_entities.delay(document_id, extracted_text) + entities = entities_result.get(timeout=300) + + # Step 4: Assess risk (60%) + current_task.update_state( + state="PROGRESS", + meta={"current": 60, "total": 100, "status": "Assessing risk"} + ) + + risk_result = assess_risk.delay(document_id, extracted_text, entities) + risk_assessment = risk_result.get(timeout=300) + + # Step 5: Store in vector database (80%) + 
current_task.update_state( + state="PROGRESS", + meta={"current": 80, "total": 100, "status": "Storing in vector database"} + ) + + # Store document in vector database + await memory_service.store_long_term( + f"doc_{document_id}", + extracted_text, + metadata={ + "document_id": document_id, + "user_id": user_id, + "classification": classification, + "entities": entities, + "risk_assessment": risk_assessment + } + ) + + # Step 6: Update document status (100%) + current_task.update_state( + state="PROGRESS", + meta={"current": 100, "total": 100, "status": "Completing processing"} + ) + + # Update document status + document.status = "processed" + document.processing_progress = 100 + db.commit() + + # Log processing history + history = ProcessingHistory( + document_id=document_id, + stage="complete", + status="success", + details={ + "classification": classification, + "entities_count": len(entities), + "risk_score": risk_assessment.get("risk_score", 0) + } + ) + db.add(history) + db.commit() + + return { + "document_id": document_id, + "status": "success", + "classification": classification, + "entities": entities, + "risk_assessment": risk_assessment, + "processing_time": current_task.request.execution_time + } + + except Exception as exc: + logger.error(f"Document processing failed: {exc}") + + # Update document status + if 'document' in locals(): + document.status = "failed" + document.processing_error = str(exc) + db.commit() + + # Retry with exponential backoff + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task(bind=True, max_retries=3) +def extract_text(self, document_id: int) -> str: + """Extract text from document""" + try: + db = next(get_db()) + document = db.query(Document).filter(Document.id == document_id).first() + + if not document: + raise ValueError(f"Document {document_id} not found") + + # Use ingestion agent to extract text + 
agent_service = AgentService() + result = await agent_service.ingestion_agent.run( + "Extract text from document", + {"document_path": document.file_path} + ) + + # Update document with extracted text + document.extracted_text = result.get("text", "") + db.commit() + + return document.extracted_text + + except Exception as exc: + logger.error(f"Text extraction failed: {exc}") + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task(bind=True, max_retries=3) +def classify_document(self, document_id: int, text: str) -> Dict[str, Any]: + """Classify document type and domain""" + try: + agent_service = AgentService() + result = await agent_service.classifier_agent.run( + "Classify document type and domain", + {"text": text} + ) + + return result + + except Exception as exc: + logger.error(f"Document classification failed: {exc}") + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task(bind=True, max_retries=3) +def extract_entities(self, document_id: int, text: str) -> Dict[str, Any]: + """Extract named entities from document""" + try: + agent_service = AgentService() + result = await agent_service.entity_agent.run( + "Extract named entities from text", + {"text": text} + ) + + return result + + except Exception as exc: + logger.error(f"Entity extraction failed: {exc}") + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task(bind=True, max_retries=3) +def assess_risk(self, document_id: int, text: str, entities: Dict[str, Any]) -> Dict[str, Any]: + """Assess document risk and compliance""" + try: + agent_service = AgentService() + result = await agent_service.risk_agent.run( + "Assess document risk and compliance", + 
{"text": text, "entities": entities} + ) + + return result + + except Exception as exc: + logger.error(f"Risk assessment failed: {exc}") + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task(bind=True, max_retries=3) +def compare_documents(self, document1_id: int, document2_id: int) -> Dict[str, Any]: + """Compare two documents for similarities and differences""" + try: + agent_service = AgentService() + result = await agent_service.compare_agent.run( + "Compare two documents", + {"document1_id": document1_id, "document2_id": document2_id} + ) + + return result + + except Exception as exc: + logger.error(f"Document comparison failed: {exc}") + if self.request.retries < self.max_retries: + countdown = 2 ** self.request.retries + raise self.retry(countdown=countdown, exc=exc) + else: + raise exc + +@celery_app.task +def process_pending_documents(): + """Process all pending documents in the queue""" + try: + db = next(get_db()) + pending_documents = db.query(Document).filter( + Document.status == "pending" + ).limit(10).all() + + for document in pending_documents: + process_document.delay(document.id, document.user_id) + + return {"processed": len(pending_documents)} + + except Exception as exc: + logger.error(f"Pending document processing failed: {exc}") + raise exc diff --git a/config/chroma_auth.json b/config/chroma_auth.json new file mode 100644 index 0000000..58d6a29 --- /dev/null +++ b/config/chroma_auth.json @@ -0,0 +1,4 @@ +{ + "admin": "$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewdBPj4J/8JQHqGq", + "user1": "$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewdBPj4J/8JQHqGq" +} diff --git a/env.example b/env.example new file mode 100644 index 0000000..b385022 --- /dev/null +++ b/env.example @@ -0,0 +1,203 @@ +# AI Document Agent - Environment Configuration Template +# Copy this file to .env and configure your values + +# 
============================================================================= +# CORE APPLICATION SETTINGS +# ============================================================================= +APP_NAME=AI Document Agent +APP_VERSION=1.0.0 +DEBUG=false +LOG_LEVEL=INFO +ENVIRONMENT=production + +# Server Configuration +HOST=0.0.0.0 +PORT=8000 +WORKER_PROCESSES=4 +MAX_CONCURRENT_REQUESTS=100 + +# ============================================================================= +# SECURITY SETTINGS +# ============================================================================= +SECRET_KEY=your-super-secret-key-change-in-production +ALGORITHM=HS256 +ACCESS_TOKEN_EXPIRE_MINUTES=30 +REFRESH_TOKEN_EXPIRE_DAYS=7 + +# JWT Settings +JWT_SECRET_KEY=your-jwt-secret-key-change-in-production +JWT_ALGORITHM=HS256 +JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30 +JWT_REFRESH_TOKEN_EXPIRE_DAYS=7 + +# Security Headers +ENABLE_CORS=true +ALLOWED_ORIGINS=http://localhost:3000,http://localhost:8000 +ALLOWED_METHODS=GET,POST,PUT,DELETE,OPTIONS +ALLOWED_HEADERS=* + +# Rate Limiting +RATE_LIMIT_REQUESTS=1000 +RATE_LIMIT_WINDOW=3600 +ENABLE_RATE_LIMITING=true + +# ============================================================================= +# DATABASE CONFIGURATION +# ============================================================================= +DATABASE_URL=postgresql://postgres:password@localhost:5432/smart_doc_bot +DATABASE_POOL_SIZE=10 +DATABASE_MAX_OVERFLOW=20 +DATABASE_POOL_TIMEOUT=30 +DATABASE_POOL_RECYCLE=3600 + +# Database Migration +ALEMBIC_CONFIG=alembic.ini +MIGRATION_AUTO_UPGRADE=true + +# ============================================================================= +# REDIS CONFIGURATION +# ============================================================================= +REDIS_URL=redis://localhost:6379/0 +REDIS_MAX_CONNECTIONS=10 +REDIS_PASSWORD= +REDIS_DB=0 + +# ============================================================================= +# CHROMADB VECTOR DATABASE +# 
============================================================================= +CHROMA_PERSIST_DIRECTORY=./chroma_db +CHROMA_COLLECTION_NAME=documents +CHROMA_SERVER_HOST=localhost +CHROMA_SERVER_PORT=8001 + +# ============================================================================= +# AI/ML SERVICES +# ============================================================================= +OPENAI_API_KEY=your-openai-api-key +OPENAI_MODEL=gpt-4 +OPENAI_MAX_TOKENS=4000 +OPENAI_TEMPERATURE=0.1 + +ANTHROPIC_API_KEY=your-anthropic-api-key +ANTHROPIC_MODEL=claude-3-sonnet-20240229 + +# Agent Configuration +AGENT_TIMEOUT=300 +AGENT_MAX_RETRIES=3 +AGENT_CONCURRENT_LIMIT=10 +AGENT_CONFIDENCE_THRESHOLD=0.8 + +# ============================================================================= +# FILE STORAGE +# ============================================================================= +UPLOAD_DIR=./uploads +MAX_FILE_SIZE=104857600 +ALLOWED_FILE_TYPES=.pdf,.docx,.txt,.csv,.xlsx,.pptx,.doc,.rtf +ENABLE_VIRUS_SCAN=true + +# ============================================================================= +# MONITORING & OBSERVABILITY +# ============================================================================= +ENABLE_MONITORING=true +PROMETHEUS_PORT=9090 +GRAFANA_PORT=3001 +ELASTICSEARCH_ENABLED=true +JAEGER_ENABLED=true + +# Prometheus Configuration +PROMETHEUS_ENABLED=true +PROMETHEUS_METRICS_PATH=/metrics +PROMETHEUS_PUSHGATEWAY=http://localhost:9091 + +# Grafana Configuration +GRAFANA_ENABLED=true +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=admin + +# Elasticsearch Configuration +ELASTICSEARCH_URL=http://localhost:9200 +ELASTICSEARCH_INDEX_PREFIX=smart-doc-bot +ELASTICSEARCH_USERNAME= +ELASTICSEARCH_PASSWORD= + +# Jaeger Configuration +JAEGER_AGENT_HOST=localhost +JAEGER_AGENT_PORT=6831 +JAEGER_COLLECTOR_URL=http://localhost:14268/api/traces + +# ============================================================================= +# AUDIT & COMPLIANCE +# 
============================================================================= +AUDIT_LOG_ENABLED=true +AUDIT_LOG_RETENTION_DAYS=90 +COMPLIANCE_SCAN_ENABLED=true +PII_REDACTION_ENABLED=true + +# Compliance Frameworks +ENABLE_GDPR_COMPLIANCE=true +ENABLE_HIPAA_COMPLIANCE=true +ENABLE_SOX_COMPLIANCE=true + +# ============================================================================= +# EMAIL & NOTIFICATIONS +# ============================================================================= +SMTP_HOST=smtp.gmail.com +SMTP_PORT=587 +SMTP_USERNAME=your-email@gmail.com +SMTP_PASSWORD=your-app-password +SMTP_USE_TLS=true +SMTP_USE_SSL=false + +# Email Templates +EMAIL_FROM=noreply@smartdocbot.com +EMAIL_REPLY_TO=support@smartdocbot.com + +# ============================================================================= +# BACKUP & RECOVERY +# ============================================================================= +BACKUP_ENABLED=true +BACKUP_RETENTION_DAYS=30 +BACKUP_SCHEDULE=0 2 * * * +BACKUP_STORAGE_PATH=./backups + +# ============================================================================= +# PERFORMANCE & CACHING +# ============================================================================= +ENABLE_CACHING=true +CACHE_TTL=3600 +CACHE_MAX_SIZE=1000 + +# Session Management +SESSION_SECRET_KEY=your-session-secret-key +SESSION_TTL=3600 +SESSION_COOKIE_SECURE=true + +# ============================================================================= +# FEATURE FLAGS +# ============================================================================= +ENABLE_WEBSOCKETS=true +ENABLE_SSE=true +ENABLE_REAL_TIME_UPDATES=true +ENABLE_DOCUMENT_COMPARISON=true +ENABLE_QA_FEATURE=true +ENABLE_AUDIT_TRAIL=true +ENABLE_ANALYTICS=true + +# ============================================================================= +# INTEGRATION SETTINGS +# ============================================================================= +# External API Keys (if needed) +GOOGLE_CLOUD_API_KEY= 
+AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_REGION=us-east-1 +AWS_S3_BUCKET=smart-doc-bot-uploads + +# ============================================================================= +# DEVELOPMENT SETTINGS +# ============================================================================= +# Only set these in development +# DEBUG=true +# LOG_LEVEL=DEBUG +# ENABLE_MONITORING=false +# ENABLE_RATE_LIMITING=false diff --git a/k8s/configmaps.yaml b/k8s/configmaps.yaml new file mode 100644 index 0000000..161ecc1 --- /dev/null +++ b/k8s/configmaps.yaml @@ -0,0 +1,113 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: ai-document-agent-config + namespace: ai-document-agent +data: + # Application Configuration + APP_NAME: "AI Document Agent" + APP_VERSION: "1.0.0" + DEBUG: "false" + LOG_LEVEL: "INFO" + ENVIRONMENT: "production" + + # Server Configuration + HOST: "0.0.0.0" + PORT: "8000" + WORKER_PROCESSES: "4" + MAX_CONCURRENT_REQUESTS: "100" + + # Database Configuration + DATABASE_URL: "postgresql://postgres:$(DATABASE_PASSWORD)@postgres:5432/smart_doc_bot" + DATABASE_POOL_SIZE: "10" + DATABASE_MAX_OVERFLOW: "20" + + # Redis Configuration + REDIS_URL: "redis://redis:6379/0" + REDIS_MAX_CONNECTIONS: "10" + + # ChromaDB Configuration + CHROMA_PERSIST_DIRECTORY: "/app/chroma_db" + CHROMA_COLLECTION_NAME: "documents" + + # AI/ML Configuration + OPENAI_MODEL: "gpt-4" + OPENAI_MAX_TOKENS: "4000" + OPENAI_TEMPERATURE: "0.1" + AGENT_TIMEOUT: "300" + AGENT_MAX_RETRIES: "3" + AGENT_CONCURRENT_LIMIT: "10" + + # Security Configuration + ALGORITHM: "HS256" + ACCESS_TOKEN_EXPIRE_MINUTES: "30" + RATE_LIMIT_REQUESTS: "1000" + RATE_LIMIT_WINDOW: "3600" + + # Monitoring Configuration + ENABLE_MONITORING: "true" + PROMETHEUS_PORT: "9090" + GRAFANA_PORT: "3001" + ELASTICSEARCH_ENABLED: "true" + JAEGER_ENABLED: "true" + + # Audit Configuration + AUDIT_LOG_ENABLED: "true" + AUDIT_LOG_RETENTION_DAYS: "90" + COMPLIANCE_SCAN_ENABLED: "true" + PII_REDACTION_ENABLED: "true" + + # Feature Flags 
+  ENABLE_WEBSOCKETS: "true"
+  ENABLE_SSE: "true"
+  ENABLE_REAL_TIME_UPDATES: "true"
+  ENABLE_DOCUMENT_COMPARISON: "true"
+  ENABLE_QA_FEATURE: "true"
+  ENABLE_AUDIT_TRAIL: "true"
+  ENABLE_ANALYTICS: "true"
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: ai-document-agent-nginx-config
+  namespace: ai-document-agent
+data:
+  nginx.conf: |
+    events {
+        worker_connections 1024;
+    }
+
+    http {
+        upstream backend {
+            server backend:8000;
+        }
+
+        upstream frontend {
+            server frontend:3000;
+        }
+
+        server {
+            listen 80;
+            server_name localhost;
+
+            location / {
+                proxy_pass http://frontend;
+                proxy_set_header Host $host;
+                proxy_set_header X-Real-IP $remote_addr;
+                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+                proxy_set_header X-Forwarded-Proto $scheme;
+            }
+
+            location /api/ {
+                proxy_pass http://backend;
+                proxy_set_header Host $host;
+                proxy_set_header X-Real-IP $remote_addr;
+                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+                proxy_set_header X-Forwarded-Proto $scheme;
+            }
+
+            location /health {
+                proxy_pass http://backend/health;
+            }
+        }
+    }
diff --git a/k8s/deployments.yaml b/k8s/deployments.yaml
new file mode 100644
index 0000000..92d1609
--- /dev/null
+++ b/k8s/deployments.yaml
@@ -0,0 +1,293 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: backend
+  namespace: ai-document-agent
+  labels:
+    app: smart-doc-bot
+    component: backend
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: smart-doc-bot
+      component: backend
+  template:
+    metadata:
+      labels:
+        app: smart-doc-bot
+        component: backend
+    spec:
+      containers:
+      - name: backend
+        image: smart-doc-bot-backend:latest
+        ports:
+        - containerPort: 8000
+        env:
+        - name: SECRET_KEY
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: secret-key
+        - name: JWT_SECRET_KEY
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: jwt-secret-key
+        - name: OPENAI_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: openai-api-key
+        - name: ANTHROPIC_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: anthropic-api-key
+        - name: DATABASE_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: database-password
+        # $(VAR) references are expanded only in env[].value, never in values
+        # injected from a ConfigMap through envFrom, so the URL is built here.
+        # An explicit env entry takes precedence over the envFrom key of the same name.
+        - name: DATABASE_URL
+          value: "postgresql://postgres:$(DATABASE_PASSWORD)@postgres:5432/smart_doc_bot"
+        envFrom:
+        - configMapRef:
+            name: ai-document-agent-config
+        resources:
+          requests:
+            memory: "512Mi"
+            cpu: "250m"
+          limits:
+            memory: "1Gi"
+            cpu: "500m"
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 5
+          periodSeconds: 5
+        volumeMounts:
+        - name: uploads
+          mountPath: /app/uploads
+        - name: chroma-db
+          mountPath: /app/chroma_db
+        - name: logs
+          mountPath: /app/logs
+      volumes:
+      - name: uploads
+        persistentVolumeClaim:
+          claimName: uploads-pvc
+      - name: chroma-db
+        persistentVolumeClaim:
+          claimName: chroma-db-pvc
+      - name: logs
+        persistentVolumeClaim:
+          claimName: logs-pvc
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: frontend
+  namespace: ai-document-agent
+  labels:
+    app: smart-doc-bot
+    component: frontend
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: smart-doc-bot
+      component: frontend
+  template:
+    metadata:
+      labels:
+        app: smart-doc-bot
+        component: frontend
+    spec:
+      containers:
+      - name: frontend
+        image: smart-doc-bot-frontend:latest
+        ports:
+        - containerPort: 3000
+        env:
+        - name: REACT_APP_API_URL
+          value: "https://api.smartdocbot.com"
+        - name: REACT_APP_ENVIRONMENT
+          value: "production"
+        resources:
+          requests:
+            memory: "256Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "200m"
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 5
+          periodSeconds: 5
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: postgres
+  namespace: ai-document-agent
+  labels:
+    app: smart-doc-bot
+    component: database
+spec:
+
replicas: 1
+  selector:
+    matchLabels:
+      app: smart-doc-bot
+      component: database
+  template:
+    metadata:
+      labels:
+        app: smart-doc-bot
+        component: database
+    spec:
+      containers:
+      - name: postgres
+        image: postgres:15-alpine
+        ports:
+        - containerPort: 5432
+        env:
+        - name: POSTGRES_DB
+          value: "smart_doc_bot"
+        - name: POSTGRES_USER
+          value: "postgres"
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: ai-document-agent-secrets
+              key: database-password
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "2Gi"
+            cpu: "1000m"
+        volumeMounts:
+        - name: postgres-data
+          mountPath: /var/lib/postgresql/data
+        - name: init-script
+          mountPath: /docker-entrypoint-initdb.d
+      volumes:
+      - name: postgres-data
+        persistentVolumeClaim:
+          claimName: postgres-pvc
+      - name: init-script
+        configMap:  # the volume source is "configMap"; "configMapRef" is only valid under envFrom
+          name: ai-document-agent-init-db
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: redis
+  namespace: ai-document-agent
+  labels:
+    app: smart-doc-bot
+    component: cache
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: smart-doc-bot
+      component: cache
+  template:
+    metadata:
+      labels:
+        app: smart-doc-bot
+        component: cache
+    spec:
+      containers:
+      - name: redis
+        image: redis:7-alpine
+        ports:
+        - containerPort: 6379
+        command:
+        - redis-server
+        - --appendonly
+        - "yes"  # quoted: a bare yes is a YAML 1.1 boolean, but command items must be strings
+        - --maxmemory
+        - 256mb
+        - --maxmemory-policy
+        - allkeys-lru
+        resources:
+          requests:
+            memory: "256Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "200m"
+        volumeMounts:
+        - name: redis-data
+          mountPath: /data
+      volumes:
+      - name: redis-data
+        persistentVolumeClaim:
+          claimName: redis-pvc
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx
+  namespace: ai-document-agent
+  labels:
+    app: smart-doc-bot
+    component: ingress
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: smart-doc-bot
+      component: ingress
+  template:
+    metadata:
+      labels:
+        app: smart-doc-bot
+        component: ingress
+    spec:
+      containers:
+      - name: nginx
+        image:
nginx:alpine
+        ports:
+        - containerPort: 80
+        - containerPort: 443
+        volumeMounts:
+        - name: nginx-config
+          mountPath: /etc/nginx/nginx.conf
+          subPath: nginx.conf
+        - name: nginx-ssl
+          mountPath: /etc/nginx/ssl
+        - name: nginx-logs
+          mountPath: /var/log/nginx
+      volumes:
+      - name: nginx-config
+        configMap:  # the volume source is "configMap"; "configMapRef" is only valid under envFrom
+          name: ai-document-agent-nginx-config
+      - name: nginx-ssl
+        secret:  # the volume source is "secret", and it takes "secretName" rather than "name"
+          secretName: ai-document-agent-tls
+      - name: nginx-logs
+        persistentVolumeClaim:
+          claimName: logs-pvc
diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml
new file mode 100644
index 0000000..7821cc5
--- /dev/null
+++ b/k8s/namespace.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ai-document-agent
+  labels:
+    name: ai-document-agent
+    app: smart-doc-bot
+    environment: production
+  annotations:
+    description: "AI Document Agent - Enterprise Document Intelligence Platform"
+    owner: "ai-document-agent-team"
+    cost-center: "engineering"
diff --git a/k8s/secrets.yaml b/k8s/secrets.yaml
new file mode 100644
index 0000000..514d7c4
--- /dev/null
+++ b/k8s/secrets.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: ai-document-agent-secrets
+  namespace: ai-document-agent
+type: Opaque
+data:
+  # Base64-encoded placeholders only - replace with real values at deploy time and never commit real secrets
+  secret-key: eW91ci1zdXBlci1zZWNyZXQta2V5LWNoYW5nZS1pbi1wcm9kdWN0aW9u
+  jwt-secret-key: eW91ci1qd3Qtc2VjcmV0LWtleS1jaGFuZ2UtaW4tcHJvZHVjdGlvbg==
+  openai-api-key: eW91ci1vcGVuYWktYXBpLWtleQ==
+  anthropic-api-key: eW91ci1hbnRocm9waWMtYXBpLWtleQ==
+  database-password: cGFzc3dvcmQ=
+  redis-password: ""  # explicit empty value; a bare "key:" parses as null
+  smtp-password: eW91ci1zbXRwLXBhc3N3b3Jk
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: ai-document-agent-tls
+  namespace: ai-document-agent
+type: kubernetes.io/tls
+data:
+  # Base64 encoded TLS certificate and key - replace with actual values
+  tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCg==
+  tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCg==
diff --git a/model/summarizer.py b/model/summarizer.py
index
7418477..4687fa8 100644
--- a/model/summarizer.py
+++ b/model/summarizer.py
@@ -1,9 +1,356 @@
-def summarize_text(text: str) -> str:
+import os
+import re
+from typing import Dict, List, Optional, Any
+from openai import OpenAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.summarize import load_summarize_chain
+from langchain.chat_models import ChatOpenAI
+from langchain.schema import Document
+
+class AdvancedSummarizer:
+    """Advanced AI-powered document summarization using OpenAI GPT models"""
+
+    def __init__(self, model_name: str = "gpt-4", max_tokens: int = 1000):
+        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        self.model_name = model_name
+        self.max_tokens = max_tokens  # completion-token cap passed to every chat request
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=4000,
+            chunk_overlap=200,
+            length_function=len,
+        )
+
+    def summarize_text(self, text: str, summary_type: str = "extractive",
+                      max_length: int = 500) -> Dict[str, Any]:
+        """
+        Advanced text summarization with multiple strategies
+
+        Args:
+            text: Input text to summarize; None is treated as empty, and text under 100 stripped characters is returned as-is
+            summary_type: Type of summarization (extractive, abstractive, bullet_points, executive); unknown values use abstractive
+            max_length: Maximum length of summary
+
+        Returns:
+            Dictionary containing summary and metadata
+        """
+        if not text or len(text.strip()) < 100:
+            return {
+                "summary": text or "",  # the guard admits None, so normalise it instead of crashing in len() below
+                "type": summary_type,
+                "confidence": 1.0,
+                "metadata": {"original_length": len(text or ""), "summary_length": len(text or "")}
+            }
+
+        try:
+            if summary_type == "extractive":
+                return self._extractive_summarization(text, max_length)
+            elif summary_type == "abstractive":
+                return self._abstractive_summarization(text, max_length)
+            elif summary_type == "bullet_points":
+                return self._bullet_point_summarization(text, max_length)
+            elif summary_type == "executive":
+                return self._executive_summarization(text, max_length)
+            else:
+                return self._abstractive_summarization(text, max_length)
+
+        except Exception as e:
+            # Fallback to basic summarization
+
return self._fallback_summarization(text, max_length) + + def _extractive_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Extractive summarization using key sentence extraction""" + try: + # Split text into sentences + sentences = re.split(r'(?<=[.!?])\s+', text.strip()) + + # Use OpenAI to score and select key sentences + prompt = f""" + Analyze the following text and identify the {max_length//50} most important sentences that best summarize the content. + Focus on sentences that contain key information, main points, and conclusions. + + Text: + {text} + + Return only the selected sentences in order, separated by newlines. + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=self.max_tokens, + temperature=0.1 + ) + + summary = response.choices[0].message.content.strip() + + return { + "summary": summary, + "type": "extractive", + "confidence": 0.9, + "metadata": { + "original_length": len(text), + "summary_length": len(summary), + "compression_ratio": len(summary) / len(text) if len(text) > 0 else 0 + } + } + + except Exception as e: + return self._fallback_summarization(text, max_length) + + def _abstractive_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Abstractive summarization using GPT models""" + try: + # For long texts, use chunking + if len(text) > 8000: + return self._chunked_summarization(text, max_length) + + prompt = f""" + Create a comprehensive summary of the following text in approximately {max_length} characters. + The summary should capture the main points, key insights, and conclusions. + Write in a clear, professional tone. 
+ + Text: + {text} + + Summary: + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=self.max_tokens, + temperature=0.3 + ) + + summary = response.choices[0].message.content.strip() + + return { + "summary": summary, + "type": "abstractive", + "confidence": 0.95, + "metadata": { + "original_length": len(text), + "summary_length": len(summary), + "compression_ratio": len(summary) / len(text) if len(text) > 0 else 0 + } + } + + except Exception as e: + return self._fallback_summarization(text, max_length) + + def _bullet_point_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Bullet point summarization""" + try: + prompt = f""" + Create a bullet-point summary of the following text with key points and insights. + Use clear, concise bullet points that capture the main information. + Limit to approximately {max_length} characters total. + + Text: + {text} + + Bullet Point Summary: + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=self.max_tokens, + temperature=0.2 + ) + + summary = response.choices[0].message.content.strip() + + return { + "summary": summary, + "type": "bullet_points", + "confidence": 0.9, + "metadata": { + "original_length": len(text), + "summary_length": len(summary), + "compression_ratio": len(summary) / len(text) if len(text) > 0 else 0 + } + } + + except Exception as e: + return self._fallback_summarization(text, max_length) + + def _executive_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Executive summary for business documents""" + try: + prompt = f""" + Create an executive summary of the following text suitable for business leaders. + Focus on key decisions, risks, opportunities, and actionable insights. + Write in a professional, executive-level tone. + Limit to approximately {max_length} characters. 
+ + Text: + {text} + + Executive Summary: + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=self.max_tokens, + temperature=0.2 + ) + + summary = response.choices[0].message.content.strip() + + return { + "summary": summary, + "type": "executive", + "confidence": 0.95, + "metadata": { + "original_length": len(text), + "summary_length": len(summary), + "compression_ratio": len(summary) / len(text) if len(text) > 0 else 0 + } + } + + except Exception as e: + return self._fallback_summarization(text, max_length) + + def _chunked_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Handle long texts by chunking and summarizing""" + try: + # Split text into chunks + chunks = self.text_splitter.split_text(text) + + # Summarize each chunk + chunk_summaries = [] + for chunk in chunks: + chunk_summary = self._abstractive_summarization(chunk, max_length // len(chunks)) + chunk_summaries.append(chunk_summary["summary"]) + + # Combine chunk summaries + combined_summary = " ".join(chunk_summaries) + + # Create final summary + final_summary = self._abstractive_summarization(combined_summary, max_length) + + return { + "summary": final_summary["summary"], + "type": "chunked_abstractive", + "confidence": 0.85, + "metadata": { + "original_length": len(text), + "summary_length": len(final_summary["summary"]), + "chunks_processed": len(chunks), + "compression_ratio": len(final_summary["summary"]) / len(text) if len(text) > 0 else 0 + } + } + + except Exception as e: + return self._fallback_summarization(text, max_length) + + def _fallback_summarization(self, text: str, max_length: int) -> Dict[str, Any]: + """Fallback summarization when AI fails""" + sentences = re.split(r'(?<=[.!?])\s+', text.strip()) + + # Simple extractive summarization + if len(sentences) <= 3: + summary = text + else: + # Take first, middle, and last sentences + summary_parts = [] + 
summary_parts.append(sentences[0]) + + if len(sentences) > 2: + middle_idx = len(sentences) // 2 + summary_parts.append(sentences[middle_idx]) + + if len(sentences) > 1: + summary_parts.append(sentences[-1]) + + summary = " ".join(summary_parts) + + # Truncate if too long + if len(summary) > max_length: + summary = summary[:max_length-3] + "..." + + return { + "summary": summary, + "type": "fallback", + "confidence": 0.6, + "metadata": { + "original_length": len(text), + "summary_length": len(summary), + "compression_ratio": len(summary) / len(text) if len(text) > 0 else 0, + "error": "AI summarization failed, using fallback method" + } + } + + def analyze_summary_quality(self, original_text: str, summary: str) -> Dict[str, Any]: + """Analyze the quality of a summary""" + try: + prompt = f""" + Analyze the quality of this summary compared to the original text. + Rate the following aspects on a scale of 1-10: + 1. Completeness: Does it capture all key points? + 2. Accuracy: Is the information correct? + 3. Clarity: Is it easy to understand? + 4. Conciseness: Is it appropriately brief? + 5. Relevance: Does it focus on important information? + + Original Text: + {original_text[:2000]}... 
+ + Summary: + {summary} + + Provide your analysis as JSON: + {{ + "completeness": 8, + "accuracy": 9, + "clarity": 8, + "conciseness": 7, + "relevance": 9, + "overall_score": 8.2, + "strengths": ["Captures key points", "Clear language"], + "improvements": ["Could include more details"] + }} + """ + + response = self.client.chat.completions.create( + model=self.model_name, + messages=[{"role": "user", "content": prompt}], + max_tokens=500, + temperature=0.1 + ) + + import json + analysis = json.loads(response.choices[0].message.content.strip()) + return analysis + + except Exception as e: + return { + "completeness": 5, + "accuracy": 5, + "clarity": 5, + "conciseness": 5, + "relevance": 5, + "overall_score": 5.0, + "strengths": ["Fallback analysis"], + "improvements": ["AI analysis failed"] + } + + +# Backward compatibility function +def summarize_text(text: str, summary_type: str = "abstractive", max_length: int = 500) -> str: """ - Naive summarization by extracting the first 2-3 sentences. - In production, replace this with a call to an LLM or advanced NLP model. 
+ Backward compatibility function for simple text summarization + + Args: + text: Input text to summarize + summary_type: Type of summarization + max_length: Maximum length of summary + + Returns: + Summarized text """ - import re - sentences = re.split(r'(?<=[.!?])\s+', text.strip()) - summary = ' '.join(sentences[:3]) if len(sentences) > 2 else text - return summary + summarizer = AdvancedSummarizer() + result = summarizer.summarize_text(text, summary_type, max_length) + return result["summary"] diff --git a/monitoring/filebeat/filebeat.yml b/monitoring/filebeat/filebeat.yml new file mode 100644 index 0000000..4a70175 --- /dev/null +++ b/monitoring/filebeat/filebeat.yml @@ -0,0 +1,57 @@ +filebeat.inputs: +- type: log + enabled: true + paths: + - /var/log/smart-doc-bot/*.log + fields: + service: smart-doc-bot + environment: production + fields_under_root: true + multiline.pattern: '^\[' + multiline.negate: true + multiline.match: after + +- type: container + enabled: true + paths: + - '/var/lib/docker/containers/*/*.log' + processors: + - add_docker_metadata: + host: "unix:///var/run/docker.sock" + +processors: + - add_host_metadata: + when.not.contains.tags: forwarded + - add_cloud_metadata: ~ + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + +output.elasticsearch: + hosts: ["elasticsearch:9200"] + indices: + - index: "filebeat-%{[agent.version]}-%{+yyyy.MM.dd}" + template.name: "filebeat" + template.pattern: "filebeat-*" + template.enabled: false + template.overwrite: false + +setup.kibana: + host: "kibana:5601" + +setup.template.settings: + index.number_of_shards: 1 + index.number_of_replicas: 0 + +setup.dashboards.enabled: true +setup.dashboards.directory: /usr/share/filebeat/kibana + +logging.level: info +logging.to_files: true +logging.files: + path: /var/log/filebeat + name: filebeat + keepfiles: 7 + permissions: 0644 diff --git a/monitoring/grafana/dashboards/ai-document-agent-dashboard.json 
b/monitoring/grafana/dashboards/ai-document-agent-dashboard.json new file mode 100644 index 0000000..262170c --- /dev/null +++ b/monitoring/grafana/dashboards/ai-document-agent-dashboard.json @@ -0,0 +1,457 @@ +{ + "dashboard": { + "id": null, + "title": "AI Document Agent Dashboard", + "tags": ["ai", "document-processing", "agents"], + "style": "dark", + "timezone": "browser", + "panels": [ + { + "id": 1, + "title": "System Overview", + "type": "stat", + "targets": [ + { + "expr": "system_cpu_usage", + "legendFormat": "CPU Usage" + }, + { + "expr": "system_memory_usage", + "legendFormat": "Memory Usage" + }, + { + "expr": "system_disk_usage", + "legendFormat": "Disk Usage" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "displayMode": "gradient-gauge" + }, + "mappings": [], + "thresholds": { + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + } + }, + { + "id": 2, + "title": "Agent Performance", + "type": "timeseries", + "targets": [ + { + "expr": "rate(agent_execution_time_sum[5m]) / rate(agent_execution_time_count[5m])", + "legendFormat": "{{agent_type}} - Avg Execution Time" + }, + { + "expr": "agent_confidence", + "legendFormat": "{{agent_type}} - Confidence" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + } + }, + { + "id": 3, + "title": "Document Processing Pipeline", + "type": "timeseries", + "targets": [ + { + "expr": "rate(workflow_execution_time_sum[5m]) / rate(workflow_execution_time_count[5m])", + "legendFormat": "Workflow Execution Time" + }, + { + "expr": "workflow_stages_completed", + "legendFormat": "Stages Completed" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + } + }, + { + "id": 4, + "title": "API Performance", + "type": "timeseries", + "targets": [ + { + "expr": "rate(http_requests_total[5m])", + "legendFormat": "{{method}} {{endpoint}} - Requests/sec" + }, + { + "expr": "rate(http_request_duration_sum[5m]) / rate(http_request_duration_count[5m])", + "legendFormat": "{{method}} {{endpoint}} - Response Time" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + } + }, + { + "id": 5, + "title": "Agent Success Rate", + "type": "piechart", + "targets": [ + { + "expr": "sum(agent_execution_time_count) by (agent_type)", + "legendFormat": "{{agent_type}}" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + } + }, + "mappings": [] + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + } + }, + { + "id": 6, + "title": "Error Rate", + "type": "timeseries", + "targets": [ + { + "expr": "rate(http_requests_total{status_code=~\"5..\"}[5m])", + "legendFormat": "5xx Errors" + }, + { + "expr": "rate(http_requests_total{status_code=~\"4..\"}[5m])", + "legendFormat": "4xx Errors" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + } + }, + { + "id": 7, + "title": "Memory Usage by Agent", + "type": "barchart", + "targets": [ + { + "expr": "agent_memory_usage", + "legendFormat": "{{agent_type}}" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + } + } + ], + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": "5s", + "schemaVersion": 27, + "version": 1, + "links": [] + } +} diff --git a/monitoring/grafana/datasources/prometheus.yml b/monitoring/grafana/datasources/prometheus.yml new file mode 100644 index 0000000..e1a77f9 --- /dev/null +++ b/monitoring/grafana/datasources/prometheus.yml @@ -0,0 +1,14 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: "5s" + queryTimeout: "60s" + httpMethod: "POST" + secureJsonData: {} diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..782122d --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,211 @@ 
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # Logging
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log /var/log/nginx/access.log main;
+    error_log /var/log/nginx/error.log warn;
+
+    # Basic settings
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+    client_max_body_size 50M;
+
+    # Gzip compression
+    gzip on;
+    gzip_vary on;
+    gzip_min_length 1024;
+    gzip_proxied any;
+    gzip_comp_level 6;
+    gzip_types
+        text/plain
+        text/css
+        text/xml
+        text/javascript
+        application/json
+        application/javascript
+        application/xml+rss
+        application/atom+xml
+        image/svg+xml;
+
+    # Rate limiting
+    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
+    limit_req_zone $binary_remote_addr zone=login:10m rate=1r/s;
+
+    # Upstream backend servers
+    upstream backend {
+        least_conn;
+        server backend:8000 max_fails=3 fail_timeout=30s;
+        # Add more backend servers for load balancing
+        # server backend2:8000 max_fails=3 fail_timeout=30s;
+        # server backend3:8000 max_fails=3 fail_timeout=30s;
+    }
+
+    # Upstream frontend servers
+    upstream frontend {
+        least_conn;
+        server frontend:3000 max_fails=3 fail_timeout=30s;
+        # Add more frontend servers for load balancing
+        # server frontend2:3000 max_fails=3 fail_timeout=30s;
+    }
+
+    # HTTP server (redirect to HTTPS)
+    server {
+        listen 80;
+        server_name _;
+
+        # Redirect all HTTP traffic to HTTPS
+        return 301 https://$host$request_uri;
+    }
+
+    # HTTPS server
+    server {
+        listen 443 ssl http2;
+        server_name _;
+
+        # SSL configuration
+        ssl_certificate /etc/nginx/ssl/cert.pem;
+        ssl_certificate_key /etc/nginx/ssl/key.pem;
+        ssl_protocols TLSv1.2 TLSv1.3;
+        ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
+        ssl_prefer_server_ciphers off;
+        ssl_session_cache shared:SSL:10m;
+        ssl_session_timeout 10m;
+
+        # Security headers: kept together at server level, because add_header is inherited only when the current level declares none.
+        add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+        add_header X-Frame-Options "SAMEORIGIN" always;
+        add_header X-Content-Type-Options "nosniff" always;
+        add_header X-XSS-Protection "1; mode=block" always;
+        add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+        add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self' data:; connect-src 'self' ws: wss:;" always;
+
+        # Frontend application
+        location / {
+            proxy_pass http://frontend;
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection 'upgrade';
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_cache_bypass $http_upgrade;
+            proxy_read_timeout 86400;
+        }
+
+        # API endpoints
+        location /api/ {
+            limit_req zone=api burst=20 nodelay;
+
+            proxy_pass http://backend;
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection 'upgrade';
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_cache_bypass $http_upgrade;
+
+            # Timeouts for API calls
+            proxy_connect_timeout 60s;
+            proxy_send_timeout 60s;
+            proxy_read_timeout 60s;
+        }
+
+        # Authentication endpoints (stricter rate limiting)
+        location /api/v1/auth/ {
+            limit_req zone=login burst=5 nodelay;
+
+            proxy_pass http://backend;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # WebSocket support for real-time features
+        location /ws/ {
+            proxy_pass http://backend;
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_read_timeout 86400;
+        }
+
+        # Health check endpoint
+        location /health {
+            proxy_pass http://backend;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Metrics endpoint (for monitoring)
+        location /metrics {
+            proxy_pass http://backend;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Static files (if served by backend). add_header here switches off inheritance, so the security headers are repeated.
+        location /static/ {
+            proxy_pass http://backend;
+            proxy_set_header Host $host;
+            expires 1y;
+            add_header Cache-Control "public, immutable";
+            add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+            add_header X-Frame-Options "SAMEORIGIN" always;
+            add_header X-Content-Type-Options "nosniff" always;
+            add_header X-XSS-Protection "1; mode=block" always;
+            add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+            add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self' data:; connect-src 'self' ws: wss:;" always;
+        }
+
+        # Upload endpoint (larger file uploads)
+        location /api/v1/documents/upload {
+            client_max_body_size 50M;
+            proxy_pass http://backend;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_read_timeout 300s;
+            proxy_send_timeout 300s;
+        }
+
+        # Deny access to dotfiles (.git, .env, .htaccess, .htpasswd, ...); this regex already covers /.ht*
+        location ~ /\. {
+            deny all;
+        }
+
+        # Error pages
+        error_page 404 /404.html;
+        error_page 500 502 503 504 /50x.html;
+
+        location = /50x.html {
+            root /usr/share/nginx/html;
+        }
+    }
+}
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..c547e6b
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,71 @@
+# In pytest.ini the section must be named [pytest]; [tool:pytest] is only read from setup.cfg.
+[pytest]
+# Test discovery
+testpaths = backend/tests
+python_files = test_*.py *_test.py
+python_classes = Test*
+python_functions = test_*
+
+# Output and reporting
+addopts =
+    -v
+    --tb=short
+    --strict-markers
+    --disable-warnings
+    --cov=backend/app
+    --cov-report=term-missing
+    --cov-report=html:backend/htmlcov
+    --cov-report=xml:backend/coverage.xml
+    --cov-fail-under=80
+    --junitxml=backend/test-results.xml
+
+# Markers
+markers =
+    unit: Unit tests
+    integration: Integration tests
+    e2e: End-to-end tests
+    slow: Slow running tests
+    api: API tests
+    database: Database tests
+    agent: Agent tests
+    security: Security tests
+    performance: Performance tests
+    smoke: Smoke tests
+
+# Filtering
+filterwarnings =
+    ignore::DeprecationWarning
+    ignore::PendingDeprecationWarning
+    ignore::UserWarning
+
+# Test configuration
+minversion = 6.0
+asyncio_mode = auto
+
+# Coverage configuration
+# NOTE(review): coverage.py only auto-loads .coveragerc, setup.cfg, tox.ini and pyproject.toml.
+# These sections presumably need --cov-config pointing at this file (or a move) to apply - TODO confirm.
+[coverage:run]
+source = backend/app
+omit =
+    */tests/*
+    */migrations/*
+    */__pycache__/*
+    */venv/*
+    */env/*
+
+[coverage:report]
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    if self.debug:
+    if settings.DEBUG
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
+    class .*\bProtocol\):
+    @(abc\.)?abstractmethod
+
+[coverage:html]
+directory = backend/htmlcov
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..c66cc66
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,80 @@
+# Development and Testing Dependencies
+# Install with: pip install -r requirements-dev.txt
+
+# Testing
+pytest==7.4.3
+pytest-asyncio==0.21.1
+pytest-cov==4.1.0
+pytest-mock==3.12.0
+pytest-xdist==3.3.1
+pytest-html==3.2.0
+pytest-json-report==1.5.0
+factory-boy==3.3.0
+faker==20.1.0
+
+# Code Quality
+flake8==6.1.0
+black==23.11.0
+isort==5.12.0
+mypy==1.7.1
+bandit==1.7.5
+safety==2.3.5  # NOTE(review): believed to pin packaging<22 while black 23.x needs packaging>=22 - verify the resolver accepts this
+pre-commit==3.5.0
+
+# Documentation
+sphinx==7.2.6
+sphinx-rtd-theme==1.3.0
+sphinx-autodoc-typehints==1.25.0
+myst-parser==2.0.0
+
+# Performance Testing
+locust==2.17.0
+# wrk is a standalone C benchmarking tool, not a PyPI package; install it with the OS package manager
+
+# Development Tools
+ipython==8.17.2
+ipdb==0.13.13
+debugpy==1.8.0
+watchdog==3.0.0
+
+# Database Tools
+alembic==1.13.1
+psycopg2-binary==2.9.9
+
+# Monitoring and Debugging
+memory-profiler==0.61.0
+line-profiler==4.1.2
+py-spy==0.3.14
+
+# Security Tools
+cryptography==41.0.7
+python-jose[cryptography]==3.3.0
+
+# API Testing
+httpx==0.25.2
+requests-mock==1.11.0
+responses==0.24.1
+
+# Mock and Stub Libraries
+freezegun==1.2.2
+vcrpy==6.0.1
+
+# Environment Management
+python-dotenv==1.0.0
+environs==10.0.0
+
+# Build Tools
+build==1.0.3
+twine==4.0.2
+wheel==0.42.0
+
+# Type Checking
+types-requests==2.31.0.10
+types-PyYAML==6.0.12.12
+types-redis==4.6.0.9
+
+# Additional Utilities
+click==8.1.7
+rich==13.7.0
+tqdm==4.66.1
+colorama==0.4.6
diff --git a/requirements.txt b/requirements.txt
index 9a538ae..94a3719 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,62 +1,49 @@
-# Core FastAPI & Web Framework
-fastapi==0.110.0
-uvicorn==0.29.0
-pydantic==2.6.4
-python-multipart==0.0.9
-
-# LLM & AI Libraries
-openai==1.12.0
-langchain==0.1.0
-langchain-openai==0.0.5
-langchain-community==0.0.10
-transformers==4.37.2
-torch==2.1.2
-sentence-transformers==2.2.2
-
-# Document Processing
-pypdf2==3.0.1
-python-docx==1.1.0
-pytesseract==0.3.10
-Pillow==10.1.0
-pdf2image==1.16.3
+# Core Framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+pydantic==2.5.0
+pydantic-settings==2.1.0
 
-# Vector Database & Search
+# Database
 sqlalchemy==2.0.25
-redis==5.0.1
+alembic==1.13.1
 psycopg2-binary==2.9.9
-pgvector==0.2.4
-chromadb==0.4.22
-
-# Policy Engine
-opa==0.1.0
-
-# Monitoring & Observability
-opentelemetry-api==1.21.0
-opentelemetry-sdk==1.21.0
-opentelemetry-instrumentation-fastapi==0.42b0
-prometheus-client==0.19.0
+redis==5.0.1
 
-# Security & Authentication
+# Security
 python-jose[cryptography]==3.3.0
 passlib[bcrypt]==1.7.4
 python-multipart==0.0.9
 
-# Utilities
-python-dotenv==1.0.0
-httpx==0.26.0
-celery==5.3.4
-redis==5.0.1
+# AI/ML
+openai==1.6.1
+langchain==0.1.0
+langchain-openai==0.0.2
+chromadb==0.4.18
+sentence-transformers==2.2.2
+
+# Document Processing
+python-docx==1.1.0
+PyPDF2==3.0.1
 pandas==2.1.4
 openpyxl==3.1.2
-reportlab==4.0.7
+
+# Monitoring & Observability
+prometheus-client==0.19.0
+structlog==23.2.0
 
 # Testing
 pytest==7.4.3
 pytest-asyncio==0.21.1
-httpx==0.26.0
+httpx==0.25.2
 
 # Development
-black==23.12.1
-isort==5.13.2
+black==23.11.0
+isort==5.12.0
 flake8==6.1.0
-mypy==1.8.0
+mypy==1.7.1
+
+# Additional Dependencies
+python-dotenv==1.0.0
+aiofiles==23.2.1
+python-dateutil==2.8.2
diff --git a/scripts/backup.sh b/scripts/backup.sh
new file mode 100644
index 0000000..324df75
--- /dev/null
+++ b/scripts/backup.sh
@@ -0,0 +1,430 @@
+#!/bin/bash
+
+# AI Document Agent Backup Script
+# This script creates comprehensive backups of the entire system
+
+set -e # Exit on any error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Functions to print colored output; warnings and errors go to stderr so stdout stays clean for piping
+print_status() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1" >&2
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1" >&2
+}
+
+# Configuration
+BACKUP_DIR="./backups"
+DATE=$(date +%Y%m%d_%H%M%S)
+BACKUP_NAME="ai_document_agent_backup_$DATE"
+BACKUP_PATH="$BACKUP_DIR/$BACKUP_NAME"
+
+# Database configuration
+DB_HOST="${DB_HOST:-localhost}"
+DB_PORT="${DB_PORT:-5432}"
+DB_NAME="${DB_NAME:-ai_document_agent}"
+DB_USER="${DB_USER:-postgres}" + +# Function to check if command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Function to create backup directory +create_backup_directory() { + print_status "Creating backup directory..." + mkdir -p "$BACKUP_PATH" + mkdir -p "$BACKUP_PATH/database" + mkdir -p "$BACKUP_PATH/files" + mkdir -p "$BACKUP_PATH/config" + mkdir -p "$BACKUP_PATH/logs" + print_success "Backup directory created: $BACKUP_PATH" +} + +# Function to backup database +backup_database() { + print_status "Backing up PostgreSQL database..." + + if command_exists pg_dump; then + # Prompt for password if not provided + if [ -z "$DB_PASSWORD" ]; then + echo -n "Enter database password: " + read -s DB_PASSWORD + echo + fi + + # Set password environment variable + export PGPASSWORD="$DB_PASSWORD" + + # Create database dump + pg_dump -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \ + --verbose --clean --no-owner --no-privileges \ + --file="$BACKUP_PATH/database/full_backup.sql" + + # Create schema-only backup + pg_dump -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \ + --verbose --schema-only --no-owner --no-privileges \ + --file="$BACKUP_PATH/database/schema_only.sql" + + # Create data-only backup + pg_dump -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \ + --verbose --data-only --no-owner --no-privileges \ + --file="$BACKUP_PATH/database/data_only.sql" + + print_success "Database backup completed" + else + print_error "pg_dump not found. Please install PostgreSQL client tools." + return 1 + fi +} + +# Function to backup uploaded files +backup_files() { + print_status "Backing up uploaded files..." + + if [ -d "./uploads" ]; then + tar -czf "$BACKUP_PATH/files/uploads.tar.gz" -C . uploads/ + print_success "Uploaded files backed up" + else + print_warning "Uploads directory not found, skipping" + fi + + if [ -d "./output" ]; then + tar -czf "$BACKUP_PATH/files/output.tar.gz" -C . 
output/ + print_success "Output files backed up" + else + print_warning "Output directory not found, skipping" + fi + + if [ -d "./chroma_db" ]; then + tar -czf "$BACKUP_PATH/files/chroma_db.tar.gz" -C . chroma_db/ + print_success "ChromaDB vector database backed up" + else + print_warning "ChromaDB directory not found, skipping" + fi +} + +# Function to backup configuration files +backup_config() { + print_status "Backing up configuration files..." + + # Backup environment files + if [ -f ".env" ]; then + cp .env "$BACKUP_PATH/config/" + print_success "Environment file backed up" + fi + + if [ -f "backend/.env" ]; then + cp backend/.env "$BACKUP_PATH/config/" + print_success "Backend environment file backed up" + fi + + # Backup configuration files + if [ -d "backend/app/core" ]; then + tar -czf "$BACKUP_PATH/config/backend_config.tar.gz" -C backend/app core/ + print_success "Backend configuration backed up" + fi + + if [ -d "frontend/src/config" ]; then + tar -czf "$BACKUP_PATH/config/frontend_config.tar.gz" -C frontend/src config/ + print_success "Frontend configuration backed up" + fi + + # Backup Docker configuration + if [ -f "docker-compose.yml" ]; then + cp docker-compose.yml "$BACKUP_PATH/config/" + print_success "Docker Compose configuration backed up" + fi + + # Backup Nginx configuration + if [ -d "nginx" ]; then + tar -czf "$BACKUP_PATH/config/nginx_config.tar.gz" -C . nginx/ + print_success "Nginx configuration backed up" + fi + + # Backup monitoring configuration + if [ -d "monitoring" ]; then + tar -czf "$BACKUP_PATH/config/monitoring_config.tar.gz" -C . monitoring/ + print_success "Monitoring configuration backed up" + fi +} + +# Function to backup logs +backup_logs() { + print_status "Backing up log files..." + + if [ -d "./logs" ]; then + tar -czf "$BACKUP_PATH/logs/application_logs.tar.gz" -C . 
logs/ + print_success "Application logs backed up" + else + print_warning "Logs directory not found, skipping" + fi + + if [ -d "./audit_logs" ]; then + tar -czf "$BACKUP_PATH/logs/audit_logs.tar.gz" -C . audit_logs/ + print_success "Audit logs backed up" + else + print_warning "Audit logs directory not found, skipping" + fi +} + +# Function to backup code (optional) +backup_code() { + print_status "Backing up source code..." + + # Create a git archive if this is a git repository + if [ -d ".git" ]; then + git archive --format=tar.gz --output="$BACKUP_PATH/source_code.tar.gz" HEAD + print_success "Source code backed up (git archive)" + else + # Fallback: create a tar of the entire project + tar -czf "$BACKUP_PATH/source_code.tar.gz" \ + --exclude="$BACKUP_DIR" \ + --exclude="node_modules" \ + --exclude="__pycache__" \ + --exclude="*.pyc" \ + --exclude=".git" \ + --exclude="uploads" \ + --exclude="output" \ + --exclude="chroma_db" \ + --exclude="logs" \ + --exclude="audit_logs" \ + . + print_success "Source code backed up (full project)" + fi +} + +# Function to create backup manifest +create_manifest() { + print_status "Creating backup manifest..." + + cat > "$BACKUP_PATH/backup_manifest.txt" << EOF +AI Document Agent Backup Manifest +================================= +Backup Date: $(date) +Backup Name: $BACKUP_NAME +System: $(uname -a) + +Backup Contents: +$(find "$BACKUP_PATH" -type f -name "*.tar.gz" -o -name "*.sql" -o -name "*.env" | sort) + +Database Information: +- Host: $DB_HOST +- Port: $DB_PORT +- Database: $DB_NAME +- User: $DB_USER + +Backup Size: $(du -sh "$BACKUP_PATH" | cut -f1) + +Restore Instructions: +1. Extract the backup: tar -xzf $BACKUP_NAME.tar.gz +2. Restore database: psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME < database/full_backup.sql +3. Restore files: tar -xzf files/*.tar.gz +4. Restore configuration: tar -xzf config/*.tar.gz +5. 
Restart services: docker-compose up -d + +EOF + + print_success "Backup manifest created" +} + +# Function to compress backup +compress_backup() { + print_status "Compressing backup..." + + cd "$BACKUP_DIR" + tar -czf "${BACKUP_NAME}.tar.gz" "$BACKUP_NAME" + cd - > /dev/null + + # Remove uncompressed directory + rm -rf "$BACKUP_PATH" + + print_success "Backup compressed: $BACKUP_DIR/${BACKUP_NAME}.tar.gz" +} + +# Function to verify backup +verify_backup() { + print_status "Verifying backup..." + + BACKUP_FILE="$BACKUP_DIR/${BACKUP_NAME}.tar.gz" + + if [ -f "$BACKUP_FILE" ]; then + # Check if tar file is valid + if tar -tzf "$BACKUP_FILE" > /dev/null 2>&1; then + print_success "Backup verification successful" + print_status "Backup size: $(du -sh "$BACKUP_FILE" | cut -f1)" + else + print_error "Backup verification failed - tar file is corrupted" + return 1 + fi + else + print_error "Backup file not found" + return 1 + fi +} + +# Function to cleanup old backups +cleanup_old_backups() { + print_status "Cleaning up old backups..." + + # Keep backups for 30 days + find "$BACKUP_DIR" -name "ai_document_agent_backup_*.tar.gz" -mtime +30 -delete + + print_success "Old backups cleaned up" +} + +# Function to show backup status +show_backup_status() { + print_status "Backup Status:" + echo "Backup Directory: $BACKUP_DIR" + echo "Latest Backup: $BACKUP_NAME" + echo "Backup Size: $(du -sh "$BACKUP_DIR" 2>/dev/null | cut -f1 || echo 'N/A')" + echo "" + echo "Recent Backups:" + ls -la "$BACKUP_DIR"/*.tar.gz 2>/dev/null | tail -5 || echo "No backups found" +} + +# Function to restore backup +restore_backup() { + local backup_file="$1" + + if [ -z "$backup_file" ]; then + print_error "No backup file specified" + echo "Usage: $0 restore " + return 1 + fi + + if [ ! 
-f "$backup_file" ]; then + print_error "Backup file not found: $backup_file" + return 1 + fi + + print_status "Restoring backup: $backup_file" + + # Extract backup + local temp_dir=$(mktemp -d) + tar -xzf "$backup_file" -C "$temp_dir" + + # Restore database + if [ -f "$temp_dir"/*/database/full_backup.sql ]; then + print_status "Restoring database..." + if [ -z "$DB_PASSWORD" ]; then + echo -n "Enter database password: " + read -s DB_PASSWORD + echo + fi + export PGPASSWORD="$DB_PASSWORD" + psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" < "$temp_dir"/*/database/full_backup.sql + print_success "Database restored" + fi + + # Restore files + if [ -f "$temp_dir"/*/files/uploads.tar.gz ]; then + print_status "Restoring uploaded files..." + tar -xzf "$temp_dir"/*/files/uploads.tar.gz -C . + print_success "Uploaded files restored" + fi + + if [ -f "$temp_dir"/*/files/output.tar.gz ]; then + print_status "Restoring output files..." + tar -xzf "$temp_dir"/*/files/output.tar.gz -C . + print_success "Output files restored" + fi + + if [ -f "$temp_dir"/*/files/chroma_db.tar.gz ]; then + print_status "Restoring ChromaDB..." + tar -xzf "$temp_dir"/*/files/chroma_db.tar.gz -C . 
+ print_success "ChromaDB restored" + fi + + # Cleanup + rm -rf "$temp_dir" + + print_success "Backup restoration completed" +} + +# Function to show help +show_help() { + echo "AI Document Agent Backup Script" + echo "" + echo "Usage: $0 [OPTION]" + echo "" + echo "Options:" + echo " backup - Create a full backup (default)" + echo " restore - Restore from backup" + echo " status - Show backup status" + echo " cleanup - Clean up old backups" + echo " help - Show this help message" + echo "" + echo "Examples:" + echo " $0 backup # Create backup" + echo " $0 restore backup.tar.gz # Restore from backup" + echo " $0 status # Show backup status" + echo " $0 cleanup # Clean old backups" + echo "" + echo "Environment Variables:" + echo " DB_HOST - Database host (default: localhost)" + echo " DB_PORT - Database port (default: 5432)" + echo " DB_NAME - Database name (default: ai_document_agent)" + echo " DB_USER - Database user (default: postgres)" + echo " DB_PASSWORD - Database password (will prompt if not set)" +} + +# Main script logic +main() { + case "${1:-backup}" in + "backup") + print_status "Starting AI Document Agent backup..." + + create_backup_directory + backup_database + backup_files + backup_config + backup_logs + backup_code + create_manifest + compress_backup + verify_backup + cleanup_old_backups + + print_success "Backup completed successfully!" 
+ print_status "Backup location: $BACKUP_DIR/${BACKUP_NAME}.tar.gz" + ;; + "restore") + restore_backup "$2" + ;; + "status") + show_backup_status + ;; + "cleanup") + cleanup_old_backups + ;; + "help"|"-h"|"--help") + show_help + ;; + *) + print_error "Unknown option: $1" + show_help + exit 1 + ;; + esac +} + +# Run main function with all arguments +main "$@" diff --git a/scripts/init-db.sql b/scripts/init-db.sql new file mode 100644 index 0000000..d6bd78d --- /dev/null +++ b/scripts/init-db.sql @@ -0,0 +1,606 @@ +-- AI Document Agent Database Initialization Script +-- This script creates the database schema and initial data + +-- Create database if it doesn't exist +-- Note: This should be run as a superuser or database owner +-- CREATE DATABASE ai_document_agent; + +-- Connect to the database +-- \c ai_document_agent; + +-- Enable required extensions +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +CREATE EXTENSION IF NOT EXISTS "pgcrypto"; +CREATE EXTENSION IF NOT EXISTS "vector"; + +-- Create custom types +CREATE TYPE document_status AS ENUM ('pending', 'processing', 'completed', 'failed'); +CREATE TYPE agent_type AS ENUM ('orchestrator', 'ingestion', 'classifier', 'entity', 'risk', 'qa', 'compare', 'audit', 'summarizer', 'translator', 'sentiment'); +CREATE TYPE severity_level AS ENUM ('low', 'medium', 'high', 'critical'); +CREATE TYPE user_role AS ENUM ('user', 'manager', 'admin'); + +-- Create users table +CREATE TABLE IF NOT EXISTS users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) UNIQUE NOT NULL, + hashed_password VARCHAR(255) NOT NULL, + full_name VARCHAR(255), + is_active BOOLEAN DEFAULT true, + is_superuser BOOLEAN DEFAULT false, + role user_role DEFAULT 'user', + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + last_login TIMESTAMP WITH TIME ZONE +); + +-- Create roles table +CREATE TABLE IF NOT EXISTS roles ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) UNIQUE NOT NULL, + 
description TEXT, + permissions JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Create user_roles junction table +CREATE TABLE IF NOT EXISTS user_roles ( + user_id INTEGER REFERENCES users(id) ON DELETE CASCADE, + role_id INTEGER REFERENCES roles(id) ON DELETE CASCADE, + PRIMARY KEY (user_id, role_id) +); + +-- Create documents table +CREATE TABLE IF NOT EXISTS documents ( + id SERIAL PRIMARY KEY, + filename VARCHAR(255) NOT NULL, + original_filename VARCHAR(255) NOT NULL, + content TEXT, + file_path VARCHAR(500) NOT NULL, + file_size INTEGER NOT NULL, + content_type VARCHAR(100) NOT NULL, + doc_type VARCHAR(50), + domain VARCHAR(100), + processing_status document_status DEFAULT 'pending', + processing_result JSONB, + confidence_score FLOAT, + risk_score FLOAT, + metadata JSONB, + tags JSONB, + entities JSONB, + clauses JSONB, + risks JSONB, + qa_pairs JSONB, + uploaded_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + processed_at TIMESTAMP WITH TIME ZONE, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + uploaded_by INTEGER REFERENCES users(id) NOT NULL +); + +-- Create tags table +CREATE TABLE IF NOT EXISTS tags ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) UNIQUE NOT NULL, + description TEXT, + color VARCHAR(7), + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Create document_tags junction table +CREATE TABLE IF NOT EXISTS document_tags ( + document_id INTEGER REFERENCES documents(id) ON DELETE CASCADE, + tag_id INTEGER REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY (document_id, tag_id) +); + +-- Create compliance_frameworks table +CREATE TABLE IF NOT EXISTS compliance_frameworks ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) UNIQUE NOT NULL, + description TEXT, + version VARCHAR(20), + requirements JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +-- Create document_compliance_frameworks junction table +CREATE TABLE IF NOT EXISTS 
document_compliance_frameworks ( + document_id INTEGER REFERENCES documents(id) ON DELETE CASCADE, + framework_id INTEGER REFERENCES compliance_frameworks(id) ON DELETE CASCADE, + PRIMARY KEY (document_id, framework_id) +); + +-- Create processing_history table +CREATE TABLE IF NOT EXISTS processing_history ( + id SERIAL PRIMARY KEY, + processing_id VARCHAR(100) UNIQUE NOT NULL, + workflow_id VARCHAR(100), + current_stage VARCHAR(100), + completed_stages JSONB, + failed_stages JSONB, + total_execution_time FLOAT, + progress_percentage FLOAT DEFAULT 0.0, + status document_status DEFAULT 'pending', + result JSONB, + confidence FLOAT, + rationale TEXT, + error_message TEXT, + started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP WITH TIME ZONE, + document_id INTEGER REFERENCES documents(id) NOT NULL, + user_id INTEGER REFERENCES users(id) NOT NULL +); + +-- Create agent_executions table +CREATE TABLE IF NOT EXISTS agent_executions ( + id SERIAL PRIMARY KEY, + execution_id VARCHAR(100) UNIQUE NOT NULL, + agent_type agent_type NOT NULL, + agent_name VARCHAR(100) NOT NULL, + input_size INTEGER, + output_size INTEGER, + execution_time FLOAT, + memory_usage FLOAT, + cpu_usage FLOAT, + status document_status DEFAULT 'pending', + confidence FLOAT, + output JSONB, + error_message TEXT, + started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP WITH TIME ZONE, + processing_history_id INTEGER REFERENCES processing_history(id) NOT NULL +); + +-- Create document_comparisons table +CREATE TABLE IF NOT EXISTS document_comparisons ( + id SERIAL PRIMARY KEY, + comparison_id VARCHAR(100) UNIQUE NOT NULL, + comparison_type VARCHAR(50) NOT NULL, + similarity_score FLOAT, + differences JSONB, + risk_changes JSONB, + status document_status DEFAULT 'pending', + result JSONB, + confidence FLOAT, + summary TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + completed_at TIMESTAMP WITH TIME ZONE, + 
document_a_id INTEGER REFERENCES documents(id) NOT NULL, + document_b_id INTEGER REFERENCES documents(id) NOT NULL, + created_by INTEGER REFERENCES users(id) NOT NULL +); + +-- Create audit_events table +CREATE TABLE IF NOT EXISTS audit_events ( + id SERIAL PRIMARY KEY, + event_id VARCHAR(100) UNIQUE NOT NULL, + event_type VARCHAR(100) NOT NULL, + event_category VARCHAR(50) NOT NULL, + severity severity_level DEFAULT 'low', + description TEXT NOT NULL, + details JSONB, + ip_address INET, + user_agent TEXT, + timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + user_id INTEGER REFERENCES users(id), + document_id INTEGER REFERENCES documents(id) +); + +-- Create system_metrics table +CREATE TABLE IF NOT EXISTS system_metrics ( + id SERIAL PRIMARY KEY, + metric_id VARCHAR(100) UNIQUE NOT NULL, + metric_name VARCHAR(100) NOT NULL, + metric_type VARCHAR(50) NOT NULL, + value FLOAT NOT NULL, + labels JSONB, + timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + description TEXT, + unit VARCHAR(20) +); + +-- Create workflow_templates table +CREATE TABLE IF NOT EXISTS workflow_templates ( + id SERIAL PRIMARY KEY, + template_id VARCHAR(100) UNIQUE NOT NULL, + name VARCHAR(100) NOT NULL, + description TEXT, + version VARCHAR(20) DEFAULT '1.0.0', + stages JSONB NOT NULL, + agent_config JSONB, + workflow_config JSONB, + is_active BOOLEAN DEFAULT true, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + created_by INTEGER REFERENCES users(id) NOT NULL +); + +-- Create knowledge_base table +CREATE TABLE IF NOT EXISTS knowledge_base ( + id SERIAL PRIMARY KEY, + kb_id VARCHAR(100) UNIQUE NOT NULL, + name VARCHAR(100) NOT NULL, + description TEXT, + domain VARCHAR(100), + content TEXT NOT NULL, + content_type VARCHAR(50) NOT NULL, + vector_embedding vector(1536), -- OpenAI embedding dimension + source VARCHAR(255), + version VARCHAR(20) DEFAULT '1.0.0', + is_active BOOLEAN DEFAULT 
true,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by INTEGER REFERENCES users(id) NOT NULL
);

-- Create notifications table
-- One row per notification addressed to a user; document_id is optional.
CREATE TABLE IF NOT EXISTS notifications (
    id SERIAL PRIMARY KEY,
    notification_id VARCHAR(100) UNIQUE NOT NULL,
    title VARCHAR(255) NOT NULL,
    message TEXT NOT NULL,
    notification_type VARCHAR(50) NOT NULL,
    priority VARCHAR(20) DEFAULT 'normal',
    is_read BOOLEAN DEFAULT false,
    is_sent BOOLEAN DEFAULT false,
    sent_at TIMESTAMP WITH TIME ZONE,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER REFERENCES users(id) NOT NULL,
    document_id INTEGER REFERENCES documents(id)
);

-- Create api_logs table
-- NOTE(review): request_headers / request_body / response_body are stored
-- verbatim; confirm the writer redacts credentials and PII before inserting.
CREATE TABLE IF NOT EXISTS api_logs (
    id SERIAL PRIMARY KEY,
    method VARCHAR(10) NOT NULL,
    endpoint VARCHAR(255) NOT NULL,
    status_code INTEGER NOT NULL,
    response_time FLOAT,
    request_size INTEGER,
    response_size INTEGER,
    ip_address INET,
    user_agent TEXT,
    request_headers JSONB,
    request_body TEXT,
    response_body TEXT,
    timestamp TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    user_id INTEGER REFERENCES users(id)
);

-- Create system_configurations table
-- Key/value settings; config_value is free-form TEXT and config_type names
-- how readers are expected to interpret it.
CREATE TABLE IF NOT EXISTS system_configurations (
    id SERIAL PRIMARY KEY,
    config_key VARCHAR(100) UNIQUE NOT NULL,
    config_value TEXT NOT NULL,
    config_type VARCHAR(50) NOT NULL,
    description TEXT,
    is_active BOOLEAN DEFAULT true,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_by INTEGER REFERENCES users(id)
);

-- Create indexes for better performance
CREATE INDEX IF NOT EXISTS idx_documents_uploaded_by ON documents(uploaded_by);
CREATE INDEX IF NOT EXISTS idx_documents_status ON documents(processing_status);
CREATE INDEX IF NOT EXISTS idx_documents_uploaded_at ON documents(uploaded_at);
CREATE INDEX IF NOT EXISTS idx_processing_history_document_id ON processing_history(document_id);
CREATE INDEX IF NOT EXISTS idx_agent_executions_processing_history_id ON agent_executions(processing_history_id);
CREATE INDEX IF NOT EXISTS idx_audit_events_timestamp ON audit_events(timestamp);
CREATE INDEX IF NOT EXISTS idx_audit_events_user_id ON audit_events(user_id);
CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON system_metrics(timestamp);
-- NOTE(review): an ivfflat index built on an empty table has no data to derive
-- its lists from; consider rebuilding it (REINDEX) once embeddings are loaded.
CREATE INDEX IF NOT EXISTS idx_knowledge_base_vector ON knowledge_base USING ivfflat (vector_embedding vector_cosine_ops);
CREATE INDEX IF NOT EXISTS idx_api_logs_timestamp ON api_logs(timestamp);
CREATE INDEX IF NOT EXISTS idx_api_logs_endpoint ON api_logs(endpoint);

-- Insert default roles
INSERT INTO roles (name, description, permissions) VALUES
('admin', 'Administrator with full access', '["*"]'),
('manager', 'Manager with limited admin access', '["read", "write", "analyze", "audit"]'),
('user', 'Regular user with basic access', '["read", "write", "analyze"]')
ON CONFLICT (name) DO NOTHING;

-- Insert default admin user (password: admin123)
-- SECURITY: this is a well-known bootstrap credential committed to source
-- control. It must be rotated immediately after the first login and should
-- never reach a production database unchanged.
INSERT INTO users (email, hashed_password, full_name, is_active, is_superuser, role) VALUES
('admin@example.com', crypt('admin123', gen_salt('bf')), 'System Administrator', true, true, 'admin')
ON CONFLICT (email) DO NOTHING;

-- Insert default compliance frameworks
INSERT INTO compliance_frameworks (name, description, version) VALUES
('GDPR', 'General Data Protection Regulation', '2018'),
('HIPAA', 'Health Insurance Portability and Accountability Act', '1996'),
('SOX', 'Sarbanes-Oxley Act', '2002'),
('PCI-DSS', 'Payment Card Industry Data Security Standard', '4.0')
ON CONFLICT (name) DO NOTHING;

-- Insert default tags
INSERT INTO tags (name, description, color) VALUES
('contract', 'Legal contracts', '#2196F3'),
('invoice', 'Financial invoices', '#4CAF50'),
('policy', 'Company policies', '#FF9800'),
('report', 'Business reports', '#9C27B0'),
('compliance', 'Compliance documents', '#F44336')
ON CONFLICT (name) DO NOTHING;

-- Insert default system configurations
INSERT INTO system_configurations (config_key, config_value, config_type, description) VALUES
('max_file_size', '52428800', 'integer', 'Maximum file upload size in bytes'),
('allowed_file_types', '["pdf", "docx", "txt", "csv", "xlsx"]', 'json', 'Allowed file types for upload'),
('agent_timeout', '300', 'integer', 'Agent execution timeout in seconds'),
('confidence_threshold', '0.7', 'float', 'Minimum confidence threshold for agent results'),
('audit_retention_days', '90', 'integer', 'Number of days to retain audit logs'),
('enable_monitoring', 'true', 'boolean', 'Enable system monitoring'),
('enable_pii_redaction', 'true', 'boolean', 'Enable PII redaction in logs')
ON CONFLICT (config_key) DO NOTHING;

-- Create function to update updated_at timestamp
-- Row-level BEFORE UPDATE trigger function: stamps NEW.updated_at with the
-- current transaction timestamp and lets the update proceed.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create triggers for updated_at columns
-- CREATE TRIGGER has no IF NOT EXISTS form, so each trigger is dropped first.
-- This keeps the script re-runnable like the IF NOT EXISTS / OR REPLACE /
-- ON CONFLICT statements around it.
DROP TRIGGER IF EXISTS update_users_updated_at ON users;
CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON users FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_documents_updated_at ON documents;
CREATE TRIGGER update_documents_updated_at BEFORE UPDATE ON documents FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_workflow_templates_updated_at ON workflow_templates;
CREATE TRIGGER update_workflow_templates_updated_at BEFORE UPDATE ON workflow_templates FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_knowledge_base_updated_at ON knowledge_base;
CREATE TRIGGER update_knowledge_base_updated_at BEFORE UPDATE ON knowledge_base FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_system_configurations_updated_at ON system_configurations;
CREATE TRIGGER update_system_configurations_updated_at BEFORE UPDATE ON system_configurations FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

-- Grant permissions to application user (adjust username as needed)
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO ai_document_agent_user;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public
-- TO ai_document_agent_user;

-- Create view for document summary
-- One row per document with the uploader's name and the number of distinct
-- tags and compliance frameworks linked to it (0 when none are linked).
CREATE OR REPLACE VIEW document_summary AS
SELECT
    d.id,
    d.filename,
    d.doc_type,
    d.processing_status,
    d.confidence_score,
    d.risk_score,
    d.uploaded_at,
    u.full_name AS uploaded_by_name,
    COUNT(DISTINCT dt.tag_id) AS tag_count,
    COUNT(DISTINCT dcf.framework_id) AS compliance_framework_count
FROM documents d
LEFT JOIN users u ON d.uploaded_by = u.id
LEFT JOIN document_tags dt ON d.id = dt.document_id
LEFT JOIN document_compliance_frameworks dcf ON d.id = dcf.document_id
GROUP BY d.id, d.filename, d.doc_type, d.processing_status, d.confidence_score, d.risk_score, d.uploaded_at, u.full_name;

-- Create view for system health
-- Two rows: 'documents' (completed / failed counts, average confidence) and
-- 'users', where completed_count / failed_count are reused to carry the
-- active / inactive user counts and avg_confidence is NULL.
CREATE OR REPLACE VIEW system_health AS
SELECT
    'documents' AS component,
    COUNT(*) AS total_count,
    COUNT(*) FILTER (WHERE processing_status = 'completed') AS completed_count,
    COUNT(*) FILTER (WHERE processing_status = 'failed') AS failed_count,
    AVG(confidence_score) AS avg_confidence
FROM documents
UNION ALL
SELECT
    'users' AS component,
    COUNT(*) AS total_count,
    COUNT(*) FILTER (WHERE is_active) AS completed_count,
    COUNT(*) FILTER (WHERE NOT is_active) AS failed_count,
    NULL AS avg_confidence
FROM users;

-- Create function to clean old audit logs
-- Deletes audit_events rows older than the configured retention period and
-- returns the number of rows removed.
--
-- The period is read from system_configurations ('audit_retention_days').
-- config_value is free-form TEXT, so it is validated before casting: a
-- missing, non-numeric or negative value falls back to 90 days. Previously a
-- non-numeric value made the cast raise and abort the cleanup, and a negative
-- value moved the cutoff into the future and deleted every audit row.
CREATE OR REPLACE FUNCTION clean_old_audit_logs()
RETURNS INTEGER AS $$
DECLARE
    deleted_count INTEGER;
    retention_days INTEGER;
    raw_retention TEXT;
BEGIN
    -- Get retention period from configuration
    SELECT config_value INTO raw_retention
    FROM system_configurations
    WHERE config_key = 'audit_retention_days';

    -- Accept only a plain non-negative integer; otherwise default to 90 days.
    -- A NULL raw_retention makes the match NULL, which also takes the ELSE arm.
    IF btrim(raw_retention) ~ '^[0-9]{1,6}$' THEN
        retention_days := btrim(raw_retention)::INTEGER;
    ELSE
        retention_days := 90;
    END IF;

    -- Delete old audit logs
    DELETE FROM audit_events
    WHERE timestamp < CURRENT_TIMESTAMP - INTERVAL '1 day' * retention_days;

    GET DIAGNOSTICS deleted_count = ROW_COUNT;
    RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;

-- Create function
-- to get document statistics
-- Aggregates over documents uploaded within [start_date, end_date].
-- Returns a single row:
--   total/completed/failed_documents  document counts
--   avg_processing_time               mean total_execution_time over the
--                                     processing_history rows of those documents
--   avg_confidence_score              mean confidence_score per document
--   total_file_size                   sum of file_size (NULL when no documents)
--
-- The history average is computed in its own subquery. Joining
-- processing_history into the main aggregate repeated each document once per
-- history row, which inflated the counts and the file-size sum and skewed the
-- confidence average.
CREATE OR REPLACE FUNCTION get_document_statistics(
    start_date TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP - INTERVAL '30 days',
    end_date TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
)
RETURNS TABLE (
    total_documents BIGINT,
    completed_documents BIGINT,
    failed_documents BIGINT,
    avg_processing_time FLOAT,
    avg_confidence_score FLOAT,
    total_file_size BIGINT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT AS total_documents,
        COUNT(CASE WHEN d.processing_status = 'completed' THEN 1 END)::BIGINT AS completed_documents,
        COUNT(CASE WHEN d.processing_status = 'failed' THEN 1 END)::BIGINT AS failed_documents,
        (
            SELECT AVG(ph.total_execution_time)
            FROM processing_history ph
            JOIN documents hd ON hd.id = ph.document_id
            WHERE hd.uploaded_at BETWEEN start_date AND end_date
        )::FLOAT AS avg_processing_time,
        -- Explicit casts: AVG() yields NUMERIC for integer/numeric columns,
        -- which RETURN QUERY would reject against the declared FLOAT columns.
        AVG(d.confidence_score)::FLOAT AS avg_confidence_score,
        SUM(d.file_size)::BIGINT AS total_file_size
    FROM documents d
    WHERE d.uploaded_at BETWEEN start_date AND end_date;
END;
$$ LANGUAGE plpgsql;

-- Create function to search documents by content similarity
-- Case-insensitive substring search over completed documents, ordered by
-- confidence_score (highest first) and limited to limit_count rows.
--
-- search_text is matched literally: '%' and '_' in the input are escaped so
-- they are not interpreted as LIKE wildcards.
-- NOTE(review): similarity_threshold is accepted for interface compatibility
-- but is not applied, and similarity_score is the document's confidence_score
-- rather than a text-similarity measure. Confirm the intended semantics
-- before relying on either.
CREATE OR REPLACE FUNCTION search_documents_by_similarity(
    search_text TEXT,
    similarity_threshold FLOAT DEFAULT 0.8,
    limit_count INTEGER DEFAULT 10
)
RETURNS TABLE (
    document_id INTEGER,
    filename VARCHAR(255),
    similarity_score FLOAT,
    content_preview TEXT
) AS $$
DECLARE
    like_pattern TEXT;
BEGIN
    -- '!' is the escape character; escape it first, then the two wildcards.
    like_pattern := '%'
        || replace(replace(replace(search_text, '!', '!!'), '%', '!%'), '_', '!_')
        || '%';

    RETURN QUERY
    SELECT
        d.id AS document_id,
        d.filename,
        d.confidence_score::FLOAT AS similarity_score,
        LEFT(d.content, 200) AS content_preview
    FROM documents d
    WHERE d.processing_status = 'completed'
      AND d.content ILIKE like_pattern ESCAPE '!'
    ORDER BY d.confidence_score DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;

-- Create function to get agent performance metrics
-- Per-agent-type execution statistics for executions started within
-- [start_date, end_date], busiest agent type first.
CREATE OR REPLACE FUNCTION get_agent_performance_metrics(
    start_date TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP - INTERVAL '7 days',
    end_date TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
)
RETURNS TABLE (
    agent_type TEXT,
    total_executions BIGINT,
    successful_executions BIGINT,
    avg_execution_time FLOAT,
    avg_confidence FLOAT,
    avg_memory_usage FLOAT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        ae.agent_type::TEXT,
        COUNT(*)::BIGINT AS total_executions,
        COUNT(CASE WHEN ae.status = 'completed' THEN 1 END)::BIGINT AS successful_executions,
        -- Explicit casts keep the result compatible with the declared FLOAT
        -- columns even if the source columns are INTEGER or NUMERIC.
        AVG(ae.execution_time)::FLOAT AS avg_execution_time,
        AVG(ae.confidence)::FLOAT AS avg_confidence,
        AVG(ae.memory_usage)::FLOAT AS avg_memory_usage
    FROM agent_executions ae
    WHERE ae.started_at BETWEEN start_date AND end_date
    GROUP BY ae.agent_type
    ORDER BY COUNT(*) DESC;
END;
$$ LANGUAGE plpgsql;

-- Create indexes for the new functions
-- NOTE(review): the full-text GIN index is not usable by the ILIKE predicate
-- in search_documents_by_similarity; it only helps queries written against
-- to_tsvector('english', content).
CREATE INDEX IF NOT EXISTS idx_documents_content_gin ON documents USING gin(to_tsvector('english', content));
CREATE INDEX IF NOT EXISTS idx_agent_executions_started_at ON agent_executions(started_at);
CREATE INDEX IF NOT EXISTS idx_processing_history_started_at ON processing_history(started_at);

-- Insert sample workflow template
-- created_by resolves to the seeded admin account.
INSERT INTO workflow_templates (template_id, name, description, stages, agent_config, created_by) VALUES
('default-workflow', 'Default Document Processing Workflow', 'Standard workflow for processing documents',
'[
  {"name": "ingestion", "agent": "ingestion", "order": 1, "required": true},
  {"name": "classification", "agent": "classifier", "order": 2, "required": true},
  {"name": "entity_extraction", "agent": "entity", "order": 3, "required": true},
  {"name": "risk_assessment", "agent": "risk", "order": 4, "required": true},
  {"name": "summarization", "agent": "summarizer", "order": 5, "required": false},
  {"name": "audit_logging", "agent": "audit", "order": 6, "required": true}
]',
'{
  "ingestion": {"timeout": 60, "max_retries": 3},
  "classifier": {"timeout": 30, "max_retries": 2},
  "entity": {"timeout": 45, "max_retries": 2},
  "risk": {"timeout": 60, "max_retries": 3},
  "summarizer": {"timeout": 90, "max_retries": 2},
  "audit": {"timeout": 10, "max_retries": 1}
}',
(SELECT id FROM users WHERE email = 'admin@example.com' LIMIT 1))
ON CONFLICT (template_id) DO NOTHING;

-- Create a function to automatically assign tags based on content
-- Links the document to every default tag whose keyword patterns occur in its
-- content (case-insensitive substring match). Does nothing when the document
-- does not exist or has NULL content. Tags missing from the tags table are
-- skipped, and existing links are left untouched.
CREATE OR REPLACE FUNCTION auto_assign_tags(document_id INTEGER)
RETURNS VOID AS $$
DECLARE
    doc_content TEXT;
BEGIN
    -- Get document content. The parameter is qualified with the function name
    -- because it shares its name with the document_tags.document_id column.
    SELECT d.content INTO doc_content
    FROM documents d
    WHERE d.id = auto_assign_tags.document_id;

    IF doc_content IS NULL THEN
        RETURN;
    END IF;

    -- One row per tag: the tag name and the patterns that trigger it.
    INSERT INTO document_tags (document_id, tag_id)
    SELECT auto_assign_tags.document_id, t.id
    FROM (VALUES
        ('contract',   ARRAY['%contract%', '%agreement%']),
        ('invoice',    ARRAY['%invoice%', '%bill%', '%payment%']),
        ('policy',     ARRAY['%policy%', '%procedure%', '%guideline%']),
        ('report',     ARRAY['%report%', '%analysis%', '%summary%']),
        ('compliance', ARRAY['%gdpr%', '%hipaa%', '%sox%', '%compliance%'])
    ) AS rule(tag_name, patterns)
    JOIN tags t ON t.name = rule.tag_name
    WHERE doc_content ILIKE ANY (rule.patterns)
    ON CONFLICT DO NOTHING;
END;
$$ LANGUAGE plpgsql;

-- Create trigger to auto-assign tags when document is processed
-- Fires auto_assign_tags() when processing_status changes to 'completed'.
-- IS DISTINCT FROM treats a NULL previous status as "not completed"; with
-- "!=" the comparison was NULL and such documents were never tagged.
CREATE OR REPLACE FUNCTION trigger_auto_assign_tags()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.processing_status = 'completed'
       AND OLD.processing_status IS DISTINCT FROM 'completed' THEN
        PERFORM auto_assign_tags(NEW.id);
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form; drop first so the script can be
-- re-run without failing on an existing trigger.
DROP TRIGGER IF EXISTS auto_assign_tags_trigger ON documents;
CREATE TRIGGER auto_assign_tags_trigger
    AFTER UPDATE ON documents
    FOR EACH ROW
    EXECUTE FUNCTION trigger_auto_assign_tags();

-- Final cleanup and optimization
-- ANALYZE instead of VACUUM ANALYZE: VACUUM cannot run inside a transaction
-- block (e.g. psql --single-transaction or a migration tool), and a freshly
-- initialized database has nothing to vacuum. Planner statistics are still
-- refreshed.
ANALYZE;

-- Print completion message
-- The default password is deliberately not echoed into the server/client log.
DO $$
BEGIN
    RAISE NOTICE 'Database initialization completed successfully!';
    RAISE NOTICE 'Default admin user: admin@example.com (default password - change it immediately)';
    RAISE NOTICE 'Default roles, compliance frameworks, and tags have been created.';
END $$;
diff --git a/scripts/monitoring-setup.sh b/scripts/monitoring-setup.sh
new file mode 100644
index 0000000..ab16593
--- /dev/null
+++ b/scripts/monitoring-setup.sh
@@ -0,0 +1,793 @@
#!/bin/bash

# AI Document Agent Monitoring Setup Script
# This script sets up comprehensive monitoring for the AI Document Agent

set -e  # Exit on any error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print colored output
print_status() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

print_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Errors go to stderr so they are not lost when stdout is redirected.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}

# Configuration
# MONITORING_DIR is absolute (anchored at the directory the script is started
# from) so that functions which `cd` into it still resolve it correctly when
# more than one of them runs in the same invocation. All three values can be
# overridden from the environment.
MONITORING_DIR="${MONITORING_DIR:-$PWD/monitoring}"
GRAFANA_ADMIN_USER="${GRAFANA_ADMIN_USER:-admin}"
GRAFANA_ADMIN_PASSWORD="${GRAFANA_ADMIN_PASSWORD:-admin}"
# Exported so child processes (docker-compose variable interpolation) can read them.
export GRAFANA_ADMIN_USER GRAFANA_ADMIN_PASSWORD

# Function to check if command exists
command_exists() {
    command -v "$1" >/dev/null 2>&1
}

# Function to check if Docker is running
# Exits with status 1 when docker or docker-compose is missing, or when the
# Docker daemon is not reachable.
check_docker() {
    if ! command_exists docker; then
        print_error "Docker is not installed. Please install Docker first."
        exit 1
    fi

    # The service functions invoke docker-compose; fail early with a clear message.
    if ! command_exists docker-compose; then
        print_error "docker-compose is not installed. Please install docker-compose first."
        exit 1
    fi

    if ! docker info >/dev/null 2>&1; then
        print_error "Docker is not running. Please start Docker first."
        exit 1
    fi

    print_success "Docker is available and running"
}

# Function to create monitoring directories
# Creates the directory tree the configuration writers and the compose file
# expect under MONITORING_DIR. Safe to run repeatedly.
create_monitoring_directories() {
    print_status "Creating monitoring directories..."

    mkdir -p \
        "$MONITORING_DIR/grafana/dashboards" \
        "$MONITORING_DIR/grafana/datasources" \
        "$MONITORING_DIR/grafana/provisioning/dashboards" \
        "$MONITORING_DIR/grafana/provisioning/datasources" \
        "$MONITORING_DIR/prometheus/rules" \
        "$MONITORING_DIR/prometheus/alerts" \
        "$MONITORING_DIR/alertmanager" \
        "$MONITORING_DIR/jaeger" \
        "$MONITORING_DIR/elasticsearch" \
        "$MONITORING_DIR/kibana" \
        "$MONITORING_DIR/filebeat"

    print_success "Monitoring directories created"
}

# Function to setup Prometheus configuration
setup_prometheus() {
    print_status "Setting up Prometheus configuration..."
+ + # Create Prometheus configuration + cat > "$MONITORING_DIR/prometheus/prometheus.yml" << 'EOF' +global: + scrape_interval: 15s + evaluation_interval: 15s + +rule_files: + - "rules/*.yml" + +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +scrape_configs: + # Prometheus itself + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # AI Document Agent Backend + - job_name: 'ai-document-agent-backend' + static_configs: + - targets: ['backend:8000'] + metrics_path: '/metrics' + scrape_interval: 10s + + # AI Document Agent Frontend + - job_name: 'ai-document-agent-frontend' + static_configs: + - targets: ['frontend:3000'] + metrics_path: '/metrics' + scrape_interval: 10s + + # PostgreSQL + - job_name: 'postgresql' + static_configs: + - targets: ['postgres:5432'] + scrape_interval: 30s + + # Redis + - job_name: 'redis' + static_configs: + - targets: ['redis:6379'] + scrape_interval: 30s + + # ChromaDB + - job_name: 'chromadb' + static_configs: + - targets: ['chromadb:8000'] + metrics_path: '/metrics' + scrape_interval: 30s + + # Node Exporter (system metrics) + - job_name: 'node-exporter' + static_configs: + - targets: ['node-exporter:9100'] + scrape_interval: 15s + + # cAdvisor (container metrics) + - job_name: 'cadvisor' + static_configs: + - targets: ['cadvisor:8080'] + scrape_interval: 15s + + # Jaeger (tracing) + - job_name: 'jaeger' + static_configs: + - targets: ['jaeger:16686'] + scrape_interval: 30s + + # Elasticsearch + - job_name: 'elasticsearch' + static_configs: + - targets: ['elasticsearch:9200'] + metrics_path: '/_prometheus/metrics' + scrape_interval: 30s +EOF + + # Create alert rules + cat > "$MONITORING_DIR/prometheus/rules/ai-document-agent.yml" << 'EOF' +groups: + - name: ai-document-agent + rules: + # High CPU usage + - alert: HighCPUUsage + expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80 + for: 5m + labels: + severity: warning + annotations: 
+ summary: "High CPU usage on {{ $labels.instance }}" + description: "CPU usage is above 80% for more than 5 minutes" + + # High memory usage + - alert: HighMemoryUsage + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 85 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance }}" + description: "Memory usage is above 85% for more than 5 minutes" + + # High disk usage + - alert: HighDiskUsage + expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes * 100 > 90 + for: 5m + labels: + severity: warning + annotations: + summary: "High disk usage on {{ $labels.instance }}" + description: "Disk usage is above 90% for more than 5 minutes" + + # Backend service down + - alert: BackendServiceDown + expr: up{job="ai-document-agent-backend"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Backend service is down" + description: "The AI Document Agent backend service is not responding" + + # Frontend service down + - alert: FrontendServiceDown + expr: up{job="ai-document-agent-frontend"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Frontend service is down" + description: "The AI Document Agent frontend service is not responding" + + # High error rate + - alert: HighErrorRate + expr: rate(http_requests_total{status_code=~"5.."}[5m]) / rate(http_requests_total[5m]) * 100 > 5 + for: 5m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "Error rate is above 5% for more than 5 minutes" + + # Slow response time + - alert: SlowResponseTime + expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "Slow response time detected" + description: "95th percentile response time is above 2 seconds" + + # Agent execution failures + - alert: 
AgentExecutionFailures + expr: rate(agent_execution_failures_total[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High agent execution failure rate" + description: "Agent execution failure rate is above 0.1 per second" + + # Database connection issues + - alert: DatabaseConnectionIssues + expr: up{job="postgresql"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Database connection issues" + description: "Cannot connect to PostgreSQL database" + + # Redis connection issues + - alert: RedisConnectionIssues + expr: up{job="redis"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "Redis connection issues" + description: "Cannot connect to Redis cache" +EOF + + print_success "Prometheus configuration created" +} + +# Function to setup Alertmanager configuration +setup_alertmanager() { + print_status "Setting up Alertmanager configuration..." + + cat > "$MONITORING_DIR/alertmanager/alertmanager.yml" << 'EOF' +global: + resolve_timeout: 5m + slack_api_url: 'https://hooks.slack.com/services/YOUR_SLACK_WEBHOOK' + +route: + group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'web.hook' + routes: + - match: + severity: critical + receiver: 'slack.critical' + continue: true + - match: + severity: warning + receiver: 'slack.warning' + +receivers: + - name: 'web.hook' + webhook_configs: + - url: 'http://127.0.0.1:5001/' + + - name: 'slack.critical' + slack_configs: + - channel: '#alerts-critical' + title: '{{ template "slack.title" . }}' + text: '{{ template "slack.text" . }}' + send_resolved: true + + - name: 'slack.warning' + slack_configs: + - channel: '#alerts-warning' + title: '{{ template "slack.title" . }}' + text: '{{ template "slack.text" . 
}}' + send_resolved: true + +inhibit_rules: + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + equal: ['alertname', 'dev', 'instance'] +EOF + + print_success "Alertmanager configuration created" +} + +# Function to setup Grafana datasources +setup_grafana_datasources() { + print_status "Setting up Grafana datasources..." + + cat > "$MONITORING_DIR/grafana/provisioning/datasources/datasources.yml" << 'EOF' +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + jsonData: + timeInterval: "5s" + queryTimeout: "60s" + httpMethod: "POST" + secureJsonData: {} + + - name: PostgreSQL + type: postgres + access: proxy + url: postgres:5432 + database: ai_document_agent + user: postgres + secureJsonData: + password: "your_password_here" + jsonData: + sslmode: "disable" + maxOpenConns: 100 + maxIdleConns: 100 + connMaxLifetime: 14400 + + - name: Elasticsearch + type: elasticsearch + access: proxy + url: http://elasticsearch:9200 + database: "ai-document-agent-logs" + jsonData: + timeField: "@timestamp" + esVersion: 7.0.0 + maxConcurrentShardRequests: 5 + logMessageField: message + logLevelField: level + + - name: Jaeger + type: jaeger + access: proxy + url: http://jaeger:16686 + jsonData: + nodeGraph: + enabled: true +EOF + + print_success "Grafana datasources configuration created" +} + +# Function to setup Grafana dashboards provisioning +setup_grafana_dashboards() { + print_status "Setting up Grafana dashboards provisioning..." 
+ + cat > "$MONITORING_DIR/grafana/provisioning/dashboards/dashboards.yml" << 'EOF' +apiVersion: 1 + +providers: + - name: 'AI Document Agent' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards +EOF + + print_success "Grafana dashboards provisioning created" +} + +# Function to setup Elasticsearch configuration +setup_elasticsearch() { + print_status "Setting up Elasticsearch configuration..." + + cat > "$MONITORING_DIR/elasticsearch/elasticsearch.yml" << 'EOF' +cluster.name: ai-document-agent +node.name: node-1 +network.host: 0.0.0.0 +http.port: 9200 +discovery.type: single-node +xpack.security.enabled: false +xpack.monitoring.enabled: true +xpack.monitoring.collection.enabled: true + +# Memory settings +bootstrap.memory_lock: true +indices.memory.index_buffer_size: 30% + +# Logging +logger.level: INFO +EOF + + print_success "Elasticsearch configuration created" +} + +# Function to setup Filebeat configuration +setup_filebeat() { + print_status "Setting up Filebeat configuration..." 
+ + cat > "$MONITORING_DIR/filebeat/filebeat.yml" << 'EOF' +filebeat.inputs: + - type: log + enabled: true + paths: + - /var/log/ai-document-agent/*.log + fields: + service: ai-document-agent + fields_under_root: true + multiline.pattern: '^\[' + multiline.negate: true + multiline.match: after + + - type: log + enabled: true + paths: + - /var/log/audit/*.log + fields: + service: audit + fields_under_root: true + +processors: + - add_host_metadata: + when.not.contains.tags: forwarded + - add_cloud_metadata: ~ + - add_docker_metadata: ~ + - add_kubernetes_metadata: ~ + +output.elasticsearch: + hosts: ["elasticsearch:9200"] + indices: + - index: "filebeat-%{[agent.version]}-%{+yyyy.MM.dd}" + +setup.kibana: + host: "kibana:5601" + +setup.dashboards.enabled: true +setup.template.enabled: true +setup.template.name: "filebeat" +setup.template.pattern: "filebeat-*" +setup.template.overwrite: true +EOF + + print_success "Filebeat configuration created" +} + +# Function to setup Jaeger configuration +setup_jaeger() { + print_status "Setting up Jaeger configuration..." + + cat > "$MONITORING_DIR/jaeger/jaeger.yml" << 'EOF' +sampling: + default_strategy: + type: probabilistic + param: 0.1 + +storage: + type: elasticsearch + options: + es: + server_urls: http://elasticsearch:9200 + index_prefix: jaeger + username: "" + password: "" + +ingester: + kafka: + consumer: + topic: jaeger-spans + brokers: kafka:9092 + +agent: + http_server: + host_port: ":14268" +EOF + + print_success "Jaeger configuration created" +} + +# Function to create monitoring Docker Compose +create_monitoring_compose() { + print_status "Creating monitoring Docker Compose file..." 
+ + cat > "$MONITORING_DIR/docker-compose.monitoring.yml" << 'EOF' +version: '3.8' + +services: + # Prometheus + prometheus: + image: prom/prometheus:latest + container_name: ai-doc-bot-prometheus + ports: + - "9090:9090" + volumes: + - ./prometheus:/etc/prometheus + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + restart: unless-stopped + networks: + - monitoring + + # Alertmanager + alertmanager: + image: prom/alertmanager:latest + container_name: ai-doc-bot-alertmanager + ports: + - "9093:9093" + volumes: + - ./alertmanager:/etc/alertmanager + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + restart: unless-stopped + networks: + - monitoring + + # Grafana + grafana: + image: grafana/grafana:latest + container_name: ai-doc-bot-grafana + ports: + - "3001:3000" + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning + - ./grafana/dashboards:/etc/grafana/provisioning/dashboards + - grafana_data:/var/lib/grafana + restart: unless-stopped + networks: + - monitoring + + # Node Exporter + node-exporter: + image: prom/node-exporter:latest + container_name: ai-doc-bot-node-exporter + ports: + - "9100:9100" + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' + restart: unless-stopped + networks: + - monitoring + + # cAdvisor + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: ai-doc-bot-cadvisor + ports: + - "8080:8080" + volumes: + - 
/:/rootfs:ro + - /var/run:/var/run:ro + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + - /dev/disk/:/dev/disk:ro + restart: unless-stopped + networks: + - monitoring + + # Elasticsearch + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.17.0 + container_name: ai-doc-bot-elasticsearch + environment: + - discovery.type=single-node + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ports: + - "9200:9200" + volumes: + - ./elasticsearch/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + - elasticsearch_data:/usr/share/elasticsearch/data + restart: unless-stopped + networks: + - monitoring + + # Kibana + kibana: + image: docker.elastic.co/kibana/kibana:7.17.0 + container_name: ai-doc-bot-kibana + environment: + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + ports: + - "5601:5601" + volumes: + - kibana_data:/usr/share/kibana/data + restart: unless-stopped + networks: + - monitoring + depends_on: + - elasticsearch + + # Filebeat + filebeat: + image: docker.elastic.co/beats/filebeat:7.17.0 + container_name: ai-doc-bot-filebeat + user: root + volumes: + - ./filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/log/ai-document-agent:/var/log/ai-document-agent:ro + - /var/log/audit:/var/log/audit:ro + restart: unless-stopped + networks: + - monitoring + depends_on: + - elasticsearch + + # Jaeger + jaeger: + image: jaegertracing/all-in-one:latest + container_name: ai-doc-bot-jaeger + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" + - "14268:14268" + - "14250:14250" + restart: unless-stopped + networks: + - monitoring + +volumes: + prometheus_data: + grafana_data: + elasticsearch_data: + kibana_data: + +networks: + monitoring: + driver: bridge +EOF + + print_success "Monitoring Docker Compose file created" +} + +# Function to start monitoring services +start_monitoring_services() { + print_status "Starting monitoring services..." 
+ + cd "$MONITORING_DIR" + docker-compose -f docker-compose.monitoring.yml up -d + + print_success "Monitoring services started" + print_status "Waiting for services to be ready..." + sleep 30 + + print_success "Monitoring services are ready!" +} + +# Function to show monitoring status +show_monitoring_status() { + print_status "Monitoring Service Status:" + cd "$MONITORING_DIR" + docker-compose -f docker-compose.monitoring.yml ps + + echo "" + print_status "Monitoring URLs:" + echo "Grafana: http://localhost:3001 (admin/admin)" + echo "Prometheus: http://localhost:9090" + echo "Alertmanager: http://localhost:9093" + echo "Kibana: http://localhost:5601" + echo "Jaeger: http://localhost:16686" + echo "Elasticsearch: http://localhost:9200" + echo "cAdvisor: http://localhost:8080" + echo "Node Exporter: http://localhost:9100" +} + +# Function to stop monitoring services +stop_monitoring_services() { + print_status "Stopping monitoring services..." + + cd "$MONITORING_DIR" + docker-compose -f docker-compose.monitoring.yml down + + print_success "Monitoring services stopped" +} + +# Function to show help +show_help() { + echo "AI Document Agent Monitoring Setup Script" + echo "" + echo "Usage: $0 [OPTION]" + echo "" + echo "Options:" + echo " setup - Complete monitoring setup (default)" + echo " start - Start monitoring services only" + echo " stop - Stop monitoring services" + echo " restart - Restart monitoring services" + echo " status - Show monitoring service status" + echo " help - Show this help message" + echo "" + echo "Examples:" + echo " $0 setup # Complete setup" + echo " $0 start # Start services" + echo " $0 status # Show status" +} + +# Main script logic +main() { + case "${1:-setup}" in + "setup") + print_status "Starting AI Document Agent monitoring setup..." 
+ + check_docker + create_monitoring_directories + setup_prometheus + setup_alertmanager + setup_grafana_datasources + setup_grafana_dashboards + setup_elasticsearch + setup_filebeat + setup_jaeger + create_monitoring_compose + start_monitoring_services + show_monitoring_status + + print_success "Monitoring setup completed successfully!" + print_status "You can now access Grafana at http://localhost:3001" + ;; + "start") + start_monitoring_services + show_monitoring_status + ;; + "stop") + stop_monitoring_services + ;; + "restart") + stop_monitoring_services + start_monitoring_services + show_monitoring_status + ;; + "status") + show_monitoring_status + ;; + "help"|"-h"|"--help") + show_help + ;; + *) + print_error "Unknown option: $1" + show_help + exit 1 + ;; + esac +} + +# Run main function with all arguments +main "$@"