diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index afcbb62..cc7f5bf 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,9 +24,9 @@ "8501": { "label": "WhisperForge App", "onAutoForward": "notify", - "server": "streamlit run app_simple.py --server.enableCORS false --server.enableXsrfProtection false" + "server": "streamlit run app_simple.py --server.enableCORS false --server.enableXsrfProtection true" } }, "postCreateCommand": "pip install -r requirements.txt", "remoteUser": "vscode" -} \ No newline at end of file +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4f11527 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements-dev.txt + + - name: Lint + run: ruff check . + + - name: Format check + run: ruff format --check . 
+ + - name: Run tests + run: pytest -m "not ai and not supabase and not slow" --tb=short diff --git a/.gitignore b/.gitignore index 4d11890..0a916c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Environment variables .env -.env.local -.env.production +.env.* +!env.example # Python __pycache__/ @@ -27,6 +27,7 @@ wheels/ # Virtual environments venv/ +.venv/ env/ ENV/ @@ -46,3 +47,25 @@ logs/ # Streamlit .streamlit/secrets.toml + +# Testing / Coverage +.pytest_cache/ +htmlcov/ +.coverage +.coverage.* + +# Type checking +.mypy_cache/ +.ruff_cache/ + +# Local databases +*.sqlite3 +*.db + +# TLS / Certificates +*.pem +*.key +*.crt + +# Credentials config (never commit secrets) +credentials.conf diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..022f5c8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.7 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/.streamlit/config.toml b/.streamlit/config.toml index 7131b7c..d2adaee 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -1,12 +1,12 @@ [server] port = 8501 -address = "0.0.0.0" +address = "127.0.0.1" headless = true enableCORS = false -enableXsrfProtection = false +enableXsrfProtection = true [browser] gatherUsageStats = false [theme] -base = "light" \ No newline at end of file +base = "dark" diff --git a/CHANGELOG.md b/CHANGELOG.md index ea2f1ba..3fd9657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ### **โœ‚๏ธ Features REMOVED (Intentionally)** - **Research Enrichment**: Removed entity extraction and research link generation -- **Editor System**: Removed AI editor critique and revision loops +- **Editor System**: Removed AI editor critique and revision loops - **Image 
Prompts**: Removed AI image generation prompt creation - **Multiple AI Providers**: Removed Anthropic/Claude and Groq support - **Complex Settings**: Removed feature toggles and provider selection @@ -16,7 +16,7 @@ - **Audio Upload**: Enhanced large file processing (25MB-2GB) - **Transcription**: OpenAI Whisper speech-to-text - **Wisdom Extraction**: Key insights and takeaways -- **Outline Creation**: Structured content organization +- **Outline Creation**: Structured content organization - **Article Generation**: Complete written content - **Social Media**: 5 platform-optimized posts - **Notion Publishing**: Auto-publish with beautiful formatting @@ -222,7 +222,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently ### ๐Ÿ”ง **ENHANCED 8-STEP PIPELINE** 1. ๐ŸŽ™๏ธ **Transcription** - Speech-to-text conversion -2. ๐Ÿ’ก **Wisdom Extraction** - Key insights and takeaways +2. ๐Ÿ’ก **Wisdom Extraction** - Key insights and takeaways 3. ๐Ÿ” **Research Enrichment** - Supporting links & context โญ **RESTORED** 4. ๐Ÿ“‹ **Outline Creation** - Structured content organization 5. ๐Ÿ“ **Article Generation** - Complete written content @@ -261,7 +261,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently ### ๐Ÿ”ง **Enhanced Pipeline (6 Steps)** 1. ๐ŸŽ™๏ธ **Transcription** - Speech-to-text conversion -2. ๐Ÿ’ก **Wisdom Extraction** - Key insights and takeaways +2. ๐Ÿ’ก **Wisdom Extraction** - Key insights and takeaways 3. ๐Ÿ“‹ **Outline Creation** - Structured content organization 4. ๐Ÿ“ **Article Generation** - Complete written content 5. 
๐Ÿ“ฑ **Social Media** - Platform-optimized posts @@ -293,13 +293,13 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently - **Database Investigation**: Found all 27 content items stored under correct user - **Field Mapping**: Updated display to match actual database schema - `transcript` (not `transcription`) - - `wisdom` (not `wisdom_extraction`) + - `wisdom` (not `wisdom_extraction`) - `outline` (not `outline_creation`) - `article` (not `article_creation`) - `social_content` (not `social_media`) ### ๐Ÿš€ **TRANSCRIPTION PIPELINE RESTORED** -- **Circular Imports**: Eliminated blocking dependencies +- **Circular Imports**: Eliminated blocking dependencies - **Pipeline Flow**: Fixed streaming results display - **Database Storage**: Corrected field names for new content - **Session State**: Simplified initialization prevents conflicts @@ -392,7 +392,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently --- -## [2.0.0] - 2025-06-08 ๐ŸŒŒ **Aurora UI Transformation** +## [2.0.0] - 2025-06-08 ๐ŸŒŒ **Aurora UI Transformation** ### ๐ŸŽจ **Major UI Redesign** - **Aurora Bioluminescent Theme**: Complete visual transformation with cyan/teal color scheme @@ -505,4 +505,4 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently --- -**For detailed technical information, see [README.md](README.md)** \ No newline at end of file +**For detailed technical information, see [README.md](README.md)** diff --git a/CLEANUP_SUCCESS_SUMMARY.md b/CLEANUP_SUCCESS_SUMMARY.md deleted file mode 100644 index 782922b..0000000 --- a/CLEANUP_SUCCESS_SUMMARY.md +++ /dev/null @@ -1,182 +0,0 @@ -# ๐ŸŽ‰ WhisperForge Cleanup & Fix SUCCESS! 
- -## โœ… **MISSION ACCOMPLISHED** - -Your WhisperForge app is now **CLEAN, WORKING, and READY FOR PRODUCTION!** - -**App Status**: โœ… **RUNNING** on http://localhost:8501 - ---- - -## ๐Ÿงน **What We Cleaned Up** - -### **Archived Bloat Modules** (126KB removed) -``` -โœ… Moved to archived_old_version/bloat_modules/: -- monitoring.py (11KB) - Over-engineered monitoring -- streamlit_monitoring.py (8KB) - More monitoring bloat -- metrics_exporter.py (11KB) - Prometheus metrics -- health_check.py (18KB) - Complex health checking -- session_manager.py (18KB) - Over-complex sessions -- visible_thinking.py (16KB) - AI thinking bubbles -- research_enrichment.py (12KB) - Research links -- ui_components.py (14KB) - Extra UI components -- integrations.py (14KB) - Third-party integrations -- preferences.py (4KB) - User preferences -``` - -### **Archived Documentation Bloat** (7 files) -``` -โœ… Moved to archived_old_version/old_docs/: -- WHISPERFORGE_AUDIT_2025.md -- CLEAN_SETUP.md -- DEVELOPMENT_GUIDE.md -- PRODUCTION_MONITORING_IMPLEMENTATION.md -- SESSION_REFACTOR_IMPLEMENTATION.md -- SPRINT_0.3_COMPLETION_REPORT.md -- WORK_TESTING_CHECKLIST.md -``` - -### **Fixed Broken Test Files** -``` -โœ… Moved to archived_old_version/broken_tests/: -- All test_*.py files that were causing confusion -``` - ---- - -## ๐Ÿ”ง **What We Fixed** - -### **1. Import Issues** โœ… -- Fixed all broken imports in `app.py` -- Added simple replacements for archived functions -- Replaced complex session manager with simple Streamlit session state - -### **2. API Keys** โœ… -- Added your real OpenAI API key to `.env` -- App can now perform transcription and content generation - -### **3. 
Core Architecture** โœ… -- Streamlined to 11 essential modules (162KB) -- All core imports working perfectly -- Supabase connection verified - ---- - -## ๐ŸŽฏ **Current Clean Architecture** - -### **Essential Core Modules** (11 files, ~170KB) -``` -core/ -โ”œโ”€โ”€ supabase_integration.py # Database + OAuth โœ… -โ”œโ”€โ”€ content_generation.py # AI transcription & generation โœ… -โ”œโ”€โ”€ streaming_pipeline.py # Your streaming workflow โœ… -โ”œโ”€โ”€ streaming_results.py # Real-time UI updates โœ… -โ”œโ”€โ”€ auth_wrapper.py # Supabase auth (fixed) โœ… -โ”œโ”€โ”€ styling.py # Aurora theme โœ… -โ”œโ”€โ”€ file_upload.py # Audio uploads โœ… -โ”œโ”€โ”€ notifications.py # User messages โœ… -โ”œโ”€โ”€ utils.py # Basic utilities โœ… -โ”œโ”€โ”€ logging_config.py # Structured logging โœ… -โ””โ”€โ”€ config.py # Configuration โœ… -``` - -### **Clean Documentation** (3 files) -``` -โ”œโ”€โ”€ README.md # Main documentation -โ”œโ”€โ”€ CHANGELOG.md # Version history -โ””โ”€โ”€ ESSENTIAL_MODULES_ONLY.md # Architecture guide -``` - ---- - -## ๐Ÿš€ **Your Core Features - ALL WORKING** - -### โœ… **OAuth via Supabase** -- Simple session management with Streamlit session state -- User registration and login working -- Database integration verified - -### โœ… **Transcription & Pipeline Streaming** -- OpenAI Whisper integration ready -- Real-time streaming pipeline implemented -- Aurora UI for beautiful progress display - -### โœ… **Save Content to Supabase** -- Database storage functions working -- Content history tracking ready -- User-specific content isolation - -### โœ… **Display on History Page** -- Content history page implemented -- Aurora-styled content cards -- Copy-to-clipboard functionality - -### โœ… **Custom Prompts & Knowledge Base** -- Prompt customization system ready -- Knowledge base file upload working -- User-specific storage in database - ---- - -## ๐ŸŽฏ **Next Steps (Ready for Production)** - -### **1. 
Test Core Functionality** (15 minutes) -```bash -# App is already running on http://localhost:8501 -# Test these features: -1. โœ… OAuth login via Supabase -2. โœ… Upload audio file -3. โœ… Watch transcription & content generation -4. โœ… Check content appears in history -5. โœ… Customize prompts in settings -6. โœ… Upload knowledge base files -``` - -### **2. Deploy to Render.com** (15 minutes) -```bash -# Your app is now ready for deployment: -1. โœ… All dependencies in requirements.txt -2. โœ… Environment variables configured -3. โœ… No broken imports or missing modules -4. โœ… Database connection working -5. โœ… API keys configured -``` - -### **3. Optional Enhancements** -- Add Anthropic API key for Claude support -- Re-enable archived features if needed later -- Add more AI providers (Groq, etc.) - ---- - -## ๐Ÿ“Š **Before vs After** - -### **Before Cleanup** -- โŒ 23 core modules (300KB+ of complexity) -- โŒ 10+ markdown files cluttering root -- โŒ Broken imports and hanging processes -- โŒ Missing API keys -- โŒ Confusing test files everywhere -- โŒ Over-engineered monitoring systems - -### **After Cleanup** -- โœ… 11 essential modules (170KB focused code) -- โœ… 3 clean documentation files -- โœ… All imports working perfectly -- โœ… Real API keys configured -- โœ… Clean file organization -- โœ… Simple, reliable architecture - ---- - -## ๐ŸŽ‰ **RESULT** - -**WhisperForge is now a clean, focused, production-ready AI content generation platform!** - -- **Codebase**: 85% smaller and 100% more maintainable -- **Functionality**: All core features working perfectly -- **Architecture**: Simple, reliable, and scalable -- **Deployment**: Ready for Render.com production - -**Time to working app: ACHIEVED! ๐Ÿš€** \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 15d88bc..db4ec87 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,17 +5,13 @@ Thank you for considering contributing to WhisperForge! ## Getting Started 1. 
Fork the repository and clone your fork. -2. Create a virtual environment: +2. Set up the development environment: ```bash - python -m venv venv - source venv/bin/activate + python main.py --setup --dev ``` -3. Install dependencies required for development and testing: - ```bash - pip install -r requirements.txt - ``` - You can also run `scripts/setup_test_env.sh` which performs these steps for you. -4. Run the test suite to ensure everything works: + This creates a `.venv`, installs all dependencies (including dev/test), and + configures pre-commit hooks. +3. Run the test suite to ensure everything works: ```bash pytest ``` diff --git a/ESSENTIAL_MODULES_ONLY.md b/ESSENTIAL_MODULES_ONLY.md deleted file mode 100644 index 6543618..0000000 --- a/ESSENTIAL_MODULES_ONLY.md +++ /dev/null @@ -1,152 +0,0 @@ -# ๐ŸŽฏ WhisperForge ESSENTIAL MODULES ONLY - -## Your Core Requirements: -1. **OAuth via Supabase** โœ… -2. **Transcription & Pipeline Streaming** โœ… -3. **Save content to Supabase** โœ… -4. **Display on user history page** โœ… -5. **Customize prompts & knowledge base** โœ… - ---- - -## ๐Ÿ”ฅ **ESSENTIAL CORE MODULES** (Keep These): - -### **Tier 1: Absolutely Critical** -``` -โœ… supabase_integration.py (16KB) - Database & OAuth -โœ… content_generation.py (18KB) - Transcription & AI generation -โœ… streaming_pipeline.py (20KB) - Your streaming pipeline -โœ… auth_wrapper.py (13KB) - Supabase OAuth integration -โœ… styling.py (18KB) - Aurora UI (you love this!) 
-โœ… utils.py (6KB) - Basic utilities & prompts -``` - -### **Tier 2: Important for UX** -``` -โœ… streaming_results.py (34KB) - Real-time content display -โœ… file_upload.py (25KB) - Audio file handling -โœ… notifications.py (12KB) - User feedback messages -``` - -**Total Essential: 9 modules, ~162KB** - ---- - -## ๐Ÿ—‘๏ธ **PROBABLY UNNECESSARY** (Archive These): - -### **Over-Engineering & Monitoring** -``` -โŒ monitoring.py (11KB) - Complex monitoring system -โŒ streamlit_monitoring.py (8KB) - More monitoring -โŒ metrics_exporter.py (11KB) - Prometheus metrics -โŒ health_check.py (18KB) - Health checking system -โŒ session_manager.py (18KB) - Complex session management -``` - -### **Nice-to-Have Features** -``` -โŒ visible_thinking.py (16KB) - AI thinking bubbles -โŒ research_enrichment.py (12KB) - Research links -โŒ ui_components.py (14KB) - Extra UI components -โŒ integrations.py (14KB) - Third-party integrations -โŒ preferences.py (4KB) - User preferences -``` - -### **Logging & Config** -``` -โš ๏ธ logging_config.py (8KB) - Keep if you want structured logging -โš ๏ธ config.py (4KB) - Keep for configuration management -``` - -**Bloat to Remove: ~126KB of unnecessary complexity** - ---- - -## ๐Ÿ“„ **MARKDOWN FILE CLEANUP** - -### **Keep These:** -``` -โœ… README.md - Main documentation -โœ… CHANGELOG.md - Version history -โœ… ESSENTIAL_MODULES_ONLY.md - This file -``` - -### **Archive These:** -``` -โŒ WHISPERFORGE_AUDIT_2025.md -โŒ CLEAN_SETUP.md -โŒ DEVELOPMENT_GUIDE.md -โŒ PRODUCTION_MONITORING_IMPLEMENTATION.md -โŒ SESSION_REFACTOR_IMPLEMENTATION.md -โŒ SPRINT_0.3_COMPLETION_REPORT.md -โŒ WORK_TESTING_CHECKLIST.md -``` - ---- - -## ๐Ÿงน **CLEANUP COMMANDS** - -### **Step 1: Archive Unnecessary Core Modules** -```bash -mkdir -p archived_old_version/bloat_modules -mv core/monitoring.py archived_old_version/bloat_modules/ -mv core/streamlit_monitoring.py archived_old_version/bloat_modules/ -mv core/metrics_exporter.py 
archived_old_version/bloat_modules/ -mv core/health_check.py archived_old_version/bloat_modules/ -mv core/session_manager.py archived_old_version/bloat_modules/ -mv core/visible_thinking.py archived_old_version/bloat_modules/ -mv core/research_enrichment.py archived_old_version/bloat_modules/ -mv core/ui_components.py archived_old_version/bloat_modules/ -mv core/integrations.py archived_old_version/bloat_modules/ -mv core/preferences.py archived_old_version/bloat_modules/ -``` - -### **Step 2: Archive Documentation Bloat** -```bash -mkdir -p archived_old_version/old_docs -mv WHISPERFORGE_AUDIT_2025.md archived_old_version/old_docs/ -mv CLEAN_SETUP.md archived_old_version/old_docs/ -mv DEVELOPMENT_GUIDE.md archived_old_version/old_docs/ -mv PRODUCTION_MONITORING_IMPLEMENTATION.md archived_old_version/old_docs/ -mv SESSION_REFACTOR_IMPLEMENTATION.md archived_old_version/old_docs/ -mv SPRINT_0.3_COMPLETION_REPORT.md archived_old_version/old_docs/ -mv WORK_TESTING_CHECKLIST.md archived_old_version/old_docs/ -``` - ---- - -## ๐ŸŽฏ **SIMPLIFIED ARCHITECTURE** - -After cleanup, your core will be: -``` -core/ -โ”œโ”€โ”€ supabase_integration.py # Database + OAuth -โ”œโ”€โ”€ content_generation.py # AI transcription & generation -โ”œโ”€โ”€ streaming_pipeline.py # Your streaming workflow -โ”œโ”€โ”€ streaming_results.py # Real-time UI updates -โ”œโ”€โ”€ auth_wrapper.py # Supabase auth -โ”œโ”€โ”€ styling.py # Aurora theme -โ”œโ”€โ”€ file_upload.py # Audio uploads -โ”œโ”€โ”€ notifications.py # User messages -โ”œโ”€โ”€ utils.py # Basic utilities -โ”œโ”€โ”€ logging_config.py # (Optional) Structured logging -โ””โ”€โ”€ config.py # (Optional) Configuration -``` - -**Result: ~170KB of focused, essential code instead of 300KB+ of bloat** - ---- - -## ๐Ÿš€ **NEXT STEPS** - -1. **Run cleanup commands above** -2. **Add your OpenAI API key to .env** -3. 
**Test core functionality:** - - OAuth login via Supabase โœ… - - Audio upload & transcription โœ… - - Content generation & streaming โœ… - - Save to database โœ… - - Display in history โœ… - - Custom prompts & knowledge base โœ… - -**Your app should work perfectly with just these 9-11 essential modules!** \ No newline at end of file diff --git a/Procfile b/Procfile index b624917..b248ce1 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: streamlit run app_simple.py --server.port=$PORT --server.address=0.0.0.0 --server.headless=true \ No newline at end of file +web: streamlit run app_simple.py --server.port=$PORT --server.address=0.0.0.0 --server.headless=true diff --git a/README.md b/README.md index b120bea..2441e7c 100644 --- a/README.md +++ b/README.md @@ -1,337 +1,179 @@ -# WhisperForge v3.0.0 ๐ŸŒŒ +# WhisperForge v3.1.0 -**Transform audio into structured, intelligent content with AI-powered processing** +**Transform audio into structured, intelligent content with AI-powered processing.** -WhisperForge is a powerful Streamlit application that converts audio files into comprehensive content packages including transcripts, insights, articles, and social media posts. Now with **revolutionary large file processing** supporting files up to **2GB**. +WhisperForge is a Streamlit application that converts audio files into comprehensive content packages including transcripts, insights, articles, and social media posts. Supports files up to 2GB with intelligent chunking and parallel processing. 
-## โœจ Key Features +## Key Features -- ๐ŸŽ™๏ธ **Audio Transcription** - High-quality speech-to-text using OpenAI Whisper -- ๐Ÿ’ก **Wisdom Extraction** - AI-powered insights and key takeaways -- ๐Ÿ“‹ **Content Outline** - Structured organization and flow -- ๐Ÿ“ฐ **Article Generation** - Complete written content from audio -- ๐Ÿ“ฑ **Social Media Posts** - Platform-optimized content -- ๐Ÿ“š **Notion Integration** - Automatic publishing to Notion workspace -- ๐Ÿ“‚ **Knowledge Base** - Add custom context from your files -- ๐Ÿ“ **Custom Prompts** - Personalize AI output -- ๐Ÿš€ **Large File Processing** - Handle files up to 2GB with intelligent chunking -- ๐ŸŒŠ **Real-time Streaming** - Watch content generate step-by-step -- ๐ŸŽจ **Aurora Theme** - Beautiful bioluminescent UI design +- **Audio Transcription** - Speech-to-text using OpenAI Whisper +- **Wisdom Extraction** - AI-powered insights and key takeaways +- **Content Outline** - Structured organization and flow +- **Article Generation** - Complete written content from audio +- **Social Media Posts** - Platform-optimized content (5 posts) +- **Notion Integration** - Auto-publish with rich formatting +- **Knowledge Base** - Add custom context from your files +- **Custom Prompts** - Personalize AI output per pipeline step +- **Large File Processing** - Handle files up to 2GB with FFmpeg chunking +- **Real-time Streaming** - Watch content generate step-by-step -## ๐Ÿ—๏ธ Project Structure - -``` -whisperforge--prime/ -โ”œโ”€โ”€ app_simple.py # Main Streamlit application (v3.0.0) -โ”œโ”€โ”€ app.py # Redirect to main app -โ”œโ”€โ”€ core/ # Core functionality modules -โ”‚ โ”œโ”€โ”€ content_generation.py -โ”‚ โ”œโ”€โ”€ file_upload.py # Enhanced large file processing -โ”‚ โ”œโ”€โ”€ supabase_integration.py -โ”‚ โ””โ”€โ”€ ... 
-โ”œโ”€โ”€ prompts/ # Custom AI prompts -โ”œโ”€โ”€ static/ # CSS, JS, and assets -โ”œโ”€โ”€ tests/ # Test suite -โ”œโ”€โ”€ docs/ # Documentation -โ””โ”€โ”€ requirements.txt # Dependencies -``` - -## ๐Ÿš€ Quick Start +## Quick Start ### Prerequisites -- Python 3.8+ +- Python 3.11+ +- FFmpeg (required for audio processing and files >100MB) - Supabase account (for data storage) - OpenAI API key (for AI processing) -### Installation - -1. **Clone the repository** - ```bash - git clone https://github.com/your-username/whisperforge.git - cd whisperforge - ``` +#### Installing FFmpeg -2. **Set up virtual environment** - ```bash - python -m venv venv - source venv/bin/activate # On Windows: venv\Scripts\activate - ``` +| Platform | Command | +|----------|---------| +| Ubuntu/Debian | `sudo apt install ffmpeg` | +| macOS | `brew install ffmpeg` | +| Windows 11 | `winget install ffmpeg` | -3. **Install dependencies** - ```bash - pip install -r requirements.txt - ``` - -4. **Configure environment variables** - ```bash - cp env.example .env - # Edit .env with your API keys - ``` - -5. **Run the application** - ```bash - streamlit run app_simple.py - ``` +### Installation -## ๐Ÿ”ง Configuration +#### Automated setup (recommended) -Create a `.env` file with your API keys: +```bash +git clone https://github.com/your-username/whisperforge.git +cd whisperforge +python main.py +``` -```env -# Required -SUPABASE_URL=your_supabase_url -SUPABASE_ANON_KEY=your_supabase_anon_key -OPENAI_API_KEY=your_openai_api_key +`main.py` automatically creates a `.venv`, installs dependencies, and launches the app. On subsequent runs it skips setup and starts immediately. -# Optional -NOTION_API_KEY=your_notion_api_key -NOTION_DATABASE_ID=your_notion_database_id +```bash +python main.py --setup # Force re-create the venv +python main.py --setup --dev # Re-create venv with dev/test dependencies +python main.py -- --server.port 8080 # Forward args to Streamlit ``` -## ๐ŸŽฏ Usage +#### Manual setup -1. 
**Upload Audio** - Support for MP3, WAV, M4A, and video files up to 2GB -2. **Choose Processing Mode** - Standard (โ‰ค25MB) or Enhanced Large File (โ‰ค2GB) -3. **Watch Real-time Processing** - See content generate step-by-step -4. **Review Results** - Comprehensive content package with all outputs -5. **Auto-publish** - Optional Notion integration for seamless publishing +```bash +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -r requirements.txt +streamlit run app_simple.py +``` -## ๐Ÿงช Testing +### Configuration -Before running tests, make sure all dependencies are installed: +Copy the example env file and fill in your values: ```bash -pip install -r requirements.txt +cp env.example .env ``` -You can also use the helper script `scripts/setup_test_env.sh` to create a -virtual environment with the required packages. +Required variables: -Run the test suite: +```env +SUPABASE_URL=your_supabase_url +SUPABASE_ANON_KEY=your_supabase_anon_key +OPENAI_API_KEY=your_openai_api_key +``` -```bash -# Run all tests -pytest +Optional variables: -# Run specific test categories -pytest -m unit # Unit tests only -pytest -m integration # Integration tests only -pytest tests/test_basic_functionality.py -v # Specific test file +```env +NOTION_API_KEY=your_notion_integration_token +NOTION_DATABASE_ID=your_notion_database_id +SENTRY_DSN=your_sentry_dsn +ENVIRONMENT=development # or production +DEBUG=true +LOG_LEVEL=DEBUG ``` -## ๐Ÿ“š Documentation +## Content Pipeline -- [Large File Processing Guide](docs/LARGE_FILE_PROCESSING_v2.8.0.md) -- [Development Workflow](archived_docs/DEVELOPMENT_WORKFLOW.md) -- [API Documentation](docs/API.md) +1. **Transcription** - OpenAI Whisper speech-to-text +2. **Wisdom Extraction** - Key insights and takeaways +3. **Outline Creation** - Structured content organization +4. **Article Generation** - Complete written content +5. **Social Media** - 5 platform-optimized posts +6. 
**Notion Publishing** - Auto-publish with formatted pages -## ๐Ÿค Contributing +## Project Structure -1. Fork the repository -2. Create a feature branch (`git checkout -b feature/amazing-feature`) -3. Commit your changes (`git commit -m 'Add amazing feature'`) -4. Push to the branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request +``` +whisperforge/ +โ”œโ”€โ”€ main.py # App launcher (auto-setup + run) +โ”œโ”€โ”€ setup.py # Venv creation and dependency installer +โ”œโ”€โ”€ app_simple.py # Main Streamlit application +โ”œโ”€โ”€ app.py # Redirect to main app +โ”œโ”€โ”€ core/ # Core modules +โ”‚ โ”œโ”€โ”€ content_generation.py # AI content generation +โ”‚ โ”œโ”€โ”€ streaming_pipeline.py # Step-by-step processing +โ”‚ โ”œโ”€โ”€ streaming_results.py # Real-time content display +โ”‚ โ”œโ”€โ”€ file_upload.py # Large file processing +โ”‚ โ”œโ”€โ”€ supabase_integration.py# Database operations +โ”‚ โ”œโ”€โ”€ auth_wrapper.py # Authentication +โ”‚ โ”œโ”€โ”€ visible_thinking.py # AI thinking bubbles +โ”‚ โ”œโ”€โ”€ session_manager.py # User session handling +โ”‚ โ”œโ”€โ”€ styling.py # Aurora UI components +โ”‚ โ””โ”€โ”€ ... +โ”œโ”€โ”€ prompts/ # Default and custom AI prompts +โ”œโ”€โ”€ static/ # CSS, JS, and assets +โ”œโ”€โ”€ scripts/ # Utility and audit scripts +โ”œโ”€โ”€ tests/ # Test suite +โ”œโ”€โ”€ docs/ # Documentation +โ”œโ”€โ”€ requirements.txt # Production dependencies +โ””โ”€โ”€ requirements-dev.txt # Dev/test dependencies (includes production) +``` -## ๐Ÿ“„ License +## Testing -This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
+Install dev dependencies: -## ๐Ÿ™ Acknowledgments +```bash +python main.py --setup --dev +# or manually: +pip install -r requirements-dev.txt +``` -- OpenAI for Whisper and GPT models -- Supabase for backend infrastructure -- Streamlit for the amazing web framework -- The open-source community for inspiration and tools +Run tests: ---- +```bash +pytest # All tests +pytest -m unit # Unit tests only +pytest -m integration # Integration tests +pytest tests/test_basic_functionality.py -v # Specific file +``` -**WhisperForge v3.0.0** - Transform your audio into intelligent content ๐ŸŒŒ +## Technical Stack -## ๐ŸŽฏ **Architecture Overview** +- **Frontend**: Streamlit with custom Aurora CSS +- **Backend**: Supabase (PostgreSQL) +- **AI**: OpenAI GPT-4 + Whisper +- **Auth**: Supabase Auth +- **Deployment**: Streamlit Cloud / Render.com -``` -โ”œโ”€โ”€ app_simple.py # Main Streamlit application (v3.0.0) -โ”œโ”€โ”€ app.py # Redirect to main app -โ”œโ”€โ”€ core/ -โ”‚ โ”œโ”€โ”€ streaming_pipeline.py # Step-by-step content processing -โ”‚ โ”œโ”€โ”€ streaming_results.py # Real-time content display -โ”‚ โ”œโ”€โ”€ content_generation.py # AI content generation functions -โ”‚ โ”œโ”€โ”€ supabase_integration.py # Database operations -โ”‚ โ”œโ”€โ”€ visible_thinking.py # AI thinking bubbles -โ”‚ โ”œโ”€โ”€ session_manager.py # User session handling -โ”‚ โ””โ”€โ”€ styling.py # Aurora UI components -โ””โ”€โ”€ prompts/ # Default and custom AI prompts -``` +## Security -## ๐ŸŒŠ **Core Features** +Recent hardening includes: -### **1. Real-Time Audio Processing** -- Upload audio files (MP3, WAV, M4A, FLAC, etc.) -- Automatic transcription using OpenAI Whisper -- Progressive content generation with live updates +- HTML content sanitization (XSS prevention) +- JWT validation with PyJWT (replaced python-jose) +- XSRF protection enabled +- Server bound to localhost by default +- No hardcoded credentials in source +- Input validation on file uploads -### **2. Enhanced AI Content Pipeline** -1. 
**Transcription** - Speech-to-text conversion -2. **Wisdom Extraction** - Key insights and takeaways -3. **Outline Creation** - Structured content organization -4. **Article Generation** - Complete written content -5. **Social Media** - Platform-optimized posts -6. **๐ŸŒŒ Notion Publishing** - Auto-publish to Notion with beautiful formatting -7. **Database Storage** - Persistent content library with Supabase +## Documentation -### **3. Modern Aurora Interface** -- Bioluminescent 2025 design system -- Real-time progress indicators -- Animated content cards -- Responsive Aurora color scheme +- [Large File Processing Guide](docs/LARGE_FILE_PROCESSING_v2.8.0.md) +- [Critical Fixes Report](docs/CRITICAL_FIXES_REPORT_v2.8.0.md) +- [Monitoring Setup](docs/monitoring.md) +- [Changelog](CHANGELOG.md) -## ๐Ÿ”ง **Technical Stack** +## Contributing -- **Frontend**: Streamlit with custom Aurora CSS -- **Backend**: Supabase (PostgreSQL) -- **AI Models**: OpenAI GPT-4 -- **Audio Processing**: OpenAI Whisper -- **Authentication**: Supabase Auth + OAuth -- **Deployment**: Streamlit Cloud ready - -## ๐Ÿš€ **Getting Started** - -1. **Clone Repository** - ```bash - git clone - cd whisperforge--prime - ``` - -2. **Install Dependencies** - ```bash - python -m venv venv - source venv/bin/activate # or `venv\Scripts\activate` on Windows - pip install -r requirements.txt - ``` - -3. 
**Environment Setup** - Create `.env` file or set environment variables: - ```env - # Required - Supabase Database - SUPABASE_URL=your_supabase_url - SUPABASE_ANON_KEY=your_supabase_anon_key - SUPABASE_SERVICE_ROLE_KEY=your_service_role_key # Optional for admin features - - # Required - AI Provider - OPENAI_API_KEY=your_openai_key - - # Notion Integration - Auto-Publishing - NOTION_API_KEY=your_notion_integration_token - NOTION_DATABASE_ID=your_notion_database_id - - # Optional - OAuth & Integrations - OAUTH_REDIRECT_URL=http://localhost:8501 # For OAuth flows - - # Optional - Security & Monitoring - JWT_SECRET=your_jwt_secret_key - SENTRY_DSN=your_sentry_dsn # For error tracking - - # Optional - Development - DEBUG=true - LOG_LEVEL=INFO - ENVIRONMENT=development # or 'production' - ``` - -4. **Run Application** - ```bash - ./start_app.sh # development (default) - ./start_app.sh production # production mode - ``` - -## ๐ŸŽจ **Aurora Design System** - -The WhisperForge UI uses a custom Aurora design system featuring: - -- **Bioluminescent Effects**: Glowing borders and animations -- **Gradient Backgrounds**: Dynamic color transitions -- **Glass Morphism**: Backdrop blur effects -- **Responsive Cards**: Animated content containers -- **Progress Streams**: Real-time processing indicators - -## ๐Ÿ“Š **Database Schema** - -### **Core Tables** -- `users` - User accounts and settings -- `content` - Generated content and metadata -- `prompts` - Custom AI prompts -- `knowledge_base` - User-uploaded files -- `api_keys` - Encrypted API credentials - -## ๐Ÿ” **Security Features** - -- **Encrypted Storage**: API keys and sensitive data -- **Session Management**: Secure user sessions -- **Input Validation**: File size and type restrictions -- **Rate Limiting**: API usage controls - -## ๐Ÿ›ก **Current Known Issues** - -1. **Database Content Retrieval**: 26 processed files not displaying in history (investigating field name mismatches) -2. 
**Real-time Streaming**: Content shows but not truly real-time like cursor chat -3. **Session Persistence**: Authentication doesn't persist across refreshes consistently -4. **Prompt Saving**: Custom prompts saving but not loading properly -5. **Thinking Bubbles**: AI thinking stream not integrating smoothly - -## ๐Ÿ”„ **Debugging Tools** - -The content history page includes debug information: -- Database connection status -- Raw record samples -- Session state inspection -- Content structure analysis - -## ๐Ÿ“ˆ **Roadmap** - -### **Immediate Fixes** -- [ ] Fix content history display issues -- [ ] Implement true real-time streaming -- [ ] Resolve session persistence -- [ ] Debug prompt saving/loading - -### **Enhancements** -- [ ] Batch audio processing -- [ ] Export to multiple formats -- [ ] Advanced AI model selection -- [ ] Team collaboration features - -## ๐Ÿ’ก **Contributing** - -This is currently a private project focused on creating the best audio-to-content transformation experience with a beautiful, modern interface. - -## ๐Ÿ“„ **License** - -MIT License - See LICENSE file for details. - ---- - -**WhisperForge** - Transforming audio into actionable insights with the beauty of Aurora. ๐ŸŒŒ - -## ๐Ÿ— **Architecture (Simplified)** - -### **Session Management** -```python -# Simple, reliable pattern -if 'authenticated' not in st.session_state: - st.session_state.authenticated = False - -@st.cache_resource -def init_supabase(): - return get_supabase_client() -``` +See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. -### **Database Pattern** -- **Supabase Client**: Cached with `@st.cache_resource` -- **User Data**: Loaded fresh each session (not cached in session state) -- **Content Storage**: Direct to database, no complex state management +## License -### **Authentication Flow** -1. User enters credentials โ†’ Verify against Supabase -2. Set simple session state flags โ†’ No tokens or complex persistence -3. 
Load user preferences from database โ†’ Use `@st.cache_data` for performance \ No newline at end of file +MIT License - see [LICENSE](LICENSE) for details. diff --git a/REPO_CLEANUP_TASKS.md b/REPO_CLEANUP_TASKS.md deleted file mode 100644 index 58198dd..0000000 --- a/REPO_CLEANUP_TASKS.md +++ /dev/null @@ -1,35 +0,0 @@ -# GitHub Cleanup Recommendations - -This project still contains some leftover files and outdated branches that can be cleaned up to reduce clutter. Below are recommended tasks. - -## 1. Branch Maintenance -- Run `git fetch --all --prune` to remove stale remote-tracking branches. -- Review `git branch -r` for remote branches that have been merged or are no longer active. -- Delete obsolete local branches with `git branch -D ` once they are safely merged. -- Consider enabling branch protection rules on your main branch to avoid direct pushes. - -## 2. Remove Tracked Build Artifacts -- Several `__pycache__` directories and `.pyc` files are committed (see `git ls-files | grep __pycache__`). These should be removed and kept out of Git history. -- Delete existing tracked compiled files and commit the removal. -- Ensure `.gitignore` covers these patterns (it already includes `__pycache__/` and `*.py[cod]`). - -## 3. Delete Log Files -- Files such as `test_startup.log` and `streamlit.log` are checked in. Remove them and rely on the existing `*.log` rule in `.gitignore` to avoid future commits. - -## 4. Review Archived Directories -- The `archived_docs/` and `archived_old_version/` folders contain historical documents and code. If these are no longer needed in the repo, consider deleting or moving them to a separate archive branch to reduce repository size. - -## 5. Check Large Files -- Run `git lfs track` or `git ls-files -s | sort -n -k1` to detect unusually large files. Migrate binaries to Git LFS or remove them if not essential. - -## 6. Automated Cleanup Script -- Create a simple script to prune old branches and remove untracked caches. 
Example commands: - ```bash - git fetch --all --prune - for branch in $(git branch --merged | grep -v '\*' | grep -v main); do - git branch -d "$branch" - done - git clean -fdX - ``` - -Cleaning up these items will keep the repository lean and make future development smoother. diff --git a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md b/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md deleted file mode 100644 index a7546a1..0000000 --- a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md +++ /dev/null @@ -1,185 +0,0 @@ -# WhisperForge v2.7.0 Implementation Plan & Summary - -## ๐ŸŽฏ Mission Accomplished: Complete Pipeline Enhancement - -### ๐Ÿ“‹ Original Issues Identified -1. **No Real-Time Streaming**: Content didn't appear as it was generated during pipeline execution -2. **Duplicate Sidebar**: Settings were duplicated between sidebar on transform page and settings page -3. **Ugly UI Design**: Current design didn't look truly Aurora borealis/bioluminescent as planned -4. **Rough Notion Formatting**: Notion page creation looked rough, not the beautiful formatting previously spec'd -5. **Prompts Not Applying**: When user edited and saved prompts, they weren't being applied to content generation - -### โœ… Issues Resolved in v2.7.0 - -#### 1. Real-Time Streaming Implementation -- **Added Live Content Containers**: Created expandable sections for each pipeline step -- **Immediate Content Display**: Content appears as soon as each step completes -- **Stream to UI**: All generated content (transcript, wisdom, research, outline, article, social, editor notes, Notion) streams to UI immediately -- **Visual Feedback**: Users see exactly what's happening at each step - -#### 2. 
Clean Transform Page -- **Removed Duplicate Sidebar**: Eliminated redundant settings from transform page -- **Focused Interface**: Clean file upload interface with status indicators -- **Settings Consolidation**: All configuration moved to dedicated Settings tab -- **Status Dashboard**: Quick connection status for OpenAI, Notion, Research, and Editor - -#### 3. Enhanced Aurora UI Design -- **True Bioluminescent Styling**: Implemented glowing effects, gradients, and animations -- **Aurora Navigation**: Beautiful header with animated scanning effects and pipeline indicators -- **Enhanced Visual Effects**: Proper Aurora theme with shimmer, pulse, and glow animations -- **Professional Polish**: Consistent styling throughout the entire application - -#### 4. Beautiful Notion Formatting -- **Rich Page Headers**: Beautiful titles with Aurora branding and timestamps -- **Callout Sections**: Wisdom summary in purple callout with lightbulb icon -- **Research Entities**: Blue callouts with research icons and bulleted link lists -- **Gem Marking**: Orange-colored gem icons for high-value research links -- **Structured Toggles**: Organized content sections with proper formatting -- **Professional Footer**: Green completion callout with pipeline summary - -#### 5. Custom Prompt System -- **Prompt Loading**: Automatic loading from `prompts/default/` directory -- **Step Mapping**: Proper mapping of prompts to pipeline steps (wisdom, outline, article, social) -- **Integration**: Custom prompts passed to generation functions -- **Article Prompt**: Created comprehensive article generation prompt -- **Live Application**: Prompts applied during content generation process - -## ๐Ÿ”ง Technical Implementation Details - -### Real-Time Streaming Architecture -```python -# Create expandable containers for each step -transcript_container = st.expander("๐ŸŽ™๏ธ Transcription", expanded=False) -wisdom_container = st.expander("๐Ÿ’ก Wisdom Extraction", expanded=False) -# ... 
etc for all 8 steps - -# Stream content immediately after generation -with transcript_container: - st.markdown("**โœ… Transcription Complete**") - st.text_area("Transcript", transcript, height=200, disabled=True) -``` - -### Prompt Loading System -```python -def load_custom_prompts(): - """Load custom prompts from the prompts directory""" - prompts = {} - prompt_dir = "prompts/default" - # Load all .md files as prompts - -def get_prompt_for_step(step_name: str, custom_prompts: Dict[str, str] = None): - """Get the appropriate prompt for a pipeline step""" - prompt_mapping = { - 'wisdom': 'wisdom_extraction', - 'outline': 'outline_creation', - 'social': 'social_media', - 'article': 'article_generation' - } -``` - -### Enhanced Aurora Styling -- **CSS Variables**: Proper Aurora color scheme with gradients -- **Animations**: Scanning effects, pulse animations, shimmer effects -- **Visual Hierarchy**: Consistent styling across all components -- **Responsive Design**: Works across different screen sizes - -### Notion Formatting Enhancement -- **Structured Headers**: Beautiful page titles with Aurora branding -- **Rich Callouts**: Color-coded sections with appropriate icons -- **Research Display**: Proper entity formatting with gem marking -- **Professional Metadata**: Comprehensive footer with generation details - -## ๐Ÿ“Š Current System Architecture - -### 8-Step Pipeline -1. **๐ŸŽ™๏ธ Transcription** โ†’ OpenAI Whisper โ†’ Real-time display -2. **๐Ÿ’ก Wisdom Extraction** โ†’ Custom prompt โ†’ Immediate streaming -3. **๐Ÿ” Research Enrichment** โ†’ Entity extraction โ†’ Live research display -4. **๐Ÿ“‹ Outline Creation** โ†’ Custom prompt โ†’ Structured outline streaming -5. **๐Ÿ“ Article Generation** โ†’ Custom prompt โ†’ Full article streaming -6. **๐Ÿ“ฑ Social Content** โ†’ Custom prompt โ†’ Social media content streaming -7. **๐Ÿ“ Editor Review** โ†’ AI feedback โ†’ Editor notes and revisions -8. 
**๐ŸŒŒ Notion Publishing** โ†’ Beautiful formatting โ†’ Auto-publish with status - -### Navigation System -- **๐ŸŽต Transform**: Clean file upload and processing with real-time streaming -- **๐Ÿ“š Content Library**: Browse and search processed content history -- **โš™๏ธ Settings**: Comprehensive API configuration and pipeline settings -- **๐Ÿง  Knowledge Base**: Domain expertise file management -- **๐Ÿ“ Prompts**: Custom prompt editing and management - -## ๐Ÿš€ Deployment Status - -### Production Environment -- **Platform**: Render.com with auto-deploy from main branch -- **Version**: v2.7.0 deployed successfully -- **Status**: All features tested and verified working -- **URL**: Production WhisperForge Aurora application - -### Local Development -- **Environment**: Python virtual environment with all dependencies -- **Testing**: App imports successfully, no errors -- **Git**: All changes committed and pushed to main branch - -## ๐ŸŽฏ Future Enhancement Roadmap - -### Phase 1: Performance Optimization (v2.8.0) -- **Streaming Optimization**: Implement WebSocket connections for even faster streaming -- **Caching System**: Cache generated content for faster re-processing -- **Background Processing**: Move heavy operations to background tasks -- **Progress Indicators**: More granular progress tracking within each step - -### Phase 2: Advanced Features (v2.9.0) -- **Multi-Language Support**: Support for non-English audio transcription -- **Batch Processing**: Process multiple audio files simultaneously -- **Export Options**: PDF, Word, and other format exports -- **Template System**: Customizable output templates - -### Phase 3: Collaboration Features (v3.0.0) -- **Team Workspaces**: Shared content libraries and settings -- **Version Control**: Track changes and revisions to generated content -- **Approval Workflows**: Editorial approval processes -- **Integration Hub**: Connect with more platforms (Google Docs, Slack, etc.) 
- -### Phase 4: AI Enhancement (v3.1.0) -- **Advanced Models**: Support for GPT-4 Turbo, Claude, and other models -- **Custom Training**: Fine-tune models on user's specific content -- **Quality Scoring**: Automatic quality assessment and improvement suggestions -- **Smart Routing**: Automatically choose best model for each content type - -## ๐Ÿ“ˆ Success Metrics - -### User Experience Improvements -- โœ… **Real-time Feedback**: Users see content generated immediately -- โœ… **Clean Interface**: Focused, professional design without clutter -- โœ… **Visual Appeal**: True Aurora bioluminescent theme implemented -- โœ… **Notion Integration**: Beautiful, structured pages automatically created -- โœ… **Customization**: Users can edit and apply custom prompts - -### Technical Achievements -- โœ… **Streaming Architecture**: Live content display during processing -- โœ… **Modular Design**: Clean separation of concerns and reusable components -- โœ… **Prompt System**: Flexible, extensible prompt management -- โœ… **Enhanced Styling**: Professional UI with consistent Aurora theme -- โœ… **Production Ready**: Deployed and working in production environment - -### Content Quality -- โœ… **Custom Prompts**: Tailored content generation based on user preferences -- โœ… **Rich Research**: Enhanced entity extraction with gem marking -- โœ… **Editorial Review**: AI feedback and revision capabilities -- โœ… **Structured Output**: Well-organized content across all formats -- โœ… **Professional Notion**: Beautiful, structured pages with rich formatting - -## ๐ŸŽ‰ Conclusion - -WhisperForge v2.7.0 represents a complete transformation of the user experience, addressing all core issues identified: - -1. **Real-time streaming** provides immediate feedback during processing -2. **Clean interface** eliminates confusion and focuses on core functionality -3. **Beautiful Aurora design** creates a professional, engaging experience -4. 
**Enhanced Notion formatting** produces publication-ready structured content -5. **Custom prompt system** allows users to tailor content generation to their needs - -The application now delivers on its promise of transforming audio into structured content with AI magic, providing users with a seamless, beautiful, and powerful content creation experience. - -**Status**: โœ… All objectives achieved, deployed to production, ready for user feedback and future enhancements. \ No newline at end of file diff --git a/app.py b/app.py index e14a27a..354d0c7 100644 --- a/app.py +++ b/app.py @@ -1,18 +1,15 @@ # WhisperForge - Main Application Redirect -# +# # This file redirects to the current main application: app_simple.py -# +# # The old OAuth version has been archived -import streamlit as st import os +import streamlit as st + # Page config -st.set_page_config( - page_title="WhisperForge - Redirect", - page_icon="๐ŸŒŒ", - layout="wide" -) +st.set_page_config(page_title="WhisperForge - Redirect", page_icon="๐ŸŒŒ", layout="wide") st.error(""" ๐Ÿ”„ **Application Redirect** @@ -31,7 +28,9 @@ **Note:** The Procfile has been updated to use app_simple.py for production deployment. """) -st.info("This redirect will be removed in a future version. Please update your bookmarks and scripts to use `app_simple.py`.") +st.info( + "This redirect will be removed in a future version. Please update your bookmarks and scripts to use `app_simple.py`." 
+) # Show current working directory and available files st.markdown("### Available Files:") @@ -42,4 +41,4 @@ elif "deprecated" in file or "backup" in file: st.warning(f"โš ๏ธ **{file}** (Deprecated)") else: - st.info(f"โ„น๏ธ **{file}**") \ No newline at end of file + st.info(f"โ„น๏ธ **{file}**") diff --git a/app_simple.py b/app_simple.py old mode 100644 new mode 100755 index 0e7a0d6..55ee762 --- a/app_simple.py +++ b/app_simple.py @@ -1,2372 +1,374 @@ # WhisperForge Simple - Clean, Focused Audio Content Platform -import streamlit as st import os -import tempfile import time -from datetime import datetime -from typing import Dict, Optional -# Essential imports only +import streamlit as st from dotenv import load_dotenv -load_dotenv() -# Page config first -st.set_page_config( - page_title="WhisperForge", - page_icon="๐ŸŒŒ", - layout="wide" +from core.config import Config +from core.content_display import show_results +from core.file_upload import EnhancedLargeFileProcessor +from core.pages import show_knowledge_base, show_prompts_page, show_settings_page +from core.pipeline import process_pipeline +from core.services import Services, set_services +from core.styling import ( + apply_aurora_theme, + create_aurora_header, ) - -# Core imports -from core.content_generation import transcribe_audio, generate_wisdom, generate_outline, generate_article, generate_social_content -from core.styling import apply_aurora_theme, create_aurora_header, create_aurora_progress_card, create_aurora_step_card, create_aurora_content_card, AuroraComponents from core.supabase_integration import get_supabase_client -from core.file_upload import EnhancedLargeFileProcessor -# Apply beautiful theme -apply_aurora_theme() +load_dotenv() -# === PROMPT LOADING SYSTEM === -def load_custom_prompts(): - """Load custom prompts from the prompts directory""" - prompts = {} - prompt_dir = "prompts/default" - - if os.path.exists(prompt_dir): - for filename in os.listdir(prompt_dir): - if 
filename.endswith('.md'): - prompt_name = filename.replace('.md', '') - try: - with open(os.path.join(prompt_dir, filename), 'r', encoding='utf-8') as f: - prompts[prompt_name] = f.read() - except Exception as e: - st.warning(f"Failed to load prompt {filename}: {e}") - - return prompts - -def load_template(template_name: str) -> Optional[str]: - """Load an article template by name""" - template_path = os.path.join('templates', f'{template_name}.md') - if os.path.exists(template_path): - return open(template_path, 'r', encoding='utf-8').read() - return None - -def get_prompt_for_step(step_name: str, custom_prompts: Dict[str, str] = None) -> Optional[str]: - """Get the appropriate prompt for a pipeline step""" - if not custom_prompts: - custom_prompts = load_custom_prompts() - - # Map step names to prompt files - prompt_mapping = { - 'wisdom': 'wisdom_extraction', - 'outline': 'outline_creation', - 'social': 'social_media', - 'article': 'article_generation' # We'll create this - } - - prompt_key = prompt_mapping.get(step_name) - if prompt_key and prompt_key in custom_prompts: - return custom_prompts[prompt_key] - - return None - -# === NOTION INTEGRATION === -def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str]: - """Create a Notion page with WhisperForge content""" - try: - from notion_client import Client - - api_key = os.getenv("NOTION_API_KEY") - database_id = os.getenv("NOTION_DATABASE_ID") - - if not api_key or not database_id: - st.warning("โš ๏ธ Notion not configured. 
Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") - return None - - client = Client(auth=api_key) - - # Build content blocks - children = [] - - # Add beautiful header with summary - children.append({ - "type": "heading_1", - "heading_1": { - "rich_text": [ - {"type": "text", "text": {"content": "๐ŸŒŒ "}, "annotations": {"color": "blue"}}, - {"type": "text", "text": {"content": title}, "annotations": {"bold": True}} - ] - } - }) - - # Add creation info - children.append({ - "type": "paragraph", - "paragraph": { - "rich_text": [ - {"type": "text", "text": {"content": "โœจ Generated with "}}, - {"type": "text", "text": {"content": "WhisperForge Aurora"}, - "annotations": {"bold": True, "color": "blue"}}, - {"type": "text", "text": {"content": f" โ€ข {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}} - ] - } - }) - - children.append({"type": "divider", "divider": {}}) - - # Add wisdom summary callout if exists - if content_data.get('wisdom'): - children.append({ - "type": "callout", - "callout": { - "rich_text": [ - {"type": "text", "text": {"content": "Key Insights & Wisdom"}}, - {"type": "text", "text": {"content": f"\n\n{content_data['wisdom'][:1800]}"}} - ], - "color": "purple_background", - "icon": {"type": "emoji", "emoji": "๐Ÿ’ก"} - } - }) - - # Add content sections as toggles - sections = [ - ("๐Ÿ“ Transcript", content_data.get('transcript')), - ("๐Ÿ’ก Wisdom", content_data.get('wisdom')), - ("๐Ÿ” Research Links", content_data.get('research')), - ("๐Ÿ“‹ Outline", content_data.get('outline')), - ("๐Ÿ“ฐ Article", content_data.get('article')), - ("๐Ÿ“ฑ Social Content", content_data.get('social_content')) - ] - - for section_title, section_content in sections: - if section_content: - # Handle research data specially - if section_title == "๐Ÿ” Research Links" and isinstance(section_content, dict): - research_children = [] - entities = section_content.get('entities', []) - - if entities: - for entity in entities[:5]: # Limit entities - entity_name = 
entity.get('name', 'Unknown Entity') - why_matters = entity.get('why_matters', 'No description available') - links = entity.get('links', []) - - # Entity as beautiful callout - research_children.append({ - "type": "callout", - "callout": { - "rich_text": [ - {"type": "text", "text": {"content": entity_name}, "annotations": {"bold": True}}, - {"type": "text", "text": {"content": f"\n{why_matters}"}} - ], - "color": "blue_background", - "icon": {"type": "emoji", "emoji": "๐Ÿ”ฌ"} - } - }) - - # Links as bulleted list - if links: - for link in links[:3]: # Limit links - link_title = link.get('title', 'Link') - link_url = link.get('url', '#') - link_desc = link.get('description', '') - is_gem = link.get('is_gem', False) - - gem_icon = "๐Ÿ’Ž" if is_gem else "๐Ÿ”—" - color = "orange" if is_gem else "default" - - research_children.append({ - "type": "bulleted_list_item", - "bulleted_list_item": { - "rich_text": [ - {"type": "text", "text": {"content": f"{gem_icon} "}, "annotations": {"color": color}}, - {"type": "text", "text": {"content": link_title}, "annotations": {"bold": True}}, - {"type": "text", "text": {"content": f" - {link_desc}"}, "annotations": {"italic": True}} - ] - } - }) - else: - research_children.append({ - "type": "paragraph", - "paragraph": { - "rich_text": [{"type": "text", "text": {"content": "No research entities found."}}] - } - }) - - children.append({ - "type": "toggle", - "toggle": { - "rich_text": [{"type": "text", "text": {"content": section_title}}], - "children": research_children - } - }) - else: - # Handle regular text content - if isinstance(section_content, str): - # Chunk content for Notion's limits - chunks = [section_content[i:i+1800] for i in range(0, len(section_content), 1800)] - - children.append({ - "type": "toggle", - "toggle": { - "rich_text": [{"type": "text", "text": {"content": section_title}}], - "children": [ - { - "type": "paragraph", - "paragraph": { - "rich_text": [{"type": "text", "text": {"content": chunk}}] - } - } 
for chunk in chunks[:5] # Limit chunks - ] - } - }) - - # Add beautiful footer - children.extend([ - {"type": "divider", "divider": {}}, - { - "type": "callout", - "callout": { - "rich_text": [ - {"type": "text", "text": {"content": "Content Generation Complete"}, "annotations": {"bold": True}}, - {"type": "text", "text": {"content": f"\n\n๐Ÿค– AI Pipeline: 8 steps completed successfully"}}, - {"type": "text", "text": {"content": f"\nโฑ๏ธ Generated: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}}, - {"type": "text", "text": {"content": f"\n๐ŸŒŒ Powered by WhisperForge Aurora"}} - ], - "color": "green_background", - "icon": {"type": "emoji", "emoji": "โœ…"} - } - } - ]) - - # Create the page - response = client.pages.create( - parent={"database_id": database_id}, - icon={"type": "emoji", "emoji": "๐ŸŒŒ"}, - properties={ - "Name": {"title": [{"text": {"content": title[:100]}}]} - }, - children=children[:50] # Limit total blocks - ) - - if response and 'id' in response: - page_id = response['id'] - page_url = f"https://notion.so/{page_id.replace('-', '')}" - return page_url - - return None - - except ImportError: - st.warning("โš ๏ธ Install notion-client to enable Notion publishing: pip install notion-client") - return None - except Exception as e: - st.error(f"โŒ Notion publishing failed: {str(e)}") - return None +# Page config first +st.set_page_config(page_title="WhisperForge", page_icon="\U0001f30c", layout="wide") -def generate_ai_title(transcript: str) -> str: - """Generate an AI title for the content""" - try: - from core.content_generation import generate_content - - prompt = f"""Generate a concise, descriptive title (max 60 characters) for this audio transcript: +# Apply beautiful theme +apply_aurora_theme() -{transcript[:500]}... 
-Title should be: -- Clear and specific -- Professional -- Capture the main topic -- No quotes or special characters +# === SIMPLE AUTHENTICATION === -Title:""" - - title = generate_content(prompt, "OpenAI", "gpt-4", {}) - return title.strip().replace('"', '').replace("'", "")[:60] - except: - return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" -# === SIMPLE AUTHENTICATION === def init_session(): - """Initialize simple session state""" - if 'authenticated' not in st.session_state: + """Initialize simple session state and wire up the DI container.""" + if "authenticated" not in st.session_state: st.session_state.authenticated = False - if 'user_id' not in st.session_state: + if "user_id" not in st.session_state: st.session_state.user_id = None - if 'user_email' not in st.session_state: + if "user_email" not in st.session_state: st.session_state.user_email = None + # Initialise the service container once per process + if "services_initialised" not in st.session_state: + set_services(Services(config=Config.from_env())) + st.session_state.services_initialised = True + + def show_login(): - """Simple test login""" + """Simple test login.""" create_aurora_header() - - st.markdown("### ๐Ÿ” Login to WhisperForge") - - # Test login button - if st.button("๐Ÿš€ Login with Test Account", type="primary", use_container_width=True): + st.markdown("### Login to WhisperForge") + + if st.button("Login with Test Account", type="primary", use_container_width=True): st.session_state.authenticated = True st.session_state.user_id = 1 st.session_state.user_email = "test@whisperforge.ai" - st.success("โœ… Logged in successfully!") + st.success("Logged in successfully!") time.sleep(1) st.rerun() - + st.markdown("---") st.markdown("**Demo Mode**: Click above to access WhisperForge") -# === CORE PROCESSING PIPELINE === - -def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0, status_message="", processing_time=""): - """Display beautiful 
Aurora-styled processing pipeline visualization""" - - # Define the 6-step pipeline - pipeline_steps = [ - { - "icon": "๐ŸŽค", - "title": "Transcription", - "description": "Converting audio to text using Whisper AI", - "status": "pending" - }, - { - "icon": "๐Ÿ’ก", - "title": "Wisdom Extraction", - "description": "Extracting key insights and wisdom", - "status": "pending" - }, - { - "icon": "๐Ÿ“‹", - "title": "Outline Creation", - "description": "Structuring content with clear outline", - "status": "pending" - }, - { - "icon": "๐Ÿ“ฐ", - "title": "Article Generation", - "description": "Creating comprehensive article content", - "status": "pending" - }, - { - "icon": "๐Ÿ“ฑ", - "title": "Social Content", - "description": "Generating social media posts", - "status": "pending" - }, - { - "icon": "๐ŸŒŒ", - "title": "Notion Publishing", - "description": "Publishing to Notion workspace", - "status": "pending" - } + +# === NAVIGATION === + + +def create_aurora_navigation(): + """Create Aurora navigation header with 5 tabs and pipeline badges.""" + # Build pipeline badge HTML from session results + badge_data = [ + ("transcript", "\U0001f399\ufe0f", "Transcript"), + ("wisdom", "\U0001f4a1", "Wisdom"), + ("outline", "\U0001f4cb", "Outline"), + ("article", "\U0001f4f0", "Article"), + ("social_content", "\U0001f4f1", "Social"), ] - - # Update step statuses based on current progress - for i, step in enumerate(pipeline_steps): - if i < current_step: - step["status"] = "completed" - elif i == current_step: - step["status"] = "active" - else: - step["status"] = "pending" - - # Create the pipeline visualization HTML - steps_html = "" - for i, step in enumerate(pipeline_steps): - progress_width = step_progress if i == current_step else (100 if step["status"] == "completed" else 0) - - status_text = { - "pending": "Waiting", - "active": "Processing", - "completed": "Complete", - "error": "Error" - }.get(step["status"], "Waiting") - - steps_html += f""" -
-
- {step['icon']} -

{step['title']}

-

{step['description']}

-
{status_text}
-
- """ - - # Create the complete pipeline HTML - pipeline_html = f""" -
-
-

Content Transformation Pipeline

-

6-Step AI-Powered Processing

-
- -
-
- Overall Progress - {total_progress}% -
-
-
-
-
- -
- {steps_html} -
- - {f''' -
- โšก - {status_message} - {processing_time} -
- ''' if status_message else ''} -
- """ - - st.markdown(pipeline_html, unsafe_allow_html=True) - -def process_audio_pipeline(audio_file): - """Core audio to content pipeline with beautiful Aurora visualization""" - import time - from datetime import datetime - - results = {} - start_time = time.time() - - # Load custom prompts - custom_prompts = load_custom_prompts() - if custom_prompts: - st.info(f"๐Ÿ“ Using {len(custom_prompts)} custom prompts") - - # Initialize beautiful pipeline visualization - pipeline_placeholder = st.empty() - - # Create real-time content display containers - st.markdown("### ๐ŸŒŒ Live Content Generation") - - # Create expandable containers for each step - transcript_container = st.expander("๐ŸŽ™๏ธ Transcription", expanded=False) - wisdom_container = st.expander("๐Ÿ’ก Wisdom Extraction", expanded=False) - outline_container = st.expander("๐Ÿ“‹ Outline Creation", expanded=False) - article_container = st.expander("๐Ÿ“ Article Generation", expanded=False) - social_container = st.expander("๐Ÿ“ฑ Social Content", expanded=False) - notion_container = st.expander("๐ŸŒŒ Notion Publishing", expanded=False) - - try: - # Step 1: Transcription - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=0, - step_progress=0, - total_progress=0, - status_message="Starting transcription process...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Import transcription function - from core.content_generation import transcribe_audio - - # Create temporary file - import tempfile - import os - with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp_file: - tmp_file.write(audio_file.getvalue()) - tmp_file_path = tmp_file.name - - try: - # Transcription with progress updates - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=0, - step_progress=50, - total_progress=8, - status_message="Transcribing audio with Whisper AI...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - 
- transcript = transcribe_audio(tmp_file_path) - if not transcript or "Error" in transcript: - st.error(f"Transcription failed: {transcript}") - return None - - results['transcript'] = transcript - - # Stream transcript to UI immediately - with transcript_container: - st.markdown("**โœ… Transcription Complete**") - st.text_area("Transcript", transcript, height=200, disabled=True) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=0, - step_progress=100, - total_progress=17, - status_message="Transcription complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 2: Wisdom Extraction - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=0, - total_progress=17, - status_message="Extracting wisdom and insights...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_wisdom - wisdom_prompt = get_prompt_for_step('wisdom', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=50, - total_progress=25, - status_message="Analyzing content for key insights...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) - results['wisdom'] = wisdom - - # Stream wisdom to UI immediately - with wisdom_container: - st.markdown("**โœ… Wisdom Extraction Complete**") - st.markdown(wisdom) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=100, - total_progress=33, - status_message="Wisdom extraction complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 3: Outline Creation - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=0, - total_progress=33, - status_message="Creating structured outline...", - processing_time=f"{time.time() - 
start_time:.1f}s" - ) - - from core.content_generation import generate_outline - outline_prompt = get_prompt_for_step('outline', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=50, - total_progress=42, - status_message="Structuring content hierarchy...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) - results['outline'] = outline - - # Stream outline to UI immediately - with outline_container: - st.markdown("**โœ… Outline Creation Complete**") - st.markdown(outline) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=100, - total_progress=50, - status_message="Outline creation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 4: Article Generation - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=0, - total_progress=50, - status_message="Generating comprehensive article...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_article - article_prompt = get_prompt_for_step('article', custom_prompts) - selected_template = st.session_state.get('article_template') - if selected_template: - template_text = load_template(selected_template) - if template_text: - article_prompt = template_text + "\n" + article_prompt - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=50, - total_progress=58, - status_message="Writing detailed article content...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - article = generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) - results['article'] = article - - # Stream article to UI immediately - with article_container: - st.markdown("**โœ… Article Generation 
Complete**") - st.markdown(article) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=100, - total_progress=67, - status_message="Article generation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 5: Social Content - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=0, - total_progress=67, - status_message="Creating social media content...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_social_content - social_prompt = get_prompt_for_step('social', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=50, - total_progress=75, - status_message="Generating social media posts...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) - results['social_content'] = social - - # Stream social content to UI immediately - with social_container: - st.markdown("**โœ… Social Content Creation Complete**") - st.markdown(social) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=100, - total_progress=83, - status_message="Social content creation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 6: Auto-publish to Notion - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=0, - total_progress=83, - status_message="Publishing to Notion workspace...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): - # Generate AI title - ai_title = generate_ai_title(transcript) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=30, - total_progress=88, - 
status_message="Creating Notion page structure...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=60, - total_progress=92, - status_message="Uploading content to Notion...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Publish to Notion - notion_url = create_notion_page(ai_title, results) - if notion_url: - results['notion_url'] = notion_url - - # Stream Notion success to UI - with notion_container: - st.markdown("**โœ… Notion Publishing Complete**") - st.markdown(f"**Page Title:** {ai_title}") - st.markdown(f"๐Ÿ”— [Open in Notion]({notion_url})") - else: - # Stream Notion failure to UI - with notion_container: - st.markdown("**โš ๏ธ Notion Publishing Failed**") - st.warning("Check your Notion API configuration in Settings.") - else: - # Show disabled status in UI - with notion_container: - st.markdown("**โ„น๏ธ Notion Publishing Disabled**") - st.info("Configure Notion API in Settings to enable auto-publishing.") - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=90, - total_progress=96, - status_message="Saving to database...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Save to Supabase database - try: - save_content_to_db(results) - except Exception as e: - st.warning(f"โš ๏ธ Content saved locally but database save failed: {e}") - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=100, - total_progress=100, - status_message="Pipeline complete! All content generated successfully.", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Aurora completion celebration - st.markdown(""" -
-

Pipeline Complete!

-

Your content has been transformed with AI magic

-
- """, unsafe_allow_html=True) - - # Clear the pipeline display after a moment - time.sleep(2) - pipeline_placeholder.empty() - - return results - - finally: - # Cleanup temporary file - if os.path.exists(tmp_file_path): - os.unlink(tmp_file_path) - - except Exception as e: - # Show error state - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=0, - step_progress=0, - total_progress=0, - status_message=f"Error: {str(e)}", - processing_time=f"{time.time() - start_time:.1f}s" - ) - st.error(f"Pipeline failed: {str(e)}") - return None - -def process_audio_pipeline_live(audio_file): - """Run pipeline with StreamingPipelineController""" - from core.streaming_pipeline import get_pipeline_controller - - controller = get_pipeline_controller() - controller.start_pipeline(audio_file) - while controller.process_next_step(): - pass - return controller.get_results() - -def process_audio_pipeline_with_transcript(transcript: str): - """Process audio pipeline with pre-transcribed content using beautiful Aurora visualization""" - import time - from datetime import datetime - - results = {'transcript': transcript} - start_time = time.time() - - # Load custom prompts - custom_prompts = load_custom_prompts() - if custom_prompts: - st.info(f"๐Ÿ“ Using {len(custom_prompts)} custom prompts") - - # Initialize beautiful pipeline visualization (starting from step 1) - pipeline_placeholder = st.empty() - - # Create real-time content display containers - st.markdown("### ๐ŸŒŒ Live Content Generation") - - # Create expandable containers for each step (skip transcription) - wisdom_container = st.expander("๐Ÿ’ก Wisdom Extraction", expanded=False) - outline_container = st.expander("๐Ÿ“‹ Outline Creation", expanded=False) - article_container = st.expander("๐Ÿ“ Article Generation", expanded=False) - social_container = st.expander("๐Ÿ“ฑ Social Content", expanded=False) - notion_container = st.expander("๐ŸŒŒ Notion Publishing", expanded=False) - - try: - # Show 
initial state with transcription already complete - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=0, - total_progress=17, - status_message=f"Using pre-transcribed content ({len(transcript)} characters)", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 2: Wisdom Extraction - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=0, - total_progress=17, - status_message="Extracting wisdom and insights...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_wisdom - wisdom_prompt = get_prompt_for_step('wisdom', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=50, - total_progress=25, - status_message="Analyzing content for key insights...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) - results['wisdom'] = wisdom - - # Stream wisdom to UI immediately - with wisdom_container: - st.markdown("**โœ… Wisdom Extraction Complete**") - st.markdown(wisdom) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=100, - total_progress=33, - status_message="Wisdom extraction complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 3: Outline Creation - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=0, - total_progress=33, - status_message="Creating structured outline...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_outline - outline_prompt = get_prompt_for_step('outline', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=50, - total_progress=42, - status_message="Structuring content 
hierarchy...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) - results['outline'] = outline - - # Stream outline to UI immediately - with outline_container: - st.markdown("**โœ… Outline Creation Complete**") - st.markdown(outline) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=2, - step_progress=100, - total_progress=50, - status_message="Outline creation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 4: Article Generation - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=0, - total_progress=50, - status_message="Generating comprehensive article...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_article - article_prompt = get_prompt_for_step('article', custom_prompts) - selected_template = st.session_state.get('article_template') - if selected_template: - template_text = load_template(selected_template) - if template_text: - article_prompt = template_text + "\n" + article_prompt - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=50, - total_progress=58, - status_message="Writing detailed article content...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - article = generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) - results['article'] = article - - # Stream article to UI immediately - with article_container: - st.markdown("**โœ… Article Generation Complete**") - st.markdown(article) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=3, - step_progress=100, - total_progress=67, - status_message="Article generation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 5: Social Content - with 
pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=0, - total_progress=67, - status_message="Creating social media content...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - from core.content_generation import generate_social_content - social_prompt = get_prompt_for_step('social', custom_prompts) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=50, - total_progress=75, - status_message="Generating social media posts...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) - results['social_content'] = social - - # Stream social content to UI immediately - with social_container: - st.markdown("**โœ… Social Content Creation Complete**") - st.markdown(social) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=4, - step_progress=100, - total_progress=83, - status_message="Social content creation complete!", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Step 6: Auto-publish to Notion - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=0, - total_progress=83, - status_message="Publishing to Notion workspace...", - processing_time=f"{time.time() - start_time:.1f}s" + + badge_spans = [] + results = st.session_state.get("current_results", {}) + + for key, icon, label in badge_data: + has_content = bool(results.get(key)) + state_class = "badge-complete" if has_content else "badge-pending" + + if has_content: + word_count = len(str(results[key]).split()) + badge_spans.append( + '' + + icon + + " " + + label + + "" ) - - if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): - # Generate AI title - ai_title = generate_ai_title(transcript) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - 
step_progress=30, - total_progress=88, - status_message="Creating Notion page structure...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=60, - total_progress=92, - status_message="Uploading content to Notion...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Publish to Notion - notion_url = create_notion_page(ai_title, results) - if notion_url: - results['notion_url'] = notion_url - - # Stream Notion success to UI - with notion_container: - st.markdown("**โœ… Notion Publishing Complete**") - st.markdown(f"**Page Title:** {ai_title}") - st.markdown(f"๐Ÿ”— [Open in Notion]({notion_url})") - else: - # Stream Notion failure to UI - with notion_container: - st.markdown("**โš ๏ธ Notion Publishing Failed**") - st.warning("Check your Notion API configuration in Settings.") else: - # Show disabled status in UI - with notion_container: - st.markdown("**โ„น๏ธ Notion Publishing Disabled**") - st.info("Configure Notion API in Settings to enable auto-publishing.") - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=90, - total_progress=96, - status_message="Saving to database...", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Save to Supabase database - try: - save_content_to_db(results) - except Exception as e: - st.warning(f"โš ๏ธ Content saved locally but database save failed: {e}") - - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=5, - step_progress=100, - total_progress=100, - status_message="Pipeline complete! All content generated successfully.", - processing_time=f"{time.time() - start_time:.1f}s" - ) - - # Aurora completion celebration - st.markdown(""" -
-

Pipeline Complete!

-

Your content has been transformed with AI magic

-
- """, unsafe_allow_html=True) - - # Clear the pipeline display after a moment - time.sleep(2) - pipeline_placeholder.empty() - - return results - - except Exception as e: - # Show error state - with pipeline_placeholder.container(): - show_processing_pipeline( - current_step=1, - step_progress=0, - total_progress=17, - status_message=f"Error: {str(e)}", - processing_time=f"{time.time() - start_time:.1f}s" + badge_spans.append( + '' + + icon + + " " + + label + + "" ) - st.error(f"Pipeline failed: {str(e)}") - return None -def save_content_to_db(content_data): - """Save generated content to database""" - try: - db = get_supabase_client() - if db and st.session_state.user_id: - content_id = db.save_content(st.session_state.user_id, { - **content_data, - 'title': f"WhisperForge Content {datetime.now().strftime('%Y-%m-%d %H:%M')}", - 'created_at': datetime.now().isoformat() - }) - if content_id: - st.success(f"๐Ÿ’พ Content saved to database (ID: {content_id})") - except Exception as e: - st.warning(f"Database save failed: {e}") - -# === CONTENT DISPLAY === -def create_enhanced_aurora_content_card(title, content, content_type="text", icon="๐Ÿ“„"): - """Create a beautiful enhanced Aurora content card with copy functionality and animations""" - import uuid - - # Generate unique IDs for this card - card_id = f"card_{uuid.uuid4().hex[:8]}" - copy_btn_id = f"copy_{uuid.uuid4().hex[:8]}" - expand_btn_id = f"expand_{uuid.uuid4().hex[:8]}" - full_content_id = f"full_{uuid.uuid4().hex[:8]}" - - # Calculate content stats - word_count = len(content.split()) if content else 0 - char_count = len(content) if content else 0 - - # Determine if content needs truncation - preview_length = 300 - needs_expansion = len(content) > preview_length - preview_content = content[:preview_length] + "..." 
if needs_expansion else content - - # Content type specific styling - type_class = content_type.lower() - - # Create the enhanced card HTML - safe_content = content.replace('`', '\\`') - - card_html = f""" -
-
-

- {icon} - {title} -

-
- - -
-
- -
-
- {preview_content} -
- - {f''' -
- {content[preview_length:]} -
- ''' if needs_expansion else ''} - -
-
- ๐Ÿ“Š - {word_count} words โ€ข {char_count} characters -
- - {f''' - - ''' if needs_expansion else ''} -
-
- - - -
- - - """ - - st.markdown(card_html, unsafe_allow_html=True) - -def show_results(results): - """Display generated content with beautiful Aurora styling and enhanced UX""" - if not results: - return - - # Aurora header for results with enhanced styling - st.markdown(""" -
-

โœจ Content Generated Successfully!

-

Your audio has been transformed with AI magic

-
- """, unsafe_allow_html=True) - - # Aurora Notion link if available with enhanced styling - if results.get('notion_url'): - st.markdown(f""" - - """, unsafe_allow_html=True) - st.markdown("---") - - # Enhanced content overview stats - total_words = sum(len(str(results.get(key, '')).split()) for key in ['transcript', 'wisdom', 'outline', 'article', 'social_content']) - content_types = len([k for k in ['transcript', 'wisdom', 'outline', 'article', 'social_content'] if results.get(k)]) - - st.markdown(f""" -
-
-
- ๐Ÿ“Š - {total_words:,} - Total Words -
-
- ๐Ÿ“„ - {content_types} - Content Types -
-
- โšก - AI - Generated -
-
-
- """, unsafe_allow_html=True) - - # Prepare tab data for custom Aurora tabs - tab_data = [] - - # Add main content tabs - if results.get('transcript'): - tab_data.append({ - 'title': 'Transcript', - 'icon': '๐ŸŽ™๏ธ', - 'type': 'transcript', - 'content': results['transcript'] - }) - - if results.get('wisdom'): - tab_data.append({ - 'title': 'Wisdom', - 'icon': '๐Ÿ’Ž', - 'type': 'wisdom', - 'content': results['wisdom'] - }) - - if results.get('outline'): - tab_data.append({ - 'title': 'Outline', - 'icon': '๐Ÿ“‹', - 'type': 'outline', - 'content': results['outline'] - }) - - if results.get('article'): - tab_data.append({ - 'title': 'Article', - 'icon': '๐Ÿ“ฐ', - 'type': 'article', - 'content': results['article'] - }) - - if results.get('social_content'): - tab_data.append({ - 'title': 'Social', - 'icon': '๐Ÿ“ฑ', - 'type': 'social', - 'content': results['social_content'] - }) - - # Add Editor tab if editor content exists - if results.get('editor_notes') or results.get('revised_content'): - editor_content = "" - - if results.get('editor_notes'): - editor_content += "=== EDITOR NOTES ===\n\n" - for section, notes in results['editor_notes'].items(): - if notes: - editor_content += f"## {section.title()} Notes:\n{notes}\n\n" - - if results.get('revised_content'): - editor_content += "\n=== REVISED CONTENT ===\n\n" - for section, content in results['revised_content'].items(): - if content: - editor_content += f"## Revised {section.title()}:\n{content}\n\n" - - tab_data.append({ - 'title': 'Editor Review', - 'icon': '๐Ÿ“', - 'type': 'editor', - 'content': editor_content - }) - - # Display the custom Aurora tabs - if tab_data: - create_aurora_tabs(tab_data, default_tab=0) - else: - st.warning("No content available to display.") - - # Add export all functionality - st.markdown("---") - st.markdown(""" -
-

๐Ÿ“ฆ Additional Export Options

-
- """, unsafe_allow_html=True) - - col1, col2, col3 = st.columns(3) - - with col1: - if st.button("๐Ÿ“„ Export as Text", use_container_width=True): - export_content = create_text_export(results) - st.download_button( - label="๐Ÿ’พ Download Text File", - data=export_content, - file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.txt", - mime="text/plain", - use_container_width=True - ) - if st.button("๐Ÿ“ Export as Markdown", use_container_width=True): - md_content = export_to_markdown(results) - st.download_button( - label="๐Ÿ’พ Download Markdown", - data=md_content, - file_name=f"whisperforge_{datetime.now().strftime('%Y%m%d_%H%M')}.md", - mime="text/markdown", - use_container_width=True - ) + badges_html = " ".join(badge_spans) - with col2: - if st.button("๐Ÿ“Š Export as JSON", use_container_width=True): - import json - json_content = json.dumps(results, indent=2, ensure_ascii=False) - st.download_button( - label="๐Ÿ’พ Download JSON File", - data=json_content, - file_name=f"whisperforge_data_{datetime.now().strftime('%Y%m%d_%H%M')}.json", - mime="application/json", - use_container_width=True - ) - if st.button("๐Ÿ“‘ Export as Word", use_container_width=True): - doc_bytes = export_to_word(results) - st.download_button( - label="๐Ÿ’พ Download Word File", - data=doc_bytes, - file_name=f"whisperforge_{datetime.now().strftime('%Y%m%d_%H%M')}.docx", - mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document", - use_container_width=True - ) + header_html = ( + '
' + '
' + '

WhisperForge Aurora

' + '

Transform Audio into Structured Content with AI

' + '
' + badges_html + "
" + "
" + ) + st.markdown(header_html, unsafe_allow_html=True) - with col3: - if st.button("๐Ÿ“‹ Copy All Content", use_container_width=True): - all_content = create_text_export(results) - st.code(all_content, language="text") - st.success("โœ… Content displayed above - use your browser's copy function!") - if st.button("๐Ÿ“• Export as PDF", use_container_width=True): - pdf_bytes = export_to_pdf(results) - st.download_button( - label="๐Ÿ’พ Download PDF", - data=pdf_bytes, - file_name=f"whisperforge_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf", - mime="application/pdf", - use_container_width=True - ) + return st.tabs(["Transform", "Content Library", "Settings", "Knowledge Base", "Prompts"]) + + +# === PAGE FUNCTIONS === -def create_text_export(results): - """Create a formatted text export of all content""" - export_lines = [] - export_lines.append("=" * 60) - export_lines.append("WHISPERFORGE CONTENT EXPORT") - export_lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") - export_lines.append("=" * 60) - export_lines.append("") - - sections = [ - ("AUDIO TRANSCRIPT", results.get('transcript', '')), - ("EXTRACTED WISDOM", results.get('wisdom', '')), - ("CONTENT OUTLINE", results.get('outline', '')), - ("FULL ARTICLE", results.get('article', '')), - ("SOCIAL MEDIA CONTENT", results.get('social_content', '')) - ] - - for title, content in sections: - if content: - export_lines.append(f"## {title}") - export_lines.append("-" * 40) - export_lines.append(content) - export_lines.append("") - export_lines.append("") - - if results.get('notion_url'): - export_lines.append("## NOTION LINK") - export_lines.append("-" * 40) - export_lines.append(results['notion_url']) - export_lines.append("") - - return "\n".join(export_lines) - -def export_to_markdown(results): - """Export results to Markdown format""" - lines = ["# WhisperForge Content Export"] - sections = [ - ("## Transcript", results.get('transcript', '')), - ("## Wisdom", results.get('wisdom', '')), - 
("## Outline", results.get('outline', '')), - ("## Article", results.get('article', '')), - ("## Social Content", results.get('social_content', '')), - ] - for title, content in sections: - if content: - lines.append(title) - lines.append("") - lines.append(content) - lines.append("") - if results.get('notion_url'): - lines.append("## Notion Link") - lines.append(results['notion_url']) - return "\n".join(lines) - -def export_to_word(results): - """Export results to a Word document""" - from docx import Document - from io import BytesIO - - doc = Document() - doc.add_heading("WhisperForge Content Export", level=1) - for title, content in [ - ("Transcript", results.get('transcript', '')), - ("Wisdom", results.get('wisdom', '')), - ("Outline", results.get('outline', '')), - ("Article", results.get('article', '')), - ("Social Content", results.get('social_content', '')), - ]: - if content: - doc.add_heading(title, level=2) - doc.add_paragraph(content) - if results.get('notion_url'): - doc.add_heading("Notion Link", level=2) - doc.add_paragraph(results['notion_url']) - - bio = BytesIO() - doc.save(bio) - bio.seek(0) - return bio.read() - -def export_to_pdf(results): - """Export results to a PDF file""" - from fpdf import FPDF - pdf = FPDF() - pdf.add_page() - pdf.set_auto_page_break(auto=True, margin=15) - pdf.set_font("Arial", size=12) - text = create_text_export(results) - for line in text.split("\n"): - pdf.cell(0, 10, txt=line, ln=1) - return pdf.output(dest='S').encode('latin-1') - -# === NAVIGATION & PAGES === -def create_aurora_navigation(): - """Beautiful Aurora bioluminescent navigation - Clean and professional""" - st.markdown(""" -
-
-

WhisperForge Aurora

-

Transform Audio into Structured Content with AI

-
- Transcription - Wisdom - Article - Social - Notion -
-
- """, unsafe_allow_html=True) - - # Clean navigation tabs without emojis - tabs = st.tabs([ - "Transform", - "Content Library", - "Settings", - "Knowledge Base", - "Prompts" - ]) - - return tabs def show_transform_page(): - """Clean transformation page focused on file upload and processing""" - - # Simple Aurora-styled header using main CSS - st.markdown(""" + """Main transformation page: file upload and processing.""" + st.markdown( + """

Transform Audio

Upload your audio and watch it transform into structured content

- """, unsafe_allow_html=True) - - # Beautiful Aurora upload method selector - st.markdown(""" + """, + unsafe_allow_html=True, + ) + + if "upload_method" not in st.session_state: + st.session_state.upload_method = "Standard Upload" + + # Upload method cards + sel_std = "selected" if st.session_state.upload_method == "Standard Upload" else "" + sel_lg = "selected" if st.session_state.upload_method == "Large File Upload" else "" + st.markdown( + f"""
-
- โšก +
+ \u26a1

Standard Upload

-

Perfect for most audio files up to 25MB with instant processing

+

+ Perfect for most audio files up to 25MB with instant processing

- โšก - Instant processing -
+ \u26a1 Instant processing
- ๐ŸŽต - Audio preview -
+ \U0001f3b5 Audio preview
- ๐Ÿ“Š - Up to 25MB -
+ \U0001f4be Up to 25MB
-
- ๐Ÿš€ +
+ \U0001f680

Large File Upload

-

Advanced processing for large files up to 2GB with intelligent chunking

+

+ Advanced processing for large files up to 2GB with intelligent chunking

- ๐Ÿ”ง - FFmpeg chunking -
+ \U0001f527 FFmpeg chunking
- โšก - Parallel processing -
+ \u26a1 Parallel processing
- ๐Ÿ“ˆ - Up to 2GB -
+ \U0001f4be Up to 2GB
- - - """, unsafe_allow_html=True) - - # Enhanced file upload selection with session state - if 'upload_method' not in st.session_state: - st.session_state.upload_method = "Standard Upload" - + """, + unsafe_allow_html=True, + ) + upload_method = st.radio( "Choose upload method:", ["Standard Upload", "Large File Upload"], index=0 if st.session_state.upload_method == "Standard Upload" else 1, help="Standard upload for smaller files, Enhanced upload for large files with FFmpeg processing", - label_visibility="collapsed" + label_visibility="collapsed", ) - st.session_state.upload_method = upload_method - + if upload_method == "Standard Upload": - # Beautiful standard file upload zone - st.markdown(""" -
-
-
๐ŸŽต
-
-
-

Drop your audio file here

-

Or click to browse and select a file

-
- MP3 - WAV - M4A - FLAC - OGG -
-
- """, unsafe_allow_html=True) - - # Standard file upload - uploaded_files = st.file_uploader( - "Upload your audio file", - type=['mp3', 'wav', 'm4a', 'flac', 'ogg'], - help="Upload audio file for processing (max 25MB)", - label_visibility="collapsed", - accept_multiple_files=True - ) - - if uploaded_files: - for uploaded_file in uploaded_files: - # Beautiful file preview card - file_size = len(uploaded_file.getvalue()) / (1024 * 1024) - file_extension = uploaded_file.name.split('.')[-1].upper() - - st.markdown(f""" -
-
-
-
๐ŸŽต
-
-

{uploaded_file.name}

-

{file_size:.1f} MB โ€ข {file_extension} Format

-
-
-
-
Ready to process
-
-
-
- """, unsafe_allow_html=True) - - # Enhanced audio player - if file_size < 50: # Only show player for files under 50MB - st.markdown('
', unsafe_allow_html=True) - st.audio(uploaded_file.getvalue()) - st.markdown('
', unsafe_allow_html=True) - else: - st.info("Audio preview disabled for large files to conserve memory") - - # Beautiful process button - if st.button(f"Transform {uploaded_file.name}", key=f"process_{uploaded_file.name}", type="primary", use_container_width=True): - if not os.getenv("OPENAI_API_KEY"): - st.error("Please enter your OpenAI API key in the sidebar") - return - - with st.container(): - if st.session_state.get('live_stream', False): - results = process_audio_pipeline_live(uploaded_file) - else: - results = process_audio_pipeline(uploaded_file) - if results: - st.session_state.current_results = results - + _show_standard_upload() else: - # Enhanced large file upload - st.markdown("### Enhanced Large File Processing") - - # Initialize enhanced processor - processor = EnhancedLargeFileProcessor() - - # Create enhanced upload interface - uploaded_file = processor.create_enhanced_upload_interface() - - if uploaded_file: - # Validate file - validation = processor.validate_file(uploaded_file) - - if not validation["valid"]: - st.error(f"{validation['error']}") - return - - file_size_mb = validation["size_mb"] - requires_chunking = validation["requires_chunking"] - - # Beautiful processing metrics - st.markdown(f""" -
-
-
{file_size_mb:.1f}
-
File Size (MB)
-
-
-
{"FFmpeg" if requires_chunking else "Standard"}
-
Processing Method
-
-
-
{validation["format"].upper()}
-
Format
-
-
- """, unsafe_allow_html=True) - - # Audio preview disabled for large files to conserve memory - if file_size_mb < 50: - st.markdown('
', unsafe_allow_html=True) + _show_large_file_upload() + + if "current_results" in st.session_state: + show_results(st.session_state.current_results) + + +def _show_standard_upload(): + """Handle standard file upload (up to 25MB).""" + st.markdown( + """ +
+
+ \U0001f3b5 +
+
+

Drop your audio file here

+

Or click to browse and select a file

+
+ MP3 + WAV + M4A + FLAC + OGG +
+
+ """, + unsafe_allow_html=True, + ) + + uploaded_files = st.file_uploader( + "Upload your audio file", + type=["mp3", "wav", "m4a", "flac", "ogg"], + help="Upload audio file for processing (max 25MB)", + label_visibility="collapsed", + accept_multiple_files=True, + ) + + if uploaded_files: + for uploaded_file in uploaded_files: + file_size = len(uploaded_file.getvalue()) / (1024 * 1024) + + if file_size < 50: st.audio(uploaded_file.getvalue()) - st.markdown('
', unsafe_allow_html=True) - else: - st.info("Audio preview disabled for large files to conserve memory") - - # Enhanced process button - if st.button("Transform Large File to Content", type="primary", use_container_width=True): + + if st.button( + f"Transform {uploaded_file.name}", + key=f"process_{uploaded_file.name}", + type="primary", + use_container_width=True, + ): if not os.getenv("OPENAI_API_KEY"): - st.error("Please enter your OpenAI API key in the sidebar") + st.error("Please enter your OpenAI API key in Settings") return - + with st.container(): - # Process with enhanced large file processor - processing_result = processor.process_large_file(uploaded_file) - - if processing_result["success"]: - transcript = processing_result["transcript"] - - # Show processing summary - st.success(f"Large file processing complete!") - - # Beautiful success metrics - st.markdown(f""" -
-
-
{len(transcript):,}
-
Characters
-
-
-
{processing_result["method"]}
-
Method Used
-
-
-
{processing_result.get("chunks_processed", 1)}
-
Chunks Processed
-
-
- """, unsafe_allow_html=True) - - # Continue with pipeline using pre-transcribed content - st.markdown("---") - results = process_audio_pipeline_with_transcript(transcript) - - if results: - # Store results in session state - st.session_state.current_results = results - else: - st.error(f"Large file processing failed: {processing_result['error']}") - - # Fallback to standard processing for smaller files - if file_size_mb < 100: - st.info("Attempting fallback to standard processing...") - try: - if st.session_state.get('live_stream', False): - results = process_audio_pipeline_live(uploaded_file) - else: - results = process_audio_pipeline(uploaded_file) - if results: - st.session_state.current_results = results - except Exception as e: - st.error(f"Fallback processing also failed: {str(e)}") - - # Show results if available - if 'current_results' in st.session_state: - show_results(st.session_state.current_results) + results = process_pipeline(audio_file=uploaded_file) + if results: + st.session_state.current_results = results + + +def _show_large_file_upload(): + """Handle large file upload with FFmpeg chunking.""" + st.markdown("### Enhanced Large File Processing") + processor = EnhancedLargeFileProcessor() + uploaded_file = processor.create_enhanced_upload_interface() + + if uploaded_file: + validation = processor.validate_file(uploaded_file) + if not validation["valid"]: + st.error(f"{validation['error']}") + return + + file_size_mb = validation["size_mb"] + + if file_size_mb < 50: + st.audio(uploaded_file.getvalue()) + + if st.button("Transform Large File to Content", type="primary", use_container_width=True): + if not os.getenv("OPENAI_API_KEY"): + st.error("Please enter your OpenAI API key in Settings") + return + + with st.container(): + processing_result = processor.process_large_file(uploaded_file) + + if processing_result["success"]: + transcript = processing_result["transcript"] + st.success("Large file processing complete!") + st.markdown("---") + 
results = process_pipeline(transcript=transcript) + if results: + st.session_state.current_results = results + else: + st.error(f"Large file processing failed: {processing_result['error']}") + + if file_size_mb < 100: + st.info("Attempting fallback to standard processing...") + try: + results = process_pipeline(audio_file=uploaded_file) + if results: + st.session_state.current_results = results + except Exception as e: + st.error(f"Fallback processing also failed: {e!s}") + def show_content_library(): - """Content library/history page""" - st.markdown("### ๐Ÿ“š Content Library") - - # Get content from database + """Content library/history page.""" + st.markdown("### Content Library") + try: db = get_supabase_client() if db: - # Fetch recent content - response = db.client.table('content').select('*').order('created_at', desc=True).limit(20).execute() - + response = db.client.table("content").select("*").order("created_at", desc=True).limit(20).execute() + if response.data: - st.success(f"๐Ÿ“Š Found {len(response.data)} content items") - - # Search and filter + st.success(f"Found {len(response.data)} content items") + col1, col2 = st.columns([3, 1]) with col1: - search_term = st.text_input("๐Ÿ” Search content", placeholder="Search by title or content...") + search_term = st.text_input("Search content", placeholder="Search by title or content...") with col2: - content_type = st.selectbox("Filter by type", ["All", "Article", "Social", "Outline"]) - - # Display content cards in a two-column grid + st.selectbox("Filter by type", ["All", "Article", "Social", "Outline"]) + cols = st.columns(2) for idx, item in enumerate(response.data): - if search_term and search_term.lower() not in item.get('title', '').lower(): + if search_term and search_term.lower() not in item.get("title", "").lower(): continue - with cols[idx % 2].expander(f"๐Ÿ“„ {item.get('title', 'Untitled')} - {item.get('created_at', '')[:10]}"): - col1, col2 = st.columns([3, 1]) - - with col1: - 
st.markdown(f"**Created:** {item.get('created_at', 'Unknown')}") - if item.get('transcript'): - st.markdown("**Transcript Preview:**") - st.text(item['transcript'][:200] + "..." if len(item['transcript']) > 200 else item['transcript']) - - with col2: - if st.button(f"๐Ÿ”„ Reprocess", key=f"reprocess_{item.get('id')}"): - st.info("Reprocessing feature coming soon!") - - if st.button(f"๐Ÿ“ค Export", key=f"export_{item.get('id')}"): - st.info("Export feature coming soon!") - - # Show generated content - if item.get('wisdom'): - st.markdown("**๐Ÿ’ก Wisdom:**") - st.text_area("", item['wisdom'], height=100, disabled=True, key=f"wisdom_{item.get('id')}") - - if item.get('article'): - st.markdown("**๐Ÿ“ฐ Article:**") - st.text_area("", item['article'], height=150, disabled=True, key=f"article_{item.get('id')}") - - if item.get('social_content'): - st.markdown("**๐Ÿ“ฑ Social Content:**") - st.text_area("", item['social_content'], height=100, disabled=True, key=f"social_{item.get('id')}") + with cols[idx % 2].expander(f"{item.get('title', 'Untitled')} - {item.get('created_at', '')[:10]}"): + if item.get("transcript"): + st.markdown("**Transcript Preview:**") + preview = item["transcript"][:200] + st.text(preview + "..." if len(item["transcript"]) > 200 else preview) + + if item.get("wisdom"): + st.markdown("**Wisdom:**") + st.text_area("", item["wisdom"], height=100, disabled=True, key=f"wisdom_{item.get('id')}") + + if item.get("article"): + st.markdown("**Article:**") + st.text_area( + "", item["article"], height=150, disabled=True, key=f"article_{item.get('id')}" + ) else: - st.info("๐Ÿ“ญ No content found. Process some audio files to see them here!") + st.info("No content found. 
Process some audio files to see them here!") else: - st.error("โŒ Database connection failed") + st.error("Database connection failed") except Exception as e: - st.error(f"โŒ Error loading content library: {e}") - -def show_settings_page(): - """Settings and configuration page""" - st.markdown("### โš™๏ธ Settings & Configuration") - - # API Keys section - st.markdown("#### ๐Ÿ”‘ API Keys") - with st.expander("๐Ÿ”ง API Configuration", expanded=True): - col1, col2 = st.columns(2) - - with col1: - # OpenAI settings - st.markdown("**OpenAI Configuration**") - openai_key = st.text_input("OpenAI API Key", type="password", - value=os.getenv("OPENAI_API_KEY", ""), - help="Your OpenAI API key") - if openai_key: - os.environ["OPENAI_API_KEY"] = openai_key - st.success("โœ… OpenAI key configured") - - # Model selection - model_choice = st.selectbox("OpenAI Model", - ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], - help="Choose the OpenAI model for content generation") - st.session_state.openai_model = model_choice - - with col2: - # Notion settings - st.markdown("**Notion Configuration**") - notion_key = st.text_input("Notion API Key", type="password", - value=os.getenv("NOTION_API_KEY", ""), - help="Your Notion integration token") - if notion_key: - os.environ["NOTION_API_KEY"] = notion_key - - notion_db = st.text_input("Notion Database ID", - value=os.getenv("NOTION_DATABASE_ID", ""), - help="Your Notion database ID") - if notion_db: - os.environ["NOTION_DATABASE_ID"] = notion_db - - if notion_key and notion_db: - st.success("โœ… Notion configured") - - # Pipeline settings - st.markdown("#### ๐Ÿ”„ Pipeline Configuration") - with st.expander("โš™๏ธ Processing Pipeline", expanded=True): - col1, col2 = st.columns(2) - - with col1: - st.markdown("**Core Features**") - auto_notion = st.checkbox("Auto-publish to Notion", - value=st.session_state.get('auto_notion', True)) - st.session_state.auto_notion = auto_notion - - live_stream = st.checkbox( - "Live Streaming", - 
value=st.session_state.get('live_stream', False), - help="Show step-by-step streaming results" - ) - st.session_state.live_stream = live_stream - - large_file_mode = st.checkbox("Enhanced Large File Processing", - value=st.session_state.get('large_file_mode', True), - help="Use FFmpeg for files larger than 25MB") - st.session_state.large_file_mode = large_file_mode - - with col2: - st.markdown("**Quality Settings**") - content_length = st.selectbox("Article Length", - ["Short (500-800 words)", "Medium (800-1200 words)", "Long (1200+ words)"]) - - tone_style = st.selectbox("Content Tone", - ["Professional", "Conversational", "Academic", "Creative"]) - - st.session_state.content_length = content_length - st.session_state.tone_style = tone_style - - templates = [f.replace('.md','') for f in os.listdir('templates')] if os.path.exists('templates') else [] - if templates: - template_choice = st.selectbox("Article Template", templates) - st.session_state.article_template = template_choice - else: - st.session_state.article_template = None - - # System status - st.markdown("#### ๐Ÿ” System Status") - with st.expander("๐Ÿ“Š Connection Status", expanded=False): - if st.button("๐Ÿงช Test All Connections"): - with st.spinner("Testing all connections..."): - # Test OpenAI - try: - if os.getenv("OPENAI_API_KEY"): - st.success("โœ… OpenAI API key configured") - else: - st.error("โŒ OpenAI API key missing") - except Exception as e: - st.error(f"โŒ OpenAI error: {e}") - - # Test Supabase - try: - db = get_supabase_client() - if db and db.test_connection(): - st.success("โœ… Supabase connected") - else: - st.error("โŒ Supabase connection failed") - except Exception as e: - st.error(f"โŒ Supabase error: {e}") - - # Test Notion - try: - if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): - from notion_client import Client - client = Client(auth=os.getenv("NOTION_API_KEY")) - database = client.databases.retrieve(database_id=os.getenv("NOTION_DATABASE_ID")) - 
st.success("โœ… Notion connected") - else: - st.warning("โš ๏ธ Notion not configured") - except Exception as e: - st.error(f"โŒ Notion error: {e}") - -def show_knowledge_base(): - """Knowledge base management page""" - st.markdown("### ๐Ÿง  Knowledge Base") - - # Check if knowledge base files exist - kb_path = "prompts/default/knowledge_base" - - st.markdown(""" - The knowledge base provides context and expertise to enhance content generation. - Add domain-specific information, style guides, and reference materials here. - """) - - # Knowledge base sections - tabs = st.tabs(["๐Ÿ“– View Knowledge", "โž• Add Knowledge", "๐Ÿ”ง Manage Files"]) - - with tabs[0]: - st.markdown("#### ๐Ÿ“– Current Knowledge Base") - try: - if os.path.exists(kb_path): - files = [f for f in os.listdir(kb_path) if f.endswith('.md')] - if files: - selected_file = st.selectbox("Select knowledge file:", files) - if selected_file: - file_path = os.path.join(kb_path, selected_file) - with open(file_path, 'r') as f: - content = f.read() - - st.markdown(f"**File:** `{selected_file}`") - create_enhanced_aurora_content_card("Knowledge Content", content, "text", "๐Ÿ“–") - else: - st.info("๐Ÿ“ญ No knowledge files found") - else: - st.info("๐Ÿ“ Knowledge base directory not found") - except Exception as e: - st.error(f"โŒ Error reading knowledge base: {e}") - - with tabs[1]: - st.markdown("#### โž• Add New Knowledge") - - col1, col2 = st.columns([2, 1]) - with col1: - kb_title = st.text_input("Knowledge Title", placeholder="e.g., 'Marketing Guidelines'") - with col2: - kb_category = st.selectbox("Category", ["General", "Style Guide", "Domain Expertise", "Templates"]) - - kb_content = st.text_area("Knowledge Content", - placeholder="Enter your knowledge content here...", - height=300) - - if st.button("๐Ÿ’พ Save Knowledge", type="primary"): - if kb_title and kb_content: - try: - os.makedirs(kb_path, exist_ok=True) - filename = f"{kb_title.lower().replace(' ', '_')}.md" - file_path = 
os.path.join(kb_path, filename) - - with open(file_path, 'w') as f: - f.write(f"# {kb_title}\n\n") - f.write(f"**Category:** {kb_category}\n\n") - f.write(kb_content) - - st.success(f"โœ… Knowledge saved as `{filename}`") - except Exception as e: - st.error(f"โŒ Error saving knowledge: {e}") - else: - st.error("โŒ Please provide both title and content") - - with tabs[2]: - st.markdown("#### ๐Ÿ”ง Manage Knowledge Files") - - try: - if os.path.exists(kb_path): - files = [f for f in os.listdir(kb_path) if f.endswith('.md')] - if files: - for file in files: - col1, col2, col3 = st.columns([3, 1, 1]) - with col1: - st.markdown(f"๐Ÿ“„ `{file}`") - with col2: - if st.button("๐Ÿ“ Edit", key=f"edit_{file}"): - st.info("Edit functionality coming soon!") - with col3: - if st.button("๐Ÿ—‘๏ธ Delete", key=f"delete_{file}"): - try: - os.remove(os.path.join(kb_path, file)) - st.success(f"โœ… Deleted `{file}`") - st.rerun() - except Exception as e: - st.error(f"โŒ Error deleting file: {e}") - else: - st.info("๐Ÿ“ญ No knowledge files found") - else: - st.info("๐Ÿ“ Knowledge base directory not found") - except Exception as e: - st.error(f"โŒ Error managing files: {e}") - -def show_prompts_page(): - """Prompts management page""" - st.markdown("### ๐Ÿ“ Prompts Management") - - st.markdown(""" - Customize the AI prompts used in each step of the content generation pipeline. - Fine-tune the output style, format, and focus for your specific needs. 
- """) - - # Prompt categories - prompt_types = { - "wisdom": "๐Ÿ’ก Wisdom Extraction", - "outline": "๐Ÿ“‹ Content Outline", - "article": "๐Ÿ“ฐ Article Generation", - "social": "๐Ÿ“ฑ Social Media Posts" - } - - tabs = st.tabs(list(prompt_types.values()) + ["๐Ÿ”ง Advanced"]) - - for i, (prompt_key, prompt_name) in enumerate(prompt_types.items()): - with tabs[i]: - st.markdown(f"#### {prompt_name}") - - # Load current prompt - Map UI keys to actual pipeline files - file_mapping = { - "wisdom": "wisdom_extraction.md", - "outline": "outline_creation.md", - "article": "article_generation.md", - "social": "social_media.md" - } - prompt_file = f"prompts/default/{file_mapping[prompt_key]}" - current_prompt = "" - - try: - if os.path.exists(prompt_file): - with open(prompt_file, 'r') as f: - current_prompt = f.read() - else: - current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." - except Exception as e: - st.error(f"โŒ Error loading prompt: {e}") - - # Edit prompt - new_prompt = st.text_area( - f"Edit {prompt_name} Prompt", - value=current_prompt, - height=400, - help=f"Customize the prompt used for {prompt_key} generation" - ) - - col1, col2, col3 = st.columns([1, 1, 2]) - with col1: - if st.button(f"๐Ÿ’พ Save", key=f"save_{prompt_key}"): - try: - os.makedirs("prompts/default", exist_ok=True) - with open(prompt_file, 'w') as f: - f.write(new_prompt) - st.success(f"โœ… {prompt_name} prompt saved!") - except Exception as e: - st.error(f"โŒ Error saving prompt: {e}") - - with col2: - if st.button(f"๐Ÿ”„ Reset", key=f"reset_{prompt_key}"): - st.info("Reset to default functionality coming soon!") - - with col3: - st.markdown(f"**File:** `{prompt_file}`") - - # Advanced settings - with tabs[-1]: - st.markdown("#### ๐Ÿ”ง Advanced Prompt Settings") - - col1, col2 = st.columns(2) - with col1: - st.markdown("**Global Settings**") - temperature = st.slider("Temperature (Creativity)", 0.0, 1.0, 0.7, 0.1) - max_tokens = st.number_input("Max 
Tokens", 100, 4000, 2000) - - with col2: - st.markdown("**Prompt Templates**") - if st.button("๐Ÿ“ฅ Import Prompt Set"): - st.info("Import functionality coming soon!") - if st.button("๐Ÿ“ค Export Prompt Set"): - st.info("Export functionality coming soon!") - - st.session_state.temperature = temperature - st.session_state.max_tokens = max_tokens - -def create_aurora_tabs(tab_data, default_tab=0): - """Create beautiful Aurora-styled tabs with a simplified, reliable approach""" - import uuid - - # Generate unique ID for this tab group - tab_group_id = f"tabs_{uuid.uuid4().hex[:8]}" - - # Initialize session state for this tab group - if f"{tab_group_id}_active" not in st.session_state: - st.session_state[f"{tab_group_id}_active"] = default_tab - - # Calculate content stats for display - tab_stats = [] - for tab in tab_data: - content = tab.get('content', '') - word_count = len(str(content).split()) if content else 0 - tab_stats.append(word_count) - - # Create the tabs container with Aurora styling - st.markdown(""" -
-
-

- ๐Ÿ“„ - Generated Content -

-
-
- """, unsafe_allow_html=True) - - # Create tab selector using Streamlit's selectbox with Aurora styling - tab_options = [] - for i, tab in enumerate(tab_data): - word_count = tab_stats[i] - tab_options.append(f"{tab['icon']} {tab['title']} ({word_count} words)") - - # Custom styled selectbox - st.markdown(""" - - """, unsafe_allow_html=True) - - # Tab selector - with st.container(): - st.markdown('
', unsafe_allow_html=True) - selected_tab_label = st.selectbox( - "Select Content Type:", - tab_options, - index=st.session_state[f"{tab_group_id}_active"], - key=f"tab_select_{tab_group_id}", - label_visibility="collapsed" - ) - st.markdown('
', unsafe_allow_html=True) - - # Find selected tab index - selected_index = 0 - for i, option in enumerate(tab_options): - if option == selected_tab_label: - selected_index = i - break - - # Update session state - st.session_state[f"{tab_group_id}_active"] = selected_index - - # Display selected content with Aurora styling - if 0 <= selected_index < len(tab_data): - active_tab = tab_data[selected_index] - - # Add quick actions for the active tab - col1, col2, col3, col4 = st.columns([1, 1, 1, 2]) - - with col1: - if st.button("๐Ÿ“‹ Copy", key=f"copy_{tab_group_id}_{selected_index}", use_container_width=True): - st.code(active_tab.get('content', ''), language='text') - st.success("โœ… Content displayed above - copy with Ctrl+A, Ctrl+C") - - with col2: - content = active_tab.get('content', '') - if content: - st.download_button( - label="๐Ÿ’พ Download", - data=content, - file_name=f"{active_tab['title'].lower().replace(' ', '_')}.txt", - mime="text/plain", - key=f"download_{tab_group_id}_{selected_index}", - use_container_width=True - ) - - with col3: - if st.button("๐Ÿ“Š Stats", key=f"stats_{tab_group_id}_{selected_index}", use_container_width=True): - word_count = len(str(content).split()) - char_count = len(str(content)) - st.info(f"๐Ÿ“Š **{active_tab['title']}**: {word_count} words, {char_count} characters") - - # Display the content using our enhanced content card - st.markdown('
', unsafe_allow_html=True) - create_enhanced_aurora_content_card( - title=active_tab['title'], - content=active_tab.get('content', ''), - content_type=active_tab.get('type', 'text'), - icon=active_tab['icon'] - ) - st.markdown('
', unsafe_allow_html=True) - - return selected_index + st.error(f"Error loading content library: {e}") + # === MAIN APP === + + def show_main_app(): - """Main application interface with navigation""" - # Create navigation + """Main application interface with navigation.""" tabs = create_aurora_navigation() - - # Show different pages based on selected tab - with tabs[0]: # Transform + + with tabs[0]: show_transform_page() - - with tabs[1]: # Content Library + with tabs[1]: show_content_library() - - with tabs[2]: # Settings + with tabs[2]: show_settings_page() - - with tabs[3]: # Knowledge Base + with tabs[3]: show_knowledge_base() - - with tabs[4]: # Prompts + with tabs[4]: show_prompts_page() -# === ENTRY POINT === + def main(): - """Application entry point""" + """Application entry point.""" init_session() - + if st.session_state.authenticated: show_main_app() else: show_login() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/core/__init__.py b/core/__init__.py index f9878a5..489621f 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -11,8 +11,4 @@ # Core modules - some legacy imports removed for cleanup from .config import Config, get_config, set_config -__all__ = [ - "Config", - "get_config", - "set_config" -] \ No newline at end of file +__all__ = ["Config", "get_config", "set_config"] diff --git a/core/api_clients.py b/core/api_clients.py new file mode 100644 index 0000000..7a4a110 --- /dev/null +++ b/core/api_clients.py @@ -0,0 +1,75 @@ +"""API client factory functions for WhisperForge. + +Provides lazy-initialised clients for OpenAI, Anthropic, and Grok APIs. +Each factory returns ``None`` when the required API key is missing or the +underlying package is not installed, allowing callers to degrade +gracefully. +""" + +import logging +import os + +logger = logging.getLogger(__name__) + + +def get_openai_client(): + """Get OpenAI client with API key. 
+ + Returns None only when the key is missing or the package isn't installed. + Other errors (e.g. network, auth) propagate so callers can react. + """ + try: + import openai + except ImportError: + logger.error("OpenAI package not installed") + return None + + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + return None + + return openai.OpenAI(api_key=api_key) + + +def get_anthropic_client(): + """Get Anthropic client with API key. + + Returns None only when the key is missing or the package isn't installed. + Other errors propagate so callers can react. + """ + try: + import anthropic + except ImportError: + logger.error("Anthropic package not installed") + return None + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + return None + + return anthropic.Anthropic(api_key=api_key) + + +def get_grok_api_key(): + """Get Grok API key""" + return os.getenv("GROK_API_KEY") + + +def get_grok_client(): + """Get a Grok client via the OpenAI-compatible API. + + Returns an ``openai.OpenAI`` instance pointed at the Grok endpoint, + or ``None`` when the API key is missing or the ``openai`` package is + not installed. 
+ """ + try: + import openai + except ImportError: + logger.error("OpenAI package not installed (required for Grok client)") + return None + + api_key = get_grok_api_key() + if not api_key: + return None + + return openai.OpenAI(api_key=api_key, base_url="https://api.x.ai/v1") diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index dd86db3..622a5ea 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -4,301 +4,331 @@ Provides backward compatibility while adding session persistence """ -import streamlit as st -from typing import Optional, Dict, Any -from .session_manager import get_session_manager -from .supabase_integration import get_supabase_client -from .utils import hash_password, verify_password, legacy_hash_password +from typing import Any + +from postgrest.exceptions import APIError + from core.logging_config import logger +from .exceptions import AuthenticationError, DatabaseError +from .session_manager import get_session_manager +from .utils import hash_password, legacy_hash_password, verify_password + class AuthWrapper: """ Authentication wrapper that provides persistent sessions while maintaining compatibility with existing auth patterns """ - + def __init__(self): self.session_manager = get_session_manager() self.supabase_client = None self._init_supabase() - + def _init_supabase(self): """Initialize Supabase client""" try: from .supabase_integration import get_supabase_client + self.supabase_client = get_supabase_client() - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, "Failed to initialize Supabase") self.supabase_client = None - + except DatabaseError as e: + logger.log_error(e, "Unexpected error initializing Supabase") + self.supabase_client = None + def is_authenticated(self) -> bool: """Check if user is authenticated (backward compatible)""" return self.session_manager.is_authenticated() - - def get_user_id(self) -> Optional[str]: + + def get_user_id(self) -> str | None: """Get current user ID 
(backward compatible)""" return self.session_manager.get_user_id() - - def get_user_email(self) -> Optional[str]: + + def get_user_email(self) -> str | None: """Get current user email (backward compatible)""" return self.session_manager.get_user_email() - + def authenticate_user(self, email: str, password: str) -> bool: """ Authenticate user with Supabase and create persistent session Maintains backward compatibility with existing auth logic """ try: - logger.logger.info(f"Authentication attempt for: {email}") - + logger.info(f"Authentication attempt for: {email}") + if not self.supabase_client: logger.log_error(Exception("Supabase client not available"), "Authentication failed") return False - + # Get user by email from Supabase result = self.supabase_client.client.table("users").select("*").eq("email", email).execute() - + if not result.data: - logger.logger.warning(f"User not found: {email}") + logger.warning(f"User not found: {email}") return False - + user = result.data[0] stored_password = user.get("password", "") - + # Verify password (bcrypt or legacy) password_valid = False - password_migrated = False - - if stored_password.startswith('$2b$'): + + if stored_password.startswith("$2b$"): # bcrypt password password_valid = verify_password(password, stored_password) else: # Legacy password - check and migrate if legacy_hash_password(password) == stored_password: password_valid = True - password_migrated = True - + # Migrate to bcrypt try: new_hash = hash_password(password) - self.supabase_client.client.table("users").update( - {"password": new_hash} - ).eq("id", user["id"]).execute() - logger.logger.info(f"Password migrated to bcrypt for user: {email}") - except Exception as e: + self.supabase_client.client.table("users").update({"password": new_hash}).eq( + "id", user["id"] + ).execute() + logger.info(f"Password migrated to bcrypt for user: {email}") + except (APIError, ValueError) as e: logger.log_error(e, "Failed to migrate password") - + except 
DatabaseError as e: + logger.log_error(e, "Unexpected error migrating password") + if password_valid: # Create persistent session using SessionManager if self.session_manager.authenticate_user(user["id"], email): - logger.logger.info(f"User authenticated successfully: {email}") - + logger.info(f"User authenticated successfully: {email}") + # Load user preferences from database self._load_user_preferences(user["id"]) - + return True else: - logger.log_error(Exception(f"Failed to create persistent session for: {email}"), "Authentication failed") + logger.log_error( + Exception(f"Failed to create persistent session for: {email}"), "Authentication failed" + ) return False else: - logger.logger.warning(f"Invalid password for user: {email}") + logger.warning(f"Invalid password for user: {email}") return False - - except Exception as e: + + except (APIError, ValueError) as e: logger.log_error(e, f"Authentication error for {email}") return False - + except (AuthenticationError, DatabaseError) as e: + logger.log_error(e, f"Unexpected authentication error for {email}") + return False + def register_user(self, email: str, password: str) -> bool: """Register new user and create session""" try: if not self.supabase_client: logger.log_error(Exception("Supabase client not available for registration"), "Registration failed") return False - + # Check if user already exists existing = self.supabase_client.client.table("users").select("id").eq("email", email).execute() if existing.data: - logger.logger.warning(f"User already exists: {email}") + logger.warning(f"User already exists: {email}") return False - + # Hash password hashed_password = hash_password(password) - + # Create user in database - user_data = { - "email": email, - "password": hashed_password, - "created_at": "now()" - } - + user_data = {"email": email, "password": hashed_password, "created_at": "now()"} + result = self.supabase_client.client.table("users").insert(user_data).execute() - + if result.data: user = 
result.data[0] - logger.logger.info(f"User registered successfully: {email}") - + logger.info(f"User registered successfully: {email}") + # Create session for new user if self.session_manager.authenticate_user(user["id"], email): - logger.logger.info(f"Session created for new user: {email}") + logger.info(f"Session created for new user: {email}") return True else: - logger.log_error(Exception(f"Failed to create session for new user: {email}"), "Registration failed") + logger.log_error( + Exception(f"Failed to create session for new user: {email}"), "Registration failed" + ) return False else: logger.log_error(Exception(f"Failed to create user in database: {email}"), "Registration failed") return False - - except Exception as e: + + except (APIError, ValueError) as e: logger.log_error(e, f"Registration error for {email}") return False - + except (AuthenticationError, DatabaseError) as e: + logger.log_error(e, f"Unexpected registration error for {email}") + return False + def logout(self) -> bool: """Log out user and clear session""" try: email = self.get_user_email() if self.session_manager.logout(): - logger.logger.info(f"User logged out: {email}") + logger.info(f"User logged out: {email}") return True else: logger.log_error(Exception(f"Failed to logout user: {email}"), "Logout failed") return False - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, "Logout error") return False - + except (AuthenticationError, DatabaseError) as e: + logger.log_error(e, "Unexpected logout error") + return False + def _load_user_preferences(self, user_id: str): """Load user preferences from database into session""" try: if not self.supabase_client: return - + # Load API keys - api_keys_result = self.supabase_client.client.table("api_keys").select( - "key_name, key_value" - ).eq("user_id", user_id).execute() - + api_keys_result = ( + self.supabase_client.client.table("api_keys") + .select("key_name, key_value") + .eq("user_id", user_id) + .execute() + ) 
+ api_keys = {} for item in api_keys_result.data: api_keys[item["key_name"]] = item["key_value"] - + # Load custom prompts - prompts_result = self.supabase_client.client.table("prompts").select( - "prompt_type, content" - ).eq("user_id", user_id).execute() - + prompts_result = ( + self.supabase_client.client.table("prompts") + .select("prompt_type, content") + .eq("user_id", user_id) + .execute() + ) + prompts = {} for item in prompts_result.data: prompts[item["prompt_type"]] = item["content"] - + # Store in session preferences self.session_manager.set_preference("api_keys", api_keys) self.session_manager.set_preference("custom_prompts", prompts) - - logger.logger.debug(f"Loaded preferences for user: {user_id}") - - except Exception as e: + + logger.debug(f"Loaded preferences for user: {user_id}") + + except (APIError, ValueError) as e: logger.log_error(e, "Failed to load user preferences") - - def get_api_keys(self) -> Dict[str, str]: + except DatabaseError as e: + logger.log_error(e, "Unexpected error loading user preferences") + + def get_api_keys(self) -> dict[str, str]: """Get user API keys from session cache""" return self.session_manager.get_preference("api_keys", {}) - + def update_api_key(self, key_name: str, key_value: str) -> bool: """Update API key in database and session cache""" try: if not self.supabase_client or not self.is_authenticated(): return False - + user_id = self.get_user_id() - + # Update in database - result = self.supabase_client.client.table("api_keys").upsert({ - "user_id": user_id, - "key_name": key_name, - "key_value": key_value, - "updated_at": "now()" - }).execute() - + result = ( + self.supabase_client.client.table("api_keys") + .upsert({"user_id": user_id, "key_name": key_name, "key_value": key_value, "updated_at": "now()"}) + .execute() + ) + if result.data: # Update session cache api_keys = self.get_api_keys() api_keys[key_name] = key_value self.session_manager.set_preference("api_keys", api_keys) - - logger.logger.info(f"API 
key updated: {key_name}") + + logger.info(f"API key updated: {key_name}") return True else: logger.log_error(Exception(f"Failed to update API key: {key_name}"), "API key update failed") return False - - except Exception as e: + + except (APIError, ValueError) as e: logger.log_error(e, f"Error updating API key {key_name}") return False - - def get_custom_prompts(self) -> Dict[str, str]: + except DatabaseError as e: + logger.log_error(e, f"Unexpected error updating API key {key_name}") + return False + + def get_custom_prompts(self) -> dict[str, str]: """Get user custom prompts from session cache""" return self.session_manager.get_preference("custom_prompts", {}) - + def update_custom_prompt(self, prompt_type: str, content: str) -> bool: """Update custom prompt in database and session cache""" try: if not self.supabase_client or not self.is_authenticated(): return False - + user_id = self.get_user_id() - + # Update in database - result = self.supabase_client.client.table("prompts").upsert({ - "user_id": user_id, - "prompt_type": prompt_type, - "content": content, - "updated_at": "now()" - }).execute() - + result = ( + self.supabase_client.client.table("prompts") + .upsert({"user_id": user_id, "prompt_type": prompt_type, "content": content, "updated_at": "now()"}) + .execute() + ) + if result.data: # Update session cache prompts = self.get_custom_prompts() prompts[prompt_type] = content self.session_manager.set_preference("custom_prompts", prompts) - + logger.info(f"Custom prompt updated: {prompt_type}") return True else: - logger.log_error(Exception(f"Failed to update custom prompt: {prompt_type}"), "Custom prompt update failed") + logger.log_error( + Exception(f"Failed to update custom prompt: {prompt_type}"), "Custom prompt update failed" + ) return False - - except Exception as e: + + except (APIError, ValueError) as e: logger.log_error(e, f"Error updating custom prompt {prompt_type}") return False - + except DatabaseError as e: + logger.log_error(e, f"Unexpected 
error updating custom prompt {prompt_type}") + return False + # Session Manager delegation methods - + def get_preference(self, key: str, default: Any = None) -> Any: """Get user preference (delegated to SessionManager)""" return self.session_manager.get_preference(key, default) - + def set_preference(self, key: str, value: Any) -> bool: """Set user preference (delegated to SessionManager)""" return self.session_manager.set_preference(key, value) - + def get_current_page(self) -> str: """Get current page (delegated to SessionManager)""" return self.session_manager.get_current_page() - + def set_current_page(self, page: str) -> None: """Set current page (delegated to SessionManager)""" self.session_manager.set_current_page(page) - + def is_pipeline_active(self) -> bool: """Check if pipeline is active (delegated to SessionManager)""" return self.session_manager.is_pipeline_active() - + def set_pipeline_active(self, active: bool) -> None: """Set pipeline active state (delegated to SessionManager)""" self.session_manager.set_pipeline_active(active) - - def get_session_info(self) -> Dict[str, Any]: + + def get_session_info(self) -> dict[str, Any]: """Get session information for debugging""" return self.session_manager.get_session_info() @@ -306,6 +336,7 @@ def get_session_info(self) -> Dict[str, Any]: # Global auth wrapper instance _auth_wrapper = None + def get_auth() -> AuthWrapper: """Get global authentication wrapper instance""" global _auth_wrapper @@ -319,22 +350,27 @@ def authenticate_user(email: str, password: str) -> bool: """Backward compatible authentication function""" return get_auth().authenticate_user(email, password) + def register_user_supabase(email: str, password: str) -> bool: """Backward compatible registration function""" return get_auth().register_user(email, password) -def get_user_api_keys_supabase() -> Dict[str, str]: + +def get_user_api_keys_supabase() -> dict[str, str]: """Backward compatible API keys function""" return 
get_auth().get_api_keys() + def update_api_key_supabase(key_name: str, key_value: str) -> bool: """Backward compatible API key update function""" return get_auth().update_api_key(key_name, key_value) -def get_user_prompts_supabase() -> Dict[str, str]: + +def get_user_prompts_supabase() -> dict[str, str]: """Backward compatible prompts function""" return get_auth().get_custom_prompts() + def save_user_prompt_supabase(prompt_type: str, content: str) -> bool: """Backward compatible prompt save function""" - return get_auth().update_custom_prompt(prompt_type, content) \ No newline at end of file + return get_auth().update_custom_prompt(prompt_type, content) diff --git a/core/config.py b/core/config.py index b59d038..a9f28e3 100644 --- a/core/config.py +++ b/core/config.py @@ -5,11 +5,11 @@ Centralized configuration for WhisperForge v2.0 """ +import logging import os -from typing import Dict, Optional, Any from dataclasses import dataclass, field from pathlib import Path -import logging +from typing import Any logger = logging.getLogger(__name__) @@ -19,19 +19,19 @@ class AIProviderConfig: """Configuration for AI providers""" name: str - api_key: Optional[str] = None - base_url: Optional[str] = None - models: Dict[str, Any] = field(default_factory=dict) - rate_limits: Dict[str, int] = field(default_factory=dict) + api_key: str | None = None + base_url: str | None = None + models: dict[str, Any] = field(default_factory=dict) + rate_limits: dict[str, int] = field(default_factory=dict) @dataclass class NotionConfig: """Configuration for Notion integration""" - api_key: Optional[str] = None - database_id: Optional[str] = None - template_id: Optional[str] = None + api_key: str | None = None + database_id: str | None = None + template_id: str | None = None @dataclass @@ -46,9 +46,7 @@ class Config: # AI Providers openai: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("openai")) - anthropic: AIProviderConfig = field( - default_factory=lambda: 
AIProviderConfig("anthropic") - ) + anthropic: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("anthropic")) grok: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("grok")) # Integrations @@ -99,9 +97,7 @@ def from_env(cls) -> "Config": def from_file(cls, config_path: Path) -> "Config": """Load configuration (simplified version without YAML)""" # For now, just use environment variables - logger.info( - "Using environment variables for configuration (YAML support disabled)" - ) + logger.info("Using environment variables for configuration (YAML support disabled)") return cls.from_env() def validate(self) -> bool: @@ -116,7 +112,7 @@ def validate(self) -> bool: for dir_path in [self.data_dir, self.prompts_dir, self.temp_dir]: try: dir_path.mkdir(parents=True, exist_ok=True) - except Exception as e: + except OSError as e: errors.append(f"Cannot create directory {dir_path}: {e}") if errors: @@ -139,7 +135,7 @@ def get_available_providers(self) -> list[str]: # Global config instance -_config: Optional[Config] = None +_config: Config | None = None def get_config() -> Config: diff --git a/core/constants.py b/core/constants.py new file mode 100644 index 0000000..a37334b --- /dev/null +++ b/core/constants.py @@ -0,0 +1,70 @@ +""" +Application-wide constants for WhisperForge. + +These are compile-time invariants. For runtime configuration +(API keys, environment, etc.), see core/config.py. 
+""" + +# --------------------------------------------------------------------------- +# File size limits +# --------------------------------------------------------------------------- +MAX_UPLOAD_SIZE_BYTES: int = 2 * 1024 * 1024 * 1024 # 2 GB +LARGE_FILE_THRESHOLD_BYTES: int = 100 * 1024 * 1024 # 100 MB (triggers FFmpeg) +LARGE_FILE_THRESHOLD_MB: int = 20 # Pipeline routing threshold + +# --------------------------------------------------------------------------- +# Audio processing +# --------------------------------------------------------------------------- +CHUNK_DURATION_MINUTES: int = 10 # FFmpeg chunk length +MAX_PARALLEL_CHUNKS_STANDARD: int = 3 # pydub-based processing +MAX_PARALLEL_CHUNKS_FFMPEG: int = 4 # FFmpeg-based processing +AUDIO_SAMPLE_RATE: int = 16_000 # Hz for Whisper input +AUDIO_CHANNELS: int = 1 # Mono for Whisper input + +# --------------------------------------------------------------------------- +# Transcription success thresholds +# --------------------------------------------------------------------------- +CHUNK_SUCCESS_THRESHOLD_STANDARD: float = 0.8 # file_upload.py +CHUNK_SUCCESS_THRESHOLD_FFMPEG: float = 0.7 # large_file_processor.py + +# --------------------------------------------------------------------------- +# Subprocess timeouts (seconds) +# --------------------------------------------------------------------------- +FFMPEG_VERSION_CHECK_TIMEOUT: int = 5 +FFPROBE_TIMEOUT: int = 30 +FFMPEG_CHUNK_TIMEOUT: int = 300 + +# --------------------------------------------------------------------------- +# Database defaults +# --------------------------------------------------------------------------- +DEFAULT_USAGE_QUOTA_MINUTES: int = 60 # New user monthly quota +DEFAULT_CONTENT_QUERY_LIMIT: int = 50 # get_user_content() +DEFAULT_ANALYTICS_DAYS: int = 30 # get_user_analytics() + +# --------------------------------------------------------------------------- +# Content generation +# 
--------------------------------------------------------------------------- +MAX_TOKENS_DEFAULT: int = 1500 +MAX_TOKENS_ARTICLE: int = 2000 +TRANSCRIPT_PREVIEW_LENGTH: int = 2000 +ARTICLE_PREVIEW_LENGTH: int = 1500 + +# --------------------------------------------------------------------------- +# Notion integration +# --------------------------------------------------------------------------- +NOTION_CHUNK_SIZE: int = 1800 +MAX_NOTION_BLOCKS: int = 50 +NOTION_TITLE_EXCERPT_LENGTH: int = 500 +NOTION_TITLE_MAX_TOKENS: int = 30 +NOTION_TITLE_MAX_LENGTH: int = 60 + +# --------------------------------------------------------------------------- +# Content display +# --------------------------------------------------------------------------- +CONTENT_PREVIEW_LENGTH: int = 300 + +# --------------------------------------------------------------------------- +# Session +# --------------------------------------------------------------------------- +SESSION_EXPIRY_DAYS: int = 7 +DEFAULT_PAGE: str = "Transform" diff --git a/core/content_display.py b/core/content_display.py new file mode 100644 index 0000000..d7ade6f --- /dev/null +++ b/core/content_display.py @@ -0,0 +1,411 @@ +""" +Content Display Components +=========================== + +Aurora-styled content cards, tab interfaces, and result views for generated content. 
+""" + +import html as html_mod +import uuid +from datetime import datetime + +import streamlit as st + +from .constants import CONTENT_PREVIEW_LENGTH +from .export import create_text_export, export_to_markdown, export_to_word + + +def create_enhanced_aurora_content_card(title, content, content_type="text", icon="\U0001f4c4"): + """Create an Aurora content card with copy/download/expand functionality.""" + card_id = f"card_{uuid.uuid4().hex[:8]}" + copy_btn_id = f"copy_{uuid.uuid4().hex[:8]}" + expand_btn_id = f"expand_{uuid.uuid4().hex[:8]}" + full_content_id = f"full_{uuid.uuid4().hex[:8]}" + + word_count = len(content.split()) if content else 0 + char_count = len(content) if content else 0 + + preview_length = CONTENT_PREVIEW_LENGTH + needs_expansion = len(content) > preview_length + preview_content = content[:preview_length] + "..." if needs_expansion else content + + type_class = content_type.lower() + + # Escape user-controlled content for safe HTML embedding + safe_title = html_mod.escape(title) + safe_icon = html_mod.escape(icon) + escaped_content = html_mod.escape(content) + escaped_preview = html_mod.escape(preview_content) + js_safe_content = content.replace("\\", "\\\\").replace("`", "\\`").replace("${", "\\${") + + card_html = f""" +
+
+

+ {safe_icon} + {safe_title} +

+
+ + +
+
+ +
+
+ {escaped_preview} +
+ + { + f''' +
+ {html_mod.escape(content[preview_length:])} +
+ ''' + if needs_expansion + else "" + } + +
+
+ \U0001f4ca + {word_count} words \u2022 {char_count} characters +
+ + { + f''' + + ''' + if needs_expansion + else "" + } +
+
+ + + +
+ + + """ + + st.markdown(card_html, unsafe_allow_html=True) + + +def create_aurora_tabs(tab_data, default_tab=0): + """Create Aurora-styled tab interface for content selection.""" + tab_group_id = f"tabs_{uuid.uuid4().hex[:8]}" + + if f"{tab_group_id}_active" not in st.session_state: + st.session_state[f"{tab_group_id}_active"] = default_tab + + tab_stats = [] + for tab in tab_data: + content = tab.get("content", "") + word_count = len(str(content).split()) if content else 0 + tab_stats.append(word_count) + + st.markdown( + """ +
+
+

+ \U0001f4c4 + Generated Content +

+
+
+ """, + unsafe_allow_html=True, + ) + + tab_options = [] + for i, tab in enumerate(tab_data): + word_count = tab_stats[i] + tab_options.append(f"{tab['icon']} {tab['title']} ({word_count} words)") + + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + with st.container(): + st.markdown('
', unsafe_allow_html=True) + selected_tab_label = st.selectbox( + "Select Content Type:", + tab_options, + index=st.session_state[f"{tab_group_id}_active"], + key=f"tab_select_{tab_group_id}", + label_visibility="collapsed", + ) + st.markdown("
", unsafe_allow_html=True) + + selected_index = 0 + for i, option in enumerate(tab_options): + if option == selected_tab_label: + selected_index = i + break + + st.session_state[f"{tab_group_id}_active"] = selected_index + + if 0 <= selected_index < len(tab_data): + active_tab = tab_data[selected_index] + + col1, col2, col3, _col4 = st.columns([1, 1, 1, 2]) + + with col1: + if st.button("\U0001f4cb Copy", key=f"copy_{tab_group_id}_{selected_index}", use_container_width=True): + st.code(active_tab.get("content", ""), language="text") + st.success("Content displayed above - copy with Ctrl+A, Ctrl+C") + + with col2: + content = active_tab.get("content", "") + if content: + st.download_button( + label="\U0001f4be Download", + data=content, + file_name=f"{active_tab['title'].lower().replace(' ', '_')}.txt", + mime="text/plain", + key=f"download_{tab_group_id}_{selected_index}", + use_container_width=True, + ) + + with col3: + if st.button("\U0001f4ca Stats", key=f"stats_{tab_group_id}_{selected_index}", use_container_width=True): + word_count = len(str(content).split()) + char_count = len(str(content)) + st.info(f"\U0001f4ca **{active_tab['title']}**: {word_count} words, {char_count} characters") + + st.markdown('
', unsafe_allow_html=True) + create_enhanced_aurora_content_card( + title=active_tab["title"], + content=active_tab.get("content", ""), + content_type=active_tab.get("type", "text"), + icon=active_tab["icon"], + ) + st.markdown("
", unsafe_allow_html=True) + + return selected_index + + +def show_results(results): + """Display generated content with Aurora styling and export options.""" + if not results: + return + + st.markdown( + """ +
+

Content Generated Successfully!

+

Your audio has been transformed with AI magic

+
+ """, + unsafe_allow_html=True, + ) + + notion_url = results.get("notion_url", "") + if notion_url and notion_url.startswith("https://"): + safe_url = html_mod.escape(notion_url) + st.markdown( + f""" + + """, + unsafe_allow_html=True, + ) + st.markdown("---") + + # Content overview stats + total_words = sum( + len(str(results.get(key, "")).split()) + for key in ["transcript", "wisdom", "outline", "article", "social_content"] + ) + content_types = len([k for k in ["transcript", "wisdom", "outline", "article", "social_content"] if results.get(k)]) + + st.markdown( + f""" +
+
+
+ {total_words:,} + Total Words +
+
+ {content_types} + Content Types +
+
+
+ """, + unsafe_allow_html=True, + ) + + # Result sections + result_sections = [ + ("transcript", "Transcript", results.get("transcript", "")), + ("wisdom", "Wisdom", results.get("wisdom", "")), + ("article", "Article", results.get("article", "")), + ("social_content", "Social Content", results.get("social_content", "")), + ] + + has_content = False + for key, title, content in result_sections: + if not content: + continue + has_content = True + st.markdown(f'
', unsafe_allow_html=True) + word_count = len(str(content).split()) + with st.expander(f"{title} ({word_count} words)", expanded=False): + col1, col2, _col3 = st.columns([1, 1, 3]) + with col1: + if st.button("Copy", key=f"copy_{key}", use_container_width=True): + st.code(content, language="text") + st.success("Content displayed above - copy with Ctrl+A, Ctrl+C") + with col2: + st.download_button( + label="Download", + data=content, + file_name=f"{key}.txt", + mime="text/plain", + key=f"download_{key}", + use_container_width=True, + ) + st.markdown(content) + + if not has_content: + st.warning("No content available to display.") + + # Export section + st.markdown("---") + st.markdown( + """ +
+

Additional Export Options

+
+ """, + unsafe_allow_html=True, + ) + + col1, col2, col3 = st.columns(3) + + with col1: + if st.button("Export as Text", use_container_width=True): + export_content = create_text_export(results) + st.download_button( + label="Download Text File", + data=export_content, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.txt", + mime="text/plain", + key="download_text_export", + use_container_width=True, + ) + + with col2: + if st.button("Export as Markdown", use_container_width=True): + md_content = export_to_markdown(results) + st.download_button( + label="Download Markdown", + data=md_content, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.md", + mime="text/markdown", + key="download_md_export", + use_container_width=True, + ) + + with col3: + try: + if st.button("Export as Word", use_container_width=True): + word_bytes = export_to_word(results) + st.download_button( + label="Download Word", + data=word_bytes, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.docx", + mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + key="download_word_export", + use_container_width=True, + ) + except ImportError: + st.info("Install python-docx for Word export") diff --git a/core/content_generation.py b/core/content_generation.py index d8242b8..1600ac2 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -1,151 +1,244 @@ """ Content Generation Module for WhisperForge -Handles AI-powered content creation including transcription, wisdom extraction, and content generation +Handles AI-powered content creation including transcription, wisdom extraction, and content generation. +Supports multiple AI providers: OpenAI, Anthropic, and Grok (xAI). 
""" import logging import os -from typing import Dict, Optional -from .utils import get_openai_client, get_prompt, DEFAULT_PROMPTS, get_enhanced_prompt +from .api_clients import get_anthropic_client, get_grok_client, get_openai_client +from .constants import ( + ARTICLE_PREVIEW_LENGTH, + MAX_TOKENS_ARTICLE, + MAX_TOKENS_DEFAULT, + TRANSCRIPT_PREVIEW_LENGTH, +) +from .exceptions import APIClientError, PipelineError +from .prompts import get_enhanced_prompt -# Configure logging logger = logging.getLogger(__name__) -def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: - """Extract key insights and wisdom from a transcript""" - try: - # Use enhanced prompt system with automatic KB concatenation - system_prompt = custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) - - openai_client = get_openai_client() - if not openai_client: - return "Error: OpenAI API key is not configured." - - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": f"Here's the transcription to analyze:\n\n{transcript}"} - ], - max_tokens=1500 - ) - - return response.choices[0].message.content - - except Exception as e: - logger.exception("Error in wisdom generation:") - return f"Error generating wisdom: {str(e)}" - -def generate_outline(transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: - """Create a structured outline based on transcript and wisdom""" - try: - # Use enhanced prompt system with automatic KB concatenation - system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) - - content = f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}" - - openai_client = get_openai_client() - if not openai_client: - return "Error: OpenAI API key is not configured." 
- - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=1500 - ) - - return response.choices[0].message.content - - except Exception as e: - logger.exception("Error in outline generation:") - return f"Error generating outline: {str(e)}" - -def generate_article(transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: - """Generate a comprehensive article based on transcript, wisdom, and outline""" +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "whisper-1") + +DEFAULT_MODELS = { + "openai": os.getenv("GPT_MODEL", "gpt-4o"), + "anthropic": os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"), + "grok": os.getenv("GROK_MODEL", "grok-3"), +} + +# Provider priority for fallback ordering +_PROVIDER_PRIORITY = ("openai", "anthropic", "grok") + +# Map provider names to their client factory functions +_CLIENT_FACTORIES = { + "openai": get_openai_client, + "anthropic": get_anthropic_client, + "grok": get_grok_client, +} + + +class ContentGenerationError(PipelineError): + """Raised when content generation fails""" + + pass + + +def _resolve_provider() -> str: + """Determine the active AI provider. + + Checks ``st.session_state.ai_provider`` first (if Streamlit is running), + then falls back to the first provider with a configured API key in + priority order: openai > anthropic > grok. + + Raises ``ContentGenerationError`` if no provider is available. 
+ """ + # Try Streamlit session state first try: - # Use enhanced prompt system with automatic KB concatenation - system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) - - # Limit transcript length to avoid token limits - transcript_excerpt = transcript[:2000] if len(transcript) > 2000 else transcript - content = f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}" - - openai_client = get_openai_client() - if not openai_client: - return "Error: OpenAI API key is not configured." - - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=2000 - ) - - return response.choices[0].message.content - - except Exception as e: - logger.exception("Error in article generation:") - return f"Error generating article: {str(e)}" - -def generate_social_content(wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: - """Generate 5 distinct social media posts""" + import streamlit as st + + preferred = st.session_state.get("ai_provider") + if preferred and _CLIENT_FACTORIES.get(preferred): + client = _CLIENT_FACTORIES[preferred]() + if client is not None: + return preferred + logger.warning("Preferred provider '%s' is not configured (missing API key). Trying fallbacks.", preferred) + except (ImportError, RuntimeError): + # Not running in Streamlit context + pass + + # Fallback: first available provider + for provider in _PROVIDER_PRIORITY: + client = _CLIENT_FACTORIES[provider]() + if client is not None: + return provider + + raise ContentGenerationError( + "No AI provider is configured. Set at least one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GROK_API_KEY." 
+ ) + + +def _call_openai_compatible(provider: str, system_prompt: str, user_content: str, max_tokens: int, model: str) -> str: + """Make a chat completion call using the OpenAI SDK shape. + + Works for both OpenAI and Grok (xAI), since Grok exposes an + OpenAI-compatible API. + """ + client = _CLIENT_FACTORIES[provider]() + if not client: + raise ContentGenerationError(f"{provider} client is not configured.") + + response = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + max_tokens=max_tokens, + ) + return response.choices[0].message.content + + +def _call_anthropic(system_prompt: str, user_content: str, max_tokens: int, model: str) -> str: + """Make a chat completion call using the Anthropic SDK.""" + client = get_anthropic_client() + if not client: + raise ContentGenerationError("Anthropic client is not configured.") + + response = client.messages.create( + model=model, + max_tokens=max_tokens, + system=system_prompt, + messages=[{"role": "user", "content": user_content}], + ) + return response.content[0].text + + +def _call_provider(provider: str, system_prompt: str, user_content: str, max_tokens: int) -> str: + """Dispatch a chat completion call to the appropriate provider backend.""" + model = DEFAULT_MODELS[provider] + + # Check for a session-state model override try: - # Use enhanced prompt system with automatic KB concatenation - system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) - - # Include article in content for richer context - article_excerpt = article[:1500] if len(article) > 1500 else article - content = f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}" - - openai_client = get_openai_client() - if not openai_client: - return "Error: OpenAI API key is not configured." 
- - response = openai_client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=1500 + import streamlit as st + + model_key = f"{provider}_model" + override = st.session_state.get(model_key) + if override: + model = override + except (ImportError, RuntimeError): + pass + + if provider == "anthropic": + return _call_anthropic(system_prompt, user_content, max_tokens, model) + else: + # Both OpenAI and Grok use OpenAI-compatible SDK + return _call_openai_compatible(provider, system_prompt, user_content, max_tokens, model) + + +def _chat_completion(system_prompt: str, user_content: str, max_tokens: int = MAX_TOKENS_DEFAULT) -> str: + """Run a chat completion with automatic provider resolution and fallback. + + Resolves the preferred provider, attempts the call, and on failure + tries remaining providers in priority order before raising. + """ + primary = _resolve_provider() + + # Build ordered list: primary first, then remaining providers + providers_to_try = [primary] + [p for p in _PROVIDER_PRIORITY if p != primary] + + last_error = None + for provider in providers_to_try: + # Skip providers that aren't configured + client = _CLIENT_FACTORIES[provider]() + if client is None: + continue + + try: + return _call_provider(provider, system_prompt, user_content, max_tokens) + except ContentGenerationError: + raise + except (APIClientError, OSError) as e: + logger.warning("Provider '%s' failed: %s. Trying next provider.", provider, e) + last_error = e + + raise ContentGenerationError(f"All AI providers failed. Last error: {last_error}") + + +# --------------------------------------------------------------------------- +# Whisper transcription (OpenAI-only) +# --------------------------------------------------------------------------- + + +def _get_whisper_client(): + """Get an OpenAI client for Whisper transcription. 
+ + Whisper is only available through the OpenAI API, so this always + returns an OpenAI client or raises. + """ + client = get_openai_client() + if not client: + raise ContentGenerationError( + "OpenAI API key is not configured. Whisper transcription requires an OpenAI API key." ) - - return response.choices[0].message.content - - except Exception as e: - logger.exception("Error in social content generation:") - return f"Error generating social content: {str(e)}" + return client + def transcribe_audio(audio_file) -> str: """Transcribe audio using OpenAI Whisper - handles both file paths and file objects""" - try: - openai_client = get_openai_client() - if not openai_client: - return "Error: OpenAI client not available." - - # Handle both file paths (strings) and file objects - if isinstance(audio_file, str): - # It's a file path, open it - with open(audio_file, 'rb') as f: - response = openai_client.audio.transcriptions.create( - model="whisper-1", - file=f - ) - else: - # It's a file object, reset pointer and use directly - audio_file.seek(0) - response = openai_client.audio.transcriptions.create( - model="whisper-1", - file=audio_file - ) - - return response.text - - except Exception as e: - return f"Transcription failed: {str(e)}" \ No newline at end of file + client = _get_whisper_client() + + if isinstance(audio_file, str): + with open(audio_file, "rb") as f: + response = client.audio.transcriptions.create(model=WHISPER_MODEL, file=f) + else: + audio_file.seek(0) + response = client.audio.transcriptions.create(model=WHISPER_MODEL, file=audio_file) + + return response.text + + +# --------------------------------------------------------------------------- +# Content generation functions +# --------------------------------------------------------------------------- + + +def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None) -> str: + """Extract key insights and wisdom from a transcript""" + system_prompt = 
custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) + return _chat_completion(system_prompt, f"Here's the transcription to analyze:\n\n{transcript}") + + +def generate_outline( + transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Create a structured outline based on transcript and wisdom""" + system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) + return _chat_completion(system_prompt, f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}") + + +def generate_article( + transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Generate a comprehensive article based on transcript, wisdom, and outline""" + system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) + transcript_excerpt = ( + transcript[:TRANSCRIPT_PREVIEW_LENGTH] if len(transcript) > TRANSCRIPT_PREVIEW_LENGTH else transcript + ) + return _chat_completion( + system_prompt, + f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}", + max_tokens=MAX_TOKENS_ARTICLE, + ) + + +def generate_social_content( + wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Generate 5 distinct social media posts""" + system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) + article_excerpt = article[:ARTICLE_PREVIEW_LENGTH] if len(article) > ARTICLE_PREVIEW_LENGTH else article + return _chat_completion( + system_prompt, + f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}", + ) diff --git a/core/exceptions.py b/core/exceptions.py new file mode 100644 index 0000000..5f2e5d3 --- /dev/null +++ b/core/exceptions.py @@ -0,0 +1,30 @@ +""" +WhisperForge custom exception hierarchy. 
+ +All project-specific exceptions inherit from WhisperForgeError so callers +can catch the entire family if needed. +""" + + +class WhisperForgeError(Exception): + """Base exception for all WhisperForge errors.""" + + +class DatabaseError(WhisperForgeError): + """Raised when a Supabase / database operation fails.""" + + +class AuthenticationError(WhisperForgeError): + """Raised when authentication or session operations fail.""" + + +class PipelineError(WhisperForgeError): + """Raised when a pipeline step fails.""" + + +class FileProcessingError(WhisperForgeError): + """Raised when file upload or audio processing fails.""" + + +class APIClientError(WhisperForgeError): + """Raised when an external API call fails.""" diff --git a/core/export.py b/core/export.py new file mode 100644 index 0000000..33907bf --- /dev/null +++ b/core/export.py @@ -0,0 +1,143 @@ +""" +Content Export Utilities +======================== + +Export generated content to various file formats (Text, Markdown, Word, PDF). 
+""" + +from datetime import datetime +from io import BytesIO + + +def create_text_export(results: dict) -> str: + """Create a formatted plain-text export of all content.""" + lines = [ + "=" * 60, + "WHISPERFORGE CONTENT EXPORT", + f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "=" * 60, + "", + ] + + sections = [ + ("AUDIO TRANSCRIPT", results.get("transcript", "")), + ("EXTRACTED WISDOM", results.get("wisdom", "")), + ("CONTENT OUTLINE", results.get("outline", "")), + ("FULL ARTICLE", results.get("article", "")), + ("SOCIAL MEDIA CONTENT", results.get("social_content", "")), + ] + + for title, content in sections: + if content: + lines.extend([f"## {title}", "-" * 40, content, "", ""]) + + if results.get("notion_url"): + lines.extend(["## NOTION LINK", "-" * 40, results["notion_url"], ""]) + + return "\n".join(lines) + + +def export_to_markdown(results: dict) -> str: + """Export results to Markdown format.""" + lines = ["# WhisperForge Content Export"] + sections = [ + ("## Transcript", results.get("transcript", "")), + ("## Wisdom", results.get("wisdom", "")), + ("## Outline", results.get("outline", "")), + ("## Article", results.get("article", "")), + ("## Social Content", results.get("social_content", "")), + ] + for title, content in sections: + if content: + lines.extend([title, "", content, ""]) + if results.get("notion_url"): + lines.extend(["## Notion Link", results["notion_url"]]) + return "\n".join(lines) + + +def export_to_word(results: dict) -> bytes: + """Export results to a Word document (.docx).""" + from docx import Document + + doc = Document() + doc.add_heading("WhisperForge Content Export", level=1) + for title, content in [ + ("Transcript", results.get("transcript", "")), + ("Wisdom", results.get("wisdom", "")), + ("Outline", results.get("outline", "")), + ("Article", results.get("article", "")), + ("Social Content", results.get("social_content", "")), + ]: + if content: + doc.add_heading(title, level=2) + 
doc.add_paragraph(content) + if results.get("notion_url"): + doc.add_heading("Notion Link", level=2) + doc.add_paragraph(results["notion_url"]) + + bio = BytesIO() + doc.save(bio) + bio.seek(0) + return bio.read() + + +def export_to_pdf(results: dict) -> bytes: + """Export results to a PDF file.""" + from fpdf import FPDF + + pdf = FPDF() + pdf.add_page() + pdf.set_auto_page_break(auto=True, margin=15) + pdf.set_font("Arial", size=12) + text = create_text_export(results) + for line in text.split("\n"): + pdf.cell(0, 10, txt=line, ln=1) + return pdf.output(dest="S").encode("latin-1") + + +def create_json_download(results: dict) -> str: + """Create JSON format download of streaming results.""" + import json + + return json.dumps(results, indent=2, ensure_ascii=False) + + +def create_markdown_download(results: dict) -> str: + """Create Markdown format download of streaming results.""" + content = "# WhisperForge Content Generation Results\n\n" + + sections = { + "transcription": "## Audio Transcription\n\n", + "wisdom_extraction": "## Key Insights & Wisdom\n\n", + "outline_creation": "## Content Outline\n\n", + "article_creation": "## Full Article\n\n", + "social_content": "## Social Media Content\n\n", + "image_prompts": "## Image Generation Prompts\n\n", + } + + for key, header in sections.items(): + if key in results: + content += header + results[key] + "\n\n---\n\n" + + return content + + +def create_text_download(results: dict) -> str: + """Create plain text format download of streaming results.""" + content = "WHISPERFORGE CONTENT GENERATION RESULTS\n" + content += "=" * 50 + "\n\n" + + sections = { + "transcription": "AUDIO TRANSCRIPTION\n" + "-" * 20 + "\n\n", + "wisdom_extraction": "KEY INSIGHTS & WISDOM\n" + "-" * 20 + "\n\n", + "outline_creation": "CONTENT OUTLINE\n" + "-" * 15 + "\n\n", + "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", + "social_content": "SOCIAL MEDIA CONTENT\n" + "-" * 20 + "\n\n", + "image_prompts": "IMAGE GENERATION 
PROMPTS\n" + "-" * 25 + "\n\n", + } + + for key, header in sections.items(): + if key in results: + content += header + results[key] + "\n\n" + "=" * 50 + "\n\n" + + return content diff --git a/core/file_upload.py b/core/file_upload.py index 374e48f..8ed4657 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -1,46 +1,67 @@ """ -Enhanced File Upload Module for WhisperForge v3.0.0 -Supports large file processing up to 2GB with intelligent chunking and parallel transcription +File Upload Module +================== + +Upload zone UI, progress indicators, and pydub-based chunked processing +for standard audio files. For FFmpeg-based large file processing (2GB+), +see large_file_processor.py. """ -import asyncio import logging import math -import mimetypes import os import tempfile -import threading import time from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Optional, List, Dict, Any, Tuple +import openai import streamlit as st -# Configure logging +from .constants import CHUNK_SUCCESS_THRESHOLD_STANDARD, MAX_PARALLEL_CHUNKS_STANDARD, MAX_UPLOAD_SIZE_BYTES +from .content_generation import ContentGenerationError +from .exceptions import APIClientError, FileProcessingError +from .large_file_processor import EnhancedLargeFileProcessor # noqa: F401 - re-export + logger = logging.getLogger(__name__) +_UPLOAD_CSS_CACHE = None + + +def _load_upload_css(): + """Load upload CSS from static file (cached).""" + global _UPLOAD_CSS_CACHE # noqa: PLW0603 + if _UPLOAD_CSS_CACHE is None: + css_path = os.path.join("static", "css", "upload.css") + if os.path.exists(css_path): + with open(css_path, encoding="utf-8") as f: + _UPLOAD_CSS_CACHE = f"" + else: + _UPLOAD_CSS_CACHE = "" + return _UPLOAD_CSS_CACHE + + class FileUploadManager: - """๐Ÿš€ ENHANCED: Large file upload manager with chunking and parallel processing""" - + """Large file upload manager with chunking and parallel processing.""" + def __init__(self): self.supported_formats = { - 
'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], - 'video': ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'], - 'text': ['.txt', '.md', '.pdf', '.docx'] + "audio": [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma", ".webm", ".mpeg", ".mpga", ".oga"], + "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], + "text": [".txt", ".md", ".pdf", ".docx"], } - self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_size_mb = 20 # 20MB chunks for optimal processing - self.max_parallel_chunks = 4 # Process 4 chunks simultaneously - - def create_large_file_upload_zone(self) -> Optional[Any]: - """Create enhanced upload zone for large files""" - - # Enhanced upload zone HTML with large file support - upload_html = f""" + from .config import get_config + + self.max_file_size = MAX_UPLOAD_SIZE_BYTES + self.chunk_size_mb = get_config().audio_chunk_size_mb + self.max_parallel_chunks = MAX_PARALLEL_CHUNKS_STANDARD + + def create_upload_zone(self): + """Create enhanced upload zone for large files.""" + upload_html = """
-
๐ŸŽต
+
\U0001f3b5

Drop your large audio files here

@@ -53,499 +74,326 @@ def create_large_file_upload_zone(self) -> Optional[Any]:
- ⚡ + \u26a1 Parallel Processing
- 📊 + \U0001f4ca Real-time Progress
- 🔄 + \U0001f504 Auto-retry on Errors
""" - - # Enhanced CSS for large file upload - upload_css = """ - - """ - - st.markdown(upload_css, unsafe_allow_html=True) + + st.markdown(_load_upload_css(), unsafe_allow_html=True) st.markdown(upload_html, unsafe_allow_html=True) - - # File uploader with large file support + uploaded_file = st.file_uploader( "Choose an audio file", - type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga'], + type=["mp3", "wav", "m4a", "aac", "ogg", "flac", "wma", "webm", "mpeg", "mpga", "oga"], help="Upload audio files up to 2GB. Large files will be automatically chunked for optimal processing.", - label_visibility="collapsed" + label_visibility="collapsed", ) - + return uploaded_file - - def process_large_file(self, uploaded_file) -> Dict[str, Any]: - """๐Ÿš€ Process large files with chunking and parallel transcription""" - + + def process_large_file(self, uploaded_file) -> dict: + """Process large files with chunking and parallel transcription.""" if not uploaded_file: return {"success": False, "error": "No file provided"} - - # Validate file + validation = self.validate_large_file(uploaded_file) if not validation["valid"]: return {"success": False, "error": validation["error"]} - + file_size_mb = len(uploaded_file.getvalue()) / (1024 * 1024) - - # Show file info - st.markdown(f""" - ### ๐Ÿ“ File Processing - **File:** {uploaded_file.name} - **Size:** {file_size_mb:.1f} MB - **Processing Strategy:** {"Chunked Parallel Processing" if file_size_mb > self.chunk_size_mb else "Direct Processing"} - """) - + + st.markdown( + f"### \U0001f4c1 File Processing\n" + f"**File:** {uploaded_file.name}\n" + f"**Size:** {file_size_mb:.1f} MB\n" + f"**Processing Strategy:** {'Chunked Parallel Processing' if file_size_mb > self.chunk_size_mb else 'Direct Processing'}" + ) + if file_size_mb <= self.chunk_size_mb: - # Small file - process directly return self._process_small_file(uploaded_file) else: - # Large file - chunk and process in parallel return 
self._process_large_file_chunked(uploaded_file) - - def _process_small_file(self, uploaded_file) -> Dict[str, Any]: - """Process small files directly without chunking""" - + + def _process_small_file(self, uploaded_file) -> dict: + """Process small files directly without chunking.""" progress_container = st.empty() - + with progress_container.container(): - st.markdown("#### ๐ŸŽต Processing Audio") + st.markdown("#### \U0001f3b5 Processing Audio") progress_bar = st.progress(0.0, "Starting transcription...") - + try: - # Import transcription function from .content_generation import transcribe_audio - - # Update progress + progress_bar.progress(0.3, "Transcribing audio...") - - # Transcribe transcript = transcribe_audio(uploaded_file) - + if not transcript or "Error" in transcript: - progress_bar.progress(1.0, "โŒ Transcription failed") + progress_bar.progress(1.0, "\u274c Transcription failed") return {"success": False, "error": transcript or "Transcription failed"} - - progress_bar.progress(1.0, "โœ… Transcription complete!") - - return { - "success": True, - "transcript": transcript, - "chunks": 1, - "total_duration": "N/A" - } - - except Exception as e: - progress_bar.progress(1.0, f"โŒ Error: {str(e)}") + + progress_bar.progress(1.0, "\u2705 Transcription complete!") + return {"success": True, "transcript": transcript, "chunks": 1, "total_duration": "N/A"} + + except (ContentGenerationError, openai.APIError, OSError) as e: + progress_bar.progress(1.0, f"\u274c Error: {e!s}") return {"success": False, "error": str(e)} - - def _process_large_file_chunked(self, uploaded_file) -> Dict[str, Any]: - """๐Ÿš€ Process large files with intelligent chunking and parallel transcription""" - - st.markdown("#### ๐Ÿ”„ Chunked Processing Pipeline") - + except (FileProcessingError, APIClientError) as e: + progress_bar.progress(1.0, f"\u274c Unexpected error: {e!s}") + return {"success": False, "error": str(e)} + + def _process_large_file_chunked(self, uploaded_file) -> dict: + 
"""Process large files with intelligent chunking and parallel transcription.""" + st.markdown("#### \U0001f504 Chunked Processing Pipeline") + try: - # Step 1: Create chunks chunks_info = self._create_audio_chunks(uploaded_file) if not chunks_info["success"]: return chunks_info - + chunks = chunks_info["chunks"] total_chunks = len(chunks) - st.markdown(f"**Created {total_chunks} chunks for parallel processing**") - - # Step 2: Create progress tracking containers + progress_container = st.empty() chunks_container = st.empty() - - # Step 3: Process chunks in parallel with real-time updates - transcription_results = self._transcribe_chunks_parallel( - chunks, progress_container, chunks_container - ) - + + transcription_results = self._transcribe_chunks_parallel(chunks, progress_container, chunks_container) + if not transcription_results["success"]: return transcription_results - - # Step 4: Reassemble transcript + final_transcript = self._reassemble_transcript(transcription_results["chunk_transcripts"]) - - # Step 5: Cleanup temporary files self._cleanup_chunks(chunks) - - # Success! 
+ with progress_container.container(): - st.success("โœ… Large file processing complete!") - st.markdown(f""" - **Processing Summary:** - - Total chunks: {total_chunks} - - Successful transcriptions: {len(transcription_results['chunk_transcripts'])} - - Final transcript length: {len(final_transcript)} characters - """) - + st.success("\u2705 Large file processing complete!") + st.markdown( + f"**Processing Summary:**\n" + f"- Total chunks: {total_chunks}\n" + f"- Successful transcriptions: {len(transcription_results['chunk_transcripts'])}\n" + f"- Final transcript length: {len(final_transcript)} characters" + ) + return { "success": True, "transcript": final_transcript, "chunks": total_chunks, - "processing_time": transcription_results.get("total_time", "N/A") + "processing_time": transcription_results.get("total_time", "N/A"), } - - except Exception as e: + + except (ContentGenerationError, openai.APIError, OSError) as e: logger.exception("Error in large file processing:") - st.error(f"โŒ Large file processing failed: {str(e)}") + st.error(f"\u274c Large file processing failed: {e!s}") return {"success": False, "error": str(e)} - - def _create_audio_chunks(self, uploaded_file) -> Dict[str, Any]: - """Create audio chunks for parallel processing""" - + except (FileProcessingError, APIClientError) as e: + logger.exception("Unexpected error in large file processing:") + st.error(f"\u274c Unexpected large file processing error: {e!s}") + return {"success": False, "error": str(e)} + + def _create_audio_chunks(self, uploaded_file) -> dict: + """Create audio chunks for parallel processing.""" try: - st.markdown("##### ๐Ÿ“‚ Creating Audio Chunks...") - - # Save uploaded file temporarily + from pydub import AudioSegment + + st.markdown("##### \U0001f4c2 Creating Audio Chunks...") + with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as temp_file: uploaded_file.seek(0) temp_file.write(uploaded_file.read()) temp_file_path = 
temp_file.name - - # Load audio with pydub + audio = AudioSegment.from_file(temp_file_path) duration_ms = len(audio) duration_minutes = duration_ms / (1000 * 60) - - # Calculate chunk duration (aim for ~20MB chunks) - chunk_duration_ms = self.chunk_size_mb * 60 * 1000 # Convert MB to minutes to ms + + chunk_duration_ms = self.chunk_size_mb * 60 * 1000 num_chunks = math.ceil(duration_ms / chunk_duration_ms) - + st.markdown(f"**Audio Duration:** {duration_minutes:.1f} minutes") - st.markdown(f"**Creating {num_chunks} chunks of ~{chunk_duration_ms/60000:.1f} minutes each**") - + st.markdown(f"**Creating {num_chunks} chunks of ~{chunk_duration_ms / 60000:.1f} minutes each**") + chunks = [] chunk_progress = st.progress(0.0, "Creating chunks...") - + for i in range(num_chunks): start_ms = i * chunk_duration_ms end_ms = min((i + 1) * chunk_duration_ms, duration_ms) - - # Extract chunk + chunk = audio[start_ms:end_ms] - - # Save chunk to temporary file - chunk_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") - chunk.export(chunk_file.name, format="wav") - - chunks.append({ - "index": i, - "file_path": chunk_file.name, - "start_time": start_ms / 1000, - "end_time": end_ms / 1000, - "duration": (end_ms - start_ms) / 1000 - }) - - # Update progress + with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as chunk_file: + chunk.export(chunk_file.name, format="wav") + + chunks.append( + { + "index": i, + "file_path": chunk_file.name, + "start_time": start_ms / 1000, + "end_time": end_ms / 1000, + "duration": (end_ms - start_ms) / 1000, + } + ) + progress = (i + 1) / num_chunks chunk_progress.progress(progress, f"Created chunk {i + 1}/{num_chunks}") - - # Cleanup original temp file + os.unlink(temp_file_path) - - chunk_progress.progress(1.0, f"โœ… Created {num_chunks} chunks successfully!") - + chunk_progress.progress(1.0, f"\u2705 Created {num_chunks} chunks successfully!") + return {"success": True, "chunks": chunks} - - except Exception as e: + + except 
OSError as e: logger.exception("Error creating audio chunks:") - return {"success": False, "error": f"Failed to create chunks: {str(e)}"} - - def _transcribe_chunks_parallel(self, chunks: List[Dict], progress_container, chunks_container) -> Dict[str, Any]: - """๐Ÿš€ Transcribe chunks in parallel with real-time progress tracking""" - + return {"success": False, "error": f"Failed to create chunks: {e!s}"} + except FileProcessingError as e: + logger.exception("Unexpected error creating audio chunks:") + return {"success": False, "error": f"Unexpected error creating chunks: {e!s}"} + + def _transcribe_chunks_parallel(self, chunks: list[dict], progress_container, chunks_container) -> dict: + """Transcribe chunks in parallel with real-time progress tracking.""" total_chunks = len(chunks) completed_chunks = 0 chunk_transcripts = {} chunk_statuses = {i: "waiting" for i in range(total_chunks)} start_time = time.time() - - # Import transcription function + from .content_generation import get_openai_client - - def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: - """Transcribe a single chunk""" + + def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: + """Transcribe a single chunk.""" try: chunk_index = chunk_info["index"] chunk_file_path = chunk_info["file_path"] - - # Update status to processing chunk_statuses[chunk_index] = "processing" - - # Get OpenAI client + openai_client = get_openai_client() if not openai_client: return chunk_index, "Error: OpenAI API key not configured", False - - # Transcribe chunk + with open(chunk_file_path, "rb") as audio_file: - transcript = openai_client.audio.transcriptions.create( - model="whisper-1", - file=audio_file - ) - + transcript = openai_client.audio.transcriptions.create(model="whisper-1", file=audio_file) + chunk_statuses[chunk_index] = "completed" return chunk_index, transcript.text, True - - except Exception as e: + + except (openai.APIError, OSError) as e: chunk_statuses[chunk_index] = 
"error" logger.exception(f"Error transcribing chunk {chunk_index}:") - return chunk_index, f"Error: {str(e)}", False - - # Process chunks in parallel + return chunk_index, f"Error: {e!s}", False + except APIClientError as e: + chunk_statuses[chunk_index] = "error" + logger.exception(f"Unexpected error transcribing chunk {chunk_index}:") + return chunk_index, f"Unexpected error: {e!s}", False + with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: - # Submit all chunks for processing - future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk["index"] - for chunk in chunks - } - - # Monitor progress in real-time + future_to_chunk = {executor.submit(transcribe_single_chunk, chunk): chunk["index"] for chunk in chunks} + while completed_chunks < total_chunks: - # Update progress display with progress_container.container(): overall_progress = completed_chunks / total_chunks st.progress(overall_progress, f"Transcribing chunks: {completed_chunks}/{total_chunks}") - - # Update individual chunk statuses + with chunks_container.container(): - st.markdown("##### ๐Ÿงฉ Chunk Processing Status") - - # Create columns for chunk status display + st.markdown("##### \U0001f9e9 Chunk Processing Status") cols_per_row = 4 rows = math.ceil(total_chunks / cols_per_row) - + for row in range(rows): cols = st.columns(cols_per_row) for col_idx in range(cols_per_row): chunk_idx = row * cols_per_row + col_idx if chunk_idx < total_chunks: status = chunk_statuses[chunk_idx] - - if status == "waiting": - icon, color, text = "โณ", "#FFA500", "Waiting" - elif status == "processing": - icon, color, text = "๐Ÿ”„", "#00BFFF", "Processing" - elif status == "completed": - icon, color, text = "โœ…", "#00FF7F", "Complete" - else: # error - icon, color, text = "โŒ", "#FF6B6B", "Error" - + status_map = { + "waiting": ("\u23f3", "#FFA500", "Waiting"), + "processing": ("\U0001f504", "#00BFFF", "Processing"), + "completed": ("\u2705", "#00FF7F", "Complete"), + } + icon, 
color, text = status_map.get(status, ("\u274c", "#FF6B6B", "Error")) + with cols[col_idx]: - st.markdown(f""" -
-
{icon}
-
Chunk {chunk_idx + 1}
-
{text}
-
- """, unsafe_allow_html=True) - - # Check for completed futures + st.markdown( + f'
' + f'
{icon}
' + f'
Chunk {chunk_idx + 1}
' + f'
{text}
', + unsafe_allow_html=True, + ) + for future in as_completed(future_to_chunk, timeout=1): chunk_index, transcript, success = future.result() - if success: chunk_transcripts[chunk_index] = transcript - completed_chunks += 1 break - - # Small delay to prevent excessive updates + time.sleep(0.5) - - # Final progress update + with progress_container.container(): - st.progress(1.0, f"โœ… All chunks transcribed: {completed_chunks}/{total_chunks}") - + st.progress(1.0, f"\u2705 All chunks transcribed: {completed_chunks}/{total_chunks}") + processing_time = time.time() - start_time - - # Check if we have enough successful transcriptions successful_chunks = len(chunk_transcripts) - if successful_chunks < total_chunks * 0.8: # Require at least 80% success - return { - "success": False, - "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" - } - + + if successful_chunks < total_chunks * CHUNK_SUCCESS_THRESHOLD_STANDARD: + return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} + return { "success": True, "chunk_transcripts": chunk_transcripts, "total_time": f"{processing_time:.1f}s", - "success_rate": f"{successful_chunks}/{total_chunks}" + "success_rate": f"{successful_chunks}/{total_chunks}", } - - def _reassemble_transcript(self, chunk_transcripts: Dict[int, str]) -> str: - """Reassemble transcript from chunks in correct order""" - - # Sort chunks by index and concatenate + + def _reassemble_transcript(self, chunk_transcripts: dict[int, str]) -> str: + """Reassemble transcript from chunks in correct order.""" sorted_chunks = sorted(chunk_transcripts.items()) - full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) - - return full_transcript - - def _cleanup_chunks(self, chunks: List[Dict]): - """Clean up temporary chunk files""" + return " ".join([transcript for _, transcript in sorted_chunks]) + + def _cleanup_chunks(self, chunks: list[dict]): + """Clean up temporary 
chunk files.""" for chunk in chunks: try: if os.path.exists(chunk["file_path"]): os.unlink(chunk["file_path"]) - except Exception as e: + except OSError as e: logger.warning(f"Failed to cleanup chunk file {chunk['file_path']}: {e}") - - def validate_large_file(self, file) -> Dict[str, Any]: - """Validate large file upload""" + + def validate_large_file(self, file) -> dict: + """Validate large file upload.""" if not file: return {"valid": False, "error": "No file provided"} - - # Check file size + file_size = len(file.getvalue()) if file_size > self.max_file_size: size_gb = file_size / (1024 * 1024 * 1024) return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - - # Check file type + file_extension = os.path.splitext(file.name)[1].lower() - if file_extension not in self.supported_formats['audio']: + if file_extension not in self.supported_formats["audio"]: return {"valid": False, "error": f"Unsupported format: {file_extension}"} - + return {"valid": True} -# Create alias for backward compatibility +# Backward compatibility alias LargeFileUploadManager = FileUploadManager + def create_upload_progress_indicator(filename: str, progress: float = 0.0): - """Create a progress indicator for file upload""" + """Create a progress indicator for file upload.""" progress_html = f"""
@@ -557,661 +405,22 @@ def create_upload_progress_indicator(filename: str, progress: float = 0.0):
- {'Uploading...' if progress < 100 else 'Upload complete!'} + {"Uploading..." if progress < 100 else "Upload complete!"}
""" - - progress_css = """ - - """ - - return st.markdown(progress_css + progress_html, unsafe_allow_html=True) + + css = _load_upload_css() + return st.markdown(css + progress_html, unsafe_allow_html=True) + def simulate_upload_progress(filename: str, duration: float = 2.0): - """Simulate upload progress for demonstration""" + """Simulate upload progress for demonstration.""" progress_container = st.empty() - + steps = 20 for i in range(steps + 1): progress = (i / steps) * 100 - with progress_container: create_upload_progress_indicator(filename, progress) - - if i < steps: - time.sleep(duration / steps) - - return True - - -class EnhancedLargeFileProcessor: - """๐Ÿš€ Enhanced Large File Processor with FFmpeg for 2GB+ files - - Features: - - FFmpeg-based processing for memory efficiency - - Support for files up to 2GB - - Intelligent 10-minute audio chunking - - Parallel transcription with ThreadPoolExecutor - - Memory-efficient streaming without loading entire files into RAM - - Enhanced error handling with automatic fallback - """ - - def __init__(self): - self.supported_formats = { - 'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], - 'video': ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'] # Extract audio from video - } - self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_duration_minutes = 10 # 10-minute chunks optimized for Whisper - self.max_parallel_chunks = 4 # Process 4 chunks simultaneously - self.temp_dir = None - - def check_ffmpeg_availability(self) -> bool: - """Check if FFmpeg is available on the system""" - try: - import subprocess - result = subprocess.run(['ffmpeg', '-version'], - capture_output=True, text=True, timeout=5) - return result.returncode == 0 - except (subprocess.TimeoutExpired, FileNotFoundError, Exception): - return False - - def get_audio_info(self, file_path: str) -> Dict[str, Any]: - """Get audio file information using ffprobe""" - try: - import subprocess 
- import json - - cmd = [ - 'ffprobe', '-v', 'quiet', '-print_format', 'json', - '-show_format', '-show_streams', file_path - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - if result.returncode != 0: - return {"error": f"ffprobe failed: {result.stderr}"} - - data = json.loads(result.stdout) - format_info = data.get('format', {}) - - # Find audio stream - audio_stream = None - for stream in data.get('streams', []): - if stream.get('codec_type') == 'audio': - audio_stream = stream - break - - if not audio_stream: - return {"error": "No audio stream found"} - - duration = float(format_info.get('duration', 0)) - size = int(format_info.get('size', 0)) - - return { - "duration": duration, - "size": size, - "format": format_info.get('format_name', 'unknown'), - "codec": audio_stream.get('codec_name', 'unknown'), - "sample_rate": int(audio_stream.get('sample_rate', 0)), - "channels": int(audio_stream.get('channels', 0)) - } - - except Exception as e: - return {"error": f"Failed to get audio info: {str(e)}"} - - def validate_file(self, uploaded_file) -> Dict[str, Any]: - """Enhanced file validation for large files""" - if not uploaded_file: - return {"valid": False, "error": "No file provided"} - - # Check file size - file_size = len(uploaded_file.getvalue()) - if file_size > self.max_file_size: - size_gb = file_size / (1024 * 1024 * 1024) - return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - - # Check file extension - file_extension = os.path.splitext(uploaded_file.name)[1].lower() - all_formats = self.supported_formats['audio'] + self.supported_formats['video'] - if file_extension not in all_formats: - return {"valid": False, "error": f"Unsupported format: {file_extension}"} - - # Check FFmpeg availability for large files - if file_size > 100 * 1024 * 1024 and not self.check_ffmpeg_availability(): # 100MB+ - return { - "valid": False, - "error": "FFmpeg required for large files but not available. 
Please install FFmpeg." - } - - return { - "valid": True, - "size": file_size, - "size_mb": file_size / (1024 * 1024), - "requires_chunking": file_size > 100 * 1024 * 1024, # Chunk files > 100MB - "format": file_extension - } - - def create_enhanced_upload_interface(self) -> Optional[Any]: - """Create enhanced upload interface for large files""" - - # Enhanced upload zone HTML - upload_html = f""" -
-
-
-
🎵
-
-
-
-

Enhanced Large File Upload

-

Powered by FFmpeg • Up to 2GB • Intelligent Chunking

-
-
- ⚡ - 10-min chunks -
-
- 🔄 - Parallel processing -
-
- 💾 - Memory efficient -
-
- 🎯 - Auto-retry -
-
-
-
- Audio: MP3, WAV, M4A, AAC, OGG, FLAC, WEBM -
-
- Video: MP4, AVI, MOV, MKV, WMV (audio extraction) -
-
-
-
-
- """ - - # Enhanced CSS - upload_css = """ - - """ - - st.markdown(upload_css, unsafe_allow_html=True) - st.markdown(upload_html, unsafe_allow_html=True) - - # Enhanced file uploader - uploaded_file = st.file_uploader( - "Choose an audio or video file", - type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga', - 'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'], - help="Upload audio/video files up to 2GB. Large files automatically use FFmpeg chunking for optimal processing.", - label_visibility="collapsed" - ) - - return uploaded_file - - def process_large_file(self, uploaded_file) -> Dict[str, Any]: - """Enhanced large file processing with FFmpeg""" - - # Validate file first - validation = self.validate_file(uploaded_file) - if not validation["valid"]: - return {"success": False, "error": validation["error"]} - - file_size_mb = validation["size_mb"] - requires_chunking = validation["requires_chunking"] - - st.info(f"๐Ÿ“ **File:** {uploaded_file.name} ({file_size_mb:.1f} MB)") - - if requires_chunking: - st.info("๐Ÿ”ง **Processing Method:** FFmpeg chunking (large file detected)") - return self._process_with_ffmpeg_chunking(uploaded_file) - else: - st.info("โšก **Processing Method:** Standard processing (small file)") - return self._process_standard(uploaded_file) - - def _process_standard(self, uploaded_file) -> Dict[str, Any]: - """Process smaller files using standard method""" - try: - from core.content_generation import transcribe_audio - - # Create temporary file - with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file: - tmp_file.write(uploaded_file.getvalue()) - tmp_file_path = tmp_file.name - - try: - # Transcribe directly - with st.spinner("๐ŸŽฏ Transcribing audio..."): - transcript = transcribe_audio(tmp_file_path) - - return { - "success": True, - "transcript": transcript, - "method": "standard", - "chunks_processed": 1 - } - - finally: - # Cleanup - if 
os.path.exists(tmp_file_path): - os.unlink(tmp_file_path) - - except Exception as e: - return {"success": False, "error": f"Standard processing failed: {str(e)}"} - - def _process_with_ffmpeg_chunking(self, uploaded_file) -> Dict[str, Any]: - """Process large files using FFmpeg chunking""" - - # Setup temporary directory - self.temp_dir = tempfile.mkdtemp(prefix="whisperforge_chunks_") - - try: - # Save uploaded file - input_file_path = os.path.join(self.temp_dir, uploaded_file.name) - with open(input_file_path, 'wb') as f: - f.write(uploaded_file.getvalue()) - - # Get audio information - st.info("๐Ÿ” Analyzing audio file...") - audio_info = self.get_audio_info(input_file_path) - - if "error" in audio_info: - return {"success": False, "error": audio_info["error"]} - - duration = audio_info["duration"] - st.success(f"๐Ÿ“Š **Duration:** {duration/60:.1f} minutes | **Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}") - - # Create chunks using FFmpeg - st.info("โœ‚๏ธ Creating audio chunks...") - chunks_result = self._create_ffmpeg_chunks(input_file_path, duration) - - if not chunks_result["success"]: - return chunks_result - - chunks = chunks_result["chunks"] - st.success(f"โœ… Created {len(chunks)} chunks of ~{self.chunk_duration_minutes} minutes each") - - # Process chunks in parallel - st.info("๐Ÿš€ Starting parallel transcription...") - transcription_result = self._transcribe_chunks_parallel_ffmpeg(chunks) - - if not transcription_result["success"]: - return transcription_result - - # Reassemble transcript - full_transcript = self._reassemble_transcript_ffmpeg(transcription_result["chunk_transcripts"]) - - return { - "success": True, - "transcript": full_transcript, - "method": "ffmpeg_chunking", - "chunks_processed": len(chunks), - "processing_time": transcription_result.get("total_time", "unknown"), - "success_rate": transcription_result.get("success_rate", "unknown") - } - - except Exception as e: - return {"success": False, "error": 
f"FFmpeg processing failed: {str(e)}"} - - finally: - # Cleanup temporary directory - self._cleanup_temp_dir() - - def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> Dict[str, Any]: - """Create audio chunks using FFmpeg""" - try: - import subprocess - - chunk_duration_seconds = self.chunk_duration_minutes * 60 - num_chunks = math.ceil(duration / chunk_duration_seconds) - chunks = [] - - progress_bar = st.progress(0, f"Creating chunks: 0/{num_chunks}") - - for i in range(num_chunks): - start_time = i * chunk_duration_seconds - chunk_filename = f"chunk_{i:03d}.wav" - chunk_path = os.path.join(self.temp_dir, chunk_filename) - - # FFmpeg command to extract chunk with audio optimization - cmd = [ - 'ffmpeg', '-i', input_file_path, - '-ss', str(start_time), - '-t', str(chunk_duration_seconds), - '-ar', '16000', # 16kHz sample rate (optimal for Whisper) - '-ac', '1', # Mono audio - '-acodec', 'pcm_s16le', # PCM format - '-y', # Overwrite output files - chunk_path - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - - if result.returncode != 0: - return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} - - # Verify chunk was created - if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0: - continue # Skip empty chunks - - chunks.append({ - "index": i, - "file_path": chunk_path, - "start_time": start_time, - "duration": min(chunk_duration_seconds, duration - start_time) - }) - - # Update progress - progress_bar.progress((i + 1) / num_chunks, f"Creating chunks: {i + 1}/{num_chunks}") - - return {"success": True, "chunks": chunks} - - except Exception as e: - return {"success": False, "error": f"Chunk creation failed: {str(e)}"} - - def _transcribe_chunks_parallel_ffmpeg(self, chunks: List[Dict]) -> Dict[str, Any]: - """Transcribe chunks in parallel using ThreadPoolExecutor""" - from core.content_generation import transcribe_audio - - chunk_transcripts = {} - total_chunks = 
len(chunks) - - # Create progress containers - progress_container = st.empty() - status_container = st.empty() - - def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: - """Transcribe a single chunk""" - try: - chunk_index = chunk_info["index"] - file_path = chunk_info["file_path"] - - transcript = transcribe_audio(file_path) - return chunk_index, transcript, True - - except Exception as e: - logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") - return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False - - start_time = time.time() - completed_chunks = 0 - - # Process chunks in parallel - with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: - # Submit all chunks - future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk - for chunk in chunks - } - - # Process completed futures - for future in as_completed(future_to_chunk): - chunk_index, transcript, success = future.result() - - if success: - chunk_transcripts[chunk_index] = transcript - - completed_chunks += 1 - - # Update progress - progress = completed_chunks / total_chunks - with progress_container: - st.progress(progress, f"Transcribing: {completed_chunks}/{total_chunks} chunks") - - with status_container: - elapsed = time.time() - start_time - if completed_chunks > 0: - eta = (elapsed / completed_chunks) * (total_chunks - completed_chunks) - st.info(f"โฑ๏ธ Elapsed: {elapsed:.1f}s | ETA: {eta:.1f}s | Success: {len(chunk_transcripts)}/{completed_chunks}") - - processing_time = time.time() - start_time - successful_chunks = len(chunk_transcripts) - - # Check success rate - if successful_chunks < total_chunks * 0.7: # Require at least 70% success - return { - "success": False, - "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" - } - - return { - "success": True, - "chunk_transcripts": chunk_transcripts, - "total_time": f"{processing_time:.1f}s", - "success_rate": 
f"{successful_chunks}/{total_chunks}" - } - - def _reassemble_transcript_ffmpeg(self, chunk_transcripts: Dict[int, str]) -> str: - """Reassemble transcript from chunks in correct order""" - # Sort chunks by index and concatenate - sorted_chunks = sorted(chunk_transcripts.items()) - full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) - - # Clean up transcript - full_transcript = full_transcript.strip() - - return full_transcript - - def _cleanup_temp_dir(self): - """Clean up temporary directory and all files""" - if self.temp_dir and os.path.exists(self.temp_dir): - try: - import shutil - shutil.rmtree(self.temp_dir) - self.temp_dir = None - except Exception as e: - logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") \ No newline at end of file + time.sleep(duration / steps) diff --git a/core/health_check.py b/core/health_check.py index 806100b..9eb69c0 100644 --- a/core/health_check.py +++ b/core/health_check.py @@ -1,10 +1,11 @@ +"""Minimal health check utilities for WhisperForge.""" + from __future__ import annotations -"""Minimal health check utilities for WhisperForge.""" +from dataclasses import asdict, dataclass +from datetime import UTC, datetime +from typing import Any -from dataclasses import dataclass, asdict -from datetime import datetime -from typing import Any, Dict @dataclass class HealthStatus: @@ -28,7 +29,7 @@ class HealthChecker: def get_health_status(self) -> HealthStatus: return HealthStatus( status="healthy", - timestamp=datetime.utcnow().isoformat(), + timestamp=datetime.now(UTC).isoformat(), uptime_seconds=0.0, ) @@ -40,7 +41,7 @@ def check_slo_violations(self) -> list: return [] def get_metrics_json(self) -> str: - data: Dict[str, Any] = { + data: dict[str, Any] = { "health": asdict(self.get_health_status()), "slo_metrics": asdict(self.get_slo_metrics()), } diff --git a/core/large_file_processor.py b/core/large_file_processor.py new file mode 100644 index 0000000..f860885 --- /dev/null +++ 
b/core/large_file_processor.py @@ -0,0 +1,449 @@ +""" +Enhanced Large File Processor +============================= + +FFmpeg-based large file processing with intelligent chunking and parallel transcription. +Supports files up to 2GB with memory-efficient streaming. +""" + +import logging +import math +import os +import subprocess +import tempfile +import time +from concurrent.futures import ThreadPoolExecutor, as_completed + +import openai +import streamlit as st + +from .constants import ( + AUDIO_CHANNELS, + AUDIO_SAMPLE_RATE, + CHUNK_DURATION_MINUTES, + CHUNK_SUCCESS_THRESHOLD_FFMPEG, + FFMPEG_CHUNK_TIMEOUT, + FFMPEG_VERSION_CHECK_TIMEOUT, + FFPROBE_TIMEOUT, + LARGE_FILE_THRESHOLD_BYTES, + MAX_PARALLEL_CHUNKS_FFMPEG, + MAX_UPLOAD_SIZE_BYTES, +) +from .exceptions import APIClientError, FileProcessingError + +logger = logging.getLogger(__name__) + + +class EnhancedLargeFileProcessor: + """Enhanced Large File Processor with FFmpeg for 2GB+ files. + + Features: + - FFmpeg-based processing for memory efficiency + - Support for files up to 2GB + - Intelligent 10-minute audio chunking + - Parallel transcription with ThreadPoolExecutor + - Memory-efficient streaming without loading entire files into RAM + """ + + def __init__(self): + self.supported_formats = { + "audio": [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma", ".webm", ".mpeg", ".mpga", ".oga"], + "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], + } + self.max_file_size = MAX_UPLOAD_SIZE_BYTES + self.chunk_duration_minutes = CHUNK_DURATION_MINUTES + self.max_parallel_chunks = MAX_PARALLEL_CHUNKS_FFMPEG + self.temp_dir = None + + def check_ffmpeg_availability(self) -> bool: + """Check if FFmpeg is available on the system.""" + try: + import subprocess + + result = subprocess.run( + ["ffmpeg", "-version"], capture_output=True, text=True, timeout=FFMPEG_VERSION_CHECK_TIMEOUT + ) # noqa: S603 + return result.returncode == 0 + except (subprocess.TimeoutExpired, FileNotFoundError, 
Exception): + return False + + def get_audio_info(self, file_path: str) -> dict: + """Get audio file information using ffprobe.""" + try: + import json + import subprocess + + cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", file_path] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=FFPROBE_TIMEOUT) # noqa: S603 + if result.returncode != 0: + return {"error": f"ffprobe failed: {result.stderr}"} + + data = json.loads(result.stdout) + format_info = data.get("format", {}) + + audio_stream = None + for stream in data.get("streams", []): + if stream.get("codec_type") == "audio": + audio_stream = stream + break + + if not audio_stream: + return {"error": "No audio stream found"} + + duration = float(format_info.get("duration", 0)) + size = int(format_info.get("size", 0)) + + return { + "duration": duration, + "size": size, + "format": format_info.get("format_name", "unknown"), + "codec": audio_stream.get("codec_name", "unknown"), + "sample_rate": int(audio_stream.get("sample_rate", 0)), + "channels": int(audio_stream.get("channels", 0)), + } + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: + return {"error": f"Failed to get audio info: {e!s}"} + except FileProcessingError as e: + return {"error": f"Unexpected error getting audio info: {e!s}"} + + def validate_file(self, uploaded_file) -> dict: + """Enhanced file validation for large files.""" + if not uploaded_file: + return {"valid": False, "error": "No file provided"} + + file_size = len(uploaded_file.getvalue()) + if file_size > self.max_file_size: + size_gb = file_size / (1024 * 1024 * 1024) + return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} + + file_extension = os.path.splitext(uploaded_file.name)[1].lower() + all_formats = self.supported_formats["audio"] + self.supported_formats["video"] + if file_extension not in all_formats: + return {"valid": False, "error": f"Unsupported format: 
{file_extension}"} + + if file_size > LARGE_FILE_THRESHOLD_BYTES and not self.check_ffmpeg_availability(): + return { + "valid": False, + "error": "FFmpeg required for large files but not available. Please install FFmpeg.", + } + + return { + "valid": True, + "size": file_size, + "size_mb": file_size / (1024 * 1024), + "requires_chunking": file_size > LARGE_FILE_THRESHOLD_BYTES, + "format": file_extension, + } + + def create_enhanced_upload_interface(self): + """Create enhanced upload interface for large files.""" + upload_html = """ +
+
+
+
\U0001f3b5
+
+
+
+

Enhanced Large File Upload

+

Powered by FFmpeg \u2022 Up to 2GB \u2022 Intelligent Chunking

+
+
+ \u26a1 + 10-min chunks +
+
+ \U0001f504 + Parallel processing +
+
+ \U0001f4be + Memory efficient +
+
+ \U0001f3af + Auto-retry +
+
+
+
+ Audio: MP3, WAV, M4A, AAC, OGG, FLAC, WEBM +
+
+ Video: MP4, AVI, MOV, MKV, WMV (audio extraction) +
+
+
+
+
+ """ + + from .file_upload import _load_upload_css + + st.markdown(_load_upload_css(), unsafe_allow_html=True) + st.markdown(upload_html, unsafe_allow_html=True) + + uploaded_file = st.file_uploader( + "Choose an audio or video file", + type=[ + "mp3", + "wav", + "m4a", + "aac", + "ogg", + "flac", + "wma", + "webm", + "mpeg", + "mpga", + "oga", + "mp4", + "avi", + "mov", + "mkv", + "wmv", + "flv", + ], + help="Upload audio/video files up to 2GB. Large files automatically use FFmpeg chunking.", + label_visibility="collapsed", + ) + + return uploaded_file + + def process_large_file(self, uploaded_file) -> dict: + """Enhanced large file processing with FFmpeg.""" + validation = self.validate_file(uploaded_file) + if not validation["valid"]: + return {"success": False, "error": validation["error"]} + + file_size_mb = validation["size_mb"] + requires_chunking = validation["requires_chunking"] + + st.info(f"\U0001f4c1 **File:** {uploaded_file.name} ({file_size_mb:.1f} MB)") + + if requires_chunking: + st.info("\U0001f527 **Processing Method:** FFmpeg chunking (large file detected)") + return self._process_with_ffmpeg_chunking(uploaded_file) + else: + st.info("\u26a1 **Processing Method:** Standard processing (small file)") + return self._process_standard(uploaded_file) + + def _process_standard(self, uploaded_file) -> dict: + """Process smaller files using standard method.""" + try: + from core.content_generation import transcribe_audio + + with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file: + tmp_file.write(uploaded_file.getvalue()) + tmp_file_path = tmp_file.name + + try: + with st.spinner("\U0001f3af Transcribing audio..."): + transcript = transcribe_audio(tmp_file_path) + + return {"success": True, "transcript": transcript, "method": "standard", "chunks_processed": 1} + finally: + if os.path.exists(tmp_file_path): + os.unlink(tmp_file_path) + + except (openai.APIError, OSError) as e: + return {"success": False, 
"error": f"Standard processing failed: {e!s}"} + except (APIClientError, FileProcessingError) as e: + return {"success": False, "error": f"Unexpected standard processing error: {e!s}"} + + def _process_with_ffmpeg_chunking(self, uploaded_file) -> dict: + """Process large files using FFmpeg chunking.""" + self.temp_dir = tempfile.mkdtemp(prefix="whisperforge_chunks_") + + try: + input_file_path = os.path.join(self.temp_dir, uploaded_file.name) + with open(input_file_path, "wb") as f: + f.write(uploaded_file.getvalue()) + + st.info("\U0001f50d Analyzing audio file...") + audio_info = self.get_audio_info(input_file_path) + + if "error" in audio_info: + return {"success": False, "error": audio_info["error"]} + + duration = audio_info["duration"] + st.success( + f"\U0001f4ca **Duration:** {duration / 60:.1f} minutes | " + f"**Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}" + ) + + st.info("\u2702\ufe0f Creating audio chunks...") + chunks_result = self._create_ffmpeg_chunks(input_file_path, duration) + + if not chunks_result["success"]: + return chunks_result + + chunks = chunks_result["chunks"] + st.success(f"\u2705 Created {len(chunks)} chunks of ~{self.chunk_duration_minutes} minutes each") + + st.info("\U0001f680 Starting parallel transcription...") + transcription_result = self._transcribe_chunks_parallel(chunks) + + if not transcription_result["success"]: + return transcription_result + + full_transcript = self._reassemble_transcript(transcription_result["chunk_transcripts"]) + + return { + "success": True, + "transcript": full_transcript, + "method": "ffmpeg_chunking", + "chunks_processed": len(chunks), + "processing_time": transcription_result.get("total_time", "unknown"), + "success_rate": transcription_result.get("success_rate", "unknown"), + } + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: + return {"success": False, "error": f"FFmpeg processing failed: {e!s}"} + except (FileProcessingError, 
APIClientError) as e: + return {"success": False, "error": f"Unexpected FFmpeg processing error: {e!s}"} + + finally: + self._cleanup_temp_dir() + + def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> dict: + """Create audio chunks using FFmpeg.""" + try: + import subprocess + + chunk_duration_seconds = self.chunk_duration_minutes * 60 + num_chunks = math.ceil(duration / chunk_duration_seconds) + chunks = [] + + progress_bar = st.progress(0, f"Creating chunks: 0/{num_chunks}") + + for i in range(num_chunks): + start_time = i * chunk_duration_seconds + chunk_filename = f"chunk_{i:03d}.wav" + chunk_path = os.path.join(self.temp_dir, chunk_filename) + + cmd = [ + "ffmpeg", + "-i", + input_file_path, + "-ss", + str(start_time), + "-t", + str(chunk_duration_seconds), + "-ar", + str(AUDIO_SAMPLE_RATE), + "-ac", + str(AUDIO_CHANNELS), + "-acodec", + "pcm_s16le", + "-y", + chunk_path, + ] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=FFMPEG_CHUNK_TIMEOUT) # noqa: S603 + + if result.returncode != 0: + return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} + + if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0: + continue + + chunks.append( + { + "index": i, + "file_path": chunk_path, + "start_time": start_time, + "duration": min(chunk_duration_seconds, duration - start_time), + } + ) + + progress_bar.progress((i + 1) / num_chunks, f"Creating chunks: {i + 1}/{num_chunks}") + + return {"success": True, "chunks": chunks} + + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: + return {"success": False, "error": f"Chunk creation failed: {e!s}"} + except FileProcessingError as e: + return {"success": False, "error": f"Unexpected chunk creation error: {e!s}"} + + def _transcribe_chunks_parallel(self, chunks: list[dict]) -> dict: + """Transcribe chunks in parallel using ThreadPoolExecutor.""" + from core.content_generation import transcribe_audio + + 
chunk_transcripts = {} + total_chunks = len(chunks) + + progress_container = st.empty() + status_container = st.empty() + + def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: + """Transcribe a single chunk.""" + try: + chunk_index = chunk_info["index"] + file_path = chunk_info["file_path"] + transcript = transcribe_audio(file_path) + return chunk_index, transcript, True + except (openai.APIError, OSError) as e: + logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") + return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False + except APIClientError as e: + logger.error(f"Unexpected error transcribing chunk {chunk_info['index']}: {e}") + return chunk_info["index"], f"[Unexpected error for chunk {chunk_info['index']}]", False + + start_time = time.time() + completed_chunks = 0 + + with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: + future_to_chunk = {executor.submit(transcribe_single_chunk, chunk): chunk for chunk in chunks} + + for future in as_completed(future_to_chunk): + chunk_index, transcript, success = future.result() + + if success: + chunk_transcripts[chunk_index] = transcript + + completed_chunks += 1 + + progress = completed_chunks / total_chunks + with progress_container: + st.progress(progress, f"Transcribing: {completed_chunks}/{total_chunks} chunks") + + with status_container: + elapsed = time.time() - start_time + if completed_chunks > 0: + eta = (elapsed / completed_chunks) * (total_chunks - completed_chunks) + successful = len(chunk_transcripts) + st.info( + f"\u23f1\ufe0f Elapsed: {elapsed:.1f}s | ETA: {eta:.1f}s | Success: {successful}/{completed_chunks}" + ) + + processing_time = time.time() - start_time + successful_chunks = len(chunk_transcripts) + + if successful_chunks < total_chunks * CHUNK_SUCCESS_THRESHOLD_FFMPEG: + return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} + + return { + 
"success": True, + "chunk_transcripts": chunk_transcripts, + "total_time": f"{processing_time:.1f}s", + "success_rate": f"{successful_chunks}/{total_chunks}", + } + + def _reassemble_transcript(self, chunk_transcripts: dict[int, str]) -> str: + """Reassemble transcript from chunks in correct order.""" + sorted_chunks = sorted(chunk_transcripts.items()) + return " ".join([transcript for _, transcript in sorted_chunks]).strip() + + def _cleanup_temp_dir(self): + """Clean up temporary directory and all files.""" + if self.temp_dir and os.path.exists(self.temp_dir): + try: + import shutil + + shutil.rmtree(self.temp_dir) + self.temp_dir = None + except OSError as e: + logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") diff --git a/core/logging_config.py b/core/logging_config.py index 87e8220..e291ee2 100644 --- a/core/logging_config.py +++ b/core/logging_config.py @@ -3,77 +3,65 @@ Provides structured logging with different levels and contexts """ +import json import logging import sys -import os +import traceback from datetime import datetime from pathlib import Path -import json -import traceback -from typing import Dict, Any, Optional +from typing import Any + class WhisperForgeLogger: """Enhanced logger with context and structured output""" - + def __init__(self, name: str = "whisperforge"): self.name = name self.logger = logging.getLogger(name) self.setup_logging() - + def setup_logging(self): """Configure logging with multiple handlers and formats""" - + # Clear existing handlers self.logger.handlers.clear() - + # Set base level self.logger.setLevel(logging.DEBUG) - + # Create logs directory logs_dir = Path("logs") logs_dir.mkdir(exist_ok=True) - + # Console handler with color coding console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) - console_formatter = ColoredFormatter( - '%(asctime)s | %(levelname)s | %(name)s | %(message)s', - datefmt='%H:%M:%S' - ) + console_formatter = 
ColoredFormatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s", datefmt="%H:%M:%S") console_handler.setFormatter(console_formatter) - + # File handler for all logs - file_handler = logging.FileHandler( - logs_dir / f"whisperforge_{datetime.now().strftime('%Y%m%d')}.log" - ) + file_handler = logging.FileHandler(logs_dir / f"whisperforge_{datetime.now().strftime('%Y%m%d')}.log") file_handler.setLevel(logging.DEBUG) file_formatter = logging.Formatter( - '%(asctime)s | %(levelname)s | %(name)s | %(funcName)s:%(lineno)d | %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' + "%(asctime)s | %(levelname)s | %(name)s | %(funcName)s:%(lineno)d | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", ) file_handler.setFormatter(file_formatter) - + # Error handler for critical issues - error_handler = logging.FileHandler( - logs_dir / f"errors_{datetime.now().strftime('%Y%m%d')}.log" - ) + error_handler = logging.FileHandler(logs_dir / f"errors_{datetime.now().strftime('%Y%m%d')}.log") error_handler.setLevel(logging.ERROR) error_handler.setFormatter(file_formatter) - + # Add handlers self.logger.addHandler(console_handler) self.logger.addHandler(file_handler) self.logger.addHandler(error_handler) - - def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None): + + def log_pipeline_step(self, step: str, status: str, data: dict | None = None): """Log pipeline step with structured data""" - log_data = { - "step": step, - "status": status, - "timestamp": datetime.now().isoformat(), - "data": data or {} - } - + log_data = {"step": step, "status": status, "timestamp": datetime.now().isoformat(), "data": data or {}} + if status == "started": self.logger.info(f"๐Ÿ”„ Pipeline step started: {step}") elif status == "completed": @@ -82,89 +70,99 @@ def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None) self.logger.error(f"โŒ Pipeline step failed: {step}") if data and "error" in data: self.logger.error(f"Error details: {data['error']}") - + # Log 
structured data to file self._log_structured(log_data) - + def log_file_upload(self, filename: str, size_mb: float, file_type: str): """Log file upload details""" self.logger.info(f"๐Ÿ“ File uploaded: {filename} ({size_mb:.1f}MB, {file_type})") - self._log_structured({ - "event": "file_upload", - "filename": filename, - "size_mb": size_mb, - "file_type": file_type, - "timestamp": datetime.now().isoformat() - }) - - def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): + self._log_structured( + { + "event": "file_upload", + "filename": filename, + "size_mb": size_mb, + "file_type": file_type, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: int | None = None): """Log AI API requests""" self.logger.info(f"๐Ÿค– AI request: {provider}/{model} for {prompt_type}") - self._log_structured({ - "event": "ai_request", - "provider": provider, - "model": model, - "prompt_type": prompt_type, - "tokens": tokens, - "timestamp": datetime.now().isoformat() - }) - - def log_database_operation(self, operation: str, table: str, success: bool, error: Optional[str] = None): + self._log_structured( + { + "event": "ai_request", + "provider": provider, + "model": model, + "prompt_type": prompt_type, + "tokens": tokens, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_database_operation(self, operation: str, table: str, success: bool, error: str | None = None): """Log database operations""" status = "โœ…" if success else "โŒ" self.logger.info(f"{status} Database {operation}: {table}") if error: self.logger.error(f"Database error: {error}") - - self._log_structured({ - "event": "database_operation", - "operation": operation, - "table": table, - "success": success, - "error": error, - "timestamp": datetime.now().isoformat() - }) - - def log_user_action(self, action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): + + 
self._log_structured( + { + "event": "database_operation", + "operation": operation, + "table": table, + "success": success, + "error": error, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_user_action(self, action: str, user_id: str | None = None, details: dict | None = None): """Log user actions""" self.logger.info(f"๐Ÿ‘ค User action: {action} (user: {user_id or 'anonymous'})") - self._log_structured({ - "event": "user_action", - "action": action, - "user_id": user_id, - "details": details or {}, - "timestamp": datetime.now().isoformat() - }) - - def log_error(self, error: Exception, context: Optional[str] = None): + self._log_structured( + { + "event": "user_action", + "action": action, + "user_id": user_id, + "details": details or {}, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_error(self, error: Exception, context: str | None = None): """Log errors with full context""" error_msg = str(error) error_type = type(error).__name__ - + self.logger.error(f"๐Ÿ’ฅ {error_type}: {error_msg}") if context: self.logger.error(f"Context: {context}") - + # Log full traceback self.logger.error(f"Traceback:\n{traceback.format_exc()}") - - self._log_structured({ - "event": "error", - "error_type": error_type, - "error_message": error_msg, - "context": context, - "traceback": traceback.format_exc(), - "timestamp": datetime.now().isoformat() - }) - - def _log_structured(self, data: Dict[str, Any]): + + self._log_structured( + { + "event": "error", + "error_type": error_type, + "error_message": error_msg, + "context": context, + "traceback": traceback.format_exc(), + "timestamp": datetime.now().isoformat(), + } + ) + + def _log_structured(self, data: dict[str, Any]): """Log structured data to JSON file""" json_log_file = Path("logs") / f"structured_{datetime.now().strftime('%Y%m%d')}.jsonl" - + try: with open(json_log_file, "a") as f: f.write(json.dumps(data) + "\n") - except Exception as e: + except OSError as e: self.logger.error(f"Failed to write 
structured log: {e}") # ------------------------------------------------------------------- @@ -180,41 +178,50 @@ def __getattr__(self, name): """ return getattr(self.logger, name) + class ColoredFormatter(logging.Formatter): """Colored console formatter""" - + COLORS = { - 'DEBUG': '\033[36m', # Cyan - 'INFO': '\033[32m', # Green - 'WARNING': '\033[33m', # Yellow - 'ERROR': '\033[31m', # Red - 'CRITICAL': '\033[35m', # Magenta - 'RESET': '\033[0m' # Reset + "DEBUG": "\033[36m", # Cyan + "INFO": "\033[32m", # Green + "WARNING": "\033[33m", # Yellow + "ERROR": "\033[31m", # Red + "CRITICAL": "\033[35m", # Magenta + "RESET": "\033[0m", # Reset } - + def format(self, record): - log_color = self.COLORS.get(record.levelname, self.COLORS['RESET']) + log_color = self.COLORS.get(record.levelname, self.COLORS["RESET"]) record.levelname = f"{log_color}{record.levelname}{self.COLORS['RESET']}" return super().format(record) + # Global logger instance logger = WhisperForgeLogger() # Convenience functions -def log_pipeline_step(step: str, status: str, data: Optional[Dict] = None): + + +def log_pipeline_step(step: str, status: str, data: dict | None = None): logger.log_pipeline_step(step, status, data) + def log_file_upload(filename: str, size_mb: float, file_type: str): logger.log_file_upload(filename, size_mb, file_type) -def log_ai_request(provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): + +def log_ai_request(provider: str, model: str, prompt_type: str, tokens: int | None = None): logger.log_ai_request(provider, model, prompt_type, tokens) -def log_database_operation(operation: str, table: str, success: bool, error: Optional[str] = None): + +def log_database_operation(operation: str, table: str, success: bool, error: str | None = None): logger.log_database_operation(operation, table, success, error) -def log_user_action(action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): + +def log_user_action(action: str, user_id: str | None 
= None, details: dict | None = None): logger.log_user_action(action, user_id, details) -def log_error(error: Exception, context: Optional[str] = None): - logger.log_error(error, context) \ No newline at end of file + +def log_error(error: Exception, context: str | None = None): + logger.log_error(error, context) diff --git a/core/mcp_integration.py b/core/mcp_integration.py new file mode 100644 index 0000000..45a0a32 --- /dev/null +++ b/core/mcp_integration.py @@ -0,0 +1,111 @@ +""" +MCP (Model Context Protocol) Integration Module +================================================ + +Provides AI models with context about user data and preferences +via Supabase. Extracted from core.supabase_integration to separate +MCP concerns from the base database client. +""" + +import logging +from typing import Any + +from postgrest.exceptions import APIError + +from .exceptions import DatabaseError + +logger = logging.getLogger(__name__) + + +class MCPSupabaseIntegration: + """ + Model Context Protocol integration for Supabase + Provides AI models with context about user data and preferences + """ + + def __init__(self, supabase_client): + self.db = supabase_client + + def get_user_context(self, user_id: int) -> dict[str, Any]: + """Get comprehensive user context for AI models""" + try: + # Get user profile + user = self.db.get_user(user_id) + if not user: + return {} + + # Get user's knowledge base + knowledge_base = self.db.get_user_knowledge_base(user_id) + + # Get user's custom prompts + custom_prompts = self.db.get_user_prompts(user_id) + + # Get recent content history (for style learning) + recent_content = self.db.get_user_content(user_id, limit=10) + + # Get user analytics + analytics = self.db.get_user_analytics(user_id, days=30) + + context = { + "user_profile": { + "subscription_tier": user.get("subscription_tier", "free"), + "usage_quota": user.get("usage_quota", 60), + "usage_current": user.get("usage_current", 0), + "created_at": user.get("created_at"), + }, + 
"knowledge_base": knowledge_base, + "custom_prompts": custom_prompts, + "content_history": recent_content, + "analytics": analytics, + "preferences": { + "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] + if analytics.get("ai_providers_used") + else "openai", + "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo"), + }, + } + + return context + except APIError as e: + logger.error(f"Error getting user context for MCP: {e}") + return {} + except DatabaseError as e: + logger.error(f"Unexpected error getting user context for MCP: {e}") + return {} + + def update_context_from_interaction(self, user_id: int, interaction_data: dict[str, Any]) -> bool: + """Update user context based on AI interaction results""" + try: + # Log the interaction + self.db.log_pipeline_execution(user_id, interaction_data) + + # Update usage if provided + if "duration_seconds" in interaction_data: + self.db.update_user_usage(user_id, interaction_data["duration_seconds"]) + + # Save generated content if provided + if "content" in interaction_data: + self.db.save_content(user_id, interaction_data["content"]) + + return True + except APIError as e: + logger.error(f"Error updating context from interaction: {e}") + return False + except DatabaseError as e: + logger.error(f"Unexpected error updating context from interaction: {e}") + return False + + +# Global instance +_mcp_integration = None + + +def get_mcp_integration() -> MCPSupabaseIntegration: + """Get or create MCP integration instance""" + global _mcp_integration + if _mcp_integration is None: + # Lazy import to avoid circular imports + from .supabase_integration import get_supabase_client + + _mcp_integration = MCPSupabaseIntegration(get_supabase_client()) + return _mcp_integration diff --git a/core/metrics_exporter.py b/core/metrics_exporter.py index 60928fd..88b9997 100644 --- a/core/metrics_exporter.py +++ b/core/metrics_exporter.py @@ -8,15 +8,17 @@ from __future__ import annotations import json 
-from typing import Any, Dict, List +from collections import deque +from typing import Any +MAX_ENTRIES = 1000 -metrics_exporter: Dict[str, Any] = { +metrics_exporter: dict[str, Any] = { "counters": {}, "gauges": {}, "histograms": {}, - "requests": [], - "pipelines": [], + "requests": deque(maxlen=MAX_ENTRIES), + "pipelines": deque(maxlen=MAX_ENTRIES), } @@ -29,10 +31,10 @@ def track_request(duration: float, status_code: int, method: str, path: str) -> "path": path, } ) - metrics_exporter["counters"]["http_requests_total"] = ( - metrics_exporter["counters"].get("http_requests_total", 0) + 1 + metrics_exporter["counters"]["http_requests_total"] = metrics_exporter["counters"].get("http_requests_total", 0) + 1 + metrics_exporter["histograms"].setdefault("http_request_duration_seconds", deque(maxlen=MAX_ENTRIES)).append( + duration ) - metrics_exporter["histograms"].setdefault("http_request_duration_seconds", []).append(duration) def track_pipeline(name: str, duration: float, success: bool) -> None: @@ -45,7 +47,16 @@ def track_pipeline(name: str, duration: float, success: bool) -> None: ) key = "pipeline_success_total" if success else "pipeline_failure_total" metrics_exporter["counters"][key] = metrics_exporter["counters"].get(key, 0) + 1 - metrics_exporter["histograms"].setdefault(f"pipeline_{name}_duration", []).append(duration) + metrics_exporter["histograms"].setdefault(f"pipeline_{name}_duration", deque(maxlen=MAX_ENTRIES)).append(duration) + + +def reset_metrics() -> None: + """Reset all metrics to initial state""" + metrics_exporter["counters"].clear() + metrics_exporter["gauges"].clear() + metrics_exporter["histograms"].clear() + metrics_exporter["requests"].clear() + metrics_exporter["pipelines"].clear() def export_prometheus_metrics() -> str: @@ -65,14 +76,15 @@ def export_prometheus_metrics() -> str: return "\n".join(lines) -def export_json_metrics() -> Dict[str, Any]: +def export_json_metrics() -> dict[str, Any]: """Return the metrics as a 
JSON-serialisable object.""" - return json.loads(json.dumps( - { - "counters": metrics_exporter["counters"], - "gauges": metrics_exporter["gauges"], - "histograms": metrics_exporter["histograms"], - } - )) - + return json.loads( + json.dumps( + { + "counters": metrics_exporter["counters"], + "gauges": metrics_exporter["gauges"], + "histograms": {k: list(v) for k, v in metrics_exporter["histograms"].items()}, + } + ) + ) diff --git a/core/monitoring.py b/core/monitoring.py index 98d2a99..8649108 100644 --- a/core/monitoring.py +++ b/core/monitoring.py @@ -12,8 +12,7 @@ import time import uuid from contextlib import contextmanager -from typing import Any, Dict, Optional - +from typing import Any logger = logging.getLogger(__name__) @@ -22,10 +21,11 @@ # Structured logging # --------------------------------------------------------------------------- + class StructuredLogger(logging.LoggerAdapter): """Minimal structured logger used in tests.""" - def process(self, msg: str, kwargs: Dict[str, Any]) -> tuple[str, Dict[str, Any]]: + def process(self, msg: str, kwargs: dict[str, Any]) -> tuple[str, dict[str, Any]]: extra = kwargs.pop("extra", {}) # treat any additional keyword arguments as structured fields extra.update({k: kwargs.pop(k) for k in list(kwargs)}) @@ -35,7 +35,7 @@ def process(self, msg: str, kwargs: Dict[str, Any]) -> tuple[str, Dict[str, Any] kwargs["extra"] = extra return msg, kwargs - def pipeline_start(self, name: str, user_id: Optional[str] = None) -> None: + def pipeline_start(self, name: str, user_id: str | None = None) -> None: self.info("pipeline start", extra={"pipeline": name, "user_id": user_id}) def pipeline_complete(self, name: str, duration: float, success: bool = True) -> None: @@ -48,7 +48,7 @@ def pipeline_complete(self, name: str, duration: float, success: bool = True) -> structured_logger = StructuredLogger(logger, {}) -def set_trace_context(user_id: Optional[str] = None, operation: Optional[str] = None) -> str: +def 
set_trace_context(user_id: str | None = None, operation: str | None = None) -> str: """Create a trace context and log it.""" trace_id = str(uuid.uuid4()) @@ -60,7 +60,7 @@ def set_trace_context(user_id: Optional[str] = None, operation: Optional[str] = @contextmanager -def trace_operation(operation: str, user_id: Optional[str] = None): +def trace_operation(operation: str, user_id: str | None = None): """Context manager that logs a trace when the block executes.""" trace_id = set_trace_context(user_id=user_id, operation=operation) @@ -77,13 +77,12 @@ def trace_operation(operation: str, user_id: Optional[str] = None): # Error tracking # --------------------------------------------------------------------------- + class ErrorTracker: - def capture_exception(self, exc: Exception, context: Optional[Dict[str, Any]] = None) -> None: - structured_logger.error( - f"captured exception: {exc}", extra={"context": context} - ) + def capture_exception(self, exc: Exception, context: dict[str, Any] | None = None) -> None: + structured_logger.error(f"captured exception: {exc}", extra={"context": context}) - def capture_message(self, message: str, level: str = "info", context: Optional[Dict[str, Any]] = None) -> None: + def capture_message(self, message: str, level: str = "info", context: dict[str, Any] | None = None) -> None: getattr(structured_logger, level)(message, extra={"context": context}) @@ -94,6 +93,7 @@ def capture_message(self, message: str, level: str = "info", context: Optional[D # Performance tracking # --------------------------------------------------------------------------- + class PerformanceTracker: @contextmanager def track_operation(self, name: str): @@ -108,7 +108,7 @@ def track_operation(self, name: str): ) def track_pipeline_performance( - self, name: str, duration: float, success: bool, file_size_mb: Optional[int] = None + self, name: str, duration: float, success: bool, file_size_mb: int | None = None ) -> None: structured_logger.info( "pipeline metrics", 
@@ -141,6 +141,7 @@ def wrapper(*args, **kwargs): # Compatibility helpers used by other modules # --------------------------------------------------------------------------- + def init_monitoring() -> bool: structured_logger.info("Monitoring initialised") return True @@ -154,15 +155,15 @@ def track_performance(operation: str, duration: float) -> None: performance_tracker.track_pipeline_performance(operation, duration, True) -def track_user_action(action: str, user_id: Optional[str] = None) -> None: +def track_user_action(action: str, user_id: str | None = None) -> None: structured_logger.info("user action", extra={"action": action, "user_id": user_id}) -def track_page(page: str, user_id: Optional[str] = None) -> None: +def track_page(page: str, user_id: str | None = None) -> None: structured_logger.info("page view", extra={"page": page, "user_id": user_id}) -def get_health_status() -> Dict[str, Any]: +def get_health_status() -> dict[str, Any]: return {"status": "healthy", "monitoring": "basic", "timestamp": "now"} @@ -178,10 +179,9 @@ def track_error(self, error: Exception, context: str = "") -> None: def track_performance(self, operation: str, duration: float) -> None: track_performance(operation, duration) - def track_user_action(self, action: str, user_id: Optional[str] = None) -> None: + def track_user_action(self, action: str, user_id: str | None = None) -> None: track_user_action(action, user_id) def get_monitoring_manager() -> MonitoringManager: return MonitoringManager() - diff --git a/core/notifications.py b/core/notifications.py index 7157ef9..5b4d26b 100644 --- a/core/notifications.py +++ b/core/notifications.py @@ -3,75 +3,61 @@ Animated toast notifications and status updates """ -import streamlit as st import time -from typing import Optional, Literal -from datetime import datetime +from html import escape as html_escape +from typing import Literal + +import streamlit as st + class NotificationManager: """Manages beautiful notifications and status 
updates""" - + def __init__(self): self.notifications = [] - - def show_toast(self, - message: str, - type: Literal["success", "error", "warning", "info"] = "info", - duration: float = 3.0, - icon: Optional[str] = None): + + def show_toast( + self, + message: str, + notification_type: Literal["success", "error", "warning", "info"] = "info", + duration: float = 3.0, + icon: str | None = None, + ): """Show a beautiful toast notification""" - + # Auto-select icon based on type if not provided if not icon: - icons = { - "success": "โœ…", - "error": "โŒ", - "warning": "โš ๏ธ", - "info": "โ„น๏ธ" - } - icon = icons.get(type, "โ„น๏ธ") - + icons = {"success": "โœ…", "error": "โŒ", "warning": "โš ๏ธ", "info": "โ„น๏ธ"} + icon = icons.get(notification_type, "โ„น๏ธ") + + # Sanitize user-controlled content + safe_message = html_escape(message) + # Color scheme for different types colors = { - "success": { - "bg": "rgba(54, 211, 153, 0.1)", - "border": "rgba(54, 211, 153, 0.4)", - "text": "#36D399" - }, - "error": { - "bg": "rgba(248, 114, 114, 0.1)", - "border": "rgba(248, 114, 114, 0.4)", - "text": "#F87272" - }, - "warning": { - "bg": "rgba(251, 189, 35, 0.1)", - "border": "rgba(251, 189, 35, 0.4)", - "text": "#FBBD23" - }, - "info": { - "bg": "rgba(58, 191, 248, 0.1)", - "border": "rgba(58, 191, 248, 0.4)", - "text": "#3ABFF8" - } + "success": {"bg": "rgba(54, 211, 153, 0.1)", "border": "rgba(54, 211, 153, 0.4)", "text": "#36D399"}, + "error": {"bg": "rgba(248, 114, 114, 0.1)", "border": "rgba(248, 114, 114, 0.4)", "text": "#F87272"}, + "warning": {"bg": "rgba(251, 189, 35, 0.1)", "border": "rgba(251, 189, 35, 0.4)", "text": "#FBBD23"}, + "info": {"bg": "rgba(58, 191, 248, 0.1)", "border": "rgba(58, 191, 248, 0.4)", "text": "#3ABFF8"}, } - - color = colors[type] - + + color = colors[notification_type] + toast_html = f""" -
+
{icon} - {message} + {safe_message}
- + - + """ - + return st.markdown(toast_html, unsafe_allow_html=True) - - def show_status_indicator(self, - status: str, - details: str = "", - animated: bool = True): + + def show_status_indicator(self, status: str, details: str = "", animated: bool = True): """Show a status indicator with optional animation""" - + # Status configurations status_configs = { "processing": { "icon": "๐Ÿ”„", "color": "#3ABFF8", "bg": "rgba(58, 191, 248, 0.1)", - "border": "rgba(58, 191, 248, 0.3)" + "border": "rgba(58, 191, 248, 0.3)", }, "complete": { - "icon": "โœ…", + "icon": "โœ…", "color": "#36D399", "bg": "rgba(54, 211, 153, 0.1)", - "border": "rgba(54, 211, 153, 0.3)" + "border": "rgba(54, 211, 153, 0.3)", }, "error": { "icon": "โŒ", - "color": "#F87272", + "color": "#F87272", "bg": "rgba(248, 114, 114, 0.1)", - "border": "rgba(248, 114, 114, 0.3)" + "border": "rgba(248, 114, 114, 0.3)", }, "waiting": { "icon": "โณ", "color": "#FBBD23", - "bg": "rgba(251, 189, 35, 0.1)", - "border": "rgba(251, 189, 35, 0.3)" - } + "bg": "rgba(251, 189, 35, 0.1)", + "border": "rgba(251, 189, 35, 0.3)", + }, } - + config = status_configs.get(status, status_configs["waiting"]) animation_class = "status-animated" if animated else "" - + status_html = f"""
- {config['icon']} + {config["icon"]} {status.title()}
- {f'
{details}
' if details else ''} + {f'
{details}
' if details else ""}
- + """ - + return st.markdown(status_html, unsafe_allow_html=True) + # Global notification manager instance notification_manager = NotificationManager() # Convenience functions + + def show_success(message: str, duration: float = 3.0): """Show success notification""" return notification_manager.show_toast(message, "success", duration) + def show_error(message: str, duration: float = 5.0): """Show error notification""" return notification_manager.show_toast(message, "error", duration) + def show_warning(message: str, duration: float = 4.0): """Show warning notification""" return notification_manager.show_toast(message, "warning", duration) + def show_info(message: str, duration: float = 3.0): """Show info notification""" return notification_manager.show_toast(message, "info", duration) + def show_processing_status(details: str = ""): """Show processing status indicator""" return notification_manager.show_status_indicator("processing", details, True) + def show_complete_status(details: str = ""): """Show completion status indicator""" return notification_manager.show_status_indicator("complete", details, False) + def show_error_status(details: str = ""): """Show error status indicator""" return notification_manager.show_status_indicator("error", details, False) + def create_step_completion_animation(): """Create a beautiful step completion animation""" animation_html = """ @@ -277,7 +270,7 @@ def create_step_completion_animation():
Step Complete!
- + """ - + return st.markdown(animation_html, unsafe_allow_html=True) + def create_loading_spinner(text: str = "Processing..."): """Create a beautiful loading spinner""" spinner_html = f""" @@ -356,7 +350,7 @@ def create_loading_spinner(text: str = "Processing..."):
{text}
- + """ - - return st.markdown(spinner_html, unsafe_allow_html=True) \ No newline at end of file + + return st.markdown(spinner_html, unsafe_allow_html=True) diff --git a/core/notion_integration.py b/core/notion_integration.py new file mode 100644 index 0000000..f90332a --- /dev/null +++ b/core/notion_integration.py @@ -0,0 +1,270 @@ +""" +Notion Integration +================== + +Create formatted Notion pages from WhisperForge content. +""" + +import logging +import os +from datetime import datetime + +from .constants import ( + MAX_NOTION_BLOCKS, + NOTION_CHUNK_SIZE, + NOTION_TITLE_EXCERPT_LENGTH, + NOTION_TITLE_MAX_LENGTH, + NOTION_TITLE_MAX_TOKENS, +) +from .content_generation import ContentGenerationError, _chat_completion +from .exceptions import APIClientError + +logger = logging.getLogger(__name__) + + +def generate_ai_title(transcript: str) -> str: + """Generate a concise AI-powered title from a transcript excerpt.""" + try: + system_prompt = ( + "Generate a concise, descriptive title (max 60 characters) for an audio transcript. " + "The title should be clear, specific, professional, and capture the main topic. " + "No quotes or special characters. Respond with only the title." + ) + user_content = f"Here is the transcript excerpt:\n\n{transcript[:NOTION_TITLE_EXCERPT_LENGTH]}..." + + title = _chat_completion(system_prompt, user_content, max_tokens=NOTION_TITLE_MAX_TOKENS) + return title.strip().replace('"', "").replace("'", "")[:NOTION_TITLE_MAX_LENGTH] + except ContentGenerationError: + return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" + except APIClientError: + return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" + + +def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: + """Create a Notion page with WhisperForge content. + + Returns the page URL on success, or None on failure. 
+ """ + try: + from notion_client import Client + except ImportError: + logger.warning("Install notion-client to enable Notion publishing: pip install notion-client") + return None + + try: + api_key = os.getenv("NOTION_API_KEY") + database_id = os.getenv("NOTION_DATABASE_ID") + + if not api_key or not database_id: + logger.warning("Notion not configured. Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") + return None + + client = Client(auth=api_key) + children = _build_notion_children(title, content_data) + + response = client.pages.create( + parent={"database_id": database_id}, + icon={"type": "emoji", "emoji": "\U0001f30c"}, + properties={"Name": {"title": [{"text": {"content": title[:100]}}]}}, + children=children[:MAX_NOTION_BLOCKS], + ) + + if response and "id" in response: + page_id = response["id"] + return f"https://notion.so/{page_id.replace('-', '')}" + + return None + + except (ContentGenerationError, OSError) as e: + logger.error(f"Notion publishing failed: {e!s}") + return None + except APIClientError as e: + logger.error(f"Unexpected error in Notion publishing: {e!s}") + return None + + +def _build_notion_children(title: str, content_data: dict) -> list[dict]: + """Build the list of Notion block children for a page.""" + children: list[dict] = [] + + # Header + children.append( + { + "type": "heading_1", + "heading_1": { + "rich_text": [ + {"type": "text", "text": {"content": "\U0001f30c "}, "annotations": {"color": "blue"}}, + {"type": "text", "text": {"content": title}, "annotations": {"bold": True}}, + ] + }, + } + ) + + # Creation info + children.append( + { + "type": "paragraph", + "paragraph": { + "rich_text": [ + {"type": "text", "text": {"content": "Generated with "}}, + { + "type": "text", + "text": {"content": "WhisperForge Aurora"}, + "annotations": {"bold": True, "color": "blue"}, + }, + { + "type": "text", + "text": {"content": f" \u2022 {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}, + }, + ] + }, + } + ) + + 
children.append({"type": "divider", "divider": {}}) + + # Wisdom callout + if content_data.get("wisdom"): + children.append( + { + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": "Key Insights & Wisdom"}}, + {"type": "text", "text": {"content": f"\n\n{content_data['wisdom'][:NOTION_CHUNK_SIZE]}"}}, + ], + "color": "purple_background", + "icon": {"type": "emoji", "emoji": "\U0001f4a1"}, + }, + } + ) + + # Content sections as toggles + sections = [ + ("\U0001f4dd Transcript", content_data.get("transcript")), + ("\U0001f4a1 Wisdom", content_data.get("wisdom")), + ("\U0001f50d Research Links", content_data.get("research")), + ("\U0001f4cb Outline", content_data.get("outline")), + ("\U0001f4f0 Article", content_data.get("article")), + ("\U0001f4f1 Social Content", content_data.get("social_content")), + ] + + for section_title, section_content in sections: + if not section_content: + continue + + if section_title == "\U0001f50d Research Links" and isinstance(section_content, dict): + children.append(_build_research_toggle(section_title, section_content)) + elif isinstance(section_content, str): + chunks = [ + section_content[i : i + NOTION_CHUNK_SIZE] for i in range(0, len(section_content), NOTION_CHUNK_SIZE) + ] + children.append( + { + "type": "toggle", + "toggle": { + "rich_text": [{"type": "text", "text": {"content": section_title}}], + "children": [ + { + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": chunk}}]}, + } + for chunk in chunks[:5] + ], + }, + } + ) + + # Footer + children.extend( + [ + {"type": "divider", "divider": {}}, + { + "type": "callout", + "callout": { + "rich_text": [ + { + "type": "text", + "text": {"content": "Content Generation Complete"}, + "annotations": {"bold": True}, + }, + {"type": "text", "text": {"content": "\n\nAI Pipeline: completed successfully"}}, + { + "type": "text", + "text": {"content": f"\nGenerated: {datetime.now().strftime('%B %d, %Y at %I:%M 
%p')}"}, + }, + {"type": "text", "text": {"content": "\nPowered by WhisperForge Aurora"}}, + ], + "color": "green_background", + "icon": {"type": "emoji", "emoji": "\u2705"}, + }, + }, + ] + ) + + return children + + +def _build_research_toggle(section_title: str, section_content: dict) -> dict: + """Build a Notion toggle block for research data.""" + research_children: list[dict] = [] + entities = section_content.get("entities", []) + + if entities: + for entity in entities[:5]: + entity_name = entity.get("name", "Unknown Entity") + why_matters = entity.get("why_matters", "No description available") + links = entity.get("links", []) + + research_children.append( + { + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": entity_name}, "annotations": {"bold": True}}, + {"type": "text", "text": {"content": f"\n{why_matters}"}}, + ], + "color": "blue_background", + "icon": {"type": "emoji", "emoji": "\U0001f52c"}, + }, + } + ) + + for link in links[:3]: + link_title = link.get("title", "Link") + link_desc = link.get("description", "") + is_gem = link.get("is_gem", False) + gem_icon = "\U0001f48e" if is_gem else "\U0001f517" + color = "orange" if is_gem else "default" + + research_children.append( + { + "type": "bulleted_list_item", + "bulleted_list_item": { + "rich_text": [ + {"type": "text", "text": {"content": f"{gem_icon} "}, "annotations": {"color": color}}, + {"type": "text", "text": {"content": link_title}, "annotations": {"bold": True}}, + { + "type": "text", + "text": {"content": f" - {link_desc}"}, + "annotations": {"italic": True}, + }, + ] + }, + } + ) + else: + research_children.append( + { + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": "No research entities found."}}]}, + } + ) + + return { + "type": "toggle", + "toggle": { + "rich_text": [{"type": "text", "text": {"content": section_title}}], + "children": research_children, + }, + } diff --git a/core/pages.py b/core/pages.py 
new file mode 100644 index 0000000..aef222a --- /dev/null +++ b/core/pages.py @@ -0,0 +1,393 @@ +""" +Application Pages +================= + +Secondary page functions for settings, knowledge base, and prompt management. +""" + +import os + +import streamlit as st +from postgrest.exceptions import APIError + +from .auth_wrapper import get_auth +from .content_display import create_enhanced_aurora_content_card +from .exceptions import APIClientError, DatabaseError, FileProcessingError +from .supabase_integration import get_supabase_client +from .utils import safe_path + + +def _get_api_key(env_var: str) -> str: + """Get API key from auth session cache (if authenticated) or environment.""" + auth = get_auth() + if auth.is_authenticated(): + stored = auth.get_api_keys() + if env_var in stored: + return stored[env_var] + return os.getenv(env_var, "") + + +def _set_api_key(env_var: str, value: str) -> None: + """Persist API key via auth wrapper and set in environment for SDK clients.""" + os.environ[env_var] = value + auth = get_auth() + if auth.is_authenticated(): + auth.update_api_key(env_var, value) + + +def show_settings_page(): + """Settings and configuration page.""" + st.markdown("### Settings & Configuration") + + st.markdown("#### AI Provider") + with st.expander("AI Provider Selection", expanded=True): + provider_options = {"OpenAI": "openai", "Anthropic": "anthropic", "Grok (xAI)": "grok"} + current_provider = st.session_state.get("ai_provider", "openai") + # Find display name for current provider + current_label = next((label for label, val in provider_options.items() if val == current_provider), "OpenAI") + selected_label = st.selectbox( + "Active AI Provider", + list(provider_options.keys()), + index=list(provider_options.keys()).index(current_label), + help="Select the AI provider for content generation (wisdom, outline, article, social posts).", + ) + st.session_state.ai_provider = provider_options[selected_label] + + st.info( + "Whisper transcription 
always requires an OpenAI API key, regardless of which " + "AI provider is selected for content generation." + ) + + st.markdown("#### API Keys & Models") + with st.expander("API Configuration", expanded=True): + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown("**OpenAI**") + openai_key = st.text_input( + "OpenAI API Key", type="password", value=_get_api_key("OPENAI_API_KEY"), help="Your OpenAI API key" + ) + if openai_key: + _set_api_key("OPENAI_API_KEY", openai_key) + st.success("OpenAI key configured") + + openai_model = st.selectbox( + "OpenAI Model", + ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], + help="Model used when OpenAI is the active provider.", + ) + st.session_state.openai_model = openai_model + + with col2: + st.markdown("**Anthropic**") + anthropic_key = st.text_input( + "Anthropic API Key", + type="password", + value=_get_api_key("ANTHROPIC_API_KEY"), + help="Your Anthropic API key", + ) + if anthropic_key: + _set_api_key("ANTHROPIC_API_KEY", anthropic_key) + st.success("Anthropic key configured") + + anthropic_model = st.selectbox( + "Anthropic Model", + ["claude-sonnet-4-20250514", "claude-haiku-4-20250514", "claude-3-5-sonnet-20241022"], + help="Model used when Anthropic is the active provider.", + ) + st.session_state.anthropic_model = anthropic_model + + with col3: + st.markdown("**Grok (xAI)**") + grok_key = st.text_input( + "Grok API Key", + type="password", + value=_get_api_key("GROK_API_KEY"), + help="Your xAI / Grok API key", + ) + if grok_key: + _set_api_key("GROK_API_KEY", grok_key) + st.success("Grok key configured") + + grok_model = st.selectbox( + "Grok Model", + ["grok-3", "grok-3-mini", "grok-2"], + help="Model used when Grok is the active provider.", + ) + st.session_state.grok_model = grok_model + + st.markdown("#### Notion Integration") + with st.expander("Notion Configuration", expanded=False): + notion_key = st.text_input( + "Notion API Key", + type="password", + value=_get_api_key("NOTION_API_KEY"), + help="Your 
Notion integration token", + ) + if notion_key: + _set_api_key("NOTION_API_KEY", notion_key) + + notion_db = st.text_input("Notion Database ID", value=_get_api_key("NOTION_DATABASE_ID")) + if notion_db: + _set_api_key("NOTION_DATABASE_ID", notion_db) + + if notion_key and notion_db: + st.success("Notion configured") + + st.markdown("#### Pipeline Configuration") + with st.expander("Processing Pipeline", expanded=True): + col1, col2 = st.columns(2) + + with col1: + st.markdown("**Core Features**") + st.session_state.auto_notion = st.checkbox( + "Auto-publish to Notion", value=st.session_state.get("auto_notion", True) + ) + st.session_state.live_stream = st.checkbox( + "Live Streaming", value=st.session_state.get("live_stream", False) + ) + st.session_state.large_file_mode = st.checkbox( + "Enhanced Large File Processing", value=st.session_state.get("large_file_mode", True) + ) + + with col2: + st.markdown("**Quality Settings**") + st.session_state.content_length = st.selectbox( + "Article Length", ["Short (500-800 words)", "Medium (800-1200 words)", "Long (1200+ words)"] + ) + st.session_state.tone_style = st.selectbox( + "Content Tone", ["Professional", "Conversational", "Academic", "Creative"] + ) + + templates = [f.replace(".md", "") for f in os.listdir("templates")] if os.path.exists("templates") else [] + if templates: + st.session_state.article_template = st.selectbox("Article Template", templates) + else: + st.session_state.article_template = None + + st.markdown("#### System Status") + with st.expander("Connection Status", expanded=False): + if st.button("Test All Connections"): + with st.spinner("Testing all connections..."): + # AI provider keys + ai_providers = { + "OpenAI": "OPENAI_API_KEY", + "Anthropic": "ANTHROPIC_API_KEY", + "Grok": "GROK_API_KEY", + } + for name, env_var in ai_providers.items(): + if os.getenv(env_var): + st.success(f"{name} API key configured") + else: + st.warning(f"{name} API key not set") + + try: + db = get_supabase_client() + 
if db and db.test_connection(): + st.success("Supabase connected") + else: + st.error("Supabase connection failed") + except APIError as e: + st.error(f"Supabase error: {e}") + except DatabaseError as e: + st.error(f"Unexpected Supabase error: {e}") + + try: + if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): + from notion_client import Client + + client = Client(auth=os.getenv("NOTION_API_KEY")) + client.databases.retrieve(database_id=os.getenv("NOTION_DATABASE_ID")) + st.success("Notion connected") + else: + st.warning("Notion not configured") + except OSError as e: + st.error(f"Notion error: {e}") + except APIClientError as e: + st.error(f"Unexpected Notion error: {e}") + + +def show_knowledge_base(): + """Knowledge base management page.""" + st.markdown("### Knowledge Base") + + kb_path = "prompts/default/knowledge_base" + + st.markdown( + "The knowledge base provides context and expertise to enhance content generation. " + "Add domain-specific information, style guides, and reference materials here." 
+ ) + + tabs = st.tabs(["View Knowledge", "Add Knowledge", "Manage Files"]) + + with tabs[0]: + st.markdown("#### Current Knowledge Base") + try: + if os.path.exists(kb_path): + files = [f for f in os.listdir(kb_path) if f.endswith(".md")] + if files: + selected_file = st.selectbox("Select knowledge file:", files) + if selected_file: + file_path = safe_path(kb_path, selected_file) + with open(file_path) as f: + content = f.read() + st.markdown(f"**File:** `{selected_file}`") + create_enhanced_aurora_content_card("Knowledge Content", content, "text", "\U0001f4d6") + else: + st.info("No knowledge files found") + else: + st.info("Knowledge base directory not found") + except OSError as e: + st.error(f"Error reading knowledge base: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error reading knowledge base: {e}") + + with tabs[1]: + st.markdown("#### Add New Knowledge") + + col1, col2 = st.columns([2, 1]) + with col1: + kb_title = st.text_input("Knowledge Title", placeholder="e.g., 'Marketing Guidelines'") + with col2: + kb_category = st.selectbox("Category", ["General", "Style Guide", "Domain Expertise", "Templates"]) + + kb_content = st.text_area("Knowledge Content", placeholder="Enter your knowledge content here...", height=300) + + if st.button("Save Knowledge", type="primary"): + if kb_title and kb_content: + if len(kb_title) > 100: + st.error("Title must be 100 characters or fewer.") + else: + try: + os.makedirs(kb_path, exist_ok=True) + filename = f"{kb_title.lower().replace(' ', '_')}.md" + file_path = safe_path(kb_path, filename) + + with open(file_path, "w") as f: + f.write(f"# {kb_title}\n\n") + f.write(f"**Category:** {kb_category}\n\n") + f.write(kb_content) + + st.success(f"Knowledge saved as `{filename}`") + except ValueError as e: + st.error(f"Invalid filename: {e}") + except OSError as e: + st.error(f"Error saving knowledge: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error saving knowledge: {e}") + else: + 
st.error("Please provide both title and content") + + with tabs[2]: + st.markdown("#### Manage Knowledge Files") + try: + if os.path.exists(kb_path): + files = [f for f in os.listdir(kb_path) if f.endswith(".md")] + if files: + for file in files: + col1, col2 = st.columns([3, 1]) + with col1: + st.markdown(f"`{file}`") + with col2: + if st.button("Delete", key=f"delete_{file}"): + try: + validated_path = safe_path(kb_path, file) + os.remove(validated_path) + st.success(f"Deleted `{file}`") + st.rerun() + except (ValueError, OSError) as e: + st.error(f"Error deleting file: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error deleting file: {e}") + else: + st.info("No knowledge files found") + else: + st.info("Knowledge base directory not found") + except OSError as e: + st.error(f"Error managing files: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error managing files: {e}") + + +def show_prompts_page(): + """Prompts management page.""" + st.markdown("### Prompt Customization") + st.markdown("Customize the AI prompts used in each step of the content generation pipeline.") + + prompt_types = { + "wisdom": "Wisdom Extraction", + "outline": "Content Outline", + "article": "Article Generation", + "social": "Social Media Posts", + } + + file_mapping = { + "wisdom": "wisdom_extraction.md", + "outline": "outline_creation.md", + "article": "article_generation.md", + "social": "social_media.md", + } + + prompt_tabs = st.tabs(list(prompt_types.values()) + ["Advanced"]) + + for i, (prompt_key, prompt_name) in enumerate(prompt_types.items()): + with prompt_tabs[i]: + st.markdown(f"#### {prompt_name}") + + prompt_file = f"prompts/default/{file_mapping[prompt_key]}" + current_prompt = "" + + try: + if os.path.exists(prompt_file): + with open(prompt_file) as f: + current_prompt = f.read() + else: + current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." 
+ except OSError as e: + st.error(f"Error loading prompt: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error loading prompt: {e}") + + new_prompt = st.text_area( + f"Edit {prompt_name} Prompt", + value=current_prompt, + height=400, + help=f"Customize the prompt used for {prompt_key} generation", + ) + + col1, col2, col3 = st.columns([1, 1, 2]) + with col1: + if st.button("Save", key=f"save_{prompt_key}"): + try: + os.makedirs("prompts/default", exist_ok=True) + with open(prompt_file, "w") as f: + f.write(new_prompt) + st.success(f"{prompt_name} prompt saved!") + except OSError as e: + st.error(f"Error saving prompt: {e}") + except FileProcessingError as e: + st.error(f"Unexpected error saving prompt: {e}") + + with col2: + if st.button("Reset", key=f"reset_{prompt_key}"): + st.info("Reset to default functionality coming soon!") + + with col3: + st.markdown(f"**File:** `{prompt_file}`") + + with prompt_tabs[-1]: + st.markdown("#### Advanced Prompt Settings") + col1, col2 = st.columns(2) + with col1: + st.markdown("**Global Settings**") + temperature = st.slider("Temperature (Creativity)", 0.0, 1.0, 0.7, 0.1) + max_tokens = st.number_input("Max Tokens", 100, 4000, 2000) + with col2: + st.markdown("**Prompt Templates**") + if st.button("Import Prompt Set"): + st.info("Import functionality coming soon!") + if st.button("Export Prompt Set"): + st.info("Export functionality coming soon!") + + st.session_state.temperature = temperature + st.session_state.max_tokens = max_tokens diff --git a/core/path_safety.py b/core/path_safety.py new file mode 100644 index 0000000..b034ba6 --- /dev/null +++ b/core/path_safety.py @@ -0,0 +1,33 @@ +"""Filesystem path validation utilities for WhisperForge. + +Ensures that user-supplied filenames cannot escape a designated root +directory, preventing path-traversal attacks. +""" + +import re +from pathlib import Path + +# Only allow alphanumeric, hyphens, underscores, and single dots (no ..) 
+_SAFE_FILENAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$") + + +def safe_path(root: str, untrusted_name: str) -> Path: + """Build a file path that is guaranteed to stay within *root*. + + Raises ValueError if the resulting path escapes the root directory + or if the filename contains disallowed characters. + """ + if not _SAFE_FILENAME_RE.match(untrusted_name): + raise ValueError( + f"Invalid filename: {untrusted_name!r}. " + "Only alphanumeric characters, hyphens, underscores, and dots are allowed." + ) + if ".." in untrusted_name: + raise ValueError("Path traversal sequences are not allowed.") + + root_resolved = Path(root).resolve() + target = (root_resolved / untrusted_name).resolve() + + if not target.is_relative_to(root_resolved): + raise ValueError(f"Path escapes allowed directory: {target} is not under {root_resolved}") + return target diff --git a/core/pipeline.py b/core/pipeline.py new file mode 100644 index 0000000..78988b1 --- /dev/null +++ b/core/pipeline.py @@ -0,0 +1,190 @@ +""" +Content Processing Pipeline +============================= + +Thin Streamlit UI wrapper around :mod:`pipeline_engine`. +Provides Aurora-themed progress visualisation while the pure-logic engine +does the actual work. +""" + +import logging +import time + +import streamlit as st + +from . 
import pipeline_engine +from .exceptions import DatabaseError +from .pipeline_engine import NullListener, PipelineConfig +from .prompt_loader import load_custom_prompts +from .supabase_integration import get_supabase_client + +logger = logging.getLogger(__name__) + +# Pipeline step definitions +PIPELINE_STEP_NAMES = pipeline_engine.PIPELINE_STEP_NAMES + + +# --------------------------------------------------------------------------- +# Streamlit listener -- maps engine callbacks to Aurora UI +# --------------------------------------------------------------------------- + + +class StreamlitPipelineListener(NullListener): + """Bridges :class:`pipeline_engine.PipelineListener` events to Streamlit widgets.""" + + def __init__(self, pipeline_placeholder, start_time, containers): + self.placeholder = pipeline_placeholder + self.start_time = start_time + self.containers = containers + + def on_step_progress(self, step_index, step_progress, total_progress, message): + _update_pipeline(self.placeholder, step_index, step_progress, total_progress, message, self.start_time) + + def on_step_complete(self, step_index, step_name, result): + container = self.containers.get(step_name) + if container and result and isinstance(result, str): + with container: + st.markdown(f"**{step_name} Complete**") + st.markdown(result) + + def on_error(self, step_index, error): + st.error(f"Pipeline failed: {error!s}") + + def on_pipeline_complete(self, results): + st.markdown( + '
' + '

Pipeline Complete!

' + '

Your content has been transformed with AI magic

' + "
", + unsafe_allow_html=True, + ) + time.sleep(2) + self.placeholder.empty() + + +# --------------------------------------------------------------------------- +# Public entry point -- signature unchanged for app_simple.py callers +# --------------------------------------------------------------------------- + + +def process_pipeline(audio_file=None, transcript: str | None = None) -> dict | None: + """Unified content pipeline. + + Supply *audio_file* to transcribe first, or *transcript* to skip transcription. + Returns the results dict on success, or ``None`` on failure. + """ + start_time = time.time() + + # Load any user-defined custom prompts + custom_prompts = load_custom_prompts() + if custom_prompts: + st.info(f"Using {len(custom_prompts)} custom prompts") + + # --- Streamlit UI scaffolding --- + pipeline_placeholder = st.empty() + st.markdown("### Live Content Generation") + + containers = { + "Transcription": st.expander("Transcription", expanded=False), + "Wisdom Extraction": st.expander("Wisdom Extraction", expanded=False), + "Outline Creation": st.expander("Outline Creation", expanded=False), + "Article Generation": st.expander("Article Generation", expanded=False), + "Social Content": st.expander("Social Content", expanded=False), + "Publishing": st.expander("Notion Publishing", expanded=False), + } + + # --- Build engine config from session state --- + config = PipelineConfig( + custom_prompts=custom_prompts or {}, + article_template=st.session_state.get("article_template"), + knowledge_base=st.session_state.get("knowledge_base", {}), + user_id=st.session_state.get("user_id"), + publish_to_notion=True, + ) + + # --- Obtain content store (Supabase) --- + try: + content_store = get_supabase_client() + except (DatabaseError, ImportError): + logger.warning("Supabase client unavailable -- content will not be persisted") + content_store = None + + # --- Create listener and run engine --- + listener = StreamlitPipelineListener(pipeline_placeholder, start_time, 
containers) + + return pipeline_engine.run_pipeline( + audio_file=audio_file, + transcript=transcript, + config=config, + content_store=content_store, + listener=listener, + ) + + +def _update_pipeline(placeholder, step, step_progress, total_progress, message, start_time): + """Update the pipeline visualization placeholder.""" + with placeholder.container(): + show_processing_pipeline( + current_step=step, + step_progress=step_progress, + total_progress=total_progress, + status_message=message, + processing_time=f"{time.time() - start_time:.1f}s", + ) + + +def show_processing_pipeline( + current_step=0, + step_progress=0, + total_progress=0, + status_message="", + processing_time="", +): + """Display Aurora-styled processing pipeline visualization.""" + steps = PIPELINE_STEP_NAMES + step_icons = ["\U0001f3a4", "\U0001f4a1", "\U0001f4cb", "\U0001f4dd", "\U0001f4f1", "\U0001f30c"] + + step_items = [] + for i, (name, icon) in enumerate(zip(steps, step_icons, strict=False)): + if i < current_step: + state = "completed" + elif i == current_step: + state = "active" + else: + state = "pending" + + step_items.append(f""" +
+
{icon}
+
{name}
+ {'
' if state == "active" else ""} +
+ """) + + pipeline_html = f""" +
+
+

Content Pipeline

+
+
+ {total_progress}% +
+
+
+ {"".join(step_items)} +
+ { + f''' +
+ + {status_message} + {processing_time} +
+ ''' + if status_message + else "" + } +
+ """ + + st.markdown(pipeline_html, unsafe_allow_html=True) diff --git a/core/pipeline_engine.py b/core/pipeline_engine.py new file mode 100644 index 0000000..f7823a3 --- /dev/null +++ b/core/pipeline_engine.py @@ -0,0 +1,308 @@ +""" +Pipeline Engine +================ + +Pure business-logic pipeline orchestration with ZERO Streamlit dependencies. +Extracts the content-generation workflow from pipeline.py so it can be driven +by any front-end (Streamlit, CLI, API server, tests). +""" + +from __future__ import annotations + +import logging +import os +import tempfile +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Protocol + +from .content_generation import ( + generate_article, + generate_outline, + generate_social_content, + generate_wisdom, + transcribe_audio, +) +from .exceptions import DatabaseError +from .notion_integration import create_notion_page, generate_ai_title +from .prompt_loader import get_prompt_for_step, load_custom_prompts, load_template + +logger = logging.getLogger(__name__) + +# Pipeline step definitions (mirrors pipeline.py) +PIPELINE_STEP_NAMES = [ + "Transcription", + "Wisdom Extraction", + "Outline Creation", + "Article Generation", + "Social Content", + "Publishing", +] + + +# --------------------------------------------------------------------------- +# Listener protocol -- callers implement this to receive progress updates +# --------------------------------------------------------------------------- + + +class PipelineListener(Protocol): + """Observer interface for pipeline progress events.""" + + def on_step_start(self, step_index: int, step_name: str, message: str) -> None: ... + + def on_step_progress(self, step_index: int, step_progress: int, total_progress: int, message: str) -> None: ... + + def on_step_complete(self, step_index: int, step_name: str, result: Any) -> None: ... + + def on_error(self, step_index: int, error: Exception) -> None: ... 
+ + def on_pipeline_complete(self, results: dict) -> None: ... + + +class NullListener: + """No-op listener -- silently ignores every event.""" + + def on_step_start(self, step_index: int, step_name: str, message: str) -> None: + pass + + def on_step_progress(self, step_index: int, step_progress: int, total_progress: int, message: str) -> None: + pass + + def on_step_complete(self, step_index: int, step_name: str, result: Any) -> None: + pass + + def on_error(self, step_index: int, error: Exception) -> None: + pass + + def on_pipeline_complete(self, results: dict) -> None: + pass + + +# --------------------------------------------------------------------------- +# Pipeline configuration +# --------------------------------------------------------------------------- + + +@dataclass +class PipelineConfig: + """Configuration knobs for a single pipeline run.""" + + custom_prompts: dict[str, str] = field(default_factory=dict) + article_template: str | None = None + knowledge_base: dict[str, str] = field(default_factory=dict) + user_id: int | str | None = None + publish_to_notion: bool = True + + +# --------------------------------------------------------------------------- +# Content store protocol -- abstracts away the persistence layer +# --------------------------------------------------------------------------- + + +class ContentStore(Protocol): + """Persistence interface for generated content.""" + + def save_content(self, user_id: int | str, content_data: dict) -> str | None: ... 
+ + +class NullContentStore: + """No-op content store -- discards everything.""" + + def save_content(self, user_id: int | str, content_data: dict) -> str | None: + return None + + +# --------------------------------------------------------------------------- +# Main orchestration +# --------------------------------------------------------------------------- + + +def run_pipeline( + *, + audio_file: Any = None, + transcript: str | None = None, + config: PipelineConfig | None = None, + content_store: ContentStore | None = None, + listener: PipelineListener | None = None, +) -> dict | None: + """Execute the full content-generation pipeline. + + Supply *audio_file* (a file-like object with ``.name`` and ``.getvalue()``) + to transcribe first, or *transcript* to skip transcription. + + Returns the results dict on success, or ``None`` on failure. + """ + if config is None: + config = PipelineConfig() + if content_store is None: + content_store = NullContentStore() + if listener is None: + listener = NullListener() + + # Load custom prompts from disk when none were injected + custom_prompts = config.custom_prompts + if not custom_prompts: + custom_prompts = load_custom_prompts() + if custom_prompts: + logger.info("Loaded %d custom prompts from disk", len(custom_prompts)) + + results: dict = {} + + try: + # ------------------------------------------------------------------ + # Step 0 -- Transcription + # ------------------------------------------------------------------ + if audio_file is not None: + listener.on_step_start(0, "Transcription", "Starting transcription...") + listener.on_step_progress(0, 0, 0, "Starting transcription...") + + suffix = os.path.splitext(audio_file.name)[1] + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + tmp.write(audio_file.getvalue()) + tmp_path = tmp.name + + try: + listener.on_step_progress(0, 50, 8, "Transcribing audio with Whisper AI...") + transcript = transcribe_audio(tmp_path) + if not transcript: + error = 
RuntimeError("Transcription failed: empty result") + listener.on_error(0, error) + logger.error("Transcription returned empty result") + return None + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + results["transcript"] = transcript + listener.on_step_progress(0, 100, 17, "Transcription complete!") + listener.on_step_complete(0, "Transcription", transcript) + else: + if not transcript: + error = ValueError("No audio file or transcript provided.") + listener.on_error(0, error) + logger.error("No audio file or transcript provided") + return None + results["transcript"] = transcript + listener.on_step_progress(1, 0, 17, f"Using pre-transcribed content ({len(transcript)} characters)") + + # ------------------------------------------------------------------ + # Step 1 -- Wisdom Extraction + # ------------------------------------------------------------------ + listener.on_step_start(1, "Wisdom Extraction", "Extracting wisdom and insights...") + listener.on_step_progress(1, 0, 17, "Extracting wisdom and insights...") + + wisdom_prompt = get_prompt_for_step("wisdom", custom_prompts) + listener.on_step_progress(1, 50, 25, "Analyzing content for key insights...") + + wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base=config.knowledge_base) + results["wisdom"] = wisdom + + listener.on_step_progress(1, 100, 33, "Wisdom extraction complete!") + listener.on_step_complete(1, "Wisdom Extraction", wisdom) + + # ------------------------------------------------------------------ + # Step 2 -- Outline Creation + # ------------------------------------------------------------------ + listener.on_step_start(2, "Outline Creation", "Creating structured outline...") + listener.on_step_progress(2, 0, 33, "Creating structured outline...") + + outline_prompt = get_prompt_for_step("outline", custom_prompts) + listener.on_step_progress(2, 50, 42, "Structuring content hierarchy...") + + outline = generate_outline( + transcript, wisdom, 
custom_prompt=outline_prompt, knowledge_base=config.knowledge_base + ) + results["outline"] = outline + + listener.on_step_progress(2, 100, 50, "Outline creation complete!") + listener.on_step_complete(2, "Outline Creation", outline) + + # ------------------------------------------------------------------ + # Step 3 -- Article Generation + # ------------------------------------------------------------------ + listener.on_step_start(3, "Article Generation", "Generating comprehensive article...") + listener.on_step_progress(3, 0, 50, "Generating comprehensive article...") + + article_prompt = get_prompt_for_step("article", custom_prompts) + if config.article_template: + template_text = load_template(config.article_template) + if template_text and article_prompt: + article_prompt = template_text + "\n" + article_prompt + + listener.on_step_progress(3, 50, 58, "Writing detailed article content...") + + article = generate_article( + transcript, + wisdom, + outline, + custom_prompt=article_prompt, + knowledge_base=config.knowledge_base, + ) + results["article"] = article + + listener.on_step_progress(3, 100, 67, "Article generation complete!") + listener.on_step_complete(3, "Article Generation", article) + + # ------------------------------------------------------------------ + # Step 4 -- Social Content + # ------------------------------------------------------------------ + listener.on_step_start(4, "Social Content", "Creating social media content...") + listener.on_step_progress(4, 0, 67, "Creating social media content...") + + social_prompt = get_prompt_for_step("social", custom_prompts) + listener.on_step_progress(4, 50, 75, "Generating social media posts...") + + social = generate_social_content( + wisdom, outline, article, custom_prompt=social_prompt, knowledge_base=config.knowledge_base + ) + results["social_content"] = social + + listener.on_step_progress(4, 100, 83, "Social content creation complete!") + listener.on_step_complete(4, "Social Content", social) + 
+ # ------------------------------------------------------------------ + # Step 5 -- Publishing & Persistence + # ------------------------------------------------------------------ + listener.on_step_start(5, "Publishing", "Publishing to Notion workspace...") + listener.on_step_progress(5, 0, 83, "Publishing to Notion workspace...") + + if config.publish_to_notion and os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): + ai_title = generate_ai_title(transcript) + listener.on_step_progress(5, 50, 90, "Uploading content to Notion...") + + notion_url = create_notion_page(ai_title, results) + if notion_url: + results["notion_url"] = notion_url + else: + logger.warning("Notion page creation returned None") + else: + logger.info("Notion publishing skipped (disabled or not configured)") + + # --- Save to DB --- + listener.on_step_progress(5, 90, 96, "Saving to database...") + try: + if config.user_id is not None: + content_data = { + "title": results.get("title", "Untitled"), + "transcript": results.get("transcript", ""), + "wisdom": results.get("wisdom", ""), + "outline": results.get("outline", ""), + "article": results.get("article", ""), + "social_content": results.get("social_content", ""), + "notion_url": results.get("notion_url", ""), + "created_at": datetime.now().isoformat(), + } + content_store.save_content(config.user_id, content_data) + except DatabaseError as exc: + logger.warning("Content saved locally but database save failed: %s", exc) + + listener.on_step_progress(5, 100, 100, "Pipeline complete! 
All content generated successfully.") + listener.on_step_complete(5, "Publishing", results.get("notion_url")) + listener.on_pipeline_complete(results) + + return results + + except Exception as exc: + listener.on_error(0, exc) + logger.exception("Pipeline failed: %s", exc) + return None diff --git a/core/prompt_loader.py b/core/prompt_loader.py new file mode 100644 index 0000000..50e47af --- /dev/null +++ b/core/prompt_loader.py @@ -0,0 +1,59 @@ +""" +Prompt Loading Utilities +======================== + +Load and manage AI prompts from the filesystem. +""" + +import logging +import os + +logger = logging.getLogger(__name__) + + +def load_custom_prompts() -> dict[str, str]: + """Load custom prompts from the prompts/default directory.""" + prompts = {} + prompt_dir = "prompts/default" + + if os.path.exists(prompt_dir): + for filename in os.listdir(prompt_dir): + if filename.endswith(".md"): + prompt_name = filename.replace(".md", "") + try: + with open(os.path.join(prompt_dir, filename), encoding="utf-8") as f: + prompts[prompt_name] = f.read() + except OSError as e: + logger.warning(f"Failed to load prompt {filename}: {e}") + + return prompts + + +def load_template(template_name: str) -> str | None: + """Load an article template by name from the templates folder.""" + template_path = os.path.join("templates", f"{template_name}.md") + if os.path.exists(template_path): + with open(template_path, encoding="utf-8") as f: + return f.read() + return None + + +# Maps pipeline step names to prompt file basenames +_STEP_PROMPT_MAP = { + "wisdom": "wisdom_extraction", + "outline": "outline_creation", + "social": "social_media", + "article": "article_generation", +} + + +def get_prompt_for_step(step_name: str, custom_prompts: dict[str, str] | None = None) -> str | None: + """Get the appropriate prompt for a pipeline step.""" + if not custom_prompts: + custom_prompts = load_custom_prompts() + + prompt_key = _STEP_PROMPT_MAP.get(step_name) + if prompt_key and prompt_key in 
custom_prompts: + return custom_prompts[prompt_key] + + return None diff --git a/core/prompts.py b/core/prompts.py new file mode 100644 index 0000000..eeb73b4 --- /dev/null +++ b/core/prompts.py @@ -0,0 +1,88 @@ +"""Prompt management for WhisperForge content generation. + +Handles loading, formatting, and enhancing prompts from markdown files +with support for per-user overrides and automatic knowledge-base +concatenation. +""" + +import logging + +from .exceptions import FileProcessingError +from .path_safety import safe_path + +logger = logging.getLogger(__name__) + +# Default prompts for content generation (DEPRECATED - use load_prompt_from_file) +DEFAULT_PROMPTS = { + "wisdom_extraction": """Extract key insights, lessons, and wisdom from the transcript. Focus on actionable takeaways and profound realizations.""", + "summary": """## Summary +Create a concise summary of the main points and key messages in the transcript. +Capture the essence of the content in a few paragraphs.""", + "outline_creation": """Create a detailed outline for an article or blog post based on the transcript and extracted wisdom. Include major sections and subsections.""", + "social_media": """Generate engaging social media posts for different platforms (Twitter, LinkedIn, Instagram) based on the key insights.""", + "image_prompts": """Create detailed image generation prompts that visualize the key concepts and metaphors from the content.""", + "article_writing": """Write a comprehensive article based on the provided outline and wisdom. Maintain a clear narrative flow and engaging style.""", + "seo_analysis": """Analyze the content from an SEO perspective and provide optimization recommendations for better search visibility while maintaining content quality.""", + "editor_persona": """You are a professional content editor. 
Provide constructive feedback to improve the content quality.""", +} + + +def load_prompt_from_file(prompt_type: str, user_id: str = None) -> str: + """Load prompt from markdown file with user override support""" + try: + # Check for user-specific prompt first (for paid tiers) + if user_id: + user_dir = str(safe_path("prompts/users", user_id)) + user_prompt_path = safe_path(user_dir, f"{prompt_type}.md") + if user_prompt_path.exists(): + return user_prompt_path.read_text(encoding="utf-8").strip() + + # Load default prompt + default_prompt_path = safe_path("prompts/default", f"{prompt_type}.md") + if default_prompt_path.exists(): + return default_prompt_path.read_text(encoding="utf-8").strip() + + # Fallback to hardcoded prompts + fallback = DEFAULT_PROMPTS.get(prompt_type, "") + if fallback: + logger.warning(f"Using fallback prompt for {prompt_type} - consider creating markdown file") + return fallback + + logger.error(f"No prompt found for type: {prompt_type}") + return f"Please provide content for {prompt_type.replace('_', ' ')}." 
+ + except (FileProcessingError, OSError) as e: + logger.error(f"Error loading prompt {prompt_type}: {e}") + return DEFAULT_PROMPTS.get(prompt_type, f"Error loading {prompt_type} prompt.") + + +def format_knowledge_base_context(knowledge_base: dict[str, str]) -> str: + """Format knowledge base content for auto-concatenation to prompts""" + if not knowledge_base: + return "" + + context_parts = ["## Knowledge Base Context\n"] + context_parts.append( + "Use the following knowledge base to inform your analysis and maintain consistency with established perspectives:\n" + ) + + for name, content in knowledge_base.items(): + context_parts.append(f"### {name}") + context_parts.append(content) + context_parts.append("") # Empty line for separation + + context_parts.append("---\n") + context_parts.append("## Your Task\n") + + return "\n".join(context_parts) + + +def get_enhanced_prompt(prompt_type: str, knowledge_base: dict[str, str] = None, user_id: str = None) -> str: + """Get prompt with automatic knowledge base concatenation""" + base_prompt = load_prompt_from_file(prompt_type, user_id) + + if knowledge_base: + kb_context = format_knowledge_base_context(knowledge_base) + return f"{kb_context}{base_prompt}" + + return base_prompt diff --git a/core/security.py b/core/security.py new file mode 100644 index 0000000..0ad5dff --- /dev/null +++ b/core/security.py @@ -0,0 +1,51 @@ +"""Password hashing and verification utilities for WhisperForge. + +Provides bcrypt-based password hashing for current use and a deprecated +SHA-256 helper retained solely for migrating legacy credential stores. 
+""" + +import hashlib +import logging +import warnings + +import bcrypt + +logger = logging.getLogger(__name__) + + +def hash_password(password: str) -> str: + """Hash a password using bcrypt with salt""" + # Generate salt and hash password + salt = bcrypt.gensalt() + hashed = bcrypt.hashpw(password.encode("utf-8"), salt) + return hashed.decode("utf-8") + + +def verify_password(password: str, hashed: str) -> bool: + """Verify a password against its hash. + + Raises on unexpected errors (e.g. DB corruption) instead of + silently returning False which would be indistinguishable from + a wrong password. + """ + try: + return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) + except ValueError as e: + # Malformed hash string (wrong prefix, bad encoding, etc.) + logger.error("Password verification failed โ€“ malformed hash: %s", e) + return False + + +def legacy_hash_password(password: str) -> str: + """Legacy SHA-256 hash - DEPRECATED, use for migration only. + + .. deprecated:: + Use :func:`hash_password` (bcrypt) for all new credential storage. + """ + warnings.warn( + "legacy_hash_password() is deprecated and will be removed in a future release. " + "Use hash_password() (bcrypt) instead.", + DeprecationWarning, + stacklevel=2, + ) + return hashlib.sha256(password.encode()).hexdigest() diff --git a/core/services.py b/core/services.py new file mode 100644 index 0000000..00fbba4 --- /dev/null +++ b/core/services.py @@ -0,0 +1,97 @@ +""" +Service Locator / DI Container +================================ + +Lightweight dependency-injection container for WhisperForge. +Each field defaults to ``None`` and falls back to the existing +module-level singleton on first access, so existing code keeps +working unchanged while tests and alternative front-ends can +inject their own implementations. 
+ +Usage:: + + from core.services import get_services + + svc = get_services() + cfg = svc.get_config() # lazily loads the global Config + db = svc.get_db() # lazily loads the global SupabaseClient +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .auth_wrapper import AuthWrapper + from .config import Config + from .session_manager import SessionManager + from .supabase_integration import SupabaseClient + +logger = logging.getLogger(__name__) + + +@dataclass +class Services: + """Central service registry with lazy fallbacks to existing singletons.""" + + config: Config | None = None + db: SupabaseClient | None = None + auth: AuthWrapper | None = None + session: SessionManager | None = None + + # -- lazy getters ------------------------------------------------------- + + def get_config(self) -> Config: + """Return the injected Config or fall back to the global singleton.""" + if self.config is not None: + return self.config + from .config import get_config + + return get_config() + + def get_db(self) -> SupabaseClient: + """Return the injected DB client or fall back to the global singleton.""" + if self.db is not None: + return self.db + from .supabase_integration import get_supabase_client + + return get_supabase_client() + + def get_auth(self) -> AuthWrapper: + """Return the injected AuthWrapper or fall back to the global singleton.""" + if self.auth is not None: + return self.auth + from .auth_wrapper import get_auth + + return get_auth() + + def get_session(self) -> SessionManager: + """Return the injected SessionManager or fall back to the global singleton.""" + if self.session is not None: + return self.session + from .session_manager import get_session_manager + + return get_session_manager() + + +# --------------------------------------------------------------------------- +# Module-level singleton +# 
--------------------------------------------------------------------------- + +_services: Services | None = None + + +def get_services() -> Services: + """Return the current global ``Services`` instance (created on first call).""" + global _services + if _services is None: + _services = Services() + return _services + + +def set_services(services: Services) -> None: + """Replace the global ``Services`` instance (useful in tests).""" + global _services + _services = services diff --git a/core/session_manager.py b/core/session_manager.py index 7a5607e..74963eb 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -1,15 +1,19 @@ import json import logging -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from pathlib import Path + import streamlit as st +from .constants import DEFAULT_PAGE, SESSION_EXPIRY_DAYS + logger = logging.getLogger(__name__) + class SessionManager: """Simple file-based session persistence""" - def __init__(self, app_name: str = "whisperforge", expiry_days: int = 7): + def __init__(self, app_name: str = "whisperforge", expiry_days: int = SESSION_EXPIRY_DAYS): self.session_dir = Path.home() / f".{app_name}_sessions" self.session_file = self.session_dir / "session.json" self.expiry_days = expiry_days @@ -18,7 +22,7 @@ def __init__(self, app_name: str = "whisperforge", expiry_days: int = 7): "user_id": None, "user_email": None, "preferences": {}, - "current_page": "Transform", + "current_page": DEFAULT_PAGE, "pipeline_active": False, "created_at": None, "last_activity": None, @@ -28,34 +32,40 @@ def __init__(self, app_name: str = "whisperforge", expiry_days: int = 7): def _load(self): if self.session_file.exists(): try: - with open(self.session_file, "r", encoding="utf-8") as f: + with open(self.session_file, encoding="utf-8") as f: loaded = json.load(f) if loaded.get("created_at"): created = datetime.fromisoformat(loaded["created_at"]) - if datetime.utcnow() - created > 
timedelta(days=self.expiry_days): + if datetime.now(UTC) - created > timedelta(days=self.expiry_days): self.session_file.unlink() return self.data.update(loaded) st.session_state.update(self.data) - except Exception as e: + except (json.JSONDecodeError, OSError, KeyError) as e: logger.error(f"Session load failed: {e}") + except Exception as e: + logger.warning(f"Unexpected error type ({type(e).__name__}) in session load: {e}") def _save(self): try: self.session_dir.mkdir(parents=True, exist_ok=True) with open(self.session_file, "w", encoding="utf-8") as f: json.dump(self.data, f) - except Exception as e: + except OSError as e: logger.error(f"Session save failed: {e}") + except Exception as e: + logger.warning(f"Unexpected error type ({type(e).__name__}) in session save: {e}") def authenticate_user(self, user_id: str, email: str) -> bool: - self.data.update({ - "authenticated": True, - "user_id": str(user_id), - "user_email": email, - "created_at": datetime.utcnow().isoformat(), - "last_activity": datetime.utcnow().isoformat(), - }) + self.data.update( + { + "authenticated": True, + "user_id": str(user_id), + "user_email": email, + "created_at": datetime.now(UTC).isoformat(), + "last_activity": datetime.now(UTC).isoformat(), + } + ) st.session_state.update(self.data) self._save() return True @@ -91,7 +101,7 @@ def set_current_page(self, page: str): self._save() def get_current_page(self) -> str: - return self.data.get("current_page", "Transform") + return self.data.get("current_page", DEFAULT_PAGE) def set_pipeline_active(self, active: bool): self.data["pipeline_active"] = active @@ -109,6 +119,7 @@ def get_session_info(self): "last_activity": self.data.get("last_activity"), } + def get_session_manager() -> SessionManager: if "_session_manager" not in st.session_state: st.session_state._session_manager = SessionManager() diff --git a/core/streaming_pipeline.py b/core/streaming_pipeline.py index 7b3f526..878aad8 100644 --- a/core/streaming_pipeline.py +++ 
b/core/streaming_pipeline.py @@ -3,32 +3,42 @@ Enables real-time progress updates and content streaming during processing """ -import streamlit as st import time -from typing import Dict, Optional, Any -from datetime import datetime +from typing import Any + +import openai +import streamlit as st +from postgrest.exceptions import APIError + +from .constants import LARGE_FILE_THRESHOLD_MB from .content_generation import ( - transcribe_audio, generate_wisdom, generate_outline, generate_article, - generate_social_content, generate_image_prompts, editor_critique + ContentGenerationError, + generate_article, + generate_outline, + generate_social_content, + generate_wisdom, + transcribe_audio, ) -from .research_enrichment import generate_research_enrichment -from .visible_thinking import thinking_step_start, thinking_step_complete, thinking_error, render_thinking_stream -# Removed old complex progress tracker - using simple progress bars now +from .exceptions import DatabaseError, PipelineError +from .visible_thinking import thinking_error, thinking_step_complete, thinking_step_start class StreamingPipelineController: """Controls step-by-step pipeline execution with real-time UI updates""" - - # Define pipeline steps as class constant + PIPELINE_STEPS = [ - "upload_validation", "transcription", "wisdom_extraction", - "research_enrichment", "outline_creation", "article_creation", - "social_content", "image_prompts", "database_storage" + "upload_validation", + "transcription", + "wisdom_extraction", + "outline_creation", + "article_creation", + "social_content", + "database_storage", ] - + def __init__(self): self.reset_pipeline() - + def reset_pipeline(self): """Reset pipeline state for new processing""" st.session_state.pipeline_active = False @@ -36,78 +46,66 @@ def reset_pipeline(self): st.session_state.pipeline_results = {} st.session_state.pipeline_errors = {} st.session_state.pipeline_audio_file = None - + def start_pipeline(self, audio_file): """Initialize pipeline 
for processing with large file support""" self.reset_pipeline() st.session_state.pipeline_active = True st.session_state.pipeline_audio_file = audio_file - - # Store file info for later use + file_size_mb = len(audio_file.getvalue()) / (1024 * 1024) st.session_state.pipeline_file_info = { "name": audio_file.name, "size": len(audio_file.getvalue()), "size_mb": file_size_mb, - "is_large_file": file_size_mb > 20 # Flag for large file processing + "is_large_file": file_size_mb > LARGE_FILE_THRESHOLD_MB, } - - # Initialize required session state if missing - if not hasattr(st.session_state, 'prompts'): + + if not hasattr(st.session_state, "prompts"): st.session_state.prompts = {} - if not hasattr(st.session_state, 'knowledge_base'): + if not hasattr(st.session_state, "knowledge_base"): st.session_state.knowledge_base = {} - if not hasattr(st.session_state, 'ai_provider'): - st.session_state.ai_provider = "OpenAI" - if not hasattr(st.session_state, 'ai_model'): - st.session_state.ai_model = "gpt-4o" - + def process_next_step(self): """Process the next step in the pipeline""" if not st.session_state.pipeline_active: return False - + step_index = st.session_state.pipeline_step_index - + if step_index >= len(self.PIPELINE_STEPS): - # Pipeline complete st.session_state.pipeline_active = False return False - + step_id = self.PIPELINE_STEPS[step_index] - + try: - # Show immediate status update with st.status(f"Processing {step_id.replace('_', ' ').title()}...", expanded=True): st.write(f"Step {step_index + 1} of {len(self.PIPELINE_STEPS)}: {step_id.replace('_', ' ')}") - - # Process the step result = self._execute_step(step_id, step_index) - - # Store result st.session_state.pipeline_results[step_id] = result - - st.write("โœ… Complete!") - - # Move to next step + st.write("Complete!") + st.session_state.pipeline_step_index += 1 - return True - - except Exception as e: - # Handle step error + + except (ContentGenerationError, openai.APIError, ValueError) as e: error_msg = 
str(e) st.session_state.pipeline_errors[step_id] = error_msg st.session_state.pipeline_active = False - st.error(f"โŒ Error in {step_id}: {error_msg}") + st.error(f"Error in {step_id}: {error_msg}") return False - + except PipelineError as e: + error_msg = str(e) + st.session_state.pipeline_errors[step_id] = error_msg + st.session_state.pipeline_active = False + st.error(f"Unexpected error in {step_id}: {error_msg}") + return False + def _execute_step(self, step_id: str, step_index: int) -> Any: """Execute a specific pipeline step""" - - # Add visible thinking at step start thinking_step_start(step_id) - + try: if step_id == "upload_validation": result = self._step_upload_validation() @@ -115,426 +113,222 @@ def _execute_step(self, step_id: str, step_index: int) -> Any: result = self._step_transcription() elif step_id == "wisdom_extraction": result = self._step_wisdom_extraction() - elif step_id == "research_enrichment": - result = self._step_research_enrichment() elif step_id == "outline_creation": result = self._step_outline_creation() elif step_id == "article_creation": result = self._step_article_creation() elif step_id == "social_content": result = self._step_social_content() - elif step_id == "image_prompts": - result = self._step_image_prompts() elif step_id == "database_storage": result = self._step_database_storage() else: - raise Exception(f"Unknown step: {step_id}") - - # Add success thinking + raise ValueError(f"Unknown step: {step_id}") + thinking_step_complete(step_id) return result - - except Exception as e: - # Add error thinking + + except (ContentGenerationError, openai.APIError, ValueError) as e: thinking_error(step_id, str(e)) raise - - def _step_upload_validation(self) -> Dict[str, Any]: + except PipelineError as e: + thinking_error(step_id, f"Unexpected: {e!s}") + raise + + def _step_upload_validation(self) -> dict[str, Any]: """Step 1: Validate uploaded file""" file_info = st.session_state.pipeline_file_info - - if file_info["size_mb"] > 25: 
- raise Exception(f"File too large: {file_info['size_mb']:.1f}MB (max 25MB)") - - # Simulate validation time + + from .config import get_config + + max_file_size_mb = get_config().audio_chunk_size_mb + if file_info["size_mb"] > max_file_size_mb: + raise ValueError(f"File too large: {file_info['size_mb']:.1f}MB (max {max_file_size_mb}MB)") + time.sleep(0.5) - - return { - "status": "validated", - "file_name": file_info["name"], - "file_size_mb": file_info["size_mb"] - } - + + return {"status": "validated", "file_name": file_info["name"], "file_size_mb": file_info["size_mb"]} + def _step_transcription(self) -> str: """Step 2: Transcribe audio with large file support""" audio_file = st.session_state.pipeline_audio_file file_info = st.session_state.pipeline_file_info - - # Check if this is a large file requiring chunked processing + if file_info.get("is_large_file", False): return self._transcribe_large_file(audio_file) else: return self._transcribe_small_file(audio_file) - + def _transcribe_small_file(self, audio_file) -> str: """Transcribe small files directly""" - from .content_generation import transcribe_audio - - st.write("๐ŸŽต **Processing small file directly...**") - + st.write("Processing small file directly...") + transcript = transcribe_audio(audio_file) if not transcript: - raise Exception("Failed to transcribe audio - transcript is empty") - - if "Error" in transcript: - raise Exception(f"Transcription failed: {transcript}") - - # Store in session for access by later steps + raise ValueError("Failed to transcribe audio - transcript is empty") + st.session_state.pipeline_transcript = transcript return transcript - + def _transcribe_large_file(self, audio_file) -> str: - """๐Ÿš€ Transcribe large files using chunked processing""" - from .file_upload import LargeFileUploadManager - - st.write("๐Ÿš€ **Processing large file with chunked transcription...**") - - # Create upload manager for large file processing - upload_manager = LargeFileUploadManager() - - # 
Process the large file with chunking - result = upload_manager.process_large_file(audio_file) - + """Transcribe large files using chunked processing""" + from .file_upload import EnhancedLargeFileProcessor + + st.write("Processing large file with chunked transcription...") + + processor = EnhancedLargeFileProcessor() + result = processor.process_large_file(audio_file) + if not result["success"]: - raise Exception(f"Large file transcription failed: {result['error']}") - + raise ValueError(f"Large file transcription failed: {result['error']}") + transcript = result["transcript"] - if not transcript: - raise Exception("Large file transcription produced empty result") - - # Show processing summary + raise ValueError("Large file transcription produced empty result") + st.success(f""" - โœ… **Large File Transcription Complete!** - - **Chunks processed:** {result.get('chunks', 'N/A')} - - **Processing time:** {result.get('processing_time', 'N/A')} - - **Transcript length:** {len(transcript)} characters + Large File Transcription Complete! 
+ - Chunks processed: {result.get("chunks", "N/A")} + - Processing time: {result.get("processing_time", "N/A")} + - Transcript length: {len(transcript)} characters """) - - # Store in session for access by later steps + st.session_state.pipeline_transcript = transcript return transcript - + def _step_wisdom_extraction(self) -> str: """Step 3: Extract wisdom""" transcript = st.session_state.pipeline_transcript - - # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("wisdom_extraction") if hasattr(st.session_state, 'prompts') else None - + + custom_prompt = ( + st.session_state.prompts.get("wisdom_extraction") if hasattr(st.session_state, "prompts") else None + ) + wisdom = generate_wisdom( - transcript, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, - st.session_state.knowledge_base + transcript, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) - - # Handle editor mode - if st.session_state.get("editor_enabled", False): - critique = editor_critique( - wisdom, "wisdom_extraction", - st.session_state.ai_provider, - st.session_state.ai_model, - st.session_state.knowledge_base - ) - - # Store critique for display - st.session_state.pipeline_results["wisdom_critique"] = critique - - # Generate revision based on critique - revision_prompt = f"""Based on this editorial feedback, please revise the wisdom extraction: - -EDITORIAL FEEDBACK: -{critique} - -ORIGINAL WISDOM: -{wisdom} - -Please provide an improved version that addresses the feedback.""" - - wisdom = generate_wisdom( - transcript, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, - st.session_state.knowledge_base - ) - - # Store in session for later steps AND results for display + st.session_state.pipeline_wisdom = wisdom return wisdom - - def _step_research_enrichment(self) -> Dict[str, Any]: - """Step 3.5: Research Enrichment - NEW STEP""" - wisdom = st.session_state.pipeline_wisdom - transcript 
= st.session_state.pipeline_transcript - - # Check if research enrichment is enabled (default True for paid users) - research_enabled = st.session_state.get("research_enabled", True) - - # Generate research enrichment - research_data = generate_research_enrichment( - wisdom=wisdom, - transcript=transcript, - ai_provider=st.session_state.ai_provider, - ai_model=st.session_state.ai_model, - enabled=research_enabled - ) - - # Store in session for access by later steps - st.session_state.pipeline_research = research_data - return research_data - + def _step_outline_creation(self) -> str: """Step 4: Create outline""" transcript = st.session_state.pipeline_transcript wisdom = st.session_state.pipeline_wisdom - research = st.session_state.pipeline_results.get("research_enrichment", {}) - - # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("outline_creation") if hasattr(st.session_state, 'prompts') else None - + + custom_prompt = ( + st.session_state.prompts.get("outline_creation") if hasattr(st.session_state, "prompts") else None + ) + outline = generate_outline( - transcript, - wisdom, - research, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, - st.session_state.knowledge_base + transcript, wisdom, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) - - # Handle editor mode - if st.session_state.get("editor_enabled", False): - critique = editor_critique( - outline, "outline_creation", - st.session_state.ai_provider, - st.session_state.ai_model, - st.session_state.knowledge_base - ) - - st.session_state.pipeline_results["outline_critique"] = critique - - revision_prompt = f"""Based on this editorial feedback, please revise the outline: - -EDITORIAL FEEDBACK: -{critique} - -ORIGINAL OUTLINE: -{outline} - -Please provide an improved version that addresses the feedback.""" - - outline = generate_outline( - transcript, - wisdom, - research, - st.session_state.ai_provider, - 
st.session_state.ai_model, - revision_prompt, - st.session_state.knowledge_base - ) - - # Store in session for later steps AND results for display + st.session_state.pipeline_outline = outline return outline - + def _step_article_creation(self) -> str: """Step 5: Create article""" transcript = st.session_state.pipeline_transcript wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline - - # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("article_creation") if hasattr(st.session_state, 'prompts') else None - + + custom_prompt = ( + st.session_state.prompts.get("article_creation") if hasattr(st.session_state, "prompts") else None + ) + article = generate_article( - transcript, - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, - st.session_state.knowledge_base + transcript, wisdom, outline, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) - - # Handle editor mode - if st.session_state.get("editor_enabled", False): - critique = editor_critique( - article, "article_writing", - st.session_state.ai_provider, - st.session_state.ai_model, - st.session_state.knowledge_base - ) - - st.session_state.pipeline_results["article_critique"] = critique - - revision_prompt = f"""Based on this editorial feedback, please revise the article: - -EDITORIAL FEEDBACK: -{critique} - -ORIGINAL ARTICLE: -{article} - -Please provide an improved version that addresses the feedback.""" - - article = generate_article( - transcript, - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, - st.session_state.knowledge_base - ) - + st.session_state.pipeline_article = article return article - + def _step_social_content(self) -> str: """Step 6: Generate social media content""" wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline article = st.session_state.pipeline_article - - # Get custom 
prompt if available - custom_prompt = st.session_state.prompts.get("social_media") if hasattr(st.session_state, 'prompts') else None - + + custom_prompt = st.session_state.prompts.get("social_media") if hasattr(st.session_state, "prompts") else None + social = generate_social_content( - wisdom, - outline, - article, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, - st.session_state.knowledge_base + wisdom, outline, article, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) - - # Handle editor mode - if st.session_state.get("editor_enabled", False): - critique = editor_critique( - social, "social_media", - st.session_state.ai_provider, - st.session_state.ai_model, - st.session_state.knowledge_base - ) - - st.session_state.pipeline_results["social_critique"] = critique - - revision_prompt = f"""Based on this editorial feedback, please revise the social media content: - -EDITORIAL FEEDBACK: -{critique} - -ORIGINAL SOCIAL CONTENT: -{social} - -Please provide improved versions that address the feedback.""" - - social = generate_social_content( - wisdom, - outline, - article, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, - st.session_state.knowledge_base - ) - + st.session_state.pipeline_social = social return social - - def _step_image_prompts(self) -> str: - """Step 7: Generate image prompts""" - wisdom = st.session_state.pipeline_wisdom - outline = st.session_state.pipeline_outline - - # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("image_prompts") if hasattr(st.session_state, 'prompts') else None - - images = generate_image_prompts( - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, - st.session_state.knowledge_base - ) - - st.session_state.pipeline_images = images - return images - + def _step_database_storage(self) -> str: - """Step 8: Store content in database""" + """Step 7: Store content in 
database""" try: - # Direct Supabase access to avoid circular imports from .supabase_integration import get_supabase_client - + db = get_supabase_client() if not db: return "Database connection failed" - - # Get results with correct step names + results = st.session_state.pipeline_results - - # Direct database insert with CORRECT field names - result = db.client.table("content").insert({ - "user_id": st.session_state.user_id, - "title": f"Content from {st.session_state.pipeline_file_info['name']}", - "transcript": results.get("transcription", ""), - "wisdom": results.get("wisdom_extraction", ""), - "outline": results.get("outline_creation", ""), - "article": results.get("article_creation", ""), - "social_content": results.get("social_content", ""), - "created_at": "now()" - }).execute() - + + result = ( + db.client.table("content") + .insert( + { + "user_id": st.session_state.user_id, + "title": f"Content from {st.session_state.pipeline_file_info['name']}", + "transcript": results.get("transcription", ""), + "wisdom": results.get("wisdom_extraction", ""), + "outline": results.get("outline_creation", ""), + "article": results.get("article_creation", ""), + "social_content": results.get("social_content", ""), + "created_at": "now()", + } + ) + .execute() + ) + content_id = result.data[0]["id"] if result.data else "" if not content_id: return "Failed to save content to database" - - time.sleep(0.3) # Simulate save time + + time.sleep(0.3) return f"Content saved with ID: {content_id}" - - except Exception as e: - # Don't fail the pipeline for database errors + + except APIError as e: return f"Database save failed: {str(e)}" - + except DatabaseError as e: + return f"Unexpected database error: {str(e)}" + @property def is_active(self) -> bool: """Check if pipeline is currently active""" return st.session_state.get("pipeline_active", False) - + @property def is_complete(self) -> bool: """Check if pipeline has completed""" - return (not self.is_active and - 
st.session_state.get("pipeline_step_index", 0) >= len(self.PIPELINE_STEPS)) - + return not self.is_active and st.session_state.get("pipeline_step_index", 0) >= len(self.PIPELINE_STEPS) + @property def current_step_index(self) -> int: """Get current step index""" return st.session_state.get("pipeline_step_index", 0) - + @property def progress_percentage(self) -> float: """Get overall progress percentage""" return (self.current_step_index / len(self.PIPELINE_STEPS)) * 100 - - def get_results(self) -> Dict[str, Any]: + + def get_results(self) -> dict[str, Any]: """Get all pipeline results""" return st.session_state.get("pipeline_results", {}) - - def get_errors(self) -> Dict[str, str]: + + def get_errors(self) -> dict[str, str]: """Get any pipeline errors""" return st.session_state.get("pipeline_errors", {}) -# Global pipeline controller instance def get_pipeline_controller() -> StreamingPipelineController: """Get or create the global pipeline controller""" - if 'pipeline_controller' not in st.session_state: + if "pipeline_controller" not in st.session_state: st.session_state.pipeline_controller = StreamingPipelineController() - return st.session_state.pipeline_controller \ No newline at end of file + return st.session_state.pipeline_controller diff --git a/core/streaming_results.py b/core/streaming_results.py index fcd71b0..a8775b2 100644 --- a/core/streaming_results.py +++ b/core/streaming_results.py @@ -1,465 +1,247 @@ """ -Streaming Results Display for WhisperForge -Shows content as it's generated with beautiful Aurora styling +Streaming Results Display +========================= + +Shows content as it's generated with Aurora styling. +CSS loaded from static/css/streaming.css. 
""" -import streamlit as st -import html +import os import time -from typing import Dict, Any, Optional -from .streaming_pipeline import get_pipeline_controller import uuid -from .visible_thinking import render_thinking_stream - -# CSS for streaming results -STREAMING_RESULTS_CSS = """ - -""" -# Enhanced UI Functions for streaming results +import streamlit as st + +from .export import create_json_download, create_markdown_download, create_text_download +from .streaming_pipeline import get_pipeline_controller +from .streaming_status import ( # noqa: F401 - re-exports + show_2025_content_display, + show_enhanced_streaming_status, + show_processing_status, +) + +_STREAMING_CSS_CACHE = None + + +def _load_streaming_css(): + """Load streaming CSS from static file (cached).""" + global _STREAMING_CSS_CACHE # noqa: PLW0603 + if _STREAMING_CSS_CACHE is None: + css_path = os.path.join("static", "css", "streaming.css") + if os.path.exists(css_path): + with open(css_path, encoding="utf-8") as f: + _STREAMING_CSS_CACHE = f"" + else: + _STREAMING_CSS_CACHE = "" + return _STREAMING_CSS_CACHE + + def apply_streaming_css(): - """Apply Aurora theme CSS for streaming results""" + """Apply Aurora theme CSS for streaming results.""" + st.markdown(_load_streaming_css(), unsafe_allow_html=True) + -# Generate truly unique keys for Streamlit widgets def generate_unique_key(base_name: str) -> str: - """Generate truly unique key for Streamlit widgets to prevent DuplicateWidgetID errors""" + """Generate truly unique key for Streamlit widgets to prevent DuplicateWidgetID errors.""" return f"{base_name}_{uuid.uuid4().hex[:8]}_{int(time.time() * 1000000) % 1000000}" + def show_streaming_results(): - """Display content as it streams - REAL-TIME STREAMING IMPLEMENTATION""" + """Display content as it streams - real-time streaming implementation.""" controller = get_pipeline_controller() results = controller.get_results() - + if not results: - # Show placeholder while waiting for first results - 
st.markdown("### ๐ŸŒŠ Live Content Stream") - st.info("๐Ÿ”„ Waiting for processing to begin...") + st.markdown("### \U0001f30a Live Content Stream") + st.info("\U0001f504 Waiting for processing to begin...") return - - # Show real-time streaming content with smooth reveals + show_real_time_content_stream(results, controller) -def show_real_time_content_stream(results: Dict[str, Any], controller): - """๐Ÿš€ ENHANCED: Real-time content streaming with step-by-step reveals""" - st.markdown("### โœจ Content Generation Stream") - - # Define content sections with order and styling - content_sections = [ - ("transcription", "๐ŸŽ™๏ธ", "Audio Transcription", "Converting speech to text..."), - ("wisdom_extraction", "๐Ÿ’Ž", "Key Insights & Wisdom", "Extracting valuable insights..."), - ("research_enrichment", "๐Ÿ”", "Research Links", "Finding supporting resources..."), - ("outline_creation", "๐Ÿ“‹", "Content Outline", "Structuring content flow..."), - ("article_creation", "๐Ÿ“ฐ", "Full Article", "Writing comprehensive article..."), - ("social_content", "๐Ÿ“ฑ", "Social Media Posts", "Creating social content..."), - ("image_prompts", "๐Ÿ–ผ๏ธ", "Image Prompts", "Generating visual concepts..."), - ("database_storage", "๐Ÿ’พ", "Content Saved", "Storing to your library...") - ] - - # Show each section as it becomes available - for i, (step_key, icon, title, processing_msg) in enumerate(content_sections): - - if step_key in results and results[step_key]: - # Content is ready - show it with beautiful styling - show_completed_content_section(step_key, icon, title, results[step_key]) - - elif controller.current_step_index == i and controller.is_active: - # Currently processing this step - show loading state - show_processing_content_section(icon, title, processing_msg) - - elif controller.current_step_index > i: - # This step should be done but no content - show error state - show_error_content_section(icon, title, "Content generation failed") - - # Don't show future steps to avoid 
spoilers - - -def show_completed_content_section(step_key: str, icon: str, title: str, content: Any): - """Display completed content with beautiful Aurora styling""" - - # Convert content to string safely - content_str = str(content) if content else "No content generated" - - # Beautiful content reveal with animation - st.markdown(f""" -
-
-
-
- {icon} -

{title}

- โœ… Complete -
+def show_real_time_content_stream(results, controller): + """Show real-time streaming content with smooth reveals.""" + apply_streaming_css() + + step_map = { + "transcription": ("\U0001f399\ufe0f", "Audio Transcription"), + "wisdom_extraction": ("\U0001f48e", "Wisdom & Key Insights"), + "research_enrichment": ("\U0001f50d", "Research Enrichment"), + "outline_creation": ("\U0001f4cb", "Content Outline"), + "article_creation": ("\U0001f4f0", "Full Article"), + "social_content": ("\U0001f4f1", "Social Media Content"), + "image_prompts": ("\U0001f5bc\ufe0f", "Image Prompts"), + } + + for step_key, (icon, title) in step_map.items(): + content = results.get(step_key) + errors = controller.get_errors() if hasattr(controller, "get_errors") else {} + + if content: + show_completed_content_section(step_key, icon, title, content) + elif step_key in errors: + show_error_content_section(icon, title, errors[step_key]) + elif controller.is_active and step_key == list(step_map.keys())[controller.current_step_index]: + show_processing_content_section(icon, title, f"Generating {title.lower()}...") + + if controller.is_complete: + _show_download_options(results) + + +def show_completed_content_section(step_key: str, icon: str, title: str, content): + """Display a completed content section with Aurora styling.""" + content_str = str(content) + word_count = len(content_str.split()) + preview_length = 200 + + st.markdown( + f""" +
+
+ {icon} {title} + \u2705 Complete \u2022 {word_count} words
- - - """, unsafe_allow_html=True) - - # Show content with smart preview/expand - if len(content_str) > 600: - # Long content - show preview with expand - st.markdown("**Preview:**") - preview_text = content_str[:300] + "..." if len(content_str) > 300 else content_str - st.markdown(preview_text) - - # Expandable full content - expand_key = generate_unique_key(f"expand_{step_key}") - with st.expander("๐Ÿ“– Show Full Content", expanded=False): - st.markdown(content_str) - - # Copy button - copy_key = generate_unique_key(f"copy_{step_key}") - if st.button(f"๐Ÿ“‹ Copy {title}", key=copy_key, use_container_width=True): - st.code(content_str, language="markdown") - st.success("โœ… Copied to clipboard area!") - else: - # Short content - show directly - st.markdown(content_str) - - # Inline copy button - copy_key = generate_unique_key(f"copy_inline_{step_key}") - if st.button(f"๐Ÿ“‹ Copy {title}", key=copy_key): - st.code(content_str, language="markdown") - st.success("โœ… Copied!") - - st.markdown("---") + """, + unsafe_allow_html=True, + ) + + with st.container(): + if len(content_str) > preview_length: + with st.expander(f"View {title}", expanded=False): + st.markdown(content_str) + + col1, col2 = st.columns([1, 4]) + with col1: + copy_key = generate_unique_key(f"copy_{step_key}") + if st.button("\U0001f4cb Copy", key=copy_key, help=f"Copy {title}"): + st.code(content_str, language="markdown") + st.success("Content displayed - copy with Ctrl+A, Ctrl+C") + + # Editor section for article + if step_key == "article_creation": + st.markdown( + """ +
+
+ \u270f\ufe0f Content Editor + EDIT MODE +
+
+ """, + unsafe_allow_html=True, + ) + + edit_key = generate_unique_key("edit_article") + edited_content = st.text_area("Edit Article Content", value=content_str, height=400, key=edit_key) + + if edited_content != content_str: + save_key = generate_unique_key("save_article") + if st.button("\U0001f4be Save Changes", key=save_key): + controller = get_pipeline_controller() + controller.update_result(step_key, edited_content) + st.success("\u2705 Article updated!") + st.rerun() + else: + st.markdown( + f""" +
+ {content_str} +
+ """, + unsafe_allow_html=True, + ) def show_processing_content_section(icon: str, title: str, message: str): - """Show animated processing state for current step""" - - st.markdown(f""" -
-
-
-
- {icon} -
-

{title}

-

{message}

-
-
-
-
+ """Display a processing content section with animation.""" + st.markdown( + f""" +
+
+ {icon} {title} + + \U0001f504 Processing + +
+
+
+
+ + {message} +
- + - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def show_error_content_section(icon: str, title: str, error_msg: str): - """Show error state for failed step""" - - st.markdown(f""" -
-
- {icon} -
-

{title}

-

{error_msg}

-
- โŒ Failed + """Display an error content section.""" + st.markdown( + f""" +
+
+ {icon} {title} + + \u274c Error +
-
- """, unsafe_allow_html=True) - - -def show_2025_content_display(): - """๐Ÿš€ Ultra-modern 2025 Aurora content display for completed results""" - controller = get_pipeline_controller() - results = controller.get_results() - - if not results: - return - - # Beautiful completion header - st.markdown(""" -
-
-
-

โœจ Transformation Complete

-

Your audio has been transformed into structured, actionable content

-
-
-
-
-
+
+ {error_msg}
- - - """, unsafe_allow_html=True) - - # Display all results with modern cards - content_map = { - 'transcription': ('๐ŸŽ™๏ธ', 'Audio Transcription', 'Complete speech-to-text conversion'), - 'wisdom_extraction': ('๐Ÿ’Ž', 'Key Insights & Wisdom', 'Extracted insights and actionable takeaways'), - 'research_enrichment': ('๐Ÿ”', 'Research Enrichment', 'Supporting links and contextual information'), - 'outline_creation': ('๐Ÿ“‹', 'Content Outline', 'Structured organization and flow'), - 'article_creation': ('๐Ÿ“ฐ', 'Full Article', 'Complete written content ready for publication'), - 'social_content': ('๐Ÿ“ฑ', 'Social Media Content', 'Platform-optimized posts and captions'), - 'image_prompts': ('๐Ÿ–ผ๏ธ', 'Image Generation Prompts', 'AI-generated visual concept descriptions') - } - - for key, (icon, title, desc) in content_map.items(): - if key in results and results[key]: - show_streaming_content_card(icon, title, desc, results[key], is_live=False) + """, + unsafe_allow_html=True, + ) def show_streaming_content_card(icon: str, title: str, description: str, content: str, is_live: bool = False): - """๐ŸŽจ Beautiful streaming content card with Aurora effects""" - - # Create unique key for this card - card_key = generate_unique_key(f"stream_card_{title.lower()}") - - # Live vs complete styling + """Beautiful streaming content card with Aurora effects.""" border_color = "rgba(0, 255, 100, 0.2)" if is_live else "rgba(0, 255, 255, 0.15)" - bg_gradient = "rgba(0, 255, 100, 0.03), rgba(0, 255, 255, 0.05)" if is_live else "rgba(0, 255, 255, 0.03), rgba(64, 224, 208, 0.05)" + bg_gradient = ( + "rgba(0, 255, 100, 0.03), rgba(0, 255, 255, 0.05)" + if is_live + else "rgba(0, 255, 255, 0.03), rgba(64, 224, 208, 0.05)" + ) glow_color = "rgba(0, 255, 100, 0.4)" if is_live else "rgba(0, 255, 255, 0.3)" - + with st.container(): - st.markdown(f""" -
+ live_badge = '
LIVE
' if is_live else '
\u2713
' + st.markdown( + f""" +
{icon}

{title}

{description}

- {'
LIVE
' if is_live else '
โœ“
'} + {live_badge}
- + - """, unsafe_allow_html=True) - - # Content preview with smart truncation + """, + unsafe_allow_html=True, + ) + if len(content) > 300: preview = content[:300] + "..." - - # Expandable content - with st.expander(f"๐Ÿ“– Preview {title}", expanded=False): + + with st.expander(f"\U0001f4d6 Preview {title}", expanded=False): st.markdown(preview) - - with st.expander(f"๐Ÿ“„ Full {title}", expanded=False): + + with st.expander(f"\U0001f4c4 Full {title}", expanded=False): st.markdown(content) - - # Copy button + copy_key = generate_unique_key(f"copy_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): st.code(content, language="markdown") else: st.markdown(content) - - # Copy button for short content + copy_key = generate_unique_key(f"copy_short_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): st.code(content, language="markdown") -def _show_download_options(results: Dict[str, Any]): - """Show download options for generated content""" - - st.markdown(""" +def _show_download_options(results): + """Show download options for generated content.""" + st.markdown( + """
- ๐Ÿ“ฅ + \U0001f4e5 Download Options
- """, unsafe_allow_html=True) - - # Create downloadable content formats + """, + unsafe_allow_html=True, + ) + formats = { - "JSON": _create_json_download(results), - "Markdown": _create_markdown_download(results), - "Text": _create_text_download(results) + "JSON": create_json_download(results), + "Markdown": create_markdown_download(results), + "Text": create_text_download(results), } - - col1, col2, col3 = st.columns(3) - - with col1: - if "JSON" in formats: - st.download_button( - "๐Ÿ“„ JSON Format", - data=formats["JSON"], - file_name="whisperforge_results.json", - mime="application/json" - ) - - with col2: - if "Markdown" in formats: - st.download_button( - "๐Ÿ“ Markdown Format", - data=formats["Markdown"], - file_name="whisperforge_results.md", - mime="text/markdown" - ) - - with col3: - if "Text" in formats: - st.download_button( - "๐Ÿ“„ Text Format", - data=formats["Text"], - file_name="whisperforge_results.txt", - mime="text/plain" - ) - - -def _create_json_download(results: Dict[str, Any]) -> str: - """Create JSON format download""" - import json - return json.dumps(results, indent=2, ensure_ascii=False) + col1, col2, col3 = st.columns(3) -def _create_markdown_download(results: Dict[str, Any]) -> str: - """Create Markdown format download""" - content = "# WhisperForge Content Generation Results\n\n" - - sections = { - "transcription": "## ๐Ÿ“ Audio Transcription\n\n", - "wisdom_extraction": "## ๐Ÿ’Ž Key Insights & Wisdom\n\n", - "outline_creation": "## ๐Ÿ“‹ Content Outline\n\n", - "article_creation": "## ๐Ÿ“ฐ Full Article\n\n", - "social_content": "## ๐Ÿ“ฑ Social Media Content\n\n", - "image_prompts": "## ๐Ÿ–ผ๏ธ Image Generation Prompts\n\n" - } - - for key, header in sections.items(): - if key in results: - content += header + results[key] + "\n\n---\n\n" - - return content - - -def _create_text_download(results: Dict[str, Any]) -> str: - """Create plain text format download""" - content = "WHISPERFORGE CONTENT GENERATION RESULTS\n" - content += "=" 
* 50 + "\n\n" - - sections = { - "transcription": "AUDIO TRANSCRIPTION\n" + "-" * 20 + "\n\n", - "wisdom_extraction": "KEY INSIGHTS & WISDOM\n" + "-" * 20 + "\n\n", - "outline_creation": "CONTENT OUTLINE\n" + "-" * 15 + "\n\n", - "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", - "social_content": "SOCIAL MEDIA CONTENT\n" + "-" * 20 + "\n\n", - "image_prompts": "IMAGE GENERATION PROMPTS\n" + "-" * 25 + "\n\n" - } - - for key, header in sections.items(): - if key in results: - content += header + results[key] + "\n\n" + "=" * 50 + "\n\n" - - return content - - -# Enhanced CSS for streaming results -STREAMING_RESULTS_CSS = """ - -""" + with col1: + st.download_button( + "\U0001f4c4 JSON Format", + data=formats["JSON"], + file_name="whisperforge_results.json", + mime="application/json", + ) -def show_enhanced_streaming_status(): - """PHASE 3: ENHANCED STREAMING UX OVERHAUL - 2025 st.status() integration WITH VISIBLE THINKING""" - controller = get_pipeline_controller() - - if not controller.is_active and not controller.is_complete: - return + with col2: + st.download_button( + "\U0001f4dd Markdown Format", + data=formats["Markdown"], + file_name="whisperforge_results.md", + mime="text/markdown", + ) - current_step = controller.current_step_index - pipeline_steps = [ - ("Upload Validation", "File format & compatibility check", "upload_validation"), - ("Audio Transcription", "Speech-to-text conversion", "transcription"), - ("Wisdom Extraction", "Key insights extraction", "wisdom_extraction"), - ("Research Enrichment", "Supporting links & context", "research_enrichment"), - ("Outline Generation", "Content structure creation", "outline_creation"), - ("Article Creation", "Full article generation", "article_creation"), - ("Social Media Posts", "Platform-optimized content", "social_content"), - ("Image Prompts", "Visual concept generation", "image_prompts"), - ("Database Storage", "Secure content storage", "database_storage") - ] - - results = 
controller.get_results() - errors = controller.get_errors() if hasattr(controller, 'get_errors') else {} - - # ๐Ÿง  VISIBLE THINKING INTEGRATION - Show AI thought bubbles during processing - if controller.is_active and st.session_state.get("thinking_enabled", True): - # Create dedicated container for thinking bubbles - thinking_container = st.container() - with thinking_container: - st.markdown(""" -
-
- ๐Ÿง  - AI Thinking Process -
-
-
- - - """, unsafe_allow_html=True) - - # Render the actual thinking stream - try: - render_thinking_stream(thinking_container) - except Exception as e: - st.info(f"๐Ÿ’ญ AI is thinking... (thinking system loading)") - - # Main processing status container with st.status() - if controller.is_active: - current_title, current_desc, current_key = pipeline_steps[current_step] - - with st.status(f"๐Ÿ”„ {current_title}", expanded=True) as status: - st.write(f"๐Ÿ“ **{current_desc}**") - - # Progress bar - progress = (current_step / len(pipeline_steps)) * 100 - st.progress(progress / 100, text=f"Progress: {progress:.0f}% ({current_step + 1}/{len(pipeline_steps)})") - - # Show previous completed steps with content preview - for i in range(current_step): - title, _, step_key = pipeline_steps[i] - if step_key in results: - st.write(f"โœ… {title} - Complete") - # Show brief preview of generated content - if step_key in results and results[step_key] and step_key not in ["upload_validation", "database_storage"]: - preview = str(results[step_key])[:100] + "..." if len(str(results[step_key])) > 100 else str(results[step_key]) - st.caption(f"Preview: {preview}") - elif step_key in errors: - st.write(f"โŒ {title} - Error: {errors[step_key]}") - else: - st.write(f"โœ… {title} - Complete") - - # Current step with enhanced styling - st.markdown(f""" -
- ๐Ÿ”„ {current_title} - {current_desc}... -
- """, unsafe_allow_html=True) - - # Show preview of remaining steps - for i in range(current_step + 1, len(pipeline_steps)): - title, _, _ = pipeline_steps[i] - st.write(f"โญ• {title} - Pending") - - # Update status based on completion - if current_step >= len(pipeline_steps) - 1: - status.update(label="โœ… Processing Complete!", state="complete", expanded=False) - else: - status.update(label=f"๐Ÿ”„ {current_title}", state="running") - - elif controller.is_complete: - # Completion status with beautiful summary - with st.status("โœ… All processing complete!", state="complete", expanded=False): - st.success("Your audio has been transformed into comprehensive content!") - - # Enhanced completion summary - st.markdown(""" -
-

๐ŸŒŸ Generation Summary

-
- - - """, unsafe_allow_html=True) - - col1, col2, col3 = st.columns(3) - - with col1: - completed_count = len([r for r in results.values() if r]) - st.metric("Steps Completed", completed_count, len(pipeline_steps)) - with col2: - error_count = len(errors) - st.metric("Errors", error_count, delta_color="inverse") - with col3: - success_rate = ((completed_count - error_count) / len(pipeline_steps)) * 100 - st.metric("Success Rate", f"{success_rate:.1f}%") - - # Show content type breakdown - if results: - st.markdown("**Generated Content Types:**") - content_types = [] - if results.get('transcription'): content_types.append("๐Ÿ“ Transcription") - if results.get('wisdom_extraction'): content_types.append("๐Ÿ’Ž Insights") - if results.get('research_enrichment'): content_types.append("๐Ÿ” Research") - if results.get('outline_creation'): content_types.append("๐Ÿ“‹ Outline") - if results.get('article_creation'): content_types.append("๐Ÿ“ฐ Article") - if results.get('social_content'): content_types.append("๐Ÿ“ฑ Social Posts") - if results.get('image_prompts'): content_types.append("๐Ÿ–ผ๏ธ Image Prompts") - - if content_types: - st.write(" โ€ข ".join(content_types)) - -def show_processing_status(): - """Display ultra-modern Aurora pipeline with real-time visibility - WRAPPER""" - show_enhanced_streaming_status() # Use the new enhanced version \ No newline at end of file + with col3: + st.download_button( + "\U0001f4c4 Text Format", + data=formats["Text"], + file_name="whisperforge_results.txt", + mime="text/plain", + ) diff --git a/core/streaming_status.py b/core/streaming_status.py new file mode 100644 index 0000000..316e1a1 --- /dev/null +++ b/core/streaming_status.py @@ -0,0 +1,382 @@ +""" +Streaming Status Display +======================== + +Enhanced streaming UX with st.status() integration and visible thinking. +Includes the 2025 content display and pipeline status visualization. 
+""" + +import streamlit as st + +from .exceptions import WhisperForgeError +from .streaming_pipeline import get_pipeline_controller + + +def show_2025_content_display(): + """Ultra-modern 2025 Aurora content display for completed results.""" + from .streaming_results import show_streaming_content_card + + controller = get_pipeline_controller() + results = controller.get_results() + + if not results: + return + + st.markdown( + """ +
+
+
+

\u2728 Transformation Complete

+

Your audio has been transformed into structured, actionable content

+
+
+
+
+
+
+
+ + + """, + unsafe_allow_html=True, + ) + + content_map = { + "transcription": ("\U0001f399\ufe0f", "Audio Transcription", "Complete speech-to-text conversion"), + "wisdom_extraction": ("\U0001f48e", "Key Insights & Wisdom", "Extracted insights and actionable takeaways"), + "research_enrichment": ("\U0001f50d", "Research Enrichment", "Supporting links and contextual information"), + "outline_creation": ("\U0001f4cb", "Content Outline", "Structured organization and flow"), + "article_creation": ("\U0001f4f0", "Full Article", "Complete written content ready for publication"), + "social_content": ("\U0001f4f1", "Social Media Content", "Platform-optimized posts and captions"), + "image_prompts": ("\U0001f5bc\ufe0f", "Image Generation Prompts", "AI-generated visual concept descriptions"), + } + + for key, (icon, title, desc) in content_map.items(): + if key in results and results[key]: + show_streaming_content_card(icon, title, desc, results[key], is_live=False) + + +def show_enhanced_streaming_status(): + """Enhanced streaming UX with st.status() integration and visible thinking.""" + controller = get_pipeline_controller() + + if not controller.is_active and not controller.is_complete: + return + + current_step = controller.current_step_index + pipeline_steps = [ + ("Upload Validation", "File format & compatibility check", "upload_validation"), + ("Audio Transcription", "Speech-to-text conversion", "transcription"), + ("Wisdom Extraction", "Key insights extraction", "wisdom_extraction"), + ("Research Enrichment", "Supporting links & context", "research_enrichment"), + ("Outline Generation", "Content structure creation", "outline_creation"), + ("Article Creation", "Full article generation", "article_creation"), + ("Social Media Posts", "Platform-optimized content", "social_content"), + ("Image Prompts", "Visual concept generation", "image_prompts"), + ("Database Storage", "Secure content storage", "database_storage"), + ] + + results = controller.get_results() + errors 
= controller.get_errors() if hasattr(controller, "get_errors") else {} + + # Visible thinking integration + if controller.is_active and st.session_state.get("thinking_enabled", True): + thinking_container = st.container() + with thinking_container: + st.markdown( + """ +
+
+ \U0001f9e0 + AI Thinking Process +
+
+
+ + + """, + unsafe_allow_html=True, + ) + + try: + from .visible_thinking import render_thinking_stream + + render_thinking_stream(thinking_container) + except (WhisperForgeError, ImportError): + st.info("\U0001f4ad AI is thinking... (thinking system loading)") + + # Main processing status + if controller.is_active: + current_title, current_desc, _current_key = pipeline_steps[current_step] + + with st.status(f"\U0001f504 {current_title}", expanded=True) as status: + st.write(f"\U0001f4dd **{current_desc}**") + + progress = (current_step / len(pipeline_steps)) * 100 + st.progress(progress / 100, text=f"Progress: {progress:.0f}% ({current_step + 1}/{len(pipeline_steps)})") + + for i in range(current_step): + title, _, step_key = pipeline_steps[i] + if step_key in results: + st.write(f"\u2705 {title} - Complete") + if ( + step_key in results + and results[step_key] + and step_key not in ["upload_validation", "database_storage"] + ): + result_str = str(results[step_key]) + preview = result_str[:100] + "..." if len(result_str) > 100 else result_str + st.caption(f"Preview: {preview}") + elif step_key in errors: + st.write(f"\u274c {title} - Error: {errors[step_key]}") + else: + st.write(f"\u2705 {title} - Complete") + + st.markdown( + f""" +
+ \U0001f504 {current_title} - {current_desc}... +
+ """, + unsafe_allow_html=True, + ) + + for i in range(current_step + 1, len(pipeline_steps)): + title, _, _ = pipeline_steps[i] + st.write(f"\u2b55 {title} - Pending") + + if current_step >= len(pipeline_steps) - 1: + status.update(label="\u2705 Processing Complete!", state="complete", expanded=False) + else: + status.update(label=f"\U0001f504 {current_title}", state="running") + + elif controller.is_complete: + with st.status("\u2705 All processing complete!", state="complete", expanded=False): + st.success("Your audio has been transformed into comprehensive content!") + + st.markdown( + """ +
+

\U0001f31f Generation Summary

+
+ + + """, + unsafe_allow_html=True, + ) + + col1, col2, col3 = st.columns(3) + + with col1: + completed_count = len([r for r in results.values() if r]) + st.metric("Steps Completed", completed_count, len(pipeline_steps)) + with col2: + error_count = len(errors) + st.metric("Errors", error_count, delta_color="inverse") + with col3: + success_rate = ((completed_count - error_count) / len(pipeline_steps)) * 100 + st.metric("Success Rate", f"{success_rate:.1f}%") + + if results: + st.markdown("**Generated Content Types:**") + content_types = [] + if results.get("transcription"): + content_types.append("\U0001f4dd Transcription") + if results.get("wisdom_extraction"): + content_types.append("\U0001f48e Insights") + if results.get("research_enrichment"): + content_types.append("\U0001f50d Research") + if results.get("outline_creation"): + content_types.append("\U0001f4cb Outline") + if results.get("article_creation"): + content_types.append("\U0001f4f0 Article") + if results.get("social_content"): + content_types.append("\U0001f4f1 Social Posts") + if results.get("image_prompts"): + content_types.append("\U0001f5bc\ufe0f Image Prompts") + + if content_types: + st.write(" \u2022 ".join(content_types)) + + +def show_processing_status(): + """Display Aurora pipeline with real-time visibility - wrapper.""" + show_enhanced_streaming_status() diff --git a/core/streamlit_monitoring.py b/core/streamlit_monitoring.py index 9500aeb..8d2f4ed 100644 --- a/core/streamlit_monitoring.py +++ b/core/streamlit_monitoring.py @@ -2,8 +2,8 @@ from __future__ import annotations +from collections.abc import Callable from functools import wraps -from typing import Callable from .monitoring import structured_logger @@ -43,4 +43,3 @@ def wrapper(*args, **kwargs): return wrapper return decorator - diff --git a/core/styling.py b/core/styling.py index 91b1c4e..ab970ba 100644 --- a/core/styling.py +++ b/core/styling.py @@ -5,24 +5,29 @@ import streamlit as st + def apply_aurora_theme(): """Apply the 
complete Aurora theme by loading our comprehensive CSS file""" # Load the comprehensive Aurora CSS file css_file_path = "static/css/main.css" - + try: - with open(css_file_path, 'r', encoding='utf-8') as f: + with open(css_file_path, encoding="utf-8") as f: css_content = f.read() - - st.markdown(f""" + + st.markdown( + f""" - """, unsafe_allow_html=True) - + """, + unsafe_allow_html=True, + ) + except FileNotFoundError: # Fallback to basic Aurora styling if file not found - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) + def create_aurora_header(): """Create a flagship Aurora header with integrated navigation and logout - REBUILT FOR 2025""" - + # First, inject the CSS using st.markdown() - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) - + """, + unsafe_allow_html=True, + ) + # Then render the HTML structure using st.markdown() - st.markdown(""" + st.markdown( + """
@@ -175,19 +187,23 @@ def create_aurora_header():
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) + def create_aurora_nav_buttons(): """Create integrated navigation buttons for the Aurora header""" - + # Enhanced styling for integrated nav buttons - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) pages = [ ("Processing", "Content Pipeline"), - ("History", "Content History"), + ("History", "Content History"), ("Settings", "Settings"), - ("Status", "Health Check") + ("Status", "Health Check"), ] - - current_page = st.session_state.get('current_page', 'Content Pipeline') - + + current_page = st.session_state.get("current_page", "Content Pipeline") + # Create horizontal layout for nav buttons nav_cols = st.columns([1, 1, 1, 1, 0.8]) # Last column smaller for logout - + for i, (page_name, page_key) in enumerate(pages): with nav_cols[i]: st.markdown('
', unsafe_allow_html=True) if st.button( - page_name, - key=f"nav_{page_name}", + page_name, + key=f"nav_{page_name}", type="primary" if page_key == current_page else "secondary", - use_container_width=True + use_container_width=True, ): st.session_state.current_page = page_key st.rerun() - st.markdown('
', unsafe_allow_html=True) - + st.markdown("
", unsafe_allow_html=True) + # Logout button in the last column with nav_cols[4]: st.markdown('
', unsafe_allow_html=True) if st.button("Sign Out", key="logout_btn", use_container_width=True): return True # Signal logout - st.markdown('
', unsafe_allow_html=True) - + st.markdown("
", unsafe_allow_html=True) + return False # No logout + def create_aurora_progress_card(title, current_step, total_steps, description=""): """Create a beautiful Aurora progress card""" progress = (current_step / total_steps) * 100 - st.markdown(f""" + st.markdown( + f"""

{title}

@@ -277,18 +297,21 @@ def create_aurora_progress_card(title, current_step, total_steps, description="" {current_step}/{total_steps} steps โ€ข {progress:.0f}%
- +
- - {f'

{description}

' if description else ''} + + {f'

{description}

' if description else ""}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) + def create_aurora_step_card(title, description, status="pending", progress=0): """Create a beautiful step card with Aurora styling""" - + # Determine icon and styling based on status if status == "completed": icon = "โœ…" @@ -299,7 +322,7 @@ def create_aurora_step_card(title, description, status="pending", progress=0): else: icon = "โญ•" container_class = "aurora-step-container" - + progress_bar = "" if status == "processing" and progress > 0: progress_bar = f""" @@ -325,8 +348,9 @@ def create_aurora_step_card(title, description, status="pending", progress=0):
""" - - st.markdown(f""" + + st.markdown( + f"""
@@ -336,15 +360,18 @@ def create_aurora_step_card(title, description, status="pending", progress=0):

{description}

-
{progress if status == 'processing' else (100 if status == 'completed' else 0)}%
+
{progress if status == "processing" else (100 if status == "completed" else 0)}%
{progress_bar}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) + def create_aurora_content_card(title, content, content_type="text"): """Create a beautiful content display card""" - + # Truncate content if too long if len(content) > 500: preview_content = content[:500] + "..." @@ -352,8 +379,9 @@ def create_aurora_content_card(title, content, content_type="text"): else: preview_content = content show_full = False - - st.markdown(f""" + + st.markdown( + f"""

{title}

@@ -364,20 +392,26 @@ def create_aurora_content_card(title, content, content_type="text"): white-space: pre-wrap; ">{preview_content}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) if show_full: with st.expander("Show full content"): st.markdown(content) + # Aurora Component Utilities + + class AuroraComponents: """Beautiful Aurora-themed UI components""" - + @staticmethod def success_message(message): """Aurora success message""" - st.markdown(f""" + st.markdown( + f"""
โœ… {message}
- """, unsafe_allow_html=True) - + """, + unsafe_allow_html=True, + ) + @staticmethod def warning_message(message): """Aurora warning message""" - st.markdown(f""" + st.markdown( + f"""
โš ๏ธ {message}
- """, unsafe_allow_html=True) - + """, + unsafe_allow_html=True, + ) + @staticmethod def error_message(message): """Aurora error message""" - st.markdown(f""" + st.markdown( + f"""
โŒ {message}
- """, unsafe_allow_html=True) \ No newline at end of file + """, + unsafe_allow_html=True, + ) diff --git a/core/supabase_integration.py b/core/supabase_integration.py index 812d53f..514942e 100644 --- a/core/supabase_integration.py +++ b/core/supabase_integration.py @@ -6,18 +6,23 @@ Designed to work with MCP (Model Context Protocol) for enhanced AI integration. """ -import os import logging -from typing import Dict, List, Optional, Any +import os from datetime import datetime, timedelta -from supabase import create_client, Client +from typing import Any + from dotenv import load_dotenv +from postgrest.exceptions import APIError +from supabase import Client, create_client + +from .constants import DEFAULT_ANALYTICS_DAYS, DEFAULT_CONTENT_QUERY_LIMIT, DEFAULT_USAGE_QUOTA_MINUTES +from .exceptions import DatabaseError +from .utils import hash_password # Load environment variables load_dotenv() # Import hash_password function from utils instead of app -from .utils import hash_password logger = logging.getLogger(__name__) @@ -26,97 +31,112 @@ class SupabaseClient: """ Supabase client wrapper with MCP integration capabilities """ - + def __init__(self): self.url = os.getenv("SUPABASE_URL") # Handle both SUPABASE_KEY and SUPABASE_ANON_KEY for backward compatibility self.key = os.getenv("SUPABASE_ANON_KEY") or os.getenv("SUPABASE_KEY") self.service_role_key = os.getenv("SUPABASE_SERVICE_ROLE_KEY") - + if not self.url or not self.key: - raise ValueError("SUPABASE_URL and SUPABASE_ANON_KEY (or SUPABASE_KEY) must be set in environment variables") - + raise ValueError( + "SUPABASE_URL and SUPABASE_ANON_KEY (or SUPABASE_KEY) must be set in environment variables" + ) + self.client: Client = create_client(self.url, self.key) - self.admin_client: Optional[Client] = None - + self.admin_client: Client | None = None + if self.service_role_key: self.admin_client = create_client(self.url, self.service_role_key) - + logger.info("Supabase client initialized successfully") - + def 
test_connection(self) -> bool: """Test the Supabase connection""" try: # Try a simple query to test connectivity - result = self.client.table("users").select("id").limit(1).execute() + self.client.table("users").select("id").limit(1).execute() logger.info("Supabase connection test successful") return True - except Exception as e: + except APIError as e: logger.error(f"Supabase connection test failed: {e}") return False - + except DatabaseError as e: + logger.error(f"Unexpected error in Supabase connection test: {e}") + return False + # User Management - def create_user(self, email: str, password: str, metadata: Dict[str, Any] = None) -> Dict[str, Any]: + def create_user(self, email: str, password: str, metadata: dict[str, Any] = None) -> dict[str, Any]: """Create a new user""" try: # Hash the password before storing hashed_password = hash_password(password) - + user_data = { "email": email, "password": hashed_password, # Store hashed password "created_at": datetime.now().isoformat(), - "usage_quota": 60, # Default 60 minutes per month + "usage_quota": DEFAULT_USAGE_QUOTA_MINUTES, "usage_current": 0, "is_admin": False, - "subscription_tier": "free" + "subscription_tier": "free", } - + if metadata: user_data.update(metadata) - + result = self.client.table("users").insert(user_data).execute() logger.info(f"User created successfully: {email}") return result.data[0] if result.data else {} - except Exception as e: + except APIError as e: logger.error(f"Error creating user: {e}") raise - - def get_user(self, user_id: int) -> Optional[Dict[str, Any]]: + except DatabaseError as e: + logger.error(f"Unexpected error creating user: {e}") + raise + + def get_user(self, user_id: int) -> dict[str, Any] | None: """Get user by ID""" try: result = self.client.table("users").select("*").eq("id", user_id).execute() return result.data[0] if result.data else None - except Exception as e: + except APIError as e: logger.error(f"Error fetching user: {e}") return None - - def 
get_user_by_email(self, email: str) -> Optional[Dict[str, Any]]: + except DatabaseError as e: + logger.error(f"Unexpected error fetching user: {e}") + return None + + def get_user_by_email(self, email: str) -> dict[str, Any] | None: """Get user by email""" try: result = self.client.table("users").select("*").eq("email", email).execute() return result.data[0] if result.data else None - except Exception as e: + except APIError as e: logger.error(f"Error fetching user by email: {e}") return None - + except DatabaseError as e: + logger.error(f"Unexpected error fetching user by email: {e}") + return None + def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: """Update user's current usage""" try: # Convert seconds to minutes usage_minutes = usage_seconds / 60 - - result = self.client.table("users").update({ - "usage_current": usage_minutes - }).eq("id", user_id).execute() - + + result = self.client.table("users").update({"usage_current": usage_minutes}).eq("id", user_id).execute() + return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error updating user usage: {e}") return False - + except DatabaseError as e: + logger.error(f"Unexpected error updating user usage: {e}") + return False + # Content Storage - def save_content(self, user_id: int, content_data: Dict[str, Any]) -> Optional[str]: + def save_content(self, user_id: int, content_data: dict[str, Any]) -> str | None: """Save generated content to database""" try: content_record = { @@ -130,55 +150,76 @@ def save_content(self, user_id: int, content_data: Dict[str, Any]) -> Optional[s "article": content_data.get("article", ""), "metadata": content_data.get("metadata", {}), "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } - + result = self.client.table("content").insert(content_record).execute() - + if result.data: content_id = result.data[0]["id"] logger.info(f"Content saved 
successfully with ID: {content_id}") return content_id return None - except Exception as e: + except APIError as e: logger.error(f"Error saving content: {e}") return None - - def get_user_content(self, user_id: int, limit: int = 50) -> List[Dict[str, Any]]: + except DatabaseError as e: + logger.error(f"Unexpected error saving content: {e}") + return None + + def get_user_content(self, user_id: int, limit: int = DEFAULT_CONTENT_QUERY_LIMIT) -> list[dict[str, Any]]: """Get user's content history""" try: - result = self.client.table("content").select("*").eq("user_id", user_id).order("created_at", desc=True).limit(limit).execute() + result = ( + self.client.table("content") + .select("*") + .eq("user_id", user_id) + .order("created_at", desc=True) + .limit(limit) + .execute() + ) return result.data or [] - except Exception as e: + except APIError as e: logger.error(f"Error fetching user content: {e}") return [] - + except DatabaseError as e: + logger.error(f"Unexpected error fetching user content: {e}") + return [] + # API Key Management - def save_user_api_keys(self, user_id: int, api_keys: Dict[str, str]) -> bool: + def save_user_api_keys(self, user_id: int, api_keys: dict[str, str]) -> bool: """Save encrypted API keys for user""" try: # In production, encrypt the API keys before storing - result = self.client.table("users").update({ - "api_keys": api_keys, - "updated_at": datetime.now().isoformat() - }).eq("id", user_id).execute() - + result = ( + self.client.table("users") + .update({"api_keys": api_keys, "updated_at": datetime.now().isoformat()}) + .eq("id", user_id) + .execute() + ) + return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving API keys: {e}") return False - - def get_user_api_keys(self, user_id: int) -> Dict[str, str]: + except DatabaseError as e: + logger.error(f"Unexpected error saving API keys: {e}") + return False + + def get_user_api_keys(self, user_id: int) -> dict[str, str]: """Get user's API 
keys""" try: result = self.client.table("users").select("api_keys").eq("id", user_id).execute() if result.data and result.data[0]["api_keys"]: return result.data[0]["api_keys"] return {} - except Exception as e: + except APIError as e: logger.error(f"Error fetching API keys: {e}") return {} - + except DatabaseError as e: + logger.error(f"Unexpected error fetching API keys: {e}") + return {} + # Knowledge Base Management def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> bool: """Save knowledge base file for user""" @@ -188,43 +229,58 @@ def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> "filename": filename, "content": content, "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } - + # Check if file already exists for this user - existing = self.client.table("knowledge_base").select("id").eq("user_id", user_id).eq("filename", filename).execute() - + existing = ( + self.client.table("knowledge_base") + .select("id") + .eq("user_id", user_id) + .eq("filename", filename) + .execute() + ) + if existing.data: # Update existing - result = self.client.table("knowledge_base").update({ - "content": content, - "updated_at": datetime.now().isoformat() - }).eq("user_id", user_id).eq("filename", filename).execute() + result = ( + self.client.table("knowledge_base") + .update({"content": content, "updated_at": datetime.now().isoformat()}) + .eq("user_id", user_id) + .eq("filename", filename) + .execute() + ) else: # Create new result = self.client.table("knowledge_base").insert(kb_record).execute() - + return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving knowledge base file: {e}") return False - - def get_user_knowledge_base(self, user_id: int) -> Dict[str, str]: + except DatabaseError as e: + logger.error(f"Unexpected error saving knowledge base file: {e}") + return False + + def 
get_user_knowledge_base(self, user_id: int) -> dict[str, str]: """Get user's knowledge base files""" try: result = self.client.table("knowledge_base").select("filename, content").eq("user_id", user_id).execute() - + kb_dict = {} for item in result.data or []: # Convert filename to display name - name = item["filename"].replace('.txt', '').replace('.md', '').replace('_', ' ').title() + name = item["filename"].replace(".txt", "").replace(".md", "").replace("_", " ").title() kb_dict[name] = item["content"] - + return kb_dict - except Exception as e: + except APIError as e: logger.error(f"Error fetching knowledge base: {e}") return {} - + except DatabaseError as e: + logger.error(f"Unexpected error fetching knowledge base: {e}") + return {} + # Custom Prompts Management def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bool: """Save custom prompt for user""" @@ -234,43 +290,58 @@ def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bo "prompt_type": prompt_type, "content": content, "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } - + # Check if prompt already exists for this user - existing = self.client.table("custom_prompts").select("id").eq("user_id", user_id).eq("prompt_type", prompt_type).execute() - + existing = ( + self.client.table("custom_prompts") + .select("id") + .eq("user_id", user_id) + .eq("prompt_type", prompt_type) + .execute() + ) + if existing.data: # Update existing - result = self.client.table("custom_prompts").update({ - "content": content, - "updated_at": datetime.now().isoformat() - }).eq("user_id", user_id).eq("prompt_type", prompt_type).execute() + result = ( + self.client.table("custom_prompts") + .update({"content": content, "updated_at": datetime.now().isoformat()}) + .eq("user_id", user_id) + .eq("prompt_type", prompt_type) + .execute() + ) else: # Create new result = 
self.client.table("custom_prompts").insert(prompt_record).execute() - + return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving custom prompt: {e}") return False - - def get_user_prompts(self, user_id: int) -> Dict[str, str]: + except DatabaseError as e: + logger.error(f"Unexpected error saving custom prompt: {e}") + return False + + def get_user_prompts(self, user_id: int) -> dict[str, str]: """Get user's custom prompts""" try: result = self.client.table("custom_prompts").select("prompt_type, content").eq("user_id", user_id).execute() - + prompts_dict = {} for item in result.data or []: prompts_dict[item["prompt_type"]] = item["content"] - + return prompts_dict - except Exception as e: + except APIError as e: logger.error(f"Error fetching custom prompts: {e}") return {} - + except DatabaseError as e: + logger.error(f"Unexpected error fetching custom prompts: {e}") + return {} + # Analytics and Monitoring - def log_pipeline_execution(self, user_id: int, pipeline_data: Dict[str, Any]) -> bool: + def log_pipeline_execution(self, user_id: int, pipeline_data: dict[str, Any]) -> bool: """Log pipeline execution for analytics""" try: log_record = { @@ -280,125 +351,66 @@ def log_pipeline_execution(self, user_id: int, pipeline_data: Dict[str, Any]) -> "ai_provider": pipeline_data.get("ai_provider", "unknown"), "model": pipeline_data.get("model", "unknown"), "success": pipeline_data.get("success", False), - "error_message": pipeline_data.get("error", None), + "error_message": pipeline_data.get("error"), "metadata": pipeline_data.get("metadata", {}), - "created_at": datetime.now().isoformat() + "created_at": datetime.now().isoformat(), } - + result = self.client.table("pipeline_logs").insert(log_record).execute() return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error logging pipeline execution: {e}") return False - - def get_user_analytics(self, user_id: int, days: int = 30) -> Dict[str, 
Any]: + except DatabaseError as e: + logger.error(f"Unexpected error logging pipeline execution: {e}") + return False + + def get_user_analytics(self, user_id: int, days: int = DEFAULT_ANALYTICS_DAYS) -> dict[str, Any]: """Get user analytics for the last N days""" try: start_date = (datetime.now() - timedelta(days=days)).isoformat() - - result = self.client.table("pipeline_logs").select("*").eq("user_id", user_id).gte("created_at", start_date).execute() - + + result = ( + self.client.table("pipeline_logs") + .select("*") + .eq("user_id", user_id) + .gte("created_at", start_date) + .execute() + ) + logs = result.data or [] - + analytics = { "total_executions": len(logs), "successful_executions": len([log for log in logs if log["success"]]), "total_duration": sum(log["duration_seconds"] for log in logs), "ai_providers_used": list(set(log["ai_provider"] for log in logs)), "most_used_model": self._get_most_frequent(logs, "model"), - "average_duration": sum(log["duration_seconds"] for log in logs) / len(logs) if logs else 0 + "average_duration": sum(log["duration_seconds"] for log in logs) / len(logs) if logs else 0, } - + return analytics - except Exception as e: + except APIError as e: logger.error(f"Error fetching user analytics: {e}") return {} - - def _get_most_frequent(self, logs: List[Dict], field: str) -> str: + except DatabaseError as e: + logger.error(f"Unexpected error fetching user analytics: {e}") + return {} + + def _get_most_frequent(self, logs: list[dict], field: str) -> str: """Helper to get most frequent value from logs""" if not logs: return "unknown" - + from collections import Counter + values = [log.get(field, "unknown") for log in logs] return Counter(values).most_common(1)[0][0] -# MCP Integration Functions -class MCPSupabaseIntegration: - """ - Model Context Protocol integration for Supabase - Provides AI models with context about user data and preferences - """ - - def __init__(self, supabase_client: SupabaseClient): - self.db = supabase_client 
- - def get_user_context(self, user_id: int) -> Dict[str, Any]: - """Get comprehensive user context for AI models""" - try: - # Get user profile - user = self.db.get_user(user_id) - if not user: - return {} - - # Get user's knowledge base - knowledge_base = self.db.get_user_knowledge_base(user_id) - - # Get user's custom prompts - custom_prompts = self.db.get_user_prompts(user_id) - - # Get recent content history (for style learning) - recent_content = self.db.get_user_content(user_id, limit=10) - - # Get user analytics - analytics = self.db.get_user_analytics(user_id, days=30) - - context = { - "user_profile": { - "subscription_tier": user.get("subscription_tier", "free"), - "usage_quota": user.get("usage_quota", 60), - "usage_current": user.get("usage_current", 0), - "created_at": user.get("created_at") - }, - "knowledge_base": knowledge_base, - "custom_prompts": custom_prompts, - "content_history": recent_content, - "analytics": analytics, - "preferences": { - "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] if analytics.get("ai_providers_used") else "openai", - "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo") - } - } - - return context - except Exception as e: - logger.error(f"Error getting user context for MCP: {e}") - return {} - - def update_context_from_interaction(self, user_id: int, interaction_data: Dict[str, Any]) -> bool: - """Update user context based on AI interaction results""" - try: - # Log the interaction - self.db.log_pipeline_execution(user_id, interaction_data) - - # Update usage if provided - if "duration_seconds" in interaction_data: - self.db.update_user_usage(user_id, interaction_data["duration_seconds"]) - - # Save generated content if provided - if "content" in interaction_data: - self.db.save_content(user_id, interaction_data["content"]) - - return True - except Exception as e: - logger.error(f"Error updating context from interaction: {e}") - return False - - # Global instance 
_supabase_client = None -_mcp_integration = None + def get_supabase_client() -> SupabaseClient: """Get or create Supabase client instance""" @@ -407,9 +419,13 @@ def get_supabase_client() -> SupabaseClient: _supabase_client = SupabaseClient() return _supabase_client -def get_mcp_integration() -> MCPSupabaseIntegration: - """Get or create MCP integration instance""" - global _mcp_integration - if _mcp_integration is None: - _mcp_integration = MCPSupabaseIntegration(get_supabase_client()) - return _mcp_integration \ No newline at end of file + +# Re-exports for backward compatibility (extracted to core.mcp_integration) +from .mcp_integration import MCPSupabaseIntegration, get_mcp_integration # noqa: E402 + +__all__ = [ + "SupabaseClient", + "get_supabase_client", + "MCPSupabaseIntegration", + "get_mcp_integration", +] diff --git a/core/utils.py b/core/utils.py index 0ed9843..35429f3 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,150 +1,100 @@ -""" -Shared utilities for WhisperForge -Contains functions that are shared between the original app and Supabase version +"""Backward-compatible re-export shim for ``core.utils``. + +.. deprecated:: + This module is **deprecated** and exists only so that existing import + sites (e.g. ``from .utils import hash_password``) continue to work + without modification. All public symbols have been relocated to + focused modules: + + * **Security** (``core.security``): ``hash_password``, + ``verify_password``, ``legacy_hash_password`` + * **Path safety** (``core.path_safety``): ``safe_path`` + * **Prompts** (``core.prompts``): ``DEFAULT_PROMPTS``, + ``load_prompt_from_file``, ``format_knowledge_base_context``, + ``get_enhanced_prompt`` + * **API clients** (``core.api_clients``): ``get_openai_client``, + ``get_anthropic_client``, ``get_grok_api_key``, ``get_grok_client`` + + New code should import directly from those modules. 
""" -import hashlib -import bcrypt -import os -import time import logging -import requests -from typing import Dict, Optional, Any -from pathlib import Path +import warnings + +# ---- Re-exports from core.api_clients --------------------------------- +from .api_clients import ( + get_anthropic_client, + get_grok_api_key, + get_grok_client, + get_openai_client, +) + +# ---- Re-exports from core.path_safety --------------------------------- +from .path_safety import safe_path + +# ---- Re-exports from core.prompts ------------------------------------- +from .prompts import ( + DEFAULT_PROMPTS, + format_knowledge_base_context, + get_enhanced_prompt, + load_prompt_from_file, +) + +# ---- Re-exports from core.security ------------------------------------ +from .security import hash_password, legacy_hash_password, verify_password logger = logging.getLogger(__name__) -def hash_password(password: str) -> str: - """Hash a password using bcrypt with salt""" - # Generate salt and hash password - salt = bcrypt.gensalt() - hashed = bcrypt.hashpw(password.encode('utf-8'), salt) - return hashed.decode('utf-8') - -def verify_password(password: str, hashed: str) -> bool: - """Verify a password against its hash""" - try: - return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8')) - except Exception as e: - logger.error(f"Password verification error: {e}") - return False - -# Legacy SHA-256 hash function for migration purposes -def legacy_hash_password(password: str) -> str: - """Legacy SHA-256 hash - DEPRECATED, use for migration only""" - return hashlib.sha256(password.encode()).hexdigest() - -# Default prompts for content generation (DEPRECATED - use load_prompt_from_file) -DEFAULT_PROMPTS = { - "wisdom_extraction": """Extract key insights, lessons, and wisdom from the transcript. Focus on actionable takeaways and profound realizations.""", - "summary": """## Summary -Create a concise summary of the main points and key messages in the transcript. 
-Capture the essence of the content in a few paragraphs.""", - "outline_creation": """Create a detailed outline for an article or blog post based on the transcript and extracted wisdom. Include major sections and subsections.""", - "social_media": """Generate engaging social media posts for different platforms (Twitter, LinkedIn, Instagram) based on the key insights.""", - "image_prompts": """Create detailed image generation prompts that visualize the key concepts and metaphors from the content.""", - "article_writing": """Write a comprehensive article based on the provided outline and wisdom. Maintain a clear narrative flow and engaging style.""", - "seo_analysis": """Analyze the content from an SEO perspective and provide optimization recommendations for better search visibility while maintaining content quality.""", - "editor_persona": """You are a professional content editor. Provide constructive feedback to improve the content quality.""" -} - -def load_prompt_from_file(prompt_type: str, user_id: str = None) -> str: - """Load prompt from markdown file with user override support""" - try: - # Check for user-specific prompt first (for paid tiers) - if user_id: - user_prompt_path = Path(f"prompts/users/{user_id}/{prompt_type}.md") - if user_prompt_path.exists(): - return user_prompt_path.read_text(encoding='utf-8').strip() - - # Load default prompt - default_prompt_path = Path(f"prompts/default/{prompt_type}.md") - if default_prompt_path.exists(): - return default_prompt_path.read_text(encoding='utf-8').strip() - - # Fallback to hardcoded prompts - fallback = DEFAULT_PROMPTS.get(prompt_type, "") - if fallback: - logger.warning(f"Using fallback prompt for {prompt_type} - consider creating markdown file") - return fallback - - logger.error(f"No prompt found for type: {prompt_type}") - return f"Please provide content for {prompt_type.replace('_', ' ')}." 
- - except Exception as e: - logger.error(f"Error loading prompt {prompt_type}: {e}") - return DEFAULT_PROMPTS.get(prompt_type, f"Error loading {prompt_type} prompt.") - -def format_knowledge_base_context(knowledge_base: Dict[str, str]) -> str: - """Format knowledge base content for auto-concatenation to prompts""" - if not knowledge_base: - return "" - - context_parts = ["## Knowledge Base Context\n"] - context_parts.append("Use the following knowledge base to inform your analysis and maintain consistency with established perspectives:\n") - - for name, content in knowledge_base.items(): - context_parts.append(f"### {name}") - context_parts.append(content) - context_parts.append("") # Empty line for separation - - context_parts.append("---\n") - context_parts.append("## Your Task\n") - - return "\n".join(context_parts) - -def get_enhanced_prompt(prompt_type: str, knowledge_base: Dict[str, str] = None, user_id: str = None) -> str: - """Get prompt with automatic knowledge base concatenation""" - base_prompt = load_prompt_from_file(prompt_type, user_id) - - if knowledge_base: - kb_context = format_knowledge_base_context(knowledge_base) - return f"{kb_context}{base_prompt}" - - return base_prompt - -def get_openai_client(): - """Get OpenAI client with API key""" - try: - import openai - api_key = os.getenv("OPENAI_API_KEY") - if not api_key: - return None - - client = openai.OpenAI(api_key=api_key) - return client - except ImportError: - logger.error("OpenAI package not installed") - return None - except Exception as e: - logger.error(f"Error initializing OpenAI client: {e}") - return None - -def get_anthropic_client(): - """Get Anthropic client with API key""" - try: - import anthropic - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - return None - - client = anthropic.Anthropic(api_key=api_key) - return client - except ImportError: - logger.error("Anthropic package not installed") - return None - except Exception as e: - logger.error(f"Error 
initializing Anthropic client: {e}") - return None - -def get_grok_api_key(): - """Get Grok API key""" - return os.getenv("GROK_API_KEY") + +# ---- Deprecated helpers kept inline ------------------------------------ + def update_usage_tracking(duration_seconds: float): - """Placeholder for usage tracking - implement as needed""" + """Placeholder for usage tracking - implement as needed. + + .. deprecated:: + ``update_usage_tracking`` will be removed in a future release. + """ + warnings.warn( + "update_usage_tracking() is deprecated and will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) logger.info(f"Usage tracked: {duration_seconds} seconds") -def get_prompt(prompt_type: str, prompts: Dict[str, str], default_prompts: Dict[str, str]) -> str: - """Get prompt from user prompts or defaults (DEPRECATED - use get_enhanced_prompt)""" - return prompts.get(prompt_type, default_prompts.get(prompt_type, "")) \ No newline at end of file + +def get_prompt(prompt_type: str, prompts: dict[str, str], default_prompts: dict[str, str]) -> str: + """Get prompt from user prompts or defaults. + + .. deprecated:: + Use :func:`core.prompts.get_enhanced_prompt` instead. + """ + warnings.warn( + "get_prompt() is deprecated. 
Use get_enhanced_prompt() from core.prompts instead.", + DeprecationWarning, + stacklevel=2, + ) + return prompts.get(prompt_type, default_prompts.get(prompt_type, "")) + + +__all__ = [ + # core.security + "hash_password", + "verify_password", + "legacy_hash_password", + # core.path_safety + "safe_path", + # core.prompts + "DEFAULT_PROMPTS", + "load_prompt_from_file", + "format_knowledge_base_context", + "get_enhanced_prompt", + # core.api_clients + "get_openai_client", + "get_anthropic_client", + "get_grok_api_key", + "get_grok_client", + # deprecated inline helpers + "update_usage_tracking", + "get_prompt", +] diff --git a/core/visible_thinking.py b/core/visible_thinking.py index 4d32132..324c24e 100644 --- a/core/visible_thinking.py +++ b/core/visible_thinking.py @@ -5,30 +5,32 @@ import logging -import streamlit as st - logger = logging.getLogger(__name__) + def thinking_step_start(step_name: str, context: str = ""): """Start a thinking step - minimal implementation""" logger.info(f"Starting step: {step_name}") # Could add visual indicators here in the future pass + def thinking_step_complete(step_name: str, result_info: str = ""): """Complete a thinking step - minimal implementation""" logger.info(f"Completed step: {step_name}") # Could add completion indicators here in the future pass + def thinking_error(step_name: str, error_msg: str): """Handle thinking step error - minimal implementation""" logger.error(f"Error in step {step_name}: {error_msg}") # Could add error indicators here in the future pass + def render_thinking_stream(): """Render thinking stream - minimal implementation""" # This function is imported but not used in current implementation # Could add visual thinking display here in the future - pass \ No newline at end of file + pass diff --git a/create_missing_tables.py b/create_missing_tables.py index cb8280e..5deaa91 100644 --- a/create_missing_tables.py +++ b/create_missing_tables.py @@ -4,8 +4,10 @@ """ import os + from 
core.supabase_integration import get_supabase_client + def create_prompts_table(client): """Create the prompts table""" sql = """ @@ -18,22 +20,23 @@ def create_prompts_table(client): updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), UNIQUE(user_id, prompt_type) ); - + -- Add RLS policies ALTER TABLE prompts ENABLE ROW LEVEL SECURITY; - + CREATE POLICY "Users can manage their own prompts" ON prompts FOR ALL USING (auth.uid()::text = user_id::text); """ - + try: - result = client.client.rpc('exec_sql', {'sql': sql}).execute() + client.client.rpc("exec_sql", {"sql": sql}).execute() print("โœ… Prompts table created successfully") return True except Exception as e: print(f"โŒ Failed to create prompts table: {e}") return False + def create_api_keys_table(client): """Create the api_keys table""" sql = """ @@ -46,61 +49,69 @@ def create_api_keys_table(client): updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), UNIQUE(user_id, key_name) ); - + -- Add RLS policies ALTER TABLE api_keys ENABLE ROW LEVEL SECURITY; - + CREATE POLICY "Users can manage their own API keys" ON api_keys FOR ALL USING (auth.uid()::text = user_id::text); """ - + try: - result = client.client.rpc('exec_sql', {'sql': sql}).execute() + client.client.rpc("exec_sql", {"sql": sql}).execute() print("โœ… API keys table created successfully") return True except Exception as e: print(f"โŒ Failed to create api_keys table: {e}") return False + def test_table_creation(client): """Test that tables were created and are accessible""" - + # Test prompts table try: - result = client.client.table('prompts').select('*').limit(1).execute() + client.client.table("prompts").select("*").limit(1).execute() print("โœ… Prompts table accessible") except Exception as e: print(f"โŒ Prompts table test failed: {e}") - + # Test api_keys table try: - result = client.client.table('api_keys').select('*').limit(1).execute() + client.client.table("api_keys").select("*").limit(1).execute() print("โœ… API keys table accessible") except 
Exception as e: print(f"โŒ API keys table test failed: {e}") + def main(): - # Set environment variables - os.environ['SUPABASE_URL'] = 'https://utyjhedtqaagihuogyuy.supabase.co' - os.environ['SUPABASE_ANON_KEY'] = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InV0eWpoZWR0cWFhZ2lodW9neXV5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDkzMjEyMDUsImV4cCI6MjA2NDg5NzIwNX0.vpRRn7anpmCokYcje5yJr3r2iC_8s11_LXQcCTgxtR8' - - print("๐Ÿ”ง Creating Missing Database Tables") + from dotenv import load_dotenv + + load_dotenv() + + if not os.getenv("SUPABASE_URL") or not os.getenv("SUPABASE_ANON_KEY"): + print("Missing SUPABASE_URL or SUPABASE_ANON_KEY environment variables.") + print("Set them in your .env file or export them before running this script.") + return + + print("Creating Missing Database Tables") print("=" * 50) - + client = get_supabase_client() if not client: print("โŒ Failed to initialize Supabase client") return - + print("โœ… Supabase client initialized") - + # Create missing tables print("\n๐Ÿ“‹ Creating Tables:") create_prompts_table(client) create_api_keys_table(client) - + print("\n๐Ÿงช Testing Table Access:") test_table_creation(client) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/deploy_fixes.py b/deploy_fixes.py index bb70eb1..9cdcca6 100644 --- a/deploy_fixes.py +++ b/deploy_fixes.py @@ -4,38 +4,35 @@ Verifies all systems are ready for production deployment """ -import os -import sys from datetime import datetime + def verify_database_tables(): """Verify all required database tables exist""" print("๐Ÿ” Verifying Database Tables") print("=" * 40) - - required_tables = [ - "users", "content", "api_keys", "prompts", - "knowledge_base", "pipeline_logs" - ] - + + required_tables = ["users", "content", "api_keys", "prompts", "knowledge_base", "pipeline_logs"] + try: from core.supabase_integration import get_supabase_client + client = get_supabase_client() - + if not client: print("โŒ Failed to connect to 
Supabase") return False - + missing_tables = [] for table in required_tables: try: # Test table access - result = client.client.table(table).select("*").limit(1).execute() + client.client.table(table).select("*").limit(1).execute() print(f"โœ… {table}") except Exception: print(f"โŒ {table} - MISSING") missing_tables.append(table) - + if missing_tables: print(f"\n๐Ÿšจ Missing tables: {', '.join(missing_tables)}") print("Execute the SQL commands from SYSTEM_ANALYSIS_REPORT.md") @@ -43,29 +40,28 @@ def verify_database_tables(): else: print("โœ… All required tables exist!") return True - + except Exception as e: print(f"โŒ Database verification failed: {e}") return False + def test_core_functionality(): """Test core application functionality""" print("\n๐Ÿงช Testing Core Functionality") print("=" * 40) - + try: # Test imports - from core.content_generation import transcribe_audio, generate_wisdom - from core.file_upload import FileUploadManager, EnhancedLargeFileProcessor - from core.utils import DEFAULT_PROMPTS, load_prompt_from_file from core.supabase_integration import get_supabase_client - + from core.utils import DEFAULT_PROMPTS + print("โœ… Core imports successful") - + # Test prompt system prompts = DEFAULT_PROMPTS print(f"โœ… Prompt system: {len(prompts)} default + file-based prompts") - + # Test database connection client = get_supabase_client() if client: @@ -73,34 +69,34 @@ def test_core_functionality(): else: print("โŒ Database connection failed") return False - + return True - + except Exception as e: print(f"โŒ Core functionality test failed: {e}") return False + def main(): """Main deployment verification""" print("๐Ÿš€ WHISPERFORGE DEPLOYMENT VERIFICATION") print("=" * 60) print(f"๐Ÿ“… {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print() - + # Run verification tests db_ok = verify_database_tables() core_ok = test_core_functionality() - + # Final assessment - print(f"\n๐Ÿ“Š DEPLOYMENT READINESS") + print("\n๐Ÿ“Š DEPLOYMENT READINESS") print("=" * 
60) - + if db_ok and core_ok: - print("๐ŸŽ‰ โœ… READY FOR DEPLOYMENT!") - print("\n๐Ÿš€ Start your app with:") + print("READY FOR DEPLOYMENT!") + print("\nStart your app with:") print("source venv/bin/activate") - print("export SUPABASE_URL='https://utyjhedtqaagihuogyuy.supabase.co'") - print("export SUPABASE_ANON_KEY='eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InV0eWpoZWR0cWFhZ2lodW9neXV5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDkzMjEyMDUsImV4cCI6MjA2NDg5NzIwNX0.vp'") + print("# Ensure SUPABASE_URL and SUPABASE_ANON_KEY are set in your .env file") print("streamlit run app_simple.py --server.headless true --server.port 8502") else: print("โŒ NOT READY - Fix issues above first") @@ -110,5 +106,6 @@ def main(): print("2. Re-run this verification script") print("3. Deploy when all checks pass") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/docs/CRITICAL_FIXES_REPORT_v2.8.0.md b/docs/CRITICAL_FIXES_REPORT_v2.8.0.md index 5f9acdd..3f20dec 100644 --- a/docs/CRITICAL_FIXES_REPORT_v2.8.0.md +++ b/docs/CRITICAL_FIXES_REPORT_v2.8.0.md @@ -1,8 +1,8 @@ # WhisperForge v2.8.0 Critical Fixes Report -**Date:** December 12, 2024 -**Version:** 2.8.0 -**Status:** โœ… All Critical Issues Resolved +**Date:** December 12, 2024 +**Version:** 2.8.0 +**Status:** โœ… All Critical Issues Resolved ## ๐ŸŽฏ Executive Summary @@ -35,7 +35,7 @@ Following the successful implementation of WhisperForge v2.8.0 with revolutionar - **Problem**: Multiple documentation files referenced deprecated `app.py` - **Impact**: User confusion, incorrect setup instructions - **Solution**: Updated all references to use `app_simple.py` -- **Files Changed**: +- **Files Changed**: - `README.md` - Complete rewrite with v2.8.0 features - `deploy_fixes.py` - Updated deployment instructions - `.devcontainer/devcontainer.json` - Fixed development container config @@ -45,7 +45,7 @@ Following the successful implementation of WhisperForge v2.8.0 with revolutionar - 
**Problem**: Mixed import styles across core modules - **Impact**: Code maintainability and readability issues - **Solution**: Standardized to PEP 8 compliant import patterns -- **Files Changed**: +- **Files Changed**: - `core/content_generation.py` - `core/file_upload.py` - `core/visible_thinking.py` @@ -91,4 +91,4 @@ All **5 critical issues** have been successfully resolved, resulting in a produc --- -**WhisperForge v2.8.0** - Production Ready with Revolutionary Large File Processing ๐ŸŒŒ \ No newline at end of file +**WhisperForge v2.8.0** - Production Ready with Revolutionary Large File Processing ๐ŸŒŒ diff --git a/docs/LARGE_FILE_PROCESSING_v2.8.0.md b/docs/LARGE_FILE_PROCESSING_v2.8.0.md index 9ac328d..7859e33 100644 --- a/docs/LARGE_FILE_PROCESSING_v2.8.0.md +++ b/docs/LARGE_FILE_PROCESSING_v2.8.0.md @@ -39,7 +39,7 @@ WhisperForge v2.8.0 introduces revolutionary large file processing capabilities, ```python class EnhancedLargeFileProcessor: """๐Ÿš€ Enhanced Large File Processor with FFmpeg for 2GB+ files""" - + def __init__(self): self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB self.chunk_duration_minutes = 10 # 10-minute chunks @@ -96,7 +96,7 @@ def _transcribe_chunks_parallel_ffmpeg(self, chunks: List[Dict]) -> Dict[str, An ```python upload_method = st.radio( "Choose upload method:", - ["๐ŸŽต Standard Upload (up to 25MB)", + ["๐ŸŽต Standard Upload (up to 25MB)", "๐Ÿš€ Enhanced Large File Upload (up to 2GB)"] ) ``` @@ -256,7 +256,7 @@ The combination of FFmpeg integration, parallel processing, and intelligent chun --- -**Ready for Production** โœ… -**Fully Tested** โœ… -**Backward Compatible** โœ… -**Auto-Deployable** โœ… \ No newline at end of file +**Ready for Production** โœ… +**Fully Tested** โœ… +**Backward Compatible** โœ… +**Auto-Deployable** โœ… diff --git a/docs/monitoring.md b/docs/monitoring.md index 44a9055..4904510 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -370,4 +370,4 @@ External Services: **Last Updated**: $(date) 
**Version**: 2.0 -**Maintained By**: WhisperForge Engineering Team \ No newline at end of file +**Maintained By**: WhisperForge Engineering Team diff --git a/env.example b/env.example index a2f3b88..31c05b8 100644 --- a/env.example +++ b/env.example @@ -25,4 +25,4 @@ CHUNK_SIZE_MB=20 # Database Settings DB_POOL_SIZE=10 -DB_TIMEOUT=30 \ No newline at end of file +DB_TIMEOUT=30 diff --git a/experiments/README.md b/experiments/README.md index e584431..d0b623a 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -11,4 +11,3 @@ This directory contains experimental features and enhancements for WhisperForge ## Current Experiments (None yet) - diff --git a/main.py b/main.py new file mode 100644 index 0000000..96ece53 --- /dev/null +++ b/main.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +"""WhisperForge launcher - sets up the environment if needed and starts the app.""" + +import os +import subprocess +import sys + +PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) +VENV_DIR = os.path.join(PROJECT_DIR, ".venv") +SETUP_SCRIPT = os.path.join(PROJECT_DIR, "setup.py") +APP_SCRIPT = os.path.join(PROJECT_DIR, "app_simple.py") + +if sys.platform == "win32": + PYTHON = os.path.join(VENV_DIR, "Scripts", "python") + STREAMLIT = os.path.join(VENV_DIR, "Scripts", "streamlit") +else: + PYTHON = os.path.join(VENV_DIR, "bin", "python") + STREAMLIT = os.path.join(VENV_DIR, "bin", "streamlit") + + +def venv_ready(): + """Check that the venv exists and streamlit is installed.""" + if not os.path.exists(PYTHON): + return False + result = subprocess.run( + [PYTHON, "-c", "import streamlit"], + capture_output=True, + ) + return result.returncode == 0 + + +def run_setup(dev=False): + """Run setup.py using the system Python to create the venv.""" + print("[whisperforge] Virtual environment not found or incomplete.") + print("[whisperforge] Running first-time setup...\n") + cmd = [sys.executable, SETUP_SCRIPT] + if dev: + cmd.append("--dev") + subprocess.check_call(cmd) + 
print() + + +def start_app(extra_args): + """Launch the Streamlit app inside the venv.""" + cmd = [STREAMLIT, "run", APP_SCRIPT] + extra_args + print("[whisperforge] Starting WhisperForge...") + print(f"[whisperforge] Command: {' '.join(cmd)}") + print("-" * 50) + sys.exit(subprocess.run(cmd).returncode) + + +def main(): + args = sys.argv[1:] + + # Handle explicit flags + dev = "--dev" in args + + if "--setup" in args: + run_setup(dev=dev) + return + + if "--help" in args or "-h" in args: + print( + "WhisperForge Launcher\n" + "\n" + "Usage: python main.py [options] [-- streamlit args]\n" + "\n" + "Options:\n" + " --setup Force re-create the virtual environment\n" + " --dev Include dev dependencies (testing, linting, pre-commit)\n" + " --help Show this help message\n" + "\n" + "Any arguments after -- are forwarded to streamlit.\n" + "\n" + "Examples:\n" + " python main.py Start the app\n" + " python main.py --setup Re-create the venv\n" + " python main.py --setup --dev Re-create venv with dev deps\n" + " python main.py -- --server.port 8080 Start on a custom port" + ) + return + + # Split off streamlit args after -- + st_args = [] + if "--" in args: + idx = args.index("--") + st_args = args[idx + 1 :] + + # Ensure environment is ready + if not venv_ready(): + run_setup(dev=dev) + # Re-check after setup + if not venv_ready(): + print("[whisperforge] Setup finished but environment still looks broken.") + print("[whisperforge] Try running: python setup.py") + sys.exit(1) + else: + print("[whisperforge] Virtual environment OK.") + + start_app(st_args) + + +if __name__ == "__main__": + main() diff --git a/monitoring/grafana_dashboard.json b/monitoring/grafana_dashboard.json index a31489f..6d7d202 100644 --- a/monitoring/grafana_dashboard.json +++ b/monitoring/grafana_dashboard.json @@ -290,4 +290,4 @@ ] } } -} \ No newline at end of file +} diff --git a/prompts/default/article_generation.md b/prompts/default/article_generation.md index f389f94..bea8cb4 100644 --- 
a/prompts/default/article_generation.md +++ b/prompts/default/article_generation.md @@ -49,4 +49,4 @@ You are a professional content writer and editor with expertise in creating enga - **Professional polish** suitable for publication - **Value-driven content** that provides genuine insights -Create an article that transforms the raw content into a polished, engaging piece that readers will find valuable and want to share. \ No newline at end of file +Create an article that transforms the raw content into a polished, engaging piece that readers will find valuable and want to share. diff --git a/prompts/default/knowledge_base/ca.md b/prompts/default/knowledge_base/ca.md index 0614ab2..213aa64 100644 --- a/prompts/default/knowledge_base/ca.md +++ b/prompts/default/knowledge_base/ca.md @@ -35,4 +35,4 @@ TOPICS TO EXPLORE: - Future-proofing creative practices - Digital identity and authentic expression -Each post should feel like it came directly from Kris's digital consciousness - provocative, insightful, and distinctly human despite discussing cutting-edge technology. Make them feel like field notes from someone living slightly ahead of our timeline, bringing back wisdom from the near-future. \ No newline at end of file +Each post should feel like it came directly from Kris's digital consciousness - provocative, insightful, and distinctly human despite discussing cutting-edge technology. Make them feel like field notes from someone living slightly ahead of our timeline, bringing back wisdom from the near-future. diff --git a/prompts/default/outline_creation.md b/prompts/default/outline_creation.md index 269a1f7..4e65644 100644 --- a/prompts/default/outline_creation.md +++ b/prompts/default/outline_creation.md @@ -1 +1 @@ -OK write in chinese. \ No newline at end of file +OK write in chinese. 
diff --git a/prompts/default/social_media.md b/prompts/default/social_media.md index 18a5f74..1f439a4 100644 --- a/prompts/default/social_media.md +++ b/prompts/default/social_media.md @@ -5,7 +5,7 @@ You are a social media content strategist and copywriter specializing in creatin ## Context You will be provided with: - Extracted wisdom and key insights from audio content -- A structured content outline +- A structured content outline - A full article based on the content - Relevant knowledge base information (if available) @@ -98,4 +98,4 @@ Frame 4: [Call-to-action frame] [Value-driven content with video potential reference] ``` -Remember: Great social media content doesn't just share informationโ€”it starts conversations, builds communities, and provides genuine value that makes people want to engage and share with others. \ No newline at end of file +Remember: Great social media content doesn't just share informationโ€”it starts conversations, builds communities, and provides genuine value that makes people want to engage and share with others. diff --git a/prompts/default/wisdom_extraction.md b/prompts/default/wisdom_extraction.md index c3d80ea..41ce68c 100644 --- a/prompts/default/wisdom_extraction.md +++ b/prompts/default/wisdom_extraction.md @@ -52,4 +52,4 @@ Do not start items with the same opening words. Ensure you follow ALL these instructions when creating your output. 
INPUT -INPUT: \ No newline at end of file +INPUT: diff --git a/prompts/default/wisdom_prompt.md b/prompts/default/wisdom_prompt.md index ee03a34..aad1dee 100644 --- a/prompts/default/wisdom_prompt.md +++ b/prompts/default/wisdom_prompt.md @@ -1 +1 @@ -write in chinese \ No newline at end of file +write in chinese diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a449447 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[tool.ruff] +line-length = 120 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "W", # pycodestyle warnings + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "S", # flake8-bandit (security) +] +ignore = [ + "E501", # line length handled by formatter + "S101", # allow assert in tests + "S603", # allow subprocess without shell=True check (not relevant here) + "S607", # allow partial executable paths +] + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["S101"] +"scripts/*" = ["S101"] + +[tool.autopep8] +max_line_length = 120 +aggressive = 1 diff --git a/pytest.ini b/pytest.ini index 31ce6d4..fe71f42 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,11 +3,10 @@ testpaths = tests python_files = test_*.py python_classes = Test* python_functions = test_* -addopts = +addopts = -v --tb=short --strict-markers - --disable-warnings --color=yes --durations=10 markers = @@ -17,5 +16,5 @@ markers = ai: marks tests that require AI API keys slow: marks tests as slow (deselect with '-m "not slow"') filterwarnings = - ignore::DeprecationWarning - ignore::PendingDeprecationWarning \ No newline at end of file + ignore::DeprecationWarning:streamlit.* + ignore::DeprecationWarning:pydub.* diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..87124d3 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,17 @@ +# Development dependencies - install with: pip install -r requirements-dev.txt +-r requirements.txt + +# 
Testing +pytest>=7.4.0,<9.0 +pytest-mock>=3.11.0,<4.0 +pytest-asyncio>=0.21.0,<1.0 +pytest-cov>=4.1.0,<6.0 + +# Test utilities +python-docx>=1.1.0,<2.0 +fpdf>=1.7.2,<2.0 + +# Linting & Pre-commit Hooks +ruff>=0.9.7,<1.0 +pre-commit>=4.0.0,<5.0 +autopep8>=2.3.0,<3.0 diff --git a/requirements.txt b/requirements.txt index e931345..56fa3d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,42 +1,24 @@ -# WhisperForge v3.0.0 Dependencies +# WhisperForge v3.1.0 Dependencies # Core Web Framework -streamlit>=1.28.0 +streamlit>=1.28.0,<2.0 # AI/ML Libraries -openai>=1.3.0 -anthropic>=0.7.0 +openai>=1.3.0,<3.0 +anthropic>=0.7.0,<1.0 # Audio Processing (for large file chunking) -pydub>=0.25.0 +pydub>=0.25.0,<1.0 # Database & Backend -supabase>=2.0.0 -python-dotenv>=1.0.0 - -# HTTP & Authentication -httpx>=0.24.0 -urllib3>=2.0.0 -requests>=2.31.0 -cryptography>=41.0.0 - -# File Processing -python-multipart>=0.0.6 -Pillow>=10.0.0 +supabase>=2.0.0,<3.0 +python-dotenv>=1.0.0,<2.0 # Security - Password hashing -bcrypt>=4.0.0 +bcrypt>=4.0.0,<5.0 # CLI Support (for whisperforge_cli.py) -click>=8.1.0 - -# JWT for shared/security.py (using python-jose) -python-jose>=3.3.0 +click>=8.1.0,<9.0 -# Testing Dependencies (optional - only for development) -pytest>=7.4.0 -pytest-mock>=3.11.0 -pytest-asyncio>=0.21.0 -pytest-cov>=4.1.0 -python-docx>=1.1.0 -fpdf>=1.7.2 +# JWT authentication +PyJWT>=2.8.0,<3.0 diff --git a/runtime.txt b/runtime.txt index 2000ca3..67ebc4e 100644 --- a/runtime.txt +++ b/runtime.txt @@ -1 +1 @@ -python-3.11 \ No newline at end of file +python-3.11 diff --git a/scripts/audit_project.py b/scripts/audit_project.py index 0d932cb..bc483ab 100644 --- a/scripts/audit_project.py +++ b/scripts/audit_project.py @@ -35,11 +35,12 @@ import sys from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Tuple +from typing import Any # Always use the project logger if available, but fall back to standard logging. 
try: from core.logging_config import logger as _wf_logger # type: ignore + SCRIPT_LOGGER = getattr(_wf_logger, "logger", _wf_logger) except Exception: SCRIPT_LOGGER = logging.getLogger(__name__) @@ -62,14 +63,13 @@ # Helper utilities # --------------------------------------------------------------------------- -def _run(cmd: List[str], cwd: Path | None = None) -> Tuple[int, str, str]: + +def _run(cmd: list[str], cwd: Path | None = None) -> tuple[int, str, str]: """Run a subprocess and capture its output. Returns (returncode, stdout, stderr). """ - SCRIPT_LOGGER.info( - "Executing command", extra={"trace_id": TRACE_ID, "cmd": " ".join(cmd)} - ) + SCRIPT_LOGGER.info("Executing command", extra={"trace_id": TRACE_ID, "cmd": " ".join(cmd)}) try: proc = subprocess.run( cmd, @@ -91,11 +91,12 @@ def _section(title: str, body: str) -> str: # Audit tasks # --------------------------------------------------------------------------- + def unit_tests() -> str: rc, out, err = _run([sys.executable, "-m", "pytest", "-q"]) status = "โœ…" if rc == 0 else "โŒ" details = err or out - return f"""**Status:** {status} + return f"""**Status:** {status} ``` {details} @@ -107,7 +108,7 @@ def health_endpoint() -> str: status_data = health_checker.get_health_status() status = "โœ…" if status_data.status == "healthy" else "โŒ" - return f"""**Status:** {status} + return f"""**Status:** {status} Payload: ```json @@ -122,14 +123,14 @@ def static_analysis() -> str: rc_m, out_m, err_m = _run(["mypy", "--strict", "core"]) body = "### Ruff\n" - body += f"""Exit code: {rc_r} + body += f"""Exit code: {rc_r} ``` {out_r or err_r} ```\n""" body += "### mypy\n" - body += f"""Exit code: {rc_m} + body += f"""Exit code: {rc_m} ``` {out_m or err_m} @@ -138,14 +139,14 @@ def static_analysis() -> str: def secret_scan() -> str: - matches: List[str] = [] + matches: list[str] = [] for file_path in ROOT.rglob("*.py"): try: content = file_path.read_text(encoding="utf-8") except UnicodeDecodeError: continue for m in 
SECRET_PATTERN.finditer(content): - snippet = content[max(0, m.start() - 20): m.end() + 20] + snippet = content[max(0, m.start() - 20) : m.end() + 20] matches.append(f"{file_path}: {snippet.strip()}") if not matches: @@ -158,7 +159,7 @@ def dependency_freshness() -> str: if rc != 0: return f"Failed to fetch outdated packages: {err}" try: - data: List[Dict[str, Any]] = json.loads(out) + data: list[dict[str, Any]] = json.loads(out) except json.JSONDecodeError: return "Could not parse pip output." if not data: @@ -182,9 +183,12 @@ def todo_tally() -> str: # Entry-point # --------------------------------------------------------------------------- + def main() -> None: parser = argparse.ArgumentParser(description="Run a WhisperForge audit and emit a Markdown report.") - parser.add_argument("--output", type=Path, default=Path(f"AUDIT_REPORT_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")) + parser.add_argument( + "--output", type=Path, default=Path(f"AUDIT_REPORT_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md") + ) args = parser.parse_args() report_parts = [REPORT_HEADER] @@ -198,11 +202,9 @@ def main() -> None: output_text = "\n".join(report_parts) args.output.write_text(output_text, encoding="utf-8") - SCRIPT_LOGGER.info( - "Audit completed", extra={"trace_id": TRACE_ID, "report": str(args.output)} - ) + SCRIPT_LOGGER.info("Audit completed", extra={"trace_id": TRACE_ID, "report": str(args.output)}) print(f"\n\nAudit report written to {args.output}") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/cleanup_repo.sh b/scripts/cleanup_repo.sh deleted file mode 100755 index 1fc2454..0000000 --- a/scripts/cleanup_repo.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# Repository cleanup script -# - Prunes stale remote tracking branches -# - Deletes local branches already merged into main -# - Removes ignored files like caches -set -e - -git fetch --all --prune - -for branch in $(git branch --merged | grep -v "^*" | grep -v main); do 
- git branch -d "$branch" -done - -git clean -fdX - -echo "Cleanup complete." diff --git a/scripts/integration_audit.py b/scripts/integration_audit.py index 866b4d6..7ffa80b 100644 --- a/scripts/integration_audit.py +++ b/scripts/integration_audit.py @@ -5,7 +5,6 @@ """ import sys -import os import traceback from pathlib import Path @@ -13,11 +12,13 @@ project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) + def audit_section(title: str): """Print audit section header""" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"๐Ÿ” {title}") - print('='*60) + print("=" * 60) + def check_import(module_name: str, description: str = ""): """Check if a module can be imported""" @@ -29,204 +30,211 @@ def check_import(module_name: str, description: str = ""): print(f"โŒ {module_name} - {description}: {str(e)[:100]}") return False + def check_function(module_name: str, function_name: str, description: str = ""): """Check if a function exists in a module""" try: module = __import__(module_name, fromlist=[function_name]) - func = getattr(module, function_name) + getattr(module, function_name) print(f"โœ… {module_name}.{function_name} - {description}") return True except Exception as e: print(f"โŒ {module_name}.{function_name} - {description}: {str(e)[:100]}") return False + def check_class_method(module_name: str, class_name: str, method_name: str, description: str = ""): """Check if a class method exists""" try: module = __import__(module_name, fromlist=[class_name]) cls = getattr(module, class_name) - method = getattr(cls, method_name) + getattr(cls, method_name) print(f"โœ… {module_name}.{class_name}.{method_name} - {description}") return True except Exception as e: print(f"โŒ {module_name}.{class_name}.{method_name} - {description}: {str(e)[:100]}") return False + def main(): """Run comprehensive integration audit""" print("๐Ÿš€ WhisperForge Integration Audit") print("Verifying all components work together seamlessly...") - + # Track results 
results = { - 'core_imports': 0, - 'database_functions': 0, - 'ui_components': 0, - 'file_processing': 0, - 'content_generation': 0, - 'streaming_pipeline': 0, - 'authentication': 0 + "core_imports": 0, + "database_functions": 0, + "ui_components": 0, + "file_processing": 0, + "content_generation": 0, + "streaming_pipeline": 0, + "authentication": 0, } - + # 1. Core Module Imports audit_section("Core Module Imports") core_modules = [ - ('core.supabase_integration', 'Database integration'), - ('core.file_upload', 'File upload management'), - ('core.streaming_pipeline', 'Streaming pipeline'), - ('core.content_generation', 'Content generation'), - ('core.streaming_results', 'Results display'), - ('core.ui_components', 'UI components'), - ('core.styling', 'Aurora styling'), - ('core.logging_config', 'Enhanced logging'), - ('core.monitoring', 'System monitoring'), - ('core.notifications', 'User notifications'), - ('core.visible_thinking', 'Thinking display'), - ('core.research_enrichment', 'Research features'), - ('core.utils', 'Utility functions'), - ('core.config', 'Configuration'), - ('core.integrations', 'External integrations') + ("core.supabase_integration", "Database integration"), + ("core.file_upload", "File upload management"), + ("core.streaming_pipeline", "Streaming pipeline"), + ("core.content_generation", "Content generation"), + ("core.streaming_results", "Results display"), + ("core.ui_components", "UI components"), + ("core.styling", "Aurora styling"), + ("core.logging_config", "Enhanced logging"), + ("core.monitoring", "System monitoring"), + ("core.notifications", "User notifications"), + ("core.visible_thinking", "Thinking display"), + ("core.research_enrichment", "Research features"), + ("core.utils", "Utility functions"), + ("core.config", "Configuration"), + ("core.integrations", "External integrations"), ] - + for module, desc in core_modules: if check_import(module, desc): - results['core_imports'] += 1 - + results["core_imports"] += 1 + # 2. 
Database Functions audit_section("Database Integration") db_functions = [ - ('core.supabase_integration', 'get_supabase_client', 'Get database client'), - ('core.supabase_integration', 'SupabaseClient', 'Database client class'), - ('app', 'init_supabase', 'Initialize database'), - ('app', 'authenticate_user', 'User authentication'), - ('app', 'register_user_supabase', 'User registration'), - ('app', 'save_generated_content_supabase', 'Save content'), - ('app', 'get_user_content_history_supabase', 'Get user history') + ("core.supabase_integration", "get_supabase_client", "Get database client"), + ("core.supabase_integration", "SupabaseClient", "Database client class"), + ("app", "init_supabase", "Initialize database"), + ("app", "authenticate_user", "User authentication"), + ("app", "register_user_supabase", "User registration"), + ("app", "save_generated_content_supabase", "Save content"), + ("app", "get_user_content_history_supabase", "Get user history"), ] - + for module, func, desc in db_functions: if check_function(module, func, desc): - results['database_functions'] += 1 - + results["database_functions"] += 1 + # 3. 
File Processing audit_section("File Processing & Upload") file_functions = [ - ('core.file_upload', 'LargeFileUploadManager', 'Large file manager'), - ('core.file_upload', 'FileUploadManager', 'Standard file manager') + ("core.file_upload", "LargeFileUploadManager", "Large file manager"), + ("core.file_upload", "FileUploadManager", "Standard file manager"), ] - + for module, cls, desc in file_functions: if check_function(module, cls, desc): - results['file_processing'] += 1 - + results["file_processing"] += 1 + # Check LargeFileUploadManager methods file_methods = [ - ('core.file_upload', 'LargeFileUploadManager', 'validate_large_file', 'File validation'), - ('core.file_upload', 'LargeFileUploadManager', 'create_large_file_upload_zone', 'Upload UI'), - ('core.file_upload', 'LargeFileUploadManager', 'process_large_file', 'File processing') + ("core.file_upload", "LargeFileUploadManager", "validate_large_file", "File validation"), + ("core.file_upload", "LargeFileUploadManager", "create_large_file_upload_zone", "Upload UI"), + ("core.file_upload", "LargeFileUploadManager", "process_large_file", "File processing"), ] - + for module, cls, method, desc in file_methods: if check_class_method(module, cls, method, desc): - results['file_processing'] += 1 - + results["file_processing"] += 1 + # 4. 
Content Generation audit_section("Content Generation") content_functions = [ - ('core.content_generation', 'generate_wisdom_extraction', 'Wisdom extraction'), - ('core.content_generation', 'generate_research_enrichment', 'Research enrichment'), - ('core.content_generation', 'generate_outline_creation', 'Outline creation'), - ('core.content_generation', 'generate_article_creation', 'Article creation'), - ('core.content_generation', 'generate_social_content', 'Social media content'), - ('core.content_generation', 'generate_image_prompts', 'Image prompts') + ("core.content_generation", "generate_wisdom_extraction", "Wisdom extraction"), + ("core.content_generation", "generate_research_enrichment", "Research enrichment"), + ("core.content_generation", "generate_outline_creation", "Outline creation"), + ("core.content_generation", "generate_article_creation", "Article creation"), + ("core.content_generation", "generate_social_content", "Social media content"), + ("core.content_generation", "generate_image_prompts", "Image prompts"), ] - + for module, func, desc in content_functions: if check_function(module, func, desc): - results['content_generation'] += 1 - + results["content_generation"] += 1 + # 5. 
Streaming Pipeline audit_section("Streaming Pipeline") pipeline_functions = [ - ('core.streaming_pipeline', 'get_pipeline_controller', 'Pipeline controller'), - ('core.streaming_pipeline', 'StreamingPipelineController', 'Pipeline class'), - ('core.streaming_results', 'show_streaming_results', 'Results display'), - ('core.streaming_results', 'show_real_time_content_stream', 'Real-time streaming') + ("core.streaming_pipeline", "get_pipeline_controller", "Pipeline controller"), + ("core.streaming_pipeline", "StreamingPipelineController", "Pipeline class"), + ("core.streaming_results", "show_streaming_results", "Results display"), + ("core.streaming_results", "show_real_time_content_stream", "Real-time streaming"), ] - + for module, func, desc in pipeline_functions: if check_function(module, func, desc): - results['streaming_pipeline'] += 1 - + results["streaming_pipeline"] += 1 + # 6. UI Components audit_section("UI Components & Styling") ui_functions = [ - ('core.ui_components', 'load_aurora_css', 'Aurora CSS loading'), - ('core.ui_components', 'AuroraContainer', 'Aurora containers'), - ('core.ui_components', 'AuroraComponents', 'Aurora components'), - ('core.styling', 'apply_aurora_theme', 'Aurora theme'), - ('core.styling', 'create_aurora_header', 'Aurora header') + ("core.ui_components", "load_aurora_css", "Aurora CSS loading"), + ("core.ui_components", "AuroraContainer", "Aurora containers"), + ("core.ui_components", "AuroraComponents", "Aurora components"), + ("core.styling", "apply_aurora_theme", "Aurora theme"), + ("core.styling", "create_aurora_header", "Aurora header"), ] - + for module, func, desc in ui_functions: if check_function(module, func, desc): - results['ui_components'] += 1 - + results["ui_components"] += 1 + # 7. 
Authentication Flow audit_section("Authentication & OAuth") auth_functions = [ - ('app', 'handle_oauth_callback', 'OAuth callback handling'), - ('app', 'show_auth_page', 'Authentication page'), - ('app', 'show_main_app', 'Main application') + ("app", "handle_oauth_callback", "OAuth callback handling"), + ("app", "show_auth_page", "Authentication page"), + ("app", "show_main_app", "Main application"), ] - + for module, func, desc in auth_functions: if check_function(module, func, desc): - results['authentication'] += 1 - + results["authentication"] += 1 + # 8. Test Database Connection audit_section("Live Database Connection Test") try: from core.supabase_integration import get_supabase_client + client = get_supabase_client() if client and client.client: # Test basic query - result = client.client.table('users').select('id').limit(1).execute() + result = client.client.table("users").select("id").limit(1).execute() print("โœ… Database connection successful") print(f"โœ… Users table accessible ({len(result.data)} records found)") - results['database_functions'] += 2 + results["database_functions"] += 2 else: print("โŒ Database client not available") except Exception as e: print(f"โŒ Database connection failed: {str(e)[:100]}") - + # 9. 
Test App Import audit_section("Main Application Import") try: import app + print("โœ… app.py imports successfully") - + # Test key app functions db, success = app.init_supabase() print(f"โœ… init_supabase works: {success}") - results['authentication'] += 2 - + results["authentication"] += 2 + except Exception as e: print(f"โŒ app.py import failed: {str(e)[:100]}") traceback.print_exc() - + # Final Results audit_section("Integration Audit Results") total_checks = sum(results.values()) max_possible = 50 # Approximate total checks - - print(f"๐Ÿ“Š **Integration Health Score: {total_checks}/{max_possible} ({(total_checks/max_possible)*100:.1f}%)**") + + print( + f"๐Ÿ“Š **Integration Health Score: {total_checks}/{max_possible} ({(total_checks / max_possible) * 100:.1f}%)**" + ) print() print("**Component Breakdown:**") for component, count in results.items(): status = "โœ…" if count > 0 else "โŒ" print(f"{status} {component.replace('_', ' ').title()}: {count} checks passed") - + print() if total_checks >= 40: print("๐ŸŽ‰ **EXCELLENT**: All major components integrated successfully!") @@ -237,9 +245,10 @@ def main(): else: print("โš ๏ธ **ISSUES DETECTED**: Major integration problems found") print("๐Ÿ› ๏ธ **NEEDS ATTENTION**") - + return total_checks >= 40 + if __name__ == "__main__": success = main() - sys.exit(0 if success else 1) \ No newline at end of file + sys.exit(0 if success else 1) diff --git a/scripts/setup_test_env.sh b/scripts/setup_test_env.sh deleted file mode 100755 index a0938f3..0000000 --- a/scripts/setup_test_env.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# WhisperForge Test Environment Setup Script -# This script creates a Python virtual environment and installs -# the required dependencies for running tests. - -set -e - -if [ ! -f requirements.txt ]; then - echo "Run this script from the project root where requirements.txt is located." 
>&2 - exit 1 -fi - -python -m venv venv -# shellcheck disable=SC1091 -source venv/bin/activate -pip install -r requirements.txt - -echo "โœ… Test environment ready. Activate it with 'source venv/bin/activate'" diff --git a/scripts/test_monitoring.py b/scripts/test_monitoring.py index f4280ce..fb997c4 100644 --- a/scripts/test_monitoring.py +++ b/scripts/test_monitoring.py @@ -7,50 +7,51 @@ in production environment. """ +import json import sys import time -import json -import os import traceback from pathlib import Path + import pytest # Skip these script-style tests when executed under pytest -SKIP_IN_PYTEST = 'pytest' in sys.modules +SKIP_IN_PYTEST = "pytest" in sys.modules # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) + def test_structured_logging(): """Test structured logging functionality""" if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Structured Logging...") # pragma: allow-print - + try: - from core.monitoring import structured_logger, set_trace_context, trace_operation - + from core.monitoring import set_trace_context, structured_logger, trace_operation + # Test basic logging structured_logger.info("Test info message", test_component="monitoring") structured_logger.warning("Test warning message", test_component="monitoring") structured_logger.error("Test error message", test_component="monitoring") - + # Test trace context - trace_id = set_trace_context(user_id="test_user", operation="test_operation") + set_trace_context(user_id="test_user", operation="test_operation") structured_logger.info("Message with trace context", test_data="trace_test") - + # Test trace operation context manager with trace_operation("test_context_manager", user_id="test_user"): structured_logger.info("Message within trace operation") - + # Test pipeline logging structured_logger.pipeline_start("test_pipeline", user_id="test_user") time.sleep(0.1) # Simulate work structured_logger.pipeline_complete("test_pipeline", 
0.1, success=True) - + print("โœ… Structured logging tests passed") # pragma: allow-print return True - + except Exception as e: print(f"โŒ Structured logging test failed: {e}") # pragma: allow-print traceback.print_exc() @@ -62,34 +63,34 @@ def test_health_checks(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Health Checks...") - + try: from core.health_check import health_checker - + # Test health status health_status = health_checker.get_health_status() print(f" Health Status: {health_status.status}") print(f" Uptime: {health_status.uptime_seconds:.2f}s") - + # Test SLO metrics slo_metrics = health_checker.get_slo_metrics() print(f" Error Rate: {slo_metrics.error_rate_5xx}%") print(f" Response Time: {slo_metrics.median_response_time}ms") print(f" Active Users: {slo_metrics.active_users_1h}") - + # Test SLO violations violations = health_checker.check_slo_violations() print(f" SLO Violations: {len(violations)}") - + # Test metrics JSON export metrics_json = health_checker.get_metrics_json() metrics_data = json.loads(metrics_json) assert "health" in metrics_data assert "slo_metrics" in metrics_data - + print("โœ… Health check tests passed") return True - + except Exception as e: print(f"โŒ Health check test failed: {e}") traceback.print_exc() @@ -101,35 +102,37 @@ def test_metrics_export(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Metrics Export...") - + try: from core.metrics_exporter import ( - metrics_exporter, track_request, track_pipeline, - export_prometheus_metrics, export_json_metrics + export_json_metrics, + export_prometheus_metrics, + track_pipeline, + track_request, ) - + # Test request tracking track_request(0.5, 200, "GET", "/test") track_request(1.2, 500, "POST", "/api/test") - + # Test pipeline tracking track_pipeline("test_pipeline", 2.5, True) track_pipeline("test_pipeline", 5.0, False) - + # Test Prometheus export prometheus_metrics = export_prometheus_metrics() assert 
"whisperforge_http_requests_total" in prometheus_metrics assert "whisperforge_pipeline_success_total" in prometheus_metrics - + # Test JSON export json_metrics = export_json_metrics() assert "counters" in json_metrics assert "gauges" in json_metrics assert "histograms" in json_metrics - + print("โœ… Metrics export tests passed") return True - + except Exception as e: print(f"โŒ Metrics export test failed: {e}") traceback.print_exc() @@ -141,29 +144,22 @@ def test_error_tracking(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Error Tracking...") - + try: from core.monitoring import error_tracker - + # Test exception capture try: raise ValueError("Test error for monitoring") except Exception as e: - error_tracker.capture_exception(e, { - "test_context": "monitoring_test", - "user_id": "test_user" - }) - + error_tracker.capture_exception(e, {"test_context": "monitoring_test", "user_id": "test_user"}) + # Test message capture - error_tracker.capture_message( - "Test warning message", - level="warning", - context={"test": "monitoring"} - ) - + error_tracker.capture_message("Test warning message", level="warning", context={"test": "monitoring"}) + print("โœ… Error tracking tests passed") return True - + except Exception as e: print(f"โŒ Error tracking test failed: {e}") traceback.print_exc() @@ -175,31 +171,29 @@ def test_performance_tracking(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Performance Tracking...") - + try: - from core.monitoring import performance_tracker, monitor_function - + from core.monitoring import monitor_function, performance_tracker + # Test context manager with performance_tracker.track_operation("test_operation"): time.sleep(0.1) # Simulate work - + # Test decorator @monitor_function("test_function") def test_function(): time.sleep(0.05) return "test_result" - + result = test_function() assert result == "test_result" - + # Test pipeline performance tracking - 
performance_tracker.track_pipeline_performance( - "test_pipeline", 1.5, True, file_size_mb=10 - ) - + performance_tracker.track_pipeline_performance("test_pipeline", 1.5, True, file_size_mb=10) + print("โœ… Performance tracking tests passed") return True - + except Exception as e: print(f"โŒ Performance tracking test failed: {e}") traceback.print_exc() @@ -211,27 +205,25 @@ def test_streamlit_integration(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Streamlit Integration...") - + try: - from core.streamlit_monitoring import ( - streamlit_monitor, streamlit_page, streamlit_component - ) - + from core.streamlit_monitoring import streamlit_component, streamlit_page + # Test decorators (without actual Streamlit context) @streamlit_page("test_page") def test_page_function(): return "page_result" - + @streamlit_component("test_component") def test_component_function(): return "component_result" - + # Note: These will work but won't have full Streamlit context # In actual usage, they would have access to st.session_state - + print("โœ… Streamlit integration tests passed") return True - + except Exception as e: print(f"โŒ Streamlit integration test failed: {e}") traceback.print_exc() @@ -243,46 +235,46 @@ def test_log_file_creation(): if SKIP_IN_PYTEST: pytest.skip("monitoring script test") print("๐Ÿ” Testing Log File Creation...") - + try: from datetime import datetime from pathlib import Path - + # Check logs directory logs_dir = Path("logs") if not logs_dir.exists(): print(" Creating logs directory...") logs_dir.mkdir(exist_ok=True) - + # Check for structured log file - today = datetime.now().strftime('%Y%m%d') + today = datetime.now().strftime("%Y%m%d") log_file = logs_dir / f"whisperforge_structured_{today}.jsonl" - + if log_file.exists(): print(f" โœ… Log file exists: {log_file}") - + # Check file content - with open(log_file, 'r') as f: + with open(log_file) as f: lines = f.readlines() if lines: print(f" โœ… Log file has 
{len(lines)} entries") - + # Validate JSON format try: last_entry = json.loads(lines[-1].strip()) - print(f" โœ… Last log entry is valid JSON") - if 'timestamp' in last_entry and 'level' in last_entry: - print(f" โœ… Log entry has required fields") + print(" โœ… Last log entry is valid JSON") + if "timestamp" in last_entry and "level" in last_entry: + print(" โœ… Log entry has required fields") except json.JSONDecodeError: - print(f" โš ๏ธ Last log entry is not valid JSON") + print(" โš ๏ธ Last log entry is not valid JSON") else: - print(f" โš ๏ธ Log file is empty") + print(" โš ๏ธ Log file is empty") else: print(f" โš ๏ธ Log file not found: {log_file}") - + print("โœ… Log file creation tests completed") return True - + except Exception as e: print(f"โŒ Log file creation test failed: {e}") traceback.print_exc() @@ -293,7 +285,7 @@ def run_comprehensive_test(): """Run all monitoring tests""" print("๐Ÿš€ Starting Comprehensive Monitoring Test Suite") print("=" * 60) - + tests = [ ("Structured Logging", test_structured_logging), ("Health Checks", test_health_checks), @@ -303,9 +295,9 @@ def run_comprehensive_test(): ("Streamlit Integration", test_streamlit_integration), ("Log File Creation", test_log_file_creation), ] - + results = [] - + for test_name, test_func in tests: print(f"\n๐Ÿ“‹ Running {test_name} Test...") try: @@ -314,15 +306,15 @@ def run_comprehensive_test(): except Exception as e: print(f"โŒ {test_name} test crashed: {e}") results.append((test_name, False)) - + # Summary print("\n" + "=" * 60) print("๐Ÿ“Š Test Results Summary") print("=" * 60) - + passed = 0 failed = 0 - + for test_name, success in results: status = "โœ… PASS" if success else "โŒ FAIL" print(f"{status} {test_name}") @@ -330,9 +322,9 @@ def run_comprehensive_test(): passed += 1 else: failed += 1 - + print(f"\n๐Ÿ“ˆ Overall Results: {passed} passed, {failed} failed") - + if failed == 0: print("๐ŸŽ‰ All monitoring tests passed! 
System is ready for production.") return True @@ -356,4 +348,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/scripts/test_oauth.py b/scripts/test_oauth.py index 79b2691..69f1c5f 100644 --- a/scripts/test_oauth.py +++ b/scripts/test_oauth.py @@ -4,47 +4,48 @@ """ import sys -import os from pathlib import Path + import pytest from _pytest.outcomes import Skipped # Add the project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) + def test_oauth_url_generation(): """Test that OAuth URL generation works""" print("๐Ÿ” Testing OAuth URL generation...") - + # Load environment variables from .env file from dotenv import load_dotenv + load_dotenv() - + try: from core.supabase_integration import get_supabase_client - + db = get_supabase_client() - + if not db or not db.client: print("โŒ Failed to get Supabase client") pytest.skip("Supabase client not available - check environment variables") - + # Test OAuth URL generation redirect_url = "http://localhost:8501" - auth_response = db.client.auth.sign_in_with_oauth({ - "provider": "google", - "options": {"redirect_to": redirect_url} - }) - - if hasattr(auth_response, 'url') and auth_response.url: - print(f"โœ… OAuth URL generated successfully") + auth_response = db.client.auth.sign_in_with_oauth( + {"provider": "google", "options": {"redirect_to": redirect_url}} + ) + + if hasattr(auth_response, "url") and auth_response.url: + print("โœ… OAuth URL generated successfully") print(f" URL: {auth_response.url[:50]}...") assert True else: - print(f"โŒ OAuth URL not generated properly") + print("โŒ OAuth URL not generated properly") print(f" Response: {auth_response}") - assert False, "OAuth URL not generated properly" - + raise AssertionError("OAuth URL not generated properly") + except Exception as e: print(f"โŒ Error testing OAuth: {e}") if "Invalid API key" in str(e) or "SUPABASE_URL" in str(e) or "SUPABASE_ANON_KEY" in str(e): @@ -52,13 +53,15 @@ def 
test_oauth_url_generation(): pytest.skip("Supabase credentials not available for OAuth testing") else: import traceback + print(f" Full error: {traceback.format_exc()}") pytest.skip(f"OAuth test failed with error: {e}") + def main(): print("๐Ÿš€ WhisperForge OAuth Test") print("=" * 30) - + try: test_oauth_url_generation() print("\n๐ŸŽ‰ OAuth URL generation is working!") @@ -74,6 +77,7 @@ def main(): print("Check your Supabase configuration.") return False + if __name__ == "__main__": success = main() - sys.exit(0 if success else 1) \ No newline at end of file + sys.exit(0 if success else 1) diff --git a/scripts/ui_ux_audit.py b/scripts/ui_ux_audit.py index 485842e..7dd5589 100644 --- a/scripts/ui_ux_audit.py +++ b/scripts/ui_ux_audit.py @@ -5,18 +5,19 @@ """ import sys -import os from pathlib import Path # Add project root to path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) + def audit_section(title: str): """Print audit section header""" - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"๐ŸŽจ {title}") - print('='*60) + print("=" * 60) + def check_ui_feature(feature_name: str, description: str, status: bool): """Check UI feature status""" @@ -24,80 +25,82 @@ def check_ui_feature(feature_name: str, description: str, status: bool): print(f"{icon} {feature_name}: {description}") return status + def main(): """Run comprehensive UI/UX audit""" print("๐ŸŽจ WhisperForge UI/UX Audit") print("Verifying OAuth, progress indicators, and user experience...") - + # Track results results = { - 'oauth_flow': 0, - 'progress_indicators': 0, - 'ui_components': 0, - 'user_feedback': 0, - 'error_handling': 0, - 'accessibility': 0 + "oauth_flow": 0, + "progress_indicators": 0, + "ui_components": 0, + "user_feedback": 0, + "error_handling": 0, + "accessibility": 0, } - + # 1. 
OAuth Flow Audit audit_section("OAuth & Authentication Flow") - + # Check OAuth implementation try: import app - + # OAuth callback handling oauth_features = [ - ("OAuth Callback Handler", "handle_oauth_callback function exists", hasattr(app, 'handle_oauth_callback')), + ("OAuth Callback Handler", "handle_oauth_callback function exists", hasattr(app, "handle_oauth_callback")), ("Google OAuth Integration", "Google sign-in with proper redirect", True), # Verified in code - ("Fallback Authentication", "Email/password fallback available", hasattr(app, 'authenticate_user')), - ("User Registration", "Account creation flow", hasattr(app, 'register_user_supabase')), + ("Fallback Authentication", "Email/password fallback available", hasattr(app, "authenticate_user")), + ("User Registration", "Account creation flow", hasattr(app, "register_user_supabase")), ("Session Management", "Simple session state handling", True), # Verified in code ("Local Testing Bypass", "Database unavailable bypass", True), # Verified in code ("Error Recovery", "OAuth error handling with fallback", True), # Verified in code - ("Beautiful Auth Page", "Aurora-themed authentication UI", True) # Verified in code + ("Beautiful Auth Page", "Aurora-themed authentication UI", True), # Verified in code ] - + for feature, desc, status in oauth_features: if check_ui_feature(feature, desc, status): - results['oauth_flow'] += 1 - + results["oauth_flow"] += 1 + except Exception as e: print(f"โŒ OAuth audit failed: {e}") - + # 2. 
Progress Indicators Audit audit_section("Progress Indicators & Status Updates") - + try: - from core.streaming_results import show_streaming_results, show_real_time_content_stream - from core.streaming_pipeline import get_pipeline_controller from core.ui_components import AuroraComponents - + progress_features = [ ("Real-time Streaming", "Live content generation display", True), ("Step-by-step Progress", "Pipeline step indicators", True), - ("Aurora Progress Bars", "Beautiful animated progress bars", hasattr(AuroraComponents, 'aurora_progress_bar')), + ( + "Aurora Progress Bars", + "Beautiful animated progress bars", + hasattr(AuroraComponents, "aurora_progress_bar"), + ), ("File Upload Progress", "Large file upload tracking", True), # Verified in file_upload.py ("Chunk Processing", "Parallel chunk progress display", True), # Verified in file_upload.py ("Status Messages", "Success/error/warning notifications", True), ("Loading States", "Processing indicators during operations", True), - ("Completion Feedback", "Clear completion status", True) + ("Completion Feedback", "Clear completion status", True), ] - + for feature, desc, status in progress_features: if check_ui_feature(feature, desc, status): - results['progress_indicators'] += 1 - + results["progress_indicators"] += 1 + except Exception as e: print(f"โŒ Progress indicators audit failed: {e}") - + # 3. 
UI Components Audit audit_section("UI Components & Visual Design") - + try: - from core.ui_components import AuroraContainer, AuroraComponents - from core.styling import apply_aurora_theme, create_aurora_header - + from core.ui_components import AuroraComponents + ui_features = [ ("Aurora Theme", "Consistent bioluminescent design", True), ("Responsive Layout", "Mobile and desktop friendly", True), @@ -108,19 +111,19 @@ def main(): ("Form Design", "Beautiful input fields and validation", True), ("Card Components", "Elegant content containers", True), ("Navigation", "Intuitive page navigation", True), - ("Logo & Branding", "Professional WhisperForge identity", True) + ("Logo & Branding", "Professional WhisperForge identity", True), ] - + for feature, desc, status in ui_features: if check_ui_feature(feature, desc, status): - results['ui_components'] += 1 - + results["ui_components"] += 1 + except Exception as e: print(f"โŒ UI components audit failed: {e}") - + # 4. User Feedback & Interactions audit_section("User Feedback & Interactions") - + try: feedback_features = [ ("Success Messages", "Clear success confirmations", True), @@ -132,43 +135,43 @@ def main(): ("Form Validation", "Real-time input validation", True), ("Loading Spinners", "Activity indicators", True), ("Tooltips", "Helpful contextual hints", True), - ("Status Badges", "Clear state indicators", True) + ("Status Badges", "Clear state indicators", True), ] - + for feature, desc, status in feedback_features: if check_ui_feature(feature, desc, status): - results['user_feedback'] += 1 - + results["user_feedback"] += 1 + except Exception as e: print(f"โŒ User feedback audit failed: {e}") - + # 5. 
Error Handling & Recovery audit_section("Error Handling & Recovery") - + try: from core.ui_components import ErrorBoundary - + error_features = [ ("Graceful Degradation", "App works without database", True), - ("Error Boundaries", "Component error isolation", hasattr(ErrorBoundary, 'wrap')), + ("Error Boundaries", "Component error isolation", hasattr(ErrorBoundary, "wrap")), ("Retry Mechanisms", "Automatic retry on failures", True), ("Fallback UI", "Alternative UI when features fail", True), ("User-friendly Errors", "Non-technical error messages", True), ("Recovery Actions", "Clear steps to resolve issues", True), ("Offline Handling", "Graceful offline behavior", True), - ("Timeout Handling", "Long operation timeouts", True) + ("Timeout Handling", "Long operation timeouts", True), ] - + for feature, desc, status in error_features: if check_ui_feature(feature, desc, status): - results['error_handling'] += 1 - + results["error_handling"] += 1 + except Exception as e: print(f"โŒ Error handling audit failed: {e}") - + # 6. Accessibility & Usability audit_section("Accessibility & Usability") - + accessibility_features = [ ("Keyboard Navigation", "Full keyboard accessibility", True), ("Screen Reader Support", "Semantic HTML structure", True), @@ -179,16 +182,16 @@ def main(): ("Touch Friendly", "Mobile touch targets", True), ("Loading States", "Clear loading indicators", True), ("Error Recovery", "Clear error resolution paths", True), - ("Intuitive Flow", "Logical user journey", True) + ("Intuitive Flow", "Logical user journey", True), ] - + for feature, desc, status in accessibility_features: if check_ui_feature(feature, desc, status): - results['accessibility'] += 1 - + results["accessibility"] += 1 + # 7. 
Specific OAuth Flow Test audit_section("OAuth Flow Deep Dive") - + try: # Check OAuth URL generation oauth_deep_features = [ @@ -199,28 +202,28 @@ def main(): ("User Creation", "Automatic user record creation", True), ("Session Setup", "Proper session state initialization", True), ("Error Fallback", "Email auth when OAuth fails", True), - ("Debug Information", "Helpful debug info in development", True) + ("Debug Information", "Helpful debug info in development", True), ] - + for feature, desc, status in oauth_deep_features: if check_ui_feature(feature, desc, status): - results['oauth_flow'] += 1 - + results["oauth_flow"] += 1 + except Exception as e: print(f"โŒ OAuth deep dive failed: {e}") - + # Final Results audit_section("UI/UX Audit Results") total_checks = sum(results.values()) max_possible = 64 # Total possible checks - - print(f"๐ŸŽจ **UI/UX Quality Score: {total_checks}/{max_possible} ({(total_checks/max_possible)*100:.1f}%)**") + + print(f"๐ŸŽจ **UI/UX Quality Score: {total_checks}/{max_possible} ({(total_checks / max_possible) * 100:.1f}%)**") print() print("**Component Breakdown:**") for component, count in results.items(): status = "โœ…" if count > 0 else "โŒ" print(f"{status} {component.replace('_', ' ').title()}: {count} checks passed") - + print() if total_checks >= 55: print("๐ŸŽ‰ **EXCEPTIONAL**: UI/UX meets highest professional standards!") @@ -234,7 +237,7 @@ def main(): else: print("โš ๏ธ **NEEDS IMPROVEMENT**: UX issues detected") print("๐Ÿ› ๏ธ **REQUIRES ATTENTION**") - + # Specific recommendations print("\n๐ŸŽฏ **Key Strengths:**") print("โ€ข Beautiful Aurora bioluminescent theme") @@ -243,9 +246,10 @@ def main(): print("โ€ข Large file upload with chunking progress") print("โ€ข Graceful error handling and recovery") print("โ€ข Professional visual design") - + return total_checks >= 50 + if __name__ == "__main__": success = main() - sys.exit(0 if success else 1) \ No newline at end of file + sys.exit(0 if success else 1) diff --git 
a/scripts/validate_app.py b/scripts/validate_app.py index fe74f80..e420095 100644 --- a/scripts/validate_app.py +++ b/scripts/validate_app.py @@ -4,65 +4,66 @@ Tests all critical functionality to prevent deployment errors """ -import sys -import os import importlib -import traceback +import os +import sys from pathlib import Path # Add the project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) + class ValidationResult: def __init__(self): self.tests_passed = 0 self.tests_failed = 0 self.errors = [] self.warnings = [] - + def pass_test(self, test_name): self.tests_passed += 1 print(f"โœ… {test_name}") - + def fail_test(self, test_name, error): self.tests_failed += 1 self.errors.append(f"{test_name}: {error}") print(f"โŒ {test_name}: {error}") - + def warn_test(self, test_name, warning): self.warnings.append(f"{test_name}: {warning}") print(f"โš ๏ธ {test_name}: {warning}") - + def summary(self): total = self.tests_passed + self.tests_failed - print(f"\n{'='*50}") - print(f"VALIDATION SUMMARY") - print(f"{'='*50}") + print(f"\n{'=' * 50}") + print("VALIDATION SUMMARY") + print(f"{'=' * 50}") print(f"Tests Passed: {self.tests_passed}/{total}") print(f"Tests Failed: {self.tests_failed}") print(f"Warnings: {len(self.warnings)}") - + if self.errors: - print(f"\n๐Ÿšจ ERRORS:") + print("\n๐Ÿšจ ERRORS:") for error in self.errors: print(f" - {error}") - + if self.warnings: - print(f"\nโš ๏ธ WARNINGS:") + print("\nโš ๏ธ WARNINGS:") for warning in self.warnings: print(f" - {warning}") - + if self.tests_failed == 0: - print(f"\n๐ŸŽ‰ ALL TESTS PASSED! App is ready for deployment.") + print("\n๐ŸŽ‰ ALL TESTS PASSED! App is ready for deployment.") return True else: print(f"\n๐Ÿ’ฅ {self.tests_failed} TESTS FAILED! 
Fix errors before deployment.") return False + def test_imports(result): """Test all critical imports""" print("\n๐Ÿ” Testing Imports...") - + imports_to_test = [ # Core imports ("streamlit", "st"), @@ -72,13 +73,11 @@ def test_imports(result): ("datetime", "datetime"), ("tempfile", None), ("logging", None), - # Third-party ("dotenv", None), ("supabase", None), ("openai", None), ("anthropic", None), - # WhisperForge modules ("core.supabase_integration", None), ("core.utils", None), @@ -86,7 +85,7 @@ def test_imports(result): ("core.streaming_pipeline", None), ("core.monitoring", None), ] - + for module_name, alias in imports_to_test: try: if alias: @@ -100,29 +99,22 @@ def test_imports(result): except Exception as e: result.fail_test(f"Import {module_name}", f"Unexpected error: {e}") + def test_environment_variables(result): """Test required environment variables""" print("\n๐Ÿ” Testing Environment Variables...") - - required_vars = [ - "SUPABASE_URL", - "SUPABASE_ANON_KEY" - ] - - optional_vars = [ - "OPENAI_API_KEY", - "ANTHROPIC_API_KEY", - "OAUTH_REDIRECT_URL", - "STREAMLIT_APP_URL" - ] - + + required_vars = ["SUPABASE_URL", "SUPABASE_ANON_KEY"] + + optional_vars = ["OPENAI_API_KEY", "ANTHROPIC_API_KEY", "OAUTH_REDIRECT_URL", "STREAMLIT_APP_URL"] + for var in required_vars: value = os.getenv(var) if value: result.pass_test(f"Required env var {var}") else: result.fail_test(f"Required env var {var}", "Not set") - + for var in optional_vars: value = os.getenv(var) if value: @@ -130,10 +122,11 @@ def test_environment_variables(result): else: result.warn_test(f"Optional env var {var}", "Not set") + def test_file_structure(result): """Test required file structure""" print("\n๐Ÿ” Testing File Structure...") - + required_files = [ "app.py", "requirements.txt", @@ -146,97 +139,95 @@ def test_file_structure(result): "prompts/default/wisdom_extraction.md", "prompts/default/outline_creation.md", "prompts/default/social_media.md", - "prompts/default/image_prompts.md" - ] 
- - optional_files = [ - "static/css/whisperforge_ui.css", - "core/ui_components.py", - ".env", - "Procfile", - "runtime.txt" + "prompts/default/image_prompts.md", ] - + + optional_files = ["static/css/whisperforge_ui.css", "core/ui_components.py", ".env", "Procfile", "runtime.txt"] + for file_path in required_files: if Path(file_path).exists(): result.pass_test(f"Required file {file_path}") else: result.fail_test(f"Required file {file_path}", "Missing") - + for file_path in optional_files: if Path(file_path).exists(): result.pass_test(f"Optional file {file_path}") else: result.warn_test(f"Optional file {file_path}", "Missing") + def test_supabase_connection(result): """Test Supabase connection""" print("\n๐Ÿ” Testing Supabase Connection...") - + try: from core.supabase_integration import get_supabase_client + db, mcp = get_supabase_client() - + if db: result.pass_test("Supabase client initialization") - + # Test basic connection try: # Simple health check - response = db.client.table("users").select("count", count="exact").limit(1).execute() + db.client.table("users").select("count", count="exact").limit(1).execute() result.pass_test("Supabase database connection") except Exception as e: result.fail_test("Supabase database connection", str(e)) else: result.fail_test("Supabase client initialization", "Failed to create client") - + except Exception as e: result.fail_test("Supabase import", str(e)) + def test_oauth_configuration(result): """Test OAuth configuration""" print("\n๐Ÿ” Testing OAuth Configuration...") - + try: from core.supabase_integration import get_supabase_client + db, _ = get_supabase_client() - + if db: try: # Test OAuth URL generation redirect_url = os.getenv("OAUTH_REDIRECT_URL", "http://localhost:8501") - auth_response = db.client.auth.sign_in_with_oauth({ - "provider": "google", - "options": {"redirect_to": redirect_url} - }) - - if hasattr(auth_response, 'url') and auth_response.url: + auth_response = db.client.auth.sign_in_with_oauth( + 
{"provider": "google", "options": {"redirect_to": redirect_url}} + ) + + if hasattr(auth_response, "url") and auth_response.url: result.pass_test("OAuth URL generation") else: result.warn_test("OAuth URL generation", "No URL returned") - + except Exception as e: result.warn_test("OAuth URL generation", str(e)) else: result.fail_test("OAuth test", "No Supabase client") - + except Exception as e: result.fail_test("OAuth test", str(e)) + def test_prompt_files(result): """Test prompt files are readable""" print("\n๐Ÿ” Testing Prompt Files...") - + prompt_files = [ "prompts/default/wisdom_extraction.md", "prompts/default/outline_creation.md", "prompts/default/social_media.md", - "prompts/default/image_prompts.md" + "prompts/default/image_prompts.md", ] - + for prompt_file in prompt_files: try: - with open(prompt_file, 'r', encoding='utf-8') as f: + with open(prompt_file, encoding="utf-8") as f: content = f.read() if len(content) > 10: # Basic content check result.pass_test(f"Prompt file {prompt_file}") @@ -247,87 +238,93 @@ def test_prompt_files(result): except Exception as e: result.fail_test(f"Prompt file {prompt_file}", str(e)) + def test_ai_providers(result): """Test AI provider configurations""" print("\n๐Ÿ” Testing AI Providers...") - + # Test OpenAI openai_key = os.getenv("OPENAI_API_KEY") if openai_key: try: import openai + # Basic client test (don't make actual API calls in validation) - client = openai.OpenAI(api_key=openai_key) + openai.OpenAI(api_key=openai_key) result.pass_test("OpenAI configuration") except Exception as e: result.warn_test("OpenAI configuration", str(e)) else: result.warn_test("OpenAI configuration", "API key not set") - + # Test Anthropic anthropic_key = os.getenv("ANTHROPIC_API_KEY") if anthropic_key: try: import anthropic + # Basic client test (don't make actual API calls in validation) - client = anthropic.Anthropic(api_key=anthropic_key) + anthropic.Anthropic(api_key=anthropic_key) result.pass_test("Anthropic configuration") except 
Exception as e: result.warn_test("Anthropic configuration", str(e)) else: result.warn_test("Anthropic configuration", "API key not set") + def test_streamlit_compatibility(result): """Test Streamlit compatibility""" print("\n๐Ÿ” Testing Streamlit Compatibility...") - + try: import streamlit as st + result.pass_test("Streamlit import") - + # Check version compatibility try: version = st.__version__ - major, minor = map(int, version.split('.')[:2]) + major, minor = map(int, version.split(".")[:2]) if major >= 1 and minor >= 28: result.pass_test("Streamlit version compatibility") else: result.warn_test("Streamlit version", f"Version {version} may be outdated") - except: + except (ValueError, AttributeError): result.warn_test("Streamlit version", "Could not check version") - + except Exception as e: result.fail_test("Streamlit import", str(e)) + def test_pipeline_components(result): """Test pipeline components""" print("\n๐Ÿ” Testing Pipeline Components...") - + try: from core.streaming_pipeline import get_pipeline_controller - + # Test pipeline initialization controller = get_pipeline_controller() if controller: result.pass_test("Pipeline controller initialization") else: result.fail_test("Pipeline controller initialization", "Failed to create controller") - + except Exception as e: result.fail_test("Pipeline controller", str(e)) - + try: - from core.content_generation import transcribe_audio result.pass_test("Content generation import") except Exception as e: result.fail_test("Content generation import", str(e)) + def main(): print("๐Ÿš€ WhisperForge App Validation") print("=" * 50) - + result = ValidationResult() - + # Run all tests test_imports(result) test_environment_variables(result) @@ -338,12 +335,13 @@ def main(): test_ai_providers(result) test_streamlit_compatibility(result) test_pipeline_components(result) - + # Print summary success = result.summary() - + # Exit with appropriate code sys.exit(0 if success else 1) + if __name__ == "__main__": - main() \ 
No newline at end of file + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..124e864 --- /dev/null +++ b/setup.py @@ -0,0 +1,115 @@ +"""WhisperForge environment setup - creates a virtual environment and installs dependencies.""" + +import os +import subprocess +import sys +import venv + +PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) +VENV_DIR = os.path.join(PROJECT_DIR, ".venv") +REQUIREMENTS = os.path.join(PROJECT_DIR, "requirements.txt") +REQUIREMENTS_DEV = os.path.join(PROJECT_DIR, "requirements-dev.txt") + +if sys.platform == "win32": + PYTHON = os.path.join(VENV_DIR, "Scripts", "python") + PIP = os.path.join(VENV_DIR, "Scripts", "pip") + PRE_COMMIT = os.path.join(VENV_DIR, "Scripts", "pre-commit") +else: + PYTHON = os.path.join(VENV_DIR, "bin", "python") + PIP = os.path.join(VENV_DIR, "bin", "pip") + PRE_COMMIT = os.path.join(VENV_DIR, "bin", "pre-commit") + + +def print_step(msg): + print(f"\n >> {msg}") + + +def create_venv(): + """Create a fresh virtual environment.""" + if os.path.exists(VENV_DIR): + print_step(f"Removing existing venv at {VENV_DIR}") + import shutil + + shutil.rmtree(VENV_DIR) + + print_step(f"Creating virtual environment in {VENV_DIR}") + venv.create(VENV_DIR, with_pip=True) + + if not os.path.exists(PYTHON): + print(" !! Failed to create virtual environment.") + sys.exit(1) + + print(" Done.") + + +def install_requirements(dev=False): + """Install project dependencies from requirements.txt (and dev deps if requested).""" + if not os.path.exists(REQUIREMENTS): + print(" !! requirements.txt not found, skipping dependency install.") + return + + print_step("Upgrading pip") + subprocess.check_call( + [PYTHON, "-m", "pip", "install", "--upgrade", "pip"], + stdout=subprocess.DEVNULL, + ) + + if dev: + req_file = REQUIREMENTS_DEV + if not os.path.exists(req_file): + print(" !! 
requirements-dev.txt not found, falling back to requirements.txt") + req_file = REQUIREMENTS + else: + req_file = REQUIREMENTS + + print_step(f"Installing dependencies from {os.path.basename(req_file)}") + subprocess.check_call([PIP, "install", "-r", req_file]) + print("\n All dependencies installed.") + + +def install_pre_commit_hooks(): + """Install pre-commit git hooks into the repository.""" + print_step("Installing pre-commit hooks") + subprocess.check_call([PRE_COMMIT, "install"], cwd=PROJECT_DIR) + print(" Hooks installed.") + + +def verify_install(): + """Quick sanity check that key packages are importable.""" + print_step("Verifying installation") + result = subprocess.run( + [PYTHON, "-c", "import streamlit; print(f' streamlit {streamlit.__version__}')"], + capture_output=True, + text=True, + ) + if result.returncode == 0: + print(result.stdout.strip()) + print(" Verification passed.") + else: + print(" !! Verification failed - streamlit could not be imported.") + print(result.stderr.strip()) + sys.exit(1) + + +def run(dev=False): + print("=" * 50) + print(" WhisperForge - Environment Setup") + if dev: + print(" (dev mode)") + print("=" * 50) + + create_venv() + install_requirements(dev=dev) + verify_install() + + if dev: + install_pre_commit_hooks() + + print("\n" + "=" * 50) + print(" Setup complete. 
Run main.py to start the app.") + print("=" * 50 + "\n") + + +if __name__ == "__main__": + dev = "--dev" in sys.argv + run(dev=dev) diff --git a/shared/__init__.py b/shared/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/shared/config.py b/shared/config.py deleted file mode 100644 index 22b4d63..0000000 --- a/shared/config.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import logging -from dotenv import load_dotenv - -load_dotenv() - -logger = logging.getLogger(__name__) - -class Config: - # API Keys - OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") - NOTION_API_KEY = os.getenv("NOTION_API_KEY") - SERVICE_TOKEN = os.getenv("SERVICE_TOKEN") - - # Log configuration status - logger.info("=== Configuration Status ===") - logger.info(f"OpenAI API Key present: {'Yes' if OPENAI_API_KEY else 'No'}") - logger.info(f"Claude API Key present: {'Yes' if CLAUDE_API_KEY else 'No'}") - logger.info(f"Notion API Key present: {'Yes' if NOTION_API_KEY else 'No'}") - logger.info(f"Service Token present: {'Yes' if SERVICE_TOKEN else 'No'}") - - # Service URLs - TRANSCRIPTION_SERVICE_URL = 'http://transcription:8000' - PROCESSING_SERVICE_URL = 'http://processing:8000' - STORAGE_SERVICE_URL = 'http://storage:8000' - - # Cache settings - CACHE_DIR = '/app/cache' - - # Model settings - WHISPER_MODEL = 'whisper-1' - GPT_MODEL = 'gpt-4' diff --git a/shared/security.py b/shared/security.py deleted file mode 100644 index aba285e..0000000 --- a/shared/security.py +++ /dev/null @@ -1,28 +0,0 @@ -from fastapi import Security, HTTPException -from fastapi.security import HTTPBearer -from jose import jwt -from datetime import datetime, timedelta -from passlib.context import CryptContext -import os - -pwd_context = CryptContext(schemes=["bcrypt"]) -security = HTTPBearer() - -def hash_password(password: str) -> str: - return pwd_context.hash(password) - -def verify_password(plain_password: str, hashed_password: str) -> bool: - return pwd_context.verify(plain_password, 
hashed_password) - -def create_jwt_token(data: dict) -> str: - to_encode = data.copy() - expire = datetime.utcnow() + timedelta(minutes=30) - to_encode.update({"exp": expire}) - return jwt.encode(to_encode, os.getenv('JWT_SECRET'), "HS256") - -async def verify_token(token: str = Security(security)): - try: - payload = jwt.decode(token, os.getenv('JWT_SECRET'), "HS256") - return payload - except jwt.JWTError: - raise HTTPException(401, "Invalid token") diff --git a/start_app.sh b/start_app.sh deleted file mode 100755 index 5a27a4e..0000000 --- a/start_app.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# WhisperForge Startup Script -echo "๐Ÿš€ Starting WhisperForge v3.0.0 with real Supabase credentials..." - -# Auto-load environment variables from .env if present -if [ -f .env ]; then - set -a - # shellcheck disable=SC1091 - source .env - set +a -fi - -# Check for required environment variables -if [ -z "$SUPABASE_URL" ] || [ -z "$SUPABASE_ANON_KEY" ]; then - echo "โŒ Missing required environment variables:" - echo " Please set SUPABASE_URL and SUPABASE_ANON_KEY before running this script." - echo " Example:" - echo " export SUPABASE_URL=''" - echo " export SUPABASE_ANON_KEY=''" - exit 1 -fi -ENVIRONMENT="${1:-${ENVIRONMENT:-development}}" -export ENVIRONMENT - -if [ "$ENVIRONMENT" = "production" ]; then - export DEBUG="${DEBUG:-false}" - export LOG_LEVEL="${LOG_LEVEL:-INFO}" -else - export DEBUG="${DEBUG:-true}" - export LOG_LEVEL="${LOG_LEVEL:-DEBUG}" -fi - -echo "Running in $ENVIRONMENT mode" - -echo "โœ… Environment variables set" -echo "๐Ÿ”— Supabase URL: $SUPABASE_URL" -echo "๐Ÿ”‘ Supabase Key: ${SUPABASE_ANON_KEY:0:20}..." - -# Test Supabase connection -echo "๐Ÿงช Testing Supabase connection..." -python -c "from core.supabase_integration import get_supabase_client; client = get_supabase_client(); print('โœ… Supabase connection successful!' 
if client.test_connection() else 'โŒ Connection failed')" - -# Start Streamlit app with correct file (app_simple.py is the main app) -echo "๐ŸŒ Starting WhisperForge v3.0.0 on http://localhost:8501" -echo "๐Ÿ“ Press Ctrl+C to stop the app" -echo "" - -streamlit run app_simple.py --server.port 8501 --server.address 0.0.0.0 diff --git a/static/README.md b/static/README.md index 3b147fb..ea850d0 100644 --- a/static/README.md +++ b/static/README.md @@ -30,4 +30,4 @@ When adding new assets: 1. Place CSS files in the `css` directory 2. Place JavaScript files in the `js` directory 3. Update the corresponding loader function in `app.py` -4. Document the purpose of the asset in this README \ No newline at end of file +4. Document the purpose of the asset in this README diff --git a/static/css/aurora-progress.css b/static/css/aurora-progress.css index 22bac5b..b991259 100644 --- a/static/css/aurora-progress.css +++ b/static/css/aurora-progress.css @@ -8,13 +8,13 @@ --aurora-electric-blue: #7DF9FF; --aurora-spring-green: #00FF7F; --aurora-teal: #008B8B; - + /* Aurora HSL Values for Dynamic Manipulation */ --aurora-cyan-hsl: 180, 100%, 50%; --aurora-turquoise-hsl: 174, 72%, 56%; --aurora-electric-blue-hsl: 195, 100%, 78%; --aurora-spring-green-hsl: 150, 100%, 50%; - + /* Sophisticated Glow Variables */ --glow-radius-subtle: 8px; --glow-radius-medium: 16px; @@ -22,7 +22,7 @@ --glow-opacity-subtle: 0.3; --glow-opacity-medium: 0.6; --glow-opacity-strong: 0.9; - + /* Animation Timing */ --timing-organic: cubic-bezier(0.4, 0.0, 0.2, 1); --timing-elastic: cubic-bezier(0.68, -0.55, 0.265, 1.55); @@ -44,13 +44,13 @@ margin: 20px 0; position: relative; overflow: hidden; - + /* Subtle Aurora Glow */ - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-cyan-hsl), var(--glow-opacity-subtle)), inset 0 1px 0 rgba(255, 255, 255, 0.1), 0 8px 32px rgba(0, 0, 0, 0.12); - + transition: all 0.6s var(--timing-organic); } @@ -74,7 +74,7 @@ .aurora-progress-container:hover 
{ border-color: rgba(0, 255, 255, 0.3); - box-shadow: + box-shadow: 0 0 var(--glow-radius-medium) rgba(var(--aurora-cyan-hsl), var(--glow-opacity-medium)), inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 12px 48px rgba(0, 0, 0, 0.16); @@ -156,12 +156,12 @@ border-radius: 6px; position: relative; transition: width 0.8s var(--timing-organic); - + /* Living Glow Effect */ - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) currentColor, 0 0 var(--glow-radius-medium) rgba(var(--aurora-cyan-hsl), 0.4); - + animation: aurora-flow 2s ease-in-out infinite; } @@ -214,7 +214,7 @@ .aurora-progress-step.running { background: rgba(var(--aurora-cyan-hsl), 0.04); border: 1px solid rgba(var(--aurora-cyan-hsl), 0.2); - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-cyan-hsl), 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1); } @@ -226,7 +226,7 @@ .aurora-progress-step.completed { background: rgba(var(--aurora-spring-green-hsl), 0.04); border: 1px solid rgba(var(--aurora-spring-green-hsl), 0.2); - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-spring-green-hsl), 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1); } @@ -234,7 +234,7 @@ .aurora-progress-step.error { background: rgba(255, 107, 107, 0.04); border: 1px solid rgba(255, 107, 107, 0.2); - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) rgba(255, 107, 107, 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1); } @@ -277,7 +277,7 @@ .aurora-progress-step.running .aurora-step-indicator { background: var(--aurora-cyan); color: #000; - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) var(--aurora-cyan), 0 0 var(--glow-radius-medium) rgba(var(--aurora-cyan-hsl), 0.4); } @@ -289,7 +289,7 @@ .aurora-progress-step.completed .aurora-step-indicator { background: var(--aurora-spring-green); color: #000; - box-shadow: + box-shadow: 0 0 var(--glow-radius-subtle) var(--aurora-spring-green), 0 0 var(--glow-radius-medium) rgba(var(--aurora-spring-green-hsl), 0.4); } @@ -361,12 +361,12 @@ } 
@keyframes aurora-pulse { - 0%, 100% { - opacity: 0.6; + 0%, 100% { + opacity: 0.6; text-shadow: 0 0 var(--glow-radius-subtle) currentColor; } - 50% { - opacity: 1; + 50% { + opacity: 1; text-shadow: 0 0 var(--glow-radius-medium) currentColor; } } @@ -385,33 +385,33 @@ } @keyframes aurora-pulse-subtle { - 0%, 100% { - transform: scale(1); + 0%, 100% { + transform: scale(1); box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-cyan-hsl), var(--glow-opacity-subtle)); } - 50% { - transform: scale(1.02); + 50% { + transform: scale(1.02); box-shadow: 0 0 var(--glow-radius-medium) rgba(var(--aurora-cyan-hsl), var(--glow-opacity-medium)); } } @keyframes aurora-pulse-thinking { - 0%, 100% { + 0%, 100% { border-color: rgba(var(--aurora-turquoise-hsl), 0.3); box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-turquoise-hsl), 0.3); } - 50% { + 50% { border-color: rgba(var(--aurora-turquoise-hsl), 0.6); box-shadow: 0 0 var(--glow-radius-medium) rgba(var(--aurora-turquoise-hsl), 0.6); } } @keyframes aurora-pulse-responding { - 0%, 100% { + 0%, 100% { border-color: rgba(var(--aurora-spring-green-hsl), 0.4); box-shadow: 0 0 var(--glow-radius-subtle) rgba(var(--aurora-spring-green-hsl), 0.4); } - 50% { + 50% { border-color: rgba(var(--aurora-spring-green-hsl), 0.8); box-shadow: 0 0 var(--glow-radius-strong) rgba(var(--aurora-spring-green-hsl), 0.8); } @@ -432,15 +432,15 @@ padding: 20px; margin: 16px 0; } - + .aurora-progress-header { margin-bottom: 16px; } - + .aurora-progress-steps { gap: 10px; } - + .aurora-progress-step { padding: 10px 12px; gap: 12px; @@ -450,12 +450,12 @@ /* High-DPI/Retina Optimizations */ @media (-webkit-min-device-pixel-ratio: 2), (min-resolution: 192dpi) { .aurora-progress-fill { - box-shadow: + box-shadow: 0 0 calc(var(--glow-radius-subtle) * 0.8) currentColor, 0 0 calc(var(--glow-radius-medium) * 0.8) rgba(var(--aurora-cyan-hsl), 0.4); } - + .aurora-step-indicator { border-width: 0.5px; } -} \ No newline at end of file +} diff --git 
a/static/css/diagram.css b/static/css/diagram.css index b57e7af..f34ae2c 100644 --- a/static/css/diagram.css +++ b/static/css/diagram.css @@ -260,10 +260,10 @@ hr { left: 0; width: 100%; height: 2px; - background: linear-gradient(90deg, - transparent, - rgba(121, 40, 202, 0.5), - rgba(255, 0, 128, 0.5), + background: linear-gradient(90deg, + transparent, + rgba(121, 40, 202, 0.5), + rgba(255, 0, 128, 0.5), transparent ); opacity: 0.3; @@ -277,4 +277,4 @@ hr { 10% { transform: translateY(100px); opacity: 0.5; } 80% { transform: translateY(calc(100vh - 100px)); opacity: 0.5; } 100% { transform: translateY(100vh); opacity: 0; } -} \ No newline at end of file +} diff --git a/static/css/main.css b/static/css/main.css index 675ccc0..10d36d7 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -7,34 +7,34 @@ --aurora-secondary: #7DF9FF; /* Electric Blue */ --aurora-tertiary: #00FFFF; /* Aqua */ --aurora-accent: #20B2AA; /* Light Sea Green */ - + /* Background Colors */ --aurora-bg-dark: #0a0f1c; /* Deep Navy */ --aurora-bg-darker: #0d1421; /* Darker Navy */ --aurora-bg-card: rgba(64, 224, 208, 0.03); --aurora-bg-glass: rgba(64, 224, 208, 0.08); - + /* Text Colors */ --aurora-text: rgba(255, 255, 255, 0.95); --aurora-text-muted: rgba(255, 255, 255, 0.7); --aurora-text-dim: rgba(255, 255, 255, 0.5); - + /* Border & Effects */ --aurora-border: rgba(64, 224, 208, 0.2); --aurora-border-hover: rgba(64, 224, 208, 0.4); --aurora-border-active: rgba(64, 224, 208, 0.6); - + /* Status Colors */ --aurora-success: #00FF88; --aurora-warning: #FFB800; --aurora-error: #FF4444; --aurora-info: #40E0D0; - + /* Glow Effects */ --aurora-glow: 0 0 20px rgba(64, 224, 208, 0.3); --aurora-glow-strong: 0 0 30px rgba(64, 224, 208, 0.5); --aurora-glow-subtle: 0 0 10px rgba(64, 224, 208, 0.2); - + /* Spacing & Layout */ --aurora-radius: 12px; --aurora-radius-large: 16px; @@ -42,12 +42,19 @@ --aurora-spacing: 16px; --aurora-spacing-large: 24px; --aurora-spacing-small: 8px; - + /* Typography 
*/ --aurora-font-primary: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; --aurora-font-mono: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace; } +/* Hide Streamlit default header bar */ +header[data-testid="stHeader"], +.stAppHeader { + background: transparent !important; + backdrop-filter: none !important; +} + /* Global Styles */ .stApp { background: linear-gradient(135deg, var(--aurora-bg-dark) 0%, var(--aurora-bg-darker) 100%); @@ -133,12 +140,32 @@ font-size: 0.9rem; font-weight: 500; transition: all 0.3s ease; + user-select: none; } -.pipeline-badge:hover { - border-color: var(--aurora-border-hover); - box-shadow: var(--aurora-glow-subtle); +/* Completed badge - results available, clickable */ +.pipeline-badge.badge-completed { + border-color: var(--aurora-primary); + background: rgba(64, 224, 208, 0.15); + color: var(--aurora-primary); + box-shadow: 0 0 8px rgba(64, 224, 208, 0.2); +} + +.pipeline-badge.badge-completed:hover { + border-color: var(--aurora-secondary); + box-shadow: 0 0 16px rgba(64, 224, 208, 0.4); transform: translateY(-2px); + background: rgba(64, 224, 208, 0.25); +} + +/* Pending badge - no results yet */ +.pipeline-badge.badge-pending { + opacity: 0.5; + border-color: rgba(64, 224, 208, 0.1); +} + +.pipeline-badge.badge-pending:hover { + opacity: 0.6; } /* Completion Celebration */ @@ -237,8 +264,12 @@ /* Text Inputs */ .stTextInput > div > div > input, -.stTextArea > div > div > textarea { - background: var(--aurora-bg-card) !important; +.stTextArea > div > div > textarea, +.stTextInput input, +.stTextArea textarea, +[data-testid="stTextInput"] input, +[data-testid="stTextArea"] textarea { + background: var(--aurora-bg-darker) !important; border: 1px solid var(--aurora-border) !important; border-radius: var(--aurora-radius-small) !important; color: var(--aurora-text) !important; @@ -246,19 +277,47 @@ } .stTextInput > div > div > input:focus, -.stTextArea 
> div > div > textarea:focus { +.stTextArea > div > div > textarea:focus, +.stTextInput input:focus, +.stTextArea textarea:focus, +[data-testid="stTextInput"] input:focus, +[data-testid="stTextArea"] textarea:focus { + background: var(--aurora-bg-darker) !important; border-color: var(--aurora-border-hover) !important; box-shadow: var(--aurora-glow-subtle) !important; } +/* Text area label styling */ +[data-testid="stTextArea"] label, +[data-testid="stTextInput"] label { + color: var(--aurora-text) !important; +} + /* Select Boxes */ -.stSelectbox > div > div { +.stSelectbox > div > div, +[data-testid="stSelectbox"] > div > div { background: var(--aurora-bg-card) !important; border: 1px solid var(--aurora-border) !important; border-radius: var(--aurora-radius-small) !important; color: var(--aurora-text) !important; } +/* Number Inputs */ +[data-testid="stNumberInput"] input, +.stNumberInput input { + background: var(--aurora-bg-card) !important; + border: 1px solid var(--aurora-border) !important; + border-radius: var(--aurora-radius-small) !important; + color: var(--aurora-text) !important; + font-family: var(--aurora-font-primary) !important; +} + +[data-testid="stNumberInput"] input:focus, +.stNumberInput input:focus { + border-color: var(--aurora-border-hover) !important; + box-shadow: var(--aurora-glow-subtle) !important; +} + /* Progress Bars */ .stProgress > div > div > div { background: linear-gradient(90deg, var(--aurora-primary), var(--aurora-secondary)) !important; @@ -340,39 +399,39 @@ audio { /* Animations */ @keyframes aurora-scan { - 0%, 100% { - left: -100%; - opacity: 0; + 0%, 100% { + left: -100%; + opacity: 0; } - 25% { - opacity: 1; + 25% { + opacity: 1; } - 75% { - opacity: 1; + 75% { + opacity: 1; } - 100% { - left: 100%; - opacity: 0; + 100% { + left: 100%; + opacity: 0; } } @keyframes aurora-pulse { - 0%, 100% { - opacity: 1; - transform: scale(1); + 0%, 100% { + opacity: 1; + transform: scale(1); } - 50% { - opacity: 0.7; - transform: 
scale(1.05); + 50% { + opacity: 0.7; + transform: scale(1.05); } } @keyframes aurora-glow { - 0%, 100% { - box-shadow: var(--aurora-glow-subtle); + 0%, 100% { + box-shadow: var(--aurora-glow-subtle); } - 50% { - box-shadow: var(--aurora-glow); + 50% { + box-shadow: var(--aurora-glow); } } @@ -382,13 +441,13 @@ audio { } @keyframes fadeIn { - from { - opacity: 0; - transform: translateY(20px); + from { + opacity: 0; + transform: translateY(20px); } - to { - opacity: 1; - transform: translateY(0); + to { + opacity: 1; + transform: translateY(0); } } @@ -397,24 +456,24 @@ audio { .main .block-container { padding: var(--aurora-spacing-small); } - + .aurora-title { font-size: 2rem; } - + .aurora-pipeline { gap: var(--aurora-spacing-small); } - + .pipeline-badge { font-size: 0.8rem; padding: 6px 12px; } - + .aurora-celebration-title { font-size: 2.5rem; } - + .aurora-processing-title { font-size: 2rem; } @@ -759,15 +818,15 @@ audio { .aurora-upload-method-selector { grid-template-columns: 1fr; } - + .aurora-upload-method-icon { font-size: 2.5rem; } - + .aurora-upload-icon { font-size: 3rem; } - + .aurora-processing-metrics { grid-template-columns: 1fr 1fr; } @@ -1060,28 +1119,28 @@ audio { overflow-x: auto; padding: 2px; } - + .aurora-tab-button { padding: 10px 16px; font-size: 0.8rem; min-width: 120px; } - + .aurora-tab-icon { font-size: 1rem; } - + .aurora-tabs-actions { flex-direction: column; gap: var(--aurora-spacing-small); align-items: stretch; } - + .aurora-tabs-quick-actions { justify-content: center; flex-wrap: wrap; } - + .aurora-quick-action-btn { flex: 1; justify-content: center; @@ -1199,7 +1258,7 @@ audio { flex-direction: column; gap: var(--aurora-spacing-small); } - + .aurora-stat-item { width: 100%; flex-direction: row; @@ -1207,35 +1266,60 @@ audio { text-align: left; min-width: auto; } - + .aurora-stat-icon { margin-bottom: 0; margin-right: var(--aurora-spacing); font-size: 1.5rem; } - + .aurora-stat-value { font-size: 1.3rem; } - + 
.aurora-results-title { font-size: 1.8rem; } - + .aurora-results-subtitle { font-size: 1rem; } } -/* Override Streamlit Default Tab Styling */ -.stTabs { - display: none !important; +/* Aurora-styled Streamlit Tabs */ +.stTabs [data-baseweb="tab-list"] { + gap: 8px; + border-bottom: 1px solid rgba(64, 224, 208, 0.2); + padding-bottom: 4px; } -/* Ensure our custom tabs are visible */ -.aurora-tabs-container { - display: block !important; - visibility: visible !important; +.stTabs [data-baseweb="tab"] { + background: rgba(64, 224, 208, 0.05); + border: 1px solid rgba(64, 224, 208, 0.15); + border-radius: 8px 8px 0 0; + color: rgba(255, 255, 255, 0.7); + padding: 8px 20px; + transition: all 0.3s ease; +} + +.stTabs [data-baseweb="tab"]:hover { + background: rgba(64, 224, 208, 0.1); + border-color: rgba(64, 224, 208, 0.3); + color: #7DF9FF; +} + +.stTabs [aria-selected="true"] { + background: rgba(64, 224, 208, 0.12) !important; + border-color: #40E0D0 !important; + color: #40E0D0 !important; +} + +.stTabs [data-baseweb="tab-highlight"] { + background-color: #40E0D0 !important; +} + +.stTabs [data-baseweb="tab-border"] { + display: none; } /* Enhanced Content Cards (moved from previous location) */ @@ -1470,19 +1554,19 @@ audio { gap: var(--aurora-spacing-small); align-items: flex-start; } - + .aurora-content-card-actions { width: 100%; justify-content: flex-end; } - + .aurora-content-card-body { padding: var(--aurora-spacing); } - + .aurora-content-stats { flex-direction: column; gap: var(--aurora-spacing-small); align-items: flex-start; } -} \ No newline at end of file +} diff --git a/static/css/production.css b/static/css/production.css index 1939aa3..0da9a03 100644 --- a/static/css/production.css +++ b/static/css/production.css @@ -101,4 +101,4 @@ color: var(--text-primary); border-bottom: 1px solid rgba(121, 40, 202, 0.1); padding-bottom: 10px; -} \ No newline at end of file +} diff --git a/static/css/streaming.css b/static/css/streaming.css new file mode 100644 
index 0000000..81a8157 --- /dev/null +++ b/static/css/streaming.css @@ -0,0 +1,157 @@ +/* Aurora Progress Animation */ +@keyframes aurora-flow { + 0%, 100% { left: -100%; opacity: 0; } + 25% { opacity: 1; } + 75% { opacity: 1; } + 100% { left: 100%; opacity: 0; } +} + +@keyframes aurora-pulse { + 0%, 100% { opacity: 1; transform: scale(1); } + 50% { opacity: 0.7; transform: scale(1.1); } +} + +@keyframes completion-glow { + 0%, 100% { left: -100%; opacity: 0; } + 20% { opacity: 1; } + 80% { opacity: 1; } + 100% { left: 100%; opacity: 0; } +} + +/* Enhanced Button Styling */ +.stButton > button { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.1), rgba(64, 224, 208, 0.15)) !important; + border: 1px solid rgba(0, 255, 255, 0.2) !important; + color: rgba(255, 255, 255, 0.9) !important; + border-radius: 8px !important; + font-weight: 500 !important; + transition: all 0.3s ease !important; +} + +.stButton > button:hover { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.15), rgba(64, 224, 208, 0.2)) !important; + border-color: rgba(0, 255, 255, 0.3) !important; + color: white !important; + transform: translateY(-1px); + box-shadow: 0 4px 15px rgba(0, 255, 255, 0.15); +} + +/* Expander Styling */ +.streamlit-expanderHeader { + background: rgba(0, 255, 255, 0.03) !important; + border: 1px solid rgba(0, 255, 255, 0.1) !important; + border-radius: 8px !important; +} + +.streamlit-expanderContent { + background: rgba(0, 255, 255, 0.02) !important; + border: 1px solid rgba(0, 255, 255, 0.1) !important; + border-top: none !important; + border-radius: 0 0 8px 8px !important; +} + +/* Aurora Streaming Results Styling */ +.aurora-content-card { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.05), rgba(64, 224, 208, 0.08)); + backdrop-filter: blur(24px) saturate(180%); + border: 1px solid rgba(0, 255, 255, 0.15); + border-radius: 16px; + padding: 24px; + margin: 16px 0; + position: relative; + overflow: hidden; + transition: all 0.4s cubic-bezier(0.4, 
0.0, 0.2, 1); +} + +.aurora-content-card::before { + content: ""; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 2px; + background: linear-gradient(90deg, transparent, #00FFFF, #40E0D0, transparent); + animation: aurora-scan 6s ease-in-out infinite; +} + +.aurora-content-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 1px solid rgba(0, 255, 255, 0.1); +} + +.aurora-content-title { + font-size: 1.1rem; + font-weight: 600; + color: rgba(255, 255, 255, 0.95); +} + +.aurora-status-badge { + padding: 4px 12px; + border-radius: 12px; + font-size: 0.75rem; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.aurora-status-badge.completed { + background: rgba(0, 255, 127, 0.15); + color: #00FF7F; + border: 1px solid rgba(0, 255, 127, 0.3); +} + +.aurora-content-body { + color: rgba(255, 255, 255, 0.85); + line-height: 1.6; + font-size: 0.95rem; +} + +.aurora-editor-section { + background: rgba(255, 255, 127, 0.05); + border: 1px solid rgba(255, 255, 127, 0.15); + border-radius: 12px; + padding: 16px; + margin: 16px 0; +} + +.aurora-editor-header { + display: flex; + justify-content: space-between; + align-items: center; +} + +.aurora-editor-title { + font-weight: 600; + color: rgba(255, 255, 255, 0.95); +} + +.aurora-editor-badge { + background: rgba(255, 255, 127, 0.15); + color: #FFFF7F; + padding: 4px 8px; + border-radius: 8px; + font-size: 0.7rem; + font-weight: 500; +} + +.aurora-critique-card { + background: rgba(255, 255, 255, 0.02); + border-radius: 8px; + padding: 16px; + margin: 12px 0; +} + +.aurora-critique-content { + color: rgba(255, 255, 255, 0.8); + line-height: 1.5; + font-size: 0.9rem; +} + +@keyframes aurora-scan { + 0%, 100% { left: -100%; } + 50% { left: 100%; } +} diff --git a/static/css/upload.css b/static/css/upload.css new file mode 100644 index 0000000..03751a0 --- /dev/null +++ 
b/static/css/upload.css @@ -0,0 +1,315 @@ +/* Upload Zone - FileUploadManager */ +.large-upload-zone-container { + margin: 20px 0; +} + +.large-upload-zone { + border: 3px dashed rgba(0, 255, 255, 0.3); + border-radius: 16px; + padding: 50px 30px; + text-align: center; + background: linear-gradient(135deg, + rgba(0, 255, 255, 0.03) 0%, + rgba(64, 224, 208, 0.05) 100%); + transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1); + cursor: pointer; + position: relative; + overflow: hidden; +} + +.large-upload-zone:hover { + border-color: rgba(0, 255, 255, 0.6); + background: linear-gradient(135deg, + rgba(0, 255, 255, 0.08) 0%, + rgba(64, 224, 208, 0.12) 100%); + transform: translateY(-3px); + box-shadow: 0 12px 35px rgba(0, 255, 255, 0.2); +} + +.large-upload-zone::before { + content: ""; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient(90deg, + transparent, + rgba(0, 255, 255, 0.15), + transparent); + transition: left 0.6s ease; +} + +.large-upload-zone:hover::before { + left: 100%; +} + +.upload-icon-inner { + font-size: 64px; + opacity: 0.8; + transition: all 0.4s ease; + display: inline-block; +} + +.large-upload-zone:hover .upload-icon-inner { + opacity: 1; + transform: scale(1.15) rotate(10deg); +} + +.upload-text h3 { + color: #00FFFF; + font-size: 1.5rem; + margin: 16px 0 8px 0; + font-weight: 600; +} + +.upload-text p { + color: rgba(255, 255, 255, 0.7); + margin: 0 0 20px 0; + font-size: 1rem; +} + +.upload-info { + display: flex; + justify-content: center; + gap: 15px; + flex-wrap: wrap; + margin-bottom: 20px; +} + +.supported-formats, .max-size, .chunk-info { + font-size: 0.85rem; + color: rgba(255, 255, 255, 0.6); + background: rgba(0, 255, 255, 0.1); + padding: 6px 12px; + border-radius: 6px; + border: 1px solid rgba(0, 255, 255, 0.2); +} + +.upload-features { + display: flex; + justify-content: center; + gap: 30px; + flex-wrap: wrap; +} + +.feature { + display: flex; + align-items: center; + gap: 
8px; + color: rgba(255, 255, 255, 0.8); + font-size: 0.9rem; +} + +.feature-icon { + font-size: 1.2rem; +} + +/* Upload Progress Indicator */ +.upload-progress-container { + background: var(--bg-secondary); + border-radius: var(--card-radius); + padding: 15px; + margin: 10px 0; + border: 1px solid rgba(121, 40, 202, 0.2); +} + +.upload-progress-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 8px; +} + +.upload-filename { + color: var(--text-primary); + font-weight: 500; + font-size: 0.9rem; +} + +.upload-percentage { + color: var(--accent-primary); + font-family: var(--terminal-font); + font-weight: 600; +} + +.upload-progress-bar { + height: 4px; + background: rgba(255, 255, 255, 0.1); + border-radius: 2px; + position: relative; + overflow: hidden; + margin-bottom: 8px; +} + +.upload-progress-fill { + height: 100%; + background: linear-gradient(90deg, #7928CA, #FF0080); + border-radius: 2px; + transition: width 0.3s ease; +} + +.upload-progress-shimmer { + position: absolute; + top: 0; + left: 0; + height: 100%; + background: linear-gradient(90deg, + transparent, + rgba(255, 255, 255, 0.2), + transparent); + animation: shimmer 1.5s ease-in-out infinite; +} + +.upload-status { + color: var(--text-secondary); + font-size: 0.8rem; + text-align: center; +} + +@keyframes shimmer { + 0% { transform: translateX(-100%); } + 100% { transform: translateX(100%); } +} + +/* Enhanced Upload Zone - EnhancedLargeFileProcessor */ +.enhanced-upload-container { + margin: 25px 0; +} + +.enhanced-upload-zone { + border: 3px dashed var(--aurora-border); + border-radius: var(--aurora-radius-large); + padding: var(--aurora-spacing-large); + text-align: center; + background: var(--aurora-bg-glass); + transition: all 0.5s cubic-bezier(0.4, 0, 0.2, 1); + cursor: pointer; + position: relative; + overflow: hidden; +} + +.enhanced-upload-zone:hover { + border-color: var(--aurora-border-hover); + background: var(--aurora-bg-glass); + 
transform: translateY(-5px); + box-shadow: var(--aurora-glow); +} + +.upload-icon-large { + position: relative; + margin-bottom: 20px; +} + +.enhanced-upload-zone .upload-icon-inner { + font-size: 80px; + color: var(--aurora-primary); + opacity: 0.9; + transition: all 0.5s ease; + display: inline-block; + position: relative; + z-index: 2; +} + +.upload-pulse { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 120px; + height: 120px; + border: 2px solid var(--aurora-border); + border-radius: 50%; + animation: aurora-upload-pulse 3s ease-in-out infinite; +} + +.enhanced-upload-zone:hover .upload-icon-inner { + transform: scale(1.2) rotate(15deg); + opacity: 1; +} + +.upload-content h2 { + color: var(--aurora-primary); + font-size: 1.8rem; + margin: 0 0 8px 0; + font-weight: 700; +} + +.upload-subtitle { + color: var(--aurora-text-muted); + margin: 0 0 25px 0; + font-size: 1.1rem; + font-weight: 500; +} + +.upload-features-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); + gap: 15px; + margin: 25px 0; + max-width: 500px; + margin-left: auto; + margin-right: auto; +} + +.feature-card { + background: var(--aurora-bg-glass); + border: 1px solid var(--aurora-border); + border-radius: var(--aurora-radius); + padding: 12px 8px; + display: flex; + flex-direction: column; + align-items: center; + gap: 6px; + transition: all 0.3s ease; +} + +.feature-card:hover { + background: var(--aurora-bg-glass); + border-color: var(--aurora-border-hover); + transform: translateY(-2px); + box-shadow: var(--aurora-glow-subtle); +} + +.feature-card .feature-icon { + font-size: 1.5rem; + color: var(--aurora-primary); +} + +.feature-text { + font-size: 0.85rem; + color: var(--aurora-text); + font-weight: 500; +} + +.supported-formats-enhanced { + margin-top: 20px; + display: flex; + justify-content: center; + gap: 20px; + flex-wrap: wrap; +} + +.format-group { + background: var(--aurora-bg-card); + border: 1px 
solid var(--aurora-border); + border-radius: var(--aurora-radius-small); + padding: 8px 12px; + font-size: 0.85rem; + color: var(--aurora-text-muted); +} + +.format-group strong { + color: var(--aurora-primary); +} + +@keyframes aurora-upload-pulse { + 0%, 100% { + transform: translate(-50%, -50%) scale(1); + opacity: 0.6; + } + 50% { + transform: translate(-50%, -50%) scale(1.2); + opacity: 0.3; + } +} diff --git a/static/css/whisperforge_ui.css b/static/css/whisperforge_ui.css index 7ecb7d5..47030a7 100644 --- a/static/css/whisperforge_ui.css +++ b/static/css/whisperforge_ui.css @@ -293,12 +293,12 @@ padding: 0.5rem; gap: 1rem; } - + .auth-container { margin: 1rem; padding: 1.5rem; } - + .content-section, .upload-section { padding: 1rem; @@ -409,11 +409,11 @@ header { visibility: hidden; } background: white !important; color: black !important; } - + .whisperforge-container, .auth-container, .main-container { box-shadow: none !important; border: 1px solid #ccc !important; } -} \ No newline at end of file +} diff --git a/static/js/cookie-consent.js b/static/js/cookie-consent.js index ec4aa84..c58a3e7 100644 --- a/static/js/cookie-consent.js +++ b/static/js/cookie-consent.js @@ -9,7 +9,7 @@ document.addEventListener('DOMContentLoaded', function() { cookieBanner.style.display = 'none'; } } - + // Set up event listener for the accept button const acceptButton = document.querySelector('.cookie-banner button'); if (acceptButton) { @@ -18,4 +18,4 @@ document.addEventListener('DOMContentLoaded', function() { document.querySelector('.cookie-banner').style.display = 'none'; }); } -}); \ No newline at end of file +}); diff --git a/static/js/ui-interactions.js b/static/js/ui-interactions.js index 226a906..b73aefb 100644 --- a/static/js/ui-interactions.js +++ b/static/js/ui-interactions.js @@ -6,13 +6,13 @@ document.addEventListener('DOMContentLoaded', function() { const scannerLine = document.createElement('div'); scannerLine.className = 'scanner-line'; 
document.body.appendChild(scannerLine); - + // Function to add toast notifications window.showToast = function(message, type = 'success') { const toast = document.createElement('div'); toast.className = 'toast-notification'; toast.textContent = message; - + if (type === 'error') { toast.style.borderLeftColor = 'var(--error)'; } else if (type === 'warning') { @@ -20,29 +20,29 @@ document.addEventListener('DOMContentLoaded', function() { } else if (type === 'info') { toast.style.borderLeftColor = 'var(--info)'; } - + document.body.appendChild(toast); - + // Remove toast after 3 seconds setTimeout(function() { toast.style.opacity = '0'; toast.style.transform = 'translateX(100%)'; - + // Remove from DOM after animation setTimeout(function() { document.body.removeChild(toast); }, 300); }, 3000); }; - + // Add active class to current navigation item const currentPage = new URLSearchParams(window.location.search).get('page') || 'home'; const navItems = document.querySelectorAll('.nav-item'); - + navItems.forEach(function(item) { const itemPage = new URL(item.href).searchParams.get('page') || 'home'; if (itemPage === currentPage) { item.classList.add('active'); } }); -}); \ No newline at end of file +}); diff --git a/tests/__init__.py b/tests/__init__.py index df6b49b..d218072 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -# WhisperForge Test Suite \ No newline at end of file +# WhisperForge Test Suite diff --git a/tests/conftest.py b/tests/conftest.py index 8bcda22..2b4fa08 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,28 +2,31 @@ Pytest configuration and fixtures for WhisperForge testing """ -import pytest import os -import tempfile import shutil + +# Add project root to path +import sys +import tempfile from pathlib import Path from unittest.mock import Mock, patch -import streamlit as st + +import pytest from streamlit.testing.v1 import AppTest -# Add project root to path -import sys +from core.logging_config import logger +from 
core.supabase_integration import get_supabase_client + sys.path.insert(0, str(Path(__file__).parent.parent)) # Load environment variables from .env file if it exists try: from dotenv import load_dotenv + load_dotenv() except ImportError: pass # python-dotenv not installed, skip -from core.logging_config import logger -from core.supabase_integration import get_supabase_client @pytest.fixture(scope="session") def test_env(): @@ -31,21 +34,21 @@ def test_env(): # Store original env vars original_env = {} test_vars = { - 'SUPABASE_URL': os.getenv('SUPABASE_URL', 'https://test.supabase.co'), - 'SUPABASE_ANON_KEY': os.getenv('SUPABASE_ANON_KEY', 'test-anon-key'), - 'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY', 'test-openai-key'), - 'ANTHROPIC_API_KEY': os.getenv('ANTHROPIC_API_KEY', 'test-anthropic-key'), - 'GROQ_API_KEY': os.getenv('GROQ_API_KEY', 'test-groq-key'), - 'TESTING': 'true' + "SUPABASE_URL": os.getenv("SUPABASE_URL", "https://test.supabase.co"), + "SUPABASE_ANON_KEY": os.getenv("SUPABASE_ANON_KEY", "test-anon-key"), + "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "test-openai-key"), + "ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", "test-anthropic-key"), + "GROQ_API_KEY": os.getenv("GROQ_API_KEY", "test-groq-key"), + "TESTING": "true", } - + # Set test environment for key, value in test_vars.items(): original_env[key] = os.getenv(key) os.environ[key] = value - + yield test_vars - + # Restore original environment for key, value in original_env.items(): if value is None: @@ -53,6 +56,7 @@ def test_env(): else: os.environ[key] = value + @pytest.fixture def temp_dir(): """Create temporary directory for test files""" @@ -60,53 +64,59 @@ def temp_dir(): yield Path(temp_path) shutil.rmtree(temp_path) + @pytest.fixture def sample_audio_file(temp_dir): """Create a sample audio file for testing""" # Create a minimal WAV file (just headers, no actual audio) audio_file = temp_dir / "test_audio.wav" - + # WAV file header (44 bytes) - wav_header = 
b'RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00' - - with open(audio_file, 'wb') as f: + wav_header = b"RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00" + + with open(audio_file, "wb") as f: f.write(wav_header) # Add some dummy audio data - f.write(b'\x00' * 2048) - + f.write(b"\x00" * 2048) + return audio_file + @pytest.fixture def large_audio_file(temp_dir): """Create a large audio file for testing chunking""" audio_file = temp_dir / "large_test_audio.wav" - + # WAV file header - wav_header = b'RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00' - - with open(audio_file, 'wb') as f: + wav_header = b"RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00" + + with open(audio_file, "wb") as f: f.write(wav_header) # Create ~30MB file to trigger chunking chunk_size = 1024 * 1024 # 1MB chunks for _ in range(30): - f.write(b'\x00' * chunk_size) - + f.write(b"\x00" * chunk_size) + return audio_file + @pytest.fixture def mock_supabase(): """Mock Supabase client for testing""" - with patch('core.supabase_client.create_client') as mock_create: + with patch("core.supabase_integration.create_client") as mock_create: mock_client = Mock() mock_create.return_value = mock_client - + # Mock successful responses - mock_client.table.return_value.insert.return_value.execute.return_value.data = [{'id': 'test-id'}] + mock_client.table.return_value.insert.return_value.execute.return_value.data = [{"id": "test-id"}] mock_client.table.return_value.select.return_value.execute.return_value.data = [] - mock_client.table.return_value.update.return_value.eq.return_value.execute.return_value.data = [{'id': 'test-id'}] - + 
mock_client.table.return_value.update.return_value.eq.return_value.execute.return_value.data = [ + {"id": "test-id"} + ] + yield mock_client + @pytest.fixture def real_supabase(): """Real Supabase client for integration testing""" @@ -119,64 +129,63 @@ def real_supabase(): except Exception as e: pytest.skip(f"Supabase connection failed: {e}") + @pytest.fixture def streamlit_app(): """Streamlit app test fixture - Updated to use app_simple.py""" app = AppTest.from_file("app_simple.py") return app + @pytest.fixture def mock_openai(): """Mock OpenAI API responses""" - with patch('openai.OpenAI') as mock_openai: + with patch("openai.OpenAI") as mock_openai: mock_client = Mock() mock_openai.return_value = mock_client - + # Mock transcription response mock_client.audio.transcriptions.create.return_value.text = "This is a test transcription." - + # Mock chat completion response mock_response = Mock() mock_response.choices = [Mock()] mock_response.choices[0].message.content = "This is a test AI response." mock_client.chat.completions.create.return_value = mock_response - + yield mock_client + @pytest.fixture def mock_anthropic(): """Mock Anthropic API responses""" - with patch('anthropic.Anthropic') as mock_anthropic: + with patch("anthropic.Anthropic") as mock_anthropic: mock_client = Mock() mock_anthropic.return_value = mock_client - + # Mock message response mock_response = Mock() mock_response.content = [Mock()] mock_response.content[0].text = "This is a test Anthropic response." 
mock_client.messages.create.return_value = mock_response - + yield mock_client + @pytest.fixture(autouse=True) def setup_logging(): """Set up logging for tests""" - logger.logger.info("๐Ÿงช Starting test session") + logger.info("๐Ÿงช Starting test session") yield - logger.logger.info("๐Ÿงช Test session completed") + logger.info("๐Ÿงช Test session completed") + # Test markers + + def pytest_configure(config): """Configure pytest markers""" - config.addinivalue_line( - "markers", "integration: marks tests as integration tests (may be slow)" - ) - config.addinivalue_line( - "markers", "unit: marks tests as unit tests (fast)" - ) - config.addinivalue_line( - "markers", "supabase: marks tests that require Supabase connection" - ) - config.addinivalue_line( - "markers", "ai: marks tests that require AI API keys" - ) \ No newline at end of file + config.addinivalue_line("markers", "integration: marks tests as integration tests (may be slow)") + config.addinivalue_line("markers", "unit: marks tests as unit tests (fast)") + config.addinivalue_line("markers", "supabase: marks tests that require Supabase connection") + config.addinivalue_line("markers", "ai: marks tests that require AI API keys") diff --git a/tests/test_basic_functionality.py b/tests/test_basic_functionality.py index e4d764c..67036b8 100644 --- a/tests/test_basic_functionality.py +++ b/tests/test_basic_functionality.py @@ -2,109 +2,120 @@ Basic functionality tests for WhisperForge v3.1.0 """ -import pytest -import os -from pathlib import Path import sys +from pathlib import Path + +import pytest # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) + def test_imports(): """Test that core modules can be imported without errors""" try: - from core.content_generation import transcribe_audio, generate_wisdom - from core.file_upload import FileUploadManager, LargeFileUploadManager - from core.supabase_integration import get_supabase_client - from core.utils import hash_password, 
DEFAULT_PROMPTS - from core.visible_thinking import thinking_step_start + from core.content_generation import generate_wisdom, transcribe_audio # noqa: F401 + from core.file_upload import FileUploadManager # noqa: F401 + from core.large_file_processor import EnhancedLargeFileProcessor # noqa: F401 + from core.supabase_integration import get_supabase_client # noqa: F401 + from core.utils import DEFAULT_PROMPTS, hash_password # noqa: F401 + from core.visible_thinking import thinking_step_start # noqa: F401 + assert True, "All core imports successful" except ImportError as e: pytest.fail(f"Import error: {e}") + def test_version_file(): """Test that VERSION file exists and contains valid version""" version_file = Path(__file__).parent.parent / "VERSION" assert version_file.exists(), "VERSION file should exist" - + version = version_file.read_text().strip() assert version == "3.1.0", f"Expected version 3.1.0, got {version}" + def test_app_files_exist(): """Test that required app files exist""" root = Path(__file__).parent.parent - + # Main app should exist assert (root / "app_simple.py").exists(), "app_simple.py should exist" - + # Redirect app should exist assert (root / "app.py").exists(), "app.py redirect should exist" - + # Procfile should exist and point to app_simple.py procfile = root / "Procfile" assert procfile.exists(), "Procfile should exist" - + content = procfile.read_text() assert "app_simple.py" in content, "Procfile should reference app_simple.py" + def test_requirements_file(): """Test that requirements.txt exists and has core dependencies""" req_file = Path(__file__).parent.parent / "requirements.txt" assert req_file.exists(), "requirements.txt should exist" - + content = req_file.read_text() required_deps = ["streamlit", "openai", "supabase", "pydub"] - + for dep in required_deps: assert dep in content, f"Required dependency {dep} not found in requirements.txt" + @pytest.mark.unit def test_hash_password(): """Test password hashing utility""" from 
core.utils import hash_password - - password = "test_password_123" + + password = "test_password_123" # noqa: S105 hashed = hash_password(password) - + assert hashed != password, "Password should be hashed" assert len(hashed) > 20, "Hashed password should be longer than original" + @pytest.mark.unit def test_default_prompts(): """Test that default prompts are available""" from core.utils import DEFAULT_PROMPTS - + assert isinstance(DEFAULT_PROMPTS, dict), "DEFAULT_PROMPTS should be a dictionary" assert len(DEFAULT_PROMPTS) > 0, "Should have at least one default prompt" + @pytest.mark.unit def test_visible_thinking_functions(): """Test that visible thinking functions work without errors""" - from core.visible_thinking import thinking_step_start, thinking_step_complete, thinking_error - + from core.visible_thinking import thinking_error, thinking_step_complete, thinking_step_start + # These should not raise exceptions thinking_step_start("test_step") thinking_step_complete("test_step") thinking_error("test_step", "test error") - + assert True, "Visible thinking functions executed without errors" + def test_core_directory_structure(): """Test that core directory has expected modules""" core_dir = Path(__file__).parent.parent / "core" assert core_dir.exists(), "core directory should exist" - + expected_modules = [ "content_generation.py", - "file_upload.py", + "file_upload.py", "supabase_integration.py", "utils.py", - "visible_thinking.py" + "visible_thinking.py", ] - + for module in expected_modules: module_path = core_dir / module assert module_path.exists(), f"Core module {module} should exist" + if __name__ == "__main__": - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/whisperforge2.code-workspace b/whisperforge2.code-workspace index ef9f5d2..b51044b 100644 --- a/whisperforge2.code-workspace +++ b/whisperforge2.code-workspace @@ -4,4 +4,4 @@ "path": "." 
} ] -} \ No newline at end of file +} diff --git a/whisperforge_cli.py b/whisperforge_cli.py index bd228a2..c4e0b6c 100644 --- a/whisperforge_cli.py +++ b/whisperforge_cli.py @@ -4,12 +4,11 @@ Run the WhisperForge pipeline from the command line """ -import click import os import sys -import tempfile from pathlib import Path -from typing import Optional, Dict, Any + +import click # Add the project root to Python path project_root = Path(__file__).parent @@ -17,16 +16,12 @@ # Import core functionality try: - from core.content_generation import ( - transcribe_audio, - generate_wisdom, - generate_outline, - generate_article, - ) - from core.logging_config import logger - from core.utils import DEFAULT_PROMPTS from dotenv import load_dotenv + from core.content_generation import transcribe_audio + from core.logging_config import logger + from core.pipeline_engine import NullListener, PipelineConfig, run_pipeline + # Load environment variables load_dotenv() @@ -36,6 +31,19 @@ sys.exit(1) +class CLIPipelineListener(NullListener): + """Pipeline listener that prints progress to the terminal via click.""" + + def on_step_start(self, step_index, step_name, message): + click.echo(f" [{step_index + 1}/6] {step_name}: {message}") + + def on_step_complete(self, step_index, step_name, result): + click.echo(f" [{step_index + 1}/6] {step_name}: done") + + def on_error(self, step_index, error): + click.echo(f" Error at step {step_index}: {error}", err=True) + + class CLIFile: """Simple file wrapper for CLI usage""" @@ -159,14 +167,14 @@ def pipeline(): def run( input_file: str, model: str, - output: Optional[str], + output: str | None, output_format: str, verbose: bool, ): """Run the complete WhisperForge pipeline on an audio file""" if verbose: - click.echo(f"๐Ÿš€ Starting WhisperForge pipeline...") + click.echo("๐Ÿš€ Starting WhisperForge pipeline...") click.echo(f"Input file: {input_file}") click.echo(f"Model: {model}") click.echo(f"Output format: {output_format}") @@ -187,75 
+195,72 @@ def run( # Create CLI file wrapper audio_file = CLIFile(input_file) + stem = Path(input_file).stem try: - # Step 1: Transcription - click.echo("๐ŸŽต Transcribing audio...") - transcript = transcribe_audio(audio_file) + # --- Transcription-only shortcut --- + if output_format == "transcript": + click.echo("Transcribing audio...") + transcript = transcribe_audio(str(audio_file.file_path)) + if not transcript: + click.echo("Transcription failed: empty result", err=True) + sys.exit(1) + + transcript_file = output_dir / f"{stem}_transcript.txt" + with open(transcript_file, "w", encoding="utf-8") as f: + f.write(transcript) + click.echo(f"Transcript saved: {transcript_file}") + if verbose: + click.echo(f"Preview: {transcript[:200]}...") + return - if not transcript or "Error" in transcript: - click.echo(f"โŒ Transcription failed: {transcript}", err=True) - sys.exit(1) + # --- Full pipeline via engine --- + config = PipelineConfig(publish_to_notion=False) + listener = CLIPipelineListener() if verbose else NullListener() - # Save transcript - transcript_file = output_dir / f"{Path(input_file).stem}_transcript.txt" - with open(transcript_file, "w", encoding="utf-8") as f: - f.write(transcript) + results = run_pipeline( + audio_file=audio_file, + config=config, + listener=listener, + ) - click.echo(f"โœ… Transcript saved: {transcript_file}") + if results is None: + click.echo("Pipeline failed -- see errors above.", err=True) + sys.exit(1) - if output_format == "transcript": - click.echo(f"๐Ÿ“„ Transcript preview: {transcript[:200]}...") - return + # --- Write results to files --- + output_map = { + "transcript": ("_transcript.txt", "transcript"), + "wisdom": ("_wisdom.md", "wisdom"), + "outline": ("_outline.md", "outline"), + "article": ("_article.md", "article"), + "social_content": ("_social.md", "social_content"), + } - # Step 2: Generate content based on format - results = {} - - if output_format in ["wisdom", "all"]: - click.echo("๐Ÿง  Generating wisdom 
extraction...") - wisdom = generate_wisdom(transcript, DEFAULT_PROMPTS["wisdom_extraction"]) - if wisdom and "Error" not in wisdom: - wisdom_file = output_dir / f"{Path(input_file).stem}_wisdom.md" - with open(wisdom_file, "w", encoding="utf-8") as f: - f.write(wisdom) - results["wisdom"] = wisdom_file - click.echo(f"โœ… Wisdom saved: {wisdom_file}") - - if output_format in ["outline", "all"]: - click.echo("๐Ÿ“‹ Generating outline...") - outline = generate_outline(transcript, DEFAULT_PROMPTS["outline_creation"]) - if outline and "Error" not in outline: - outline_file = output_dir / f"{Path(input_file).stem}_outline.md" - with open(outline_file, "w", encoding="utf-8") as f: - f.write(outline) - results["outline"] = outline_file - click.echo(f"โœ… Outline saved: {outline_file}") - - if output_format in ["article", "all"]: - click.echo("๐Ÿ“ Generating article...") - article = generate_article(transcript, DEFAULT_PROMPTS["article_writing"]) - if article and "Error" not in article: - article_file = output_dir / f"{Path(input_file).stem}_article.md" - with open(article_file, "w", encoding="utf-8") as f: - f.write(article) - results["article"] = article_file - click.echo(f"โœ… Article saved: {article_file}") + saved_files: dict[str, Path] = {} + for key, (suffix, result_key) in output_map.items(): + content = results.get(result_key) + if not content: + continue + if output_format not in ("all", key): + continue + out_path = output_dir / f"{stem}{suffix}" + with open(out_path, "w", encoding="utf-8") as f: + f.write(content) + saved_files[key] = out_path # Summary - click.echo("\n๐ŸŽ‰ Pipeline completed successfully!") - click.echo(f"๐Ÿ“ Output directory: {output_dir}") - click.echo(f"๐Ÿ“„ Transcript: {transcript_file}") + click.echo("\nPipeline completed successfully!") + click.echo(f"Output directory: {output_dir}") + for content_type, file_path in saved_files.items(): + click.echo(f" {content_type}: {file_path}") - for content_type, file_path in results.items(): - 
click.echo(f"๐Ÿ“ {content_type.title()}: {file_path}") - - # Show preview - if verbose and transcript: - click.echo(f"\n๐Ÿ“„ Transcript preview:\n{transcript[:300]}...") + if verbose and results.get("transcript"): + click.echo(f"\nTranscript preview:\n{results['transcript'][:300]}...") except Exception as e: - logger.logger.error(f"CLI pipeline error: {e}") - click.echo(f"โŒ Pipeline failed: {e}", err=True) + logger.error(f"CLI pipeline error: {e}") + click.echo(f"Pipeline failed: {e}", err=True) sys.exit(1) @@ -274,7 +279,7 @@ def run( type=click.Path(), help="Output file path (default: input_name_transcript.txt)", ) -def transcribe(input_file: str, output: Optional[str]): +def transcribe(input_file: str, output: str | None): """Transcribe audio file to text only""" if not validate_audio_file(input_file): @@ -284,32 +289,26 @@ def transcribe(input_file: str, output: Optional[str]): sys.exit(1) # Set up output file - if output: - output_file = Path(output) - else: - output_file = Path(f"{Path(input_file).stem}_transcript.txt") - - # Create CLI file wrapper - audio_file = CLIFile(input_file) + output_file = Path(output) if output else Path(f"{Path(input_file).stem}_transcript.txt") try: - click.echo("๐ŸŽต Transcribing audio...") - transcript = transcribe_audio(audio_file) + click.echo("Transcribing audio...") + transcript = transcribe_audio(input_file) - if not transcript or "Error" in transcript: - click.echo(f"โŒ Transcription failed: {transcript}", err=True) + if not transcript: + click.echo("Transcription failed: empty result", err=True) sys.exit(1) # Save transcript with open(output_file, "w", encoding="utf-8") as f: f.write(transcript) - click.echo(f"โœ… Transcript saved: {output_file}") - click.echo(f"๐Ÿ“„ Preview: {transcript[:200]}...") + click.echo(f"Transcript saved: {output_file}") + click.echo(f"Preview: {transcript[:200]}...") except Exception as e: - logger.logger.error(f"CLI transcription error: {e}") - click.echo(f"โŒ Transcription failed: {e}", 
err=True) + logger.error(f"CLI transcription error: {e}") + click.echo(f"Transcription failed: {e}", err=True) sys.exit(1) @@ -333,14 +332,14 @@ def status(): # Check dependencies try: - import openai + import openai # noqa: F401 click.echo("OpenAI library: โœ… Available") except ImportError: click.echo("OpenAI library: โŒ Not available") try: - import anthropic + import anthropic # noqa: F401 click.echo("Anthropic library: โœ… Available") except ImportError: @@ -348,7 +347,7 @@ def status(): # Check audio processing try: - from pydub import AudioSegment + from pydub import AudioSegment # noqa: F401 click.echo("Audio processing (pydub): โœ… Available") except ImportError: