From ac0c2e1e86316c55494282a0a046f8fc03dbf81d Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 15:51:33 -0800 Subject: [PATCH 01/46] chore(lint): add pre-commit hooks with autopep8 for auto-formatting Configure pre-commit framework with autopep8, trailing whitespace trimming, and end-of-file fixing to catch indentation and style issues before they reach the repository. - Add .pre-commit-config.yaml with autopep8, end-of-file-fixer, and trailing-whitespace hooks - Add pyproject.toml with autopep8 settings (120 char line, aggressive 1) - Add pre-commit and autopep8 to requirements.txt - Add .venv/ to .gitignore Co-Authored-By: Claude Opus 4.6 --- .gitignore | 1 + .pre-commit-config.yaml | 12 ++++++++++++ pyproject.toml | 3 +++ requirements.txt | 4 ++++ 4 files changed, 20 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 4d11890..3fd3672 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ wheels/ # Virtual environments venv/ +.venv/ env/ ENV/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c41cf1d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/hhatto/autopep8 + rev: v2.3.2 + hooks: + - id: autopep8 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..01613c8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.autopep8] +max_line_length = 120 +aggressive = 1 diff --git a/requirements.txt b/requirements.txt index e931345..d6e635a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,3 +40,7 @@ pytest-asyncio>=0.21.0 pytest-cov>=4.1.0 python-docx>=1.1.0 fpdf>=1.7.2 + +# Linting & Pre-commit Hooks (development) +pre-commit>=4.0.0 +autopep8>=2.3.0 From 
2e2c356c68bd75b57a95bf3078e3a924bedf8bfb Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 15:51:52 -0800 Subject: [PATCH 02/46] style(lint): apply autopep8 formatting and whitespace fixes Run pre-commit hooks across the entire codebase to fix existing indentation issues, trailing whitespace, and missing end-of-file newlines. Co-Authored-By: Claude Opus 4.6 --- .devcontainer/devcontainer.json | 2 +- .streamlit/config.toml | 2 +- CHANGELOG.md | 16 +- CLEANUP_SUCCESS_SUMMARY.md | 14 +- ESSENTIAL_MODULES_ONLY.md | 10 +- Procfile | 2 +- README.md | 20 +- WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md | 4 +- app.py | 6 +- app_simple.py | 871 +++++++++++---------- core/__init__.py | 4 +- core/auth_wrapper.py | 135 ++-- core/content_generation.py | 60 +- core/file_upload.py | 470 +++++------ core/health_check.py | 1 + core/logging_config.py | 64 +- core/metrics_exporter.py | 1 - core/monitoring.py | 1 - core/notifications.py | 124 +-- core/session_manager.py | 2 + core/streaming_pipeline.py | 309 ++++---- core/streaming_results.py | 240 +++--- core/streamlit_monitoring.py | 1 - core/styling.py | 120 +-- core/supabase_integration.py | 132 ++-- core/utils.py | 42 +- core/visible_thinking.py | 6 +- create_missing_tables.py | 33 +- deploy_fixes.py | 38 +- docs/CRITICAL_FIXES_REPORT_v2.8.0.md | 12 +- docs/LARGE_FILE_PROCESSING_v2.8.0.md | 12 +- docs/monitoring.md | 2 +- env.example | 2 +- experiments/README.md | 1 - monitoring/grafana_dashboard.json | 2 +- prompts/default/article_generation.md | 2 +- prompts/default/knowledge_base/ca.md | 2 +- prompts/default/outline_creation.md | 2 +- prompts/default/social_media.md | 4 +- prompts/default/wisdom_extraction.md | 2 +- prompts/default/wisdom_prompt.md | 2 +- pytest.ini | 4 +- runtime.txt | 2 +- scripts/audit_project.py | 17 +- scripts/integration_audit.py | 64 +- scripts/test_monitoring.py | 113 +-- scripts/test_oauth.py | 21 +- scripts/ui_ux_audit.py | 86 +- scripts/validate_app.py | 106 +-- shared/config.py | 9 +- 
shared/security.py | 4 + start_app.sh | 2 +- static/README.md | 2 +- static/css/aurora-progress.css | 66 +- static/css/diagram.css | 10 +- static/css/main.css | 110 +-- static/css/production.css | 2 +- static/css/whisperforge_ui.css | 8 +- static/js/cookie-consent.js | 4 +- static/js/ui-interactions.js | 16 +- tests/__init__.py | 2 +- tests/conftest.py | 52 +- tests/test_basic_functionality.py | 41 +- whisperforge2.code-workspace | 2 +- 64 files changed, 1869 insertions(+), 1649 deletions(-) mode change 100644 => 100755 app_simple.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index afcbb62..4869524 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -29,4 +29,4 @@ }, "postCreateCommand": "pip install -r requirements.txt", "remoteUser": "vscode" -} \ No newline at end of file +} diff --git a/.streamlit/config.toml b/.streamlit/config.toml index 7131b7c..6a25323 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -9,4 +9,4 @@ enableXsrfProtection = false gatherUsageStats = false [theme] -base = "light" \ No newline at end of file +base = "light" diff --git a/CHANGELOG.md b/CHANGELOG.md index ea2f1ba..3fd9657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ### **βœ‚οΈ Features REMOVED (Intentionally)** - **Research Enrichment**: Removed entity extraction and research link generation -- **Editor System**: Removed AI editor critique and revision loops +- **Editor System**: Removed AI editor critique and revision loops - **Image Prompts**: Removed AI image generation prompt creation - **Multiple AI Providers**: Removed Anthropic/Claude and Groq support - **Complex Settings**: Removed feature toggles and provider selection @@ -16,7 +16,7 @@ - **Audio Upload**: Enhanced large file processing (25MB-2GB) - **Transcription**: OpenAI Whisper speech-to-text - **Wisdom Extraction**: Key insights and takeaways -- **Outline Creation**: Structured content organization +- **Outline 
Creation**: Structured content organization - **Article Generation**: Complete written content - **Social Media**: 5 platform-optimized posts - **Notion Publishing**: Auto-publish with beautiful formatting @@ -222,7 +222,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently ### πŸ”§ **ENHANCED 8-STEP PIPELINE** 1. πŸŽ™οΈ **Transcription** - Speech-to-text conversion -2. πŸ’‘ **Wisdom Extraction** - Key insights and takeaways +2. πŸ’‘ **Wisdom Extraction** - Key insights and takeaways 3. πŸ” **Research Enrichment** - Supporting links & context ⭐ **RESTORED** 4. πŸ“‹ **Outline Creation** - Structured content organization 5. πŸ“ **Article Generation** - Complete written content @@ -261,7 +261,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently ### πŸ”§ **Enhanced Pipeline (6 Steps)** 1. πŸŽ™οΈ **Transcription** - Speech-to-text conversion -2. πŸ’‘ **Wisdom Extraction** - Key insights and takeaways +2. πŸ’‘ **Wisdom Extraction** - Key insights and takeaways 3. πŸ“‹ **Outline Creation** - Structured content organization 4. πŸ“ **Article Generation** - Complete written content 5. 
πŸ“± **Social Media** - Platform-optimized posts @@ -293,13 +293,13 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently - **Database Investigation**: Found all 27 content items stored under correct user - **Field Mapping**: Updated display to match actual database schema - `transcript` (not `transcription`) - - `wisdom` (not `wisdom_extraction`) + - `wisdom` (not `wisdom_extraction`) - `outline` (not `outline_creation`) - `article` (not `article_creation`) - `social_content` (not `social_media`) ### πŸš€ **TRANSCRIPTION PIPELINE RESTORED** -- **Circular Imports**: Eliminated blocking dependencies +- **Circular Imports**: Eliminated blocking dependencies - **Pipeline Flow**: Fixed streaming results display - **Database Storage**: Corrected field names for new content - **Session State**: Simplified initialization prevents conflicts @@ -392,7 +392,7 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently --- -## [2.0.0] - 2025-06-08 🌌 **Aurora UI Transformation** +## [2.0.0] - 2025-06-08 🌌 **Aurora UI Transformation** ### 🎨 **Major UI Redesign** - **Aurora Bioluminescent Theme**: Complete visual transformation with cyan/teal color scheme @@ -505,4 +505,4 @@ WhisperForge is now a **focused, reliable tool** that does one thing excellently --- -**For detailed technical information, see [README.md](README.md)** \ No newline at end of file +**For detailed technical information, see [README.md](README.md)** diff --git a/CLEANUP_SUCCESS_SUMMARY.md b/CLEANUP_SUCCESS_SUMMARY.md index 782922b..4cbe914 100644 --- a/CLEANUP_SUCCESS_SUMMARY.md +++ b/CLEANUP_SUCCESS_SUMMARY.md @@ -14,7 +14,7 @@ Your WhisperForge app is now **CLEAN, WORKING, and READY FOR PRODUCTION!** ``` βœ… Moved to archived_old_version/bloat_modules/: - monitoring.py (11KB) - Over-engineered monitoring -- streamlit_monitoring.py (8KB) - More monitoring bloat +- streamlit_monitoring.py (8KB) - More monitoring bloat - metrics_exporter.py (11KB) - 
Prometheus metrics - health_check.py (18KB) - Complex health checking - session_manager.py (18KB) - Over-complex sessions @@ -29,7 +29,7 @@ Your WhisperForge app is now **CLEAN, WORKING, and READY FOR PRODUCTION!** ``` βœ… Moved to archived_old_version/old_docs/: - WHISPERFORGE_AUDIT_2025.md -- CLEAN_SETUP.md +- CLEAN_SETUP.md - DEVELOPMENT_GUIDE.md - PRODUCTION_MONITORING_IMPLEMENTATION.md - SESSION_REFACTOR_IMPLEMENTATION.md @@ -84,7 +84,7 @@ core/ ### **Clean Documentation** (3 files) ``` β”œβ”€β”€ README.md # Main documentation -β”œβ”€β”€ CHANGELOG.md # Version history +β”œβ”€β”€ CHANGELOG.md # Version history └── ESSENTIAL_MODULES_ONLY.md # Architecture guide ``` @@ -97,7 +97,7 @@ core/ - User registration and login working - Database integration verified -### βœ… **Transcription & Pipeline Streaming** +### βœ… **Transcription & Pipeline Streaming** - OpenAI Whisper integration ready - Real-time streaming pipeline implemented - Aurora UI for beautiful progress display @@ -126,7 +126,7 @@ core/ # App is already running on http://localhost:8501 # Test these features: 1. βœ… OAuth login via Supabase -2. βœ… Upload audio file +2. βœ… Upload audio file 3. βœ… Watch transcription & content generation 4. βœ… Check content appears in history 5. βœ… Customize prompts in settings @@ -160,7 +160,7 @@ core/ - ❌ Confusing test files everywhere - ❌ Over-engineered monitoring systems -### **After Cleanup** +### **After Cleanup** - βœ… 11 essential modules (170KB focused code) - βœ… 3 clean documentation files - βœ… All imports working perfectly @@ -179,4 +179,4 @@ core/ - **Architecture**: Simple, reliable, and scalable - **Deployment**: Ready for Render.com production -**Time to working app: ACHIEVED! πŸš€** \ No newline at end of file +**Time to working app: ACHIEVED! 
πŸš€** diff --git a/ESSENTIAL_MODULES_ONLY.md b/ESSENTIAL_MODULES_ONLY.md index 6543618..aaed98d 100644 --- a/ESSENTIAL_MODULES_ONLY.md +++ b/ESSENTIAL_MODULES_ONLY.md @@ -2,7 +2,7 @@ ## Your Core Requirements: 1. **OAuth via Supabase** βœ… -2. **Transcription & Pipeline Streaming** βœ… +2. **Transcription & Pipeline Streaming** βœ… 3. **Save content to Supabase** βœ… 4. **Display on user history page** βœ… 5. **Customize prompts & knowledge base** βœ… @@ -14,7 +14,7 @@ ### **Tier 1: Absolutely Critical** ``` βœ… supabase_integration.py (16KB) - Database & OAuth -βœ… content_generation.py (18KB) - Transcription & AI generation +βœ… content_generation.py (18KB) - Transcription & AI generation βœ… streaming_pipeline.py (20KB) - Your streaming pipeline βœ… auth_wrapper.py (13KB) - Supabase OAuth integration βœ… styling.py (18KB) - Aurora UI (you love this!) @@ -37,7 +37,7 @@ ### **Over-Engineering & Monitoring** ``` ❌ monitoring.py (11KB) - Complex monitoring system -❌ streamlit_monitoring.py (8KB) - More monitoring +❌ streamlit_monitoring.py (8KB) - More monitoring ❌ metrics_exporter.py (11KB) - Prometheus metrics ❌ health_check.py (18KB) - Health checking system ❌ session_manager.py (18KB) - Complex session management @@ -143,10 +143,10 @@ core/ 2. **Add your OpenAI API key to .env** 3. 
**Test core functionality:** - OAuth login via Supabase βœ… - - Audio upload & transcription βœ… + - Audio upload & transcription βœ… - Content generation & streaming βœ… - Save to database βœ… - Display in history βœ… - Custom prompts & knowledge base βœ… -**Your app should work perfectly with just these 9-11 essential modules!** \ No newline at end of file +**Your app should work perfectly with just these 9-11 essential modules!** diff --git a/Procfile b/Procfile index b624917..b248ce1 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: streamlit run app_simple.py --server.port=$PORT --server.address=0.0.0.0 --server.headless=true \ No newline at end of file +web: streamlit run app_simple.py --server.port=$PORT --server.address=0.0.0.0 --server.headless=true diff --git a/README.md b/README.md index b120bea..f7a6a09 100644 --- a/README.md +++ b/README.md @@ -218,21 +218,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file SUPABASE_URL=your_supabase_url SUPABASE_ANON_KEY=your_supabase_anon_key SUPABASE_SERVICE_ROLE_KEY=your_service_role_key # Optional for admin features - + # Required - AI Provider OPENAI_API_KEY=your_openai_key - + # Notion Integration - Auto-Publishing NOTION_API_KEY=your_notion_integration_token NOTION_DATABASE_ID=your_notion_database_id - + # Optional - OAuth & Integrations OAUTH_REDIRECT_URL=http://localhost:8501 # For OAuth flows - + # Optional - Security & Monitoring JWT_SECRET=your_jwt_secret_key SENTRY_DSN=your_sentry_dsn # For error tracking - + # Optional - Development DEBUG=true LOG_LEVEL=INFO @@ -259,7 +259,7 @@ The WhisperForge UI uses a custom Aurora design system featuring: ### **Core Tables** - `users` - User accounts and settings -- `content` - Generated content and metadata +- `content` - Generated content and metadata - `prompts` - Custom AI prompts - `knowledge_base` - User-uploaded files - `api_keys` - Encrypted API credentials @@ -311,7 +311,7 @@ MIT License - See LICENSE file for details. 
--- -**WhisperForge** - Transforming audio into actionable insights with the beauty of Aurora. 🌌 +**WhisperForge** - Transforming audio into actionable insights with the beauty of Aurora. 🌌 ## πŸ— **Architecture (Simplified)** @@ -321,7 +321,7 @@ MIT License - See LICENSE file for details. if 'authenticated' not in st.session_state: st.session_state.authenticated = False -@st.cache_resource +@st.cache_resource def init_supabase(): return get_supabase_client() ``` @@ -333,5 +333,5 @@ def init_supabase(): ### **Authentication Flow** 1. User enters credentials β†’ Verify against Supabase -2. Set simple session state flags β†’ No tokens or complex persistence -3. Load user preferences from database β†’ Use `@st.cache_data` for performance \ No newline at end of file +2. Set simple session state flags β†’ No tokens or complex persistence +3. Load user preferences from database β†’ Use `@st.cache_data` for performance diff --git a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md b/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md index a7546a1..0c0bb55 100644 --- a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md +++ b/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md @@ -71,7 +71,7 @@ def get_prompt_for_step(step_name: str, custom_prompts: Dict[str, str] = None): """Get the appropriate prompt for a pipeline step""" prompt_mapping = { 'wisdom': 'wisdom_extraction', - 'outline': 'outline_creation', + 'outline': 'outline_creation', 'social': 'social_media', 'article': 'article_generation' } @@ -182,4 +182,4 @@ WhisperForge v2.7.0 represents a complete transformation of the user experience, The application now delivers on its promise of transforming audio into structured content with AI magic, providing users with a seamless, beautiful, and powerful content creation experience. -**Status**: βœ… All objectives achieved, deployed to production, ready for user feedback and future enhancements. 
\ No newline at end of file +**Status**: βœ… All objectives achieved, deployed to production, ready for user feedback and future enhancements. diff --git a/app.py b/app.py index e14a27a..776efb4 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,7 @@ # WhisperForge - Main Application Redirect -# +# # This file redirects to the current main application: app_simple.py -# +# # The old OAuth version has been archived import streamlit as st @@ -42,4 +42,4 @@ elif "deprecated" in file or "backup" in file: st.warning(f"⚠️ **{file}** (Deprecated)") else: - st.info(f"ℹ️ **{file}**") \ No newline at end of file + st.info(f"ℹ️ **{file}**") diff --git a/app_simple.py b/app_simple.py old mode 100644 new mode 100755 index 0e7a0d6..f28b951 --- a/app_simple.py +++ b/app_simple.py @@ -1,4 +1,8 @@ # WhisperForge Simple - Clean, Focused Audio Content Platform +from core.file_upload import EnhancedLargeFileProcessor +from core.supabase_integration import get_supabase_client +from core.styling import apply_aurora_theme, create_aurora_header, create_aurora_progress_card, create_aurora_step_card, create_aurora_content_card, AuroraComponents +from core.content_generation import transcribe_audio, generate_wisdom, generate_outline, generate_article, generate_social_content import streamlit as st import os import tempfile @@ -18,20 +22,18 @@ ) # Core imports -from core.content_generation import transcribe_audio, generate_wisdom, generate_outline, generate_article, generate_social_content -from core.styling import apply_aurora_theme, create_aurora_header, create_aurora_progress_card, create_aurora_step_card, create_aurora_content_card, AuroraComponents -from core.supabase_integration import get_supabase_client -from core.file_upload import EnhancedLargeFileProcessor # Apply beautiful theme apply_aurora_theme() # === PROMPT LOADING SYSTEM === + + def load_custom_prompts(): """Load custom prompts from the prompts directory""" prompts = {} prompt_dir = "prompts/default" - + if 
os.path.exists(prompt_dir): for filename in os.listdir(prompt_dir): if filename.endswith('.md'): @@ -41,9 +43,10 @@ def load_custom_prompts(): prompts[prompt_name] = f.read() except Exception as e: st.warning(f"Failed to load prompt {filename}: {e}") - + return prompts + def load_template(template_name: str) -> Optional[str]: """Load an article template by name""" template_path = os.path.join('templates', f'{template_name}.md') @@ -51,43 +54,46 @@ def load_template(template_name: str) -> Optional[str]: return open(template_path, 'r', encoding='utf-8').read() return None + def get_prompt_for_step(step_name: str, custom_prompts: Dict[str, str] = None) -> Optional[str]: """Get the appropriate prompt for a pipeline step""" if not custom_prompts: custom_prompts = load_custom_prompts() - + # Map step names to prompt files prompt_mapping = { 'wisdom': 'wisdom_extraction', - 'outline': 'outline_creation', + 'outline': 'outline_creation', 'social': 'social_media', 'article': 'article_generation' # We'll create this } - + prompt_key = prompt_mapping.get(step_name) if prompt_key and prompt_key in custom_prompts: return custom_prompts[prompt_key] - + return None # === NOTION INTEGRATION === + + def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str]: """Create a Notion page with WhisperForge content""" try: from notion_client import Client - + api_key = os.getenv("NOTION_API_KEY") database_id = os.getenv("NOTION_DATABASE_ID") - + if not api_key or not database_id: st.warning("⚠️ Notion not configured. 
Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") return None - + client = Client(auth=api_key) - + # Build content blocks children = [] - + # Add beautiful header with summary children.append({ "type": "heading_1", @@ -98,22 +104,22 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str ] } }) - + # Add creation info children.append({ "type": "paragraph", "paragraph": { "rich_text": [ {"type": "text", "text": {"content": "✨ Generated with "}}, - {"type": "text", "text": {"content": "WhisperForge Aurora"}, + {"type": "text", "text": {"content": "WhisperForge Aurora"}, "annotations": {"bold": True, "color": "blue"}}, {"type": "text", "text": {"content": f" β€’ {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}} ] } }) - + children.append({"type": "divider", "divider": {}}) - + # Add wisdom summary callout if exists if content_data.get('wisdom'): children.append({ @@ -127,7 +133,7 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str "icon": {"type": "emoji", "emoji": "πŸ’‘"} } }) - + # Add content sections as toggles sections = [ ("πŸ“ Transcript", content_data.get('transcript')), @@ -137,20 +143,20 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str ("πŸ“° Article", content_data.get('article')), ("πŸ“± Social Content", content_data.get('social_content')) ] - + for section_title, section_content in sections: if section_content: # Handle research data specially if section_title == "πŸ” Research Links" and isinstance(section_content, dict): research_children = [] entities = section_content.get('entities', []) - + if entities: for entity in entities[:5]: # Limit entities entity_name = entity.get('name', 'Unknown Entity') why_matters = entity.get('why_matters', 'No description available') links = entity.get('links', []) - + # Entity as beautiful callout research_children.append({ "type": "callout", @@ -163,7 +169,7 @@ def create_notion_page(title: str, content_data: 
Dict[str, str]) -> Optional[str "icon": {"type": "emoji", "emoji": "πŸ”¬"} } }) - + # Links as bulleted list if links: for link in links[:3]: # Limit links @@ -171,17 +177,20 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str link_url = link.get('url', '#') link_desc = link.get('description', '') is_gem = link.get('is_gem', False) - + gem_icon = "πŸ’Ž" if is_gem else "πŸ”—" color = "orange" if is_gem else "default" - + research_children.append({ "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [ - {"type": "text", "text": {"content": f"{gem_icon} "}, "annotations": {"color": color}}, - {"type": "text", "text": {"content": link_title}, "annotations": {"bold": True}}, - {"type": "text", "text": {"content": f" - {link_desc}"}, "annotations": {"italic": True}} + {"type": "text", "text": {"content": f"{gem_icon} "}, + "annotations": {"color": color}}, + {"type": "text", "text": {"content": link_title}, + "annotations": {"bold": True}}, + {"type": "text", "text": {"content": f" - {link_desc}"}, + "annotations": {"italic": True}} ] } }) @@ -192,7 +201,7 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str "rich_text": [{"type": "text", "text": {"content": "No research entities found."}}] } }) - + children.append({ "type": "toggle", "toggle": { @@ -204,8 +213,8 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str # Handle regular text content if isinstance(section_content, str): # Chunk content for Notion's limits - chunks = [section_content[i:i+1800] for i in range(0, len(section_content), 1800)] - + chunks = [section_content[i:i + 1800] for i in range(0, len(section_content), 1800)] + children.append({ "type": "toggle", "toggle": { @@ -220,7 +229,7 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str ] } }) - + # Add beautiful footer children.extend([ {"type": "divider", "divider": {}}, @@ -238,7 +247,7 @@ def 
create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str } } ]) - + # Create the page response = client.pages.create( parent={"database_id": database_id}, @@ -248,14 +257,14 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str }, children=children[:50] # Limit total blocks ) - + if response and 'id' in response: page_id = response['id'] page_url = f"https://notion.so/{page_id.replace('-', '')}" return page_url - + return None - + except ImportError: st.warning("⚠️ Install notion-client to enable Notion publishing: pip install notion-client") return None @@ -263,11 +272,12 @@ def create_notion_page(title: str, content_data: Dict[str, str]) -> Optional[str st.error(f"❌ Notion publishing failed: {str(e)}") return None + def generate_ai_title(transcript: str) -> str: """Generate an AI title for the content""" try: from core.content_generation import generate_content - + prompt = f"""Generate a concise, descriptive title (max 60 characters) for this audio transcript: {transcript[:500]}... 
@@ -279,13 +289,15 @@ def generate_ai_title(transcript: str) -> str: - No quotes or special characters Title:""" - + title = generate_content(prompt, "OpenAI", "gpt-4", {}) return title.strip().replace('"', '').replace("'", "")[:60] - except: + except BaseException: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" # === SIMPLE AUTHENTICATION === + + def init_session(): """Initialize simple session state""" if 'authenticated' not in st.session_state: @@ -295,12 +307,13 @@ def init_session(): if 'user_email' not in st.session_state: st.session_state.user_email = None + def show_login(): """Simple test login""" create_aurora_header() - + st.markdown("### πŸ” Login to WhisperForge") - + # Test login button if st.button("πŸš€ Login with Test Account", type="primary", use_container_width=True): st.session_state.authenticated = True @@ -309,15 +322,16 @@ def show_login(): st.success("βœ… Logged in successfully!") time.sleep(1) st.rerun() - + st.markdown("---") st.markdown("**Demo Mode**: Click above to access WhisperForge") # === CORE PROCESSING PIPELINE === + def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0, status_message="", processing_time=""): """Display beautiful Aurora-styled processing pipeline visualization""" - + # Define the 6-step pipeline pipeline_steps = [ { @@ -357,7 +371,7 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0, "status": "pending" } ] - + # Update step statuses based on current progress for i, step in enumerate(pipeline_steps): if i < current_step: @@ -366,19 +380,19 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0, step["status"] = "active" else: step["status"] = "pending" - + # Create the pipeline visualization HTML steps_html = "" for i, step in enumerate(pipeline_steps): progress_width = step_progress if i == current_step else (100 if step["status"] == "completed" else 0) - + status_text = { "pending": "Waiting", 
"active": "Processing", "completed": "Complete", "error": "Error" }.get(step["status"], "Waiting") - + steps_html += f"""
@@ -388,7 +402,7 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0,
{status_text}
""" - + # Create the complete pipeline HTML pipeline_html = f"""
@@ -396,7 +410,7 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0,

Content Transformation Pipeline

6-Step AI-Powered Processing

- +
Overall Progress @@ -406,11 +420,11 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0,
- +
{steps_html}
- + {f'''
⚑ @@ -420,28 +434,29 @@ def show_processing_pipeline(current_step=0, step_progress=0, total_progress=0, ''' if status_message else ''}
""" - + st.markdown(pipeline_html, unsafe_allow_html=True) + def process_audio_pipeline(audio_file): """Core audio to content pipeline with beautiful Aurora visualization""" import time from datetime import datetime - + results = {} start_time = time.time() - + # Load custom prompts custom_prompts = load_custom_prompts() if custom_prompts: st.info(f"πŸ“ Using {len(custom_prompts)} custom prompts") - + # Initialize beautiful pipeline visualization pipeline_placeholder = st.empty() - + # Create real-time content display containers st.markdown("### 🌌 Live Content Generation") - + # Create expandable containers for each step transcript_container = st.expander("πŸŽ™οΈ Transcription", expanded=False) wisdom_container = st.expander("πŸ’‘ Wisdom Extraction", expanded=False) @@ -449,148 +464,148 @@ def process_audio_pipeline(audio_file): article_container = st.expander("πŸ“ Article Generation", expanded=False) social_container = st.expander("πŸ“± Social Content", expanded=False) notion_container = st.expander("🌌 Notion Publishing", expanded=False) - + try: # Step 1: Transcription with pipeline_placeholder.container(): show_processing_pipeline( - current_step=0, - step_progress=0, + current_step=0, + step_progress=0, total_progress=0, status_message="Starting transcription process...", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Import transcription function from core.content_generation import transcribe_audio - + # Create temporary file import tempfile import os with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp_file: tmp_file.write(audio_file.getvalue()) tmp_file_path = tmp_file.name - + try: # Transcription with progress updates with pipeline_placeholder.container(): show_processing_pipeline( - current_step=0, - step_progress=50, + current_step=0, + step_progress=50, total_progress=8, status_message="Transcribing audio with Whisper AI...", processing_time=f"{time.time() - start_time:.1f}s" ) - + transcript 
= transcribe_audio(tmp_file_path) if not transcript or "Error" in transcript: st.error(f"Transcription failed: {transcript}") return None - + results['transcript'] = transcript - + # Stream transcript to UI immediately with transcript_container: st.markdown("**βœ… Transcription Complete**") st.text_area("Transcript", transcript, height=200, disabled=True) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=0, - step_progress=100, + current_step=0, + step_progress=100, total_progress=17, status_message="Transcription complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 2: Wisdom Extraction with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=0, + current_step=1, + step_progress=0, total_progress=17, status_message="Extracting wisdom and insights...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_wisdom wisdom_prompt = get_prompt_for_step('wisdom', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=50, + current_step=1, + step_progress=50, total_progress=25, status_message="Analyzing content for key insights...", processing_time=f"{time.time() - start_time:.1f}s" ) - + wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) results['wisdom'] = wisdom - + # Stream wisdom to UI immediately with wisdom_container: st.markdown("**βœ… Wisdom Extraction Complete**") st.markdown(wisdom) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=100, + current_step=1, + step_progress=100, total_progress=33, status_message="Wisdom extraction complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 3: Outline Creation with pipeline_placeholder.container(): show_processing_pipeline( - current_step=2, - step_progress=0, + current_step=2, + step_progress=0, 
total_progress=33, status_message="Creating structured outline...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_outline outline_prompt = get_prompt_for_step('outline', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=2, - step_progress=50, + current_step=2, + step_progress=50, total_progress=42, status_message="Structuring content hierarchy...", processing_time=f"{time.time() - start_time:.1f}s" ) - + outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) results['outline'] = outline - + # Stream outline to UI immediately with outline_container: st.markdown("**βœ… Outline Creation Complete**") st.markdown(outline) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=2, - step_progress=100, + current_step=2, + step_progress=100, total_progress=50, status_message="Outline creation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 4: Article Generation with pipeline_placeholder.container(): show_processing_pipeline( - current_step=3, - step_progress=0, + current_step=3, + step_progress=0, total_progress=50, status_message="Generating comprehensive article...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_article article_prompt = get_prompt_for_step('article', custom_prompts) selected_template = st.session_state.get('article_template') @@ -598,109 +613,109 @@ def process_audio_pipeline(audio_file): template_text = load_template(selected_template) if template_text: article_prompt = template_text + "\n" + article_prompt - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=3, - step_progress=50, + current_step=3, + step_progress=50, total_progress=58, status_message="Writing detailed article content...", processing_time=f"{time.time() - start_time:.1f}s" ) - + article = 
generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) results['article'] = article - + # Stream article to UI immediately with article_container: st.markdown("**βœ… Article Generation Complete**") st.markdown(article) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=3, - step_progress=100, + current_step=3, + step_progress=100, total_progress=67, status_message="Article generation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 5: Social Content with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=0, + current_step=4, + step_progress=0, total_progress=67, status_message="Creating social media content...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_social_content social_prompt = get_prompt_for_step('social', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=50, + current_step=4, + step_progress=50, total_progress=75, status_message="Generating social media posts...", processing_time=f"{time.time() - start_time:.1f}s" ) - + social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) results['social_content'] = social - + # Stream social content to UI immediately with social_container: st.markdown("**βœ… Social Content Creation Complete**") st.markdown(social) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=100, + current_step=4, + step_progress=100, total_progress=83, status_message="Social content creation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 6: Auto-publish to Notion with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=0, + current_step=5, + step_progress=0, total_progress=83, status_message="Publishing to 
Notion workspace...", processing_time=f"{time.time() - start_time:.1f}s" ) - + if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): # Generate AI title ai_title = generate_ai_title(transcript) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=30, + current_step=5, + step_progress=30, total_progress=88, status_message="Creating Notion page structure...", processing_time=f"{time.time() - start_time:.1f}s" ) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=60, + current_step=5, + step_progress=60, total_progress=92, status_message="Uploading content to Notion...", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Publish to Notion notion_url = create_notion_page(ai_title, results) if notion_url: results['notion_url'] = notion_url - + # Stream Notion success to UI with notion_container: st.markdown("**βœ… Notion Publishing Complete**") @@ -716,31 +731,31 @@ def process_audio_pipeline(audio_file): with notion_container: st.markdown("**ℹ️ Notion Publishing Disabled**") st.info("Configure Notion API in Settings to enable auto-publishing.") - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=90, + current_step=5, + step_progress=90, total_progress=96, status_message="Saving to database...", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Save to Supabase database try: save_content_to_db(results) except Exception as e: st.warning(f"⚠️ Content saved locally but database save failed: {e}") - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=100, + current_step=5, + step_progress=100, total_progress=100, status_message="Pipeline complete! All content generated successfully.", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Aurora completion celebration st.markdown("""
@@ -748,24 +763,24 @@ def process_audio_pipeline(audio_file):

Your content has been transformed with AI magic

""", unsafe_allow_html=True) - + # Clear the pipeline display after a moment time.sleep(2) pipeline_placeholder.empty() - + return results - + finally: # Cleanup temporary file if os.path.exists(tmp_file_path): os.unlink(tmp_file_path) - + except Exception as e: # Show error state with pipeline_placeholder.container(): show_processing_pipeline( - current_step=0, - step_progress=0, + current_step=0, + step_progress=0, total_progress=0, status_message=f"Error: {str(e)}", processing_time=f"{time.time() - start_time:.1f}s" @@ -773,6 +788,7 @@ def process_audio_pipeline(audio_file): st.error(f"Pipeline failed: {str(e)}") return None + def process_audio_pipeline_live(audio_file): """Run pipeline with StreamingPipelineController""" from core.streaming_pipeline import get_pipeline_controller @@ -783,131 +799,132 @@ def process_audio_pipeline_live(audio_file): pass return controller.get_results() + def process_audio_pipeline_with_transcript(transcript: str): """Process audio pipeline with pre-transcribed content using beautiful Aurora visualization""" import time from datetime import datetime - + results = {'transcript': transcript} start_time = time.time() - + # Load custom prompts custom_prompts = load_custom_prompts() if custom_prompts: st.info(f"πŸ“ Using {len(custom_prompts)} custom prompts") - + # Initialize beautiful pipeline visualization (starting from step 1) pipeline_placeholder = st.empty() - + # Create real-time content display containers st.markdown("### 🌌 Live Content Generation") - + # Create expandable containers for each step (skip transcription) wisdom_container = st.expander("πŸ’‘ Wisdom Extraction", expanded=False) outline_container = st.expander("πŸ“‹ Outline Creation", expanded=False) article_container = st.expander("πŸ“ Article Generation", expanded=False) social_container = st.expander("πŸ“± Social Content", expanded=False) notion_container = st.expander("🌌 Notion Publishing", expanded=False) - + try: # Show initial state with transcription 
already complete with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=0, + current_step=1, + step_progress=0, total_progress=17, status_message=f"Using pre-transcribed content ({len(transcript)} characters)", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 2: Wisdom Extraction with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=0, + current_step=1, + step_progress=0, total_progress=17, status_message="Extracting wisdom and insights...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_wisdom wisdom_prompt = get_prompt_for_step('wisdom', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=50, + current_step=1, + step_progress=50, total_progress=25, status_message="Analyzing content for key insights...", processing_time=f"{time.time() - start_time:.1f}s" ) - + wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) results['wisdom'] = wisdom - + # Stream wisdom to UI immediately with wisdom_container: st.markdown("**βœ… Wisdom Extraction Complete**") st.markdown(wisdom) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=100, + current_step=1, + step_progress=100, total_progress=33, status_message="Wisdom extraction complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 3: Outline Creation with pipeline_placeholder.container(): show_processing_pipeline( - current_step=2, - step_progress=0, + current_step=2, + step_progress=0, total_progress=33, status_message="Creating structured outline...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_outline outline_prompt = get_prompt_for_step('outline', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - 
current_step=2, - step_progress=50, + current_step=2, + step_progress=50, total_progress=42, status_message="Structuring content hierarchy...", processing_time=f"{time.time() - start_time:.1f}s" ) - + outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) results['outline'] = outline - + # Stream outline to UI immediately with outline_container: st.markdown("**βœ… Outline Creation Complete**") st.markdown(outline) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=2, - step_progress=100, + current_step=2, + step_progress=100, total_progress=50, status_message="Outline creation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 4: Article Generation with pipeline_placeholder.container(): show_processing_pipeline( - current_step=3, - step_progress=0, + current_step=3, + step_progress=0, total_progress=50, status_message="Generating comprehensive article...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_article article_prompt = get_prompt_for_step('article', custom_prompts) selected_template = st.session_state.get('article_template') @@ -915,109 +932,109 @@ def process_audio_pipeline_with_transcript(transcript: str): template_text = load_template(selected_template) if template_text: article_prompt = template_text + "\n" + article_prompt - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=3, - step_progress=50, + current_step=3, + step_progress=50, total_progress=58, status_message="Writing detailed article content...", processing_time=f"{time.time() - start_time:.1f}s" ) - + article = generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) results['article'] = article - + # Stream article to UI immediately with article_container: st.markdown("**βœ… Article Generation Complete**") st.markdown(article) - + with pipeline_placeholder.container(): 
show_processing_pipeline( - current_step=3, - step_progress=100, + current_step=3, + step_progress=100, total_progress=67, status_message="Article generation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 5: Social Content with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=0, + current_step=4, + step_progress=0, total_progress=67, status_message="Creating social media content...", processing_time=f"{time.time() - start_time:.1f}s" ) - + from core.content_generation import generate_social_content social_prompt = get_prompt_for_step('social', custom_prompts) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=50, + current_step=4, + step_progress=50, total_progress=75, status_message="Generating social media posts...", processing_time=f"{time.time() - start_time:.1f}s" ) - + social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) results['social_content'] = social - + # Stream social content to UI immediately with social_container: st.markdown("**βœ… Social Content Creation Complete**") st.markdown(social) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=4, - step_progress=100, + current_step=4, + step_progress=100, total_progress=83, status_message="Social content creation complete!", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Step 6: Auto-publish to Notion with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=0, + current_step=5, + step_progress=0, total_progress=83, status_message="Publishing to Notion workspace...", processing_time=f"{time.time() - start_time:.1f}s" ) - + if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): # Generate AI title ai_title = generate_ai_title(transcript) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - 
step_progress=30, + current_step=5, + step_progress=30, total_progress=88, status_message="Creating Notion page structure...", processing_time=f"{time.time() - start_time:.1f}s" ) - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=60, + current_step=5, + step_progress=60, total_progress=92, status_message="Uploading content to Notion...", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Publish to Notion notion_url = create_notion_page(ai_title, results) if notion_url: results['notion_url'] = notion_url - + # Stream Notion success to UI with notion_container: st.markdown("**βœ… Notion Publishing Complete**") @@ -1033,31 +1050,31 @@ def process_audio_pipeline_with_transcript(transcript: str): with notion_container: st.markdown("**ℹ️ Notion Publishing Disabled**") st.info("Configure Notion API in Settings to enable auto-publishing.") - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=90, + current_step=5, + step_progress=90, total_progress=96, status_message="Saving to database...", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Save to Supabase database try: save_content_to_db(results) except Exception as e: st.warning(f"⚠️ Content saved locally but database save failed: {e}") - + with pipeline_placeholder.container(): show_processing_pipeline( - current_step=5, - step_progress=100, + current_step=5, + step_progress=100, total_progress=100, status_message="Pipeline complete! All content generated successfully.", processing_time=f"{time.time() - start_time:.1f}s" ) - + # Aurora completion celebration st.markdown("""
@@ -1065,19 +1082,19 @@ def process_audio_pipeline_with_transcript(transcript: str):

Your content has been transformed with AI magic

""", unsafe_allow_html=True) - + # Clear the pipeline display after a moment time.sleep(2) pipeline_placeholder.empty() - + return results - + except Exception as e: # Show error state with pipeline_placeholder.container(): show_processing_pipeline( - current_step=1, - step_progress=0, + current_step=1, + step_progress=0, total_progress=17, status_message=f"Error: {str(e)}", processing_time=f"{time.time() - start_time:.1f}s" @@ -1085,6 +1102,7 @@ def process_audio_pipeline_with_transcript(transcript: str): st.error(f"Pipeline failed: {str(e)}") return None + def save_content_to_db(content_data): """Save generated content to database""" try: @@ -1101,28 +1119,30 @@ def save_content_to_db(content_data): st.warning(f"Database save failed: {e}") # === CONTENT DISPLAY === + + def create_enhanced_aurora_content_card(title, content, content_type="text", icon="πŸ“„"): """Create a beautiful enhanced Aurora content card with copy functionality and animations""" import uuid - + # Generate unique IDs for this card card_id = f"card_{uuid.uuid4().hex[:8]}" copy_btn_id = f"copy_{uuid.uuid4().hex[:8]}" expand_btn_id = f"expand_{uuid.uuid4().hex[:8]}" full_content_id = f"full_{uuid.uuid4().hex[:8]}" - + # Calculate content stats word_count = len(content.split()) if content else 0 char_count = len(content) if content else 0 - + # Determine if content needs truncation preview_length = 300 needs_expansion = len(content) > preview_length preview_content = content[:preview_length] + "..." if needs_expansion else content - + # Content type specific styling type_class = content_type.lower() - + # Create the enhanced card HTML safe_content = content.replace('`', '\\`') @@ -1144,24 +1164,24 @@ def create_enhanced_aurora_content_card(title, content, content_type="text", ico - +
{preview_content}
- + {f'''
{content[preview_length:]}
''' if needs_expansion else ''} - +
πŸ“Š {word_count} words β€’ {char_count} characters
- + {f'''
- + - + """ - + st.markdown(card_html, unsafe_allow_html=True) + def show_results(results): """Display generated content with beautiful Aurora styling and enhanced UX""" if not results: return - + # Aurora header for results with enhanced styling st.markdown("""
@@ -1252,7 +1273,7 @@ def show_results(results):

Your audio has been transformed with AI magic

""", unsafe_allow_html=True) - + # Aurora Notion link if available with enhanced styling if results.get('notion_url'): st.markdown(f""" @@ -1263,11 +1284,12 @@ def show_results(results): """, unsafe_allow_html=True) st.markdown("---") - + # Enhanced content overview stats - total_words = sum(len(str(results.get(key, '')).split()) for key in ['transcript', 'wisdom', 'outline', 'article', 'social_content']) + total_words = sum(len(str(results.get(key, '')).split()) + for key in ['transcript', 'wisdom', 'outline', 'article', 'social_content']) content_types = len([k for k in ['transcript', 'wisdom', 'outline', 'article', 'social_content'] if results.get(k)]) - + st.markdown(f"""
@@ -1289,10 +1311,10 @@ def show_results(results):
""", unsafe_allow_html=True) - + # Prepare tab data for custom Aurora tabs tab_data = [] - + # Add main content tabs if results.get('transcript'): tab_data.append({ @@ -1301,7 +1323,7 @@ def show_results(results): 'type': 'transcript', 'content': results['transcript'] }) - + if results.get('wisdom'): tab_data.append({ 'title': 'Wisdom', @@ -1309,7 +1331,7 @@ def show_results(results): 'type': 'wisdom', 'content': results['wisdom'] }) - + if results.get('outline'): tab_data.append({ 'title': 'Outline', @@ -1317,7 +1339,7 @@ def show_results(results): 'type': 'outline', 'content': results['outline'] }) - + if results.get('article'): tab_data.append({ 'title': 'Article', @@ -1325,7 +1347,7 @@ def show_results(results): 'type': 'article', 'content': results['article'] }) - + if results.get('social_content'): tab_data.append({ 'title': 'Social', @@ -1333,36 +1355,36 @@ def show_results(results): 'type': 'social', 'content': results['social_content'] }) - + # Add Editor tab if editor content exists if results.get('editor_notes') or results.get('revised_content'): editor_content = "" - + if results.get('editor_notes'): editor_content += "=== EDITOR NOTES ===\n\n" for section, notes in results['editor_notes'].items(): if notes: editor_content += f"## {section.title()} Notes:\n{notes}\n\n" - + if results.get('revised_content'): editor_content += "\n=== REVISED CONTENT ===\n\n" for section, content in results['revised_content'].items(): if content: editor_content += f"## Revised {section.title()}:\n{content}\n\n" - + tab_data.append({ 'title': 'Editor Review', 'icon': 'πŸ“', 'type': 'editor', 'content': editor_content }) - + # Display the custom Aurora tabs if tab_data: create_aurora_tabs(tab_data, default_tab=0) else: st.warning("No content available to display.") - + # Add export all functionality st.markdown("---") st.markdown(""" @@ -1370,9 +1392,9 @@ def show_results(results):

πŸ“¦ Additional Export Options

""", unsafe_allow_html=True) - + col1, col2, col3 = st.columns(3) - + with col1: if st.button("πŸ“„ Export as Text", use_container_width=True): export_content = create_text_export(results) @@ -1429,6 +1451,7 @@ def show_results(results): use_container_width=True ) + def create_text_export(results): """Create a formatted text export of all content""" export_lines = [] @@ -1437,7 +1460,7 @@ def create_text_export(results): export_lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") export_lines.append("=" * 60) export_lines.append("") - + sections = [ ("AUDIO TRANSCRIPT", results.get('transcript', '')), ("EXTRACTED WISDOM", results.get('wisdom', '')), @@ -1445,7 +1468,7 @@ def create_text_export(results): ("FULL ARTICLE", results.get('article', '')), ("SOCIAL MEDIA CONTENT", results.get('social_content', '')) ] - + for title, content in sections: if content: export_lines.append(f"## {title}") @@ -1453,7 +1476,7 @@ def create_text_export(results): export_lines.append(content) export_lines.append("") export_lines.append("") - + if results.get('notion_url'): export_lines.append("## NOTION LINK") export_lines.append("-" * 40) @@ -1462,6 +1485,7 @@ def create_text_export(results): return "\n".join(export_lines) + def export_to_markdown(results): """Export results to Markdown format""" lines = ["# WhisperForge Content Export"] @@ -1483,6 +1507,7 @@ def export_to_markdown(results): lines.append(results['notion_url']) return "\n".join(lines) + def export_to_word(results): """Export results to a Word document""" from docx import Document @@ -1509,6 +1534,7 @@ def export_to_word(results): bio.seek(0) return bio.read() + def export_to_pdf(results): """Export results to a PDF file""" from fpdf import FPDF @@ -1522,6 +1548,8 @@ def export_to_pdf(results): return pdf.output(dest='S').encode('latin-1') # === NAVIGATION & PAGES === + + def create_aurora_navigation(): """Beautiful Aurora bioluminescent navigation - Clean and professional""" st.markdown(""" @@ 
-1538,21 +1566,22 @@ def create_aurora_navigation(): """, unsafe_allow_html=True) - + # Clean navigation tabs without emojis tabs = st.tabs([ - "Transform", - "Content Library", - "Settings", + "Transform", + "Content Library", + "Settings", "Knowledge Base", "Prompts" ]) - + return tabs + def show_transform_page(): """Clean transformation page focused on file upload and processing""" - + # Simple Aurora-styled header using main CSS st.markdown("""
@@ -1560,7 +1589,7 @@ def show_transform_page():

Upload your audio and watch it transform into structured content

""", unsafe_allow_html=True) - + # Beautiful Aurora upload method selector st.markdown("""
@@ -1603,25 +1632,25 @@ def show_transform_page():
- + """, unsafe_allow_html=True) - + # Enhanced file upload selection with session state if 'upload_method' not in st.session_state: st.session_state.upload_method = "Standard Upload" - + upload_method = st.radio( "Choose upload method:", ["Standard Upload", "Large File Upload"], @@ -1669,9 +1698,9 @@ def show_transform_page(): help="Standard upload for smaller files, Enhanced upload for large files with FFmpeg processing", label_visibility="collapsed" ) - + st.session_state.upload_method = upload_method - + if upload_method == "Standard Upload": # Beautiful standard file upload zone st.markdown(""" @@ -1691,7 +1720,7 @@ def show_transform_page(): """, unsafe_allow_html=True) - + # Standard file upload uploaded_files = st.file_uploader( "Upload your audio file", @@ -1706,7 +1735,7 @@ def show_transform_page(): # Beautiful file preview card file_size = len(uploaded_file.getvalue()) / (1024 * 1024) file_extension = uploaded_file.name.split('.')[-1].upper() - + st.markdown(f"""
@@ -1723,9 +1752,9 @@ def show_transform_page():
""", unsafe_allow_html=True) - - # Enhanced audio player - if file_size < 50: # Only show player for files under 50MB + + # Enhanced audio player + if file_size < 50: # Only show player for files under 50MB st.markdown('
', unsafe_allow_html=True) st.audio(uploaded_file.getvalue()) st.markdown('
', unsafe_allow_html=True) @@ -1733,7 +1762,8 @@ def show_transform_page(): st.info("Audio preview disabled for large files to conserve memory") # Beautiful process button - if st.button(f"Transform {uploaded_file.name}", key=f"process_{uploaded_file.name}", type="primary", use_container_width=True): + if st.button(f"Transform {uploaded_file.name}", + key=f"process_{uploaded_file.name}", type="primary", use_container_width=True): if not os.getenv("OPENAI_API_KEY"): st.error("Please enter your OpenAI API key in the sidebar") return @@ -1745,28 +1775,28 @@ def show_transform_page(): results = process_audio_pipeline(uploaded_file) if results: st.session_state.current_results = results - + else: # Enhanced large file upload st.markdown("### Enhanced Large File Processing") - + # Initialize enhanced processor processor = EnhancedLargeFileProcessor() - + # Create enhanced upload interface uploaded_file = processor.create_enhanced_upload_interface() - + if uploaded_file: # Validate file validation = processor.validate_file(uploaded_file) - + if not validation["valid"]: st.error(f"{validation['error']}") return - + file_size_mb = validation["size_mb"] requires_chunking = validation["requires_chunking"] - + # Beautiful processing metrics st.markdown(f"""
@@ -1784,7 +1814,7 @@ def show_transform_page():
""", unsafe_allow_html=True) - + # Audio preview disabled for large files to conserve memory if file_size_mb < 50: st.markdown('
', unsafe_allow_html=True) @@ -1792,23 +1822,23 @@ def show_transform_page(): st.markdown('
', unsafe_allow_html=True) else: st.info("Audio preview disabled for large files to conserve memory") - + # Enhanced process button if st.button("Transform Large File to Content", type="primary", use_container_width=True): if not os.getenv("OPENAI_API_KEY"): st.error("Please enter your OpenAI API key in the sidebar") return - + with st.container(): # Process with enhanced large file processor processing_result = processor.process_large_file(uploaded_file) - + if processing_result["success"]: transcript = processing_result["transcript"] - + # Show processing summary st.success(f"Large file processing complete!") - + # Beautiful success metrics st.markdown(f"""
@@ -1826,17 +1856,17 @@ def show_transform_page():
""", unsafe_allow_html=True) - + # Continue with pipeline using pre-transcribed content st.markdown("---") results = process_audio_pipeline_with_transcript(transcript) - + if results: # Store results in session state st.session_state.current_results = results else: st.error(f"Large file processing failed: {processing_result['error']}") - + # Fallback to standard processing for smaller files if file_size_mb < 100: st.info("Attempting fallback to standard processing...") @@ -1849,32 +1879,33 @@ def show_transform_page(): st.session_state.current_results = results except Exception as e: st.error(f"Fallback processing also failed: {str(e)}") - + # Show results if available if 'current_results' in st.session_state: show_results(st.session_state.current_results) + def show_content_library(): """Content library/history page""" st.markdown("### πŸ“š Content Library") - + # Get content from database try: db = get_supabase_client() if db: # Fetch recent content response = db.client.table('content').select('*').order('created_at', desc=True).limit(20).execute() - + if response.data: st.success(f"πŸ“Š Found {len(response.data)} content items") - + # Search and filter col1, col2 = st.columns([3, 1]) with col1: search_term = st.text_input("πŸ” Search content", placeholder="Search by title or content...") with col2: content_type = st.selectbox("Filter by type", ["All", "Article", "Social", "Outline"]) - + # Display content cards in a two-column grid cols = st.columns(2) for idx, item in enumerate(response.data): @@ -1883,32 +1914,45 @@ def show_content_library(): with cols[idx % 2].expander(f"πŸ“„ {item.get('title', 'Untitled')} - {item.get('created_at', '')[:10]}"): col1, col2 = st.columns([3, 1]) - + with col1: st.markdown(f"**Created:** {item.get('created_at', 'Unknown')}") if item.get('transcript'): st.markdown("**Transcript Preview:**") - st.text(item['transcript'][:200] + "..." 
if len(item['transcript']) > 200 else item['transcript']) - + st.text(item['transcript'][:200] + + "..." if len(item['transcript']) > 200 else item['transcript']) + with col2: if st.button(f"πŸ”„ Reprocess", key=f"reprocess_{item.get('id')}"): st.info("Reprocessing feature coming soon!") - + if st.button(f"πŸ“€ Export", key=f"export_{item.get('id')}"): st.info("Export feature coming soon!") - + # Show generated content if item.get('wisdom'): st.markdown("**πŸ’‘ Wisdom:**") st.text_area("", item['wisdom'], height=100, disabled=True, key=f"wisdom_{item.get('id')}") - + if item.get('article'): - st.markdown("**πŸ“° Article:**") - st.text_area("", item['article'], height=150, disabled=True, key=f"article_{item.get('id')}") - + st.markdown("**πŸ“° Article:**") + st.text_area( + "", + item['article'], + height=150, + disabled=True, + key=f"article_{ + item.get('id')}") + if item.get('social_content'): st.markdown("**πŸ“± Social Content:**") - st.text_area("", item['social_content'], height=100, disabled=True, key=f"social_{item.get('id')}") + st.text_area( + "", + item['social_content'], + height=100, + disabled=True, + key=f"social_{ + item.get('id')}") else: st.info("πŸ“­ No content found. 
Process some audio files to see them here!") else: @@ -1916,58 +1960,59 @@ def show_content_library(): except Exception as e: st.error(f"❌ Error loading content library: {e}") + def show_settings_page(): """Settings and configuration page""" st.markdown("### βš™οΈ Settings & Configuration") - + # API Keys section st.markdown("#### πŸ”‘ API Keys") with st.expander("πŸ”§ API Configuration", expanded=True): col1, col2 = st.columns(2) - + with col1: # OpenAI settings st.markdown("**OpenAI Configuration**") - openai_key = st.text_input("OpenAI API Key", type="password", - value=os.getenv("OPENAI_API_KEY", ""), - help="Your OpenAI API key") + openai_key = st.text_input("OpenAI API Key", type="password", + value=os.getenv("OPENAI_API_KEY", ""), + help="Your OpenAI API key") if openai_key: os.environ["OPENAI_API_KEY"] = openai_key st.success("βœ… OpenAI key configured") - + # Model selection - model_choice = st.selectbox("OpenAI Model", - ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], - help="Choose the OpenAI model for content generation") + model_choice = st.selectbox("OpenAI Model", + ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], + help="Choose the OpenAI model for content generation") st.session_state.openai_model = model_choice - + with col2: # Notion settings st.markdown("**Notion Configuration**") notion_key = st.text_input("Notion API Key", type="password", - value=os.getenv("NOTION_API_KEY", ""), - help="Your Notion integration token") + value=os.getenv("NOTION_API_KEY", ""), + help="Your Notion integration token") if notion_key: os.environ["NOTION_API_KEY"] = notion_key - + notion_db = st.text_input("Notion Database ID", - value=os.getenv("NOTION_DATABASE_ID", ""), - help="Your Notion database ID") + value=os.getenv("NOTION_DATABASE_ID", ""), + help="Your Notion database ID") if notion_db: os.environ["NOTION_DATABASE_ID"] = notion_db - + if notion_key and notion_db: st.success("βœ… Notion configured") - + # Pipeline settings st.markdown("#### πŸ”„ Pipeline 
Configuration") with st.expander("βš™οΈ Processing Pipeline", expanded=True): col1, col2 = st.columns(2) - + with col1: st.markdown("**Core Features**") auto_notion = st.checkbox("Auto-publish to Notion", - value=st.session_state.get('auto_notion', True)) + value=st.session_state.get('auto_notion', True)) st.session_state.auto_notion = auto_notion live_stream = st.checkbox( @@ -1976,30 +2021,30 @@ def show_settings_page(): help="Show step-by-step streaming results" ) st.session_state.live_stream = live_stream - - large_file_mode = st.checkbox("Enhanced Large File Processing", - value=st.session_state.get('large_file_mode', True), - help="Use FFmpeg for files larger than 25MB") + + large_file_mode = st.checkbox("Enhanced Large File Processing", + value=st.session_state.get('large_file_mode', True), + help="Use FFmpeg for files larger than 25MB") st.session_state.large_file_mode = large_file_mode - + with col2: st.markdown("**Quality Settings**") - content_length = st.selectbox("Article Length", - ["Short (500-800 words)", "Medium (800-1200 words)", "Long (1200+ words)"]) - + content_length = st.selectbox("Article Length", + ["Short (500-800 words)", "Medium (800-1200 words)", "Long (1200+ words)"]) + tone_style = st.selectbox("Content Tone", - ["Professional", "Conversational", "Academic", "Creative"]) + ["Professional", "Conversational", "Academic", "Creative"]) st.session_state.content_length = content_length st.session_state.tone_style = tone_style - templates = [f.replace('.md','') for f in os.listdir('templates')] if os.path.exists('templates') else [] + templates = [f.replace('.md', '') for f in os.listdir('templates')] if os.path.exists('templates') else [] if templates: template_choice = st.selectbox("Article Template", templates) st.session_state.article_template = template_choice else: st.session_state.article_template = None - + # System status st.markdown("#### πŸ” System Status") with st.expander("πŸ“Š Connection Status", expanded=False): @@ -2013,7 
+2058,7 @@ def show_settings_page(): st.error("❌ OpenAI API key missing") except Exception as e: st.error(f"❌ OpenAI error: {e}") - + # Test Supabase try: db = get_supabase_client() @@ -2023,7 +2068,7 @@ def show_settings_page(): st.error("❌ Supabase connection failed") except Exception as e: st.error(f"❌ Supabase error: {e}") - + # Test Notion try: if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): @@ -2036,21 +2081,22 @@ def show_settings_page(): except Exception as e: st.error(f"❌ Notion error: {e}") + def show_knowledge_base(): """Knowledge base management page""" st.markdown("### 🧠 Knowledge Base") - + # Check if knowledge base files exist kb_path = "prompts/default/knowledge_base" - + st.markdown(""" The knowledge base provides context and expertise to enhance content generation. Add domain-specific information, style guides, and reference materials here. """) - + # Knowledge base sections tabs = st.tabs(["πŸ“– View Knowledge", "βž• Add Knowledge", "πŸ”§ Manage Files"]) - + with tabs[0]: st.markdown("#### πŸ“– Current Knowledge Base") try: @@ -2062,7 +2108,7 @@ def show_knowledge_base(): file_path = os.path.join(kb_path, selected_file) with open(file_path, 'r') as f: content = f.read() - + st.markdown(f"**File:** `{selected_file}`") create_enhanced_aurora_content_card("Knowledge Content", content, "text", "πŸ“–") else: @@ -2071,41 +2117,41 @@ def show_knowledge_base(): st.info("πŸ“ Knowledge base directory not found") except Exception as e: st.error(f"❌ Error reading knowledge base: {e}") - + with tabs[1]: st.markdown("#### βž• Add New Knowledge") - + col1, col2 = st.columns([2, 1]) with col1: kb_title = st.text_input("Knowledge Title", placeholder="e.g., 'Marketing Guidelines'") with col2: kb_category = st.selectbox("Category", ["General", "Style Guide", "Domain Expertise", "Templates"]) - - kb_content = st.text_area("Knowledge Content", - placeholder="Enter your knowledge content here...", - height=300) - + + kb_content = 
st.text_area("Knowledge Content", + placeholder="Enter your knowledge content here...", + height=300) + if st.button("πŸ’Ύ Save Knowledge", type="primary"): if kb_title and kb_content: try: os.makedirs(kb_path, exist_ok=True) filename = f"{kb_title.lower().replace(' ', '_')}.md" file_path = os.path.join(kb_path, filename) - + with open(file_path, 'w') as f: f.write(f"# {kb_title}\n\n") f.write(f"**Category:** {kb_category}\n\n") f.write(kb_content) - + st.success(f"βœ… Knowledge saved as `{filename}`") except Exception as e: st.error(f"❌ Error saving knowledge: {e}") else: st.error("❌ Please provide both title and content") - + with tabs[2]: st.markdown("#### πŸ”§ Manage Knowledge Files") - + try: if os.path.exists(kb_path): files = [f for f in os.listdir(kb_path) if f.endswith('.md')] @@ -2132,39 +2178,40 @@ def show_knowledge_base(): except Exception as e: st.error(f"❌ Error managing files: {e}") + def show_prompts_page(): """Prompts management page""" st.markdown("### πŸ“ Prompts Management") - + st.markdown(""" Customize the AI prompts used in each step of the content generation pipeline. Fine-tune the output style, format, and focus for your specific needs. 
""") - + # Prompt categories prompt_types = { "wisdom": "πŸ’‘ Wisdom Extraction", - "outline": "πŸ“‹ Content Outline", + "outline": "πŸ“‹ Content Outline", "article": "πŸ“° Article Generation", "social": "πŸ“± Social Media Posts" } - + tabs = st.tabs(list(prompt_types.values()) + ["πŸ”§ Advanced"]) - + for i, (prompt_key, prompt_name) in enumerate(prompt_types.items()): with tabs[i]: st.markdown(f"#### {prompt_name}") - + # Load current prompt - Map UI keys to actual pipeline files file_mapping = { "wisdom": "wisdom_extraction.md", - "outline": "outline_creation.md", + "outline": "outline_creation.md", "article": "article_generation.md", "social": "social_media.md" } prompt_file = f"prompts/default/{file_mapping[prompt_key]}" current_prompt = "" - + try: if os.path.exists(prompt_file): with open(prompt_file, 'r') as f: @@ -2173,7 +2220,7 @@ def show_prompts_page(): current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." except Exception as e: st.error(f"❌ Error loading prompt: {e}") - + # Edit prompt new_prompt = st.text_area( f"Edit {prompt_name} Prompt", @@ -2181,7 +2228,7 @@ def show_prompts_page(): height=400, help=f"Customize the prompt used for {prompt_key} generation" ) - + col1, col2, col3 = st.columns([1, 1, 2]) with col1: if st.button(f"πŸ’Ύ Save", key=f"save_{prompt_key}"): @@ -2192,52 +2239,53 @@ def show_prompts_page(): st.success(f"βœ… {prompt_name} prompt saved!") except Exception as e: st.error(f"❌ Error saving prompt: {e}") - + with col2: if st.button(f"πŸ”„ Reset", key=f"reset_{prompt_key}"): st.info("Reset to default functionality coming soon!") - + with col3: st.markdown(f"**File:** `{prompt_file}`") - + # Advanced settings with tabs[-1]: st.markdown("#### πŸ”§ Advanced Prompt Settings") - + col1, col2 = st.columns(2) with col1: st.markdown("**Global Settings**") temperature = st.slider("Temperature (Creativity)", 0.0, 1.0, 0.7, 0.1) max_tokens = st.number_input("Max Tokens", 100, 4000, 2000) - + with col2: 
st.markdown("**Prompt Templates**") if st.button("πŸ“₯ Import Prompt Set"): st.info("Import functionality coming soon!") if st.button("πŸ“€ Export Prompt Set"): st.info("Export functionality coming soon!") - + st.session_state.temperature = temperature st.session_state.max_tokens = max_tokens + def create_aurora_tabs(tab_data, default_tab=0): """Create beautiful Aurora-styled tabs with a simplified, reliable approach""" import uuid - + # Generate unique ID for this tab group tab_group_id = f"tabs_{uuid.uuid4().hex[:8]}" - + # Initialize session state for this tab group if f"{tab_group_id}_active" not in st.session_state: st.session_state[f"{tab_group_id}_active"] = default_tab - + # Calculate content stats for display tab_stats = [] for tab in tab_data: content = tab.get('content', '') word_count = len(str(content).split()) if content else 0 tab_stats.append(word_count) - + # Create the tabs container with Aurora styling st.markdown("""
@@ -2249,13 +2297,13 @@ def create_aurora_tabs(tab_data, default_tab=0):
""", unsafe_allow_html=True) - + # Create tab selector using Streamlit's selectbox with Aurora styling tab_options = [] for i, tab in enumerate(tab_data): word_count = tab_stats[i] tab_options.append(f"{tab['icon']} {tab['title']} ({word_count} words)") - + # Custom styled selectbox st.markdown(""" """, unsafe_allow_html=True) - + # Tab selector with st.container(): st.markdown('
', unsafe_allow_html=True) @@ -2283,29 +2331,29 @@ def create_aurora_tabs(tab_data, default_tab=0): label_visibility="collapsed" ) st.markdown('
', unsafe_allow_html=True) - + # Find selected tab index selected_index = 0 for i, option in enumerate(tab_options): if option == selected_tab_label: selected_index = i break - + # Update session state st.session_state[f"{tab_group_id}_active"] = selected_index - + # Display selected content with Aurora styling if 0 <= selected_index < len(tab_data): active_tab = tab_data[selected_index] - + # Add quick actions for the active tab col1, col2, col3, col4 = st.columns([1, 1, 1, 2]) - + with col1: if st.button("πŸ“‹ Copy", key=f"copy_{tab_group_id}_{selected_index}", use_container_width=True): st.code(active_tab.get('content', ''), language='text') st.success("βœ… Content displayed above - copy with Ctrl+A, Ctrl+C") - + with col2: content = active_tab.get('content', '') if content: @@ -2317,13 +2365,13 @@ def create_aurora_tabs(tab_data, default_tab=0): key=f"download_{tab_group_id}_{selected_index}", use_container_width=True ) - + with col3: if st.button("πŸ“Š Stats", key=f"stats_{tab_group_id}_{selected_index}", use_container_width=True): word_count = len(str(content).split()) char_count = len(str(content)) st.info(f"πŸ“Š **{active_tab['title']}**: {word_count} words, {char_count} characters") - + # Display the content using our enhanced content card st.markdown('
', unsafe_allow_html=True) create_enhanced_aurora_content_card( @@ -2333,40 +2381,43 @@ def create_aurora_tabs(tab_data, default_tab=0): icon=active_tab['icon'] ) st.markdown('
', unsafe_allow_html=True) - + return selected_index # === MAIN APP === + def show_main_app(): """Main application interface with navigation""" # Create navigation tabs = create_aurora_navigation() - + # Show different pages based on selected tab with tabs[0]: # Transform show_transform_page() - + with tabs[1]: # Content Library show_content_library() - + with tabs[2]: # Settings show_settings_page() - + with tabs[3]: # Knowledge Base show_knowledge_base() - + with tabs[4]: # Prompts show_prompts_page() # === ENTRY POINT === + def main(): """Application entry point""" init_session() - + if st.session_state.authenticated: show_main_app() else: show_login() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/core/__init__.py b/core/__init__.py index f9878a5..f5a6fae 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -13,6 +13,6 @@ __all__ = [ "Config", - "get_config", + "get_config", "set_config" -] \ No newline at end of file +] diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index dd86db3..e5a3584 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -17,12 +17,12 @@ class AuthWrapper: Authentication wrapper that provides persistent sessions while maintaining compatibility with existing auth patterns """ - + def __init__(self): self.session_manager = get_session_manager() self.supabase_client = None self._init_supabase() - + def _init_supabase(self): """Initialize Supabase client""" try: @@ -31,19 +31,19 @@ def _init_supabase(self): except Exception as e: logger.log_error(e, "Failed to initialize Supabase") self.supabase_client = None - + def is_authenticated(self) -> bool: """Check if user is authenticated (backward compatible)""" return self.session_manager.is_authenticated() - + def get_user_id(self) -> Optional[str]: """Get current user ID (backward compatible)""" return self.session_manager.get_user_id() - + def get_user_email(self) -> Optional[str]: """Get current user email (backward compatible)""" 
return self.session_manager.get_user_email() - + def authenticate_user(self, email: str, password: str) -> bool: """ Authenticate user with Supabase and create persistent session @@ -51,25 +51,25 @@ def authenticate_user(self, email: str, password: str) -> bool: """ try: logger.logger.info(f"Authentication attempt for: {email}") - + if not self.supabase_client: logger.log_error(Exception("Supabase client not available"), "Authentication failed") return False - + # Get user by email from Supabase result = self.supabase_client.client.table("users").select("*").eq("email", email).execute() - + if not result.data: logger.logger.warning(f"User not found: {email}") return False - + user = result.data[0] stored_password = user.get("password", "") - + # Verify password (bcrypt or legacy) password_valid = False password_migrated = False - + if stored_password.startswith('$2b$'): # bcrypt password password_valid = verify_password(password, stored_password) @@ -78,7 +78,7 @@ def authenticate_user(self, email: str, password: str) -> bool: if legacy_hash_password(password) == stored_password: password_valid = True password_migrated = True - + # Migrate to bcrypt try: new_hash = hash_password(password) @@ -88,71 +88,77 @@ def authenticate_user(self, email: str, password: str) -> bool: logger.logger.info(f"Password migrated to bcrypt for user: {email}") except Exception as e: logger.log_error(e, "Failed to migrate password") - + if password_valid: # Create persistent session using SessionManager if self.session_manager.authenticate_user(user["id"], email): logger.logger.info(f"User authenticated successfully: {email}") - + # Load user preferences from database self._load_user_preferences(user["id"]) - + return True else: - logger.log_error(Exception(f"Failed to create persistent session for: {email}"), "Authentication failed") + logger.log_error( + Exception( + f"Failed to create persistent session for: {email}"), + "Authentication failed") return False else: 
logger.logger.warning(f"Invalid password for user: {email}") return False - + except Exception as e: logger.log_error(e, f"Authentication error for {email}") return False - + def register_user(self, email: str, password: str) -> bool: """Register new user and create session""" try: if not self.supabase_client: logger.log_error(Exception("Supabase client not available for registration"), "Registration failed") return False - + # Check if user already exists existing = self.supabase_client.client.table("users").select("id").eq("email", email).execute() if existing.data: logger.logger.warning(f"User already exists: {email}") return False - + # Hash password hashed_password = hash_password(password) - + # Create user in database user_data = { "email": email, "password": hashed_password, "created_at": "now()" } - + result = self.supabase_client.client.table("users").insert(user_data).execute() - + if result.data: user = result.data[0] logger.logger.info(f"User registered successfully: {email}") - + # Create session for new user if self.session_manager.authenticate_user(user["id"], email): logger.logger.info(f"Session created for new user: {email}") return True else: - logger.log_error(Exception(f"Failed to create session for new user: {email}"), "Registration failed") + logger.log_error( + Exception( + f"Failed to create session for new user: {email}"), + "Registration failed") return False else: logger.log_error(Exception(f"Failed to create user in database: {email}"), "Registration failed") return False - + except Exception as e: logger.log_error(e, f"Registration error for {email}") return False - + def logout(self) -> bool: """Log out user and clear session""" try: @@ -166,52 +172,52 @@ def logout(self) -> bool: except Exception as e: logger.log_error(e, "Logout error") return False - + def _load_user_preferences(self, user_id: str): """Load user preferences from database into session""" try: if not self.supabase_client: return - + # Load API keys api_keys_result = 
self.supabase_client.client.table("api_keys").select( "key_name, key_value" ).eq("user_id", user_id).execute() - + api_keys = {} for item in api_keys_result.data: api_keys[item["key_name"]] = item["key_value"] - + # Load custom prompts prompts_result = self.supabase_client.client.table("prompts").select( "prompt_type, content" ).eq("user_id", user_id).execute() - + prompts = {} for item in prompts_result.data: prompts[item["prompt_type"]] = item["content"] - + # Store in session preferences self.session_manager.set_preference("api_keys", api_keys) self.session_manager.set_preference("custom_prompts", prompts) - + logger.logger.debug(f"Loaded preferences for user: {user_id}") - + except Exception as e: logger.log_error(e, "Failed to load user preferences") - + def get_api_keys(self) -> Dict[str, str]: """Get user API keys from session cache""" return self.session_manager.get_preference("api_keys", {}) - + def update_api_key(self, key_name: str, key_value: str) -> bool: """Update API key in database and session cache""" try: if not self.supabase_client or not self.is_authenticated(): return False - + user_id = self.get_user_id() - + # Update in database result = self.supabase_client.client.table("api_keys").upsert({ "user_id": user_id, @@ -219,35 +225,35 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: "key_value": key_value, "updated_at": "now()" }).execute() - + if result.data: # Update session cache api_keys = self.get_api_keys() api_keys[key_name] = key_value self.session_manager.set_preference("api_keys", api_keys) - + logger.logger.info(f"API key updated: {key_name}") return True else: logger.log_error(Exception(f"Failed to update API key: {key_name}"), "API key update failed") return False - + except Exception as e: logger.log_error(e, f"Error updating API key {key_name}") return False - + def get_custom_prompts(self) -> Dict[str, str]: """Get user custom prompts from session cache""" return 
self.session_manager.get_preference("custom_prompts", {}) - + def update_custom_prompt(self, prompt_type: str, content: str) -> bool: """Update custom prompt in database and session cache""" try: if not self.supabase_client or not self.is_authenticated(): return False - + user_id = self.get_user_id() - + # Update in database result = self.supabase_client.client.table("prompts").upsert({ "user_id": user_id, @@ -255,49 +261,52 @@ def update_custom_prompt(self, prompt_type: str, content: str) -> bool: "content": content, "updated_at": "now()" }).execute() - + if result.data: # Update session cache prompts = self.get_custom_prompts() prompts[prompt_type] = content self.session_manager.set_preference("custom_prompts", prompts) - + logger.info(f"Custom prompt updated: {prompt_type}") return True else: - logger.log_error(Exception(f"Failed to update custom prompt: {prompt_type}"), "Custom prompt update failed") + logger.log_error( + Exception( + f"Failed to update custom prompt: {prompt_type}"), + "Custom prompt update failed") return False - + except Exception as e: logger.log_error(e, f"Error updating custom prompt {prompt_type}") return False - + # Session Manager delegation methods - + def get_preference(self, key: str, default: Any = None) -> Any: """Get user preference (delegated to SessionManager)""" return self.session_manager.get_preference(key, default) - + def set_preference(self, key: str, value: Any) -> bool: """Set user preference (delegated to SessionManager)""" return self.session_manager.set_preference(key, value) - + def get_current_page(self) -> str: """Get current page (delegated to SessionManager)""" return self.session_manager.get_current_page() - + def set_current_page(self, page: str) -> None: """Set current page (delegated to SessionManager)""" self.session_manager.set_current_page(page) - + def is_pipeline_active(self) -> bool: """Check if pipeline is active (delegated to SessionManager)""" return self.session_manager.is_pipeline_active() - + def 
set_pipeline_active(self, active: bool) -> None: """Set pipeline active state (delegated to SessionManager)""" self.session_manager.set_pipeline_active(active) - + def get_session_info(self) -> Dict[str, Any]: """Get session information for debugging""" return self.session_manager.get_session_info() @@ -306,6 +315,7 @@ def get_session_info(self) -> Dict[str, Any]: # Global auth wrapper instance _auth_wrapper = None + def get_auth() -> AuthWrapper: """Get global authentication wrapper instance""" global _auth_wrapper @@ -319,22 +329,27 @@ def authenticate_user(email: str, password: str) -> bool: """Backward compatible authentication function""" return get_auth().authenticate_user(email, password) + def register_user_supabase(email: str, password: str) -> bool: """Backward compatible registration function""" return get_auth().register_user(email, password) + def get_user_api_keys_supabase() -> Dict[str, str]: """Backward compatible API keys function""" return get_auth().get_api_keys() + def update_api_key_supabase(key_name: str, key_value: str) -> bool: """Backward compatible API key update function""" return get_auth().update_api_key(key_name, key_value) + def get_user_prompts_supabase() -> Dict[str, str]: """Backward compatible prompts function""" return get_auth().get_custom_prompts() + def save_user_prompt_supabase(prompt_type: str, content: str) -> bool: """Backward compatible prompt save function""" - return get_auth().update_custom_prompt(prompt_type, content) \ No newline at end of file + return get_auth().update_custom_prompt(prompt_type, content) diff --git a/core/content_generation.py b/core/content_generation.py index d8242b8..9b98b35 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -12,16 +12,17 @@ # Configure logging logger = logging.getLogger(__name__) + def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: """Extract key insights and wisdom from a transcript""" try: # 
Use enhanced prompt system with automatic KB concatenation system_prompt = custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) - + openai_client = get_openai_client() if not openai_client: return "Error: OpenAI API key is not configured." - + response = openai_client.chat.completions.create( model="gpt-4o", messages=[ @@ -30,25 +31,27 @@ def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: ], max_tokens=1500 ) - + return response.choices[0].message.content - + except Exception as e: logger.exception("Error in wisdom generation:") return f"Error generating wisdom: {str(e)}" -def generate_outline(transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: + +def generate_outline(transcript: str, wisdom: str, custom_prompt: str = None, + knowledge_base: Dict[str, str] = None) -> str: """Create a structured outline based on transcript and wisdom""" try: # Use enhanced prompt system with automatic KB concatenation system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) - + content = f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}" - + openai_client = get_openai_client() if not openai_client: return "Error: OpenAI API key is not configured." 
- + response = openai_client.chat.completions.create( model="gpt-4o", messages=[ @@ -57,27 +60,29 @@ def generate_outline(transcript: str, wisdom: str, custom_prompt: str = None, kn ], max_tokens=1500 ) - + return response.choices[0].message.content - + except Exception as e: logger.exception("Error in outline generation:") return f"Error generating outline: {str(e)}" -def generate_article(transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: + +def generate_article(transcript: str, wisdom: str, outline: str, custom_prompt: str = None, + knowledge_base: Dict[str, str] = None) -> str: """Generate a comprehensive article based on transcript, wisdom, and outline""" try: # Use enhanced prompt system with automatic KB concatenation system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) - + # Limit transcript length to avoid token limits transcript_excerpt = transcript[:2000] if len(transcript) > 2000 else transcript content = f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}" - + openai_client = get_openai_client() if not openai_client: return "Error: OpenAI API key is not configured." 
- + response = openai_client.chat.completions.create( model="gpt-4o", messages=[ @@ -86,27 +91,29 @@ def generate_article(transcript: str, wisdom: str, outline: str, custom_prompt: ], max_tokens=2000 ) - + return response.choices[0].message.content - + except Exception as e: logger.exception("Error in article generation:") return f"Error generating article: {str(e)}" -def generate_social_content(wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: + +def generate_social_content(wisdom: str, outline: str, article: str, + custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: """Generate 5 distinct social media posts""" try: # Use enhanced prompt system with automatic KB concatenation system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) - + # Include article in content for richer context article_excerpt = article[:1500] if len(article) > 1500 else article content = f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}" - + openai_client = get_openai_client() if not openai_client: return "Error: OpenAI API key is not configured." - + response = openai_client.chat.completions.create( model="gpt-4o", messages=[ @@ -115,20 +122,21 @@ def generate_social_content(wisdom: str, outline: str, article: str, custom_prom ], max_tokens=1500 ) - + return response.choices[0].message.content - + except Exception as e: logger.exception("Error in social content generation:") return f"Error generating social content: {str(e)}" + def transcribe_audio(audio_file) -> str: """Transcribe audio using OpenAI Whisper - handles both file paths and file objects""" try: openai_client = get_openai_client() if not openai_client: return "Error: OpenAI client not available." 
- + # Handle both file paths (strings) and file objects if isinstance(audio_file, str): # It's a file path, open it @@ -144,8 +152,8 @@ def transcribe_audio(audio_file) -> str: model="whisper-1", file=audio_file ) - + return response.text - + except Exception as e: - return f"Transcription failed: {str(e)}" \ No newline at end of file + return f"Transcription failed: {str(e)}" diff --git a/core/file_upload.py b/core/file_upload.py index 374e48f..54f84aa 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -19,9 +19,10 @@ # Configure logging logger = logging.getLogger(__name__) + class FileUploadManager: """πŸš€ ENHANCED: Large file upload manager with chunking and parallel processing""" - + def __init__(self): self.supported_formats = { 'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], @@ -31,10 +32,10 @@ def __init__(self): self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB self.chunk_size_mb = 20 # 20MB chunks for optimal processing self.max_parallel_chunks = 4 # Process 4 chunks simultaneously - + def create_large_file_upload_zone(self) -> Optional[Any]: """Create enhanced upload zone for large files""" - + # Enhanced upload zone HTML with large file support upload_html = f"""
@@ -68,37 +69,37 @@ def create_large_file_upload_zone(self) -> Optional[Any]:
""" - + # Enhanced CSS for large file upload upload_css = """ """ - + st.markdown(upload_css, unsafe_allow_html=True) st.markdown(upload_html, unsafe_allow_html=True) - + # File uploader with large file support uploaded_file = st.file_uploader( "Choose an audio file", @@ -190,107 +191,107 @@ def create_large_file_upload_zone(self) -> Optional[Any]: help="Upload audio files up to 2GB. Large files will be automatically chunked for optimal processing.", label_visibility="collapsed" ) - + return uploaded_file - + def process_large_file(self, uploaded_file) -> Dict[str, Any]: """πŸš€ Process large files with chunking and parallel transcription""" - + if not uploaded_file: return {"success": False, "error": "No file provided"} - + # Validate file validation = self.validate_large_file(uploaded_file) if not validation["valid"]: return {"success": False, "error": validation["error"]} - + file_size_mb = len(uploaded_file.getvalue()) / (1024 * 1024) - + # Show file info st.markdown(f""" ### πŸ“ File Processing - **File:** {uploaded_file.name} - **Size:** {file_size_mb:.1f} MB + **File:** {uploaded_file.name} + **Size:** {file_size_mb:.1f} MB **Processing Strategy:** {"Chunked Parallel Processing" if file_size_mb > self.chunk_size_mb else "Direct Processing"} """) - + if file_size_mb <= self.chunk_size_mb: # Small file - process directly return self._process_small_file(uploaded_file) else: # Large file - chunk and process in parallel return self._process_large_file_chunked(uploaded_file) - + def _process_small_file(self, uploaded_file) -> Dict[str, Any]: """Process small files directly without chunking""" - + progress_container = st.empty() - + with progress_container.container(): st.markdown("#### 🎡 Processing Audio") progress_bar = st.progress(0.0, "Starting transcription...") - + try: # Import transcription function from .content_generation import transcribe_audio - + # Update progress progress_bar.progress(0.3, "Transcribing audio...") - + # Transcribe transcript = 
transcribe_audio(uploaded_file) - + if not transcript or "Error" in transcript: progress_bar.progress(1.0, "❌ Transcription failed") return {"success": False, "error": transcript or "Transcription failed"} - + progress_bar.progress(1.0, "βœ… Transcription complete!") - + return { "success": True, "transcript": transcript, "chunks": 1, "total_duration": "N/A" } - + except Exception as e: progress_bar.progress(1.0, f"❌ Error: {str(e)}") return {"success": False, "error": str(e)} - + def _process_large_file_chunked(self, uploaded_file) -> Dict[str, Any]: """πŸš€ Process large files with intelligent chunking and parallel transcription""" - + st.markdown("#### πŸ”„ Chunked Processing Pipeline") - + try: # Step 1: Create chunks chunks_info = self._create_audio_chunks(uploaded_file) if not chunks_info["success"]: return chunks_info - + chunks = chunks_info["chunks"] total_chunks = len(chunks) - + st.markdown(f"**Created {total_chunks} chunks for parallel processing**") - + # Step 2: Create progress tracking containers progress_container = st.empty() chunks_container = st.empty() - + # Step 3: Process chunks in parallel with real-time updates transcription_results = self._transcribe_chunks_parallel( chunks, progress_container, chunks_container ) - + if not transcription_results["success"]: return transcription_results - + # Step 4: Reassemble transcript final_transcript = self._reassemble_transcript(transcription_results["chunk_transcripts"]) - + # Step 5: Cleanup temporary files self._cleanup_chunks(chunks) - + # Success! 
with progress_container.container(): st.success("βœ… Large file processing complete!") @@ -300,57 +301,57 @@ def _process_large_file_chunked(self, uploaded_file) -> Dict[str, Any]: - Successful transcriptions: {len(transcription_results['chunk_transcripts'])} - Final transcript length: {len(final_transcript)} characters """) - + return { "success": True, "transcript": final_transcript, "chunks": total_chunks, "processing_time": transcription_results.get("total_time", "N/A") } - + except Exception as e: logger.exception("Error in large file processing:") st.error(f"❌ Large file processing failed: {str(e)}") return {"success": False, "error": str(e)} - + def _create_audio_chunks(self, uploaded_file) -> Dict[str, Any]: """Create audio chunks for parallel processing""" - + try: st.markdown("##### πŸ“‚ Creating Audio Chunks...") - + # Save uploaded file temporarily with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as temp_file: uploaded_file.seek(0) temp_file.write(uploaded_file.read()) temp_file_path = temp_file.name - + # Load audio with pydub audio = AudioSegment.from_file(temp_file_path) duration_ms = len(audio) duration_minutes = duration_ms / (1000 * 60) - + # Calculate chunk duration (aim for ~20MB chunks) chunk_duration_ms = self.chunk_size_mb * 60 * 1000 # Convert MB to minutes to ms num_chunks = math.ceil(duration_ms / chunk_duration_ms) - + st.markdown(f"**Audio Duration:** {duration_minutes:.1f} minutes") - st.markdown(f"**Creating {num_chunks} chunks of ~{chunk_duration_ms/60000:.1f} minutes each**") - + st.markdown(f"**Creating {num_chunks} chunks of ~{chunk_duration_ms / 60000:.1f} minutes each**") + chunks = [] chunk_progress = st.progress(0.0, "Creating chunks...") - + for i in range(num_chunks): start_ms = i * chunk_duration_ms end_ms = min((i + 1) * chunk_duration_ms, duration_ms) - + # Extract chunk chunk = audio[start_ms:end_ms] - + # Save chunk to temporary file chunk_file = 
tempfile.NamedTemporaryFile(delete=False, suffix=".wav") chunk.export(chunk_file.name, format="wav") - + chunks.append({ "index": i, "file_path": chunk_file.name, @@ -358,93 +359,93 @@ def _create_audio_chunks(self, uploaded_file) -> Dict[str, Any]: "end_time": end_ms / 1000, "duration": (end_ms - start_ms) / 1000 }) - + # Update progress progress = (i + 1) / num_chunks chunk_progress.progress(progress, f"Created chunk {i + 1}/{num_chunks}") - + # Cleanup original temp file os.unlink(temp_file_path) - + chunk_progress.progress(1.0, f"βœ… Created {num_chunks} chunks successfully!") - + return {"success": True, "chunks": chunks} - + except Exception as e: logger.exception("Error creating audio chunks:") return {"success": False, "error": f"Failed to create chunks: {str(e)}"} - + def _transcribe_chunks_parallel(self, chunks: List[Dict], progress_container, chunks_container) -> Dict[str, Any]: """πŸš€ Transcribe chunks in parallel with real-time progress tracking""" - + total_chunks = len(chunks) completed_chunks = 0 chunk_transcripts = {} chunk_statuses = {i: "waiting" for i in range(total_chunks)} start_time = time.time() - + # Import transcription function from .content_generation import get_openai_client - + def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: """Transcribe a single chunk""" try: chunk_index = chunk_info["index"] chunk_file_path = chunk_info["file_path"] - + # Update status to processing chunk_statuses[chunk_index] = "processing" - + # Get OpenAI client openai_client = get_openai_client() if not openai_client: return chunk_index, "Error: OpenAI API key not configured", False - + # Transcribe chunk with open(chunk_file_path, "rb") as audio_file: transcript = openai_client.audio.transcriptions.create( model="whisper-1", file=audio_file ) - + chunk_statuses[chunk_index] = "completed" return chunk_index, transcript.text, True - + except Exception as e: chunk_statuses[chunk_index] = "error" logger.exception(f"Error transcribing chunk 
{chunk_index}:") return chunk_index, f"Error: {str(e)}", False - + # Process chunks in parallel with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: # Submit all chunks for processing future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk["index"] + executor.submit(transcribe_single_chunk, chunk): chunk["index"] for chunk in chunks } - + # Monitor progress in real-time while completed_chunks < total_chunks: # Update progress display with progress_container.container(): overall_progress = completed_chunks / total_chunks st.progress(overall_progress, f"Transcribing chunks: {completed_chunks}/{total_chunks}") - + # Update individual chunk statuses with chunks_container.container(): st.markdown("##### 🧩 Chunk Processing Status") - + # Create columns for chunk status display cols_per_row = 4 rows = math.ceil(total_chunks / cols_per_row) - + for row in range(rows): cols = st.columns(cols_per_row) for col_idx in range(cols_per_row): chunk_idx = row * cols_per_row + col_idx if chunk_idx < total_chunks: status = chunk_statuses[chunk_idx] - + if status == "waiting": icon, color, text = "⏳", "#FFA500", "Waiting" elif status == "processing": @@ -453,7 +454,7 @@ def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: icon, color, text = "βœ…", "#00FF7F", "Complete" else: # error icon, color, text = "❌", "#FF6B6B", "Error" - + with cols[col_idx]: st.markdown(f"""
{text}
""", unsafe_allow_html=True) - + # Check for completed futures for future in as_completed(future_to_chunk, timeout=1): chunk_index, transcript, success = future.result() - + if success: chunk_transcripts[chunk_index] = transcript - + completed_chunks += 1 break - + # Small delay to prevent excessive updates time.sleep(0.5) - + # Final progress update with progress_container.container(): st.progress(1.0, f"βœ… All chunks transcribed: {completed_chunks}/{total_chunks}") - + processing_time = time.time() - start_time - + # Check if we have enough successful transcriptions successful_chunks = len(chunk_transcripts) if successful_chunks < total_chunks * 0.8: # Require at least 80% success @@ -496,23 +497,23 @@ def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: "success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" } - + return { "success": True, "chunk_transcripts": chunk_transcripts, "total_time": f"{processing_time:.1f}s", "success_rate": f"{successful_chunks}/{total_chunks}" } - + def _reassemble_transcript(self, chunk_transcripts: Dict[int, str]) -> str: """Reassemble transcript from chunks in correct order""" - + # Sort chunks by index and concatenate sorted_chunks = sorted(chunk_transcripts.items()) full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) - + return full_transcript - + def _cleanup_chunks(self, chunks: List[Dict]): """Clean up temporary chunk files""" for chunk in chunks: @@ -521,29 +522,30 @@ def _cleanup_chunks(self, chunks: List[Dict]): os.unlink(chunk["file_path"]) except Exception as e: logger.warning(f"Failed to cleanup chunk file {chunk['file_path']}: {e}") - + def validate_large_file(self, file) -> Dict[str, Any]: """Validate large file upload""" if not file: return {"valid": False, "error": "No file provided"} - + # Check file size file_size = len(file.getvalue()) if file_size > self.max_file_size: size_gb = file_size / (1024 * 1024 * 1024) return 
{"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - + # Check file type file_extension = os.path.splitext(file.name)[1].lower() if file_extension not in self.supported_formats['audio']: return {"valid": False, "error": f"Unsupported format: {file_extension}"} - + return {"valid": True} # Create alias for backward compatibility LargeFileUploadManager = FileUploadManager + def create_upload_progress_indicator(filename: str, progress: float = 0.0): """Create a progress indicator for file upload""" progress_html = f""" @@ -561,7 +563,7 @@ def create_upload_progress_indicator(filename: str, progress: float = 0.0): """ - + progress_css = """ """ - + return st.markdown(progress_css + progress_html, unsafe_allow_html=True) + def simulate_upload_progress(filename: str, duration: float = 2.0): """Simulate upload progress for demonstration""" progress_container = st.empty() - + steps = 20 for i in range(steps + 1): progress = (i / steps) * 100 - + with progress_container: create_upload_progress_indicator(filename, progress) - + if i < steps: time.sleep(duration / steps) - + return True class EnhancedLargeFileProcessor: """πŸš€ Enhanced Large File Processor with FFmpeg for 2GB+ files - + Features: - FFmpeg-based processing for memory efficiency - Support for files up to 2GB @@ -662,7 +665,7 @@ class EnhancedLargeFileProcessor: - Memory-efficient streaming without loading entire files into RAM - Enhanced error handling with automatic fallback """ - + def __init__(self): self.supported_formats = { 'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], @@ -672,48 +675,48 @@ def __init__(self): self.chunk_duration_minutes = 10 # 10-minute chunks optimized for Whisper self.max_parallel_chunks = 4 # Process 4 chunks simultaneously self.temp_dir = None - + def check_ffmpeg_availability(self) -> bool: """Check if FFmpeg is available on the system""" try: import subprocess - result = subprocess.run(['ffmpeg', 
'-version'], - capture_output=True, text=True, timeout=5) + result = subprocess.run(['ffmpeg', '-version'], + capture_output=True, text=True, timeout=5) return result.returncode == 0 except (subprocess.TimeoutExpired, FileNotFoundError, Exception): return False - + def get_audio_info(self, file_path: str) -> Dict[str, Any]: """Get audio file information using ffprobe""" try: import subprocess import json - + cmd = [ 'ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', file_path ] - + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) if result.returncode != 0: return {"error": f"ffprobe failed: {result.stderr}"} - + data = json.loads(result.stdout) format_info = data.get('format', {}) - + # Find audio stream audio_stream = None for stream in data.get('streams', []): if stream.get('codec_type') == 'audio': audio_stream = stream break - + if not audio_stream: return {"error": "No audio stream found"} - + duration = float(format_info.get('duration', 0)) size = int(format_info.get('size', 0)) - + return { "duration": duration, "size": size, @@ -722,34 +725,34 @@ def get_audio_info(self, file_path: str) -> Dict[str, Any]: "sample_rate": int(audio_stream.get('sample_rate', 0)), "channels": int(audio_stream.get('channels', 0)) } - + except Exception as e: return {"error": f"Failed to get audio info: {str(e)}"} - + def validate_file(self, uploaded_file) -> Dict[str, Any]: """Enhanced file validation for large files""" if not uploaded_file: return {"valid": False, "error": "No file provided"} - + # Check file size file_size = len(uploaded_file.getvalue()) if file_size > self.max_file_size: size_gb = file_size / (1024 * 1024 * 1024) return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - + # Check file extension file_extension = os.path.splitext(uploaded_file.name)[1].lower() all_formats = self.supported_formats['audio'] + self.supported_formats['video'] if file_extension not in all_formats: return 
{"valid": False, "error": f"Unsupported format: {file_extension}"} - + # Check FFmpeg availability for large files if file_size > 100 * 1024 * 1024 and not self.check_ffmpeg_availability(): # 100MB+ return { - "valid": False, + "valid": False, "error": "FFmpeg required for large files but not available. Please install FFmpeg." } - + return { "valid": True, "size": file_size, @@ -757,10 +760,10 @@ def validate_file(self, uploaded_file) -> Dict[str, Any]: "requires_chunking": file_size > 100 * 1024 * 1024, # Chunk files > 100MB "format": file_extension } - + def create_enhanced_upload_interface(self) -> Optional[Any]: """Create enhanced upload interface for large files""" - + # Enhanced upload zone HTML upload_html = f"""
@@ -802,14 +805,14 @@ def create_enhanced_upload_interface(self) -> Optional[Any]:
""" - + # Enhanced CSS upload_css = """ """ - + st.markdown(upload_css, unsafe_allow_html=True) st.markdown(upload_html, unsafe_allow_html=True) - + # Enhanced file uploader uploaded_file = st.file_uploader( "Choose an audio or video file", - type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga', + type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga', 'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'], help="Upload audio/video files up to 2GB. Large files automatically use FFmpeg chunking for optimal processing.", label_visibility="collapsed" ) - + return uploaded_file - + def process_large_file(self, uploaded_file) -> Dict[str, Any]: """Enhanced large file processing with FFmpeg""" - + # Validate file first validation = self.validate_file(uploaded_file) if not validation["valid"]: return {"success": False, "error": validation["error"]} - + file_size_mb = validation["size_mb"] requires_chunking = validation["requires_chunking"] - + st.info(f"πŸ“ **File:** {uploaded_file.name} ({file_size_mb:.1f} MB)") - + if requires_chunking: st.info("πŸ”§ **Processing Method:** FFmpeg chunking (large file detected)") return self._process_with_ffmpeg_chunking(uploaded_file) else: st.info("⚑ **Processing Method:** Standard processing (small file)") return self._process_standard(uploaded_file) - + def _process_standard(self, uploaded_file) -> Dict[str, Any]: """Process smaller files using standard method""" try: from core.content_generation import transcribe_audio - + # Create temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file: tmp_file.write(uploaded_file.getvalue()) tmp_file_path = tmp_file.name - + try: # Transcribe directly with st.spinner("🎯 Transcribing audio..."): transcript = transcribe_audio(tmp_file_path) - + return { "success": True, "transcript": transcript, "method": "standard", "chunks_processed": 1 } - + finally: # Cleanup if 
os.path.exists(tmp_file_path): os.unlink(tmp_file_path) - + except Exception as e: return {"success": False, "error": f"Standard processing failed: {str(e)}"} - + def _process_with_ffmpeg_chunking(self, uploaded_file) -> Dict[str, Any]: """Process large files using FFmpeg chunking""" - + # Setup temporary directory self.temp_dir = tempfile.mkdtemp(prefix="whisperforge_chunks_") - + try: # Save uploaded file input_file_path = os.path.join(self.temp_dir, uploaded_file.name) with open(input_file_path, 'wb') as f: f.write(uploaded_file.getvalue()) - + # Get audio information st.info("πŸ” Analyzing audio file...") audio_info = self.get_audio_info(input_file_path) - + if "error" in audio_info: return {"success": False, "error": audio_info["error"]} - + duration = audio_info["duration"] - st.success(f"πŸ“Š **Duration:** {duration/60:.1f} minutes | **Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}") - + st.success( + f"πŸ“Š **Duration:** {duration / 60:.1f} minutes | **Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}") + # Create chunks using FFmpeg st.info("βœ‚οΈ Creating audio chunks...") chunks_result = self._create_ffmpeg_chunks(input_file_path, duration) - + if not chunks_result["success"]: return chunks_result - + chunks = chunks_result["chunks"] st.success(f"βœ… Created {len(chunks)} chunks of ~{self.chunk_duration_minutes} minutes each") - + # Process chunks in parallel st.info("πŸš€ Starting parallel transcription...") transcription_result = self._transcribe_chunks_parallel_ffmpeg(chunks) - + if not transcription_result["success"]: return transcription_result - + # Reassemble transcript full_transcript = self._reassemble_transcript_ffmpeg(transcription_result["chunk_transcripts"]) - + return { "success": True, "transcript": full_transcript, @@ -1063,30 +1067,30 @@ def _process_with_ffmpeg_chunking(self, uploaded_file) -> Dict[str, Any]: "processing_time": transcription_result.get("total_time", "unknown"), "success_rate": 
transcription_result.get("success_rate", "unknown") } - + except Exception as e: return {"success": False, "error": f"FFmpeg processing failed: {str(e)}"} - + finally: # Cleanup temporary directory self._cleanup_temp_dir() - + def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> Dict[str, Any]: """Create audio chunks using FFmpeg""" try: import subprocess - + chunk_duration_seconds = self.chunk_duration_minutes * 60 num_chunks = math.ceil(duration / chunk_duration_seconds) chunks = [] - + progress_bar = st.progress(0, f"Creating chunks: 0/{num_chunks}") - + for i in range(num_chunks): start_time = i * chunk_duration_seconds chunk_filename = f"chunk_{i:03d}.wav" chunk_path = os.path.join(self.temp_dir, chunk_filename) - + # FFmpeg command to extract chunk with audio optimization cmd = [ 'ffmpeg', '-i', input_file_path, @@ -1098,114 +1102,118 @@ def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> Dict[s '-y', # Overwrite output files chunk_path ] - + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - + if result.returncode != 0: return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} - + # Verify chunk was created if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0: continue # Skip empty chunks - + chunks.append({ "index": i, "file_path": chunk_path, "start_time": start_time, "duration": min(chunk_duration_seconds, duration - start_time) }) - + # Update progress progress_bar.progress((i + 1) / num_chunks, f"Creating chunks: {i + 1}/{num_chunks}") - + return {"success": True, "chunks": chunks} - + except Exception as e: return {"success": False, "error": f"Chunk creation failed: {str(e)}"} - + def _transcribe_chunks_parallel_ffmpeg(self, chunks: List[Dict]) -> Dict[str, Any]: """Transcribe chunks in parallel using ThreadPoolExecutor""" from core.content_generation import transcribe_audio - + chunk_transcripts = {} total_chunks = len(chunks) - + # Create 
progress containers progress_container = st.empty() status_container = st.empty() - + def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: """Transcribe a single chunk""" try: chunk_index = chunk_info["index"] file_path = chunk_info["file_path"] - + transcript = transcribe_audio(file_path) return chunk_index, transcript, True - + except Exception as e: logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False - + start_time = time.time() completed_chunks = 0 - + # Process chunks in parallel with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: # Submit all chunks future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk + executor.submit(transcribe_single_chunk, chunk): chunk for chunk in chunks } - + # Process completed futures for future in as_completed(future_to_chunk): chunk_index, transcript, success = future.result() - + if success: chunk_transcripts[chunk_index] = transcript - + completed_chunks += 1 - + # Update progress progress = completed_chunks / total_chunks with progress_container: st.progress(progress, f"Transcribing: {completed_chunks}/{total_chunks} chunks") - + with status_container: elapsed = time.time() - start_time if completed_chunks > 0: eta = (elapsed / completed_chunks) * (total_chunks - completed_chunks) - st.info(f"⏱️ Elapsed: {elapsed:.1f}s | ETA: {eta:.1f}s | Success: {len(chunk_transcripts)}/{completed_chunks}") - + st.info( + f"⏱️ Elapsed: { + elapsed:.1f}s | ETA: { + eta:.1f}s | Success: { + len(chunk_transcripts)}/{completed_chunks}") + processing_time = time.time() - start_time successful_chunks = len(chunk_transcripts) - + # Check success rate if successful_chunks < total_chunks * 0.7: # Require at least 70% success return { "success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" } - + return { "success": True, 
"chunk_transcripts": chunk_transcripts, "total_time": f"{processing_time:.1f}s", "success_rate": f"{successful_chunks}/{total_chunks}" } - + def _reassemble_transcript_ffmpeg(self, chunk_transcripts: Dict[int, str]) -> str: """Reassemble transcript from chunks in correct order""" # Sort chunks by index and concatenate sorted_chunks = sorted(chunk_transcripts.items()) full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) - + # Clean up transcript full_transcript = full_transcript.strip() - + return full_transcript - + def _cleanup_temp_dir(self): """Clean up temporary directory and all files""" if self.temp_dir and os.path.exists(self.temp_dir): @@ -1214,4 +1222,4 @@ def _cleanup_temp_dir(self): shutil.rmtree(self.temp_dir) self.temp_dir = None except Exception as e: - logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") \ No newline at end of file + logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") diff --git a/core/health_check.py b/core/health_check.py index 806100b..f44b70d 100644 --- a/core/health_check.py +++ b/core/health_check.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Any, Dict + @dataclass class HealthStatus: status: str diff --git a/core/logging_config.py b/core/logging_config.py index 87e8220..52372eb 100644 --- a/core/logging_config.py +++ b/core/logging_config.py @@ -12,27 +12,28 @@ import traceback from typing import Dict, Any, Optional + class WhisperForgeLogger: """Enhanced logger with context and structured output""" - + def __init__(self, name: str = "whisperforge"): self.name = name self.logger = logging.getLogger(name) self.setup_logging() - + def setup_logging(self): """Configure logging with multiple handlers and formats""" - + # Clear existing handlers self.logger.handlers.clear() - + # Set base level self.logger.setLevel(logging.DEBUG) - + # Create logs directory logs_dir = Path("logs") logs_dir.mkdir(exist_ok=True) - + # Console handler with color 
coding console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) @@ -41,7 +42,7 @@ def setup_logging(self): datefmt='%H:%M:%S' ) console_handler.setFormatter(console_formatter) - + # File handler for all logs file_handler = logging.FileHandler( logs_dir / f"whisperforge_{datetime.now().strftime('%Y%m%d')}.log" @@ -52,19 +53,19 @@ def setup_logging(self): datefmt='%Y-%m-%d %H:%M:%S' ) file_handler.setFormatter(file_formatter) - + # Error handler for critical issues error_handler = logging.FileHandler( logs_dir / f"errors_{datetime.now().strftime('%Y%m%d')}.log" ) error_handler.setLevel(logging.ERROR) error_handler.setFormatter(file_formatter) - + # Add handlers self.logger.addHandler(console_handler) self.logger.addHandler(file_handler) self.logger.addHandler(error_handler) - + def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None): """Log pipeline step with structured data""" log_data = { @@ -73,7 +74,7 @@ def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None) "timestamp": datetime.now().isoformat(), "data": data or {} } - + if status == "started": self.logger.info(f"πŸ”„ Pipeline step started: {step}") elif status == "completed": @@ -82,10 +83,10 @@ def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None) self.logger.error(f"❌ Pipeline step failed: {step}") if data and "error" in data: self.logger.error(f"Error details: {data['error']}") - + # Log structured data to file self._log_structured(log_data) - + def log_file_upload(self, filename: str, size_mb: float, file_type: str): """Log file upload details""" self.logger.info(f"πŸ“ File uploaded: {filename} ({size_mb:.1f}MB, {file_type})") @@ -96,7 +97,7 @@ def log_file_upload(self, filename: str, size_mb: float, file_type: str): "file_type": file_type, "timestamp": datetime.now().isoformat() }) - + def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): """Log AI 
API requests""" self.logger.info(f"πŸ€– AI request: {provider}/{model} for {prompt_type}") @@ -108,14 +109,14 @@ def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: Op "tokens": tokens, "timestamp": datetime.now().isoformat() }) - + def log_database_operation(self, operation: str, table: str, success: bool, error: Optional[str] = None): """Log database operations""" status = "βœ…" if success else "❌" self.logger.info(f"{status} Database {operation}: {table}") if error: self.logger.error(f"Database error: {error}") - + self._log_structured({ "event": "database_operation", "operation": operation, @@ -124,7 +125,7 @@ def log_database_operation(self, operation: str, table: str, success: bool, erro "error": error, "timestamp": datetime.now().isoformat() }) - + def log_user_action(self, action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): """Log user actions""" self.logger.info(f"πŸ‘€ User action: {action} (user: {user_id or 'anonymous'})") @@ -135,19 +136,19 @@ def log_user_action(self, action: str, user_id: Optional[str] = None, details: O "details": details or {}, "timestamp": datetime.now().isoformat() }) - + def log_error(self, error: Exception, context: Optional[str] = None): """Log errors with full context""" error_msg = str(error) error_type = type(error).__name__ - + self.logger.error(f"πŸ’₯ {error_type}: {error_msg}") if context: self.logger.error(f"Context: {context}") - + # Log full traceback self.logger.error(f"Traceback:\n{traceback.format_exc()}") - + self._log_structured({ "event": "error", "error_type": error_type, @@ -156,11 +157,11 @@ def log_error(self, error: Exception, context: Optional[str] = None): "traceback": traceback.format_exc(), "timestamp": datetime.now().isoformat() }) - + def _log_structured(self, data: Dict[str, Any]): """Log structured data to JSON file""" json_log_file = Path("logs") / f"structured_{datetime.now().strftime('%Y%m%d')}.jsonl" - + try: with open(json_log_file, "a") as f: 
f.write(json.dumps(data) + "\n") @@ -180,41 +181,50 @@ def __getattr__(self, name): """ return getattr(self.logger, name) + class ColoredFormatter(logging.Formatter): """Colored console formatter""" - + COLORS = { 'DEBUG': '\033[36m', # Cyan 'INFO': '\033[32m', # Green 'WARNING': '\033[33m', # Yellow 'ERROR': '\033[31m', # Red - 'CRITICAL': '\033[35m', # Magenta + 'CRITICAL': '\033[35m', # Magenta 'RESET': '\033[0m' # Reset } - + def format(self, record): log_color = self.COLORS.get(record.levelname, self.COLORS['RESET']) record.levelname = f"{log_color}{record.levelname}{self.COLORS['RESET']}" return super().format(record) + # Global logger instance logger = WhisperForgeLogger() # Convenience functions + + def log_pipeline_step(step: str, status: str, data: Optional[Dict] = None): logger.log_pipeline_step(step, status, data) + def log_file_upload(filename: str, size_mb: float, file_type: str): logger.log_file_upload(filename, size_mb, file_type) + def log_ai_request(provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): logger.log_ai_request(provider, model, prompt_type, tokens) + def log_database_operation(operation: str, table: str, success: bool, error: Optional[str] = None): logger.log_database_operation(operation, table, success, error) + def log_user_action(action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): logger.log_user_action(action, user_id, details) + def log_error(error: Exception, context: Optional[str] = None): - logger.log_error(error, context) \ No newline at end of file + logger.log_error(error, context) diff --git a/core/metrics_exporter.py b/core/metrics_exporter.py index 60928fd..97c1852 100644 --- a/core/metrics_exporter.py +++ b/core/metrics_exporter.py @@ -75,4 +75,3 @@ def export_json_metrics() -> Dict[str, Any]: "histograms": metrics_exporter["histograms"], } )) - diff --git a/core/monitoring.py b/core/monitoring.py index 98d2a99..632d05b 100644 --- a/core/monitoring.py +++ 
b/core/monitoring.py @@ -184,4 +184,3 @@ def track_user_action(self, action: str, user_id: Optional[str] = None) -> None: def get_monitoring_manager() -> MonitoringManager: return MonitoringManager() - diff --git a/core/notifications.py b/core/notifications.py index 7157ef9..00a297f 100644 --- a/core/notifications.py +++ b/core/notifications.py @@ -8,29 +8,30 @@ from typing import Optional, Literal from datetime import datetime + class NotificationManager: """Manages beautiful notifications and status updates""" - + def __init__(self): self.notifications = [] - - def show_toast(self, - message: str, + + def show_toast(self, + message: str, type: Literal["success", "error", "warning", "info"] = "info", duration: float = 3.0, icon: Optional[str] = None): """Show a beautiful toast notification""" - + # Auto-select icon based on type if not provided if not icon: icons = { "success": "βœ…", - "error": "❌", + "error": "❌", "warning": "⚠️", "info": "ℹ️" } icon = icons.get(type, "ℹ️") - + # Color scheme for different types colors = { "success": { @@ -40,7 +41,7 @@ def show_toast(self, }, "error": { "bg": "rgba(248, 114, 114, 0.1)", - "border": "rgba(248, 114, 114, 0.4)", + "border": "rgba(248, 114, 114, 0.4)", "text": "#F87272" }, "warning": { @@ -54,9 +55,9 @@ def show_toast(self, "text": "#3ABFF8" } } - + color = colors[type] - + toast_html = f"""
@@ -67,7 +68,7 @@ def show_toast(self,
- + - + """ - + return st.markdown(toast_html, unsafe_allow_html=True) - - def show_status_indicator(self, - status: str, - details: str = "", - animated: bool = True): + + def show_status_indicator(self, + status: str, + details: str = "", + animated: bool = True): """Show a status indicator with optional animation""" - + # Status configurations status_configs = { "processing": { @@ -159,28 +160,28 @@ def show_status_indicator(self, "border": "rgba(58, 191, 248, 0.3)" }, "complete": { - "icon": "βœ…", + "icon": "βœ…", "color": "#36D399", "bg": "rgba(54, 211, 153, 0.1)", "border": "rgba(54, 211, 153, 0.3)" }, "error": { "icon": "❌", - "color": "#F87272", + "color": "#F87272", "bg": "rgba(248, 114, 114, 0.1)", "border": "rgba(248, 114, 114, 0.3)" }, "waiting": { "icon": "⏳", "color": "#FBBD23", - "bg": "rgba(251, 189, 35, 0.1)", + "bg": "rgba(251, 189, 35, 0.1)", "border": "rgba(251, 189, 35, 0.3)" } } - + config = status_configs.get(status, status_configs["waiting"]) animation_class = "status-animated" if animated else "" - + status_html = f"""
@@ -189,7 +190,7 @@ def show_status_indicator(self,
{f'
{details}
' if details else ''}
- + """ - + return st.markdown(status_html, unsafe_allow_html=True) + # Global notification manager instance notification_manager = NotificationManager() # Convenience functions + + def show_success(message: str, duration: float = 3.0): """Show success notification""" return notification_manager.show_toast(message, "success", duration) + def show_error(message: str, duration: float = 5.0): """Show error notification""" return notification_manager.show_toast(message, "error", duration) + def show_warning(message: str, duration: float = 4.0): """Show warning notification""" return notification_manager.show_toast(message, "warning", duration) + def show_info(message: str, duration: float = 3.0): """Show info notification""" return notification_manager.show_toast(message, "info", duration) + def show_processing_status(details: str = ""): """Show processing status indicator""" return notification_manager.show_status_indicator("processing", details, True) + def show_complete_status(details: str = ""): """Show completion status indicator""" return notification_manager.show_status_indicator("complete", details, False) + def show_error_status(details: str = ""): """Show error status indicator""" return notification_manager.show_status_indicator("error", details, False) + def create_step_completion_animation(): """Create a beautiful step completion animation""" animation_html = """ @@ -277,7 +288,7 @@ def create_step_completion_animation():
Step Complete!
- + """ - + return st.markdown(animation_html, unsafe_allow_html=True) + def create_loading_spinner(text: str = "Processing..."): """Create a beautiful loading spinner""" spinner_html = f""" @@ -356,7 +368,7 @@ def create_loading_spinner(text: str = "Processing..."):
{text}
- + """ - - return st.markdown(spinner_html, unsafe_allow_html=True) \ No newline at end of file + + return st.markdown(spinner_html, unsafe_allow_html=True) diff --git a/core/session_manager.py b/core/session_manager.py index 7a5607e..fe0ec18 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -6,6 +6,7 @@ logger = logging.getLogger(__name__) + class SessionManager: """Simple file-based session persistence""" @@ -109,6 +110,7 @@ def get_session_info(self): "last_activity": self.data.get("last_activity"), } + def get_session_manager() -> SessionManager: if "_session_manager" not in st.session_state: st.session_state._session_manager = SessionManager() diff --git a/core/streaming_pipeline.py b/core/streaming_pipeline.py index 7b3f526..75bff6a 100644 --- a/core/streaming_pipeline.py +++ b/core/streaming_pipeline.py @@ -18,17 +18,17 @@ class StreamingPipelineController: """Controls step-by-step pipeline execution with real-time UI updates""" - + # Define pipeline steps as class constant PIPELINE_STEPS = [ - "upload_validation", "transcription", "wisdom_extraction", - "research_enrichment", "outline_creation", "article_creation", + "upload_validation", "transcription", "wisdom_extraction", + "research_enrichment", "outline_creation", "article_creation", "social_content", "image_prompts", "database_storage" ] - + def __init__(self): self.reset_pipeline() - + def reset_pipeline(self): """Reset pipeline state for new processing""" st.session_state.pipeline_active = False @@ -36,13 +36,13 @@ def reset_pipeline(self): st.session_state.pipeline_results = {} st.session_state.pipeline_errors = {} st.session_state.pipeline_audio_file = None - + def start_pipeline(self, audio_file): """Initialize pipeline for processing with large file support""" self.reset_pipeline() st.session_state.pipeline_active = True st.session_state.pipeline_audio_file = audio_file - + # Store file info for later use file_size_mb = len(audio_file.getvalue()) / (1024 * 1024) 
st.session_state.pipeline_file_info = { @@ -51,7 +51,7 @@ def start_pipeline(self, audio_file): "size_mb": file_size_mb, "is_large_file": file_size_mb > 20 # Flag for large file processing } - + # Initialize required session state if missing if not hasattr(st.session_state, 'prompts'): st.session_state.prompts = {} @@ -61,39 +61,39 @@ def start_pipeline(self, audio_file): st.session_state.ai_provider = "OpenAI" if not hasattr(st.session_state, 'ai_model'): st.session_state.ai_model = "gpt-4o" - + def process_next_step(self): """Process the next step in the pipeline""" if not st.session_state.pipeline_active: return False - + step_index = st.session_state.pipeline_step_index - + if step_index >= len(self.PIPELINE_STEPS): # Pipeline complete st.session_state.pipeline_active = False return False - + step_id = self.PIPELINE_STEPS[step_index] - + try: # Show immediate status update with st.status(f"Processing {step_id.replace('_', ' ').title()}...", expanded=True): st.write(f"Step {step_index + 1} of {len(self.PIPELINE_STEPS)}: {step_id.replace('_', ' ')}") - + # Process the step result = self._execute_step(step_id, step_index) - + # Store result st.session_state.pipeline_results[step_id] = result - + st.write("βœ… Complete!") - + # Move to next step st.session_state.pipeline_step_index += 1 - + return True - + except Exception as e: # Handle step error error_msg = str(e) @@ -101,13 +101,13 @@ def process_next_step(self): st.session_state.pipeline_active = False st.error(f"❌ Error in {step_id}: {error_msg}") return False - + def _execute_step(self, step_id: str, step_index: int) -> Any: """Execute a specific pipeline step""" - + # Add visible thinking at step start thinking_step_start(step_id) - + try: if step_id == "upload_validation": result = self._step_upload_validation() @@ -129,80 +129,80 @@ def _execute_step(self, step_id: str, step_index: int) -> Any: result = self._step_database_storage() else: raise Exception(f"Unknown step: {step_id}") - + # Add success 
thinking thinking_step_complete(step_id) return result - + except Exception as e: # Add error thinking thinking_error(step_id, str(e)) raise - + def _step_upload_validation(self) -> Dict[str, Any]: """Step 1: Validate uploaded file""" file_info = st.session_state.pipeline_file_info - + if file_info["size_mb"] > 25: raise Exception(f"File too large: {file_info['size_mb']:.1f}MB (max 25MB)") - + # Simulate validation time time.sleep(0.5) - + return { "status": "validated", "file_name": file_info["name"], "file_size_mb": file_info["size_mb"] } - + def _step_transcription(self) -> str: """Step 2: Transcribe audio with large file support""" audio_file = st.session_state.pipeline_audio_file file_info = st.session_state.pipeline_file_info - + # Check if this is a large file requiring chunked processing if file_info.get("is_large_file", False): return self._transcribe_large_file(audio_file) else: return self._transcribe_small_file(audio_file) - + def _transcribe_small_file(self, audio_file) -> str: """Transcribe small files directly""" from .content_generation import transcribe_audio - + st.write("🎡 **Processing small file directly...**") - + transcript = transcribe_audio(audio_file) if not transcript: raise Exception("Failed to transcribe audio - transcript is empty") - + if "Error" in transcript: raise Exception(f"Transcription failed: {transcript}") - + # Store in session for access by later steps st.session_state.pipeline_transcript = transcript return transcript - + def _transcribe_large_file(self, audio_file) -> str: """πŸš€ Transcribe large files using chunked processing""" from .file_upload import LargeFileUploadManager - + st.write("πŸš€ **Processing large file with chunked transcription...**") - + # Create upload manager for large file processing upload_manager = LargeFileUploadManager() - + # Process the large file with chunking result = upload_manager.process_large_file(audio_file) - + if not result["success"]: raise Exception(f"Large file transcription failed: 
{result['error']}") - + transcript = result["transcript"] - + if not transcript: raise Exception("Large file transcription produced empty result") - + # Show processing summary st.success(f""" βœ… **Large File Transcription Complete!** @@ -210,38 +210,39 @@ def _transcribe_large_file(self, audio_file) -> str: - **Processing time:** {result.get('processing_time', 'N/A')} - **Transcript length:** {len(transcript)} characters """) - + # Store in session for access by later steps st.session_state.pipeline_transcript = transcript return transcript - + def _step_wisdom_extraction(self) -> str: """Step 3: Extract wisdom""" transcript = st.session_state.pipeline_transcript - + # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("wisdom_extraction") if hasattr(st.session_state, 'prompts') else None - + custom_prompt = st.session_state.prompts.get( + "wisdom_extraction") if hasattr(st.session_state, 'prompts') else None + wisdom = generate_wisdom( - transcript, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, + transcript, + st.session_state.ai_provider, + st.session_state.ai_model, + custom_prompt, st.session_state.knowledge_base ) - + # Handle editor mode if st.session_state.get("editor_enabled", False): critique = editor_critique( - wisdom, "wisdom_extraction", - st.session_state.ai_provider, + wisdom, "wisdom_extraction", + st.session_state.ai_provider, st.session_state.ai_model, st.session_state.knowledge_base ) - + # Store critique for display st.session_state.pipeline_results["wisdom_critique"] = critique - + # Generate revision based on critique revision_prompt = f"""Based on this editorial feedback, please revise the wisdom extraction: @@ -252,27 +253,27 @@ def _step_wisdom_extraction(self) -> str: {wisdom} Please provide an improved version that addresses the feedback.""" - + wisdom = generate_wisdom( - transcript, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, + transcript, + 
st.session_state.ai_provider, + st.session_state.ai_model, + revision_prompt, st.session_state.knowledge_base ) - + # Store in session for later steps AND results for display st.session_state.pipeline_wisdom = wisdom return wisdom - + def _step_research_enrichment(self) -> Dict[str, Any]: """Step 3.5: Research Enrichment - NEW STEP""" wisdom = st.session_state.pipeline_wisdom transcript = st.session_state.pipeline_transcript - + # Check if research enrichment is enabled (default True for paid users) research_enabled = st.session_state.get("research_enabled", True) - + # Generate research enrichment research_data = generate_research_enrichment( wisdom=wisdom, @@ -281,41 +282,42 @@ def _step_research_enrichment(self) -> Dict[str, Any]: ai_model=st.session_state.ai_model, enabled=research_enabled ) - + # Store in session for access by later steps st.session_state.pipeline_research = research_data return research_data - + def _step_outline_creation(self) -> str: """Step 4: Create outline""" transcript = st.session_state.pipeline_transcript wisdom = st.session_state.pipeline_wisdom research = st.session_state.pipeline_results.get("research_enrichment", {}) - + # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("outline_creation") if hasattr(st.session_state, 'prompts') else None - + custom_prompt = st.session_state.prompts.get( + "outline_creation") if hasattr(st.session_state, 'prompts') else None + outline = generate_outline( transcript, wisdom, - research, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, + research, + st.session_state.ai_provider, + st.session_state.ai_model, + custom_prompt, st.session_state.knowledge_base ) - + # Handle editor mode if st.session_state.get("editor_enabled", False): critique = editor_critique( - outline, "outline_creation", - st.session_state.ai_provider, + outline, "outline_creation", + st.session_state.ai_provider, st.session_state.ai_model, st.session_state.knowledge_base ) 
- + st.session_state.pipeline_results["outline_critique"] = critique - + revision_prompt = f"""Based on this editorial feedback, please revise the outline: EDITORIAL FEEDBACK: @@ -325,51 +327,52 @@ def _step_outline_creation(self) -> str: {outline} Please provide an improved version that addresses the feedback.""" - + outline = generate_outline( transcript, wisdom, - research, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, + research, + st.session_state.ai_provider, + st.session_state.ai_model, + revision_prompt, st.session_state.knowledge_base ) - + # Store in session for later steps AND results for display st.session_state.pipeline_outline = outline return outline - + def _step_article_creation(self) -> str: """Step 5: Create article""" transcript = st.session_state.pipeline_transcript wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline - - # Get custom prompt if available - custom_prompt = st.session_state.prompts.get("article_creation") if hasattr(st.session_state, 'prompts') else None - + + # Get custom prompt if available + custom_prompt = st.session_state.prompts.get( + "article_creation") if hasattr(st.session_state, 'prompts') else None + article = generate_article( transcript, - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, + wisdom, + outline, + st.session_state.ai_provider, + st.session_state.ai_model, + custom_prompt, st.session_state.knowledge_base ) - + # Handle editor mode if st.session_state.get("editor_enabled", False): critique = editor_critique( - article, "article_writing", - st.session_state.ai_provider, + article, "article_writing", + st.session_state.ai_provider, st.session_state.ai_model, st.session_state.knowledge_base ) - + st.session_state.pipeline_results["article_critique"] = critique - + revision_prompt = f"""Based on this editorial feedback, please revise the article: EDITORIAL FEEDBACK: @@ -379,50 +382,50 @@ def 
_step_article_creation(self) -> str: {article} Please provide an improved version that addresses the feedback.""" - + article = generate_article( transcript, - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, + wisdom, + outline, + st.session_state.ai_provider, + st.session_state.ai_model, + revision_prompt, st.session_state.knowledge_base ) - + st.session_state.pipeline_article = article return article - + def _step_social_content(self) -> str: """Step 6: Generate social media content""" wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline article = st.session_state.pipeline_article - + # Get custom prompt if available custom_prompt = st.session_state.prompts.get("social_media") if hasattr(st.session_state, 'prompts') else None - + social = generate_social_content( - wisdom, + wisdom, outline, - article, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, + article, + st.session_state.ai_provider, + st.session_state.ai_model, + custom_prompt, st.session_state.knowledge_base ) - + # Handle editor mode if st.session_state.get("editor_enabled", False): critique = editor_critique( - social, "social_media", - st.session_state.ai_provider, + social, "social_media", + st.session_state.ai_provider, st.session_state.ai_model, st.session_state.knowledge_base ) - + st.session_state.pipeline_results["social_critique"] = critique - + revision_prompt = f"""Based on this editorial feedback, please revise the social media content: EDITORIAL FEEDBACK: @@ -432,101 +435,101 @@ def _step_social_content(self) -> str: {social} Please provide improved versions that address the feedback.""" - + social = generate_social_content( - wisdom, + wisdom, outline, - article, - st.session_state.ai_provider, - st.session_state.ai_model, - revision_prompt, + article, + st.session_state.ai_provider, + st.session_state.ai_model, + revision_prompt, st.session_state.knowledge_base ) - + 
st.session_state.pipeline_social = social return social - + def _step_image_prompts(self) -> str: """Step 7: Generate image prompts""" wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline - + # Get custom prompt if available custom_prompt = st.session_state.prompts.get("image_prompts") if hasattr(st.session_state, 'prompts') else None - + images = generate_image_prompts( - wisdom, - outline, - st.session_state.ai_provider, - st.session_state.ai_model, - custom_prompt, + wisdom, + outline, + st.session_state.ai_provider, + st.session_state.ai_model, + custom_prompt, st.session_state.knowledge_base ) - + st.session_state.pipeline_images = images return images - + def _step_database_storage(self) -> str: """Step 8: Store content in database""" try: # Direct Supabase access to avoid circular imports from .supabase_integration import get_supabase_client - + db = get_supabase_client() if not db: return "Database connection failed" - + # Get results with correct step names results = st.session_state.pipeline_results - + # Direct database insert with CORRECT field names result = db.client.table("content").insert({ "user_id": st.session_state.user_id, "title": f"Content from {st.session_state.pipeline_file_info['name']}", "transcript": results.get("transcription", ""), - "wisdom": results.get("wisdom_extraction", ""), + "wisdom": results.get("wisdom_extraction", ""), "outline": results.get("outline_creation", ""), "article": results.get("article_creation", ""), "social_content": results.get("social_content", ""), "created_at": "now()" }).execute() - + content_id = result.data[0]["id"] if result.data else "" if not content_id: return "Failed to save content to database" - + time.sleep(0.3) # Simulate save time return f"Content saved with ID: {content_id}" - + except Exception as e: # Don't fail the pipeline for database errors return f"Database save failed: {str(e)}" - + @property def is_active(self) -> bool: """Check if pipeline is currently 
active""" return st.session_state.get("pipeline_active", False) - + @property def is_complete(self) -> bool: """Check if pipeline has completed""" - return (not self.is_active and + return (not self.is_active and st.session_state.get("pipeline_step_index", 0) >= len(self.PIPELINE_STEPS)) - + @property def current_step_index(self) -> int: """Get current step index""" return st.session_state.get("pipeline_step_index", 0) - + @property def progress_percentage(self) -> float: """Get overall progress percentage""" return (self.current_step_index / len(self.PIPELINE_STEPS)) * 100 - + def get_results(self) -> Dict[str, Any]: """Get all pipeline results""" return st.session_state.get("pipeline_results", {}) - + def get_errors(self) -> Dict[str, str]: """Get any pipeline errors""" return st.session_state.get("pipeline_errors", {}) @@ -537,4 +540,4 @@ def get_pipeline_controller() -> StreamingPipelineController: """Get or create the global pipeline controller""" if 'pipeline_controller' not in st.session_state: st.session_state.pipeline_controller = StreamingPipelineController() - return st.session_state.pipeline_controller \ No newline at end of file + return st.session_state.pipeline_controller diff --git a/core/streaming_results.py b/core/streaming_results.py index fcd71b0..fdc2290 100644 --- a/core/streaming_results.py +++ b/core/streaming_results.py @@ -69,25 +69,30 @@ """ # Enhanced UI Functions for streaming results + + def apply_streaming_css(): """Apply Aurora theme CSS for streaming results""" # Generate truly unique keys for Streamlit widgets + + def generate_unique_key(base_name: str) -> str: """Generate truly unique key for Streamlit widgets to prevent DuplicateWidgetID errors""" return f"{base_name}_{uuid.uuid4().hex[:8]}_{int(time.time() * 1000000) % 1000000}" + def show_streaming_results(): """Display content as it streams - REAL-TIME STREAMING IMPLEMENTATION""" controller = get_pipeline_controller() results = controller.get_results() - + if not results: # 
Show placeholder while waiting for first results st.markdown("### 🌊 Live Content Stream") st.info("πŸ”„ Waiting for processing to begin...") return - + # Show real-time streaming content with smooth reveals show_real_time_content_stream(results, controller) @@ -95,7 +100,7 @@ def show_streaming_results(): def show_real_time_content_stream(results: Dict[str, Any], controller): """πŸš€ ENHANCED: Real-time content streaming with step-by-step reveals""" st.markdown("### ✨ Content Generation Stream") - + # Define content sections with order and styling content_sections = [ ("transcription", "πŸŽ™οΈ", "Audio Transcription", "Converting speech to text..."), @@ -107,31 +112,31 @@ def show_real_time_content_stream(results: Dict[str, Any], controller): ("image_prompts", "πŸ–ΌοΈ", "Image Prompts", "Generating visual concepts..."), ("database_storage", "πŸ’Ύ", "Content Saved", "Storing to your library...") ] - + # Show each section as it becomes available for i, (step_key, icon, title, processing_msg) in enumerate(content_sections): - + if step_key in results and results[step_key]: # Content is ready - show it with beautiful styling show_completed_content_section(step_key, icon, title, results[step_key]) - + elif controller.current_step_index == i and controller.is_active: # Currently processing this step - show loading state show_processing_content_section(icon, title, processing_msg) - + elif controller.current_step_index > i: # This step should be done but no content - show error state show_error_content_section(icon, title, "Content generation failed") - + # Don't show future steps to avoid spoilers def show_completed_content_section(step_key: str, icon: str, title: str, content: Any): """Display completed content with beautiful Aurora styling""" - + # Convert content to string safely content_str = str(content) if content else "No content generated" - + # Beautiful content reveal with animation st.markdown(f"""
600: # Long content - show preview with expand st.markdown("**Preview:**") preview_text = content_str[:300] + "..." if len(content_str) > 300 else content_str st.markdown(preview_text) - + # Expandable full content expand_key = generate_unique_key(f"expand_{step_key}") with st.expander("πŸ“– Show Full Content", expanded=False): st.markdown(content_str) - + # Copy button copy_key = generate_unique_key(f"copy_{step_key}") if st.button(f"πŸ“‹ Copy {title}", key=copy_key, use_container_width=True): @@ -195,19 +200,19 @@ def show_completed_content_section(step_key: str, icon: str, title: str, content else: # Short content - show directly st.markdown(content_str) - + # Inline copy button copy_key = generate_unique_key(f"copy_inline_{step_key}") if st.button(f"πŸ“‹ Copy {title}", key=copy_key): st.code(content_str, language="markdown") st.success("βœ… Copied!") - + st.markdown("---") def show_processing_content_section(icon: str, title: str, message: str): """Show animated processing state for current step""" - + st.markdown(f"""
@@ -307,7 +312,7 @@ def show_2025_content_display():
- + """, unsafe_allow_html=True) - + # Display all results with modern cards content_map = { 'transcription': ('πŸŽ™οΈ', 'Audio Transcription', 'Complete speech-to-text conversion'), @@ -429,7 +434,7 @@ def show_2025_content_display(): 'social_content': ('πŸ“±', 'Social Media Content', 'Platform-optimized posts and captions'), 'image_prompts': ('πŸ–ΌοΈ', 'Image Generation Prompts', 'AI-generated visual concept descriptions') } - + for key, (icon, title, desc) in content_map.items(): if key in results and results[key]: show_streaming_content_card(icon, title, desc, results[key], is_live=False) @@ -437,15 +442,15 @@ def show_2025_content_display(): def show_streaming_content_card(icon: str, title: str, description: str, content: str, is_live: bool = False): """🎨 Beautiful streaming content card with Aurora effects""" - + # Create unique key for this card card_key = generate_unique_key(f"stream_card_{title.lower()}") - + # Live vs complete styling border_color = "rgba(0, 255, 100, 0.2)" if is_live else "rgba(0, 255, 255, 0.15)" bg_gradient = "rgba(0, 255, 100, 0.03), rgba(0, 255, 255, 0.05)" if is_live else "rgba(0, 255, 255, 0.03), rgba(64, 224, 208, 0.05)" glow_color = "rgba(0, 255, 100, 0.4)" if is_live else "rgba(0, 255, 255, 0.3)" - + with st.container(): st.markdown(f"""
@@ -459,7 +464,7 @@ def show_streaming_content_card(icon: str, title: str, description: str, content
- + """, unsafe_allow_html=True) - + # Content preview with smart truncation if len(content) > 300: preview = content[:300] + "..." - + # Expandable content with st.expander(f"πŸ“– Preview {title}", expanded=False): st.markdown(preview) - + with st.expander(f"πŸ“„ Full {title}", expanded=False): st.markdown(content) - + # Copy button copy_key = generate_unique_key(f"copy_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): st.code(content, language="markdown") else: st.markdown(content) - + # Copy button for short content copy_key = generate_unique_key(f"copy_short_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): @@ -586,7 +591,7 @@ def show_streaming_content_card(icon: str, title: str, description: str, content def _show_download_options(results: Dict[str, Any]): """Show download options for generated content""" - + st.markdown("""
@@ -595,16 +600,16 @@ def _show_download_options(results: Dict[str, Any]):
""", unsafe_allow_html=True) - + # Create downloadable content formats formats = { "JSON": _create_json_download(results), "Markdown": _create_markdown_download(results), "Text": _create_text_download(results) } - + col1, col2, col3 = st.columns(3) - + with col1: if "JSON" in formats: st.download_button( @@ -613,16 +618,16 @@ def _show_download_options(results: Dict[str, Any]): file_name="whisperforge_results.json", mime="application/json" ) - + with col2: if "Markdown" in formats: st.download_button( - "πŸ“ Markdown Format", + "πŸ“ Markdown Format", data=formats["Markdown"], file_name="whisperforge_results.md", mime="text/markdown" ) - + with col3: if "Text" in formats: st.download_button( @@ -642,20 +647,20 @@ def _create_json_download(results: Dict[str, Any]) -> str: def _create_markdown_download(results: Dict[str, Any]) -> str: """Create Markdown format download""" content = "# WhisperForge Content Generation Results\n\n" - + sections = { "transcription": "## πŸ“ Audio Transcription\n\n", "wisdom_extraction": "## πŸ’Ž Key Insights & Wisdom\n\n", - "outline_creation": "## πŸ“‹ Content Outline\n\n", + "outline_creation": "## πŸ“‹ Content Outline\n\n", "article_creation": "## πŸ“° Full Article\n\n", "social_content": "## πŸ“± Social Media Content\n\n", "image_prompts": "## πŸ–ΌοΈ Image Generation Prompts\n\n" } - + for key, header in sections.items(): if key in results: content += header + results[key] + "\n\n---\n\n" - + return content @@ -663,20 +668,20 @@ def _create_text_download(results: Dict[str, Any]) -> str: """Create plain text format download""" content = "WHISPERFORGE CONTENT GENERATION RESULTS\n" content += "=" * 50 + "\n\n" - + sections = { "transcription": "AUDIO TRANSCRIPTION\n" + "-" * 20 + "\n\n", "wisdom_extraction": "KEY INSIGHTS & WISDOM\n" + "-" * 20 + "\n\n", "outline_creation": "CONTENT OUTLINE\n" + "-" * 15 + "\n\n", - "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", + "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", 
"social_content": "SOCIAL MEDIA CONTENT\n" + "-" * 20 + "\n\n", "image_prompts": "IMAGE GENERATION PROMPTS\n" + "-" * 25 + "\n\n" } - + for key, header in sections.items(): if key in results: content += header + results[key] + "\n\n" + "=" * 50 + "\n\n" - + return content @@ -791,10 +796,11 @@ def _create_text_download(results: Dict[str, Any]) -> str: """ + def show_enhanced_streaming_status(): """PHASE 3: ENHANCED STREAMING UX OVERHAUL - 2025 st.status() integration WITH VISIBLE THINKING""" controller = get_pipeline_controller() - + if not controller.is_active and not controller.is_complete: return @@ -803,17 +809,17 @@ def show_enhanced_streaming_status(): ("Upload Validation", "File format & compatibility check", "upload_validation"), ("Audio Transcription", "Speech-to-text conversion", "transcription"), ("Wisdom Extraction", "Key insights extraction", "wisdom_extraction"), - ("Research Enrichment", "Supporting links & context", "research_enrichment"), + ("Research Enrichment", "Supporting links & context", "research_enrichment"), ("Outline Generation", "Content structure creation", "outline_creation"), ("Article Creation", "Full article generation", "article_creation"), ("Social Media Posts", "Platform-optimized content", "social_content"), ("Image Prompts", "Visual concept generation", "image_prompts"), ("Database Storage", "Secure content storage", "database_storage") ] - + results = controller.get_results() errors = controller.get_errors() if hasattr(controller, 'get_errors') else {} - + # 🧠 VISIBLE THINKING INTEGRATION - Show AI thought bubbles during processing if controller.is_active and st.session_state.get("thinking_enabled", True): # Create dedicated container for thinking bubbles @@ -827,7 +833,7 @@ def show_enhanced_streaming_status():
- + """, unsafe_allow_html=True) - + # Render the actual thinking stream try: render_thinking_stream(thinking_container) except Exception as e: st.info(f"πŸ’­ AI is thinking... (thinking system loading)") - + # Main processing status container with st.status() if controller.is_active: current_title, current_desc, current_key = pipeline_steps[current_step] - + with st.status(f"πŸ”„ {current_title}", expanded=True) as status: st.write(f"πŸ“ **{current_desc}**") - + # Progress bar progress = (current_step / len(pipeline_steps)) * 100 st.progress(progress / 100, text=f"Progress: {progress:.0f}% ({current_step + 1}/{len(pipeline_steps)})") - + # Show previous completed steps with content preview for i in range(current_step): title, _, step_key = pipeline_steps[i] if step_key in results: st.write(f"βœ… {title} - Complete") # Show brief preview of generated content - if step_key in results and results[step_key] and step_key not in ["upload_validation", "database_storage"]: - preview = str(results[step_key])[:100] + "..." if len(str(results[step_key])) > 100 else str(results[step_key]) + if step_key in results and results[step_key] and step_key not in [ + "upload_validation", "database_storage"]: + preview = str(results[step_key])[:100] + \ + "..." if len(str(results[step_key])) > 100 else str(results[step_key]) st.caption(f"Preview: {preview}") elif step_key in errors: st.write(f"❌ {title} - Error: {errors[step_key]}") else: st.write(f"βœ… {title} - Complete") - + # Current step with enhanced styling st.markdown(f"""
= len(pipeline_steps) - 1: status.update(label="βœ… Processing Complete!", state="complete", expanded=False) else: status.update(label=f"πŸ”„ {current_title}", state="running") - + elif controller.is_complete: # Completion status with beautiful summary with st.status("βœ… All processing complete!", state="complete", expanded=False): st.success("Your audio has been transformed into comprehensive content!") - + # Enhanced completion summary st.markdown("""

🌟 Generation Summary

- + """, unsafe_allow_html=True) - + col1, col2, col3 = st.columns(3) - + with col1: completed_count = len([r for r in results.values() if r]) st.metric("Steps Completed", completed_count, len(pipeline_steps)) @@ -974,22 +982,30 @@ def show_enhanced_streaming_status(): with col3: success_rate = ((completed_count - error_count) / len(pipeline_steps)) * 100 st.metric("Success Rate", f"{success_rate:.1f}%") - + # Show content type breakdown if results: st.markdown("**Generated Content Types:**") content_types = [] - if results.get('transcription'): content_types.append("πŸ“ Transcription") - if results.get('wisdom_extraction'): content_types.append("πŸ’Ž Insights") - if results.get('research_enrichment'): content_types.append("πŸ” Research") - if results.get('outline_creation'): content_types.append("πŸ“‹ Outline") - if results.get('article_creation'): content_types.append("πŸ“° Article") - if results.get('social_content'): content_types.append("πŸ“± Social Posts") - if results.get('image_prompts'): content_types.append("πŸ–ΌοΈ Image Prompts") - + if results.get('transcription'): + content_types.append("πŸ“ Transcription") + if results.get('wisdom_extraction'): + content_types.append("πŸ’Ž Insights") + if results.get('research_enrichment'): + content_types.append("πŸ” Research") + if results.get('outline_creation'): + content_types.append("πŸ“‹ Outline") + if results.get('article_creation'): + content_types.append("πŸ“° Article") + if results.get('social_content'): + content_types.append("πŸ“± Social Posts") + if results.get('image_prompts'): + content_types.append("πŸ–ΌοΈ Image Prompts") + if content_types: st.write(" β€’ ".join(content_types)) + def show_processing_status(): """Display ultra-modern Aurora pipeline with real-time visibility - WRAPPER""" - show_enhanced_streaming_status() # Use the new enhanced version \ No newline at end of file + show_enhanced_streaming_status() # Use the new enhanced version diff --git a/core/streamlit_monitoring.py 
b/core/streamlit_monitoring.py index 9500aeb..b2381e5 100644 --- a/core/streamlit_monitoring.py +++ b/core/streamlit_monitoring.py @@ -43,4 +43,3 @@ def wrapper(*args, **kwargs): return wrapper return decorator - diff --git a/core/styling.py b/core/styling.py index 91b1c4e..0f724ff 100644 --- a/core/styling.py +++ b/core/styling.py @@ -5,21 +5,22 @@ import streamlit as st + def apply_aurora_theme(): """Apply the complete Aurora theme by loading our comprehensive CSS file""" # Load the comprehensive Aurora CSS file css_file_path = "static/css/main.css" - + try: with open(css_file_path, 'r', encoding='utf-8') as f: css_content = f.read() - + st.markdown(f""" """, unsafe_allow_html=True) - + except FileNotFoundError: # Fallback to basic Aurora styling if file not found st.markdown(""" @@ -35,7 +36,7 @@ def apply_aurora_theme(): --aurora-text: rgba(255, 255, 255, 0.95); --aurora-glow: 0 0 20px rgba(64, 224, 208, 0.3); } - + .stApp { background: linear-gradient(135deg, var(--aurora-bg-dark) 0%, var(--aurora-bg-darker) 100%); color: var(--aurora-text); @@ -43,9 +44,10 @@ def apply_aurora_theme(): """, unsafe_allow_html=True) + def create_aurora_header(): """Create a flagship Aurora header with integrated navigation and logout - REBUILT FOR 2025""" - + # First, inject the CSS using st.markdown() st.markdown(""" """, unsafe_allow_html=True) - + # Then render the HTML structure using st.markdown() st.markdown("""
@@ -177,9 +179,10 @@ def create_aurora_header():
""", unsafe_allow_html=True) + def create_aurora_nav_buttons(): """Create integrated navigation buttons for the Aurora header""" - + # Enhanced styling for integrated nav buttons st.markdown(""" - """, unsafe_allow_html=True) - - # Tab selector - with st.container(): - st.markdown('
', unsafe_allow_html=True) - selected_tab_label = st.selectbox( - "Select Content Type:", - tab_options, - index=st.session_state[f"{tab_group_id}_active"], - key=f"tab_select_{tab_group_id}", - label_visibility="collapsed" - ) - st.markdown('
', unsafe_allow_html=True) - - # Find selected tab index - selected_index = 0 - for i, option in enumerate(tab_options): - if option == selected_tab_label: - selected_index = i - break - - # Update session state - st.session_state[f"{tab_group_id}_active"] = selected_index - - # Display selected content with Aurora styling - if 0 <= selected_index < len(tab_data): - active_tab = tab_data[selected_index] - - # Add quick actions for the active tab - col1, col2, col3, col4 = st.columns([1, 1, 1, 2]) - - with col1: - if st.button("πŸ“‹ Copy", key=f"copy_{tab_group_id}_{selected_index}", use_container_width=True): - st.code(active_tab.get('content', ''), language='text') - st.success("βœ… Content displayed above - copy with Ctrl+A, Ctrl+C") - - with col2: - content = active_tab.get('content', '') - if content: - st.download_button( - label="πŸ’Ύ Download", - data=content, - file_name=f"{active_tab['title'].lower().replace(' ', '_')}.txt", - mime="text/plain", - key=f"download_{tab_group_id}_{selected_index}", - use_container_width=True - ) - - with col3: - if st.button("πŸ“Š Stats", key=f"stats_{tab_group_id}_{selected_index}", use_container_width=True): - word_count = len(str(content).split()) - char_count = len(str(content)) - st.info(f"πŸ“Š **{active_tab['title']}**: {word_count} words, {char_count} characters") - - # Display the content using our enhanced content card - st.markdown('
', unsafe_allow_html=True) - create_enhanced_aurora_content_card( - title=active_tab['title'], - content=active_tab.get('content', ''), - content_type=active_tab.get('type', 'text'), - icon=active_tab['icon'] - ) - st.markdown('
', unsafe_allow_html=True) - - return selected_index # === MAIN APP === def show_main_app(): - """Main application interface with navigation""" - # Create navigation + """Main application interface with navigation.""" tabs = create_aurora_navigation() - # Show different pages based on selected tab - with tabs[0]: # Transform + with tabs[0]: show_transform_page() - - with tabs[1]: # Content Library + with tabs[1]: show_content_library() - - with tabs[2]: # Settings + with tabs[2]: show_settings_page() - - with tabs[3]: # Knowledge Base + with tabs[3]: show_knowledge_base() - - with tabs[4]: # Prompts + with tabs[4]: show_prompts_page() -# === ENTRY POINT === - def main(): - """Application entry point""" + """Application entry point.""" init_session() if st.session_state.authenticated: diff --git a/core/content_display.py b/core/content_display.py new file mode 100644 index 0000000..ed1cc93 --- /dev/null +++ b/core/content_display.py @@ -0,0 +1,410 @@ +""" +Content Display Components +=========================== + +Aurora-styled content cards, tab interfaces, and result views for generated content. +""" + +import html as html_mod +import uuid +from datetime import datetime + +import streamlit as st + +from .export import create_text_export, export_to_markdown, export_to_word + + +def create_enhanced_aurora_content_card(title, content, content_type="text", icon="\U0001f4c4"): + """Create an Aurora content card with copy/download/expand functionality.""" + card_id = f"card_{uuid.uuid4().hex[:8]}" + copy_btn_id = f"copy_{uuid.uuid4().hex[:8]}" + expand_btn_id = f"expand_{uuid.uuid4().hex[:8]}" + full_content_id = f"full_{uuid.uuid4().hex[:8]}" + + word_count = len(content.split()) if content else 0 + char_count = len(content) if content else 0 + + preview_length = 300 + needs_expansion = len(content) > preview_length + preview_content = content[:preview_length] + "..." 
if needs_expansion else content + + type_class = content_type.lower() + + # Escape user-controlled content for safe HTML embedding + safe_title = html_mod.escape(title) + safe_icon = html_mod.escape(icon) + escaped_content = html_mod.escape(content) + escaped_preview = html_mod.escape(preview_content) + js_safe_content = content.replace("\\", "\\\\").replace("`", "\\`").replace("${", "\\${") + + card_html = f""" +
+
+

+ {safe_icon} + {safe_title} +

+
+ + +
+
+ +
+
+ {escaped_preview} +
+ + { + f''' +
+ {html_mod.escape(content[preview_length:])} +
+ ''' + if needs_expansion + else "" + } + +
+
+ \U0001f4ca + {word_count} words \u2022 {char_count} characters +
+ + { + f''' + + ''' + if needs_expansion + else "" + } +
+
+ + + +
+ + + """ + + st.markdown(card_html, unsafe_allow_html=True) + + +def create_aurora_tabs(tab_data, default_tab=0): + """Create Aurora-styled tab interface for content selection.""" + tab_group_id = f"tabs_{uuid.uuid4().hex[:8]}" + + if f"{tab_group_id}_active" not in st.session_state: + st.session_state[f"{tab_group_id}_active"] = default_tab + + tab_stats = [] + for tab in tab_data: + content = tab.get("content", "") + word_count = len(str(content).split()) if content else 0 + tab_stats.append(word_count) + + st.markdown( + """ +
+
+

+ \U0001f4c4 + Generated Content +

+
+
+ """, + unsafe_allow_html=True, + ) + + tab_options = [] + for i, tab in enumerate(tab_data): + word_count = tab_stats[i] + tab_options.append(f"{tab['icon']} {tab['title']} ({word_count} words)") + + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + with st.container(): + st.markdown('
', unsafe_allow_html=True) + selected_tab_label = st.selectbox( + "Select Content Type:", + tab_options, + index=st.session_state[f"{tab_group_id}_active"], + key=f"tab_select_{tab_group_id}", + label_visibility="collapsed", + ) + st.markdown("
", unsafe_allow_html=True) + + selected_index = 0 + for i, option in enumerate(tab_options): + if option == selected_tab_label: + selected_index = i + break + + st.session_state[f"{tab_group_id}_active"] = selected_index + + if 0 <= selected_index < len(tab_data): + active_tab = tab_data[selected_index] + + col1, col2, col3, _col4 = st.columns([1, 1, 1, 2]) + + with col1: + if st.button("\U0001f4cb Copy", key=f"copy_{tab_group_id}_{selected_index}", use_container_width=True): + st.code(active_tab.get("content", ""), language="text") + st.success("Content displayed above - copy with Ctrl+A, Ctrl+C") + + with col2: + content = active_tab.get("content", "") + if content: + st.download_button( + label="\U0001f4be Download", + data=content, + file_name=f"{active_tab['title'].lower().replace(' ', '_')}.txt", + mime="text/plain", + key=f"download_{tab_group_id}_{selected_index}", + use_container_width=True, + ) + + with col3: + if st.button("\U0001f4ca Stats", key=f"stats_{tab_group_id}_{selected_index}", use_container_width=True): + word_count = len(str(content).split()) + char_count = len(str(content)) + st.info(f"\U0001f4ca **{active_tab['title']}**: {word_count} words, {char_count} characters") + + st.markdown('
', unsafe_allow_html=True) + create_enhanced_aurora_content_card( + title=active_tab["title"], + content=active_tab.get("content", ""), + content_type=active_tab.get("type", "text"), + icon=active_tab["icon"], + ) + st.markdown("
", unsafe_allow_html=True) + + return selected_index + + +def show_results(results): + """Display generated content with Aurora styling and export options.""" + if not results: + return + + st.markdown( + """ +
+

Content Generated Successfully!

+

Your audio has been transformed with AI magic

+
+ """, + unsafe_allow_html=True, + ) + + notion_url = results.get("notion_url", "") + if notion_url and notion_url.startswith("https://"): + safe_url = html_mod.escape(notion_url) + st.markdown( + f""" + + """, + unsafe_allow_html=True, + ) + st.markdown("---") + + # Content overview stats + total_words = sum( + len(str(results.get(key, "")).split()) + for key in ["transcript", "wisdom", "outline", "article", "social_content"] + ) + content_types = len([k for k in ["transcript", "wisdom", "outline", "article", "social_content"] if results.get(k)]) + + st.markdown( + f""" +
+
+
+ {total_words:,} + Total Words +
+
+ {content_types} + Content Types +
+
+
+ """, + unsafe_allow_html=True, + ) + + # Result sections + result_sections = [ + ("transcript", "Transcript", results.get("transcript", "")), + ("wisdom", "Wisdom", results.get("wisdom", "")), + ("article", "Article", results.get("article", "")), + ("social_content", "Social Content", results.get("social_content", "")), + ] + + has_content = False + for key, title, content in result_sections: + if not content: + continue + has_content = True + st.markdown(f'
', unsafe_allow_html=True) + word_count = len(str(content).split()) + with st.expander(f"{title} ({word_count} words)", expanded=False): + col1, col2, _col3 = st.columns([1, 1, 3]) + with col1: + if st.button("Copy", key=f"copy_{key}", use_container_width=True): + st.code(content, language="text") + st.success("Content displayed above - copy with Ctrl+A, Ctrl+C") + with col2: + st.download_button( + label="Download", + data=content, + file_name=f"{key}.txt", + mime="text/plain", + key=f"download_{key}", + use_container_width=True, + ) + st.markdown(content) + + if not has_content: + st.warning("No content available to display.") + + # Export section + st.markdown("---") + st.markdown( + """ +
+

Additional Export Options

+
+ """, + unsafe_allow_html=True, + ) + + col1, col2, col3 = st.columns(3) + + with col1: + if st.button("Export as Text", use_container_width=True): + export_content = create_text_export(results) + st.download_button( + label="Download Text File", + data=export_content, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.txt", + mime="text/plain", + key="download_text_export", + use_container_width=True, + ) + + with col2: + if st.button("Export as Markdown", use_container_width=True): + md_content = export_to_markdown(results) + st.download_button( + label="Download Markdown", + data=md_content, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.md", + mime="text/markdown", + key="download_md_export", + use_container_width=True, + ) + + with col3: + try: + if st.button("Export as Word", use_container_width=True): + word_bytes = export_to_word(results) + st.download_button( + label="Download Word", + data=word_bytes, + file_name=f"whisperforge_content_{datetime.now().strftime('%Y%m%d_%H%M')}.docx", + mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + key="download_word_export", + use_container_width=True, + ) + except ImportError: + st.info("Install python-docx for Word export") diff --git a/core/export.py b/core/export.py new file mode 100644 index 0000000..47979a1 --- /dev/null +++ b/core/export.py @@ -0,0 +1,95 @@ +""" +Content Export Utilities +======================== + +Export generated content to various file formats (Text, Markdown, Word, PDF). 
+""" + +from datetime import datetime +from io import BytesIO + + +def create_text_export(results: dict) -> str: + """Create a formatted plain-text export of all content.""" + lines = [ + "=" * 60, + "WHISPERFORGE CONTENT EXPORT", + f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "=" * 60, + "", + ] + + sections = [ + ("AUDIO TRANSCRIPT", results.get("transcript", "")), + ("EXTRACTED WISDOM", results.get("wisdom", "")), + ("CONTENT OUTLINE", results.get("outline", "")), + ("FULL ARTICLE", results.get("article", "")), + ("SOCIAL MEDIA CONTENT", results.get("social_content", "")), + ] + + for title, content in sections: + if content: + lines.extend([f"## {title}", "-" * 40, content, "", ""]) + + if results.get("notion_url"): + lines.extend(["## NOTION LINK", "-" * 40, results["notion_url"], ""]) + + return "\n".join(lines) + + +def export_to_markdown(results: dict) -> str: + """Export results to Markdown format.""" + lines = ["# WhisperForge Content Export"] + sections = [ + ("## Transcript", results.get("transcript", "")), + ("## Wisdom", results.get("wisdom", "")), + ("## Outline", results.get("outline", "")), + ("## Article", results.get("article", "")), + ("## Social Content", results.get("social_content", "")), + ] + for title, content in sections: + if content: + lines.extend([title, "", content, ""]) + if results.get("notion_url"): + lines.extend(["## Notion Link", results["notion_url"]]) + return "\n".join(lines) + + +def export_to_word(results: dict) -> bytes: + """Export results to a Word document (.docx).""" + from docx import Document + + doc = Document() + doc.add_heading("WhisperForge Content Export", level=1) + for title, content in [ + ("Transcript", results.get("transcript", "")), + ("Wisdom", results.get("wisdom", "")), + ("Outline", results.get("outline", "")), + ("Article", results.get("article", "")), + ("Social Content", results.get("social_content", "")), + ]: + if content: + doc.add_heading(title, level=2) + 
doc.add_paragraph(content) + if results.get("notion_url"): + doc.add_heading("Notion Link", level=2) + doc.add_paragraph(results["notion_url"]) + + bio = BytesIO() + doc.save(bio) + bio.seek(0) + return bio.read() + + +def export_to_pdf(results: dict) -> bytes: + """Export results to a PDF file.""" + from fpdf import FPDF + + pdf = FPDF() + pdf.add_page() + pdf.set_auto_page_break(auto=True, margin=15) + pdf.set_font("Arial", size=12) + text = create_text_export(results) + for line in text.split("\n"): + pdf.cell(0, 10, txt=line, ln=1) + return pdf.output(dest="S").encode("latin-1") diff --git a/core/notion_integration.py b/core/notion_integration.py new file mode 100644 index 0000000..6b38001 --- /dev/null +++ b/core/notion_integration.py @@ -0,0 +1,274 @@ +""" +Notion Integration +================== + +Create formatted Notion pages from WhisperForge content. +""" + +import logging +import os +from datetime import datetime + +import streamlit as st + +from .utils import get_openai_client + +logger = logging.getLogger(__name__) + +AI_MODEL = os.getenv("GPT_MODEL", "gpt-4o") + +# Notion API limits content blocks to 2000 chars each +_NOTION_CHUNK_SIZE = 1800 +_MAX_NOTION_BLOCKS = 50 + + +def generate_ai_title(transcript: str) -> str: + """Generate a concise AI-powered title from a transcript excerpt.""" + try: + client = get_openai_client() + if not client: + return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" + + prompt = ( + "Generate a concise, descriptive title (max 60 characters) for this audio transcript:\n\n" + f"{transcript[:500]}...\n\n" + "Title should be clear, specific, professional, and capture the main topic.\n" + "No quotes or special characters.\n\nTitle:" + ) + + response = client.chat.completions.create( + model=AI_MODEL, + messages=[{"role": "user", "content": prompt}], + max_tokens=30, + ) + title = response.choices[0].message.content + return title.strip().replace('"', "").replace("'", "")[:60] + except Exception: + 
return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" + + +def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: + """Create a Notion page with WhisperForge content. + + Returns the page URL on success, or None on failure. + """ + try: + from notion_client import Client + except ImportError: + st.warning("Install notion-client to enable Notion publishing: pip install notion-client") + return None + + try: + api_key = os.getenv("NOTION_API_KEY") + database_id = os.getenv("NOTION_DATABASE_ID") + + if not api_key or not database_id: + st.warning("Notion not configured. Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") + return None + + client = Client(auth=api_key) + children = _build_notion_children(title, content_data) + + response = client.pages.create( + parent={"database_id": database_id}, + icon={"type": "emoji", "emoji": "\U0001f30c"}, + properties={"Name": {"title": [{"text": {"content": title[:100]}}]}}, + children=children[:_MAX_NOTION_BLOCKS], + ) + + if response and "id" in response: + page_id = response["id"] + return f"https://notion.so/{page_id.replace('-', '')}" + + return None + + except Exception as e: + st.error(f"Notion publishing failed: {e!s}") + return None + + +def _build_notion_children(title: str, content_data: dict) -> list[dict]: + """Build the list of Notion block children for a page.""" + children: list[dict] = [] + + # Header + children.append( + { + "type": "heading_1", + "heading_1": { + "rich_text": [ + {"type": "text", "text": {"content": "\U0001f30c "}, "annotations": {"color": "blue"}}, + {"type": "text", "text": {"content": title}, "annotations": {"bold": True}}, + ] + }, + } + ) + + # Creation info + children.append( + { + "type": "paragraph", + "paragraph": { + "rich_text": [ + {"type": "text", "text": {"content": "Generated with "}}, + { + "type": "text", + "text": {"content": "WhisperForge Aurora"}, + "annotations": {"bold": True, "color": "blue"}, + }, + { + "type": 
"text", + "text": {"content": f" \u2022 {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}, + }, + ] + }, + } + ) + + children.append({"type": "divider", "divider": {}}) + + # Wisdom callout + if content_data.get("wisdom"): + children.append( + { + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": "Key Insights & Wisdom"}}, + {"type": "text", "text": {"content": f"\n\n{content_data['wisdom'][:_NOTION_CHUNK_SIZE]}"}}, + ], + "color": "purple_background", + "icon": {"type": "emoji", "emoji": "\U0001f4a1"}, + }, + } + ) + + # Content sections as toggles + sections = [ + ("\U0001f4dd Transcript", content_data.get("transcript")), + ("\U0001f4a1 Wisdom", content_data.get("wisdom")), + ("\U0001f50d Research Links", content_data.get("research")), + ("\U0001f4cb Outline", content_data.get("outline")), + ("\U0001f4f0 Article", content_data.get("article")), + ("\U0001f4f1 Social Content", content_data.get("social_content")), + ] + + for section_title, section_content in sections: + if not section_content: + continue + + if section_title == "\U0001f50d Research Links" and isinstance(section_content, dict): + children.append(_build_research_toggle(section_title, section_content)) + elif isinstance(section_content, str): + chunks = [ + section_content[i : i + _NOTION_CHUNK_SIZE] for i in range(0, len(section_content), _NOTION_CHUNK_SIZE) + ] + children.append( + { + "type": "toggle", + "toggle": { + "rich_text": [{"type": "text", "text": {"content": section_title}}], + "children": [ + { + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": chunk}}]}, + } + for chunk in chunks[:5] + ], + }, + } + ) + + # Footer + children.extend( + [ + {"type": "divider", "divider": {}}, + { + "type": "callout", + "callout": { + "rich_text": [ + { + "type": "text", + "text": {"content": "Content Generation Complete"}, + "annotations": {"bold": True}, + }, + {"type": "text", "text": {"content": "\n\nAI Pipeline: 
completed successfully"}}, + { + "type": "text", + "text": {"content": f"\nGenerated: {datetime.now().strftime('%B %d, %Y at %I:%M %p')}"}, + }, + {"type": "text", "text": {"content": "\nPowered by WhisperForge Aurora"}}, + ], + "color": "green_background", + "icon": {"type": "emoji", "emoji": "\u2705"}, + }, + }, + ] + ) + + return children + + +def _build_research_toggle(section_title: str, section_content: dict) -> dict: + """Build a Notion toggle block for research data.""" + research_children: list[dict] = [] + entities = section_content.get("entities", []) + + if entities: + for entity in entities[:5]: + entity_name = entity.get("name", "Unknown Entity") + why_matters = entity.get("why_matters", "No description available") + links = entity.get("links", []) + + research_children.append( + { + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": entity_name}, "annotations": {"bold": True}}, + {"type": "text", "text": {"content": f"\n{why_matters}"}}, + ], + "color": "blue_background", + "icon": {"type": "emoji", "emoji": "\U0001f52c"}, + }, + } + ) + + for link in links[:3]: + link_title = link.get("title", "Link") + link_desc = link.get("description", "") + is_gem = link.get("is_gem", False) + gem_icon = "\U0001f48e" if is_gem else "\U0001f517" + color = "orange" if is_gem else "default" + + research_children.append( + { + "type": "bulleted_list_item", + "bulleted_list_item": { + "rich_text": [ + {"type": "text", "text": {"content": f"{gem_icon} "}, "annotations": {"color": color}}, + {"type": "text", "text": {"content": link_title}, "annotations": {"bold": True}}, + { + "type": "text", + "text": {"content": f" - {link_desc}"}, + "annotations": {"italic": True}, + }, + ] + }, + } + ) + else: + research_children.append( + { + "type": "paragraph", + "paragraph": {"rich_text": [{"type": "text", "text": {"content": "No research entities found."}}]}, + } + ) + + return { + "type": "toggle", + "toggle": { + "rich_text": [{"type": 
"text", "text": {"content": section_title}}], + "children": research_children, + }, + } diff --git a/core/pages.py b/core/pages.py new file mode 100644 index 0000000..3f9c59f --- /dev/null +++ b/core/pages.py @@ -0,0 +1,288 @@ +""" +Application Pages +================= + +Secondary page functions for settings, knowledge base, and prompt management. +""" + +import os + +import streamlit as st + +from .content_display import create_enhanced_aurora_content_card +from .supabase_integration import get_supabase_client +from .utils import safe_path + + +def show_settings_page(): + """Settings and configuration page.""" + st.markdown("### Settings & Configuration") + + st.markdown("#### API Keys") + with st.expander("API Configuration", expanded=True): + col1, col2 = st.columns(2) + + with col1: + st.markdown("**OpenAI Configuration**") + openai_key = st.text_input( + "OpenAI API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""), help="Your OpenAI API key" + ) + if openai_key: + os.environ["OPENAI_API_KEY"] = openai_key + st.success("OpenAI key configured") + + model_choice = st.selectbox("OpenAI Model", ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]) + st.session_state.openai_model = model_choice + + with col2: + st.markdown("**Notion Configuration**") + notion_key = st.text_input( + "Notion API Key", + type="password", + value=os.getenv("NOTION_API_KEY", ""), + help="Your Notion integration token", + ) + if notion_key: + os.environ["NOTION_API_KEY"] = notion_key + + notion_db = st.text_input("Notion Database ID", value=os.getenv("NOTION_DATABASE_ID", "")) + if notion_db: + os.environ["NOTION_DATABASE_ID"] = notion_db + + if notion_key and notion_db: + st.success("Notion configured") + + st.markdown("#### Pipeline Configuration") + with st.expander("Processing Pipeline", expanded=True): + col1, col2 = st.columns(2) + + with col1: + st.markdown("**Core Features**") + st.session_state.auto_notion = st.checkbox( + "Auto-publish to Notion", 
value=st.session_state.get("auto_notion", True) + ) + st.session_state.live_stream = st.checkbox( + "Live Streaming", value=st.session_state.get("live_stream", False) + ) + st.session_state.large_file_mode = st.checkbox( + "Enhanced Large File Processing", value=st.session_state.get("large_file_mode", True) + ) + + with col2: + st.markdown("**Quality Settings**") + st.session_state.content_length = st.selectbox( + "Article Length", ["Short (500-800 words)", "Medium (800-1200 words)", "Long (1200+ words)"] + ) + st.session_state.tone_style = st.selectbox( + "Content Tone", ["Professional", "Conversational", "Academic", "Creative"] + ) + + templates = [f.replace(".md", "") for f in os.listdir("templates")] if os.path.exists("templates") else [] + if templates: + st.session_state.article_template = st.selectbox("Article Template", templates) + else: + st.session_state.article_template = None + + st.markdown("#### System Status") + with st.expander("Connection Status", expanded=False): + if st.button("Test All Connections"): + with st.spinner("Testing all connections..."): + if os.getenv("OPENAI_API_KEY"): + st.success("OpenAI API key configured") + else: + st.error("OpenAI API key missing") + + try: + db = get_supabase_client() + if db and db.test_connection(): + st.success("Supabase connected") + else: + st.error("Supabase connection failed") + except Exception as e: + st.error(f"Supabase error: {e}") + + try: + if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): + from notion_client import Client + + client = Client(auth=os.getenv("NOTION_API_KEY")) + client.databases.retrieve(database_id=os.getenv("NOTION_DATABASE_ID")) + st.success("Notion connected") + else: + st.warning("Notion not configured") + except Exception as e: + st.error(f"Notion error: {e}") + + +def show_knowledge_base(): + """Knowledge base management page.""" + st.markdown("### Knowledge Base") + + kb_path = "prompts/default/knowledge_base" + + st.markdown( + "The knowledge base 
provides context and expertise to enhance content generation. " + "Add domain-specific information, style guides, and reference materials here." + ) + + tabs = st.tabs(["View Knowledge", "Add Knowledge", "Manage Files"]) + + with tabs[0]: + st.markdown("#### Current Knowledge Base") + try: + if os.path.exists(kb_path): + files = [f for f in os.listdir(kb_path) if f.endswith(".md")] + if files: + selected_file = st.selectbox("Select knowledge file:", files) + if selected_file: + file_path = safe_path(kb_path, selected_file) + with open(file_path) as f: + content = f.read() + st.markdown(f"**File:** `{selected_file}`") + create_enhanced_aurora_content_card("Knowledge Content", content, "text", "\U0001f4d6") + else: + st.info("No knowledge files found") + else: + st.info("Knowledge base directory not found") + except Exception as e: + st.error(f"Error reading knowledge base: {e}") + + with tabs[1]: + st.markdown("#### Add New Knowledge") + + col1, col2 = st.columns([2, 1]) + with col1: + kb_title = st.text_input("Knowledge Title", placeholder="e.g., 'Marketing Guidelines'") + with col2: + kb_category = st.selectbox("Category", ["General", "Style Guide", "Domain Expertise", "Templates"]) + + kb_content = st.text_area("Knowledge Content", placeholder="Enter your knowledge content here...", height=300) + + if st.button("Save Knowledge", type="primary"): + if kb_title and kb_content: + if len(kb_title) > 100: + st.error("Title must be 100 characters or fewer.") + else: + try: + os.makedirs(kb_path, exist_ok=True) + filename = f"{kb_title.lower().replace(' ', '_')}.md" + file_path = safe_path(kb_path, filename) + + with open(file_path, "w") as f: + f.write(f"# {kb_title}\n\n") + f.write(f"**Category:** {kb_category}\n\n") + f.write(kb_content) + + st.success(f"Knowledge saved as `{filename}`") + except ValueError as e: + st.error(f"Invalid filename: {e}") + except Exception as e: + st.error(f"Error saving knowledge: {e}") + else: + st.error("Please provide both title and 
content") + + with tabs[2]: + st.markdown("#### Manage Knowledge Files") + try: + if os.path.exists(kb_path): + files = [f for f in os.listdir(kb_path) if f.endswith(".md")] + if files: + for file in files: + col1, col2 = st.columns([3, 1]) + with col1: + st.markdown(f"`{file}`") + with col2: + if st.button("Delete", key=f"delete_{file}"): + try: + validated_path = safe_path(kb_path, file) + os.remove(validated_path) + st.success(f"Deleted `{file}`") + st.rerun() + except (ValueError, Exception) as e: + st.error(f"Error deleting file: {e}") + else: + st.info("No knowledge files found") + else: + st.info("Knowledge base directory not found") + except Exception as e: + st.error(f"Error managing files: {e}") + + +def show_prompts_page(): + """Prompts management page.""" + st.markdown("### Prompt Customization") + st.markdown("Customize the AI prompts used in each step of the content generation pipeline.") + + prompt_types = { + "wisdom": "Wisdom Extraction", + "outline": "Content Outline", + "article": "Article Generation", + "social": "Social Media Posts", + } + + file_mapping = { + "wisdom": "wisdom_extraction.md", + "outline": "outline_creation.md", + "article": "article_generation.md", + "social": "social_media.md", + } + + prompt_tabs = st.tabs(list(prompt_types.values()) + ["Advanced"]) + + for i, (prompt_key, prompt_name) in enumerate(prompt_types.items()): + with prompt_tabs[i]: + st.markdown(f"#### {prompt_name}") + + prompt_file = f"prompts/default/{file_mapping[prompt_key]}" + current_prompt = "" + + try: + if os.path.exists(prompt_file): + with open(prompt_file) as f: + current_prompt = f.read() + else: + current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." 
+ except Exception as e: + st.error(f"Error loading prompt: {e}") + + new_prompt = st.text_area( + f"Edit {prompt_name} Prompt", + value=current_prompt, + height=400, + help=f"Customize the prompt used for {prompt_key} generation", + ) + + col1, col2, col3 = st.columns([1, 1, 2]) + with col1: + if st.button("Save", key=f"save_{prompt_key}"): + try: + os.makedirs("prompts/default", exist_ok=True) + with open(prompt_file, "w") as f: + f.write(new_prompt) + st.success(f"{prompt_name} prompt saved!") + except Exception as e: + st.error(f"Error saving prompt: {e}") + + with col2: + if st.button("Reset", key=f"reset_{prompt_key}"): + st.info("Reset to default functionality coming soon!") + + with col3: + st.markdown(f"**File:** `{prompt_file}`") + + with prompt_tabs[-1]: + st.markdown("#### Advanced Prompt Settings") + col1, col2 = st.columns(2) + with col1: + st.markdown("**Global Settings**") + temperature = st.slider("Temperature (Creativity)", 0.0, 1.0, 0.7, 0.1) + max_tokens = st.number_input("Max Tokens", 100, 4000, 2000) + with col2: + st.markdown("**Prompt Templates**") + if st.button("Import Prompt Set"): + st.info("Import functionality coming soon!") + if st.button("Export Prompt Set"): + st.info("Export functionality coming soon!") + + st.session_state.temperature = temperature + st.session_state.max_tokens = max_tokens diff --git a/core/pipeline.py b/core/pipeline.py new file mode 100644 index 0000000..57bd354 --- /dev/null +++ b/core/pipeline.py @@ -0,0 +1,336 @@ +""" +Content Processing Pipeline +============================= + +Core audio-to-content pipeline with Aurora visualization. +Handles transcription, wisdom extraction, outline, article, social, and Notion publishing. 
+""" + +import logging +import os +import tempfile +import time +from datetime import datetime + +import streamlit as st + +from .content_generation import ( + generate_article, + generate_outline, + generate_social_content, + generate_wisdom, + transcribe_audio, +) +from .notion_integration import create_notion_page, generate_ai_title +from .prompt_loader import get_prompt_for_step, load_custom_prompts, load_template +from .supabase_integration import get_supabase_client + +logger = logging.getLogger(__name__) + +# Pipeline step definitions +PIPELINE_STEP_NAMES = [ + "Transcription", + "Wisdom Extraction", + "Outline Creation", + "Article Generation", + "Social Content", + "Publishing", +] + + +def process_pipeline(audio_file=None, transcript: str | None = None) -> dict | None: + """Unified content pipeline. + + Supply *audio_file* to transcribe first, or *transcript* to skip transcription. + Returns the results dict on success, or None on failure. + """ + results: dict = {} + start_time = time.time() + + custom_prompts = load_custom_prompts() + if custom_prompts: + st.info(f"Using {len(custom_prompts)} custom prompts") + + pipeline_placeholder = st.empty() + st.markdown("### Live Content Generation") + + transcript_container = st.expander("Transcription", expanded=False) + wisdom_container = st.expander("Wisdom Extraction", expanded=False) + outline_container = st.expander("Outline Creation", expanded=False) + article_container = st.expander("Article Generation", expanded=False) + social_container = st.expander("Social Content", expanded=False) + notion_container = st.expander("Notion Publishing", expanded=False) + + try: + # Step 0: Transcription (skipped when transcript is provided) + if audio_file is not None: + _update_pipeline(pipeline_placeholder, 0, 0, 0, "Starting transcription...", start_time) + + suffix = os.path.splitext(audio_file.name)[1] + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file: + 
tmp_file.write(audio_file.getvalue()) + tmp_file_path = tmp_file.name + + try: + _update_pipeline(pipeline_placeholder, 0, 50, 8, "Transcribing audio with Whisper AI...", start_time) + transcript = transcribe_audio(tmp_file_path) + if not transcript: + st.error("Transcription failed: empty result") + return None + finally: + if os.path.exists(tmp_file_path): + os.unlink(tmp_file_path) + + results["transcript"] = transcript + with transcript_container: + st.markdown("**Transcription Complete**") + st.text_area("Transcript", transcript, height=200, disabled=True) + + _update_pipeline(pipeline_placeholder, 0, 100, 17, "Transcription complete!", start_time) + else: + # Pre-transcribed content + if not transcript: + st.error("No audio file or transcript provided.") + return None + results["transcript"] = transcript + _update_pipeline( + pipeline_placeholder, + 1, + 0, + 17, + f"Using pre-transcribed content ({len(transcript)} characters)", + start_time, + ) + + # Steps 1-5: Generation, publishing, saving + _run_generation_steps( + transcript, + results, + custom_prompts, + pipeline_placeholder, + start_time, + wisdom_container, + outline_container, + article_container, + social_container, + notion_container, + ) + + return results + + except Exception as e: + _update_pipeline(pipeline_placeholder, 0, 0, 0, f"Error: {e!s}", start_time) + st.error(f"Pipeline failed: {e!s}") + return None + + +def _run_generation_steps( + transcript, + results, + custom_prompts, + pipeline_placeholder, + start_time, + wisdom_container, + outline_container, + article_container, + social_container, + notion_container, +): + """Execute the generation steps (wisdom through publishing).""" + # --- Wisdom Extraction --- + _update_pipeline(pipeline_placeholder, 1, 0, 17, "Extracting wisdom and insights...", start_time) + wisdom_prompt = get_prompt_for_step("wisdom", custom_prompts) + _update_pipeline(pipeline_placeholder, 1, 50, 25, "Analyzing content for key insights...", start_time) + + wisdom 
= generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) + results["wisdom"] = wisdom + + with wisdom_container: + st.markdown("**Wisdom Extraction Complete**") + st.markdown(wisdom) + _update_pipeline(pipeline_placeholder, 1, 100, 33, "Wisdom extraction complete!", start_time) + + # --- Outline Creation --- + _update_pipeline(pipeline_placeholder, 2, 0, 33, "Creating structured outline...", start_time) + outline_prompt = get_prompt_for_step("outline", custom_prompts) + _update_pipeline(pipeline_placeholder, 2, 50, 42, "Structuring content hierarchy...", start_time) + + outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) + results["outline"] = outline + + with outline_container: + st.markdown("**Outline Creation Complete**") + st.markdown(outline) + _update_pipeline(pipeline_placeholder, 2, 100, 50, "Outline creation complete!", start_time) + + # --- Article Generation --- + _update_pipeline(pipeline_placeholder, 3, 0, 50, "Generating comprehensive article...", start_time) + article_prompt = get_prompt_for_step("article", custom_prompts) + selected_template = st.session_state.get("article_template") + if selected_template: + template_text = load_template(selected_template) + if template_text and article_prompt: + article_prompt = template_text + "\n" + article_prompt + + _update_pipeline(pipeline_placeholder, 3, 50, 58, "Writing detailed article content...", start_time) + + article = generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) + results["article"] = article + + with article_container: + st.markdown("**Article Generation Complete**") + st.markdown(article) + _update_pipeline(pipeline_placeholder, 3, 100, 67, "Article generation complete!", start_time) + + # --- Social Content --- + _update_pipeline(pipeline_placeholder, 4, 0, 67, "Creating social media content...", start_time) + social_prompt = get_prompt_for_step("social", custom_prompts) + 
_update_pipeline(pipeline_placeholder, 4, 50, 75, "Generating social media posts...", start_time) + + social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) + results["social_content"] = social + + with social_container: + st.markdown("**Social Content Creation Complete**") + st.markdown(social) + _update_pipeline(pipeline_placeholder, 4, 100, 83, "Social content creation complete!", start_time) + + # --- Notion Publishing --- + _update_pipeline(pipeline_placeholder, 5, 0, 83, "Publishing to Notion workspace...", start_time) + + if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): + ai_title = generate_ai_title(transcript) + _update_pipeline(pipeline_placeholder, 5, 50, 90, "Uploading content to Notion...", start_time) + + notion_url = create_notion_page(ai_title, results) + if notion_url: + results["notion_url"] = notion_url + with notion_container: + st.markdown("**Notion Publishing Complete**") + st.markdown(f"**Page Title:** {ai_title}") + st.markdown(f"[Open in Notion]({notion_url})") + else: + with notion_container: + st.markdown("**Notion Publishing Failed**") + st.warning("Check your Notion API configuration in Settings.") + else: + with notion_container: + st.markdown("**Notion Publishing Disabled**") + st.info("Configure Notion API in Settings to enable auto-publishing.") + + # --- Save to DB --- + _update_pipeline(pipeline_placeholder, 5, 90, 96, "Saving to database...", start_time) + try: + save_content_to_db(results) + except Exception as e: + st.warning(f"Content saved locally but database save failed: {e}") + + _update_pipeline( + pipeline_placeholder, 5, 100, 100, "Pipeline complete! All content generated successfully.", start_time + ) + + st.markdown( + """ +
+

Pipeline Complete!

+

Your content has been transformed with AI magic

+
+ """, + unsafe_allow_html=True, + ) + + time.sleep(2) + pipeline_placeholder.empty() + + +def _update_pipeline(placeholder, step, step_progress, total_progress, message, start_time): + """Update the pipeline visualization placeholder.""" + with placeholder.container(): + show_processing_pipeline( + current_step=step, + step_progress=step_progress, + total_progress=total_progress, + status_message=message, + processing_time=f"{time.time() - start_time:.1f}s", + ) + + +def save_content_to_db(content_data: dict): + """Save generated content to Supabase database.""" + try: + supabase = get_supabase_client() + if supabase and st.session_state.get("user_id"): + content_id = supabase.save_content( + st.session_state.user_id, + { + "title": content_data.get("title", "Untitled"), + "transcript": content_data.get("transcript", ""), + "wisdom": content_data.get("wisdom", ""), + "outline": content_data.get("outline", ""), + "article": content_data.get("article", ""), + "social_content": content_data.get("social_content", ""), + "notion_url": content_data.get("notion_url", ""), + "created_at": datetime.now().isoformat(), + }, + ) + if content_id: + st.success(f"Content saved to database (ID: {content_id})") + except Exception as e: + st.warning(f"Database save failed: {e}") + + +def show_processing_pipeline( + current_step=0, + step_progress=0, + total_progress=0, + status_message="", + processing_time="", +): + """Display Aurora-styled processing pipeline visualization.""" + steps = PIPELINE_STEP_NAMES + step_icons = ["\U0001f3a4", "\U0001f4a1", "\U0001f4cb", "\U0001f4dd", "\U0001f4f1", "\U0001f30c"] + + step_items = [] + for i, (name, icon) in enumerate(zip(steps, step_icons, strict=False)): + if i < current_step: + state = "completed" + elif i == current_step: + state = "active" + else: + state = "pending" + + step_items.append(f""" +
+
{icon}
+
{name}
+ {'
' if state == "active" else ""} +
+ """) + + pipeline_html = f""" +
+
+

Content Pipeline

+
+
+ {total_progress}% +
+
+
+ {"".join(step_items)} +
+ { + f''' +
+ + {status_message} + {processing_time} +
+ ''' + if status_message + else "" + } +
+ """ + + st.markdown(pipeline_html, unsafe_allow_html=True) diff --git a/core/prompt_loader.py b/core/prompt_loader.py new file mode 100644 index 0000000..7011b40 --- /dev/null +++ b/core/prompt_loader.py @@ -0,0 +1,61 @@ +""" +Prompt Loading Utilities +======================== + +Load and manage AI prompts from the filesystem. +""" + +import logging +import os + +import streamlit as st + +logger = logging.getLogger(__name__) + + +def load_custom_prompts() -> dict[str, str]: + """Load custom prompts from the prompts/default directory.""" + prompts = {} + prompt_dir = "prompts/default" + + if os.path.exists(prompt_dir): + for filename in os.listdir(prompt_dir): + if filename.endswith(".md"): + prompt_name = filename.replace(".md", "") + try: + with open(os.path.join(prompt_dir, filename), encoding="utf-8") as f: + prompts[prompt_name] = f.read() + except Exception as e: + st.warning(f"Failed to load prompt {filename}: {e}") + + return prompts + + +def load_template(template_name: str) -> str | None: + """Load an article template by name from the templates folder.""" + template_path = os.path.join("templates", f"{template_name}.md") + if os.path.exists(template_path): + with open(template_path, encoding="utf-8") as f: + return f.read() + return None + + +# Maps pipeline step names to prompt file basenames +_STEP_PROMPT_MAP = { + "wisdom": "wisdom_extraction", + "outline": "outline_creation", + "social": "social_media", + "article": "article_generation", +} + + +def get_prompt_for_step(step_name: str, custom_prompts: dict[str, str] | None = None) -> str | None: + """Get the appropriate prompt for a pipeline step.""" + if not custom_prompts: + custom_prompts = load_custom_prompts() + + prompt_key = _STEP_PROMPT_MAP.get(step_name) + if prompt_key and prompt_key in custom_prompts: + return custom_prompts[prompt_key] + + return None From 1f9f59f12a114734e1fbf42e9368c78e1b7064ee Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:03:54 -0800 Subject: [PATCH 
24/46] refactor(upload): decompose file_upload.py and extract CSS Split the 1,225-line file_upload.py into focused modules: - core/file_upload.py (404 lines): FileUploadManager + progress UI - core/large_file_processor.py (404 lines): EnhancedLargeFileProcessor - static/css/upload.css (315 lines): all upload zone CSS CSS loaded from static file at runtime instead of inline strings. EnhancedLargeFileProcessor re-exported from file_upload for backward compatibility. Co-Authored-By: Claude Opus 4.6 --- core/file_upload.py | 1079 ++++------------------------------ core/large_file_processor.py | 433 ++++++++++++++ static/css/upload.css | 315 ++++++++++ 3 files changed, 878 insertions(+), 949 deletions(-) create mode 100644 core/large_file_processor.py create mode 100644 static/css/upload.css diff --git a/core/file_upload.py b/core/file_upload.py index 54f84aa..8c6eeda 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -1,47 +1,61 @@ """ -Enhanced File Upload Module for WhisperForge v3.0.0 -Supports large file processing up to 2GB with intelligent chunking and parallel transcription +File Upload Module +================== + +Upload zone UI, progress indicators, and pydub-based chunked processing +for standard audio files. For FFmpeg-based large file processing (2GB+), +see large_file_processor.py. 
""" -import asyncio import logging import math -import mimetypes import os import tempfile -import threading import time from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Optional, List, Dict, Any, Tuple import streamlit as st -# Configure logging +from .large_file_processor import EnhancedLargeFileProcessor # noqa: F401 - re-export + logger = logging.getLogger(__name__) +_UPLOAD_CSS_CACHE = None + + +def _load_upload_css(): + """Load upload CSS from static file (cached).""" + global _UPLOAD_CSS_CACHE # noqa: PLW0603 + if _UPLOAD_CSS_CACHE is None: + css_path = os.path.join("static", "css", "upload.css") + if os.path.exists(css_path): + with open(css_path, encoding="utf-8") as f: + _UPLOAD_CSS_CACHE = f"" + else: + _UPLOAD_CSS_CACHE = "" + return _UPLOAD_CSS_CACHE + class FileUploadManager: - """πŸš€ ENHANCED: Large file upload manager with chunking and parallel processing""" + """Large file upload manager with chunking and parallel processing.""" def __init__(self): self.supported_formats = { - 'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], - 'video': ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'], - 'text': ['.txt', '.md', '.pdf', '.docx'] + "audio": [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma", ".webm", ".mpeg", ".mpga", ".oga"], + "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], + "text": [".txt", ".md", ".pdf", ".docx"], } self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_size_mb = 20 # 20MB chunks for optimal processing - self.max_parallel_chunks = 4 # Process 4 chunks simultaneously + self.chunk_size_mb = 20 # 20MB chunks for pydub processing + self.max_parallel_chunks = 3 - def create_large_file_upload_zone(self) -> Optional[Any]: - """Create enhanced upload zone for large files""" - - # Enhanced upload zone HTML with large file support - upload_html = f""" + def create_upload_zone(self): + """Create enhanced 
upload zone for large files.""" + upload_html = """
-
🎡
+
\U0001f3b5

Drop your large audio files here

@@ -54,15 +68,15 @@ def create_large_file_upload_zone(self) -> Optional[Any]:
- ⚑ + \u26a1 Parallel Processing
- πŸ“Š + \U0001f4ca Real-time Progress
- πŸ”„ + \U0001f504 Auto-retry on Errors
@@ -70,269 +84,128 @@ def create_large_file_upload_zone(self) -> Optional[Any]:
""" - # Enhanced CSS for large file upload - upload_css = """ - - """ - - st.markdown(upload_css, unsafe_allow_html=True) + st.markdown(_load_upload_css(), unsafe_allow_html=True) st.markdown(upload_html, unsafe_allow_html=True) - # File uploader with large file support uploaded_file = st.file_uploader( "Choose an audio file", - type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga'], + type=["mp3", "wav", "m4a", "aac", "ogg", "flac", "wma", "webm", "mpeg", "mpga", "oga"], help="Upload audio files up to 2GB. Large files will be automatically chunked for optimal processing.", - label_visibility="collapsed" + label_visibility="collapsed", ) return uploaded_file - def process_large_file(self, uploaded_file) -> Dict[str, Any]: - """πŸš€ Process large files with chunking and parallel transcription""" - + def process_large_file(self, uploaded_file) -> dict: + """Process large files with chunking and parallel transcription.""" if not uploaded_file: return {"success": False, "error": "No file provided"} - # Validate file validation = self.validate_large_file(uploaded_file) if not validation["valid"]: return {"success": False, "error": validation["error"]} file_size_mb = len(uploaded_file.getvalue()) / (1024 * 1024) - # Show file info - st.markdown(f""" - ### πŸ“ File Processing - **File:** {uploaded_file.name} - **Size:** {file_size_mb:.1f} MB - **Processing Strategy:** {"Chunked Parallel Processing" if file_size_mb > self.chunk_size_mb else "Direct Processing"} - """) + st.markdown( + f"### \U0001f4c1 File Processing\n" + f"**File:** {uploaded_file.name}\n" + f"**Size:** {file_size_mb:.1f} MB\n" + f"**Processing Strategy:** {'Chunked Parallel Processing' if file_size_mb > self.chunk_size_mb else 'Direct Processing'}" + ) if file_size_mb <= self.chunk_size_mb: - # Small file - process directly return self._process_small_file(uploaded_file) else: - # Large file - chunk and process in parallel return 
self._process_large_file_chunked(uploaded_file) - def _process_small_file(self, uploaded_file) -> Dict[str, Any]: - """Process small files directly without chunking""" - + def _process_small_file(self, uploaded_file) -> dict: + """Process small files directly without chunking.""" progress_container = st.empty() with progress_container.container(): - st.markdown("#### 🎡 Processing Audio") + st.markdown("#### \U0001f3b5 Processing Audio") progress_bar = st.progress(0.0, "Starting transcription...") try: - # Import transcription function from .content_generation import transcribe_audio - # Update progress progress_bar.progress(0.3, "Transcribing audio...") - - # Transcribe transcript = transcribe_audio(uploaded_file) if not transcript or "Error" in transcript: - progress_bar.progress(1.0, "❌ Transcription failed") + progress_bar.progress(1.0, "\u274c Transcription failed") return {"success": False, "error": transcript or "Transcription failed"} - progress_bar.progress(1.0, "βœ… Transcription complete!") - - return { - "success": True, - "transcript": transcript, - "chunks": 1, - "total_duration": "N/A" - } + progress_bar.progress(1.0, "\u2705 Transcription complete!") + return {"success": True, "transcript": transcript, "chunks": 1, "total_duration": "N/A"} except Exception as e: - progress_bar.progress(1.0, f"❌ Error: {str(e)}") + progress_bar.progress(1.0, f"\u274c Error: {e!s}") return {"success": False, "error": str(e)} - def _process_large_file_chunked(self, uploaded_file) -> Dict[str, Any]: - """πŸš€ Process large files with intelligent chunking and parallel transcription""" - - st.markdown("#### πŸ”„ Chunked Processing Pipeline") + def _process_large_file_chunked(self, uploaded_file) -> dict: + """Process large files with intelligent chunking and parallel transcription.""" + st.markdown("#### \U0001f504 Chunked Processing Pipeline") try: - # Step 1: Create chunks chunks_info = self._create_audio_chunks(uploaded_file) if not chunks_info["success"]: return 
chunks_info chunks = chunks_info["chunks"] total_chunks = len(chunks) - st.markdown(f"**Created {total_chunks} chunks for parallel processing**") - # Step 2: Create progress tracking containers progress_container = st.empty() chunks_container = st.empty() - # Step 3: Process chunks in parallel with real-time updates - transcription_results = self._transcribe_chunks_parallel( - chunks, progress_container, chunks_container - ) + transcription_results = self._transcribe_chunks_parallel(chunks, progress_container, chunks_container) if not transcription_results["success"]: return transcription_results - # Step 4: Reassemble transcript final_transcript = self._reassemble_transcript(transcription_results["chunk_transcripts"]) - - # Step 5: Cleanup temporary files self._cleanup_chunks(chunks) - # Success! with progress_container.container(): - st.success("βœ… Large file processing complete!") - st.markdown(f""" - **Processing Summary:** - - Total chunks: {total_chunks} - - Successful transcriptions: {len(transcription_results['chunk_transcripts'])} - - Final transcript length: {len(final_transcript)} characters - """) + st.success("\u2705 Large file processing complete!") + st.markdown( + f"**Processing Summary:**\n" + f"- Total chunks: {total_chunks}\n" + f"- Successful transcriptions: {len(transcription_results['chunk_transcripts'])}\n" + f"- Final transcript length: {len(final_transcript)} characters" + ) return { "success": True, "transcript": final_transcript, "chunks": total_chunks, - "processing_time": transcription_results.get("total_time", "N/A") + "processing_time": transcription_results.get("total_time", "N/A"), } except Exception as e: logger.exception("Error in large file processing:") - st.error(f"❌ Large file processing failed: {str(e)}") + st.error(f"\u274c Large file processing failed: {e!s}") return {"success": False, "error": str(e)} - def _create_audio_chunks(self, uploaded_file) -> Dict[str, Any]: - """Create audio chunks for parallel processing""" - + 
def _create_audio_chunks(self, uploaded_file) -> dict: + """Create audio chunks for parallel processing.""" try: - st.markdown("##### πŸ“‚ Creating Audio Chunks...") + from pydub import AudioSegment + + st.markdown("##### \U0001f4c2 Creating Audio Chunks...") - # Save uploaded file temporarily with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as temp_file: uploaded_file.seek(0) temp_file.write(uploaded_file.read()) temp_file_path = temp_file.name - # Load audio with pydub audio = AudioSegment.from_file(temp_file_path) duration_ms = len(audio) duration_minutes = duration_ms / (1000 * 60) - # Calculate chunk duration (aim for ~20MB chunks) - chunk_duration_ms = self.chunk_size_mb * 60 * 1000 # Convert MB to minutes to ms + chunk_duration_ms = self.chunk_size_mb * 60 * 1000 num_chunks = math.ceil(duration_ms / chunk_duration_ms) st.markdown(f"**Audio Duration:** {duration_minutes:.1f} minutes") @@ -345,68 +218,55 @@ def _create_audio_chunks(self, uploaded_file) -> Dict[str, Any]: start_ms = i * chunk_duration_ms end_ms = min((i + 1) * chunk_duration_ms, duration_ms) - # Extract chunk chunk = audio[start_ms:end_ms] + with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as chunk_file: + chunk.export(chunk_file.name, format="wav") + + chunks.append( + { + "index": i, + "file_path": chunk_file.name, + "start_time": start_ms / 1000, + "end_time": end_ms / 1000, + "duration": (end_ms - start_ms) / 1000, + } + ) - # Save chunk to temporary file - chunk_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") - chunk.export(chunk_file.name, format="wav") - - chunks.append({ - "index": i, - "file_path": chunk_file.name, - "start_time": start_ms / 1000, - "end_time": end_ms / 1000, - "duration": (end_ms - start_ms) / 1000 - }) - - # Update progress progress = (i + 1) / num_chunks chunk_progress.progress(progress, f"Created chunk {i + 1}/{num_chunks}") - # Cleanup original temp file os.unlink(temp_file_path) - - 
chunk_progress.progress(1.0, f"βœ… Created {num_chunks} chunks successfully!") + chunk_progress.progress(1.0, f"\u2705 Created {num_chunks} chunks successfully!") return {"success": True, "chunks": chunks} except Exception as e: logger.exception("Error creating audio chunks:") - return {"success": False, "error": f"Failed to create chunks: {str(e)}"} - - def _transcribe_chunks_parallel(self, chunks: List[Dict], progress_container, chunks_container) -> Dict[str, Any]: - """πŸš€ Transcribe chunks in parallel with real-time progress tracking""" + return {"success": False, "error": f"Failed to create chunks: {e!s}"} + def _transcribe_chunks_parallel(self, chunks: list[dict], progress_container, chunks_container) -> dict: + """Transcribe chunks in parallel with real-time progress tracking.""" total_chunks = len(chunks) completed_chunks = 0 chunk_transcripts = {} chunk_statuses = {i: "waiting" for i in range(total_chunks)} start_time = time.time() - # Import transcription function from .content_generation import get_openai_client - def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: - """Transcribe a single chunk""" + def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: + """Transcribe a single chunk.""" try: chunk_index = chunk_info["index"] chunk_file_path = chunk_info["file_path"] - - # Update status to processing chunk_statuses[chunk_index] = "processing" - # Get OpenAI client openai_client = get_openai_client() if not openai_client: return chunk_index, "Error: OpenAI API key not configured", False - # Transcribe chunk with open(chunk_file_path, "rb") as audio_file: - transcript = openai_client.audio.transcriptions.create( - model="whisper-1", - file=audio_file - ) + transcript = openai_client.audio.transcriptions.create(model="whisper-1", file=audio_file) chunk_statuses[chunk_index] = "completed" return chunk_index, transcript.text, True @@ -414,28 +274,18 @@ def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, 
bool]: except Exception as e: chunk_statuses[chunk_index] = "error" logger.exception(f"Error transcribing chunk {chunk_index}:") - return chunk_index, f"Error: {str(e)}", False + return chunk_index, f"Error: {e!s}", False - # Process chunks in parallel with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: - # Submit all chunks for processing - future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk["index"] - for chunk in chunks - } + future_to_chunk = {executor.submit(transcribe_single_chunk, chunk): chunk["index"] for chunk in chunks} - # Monitor progress in real-time while completed_chunks < total_chunks: - # Update progress display with progress_container.container(): overall_progress = completed_chunks / total_chunks st.progress(overall_progress, f"Transcribing chunks: {completed_chunks}/{total_chunks}") - # Update individual chunk statuses with chunks_container.container(): - st.markdown("##### 🧩 Chunk Processing Status") - - # Create columns for chunk status display + st.markdown("##### \U0001f9e9 Chunk Processing Status") cols_per_row = 4 rows = math.ceil(total_chunks / cols_per_row) @@ -445,77 +295,55 @@ def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: chunk_idx = row * cols_per_row + col_idx if chunk_idx < total_chunks: status = chunk_statuses[chunk_idx] - - if status == "waiting": - icon, color, text = "⏳", "#FFA500", "Waiting" - elif status == "processing": - icon, color, text = "πŸ”„", "#00BFFF", "Processing" - elif status == "completed": - icon, color, text = "βœ…", "#00FF7F", "Complete" - else: # error - icon, color, text = "❌", "#FF6B6B", "Error" + status_map = { + "waiting": ("\u23f3", "#FFA500", "Waiting"), + "processing": ("\U0001f504", "#00BFFF", "Processing"), + "completed": ("\u2705", "#00FF7F", "Complete"), + } + icon, color, text = status_map.get(status, ("\u274c", "#FF6B6B", "Error")) with cols[col_idx]: - st.markdown(f""" -
-
{icon}
-
Chunk {chunk_idx + 1}
-
{text}
-
- """, unsafe_allow_html=True) - - # Check for completed futures + st.markdown( + f'
' + f'
{icon}
' + f'
Chunk {chunk_idx + 1}
' + f'
{text}
', + unsafe_allow_html=True, + ) + for future in as_completed(future_to_chunk, timeout=1): chunk_index, transcript, success = future.result() - if success: chunk_transcripts[chunk_index] = transcript - completed_chunks += 1 break - # Small delay to prevent excessive updates time.sleep(0.5) - # Final progress update with progress_container.container(): - st.progress(1.0, f"βœ… All chunks transcribed: {completed_chunks}/{total_chunks}") + st.progress(1.0, f"\u2705 All chunks transcribed: {completed_chunks}/{total_chunks}") processing_time = time.time() - start_time - - # Check if we have enough successful transcriptions successful_chunks = len(chunk_transcripts) - if successful_chunks < total_chunks * 0.8: # Require at least 80% success - return { - "success": False, - "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" - } + + if successful_chunks < total_chunks * 0.8: + return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} return { "success": True, "chunk_transcripts": chunk_transcripts, "total_time": f"{processing_time:.1f}s", - "success_rate": f"{successful_chunks}/{total_chunks}" + "success_rate": f"{successful_chunks}/{total_chunks}", } - def _reassemble_transcript(self, chunk_transcripts: Dict[int, str]) -> str: - """Reassemble transcript from chunks in correct order""" - - # Sort chunks by index and concatenate + def _reassemble_transcript(self, chunk_transcripts: dict[int, str]) -> str: + """Reassemble transcript from chunks in correct order.""" sorted_chunks = sorted(chunk_transcripts.items()) - full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) + return " ".join([transcript for _, transcript in sorted_chunks]) - return full_transcript - - def _cleanup_chunks(self, chunks: List[Dict]): - """Clean up temporary chunk files""" + def _cleanup_chunks(self, chunks: list[dict]): + """Clean up temporary chunk files.""" for chunk in chunks: try: if 
os.path.exists(chunk["file_path"]): @@ -523,31 +351,29 @@ def _cleanup_chunks(self, chunks: List[Dict]): except Exception as e: logger.warning(f"Failed to cleanup chunk file {chunk['file_path']}: {e}") - def validate_large_file(self, file) -> Dict[str, Any]: - """Validate large file upload""" + def validate_large_file(self, file) -> dict: + """Validate large file upload.""" if not file: return {"valid": False, "error": "No file provided"} - # Check file size file_size = len(file.getvalue()) if file_size > self.max_file_size: size_gb = file_size / (1024 * 1024 * 1024) return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - # Check file type file_extension = os.path.splitext(file.name)[1].lower() - if file_extension not in self.supported_formats['audio']: + if file_extension not in self.supported_formats["audio"]: return {"valid": False, "error": f"Unsupported format: {file_extension}"} return {"valid": True} -# Create alias for backward compatibility +# Backward compatibility alias LargeFileUploadManager = FileUploadManager def create_upload_progress_indicator(filename: str, progress: float = 0.0): - """Create a progress indicator for file upload""" + """Create a progress indicator for file upload.""" progress_html = f"""
@@ -559,667 +385,22 @@ def create_upload_progress_indicator(filename: str, progress: float = 0.0):
- {'Uploading...' if progress < 100 else 'Upload complete!'} + {"Uploading..." if progress < 100 else "Upload complete!"}
""" - progress_css = """ - - """ - - return st.markdown(progress_css + progress_html, unsafe_allow_html=True) + css = _load_upload_css() + return st.markdown(css + progress_html, unsafe_allow_html=True) def simulate_upload_progress(filename: str, duration: float = 2.0): - """Simulate upload progress for demonstration""" + """Simulate upload progress for demonstration.""" progress_container = st.empty() steps = 20 for i in range(steps + 1): progress = (i / steps) * 100 - with progress_container: create_upload_progress_indicator(filename, progress) - - if i < steps: - time.sleep(duration / steps) - - return True - - -class EnhancedLargeFileProcessor: - """πŸš€ Enhanced Large File Processor with FFmpeg for 2GB+ files - - Features: - - FFmpeg-based processing for memory efficiency - - Support for files up to 2GB - - Intelligent 10-minute audio chunking - - Parallel transcription with ThreadPoolExecutor - - Memory-efficient streaming without loading entire files into RAM - - Enhanced error handling with automatic fallback - """ - - def __init__(self): - self.supported_formats = { - 'audio': ['.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.wma', '.webm', '.mpeg', '.mpga', '.oga'], - 'video': ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'] # Extract audio from video - } - self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_duration_minutes = 10 # 10-minute chunks optimized for Whisper - self.max_parallel_chunks = 4 # Process 4 chunks simultaneously - self.temp_dir = None - - def check_ffmpeg_availability(self) -> bool: - """Check if FFmpeg is available on the system""" - try: - import subprocess - result = subprocess.run(['ffmpeg', '-version'], - capture_output=True, text=True, timeout=5) - return result.returncode == 0 - except (subprocess.TimeoutExpired, FileNotFoundError, Exception): - return False - - def get_audio_info(self, file_path: str) -> Dict[str, Any]: - """Get audio file information using ffprobe""" - try: - import subprocess - import 
json - - cmd = [ - 'ffprobe', '-v', 'quiet', '-print_format', 'json', - '-show_format', '-show_streams', file_path - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - if result.returncode != 0: - return {"error": f"ffprobe failed: {result.stderr}"} - - data = json.loads(result.stdout) - format_info = data.get('format', {}) - - # Find audio stream - audio_stream = None - for stream in data.get('streams', []): - if stream.get('codec_type') == 'audio': - audio_stream = stream - break - - if not audio_stream: - return {"error": "No audio stream found"} - - duration = float(format_info.get('duration', 0)) - size = int(format_info.get('size', 0)) - - return { - "duration": duration, - "size": size, - "format": format_info.get('format_name', 'unknown'), - "codec": audio_stream.get('codec_name', 'unknown'), - "sample_rate": int(audio_stream.get('sample_rate', 0)), - "channels": int(audio_stream.get('channels', 0)) - } - - except Exception as e: - return {"error": f"Failed to get audio info: {str(e)}"} - - def validate_file(self, uploaded_file) -> Dict[str, Any]: - """Enhanced file validation for large files""" - if not uploaded_file: - return {"valid": False, "error": "No file provided"} - - # Check file size - file_size = len(uploaded_file.getvalue()) - if file_size > self.max_file_size: - size_gb = file_size / (1024 * 1024 * 1024) - return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} - - # Check file extension - file_extension = os.path.splitext(uploaded_file.name)[1].lower() - all_formats = self.supported_formats['audio'] + self.supported_formats['video'] - if file_extension not in all_formats: - return {"valid": False, "error": f"Unsupported format: {file_extension}"} - - # Check FFmpeg availability for large files - if file_size > 100 * 1024 * 1024 and not self.check_ffmpeg_availability(): # 100MB+ - return { - "valid": False, - "error": "FFmpeg required for large files but not available. 
Please install FFmpeg." - } - - return { - "valid": True, - "size": file_size, - "size_mb": file_size / (1024 * 1024), - "requires_chunking": file_size > 100 * 1024 * 1024, # Chunk files > 100MB - "format": file_extension - } - - def create_enhanced_upload_interface(self) -> Optional[Any]: - """Create enhanced upload interface for large files""" - - # Enhanced upload zone HTML - upload_html = f""" -
-
-
-
🎡
-
-
-
-

Enhanced Large File Upload

-

Powered by FFmpeg β€’ Up to 2GB β€’ Intelligent Chunking

-
-
- ⚑ - 10-min chunks -
-
- πŸ”„ - Parallel processing -
-
- πŸ’Ύ - Memory efficient -
-
- 🎯 - Auto-retry -
-
-
-
- Audio: MP3, WAV, M4A, AAC, OGG, FLAC, WEBM -
-
- Video: MP4, AVI, MOV, MKV, WMV (audio extraction) -
-
-
-
-
- """ - - # Enhanced CSS - upload_css = """ - - """ - - st.markdown(upload_css, unsafe_allow_html=True) - st.markdown(upload_html, unsafe_allow_html=True) - - # Enhanced file uploader - uploaded_file = st.file_uploader( - "Choose an audio or video file", - type=['mp3', 'wav', 'm4a', 'aac', 'ogg', 'flac', 'wma', 'webm', 'mpeg', 'mpga', 'oga', - 'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'], - help="Upload audio/video files up to 2GB. Large files automatically use FFmpeg chunking for optimal processing.", - label_visibility="collapsed" - ) - - return uploaded_file - - def process_large_file(self, uploaded_file) -> Dict[str, Any]: - """Enhanced large file processing with FFmpeg""" - - # Validate file first - validation = self.validate_file(uploaded_file) - if not validation["valid"]: - return {"success": False, "error": validation["error"]} - - file_size_mb = validation["size_mb"] - requires_chunking = validation["requires_chunking"] - - st.info(f"πŸ“ **File:** {uploaded_file.name} ({file_size_mb:.1f} MB)") - - if requires_chunking: - st.info("πŸ”§ **Processing Method:** FFmpeg chunking (large file detected)") - return self._process_with_ffmpeg_chunking(uploaded_file) - else: - st.info("⚑ **Processing Method:** Standard processing (small file)") - return self._process_standard(uploaded_file) - - def _process_standard(self, uploaded_file) -> Dict[str, Any]: - """Process smaller files using standard method""" - try: - from core.content_generation import transcribe_audio - - # Create temporary file - with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file: - tmp_file.write(uploaded_file.getvalue()) - tmp_file_path = tmp_file.name - - try: - # Transcribe directly - with st.spinner("🎯 Transcribing audio..."): - transcript = transcribe_audio(tmp_file_path) - - return { - "success": True, - "transcript": transcript, - "method": "standard", - "chunks_processed": 1 - } - - finally: - # Cleanup - if os.path.exists(tmp_file_path): 
- os.unlink(tmp_file_path) - - except Exception as e: - return {"success": False, "error": f"Standard processing failed: {str(e)}"} - - def _process_with_ffmpeg_chunking(self, uploaded_file) -> Dict[str, Any]: - """Process large files using FFmpeg chunking""" - - # Setup temporary directory - self.temp_dir = tempfile.mkdtemp(prefix="whisperforge_chunks_") - - try: - # Save uploaded file - input_file_path = os.path.join(self.temp_dir, uploaded_file.name) - with open(input_file_path, 'wb') as f: - f.write(uploaded_file.getvalue()) - - # Get audio information - st.info("πŸ” Analyzing audio file...") - audio_info = self.get_audio_info(input_file_path) - - if "error" in audio_info: - return {"success": False, "error": audio_info["error"]} - - duration = audio_info["duration"] - st.success( - f"πŸ“Š **Duration:** {duration / 60:.1f} minutes | **Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}") - - # Create chunks using FFmpeg - st.info("βœ‚οΈ Creating audio chunks...") - chunks_result = self._create_ffmpeg_chunks(input_file_path, duration) - - if not chunks_result["success"]: - return chunks_result - - chunks = chunks_result["chunks"] - st.success(f"βœ… Created {len(chunks)} chunks of ~{self.chunk_duration_minutes} minutes each") - - # Process chunks in parallel - st.info("πŸš€ Starting parallel transcription...") - transcription_result = self._transcribe_chunks_parallel_ffmpeg(chunks) - - if not transcription_result["success"]: - return transcription_result - - # Reassemble transcript - full_transcript = self._reassemble_transcript_ffmpeg(transcription_result["chunk_transcripts"]) - - return { - "success": True, - "transcript": full_transcript, - "method": "ffmpeg_chunking", - "chunks_processed": len(chunks), - "processing_time": transcription_result.get("total_time", "unknown"), - "success_rate": transcription_result.get("success_rate", "unknown") - } - - except Exception as e: - return {"success": False, "error": f"FFmpeg processing failed: 
{str(e)}"} - - finally: - # Cleanup temporary directory - self._cleanup_temp_dir() - - def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> Dict[str, Any]: - """Create audio chunks using FFmpeg""" - try: - import subprocess - - chunk_duration_seconds = self.chunk_duration_minutes * 60 - num_chunks = math.ceil(duration / chunk_duration_seconds) - chunks = [] - - progress_bar = st.progress(0, f"Creating chunks: 0/{num_chunks}") - - for i in range(num_chunks): - start_time = i * chunk_duration_seconds - chunk_filename = f"chunk_{i:03d}.wav" - chunk_path = os.path.join(self.temp_dir, chunk_filename) - - # FFmpeg command to extract chunk with audio optimization - cmd = [ - 'ffmpeg', '-i', input_file_path, - '-ss', str(start_time), - '-t', str(chunk_duration_seconds), - '-ar', '16000', # 16kHz sample rate (optimal for Whisper) - '-ac', '1', # Mono audio - '-acodec', 'pcm_s16le', # PCM format - '-y', # Overwrite output files - chunk_path - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - - if result.returncode != 0: - return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} - - # Verify chunk was created - if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0: - continue # Skip empty chunks - - chunks.append({ - "index": i, - "file_path": chunk_path, - "start_time": start_time, - "duration": min(chunk_duration_seconds, duration - start_time) - }) - - # Update progress - progress_bar.progress((i + 1) / num_chunks, f"Creating chunks: {i + 1}/{num_chunks}") - - return {"success": True, "chunks": chunks} - - except Exception as e: - return {"success": False, "error": f"Chunk creation failed: {str(e)}"} - - def _transcribe_chunks_parallel_ffmpeg(self, chunks: List[Dict]) -> Dict[str, Any]: - """Transcribe chunks in parallel using ThreadPoolExecutor""" - from core.content_generation import transcribe_audio - - chunk_transcripts = {} - total_chunks = len(chunks) - - # Create 
progress containers - progress_container = st.empty() - status_container = st.empty() - - def transcribe_single_chunk(chunk_info: Dict) -> Tuple[int, str, bool]: - """Transcribe a single chunk""" - try: - chunk_index = chunk_info["index"] - file_path = chunk_info["file_path"] - - transcript = transcribe_audio(file_path) - return chunk_index, transcript, True - - except Exception as e: - logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") - return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False - - start_time = time.time() - completed_chunks = 0 - - # Process chunks in parallel - with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: - # Submit all chunks - future_to_chunk = { - executor.submit(transcribe_single_chunk, chunk): chunk - for chunk in chunks - } - - # Process completed futures - for future in as_completed(future_to_chunk): - chunk_index, transcript, success = future.result() - - if success: - chunk_transcripts[chunk_index] = transcript - - completed_chunks += 1 - - # Update progress - progress = completed_chunks / total_chunks - with progress_container: - st.progress(progress, f"Transcribing: {completed_chunks}/{total_chunks} chunks") - - with status_container: - elapsed = time.time() - start_time - if completed_chunks > 0: - eta = (elapsed / completed_chunks) * (total_chunks - completed_chunks) - st.info( - f"⏱️ Elapsed: { - elapsed:.1f}s | ETA: { - eta:.1f}s | Success: { - len(chunk_transcripts)}/{completed_chunks}") - - processing_time = time.time() - start_time - successful_chunks = len(chunk_transcripts) - - # Check success rate - if successful_chunks < total_chunks * 0.7: # Require at least 70% success - return { - "success": False, - "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful" - } - - return { - "success": True, - "chunk_transcripts": chunk_transcripts, - "total_time": f"{processing_time:.1f}s", - "success_rate": 
f"{successful_chunks}/{total_chunks}" - } - - def _reassemble_transcript_ffmpeg(self, chunk_transcripts: Dict[int, str]) -> str: - """Reassemble transcript from chunks in correct order""" - # Sort chunks by index and concatenate - sorted_chunks = sorted(chunk_transcripts.items()) - full_transcript = " ".join([transcript for _, transcript in sorted_chunks]) - - # Clean up transcript - full_transcript = full_transcript.strip() - - return full_transcript - - def _cleanup_temp_dir(self): - """Clean up temporary directory and all files""" - if self.temp_dir and os.path.exists(self.temp_dir): - try: - import shutil - shutil.rmtree(self.temp_dir) - self.temp_dir = None - except Exception as e: - logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") + time.sleep(duration / steps) diff --git a/core/large_file_processor.py b/core/large_file_processor.py new file mode 100644 index 0000000..4d42a7f --- /dev/null +++ b/core/large_file_processor.py @@ -0,0 +1,433 @@ +""" +Enhanced Large File Processor +============================= + +FFmpeg-based large file processing with intelligent chunking and parallel transcription. +Supports files up to 2GB with memory-efficient streaming. +""" + +import logging +import math +import os +import tempfile +import time +from concurrent.futures import ThreadPoolExecutor, as_completed + +import streamlit as st + +logger = logging.getLogger(__name__) + +_UPLOAD_CSS_CACHE = None + + +def _load_upload_css(): + """Load upload CSS from static file (cached).""" + global _UPLOAD_CSS_CACHE # noqa: PLW0603 + if _UPLOAD_CSS_CACHE is None: + css_path = os.path.join("static", "css", "upload.css") + if os.path.exists(css_path): + with open(css_path, encoding="utf-8") as f: + _UPLOAD_CSS_CACHE = f"" + else: + _UPLOAD_CSS_CACHE = "" + return _UPLOAD_CSS_CACHE + + +class EnhancedLargeFileProcessor: + """Enhanced Large File Processor with FFmpeg for 2GB+ files. 
+ + Features: + - FFmpeg-based processing for memory efficiency + - Support for files up to 2GB + - Intelligent 10-minute audio chunking + - Parallel transcription with ThreadPoolExecutor + - Memory-efficient streaming without loading entire files into RAM + """ + + def __init__(self): + self.supported_formats = { + "audio": [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma", ".webm", ".mpeg", ".mpga", ".oga"], + "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], + } + self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB + self.chunk_duration_minutes = 10 + self.max_parallel_chunks = 4 + self.temp_dir = None + + def check_ffmpeg_availability(self) -> bool: + """Check if FFmpeg is available on the system.""" + try: + import subprocess + + result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True, timeout=5) # noqa: S603 + return result.returncode == 0 + except (subprocess.TimeoutExpired, FileNotFoundError, Exception): + return False + + def get_audio_info(self, file_path: str) -> dict: + """Get audio file information using ffprobe.""" + try: + import json + import subprocess + + cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", file_path] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) # noqa: S603 + if result.returncode != 0: + return {"error": f"ffprobe failed: {result.stderr}"} + + data = json.loads(result.stdout) + format_info = data.get("format", {}) + + audio_stream = None + for stream in data.get("streams", []): + if stream.get("codec_type") == "audio": + audio_stream = stream + break + + if not audio_stream: + return {"error": "No audio stream found"} + + duration = float(format_info.get("duration", 0)) + size = int(format_info.get("size", 0)) + + return { + "duration": duration, + "size": size, + "format": format_info.get("format_name", "unknown"), + "codec": audio_stream.get("codec_name", "unknown"), + "sample_rate": 
int(audio_stream.get("sample_rate", 0)), + "channels": int(audio_stream.get("channels", 0)), + } + + except Exception as e: + return {"error": f"Failed to get audio info: {e!s}"} + + def validate_file(self, uploaded_file) -> dict: + """Enhanced file validation for large files.""" + if not uploaded_file: + return {"valid": False, "error": "No file provided"} + + file_size = len(uploaded_file.getvalue()) + if file_size > self.max_file_size: + size_gb = file_size / (1024 * 1024 * 1024) + return {"valid": False, "error": f"File too large: {size_gb:.1f}GB (max 2GB)"} + + file_extension = os.path.splitext(uploaded_file.name)[1].lower() + all_formats = self.supported_formats["audio"] + self.supported_formats["video"] + if file_extension not in all_formats: + return {"valid": False, "error": f"Unsupported format: {file_extension}"} + + if file_size > 100 * 1024 * 1024 and not self.check_ffmpeg_availability(): + return { + "valid": False, + "error": "FFmpeg required for large files but not available. Please install FFmpeg.", + } + + return { + "valid": True, + "size": file_size, + "size_mb": file_size / (1024 * 1024), + "requires_chunking": file_size > 100 * 1024 * 1024, + "format": file_extension, + } + + def create_enhanced_upload_interface(self): + """Create enhanced upload interface for large files.""" + upload_html = """ +
+
+
+
\U0001f3b5
+
+
+
+

Enhanced Large File Upload

+

Powered by FFmpeg \u2022 Up to 2GB \u2022 Intelligent Chunking

+
+
+ \u26a1 + 10-min chunks +
+
+ \U0001f504 + Parallel processing +
+
+ \U0001f4be + Memory efficient +
+
+ \U0001f3af + Auto-retry +
+
+
+
+ Audio: MP3, WAV, M4A, AAC, OGG, FLAC, WEBM +
+
+ Video: MP4, AVI, MOV, MKV, WMV (audio extraction) +
+
+
+
+
+ """ + + st.markdown(_load_upload_css(), unsafe_allow_html=True) + st.markdown(upload_html, unsafe_allow_html=True) + + uploaded_file = st.file_uploader( + "Choose an audio or video file", + type=[ + "mp3", + "wav", + "m4a", + "aac", + "ogg", + "flac", + "wma", + "webm", + "mpeg", + "mpga", + "oga", + "mp4", + "avi", + "mov", + "mkv", + "wmv", + "flv", + ], + help="Upload audio/video files up to 2GB. Large files automatically use FFmpeg chunking.", + label_visibility="collapsed", + ) + + return uploaded_file + + def process_large_file(self, uploaded_file) -> dict: + """Enhanced large file processing with FFmpeg.""" + validation = self.validate_file(uploaded_file) + if not validation["valid"]: + return {"success": False, "error": validation["error"]} + + file_size_mb = validation["size_mb"] + requires_chunking = validation["requires_chunking"] + + st.info(f"\U0001f4c1 **File:** {uploaded_file.name} ({file_size_mb:.1f} MB)") + + if requires_chunking: + st.info("\U0001f527 **Processing Method:** FFmpeg chunking (large file detected)") + return self._process_with_ffmpeg_chunking(uploaded_file) + else: + st.info("\u26a1 **Processing Method:** Standard processing (small file)") + return self._process_standard(uploaded_file) + + def _process_standard(self, uploaded_file) -> dict: + """Process smaller files using standard method.""" + try: + from core.content_generation import transcribe_audio + + with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file: + tmp_file.write(uploaded_file.getvalue()) + tmp_file_path = tmp_file.name + + try: + with st.spinner("\U0001f3af Transcribing audio..."): + transcript = transcribe_audio(tmp_file_path) + + return {"success": True, "transcript": transcript, "method": "standard", "chunks_processed": 1} + finally: + if os.path.exists(tmp_file_path): + os.unlink(tmp_file_path) + + except Exception as e: + return {"success": False, "error": f"Standard processing failed: {e!s}"} + + def 
_process_with_ffmpeg_chunking(self, uploaded_file) -> dict: + """Process large files using FFmpeg chunking.""" + self.temp_dir = tempfile.mkdtemp(prefix="whisperforge_chunks_") + + try: + input_file_path = os.path.join(self.temp_dir, uploaded_file.name) + with open(input_file_path, "wb") as f: + f.write(uploaded_file.getvalue()) + + st.info("\U0001f50d Analyzing audio file...") + audio_info = self.get_audio_info(input_file_path) + + if "error" in audio_info: + return {"success": False, "error": audio_info["error"]} + + duration = audio_info["duration"] + st.success( + f"\U0001f4ca **Duration:** {duration / 60:.1f} minutes | " + f"**Format:** {audio_info['format']} | **Codec:** {audio_info['codec']}" + ) + + st.info("\u2702\ufe0f Creating audio chunks...") + chunks_result = self._create_ffmpeg_chunks(input_file_path, duration) + + if not chunks_result["success"]: + return chunks_result + + chunks = chunks_result["chunks"] + st.success(f"\u2705 Created {len(chunks)} chunks of ~{self.chunk_duration_minutes} minutes each") + + st.info("\U0001f680 Starting parallel transcription...") + transcription_result = self._transcribe_chunks_parallel(chunks) + + if not transcription_result["success"]: + return transcription_result + + full_transcript = self._reassemble_transcript(transcription_result["chunk_transcripts"]) + + return { + "success": True, + "transcript": full_transcript, + "method": "ffmpeg_chunking", + "chunks_processed": len(chunks), + "processing_time": transcription_result.get("total_time", "unknown"), + "success_rate": transcription_result.get("success_rate", "unknown"), + } + + except Exception as e: + return {"success": False, "error": f"FFmpeg processing failed: {e!s}"} + + finally: + self._cleanup_temp_dir() + + def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> dict: + """Create audio chunks using FFmpeg.""" + try: + import subprocess + + chunk_duration_seconds = self.chunk_duration_minutes * 60 + num_chunks = math.ceil(duration / 
chunk_duration_seconds) + chunks = [] + + progress_bar = st.progress(0, f"Creating chunks: 0/{num_chunks}") + + for i in range(num_chunks): + start_time = i * chunk_duration_seconds + chunk_filename = f"chunk_{i:03d}.wav" + chunk_path = os.path.join(self.temp_dir, chunk_filename) + + cmd = [ + "ffmpeg", + "-i", + input_file_path, + "-ss", + str(start_time), + "-t", + str(chunk_duration_seconds), + "-ar", + "16000", + "-ac", + "1", + "-acodec", + "pcm_s16le", + "-y", + chunk_path, + ] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # noqa: S603 + + if result.returncode != 0: + return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} + + if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0: + continue + + chunks.append( + { + "index": i, + "file_path": chunk_path, + "start_time": start_time, + "duration": min(chunk_duration_seconds, duration - start_time), + } + ) + + progress_bar.progress((i + 1) / num_chunks, f"Creating chunks: {i + 1}/{num_chunks}") + + return {"success": True, "chunks": chunks} + + except Exception as e: + return {"success": False, "error": f"Chunk creation failed: {e!s}"} + + def _transcribe_chunks_parallel(self, chunks: list[dict]) -> dict: + """Transcribe chunks in parallel using ThreadPoolExecutor.""" + from core.content_generation import transcribe_audio + + chunk_transcripts = {} + total_chunks = len(chunks) + + progress_container = st.empty() + status_container = st.empty() + + def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: + """Transcribe a single chunk.""" + try: + chunk_index = chunk_info["index"] + file_path = chunk_info["file_path"] + transcript = transcribe_audio(file_path) + return chunk_index, transcript, True + except Exception as e: + logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") + return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False + + start_time = time.time() + 
completed_chunks = 0 + + with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: + future_to_chunk = {executor.submit(transcribe_single_chunk, chunk): chunk for chunk in chunks} + + for future in as_completed(future_to_chunk): + chunk_index, transcript, success = future.result() + + if success: + chunk_transcripts[chunk_index] = transcript + + completed_chunks += 1 + + progress = completed_chunks / total_chunks + with progress_container: + st.progress(progress, f"Transcribing: {completed_chunks}/{total_chunks} chunks") + + with status_container: + elapsed = time.time() - start_time + if completed_chunks > 0: + eta = (elapsed / completed_chunks) * (total_chunks - completed_chunks) + successful = len(chunk_transcripts) + st.info( + f"\u23f1\ufe0f Elapsed: {elapsed:.1f}s | ETA: {eta:.1f}s | Success: {successful}/{completed_chunks}" + ) + + processing_time = time.time() - start_time + successful_chunks = len(chunk_transcripts) + + if successful_chunks < total_chunks * 0.7: + return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} + + return { + "success": True, + "chunk_transcripts": chunk_transcripts, + "total_time": f"{processing_time:.1f}s", + "success_rate": f"{successful_chunks}/{total_chunks}", + } + + def _reassemble_transcript(self, chunk_transcripts: dict[int, str]) -> str: + """Reassemble transcript from chunks in correct order.""" + sorted_chunks = sorted(chunk_transcripts.items()) + return " ".join([transcript for _, transcript in sorted_chunks]).strip() + + def _cleanup_temp_dir(self): + """Clean up temporary directory and all files.""" + if self.temp_dir and os.path.exists(self.temp_dir): + try: + import shutil + + shutil.rmtree(self.temp_dir) + self.temp_dir = None + except Exception as e: + logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") diff --git a/static/css/upload.css b/static/css/upload.css new file mode 100644 index 0000000..03751a0 --- /dev/null +++ 
b/static/css/upload.css @@ -0,0 +1,315 @@ +/* Upload Zone - FileUploadManager */ +.large-upload-zone-container { + margin: 20px 0; +} + +.large-upload-zone { + border: 3px dashed rgba(0, 255, 255, 0.3); + border-radius: 16px; + padding: 50px 30px; + text-align: center; + background: linear-gradient(135deg, + rgba(0, 255, 255, 0.03) 0%, + rgba(64, 224, 208, 0.05) 100%); + transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1); + cursor: pointer; + position: relative; + overflow: hidden; +} + +.large-upload-zone:hover { + border-color: rgba(0, 255, 255, 0.6); + background: linear-gradient(135deg, + rgba(0, 255, 255, 0.08) 0%, + rgba(64, 224, 208, 0.12) 100%); + transform: translateY(-3px); + box-shadow: 0 12px 35px rgba(0, 255, 255, 0.2); +} + +.large-upload-zone::before { + content: ""; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient(90deg, + transparent, + rgba(0, 255, 255, 0.15), + transparent); + transition: left 0.6s ease; +} + +.large-upload-zone:hover::before { + left: 100%; +} + +.upload-icon-inner { + font-size: 64px; + opacity: 0.8; + transition: all 0.4s ease; + display: inline-block; +} + +.large-upload-zone:hover .upload-icon-inner { + opacity: 1; + transform: scale(1.15) rotate(10deg); +} + +.upload-text h3 { + color: #00FFFF; + font-size: 1.5rem; + margin: 16px 0 8px 0; + font-weight: 600; +} + +.upload-text p { + color: rgba(255, 255, 255, 0.7); + margin: 0 0 20px 0; + font-size: 1rem; +} + +.upload-info { + display: flex; + justify-content: center; + gap: 15px; + flex-wrap: wrap; + margin-bottom: 20px; +} + +.supported-formats, .max-size, .chunk-info { + font-size: 0.85rem; + color: rgba(255, 255, 255, 0.6); + background: rgba(0, 255, 255, 0.1); + padding: 6px 12px; + border-radius: 6px; + border: 1px solid rgba(0, 255, 255, 0.2); +} + +.upload-features { + display: flex; + justify-content: center; + gap: 30px; + flex-wrap: wrap; +} + +.feature { + display: flex; + align-items: center; + gap: 
8px; + color: rgba(255, 255, 255, 0.8); + font-size: 0.9rem; +} + +.feature-icon { + font-size: 1.2rem; +} + +/* Upload Progress Indicator */ +.upload-progress-container { + background: var(--bg-secondary); + border-radius: var(--card-radius); + padding: 15px; + margin: 10px 0; + border: 1px solid rgba(121, 40, 202, 0.2); +} + +.upload-progress-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 8px; +} + +.upload-filename { + color: var(--text-primary); + font-weight: 500; + font-size: 0.9rem; +} + +.upload-percentage { + color: var(--accent-primary); + font-family: var(--terminal-font); + font-weight: 600; +} + +.upload-progress-bar { + height: 4px; + background: rgba(255, 255, 255, 0.1); + border-radius: 2px; + position: relative; + overflow: hidden; + margin-bottom: 8px; +} + +.upload-progress-fill { + height: 100%; + background: linear-gradient(90deg, #7928CA, #FF0080); + border-radius: 2px; + transition: width 0.3s ease; +} + +.upload-progress-shimmer { + position: absolute; + top: 0; + left: 0; + height: 100%; + background: linear-gradient(90deg, + transparent, + rgba(255, 255, 255, 0.2), + transparent); + animation: shimmer 1.5s ease-in-out infinite; +} + +.upload-status { + color: var(--text-secondary); + font-size: 0.8rem; + text-align: center; +} + +@keyframes shimmer { + 0% { transform: translateX(-100%); } + 100% { transform: translateX(100%); } +} + +/* Enhanced Upload Zone - EnhancedLargeFileProcessor */ +.enhanced-upload-container { + margin: 25px 0; +} + +.enhanced-upload-zone { + border: 3px dashed var(--aurora-border); + border-radius: var(--aurora-radius-large); + padding: var(--aurora-spacing-large); + text-align: center; + background: var(--aurora-bg-glass); + transition: all 0.5s cubic-bezier(0.4, 0, 0.2, 1); + cursor: pointer; + position: relative; + overflow: hidden; +} + +.enhanced-upload-zone:hover { + border-color: var(--aurora-border-hover); + background: var(--aurora-bg-glass); + 
transform: translateY(-5px); + box-shadow: var(--aurora-glow); +} + +.upload-icon-large { + position: relative; + margin-bottom: 20px; +} + +.enhanced-upload-zone .upload-icon-inner { + font-size: 80px; + color: var(--aurora-primary); + opacity: 0.9; + transition: all 0.5s ease; + display: inline-block; + position: relative; + z-index: 2; +} + +.upload-pulse { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 120px; + height: 120px; + border: 2px solid var(--aurora-border); + border-radius: 50%; + animation: aurora-upload-pulse 3s ease-in-out infinite; +} + +.enhanced-upload-zone:hover .upload-icon-inner { + transform: scale(1.2) rotate(15deg); + opacity: 1; +} + +.upload-content h2 { + color: var(--aurora-primary); + font-size: 1.8rem; + margin: 0 0 8px 0; + font-weight: 700; +} + +.upload-subtitle { + color: var(--aurora-text-muted); + margin: 0 0 25px 0; + font-size: 1.1rem; + font-weight: 500; +} + +.upload-features-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); + gap: 15px; + margin: 25px 0; + max-width: 500px; + margin-left: auto; + margin-right: auto; +} + +.feature-card { + background: var(--aurora-bg-glass); + border: 1px solid var(--aurora-border); + border-radius: var(--aurora-radius); + padding: 12px 8px; + display: flex; + flex-direction: column; + align-items: center; + gap: 6px; + transition: all 0.3s ease; +} + +.feature-card:hover { + background: var(--aurora-bg-glass); + border-color: var(--aurora-border-hover); + transform: translateY(-2px); + box-shadow: var(--aurora-glow-subtle); +} + +.feature-card .feature-icon { + font-size: 1.5rem; + color: var(--aurora-primary); +} + +.feature-text { + font-size: 0.85rem; + color: var(--aurora-text); + font-weight: 500; +} + +.supported-formats-enhanced { + margin-top: 20px; + display: flex; + justify-content: center; + gap: 20px; + flex-wrap: wrap; +} + +.format-group { + background: var(--aurora-bg-card); + border: 1px 
solid var(--aurora-border); + border-radius: var(--aurora-radius-small); + padding: 8px 12px; + font-size: 0.85rem; + color: var(--aurora-text-muted); +} + +.format-group strong { + color: var(--aurora-primary); +} + +@keyframes aurora-upload-pulse { + 0%, 100% { + transform: translate(-50%, -50%) scale(1); + opacity: 0.6; + } + 50% { + transform: translate(-50%, -50%) scale(1.2); + opacity: 0.3; + } +} From 40b927ff66546ebbf2507c272a657ac61ea16991 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:09:28 -0800 Subject: [PATCH 25/46] refactor(streaming): decompose streaming_results.py and extract CSS Split the 1,019-line streaming_results.py into focused modules: - core/streaming_results.py (411 lines): streaming display + content cards - core/streaming_status.py (381 lines): enhanced status + 2025 display - core/export.py (143 lines): added streaming download format helpers - static/css/streaming.css (157 lines): extracted CSS constants All modules under 500-line limit. CSS loaded from static files at runtime instead of embedded constants. Download helpers consolidated in the export module. 
Co-Authored-By: Claude Opus 4.6 --- core/export.py | 48 ++ core/streaming_results.py | 1036 ++++++++----------------------------- core/streaming_status.py | 381 ++++++++++++++ static/css/streaming.css | 157 ++++++ 4 files changed, 800 insertions(+), 822 deletions(-) create mode 100644 core/streaming_status.py create mode 100644 static/css/streaming.css diff --git a/core/export.py b/core/export.py index 47979a1..33907bf 100644 --- a/core/export.py +++ b/core/export.py @@ -93,3 +93,51 @@ def export_to_pdf(results: dict) -> bytes: for line in text.split("\n"): pdf.cell(0, 10, txt=line, ln=1) return pdf.output(dest="S").encode("latin-1") + + +def create_json_download(results: dict) -> str: + """Create JSON format download of streaming results.""" + import json + + return json.dumps(results, indent=2, ensure_ascii=False) + + +def create_markdown_download(results: dict) -> str: + """Create Markdown format download of streaming results.""" + content = "# WhisperForge Content Generation Results\n\n" + + sections = { + "transcription": "## Audio Transcription\n\n", + "wisdom_extraction": "## Key Insights & Wisdom\n\n", + "outline_creation": "## Content Outline\n\n", + "article_creation": "## Full Article\n\n", + "social_content": "## Social Media Content\n\n", + "image_prompts": "## Image Generation Prompts\n\n", + } + + for key, header in sections.items(): + if key in results: + content += header + results[key] + "\n\n---\n\n" + + return content + + +def create_text_download(results: dict) -> str: + """Create plain text format download of streaming results.""" + content = "WHISPERFORGE CONTENT GENERATION RESULTS\n" + content += "=" * 50 + "\n\n" + + sections = { + "transcription": "AUDIO TRANSCRIPTION\n" + "-" * 20 + "\n\n", + "wisdom_extraction": "KEY INSIGHTS & WISDOM\n" + "-" * 20 + "\n\n", + "outline_creation": "CONTENT OUTLINE\n" + "-" * 15 + "\n\n", + "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", + "social_content": "SOCIAL MEDIA CONTENT\n" + "-" * 20 + 
"\n\n", + "image_prompts": "IMAGE GENERATION PROMPTS\n" + "-" * 25 + "\n\n", + } + + for key, header in sections.items(): + if key in results: + content += header + results[key] + "\n\n" + "=" * 50 + "\n\n" + + return content diff --git a/core/streaming_results.py b/core/streaming_results.py index cee65ec..a8775b2 100644 --- a/core/streaming_results.py +++ b/core/streaming_results.py @@ -1,474 +1,243 @@ """ -Streaming Results Display for WhisperForge -Shows content as it's generated with beautiful Aurora styling +Streaming Results Display +========================= + +Shows content as it's generated with Aurora styling. +CSS loaded from static/css/streaming.css. """ -import streamlit as st -import html +import os import time -from typing import Dict, Any, Optional -from .streaming_pipeline import get_pipeline_controller import uuid -from .visible_thinking import render_thinking_stream - -# CSS for streaming results -STREAMING_RESULTS_CSS = """ - -""" -# Enhanced UI Functions for streaming results +import streamlit as st +from .export import create_json_download, create_markdown_download, create_text_download +from .streaming_pipeline import get_pipeline_controller +from .streaming_status import ( # noqa: F401 - re-exports + show_2025_content_display, + show_enhanced_streaming_status, + show_processing_status, +) + +_STREAMING_CSS_CACHE = None + + +def _load_streaming_css(): + """Load streaming CSS from static file (cached).""" + global _STREAMING_CSS_CACHE # noqa: PLW0603 + if _STREAMING_CSS_CACHE is None: + css_path = os.path.join("static", "css", "streaming.css") + if os.path.exists(css_path): + with open(css_path, encoding="utf-8") as f: + _STREAMING_CSS_CACHE = f"" + else: + _STREAMING_CSS_CACHE = "" + return _STREAMING_CSS_CACHE -def apply_streaming_css(): - """Apply Aurora theme CSS for streaming results""" -# Generate truly unique keys for Streamlit widgets +def apply_streaming_css(): + """Apply Aurora theme CSS for streaming results.""" + 
st.markdown(_load_streaming_css(), unsafe_allow_html=True) def generate_unique_key(base_name: str) -> str: - """Generate truly unique key for Streamlit widgets to prevent DuplicateWidgetID errors""" + """Generate truly unique key for Streamlit widgets to prevent DuplicateWidgetID errors.""" return f"{base_name}_{uuid.uuid4().hex[:8]}_{int(time.time() * 1000000) % 1000000}" def show_streaming_results(): - """Display content as it streams - REAL-TIME STREAMING IMPLEMENTATION""" + """Display content as it streams - real-time streaming implementation.""" controller = get_pipeline_controller() results = controller.get_results() if not results: - # Show placeholder while waiting for first results - st.markdown("### 🌊 Live Content Stream") - st.info("πŸ”„ Waiting for processing to begin...") + st.markdown("### \U0001f30a Live Content Stream") + st.info("\U0001f504 Waiting for processing to begin...") return - # Show real-time streaming content with smooth reveals show_real_time_content_stream(results, controller) -def show_real_time_content_stream(results: Dict[str, Any], controller): - """πŸš€ ENHANCED: Real-time content streaming with step-by-step reveals""" - st.markdown("### ✨ Content Generation Stream") - - # Define content sections with order and styling - content_sections = [ - ("transcription", "πŸŽ™οΈ", "Audio Transcription", "Converting speech to text..."), - ("wisdom_extraction", "πŸ’Ž", "Key Insights & Wisdom", "Extracting valuable insights..."), - ("research_enrichment", "πŸ”", "Research Links", "Finding supporting resources..."), - ("outline_creation", "πŸ“‹", "Content Outline", "Structuring content flow..."), - ("article_creation", "πŸ“°", "Full Article", "Writing comprehensive article..."), - ("social_content", "πŸ“±", "Social Media Posts", "Creating social content..."), - ("image_prompts", "πŸ–ΌοΈ", "Image Prompts", "Generating visual concepts..."), - ("database_storage", "πŸ’Ύ", "Content Saved", "Storing to your library...") - ] - - # Show each 
section as it becomes available - for i, (step_key, icon, title, processing_msg) in enumerate(content_sections): - - if step_key in results and results[step_key]: - # Content is ready - show it with beautiful styling - show_completed_content_section(step_key, icon, title, results[step_key]) - - elif controller.current_step_index == i and controller.is_active: - # Currently processing this step - show loading state - show_processing_content_section(icon, title, processing_msg) - - elif controller.current_step_index > i: - # This step should be done but no content - show error state - show_error_content_section(icon, title, "Content generation failed") - - # Don't show future steps to avoid spoilers - - -def show_completed_content_section(step_key: str, icon: str, title: str, content: Any): - """Display completed content with beautiful Aurora styling""" - - # Convert content to string safely and sanitize for HTML injection - content_str = str(content) if content else "No content generated" - safe_title = html.escape(title) - safe_icon = html.escape(icon) - - # Beautiful content reveal with animation - st.markdown(f""" -
-
-
-
- {safe_icon} -

{safe_title}

- βœ… Complete -
+def show_real_time_content_stream(results, controller): + """Show real-time streaming content with smooth reveals.""" + apply_streaming_css() + + step_map = { + "transcription": ("\U0001f399\ufe0f", "Audio Transcription"), + "wisdom_extraction": ("\U0001f48e", "Wisdom & Key Insights"), + "research_enrichment": ("\U0001f50d", "Research Enrichment"), + "outline_creation": ("\U0001f4cb", "Content Outline"), + "article_creation": ("\U0001f4f0", "Full Article"), + "social_content": ("\U0001f4f1", "Social Media Content"), + "image_prompts": ("\U0001f5bc\ufe0f", "Image Prompts"), + } + + for step_key, (icon, title) in step_map.items(): + content = results.get(step_key) + errors = controller.get_errors() if hasattr(controller, "get_errors") else {} + + if content: + show_completed_content_section(step_key, icon, title, content) + elif step_key in errors: + show_error_content_section(icon, title, errors[step_key]) + elif controller.is_active and step_key == list(step_map.keys())[controller.current_step_index]: + show_processing_content_section(icon, title, f"Generating {title.lower()}...") + + if controller.is_complete: + _show_download_options(results) + + +def show_completed_content_section(step_key: str, icon: str, title: str, content): + """Display a completed content section with Aurora styling.""" + content_str = str(content) + word_count = len(content_str.split()) + preview_length = 200 + + st.markdown( + f""" +
+
+ {icon} {title} + \u2705 Complete \u2022 {word_count} words
+ """, + unsafe_allow_html=True, + ) - - """, unsafe_allow_html=True) - - # Show content with smart preview/expand - if len(content_str) > 600: - # Long content - show preview with expand - st.markdown("**Preview:**") - preview_text = content_str[:300] + "..." if len(content_str) > 300 else content_str - st.markdown(preview_text) - - # Expandable full content - expand_key = generate_unique_key(f"expand_{step_key}") - with st.expander("πŸ“– Show Full Content", expanded=False): - st.markdown(content_str) - - # Copy button - copy_key = generate_unique_key(f"copy_{step_key}") - if st.button(f"πŸ“‹ Copy {title}", key=copy_key, use_container_width=True): - st.code(content_str, language="markdown") - st.success("βœ… Copied to clipboard area!") - else: - # Short content - show directly - st.markdown(content_str) - - # Inline copy button - copy_key = generate_unique_key(f"copy_inline_{step_key}") - if st.button(f"πŸ“‹ Copy {title}", key=copy_key): - st.code(content_str, language="markdown") - st.success("βœ… Copied!") - - st.markdown("---") + with st.container(): + if len(content_str) > preview_length: + with st.expander(f"View {title}", expanded=False): + st.markdown(content_str) + + col1, col2 = st.columns([1, 4]) + with col1: + copy_key = generate_unique_key(f"copy_{step_key}") + if st.button("\U0001f4cb Copy", key=copy_key, help=f"Copy {title}"): + st.code(content_str, language="markdown") + st.success("Content displayed - copy with Ctrl+A, Ctrl+C") + + # Editor section for article + if step_key == "article_creation": + st.markdown( + """ +
+
+ \u270f\ufe0f Content Editor + EDIT MODE +
+
+ """, + unsafe_allow_html=True, + ) + + edit_key = generate_unique_key("edit_article") + edited_content = st.text_area("Edit Article Content", value=content_str, height=400, key=edit_key) + + if edited_content != content_str: + save_key = generate_unique_key("save_article") + if st.button("\U0001f4be Save Changes", key=save_key): + controller = get_pipeline_controller() + controller.update_result(step_key, edited_content) + st.success("\u2705 Article updated!") + st.rerun() + else: + st.markdown( + f""" +
+ {content_str} +
+ """, + unsafe_allow_html=True, + ) def show_processing_content_section(icon: str, title: str, message: str): - """Show animated processing state for current step""" - safe_icon = html.escape(icon) - safe_title = html.escape(title) - safe_message = html.escape(message) - - st.markdown(f""" -
-
-
-
- {safe_icon} -
-

{safe_title}

-

{safe_message}

-
-
-
-
+ """Display a processing content section with animation.""" + st.markdown( + f""" +
+
+ {icon} {title} + + \U0001f504 Processing + +
+
+
+
+ + {message} +
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def show_error_content_section(icon: str, title: str, error_msg: str): - """Show error state for failed step""" - safe_icon = html.escape(icon) - safe_title = html.escape(title) - safe_error = html.escape(error_msg) - - st.markdown(f""" -
-
- {safe_icon} -
-

{safe_title}

-

{safe_error}

-
- ❌ Failed -
-
- """, unsafe_allow_html=True) - - -def show_2025_content_display(): - """πŸš€ Ultra-modern 2025 Aurora content display for completed results""" - controller = get_pipeline_controller() - results = controller.get_results() - - if not results: - return - - # Beautiful completion header - st.markdown(""" -
-
-
-

✨ Transformation Complete

-

Your audio has been transformed into structured, actionable content

+ """Display an error content section.""" + st.markdown( + f""" +
+
+ {icon} {title} + + \u274c Error +
-
-
-
-
+
+ {error_msg}
- - - """, unsafe_allow_html=True) - - # Display all results with modern cards - content_map = { - 'transcription': ('πŸŽ™οΈ', 'Audio Transcription', 'Complete speech-to-text conversion'), - 'wisdom_extraction': ('πŸ’Ž', 'Key Insights & Wisdom', 'Extracted insights and actionable takeaways'), - 'research_enrichment': ('πŸ”', 'Research Enrichment', 'Supporting links and contextual information'), - 'outline_creation': ('πŸ“‹', 'Content Outline', 'Structured organization and flow'), - 'article_creation': ('πŸ“°', 'Full Article', 'Complete written content ready for publication'), - 'social_content': ('πŸ“±', 'Social Media Content', 'Platform-optimized posts and captions'), - 'image_prompts': ('πŸ–ΌοΈ', 'Image Generation Prompts', 'AI-generated visual concept descriptions') - } - - for key, (icon, title, desc) in content_map.items(): - if key in results and results[key]: - show_streaming_content_card(icon, title, desc, results[key], is_live=False) + """, + unsafe_allow_html=True, + ) def show_streaming_content_card(icon: str, title: str, description: str, content: str, is_live: bool = False): - """🎨 Beautiful streaming content card with Aurora effects""" - - # Create unique key for this card - card_key = generate_unique_key(f"stream_card_{title.lower()}") - - # Live vs complete styling + """Beautiful streaming content card with Aurora effects.""" border_color = "rgba(0, 255, 100, 0.2)" if is_live else "rgba(0, 255, 255, 0.15)" - bg_gradient = "rgba(0, 255, 100, 0.03), rgba(0, 255, 255, 0.05)" if is_live else "rgba(0, 255, 255, 0.03), rgba(64, 224, 208, 0.05)" + bg_gradient = ( + "rgba(0, 255, 100, 0.03), rgba(0, 255, 255, 0.05)" + if is_live + else "rgba(0, 255, 255, 0.03), rgba(64, 224, 208, 0.05)" + ) glow_color = "rgba(0, 255, 100, 0.4)" if is_live else "rgba(0, 255, 255, 0.3)" with st.container(): - st.markdown(f""" -
+ live_badge = '
LIVE
' if is_live else '
\u2713
' + st.markdown( + f""" +
{icon}

{title}

{description}

- {'
LIVE
' if is_live else '
βœ“
'} + {live_badge}
@@ -571,449 +340,72 @@ def show_streaming_content_card(icon: str, title: str, description: str, content 50% {{ transform: scale(1.05); opacity: 0.8; }} }} - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) - # Content preview with smart truncation if len(content) > 300: preview = content[:300] + "..." - # Expandable content - with st.expander(f"πŸ“– Preview {title}", expanded=False): + with st.expander(f"\U0001f4d6 Preview {title}", expanded=False): st.markdown(preview) - with st.expander(f"πŸ“„ Full {title}", expanded=False): + with st.expander(f"\U0001f4c4 Full {title}", expanded=False): st.markdown(content) - # Copy button copy_key = generate_unique_key(f"copy_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): st.code(content, language="markdown") else: st.markdown(content) - # Copy button for short content copy_key = generate_unique_key(f"copy_short_{title}") if st.button(f"Copy {title}", key=copy_key, help=f"Copy {title} to clipboard"): st.code(content, language="markdown") -def _show_download_options(results: Dict[str, Any]): - """Show download options for generated content""" - - st.markdown(""" +def _show_download_options(results): + """Show download options for generated content.""" + st.markdown( + """
- πŸ“₯ + \U0001f4e5 Download Options
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) - # Create downloadable content formats formats = { - "JSON": _create_json_download(results), - "Markdown": _create_markdown_download(results), - "Text": _create_text_download(results) + "JSON": create_json_download(results), + "Markdown": create_markdown_download(results), + "Text": create_text_download(results), } col1, col2, col3 = st.columns(3) with col1: - if "JSON" in formats: - st.download_button( - "πŸ“„ JSON Format", - data=formats["JSON"], - file_name="whisperforge_results.json", - mime="application/json" - ) + st.download_button( + "\U0001f4c4 JSON Format", + data=formats["JSON"], + file_name="whisperforge_results.json", + mime="application/json", + ) with col2: - if "Markdown" in formats: - st.download_button( - "πŸ“ Markdown Format", - data=formats["Markdown"], - file_name="whisperforge_results.md", - mime="text/markdown" - ) + st.download_button( + "\U0001f4dd Markdown Format", + data=formats["Markdown"], + file_name="whisperforge_results.md", + mime="text/markdown", + ) with col3: - if "Text" in formats: - st.download_button( - "πŸ“„ Text Format", - data=formats["Text"], - file_name="whisperforge_results.txt", - mime="text/plain" - ) - - -def _create_json_download(results: Dict[str, Any]) -> str: - """Create JSON format download""" - import json - return json.dumps(results, indent=2, ensure_ascii=False) - - -def _create_markdown_download(results: Dict[str, Any]) -> str: - """Create Markdown format download""" - content = "# WhisperForge Content Generation Results\n\n" - - sections = { - "transcription": "## πŸ“ Audio Transcription\n\n", - "wisdom_extraction": "## πŸ’Ž Key Insights & Wisdom\n\n", - "outline_creation": "## πŸ“‹ Content Outline\n\n", - "article_creation": "## πŸ“° Full Article\n\n", - "social_content": "## πŸ“± Social Media Content\n\n", - "image_prompts": "## πŸ–ΌοΈ Image Generation Prompts\n\n" - } - - for key, header in sections.items(): - if key in results: - 
content += header + results[key] + "\n\n---\n\n" - - return content - - -def _create_text_download(results: Dict[str, Any]) -> str: - """Create plain text format download""" - content = "WHISPERFORGE CONTENT GENERATION RESULTS\n" - content += "=" * 50 + "\n\n" - - sections = { - "transcription": "AUDIO TRANSCRIPTION\n" + "-" * 20 + "\n\n", - "wisdom_extraction": "KEY INSIGHTS & WISDOM\n" + "-" * 20 + "\n\n", - "outline_creation": "CONTENT OUTLINE\n" + "-" * 15 + "\n\n", - "article_creation": "FULL ARTICLE\n" + "-" * 12 + "\n\n", - "social_content": "SOCIAL MEDIA CONTENT\n" + "-" * 20 + "\n\n", - "image_prompts": "IMAGE GENERATION PROMPTS\n" + "-" * 25 + "\n\n" - } - - for key, header in sections.items(): - if key in results: - content += header + results[key] + "\n\n" + "=" * 50 + "\n\n" - - return content - - -# Enhanced CSS for streaming results -STREAMING_RESULTS_CSS = """ - -""" - - -def show_enhanced_streaming_status(): - """PHASE 3: ENHANCED STREAMING UX OVERHAUL - 2025 st.status() integration WITH VISIBLE THINKING""" - controller = get_pipeline_controller() - - if not controller.is_active and not controller.is_complete: - return - - current_step = controller.current_step_index - pipeline_steps = [ - ("Upload Validation", "File format & compatibility check", "upload_validation"), - ("Audio Transcription", "Speech-to-text conversion", "transcription"), - ("Wisdom Extraction", "Key insights extraction", "wisdom_extraction"), - ("Research Enrichment", "Supporting links & context", "research_enrichment"), - ("Outline Generation", "Content structure creation", "outline_creation"), - ("Article Creation", "Full article generation", "article_creation"), - ("Social Media Posts", "Platform-optimized content", "social_content"), - ("Image Prompts", "Visual concept generation", "image_prompts"), - ("Database Storage", "Secure content storage", "database_storage") - ] - - results = controller.get_results() - errors = controller.get_errors() if hasattr(controller, 
'get_errors') else {} - - # 🧠 VISIBLE THINKING INTEGRATION - Show AI thought bubbles during processing - if controller.is_active and st.session_state.get("thinking_enabled", True): - # Create dedicated container for thinking bubbles - thinking_container = st.container() - with thinking_container: - st.markdown(""" -
-
- 🧠 - AI Thinking Process -
-
-
- - - """, unsafe_allow_html=True) - - # Render the actual thinking stream - try: - render_thinking_stream(thinking_container) - except Exception as e: - st.info(f"πŸ’­ AI is thinking... (thinking system loading)") - - # Main processing status container with st.status() - if controller.is_active: - current_title, current_desc, current_key = pipeline_steps[current_step] - - with st.status(f"πŸ”„ {current_title}", expanded=True) as status: - st.write(f"πŸ“ **{current_desc}**") - - # Progress bar - progress = (current_step / len(pipeline_steps)) * 100 - st.progress(progress / 100, text=f"Progress: {progress:.0f}% ({current_step + 1}/{len(pipeline_steps)})") - - # Show previous completed steps with content preview - for i in range(current_step): - title, _, step_key = pipeline_steps[i] - if step_key in results: - st.write(f"βœ… {title} - Complete") - # Show brief preview of generated content - if step_key in results and results[step_key] and step_key not in [ - "upload_validation", "database_storage"]: - preview = str(results[step_key])[:100] + \ - "..." if len(str(results[step_key])) > 100 else str(results[step_key]) - st.caption(f"Preview: {preview}") - elif step_key in errors: - st.write(f"❌ {title} - Error: {errors[step_key]}") - else: - st.write(f"βœ… {title} - Complete") - - # Current step with enhanced styling - st.markdown(f""" -
- πŸ”„ {current_title} - {current_desc}... -
- """, unsafe_allow_html=True) - - # Show preview of remaining steps - for i in range(current_step + 1, len(pipeline_steps)): - title, _, _ = pipeline_steps[i] - st.write(f"β­• {title} - Pending") - - # Update status based on completion - if current_step >= len(pipeline_steps) - 1: - status.update(label="βœ… Processing Complete!", state="complete", expanded=False) - else: - status.update(label=f"πŸ”„ {current_title}", state="running") - - elif controller.is_complete: - # Completion status with beautiful summary - with st.status("βœ… All processing complete!", state="complete", expanded=False): - st.success("Your audio has been transformed into comprehensive content!") - - # Enhanced completion summary - st.markdown(""" -
-

🌟 Generation Summary

-
- - - """, unsafe_allow_html=True) - - col1, col2, col3 = st.columns(3) - - with col1: - completed_count = len([r for r in results.values() if r]) - st.metric("Steps Completed", completed_count, len(pipeline_steps)) - with col2: - error_count = len(errors) - st.metric("Errors", error_count, delta_color="inverse") - with col3: - success_rate = ((completed_count - error_count) / len(pipeline_steps)) * 100 - st.metric("Success Rate", f"{success_rate:.1f}%") - - # Show content type breakdown - if results: - st.markdown("**Generated Content Types:**") - content_types = [] - if results.get('transcription'): - content_types.append("πŸ“ Transcription") - if results.get('wisdom_extraction'): - content_types.append("πŸ’Ž Insights") - if results.get('research_enrichment'): - content_types.append("πŸ” Research") - if results.get('outline_creation'): - content_types.append("πŸ“‹ Outline") - if results.get('article_creation'): - content_types.append("πŸ“° Article") - if results.get('social_content'): - content_types.append("πŸ“± Social Posts") - if results.get('image_prompts'): - content_types.append("πŸ–ΌοΈ Image Prompts") - - if content_types: - st.write(" β€’ ".join(content_types)) - - -def show_processing_status(): - """Display ultra-modern Aurora pipeline with real-time visibility - WRAPPER""" - show_enhanced_streaming_status() # Use the new enhanced version + st.download_button( + "\U0001f4c4 Text Format", + data=formats["Text"], + file_name="whisperforge_results.txt", + mime="text/plain", + ) diff --git a/core/streaming_status.py b/core/streaming_status.py new file mode 100644 index 0000000..0f40c5a --- /dev/null +++ b/core/streaming_status.py @@ -0,0 +1,381 @@ +""" +Streaming Status Display +======================== + +Enhanced streaming UX with st.status() integration and visible thinking. +Includes the 2025 content display and pipeline status visualization. 
+""" + +import streamlit as st + +from .streaming_pipeline import get_pipeline_controller + + +def show_2025_content_display(): + """Ultra-modern 2025 Aurora content display for completed results.""" + from .streaming_results import show_streaming_content_card + + controller = get_pipeline_controller() + results = controller.get_results() + + if not results: + return + + st.markdown( + """ +
+
+
+

\u2728 Transformation Complete

+

Your audio has been transformed into structured, actionable content

+
+
+
+
+
+
+
+ + + """, + unsafe_allow_html=True, + ) + + content_map = { + "transcription": ("\U0001f399\ufe0f", "Audio Transcription", "Complete speech-to-text conversion"), + "wisdom_extraction": ("\U0001f48e", "Key Insights & Wisdom", "Extracted insights and actionable takeaways"), + "research_enrichment": ("\U0001f50d", "Research Enrichment", "Supporting links and contextual information"), + "outline_creation": ("\U0001f4cb", "Content Outline", "Structured organization and flow"), + "article_creation": ("\U0001f4f0", "Full Article", "Complete written content ready for publication"), + "social_content": ("\U0001f4f1", "Social Media Content", "Platform-optimized posts and captions"), + "image_prompts": ("\U0001f5bc\ufe0f", "Image Generation Prompts", "AI-generated visual concept descriptions"), + } + + for key, (icon, title, desc) in content_map.items(): + if key in results and results[key]: + show_streaming_content_card(icon, title, desc, results[key], is_live=False) + + +def show_enhanced_streaming_status(): + """Enhanced streaming UX with st.status() integration and visible thinking.""" + controller = get_pipeline_controller() + + if not controller.is_active and not controller.is_complete: + return + + current_step = controller.current_step_index + pipeline_steps = [ + ("Upload Validation", "File format & compatibility check", "upload_validation"), + ("Audio Transcription", "Speech-to-text conversion", "transcription"), + ("Wisdom Extraction", "Key insights extraction", "wisdom_extraction"), + ("Research Enrichment", "Supporting links & context", "research_enrichment"), + ("Outline Generation", "Content structure creation", "outline_creation"), + ("Article Creation", "Full article generation", "article_creation"), + ("Social Media Posts", "Platform-optimized content", "social_content"), + ("Image Prompts", "Visual concept generation", "image_prompts"), + ("Database Storage", "Secure content storage", "database_storage"), + ] + + results = controller.get_results() + errors 
= controller.get_errors() if hasattr(controller, "get_errors") else {} + + # Visible thinking integration + if controller.is_active and st.session_state.get("thinking_enabled", True): + thinking_container = st.container() + with thinking_container: + st.markdown( + """ +
+
+ \U0001f9e0 + AI Thinking Process +
+
+
+ + + """, + unsafe_allow_html=True, + ) + + try: + from .visible_thinking import render_thinking_stream + + render_thinking_stream(thinking_container) + except Exception: + st.info("\U0001f4ad AI is thinking... (thinking system loading)") + + # Main processing status + if controller.is_active: + current_title, current_desc, _current_key = pipeline_steps[current_step] + + with st.status(f"\U0001f504 {current_title}", expanded=True) as status: + st.write(f"\U0001f4dd **{current_desc}**") + + progress = (current_step / len(pipeline_steps)) * 100 + st.progress(progress / 100, text=f"Progress: {progress:.0f}% ({current_step + 1}/{len(pipeline_steps)})") + + for i in range(current_step): + title, _, step_key = pipeline_steps[i] + if step_key in results: + st.write(f"\u2705 {title} - Complete") + if ( + step_key in results + and results[step_key] + and step_key not in ["upload_validation", "database_storage"] + ): + result_str = str(results[step_key]) + preview = result_str[:100] + "..." if len(result_str) > 100 else result_str + st.caption(f"Preview: {preview}") + elif step_key in errors: + st.write(f"\u274c {title} - Error: {errors[step_key]}") + else: + st.write(f"\u2705 {title} - Complete") + + st.markdown( + f""" +
+ \U0001f504 {current_title} - {current_desc}... +
+ """, + unsafe_allow_html=True, + ) + + for i in range(current_step + 1, len(pipeline_steps)): + title, _, _ = pipeline_steps[i] + st.write(f"\u2b55 {title} - Pending") + + if current_step >= len(pipeline_steps) - 1: + status.update(label="\u2705 Processing Complete!", state="complete", expanded=False) + else: + status.update(label=f"\U0001f504 {current_title}", state="running") + + elif controller.is_complete: + with st.status("\u2705 All processing complete!", state="complete", expanded=False): + st.success("Your audio has been transformed into comprehensive content!") + + st.markdown( + """ +
+

\U0001f31f Generation Summary

+
+ + + """, + unsafe_allow_html=True, + ) + + col1, col2, col3 = st.columns(3) + + with col1: + completed_count = len([r for r in results.values() if r]) + st.metric("Steps Completed", completed_count, len(pipeline_steps)) + with col2: + error_count = len(errors) + st.metric("Errors", error_count, delta_color="inverse") + with col3: + success_rate = ((completed_count - error_count) / len(pipeline_steps)) * 100 + st.metric("Success Rate", f"{success_rate:.1f}%") + + if results: + st.markdown("**Generated Content Types:**") + content_types = [] + if results.get("transcription"): + content_types.append("\U0001f4dd Transcription") + if results.get("wisdom_extraction"): + content_types.append("\U0001f48e Insights") + if results.get("research_enrichment"): + content_types.append("\U0001f50d Research") + if results.get("outline_creation"): + content_types.append("\U0001f4cb Outline") + if results.get("article_creation"): + content_types.append("\U0001f4f0 Article") + if results.get("social_content"): + content_types.append("\U0001f4f1 Social Posts") + if results.get("image_prompts"): + content_types.append("\U0001f5bc\ufe0f Image Prompts") + + if content_types: + st.write(" \u2022 ".join(content_types)) + + +def show_processing_status(): + """Display Aurora pipeline with real-time visibility - wrapper.""" + show_enhanced_streaming_status() diff --git a/static/css/streaming.css b/static/css/streaming.css new file mode 100644 index 0000000..81a8157 --- /dev/null +++ b/static/css/streaming.css @@ -0,0 +1,157 @@ +/* Aurora Progress Animation */ +@keyframes aurora-flow { + 0%, 100% { left: -100%; opacity: 0; } + 25% { opacity: 1; } + 75% { opacity: 1; } + 100% { left: 100%; opacity: 0; } +} + +@keyframes aurora-pulse { + 0%, 100% { opacity: 1; transform: scale(1); } + 50% { opacity: 0.7; transform: scale(1.1); } +} + +@keyframes completion-glow { + 0%, 100% { left: -100%; opacity: 0; } + 20% { opacity: 1; } + 80% { opacity: 1; } + 100% { left: 100%; opacity: 0; } +} + +/* 
Enhanced Button Styling */ +.stButton > button { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.1), rgba(64, 224, 208, 0.15)) !important; + border: 1px solid rgba(0, 255, 255, 0.2) !important; + color: rgba(255, 255, 255, 0.9) !important; + border-radius: 8px !important; + font-weight: 500 !important; + transition: all 0.3s ease !important; +} + +.stButton > button:hover { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.15), rgba(64, 224, 208, 0.2)) !important; + border-color: rgba(0, 255, 255, 0.3) !important; + color: white !important; + transform: translateY(-1px); + box-shadow: 0 4px 15px rgba(0, 255, 255, 0.15); +} + +/* Expander Styling */ +.streamlit-expanderHeader { + background: rgba(0, 255, 255, 0.03) !important; + border: 1px solid rgba(0, 255, 255, 0.1) !important; + border-radius: 8px !important; +} + +.streamlit-expanderContent { + background: rgba(0, 255, 255, 0.02) !important; + border: 1px solid rgba(0, 255, 255, 0.1) !important; + border-top: none !important; + border-radius: 0 0 8px 8px !important; +} + +/* Aurora Streaming Results Styling */ +.aurora-content-card { + background: linear-gradient(135deg, rgba(0, 255, 255, 0.05), rgba(64, 224, 208, 0.08)); + backdrop-filter: blur(24px) saturate(180%); + border: 1px solid rgba(0, 255, 255, 0.15); + border-radius: 16px; + padding: 24px; + margin: 16px 0; + position: relative; + overflow: hidden; + transition: all 0.4s cubic-bezier(0.4, 0.0, 0.2, 1); +} + +.aurora-content-card::before { + content: ""; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 2px; + background: linear-gradient(90deg, transparent, #00FFFF, #40E0D0, transparent); + animation: aurora-scan 6s ease-in-out infinite; +} + +.aurora-content-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 1px solid rgba(0, 255, 255, 0.1); +} + +.aurora-content-title { + font-size: 1.1rem; + font-weight: 600; + 
color: rgba(255, 255, 255, 0.95); +} + +.aurora-status-badge { + padding: 4px 12px; + border-radius: 12px; + font-size: 0.75rem; + font-weight: 500; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.aurora-status-badge.completed { + background: rgba(0, 255, 127, 0.15); + color: #00FF7F; + border: 1px solid rgba(0, 255, 127, 0.3); +} + +.aurora-content-body { + color: rgba(255, 255, 255, 0.85); + line-height: 1.6; + font-size: 0.95rem; +} + +.aurora-editor-section { + background: rgba(255, 255, 127, 0.05); + border: 1px solid rgba(255, 255, 127, 0.15); + border-radius: 12px; + padding: 16px; + margin: 16px 0; +} + +.aurora-editor-header { + display: flex; + justify-content: space-between; + align-items: center; +} + +.aurora-editor-title { + font-weight: 600; + color: rgba(255, 255, 255, 0.95); +} + +.aurora-editor-badge { + background: rgba(255, 255, 127, 0.15); + color: #FFFF7F; + padding: 4px 8px; + border-radius: 8px; + font-size: 0.7rem; + font-weight: 500; +} + +.aurora-critique-card { + background: rgba(255, 255, 255, 0.02); + border-radius: 8px; + padding: 16px; + margin: 12px 0; +} + +.aurora-critique-content { + color: rgba(255, 255, 255, 0.8); + line-height: 1.5; + font-size: 0.9rem; +} + +@keyframes aurora-scan { + 0%, 100% { left: -100%; } + 50% { left: 100%; } +} From df5c4038352a45ed63b4a2c1690311bd30dcfc89 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:12:10 -0800 Subject: [PATCH 26/46] refactor(config): extract magic numbers to named constants Replace hardcoded values with named constants: - MAX_TOKENS_DEFAULT, MAX_TOKENS_ARTICLE for AI generation limits - TRANSCRIPT_PREVIEW_LENGTH, ARTICLE_PREVIEW_LENGTH for truncation - Centralize audio_chunk_size_mb via Config dataclass streaming_pipeline.py and file_upload.py now read chunk/file size from core.config instead of local magic numbers. 
Also fixes pre-existing ruff issues (import sorting, deprecated typing.Dict, unused imports) in content_generation.py and streaming_pipeline.py. Co-Authored-By: Claude Opus 4.6 --- core/content_generation.py | 83 +++++++++++------------- core/file_upload.py | 4 +- core/streaming_pipeline.py | 127 ++++++++++++++++++------------------- 3 files changed, 103 insertions(+), 111 deletions(-) diff --git a/core/content_generation.py b/core/content_generation.py index 2cb0de1..4aeee4f 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -5,18 +5,24 @@ import logging import os -from typing import Dict, Optional -from .utils import get_openai_client, get_prompt, DEFAULT_PROMPTS, get_enhanced_prompt +from .utils import get_enhanced_prompt, get_openai_client logger = logging.getLogger(__name__) AI_MODEL = os.getenv("GPT_MODEL", "gpt-4o") WHISPER_MODEL = os.getenv("WHISPER_MODEL", "whisper-1") +# Token and truncation limits for AI generation calls +MAX_TOKENS_DEFAULT = 1500 +MAX_TOKENS_ARTICLE = 2000 +TRANSCRIPT_PREVIEW_LENGTH = 2000 +ARTICLE_PREVIEW_LENGTH = 1500 + class ContentGenerationError(Exception): """Raised when content generation fails""" + pass @@ -28,85 +34,76 @@ def _get_client(): return client -def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: +def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None) -> str: """Extract key insights and wisdom from a transcript""" - system_prompt = custom_prompt or get_enhanced_prompt( - "wisdom_extraction", knowledge_base) + system_prompt = custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) client = _get_client() response = client.chat.completions.create( model=AI_MODEL, messages=[ {"role": "system", "content": system_prompt}, - {"role": "user", "content": f"Here's the transcription to analyze:\n\n{transcript}"} + {"role": "user", "content": f"Here's the transcription to 
analyze:\n\n{transcript}"}, ], - max_tokens=1500 + max_tokens=MAX_TOKENS_DEFAULT, ) return response.choices[0].message.content -def generate_outline(transcript: str, wisdom: str, custom_prompt: str = None, - knowledge_base: Dict[str, str] = None) -> str: +def generate_outline( + transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: """Create a structured outline based on transcript and wisdom""" - system_prompt = custom_prompt or get_enhanced_prompt( - "outline_creation", knowledge_base) + system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) content = f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}" client = _get_client() response = client.chat.completions.create( model=AI_MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=1500 + messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], + max_tokens=MAX_TOKENS_DEFAULT, ) return response.choices[0].message.content -def generate_article(transcript: str, wisdom: str, outline: str, custom_prompt: str = None, - knowledge_base: Dict[str, str] = None) -> str: +def generate_article( + transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: """Generate a comprehensive article based on transcript, wisdom, and outline""" - system_prompt = custom_prompt or get_enhanced_prompt( - "article_writing", knowledge_base) + system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) - transcript_excerpt = transcript[:2000] if len( - transcript) > 2000 else transcript + transcript_excerpt = ( + transcript[:TRANSCRIPT_PREVIEW_LENGTH] if len(transcript) > TRANSCRIPT_PREVIEW_LENGTH else transcript + ) content = f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}" client = _get_client() response = 
client.chat.completions.create( model=AI_MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=2000 + messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], + max_tokens=MAX_TOKENS_ARTICLE, ) return response.choices[0].message.content -def generate_social_content(wisdom: str, outline: str, article: str, - custom_prompt: str = None, knowledge_base: Dict[str, str] = None) -> str: +def generate_social_content( + wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: """Generate 5 distinct social media posts""" - system_prompt = custom_prompt or get_enhanced_prompt( - "social_media", knowledge_base) + system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) - article_excerpt = article[:1500] if len(article) > 1500 else article + article_excerpt = article[:ARTICLE_PREVIEW_LENGTH] if len(article) > ARTICLE_PREVIEW_LENGTH else article content = f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}" client = _get_client() response = client.chat.completions.create( model=AI_MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content} - ], - max_tokens=1500 + messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], + max_tokens=MAX_TOKENS_DEFAULT, ) return response.choices[0].message.content @@ -117,16 +114,10 @@ def transcribe_audio(audio_file) -> str: client = _get_client() if isinstance(audio_file, str): - with open(audio_file, 'rb') as f: - response = client.audio.transcriptions.create( - model=WHISPER_MODEL, - file=f - ) + with open(audio_file, "rb") as f: + response = client.audio.transcriptions.create(model=WHISPER_MODEL, file=f) else: audio_file.seek(0) - response = client.audio.transcriptions.create( - model=WHISPER_MODEL, - file=audio_file - ) + response = 
client.audio.transcriptions.create(model=WHISPER_MODEL, file=audio_file) return response.text diff --git a/core/file_upload.py b/core/file_upload.py index 8c6eeda..1844b1d 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -45,8 +45,10 @@ def __init__(self): "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], "text": [".txt", ".md", ".pdf", ".docx"], } + from .config import get_config + self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_size_mb = 20 # 20MB chunks for pydub processing + self.chunk_size_mb = get_config().audio_chunk_size_mb self.max_parallel_chunks = 3 def create_upload_zone(self): diff --git a/core/streaming_pipeline.py b/core/streaming_pipeline.py index f2779e2..df09144 100644 --- a/core/streaming_pipeline.py +++ b/core/streaming_pipeline.py @@ -3,24 +3,32 @@ Enables real-time progress updates and content streaming during processing """ -import streamlit as st import time -from typing import Dict, Optional, Any -from datetime import datetime +from typing import Any + +import streamlit as st + from .content_generation import ( - transcribe_audio, generate_wisdom, generate_outline, generate_article, - generate_social_content + generate_article, + generate_outline, + generate_social_content, + generate_wisdom, + transcribe_audio, ) -from .visible_thinking import thinking_step_start, thinking_step_complete, thinking_error, render_thinking_stream +from .visible_thinking import thinking_error, thinking_step_complete, thinking_step_start class StreamingPipelineController: """Controls step-by-step pipeline execution with real-time UI updates""" PIPELINE_STEPS = [ - "upload_validation", "transcription", "wisdom_extraction", - "outline_creation", "article_creation", - "social_content", "database_storage" + "upload_validation", + "transcription", + "wisdom_extraction", + "outline_creation", + "article_creation", + "social_content", + "database_storage", ] def __init__(self): @@ -45,12 +53,12 @@ def start_pipeline(self, 
audio_file): "name": audio_file.name, "size": len(audio_file.getvalue()), "size_mb": file_size_mb, - "is_large_file": file_size_mb > 20 + "is_large_file": file_size_mb > 20, } - if not hasattr(st.session_state, 'prompts'): + if not hasattr(st.session_state, "prompts"): st.session_state.prompts = {} - if not hasattr(st.session_state, 'knowledge_base'): + if not hasattr(st.session_state, "knowledge_base"): st.session_state.knowledge_base = {} def process_next_step(self): @@ -112,22 +120,19 @@ def _execute_step(self, step_id: str, step_index: int) -> Any: thinking_error(step_id, str(e)) raise - def _step_upload_validation(self) -> Dict[str, Any]: + def _step_upload_validation(self) -> dict[str, Any]: """Step 1: Validate uploaded file""" file_info = st.session_state.pipeline_file_info - MAX_FILE_SIZE_MB = 25 - if file_info["size_mb"] > MAX_FILE_SIZE_MB: - raise ValueError( - f"File too large: {file_info['size_mb']:.1f}MB (max {MAX_FILE_SIZE_MB}MB)") + from .config import get_config + + max_file_size_mb = get_config().audio_chunk_size_mb + if file_info["size_mb"] > max_file_size_mb: + raise ValueError(f"File too large: {file_info['size_mb']:.1f}MB (max {max_file_size_mb}MB)") time.sleep(0.5) - return { - "status": "validated", - "file_name": file_info["name"], - "file_size_mb": file_info["size_mb"] - } + return {"status": "validated", "file_name": file_info["name"], "file_size_mb": file_info["size_mb"]} def _step_transcription(self) -> str: """Step 2: Transcribe audio with large file support""" @@ -168,8 +173,8 @@ def _transcribe_large_file(self, audio_file) -> str: st.success(f""" Large File Transcription Complete! 
- - Chunks processed: {result.get('chunks', 'N/A')} - - Processing time: {result.get('processing_time', 'N/A')} + - Chunks processed: {result.get("chunks", "N/A")} + - Processing time: {result.get("processing_time", "N/A")} - Transcript length: {len(transcript)} characters """) @@ -180,13 +185,12 @@ def _step_wisdom_extraction(self) -> str: """Step 3: Extract wisdom""" transcript = st.session_state.pipeline_transcript - custom_prompt = st.session_state.prompts.get( - "wisdom_extraction") if hasattr(st.session_state, 'prompts') else None + custom_prompt = ( + st.session_state.prompts.get("wisdom_extraction") if hasattr(st.session_state, "prompts") else None + ) wisdom = generate_wisdom( - transcript, - custom_prompt=custom_prompt, - knowledge_base=st.session_state.knowledge_base + transcript, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) st.session_state.pipeline_wisdom = wisdom @@ -197,14 +201,12 @@ def _step_outline_creation(self) -> str: transcript = st.session_state.pipeline_transcript wisdom = st.session_state.pipeline_wisdom - custom_prompt = st.session_state.prompts.get( - "outline_creation") if hasattr(st.session_state, 'prompts') else None + custom_prompt = ( + st.session_state.prompts.get("outline_creation") if hasattr(st.session_state, "prompts") else None + ) outline = generate_outline( - transcript, - wisdom, - custom_prompt=custom_prompt, - knowledge_base=st.session_state.knowledge_base + transcript, wisdom, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) st.session_state.pipeline_outline = outline @@ -216,15 +218,12 @@ def _step_article_creation(self) -> str: wisdom = st.session_state.pipeline_wisdom outline = st.session_state.pipeline_outline - custom_prompt = st.session_state.prompts.get( - "article_creation") if hasattr(st.session_state, 'prompts') else None + custom_prompt = ( + st.session_state.prompts.get("article_creation") if hasattr(st.session_state, "prompts") else None + ) 
article = generate_article( - transcript, - wisdom, - outline, - custom_prompt=custom_prompt, - knowledge_base=st.session_state.knowledge_base + transcript, wisdom, outline, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) st.session_state.pipeline_article = article @@ -236,15 +235,10 @@ def _step_social_content(self) -> str: outline = st.session_state.pipeline_outline article = st.session_state.pipeline_article - custom_prompt = st.session_state.prompts.get( - "social_media") if hasattr(st.session_state, 'prompts') else None + custom_prompt = st.session_state.prompts.get("social_media") if hasattr(st.session_state, "prompts") else None social = generate_social_content( - wisdom, - outline, - article, - custom_prompt=custom_prompt, - knowledge_base=st.session_state.knowledge_base + wisdom, outline, article, custom_prompt=custom_prompt, knowledge_base=st.session_state.knowledge_base ) st.session_state.pipeline_social = social @@ -261,16 +255,22 @@ def _step_database_storage(self) -> str: results = st.session_state.pipeline_results - result = db.client.table("content").insert({ - "user_id": st.session_state.user_id, - "title": f"Content from {st.session_state.pipeline_file_info['name']}", - "transcript": results.get("transcription", ""), - "wisdom": results.get("wisdom_extraction", ""), - "outline": results.get("outline_creation", ""), - "article": results.get("article_creation", ""), - "social_content": results.get("social_content", ""), - "created_at": "now()" - }).execute() + result = ( + db.client.table("content") + .insert( + { + "user_id": st.session_state.user_id, + "title": f"Content from {st.session_state.pipeline_file_info['name']}", + "transcript": results.get("transcription", ""), + "wisdom": results.get("wisdom_extraction", ""), + "outline": results.get("outline_creation", ""), + "article": results.get("article_creation", ""), + "social_content": results.get("social_content", ""), + "created_at": "now()", + } + ) + .execute() 
+ ) content_id = result.data[0]["id"] if result.data else "" if not content_id: @@ -290,8 +290,7 @@ def is_active(self) -> bool: @property def is_complete(self) -> bool: """Check if pipeline has completed""" - return (not self.is_active and - st.session_state.get("pipeline_step_index", 0) >= len(self.PIPELINE_STEPS)) + return not self.is_active and st.session_state.get("pipeline_step_index", 0) >= len(self.PIPELINE_STEPS) @property def current_step_index(self) -> int: @@ -303,17 +302,17 @@ def progress_percentage(self) -> float: """Get overall progress percentage""" return (self.current_step_index / len(self.PIPELINE_STEPS)) * 100 - def get_results(self) -> Dict[str, Any]: + def get_results(self) -> dict[str, Any]: """Get all pipeline results""" return st.session_state.get("pipeline_results", {}) - def get_errors(self) -> Dict[str, str]: + def get_errors(self) -> dict[str, str]: """Get any pipeline errors""" return st.session_state.get("pipeline_errors", {}) def get_pipeline_controller() -> StreamingPipelineController: """Get or create the global pipeline controller""" - if 'pipeline_controller' not in st.session_state: + if "pipeline_controller" not in st.session_state: st.session_state.pipeline_controller = StreamingPipelineController() return st.session_state.pipeline_controller From 549636105abdeb6470fc43e778e6170f5219347c Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:14:54 -0800 Subject: [PATCH 27/46] fix(utils): stop swallowing unexpected errors in auth and client init verify_password now catches only ValueError (malformed hash) instead of all exceptions, so DB errors propagate instead of looking like wrong passwords. get_openai_client and get_anthropic_client only return None for missing keys or ImportError; other failures now surface to callers. 
Co-Authored-By: Claude Opus 4.6 --- core/utils.py | 52 ++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/core/utils.py b/core/utils.py index ad3dcd4..3929d55 100644 --- a/core/utils.py +++ b/core/utils.py @@ -50,11 +50,17 @@ def hash_password(password: str) -> str: def verify_password(password: str, hashed: str) -> bool: - """Verify a password against its hash""" + """Verify a password against its hash. + + Raises on unexpected errors (e.g. DB corruption) instead of + silently returning False which would be indistinguishable from + a wrong password. + """ try: return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) - except Exception as e: - logger.error(f"Password verification error: {e}") + except ValueError as e: + # Malformed hash string (wrong prefix, bad encoding, etc.) + logger.error("Password verification failed – malformed hash: %s", e) return False @@ -143,42 +149,42 @@ def get_enhanced_prompt(prompt_type: str, knowledge_base: dict[str, str] = None, def get_openai_client(): - """Get OpenAI client with API key""" + """Get OpenAI client with API key. + + Returns None only when the key is missing or the package isn't installed. + Other errors (e.g. network, auth) propagate so callers can react. + """ try: import openai - - api_key = os.getenv("OPENAI_API_KEY") - if not api_key: - return None - - client = openai.OpenAI(api_key=api_key) - return client except ImportError: logger.error("OpenAI package not installed") return None - except Exception as e: - logger.error(f"Error initializing OpenAI client: {e}") + + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: return None + return openai.OpenAI(api_key=api_key) + def get_anthropic_client(): - """Get Anthropic client with API key""" + """Get Anthropic client with API key. + + Returns None only when the key is missing or the package isn't installed. + Other errors propagate so callers can react. 
+ """ try: import anthropic - - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - return None - - client = anthropic.Anthropic(api_key=api_key) - return client except ImportError: logger.error("Anthropic package not installed") return None - except Exception as e: - logger.error(f"Error initializing Anthropic client: {e}") + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: return None + return anthropic.Anthropic(api_key=api_key) + def get_grok_api_key(): """Get Grok API key""" From b3a7f892e06a8405bdf83180cc5b3f0399cb4f3d Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:17:48 -0800 Subject: [PATCH 28/46] fix(launcher): replace os.execv with cross-platform subprocess.run os.execv is Unix-only and crashes on Windows. subprocess.run works on all platforms with negligible overhead for a launcher process. Co-Authored-By: Claude Opus 4.6 --- main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index c6d01c1..96ece53 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """WhisperForge launcher - sets up the environment if needed and starts the app.""" +import os import subprocess import sys -import os PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) VENV_DIR = os.path.join(PROJECT_DIR, ".venv") @@ -46,7 +46,7 @@ def start_app(extra_args): print("[whisperforge] Starting WhisperForge...") print(f"[whisperforge] Command: {' '.join(cmd)}") print("-" * 50) - os.execv(STREAMLIT, cmd) + sys.exit(subprocess.run(cmd).returncode) def main(): @@ -84,7 +84,7 @@ def main(): st_args = [] if "--" in args: idx = args.index("--") - st_args = args[idx + 1:] + st_args = args[idx + 1 :] # Ensure environment is ready if not venv_ready(): From 36306ae07d2423981cca8e8e9bdf17b06b557ddb Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:19:56 -0800 Subject: [PATCH 29/46] refactor(scripts): remove platform-specific shell scripts Delete start_app.sh, setup_test_env.sh, 
and cleanup_repo.sh. Their functionality is already covered by the cross-platform Python launchers (main.py and setup.py). Update CONTRIBUTING.md to point contributors at `python main.py --setup --dev` instead of the removed script. Co-Authored-By: Claude Opus 4.6 --- CONTRIBUTING.md | 14 ++++------- scripts/cleanup_repo.sh | 24 ------------------- scripts/setup_test_env.sh | 18 -------------- start_app.sh | 50 --------------------------------------- 4 files changed, 5 insertions(+), 101 deletions(-) delete mode 100755 scripts/cleanup_repo.sh delete mode 100755 scripts/setup_test_env.sh delete mode 100755 start_app.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 15d88bc..db4ec87 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,17 +5,13 @@ Thank you for considering contributing to WhisperForge! ## Getting Started 1. Fork the repository and clone your fork. -2. Create a virtual environment: +2. Set up the development environment: ```bash - python -m venv venv - source venv/bin/activate + python main.py --setup --dev ``` -3. Install dependencies required for development and testing: - ```bash - pip install -r requirements.txt - ``` - You can also run `scripts/setup_test_env.sh` which performs these steps for you. -4. Run the test suite to ensure everything works: + This creates a `.venv`, installs all dependencies (including dev/test), and + configures pre-commit hooks. +3. 
Run the test suite to ensure everything works: ```bash pytest ``` diff --git a/scripts/cleanup_repo.sh b/scripts/cleanup_repo.sh deleted file mode 100755 index bb467d9..0000000 --- a/scripts/cleanup_repo.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# Repository cleanup script -# - Prunes stale remote tracking branches -# - Deletes local branches already merged into main -# - Removes ignored files like caches -set -euo pipefail - -git fetch --all --prune - -for branch in $(git branch --merged | grep -v "^*" | grep -v main); do - git branch -d "$branch" -done - -echo "The following ignored files will be removed:" -git clean -ndX -read -rp "Proceed with cleanup? [y/N] " confirm -if [[ ! "$confirm" =~ ^[Yy]$ ]]; then - echo "Cleanup cancelled." - exit 0 -fi - -git clean -fdX - -echo "Cleanup complete." diff --git a/scripts/setup_test_env.sh b/scripts/setup_test_env.sh deleted file mode 100755 index a0938f3..0000000 --- a/scripts/setup_test_env.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# WhisperForge Test Environment Setup Script -# This script creates a Python virtual environment and installs -# the required dependencies for running tests. - -set -e - -if [ ! -f requirements.txt ]; then - echo "Run this script from the project root where requirements.txt is located." >&2 - exit 1 -fi - -python -m venv venv -# shellcheck disable=SC1091 -source venv/bin/activate -pip install -r requirements.txt - -echo "βœ… Test environment ready. Activate it with 'source venv/bin/activate'" diff --git a/start_app.sh b/start_app.sh deleted file mode 100755 index 13e9276..0000000 --- a/start_app.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# WhisperForge Startup Script -echo "Starting WhisperForge..." 
- -# Auto-load environment variables from .env if present -if [ -f .env ]; then - set -a - # shellcheck disable=SC1091 - source .env - set +a -fi - -# Check for required environment variables -if [ -z "${SUPABASE_URL:-}" ] || [ -z "${SUPABASE_ANON_KEY:-}" ]; then - echo "Missing required environment variables:" - echo " Please set SUPABASE_URL and SUPABASE_ANON_KEY before running this script." - echo " Example:" - echo " export SUPABASE_URL=''" - echo " export SUPABASE_ANON_KEY=''" - exit 1 -fi -ENVIRONMENT="${1:-${ENVIRONMENT:-development}}" -export ENVIRONMENT - -if [ "$ENVIRONMENT" = "production" ]; then - export DEBUG="${DEBUG:-false}" - export LOG_LEVEL="${LOG_LEVEL:-INFO}" -else - export DEBUG="${DEBUG:-true}" - export LOG_LEVEL="${LOG_LEVEL:-DEBUG}" -fi - -echo "Running in $ENVIRONMENT mode" - -echo "Environment variables set" -echo "Supabase URL: [set]" -echo "Supabase Key: [set]" - -# Test Supabase connection -echo "Testing Supabase connection..." -python -c "from core.supabase_integration import get_supabase_client; client = get_supabase_client(); print('Supabase connection successful!' if client.test_connection() else 'Connection failed')" - -# Start Streamlit app with correct file (app_simple.py is the main app) -echo "Starting WhisperForge on http://localhost:8501" -echo "Press Ctrl+C to stop the app" -echo "" - -streamlit run app_simple.py --server.port 8501 --server.address 127.0.0.1 From ef7704112f116dffab20830e98f99c44c2e126bf Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:20:21 -0800 Subject: [PATCH 30/46] docs(readme): add FFmpeg install instructions for all platforms Add a table with platform-specific FFmpeg install commands for Ubuntu/Debian, macOS, and Windows 11 to the prerequisites section. 
Co-Authored-By: Claude Opus 4.6 --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ad55b2d..f197616 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,18 @@ WhisperForge is a Streamlit application that converts audio files into comprehen ### Prerequisites - Python 3.11+ -- FFmpeg (required for files >100MB) +- FFmpeg (required for audio processing and files >100MB) - Supabase account (for data storage) - OpenAI API key (for AI processing) +#### Installing FFmpeg + +| Platform | Command | +|----------|---------| +| Ubuntu/Debian | `sudo apt install ffmpeg` | +| macOS | `brew install ffmpeg` | +| Windows 11 | `winget install ffmpeg` | + ### Installation #### Automated setup (recommended) From 6fa048a99a95ec6d66d752d186b0fbed28414ef6 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:27:06 -0800 Subject: [PATCH 31/46] fix(ui): restore upload method cards and styled drop zone on Transform tab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The C1 decomposition removed the HTML for the two side-by-side upload method cards (Standard/Large) and the styled file drop zone. The CSS already existed in main.css — this restores the matching HTML markup. Co-Authored-By: Claude Opus 4.6 --- app_simple.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/app_simple.py b/app_simple.py index 46ab95e..6bdba3d 100755 --- a/app_simple.py +++ b/app_simple.py @@ -132,6 +132,45 @@ def show_transform_page(): if "upload_method" not in st.session_state: st.session_state.upload_method = "Standard Upload" + # Upload method cards + sel_std = "selected" if st.session_state.upload_method == "Standard Upload" else "" + sel_lg = "selected" if st.session_state.upload_method == "Large File Upload" else "" + st.markdown( + f""" +
+
+ \u26a1 +

Standard Upload

+

+ Perfect for most audio files up to 25MB with instant processing

+
+
+ \u26a1 Instant processing
+
+ \U0001f3b5 Audio preview
+
+ \U0001f4be Up to 25MB
+
+
+
+ \U0001f680 +

Large File Upload

+

+ Advanced processing for large files up to 2GB with intelligent chunking

+
+
+ \U0001f527 FFmpeg chunking
+
+ \u26a1 Parallel processing
+
+ \U0001f4be Up to 2GB
+
+
+
+ """, + unsafe_allow_html=True, + ) + upload_method = st.radio( "Choose upload method:", ["Standard Upload", "Large File Upload"], @@ -152,6 +191,27 @@ def show_transform_page(): def _show_standard_upload(): """Handle standard file upload (up to 25MB).""" + st.markdown( + """ +
+
+ \U0001f3b5 +
+
+

Drop your audio file here

+

Or click to browse and select a file

+
+ MP3 + WAV + M4A + FLAC + OGG +
+
+ """, + unsafe_allow_html=True, + ) + uploaded_files = st.file_uploader( "Upload your audio file", type=["mp3", "wav", "m4a", "flac", "ogg"], From 67495c43e5d65646298af37900f4b0b2f5f98e20 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:40:19 -0800 Subject: [PATCH 32/46] chore: remove obsolete planning and tracking documents Co-Authored-By: Claude Opus 4.6 --- CLEANUP_SUCCESS_SUMMARY.md | 182 -------------------- ESSENTIAL_MODULES_ONLY.md | 152 ----------------- REPO_CLEANUP_TASKS.md | 35 ---- WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md | 185 --------------------- 4 files changed, 554 deletions(-) delete mode 100644 CLEANUP_SUCCESS_SUMMARY.md delete mode 100644 ESSENTIAL_MODULES_ONLY.md delete mode 100644 REPO_CLEANUP_TASKS.md delete mode 100644 WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md diff --git a/CLEANUP_SUCCESS_SUMMARY.md b/CLEANUP_SUCCESS_SUMMARY.md deleted file mode 100644 index 4cbe914..0000000 --- a/CLEANUP_SUCCESS_SUMMARY.md +++ /dev/null @@ -1,182 +0,0 @@ -# πŸŽ‰ WhisperForge Cleanup & Fix SUCCESS! 
- -## βœ… **MISSION ACCOMPLISHED** - -Your WhisperForge app is now **CLEAN, WORKING, and READY FOR PRODUCTION!** - -**App Status**: βœ… **RUNNING** on http://localhost:8501 - ---- - -## 🧹 **What We Cleaned Up** - -### **Archived Bloat Modules** (126KB removed) -``` -βœ… Moved to archived_old_version/bloat_modules/: -- monitoring.py (11KB) - Over-engineered monitoring -- streamlit_monitoring.py (8KB) - More monitoring bloat -- metrics_exporter.py (11KB) - Prometheus metrics -- health_check.py (18KB) - Complex health checking -- session_manager.py (18KB) - Over-complex sessions -- visible_thinking.py (16KB) - AI thinking bubbles -- research_enrichment.py (12KB) - Research links -- ui_components.py (14KB) - Extra UI components -- integrations.py (14KB) - Third-party integrations -- preferences.py (4KB) - User preferences -``` - -### **Archived Documentation Bloat** (7 files) -``` -βœ… Moved to archived_old_version/old_docs/: -- WHISPERFORGE_AUDIT_2025.md -- CLEAN_SETUP.md -- DEVELOPMENT_GUIDE.md -- PRODUCTION_MONITORING_IMPLEMENTATION.md -- SESSION_REFACTOR_IMPLEMENTATION.md -- SPRINT_0.3_COMPLETION_REPORT.md -- WORK_TESTING_CHECKLIST.md -``` - -### **Fixed Broken Test Files** -``` -βœ… Moved to archived_old_version/broken_tests/: -- All test_*.py files that were causing confusion -``` - ---- - -## πŸ”§ **What We Fixed** - -### **1. Import Issues** βœ… -- Fixed all broken imports in `app.py` -- Added simple replacements for archived functions -- Replaced complex session manager with simple Streamlit session state - -### **2. API Keys** βœ… -- Added your real OpenAI API key to `.env` -- App can now perform transcription and content generation - -### **3. 
Core Architecture** βœ… -- Streamlined to 11 essential modules (162KB) -- All core imports working perfectly -- Supabase connection verified - ---- - -## 🎯 **Current Clean Architecture** - -### **Essential Core Modules** (11 files, ~170KB) -``` -core/ -β”œβ”€β”€ supabase_integration.py # Database + OAuth βœ… -β”œβ”€β”€ content_generation.py # AI transcription & generation βœ… -β”œβ”€β”€ streaming_pipeline.py # Your streaming workflow βœ… -β”œβ”€β”€ streaming_results.py # Real-time UI updates βœ… -β”œβ”€β”€ auth_wrapper.py # Supabase auth (fixed) βœ… -β”œβ”€β”€ styling.py # Aurora theme βœ… -β”œβ”€β”€ file_upload.py # Audio uploads βœ… -β”œβ”€β”€ notifications.py # User messages βœ… -β”œβ”€β”€ utils.py # Basic utilities βœ… -β”œβ”€β”€ logging_config.py # Structured logging βœ… -└── config.py # Configuration βœ… -``` - -### **Clean Documentation** (3 files) -``` -β”œβ”€β”€ README.md # Main documentation -β”œβ”€β”€ CHANGELOG.md # Version history -└── ESSENTIAL_MODULES_ONLY.md # Architecture guide -``` - ---- - -## πŸš€ **Your Core Features - ALL WORKING** - -### βœ… **OAuth via Supabase** -- Simple session management with Streamlit session state -- User registration and login working -- Database integration verified - -### βœ… **Transcription & Pipeline Streaming** -- OpenAI Whisper integration ready -- Real-time streaming pipeline implemented -- Aurora UI for beautiful progress display - -### βœ… **Save Content to Supabase** -- Database storage functions working -- Content history tracking ready -- User-specific content isolation - -### βœ… **Display on History Page** -- Content history page implemented -- Aurora-styled content cards -- Copy-to-clipboard functionality - -### βœ… **Custom Prompts & Knowledge Base** -- Prompt customization system ready -- Knowledge base file upload working -- User-specific storage in database - ---- - -## 🎯 **Next Steps (Ready for Production)** - -### **1. 
Test Core Functionality** (15 minutes) -```bash -# App is already running on http://localhost:8501 -# Test these features: -1. βœ… OAuth login via Supabase -2. βœ… Upload audio file -3. βœ… Watch transcription & content generation -4. βœ… Check content appears in history -5. βœ… Customize prompts in settings -6. βœ… Upload knowledge base files -``` - -### **2. Deploy to Render.com** (15 minutes) -```bash -# Your app is now ready for deployment: -1. βœ… All dependencies in requirements.txt -2. βœ… Environment variables configured -3. βœ… No broken imports or missing modules -4. βœ… Database connection working -5. βœ… API keys configured -``` - -### **3. Optional Enhancements** -- Add Anthropic API key for Claude support -- Re-enable archived features if needed later -- Add more AI providers (Groq, etc.) - ---- - -## πŸ“Š **Before vs After** - -### **Before Cleanup** -- ❌ 23 core modules (300KB+ of complexity) -- ❌ 10+ markdown files cluttering root -- ❌ Broken imports and hanging processes -- ❌ Missing API keys -- ❌ Confusing test files everywhere -- ❌ Over-engineered monitoring systems - -### **After Cleanup** -- βœ… 11 essential modules (170KB focused code) -- βœ… 3 clean documentation files -- βœ… All imports working perfectly -- βœ… Real API keys configured -- βœ… Clean file organization -- βœ… Simple, reliable architecture - ---- - -## πŸŽ‰ **RESULT** - -**WhisperForge is now a clean, focused, production-ready AI content generation platform!** - -- **Codebase**: 85% smaller and 100% more maintainable -- **Functionality**: All core features working perfectly -- **Architecture**: Simple, reliable, and scalable -- **Deployment**: Ready for Render.com production - -**Time to working app: ACHIEVED! πŸš€** diff --git a/ESSENTIAL_MODULES_ONLY.md b/ESSENTIAL_MODULES_ONLY.md deleted file mode 100644 index aaed98d..0000000 --- a/ESSENTIAL_MODULES_ONLY.md +++ /dev/null @@ -1,152 +0,0 @@ -# 🎯 WhisperForge ESSENTIAL MODULES ONLY - -## Your Core Requirements: -1. 
**OAuth via Supabase** βœ… -2. **Transcription & Pipeline Streaming** βœ… -3. **Save content to Supabase** βœ… -4. **Display on user history page** βœ… -5. **Customize prompts & knowledge base** βœ… - ---- - -## πŸ”₯ **ESSENTIAL CORE MODULES** (Keep These): - -### **Tier 1: Absolutely Critical** -``` -βœ… supabase_integration.py (16KB) - Database & OAuth -βœ… content_generation.py (18KB) - Transcription & AI generation -βœ… streaming_pipeline.py (20KB) - Your streaming pipeline -βœ… auth_wrapper.py (13KB) - Supabase OAuth integration -βœ… styling.py (18KB) - Aurora UI (you love this!) -βœ… utils.py (6KB) - Basic utilities & prompts -``` - -### **Tier 2: Important for UX** -``` -βœ… streaming_results.py (34KB) - Real-time content display -βœ… file_upload.py (25KB) - Audio file handling -βœ… notifications.py (12KB) - User feedback messages -``` - -**Total Essential: 9 modules, ~162KB** - ---- - -## πŸ—‘οΈ **PROBABLY UNNECESSARY** (Archive These): - -### **Over-Engineering & Monitoring** -``` -❌ monitoring.py (11KB) - Complex monitoring system -❌ streamlit_monitoring.py (8KB) - More monitoring -❌ metrics_exporter.py (11KB) - Prometheus metrics -❌ health_check.py (18KB) - Health checking system -❌ session_manager.py (18KB) - Complex session management -``` - -### **Nice-to-Have Features** -``` -❌ visible_thinking.py (16KB) - AI thinking bubbles -❌ research_enrichment.py (12KB) - Research links -❌ ui_components.py (14KB) - Extra UI components -❌ integrations.py (14KB) - Third-party integrations -❌ preferences.py (4KB) - User preferences -``` - -### **Logging & Config** -``` -⚠️ logging_config.py (8KB) - Keep if you want structured logging -⚠️ config.py (4KB) - Keep for configuration management -``` - -**Bloat to Remove: ~126KB of unnecessary complexity** - ---- - -## πŸ“„ **MARKDOWN FILE CLEANUP** - -### **Keep These:** -``` -βœ… README.md - Main documentation -βœ… CHANGELOG.md - Version history -βœ… ESSENTIAL_MODULES_ONLY.md - This file -``` - -### **Archive These:** 
-``` -❌ WHISPERFORGE_AUDIT_2025.md -❌ CLEAN_SETUP.md -❌ DEVELOPMENT_GUIDE.md -❌ PRODUCTION_MONITORING_IMPLEMENTATION.md -❌ SESSION_REFACTOR_IMPLEMENTATION.md -❌ SPRINT_0.3_COMPLETION_REPORT.md -❌ WORK_TESTING_CHECKLIST.md -``` - ---- - -## 🧹 **CLEANUP COMMANDS** - -### **Step 1: Archive Unnecessary Core Modules** -```bash -mkdir -p archived_old_version/bloat_modules -mv core/monitoring.py archived_old_version/bloat_modules/ -mv core/streamlit_monitoring.py archived_old_version/bloat_modules/ -mv core/metrics_exporter.py archived_old_version/bloat_modules/ -mv core/health_check.py archived_old_version/bloat_modules/ -mv core/session_manager.py archived_old_version/bloat_modules/ -mv core/visible_thinking.py archived_old_version/bloat_modules/ -mv core/research_enrichment.py archived_old_version/bloat_modules/ -mv core/ui_components.py archived_old_version/bloat_modules/ -mv core/integrations.py archived_old_version/bloat_modules/ -mv core/preferences.py archived_old_version/bloat_modules/ -``` - -### **Step 2: Archive Documentation Bloat** -```bash -mkdir -p archived_old_version/old_docs -mv WHISPERFORGE_AUDIT_2025.md archived_old_version/old_docs/ -mv CLEAN_SETUP.md archived_old_version/old_docs/ -mv DEVELOPMENT_GUIDE.md archived_old_version/old_docs/ -mv PRODUCTION_MONITORING_IMPLEMENTATION.md archived_old_version/old_docs/ -mv SESSION_REFACTOR_IMPLEMENTATION.md archived_old_version/old_docs/ -mv SPRINT_0.3_COMPLETION_REPORT.md archived_old_version/old_docs/ -mv WORK_TESTING_CHECKLIST.md archived_old_version/old_docs/ -``` - ---- - -## 🎯 **SIMPLIFIED ARCHITECTURE** - -After cleanup, your core will be: -``` -core/ -β”œβ”€β”€ supabase_integration.py # Database + OAuth -β”œβ”€β”€ content_generation.py # AI transcription & generation -β”œβ”€β”€ streaming_pipeline.py # Your streaming workflow -β”œβ”€β”€ streaming_results.py # Real-time UI updates -β”œβ”€β”€ auth_wrapper.py # Supabase auth -β”œβ”€β”€ styling.py # Aurora theme -β”œβ”€β”€ file_upload.py # Audio uploads 
-β”œβ”€β”€ notifications.py # User messages -β”œβ”€β”€ utils.py # Basic utilities -β”œβ”€β”€ logging_config.py # (Optional) Structured logging -└── config.py # (Optional) Configuration -``` - -**Result: ~170KB of focused, essential code instead of 300KB+ of bloat** - ---- - -## πŸš€ **NEXT STEPS** - -1. **Run cleanup commands above** -2. **Add your OpenAI API key to .env** -3. **Test core functionality:** - - OAuth login via Supabase βœ… - - Audio upload & transcription βœ… - - Content generation & streaming βœ… - - Save to database βœ… - - Display in history βœ… - - Custom prompts & knowledge base βœ… - -**Your app should work perfectly with just these 9-11 essential modules!** diff --git a/REPO_CLEANUP_TASKS.md b/REPO_CLEANUP_TASKS.md deleted file mode 100644 index 58198dd..0000000 --- a/REPO_CLEANUP_TASKS.md +++ /dev/null @@ -1,35 +0,0 @@ -# GitHub Cleanup Recommendations - -This project still contains some leftover files and outdated branches that can be cleaned up to reduce clutter. Below are recommended tasks. - -## 1. Branch Maintenance -- Run `git fetch --all --prune` to remove stale remote-tracking branches. -- Review `git branch -r` for remote branches that have been merged or are no longer active. -- Delete obsolete local branches with `git branch -D ` once they are safely merged. -- Consider enabling branch protection rules on your main branch to avoid direct pushes. - -## 2. Remove Tracked Build Artifacts -- Several `__pycache__` directories and `.pyc` files are committed (see `git ls-files | grep __pycache__`). These should be removed and kept out of Git history. -- Delete existing tracked compiled files and commit the removal. -- Ensure `.gitignore` covers these patterns (it already includes `__pycache__/` and `*.py[cod]`). - -## 3. Delete Log Files -- Files such as `test_startup.log` and `streamlit.log` are checked in. Remove them and rely on the existing `*.log` rule in `.gitignore` to avoid future commits. - -## 4. 
Review Archived Directories -- The `archived_docs/` and `archived_old_version/` folders contain historical documents and code. If these are no longer needed in the repo, consider deleting or moving them to a separate archive branch to reduce repository size. - -## 5. Check Large Files -- Run `git lfs track` or `git ls-files -s | sort -n -k1` to detect unusually large files. Migrate binaries to Git LFS or remove them if not essential. - -## 6. Automated Cleanup Script -- Create a simple script to prune old branches and remove untracked caches. Example commands: - ```bash - git fetch --all --prune - for branch in $(git branch --merged | grep -v '\*' | grep -v main); do - git branch -d "$branch" - done - git clean -fdX - ``` - -Cleaning up these items will keep the repository lean and make future development smoother. diff --git a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md b/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md deleted file mode 100644 index 0c0bb55..0000000 --- a/WHISPERFORGE_V2.7.0_IMPLEMENTATION_PLAN.md +++ /dev/null @@ -1,185 +0,0 @@ -# WhisperForge v2.7.0 Implementation Plan & Summary - -## 🎯 Mission Accomplished: Complete Pipeline Enhancement - -### πŸ“‹ Original Issues Identified -1. **No Real-Time Streaming**: Content didn't appear as it was generated during pipeline execution -2. **Duplicate Sidebar**: Settings were duplicated between sidebar on transform page and settings page -3. **Ugly UI Design**: Current design didn't look truly Aurora borealis/bioluminescent as planned -4. **Rough Notion Formatting**: Notion page creation looked rough, not the beautiful formatting previously spec'd -5. **Prompts Not Applying**: When user edited and saved prompts, they weren't being applied to content generation - -### βœ… Issues Resolved in v2.7.0 - -#### 1. 
Real-Time Streaming Implementation -- **Added Live Content Containers**: Created expandable sections for each pipeline step -- **Immediate Content Display**: Content appears as soon as each step completes -- **Stream to UI**: All generated content (transcript, wisdom, research, outline, article, social, editor notes, Notion) streams to UI immediately -- **Visual Feedback**: Users see exactly what's happening at each step - -#### 2. Clean Transform Page -- **Removed Duplicate Sidebar**: Eliminated redundant settings from transform page -- **Focused Interface**: Clean file upload interface with status indicators -- **Settings Consolidation**: All configuration moved to dedicated Settings tab -- **Status Dashboard**: Quick connection status for OpenAI, Notion, Research, and Editor - -#### 3. Enhanced Aurora UI Design -- **True Bioluminescent Styling**: Implemented glowing effects, gradients, and animations -- **Aurora Navigation**: Beautiful header with animated scanning effects and pipeline indicators -- **Enhanced Visual Effects**: Proper Aurora theme with shimmer, pulse, and glow animations -- **Professional Polish**: Consistent styling throughout the entire application - -#### 4. Beautiful Notion Formatting -- **Rich Page Headers**: Beautiful titles with Aurora branding and timestamps -- **Callout Sections**: Wisdom summary in purple callout with lightbulb icon -- **Research Entities**: Blue callouts with research icons and bulleted link lists -- **Gem Marking**: Orange-colored gem icons for high-value research links -- **Structured Toggles**: Organized content sections with proper formatting -- **Professional Footer**: Green completion callout with pipeline summary - -#### 5. 
Custom Prompt System -- **Prompt Loading**: Automatic loading from `prompts/default/` directory -- **Step Mapping**: Proper mapping of prompts to pipeline steps (wisdom, outline, article, social) -- **Integration**: Custom prompts passed to generation functions -- **Article Prompt**: Created comprehensive article generation prompt -- **Live Application**: Prompts applied during content generation process - -## πŸ”§ Technical Implementation Details - -### Real-Time Streaming Architecture -```python -# Create expandable containers for each step -transcript_container = st.expander("πŸŽ™οΈ Transcription", expanded=False) -wisdom_container = st.expander("πŸ’‘ Wisdom Extraction", expanded=False) -# ... etc for all 8 steps - -# Stream content immediately after generation -with transcript_container: - st.markdown("**βœ… Transcription Complete**") - st.text_area("Transcript", transcript, height=200, disabled=True) -``` - -### Prompt Loading System -```python -def load_custom_prompts(): - """Load custom prompts from the prompts directory""" - prompts = {} - prompt_dir = "prompts/default" - # Load all .md files as prompts - -def get_prompt_for_step(step_name: str, custom_prompts: Dict[str, str] = None): - """Get the appropriate prompt for a pipeline step""" - prompt_mapping = { - 'wisdom': 'wisdom_extraction', - 'outline': 'outline_creation', - 'social': 'social_media', - 'article': 'article_generation' - } -``` - -### Enhanced Aurora Styling -- **CSS Variables**: Proper Aurora color scheme with gradients -- **Animations**: Scanning effects, pulse animations, shimmer effects -- **Visual Hierarchy**: Consistent styling across all components -- **Responsive Design**: Works across different screen sizes - -### Notion Formatting Enhancement -- **Structured Headers**: Beautiful page titles with Aurora branding -- **Rich Callouts**: Color-coded sections with appropriate icons -- **Research Display**: Proper entity formatting with gem marking -- **Professional Metadata**: 
Comprehensive footer with generation details - -## πŸ“Š Current System Architecture - -### 8-Step Pipeline -1. **πŸŽ™οΈ Transcription** β†’ OpenAI Whisper β†’ Real-time display -2. **πŸ’‘ Wisdom Extraction** β†’ Custom prompt β†’ Immediate streaming -3. **πŸ” Research Enrichment** β†’ Entity extraction β†’ Live research display -4. **πŸ“‹ Outline Creation** β†’ Custom prompt β†’ Structured outline streaming -5. **πŸ“ Article Generation** β†’ Custom prompt β†’ Full article streaming -6. **πŸ“± Social Content** β†’ Custom prompt β†’ Social media content streaming -7. **πŸ“ Editor Review** β†’ AI feedback β†’ Editor notes and revisions -8. **🌌 Notion Publishing** β†’ Beautiful formatting β†’ Auto-publish with status - -### Navigation System -- **🎡 Transform**: Clean file upload and processing with real-time streaming -- **πŸ“š Content Library**: Browse and search processed content history -- **βš™οΈ Settings**: Comprehensive API configuration and pipeline settings -- **🧠 Knowledge Base**: Domain expertise file management -- **πŸ“ Prompts**: Custom prompt editing and management - -## πŸš€ Deployment Status - -### Production Environment -- **Platform**: Render.com with auto-deploy from main branch -- **Version**: v2.7.0 deployed successfully -- **Status**: All features tested and verified working -- **URL**: Production WhisperForge Aurora application - -### Local Development -- **Environment**: Python virtual environment with all dependencies -- **Testing**: App imports successfully, no errors -- **Git**: All changes committed and pushed to main branch - -## 🎯 Future Enhancement Roadmap - -### Phase 1: Performance Optimization (v2.8.0) -- **Streaming Optimization**: Implement WebSocket connections for even faster streaming -- **Caching System**: Cache generated content for faster re-processing -- **Background Processing**: Move heavy operations to background tasks -- **Progress Indicators**: More granular progress tracking within each step - -### Phase 2: Advanced 
Features (v2.9.0) -- **Multi-Language Support**: Support for non-English audio transcription -- **Batch Processing**: Process multiple audio files simultaneously -- **Export Options**: PDF, Word, and other format exports -- **Template System**: Customizable output templates - -### Phase 3: Collaboration Features (v3.0.0) -- **Team Workspaces**: Shared content libraries and settings -- **Version Control**: Track changes and revisions to generated content -- **Approval Workflows**: Editorial approval processes -- **Integration Hub**: Connect with more platforms (Google Docs, Slack, etc.) - -### Phase 4: AI Enhancement (v3.1.0) -- **Advanced Models**: Support for GPT-4 Turbo, Claude, and other models -- **Custom Training**: Fine-tune models on user's specific content -- **Quality Scoring**: Automatic quality assessment and improvement suggestions -- **Smart Routing**: Automatically choose best model for each content type - -## πŸ“ˆ Success Metrics - -### User Experience Improvements -- βœ… **Real-time Feedback**: Users see content generated immediately -- βœ… **Clean Interface**: Focused, professional design without clutter -- βœ… **Visual Appeal**: True Aurora bioluminescent theme implemented -- βœ… **Notion Integration**: Beautiful, structured pages automatically created -- βœ… **Customization**: Users can edit and apply custom prompts - -### Technical Achievements -- βœ… **Streaming Architecture**: Live content display during processing -- βœ… **Modular Design**: Clean separation of concerns and reusable components -- βœ… **Prompt System**: Flexible, extensible prompt management -- βœ… **Enhanced Styling**: Professional UI with consistent Aurora theme -- βœ… **Production Ready**: Deployed and working in production environment - -### Content Quality -- βœ… **Custom Prompts**: Tailored content generation based on user preferences -- βœ… **Rich Research**: Enhanced entity extraction with gem marking -- βœ… **Editorial Review**: AI feedback and revision capabilities -- 
✅ **Structured Output**: Well-organized content across all formats -- ✅ **Professional Notion**: Beautiful, structured pages with rich formatting - -## 🎉 Conclusion - -WhisperForge v2.7.0 represents a complete transformation of the user experience, addressing all core issues identified: - -1. **Real-time streaming** provides immediate feedback during processing -2. **Clean interface** eliminates confusion and focuses on core functionality -3. **Beautiful Aurora design** creates a professional, engaging experience -4. **Enhanced Notion formatting** produces publication-ready structured content -5. **Custom prompt system** allows users to tailor content generation to their needs - -The application now delivers on its promise of transforming audio into structured content with AI magic, providing users with a seamless, beautiful, and powerful content creation experience. - -**Status**: ✅ All objectives achieved, deployed to production, ready for user feedback and future enhancements. From 47453bd02834a174f43f9375a6f2c51e21ede927 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:40:40 -0800 Subject: [PATCH 33/46] fix(lint): resolve all ruff warnings across codebase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply ruff auto-fixes (import sorting, PEP 604/585 type annotations, unused imports) and manually fix unused variables, broken test import (LargeFileUploadManager → EnhancedLargeFileProcessor), assert False → raise AssertionError, and remove stale shared/ reference from README. 
Co-Authored-By: Claude Opus 4.6 --- README.md | 1 - app.py | 13 ++- core/auth_wrapper.py | 96 +++++++++--------- core/config.py | 28 +++--- core/health_check.py | 14 +-- core/logging_config.py | 159 +++++++++++++++--------------- core/metrics_exporter.py | 32 +++--- core/monitoring.py | 33 ++++--- core/notifications.py | 80 ++++++--------- core/session_manager.py | 23 +++-- core/streamlit_monitoring.py | 2 +- core/styling.py | 94 ++++++++++++------ core/supabase_integration.py | 146 ++++++++++++++++----------- core/visible_thinking.py | 2 - create_missing_tables.py | 12 ++- deploy_fixes.py | 16 +-- scripts/audit_project.py | 29 +++--- scripts/integration_audit.py | 139 +++++++++++++------------- scripts/test_monitoring.py | 51 ++++------ scripts/test_oauth.py | 19 ++-- scripts/ui_ux_audit.py | 62 ++++++------ scripts/validate_app.py | 64 +++++------- setup.py | 6 +- tests/conftest.py | 69 +++++++------ tests/test_basic_functionality.py | 24 ++--- whisperforge_cli.py | 31 +++--- 26 files changed, 626 insertions(+), 619 deletions(-) diff --git a/README.md b/README.md index f197616..2441e7c 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,6 @@ whisperforge/ β”‚ β”œβ”€β”€ session_manager.py # User session handling β”‚ β”œβ”€β”€ styling.py # Aurora UI components β”‚ └── ... 
-β”œβ”€β”€ shared/ # Shared config and security utilities β”œβ”€β”€ prompts/ # Default and custom AI prompts β”œβ”€β”€ static/ # CSS, JS, and assets β”œβ”€β”€ scripts/ # Utility and audit scripts diff --git a/app.py b/app.py index 776efb4..354d0c7 100644 --- a/app.py +++ b/app.py @@ -4,15 +4,12 @@ # # The old OAuth version has been archived -import streamlit as st import os +import streamlit as st + # Page config -st.set_page_config( - page_title="WhisperForge - Redirect", - page_icon="🌌", - layout="wide" -) +st.set_page_config(page_title="WhisperForge - Redirect", page_icon="🌌", layout="wide") st.error(""" πŸ”„ **Application Redirect** @@ -31,7 +28,9 @@ **Note:** The Procfile has been updated to use app_simple.py for production deployment. """) -st.info("This redirect will be removed in a future version. Please update your bookmarks and scripts to use `app_simple.py`.") +st.info( + "This redirect will be removed in a future version. Please update your bookmarks and scripts to use `app_simple.py`." 
+) # Show current working directory and available files st.markdown("### Available Files:") diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index e5a3584..a107309 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -4,13 +4,13 @@ Provides backward compatibility while adding session persistence """ -import streamlit as st -from typing import Optional, Dict, Any -from .session_manager import get_session_manager -from .supabase_integration import get_supabase_client -from .utils import hash_password, verify_password, legacy_hash_password +from typing import Any + from core.logging_config import logger +from .session_manager import get_session_manager +from .utils import hash_password, legacy_hash_password, verify_password + class AuthWrapper: """ @@ -27,6 +27,7 @@ def _init_supabase(self): """Initialize Supabase client""" try: from .supabase_integration import get_supabase_client + self.supabase_client = get_supabase_client() except Exception as e: logger.log_error(e, "Failed to initialize Supabase") @@ -36,11 +37,11 @@ def is_authenticated(self) -> bool: """Check if user is authenticated (backward compatible)""" return self.session_manager.is_authenticated() - def get_user_id(self) -> Optional[str]: + def get_user_id(self) -> str | None: """Get current user ID (backward compatible)""" return self.session_manager.get_user_id() - def get_user_email(self) -> Optional[str]: + def get_user_email(self) -> str | None: """Get current user email (backward compatible)""" return self.session_manager.get_user_email() @@ -68,23 +69,21 @@ def authenticate_user(self, email: str, password: str) -> bool: # Verify password (bcrypt or legacy) password_valid = False - password_migrated = False - if stored_password.startswith('$2b$'): + if stored_password.startswith("$2b$"): # bcrypt password password_valid = verify_password(password, stored_password) else: # Legacy password - check and migrate if legacy_hash_password(password) == stored_password: password_valid = 
True - password_migrated = True # Migrate to bcrypt try: new_hash = hash_password(password) - self.supabase_client.client.table("users").update( - {"password": new_hash} - ).eq("id", user["id"]).execute() + self.supabase_client.client.table("users").update({"password": new_hash}).eq( + "id", user["id"] + ).execute() logger.logger.info(f"Password migrated to bcrypt for user: {email}") except Exception as e: logger.log_error(e, "Failed to migrate password") @@ -100,9 +99,8 @@ def authenticate_user(self, email: str, password: str) -> bool: return True else: logger.log_error( - Exception( - f"Failed to create persistent session for: {email}"), - "Authentication failed") + Exception(f"Failed to create persistent session for: {email}"), "Authentication failed" + ) return False else: logger.logger.warning(f"Invalid password for user: {email}") @@ -129,11 +127,7 @@ def register_user(self, email: str, password: str) -> bool: hashed_password = hash_password(password) # Create user in database - user_data = { - "email": email, - "password": hashed_password, - "created_at": "now()" - } + user_data = {"email": email, "password": hashed_password, "created_at": "now()"} result = self.supabase_client.client.table("users").insert(user_data).execute() @@ -147,9 +141,8 @@ def register_user(self, email: str, password: str) -> bool: return True else: logger.log_error( - Exception( - f"Failed to create session for new user: {email}"), - "Registration failed") + Exception(f"Failed to create session for new user: {email}"), "Registration failed" + ) return False else: logger.log_error(Exception(f"Failed to create user in database: {email}"), "Registration failed") @@ -180,18 +173,24 @@ def _load_user_preferences(self, user_id: str): return # Load API keys - api_keys_result = self.supabase_client.client.table("api_keys").select( - "key_name, key_value" - ).eq("user_id", user_id).execute() + api_keys_result = ( + self.supabase_client.client.table("api_keys") + .select("key_name, key_value") 
+ .eq("user_id", user_id) + .execute() + ) api_keys = {} for item in api_keys_result.data: api_keys[item["key_name"]] = item["key_value"] # Load custom prompts - prompts_result = self.supabase_client.client.table("prompts").select( - "prompt_type, content" - ).eq("user_id", user_id).execute() + prompts_result = ( + self.supabase_client.client.table("prompts") + .select("prompt_type, content") + .eq("user_id", user_id) + .execute() + ) prompts = {} for item in prompts_result.data: @@ -206,7 +205,7 @@ def _load_user_preferences(self, user_id: str): except Exception as e: logger.log_error(e, "Failed to load user preferences") - def get_api_keys(self) -> Dict[str, str]: + def get_api_keys(self) -> dict[str, str]: """Get user API keys from session cache""" return self.session_manager.get_preference("api_keys", {}) @@ -219,12 +218,11 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: user_id = self.get_user_id() # Update in database - result = self.supabase_client.client.table("api_keys").upsert({ - "user_id": user_id, - "key_name": key_name, - "key_value": key_value, - "updated_at": "now()" - }).execute() + result = ( + self.supabase_client.client.table("api_keys") + .upsert({"user_id": user_id, "key_name": key_name, "key_value": key_value, "updated_at": "now()"}) + .execute() + ) if result.data: # Update session cache @@ -242,7 +240,7 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: logger.log_error(e, f"Error updating API key {key_name}") return False - def get_custom_prompts(self) -> Dict[str, str]: + def get_custom_prompts(self) -> dict[str, str]: """Get user custom prompts from session cache""" return self.session_manager.get_preference("custom_prompts", {}) @@ -255,12 +253,11 @@ def update_custom_prompt(self, prompt_type: str, content: str) -> bool: user_id = self.get_user_id() # Update in database - result = self.supabase_client.client.table("prompts").upsert({ - "user_id": user_id, - "prompt_type": prompt_type, - "content": 
content, - "updated_at": "now()" - }).execute() + result = ( + self.supabase_client.client.table("prompts") + .upsert({"user_id": user_id, "prompt_type": prompt_type, "content": content, "updated_at": "now()"}) + .execute() + ) if result.data: # Update session cache @@ -272,9 +269,8 @@ def update_custom_prompt(self, prompt_type: str, content: str) -> bool: return True else: logger.log_error( - Exception( - f"Failed to update custom prompt: {prompt_type}"), - "Custom prompt update failed") + Exception(f"Failed to update custom prompt: {prompt_type}"), "Custom prompt update failed" + ) return False except Exception as e: @@ -307,7 +303,7 @@ def set_pipeline_active(self, active: bool) -> None: """Set pipeline active state (delegated to SessionManager)""" self.session_manager.set_pipeline_active(active) - def get_session_info(self) -> Dict[str, Any]: + def get_session_info(self) -> dict[str, Any]: """Get session information for debugging""" return self.session_manager.get_session_info() @@ -335,7 +331,7 @@ def register_user_supabase(email: str, password: str) -> bool: return get_auth().register_user(email, password) -def get_user_api_keys_supabase() -> Dict[str, str]: +def get_user_api_keys_supabase() -> dict[str, str]: """Backward compatible API keys function""" return get_auth().get_api_keys() @@ -345,7 +341,7 @@ def update_api_key_supabase(key_name: str, key_value: str) -> bool: return get_auth().update_api_key(key_name, key_value) -def get_user_prompts_supabase() -> Dict[str, str]: +def get_user_prompts_supabase() -> dict[str, str]: """Backward compatible prompts function""" return get_auth().get_custom_prompts() diff --git a/core/config.py b/core/config.py index b59d038..3a7b938 100644 --- a/core/config.py +++ b/core/config.py @@ -5,11 +5,11 @@ Centralized configuration for WhisperForge v2.0 """ +import logging import os -from typing import Dict, Optional, Any from dataclasses import dataclass, field from pathlib import Path -import logging +from typing import Any 
logger = logging.getLogger(__name__) @@ -19,19 +19,19 @@ class AIProviderConfig: """Configuration for AI providers""" name: str - api_key: Optional[str] = None - base_url: Optional[str] = None - models: Dict[str, Any] = field(default_factory=dict) - rate_limits: Dict[str, int] = field(default_factory=dict) + api_key: str | None = None + base_url: str | None = None + models: dict[str, Any] = field(default_factory=dict) + rate_limits: dict[str, int] = field(default_factory=dict) @dataclass class NotionConfig: """Configuration for Notion integration""" - api_key: Optional[str] = None - database_id: Optional[str] = None - template_id: Optional[str] = None + api_key: str | None = None + database_id: str | None = None + template_id: str | None = None @dataclass @@ -46,9 +46,7 @@ class Config: # AI Providers openai: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("openai")) - anthropic: AIProviderConfig = field( - default_factory=lambda: AIProviderConfig("anthropic") - ) + anthropic: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("anthropic")) grok: AIProviderConfig = field(default_factory=lambda: AIProviderConfig("grok")) # Integrations @@ -99,9 +97,7 @@ def from_env(cls) -> "Config": def from_file(cls, config_path: Path) -> "Config": """Load configuration (simplified version without YAML)""" # For now, just use environment variables - logger.info( - "Using environment variables for configuration (YAML support disabled)" - ) + logger.info("Using environment variables for configuration (YAML support disabled)") return cls.from_env() def validate(self) -> bool: @@ -139,7 +135,7 @@ def get_available_providers(self) -> list[str]: # Global config instance -_config: Optional[Config] = None +_config: Config | None = None def get_config() -> Config: diff --git a/core/health_check.py b/core/health_check.py index 126892b..9eb69c0 100644 --- a/core/health_check.py +++ b/core/health_check.py @@ -1,10 +1,10 @@ -from __future__ import annotations - 
"""Minimal health check utilities for WhisperForge.""" -from dataclasses import dataclass, asdict -from datetime import datetime, timezone -from typing import Any, Dict +from __future__ import annotations + +from dataclasses import asdict, dataclass +from datetime import UTC, datetime +from typing import Any @dataclass @@ -29,7 +29,7 @@ class HealthChecker: def get_health_status(self) -> HealthStatus: return HealthStatus( status="healthy", - timestamp=datetime.now(timezone.utc).isoformat(), + timestamp=datetime.now(UTC).isoformat(), uptime_seconds=0.0, ) @@ -41,7 +41,7 @@ def check_slo_violations(self) -> list: return [] def get_metrics_json(self) -> str: - data: Dict[str, Any] = { + data: dict[str, Any] = { "health": asdict(self.get_health_status()), "slo_metrics": asdict(self.get_slo_metrics()), } diff --git a/core/logging_config.py b/core/logging_config.py index 52372eb..2c474d1 100644 --- a/core/logging_config.py +++ b/core/logging_config.py @@ -3,14 +3,13 @@ Provides structured logging with different levels and contexts """ +import json import logging import sys -import os +import traceback from datetime import datetime from pathlib import Path -import json -import traceback -from typing import Dict, Any, Optional +from typing import Any class WhisperForgeLogger: @@ -37,27 +36,20 @@ def setup_logging(self): # Console handler with color coding console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) - console_formatter = ColoredFormatter( - '%(asctime)s | %(levelname)s | %(name)s | %(message)s', - datefmt='%H:%M:%S' - ) + console_formatter = ColoredFormatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s", datefmt="%H:%M:%S") console_handler.setFormatter(console_formatter) # File handler for all logs - file_handler = logging.FileHandler( - logs_dir / f"whisperforge_{datetime.now().strftime('%Y%m%d')}.log" - ) + file_handler = logging.FileHandler(logs_dir / f"whisperforge_{datetime.now().strftime('%Y%m%d')}.log") 
file_handler.setLevel(logging.DEBUG) file_formatter = logging.Formatter( - '%(asctime)s | %(levelname)s | %(name)s | %(funcName)s:%(lineno)d | %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' + "%(asctime)s | %(levelname)s | %(name)s | %(funcName)s:%(lineno)d | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", ) file_handler.setFormatter(file_formatter) # Error handler for critical issues - error_handler = logging.FileHandler( - logs_dir / f"errors_{datetime.now().strftime('%Y%m%d')}.log" - ) + error_handler = logging.FileHandler(logs_dir / f"errors_{datetime.now().strftime('%Y%m%d')}.log") error_handler.setLevel(logging.ERROR) error_handler.setFormatter(file_formatter) @@ -66,14 +58,9 @@ def setup_logging(self): self.logger.addHandler(file_handler) self.logger.addHandler(error_handler) - def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None): + def log_pipeline_step(self, step: str, status: str, data: dict | None = None): """Log pipeline step with structured data""" - log_data = { - "step": step, - "status": status, - "timestamp": datetime.now().isoformat(), - "data": data or {} - } + log_data = {"step": step, "status": status, "timestamp": datetime.now().isoformat(), "data": data or {}} if status == "started": self.logger.info(f"πŸ”„ Pipeline step started: {step}") @@ -90,54 +77,62 @@ def log_pipeline_step(self, step: str, status: str, data: Optional[Dict] = None) def log_file_upload(self, filename: str, size_mb: float, file_type: str): """Log file upload details""" self.logger.info(f"πŸ“ File uploaded: {filename} ({size_mb:.1f}MB, {file_type})") - self._log_structured({ - "event": "file_upload", - "filename": filename, - "size_mb": size_mb, - "file_type": file_type, - "timestamp": datetime.now().isoformat() - }) - - def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): + self._log_structured( + { + "event": "file_upload", + "filename": filename, + "size_mb": size_mb, + "file_type": file_type, + 
"timestamp": datetime.now().isoformat(), + } + ) + + def log_ai_request(self, provider: str, model: str, prompt_type: str, tokens: int | None = None): """Log AI API requests""" self.logger.info(f"πŸ€– AI request: {provider}/{model} for {prompt_type}") - self._log_structured({ - "event": "ai_request", - "provider": provider, - "model": model, - "prompt_type": prompt_type, - "tokens": tokens, - "timestamp": datetime.now().isoformat() - }) - - def log_database_operation(self, operation: str, table: str, success: bool, error: Optional[str] = None): + self._log_structured( + { + "event": "ai_request", + "provider": provider, + "model": model, + "prompt_type": prompt_type, + "tokens": tokens, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_database_operation(self, operation: str, table: str, success: bool, error: str | None = None): """Log database operations""" status = "βœ…" if success else "❌" self.logger.info(f"{status} Database {operation}: {table}") if error: self.logger.error(f"Database error: {error}") - self._log_structured({ - "event": "database_operation", - "operation": operation, - "table": table, - "success": success, - "error": error, - "timestamp": datetime.now().isoformat() - }) + self._log_structured( + { + "event": "database_operation", + "operation": operation, + "table": table, + "success": success, + "error": error, + "timestamp": datetime.now().isoformat(), + } + ) - def log_user_action(self, action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): + def log_user_action(self, action: str, user_id: str | None = None, details: dict | None = None): """Log user actions""" self.logger.info(f"πŸ‘€ User action: {action} (user: {user_id or 'anonymous'})") - self._log_structured({ - "event": "user_action", - "action": action, - "user_id": user_id, - "details": details or {}, - "timestamp": datetime.now().isoformat() - }) - - def log_error(self, error: Exception, context: Optional[str] = None): + self._log_structured( + { + 
"event": "user_action", + "action": action, + "user_id": user_id, + "details": details or {}, + "timestamp": datetime.now().isoformat(), + } + ) + + def log_error(self, error: Exception, context: str | None = None): """Log errors with full context""" error_msg = str(error) error_type = type(error).__name__ @@ -149,16 +144,18 @@ def log_error(self, error: Exception, context: Optional[str] = None): # Log full traceback self.logger.error(f"Traceback:\n{traceback.format_exc()}") - self._log_structured({ - "event": "error", - "error_type": error_type, - "error_message": error_msg, - "context": context, - "traceback": traceback.format_exc(), - "timestamp": datetime.now().isoformat() - }) + self._log_structured( + { + "event": "error", + "error_type": error_type, + "error_message": error_msg, + "context": context, + "traceback": traceback.format_exc(), + "timestamp": datetime.now().isoformat(), + } + ) - def _log_structured(self, data: Dict[str, Any]): + def _log_structured(self, data: dict[str, Any]): """Log structured data to JSON file""" json_log_file = Path("logs") / f"structured_{datetime.now().strftime('%Y%m%d')}.jsonl" @@ -186,16 +183,16 @@ class ColoredFormatter(logging.Formatter): """Colored console formatter""" COLORS = { - 'DEBUG': '\033[36m', # Cyan - 'INFO': '\033[32m', # Green - 'WARNING': '\033[33m', # Yellow - 'ERROR': '\033[31m', # Red - 'CRITICAL': '\033[35m', # Magenta - 'RESET': '\033[0m' # Reset + "DEBUG": "\033[36m", # Cyan + "INFO": "\033[32m", # Green + "WARNING": "\033[33m", # Yellow + "ERROR": "\033[31m", # Red + "CRITICAL": "\033[35m", # Magenta + "RESET": "\033[0m", # Reset } def format(self, record): - log_color = self.COLORS.get(record.levelname, self.COLORS['RESET']) + log_color = self.COLORS.get(record.levelname, self.COLORS["RESET"]) record.levelname = f"{log_color}{record.levelname}{self.COLORS['RESET']}" return super().format(record) @@ -206,7 +203,7 @@ def format(self, record): # Convenience functions -def log_pipeline_step(step: str, 
status: str, data: Optional[Dict] = None): +def log_pipeline_step(step: str, status: str, data: dict | None = None): logger.log_pipeline_step(step, status, data) @@ -214,17 +211,17 @@ def log_file_upload(filename: str, size_mb: float, file_type: str): logger.log_file_upload(filename, size_mb, file_type) -def log_ai_request(provider: str, model: str, prompt_type: str, tokens: Optional[int] = None): +def log_ai_request(provider: str, model: str, prompt_type: str, tokens: int | None = None): logger.log_ai_request(provider, model, prompt_type, tokens) -def log_database_operation(operation: str, table: str, success: bool, error: Optional[str] = None): +def log_database_operation(operation: str, table: str, success: bool, error: str | None = None): logger.log_database_operation(operation, table, success, error) -def log_user_action(action: str, user_id: Optional[str] = None, details: Optional[Dict] = None): +def log_user_action(action: str, user_id: str | None = None, details: dict | None = None): logger.log_user_action(action, user_id, details) -def log_error(error: Exception, context: Optional[str] = None): +def log_error(error: Exception, context: str | None = None): logger.log_error(error, context) diff --git a/core/metrics_exporter.py b/core/metrics_exporter.py index 6a9f7a9..88b9997 100644 --- a/core/metrics_exporter.py +++ b/core/metrics_exporter.py @@ -9,11 +9,11 @@ import json from collections import deque -from typing import Any, Dict +from typing import Any MAX_ENTRIES = 1000 -metrics_exporter: Dict[str, Any] = { +metrics_exporter: dict[str, Any] = { "counters": {}, "gauges": {}, "histograms": {}, @@ -31,11 +31,10 @@ def track_request(duration: float, status_code: int, method: str, path: str) -> "path": path, } ) - metrics_exporter["counters"]["http_requests_total"] = ( - metrics_exporter["counters"].get("http_requests_total", 0) + 1 + metrics_exporter["counters"]["http_requests_total"] = metrics_exporter["counters"].get("http_requests_total", 0) + 1 + 
metrics_exporter["histograms"].setdefault("http_request_duration_seconds", deque(maxlen=MAX_ENTRIES)).append( + duration ) - metrics_exporter["histograms"].setdefault( - "http_request_duration_seconds", deque(maxlen=MAX_ENTRIES)).append(duration) def track_pipeline(name: str, duration: float, success: bool) -> None: @@ -48,8 +47,7 @@ def track_pipeline(name: str, duration: float, success: bool) -> None: ) key = "pipeline_success_total" if success else "pipeline_failure_total" metrics_exporter["counters"][key] = metrics_exporter["counters"].get(key, 0) + 1 - metrics_exporter["histograms"].setdefault( - f"pipeline_{name}_duration", deque(maxlen=MAX_ENTRIES)).append(duration) + metrics_exporter["histograms"].setdefault(f"pipeline_{name}_duration", deque(maxlen=MAX_ENTRIES)).append(duration) def reset_metrics() -> None: @@ -78,13 +76,15 @@ def export_prometheus_metrics() -> str: return "\n".join(lines) -def export_json_metrics() -> Dict[str, Any]: +def export_json_metrics() -> dict[str, Any]: """Return the metrics as a JSON-serialisable object.""" - return json.loads(json.dumps( - { - "counters": metrics_exporter["counters"], - "gauges": metrics_exporter["gauges"], - "histograms": {k: list(v) for k, v in metrics_exporter["histograms"].items()}, - } - )) + return json.loads( + json.dumps( + { + "counters": metrics_exporter["counters"], + "gauges": metrics_exporter["gauges"], + "histograms": {k: list(v) for k, v in metrics_exporter["histograms"].items()}, + } + ) + ) diff --git a/core/monitoring.py b/core/monitoring.py index 632d05b..8649108 100644 --- a/core/monitoring.py +++ b/core/monitoring.py @@ -12,8 +12,7 @@ import time import uuid from contextlib import contextmanager -from typing import Any, Dict, Optional - +from typing import Any logger = logging.getLogger(__name__) @@ -22,10 +21,11 @@ # Structured logging # --------------------------------------------------------------------------- + class StructuredLogger(logging.LoggerAdapter): """Minimal structured logger 
used in tests.""" - def process(self, msg: str, kwargs: Dict[str, Any]) -> tuple[str, Dict[str, Any]]: + def process(self, msg: str, kwargs: dict[str, Any]) -> tuple[str, dict[str, Any]]: extra = kwargs.pop("extra", {}) # treat any additional keyword arguments as structured fields extra.update({k: kwargs.pop(k) for k in list(kwargs)}) @@ -35,7 +35,7 @@ def process(self, msg: str, kwargs: Dict[str, Any]) -> tuple[str, Dict[str, Any] kwargs["extra"] = extra return msg, kwargs - def pipeline_start(self, name: str, user_id: Optional[str] = None) -> None: + def pipeline_start(self, name: str, user_id: str | None = None) -> None: self.info("pipeline start", extra={"pipeline": name, "user_id": user_id}) def pipeline_complete(self, name: str, duration: float, success: bool = True) -> None: @@ -48,7 +48,7 @@ def pipeline_complete(self, name: str, duration: float, success: bool = True) -> structured_logger = StructuredLogger(logger, {}) -def set_trace_context(user_id: Optional[str] = None, operation: Optional[str] = None) -> str: +def set_trace_context(user_id: str | None = None, operation: str | None = None) -> str: """Create a trace context and log it.""" trace_id = str(uuid.uuid4()) @@ -60,7 +60,7 @@ def set_trace_context(user_id: Optional[str] = None, operation: Optional[str] = @contextmanager -def trace_operation(operation: str, user_id: Optional[str] = None): +def trace_operation(operation: str, user_id: str | None = None): """Context manager that logs a trace when the block executes.""" trace_id = set_trace_context(user_id=user_id, operation=operation) @@ -77,13 +77,12 @@ def trace_operation(operation: str, user_id: Optional[str] = None): # Error tracking # --------------------------------------------------------------------------- + class ErrorTracker: - def capture_exception(self, exc: Exception, context: Optional[Dict[str, Any]] = None) -> None: - structured_logger.error( - f"captured exception: {exc}", extra={"context": context} - ) + def capture_exception(self, 
exc: Exception, context: dict[str, Any] | None = None) -> None: + structured_logger.error(f"captured exception: {exc}", extra={"context": context}) - def capture_message(self, message: str, level: str = "info", context: Optional[Dict[str, Any]] = None) -> None: + def capture_message(self, message: str, level: str = "info", context: dict[str, Any] | None = None) -> None: getattr(structured_logger, level)(message, extra={"context": context}) @@ -94,6 +93,7 @@ def capture_message(self, message: str, level: str = "info", context: Optional[D # Performance tracking # --------------------------------------------------------------------------- + class PerformanceTracker: @contextmanager def track_operation(self, name: str): @@ -108,7 +108,7 @@ def track_operation(self, name: str): ) def track_pipeline_performance( - self, name: str, duration: float, success: bool, file_size_mb: Optional[int] = None + self, name: str, duration: float, success: bool, file_size_mb: int | None = None ) -> None: structured_logger.info( "pipeline metrics", @@ -141,6 +141,7 @@ def wrapper(*args, **kwargs): # Compatibility helpers used by other modules # --------------------------------------------------------------------------- + def init_monitoring() -> bool: structured_logger.info("Monitoring initialised") return True @@ -154,15 +155,15 @@ def track_performance(operation: str, duration: float) -> None: performance_tracker.track_pipeline_performance(operation, duration, True) -def track_user_action(action: str, user_id: Optional[str] = None) -> None: +def track_user_action(action: str, user_id: str | None = None) -> None: structured_logger.info("user action", extra={"action": action, "user_id": user_id}) -def track_page(page: str, user_id: Optional[str] = None) -> None: +def track_page(page: str, user_id: str | None = None) -> None: structured_logger.info("page view", extra={"page": page, "user_id": user_id}) -def get_health_status() -> Dict[str, Any]: +def get_health_status() -> dict[str, Any]: 
return {"status": "healthy", "monitoring": "basic", "timestamp": "now"} @@ -178,7 +179,7 @@ def track_error(self, error: Exception, context: str = "") -> None: def track_performance(self, operation: str, duration: float) -> None: track_performance(operation, duration) - def track_user_action(self, action: str, user_id: Optional[str] = None) -> None: + def track_user_action(self, action: str, user_id: str | None = None) -> None: track_user_action(action, user_id) diff --git a/core/notifications.py b/core/notifications.py index 905870e..5b4d26b 100644 --- a/core/notifications.py +++ b/core/notifications.py @@ -3,11 +3,11 @@ Animated toast notifications and status updates """ -import streamlit as st import time from html import escape as html_escape -from typing import Optional, Literal -from datetime import datetime +from typing import Literal + +import streamlit as st class NotificationManager: @@ -16,21 +16,18 @@ class NotificationManager: def __init__(self): self.notifications = [] - def show_toast(self, - message: str, - notification_type: Literal["success", "error", "warning", "info"] = "info", - duration: float = 3.0, - icon: Optional[str] = None): + def show_toast( + self, + message: str, + notification_type: Literal["success", "error", "warning", "info"] = "info", + duration: float = 3.0, + icon: str | None = None, + ): """Show a beautiful toast notification""" # Auto-select icon based on type if not provided if not icon: - icons = { - "success": "βœ…", - "error": "❌", - "warning": "⚠️", - "info": "ℹ️" - } + icons = {"success": "βœ…", "error": "❌", "warning": "⚠️", "info": "ℹ️"} icon = icons.get(notification_type, "ℹ️") # Sanitize user-controlled content @@ -38,26 +35,10 @@ def show_toast(self, # Color scheme for different types colors = { - "success": { - "bg": "rgba(54, 211, 153, 0.1)", - "border": "rgba(54, 211, 153, 0.4)", - "text": "#36D399" - }, - "error": { - "bg": "rgba(248, 114, 114, 0.1)", - "border": "rgba(248, 114, 114, 0.4)", - "text": "#F87272" 
- }, - "warning": { - "bg": "rgba(251, 189, 35, 0.1)", - "border": "rgba(251, 189, 35, 0.4)", - "text": "#FBBD23" - }, - "info": { - "bg": "rgba(58, 191, 248, 0.1)", - "border": "rgba(58, 191, 248, 0.4)", - "text": "#3ABFF8" - } + "success": {"bg": "rgba(54, 211, 153, 0.1)", "border": "rgba(54, 211, 153, 0.4)", "text": "#36D399"}, + "error": {"bg": "rgba(248, 114, 114, 0.1)", "border": "rgba(248, 114, 114, 0.4)", "text": "#F87272"}, + "warning": {"bg": "rgba(251, 189, 35, 0.1)", "border": "rgba(251, 189, 35, 0.4)", "text": "#FBBD23"}, + "info": {"bg": "rgba(58, 191, 248, 0.1)", "border": "rgba(58, 191, 248, 0.4)", "text": "#3ABFF8"}, } color = colors[notification_type] @@ -75,8 +56,8 @@ def show_toast(self, - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) except FileNotFoundError: # Fallback to basic Aurora styling if file not found - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def create_aurora_header(): """Create a flagship Aurora header with integrated navigation and logout - REBUILT FOR 2025""" # First, inject the CSS using st.markdown() - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) # Then render the HTML structure using st.markdown() - st.markdown(""" + st.markdown( + """
@@ -177,14 +187,17 @@ def create_aurora_header():
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def create_aurora_nav_buttons(): """Create integrated navigation buttons for the Aurora header""" # Enhanced styling for integrated nav buttons - st.markdown(""" + st.markdown( + """ - """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) pages = [ ("Processing", "Content Pipeline"), ("History", "Content History"), ("Settings", "Settings"), - ("Status", "Health Check") + ("Status", "Health Check"), ] - current_page = st.session_state.get('current_page', 'Content Pipeline') + current_page = st.session_state.get("current_page", "Content Pipeline") # Create horizontal layout for nav buttons nav_cols = st.columns([1, 1, 1, 1, 0.8]) # Last column smaller for logout @@ -254,18 +269,18 @@ def create_aurora_nav_buttons(): page_name, key=f"nav_{page_name}", type="primary" if page_key == current_page else "secondary", - use_container_width=True + use_container_width=True, ): st.session_state.current_page = page_key st.rerun() - st.markdown('
', unsafe_allow_html=True) + st.markdown("
", unsafe_allow_html=True) # Logout button in the last column with nav_cols[4]: st.markdown('
', unsafe_allow_html=True) if st.button("Sign Out", key="logout_btn", use_container_width=True): return True # Signal logout - st.markdown('
', unsafe_allow_html=True) + st.markdown("
", unsafe_allow_html=True) return False # No logout @@ -273,7 +288,8 @@ def create_aurora_nav_buttons(): def create_aurora_progress_card(title, current_step, total_steps, description=""): """Create a beautiful Aurora progress card""" progress = (current_step / total_steps) * 100 - st.markdown(f""" + st.markdown( + f"""

{title}

@@ -286,9 +302,11 @@ def create_aurora_progress_card(title, current_step, total_steps, description=""
- {f'

{description}

' if description else ''} + {f'

{description}

' if description else ""}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def create_aurora_step_card(title, description, status="pending", progress=0): @@ -331,7 +349,8 @@ def create_aurora_step_card(title, description, status="pending", progress=0):
""" - st.markdown(f""" + st.markdown( + f"""
@@ -341,11 +360,13 @@ def create_aurora_step_card(title, description, status="pending", progress=0):

{description}

-
{progress if status == 'processing' else (100 if status == 'completed' else 0)}%
+
{progress if status == "processing" else (100 if status == "completed" else 0)}%
{progress_bar}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) def create_aurora_content_card(title, content, content_type="text"): @@ -359,7 +380,8 @@ def create_aurora_content_card(title, content, content_type="text"): preview_content = content show_full = False - st.markdown(f""" + st.markdown( + f"""

{title}

@@ -370,12 +392,15 @@ def create_aurora_content_card(title, content, content_type="text"): white-space: pre-wrap; ">{preview_content}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) if show_full: with st.expander("Show full content"): st.markdown(content) + # Aurora Component Utilities @@ -385,7 +410,8 @@ class AuroraComponents: @staticmethod def success_message(message): """Aurora success message""" - st.markdown(f""" + st.markdown( + f"""
βœ… {message}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) @staticmethod def warning_message(message): """Aurora warning message""" - st.markdown(f""" + st.markdown( + f"""
⚠️ {message}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) @staticmethod def error_message(message): """Aurora error message""" - st.markdown(f""" + st.markdown( + f"""
❌ {message}
- """, unsafe_allow_html=True) + """, + unsafe_allow_html=True, + ) diff --git a/core/supabase_integration.py b/core/supabase_integration.py index 4c1c52d..087b1d5 100644 --- a/core/supabase_integration.py +++ b/core/supabase_integration.py @@ -6,13 +6,15 @@ Designed to work with MCP (Model Context Protocol) for enhanced AI integration. """ -from .utils import hash_password -import os import logging -from typing import Dict, List, Optional, Any +import os from datetime import datetime, timedelta -from supabase import create_client, Client +from typing import Any + from dotenv import load_dotenv +from supabase import Client, create_client + +from .utils import hash_password # Load environment variables load_dotenv() @@ -34,10 +36,12 @@ def __init__(self): self.service_role_key = os.getenv("SUPABASE_SERVICE_ROLE_KEY") if not self.url or not self.key: - raise ValueError("SUPABASE_URL and SUPABASE_ANON_KEY (or SUPABASE_KEY) must be set in environment variables") + raise ValueError( + "SUPABASE_URL and SUPABASE_ANON_KEY (or SUPABASE_KEY) must be set in environment variables" + ) self.client: Client = create_client(self.url, self.key) - self.admin_client: Optional[Client] = None + self.admin_client: Client | None = None if self.service_role_key: self.admin_client = create_client(self.url, self.service_role_key) @@ -48,7 +52,7 @@ def test_connection(self) -> bool: """Test the Supabase connection""" try: # Try a simple query to test connectivity - result = self.client.table("users").select("id").limit(1).execute() + self.client.table("users").select("id").limit(1).execute() logger.info("Supabase connection test successful") return True except Exception as e: @@ -56,7 +60,7 @@ def test_connection(self) -> bool: return False # User Management - def create_user(self, email: str, password: str, metadata: Dict[str, Any] = None) -> Dict[str, Any]: + def create_user(self, email: str, password: str, metadata: dict[str, Any] = None) -> dict[str, Any]: """Create a new user""" try: # 
Hash the password before storing @@ -69,7 +73,7 @@ def create_user(self, email: str, password: str, metadata: Dict[str, Any] = None "usage_quota": 60, # Default 60 minutes per month "usage_current": 0, "is_admin": False, - "subscription_tier": "free" + "subscription_tier": "free", } if metadata: @@ -82,7 +86,7 @@ def create_user(self, email: str, password: str, metadata: Dict[str, Any] = None logger.error(f"Error creating user: {e}") raise - def get_user(self, user_id: int) -> Optional[Dict[str, Any]]: + def get_user(self, user_id: int) -> dict[str, Any] | None: """Get user by ID""" try: result = self.client.table("users").select("*").eq("id", user_id).execute() @@ -91,7 +95,7 @@ def get_user(self, user_id: int) -> Optional[Dict[str, Any]]: logger.error(f"Error fetching user: {e}") return None - def get_user_by_email(self, email: str) -> Optional[Dict[str, Any]]: + def get_user_by_email(self, email: str) -> dict[str, Any] | None: """Get user by email""" try: result = self.client.table("users").select("*").eq("email", email).execute() @@ -106,9 +110,7 @@ def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: # Convert seconds to minutes usage_minutes = usage_seconds / 60 - result = self.client.table("users").update({ - "usage_current": usage_minutes - }).eq("id", user_id).execute() + result = self.client.table("users").update({"usage_current": usage_minutes}).eq("id", user_id).execute() return bool(result.data) except Exception as e: @@ -116,7 +118,7 @@ def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: return False # Content Storage - def save_content(self, user_id: int, content_data: Dict[str, Any]) -> Optional[str]: + def save_content(self, user_id: int, content_data: dict[str, Any]) -> str | None: """Save generated content to database""" try: content_record = { @@ -130,7 +132,7 @@ def save_content(self, user_id: int, content_data: Dict[str, Any]) -> Optional[s "article": content_data.get("article", ""), "metadata": 
content_data.get("metadata", {}), "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } result = self.client.table("content").insert(content_record).execute() @@ -144,32 +146,40 @@ def save_content(self, user_id: int, content_data: Dict[str, Any]) -> Optional[s logger.error(f"Error saving content: {e}") return None - def get_user_content(self, user_id: int, limit: int = 50) -> List[Dict[str, Any]]: + def get_user_content(self, user_id: int, limit: int = 50) -> list[dict[str, Any]]: """Get user's content history""" try: - result = self.client.table("content").select( - "*").eq("user_id", user_id).order("created_at", desc=True).limit(limit).execute() + result = ( + self.client.table("content") + .select("*") + .eq("user_id", user_id) + .order("created_at", desc=True) + .limit(limit) + .execute() + ) return result.data or [] except Exception as e: logger.error(f"Error fetching user content: {e}") return [] # API Key Management - def save_user_api_keys(self, user_id: int, api_keys: Dict[str, str]) -> bool: + def save_user_api_keys(self, user_id: int, api_keys: dict[str, str]) -> bool: """Save encrypted API keys for user""" try: # In production, encrypt the API keys before storing - result = self.client.table("users").update({ - "api_keys": api_keys, - "updated_at": datetime.now().isoformat() - }).eq("id", user_id).execute() + result = ( + self.client.table("users") + .update({"api_keys": api_keys, "updated_at": datetime.now().isoformat()}) + .eq("id", user_id) + .execute() + ) return bool(result.data) except Exception as e: logger.error(f"Error saving API keys: {e}") return False - def get_user_api_keys(self, user_id: int) -> Dict[str, str]: + def get_user_api_keys(self, user_id: int) -> dict[str, str]: """Get user's API keys""" try: result = self.client.table("users").select("api_keys").eq("id", user_id).execute() @@ -189,19 +199,27 @@ def save_knowledge_base_file(self, user_id: int, 
filename: str, content: str) -> "filename": filename, "content": content, "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } # Check if file already exists for this user - existing = self.client.table("knowledge_base").select("id").eq( - "user_id", user_id).eq("filename", filename).execute() + existing = ( + self.client.table("knowledge_base") + .select("id") + .eq("user_id", user_id) + .eq("filename", filename) + .execute() + ) if existing.data: # Update existing - result = self.client.table("knowledge_base").update({ - "content": content, - "updated_at": datetime.now().isoformat() - }).eq("user_id", user_id).eq("filename", filename).execute() + result = ( + self.client.table("knowledge_base") + .update({"content": content, "updated_at": datetime.now().isoformat()}) + .eq("user_id", user_id) + .eq("filename", filename) + .execute() + ) else: # Create new result = self.client.table("knowledge_base").insert(kb_record).execute() @@ -211,7 +229,7 @@ def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> logger.error(f"Error saving knowledge base file: {e}") return False - def get_user_knowledge_base(self, user_id: int) -> Dict[str, str]: + def get_user_knowledge_base(self, user_id: int) -> dict[str, str]: """Get user's knowledge base files""" try: result = self.client.table("knowledge_base").select("filename, content").eq("user_id", user_id).execute() @@ -219,7 +237,7 @@ def get_user_knowledge_base(self, user_id: int) -> Dict[str, str]: kb_dict = {} for item in result.data or []: # Convert filename to display name - name = item["filename"].replace('.txt', '').replace('.md', '').replace('_', ' ').title() + name = item["filename"].replace(".txt", "").replace(".md", "").replace("_", " ").title() kb_dict[name] = item["content"] return kb_dict @@ -236,19 +254,27 @@ def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bo "prompt_type": 
prompt_type, "content": content, "created_at": datetime.now().isoformat(), - "updated_at": datetime.now().isoformat() + "updated_at": datetime.now().isoformat(), } # Check if prompt already exists for this user - existing = self.client.table("custom_prompts").select("id").eq( - "user_id", user_id).eq("prompt_type", prompt_type).execute() + existing = ( + self.client.table("custom_prompts") + .select("id") + .eq("user_id", user_id) + .eq("prompt_type", prompt_type) + .execute() + ) if existing.data: # Update existing - result = self.client.table("custom_prompts").update({ - "content": content, - "updated_at": datetime.now().isoformat() - }).eq("user_id", user_id).eq("prompt_type", prompt_type).execute() + result = ( + self.client.table("custom_prompts") + .update({"content": content, "updated_at": datetime.now().isoformat()}) + .eq("user_id", user_id) + .eq("prompt_type", prompt_type) + .execute() + ) else: # Create new result = self.client.table("custom_prompts").insert(prompt_record).execute() @@ -258,7 +284,7 @@ def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bo logger.error(f"Error saving custom prompt: {e}") return False - def get_user_prompts(self, user_id: int) -> Dict[str, str]: + def get_user_prompts(self, user_id: int) -> dict[str, str]: """Get user's custom prompts""" try: result = self.client.table("custom_prompts").select("prompt_type, content").eq("user_id", user_id).execute() @@ -273,7 +299,7 @@ def get_user_prompts(self, user_id: int) -> Dict[str, str]: return {} # Analytics and Monitoring - def log_pipeline_execution(self, user_id: int, pipeline_data: Dict[str, Any]) -> bool: + def log_pipeline_execution(self, user_id: int, pipeline_data: dict[str, Any]) -> bool: """Log pipeline execution for analytics""" try: log_record = { @@ -283,9 +309,9 @@ def log_pipeline_execution(self, user_id: int, pipeline_data: Dict[str, Any]) -> "ai_provider": pipeline_data.get("ai_provider", "unknown"), "model": pipeline_data.get("model", 
"unknown"), "success": pipeline_data.get("success", False), - "error_message": pipeline_data.get("error", None), + "error_message": pipeline_data.get("error"), "metadata": pipeline_data.get("metadata", {}), - "created_at": datetime.now().isoformat() + "created_at": datetime.now().isoformat(), } result = self.client.table("pipeline_logs").insert(log_record).execute() @@ -294,13 +320,18 @@ def log_pipeline_execution(self, user_id: int, pipeline_data: Dict[str, Any]) -> logger.error(f"Error logging pipeline execution: {e}") return False - def get_user_analytics(self, user_id: int, days: int = 30) -> Dict[str, Any]: + def get_user_analytics(self, user_id: int, days: int = 30) -> dict[str, Any]: """Get user analytics for the last N days""" try: start_date = (datetime.now() - timedelta(days=days)).isoformat() - result = self.client.table("pipeline_logs").select( - "*").eq("user_id", user_id).gte("created_at", start_date).execute() + result = ( + self.client.table("pipeline_logs") + .select("*") + .eq("user_id", user_id) + .gte("created_at", start_date) + .execute() + ) logs = result.data or [] @@ -310,7 +341,7 @@ def get_user_analytics(self, user_id: int, days: int = 30) -> Dict[str, Any]: "total_duration": sum(log["duration_seconds"] for log in logs), "ai_providers_used": list(set(log["ai_provider"] for log in logs)), "most_used_model": self._get_most_frequent(logs, "model"), - "average_duration": sum(log["duration_seconds"] for log in logs) / len(logs) if logs else 0 + "average_duration": sum(log["duration_seconds"] for log in logs) / len(logs) if logs else 0, } return analytics @@ -318,12 +349,13 @@ def get_user_analytics(self, user_id: int, days: int = 30) -> Dict[str, Any]: logger.error(f"Error fetching user analytics: {e}") return {} - def _get_most_frequent(self, logs: List[Dict], field: str) -> str: + def _get_most_frequent(self, logs: list[dict], field: str) -> str: """Helper to get most frequent value from logs""" if not logs: return "unknown" from collections 
import Counter + values = [log.get(field, "unknown") for log in logs] return Counter(values).most_common(1)[0][0] @@ -338,7 +370,7 @@ class MCPSupabaseIntegration: def __init__(self, supabase_client: SupabaseClient): self.db = supabase_client - def get_user_context(self, user_id: int) -> Dict[str, Any]: + def get_user_context(self, user_id: int) -> dict[str, Any]: """Get comprehensive user context for AI models""" try: # Get user profile @@ -363,16 +395,18 @@ def get_user_context(self, user_id: int) -> Dict[str, Any]: "subscription_tier": user.get("subscription_tier", "free"), "usage_quota": user.get("usage_quota", 60), "usage_current": user.get("usage_current", 0), - "created_at": user.get("created_at") + "created_at": user.get("created_at"), }, "knowledge_base": knowledge_base, "custom_prompts": custom_prompts, "content_history": recent_content, "analytics": analytics, "preferences": { - "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] if analytics.get("ai_providers_used") else "openai", - "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo") - } + "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] + if analytics.get("ai_providers_used") + else "openai", + "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo"), + }, } return context @@ -380,7 +414,7 @@ def get_user_context(self, user_id: int) -> Dict[str, Any]: logger.error(f"Error getting user context for MCP: {e}") return {} - def update_context_from_interaction(self, user_id: int, interaction_data: Dict[str, Any]) -> bool: + def update_context_from_interaction(self, user_id: int, interaction_data: dict[str, Any]) -> bool: """Update user context based on AI interaction results""" try: # Log the interaction diff --git a/core/visible_thinking.py b/core/visible_thinking.py index 3dca809..324c24e 100644 --- a/core/visible_thinking.py +++ b/core/visible_thinking.py @@ -5,8 +5,6 @@ import logging -import streamlit as st - logger = 
logging.getLogger(__name__) diff --git a/create_missing_tables.py b/create_missing_tables.py index f07fa3d..5deaa91 100644 --- a/create_missing_tables.py +++ b/create_missing_tables.py @@ -4,6 +4,7 @@ """ import os + from core.supabase_integration import get_supabase_client @@ -28,7 +29,7 @@ def create_prompts_table(client): """ try: - result = client.client.rpc('exec_sql', {'sql': sql}).execute() + client.client.rpc("exec_sql", {"sql": sql}).execute() print("βœ… Prompts table created successfully") return True except Exception as e: @@ -57,7 +58,7 @@ def create_api_keys_table(client): """ try: - result = client.client.rpc('exec_sql', {'sql': sql}).execute() + client.client.rpc("exec_sql", {"sql": sql}).execute() print("βœ… API keys table created successfully") return True except Exception as e: @@ -70,14 +71,14 @@ def test_table_creation(client): # Test prompts table try: - result = client.client.table('prompts').select('*').limit(1).execute() + client.client.table("prompts").select("*").limit(1).execute() print("βœ… Prompts table accessible") except Exception as e: print(f"❌ Prompts table test failed: {e}") # Test api_keys table try: - result = client.client.table('api_keys').select('*').limit(1).execute() + client.client.table("api_keys").select("*").limit(1).execute() print("βœ… API keys table accessible") except Exception as e: print(f"❌ API keys table test failed: {e}") @@ -85,9 +86,10 @@ def test_table_creation(client): def main(): from dotenv import load_dotenv + load_dotenv() - if not os.getenv('SUPABASE_URL') or not os.getenv('SUPABASE_ANON_KEY'): + if not os.getenv("SUPABASE_URL") or not os.getenv("SUPABASE_ANON_KEY"): print("Missing SUPABASE_URL or SUPABASE_ANON_KEY environment variables.") print("Set them in your .env file or export them before running this script.") return diff --git a/deploy_fixes.py b/deploy_fixes.py index 0f84727..9cdcca6 100644 --- a/deploy_fixes.py +++ b/deploy_fixes.py @@ -4,8 +4,6 @@ Verifies all systems are ready for 
production deployment """ -import os -import sys from datetime import datetime @@ -14,13 +12,11 @@ def verify_database_tables(): print("πŸ” Verifying Database Tables") print("=" * 40) - required_tables = [ - "users", "content", "api_keys", "prompts", - "knowledge_base", "pipeline_logs" - ] + required_tables = ["users", "content", "api_keys", "prompts", "knowledge_base", "pipeline_logs"] try: from core.supabase_integration import get_supabase_client + client = get_supabase_client() if not client: @@ -31,7 +27,7 @@ def verify_database_tables(): for table in required_tables: try: # Test table access - result = client.client.table(table).select("*").limit(1).execute() + client.client.table(table).select("*").limit(1).execute() print(f"βœ… {table}") except Exception: print(f"❌ {table} - MISSING") @@ -57,10 +53,8 @@ def test_core_functionality(): try: # Test imports - from core.content_generation import transcribe_audio, generate_wisdom - from core.file_upload import FileUploadManager, EnhancedLargeFileProcessor - from core.utils import DEFAULT_PROMPTS, load_prompt_from_file from core.supabase_integration import get_supabase_client + from core.utils import DEFAULT_PROMPTS print("βœ… Core imports successful") @@ -95,7 +89,7 @@ def main(): core_ok = test_core_functionality() # Final assessment - print(f"\nπŸ“Š DEPLOYMENT READINESS") + print("\nπŸ“Š DEPLOYMENT READINESS") print("=" * 60) if db_ok and core_ok: diff --git a/scripts/audit_project.py b/scripts/audit_project.py index 6ad72c0..bc483ab 100644 --- a/scripts/audit_project.py +++ b/scripts/audit_project.py @@ -35,11 +35,12 @@ import sys from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Tuple +from typing import Any # Always use the project logger if available, but fall back to standard logging. 
try: from core.logging_config import logger as _wf_logger # type: ignore + SCRIPT_LOGGER = getattr(_wf_logger, "logger", _wf_logger) except Exception: SCRIPT_LOGGER = logging.getLogger(__name__) @@ -62,14 +63,13 @@ # Helper utilities # --------------------------------------------------------------------------- -def _run(cmd: List[str], cwd: Path | None = None) -> Tuple[int, str, str]: + +def _run(cmd: list[str], cwd: Path | None = None) -> tuple[int, str, str]: """Run a subprocess and capture its output. Returns (returncode, stdout, stderr). """ - SCRIPT_LOGGER.info( - "Executing command", extra={"trace_id": TRACE_ID, "cmd": " ".join(cmd)} - ) + SCRIPT_LOGGER.info("Executing command", extra={"trace_id": TRACE_ID, "cmd": " ".join(cmd)}) try: proc = subprocess.run( cmd, @@ -91,6 +91,7 @@ def _section(title: str, body: str) -> str: # Audit tasks # --------------------------------------------------------------------------- + def unit_tests() -> str: rc, out, err = _run([sys.executable, "-m", "pytest", "-q"]) status = "βœ…" if rc == 0 else "❌" @@ -138,14 +139,14 @@ def static_analysis() -> str: def secret_scan() -> str: - matches: List[str] = [] + matches: list[str] = [] for file_path in ROOT.rglob("*.py"): try: content = file_path.read_text(encoding="utf-8") except UnicodeDecodeError: continue for m in SECRET_PATTERN.finditer(content): - snippet = content[max(0, m.start() - 20): m.end() + 20] + snippet = content[max(0, m.start() - 20) : m.end() + 20] matches.append(f"{file_path}: {snippet.strip()}") if not matches: @@ -158,7 +159,7 @@ def dependency_freshness() -> str: if rc != 0: return f"Failed to fetch outdated packages: {err}" try: - data: List[Dict[str, Any]] = json.loads(out) + data: list[dict[str, Any]] = json.loads(out) except json.JSONDecodeError: return "Could not parse pip output." 
if not data: @@ -182,14 +183,12 @@ def todo_tally() -> str: # Entry-point # --------------------------------------------------------------------------- + def main() -> None: parser = argparse.ArgumentParser(description="Run a WhisperForge audit and emit a Markdown report.") parser.add_argument( - "--output", - type=Path, - default=Path( - f"AUDIT_REPORT_{ - datetime.now().strftime('%Y%m%d_%H%M%S')}.md")) + "--output", type=Path, default=Path(f"AUDIT_REPORT_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md") + ) args = parser.parse_args() report_parts = [REPORT_HEADER] @@ -203,9 +202,7 @@ def main() -> None: output_text = "\n".join(report_parts) args.output.write_text(output_text, encoding="utf-8") - SCRIPT_LOGGER.info( - "Audit completed", extra={"trace_id": TRACE_ID, "report": str(args.output)} - ) + SCRIPT_LOGGER.info("Audit completed", extra={"trace_id": TRACE_ID, "report": str(args.output)}) print(f"\n\nAudit report written to {args.output}") diff --git a/scripts/integration_audit.py b/scripts/integration_audit.py index 0bcba9b..7ffa80b 100644 --- a/scripts/integration_audit.py +++ b/scripts/integration_audit.py @@ -5,7 +5,6 @@ """ import sys -import os import traceback from pathlib import Path @@ -18,7 +17,7 @@ def audit_section(title: str): """Print audit section header""" print(f"\n{'=' * 60}") print(f"πŸ” {title}") - print('=' * 60) + print("=" * 60) def check_import(module_name: str, description: str = ""): @@ -36,7 +35,7 @@ def check_function(module_name: str, function_name: str, description: str = ""): """Check if a function exists in a module""" try: module = __import__(module_name, fromlist=[function_name]) - func = getattr(module, function_name) + getattr(module, function_name) print(f"βœ… {module_name}.{function_name} - {description}") return True except Exception as e: @@ -49,7 +48,7 @@ def check_class_method(module_name: str, class_name: str, method_name: str, desc try: module = __import__(module_name, fromlist=[class_name]) cls = getattr(module, 
class_name) - method = getattr(cls, method_name) + getattr(cls, method_name) print(f"βœ… {module_name}.{class_name}.{method_name} - {description}") return True except Exception as e: @@ -64,142 +63,143 @@ def main(): # Track results results = { - 'core_imports': 0, - 'database_functions': 0, - 'ui_components': 0, - 'file_processing': 0, - 'content_generation': 0, - 'streaming_pipeline': 0, - 'authentication': 0 + "core_imports": 0, + "database_functions": 0, + "ui_components": 0, + "file_processing": 0, + "content_generation": 0, + "streaming_pipeline": 0, + "authentication": 0, } # 1. Core Module Imports audit_section("Core Module Imports") core_modules = [ - ('core.supabase_integration', 'Database integration'), - ('core.file_upload', 'File upload management'), - ('core.streaming_pipeline', 'Streaming pipeline'), - ('core.content_generation', 'Content generation'), - ('core.streaming_results', 'Results display'), - ('core.ui_components', 'UI components'), - ('core.styling', 'Aurora styling'), - ('core.logging_config', 'Enhanced logging'), - ('core.monitoring', 'System monitoring'), - ('core.notifications', 'User notifications'), - ('core.visible_thinking', 'Thinking display'), - ('core.research_enrichment', 'Research features'), - ('core.utils', 'Utility functions'), - ('core.config', 'Configuration'), - ('core.integrations', 'External integrations') + ("core.supabase_integration", "Database integration"), + ("core.file_upload", "File upload management"), + ("core.streaming_pipeline", "Streaming pipeline"), + ("core.content_generation", "Content generation"), + ("core.streaming_results", "Results display"), + ("core.ui_components", "UI components"), + ("core.styling", "Aurora styling"), + ("core.logging_config", "Enhanced logging"), + ("core.monitoring", "System monitoring"), + ("core.notifications", "User notifications"), + ("core.visible_thinking", "Thinking display"), + ("core.research_enrichment", "Research features"), + ("core.utils", "Utility functions"), + 
("core.config", "Configuration"), + ("core.integrations", "External integrations"), ] for module, desc in core_modules: if check_import(module, desc): - results['core_imports'] += 1 + results["core_imports"] += 1 # 2. Database Functions audit_section("Database Integration") db_functions = [ - ('core.supabase_integration', 'get_supabase_client', 'Get database client'), - ('core.supabase_integration', 'SupabaseClient', 'Database client class'), - ('app', 'init_supabase', 'Initialize database'), - ('app', 'authenticate_user', 'User authentication'), - ('app', 'register_user_supabase', 'User registration'), - ('app', 'save_generated_content_supabase', 'Save content'), - ('app', 'get_user_content_history_supabase', 'Get user history') + ("core.supabase_integration", "get_supabase_client", "Get database client"), + ("core.supabase_integration", "SupabaseClient", "Database client class"), + ("app", "init_supabase", "Initialize database"), + ("app", "authenticate_user", "User authentication"), + ("app", "register_user_supabase", "User registration"), + ("app", "save_generated_content_supabase", "Save content"), + ("app", "get_user_content_history_supabase", "Get user history"), ] for module, func, desc in db_functions: if check_function(module, func, desc): - results['database_functions'] += 1 + results["database_functions"] += 1 # 3. 
File Processing audit_section("File Processing & Upload") file_functions = [ - ('core.file_upload', 'LargeFileUploadManager', 'Large file manager'), - ('core.file_upload', 'FileUploadManager', 'Standard file manager') + ("core.file_upload", "LargeFileUploadManager", "Large file manager"), + ("core.file_upload", "FileUploadManager", "Standard file manager"), ] for module, cls, desc in file_functions: if check_function(module, cls, desc): - results['file_processing'] += 1 + results["file_processing"] += 1 # Check LargeFileUploadManager methods file_methods = [ - ('core.file_upload', 'LargeFileUploadManager', 'validate_large_file', 'File validation'), - ('core.file_upload', 'LargeFileUploadManager', 'create_large_file_upload_zone', 'Upload UI'), - ('core.file_upload', 'LargeFileUploadManager', 'process_large_file', 'File processing') + ("core.file_upload", "LargeFileUploadManager", "validate_large_file", "File validation"), + ("core.file_upload", "LargeFileUploadManager", "create_large_file_upload_zone", "Upload UI"), + ("core.file_upload", "LargeFileUploadManager", "process_large_file", "File processing"), ] for module, cls, method, desc in file_methods: if check_class_method(module, cls, method, desc): - results['file_processing'] += 1 + results["file_processing"] += 1 # 4. 
Content Generation audit_section("Content Generation") content_functions = [ - ('core.content_generation', 'generate_wisdom_extraction', 'Wisdom extraction'), - ('core.content_generation', 'generate_research_enrichment', 'Research enrichment'), - ('core.content_generation', 'generate_outline_creation', 'Outline creation'), - ('core.content_generation', 'generate_article_creation', 'Article creation'), - ('core.content_generation', 'generate_social_content', 'Social media content'), - ('core.content_generation', 'generate_image_prompts', 'Image prompts') + ("core.content_generation", "generate_wisdom_extraction", "Wisdom extraction"), + ("core.content_generation", "generate_research_enrichment", "Research enrichment"), + ("core.content_generation", "generate_outline_creation", "Outline creation"), + ("core.content_generation", "generate_article_creation", "Article creation"), + ("core.content_generation", "generate_social_content", "Social media content"), + ("core.content_generation", "generate_image_prompts", "Image prompts"), ] for module, func, desc in content_functions: if check_function(module, func, desc): - results['content_generation'] += 1 + results["content_generation"] += 1 # 5. 
Streaming Pipeline audit_section("Streaming Pipeline") pipeline_functions = [ - ('core.streaming_pipeline', 'get_pipeline_controller', 'Pipeline controller'), - ('core.streaming_pipeline', 'StreamingPipelineController', 'Pipeline class'), - ('core.streaming_results', 'show_streaming_results', 'Results display'), - ('core.streaming_results', 'show_real_time_content_stream', 'Real-time streaming') + ("core.streaming_pipeline", "get_pipeline_controller", "Pipeline controller"), + ("core.streaming_pipeline", "StreamingPipelineController", "Pipeline class"), + ("core.streaming_results", "show_streaming_results", "Results display"), + ("core.streaming_results", "show_real_time_content_stream", "Real-time streaming"), ] for module, func, desc in pipeline_functions: if check_function(module, func, desc): - results['streaming_pipeline'] += 1 + results["streaming_pipeline"] += 1 # 6. UI Components audit_section("UI Components & Styling") ui_functions = [ - ('core.ui_components', 'load_aurora_css', 'Aurora CSS loading'), - ('core.ui_components', 'AuroraContainer', 'Aurora containers'), - ('core.ui_components', 'AuroraComponents', 'Aurora components'), - ('core.styling', 'apply_aurora_theme', 'Aurora theme'), - ('core.styling', 'create_aurora_header', 'Aurora header') + ("core.ui_components", "load_aurora_css", "Aurora CSS loading"), + ("core.ui_components", "AuroraContainer", "Aurora containers"), + ("core.ui_components", "AuroraComponents", "Aurora components"), + ("core.styling", "apply_aurora_theme", "Aurora theme"), + ("core.styling", "create_aurora_header", "Aurora header"), ] for module, func, desc in ui_functions: if check_function(module, func, desc): - results['ui_components'] += 1 + results["ui_components"] += 1 # 7. 
Authentication Flow audit_section("Authentication & OAuth") auth_functions = [ - ('app', 'handle_oauth_callback', 'OAuth callback handling'), - ('app', 'show_auth_page', 'Authentication page'), - ('app', 'show_main_app', 'Main application') + ("app", "handle_oauth_callback", "OAuth callback handling"), + ("app", "show_auth_page", "Authentication page"), + ("app", "show_main_app", "Main application"), ] for module, func, desc in auth_functions: if check_function(module, func, desc): - results['authentication'] += 1 + results["authentication"] += 1 # 8. Test Database Connection audit_section("Live Database Connection Test") try: from core.supabase_integration import get_supabase_client + client = get_supabase_client() if client and client.client: # Test basic query - result = client.client.table('users').select('id').limit(1).execute() + result = client.client.table("users").select("id").limit(1).execute() print("βœ… Database connection successful") print(f"βœ… Users table accessible ({len(result.data)} records found)") - results['database_functions'] += 2 + results["database_functions"] += 2 else: print("❌ Database client not available") except Exception as e: @@ -209,12 +209,13 @@ def main(): audit_section("Main Application Import") try: import app + print("βœ… app.py imports successfully") # Test key app functions db, success = app.init_supabase() print(f"βœ… init_supabase works: {success}") - results['authentication'] += 2 + results["authentication"] += 2 except Exception as e: print(f"❌ app.py import failed: {str(e)[:100]}") @@ -225,7 +226,9 @@ def main(): total_checks = sum(results.values()) max_possible = 50 # Approximate total checks - print(f"πŸ“Š **Integration Health Score: {total_checks}/{max_possible} ({(total_checks / max_possible) * 100:.1f}%)**") + print( + f"πŸ“Š **Integration Health Score: {total_checks}/{max_possible} ({(total_checks / max_possible) * 100:.1f}%)**" + ) print() print("**Component Breakdown:**") for component, count in 
results.items(): diff --git a/scripts/test_monitoring.py b/scripts/test_monitoring.py index e0326f1..fb997c4 100644 --- a/scripts/test_monitoring.py +++ b/scripts/test_monitoring.py @@ -7,16 +7,16 @@ in production environment. """ +import json import sys import time -import json -import os import traceback from pathlib import Path + import pytest # Skip these script-style tests when executed under pytest -SKIP_IN_PYTEST = 'pytest' in sys.modules +SKIP_IN_PYTEST = "pytest" in sys.modules # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -29,7 +29,7 @@ def test_structured_logging(): print("πŸ” Testing Structured Logging...") # pragma: allow-print try: - from core.monitoring import structured_logger, set_trace_context, trace_operation + from core.monitoring import set_trace_context, structured_logger, trace_operation # Test basic logging structured_logger.info("Test info message", test_component="monitoring") @@ -37,7 +37,7 @@ def test_structured_logging(): structured_logger.error("Test error message", test_component="monitoring") # Test trace context - trace_id = set_trace_context(user_id="test_user", operation="test_operation") + set_trace_context(user_id="test_user", operation="test_operation") structured_logger.info("Message with trace context", test_data="trace_test") # Test trace operation context manager @@ -105,8 +105,10 @@ def test_metrics_export(): try: from core.metrics_exporter import ( - metrics_exporter, track_request, track_pipeline, - export_prometheus_metrics, export_json_metrics + export_json_metrics, + export_prometheus_metrics, + track_pipeline, + track_request, ) # Test request tracking @@ -150,17 +152,10 @@ def test_error_tracking(): try: raise ValueError("Test error for monitoring") except Exception as e: - error_tracker.capture_exception(e, { - "test_context": "monitoring_test", - "user_id": "test_user" - }) + error_tracker.capture_exception(e, {"test_context": "monitoring_test", "user_id": "test_user"}) # 
Test message capture - error_tracker.capture_message( - "Test warning message", - level="warning", - context={"test": "monitoring"} - ) + error_tracker.capture_message("Test warning message", level="warning", context={"test": "monitoring"}) print("βœ… Error tracking tests passed") return True @@ -178,7 +173,7 @@ def test_performance_tracking(): print("πŸ” Testing Performance Tracking...") try: - from core.monitoring import performance_tracker, monitor_function + from core.monitoring import monitor_function, performance_tracker # Test context manager with performance_tracker.track_operation("test_operation"): @@ -194,9 +189,7 @@ def test_function(): assert result == "test_result" # Test pipeline performance tracking - performance_tracker.track_pipeline_performance( - "test_pipeline", 1.5, True, file_size_mb=10 - ) + performance_tracker.track_pipeline_performance("test_pipeline", 1.5, True, file_size_mb=10) print("βœ… Performance tracking tests passed") return True @@ -214,9 +207,7 @@ def test_streamlit_integration(): print("πŸ” Testing Streamlit Integration...") try: - from core.streamlit_monitoring import ( - streamlit_monitor, streamlit_page, streamlit_component - ) + from core.streamlit_monitoring import streamlit_component, streamlit_page # Test decorators (without actual Streamlit context) @streamlit_page("test_page") @@ -256,14 +247,14 @@ def test_log_file_creation(): logs_dir.mkdir(exist_ok=True) # Check for structured log file - today = datetime.now().strftime('%Y%m%d') + today = datetime.now().strftime("%Y%m%d") log_file = logs_dir / f"whisperforge_structured_{today}.jsonl" if log_file.exists(): print(f" βœ… Log file exists: {log_file}") # Check file content - with open(log_file, 'r') as f: + with open(log_file) as f: lines = f.readlines() if lines: print(f" βœ… Log file has {len(lines)} entries") @@ -271,13 +262,13 @@ def test_log_file_creation(): # Validate JSON format try: last_entry = json.loads(lines[-1].strip()) - print(f" βœ… Last log entry is 
valid JSON") - if 'timestamp' in last_entry and 'level' in last_entry: - print(f" βœ… Log entry has required fields") + print(" βœ… Last log entry is valid JSON") + if "timestamp" in last_entry and "level" in last_entry: + print(" βœ… Log entry has required fields") except json.JSONDecodeError: - print(f" ⚠️ Last log entry is not valid JSON") + print(" ⚠️ Last log entry is not valid JSON") else: - print(f" ⚠️ Log file is empty") + print(" ⚠️ Log file is empty") else: print(f" ⚠️ Log file not found: {log_file}") diff --git a/scripts/test_oauth.py b/scripts/test_oauth.py index 51bd002..69f1c5f 100644 --- a/scripts/test_oauth.py +++ b/scripts/test_oauth.py @@ -4,8 +4,8 @@ """ import sys -import os from pathlib import Path + import pytest from _pytest.outcomes import Skipped @@ -19,6 +19,7 @@ def test_oauth_url_generation(): # Load environment variables from .env file from dotenv import load_dotenv + load_dotenv() try: @@ -32,19 +33,18 @@ def test_oauth_url_generation(): # Test OAuth URL generation redirect_url = "http://localhost:8501" - auth_response = db.client.auth.sign_in_with_oauth({ - "provider": "google", - "options": {"redirect_to": redirect_url} - }) + auth_response = db.client.auth.sign_in_with_oauth( + {"provider": "google", "options": {"redirect_to": redirect_url}} + ) - if hasattr(auth_response, 'url') and auth_response.url: - print(f"βœ… OAuth URL generated successfully") + if hasattr(auth_response, "url") and auth_response.url: + print("βœ… OAuth URL generated successfully") print(f" URL: {auth_response.url[:50]}...") assert True else: - print(f"❌ OAuth URL not generated properly") + print("❌ OAuth URL not generated properly") print(f" Response: {auth_response}") - assert False, "OAuth URL not generated properly" + raise AssertionError("OAuth URL not generated properly") except Exception as e: print(f"❌ Error testing OAuth: {e}") @@ -53,6 +53,7 @@ def test_oauth_url_generation(): pytest.skip("Supabase credentials not available for OAuth testing") else: 
import traceback + print(f" Full error: {traceback.format_exc()}") pytest.skip(f"OAuth test failed with error: {e}") diff --git a/scripts/ui_ux_audit.py b/scripts/ui_ux_audit.py index 3e718cc..7dd5589 100644 --- a/scripts/ui_ux_audit.py +++ b/scripts/ui_ux_audit.py @@ -5,7 +5,6 @@ """ import sys -import os from pathlib import Path # Add project root to path @@ -17,7 +16,7 @@ def audit_section(title: str): """Print audit section header""" print(f"\n{'=' * 60}") print(f"🎨 {title}") - print('=' * 60) + print("=" * 60) def check_ui_feature(feature_name: str, description: str, status: bool): @@ -34,12 +33,12 @@ def main(): # Track results results = { - 'oauth_flow': 0, - 'progress_indicators': 0, - 'ui_components': 0, - 'user_feedback': 0, - 'error_handling': 0, - 'accessibility': 0 + "oauth_flow": 0, + "progress_indicators": 0, + "ui_components": 0, + "user_feedback": 0, + "error_handling": 0, + "accessibility": 0, } # 1. OAuth Flow Audit @@ -51,19 +50,19 @@ def main(): # OAuth callback handling oauth_features = [ - ("OAuth Callback Handler", "handle_oauth_callback function exists", hasattr(app, 'handle_oauth_callback')), + ("OAuth Callback Handler", "handle_oauth_callback function exists", hasattr(app, "handle_oauth_callback")), ("Google OAuth Integration", "Google sign-in with proper redirect", True), # Verified in code - ("Fallback Authentication", "Email/password fallback available", hasattr(app, 'authenticate_user')), - ("User Registration", "Account creation flow", hasattr(app, 'register_user_supabase')), + ("Fallback Authentication", "Email/password fallback available", hasattr(app, "authenticate_user")), + ("User Registration", "Account creation flow", hasattr(app, "register_user_supabase")), ("Session Management", "Simple session state handling", True), # Verified in code ("Local Testing Bypass", "Database unavailable bypass", True), # Verified in code ("Error Recovery", "OAuth error handling with fallback", True), # Verified in code - ("Beautiful Auth Page", 
"Aurora-themed authentication UI", True) # Verified in code + ("Beautiful Auth Page", "Aurora-themed authentication UI", True), # Verified in code ] for feature, desc, status in oauth_features: if check_ui_feature(feature, desc, status): - results['oauth_flow'] += 1 + results["oauth_flow"] += 1 except Exception as e: print(f"❌ OAuth audit failed: {e}") @@ -72,24 +71,26 @@ def main(): audit_section("Progress Indicators & Status Updates") try: - from core.streaming_results import show_streaming_results, show_real_time_content_stream - from core.streaming_pipeline import get_pipeline_controller from core.ui_components import AuroraComponents progress_features = [ ("Real-time Streaming", "Live content generation display", True), ("Step-by-step Progress", "Pipeline step indicators", True), - ("Aurora Progress Bars", "Beautiful animated progress bars", hasattr(AuroraComponents, 'aurora_progress_bar')), + ( + "Aurora Progress Bars", + "Beautiful animated progress bars", + hasattr(AuroraComponents, "aurora_progress_bar"), + ), ("File Upload Progress", "Large file upload tracking", True), # Verified in file_upload.py ("Chunk Processing", "Parallel chunk progress display", True), # Verified in file_upload.py ("Status Messages", "Success/error/warning notifications", True), ("Loading States", "Processing indicators during operations", True), - ("Completion Feedback", "Clear completion status", True) + ("Completion Feedback", "Clear completion status", True), ] for feature, desc, status in progress_features: if check_ui_feature(feature, desc, status): - results['progress_indicators'] += 1 + results["progress_indicators"] += 1 except Exception as e: print(f"❌ Progress indicators audit failed: {e}") @@ -98,8 +99,7 @@ def main(): audit_section("UI Components & Visual Design") try: - from core.ui_components import AuroraContainer, AuroraComponents - from core.styling import apply_aurora_theme, create_aurora_header + from core.ui_components import AuroraComponents ui_features = [ 
("Aurora Theme", "Consistent bioluminescent design", True), @@ -111,12 +111,12 @@ def main(): ("Form Design", "Beautiful input fields and validation", True), ("Card Components", "Elegant content containers", True), ("Navigation", "Intuitive page navigation", True), - ("Logo & Branding", "Professional WhisperForge identity", True) + ("Logo & Branding", "Professional WhisperForge identity", True), ] for feature, desc, status in ui_features: if check_ui_feature(feature, desc, status): - results['ui_components'] += 1 + results["ui_components"] += 1 except Exception as e: print(f"❌ UI components audit failed: {e}") @@ -135,12 +135,12 @@ def main(): ("Form Validation", "Real-time input validation", True), ("Loading Spinners", "Activity indicators", True), ("Tooltips", "Helpful contextual hints", True), - ("Status Badges", "Clear state indicators", True) + ("Status Badges", "Clear state indicators", True), ] for feature, desc, status in feedback_features: if check_ui_feature(feature, desc, status): - results['user_feedback'] += 1 + results["user_feedback"] += 1 except Exception as e: print(f"❌ User feedback audit failed: {e}") @@ -153,18 +153,18 @@ def main(): error_features = [ ("Graceful Degradation", "App works without database", True), - ("Error Boundaries", "Component error isolation", hasattr(ErrorBoundary, 'wrap')), + ("Error Boundaries", "Component error isolation", hasattr(ErrorBoundary, "wrap")), ("Retry Mechanisms", "Automatic retry on failures", True), ("Fallback UI", "Alternative UI when features fail", True), ("User-friendly Errors", "Non-technical error messages", True), ("Recovery Actions", "Clear steps to resolve issues", True), ("Offline Handling", "Graceful offline behavior", True), - ("Timeout Handling", "Long operation timeouts", True) + ("Timeout Handling", "Long operation timeouts", True), ] for feature, desc, status in error_features: if check_ui_feature(feature, desc, status): - results['error_handling'] += 1 + results["error_handling"] += 1 
except Exception as e: print(f"❌ Error handling audit failed: {e}") @@ -182,12 +182,12 @@ def main(): ("Touch Friendly", "Mobile touch targets", True), ("Loading States", "Clear loading indicators", True), ("Error Recovery", "Clear error resolution paths", True), - ("Intuitive Flow", "Logical user journey", True) + ("Intuitive Flow", "Logical user journey", True), ] for feature, desc, status in accessibility_features: if check_ui_feature(feature, desc, status): - results['accessibility'] += 1 + results["accessibility"] += 1 # 7. Specific OAuth Flow Test audit_section("OAuth Flow Deep Dive") @@ -202,12 +202,12 @@ def main(): ("User Creation", "Automatic user record creation", True), ("Session Setup", "Proper session state initialization", True), ("Error Fallback", "Email auth when OAuth fails", True), - ("Debug Information", "Helpful debug info in development", True) + ("Debug Information", "Helpful debug info in development", True), ] for feature, desc, status in oauth_deep_features: if check_ui_feature(feature, desc, status): - results['oauth_flow'] += 1 + results["oauth_flow"] += 1 except Exception as e: print(f"❌ OAuth deep dive failed: {e}") diff --git a/scripts/validate_app.py b/scripts/validate_app.py index 3f7fa38..e420095 100644 --- a/scripts/validate_app.py +++ b/scripts/validate_app.py @@ -4,10 +4,9 @@ Tests all critical functionality to prevent deployment errors """ -import sys -import os import importlib -import traceback +import os +import sys from pathlib import Path # Add the project root to path @@ -37,24 +36,24 @@ def warn_test(self, test_name, warning): def summary(self): total = self.tests_passed + self.tests_failed print(f"\n{'=' * 50}") - print(f"VALIDATION SUMMARY") + print("VALIDATION SUMMARY") print(f"{'=' * 50}") print(f"Tests Passed: {self.tests_passed}/{total}") print(f"Tests Failed: {self.tests_failed}") print(f"Warnings: {len(self.warnings)}") if self.errors: - print(f"\n🚨 ERRORS:") + print("\n🚨 ERRORS:") for error in self.errors: 
print(f" - {error}") if self.warnings: - print(f"\n⚠️ WARNINGS:") + print("\n⚠️ WARNINGS:") for warning in self.warnings: print(f" - {warning}") if self.tests_failed == 0: - print(f"\nπŸŽ‰ ALL TESTS PASSED! App is ready for deployment.") + print("\nπŸŽ‰ ALL TESTS PASSED! App is ready for deployment.") return True else: print(f"\nπŸ’₯ {self.tests_failed} TESTS FAILED! Fix errors before deployment.") @@ -74,13 +73,11 @@ def test_imports(result): ("datetime", "datetime"), ("tempfile", None), ("logging", None), - # Third-party ("dotenv", None), ("supabase", None), ("openai", None), ("anthropic", None), - # WhisperForge modules ("core.supabase_integration", None), ("core.utils", None), @@ -107,17 +104,9 @@ def test_environment_variables(result): """Test required environment variables""" print("\nπŸ” Testing Environment Variables...") - required_vars = [ - "SUPABASE_URL", - "SUPABASE_ANON_KEY" - ] + required_vars = ["SUPABASE_URL", "SUPABASE_ANON_KEY"] - optional_vars = [ - "OPENAI_API_KEY", - "ANTHROPIC_API_KEY", - "OAUTH_REDIRECT_URL", - "STREAMLIT_APP_URL" - ] + optional_vars = ["OPENAI_API_KEY", "ANTHROPIC_API_KEY", "OAUTH_REDIRECT_URL", "STREAMLIT_APP_URL"] for var in required_vars: value = os.getenv(var) @@ -150,16 +139,10 @@ def test_file_structure(result): "prompts/default/wisdom_extraction.md", "prompts/default/outline_creation.md", "prompts/default/social_media.md", - "prompts/default/image_prompts.md" + "prompts/default/image_prompts.md", ] - optional_files = [ - "static/css/whisperforge_ui.css", - "core/ui_components.py", - ".env", - "Procfile", - "runtime.txt" - ] + optional_files = ["static/css/whisperforge_ui.css", "core/ui_components.py", ".env", "Procfile", "runtime.txt"] for file_path in required_files: if Path(file_path).exists(): @@ -180,6 +163,7 @@ def test_supabase_connection(result): try: from core.supabase_integration import get_supabase_client + db, mcp = get_supabase_client() if db: @@ -188,7 +172,7 @@ def test_supabase_connection(result): # 
Test basic connection try: # Simple health check - response = db.client.table("users").select("count", count="exact").limit(1).execute() + db.client.table("users").select("count", count="exact").limit(1).execute() result.pass_test("Supabase database connection") except Exception as e: result.fail_test("Supabase database connection", str(e)) @@ -205,18 +189,18 @@ def test_oauth_configuration(result): try: from core.supabase_integration import get_supabase_client + db, _ = get_supabase_client() if db: try: # Test OAuth URL generation redirect_url = os.getenv("OAUTH_REDIRECT_URL", "http://localhost:8501") - auth_response = db.client.auth.sign_in_with_oauth({ - "provider": "google", - "options": {"redirect_to": redirect_url} - }) + auth_response = db.client.auth.sign_in_with_oauth( + {"provider": "google", "options": {"redirect_to": redirect_url}} + ) - if hasattr(auth_response, 'url') and auth_response.url: + if hasattr(auth_response, "url") and auth_response.url: result.pass_test("OAuth URL generation") else: result.warn_test("OAuth URL generation", "No URL returned") @@ -238,12 +222,12 @@ def test_prompt_files(result): "prompts/default/wisdom_extraction.md", "prompts/default/outline_creation.md", "prompts/default/social_media.md", - "prompts/default/image_prompts.md" + "prompts/default/image_prompts.md", ] for prompt_file in prompt_files: try: - with open(prompt_file, 'r', encoding='utf-8') as f: + with open(prompt_file, encoding="utf-8") as f: content = f.read() if len(content) > 10: # Basic content check result.pass_test(f"Prompt file {prompt_file}") @@ -264,8 +248,9 @@ def test_ai_providers(result): if openai_key: try: import openai + # Basic client test (don't make actual API calls in validation) - client = openai.OpenAI(api_key=openai_key) + openai.OpenAI(api_key=openai_key) result.pass_test("OpenAI configuration") except Exception as e: result.warn_test("OpenAI configuration", str(e)) @@ -277,8 +262,9 @@ def test_ai_providers(result): if anthropic_key: try: 
import anthropic + # Basic client test (don't make actual API calls in validation) - client = anthropic.Anthropic(api_key=anthropic_key) + anthropic.Anthropic(api_key=anthropic_key) result.pass_test("Anthropic configuration") except Exception as e: result.warn_test("Anthropic configuration", str(e)) @@ -292,12 +278,13 @@ def test_streamlit_compatibility(result): try: import streamlit as st + result.pass_test("Streamlit import") # Check version compatibility try: version = st.__version__ - major, minor = map(int, version.split('.')[:2]) + major, minor = map(int, version.split(".")[:2]) if major >= 1 and minor >= 28: result.pass_test("Streamlit version compatibility") else: @@ -327,7 +314,6 @@ def test_pipeline_components(result): result.fail_test("Pipeline controller", str(e)) try: - from core.content_generation import transcribe_audio result.pass_test("Content generation import") except Exception as e: result.fail_test("Content generation import", str(e)) diff --git a/setup.py b/setup.py index abc97cb..124e864 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ """WhisperForge environment setup - creates a virtual environment and installs dependencies.""" +import os import subprocess import sys -import os import venv PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -29,6 +29,7 @@ def create_venv(): if os.path.exists(VENV_DIR): print_step(f"Removing existing venv at {VENV_DIR}") import shutil + shutil.rmtree(VENV_DIR) print_step(f"Creating virtual environment in {VENV_DIR}") @@ -78,7 +79,8 @@ def verify_install(): print_step("Verifying installation") result = subprocess.run( [PYTHON, "-c", "import streamlit; print(f' streamlit {streamlit.__version__}')"], - capture_output=True, text=True, + capture_output=True, + text=True, ) if result.returncode == 0: print(result.stdout.strip()) diff --git a/tests/conftest.py b/tests/conftest.py index 741bc92..41d4e4e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,24 +2,27 @@ Pytest configuration and 
fixtures for WhisperForge testing """ -from core.supabase_integration import get_supabase_client -from core.logging_config import logger -import pytest import os -import tempfile import shutil + +# Add project root to path +import sys +import tempfile from pathlib import Path from unittest.mock import Mock, patch -import streamlit as st + +import pytest from streamlit.testing.v1 import AppTest -# Add project root to path -import sys +from core.logging_config import logger +from core.supabase_integration import get_supabase_client + sys.path.insert(0, str(Path(__file__).parent.parent)) # Load environment variables from .env file if it exists try: from dotenv import load_dotenv + load_dotenv() except ImportError: pass # python-dotenv not installed, skip @@ -31,12 +34,12 @@ def test_env(): # Store original env vars original_env = {} test_vars = { - 'SUPABASE_URL': os.getenv('SUPABASE_URL', 'https://test.supabase.co'), - 'SUPABASE_ANON_KEY': os.getenv('SUPABASE_ANON_KEY', 'test-anon-key'), - 'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY', 'test-openai-key'), - 'ANTHROPIC_API_KEY': os.getenv('ANTHROPIC_API_KEY', 'test-anthropic-key'), - 'GROQ_API_KEY': os.getenv('GROQ_API_KEY', 'test-groq-key'), - 'TESTING': 'true' + "SUPABASE_URL": os.getenv("SUPABASE_URL", "https://test.supabase.co"), + "SUPABASE_ANON_KEY": os.getenv("SUPABASE_ANON_KEY", "test-anon-key"), + "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "test-openai-key"), + "ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", "test-anthropic-key"), + "GROQ_API_KEY": os.getenv("GROQ_API_KEY", "test-groq-key"), + "TESTING": "true", } # Set test environment @@ -69,12 +72,12 @@ def sample_audio_file(temp_dir): audio_file = temp_dir / "test_audio.wav" # WAV file header (44 bytes) - wav_header = b'RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00' + wav_header = b"RIFF\x24\x08\x00\x00WAVEfmt 
\x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00" - with open(audio_file, 'wb') as f: + with open(audio_file, "wb") as f: f.write(wav_header) # Add some dummy audio data - f.write(b'\x00' * 2048) + f.write(b"\x00" * 2048) return audio_file @@ -85,14 +88,14 @@ def large_audio_file(temp_dir): audio_file = temp_dir / "large_test_audio.wav" # WAV file header - wav_header = b'RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00' + wav_header = b"RIFF\x24\x08\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x02\x00\x44\xac\x00\x00\x10\xb1\x02\x00\x04\x00\x10\x00data\x00\x08\x00\x00" - with open(audio_file, 'wb') as f: + with open(audio_file, "wb") as f: f.write(wav_header) # Create ~30MB file to trigger chunking chunk_size = 1024 * 1024 # 1MB chunks for _ in range(30): - f.write(b'\x00' * chunk_size) + f.write(b"\x00" * chunk_size) return audio_file @@ -100,15 +103,16 @@ def large_audio_file(temp_dir): @pytest.fixture def mock_supabase(): """Mock Supabase client for testing""" - with patch('core.supabase_integration.create_client') as mock_create: + with patch("core.supabase_integration.create_client") as mock_create: mock_client = Mock() mock_create.return_value = mock_client # Mock successful responses - mock_client.table.return_value.insert.return_value.execute.return_value.data = [{'id': 'test-id'}] + mock_client.table.return_value.insert.return_value.execute.return_value.data = [{"id": "test-id"}] mock_client.table.return_value.select.return_value.execute.return_value.data = [] mock_client.table.return_value.update.return_value.eq.return_value.execute.return_value.data = [ - {'id': 'test-id'}] + {"id": "test-id"} + ] yield mock_client @@ -136,7 +140,7 @@ def streamlit_app(): @pytest.fixture def mock_openai(): """Mock OpenAI API responses""" - with patch('openai.OpenAI') as mock_openai: + with patch("openai.OpenAI") as mock_openai: mock_client = 
Mock() mock_openai.return_value = mock_client @@ -155,7 +159,7 @@ def mock_openai(): @pytest.fixture def mock_anthropic(): """Mock Anthropic API responses""" - with patch('anthropic.Anthropic') as mock_anthropic: + with patch("anthropic.Anthropic") as mock_anthropic: mock_client = Mock() mock_anthropic.return_value = mock_client @@ -175,20 +179,13 @@ def setup_logging(): yield logger.logger.info("πŸ§ͺ Test session completed") + # Test markers def pytest_configure(config): """Configure pytest markers""" - config.addinivalue_line( - "markers", "integration: marks tests as integration tests (may be slow)" - ) - config.addinivalue_line( - "markers", "unit: marks tests as unit tests (fast)" - ) - config.addinivalue_line( - "markers", "supabase: marks tests that require Supabase connection" - ) - config.addinivalue_line( - "markers", "ai: marks tests that require AI API keys" - ) + config.addinivalue_line("markers", "integration: marks tests as integration tests (may be slow)") + config.addinivalue_line("markers", "unit: marks tests as unit tests (fast)") + config.addinivalue_line("markers", "supabase: marks tests that require Supabase connection") + config.addinivalue_line("markers", "ai: marks tests that require AI API keys") diff --git a/tests/test_basic_functionality.py b/tests/test_basic_functionality.py index 38a8ca1..67036b8 100644 --- a/tests/test_basic_functionality.py +++ b/tests/test_basic_functionality.py @@ -2,10 +2,10 @@ Basic functionality tests for WhisperForge v3.1.0 """ -import pytest -import os -from pathlib import Path import sys +from pathlib import Path + +import pytest # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -14,11 +14,13 @@ def test_imports(): """Test that core modules can be imported without errors""" try: - from core.content_generation import transcribe_audio, generate_wisdom - from core.file_upload import FileUploadManager, LargeFileUploadManager - from core.supabase_integration import 
get_supabase_client - from core.utils import hash_password, DEFAULT_PROMPTS - from core.visible_thinking import thinking_step_start + from core.content_generation import generate_wisdom, transcribe_audio # noqa: F401 + from core.file_upload import FileUploadManager # noqa: F401 + from core.large_file_processor import EnhancedLargeFileProcessor # noqa: F401 + from core.supabase_integration import get_supabase_client # noqa: F401 + from core.utils import DEFAULT_PROMPTS, hash_password # noqa: F401 + from core.visible_thinking import thinking_step_start # noqa: F401 + assert True, "All core imports successful" except ImportError as e: pytest.fail(f"Import error: {e}") @@ -68,7 +70,7 @@ def test_hash_password(): """Test password hashing utility""" from core.utils import hash_password - password = "test_password_123" + password = "test_password_123" # noqa: S105 hashed = hash_password(password) assert hashed != password, "Password should be hashed" @@ -87,7 +89,7 @@ def test_default_prompts(): @pytest.mark.unit def test_visible_thinking_functions(): """Test that visible thinking functions work without errors""" - from core.visible_thinking import thinking_step_start, thinking_step_complete, thinking_error + from core.visible_thinking import thinking_error, thinking_step_complete, thinking_step_start # These should not raise exceptions thinking_step_start("test_step") @@ -107,7 +109,7 @@ def test_core_directory_structure(): "file_upload.py", "supabase_integration.py", "utils.py", - "visible_thinking.py" + "visible_thinking.py", ] for module in expected_modules: diff --git a/whisperforge_cli.py b/whisperforge_cli.py index bd228a2..116442e 100644 --- a/whisperforge_cli.py +++ b/whisperforge_cli.py @@ -4,12 +4,11 @@ Run the WhisperForge pipeline from the command line """ -import click import os import sys -import tempfile from pathlib import Path -from typing import Optional, Dict, Any + +import click # Add the project root to Python path project_root = 
Path(__file__).parent @@ -17,15 +16,16 @@ # Import core functionality try: + from dotenv import load_dotenv + from core.content_generation import ( - transcribe_audio, - generate_wisdom, - generate_outline, generate_article, + generate_outline, + generate_wisdom, + transcribe_audio, ) from core.logging_config import logger from core.utils import DEFAULT_PROMPTS - from dotenv import load_dotenv # Load environment variables load_dotenv() @@ -159,14 +159,14 @@ def pipeline(): def run( input_file: str, model: str, - output: Optional[str], + output: str | None, output_format: str, verbose: bool, ): """Run the complete WhisperForge pipeline on an audio file""" if verbose: - click.echo(f"πŸš€ Starting WhisperForge pipeline...") + click.echo("πŸš€ Starting WhisperForge pipeline...") click.echo(f"Input file: {input_file}") click.echo(f"Model: {model}") click.echo(f"Output format: {output_format}") @@ -274,7 +274,7 @@ def run( type=click.Path(), help="Output file path (default: input_name_transcript.txt)", ) -def transcribe(input_file: str, output: Optional[str]): +def transcribe(input_file: str, output: str | None): """Transcribe audio file to text only""" if not validate_audio_file(input_file): @@ -284,10 +284,7 @@ def transcribe(input_file: str, output: Optional[str]): sys.exit(1) # Set up output file - if output: - output_file = Path(output) - else: - output_file = Path(f"{Path(input_file).stem}_transcript.txt") + output_file = Path(output) if output else Path(f"{Path(input_file).stem}_transcript.txt") # Create CLI file wrapper audio_file = CLIFile(input_file) @@ -333,14 +330,14 @@ def status(): # Check dependencies try: - import openai + import openai # noqa: F401 click.echo("OpenAI library: βœ… Available") except ImportError: click.echo("OpenAI library: ❌ Not available") try: - import anthropic + import anthropic # noqa: F401 click.echo("Anthropic library: βœ… Available") except ImportError: @@ -348,7 +345,7 @@ def status(): # Check audio processing try: - from pydub 
import AudioSegment + from pydub import AudioSegment # noqa: F401 click.echo("Audio processing (pydub): ✅ Available") except ImportError: From eb9a6686f0ffd3f8b35fbd6ac8aa9ca21311cccb Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:47:26 -0800 Subject: [PATCH 34/46] fix(config): switch Streamlit base theme from light to dark The light base theme gave all widgets white backgrounds, making text illegible against the Aurora dark theme since CSS overlays were near-transparent. Co-Authored-By: Claude Opus 4.6 --- .streamlit/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.streamlit/config.toml b/.streamlit/config.toml index e7b5cc6..d2adaee 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -9,4 +9,4 @@ enableXsrfProtection = true gatherUsageStats = false [theme] -base = "light" +base = "dark" From c2d0916cc3bcf81c11781db6f7e9197cae514568 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 18:47:36 -0800 Subject: [PATCH 35/46] fix(ui): update CSS selectors for Streamlit 1.54 widget theming Old direct-child selectors (.stTextArea > div > div > textarea) no longer match the newer DOM structure. Add descendant and data-testid attribute selectors for text inputs, text areas, number inputs, and select boxes. Use opaque dark background for input fields.
Co-Authored-By: Claude Opus 4.6 --- static/css/main.css | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/static/css/main.css b/static/css/main.css index fce85f5..10d36d7 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -264,8 +264,12 @@ header[data-testid="stHeader"], /* Text Inputs */ .stTextInput > div > div > input, -.stTextArea > div > div > textarea { - background: var(--aurora-bg-card) !important; +.stTextArea > div > div > textarea, +.stTextInput input, +.stTextArea textarea, +[data-testid="stTextInput"] input, +[data-testid="stTextArea"] textarea { + background: var(--aurora-bg-darker) !important; border: 1px solid var(--aurora-border) !important; border-radius: var(--aurora-radius-small) !important; color: var(--aurora-text) !important; @@ -273,19 +277,47 @@ header[data-testid="stHeader"], } .stTextInput > div > div > input:focus, -.stTextArea > div > div > textarea:focus { +.stTextArea > div > div > textarea:focus, +.stTextInput input:focus, +.stTextArea textarea:focus, +[data-testid="stTextInput"] input:focus, +[data-testid="stTextArea"] textarea:focus { + background: var(--aurora-bg-darker) !important; border-color: var(--aurora-border-hover) !important; box-shadow: var(--aurora-glow-subtle) !important; } +/* Text area label styling */ +[data-testid="stTextArea"] label, +[data-testid="stTextInput"] label { + color: var(--aurora-text) !important; +} + /* Select Boxes */ -.stSelectbox > div > div { +.stSelectbox > div > div, +[data-testid="stSelectbox"] > div > div { background: var(--aurora-bg-card) !important; border: 1px solid var(--aurora-border) !important; border-radius: var(--aurora-radius-small) !important; color: var(--aurora-text) !important; } +/* Number Inputs */ +[data-testid="stNumberInput"] input, +.stNumberInput input { + background: var(--aurora-bg-card) !important; + border: 1px solid var(--aurora-border) !important; + border-radius: var(--aurora-radius-small) 
!important; + color: var(--aurora-text) !important; + font-family: var(--aurora-font-primary) !important; +} + +[data-testid="stNumberInput"] input:focus, +.stNumberInput input:focus { + border-color: var(--aurora-border-hover) !important; + box-shadow: var(--aurora-glow-subtle) !important; +} + /* Progress Bars */ .stProgress > div > div > div { background: linear-gradient(90deg, var(--aurora-primary), var(--aurora-secondary)) !important; From 2234a8ab30cae730a0fb1b2d4c69af5cc33a7a22 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:09:44 -0800 Subject: [PATCH 36/46] refactor(core): add foundation modules for codebase quality refactor Create eight new modules that establish the architectural foundation for decoupling business logic from Streamlit, consolidating magic numbers, and enabling dependency injection. - core/constants.py: centralized compile-time constants (file limits, timeouts, audio params, DB defaults) - core/exceptions.py: custom exception hierarchy rooted at WhisperForgeError - core/security.py: password hashing extracted from utils.py - core/path_safety.py: path traversal prevention extracted from utils.py - core/prompts.py: prompt loading and knowledge base formatting extracted from utils.py - core/api_clients.py: AI provider client factories extracted from utils.py, plus new get_grok_client() - core/pipeline_engine.py: pure business logic pipeline with zero Streamlit imports, Protocol-based listener/store interfaces - core/services.py: lightweight DI container with lazy fallbacks to existing singletons Co-Authored-By: Claude Opus 4.6 --- core/api_clients.py | 75 ++++++++++ core/constants.py | 42 ++++++ core/exceptions.py | 30 ++++ core/path_safety.py | 33 +++++ core/pipeline_engine.py | 307 ++++++++++++++++++++++++++++++++++++++++ core/prompts.py | 87 ++++++++++++ core/security.py | 51 +++++++ core/services.py | 97 +++++++++++++ 8 files changed, 722 insertions(+) create mode 100644 core/api_clients.py create mode 100644 
core/constants.py create mode 100644 core/exceptions.py create mode 100644 core/path_safety.py create mode 100644 core/pipeline_engine.py create mode 100644 core/prompts.py create mode 100644 core/security.py create mode 100644 core/services.py diff --git a/core/api_clients.py b/core/api_clients.py new file mode 100644 index 0000000..7a4a110 --- /dev/null +++ b/core/api_clients.py @@ -0,0 +1,75 @@ +"""API client factory functions for WhisperForge. + +Provides lazy-initialised clients for OpenAI, Anthropic, and Grok APIs. +Each factory returns ``None`` when the required API key is missing or the +underlying package is not installed, allowing callers to degrade +gracefully. +""" + +import logging +import os + +logger = logging.getLogger(__name__) + + +def get_openai_client(): + """Get OpenAI client with API key. + + Returns None only when the key is missing or the package isn't installed. + Other errors (e.g. network, auth) propagate so callers can react. + """ + try: + import openai + except ImportError: + logger.error("OpenAI package not installed") + return None + + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + return None + + return openai.OpenAI(api_key=api_key) + + +def get_anthropic_client(): + """Get Anthropic client with API key. + + Returns None only when the key is missing or the package isn't installed. + Other errors propagate so callers can react. + """ + try: + import anthropic + except ImportError: + logger.error("Anthropic package not installed") + return None + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + return None + + return anthropic.Anthropic(api_key=api_key) + + +def get_grok_api_key(): + """Get Grok API key""" + return os.getenv("GROK_API_KEY") + + +def get_grok_client(): + """Get a Grok client via the OpenAI-compatible API. + + Returns an ``openai.OpenAI`` instance pointed at the Grok endpoint, + or ``None`` when the API key is missing or the ``openai`` package is + not installed. 
+ """ + try: + import openai + except ImportError: + logger.error("OpenAI package not installed (required for Grok client)") + return None + + api_key = get_grok_api_key() + if not api_key: + return None + + return openai.OpenAI(api_key=api_key, base_url="https://api.x.ai/v1") diff --git a/core/constants.py b/core/constants.py new file mode 100644 index 0000000..9833573 --- /dev/null +++ b/core/constants.py @@ -0,0 +1,42 @@ +""" +Application-wide constants for WhisperForge. + +These are compile-time invariants. For runtime configuration +(API keys, environment, etc.), see core/config.py. +""" + +# --------------------------------------------------------------------------- +# File size limits +# --------------------------------------------------------------------------- +MAX_UPLOAD_SIZE_BYTES: int = 2 * 1024 * 1024 * 1024 # 2 GB +LARGE_FILE_THRESHOLD_BYTES: int = 100 * 1024 * 1024 # 100 MB (triggers FFmpeg) +LARGE_FILE_THRESHOLD_MB: int = 20 # Pipeline routing threshold + +# --------------------------------------------------------------------------- +# Audio processing +# --------------------------------------------------------------------------- +CHUNK_DURATION_MINUTES: int = 10 # FFmpeg chunk length +MAX_PARALLEL_CHUNKS_STANDARD: int = 3 # pydub-based processing +MAX_PARALLEL_CHUNKS_FFMPEG: int = 4 # FFmpeg-based processing +AUDIO_SAMPLE_RATE: int = 16_000 # Hz for Whisper input +AUDIO_CHANNELS: int = 1 # Mono for Whisper input + +# --------------------------------------------------------------------------- +# Transcription success thresholds +# --------------------------------------------------------------------------- +CHUNK_SUCCESS_THRESHOLD_STANDARD: float = 0.8 # file_upload.py +CHUNK_SUCCESS_THRESHOLD_FFMPEG: float = 0.7 # large_file_processor.py + +# --------------------------------------------------------------------------- +# Subprocess timeouts (seconds) +# --------------------------------------------------------------------------- 
+FFMPEG_VERSION_CHECK_TIMEOUT: int = 5 +FFPROBE_TIMEOUT: int = 30 +FFMPEG_CHUNK_TIMEOUT: int = 300 + +# --------------------------------------------------------------------------- +# Database defaults +# --------------------------------------------------------------------------- +DEFAULT_USAGE_QUOTA_MINUTES: int = 60 # New user monthly quota +DEFAULT_CONTENT_QUERY_LIMIT: int = 50 # get_user_content() +DEFAULT_ANALYTICS_DAYS: int = 30 # get_user_analytics() diff --git a/core/exceptions.py b/core/exceptions.py new file mode 100644 index 0000000..5f2e5d3 --- /dev/null +++ b/core/exceptions.py @@ -0,0 +1,30 @@ +""" +WhisperForge custom exception hierarchy. + +All project-specific exceptions inherit from WhisperForgeError so callers +can catch the entire family if needed. +""" + + +class WhisperForgeError(Exception): + """Base exception for all WhisperForge errors.""" + + +class DatabaseError(WhisperForgeError): + """Raised when a Supabase / database operation fails.""" + + +class AuthenticationError(WhisperForgeError): + """Raised when authentication or session operations fail.""" + + +class PipelineError(WhisperForgeError): + """Raised when a pipeline step fails.""" + + +class FileProcessingError(WhisperForgeError): + """Raised when file upload or audio processing fails.""" + + +class APIClientError(WhisperForgeError): + """Raised when an external API call fails.""" diff --git a/core/path_safety.py b/core/path_safety.py new file mode 100644 index 0000000..b034ba6 --- /dev/null +++ b/core/path_safety.py @@ -0,0 +1,33 @@ +"""Filesystem path validation utilities for WhisperForge. + +Ensures that user-supplied filenames cannot escape a designated root +directory, preventing path-traversal attacks. +""" + +import re +from pathlib import Path + +# Only allow alphanumeric, hyphens, underscores, and single dots (no ..) 
+_SAFE_FILENAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$") + + +def safe_path(root: str, untrusted_name: str) -> Path: + """Build a file path that is guaranteed to stay within *root*. + + Raises ValueError if the resulting path escapes the root directory + or if the filename contains disallowed characters. + """ + if not _SAFE_FILENAME_RE.match(untrusted_name): + raise ValueError( + f"Invalid filename: {untrusted_name!r}. " + "Only alphanumeric characters, hyphens, underscores, and dots are allowed." + ) + if ".." in untrusted_name: + raise ValueError("Path traversal sequences are not allowed.") + + root_resolved = Path(root).resolve() + target = (root_resolved / untrusted_name).resolve() + + if not target.is_relative_to(root_resolved): + raise ValueError(f"Path escapes allowed directory: {target} is not under {root_resolved}") + return target diff --git a/core/pipeline_engine.py b/core/pipeline_engine.py new file mode 100644 index 0000000..bec7260 --- /dev/null +++ b/core/pipeline_engine.py @@ -0,0 +1,307 @@ +""" +Pipeline Engine +================ + +Pure business-logic pipeline orchestration with ZERO Streamlit dependencies. +Extracts the content-generation workflow from pipeline.py so it can be driven +by any front-end (Streamlit, CLI, API server, tests). 
+""" + +from __future__ import annotations + +import logging +import os +import tempfile +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Protocol + +from .content_generation import ( + generate_article, + generate_outline, + generate_social_content, + generate_wisdom, + transcribe_audio, +) +from .notion_integration import create_notion_page, generate_ai_title +from .prompt_loader import get_prompt_for_step, load_custom_prompts, load_template + +logger = logging.getLogger(__name__) + +# Pipeline step definitions (mirrors pipeline.py) +PIPELINE_STEP_NAMES = [ + "Transcription", + "Wisdom Extraction", + "Outline Creation", + "Article Generation", + "Social Content", + "Publishing", +] + + +# --------------------------------------------------------------------------- +# Listener protocol -- callers implement this to receive progress updates +# --------------------------------------------------------------------------- + + +class PipelineListener(Protocol): + """Observer interface for pipeline progress events.""" + + def on_step_start(self, step_index: int, step_name: str, message: str) -> None: ... + + def on_step_progress(self, step_index: int, step_progress: int, total_progress: int, message: str) -> None: ... + + def on_step_complete(self, step_index: int, step_name: str, result: Any) -> None: ... + + def on_error(self, step_index: int, error: Exception) -> None: ... + + def on_pipeline_complete(self, results: dict) -> None: ... 
+ + +class NullListener: + """No-op listener -- silently ignores every event.""" + + def on_step_start(self, step_index: int, step_name: str, message: str) -> None: + pass + + def on_step_progress(self, step_index: int, step_progress: int, total_progress: int, message: str) -> None: + pass + + def on_step_complete(self, step_index: int, step_name: str, result: Any) -> None: + pass + + def on_error(self, step_index: int, error: Exception) -> None: + pass + + def on_pipeline_complete(self, results: dict) -> None: + pass + + +# --------------------------------------------------------------------------- +# Pipeline configuration +# --------------------------------------------------------------------------- + + +@dataclass +class PipelineConfig: + """Configuration knobs for a single pipeline run.""" + + custom_prompts: dict[str, str] = field(default_factory=dict) + article_template: str | None = None + knowledge_base: dict[str, str] = field(default_factory=dict) + user_id: int | str | None = None + publish_to_notion: bool = True + + +# --------------------------------------------------------------------------- +# Content store protocol -- abstracts away the persistence layer +# --------------------------------------------------------------------------- + + +class ContentStore(Protocol): + """Persistence interface for generated content.""" + + def save_content(self, user_id: int | str, content_data: dict) -> str | None: ... 
+ + +class NullContentStore: + """No-op content store -- discards everything.""" + + def save_content(self, user_id: int | str, content_data: dict) -> str | None: + return None + + +# --------------------------------------------------------------------------- +# Main orchestration +# --------------------------------------------------------------------------- + + +def run_pipeline( + *, + audio_file: Any = None, + transcript: str | None = None, + config: PipelineConfig | None = None, + content_store: ContentStore | None = None, + listener: PipelineListener | None = None, +) -> dict | None: + """Execute the full content-generation pipeline. + + Supply *audio_file* (a file-like object with ``.name`` and ``.getvalue()``) + to transcribe first, or *transcript* to skip transcription. + + Returns the results dict on success, or ``None`` on failure. + """ + if config is None: + config = PipelineConfig() + if content_store is None: + content_store = NullContentStore() + if listener is None: + listener = NullListener() + + # Load custom prompts from disk when none were injected + custom_prompts = config.custom_prompts + if not custom_prompts: + custom_prompts = load_custom_prompts() + if custom_prompts: + logger.info("Loaded %d custom prompts from disk", len(custom_prompts)) + + results: dict = {} + + try: + # ------------------------------------------------------------------ + # Step 0 -- Transcription + # ------------------------------------------------------------------ + if audio_file is not None: + listener.on_step_start(0, "Transcription", "Starting transcription...") + listener.on_step_progress(0, 0, 0, "Starting transcription...") + + suffix = os.path.splitext(audio_file.name)[1] + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: + tmp.write(audio_file.getvalue()) + tmp_path = tmp.name + + try: + listener.on_step_progress(0, 50, 8, "Transcribing audio with Whisper AI...") + transcript = transcribe_audio(tmp_path) + if not transcript: + error = 
RuntimeError("Transcription failed: empty result") + listener.on_error(0, error) + logger.error("Transcription returned empty result") + return None + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + results["transcript"] = transcript + listener.on_step_progress(0, 100, 17, "Transcription complete!") + listener.on_step_complete(0, "Transcription", transcript) + else: + if not transcript: + error = ValueError("No audio file or transcript provided.") + listener.on_error(0, error) + logger.error("No audio file or transcript provided") + return None + results["transcript"] = transcript + listener.on_step_progress(1, 0, 17, f"Using pre-transcribed content ({len(transcript)} characters)") + + # ------------------------------------------------------------------ + # Step 1 -- Wisdom Extraction + # ------------------------------------------------------------------ + listener.on_step_start(1, "Wisdom Extraction", "Extracting wisdom and insights...") + listener.on_step_progress(1, 0, 17, "Extracting wisdom and insights...") + + wisdom_prompt = get_prompt_for_step("wisdom", custom_prompts) + listener.on_step_progress(1, 50, 25, "Analyzing content for key insights...") + + wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base=config.knowledge_base) + results["wisdom"] = wisdom + + listener.on_step_progress(1, 100, 33, "Wisdom extraction complete!") + listener.on_step_complete(1, "Wisdom Extraction", wisdom) + + # ------------------------------------------------------------------ + # Step 2 -- Outline Creation + # ------------------------------------------------------------------ + listener.on_step_start(2, "Outline Creation", "Creating structured outline...") + listener.on_step_progress(2, 0, 33, "Creating structured outline...") + + outline_prompt = get_prompt_for_step("outline", custom_prompts) + listener.on_step_progress(2, 50, 42, "Structuring content hierarchy...") + + outline = generate_outline( + transcript, wisdom, 
custom_prompt=outline_prompt, knowledge_base=config.knowledge_base + ) + results["outline"] = outline + + listener.on_step_progress(2, 100, 50, "Outline creation complete!") + listener.on_step_complete(2, "Outline Creation", outline) + + # ------------------------------------------------------------------ + # Step 3 -- Article Generation + # ------------------------------------------------------------------ + listener.on_step_start(3, "Article Generation", "Generating comprehensive article...") + listener.on_step_progress(3, 0, 50, "Generating comprehensive article...") + + article_prompt = get_prompt_for_step("article", custom_prompts) + if config.article_template: + template_text = load_template(config.article_template) + if template_text and article_prompt: + article_prompt = template_text + "\n" + article_prompt + + listener.on_step_progress(3, 50, 58, "Writing detailed article content...") + + article = generate_article( + transcript, + wisdom, + outline, + custom_prompt=article_prompt, + knowledge_base=config.knowledge_base, + ) + results["article"] = article + + listener.on_step_progress(3, 100, 67, "Article generation complete!") + listener.on_step_complete(3, "Article Generation", article) + + # ------------------------------------------------------------------ + # Step 4 -- Social Content + # ------------------------------------------------------------------ + listener.on_step_start(4, "Social Content", "Creating social media content...") + listener.on_step_progress(4, 0, 67, "Creating social media content...") + + social_prompt = get_prompt_for_step("social", custom_prompts) + listener.on_step_progress(4, 50, 75, "Generating social media posts...") + + social = generate_social_content( + wisdom, outline, article, custom_prompt=social_prompt, knowledge_base=config.knowledge_base + ) + results["social_content"] = social + + listener.on_step_progress(4, 100, 83, "Social content creation complete!") + listener.on_step_complete(4, "Social Content", social) + 
+ # ------------------------------------------------------------------ + # Step 5 -- Publishing & Persistence + # ------------------------------------------------------------------ + listener.on_step_start(5, "Publishing", "Publishing to Notion workspace...") + listener.on_step_progress(5, 0, 83, "Publishing to Notion workspace...") + + if config.publish_to_notion and os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): + ai_title = generate_ai_title(transcript) + listener.on_step_progress(5, 50, 90, "Uploading content to Notion...") + + notion_url = create_notion_page(ai_title, results) + if notion_url: + results["notion_url"] = notion_url + else: + logger.warning("Notion page creation returned None") + else: + logger.info("Notion publishing skipped (disabled or not configured)") + + # --- Save to DB --- + listener.on_step_progress(5, 90, 96, "Saving to database...") + try: + if config.user_id is not None: + content_data = { + "title": results.get("title", "Untitled"), + "transcript": results.get("transcript", ""), + "wisdom": results.get("wisdom", ""), + "outline": results.get("outline", ""), + "article": results.get("article", ""), + "social_content": results.get("social_content", ""), + "notion_url": results.get("notion_url", ""), + "created_at": datetime.now().isoformat(), + } + content_store.save_content(config.user_id, content_data) + except Exception as exc: + logger.warning("Content saved locally but database save failed: %s", exc) + + listener.on_step_progress(5, 100, 100, "Pipeline complete! 
All content generated successfully.") + listener.on_step_complete(5, "Publishing", results.get("notion_url")) + listener.on_pipeline_complete(results) + + return results + + except Exception as exc: + listener.on_error(0, exc) + logger.exception("Pipeline failed: %s", exc) + return None diff --git a/core/prompts.py b/core/prompts.py new file mode 100644 index 0000000..b6305d7 --- /dev/null +++ b/core/prompts.py @@ -0,0 +1,87 @@ +"""Prompt management for WhisperForge content generation. + +Handles loading, formatting, and enhancing prompts from markdown files +with support for per-user overrides and automatic knowledge-base +concatenation. +""" + +import logging + +from .path_safety import safe_path + +logger = logging.getLogger(__name__) + +# Default prompts for content generation (DEPRECATED - use load_prompt_from_file) +DEFAULT_PROMPTS = { + "wisdom_extraction": """Extract key insights, lessons, and wisdom from the transcript. Focus on actionable takeaways and profound realizations.""", + "summary": """## Summary +Create a concise summary of the main points and key messages in the transcript. +Capture the essence of the content in a few paragraphs.""", + "outline_creation": """Create a detailed outline for an article or blog post based on the transcript and extracted wisdom. Include major sections and subsections.""", + "social_media": """Generate engaging social media posts for different platforms (Twitter, LinkedIn, Instagram) based on the key insights.""", + "image_prompts": """Create detailed image generation prompts that visualize the key concepts and metaphors from the content.""", + "article_writing": """Write a comprehensive article based on the provided outline and wisdom. 
Maintain a clear narrative flow and engaging style.""", + "seo_analysis": """Analyze the content from an SEO perspective and provide optimization recommendations for better search visibility while maintaining content quality.""", + "editor_persona": """You are a professional content editor. Provide constructive feedback to improve the content quality.""", +} + + +def load_prompt_from_file(prompt_type: str, user_id: str = None) -> str: + """Load prompt from markdown file with user override support""" + try: + # Check for user-specific prompt first (for paid tiers) + if user_id: + user_dir = str(safe_path("prompts/users", user_id)) + user_prompt_path = safe_path(user_dir, f"{prompt_type}.md") + if user_prompt_path.exists(): + return user_prompt_path.read_text(encoding="utf-8").strip() + + # Load default prompt + default_prompt_path = safe_path("prompts/default", f"{prompt_type}.md") + if default_prompt_path.exists(): + return default_prompt_path.read_text(encoding="utf-8").strip() + + # Fallback to hardcoded prompts + fallback = DEFAULT_PROMPTS.get(prompt_type, "") + if fallback: + logger.warning(f"Using fallback prompt for {prompt_type} - consider creating markdown file") + return fallback + + logger.error(f"No prompt found for type: {prompt_type}") + return f"Please provide content for {prompt_type.replace('_', ' ')}." 
+ + except Exception as e: + logger.error(f"Error loading prompt {prompt_type}: {e}") + return DEFAULT_PROMPTS.get(prompt_type, f"Error loading {prompt_type} prompt.") + + +def format_knowledge_base_context(knowledge_base: dict[str, str]) -> str: + """Format knowledge base content for auto-concatenation to prompts""" + if not knowledge_base: + return "" + + context_parts = ["## Knowledge Base Context\n"] + context_parts.append( + "Use the following knowledge base to inform your analysis and maintain consistency with established perspectives:\n" + ) + + for name, content in knowledge_base.items(): + context_parts.append(f"### {name}") + context_parts.append(content) + context_parts.append("") # Empty line for separation + + context_parts.append("---\n") + context_parts.append("## Your Task\n") + + return "\n".join(context_parts) + + +def get_enhanced_prompt(prompt_type: str, knowledge_base: dict[str, str] = None, user_id: str = None) -> str: + """Get prompt with automatic knowledge base concatenation""" + base_prompt = load_prompt_from_file(prompt_type, user_id) + + if knowledge_base: + kb_context = format_knowledge_base_context(knowledge_base) + return f"{kb_context}{base_prompt}" + + return base_prompt diff --git a/core/security.py b/core/security.py new file mode 100644 index 0000000..0ad5dff --- /dev/null +++ b/core/security.py @@ -0,0 +1,51 @@ +"""Password hashing and verification utilities for WhisperForge. + +Provides bcrypt-based password hashing for current use and a deprecated +SHA-256 helper retained solely for migrating legacy credential stores. 
+""" + +import hashlib +import logging +import warnings + +import bcrypt + +logger = logging.getLogger(__name__) + + +def hash_password(password: str) -> str: + """Hash a password using bcrypt with salt""" + # Generate salt and hash password + salt = bcrypt.gensalt() + hashed = bcrypt.hashpw(password.encode("utf-8"), salt) + return hashed.decode("utf-8") + + +def verify_password(password: str, hashed: str) -> bool: + """Verify a password against its hash. + + Raises on unexpected errors (e.g. DB corruption) instead of + silently returning False which would be indistinguishable from + a wrong password. + """ + try: + return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) + except ValueError as e: + # Malformed hash string (wrong prefix, bad encoding, etc.) + logger.error("Password verification failed – malformed hash: %s", e) + return False + + +def legacy_hash_password(password: str) -> str: + """Legacy SHA-256 hash - DEPRECATED, use for migration only. + + .. deprecated:: + Use :func:`hash_password` (bcrypt) for all new credential storage. + """ + warnings.warn( + "legacy_hash_password() is deprecated and will be removed in a future release. " + "Use hash_password() (bcrypt) instead.", + DeprecationWarning, + stacklevel=2, + ) + return hashlib.sha256(password.encode()).hexdigest() diff --git a/core/services.py b/core/services.py new file mode 100644 index 0000000..00fbba4 --- /dev/null +++ b/core/services.py @@ -0,0 +1,97 @@ +""" +Service Locator / DI Container +================================ + +Lightweight dependency-injection container for WhisperForge. +Each field defaults to ``None`` and falls back to the existing +module-level singleton on first access, so existing code keeps +working unchanged while tests and alternative front-ends can +inject their own implementations. 
+ +Usage:: + + from core.services import get_services + + svc = get_services() + cfg = svc.get_config() # lazily loads the global Config + db = svc.get_db() # lazily loads the global SupabaseClient +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .auth_wrapper import AuthWrapper + from .config import Config + from .session_manager import SessionManager + from .supabase_integration import SupabaseClient + +logger = logging.getLogger(__name__) + + +@dataclass +class Services: + """Central service registry with lazy fallbacks to existing singletons.""" + + config: Config | None = None + db: SupabaseClient | None = None + auth: AuthWrapper | None = None + session: SessionManager | None = None + + # -- lazy getters ------------------------------------------------------- + + def get_config(self) -> Config: + """Return the injected Config or fall back to the global singleton.""" + if self.config is not None: + return self.config + from .config import get_config + + return get_config() + + def get_db(self) -> SupabaseClient: + """Return the injected DB client or fall back to the global singleton.""" + if self.db is not None: + return self.db + from .supabase_integration import get_supabase_client + + return get_supabase_client() + + def get_auth(self) -> AuthWrapper: + """Return the injected AuthWrapper or fall back to the global singleton.""" + if self.auth is not None: + return self.auth + from .auth_wrapper import get_auth + + return get_auth() + + def get_session(self) -> SessionManager: + """Return the injected SessionManager or fall back to the global singleton.""" + if self.session is not None: + return self.session + from .session_manager import get_session_manager + + return get_session_manager() + + +# --------------------------------------------------------------------------- +# Module-level singleton +# 
--------------------------------------------------------------------------- + +_services: Services | None = None + + +def get_services() -> Services: + """Return the current global ``Services`` instance (created on first call).""" + global _services + if _services is None: + _services = Services() + return _services + + +def set_services(services: Services) -> None: + """Replace the global ``Services`` instance (useful in tests).""" + global _services + _services = services From 19f5540716a3f702a36aafe8ba84852c29984673 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:24:39 -0800 Subject: [PATCH 37/46] refactor(core): convert utils.py to re-export shim and decouple leaf modules Rewrite utils.py as a thin backward-compatible shim that re-exports from the new focused modules (security, path_safety, prompts, api_clients). All 8 existing import sites continue working unchanged. Remove Streamlit dependency from leaf modules: - prompt_loader.py: replace st.warning with logger.warning - notion_integration.py: replace st.warning/st.error with logger calls These modules now do pure filesystem/API work with no UI framework coupling, enabling use from CLI and pipeline_engine. 
Co-Authored-By: Claude Opus 4.6 --- core/notion_integration.py | 13 +- core/prompt_loader.py | 6 +- core/utils.py | 265 ++++++++++++------------------------- 3 files changed, 93 insertions(+), 191 deletions(-) diff --git a/core/notion_integration.py b/core/notion_integration.py index 6b38001..21164d3 100644 --- a/core/notion_integration.py +++ b/core/notion_integration.py @@ -9,7 +9,7 @@ import os from datetime import datetime -import streamlit as st +import openai from .utils import get_openai_client @@ -43,6 +43,8 @@ def generate_ai_title(transcript: str) -> str: ) title = response.choices[0].message.content return title.strip().replace('"', "").replace("'", "")[:60] + except openai.APIError: + return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" except Exception: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" @@ -55,7 +57,7 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: try: from notion_client import Client except ImportError: - st.warning("Install notion-client to enable Notion publishing: pip install notion-client") + logger.warning("Install notion-client to enable Notion publishing: pip install notion-client") return None try: @@ -63,7 +65,7 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: database_id = os.getenv("NOTION_DATABASE_ID") if not api_key or not database_id: - st.warning("Notion not configured. Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") + logger.warning("Notion not configured. 
Set NOTION_API_KEY and NOTION_DATABASE_ID to auto-publish.") return None client = Client(auth=api_key) @@ -82,8 +84,11 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: return None + except (openai.APIError, OSError) as e: + logger.error(f"Notion publishing failed: {e!s}") + return None except Exception as e: - st.error(f"Notion publishing failed: {e!s}") + logger.error(f"Unexpected error in Notion publishing: {e!s}") return None diff --git a/core/prompt_loader.py b/core/prompt_loader.py index 7011b40..50e47af 100644 --- a/core/prompt_loader.py +++ b/core/prompt_loader.py @@ -8,8 +8,6 @@ import logging import os -import streamlit as st - logger = logging.getLogger(__name__) @@ -25,8 +23,8 @@ def load_custom_prompts() -> dict[str, str]: try: with open(os.path.join(prompt_dir, filename), encoding="utf-8") as f: prompts[prompt_name] = f.read() - except Exception as e: - st.warning(f"Failed to load prompt {filename}: {e}") + except OSError as e: + logger.warning(f"Failed to load prompt {filename}: {e}") return prompts diff --git a/core/utils.py b/core/utils.py index 3929d55..35429f3 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,201 +1,100 @@ -""" -Shared utilities for WhisperForge -Contains functions that are shared between the original app and Supabase version +"""Backward-compatible re-export shim for ``core.utils``. + +.. deprecated:: + This module is **deprecated** and exists only so that existing import + sites (e.g. ``from .utils import hash_password``) continue to work + without modification. 
All public symbols have been relocated to + focused modules: + + * **Security** (``core.security``): ``hash_password``, + ``verify_password``, ``legacy_hash_password`` + * **Path safety** (``core.path_safety``): ``safe_path`` + * **Prompts** (``core.prompts``): ``DEFAULT_PROMPTS``, + ``load_prompt_from_file``, ``format_knowledge_base_context``, + ``get_enhanced_prompt`` + * **API clients** (``core.api_clients``): ``get_openai_client``, + ``get_anthropic_client``, ``get_grok_api_key``, ``get_grok_client`` + + New code should import directly from those modules. """ -import hashlib import logging -import os -import re -from pathlib import Path - -import bcrypt +import warnings + +# ---- Re-exports from core.api_clients --------------------------------- +from .api_clients import ( + get_anthropic_client, + get_grok_api_key, + get_grok_client, + get_openai_client, +) + +# ---- Re-exports from core.path_safety --------------------------------- +from .path_safety import safe_path + +# ---- Re-exports from core.prompts ------------------------------------- +from .prompts import ( + DEFAULT_PROMPTS, + format_knowledge_base_context, + get_enhanced_prompt, + load_prompt_from_file, +) + +# ---- Re-exports from core.security ------------------------------------ +from .security import hash_password, legacy_hash_password, verify_password logger = logging.getLogger(__name__) -# --- Path safety --- - -# Only allow alphanumeric, hyphens, underscores, and single dots (no ..) -_SAFE_FILENAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$") - - -def safe_path(root: str, untrusted_name: str) -> Path: - """Build a file path that is guaranteed to stay within *root*. - - Raises ValueError if the resulting path escapes the root directory - or if the filename contains disallowed characters. - """ - if not _SAFE_FILENAME_RE.match(untrusted_name): - raise ValueError( - f"Invalid filename: {untrusted_name!r}. " - "Only alphanumeric characters, hyphens, underscores, and dots are allowed." 
- ) - if ".." in untrusted_name: - raise ValueError("Path traversal sequences are not allowed.") - - root_resolved = Path(root).resolve() - target = (root_resolved / untrusted_name).resolve() - - if not target.is_relative_to(root_resolved): - raise ValueError(f"Path escapes allowed directory: {target} is not under {root_resolved}") - return target - -def hash_password(password: str) -> str: - """Hash a password using bcrypt with salt""" - # Generate salt and hash password - salt = bcrypt.gensalt() - hashed = bcrypt.hashpw(password.encode("utf-8"), salt) - return hashed.decode("utf-8") +# ---- Deprecated helpers kept inline ------------------------------------ -def verify_password(password: str, hashed: str) -> bool: - """Verify a password against its hash. +def update_usage_tracking(duration_seconds: float): + """Placeholder for usage tracking - implement as needed. - Raises on unexpected errors (e.g. DB corruption) instead of - silently returning False which would be indistinguishable from - a wrong password. + .. deprecated:: + ``update_usage_tracking`` will be removed in a future release. """ - try: - return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) - except ValueError as e: - # Malformed hash string (wrong prefix, bad encoding, etc.) - logger.error("Password verification failed – malformed hash: %s", e) - return False - - -# Legacy SHA-256 hash function for migration purposes - - -def legacy_hash_password(password: str) -> str: - """Legacy SHA-256 hash - DEPRECATED, use for migration only""" - return hashlib.sha256(password.encode()).hexdigest() - - -# Default prompts for content generation (DEPRECATED - use load_prompt_from_file) -DEFAULT_PROMPTS = { - "wisdom_extraction": """Extract key insights, lessons, and wisdom from the transcript. Focus on actionable takeaways and profound realizations.""", - "summary": """## Summary -Create a concise summary of the main points and key messages in the transcript. 
-Capture the essence of the content in a few paragraphs.""", - "outline_creation": """Create a detailed outline for an article or blog post based on the transcript and extracted wisdom. Include major sections and subsections.""", - "social_media": """Generate engaging social media posts for different platforms (Twitter, LinkedIn, Instagram) based on the key insights.""", - "image_prompts": """Create detailed image generation prompts that visualize the key concepts and metaphors from the content.""", - "article_writing": """Write a comprehensive article based on the provided outline and wisdom. Maintain a clear narrative flow and engaging style.""", - "seo_analysis": """Analyze the content from an SEO perspective and provide optimization recommendations for better search visibility while maintaining content quality.""", - "editor_persona": """You are a professional content editor. Provide constructive feedback to improve the content quality.""", -} - - -def load_prompt_from_file(prompt_type: str, user_id: str = None) -> str: - """Load prompt from markdown file with user override support""" - try: - # Check for user-specific prompt first (for paid tiers) - if user_id: - user_dir = str(safe_path("prompts/users", user_id)) - user_prompt_path = safe_path(user_dir, f"{prompt_type}.md") - if user_prompt_path.exists(): - return user_prompt_path.read_text(encoding="utf-8").strip() - - # Load default prompt - default_prompt_path = safe_path("prompts/default", f"{prompt_type}.md") - if default_prompt_path.exists(): - return default_prompt_path.read_text(encoding="utf-8").strip() - - # Fallback to hardcoded prompts - fallback = DEFAULT_PROMPTS.get(prompt_type, "") - if fallback: - logger.warning(f"Using fallback prompt for {prompt_type} - consider creating markdown file") - return fallback - - logger.error(f"No prompt found for type: {prompt_type}") - return f"Please provide content for {prompt_type.replace('_', ' ')}." 
- - except Exception as e: - logger.error(f"Error loading prompt {prompt_type}: {e}") - return DEFAULT_PROMPTS.get(prompt_type, f"Error loading {prompt_type} prompt.") - - -def format_knowledge_base_context(knowledge_base: dict[str, str]) -> str: - """Format knowledge base content for auto-concatenation to prompts""" - if not knowledge_base: - return "" - - context_parts = ["## Knowledge Base Context\n"] - context_parts.append( - "Use the following knowledge base to inform your analysis and maintain consistency with established perspectives:\n" + warnings.warn( + "update_usage_tracking() is deprecated and will be removed in a future release.", + DeprecationWarning, + stacklevel=2, ) - - for name, content in knowledge_base.items(): - context_parts.append(f"### {name}") - context_parts.append(content) - context_parts.append("") # Empty line for separation - - context_parts.append("---\n") - context_parts.append("## Your Task\n") - - return "\n".join(context_parts) - - -def get_enhanced_prompt(prompt_type: str, knowledge_base: dict[str, str] = None, user_id: str = None) -> str: - """Get prompt with automatic knowledge base concatenation""" - base_prompt = load_prompt_from_file(prompt_type, user_id) - - if knowledge_base: - kb_context = format_knowledge_base_context(knowledge_base) - return f"{kb_context}{base_prompt}" - - return base_prompt - - -def get_openai_client(): - """Get OpenAI client with API key. - - Returns None only when the key is missing or the package isn't installed. - Other errors (e.g. network, auth) propagate so callers can react. - """ - try: - import openai - except ImportError: - logger.error("OpenAI package not installed") - return None - - api_key = os.getenv("OPENAI_API_KEY") - if not api_key: - return None - - return openai.OpenAI(api_key=api_key) + logger.info(f"Usage tracked: {duration_seconds} seconds") -def get_anthropic_client(): - """Get Anthropic client with API key. 
+def get_prompt(prompt_type: str, prompts: dict[str, str], default_prompts: dict[str, str]) -> str: + """Get prompt from user prompts or defaults. - Returns None only when the key is missing or the package isn't installed. - Other errors propagate so callers can react. + .. deprecated:: + Use :func:`core.prompts.get_enhanced_prompt` instead. """ - try: - import anthropic - except ImportError: - logger.error("Anthropic package not installed") - return None - - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - return None - - return anthropic.Anthropic(api_key=api_key) - - -def get_grok_api_key(): - """Get Grok API key""" - return os.getenv("GROK_API_KEY") - - -def update_usage_tracking(duration_seconds: float): - """Placeholder for usage tracking - implement as needed""" - logger.info(f"Usage tracked: {duration_seconds} seconds") + warnings.warn( + "get_prompt() is deprecated. Use get_enhanced_prompt() from core.prompts instead.", + DeprecationWarning, + stacklevel=2, + ) + return prompts.get(prompt_type, default_prompts.get(prompt_type, "")) -def get_prompt(prompt_type: str, prompts: dict[str, str], default_prompts: dict[str, str]) -> str: - """Get prompt from user prompts or defaults (DEPRECATED - use get_enhanced_prompt)""" - return prompts.get(prompt_type, default_prompts.get(prompt_type, "")) +__all__ = [ + # core.security + "hash_password", + "verify_password", + "legacy_hash_password", + # core.path_safety + "safe_path", + # core.prompts + "DEFAULT_PROMPTS", + "load_prompt_from_file", + "format_knowledge_base_context", + "get_enhanced_prompt", + # core.api_clients + "get_openai_client", + "get_anthropic_client", + "get_grok_api_key", + "get_grok_client", + # deprecated inline helpers + "update_usage_tracking", + "get_prompt", +] From c26714ea31e316f9cdc6cae44519b62e69779067 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:24:55 -0800 Subject: [PATCH 38/46] refactor(core): replace magic numbers with constants and add specific 
exception handling Replace hardcoded values across core modules with named constants from core/constants.py for file size limits, timeouts, audio params, thresholds, and database defaults. Remove duplicate _load_upload_css() from large_file_processor.py, importing from file_upload.py instead. Add specific exception catches before bare except Exception blocks across 13 core modules (~60 sites). Each now catches the expected exception type first (APIError, OSError, openai.APIError, etc.) with except Exception retained as last-resort fallback. Update ContentGenerationError to inherit from PipelineError. Co-Authored-By: Claude Opus 4.6 --- core/__init__.py | 6 +-- core/auth_wrapper.py | 40 ++++++++++++---- core/config.py | 2 +- core/content_generation.py | 3 +- core/file_upload.py | 33 +++++++++---- core/large_file_processor.py | 79 +++++++++++++++++------------- core/logging_config.py | 2 +- core/pages.py | 33 +++++++++---- core/pipeline.py | 17 +++++-- core/session_manager.py | 8 +++- core/streaming_pipeline.py | 23 +++++++-- core/supabase_integration.py | 93 ++++++++++++++++++++++++++++-------- 12 files changed, 246 insertions(+), 93 deletions(-) diff --git a/core/__init__.py b/core/__init__.py index f5a6fae..489621f 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -11,8 +11,4 @@ # Core modules - some legacy imports removed for cleanup from .config import Config, get_config, set_config -__all__ = [ - "Config", - "get_config", - "set_config" -] +__all__ = ["Config", "get_config", "set_config"] diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index a107309..25da594 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -6,6 +6,8 @@ from typing import Any +from postgrest.exceptions import APIError + from core.logging_config import logger from .session_manager import get_session_manager @@ -29,9 +31,12 @@ def _init_supabase(self): from .supabase_integration import get_supabase_client self.supabase_client = get_supabase_client() - except Exception 
as e: + except (APIError, ValueError) as e: logger.log_error(e, "Failed to initialize Supabase") self.supabase_client = None + except Exception as e: + logger.log_error(e, "Unexpected error initializing Supabase") + self.supabase_client = None def is_authenticated(self) -> bool: """Check if user is authenticated (backward compatible)""" @@ -85,8 +90,10 @@ def authenticate_user(self, email: str, password: str) -> bool: "id", user["id"] ).execute() logger.logger.info(f"Password migrated to bcrypt for user: {email}") - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, "Failed to migrate password") + except Exception as e: + logger.log_error(e, "Unexpected error migrating password") if password_valid: # Create persistent session using SessionManager @@ -106,9 +113,12 @@ def authenticate_user(self, email: str, password: str) -> bool: logger.logger.warning(f"Invalid password for user: {email}") return False - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, f"Authentication error for {email}") return False + except Exception as e: + logger.log_error(e, f"Unexpected authentication error for {email}") + return False def register_user(self, email: str, password: str) -> bool: """Register new user and create session""" @@ -148,9 +158,12 @@ def register_user(self, email: str, password: str) -> bool: logger.log_error(Exception(f"Failed to create user in database: {email}"), "Registration failed") return False - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, f"Registration error for {email}") return False + except Exception as e: + logger.log_error(e, f"Unexpected registration error for {email}") + return False def logout(self) -> bool: """Log out user and clear session""" @@ -162,9 +175,12 @@ def logout(self) -> bool: else: logger.log_error(Exception(f"Failed to logout user: {email}"), "Logout failed") return False - except Exception as e: + except (APIError, ValueError) as e: 
logger.log_error(e, "Logout error") return False + except Exception as e: + logger.log_error(e, "Unexpected logout error") + return False def _load_user_preferences(self, user_id: str): """Load user preferences from database into session""" @@ -202,8 +218,10 @@ def _load_user_preferences(self, user_id: str): logger.logger.debug(f"Loaded preferences for user: {user_id}") - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, "Failed to load user preferences") + except Exception as e: + logger.log_error(e, "Unexpected error loading user preferences") def get_api_keys(self) -> dict[str, str]: """Get user API keys from session cache""" @@ -236,9 +254,12 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: logger.log_error(Exception(f"Failed to update API key: {key_name}"), "API key update failed") return False - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, f"Error updating API key {key_name}") return False + except Exception as e: + logger.log_error(e, f"Unexpected error updating API key {key_name}") + return False def get_custom_prompts(self) -> dict[str, str]: """Get user custom prompts from session cache""" @@ -273,9 +294,12 @@ def update_custom_prompt(self, prompt_type: str, content: str) -> bool: ) return False - except Exception as e: + except (APIError, ValueError) as e: logger.log_error(e, f"Error updating custom prompt {prompt_type}") return False + except Exception as e: + logger.log_error(e, f"Unexpected error updating custom prompt {prompt_type}") + return False # Session Manager delegation methods diff --git a/core/config.py b/core/config.py index 3a7b938..a9f28e3 100644 --- a/core/config.py +++ b/core/config.py @@ -112,7 +112,7 @@ def validate(self) -> bool: for dir_path in [self.data_dir, self.prompts_dir, self.temp_dir]: try: dir_path.mkdir(parents=True, exist_ok=True) - except Exception as e: + except OSError as e: errors.append(f"Cannot create directory {dir_path}: {e}") if 
errors: diff --git a/core/content_generation.py b/core/content_generation.py index 4aeee4f..7367460 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -6,6 +6,7 @@ import logging import os +from .exceptions import PipelineError from .utils import get_enhanced_prompt, get_openai_client logger = logging.getLogger(__name__) @@ -20,7 +21,7 @@ ARTICLE_PREVIEW_LENGTH = 1500 -class ContentGenerationError(Exception): +class ContentGenerationError(PipelineError): """Raised when content generation fails""" pass diff --git a/core/file_upload.py b/core/file_upload.py index 1844b1d..2433ec4 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -14,8 +14,11 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed +import openai import streamlit as st +from .constants import CHUNK_SUCCESS_THRESHOLD_STANDARD, MAX_PARALLEL_CHUNKS_STANDARD, MAX_UPLOAD_SIZE_BYTES +from .content_generation import ContentGenerationError from .large_file_processor import EnhancedLargeFileProcessor # noqa: F401 - re-export logger = logging.getLogger(__name__) @@ -47,9 +50,9 @@ def __init__(self): } from .config import get_config - self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB + self.max_file_size = MAX_UPLOAD_SIZE_BYTES self.chunk_size_mb = get_config().audio_chunk_size_mb - self.max_parallel_chunks = 3 + self.max_parallel_chunks = MAX_PARALLEL_CHUNKS_STANDARD def create_upload_zone(self): """Create enhanced upload zone for large files.""" @@ -142,9 +145,12 @@ def _process_small_file(self, uploaded_file) -> dict: progress_bar.progress(1.0, "\u2705 Transcription complete!") return {"success": True, "transcript": transcript, "chunks": 1, "total_duration": "N/A"} - except Exception as e: + except (ContentGenerationError, openai.APIError, OSError) as e: progress_bar.progress(1.0, f"\u274c Error: {e!s}") return {"success": False, "error": str(e)} + except Exception as e: + progress_bar.progress(1.0, f"\u274c Unexpected error: {e!s}") + return 
{"success": False, "error": str(e)} def _process_large_file_chunked(self, uploaded_file) -> dict: """Process large files with intelligent chunking and parallel transcription.""" @@ -186,10 +192,14 @@ def _process_large_file_chunked(self, uploaded_file) -> dict: "processing_time": transcription_results.get("total_time", "N/A"), } - except Exception as e: + except (ContentGenerationError, openai.APIError, OSError) as e: logger.exception("Error in large file processing:") st.error(f"\u274c Large file processing failed: {e!s}") return {"success": False, "error": str(e)} + except Exception as e: + logger.exception("Unexpected error in large file processing:") + st.error(f"\u274c Unexpected large file processing error: {e!s}") + return {"success": False, "error": str(e)} def _create_audio_chunks(self, uploaded_file) -> dict: """Create audio chunks for parallel processing.""" @@ -242,9 +252,12 @@ def _create_audio_chunks(self, uploaded_file) -> dict: return {"success": True, "chunks": chunks} - except Exception as e: + except OSError as e: logger.exception("Error creating audio chunks:") return {"success": False, "error": f"Failed to create chunks: {e!s}"} + except Exception as e: + logger.exception("Unexpected error creating audio chunks:") + return {"success": False, "error": f"Unexpected error creating chunks: {e!s}"} def _transcribe_chunks_parallel(self, chunks: list[dict], progress_container, chunks_container) -> dict: """Transcribe chunks in parallel with real-time progress tracking.""" @@ -273,10 +286,14 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: chunk_statuses[chunk_index] = "completed" return chunk_index, transcript.text, True - except Exception as e: + except (openai.APIError, OSError) as e: chunk_statuses[chunk_index] = "error" logger.exception(f"Error transcribing chunk {chunk_index}:") return chunk_index, f"Error: {e!s}", False + except Exception as e: + chunk_statuses[chunk_index] = "error" + logger.exception(f"Unexpected 
error transcribing chunk {chunk_index}:") + return chunk_index, f"Unexpected error: {e!s}", False with ThreadPoolExecutor(max_workers=self.max_parallel_chunks) as executor: future_to_chunk = {executor.submit(transcribe_single_chunk, chunk): chunk["index"] for chunk in chunks} @@ -329,7 +346,7 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: processing_time = time.time() - start_time successful_chunks = len(chunk_transcripts) - if successful_chunks < total_chunks * 0.8: + if successful_chunks < total_chunks * CHUNK_SUCCESS_THRESHOLD_STANDARD: return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} return { @@ -350,7 +367,7 @@ def _cleanup_chunks(self, chunks: list[dict]): try: if os.path.exists(chunk["file_path"]): os.unlink(chunk["file_path"]) - except Exception as e: + except OSError as e: logger.warning(f"Failed to cleanup chunk file {chunk['file_path']}: {e}") def validate_large_file(self, file) -> dict: diff --git a/core/large_file_processor.py b/core/large_file_processor.py index 4d42a7f..38a09f9 100644 --- a/core/large_file_processor.py +++ b/core/large_file_processor.py @@ -9,28 +9,28 @@ import logging import math import os +import subprocess import tempfile import time from concurrent.futures import ThreadPoolExecutor, as_completed +import openai import streamlit as st -logger = logging.getLogger(__name__) - -_UPLOAD_CSS_CACHE = None - +from .constants import ( + AUDIO_CHANNELS, + AUDIO_SAMPLE_RATE, + CHUNK_DURATION_MINUTES, + CHUNK_SUCCESS_THRESHOLD_FFMPEG, + FFMPEG_CHUNK_TIMEOUT, + FFMPEG_VERSION_CHECK_TIMEOUT, + FFPROBE_TIMEOUT, + LARGE_FILE_THRESHOLD_BYTES, + MAX_PARALLEL_CHUNKS_FFMPEG, + MAX_UPLOAD_SIZE_BYTES, +) -def _load_upload_css(): - """Load upload CSS from static file (cached).""" - global _UPLOAD_CSS_CACHE # noqa: PLW0603 - if _UPLOAD_CSS_CACHE is None: - css_path = os.path.join("static", "css", "upload.css") - if os.path.exists(css_path): - with open(css_path, 
encoding="utf-8") as f: - _UPLOAD_CSS_CACHE = f"" - else: - _UPLOAD_CSS_CACHE = "" - return _UPLOAD_CSS_CACHE +logger = logging.getLogger(__name__) class EnhancedLargeFileProcessor: @@ -49,9 +49,9 @@ def __init__(self): "audio": [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma", ".webm", ".mpeg", ".mpga", ".oga"], "video": [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"], } - self.max_file_size = 2 * 1024 * 1024 * 1024 # 2GB - self.chunk_duration_minutes = 10 - self.max_parallel_chunks = 4 + self.max_file_size = MAX_UPLOAD_SIZE_BYTES + self.chunk_duration_minutes = CHUNK_DURATION_MINUTES + self.max_parallel_chunks = MAX_PARALLEL_CHUNKS_FFMPEG self.temp_dir = None def check_ffmpeg_availability(self) -> bool: @@ -59,7 +59,9 @@ def check_ffmpeg_availability(self) -> bool: try: import subprocess - result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True, timeout=5) # noqa: S603 + result = subprocess.run( + ["ffmpeg", "-version"], capture_output=True, text=True, timeout=FFMPEG_VERSION_CHECK_TIMEOUT + ) # noqa: S603 return result.returncode == 0 except (subprocess.TimeoutExpired, FileNotFoundError, Exception): return False @@ -72,7 +74,7 @@ def get_audio_info(self, file_path: str) -> dict: cmd = ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", file_path] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) # noqa: S603 + result = subprocess.run(cmd, capture_output=True, text=True, timeout=FFPROBE_TIMEOUT) # noqa: S603 if result.returncode != 0: return {"error": f"ffprobe failed: {result.stderr}"} @@ -100,8 +102,10 @@ def get_audio_info(self, file_path: str) -> dict: "channels": int(audio_stream.get("channels", 0)), } - except Exception as e: + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"error": f"Failed to get audio info: {e!s}"} + except Exception as e: + return {"error": f"Unexpected error getting audio info: {e!s}"} def 
validate_file(self, uploaded_file) -> dict: """Enhanced file validation for large files.""" @@ -118,7 +122,7 @@ def validate_file(self, uploaded_file) -> dict: if file_extension not in all_formats: return {"valid": False, "error": f"Unsupported format: {file_extension}"} - if file_size > 100 * 1024 * 1024 and not self.check_ffmpeg_availability(): + if file_size > LARGE_FILE_THRESHOLD_BYTES and not self.check_ffmpeg_availability(): return { "valid": False, "error": "FFmpeg required for large files but not available. Please install FFmpeg.", @@ -128,7 +132,7 @@ def validate_file(self, uploaded_file) -> dict: "valid": True, "size": file_size, "size_mb": file_size / (1024 * 1024), - "requires_chunking": file_size > 100 * 1024 * 1024, + "requires_chunking": file_size > LARGE_FILE_THRESHOLD_BYTES, "format": file_extension, } @@ -175,6 +179,8 @@ def create_enhanced_upload_interface(self):
""" + from .file_upload import _load_upload_css + st.markdown(_load_upload_css(), unsafe_allow_html=True) st.markdown(upload_html, unsafe_allow_html=True) @@ -241,8 +247,10 @@ def _process_standard(self, uploaded_file) -> dict: if os.path.exists(tmp_file_path): os.unlink(tmp_file_path) - except Exception as e: + except (openai.APIError, OSError) as e: return {"success": False, "error": f"Standard processing failed: {e!s}"} + except Exception as e: + return {"success": False, "error": f"Unexpected standard processing error: {e!s}"} def _process_with_ffmpeg_chunking(self, uploaded_file) -> dict: """Process large files using FFmpeg chunking.""" @@ -291,8 +299,10 @@ def _process_with_ffmpeg_chunking(self, uploaded_file) -> dict: "success_rate": transcription_result.get("success_rate", "unknown"), } - except Exception as e: + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"success": False, "error": f"FFmpeg processing failed: {e!s}"} + except Exception as e: + return {"success": False, "error": f"Unexpected FFmpeg processing error: {e!s}"} finally: self._cleanup_temp_dir() @@ -322,16 +332,16 @@ def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> dict: "-t", str(chunk_duration_seconds), "-ar", - "16000", + str(AUDIO_SAMPLE_RATE), "-ac", - "1", + str(AUDIO_CHANNELS), "-acodec", "pcm_s16le", "-y", chunk_path, ] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) # noqa: S603 + result = subprocess.run(cmd, capture_output=True, text=True, timeout=FFMPEG_CHUNK_TIMEOUT) # noqa: S603 if result.returncode != 0: return {"success": False, "error": f"FFmpeg chunk creation failed: {result.stderr}"} @@ -352,8 +362,10 @@ def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> dict: return {"success": True, "chunks": chunks} - except Exception as e: + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"success": False, "error": f"Chunk creation 
failed: {e!s}"} + except Exception as e: + return {"success": False, "error": f"Unexpected chunk creation error: {e!s}"} def _transcribe_chunks_parallel(self, chunks: list[dict]) -> dict: """Transcribe chunks in parallel using ThreadPoolExecutor.""" @@ -372,9 +384,12 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: file_path = chunk_info["file_path"] transcript = transcribe_audio(file_path) return chunk_index, transcript, True - except Exception as e: + except (openai.APIError, OSError) as e: logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False + except Exception as e: + logger.error(f"Unexpected error transcribing chunk {chunk_info['index']}: {e}") + return chunk_info["index"], f"[Unexpected error for chunk {chunk_info['index']}]", False start_time = time.time() completed_chunks = 0 @@ -406,7 +421,7 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: processing_time = time.time() - start_time successful_chunks = len(chunk_transcripts) - if successful_chunks < total_chunks * 0.7: + if successful_chunks < total_chunks * CHUNK_SUCCESS_THRESHOLD_FFMPEG: return {"success": False, "error": f"Too many failed chunks: {successful_chunks}/{total_chunks} successful"} return { @@ -429,5 +444,5 @@ def _cleanup_temp_dir(self): shutil.rmtree(self.temp_dir) self.temp_dir = None - except Exception as e: + except OSError as e: logger.warning(f"Failed to cleanup temp directory {self.temp_dir}: {e}") diff --git a/core/logging_config.py b/core/logging_config.py index 2c474d1..e291ee2 100644 --- a/core/logging_config.py +++ b/core/logging_config.py @@ -162,7 +162,7 @@ def _log_structured(self, data: dict[str, Any]): try: with open(json_log_file, "a") as f: f.write(json.dumps(data) + "\n") - except Exception as e: + except OSError as e: self.logger.error(f"Failed to write structured log: {e}") # 
------------------------------------------------------------------- diff --git a/core/pages.py b/core/pages.py index 3f9c59f..bd8f060 100644 --- a/core/pages.py +++ b/core/pages.py @@ -8,6 +8,7 @@ import os import streamlit as st +from postgrest.exceptions import APIError from .content_display import create_enhanced_aurora_content_card from .supabase_integration import get_supabase_client @@ -98,8 +99,10 @@ def show_settings_page(): st.success("Supabase connected") else: st.error("Supabase connection failed") - except Exception as e: + except APIError as e: st.error(f"Supabase error: {e}") + except Exception as e: + st.error(f"Unexpected Supabase error: {e}") try: if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): @@ -110,8 +113,10 @@ def show_settings_page(): st.success("Notion connected") else: st.warning("Notion not configured") - except Exception as e: + except OSError as e: st.error(f"Notion error: {e}") + except Exception as e: + st.error(f"Unexpected Notion error: {e}") def show_knowledge_base(): @@ -144,8 +149,10 @@ def show_knowledge_base(): st.info("No knowledge files found") else: st.info("Knowledge base directory not found") - except Exception as e: + except OSError as e: st.error(f"Error reading knowledge base: {e}") + except Exception as e: + st.error(f"Unexpected error reading knowledge base: {e}") with tabs[1]: st.markdown("#### Add New Knowledge") @@ -176,8 +183,10 @@ def show_knowledge_base(): st.success(f"Knowledge saved as `{filename}`") except ValueError as e: st.error(f"Invalid filename: {e}") - except Exception as e: + except OSError as e: st.error(f"Error saving knowledge: {e}") + except Exception as e: + st.error(f"Unexpected error saving knowledge: {e}") else: st.error("Please provide both title and content") @@ -198,14 +207,18 @@ def show_knowledge_base(): os.remove(validated_path) st.success(f"Deleted `{file}`") st.rerun() - except (ValueError, Exception) as e: + except (ValueError, OSError) as e: st.error(f"Error 
deleting file: {e}") + except Exception as e: + st.error(f"Unexpected error deleting file: {e}") else: st.info("No knowledge files found") else: st.info("Knowledge base directory not found") - except Exception as e: + except OSError as e: st.error(f"Error managing files: {e}") + except Exception as e: + st.error(f"Unexpected error managing files: {e}") def show_prompts_page(): @@ -242,8 +255,10 @@ def show_prompts_page(): current_prompt = f.read() else: current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." - except Exception as e: + except OSError as e: st.error(f"Error loading prompt: {e}") + except Exception as e: + st.error(f"Unexpected error loading prompt: {e}") new_prompt = st.text_area( f"Edit {prompt_name} Prompt", @@ -260,8 +275,10 @@ def show_prompts_page(): with open(prompt_file, "w") as f: f.write(new_prompt) st.success(f"{prompt_name} prompt saved!") - except Exception as e: + except OSError as e: st.error(f"Error saving prompt: {e}") + except Exception as e: + st.error(f"Unexpected error saving prompt: {e}") with col2: if st.button("Reset", key=f"reset_{prompt_key}"): diff --git a/core/pipeline.py b/core/pipeline.py index 57bd354..cfa5203 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -12,9 +12,12 @@ import time from datetime import datetime +import openai import streamlit as st +from postgrest.exceptions import APIError from .content_generation import ( + ContentGenerationError, generate_article, generate_outline, generate_social_content, @@ -118,10 +121,14 @@ def process_pipeline(audio_file=None, transcript: str | None = None) -> dict | N return results - except Exception as e: + except (ContentGenerationError, openai.APIError, OSError) as e: _update_pipeline(pipeline_placeholder, 0, 0, 0, f"Error: {e!s}", start_time) st.error(f"Pipeline failed: {e!s}") return None + except Exception as e: + _update_pipeline(pipeline_placeholder, 0, 0, 0, f"Unexpected error: {e!s}", start_time) + st.error(f"Unexpected 
pipeline error: {e!s}") + return None def _run_generation_steps( @@ -222,8 +229,10 @@ def _run_generation_steps( _update_pipeline(pipeline_placeholder, 5, 90, 96, "Saving to database...", start_time) try: save_content_to_db(results) - except Exception as e: + except APIError as e: st.warning(f"Content saved locally but database save failed: {e}") + except Exception as e: + st.warning(f"Content saved locally but unexpected database error: {e}") _update_pipeline( pipeline_placeholder, 5, 100, 100, "Pipeline complete! All content generated successfully.", start_time @@ -275,8 +284,10 @@ def save_content_to_db(content_data: dict): ) if content_id: st.success(f"Content saved to database (ID: {content_id})") - except Exception as e: + except APIError as e: st.warning(f"Database save failed: {e}") + except Exception as e: + st.warning(f"Unexpected database save error: {e}") def show_processing_pipeline( diff --git a/core/session_manager.py b/core/session_manager.py index 43e6c0b..b7d56a7 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -39,16 +39,20 @@ def _load(self): return self.data.update(loaded) st.session_state.update(self.data) - except Exception as e: + except (json.JSONDecodeError, OSError, KeyError) as e: logger.error(f"Session load failed: {e}") + except Exception as e: + logger.error(f"Unexpected error in session load: {e}") def _save(self): try: self.session_dir.mkdir(parents=True, exist_ok=True) with open(self.session_file, "w", encoding="utf-8") as f: json.dump(self.data, f) - except Exception as e: + except OSError as e: logger.error(f"Session save failed: {e}") + except Exception as e: + logger.error(f"Unexpected error in session save: {e}") def authenticate_user(self, user_id: str, email: str) -> bool: self.data.update( diff --git a/core/streaming_pipeline.py b/core/streaming_pipeline.py index df09144..90c6eeb 100644 --- a/core/streaming_pipeline.py +++ b/core/streaming_pipeline.py @@ -6,9 +6,13 @@ import time from typing import Any 
+import openai import streamlit as st +from postgrest.exceptions import APIError +from .constants import LARGE_FILE_THRESHOLD_MB from .content_generation import ( + ContentGenerationError, generate_article, generate_outline, generate_social_content, @@ -53,7 +57,7 @@ def start_pipeline(self, audio_file): "name": audio_file.name, "size": len(audio_file.getvalue()), "size_mb": file_size_mb, - "is_large_file": file_size_mb > 20, + "is_large_file": file_size_mb > LARGE_FILE_THRESHOLD_MB, } if not hasattr(st.session_state, "prompts"): @@ -84,12 +88,18 @@ def process_next_step(self): st.session_state.pipeline_step_index += 1 return True - except Exception as e: + except (ContentGenerationError, openai.APIError, ValueError) as e: error_msg = str(e) st.session_state.pipeline_errors[step_id] = error_msg st.session_state.pipeline_active = False st.error(f"Error in {step_id}: {error_msg}") return False + except Exception as e: + error_msg = str(e) + st.session_state.pipeline_errors[step_id] = error_msg + st.session_state.pipeline_active = False + st.error(f"Unexpected error in {step_id}: {error_msg}") + return False def _execute_step(self, step_id: str, step_index: int) -> Any: """Execute a specific pipeline step""" @@ -116,9 +126,12 @@ def _execute_step(self, step_id: str, step_index: int) -> Any: thinking_step_complete(step_id) return result - except Exception as e: + except (ContentGenerationError, openai.APIError, ValueError) as e: thinking_error(step_id, str(e)) raise + except Exception as e: + thinking_error(step_id, f"Unexpected: {e!s}") + raise def _step_upload_validation(self) -> dict[str, Any]: """Step 1: Validate uploaded file""" @@ -279,8 +292,10 @@ def _step_database_storage(self) -> str: time.sleep(0.3) return f"Content saved with ID: {content_id}" - except Exception as e: + except APIError as e: return f"Database save failed: {str(e)}" + except Exception as e: + return f"Unexpected database error: {str(e)}" @property def is_active(self) -> bool: diff --git 
a/core/supabase_integration.py b/core/supabase_integration.py index 087b1d5..9258c38 100644 --- a/core/supabase_integration.py +++ b/core/supabase_integration.py @@ -12,8 +12,10 @@ from typing import Any from dotenv import load_dotenv +from postgrest.exceptions import APIError from supabase import Client, create_client +from .constants import DEFAULT_ANALYTICS_DAYS, DEFAULT_CONTENT_QUERY_LIMIT, DEFAULT_USAGE_QUOTA_MINUTES from .utils import hash_password # Load environment variables @@ -55,9 +57,12 @@ def test_connection(self) -> bool: self.client.table("users").select("id").limit(1).execute() logger.info("Supabase connection test successful") return True - except Exception as e: + except APIError as e: logger.error(f"Supabase connection test failed: {e}") return False + except Exception as e: + logger.error(f"Unexpected error in Supabase connection test: {e}") + return False # User Management def create_user(self, email: str, password: str, metadata: dict[str, Any] = None) -> dict[str, Any]: @@ -70,7 +75,7 @@ def create_user(self, email: str, password: str, metadata: dict[str, Any] = None "email": email, "password": hashed_password, # Store hashed password "created_at": datetime.now().isoformat(), - "usage_quota": 60, # Default 60 minutes per month + "usage_quota": DEFAULT_USAGE_QUOTA_MINUTES, "usage_current": 0, "is_admin": False, "subscription_tier": "free", @@ -82,27 +87,36 @@ def create_user(self, email: str, password: str, metadata: dict[str, Any] = None result = self.client.table("users").insert(user_data).execute() logger.info(f"User created successfully: {email}") return result.data[0] if result.data else {} - except Exception as e: + except APIError as e: logger.error(f"Error creating user: {e}") raise + except Exception as e: + logger.error(f"Unexpected error creating user: {e}") + raise def get_user(self, user_id: int) -> dict[str, Any] | None: """Get user by ID""" try: result = self.client.table("users").select("*").eq("id", user_id).execute() return 
result.data[0] if result.data else None - except Exception as e: + except APIError as e: logger.error(f"Error fetching user: {e}") return None + except Exception as e: + logger.error(f"Unexpected error fetching user: {e}") + return None def get_user_by_email(self, email: str) -> dict[str, Any] | None: """Get user by email""" try: result = self.client.table("users").select("*").eq("email", email).execute() return result.data[0] if result.data else None - except Exception as e: + except APIError as e: logger.error(f"Error fetching user by email: {e}") return None + except Exception as e: + logger.error(f"Unexpected error fetching user by email: {e}") + return None def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: """Update user's current usage""" @@ -113,9 +127,12 @@ def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: result = self.client.table("users").update({"usage_current": usage_minutes}).eq("id", user_id).execute() return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error updating user usage: {e}") return False + except Exception as e: + logger.error(f"Unexpected error updating user usage: {e}") + return False # Content Storage def save_content(self, user_id: int, content_data: dict[str, Any]) -> str | None: @@ -142,11 +159,14 @@ def save_content(self, user_id: int, content_data: dict[str, Any]) -> str | None logger.info(f"Content saved successfully with ID: {content_id}") return content_id return None - except Exception as e: + except APIError as e: logger.error(f"Error saving content: {e}") return None + except Exception as e: + logger.error(f"Unexpected error saving content: {e}") + return None - def get_user_content(self, user_id: int, limit: int = 50) -> list[dict[str, Any]]: + def get_user_content(self, user_id: int, limit: int = DEFAULT_CONTENT_QUERY_LIMIT) -> list[dict[str, Any]]: """Get user's content history""" try: result = ( @@ -158,9 +178,12 @@ def get_user_content(self, 
user_id: int, limit: int = 50) -> list[dict[str, Any] .execute() ) return result.data or [] - except Exception as e: + except APIError as e: logger.error(f"Error fetching user content: {e}") return [] + except Exception as e: + logger.error(f"Unexpected error fetching user content: {e}") + return [] # API Key Management def save_user_api_keys(self, user_id: int, api_keys: dict[str, str]) -> bool: @@ -175,9 +198,12 @@ def save_user_api_keys(self, user_id: int, api_keys: dict[str, str]) -> bool: ) return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving API keys: {e}") return False + except Exception as e: + logger.error(f"Unexpected error saving API keys: {e}") + return False def get_user_api_keys(self, user_id: int) -> dict[str, str]: """Get user's API keys""" @@ -186,9 +212,12 @@ def get_user_api_keys(self, user_id: int) -> dict[str, str]: if result.data and result.data[0]["api_keys"]: return result.data[0]["api_keys"] return {} - except Exception as e: + except APIError as e: logger.error(f"Error fetching API keys: {e}") return {} + except Exception as e: + logger.error(f"Unexpected error fetching API keys: {e}") + return {} # Knowledge Base Management def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> bool: @@ -225,9 +254,12 @@ def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> result = self.client.table("knowledge_base").insert(kb_record).execute() return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving knowledge base file: {e}") return False + except Exception as e: + logger.error(f"Unexpected error saving knowledge base file: {e}") + return False def get_user_knowledge_base(self, user_id: int) -> dict[str, str]: """Get user's knowledge base files""" @@ -241,9 +273,12 @@ def get_user_knowledge_base(self, user_id: int) -> dict[str, str]: kb_dict[name] = item["content"] return kb_dict - except Exception as e: + 
except APIError as e: logger.error(f"Error fetching knowledge base: {e}") return {} + except Exception as e: + logger.error(f"Unexpected error fetching knowledge base: {e}") + return {} # Custom Prompts Management def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bool: @@ -280,9 +315,12 @@ def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bo result = self.client.table("custom_prompts").insert(prompt_record).execute() return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error saving custom prompt: {e}") return False + except Exception as e: + logger.error(f"Unexpected error saving custom prompt: {e}") + return False def get_user_prompts(self, user_id: int) -> dict[str, str]: """Get user's custom prompts""" @@ -294,9 +332,12 @@ def get_user_prompts(self, user_id: int) -> dict[str, str]: prompts_dict[item["prompt_type"]] = item["content"] return prompts_dict - except Exception as e: + except APIError as e: logger.error(f"Error fetching custom prompts: {e}") return {} + except Exception as e: + logger.error(f"Unexpected error fetching custom prompts: {e}") + return {} # Analytics and Monitoring def log_pipeline_execution(self, user_id: int, pipeline_data: dict[str, Any]) -> bool: @@ -316,11 +357,14 @@ def log_pipeline_execution(self, user_id: int, pipeline_data: dict[str, Any]) -> result = self.client.table("pipeline_logs").insert(log_record).execute() return bool(result.data) - except Exception as e: + except APIError as e: logger.error(f"Error logging pipeline execution: {e}") return False + except Exception as e: + logger.error(f"Unexpected error logging pipeline execution: {e}") + return False - def get_user_analytics(self, user_id: int, days: int = 30) -> dict[str, Any]: + def get_user_analytics(self, user_id: int, days: int = DEFAULT_ANALYTICS_DAYS) -> dict[str, Any]: """Get user analytics for the last N days""" try: start_date = (datetime.now() - 
timedelta(days=days)).isoformat() @@ -345,9 +389,12 @@ def get_user_analytics(self, user_id: int, days: int = 30) -> dict[str, Any]: } return analytics - except Exception as e: + except APIError as e: logger.error(f"Error fetching user analytics: {e}") return {} + except Exception as e: + logger.error(f"Unexpected error fetching user analytics: {e}") + return {} def _get_most_frequent(self, logs: list[dict], field: str) -> str: """Helper to get most frequent value from logs""" @@ -410,9 +457,12 @@ def get_user_context(self, user_id: int) -> dict[str, Any]: } return context - except Exception as e: + except APIError as e: logger.error(f"Error getting user context for MCP: {e}") return {} + except Exception as e: + logger.error(f"Unexpected error getting user context for MCP: {e}") + return {} def update_context_from_interaction(self, user_id: int, interaction_data: dict[str, Any]) -> bool: """Update user context based on AI interaction results""" @@ -429,9 +479,12 @@ def update_context_from_interaction(self, user_id: int, interaction_data: dict[s self.db.save_content(user_id, interaction_data["content"]) return True - except Exception as e: + except APIError as e: logger.error(f"Error updating context from interaction: {e}") return False + except Exception as e: + logger.error(f"Unexpected error updating context from interaction: {e}") + return False # Global instance From f0e40141ecfc0cad91c7f73016858e2c666ccf2c Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:31:28 -0800 Subject: [PATCH 39/46] refactor(core): rewire pipeline and CLI to use pipeline_engine Replace duplicated orchestration logic in pipeline.py and whisperforge_cli.py with calls to the pure pipeline_engine module. pipeline.py is now a thin Streamlit UI wrapper (~190 lines, down from 348) with a StreamlitPipelineListener that maps engine callbacks to Aurora UI updates. 
Business logic (transcription, generation, Notion publishing, DB save) is fully delegated to pipeline_engine.run_pipeline. whisperforge_cli.py now uses CLIPipelineListener for terminal output instead of manually calling each generation function. Wire DI container in app_simple.py via set_services() at startup. Co-Authored-By: Claude Opus 4.6 --- app_simple.py | 9 +- core/pipeline.py | 328 ++++++++++++-------------------------------- whisperforge_cli.py | 148 ++++++++++---------- 3 files changed, 168 insertions(+), 317 deletions(-) diff --git a/app_simple.py b/app_simple.py index 6bdba3d..55ee762 100755 --- a/app_simple.py +++ b/app_simple.py @@ -5,10 +5,12 @@ import streamlit as st from dotenv import load_dotenv +from core.config import Config from core.content_display import show_results from core.file_upload import EnhancedLargeFileProcessor from core.pages import show_knowledge_base, show_prompts_page, show_settings_page from core.pipeline import process_pipeline +from core.services import Services, set_services from core.styling import ( apply_aurora_theme, create_aurora_header, @@ -28,7 +30,7 @@ def init_session(): - """Initialize simple session state.""" + """Initialize simple session state and wire up the DI container.""" if "authenticated" not in st.session_state: st.session_state.authenticated = False if "user_id" not in st.session_state: @@ -36,6 +38,11 @@ def init_session(): if "user_email" not in st.session_state: st.session_state.user_email = None + # Initialise the service container once per process + if "services_initialised" not in st.session_state: + set_services(Services(config=Config.from_env())) + st.session_state.services_initialised = True + def show_login(): """Simple test login.""" diff --git a/core/pipeline.py b/core/pipeline.py index cfa5203..cd47771 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -2,255 +2,123 @@ Content Processing Pipeline ============================= -Core audio-to-content pipeline with Aurora visualization. 
-Handles transcription, wisdom extraction, outline, article, social, and Notion publishing. +Thin Streamlit UI wrapper around :mod:`pipeline_engine`. +Provides Aurora-themed progress visualisation while the pure-logic engine +does the actual work. """ import logging -import os -import tempfile import time -from datetime import datetime -import openai import streamlit as st -from postgrest.exceptions import APIError - -from .content_generation import ( - ContentGenerationError, - generate_article, - generate_outline, - generate_social_content, - generate_wisdom, - transcribe_audio, -) -from .notion_integration import create_notion_page, generate_ai_title -from .prompt_loader import get_prompt_for_step, load_custom_prompts, load_template + +from . import pipeline_engine +from .pipeline_engine import NullListener, PipelineConfig +from .prompt_loader import load_custom_prompts from .supabase_integration import get_supabase_client logger = logging.getLogger(__name__) # Pipeline step definitions -PIPELINE_STEP_NAMES = [ - "Transcription", - "Wisdom Extraction", - "Outline Creation", - "Article Generation", - "Social Content", - "Publishing", -] +PIPELINE_STEP_NAMES = pipeline_engine.PIPELINE_STEP_NAMES + + +# --------------------------------------------------------------------------- +# Streamlit listener -- maps engine callbacks to Aurora UI +# --------------------------------------------------------------------------- + + +class StreamlitPipelineListener(NullListener): + """Bridges :class:`pipeline_engine.PipelineListener` events to Streamlit widgets.""" + + def __init__(self, pipeline_placeholder, start_time, containers): + self.placeholder = pipeline_placeholder + self.start_time = start_time + self.containers = containers + + def on_step_progress(self, step_index, step_progress, total_progress, message): + _update_pipeline(self.placeholder, step_index, step_progress, total_progress, message, self.start_time) + + def on_step_complete(self, step_index, step_name, 
result): + container = self.containers.get(step_name) + if container and result and isinstance(result, str): + with container: + st.markdown(f"**{step_name} Complete**") + st.markdown(result) + + def on_error(self, step_index, error): + st.error(f"Pipeline failed: {error!s}") + + def on_pipeline_complete(self, results): + st.markdown( + '
' + '

Pipeline Complete!

' + '

Your content has been transformed with AI magic

' + "
", + unsafe_allow_html=True, + ) + time.sleep(2) + self.placeholder.empty() + + +# --------------------------------------------------------------------------- +# Public entry point -- signature unchanged for app_simple.py callers +# --------------------------------------------------------------------------- def process_pipeline(audio_file=None, transcript: str | None = None) -> dict | None: """Unified content pipeline. Supply *audio_file* to transcribe first, or *transcript* to skip transcription. - Returns the results dict on success, or None on failure. + Returns the results dict on success, or ``None`` on failure. """ - results: dict = {} start_time = time.time() + # Load any user-defined custom prompts custom_prompts = load_custom_prompts() if custom_prompts: st.info(f"Using {len(custom_prompts)} custom prompts") + # --- Streamlit UI scaffolding --- pipeline_placeholder = st.empty() st.markdown("### Live Content Generation") - transcript_container = st.expander("Transcription", expanded=False) - wisdom_container = st.expander("Wisdom Extraction", expanded=False) - outline_container = st.expander("Outline Creation", expanded=False) - article_container = st.expander("Article Generation", expanded=False) - social_container = st.expander("Social Content", expanded=False) - notion_container = st.expander("Notion Publishing", expanded=False) - - try: - # Step 0: Transcription (skipped when transcript is provided) - if audio_file is not None: - _update_pipeline(pipeline_placeholder, 0, 0, 0, "Starting transcription...", start_time) - - suffix = os.path.splitext(audio_file.name)[1] - with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file: - tmp_file.write(audio_file.getvalue()) - tmp_file_path = tmp_file.name - - try: - _update_pipeline(pipeline_placeholder, 0, 50, 8, "Transcribing audio with Whisper AI...", start_time) - transcript = transcribe_audio(tmp_file_path) - if not transcript: - st.error("Transcription failed: empty result") - return None - 
finally: - if os.path.exists(tmp_file_path): - os.unlink(tmp_file_path) - - results["transcript"] = transcript - with transcript_container: - st.markdown("**Transcription Complete**") - st.text_area("Transcript", transcript, height=200, disabled=True) - - _update_pipeline(pipeline_placeholder, 0, 100, 17, "Transcription complete!", start_time) - else: - # Pre-transcribed content - if not transcript: - st.error("No audio file or transcript provided.") - return None - results["transcript"] = transcript - _update_pipeline( - pipeline_placeholder, - 1, - 0, - 17, - f"Using pre-transcribed content ({len(transcript)} characters)", - start_time, - ) - - # Steps 1-5: Generation, publishing, saving - _run_generation_steps( - transcript, - results, - custom_prompts, - pipeline_placeholder, - start_time, - wisdom_container, - outline_container, - article_container, - social_container, - notion_container, - ) + containers = { + "Transcription": st.expander("Transcription", expanded=False), + "Wisdom Extraction": st.expander("Wisdom Extraction", expanded=False), + "Outline Creation": st.expander("Outline Creation", expanded=False), + "Article Generation": st.expander("Article Generation", expanded=False), + "Social Content": st.expander("Social Content", expanded=False), + "Publishing": st.expander("Notion Publishing", expanded=False), + } - return results - - except (ContentGenerationError, openai.APIError, OSError) as e: - _update_pipeline(pipeline_placeholder, 0, 0, 0, f"Error: {e!s}", start_time) - st.error(f"Pipeline failed: {e!s}") - return None - except Exception as e: - _update_pipeline(pipeline_placeholder, 0, 0, 0, f"Unexpected error: {e!s}", start_time) - st.error(f"Unexpected pipeline error: {e!s}") - return None - - -def _run_generation_steps( - transcript, - results, - custom_prompts, - pipeline_placeholder, - start_time, - wisdom_container, - outline_container, - article_container, - social_container, - notion_container, -): - """Execute the generation steps 
(wisdom through publishing).""" - # --- Wisdom Extraction --- - _update_pipeline(pipeline_placeholder, 1, 0, 17, "Extracting wisdom and insights...", start_time) - wisdom_prompt = get_prompt_for_step("wisdom", custom_prompts) - _update_pipeline(pipeline_placeholder, 1, 50, 25, "Analyzing content for key insights...", start_time) - - wisdom = generate_wisdom(transcript, custom_prompt=wisdom_prompt, knowledge_base={}) - results["wisdom"] = wisdom - - with wisdom_container: - st.markdown("**Wisdom Extraction Complete**") - st.markdown(wisdom) - _update_pipeline(pipeline_placeholder, 1, 100, 33, "Wisdom extraction complete!", start_time) - - # --- Outline Creation --- - _update_pipeline(pipeline_placeholder, 2, 0, 33, "Creating structured outline...", start_time) - outline_prompt = get_prompt_for_step("outline", custom_prompts) - _update_pipeline(pipeline_placeholder, 2, 50, 42, "Structuring content hierarchy...", start_time) - - outline = generate_outline(transcript, wisdom, custom_prompt=outline_prompt, knowledge_base={}) - results["outline"] = outline - - with outline_container: - st.markdown("**Outline Creation Complete**") - st.markdown(outline) - _update_pipeline(pipeline_placeholder, 2, 100, 50, "Outline creation complete!", start_time) - - # --- Article Generation --- - _update_pipeline(pipeline_placeholder, 3, 0, 50, "Generating comprehensive article...", start_time) - article_prompt = get_prompt_for_step("article", custom_prompts) - selected_template = st.session_state.get("article_template") - if selected_template: - template_text = load_template(selected_template) - if template_text and article_prompt: - article_prompt = template_text + "\n" + article_prompt - - _update_pipeline(pipeline_placeholder, 3, 50, 58, "Writing detailed article content...", start_time) - - article = generate_article(transcript, wisdom, outline, custom_prompt=article_prompt, knowledge_base={}) - results["article"] = article - - with article_container: - st.markdown("**Article 
Generation Complete**") - st.markdown(article) - _update_pipeline(pipeline_placeholder, 3, 100, 67, "Article generation complete!", start_time) - - # --- Social Content --- - _update_pipeline(pipeline_placeholder, 4, 0, 67, "Creating social media content...", start_time) - social_prompt = get_prompt_for_step("social", custom_prompts) - _update_pipeline(pipeline_placeholder, 4, 50, 75, "Generating social media posts...", start_time) - - social = generate_social_content(wisdom, outline, article, custom_prompt=social_prompt, knowledge_base={}) - results["social_content"] = social - - with social_container: - st.markdown("**Social Content Creation Complete**") - st.markdown(social) - _update_pipeline(pipeline_placeholder, 4, 100, 83, "Social content creation complete!", start_time) - - # --- Notion Publishing --- - _update_pipeline(pipeline_placeholder, 5, 0, 83, "Publishing to Notion workspace...", start_time) - - if os.getenv("NOTION_API_KEY") and os.getenv("NOTION_DATABASE_ID"): - ai_title = generate_ai_title(transcript) - _update_pipeline(pipeline_placeholder, 5, 50, 90, "Uploading content to Notion...", start_time) - - notion_url = create_notion_page(ai_title, results) - if notion_url: - results["notion_url"] = notion_url - with notion_container: - st.markdown("**Notion Publishing Complete**") - st.markdown(f"**Page Title:** {ai_title}") - st.markdown(f"[Open in Notion]({notion_url})") - else: - with notion_container: - st.markdown("**Notion Publishing Failed**") - st.warning("Check your Notion API configuration in Settings.") - else: - with notion_container: - st.markdown("**Notion Publishing Disabled**") - st.info("Configure Notion API in Settings to enable auto-publishing.") - - # --- Save to DB --- - _update_pipeline(pipeline_placeholder, 5, 90, 96, "Saving to database...", start_time) - try: - save_content_to_db(results) - except APIError as e: - st.warning(f"Content saved locally but database save failed: {e}") - except Exception as e: - st.warning(f"Content 
saved locally but unexpected database error: {e}") - - _update_pipeline( - pipeline_placeholder, 5, 100, 100, "Pipeline complete! All content generated successfully.", start_time + # --- Build engine config from session state --- + config = PipelineConfig( + custom_prompts=custom_prompts or {}, + article_template=st.session_state.get("article_template"), + knowledge_base=st.session_state.get("knowledge_base", {}), + user_id=st.session_state.get("user_id"), + publish_to_notion=True, ) - st.markdown( - """ -
-

Pipeline Complete!

-

Your content has been transformed with AI magic

-
- """, - unsafe_allow_html=True, + # --- Obtain content store (Supabase) --- + try: + content_store = get_supabase_client() + except Exception: + logger.warning("Supabase client unavailable -- content will not be persisted") + content_store = None + + # --- Create listener and run engine --- + listener = StreamlitPipelineListener(pipeline_placeholder, start_time, containers) + + return pipeline_engine.run_pipeline( + audio_file=audio_file, + transcript=transcript, + config=config, + content_store=content_store, + listener=listener, ) - time.sleep(2) - pipeline_placeholder.empty() - def _update_pipeline(placeholder, step, step_progress, total_progress, message, start_time): """Update the pipeline visualization placeholder.""" @@ -264,32 +132,6 @@ def _update_pipeline(placeholder, step, step_progress, total_progress, message, ) -def save_content_to_db(content_data: dict): - """Save generated content to Supabase database.""" - try: - supabase = get_supabase_client() - if supabase and st.session_state.get("user_id"): - content_id = supabase.save_content( - st.session_state.user_id, - { - "title": content_data.get("title", "Untitled"), - "transcript": content_data.get("transcript", ""), - "wisdom": content_data.get("wisdom", ""), - "outline": content_data.get("outline", ""), - "article": content_data.get("article", ""), - "social_content": content_data.get("social_content", ""), - "notion_url": content_data.get("notion_url", ""), - "created_at": datetime.now().isoformat(), - }, - ) - if content_id: - st.success(f"Content saved to database (ID: {content_id})") - except APIError as e: - st.warning(f"Database save failed: {e}") - except Exception as e: - st.warning(f"Unexpected database save error: {e}") - - def show_processing_pipeline( current_step=0, step_progress=0, diff --git a/whisperforge_cli.py b/whisperforge_cli.py index 116442e..7b1f985 100644 --- a/whisperforge_cli.py +++ b/whisperforge_cli.py @@ -18,14 +18,9 @@ try: from dotenv import load_dotenv - from 
core.content_generation import ( - generate_article, - generate_outline, - generate_wisdom, - transcribe_audio, - ) + from core.content_generation import transcribe_audio from core.logging_config import logger - from core.utils import DEFAULT_PROMPTS + from core.pipeline_engine import NullListener, PipelineConfig, run_pipeline # Load environment variables load_dotenv() @@ -36,6 +31,19 @@ sys.exit(1) +class CLIPipelineListener(NullListener): + """Pipeline listener that prints progress to the terminal via click.""" + + def on_step_start(self, step_index, step_name, message): + click.echo(f" [{step_index + 1}/6] {step_name}: {message}") + + def on_step_complete(self, step_index, step_name, result): + click.echo(f" [{step_index + 1}/6] {step_name}: done") + + def on_error(self, step_index, error): + click.echo(f" Error at step {step_index}: {error}", err=True) + + class CLIFile: """Simple file wrapper for CLI usage""" @@ -187,75 +195,72 @@ def run( # Create CLI file wrapper audio_file = CLIFile(input_file) + stem = Path(input_file).stem try: - # Step 1: Transcription - click.echo("🎵 Transcribing audio...") - transcript = transcribe_audio(audio_file) + # --- Transcription-only shortcut --- + if output_format == "transcript": + click.echo("Transcribing audio...") + transcript = transcribe_audio(str(audio_file.file_path)) + if not transcript: + click.echo("Transcription failed: empty result", err=True) + sys.exit(1) + + transcript_file = output_dir / f"{stem}_transcript.txt" + with open(transcript_file, "w", encoding="utf-8") as f: + f.write(transcript) + click.echo(f"Transcript saved: {transcript_file}") + if verbose: + click.echo(f"Preview: {transcript[:200]}...") + return - if not transcript or "Error" in transcript: - click.echo(f"❌ Transcription failed: {transcript}", err=True) - sys.exit(1) + # --- Full pipeline via engine --- + config = PipelineConfig(publish_to_notion=False) + listener = CLIPipelineListener() if verbose else NullListener() - # Save transcript -
transcript_file = output_dir / f"{Path(input_file).stem}_transcript.txt" - with open(transcript_file, "w", encoding="utf-8") as f: - f.write(transcript) + results = run_pipeline( + audio_file=audio_file, + config=config, + listener=listener, + ) - click.echo(f"βœ… Transcript saved: {transcript_file}") + if results is None: + click.echo("Pipeline failed -- see errors above.", err=True) + sys.exit(1) - if output_format == "transcript": - click.echo(f"πŸ“„ Transcript preview: {transcript[:200]}...") - return + # --- Write results to files --- + output_map = { + "transcript": ("_transcript.txt", "transcript"), + "wisdom": ("_wisdom.md", "wisdom"), + "outline": ("_outline.md", "outline"), + "article": ("_article.md", "article"), + "social_content": ("_social.md", "social_content"), + } - # Step 2: Generate content based on format - results = {} - - if output_format in ["wisdom", "all"]: - click.echo("🧠 Generating wisdom extraction...") - wisdom = generate_wisdom(transcript, DEFAULT_PROMPTS["wisdom_extraction"]) - if wisdom and "Error" not in wisdom: - wisdom_file = output_dir / f"{Path(input_file).stem}_wisdom.md" - with open(wisdom_file, "w", encoding="utf-8") as f: - f.write(wisdom) - results["wisdom"] = wisdom_file - click.echo(f"βœ… Wisdom saved: {wisdom_file}") - - if output_format in ["outline", "all"]: - click.echo("πŸ“‹ Generating outline...") - outline = generate_outline(transcript, DEFAULT_PROMPTS["outline_creation"]) - if outline and "Error" not in outline: - outline_file = output_dir / f"{Path(input_file).stem}_outline.md" - with open(outline_file, "w", encoding="utf-8") as f: - f.write(outline) - results["outline"] = outline_file - click.echo(f"βœ… Outline saved: {outline_file}") - - if output_format in ["article", "all"]: - click.echo("πŸ“ Generating article...") - article = generate_article(transcript, DEFAULT_PROMPTS["article_writing"]) - if article and "Error" not in article: - article_file = output_dir / f"{Path(input_file).stem}_article.md" - with 
open(article_file, "w", encoding="utf-8") as f: - f.write(article) - results["article"] = article_file - click.echo(f"βœ… Article saved: {article_file}") + saved_files: dict[str, Path] = {} + for key, (suffix, result_key) in output_map.items(): + content = results.get(result_key) + if not content: + continue + if output_format not in ("all", key): + continue + out_path = output_dir / f"{stem}{suffix}" + with open(out_path, "w", encoding="utf-8") as f: + f.write(content) + saved_files[key] = out_path # Summary - click.echo("\nπŸŽ‰ Pipeline completed successfully!") - click.echo(f"πŸ“ Output directory: {output_dir}") - click.echo(f"πŸ“„ Transcript: {transcript_file}") + click.echo("\nPipeline completed successfully!") + click.echo(f"Output directory: {output_dir}") + for content_type, file_path in saved_files.items(): + click.echo(f" {content_type}: {file_path}") - for content_type, file_path in results.items(): - click.echo(f"πŸ“ {content_type.title()}: {file_path}") - - # Show preview - if verbose and transcript: - click.echo(f"\nπŸ“„ Transcript preview:\n{transcript[:300]}...") + if verbose and results.get("transcript"): + click.echo(f"\nTranscript preview:\n{results['transcript'][:300]}...") except Exception as e: logger.logger.error(f"CLI pipeline error: {e}") - click.echo(f"❌ Pipeline failed: {e}", err=True) + click.echo(f"Pipeline failed: {e}", err=True) sys.exit(1) @@ -286,27 +291,24 @@ def transcribe(input_file: str, output: str | None): # Set up output file output_file = Path(output) if output else Path(f"{Path(input_file).stem}_transcript.txt") - # Create CLI file wrapper - audio_file = CLIFile(input_file) - try: - click.echo("🎡 Transcribing audio...") - transcript = transcribe_audio(audio_file) + click.echo("Transcribing audio...") + transcript = transcribe_audio(input_file) - if not transcript or "Error" in transcript: - click.echo(f"❌ Transcription failed: {transcript}", err=True) + if not transcript: + click.echo("Transcription failed: empty result", 
err=True) sys.exit(1) # Save transcript with open(output_file, "w", encoding="utf-8") as f: f.write(transcript) - click.echo(f"βœ… Transcript saved: {output_file}") - click.echo(f"πŸ“„ Preview: {transcript[:200]}...") + click.echo(f"Transcript saved: {output_file}") + click.echo(f"Preview: {transcript[:200]}...") except Exception as e: logger.logger.error(f"CLI transcription error: {e}") - click.echo(f"❌ Transcription failed: {e}", err=True) + click.echo(f"Transcription failed: {e}", err=True) sys.exit(1) From ba6ae20ca04c01bf8af74a89f93d3129a61436e1 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:31:40 -0800 Subject: [PATCH 40/46] feat(core): add multi-provider AI support for OpenAI, Anthropic, and Grok Replace hardcoded OpenAI-only content generation with a provider- dispatching system that supports OpenAI, Anthropic (Claude), and Grok (xAI) with automatic fallback. - content_generation.py: add _chat_completion() dispatcher that handles API shape differences between providers, _resolve_provider() for preference + fallback logic, per-provider model selection - notion_integration.py: use shared _chat_completion() for AI title generation instead of direct OpenAI calls - pages.py: expand settings UI with provider selector dropdown, 3-column API key layout, per-provider model selectors Whisper transcription remains OpenAI-only. Function signatures are unchanged so all existing callers work without modification. 
Co-Authored-By: Claude Opus 4.6 --- core/content_generation.py | 243 +++++++++++++++++++++++++++---------- core/notion_integration.py | 31 ++--- core/pages.py | 112 +++++++++++++---- 3 files changed, 280 insertions(+), 106 deletions(-) diff --git a/core/content_generation.py b/core/content_generation.py index 7367460..7db02b8 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -1,25 +1,41 @@ """ Content Generation Module for WhisperForge -Handles AI-powered content creation including transcription, wisdom extraction, and content generation +Handles AI-powered content creation including transcription, wisdom extraction, and content generation. +Supports multiple AI providers: OpenAI, Anthropic, and Grok (xAI). """ import logging import os +from .api_clients import get_anthropic_client, get_grok_client, get_openai_client from .exceptions import PipelineError -from .utils import get_enhanced_prompt, get_openai_client +from .prompts import get_enhanced_prompt logger = logging.getLogger(__name__) -AI_MODEL = os.getenv("GPT_MODEL", "gpt-4o") WHISPER_MODEL = os.getenv("WHISPER_MODEL", "whisper-1") -# Token and truncation limits for AI generation calls MAX_TOKENS_DEFAULT = 1500 MAX_TOKENS_ARTICLE = 2000 TRANSCRIPT_PREVIEW_LENGTH = 2000 ARTICLE_PREVIEW_LENGTH = 1500 +DEFAULT_MODELS = { + "openai": os.getenv("GPT_MODEL", "gpt-4o"), + "anthropic": os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"), + "grok": os.getenv("GROK_MODEL", "grok-3"), +} + +# Provider priority for fallback ordering +_PROVIDER_PRIORITY = ("openai", "anthropic", "grok") + +# Map provider names to their client factory functions +_CLIENT_FACTORIES = { + "openai": get_openai_client, + "anthropic": get_anthropic_client, + "grok": get_grok_client, +} + class ContentGenerationError(PipelineError): """Raised when content generation fails""" @@ -27,92 +43,149 @@ class ContentGenerationError(PipelineError): pass -def _get_client(): - """Get OpenAI client or raise if not 
configured""" - client = get_openai_client() - if not client: - raise ContentGenerationError("OpenAI API key is not configured.") - return client +def _resolve_provider() -> str: + """Determine the active AI provider. + + Checks ``st.session_state.ai_provider`` first (if Streamlit is running), + then falls back to the first provider with a configured API key in + priority order: openai > anthropic > grok. + + Raises ``ContentGenerationError`` if no provider is available. + """ + # Try Streamlit session state first + try: + import streamlit as st + + preferred = st.session_state.get("ai_provider") + if preferred and _CLIENT_FACTORIES.get(preferred): + client = _CLIENT_FACTORIES[preferred]() + if client is not None: + return preferred + logger.warning("Preferred provider '%s' is not configured (missing API key). Trying fallbacks.", preferred) + except (ImportError, RuntimeError): + # Not running in Streamlit context + pass + + # Fallback: first available provider + for provider in _PROVIDER_PRIORITY: + client = _CLIENT_FACTORIES[provider]() + if client is not None: + return provider + + raise ContentGenerationError( + "No AI provider is configured. Set at least one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GROK_API_KEY." + ) -def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None) -> str: - """Extract key insights and wisdom from a transcript""" - system_prompt = custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) +def _call_openai_compatible(provider: str, system_prompt: str, user_content: str, max_tokens: int, model: str) -> str: + """Make a chat completion call using the OpenAI SDK shape. + + Works for both OpenAI and Grok (xAI), since Grok exposes an + OpenAI-compatible API. 
+ """ + client = _CLIENT_FACTORIES[provider]() + if not client: + raise ContentGenerationError(f"{provider} client is not configured.") - client = _get_client() response = client.chat.completions.create( - model=AI_MODEL, + model=model, messages=[ {"role": "system", "content": system_prompt}, - {"role": "user", "content": f"Here's the transcription to analyze:\n\n{transcript}"}, + {"role": "user", "content": user_content}, ], - max_tokens=MAX_TOKENS_DEFAULT, + max_tokens=max_tokens, ) - return response.choices[0].message.content -def generate_outline( - transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None -) -> str: - """Create a structured outline based on transcript and wisdom""" - system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) - - content = f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}" +def _call_anthropic(system_prompt: str, user_content: str, max_tokens: int, model: str) -> str: + """Make a chat completion call using the Anthropic SDK.""" + client = get_anthropic_client() + if not client: + raise ContentGenerationError("Anthropic client is not configured.") - client = _get_client() - response = client.chat.completions.create( - model=AI_MODEL, - messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], - max_tokens=MAX_TOKENS_DEFAULT, + response = client.messages.create( + model=model, + max_tokens=max_tokens, + system=system_prompt, + messages=[{"role": "user", "content": user_content}], ) + return response.content[0].text - return response.choices[0].message.content +def _call_provider(provider: str, system_prompt: str, user_content: str, max_tokens: int) -> str: + """Dispatch a chat completion call to the appropriate provider backend.""" + model = DEFAULT_MODELS[provider] -def generate_article( - transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None -) -> str: - """Generate a 
comprehensive article based on transcript, wisdom, and outline""" - system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) + # Check for a session-state model override + try: + import streamlit as st - transcript_excerpt = ( - transcript[:TRANSCRIPT_PREVIEW_LENGTH] if len(transcript) > TRANSCRIPT_PREVIEW_LENGTH else transcript - ) - content = f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}" + model_key = f"{provider}_model" + override = st.session_state.get(model_key) + if override: + model = override + except (ImportError, RuntimeError): + pass - client = _get_client() - response = client.chat.completions.create( - model=AI_MODEL, - messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], - max_tokens=MAX_TOKENS_ARTICLE, - ) + if provider == "anthropic": + return _call_anthropic(system_prompt, user_content, max_tokens, model) + else: + # Both OpenAI and Grok use OpenAI-compatible SDK + return _call_openai_compatible(provider, system_prompt, user_content, max_tokens, model) - return response.choices[0].message.content +def _chat_completion(system_prompt: str, user_content: str, max_tokens: int = MAX_TOKENS_DEFAULT) -> str: + """Run a chat completion with automatic provider resolution and fallback. -def generate_social_content( - wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None -) -> str: - """Generate 5 distinct social media posts""" - system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) + Resolves the preferred provider, attempts the call, and on failure + tries remaining providers in priority order before raising. 
+ """ + primary = _resolve_provider() - article_excerpt = article[:ARTICLE_PREVIEW_LENGTH] if len(article) > ARTICLE_PREVIEW_LENGTH else article - content = f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}" + # Build ordered list: primary first, then remaining providers + providers_to_try = [primary] + [p for p in _PROVIDER_PRIORITY if p != primary] - client = _get_client() - response = client.chat.completions.create( - model=AI_MODEL, - messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": content}], - max_tokens=MAX_TOKENS_DEFAULT, - ) + last_error = None + for provider in providers_to_try: + # Skip providers that aren't configured + client = _CLIENT_FACTORIES[provider]() + if client is None: + continue - return response.choices[0].message.content + try: + return _call_provider(provider, system_prompt, user_content, max_tokens) + except ContentGenerationError: + raise + except Exception as e: + logger.warning("Provider '%s' failed: %s. Trying next provider.", provider, e) + last_error = e + + raise ContentGenerationError(f"All AI providers failed. Last error: {last_error}") + + +# --------------------------------------------------------------------------- +# Whisper transcription (OpenAI-only) +# --------------------------------------------------------------------------- + + +def _get_whisper_client(): + """Get an OpenAI client for Whisper transcription. + + Whisper is only available through the OpenAI API, so this always + returns an OpenAI client or raises. + """ + client = get_openai_client() + if not client: + raise ContentGenerationError( + "OpenAI API key is not configured. Whisper transcription requires an OpenAI API key." 
+ ) + return client def transcribe_audio(audio_file) -> str: """Transcribe audio using OpenAI Whisper - handles both file paths and file objects""" - client = _get_client() + client = _get_whisper_client() if isinstance(audio_file, str): with open(audio_file, "rb") as f: @@ -122,3 +195,49 @@ def transcribe_audio(audio_file) -> str: response = client.audio.transcriptions.create(model=WHISPER_MODEL, file=audio_file) return response.text + + +# --------------------------------------------------------------------------- +# Content generation functions +# --------------------------------------------------------------------------- + + +def generate_wisdom(transcript: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None) -> str: + """Extract key insights and wisdom from a transcript""" + system_prompt = custom_prompt or get_enhanced_prompt("wisdom_extraction", knowledge_base) + return _chat_completion(system_prompt, f"Here's the transcription to analyze:\n\n{transcript}") + + +def generate_outline( + transcript: str, wisdom: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Create a structured outline based on transcript and wisdom""" + system_prompt = custom_prompt or get_enhanced_prompt("outline_creation", knowledge_base) + return _chat_completion(system_prompt, f"TRANSCRIPT:\n{transcript}\n\nWISDOM:\n{wisdom}") + + +def generate_article( + transcript: str, wisdom: str, outline: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Generate a comprehensive article based on transcript, wisdom, and outline""" + system_prompt = custom_prompt or get_enhanced_prompt("article_writing", knowledge_base) + transcript_excerpt = ( + transcript[:TRANSCRIPT_PREVIEW_LENGTH] if len(transcript) > TRANSCRIPT_PREVIEW_LENGTH else transcript + ) + return _chat_completion( + system_prompt, + f"TRANSCRIPT:\n{transcript_excerpt}\n\nWISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}", + max_tokens=MAX_TOKENS_ARTICLE, + 
) + + +def generate_social_content( + wisdom: str, outline: str, article: str, custom_prompt: str = None, knowledge_base: dict[str, str] = None +) -> str: + """Generate 5 distinct social media posts""" + system_prompt = custom_prompt or get_enhanced_prompt("social_media", knowledge_base) + article_excerpt = article[:ARTICLE_PREVIEW_LENGTH] if len(article) > ARTICLE_PREVIEW_LENGTH else article + return _chat_completion( + system_prompt, + f"WISDOM:\n{wisdom}\n\nOUTLINE:\n{outline}\n\nARTICLE:\n{article_excerpt}", + ) diff --git a/core/notion_integration.py b/core/notion_integration.py index 21164d3..39cb978 100644 --- a/core/notion_integration.py +++ b/core/notion_integration.py @@ -9,14 +9,10 @@ import os from datetime import datetime -import openai - -from .utils import get_openai_client +from .content_generation import ContentGenerationError, _chat_completion logger = logging.getLogger(__name__) -AI_MODEL = os.getenv("GPT_MODEL", "gpt-4o") - # Notion API limits content blocks to 2000 chars each _NOTION_CHUNK_SIZE = 1800 _MAX_NOTION_BLOCKS = 50 @@ -25,25 +21,16 @@ def generate_ai_title(transcript: str) -> str: """Generate a concise AI-powered title from a transcript excerpt.""" try: - client = get_openai_client() - if not client: - return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" - - prompt = ( - "Generate a concise, descriptive title (max 60 characters) for this audio transcript:\n\n" - f"{transcript[:500]}...\n\n" - "Title should be clear, specific, professional, and capture the main topic.\n" - "No quotes or special characters.\n\nTitle:" + system_prompt = ( + "Generate a concise, descriptive title (max 60 characters) for an audio transcript. " + "The title should be clear, specific, professional, and capture the main topic. " + "No quotes or special characters. Respond with only the title." ) + user_content = f"Here is the transcript excerpt:\n\n{transcript[:500]}..." 
- response = client.chat.completions.create( - model=AI_MODEL, - messages=[{"role": "user", "content": prompt}], - max_tokens=30, - ) - title = response.choices[0].message.content + title = _chat_completion(system_prompt, user_content, max_tokens=30) return title.strip().replace('"', "").replace("'", "")[:60] - except openai.APIError: + except ContentGenerationError: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" except Exception: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" @@ -84,7 +71,7 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: return None - except (openai.APIError, OSError) as e: + except (ContentGenerationError, OSError) as e: logger.error(f"Notion publishing failed: {e!s}") return None except Exception as e: diff --git a/core/pages.py b/core/pages.py index bd8f060..f6e5310 100644 --- a/core/pages.py +++ b/core/pages.py @@ -19,12 +19,31 @@ def show_settings_page(): """Settings and configuration page.""" st.markdown("### Settings & Configuration") - st.markdown("#### API Keys") + st.markdown("#### AI Provider") + with st.expander("AI Provider Selection", expanded=True): + provider_options = {"OpenAI": "openai", "Anthropic": "anthropic", "Grok (xAI)": "grok"} + current_provider = st.session_state.get("ai_provider", "openai") + # Find display name for current provider + current_label = next((label for label, val in provider_options.items() if val == current_provider), "OpenAI") + selected_label = st.selectbox( + "Active AI Provider", + list(provider_options.keys()), + index=list(provider_options.keys()).index(current_label), + help="Select the AI provider for content generation (wisdom, outline, article, social posts).", + ) + st.session_state.ai_provider = provider_options[selected_label] + + st.info( + "Whisper transcription always requires an OpenAI API key, regardless of which " + "AI provider is selected for content generation." 
+ ) + + st.markdown("#### API Keys & Models") with st.expander("API Configuration", expanded=True): - col1, col2 = st.columns(2) + col1, col2, col3 = st.columns(3) with col1: - st.markdown("**OpenAI Configuration**") + st.markdown("**OpenAI**") openai_key = st.text_input( "OpenAI API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""), help="Your OpenAI API key" ) @@ -32,26 +51,68 @@ def show_settings_page(): os.environ["OPENAI_API_KEY"] = openai_key st.success("OpenAI key configured") - model_choice = st.selectbox("OpenAI Model", ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]) - st.session_state.openai_model = model_choice + openai_model = st.selectbox( + "OpenAI Model", + ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"], + help="Model used when OpenAI is the active provider.", + ) + st.session_state.openai_model = openai_model with col2: - st.markdown("**Notion Configuration**") - notion_key = st.text_input( - "Notion API Key", + st.markdown("**Anthropic**") + anthropic_key = st.text_input( + "Anthropic API Key", type="password", - value=os.getenv("NOTION_API_KEY", ""), - help="Your Notion integration token", + value=os.getenv("ANTHROPIC_API_KEY", ""), + help="Your Anthropic API key", ) - if notion_key: - os.environ["NOTION_API_KEY"] = notion_key - - notion_db = st.text_input("Notion Database ID", value=os.getenv("NOTION_DATABASE_ID", "")) - if notion_db: - os.environ["NOTION_DATABASE_ID"] = notion_db + if anthropic_key: + os.environ["ANTHROPIC_API_KEY"] = anthropic_key + st.success("Anthropic key configured") + + anthropic_model = st.selectbox( + "Anthropic Model", + ["claude-sonnet-4-20250514", "claude-haiku-4-20250514", "claude-3-5-sonnet-20241022"], + help="Model used when Anthropic is the active provider.", + ) + st.session_state.anthropic_model = anthropic_model - if notion_key and notion_db: - st.success("Notion configured") + with col3: + st.markdown("**Grok (xAI)**") + grok_key = st.text_input( + "Grok API Key", + type="password", + 
value=os.getenv("GROK_API_KEY", ""), + help="Your xAI / Grok API key", + ) + if grok_key: + os.environ["GROK_API_KEY"] = grok_key + st.success("Grok key configured") + + grok_model = st.selectbox( + "Grok Model", + ["grok-3", "grok-3-mini", "grok-2"], + help="Model used when Grok is the active provider.", + ) + st.session_state.grok_model = grok_model + + st.markdown("#### Notion Integration") + with st.expander("Notion Configuration", expanded=False): + notion_key = st.text_input( + "Notion API Key", + type="password", + value=os.getenv("NOTION_API_KEY", ""), + help="Your Notion integration token", + ) + if notion_key: + os.environ["NOTION_API_KEY"] = notion_key + + notion_db = st.text_input("Notion Database ID", value=os.getenv("NOTION_DATABASE_ID", "")) + if notion_db: + os.environ["NOTION_DATABASE_ID"] = notion_db + + if notion_key and notion_db: + st.success("Notion configured") st.markdown("#### Pipeline Configuration") with st.expander("Processing Pipeline", expanded=True): @@ -88,10 +149,17 @@ def show_settings_page(): with st.expander("Connection Status", expanded=False): if st.button("Test All Connections"): with st.spinner("Testing all connections..."): - if os.getenv("OPENAI_API_KEY"): - st.success("OpenAI API key configured") - else: - st.error("OpenAI API key missing") + # AI provider keys + ai_providers = { + "OpenAI": "OPENAI_API_KEY", + "Anthropic": "ANTHROPIC_API_KEY", + "Grok": "GROK_API_KEY", + } + for name, env_var in ai_providers.items(): + if os.getenv(env_var): + st.success(f"{name} API key configured") + else: + st.warning(f"{name} API key not set") try: db = get_supabase_client() From 2f477ffd265c4739c97e97905f3d0453b67e591d Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:31:49 -0800 Subject: [PATCH 41/46] ci(workflow): add GitHub Actions CI pipeline with ruff and pytest Create .github/workflows/ci.yml that runs on push to main and on pull requests. 
Single job with Python 3.11 that runs ruff lint, ruff format check, and pytest (excluding ai/supabase/slow markers). Add ruff>=0.9.7 to requirements-dev.txt so CI can run lint checks without relying on pre-commit's isolated environment. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ requirements-dev.txt | 1 + 2 files changed, 33 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4f11527 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements-dev.txt + + - name: Lint + run: ruff check . + + - name: Format check + run: ruff format --check . + + - name: Run tests + run: pytest -m "not ai and not supabase and not slow" --tb=short diff --git a/requirements-dev.txt b/requirements-dev.txt index 7595e99..87124d3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,5 +12,6 @@ python-docx>=1.1.0,<2.0 fpdf>=1.7.2,<2.0 # Linting & Pre-commit Hooks +ruff>=0.9.7,<1.0 pre-commit>=4.0.0,<5.0 autopep8>=2.3.0,<3.0 From 257adb51bd8654fac5bfdd9c075c34ec0fcfafef Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:49:29 -0800 Subject: [PATCH 42/46] fix(core): correct logger.logger delegation bug across 3 files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WhisperForgeLogger.__getattr__ already delegates to the underlying logging.Logger, so logger.info() is correct—logger.logger.info() bypassed the delegation and was incorrect.
Co-Authored-By: Claude Opus 4.6 --- core/auth_wrapper.py | 22 +++++++++++----------- tests/conftest.py | 4 ++-- whisperforge_cli.py | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index 25da594..3f337da 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -56,7 +56,7 @@ def authenticate_user(self, email: str, password: str) -> bool: Maintains backward compatibility with existing auth logic """ try: - logger.logger.info(f"Authentication attempt for: {email}") + logger.info(f"Authentication attempt for: {email}") if not self.supabase_client: logger.log_error(Exception("Supabase client not available"), "Authentication failed") @@ -66,7 +66,7 @@ def authenticate_user(self, email: str, password: str) -> bool: result = self.supabase_client.client.table("users").select("*").eq("email", email).execute() if not result.data: - logger.logger.warning(f"User not found: {email}") + logger.warning(f"User not found: {email}") return False user = result.data[0] @@ -89,7 +89,7 @@ def authenticate_user(self, email: str, password: str) -> bool: self.supabase_client.client.table("users").update({"password": new_hash}).eq( "id", user["id"] ).execute() - logger.logger.info(f"Password migrated to bcrypt for user: {email}") + logger.info(f"Password migrated to bcrypt for user: {email}") except (APIError, ValueError) as e: logger.log_error(e, "Failed to migrate password") except Exception as e: @@ -98,7 +98,7 @@ def authenticate_user(self, email: str, password: str) -> bool: if password_valid: # Create persistent session using SessionManager if self.session_manager.authenticate_user(user["id"], email): - logger.logger.info(f"User authenticated successfully: {email}") + logger.info(f"User authenticated successfully: {email}") # Load user preferences from database self._load_user_preferences(user["id"]) @@ -110,7 +110,7 @@ def authenticate_user(self, email: str, password: str) -> bool: ) return False else: - 
logger.logger.warning(f"Invalid password for user: {email}") + logger.warning(f"Invalid password for user: {email}") return False except (APIError, ValueError) as e: @@ -130,7 +130,7 @@ def register_user(self, email: str, password: str) -> bool: # Check if user already exists existing = self.supabase_client.client.table("users").select("id").eq("email", email).execute() if existing.data: - logger.logger.warning(f"User already exists: {email}") + logger.warning(f"User already exists: {email}") return False # Hash password @@ -143,11 +143,11 @@ def register_user(self, email: str, password: str) -> bool: if result.data: user = result.data[0] - logger.logger.info(f"User registered successfully: {email}") + logger.info(f"User registered successfully: {email}") # Create session for new user if self.session_manager.authenticate_user(user["id"], email): - logger.logger.info(f"Session created for new user: {email}") + logger.info(f"Session created for new user: {email}") return True else: logger.log_error( @@ -170,7 +170,7 @@ def logout(self) -> bool: try: email = self.get_user_email() if self.session_manager.logout(): - logger.logger.info(f"User logged out: {email}") + logger.info(f"User logged out: {email}") return True else: logger.log_error(Exception(f"Failed to logout user: {email}"), "Logout failed") @@ -216,7 +216,7 @@ def _load_user_preferences(self, user_id: str): self.session_manager.set_preference("api_keys", api_keys) self.session_manager.set_preference("custom_prompts", prompts) - logger.logger.debug(f"Loaded preferences for user: {user_id}") + logger.debug(f"Loaded preferences for user: {user_id}") except (APIError, ValueError) as e: logger.log_error(e, "Failed to load user preferences") @@ -248,7 +248,7 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: api_keys[key_name] = key_value self.session_manager.set_preference("api_keys", api_keys) - logger.logger.info(f"API key updated: {key_name}") + logger.info(f"API key updated: {key_name}") 
return True else: logger.log_error(Exception(f"Failed to update API key: {key_name}"), "API key update failed") diff --git a/tests/conftest.py b/tests/conftest.py index 41d4e4e..2b4fa08 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -175,9 +175,9 @@ def mock_anthropic(): @pytest.fixture(autouse=True) def setup_logging(): """Set up logging for tests""" - logger.logger.info("πŸ§ͺ Starting test session") + logger.info("πŸ§ͺ Starting test session") yield - logger.logger.info("πŸ§ͺ Test session completed") + logger.info("πŸ§ͺ Test session completed") # Test markers diff --git a/whisperforge_cli.py b/whisperforge_cli.py index 7b1f985..c4e0b6c 100644 --- a/whisperforge_cli.py +++ b/whisperforge_cli.py @@ -259,7 +259,7 @@ def run( click.echo(f"\nTranscript preview:\n{results['transcript'][:300]}...") except Exception as e: - logger.logger.error(f"CLI pipeline error: {e}") + logger.error(f"CLI pipeline error: {e}") click.echo(f"Pipeline failed: {e}", err=True) sys.exit(1) @@ -307,7 +307,7 @@ def transcribe(input_file: str, output: str | None): click.echo(f"Preview: {transcript[:200]}...") except Exception as e: - logger.logger.error(f"CLI transcription error: {e}") + logger.error(f"CLI transcription error: {e}") click.echo(f"Transcription failed: {e}", err=True) sys.exit(1) From 7524e24abff7999e7de3b47892331d7cc0aa5ee3 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:49:36 -0800 Subject: [PATCH 43/46] refactor(core): centralize remaining magic numbers in constants.py Move content-generation token limits, Notion integration limits, content-display preview length, and session defaults into the single constants module so they are co-located with other invariants. 
Co-Authored-By: Claude Opus 4.6 --- core/constants.py | 28 ++++++++++++++++++++++++++++ core/content_display.py | 3 ++- core/content_generation.py | 11 ++++++----- core/notion_integration.py | 23 +++++++++++++---------- core/session_manager.py | 8 +++++--- 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/core/constants.py b/core/constants.py index 9833573..a37334b 100644 --- a/core/constants.py +++ b/core/constants.py @@ -40,3 +40,31 @@ DEFAULT_USAGE_QUOTA_MINUTES: int = 60 # New user monthly quota DEFAULT_CONTENT_QUERY_LIMIT: int = 50 # get_user_content() DEFAULT_ANALYTICS_DAYS: int = 30 # get_user_analytics() + +# --------------------------------------------------------------------------- +# Content generation +# --------------------------------------------------------------------------- +MAX_TOKENS_DEFAULT: int = 1500 +MAX_TOKENS_ARTICLE: int = 2000 +TRANSCRIPT_PREVIEW_LENGTH: int = 2000 +ARTICLE_PREVIEW_LENGTH: int = 1500 + +# --------------------------------------------------------------------------- +# Notion integration +# --------------------------------------------------------------------------- +NOTION_CHUNK_SIZE: int = 1800 +MAX_NOTION_BLOCKS: int = 50 +NOTION_TITLE_EXCERPT_LENGTH: int = 500 +NOTION_TITLE_MAX_TOKENS: int = 30 +NOTION_TITLE_MAX_LENGTH: int = 60 + +# --------------------------------------------------------------------------- +# Content display +# --------------------------------------------------------------------------- +CONTENT_PREVIEW_LENGTH: int = 300 + +# --------------------------------------------------------------------------- +# Session +# --------------------------------------------------------------------------- +SESSION_EXPIRY_DAYS: int = 7 +DEFAULT_PAGE: str = "Transform" diff --git a/core/content_display.py b/core/content_display.py index ed1cc93..d7ade6f 100644 --- a/core/content_display.py +++ b/core/content_display.py @@ -11,6 +11,7 @@ import streamlit as st +from .constants import CONTENT_PREVIEW_LENGTH 
from .export import create_text_export, export_to_markdown, export_to_word @@ -24,7 +25,7 @@ def create_enhanced_aurora_content_card(title, content, content_type="text", ico word_count = len(content.split()) if content else 0 char_count = len(content) if content else 0 - preview_length = 300 + preview_length = CONTENT_PREVIEW_LENGTH needs_expansion = len(content) > preview_length preview_content = content[:preview_length] + "..." if needs_expansion else content diff --git a/core/content_generation.py b/core/content_generation.py index 7db02b8..8ab901b 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -8,6 +8,12 @@ import os from .api_clients import get_anthropic_client, get_grok_client, get_openai_client +from .constants import ( + ARTICLE_PREVIEW_LENGTH, + MAX_TOKENS_ARTICLE, + MAX_TOKENS_DEFAULT, + TRANSCRIPT_PREVIEW_LENGTH, +) from .exceptions import PipelineError from .prompts import get_enhanced_prompt @@ -15,11 +21,6 @@ WHISPER_MODEL = os.getenv("WHISPER_MODEL", "whisper-1") -MAX_TOKENS_DEFAULT = 1500 -MAX_TOKENS_ARTICLE = 2000 -TRANSCRIPT_PREVIEW_LENGTH = 2000 -ARTICLE_PREVIEW_LENGTH = 1500 - DEFAULT_MODELS = { "openai": os.getenv("GPT_MODEL", "gpt-4o"), "anthropic": os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"), diff --git a/core/notion_integration.py b/core/notion_integration.py index 39cb978..d46df3d 100644 --- a/core/notion_integration.py +++ b/core/notion_integration.py @@ -9,14 +9,17 @@ import os from datetime import datetime +from .constants import ( + MAX_NOTION_BLOCKS, + NOTION_CHUNK_SIZE, + NOTION_TITLE_EXCERPT_LENGTH, + NOTION_TITLE_MAX_LENGTH, + NOTION_TITLE_MAX_TOKENS, +) from .content_generation import ContentGenerationError, _chat_completion logger = logging.getLogger(__name__) -# Notion API limits content blocks to 2000 chars each -_NOTION_CHUNK_SIZE = 1800 -_MAX_NOTION_BLOCKS = 50 - def generate_ai_title(transcript: str) -> str: """Generate a concise AI-powered title from a transcript excerpt.""" @@ 
-26,10 +29,10 @@ def generate_ai_title(transcript: str) -> str: "The title should be clear, specific, professional, and capture the main topic. " "No quotes or special characters. Respond with only the title." ) - user_content = f"Here is the transcript excerpt:\n\n{transcript[:500]}..." + user_content = f"Here is the transcript excerpt:\n\n{transcript[:NOTION_TITLE_EXCERPT_LENGTH]}..." - title = _chat_completion(system_prompt, user_content, max_tokens=30) - return title.strip().replace('"', "").replace("'", "")[:60] + title = _chat_completion(system_prompt, user_content, max_tokens=NOTION_TITLE_MAX_TOKENS) + return title.strip().replace('"', "").replace("'", "")[:NOTION_TITLE_MAX_LENGTH] except ContentGenerationError: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" except Exception: @@ -62,7 +65,7 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: parent={"database_id": database_id}, icon={"type": "emoji", "emoji": "\U0001f30c"}, properties={"Name": {"title": [{"text": {"content": title[:100]}}]}}, - children=children[:_MAX_NOTION_BLOCKS], + children=children[:MAX_NOTION_BLOCKS], ) if response and "id" in response: @@ -127,7 +130,7 @@ def _build_notion_children(title: str, content_data: dict) -> list[dict]: "callout": { "rich_text": [ {"type": "text", "text": {"content": "Key Insights & Wisdom"}}, - {"type": "text", "text": {"content": f"\n\n{content_data['wisdom'][:_NOTION_CHUNK_SIZE]}"}}, + {"type": "text", "text": {"content": f"\n\n{content_data['wisdom'][:NOTION_CHUNK_SIZE]}"}}, ], "color": "purple_background", "icon": {"type": "emoji", "emoji": "\U0001f4a1"}, @@ -153,7 +156,7 @@ def _build_notion_children(title: str, content_data: dict) -> list[dict]: children.append(_build_research_toggle(section_title, section_content)) elif isinstance(section_content, str): chunks = [ - section_content[i : i + _NOTION_CHUNK_SIZE] for i in range(0, len(section_content), _NOTION_CHUNK_SIZE) + section_content[i : i 
+ NOTION_CHUNK_SIZE] for i in range(0, len(section_content), NOTION_CHUNK_SIZE) ] children.append( { diff --git a/core/session_manager.py b/core/session_manager.py index b7d56a7..f057a00 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -5,13 +5,15 @@ import streamlit as st +from .constants import DEFAULT_PAGE, SESSION_EXPIRY_DAYS + logger = logging.getLogger(__name__) class SessionManager: """Simple file-based session persistence""" - def __init__(self, app_name: str = "whisperforge", expiry_days: int = 7): + def __init__(self, app_name: str = "whisperforge", expiry_days: int = SESSION_EXPIRY_DAYS): self.session_dir = Path.home() / f".{app_name}_sessions" self.session_file = self.session_dir / "session.json" self.expiry_days = expiry_days @@ -20,7 +22,7 @@ def __init__(self, app_name: str = "whisperforge", expiry_days: int = 7): "user_id": None, "user_email": None, "preferences": {}, - "current_page": "Transform", + "current_page": DEFAULT_PAGE, "pipeline_active": False, "created_at": None, "last_activity": None, @@ -99,7 +101,7 @@ def set_current_page(self, page: str): self._save() def get_current_page(self) -> str: - return self.data.get("current_page", "Transform") + return self.data.get("current_page", DEFAULT_PAGE) def set_pipeline_active(self, active: bool): self.data["pipeline_active"] = active From 5f491855b6563736caa29bfd03e94a28c10a8f7a Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:59:46 -0800 Subject: [PATCH 44/46] refactor(core): replace broad except Exception with specific exception types Replace ~51 bare `except Exception` blocks across 12 files with the appropriate custom exceptions from core.exceptions (DatabaseError, APIClientError, FileProcessingError, PipelineError). This makes error handling explicit and prevents masking unrelated bugs. 
Co-Authored-By: Claude Opus 4.6 --- core/auth_wrapper.py | 17 +++++++++-------- core/content_generation.py | 4 ++-- core/file_upload.py | 9 +++++---- core/large_file_processor.py | 11 ++++++----- core/notion_integration.py | 5 +++-- core/pages.py | 17 +++++++++-------- core/pipeline.py | 3 ++- core/pipeline_engine.py | 3 ++- core/prompts.py | 3 ++- core/session_manager.py | 4 ++-- core/streaming_pipeline.py | 7 ++++--- core/streaming_status.py | 3 ++- 12 files changed, 48 insertions(+), 38 deletions(-) diff --git a/core/auth_wrapper.py b/core/auth_wrapper.py index 3f337da..622a5ea 100644 --- a/core/auth_wrapper.py +++ b/core/auth_wrapper.py @@ -10,6 +10,7 @@ from core.logging_config import logger +from .exceptions import AuthenticationError, DatabaseError from .session_manager import get_session_manager from .utils import hash_password, legacy_hash_password, verify_password @@ -34,7 +35,7 @@ def _init_supabase(self): except (APIError, ValueError) as e: logger.log_error(e, "Failed to initialize Supabase") self.supabase_client = None - except Exception as e: + except DatabaseError as e: logger.log_error(e, "Unexpected error initializing Supabase") self.supabase_client = None @@ -92,7 +93,7 @@ def authenticate_user(self, email: str, password: str) -> bool: logger.info(f"Password migrated to bcrypt for user: {email}") except (APIError, ValueError) as e: logger.log_error(e, "Failed to migrate password") - except Exception as e: + except DatabaseError as e: logger.log_error(e, "Unexpected error migrating password") if password_valid: @@ -116,7 +117,7 @@ def authenticate_user(self, email: str, password: str) -> bool: except (APIError, ValueError) as e: logger.log_error(e, f"Authentication error for {email}") return False - except Exception as e: + except (AuthenticationError, DatabaseError) as e: logger.log_error(e, f"Unexpected authentication error for {email}") return False @@ -161,7 +162,7 @@ def register_user(self, email: str, password: str) -> bool: except (APIError, 
ValueError) as e: logger.log_error(e, f"Registration error for {email}") return False - except Exception as e: + except (AuthenticationError, DatabaseError) as e: logger.log_error(e, f"Unexpected registration error for {email}") return False @@ -178,7 +179,7 @@ def logout(self) -> bool: except (APIError, ValueError) as e: logger.log_error(e, "Logout error") return False - except Exception as e: + except (AuthenticationError, DatabaseError) as e: logger.log_error(e, "Unexpected logout error") return False @@ -220,7 +221,7 @@ def _load_user_preferences(self, user_id: str): except (APIError, ValueError) as e: logger.log_error(e, "Failed to load user preferences") - except Exception as e: + except DatabaseError as e: logger.log_error(e, "Unexpected error loading user preferences") def get_api_keys(self) -> dict[str, str]: @@ -257,7 +258,7 @@ def update_api_key(self, key_name: str, key_value: str) -> bool: except (APIError, ValueError) as e: logger.log_error(e, f"Error updating API key {key_name}") return False - except Exception as e: + except DatabaseError as e: logger.log_error(e, f"Unexpected error updating API key {key_name}") return False @@ -297,7 +298,7 @@ def update_custom_prompt(self, prompt_type: str, content: str) -> bool: except (APIError, ValueError) as e: logger.log_error(e, f"Error updating custom prompt {prompt_type}") return False - except Exception as e: + except DatabaseError as e: logger.log_error(e, f"Unexpected error updating custom prompt {prompt_type}") return False diff --git a/core/content_generation.py b/core/content_generation.py index 8ab901b..1600ac2 100644 --- a/core/content_generation.py +++ b/core/content_generation.py @@ -14,7 +14,7 @@ MAX_TOKENS_DEFAULT, TRANSCRIPT_PREVIEW_LENGTH, ) -from .exceptions import PipelineError +from .exceptions import APIClientError, PipelineError from .prompts import get_enhanced_prompt logger = logging.getLogger(__name__) @@ -158,7 +158,7 @@ def _chat_completion(system_prompt: str, user_content: str, 
max_tokens: int = MA return _call_provider(provider, system_prompt, user_content, max_tokens) except ContentGenerationError: raise - except Exception as e: + except (APIClientError, OSError) as e: logger.warning("Provider '%s' failed: %s. Trying next provider.", provider, e) last_error = e diff --git a/core/file_upload.py b/core/file_upload.py index 2433ec4..8ed4657 100644 --- a/core/file_upload.py +++ b/core/file_upload.py @@ -19,6 +19,7 @@ from .constants import CHUNK_SUCCESS_THRESHOLD_STANDARD, MAX_PARALLEL_CHUNKS_STANDARD, MAX_UPLOAD_SIZE_BYTES from .content_generation import ContentGenerationError +from .exceptions import APIClientError, FileProcessingError from .large_file_processor import EnhancedLargeFileProcessor # noqa: F401 - re-export logger = logging.getLogger(__name__) @@ -148,7 +149,7 @@ def _process_small_file(self, uploaded_file) -> dict: except (ContentGenerationError, openai.APIError, OSError) as e: progress_bar.progress(1.0, f"\u274c Error: {e!s}") return {"success": False, "error": str(e)} - except Exception as e: + except (FileProcessingError, APIClientError) as e: progress_bar.progress(1.0, f"\u274c Unexpected error: {e!s}") return {"success": False, "error": str(e)} @@ -196,7 +197,7 @@ def _process_large_file_chunked(self, uploaded_file) -> dict: logger.exception("Error in large file processing:") st.error(f"\u274c Large file processing failed: {e!s}") return {"success": False, "error": str(e)} - except Exception as e: + except (FileProcessingError, APIClientError) as e: logger.exception("Unexpected error in large file processing:") st.error(f"\u274c Unexpected large file processing error: {e!s}") return {"success": False, "error": str(e)} @@ -255,7 +256,7 @@ def _create_audio_chunks(self, uploaded_file) -> dict: except OSError as e: logger.exception("Error creating audio chunks:") return {"success": False, "error": f"Failed to create chunks: {e!s}"} - except Exception as e: + except FileProcessingError as e: logger.exception("Unexpected 
error creating audio chunks:") return {"success": False, "error": f"Unexpected error creating chunks: {e!s}"} @@ -290,7 +291,7 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: chunk_statuses[chunk_index] = "error" logger.exception(f"Error transcribing chunk {chunk_index}:") return chunk_index, f"Error: {e!s}", False - except Exception as e: + except APIClientError as e: chunk_statuses[chunk_index] = "error" logger.exception(f"Unexpected error transcribing chunk {chunk_index}:") return chunk_index, f"Unexpected error: {e!s}", False diff --git a/core/large_file_processor.py b/core/large_file_processor.py index 38a09f9..f860885 100644 --- a/core/large_file_processor.py +++ b/core/large_file_processor.py @@ -29,6 +29,7 @@ MAX_PARALLEL_CHUNKS_FFMPEG, MAX_UPLOAD_SIZE_BYTES, ) +from .exceptions import APIClientError, FileProcessingError logger = logging.getLogger(__name__) @@ -104,7 +105,7 @@ def get_audio_info(self, file_path: str) -> dict: except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"error": f"Failed to get audio info: {e!s}"} - except Exception as e: + except FileProcessingError as e: return {"error": f"Unexpected error getting audio info: {e!s}"} def validate_file(self, uploaded_file) -> dict: @@ -249,7 +250,7 @@ def _process_standard(self, uploaded_file) -> dict: except (openai.APIError, OSError) as e: return {"success": False, "error": f"Standard processing failed: {e!s}"} - except Exception as e: + except (APIClientError, FileProcessingError) as e: return {"success": False, "error": f"Unexpected standard processing error: {e!s}"} def _process_with_ffmpeg_chunking(self, uploaded_file) -> dict: @@ -301,7 +302,7 @@ def _process_with_ffmpeg_chunking(self, uploaded_file) -> dict: except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"success": False, "error": f"FFmpeg processing failed: {e!s}"} - except Exception as e: + except (FileProcessingError, APIClientError) 
as e: return {"success": False, "error": f"Unexpected FFmpeg processing error: {e!s}"} finally: @@ -364,7 +365,7 @@ def _create_ffmpeg_chunks(self, input_file_path: str, duration: float) -> dict: except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e: return {"success": False, "error": f"Chunk creation failed: {e!s}"} - except Exception as e: + except FileProcessingError as e: return {"success": False, "error": f"Unexpected chunk creation error: {e!s}"} def _transcribe_chunks_parallel(self, chunks: list[dict]) -> dict: @@ -387,7 +388,7 @@ def transcribe_single_chunk(chunk_info: dict) -> tuple[int, str, bool]: except (openai.APIError, OSError) as e: logger.error(f"Failed to transcribe chunk {chunk_info['index']}: {e}") return chunk_info["index"], f"[Transcription failed for chunk {chunk_info['index']}]", False - except Exception as e: + except APIClientError as e: logger.error(f"Unexpected error transcribing chunk {chunk_info['index']}: {e}") return chunk_info["index"], f"[Unexpected error for chunk {chunk_info['index']}]", False diff --git a/core/notion_integration.py b/core/notion_integration.py index d46df3d..f90332a 100644 --- a/core/notion_integration.py +++ b/core/notion_integration.py @@ -17,6 +17,7 @@ NOTION_TITLE_MAX_TOKENS, ) from .content_generation import ContentGenerationError, _chat_completion +from .exceptions import APIClientError logger = logging.getLogger(__name__) @@ -35,7 +36,7 @@ def generate_ai_title(transcript: str) -> str: return title.strip().replace('"', "").replace("'", "")[:NOTION_TITLE_MAX_LENGTH] except ContentGenerationError: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" - except Exception: + except APIClientError: return f"WhisperForge Content - {datetime.now().strftime('%Y-%m-%d %H:%M')}" @@ -77,7 +78,7 @@ def create_notion_page(title: str, content_data: dict[str, str]) -> str | None: except (ContentGenerationError, OSError) as e: logger.error(f"Notion publishing failed: {e!s}") 
return None - except Exception as e: + except APIClientError as e: logger.error(f"Unexpected error in Notion publishing: {e!s}") return None diff --git a/core/pages.py b/core/pages.py index f6e5310..e8d5596 100644 --- a/core/pages.py +++ b/core/pages.py @@ -11,6 +11,7 @@ from postgrest.exceptions import APIError from .content_display import create_enhanced_aurora_content_card +from .exceptions import APIClientError, DatabaseError, FileProcessingError from .supabase_integration import get_supabase_client from .utils import safe_path @@ -169,7 +170,7 @@ def show_settings_page(): st.error("Supabase connection failed") except APIError as e: st.error(f"Supabase error: {e}") - except Exception as e: + except DatabaseError as e: st.error(f"Unexpected Supabase error: {e}") try: @@ -183,7 +184,7 @@ def show_settings_page(): st.warning("Notion not configured") except OSError as e: st.error(f"Notion error: {e}") - except Exception as e: + except APIClientError as e: st.error(f"Unexpected Notion error: {e}") @@ -219,7 +220,7 @@ def show_knowledge_base(): st.info("Knowledge base directory not found") except OSError as e: st.error(f"Error reading knowledge base: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error reading knowledge base: {e}") with tabs[1]: @@ -253,7 +254,7 @@ def show_knowledge_base(): st.error(f"Invalid filename: {e}") except OSError as e: st.error(f"Error saving knowledge: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error saving knowledge: {e}") else: st.error("Please provide both title and content") @@ -277,7 +278,7 @@ def show_knowledge_base(): st.rerun() except (ValueError, OSError) as e: st.error(f"Error deleting file: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error deleting file: {e}") else: st.info("No knowledge files found") @@ -285,7 +286,7 @@ def show_knowledge_base(): st.info("Knowledge base directory not found") except 
OSError as e: st.error(f"Error managing files: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error managing files: {e}") @@ -325,7 +326,7 @@ def show_prompts_page(): current_prompt = f"# {prompt_name} Prompt\n\nDefault prompt for {prompt_key} generation." except OSError as e: st.error(f"Error loading prompt: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error loading prompt: {e}") new_prompt = st.text_area( @@ -345,7 +346,7 @@ def show_prompts_page(): st.success(f"{prompt_name} prompt saved!") except OSError as e: st.error(f"Error saving prompt: {e}") - except Exception as e: + except FileProcessingError as e: st.error(f"Unexpected error saving prompt: {e}") with col2: diff --git a/core/pipeline.py b/core/pipeline.py index cd47771..78988b1 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -13,6 +13,7 @@ import streamlit as st from . import pipeline_engine +from .exceptions import DatabaseError from .pipeline_engine import NullListener, PipelineConfig from .prompt_loader import load_custom_prompts from .supabase_integration import get_supabase_client @@ -104,7 +105,7 @@ def process_pipeline(audio_file=None, transcript: str | None = None) -> dict | N # --- Obtain content store (Supabase) --- try: content_store = get_supabase_client() - except Exception: + except (DatabaseError, ImportError): logger.warning("Supabase client unavailable -- content will not be persisted") content_store = None diff --git a/core/pipeline_engine.py b/core/pipeline_engine.py index bec7260..f7823a3 100644 --- a/core/pipeline_engine.py +++ b/core/pipeline_engine.py @@ -23,6 +23,7 @@ generate_wisdom, transcribe_audio, ) +from .exceptions import DatabaseError from .notion_integration import create_notion_page, generate_ai_title from .prompt_loader import get_prompt_for_step, load_custom_prompts, load_template @@ -292,7 +293,7 @@ def run_pipeline( "created_at": datetime.now().isoformat(), } 
content_store.save_content(config.user_id, content_data) - except Exception as exc: + except DatabaseError as exc: logger.warning("Content saved locally but database save failed: %s", exc) listener.on_step_progress(5, 100, 100, "Pipeline complete! All content generated successfully.") diff --git a/core/prompts.py b/core/prompts.py index b6305d7..eeb73b4 100644 --- a/core/prompts.py +++ b/core/prompts.py @@ -7,6 +7,7 @@ import logging +from .exceptions import FileProcessingError from .path_safety import safe_path logger = logging.getLogger(__name__) @@ -50,7 +51,7 @@ def load_prompt_from_file(prompt_type: str, user_id: str = None) -> str: logger.error(f"No prompt found for type: {prompt_type}") return f"Please provide content for {prompt_type.replace('_', ' ')}." - except Exception as e: + except (FileProcessingError, OSError) as e: logger.error(f"Error loading prompt {prompt_type}: {e}") return DEFAULT_PROMPTS.get(prompt_type, f"Error loading {prompt_type} prompt.") diff --git a/core/session_manager.py b/core/session_manager.py index f057a00..74963eb 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -44,7 +44,7 @@ def _load(self): except (json.JSONDecodeError, OSError, KeyError) as e: logger.error(f"Session load failed: {e}") except Exception as e: - logger.error(f"Unexpected error in session load: {e}") + logger.warning(f"Unexpected error type ({type(e).__name__}) in session load: {e}") def _save(self): try: @@ -54,7 +54,7 @@ def _save(self): except OSError as e: logger.error(f"Session save failed: {e}") except Exception as e: - logger.error(f"Unexpected error in session save: {e}") + logger.warning(f"Unexpected error type ({type(e).__name__}) in session save: {e}") def authenticate_user(self, user_id: str, email: str) -> bool: self.data.update( diff --git a/core/streaming_pipeline.py b/core/streaming_pipeline.py index 90c6eeb..878aad8 100644 --- a/core/streaming_pipeline.py +++ b/core/streaming_pipeline.py @@ -19,6 +19,7 @@ generate_wisdom, 
transcribe_audio, ) +from .exceptions import DatabaseError, PipelineError from .visible_thinking import thinking_error, thinking_step_complete, thinking_step_start @@ -94,7 +95,7 @@ def process_next_step(self): st.session_state.pipeline_active = False st.error(f"Error in {step_id}: {error_msg}") return False - except Exception as e: + except PipelineError as e: error_msg = str(e) st.session_state.pipeline_errors[step_id] = error_msg st.session_state.pipeline_active = False @@ -129,7 +130,7 @@ def _execute_step(self, step_id: str, step_index: int) -> Any: except (ContentGenerationError, openai.APIError, ValueError) as e: thinking_error(step_id, str(e)) raise - except Exception as e: + except PipelineError as e: thinking_error(step_id, f"Unexpected: {e!s}") raise @@ -294,7 +295,7 @@ def _step_database_storage(self) -> str: except APIError as e: return f"Database save failed: {str(e)}" - except Exception as e: + except DatabaseError as e: return f"Unexpected database error: {str(e)}" @property diff --git a/core/streaming_status.py b/core/streaming_status.py index 0f40c5a..316e1a1 100644 --- a/core/streaming_status.py +++ b/core/streaming_status.py @@ -8,6 +8,7 @@ import streamlit as st +from .exceptions import WhisperForgeError from .streaming_pipeline import get_pipeline_controller @@ -257,7 +258,7 @@ def show_enhanced_streaming_status(): from .visible_thinking import render_thinking_stream render_thinking_stream(thinking_container) - except Exception: + except (WhisperForgeError, ImportError): st.info("\U0001f4ad AI is thinking... (thinking system loading)") # Main processing status From 736a241d6185b764b1a969c03a1b54c2f3542148 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 19:59:53 -0800 Subject: [PATCH 45/46] refactor(core): extract MCPSupabaseIntegration into dedicated module Move MCPSupabaseIntegration class and get_mcp_integration() singleton out of supabase_integration.py into core/mcp_integration.py. 
The original module re-exports both symbols for backward compatibility, following the established shim pattern from core/utils.py. Co-Authored-By: Claude Opus 4.6 --- core/mcp_integration.py | 111 ++++++++++++++++++++++++++++++ core/supabase_integration.py | 127 +++++++---------------------------- 2 files changed, 136 insertions(+), 102 deletions(-) create mode 100644 core/mcp_integration.py diff --git a/core/mcp_integration.py b/core/mcp_integration.py new file mode 100644 index 0000000..45a0a32 --- /dev/null +++ b/core/mcp_integration.py @@ -0,0 +1,111 @@ +""" +MCP (Model Context Protocol) Integration Module +================================================ + +Provides AI models with context about user data and preferences +via Supabase. Extracted from core.supabase_integration to separate +MCP concerns from the base database client. +""" + +import logging +from typing import Any + +from postgrest.exceptions import APIError + +from .exceptions import DatabaseError + +logger = logging.getLogger(__name__) + + +class MCPSupabaseIntegration: + """ + Model Context Protocol integration for Supabase + Provides AI models with context about user data and preferences + """ + + def __init__(self, supabase_client): + self.db = supabase_client + + def get_user_context(self, user_id: int) -> dict[str, Any]: + """Get comprehensive user context for AI models""" + try: + # Get user profile + user = self.db.get_user(user_id) + if not user: + return {} + + # Get user's knowledge base + knowledge_base = self.db.get_user_knowledge_base(user_id) + + # Get user's custom prompts + custom_prompts = self.db.get_user_prompts(user_id) + + # Get recent content history (for style learning) + recent_content = self.db.get_user_content(user_id, limit=10) + + # Get user analytics + analytics = self.db.get_user_analytics(user_id, days=30) + + context = { + "user_profile": { + "subscription_tier": user.get("subscription_tier", "free"), + "usage_quota": user.get("usage_quota", 60), + 
"usage_current": user.get("usage_current", 0), + "created_at": user.get("created_at"), + }, + "knowledge_base": knowledge_base, + "custom_prompts": custom_prompts, + "content_history": recent_content, + "analytics": analytics, + "preferences": { + "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] + if analytics.get("ai_providers_used") + else "openai", + "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo"), + }, + } + + return context + except APIError as e: + logger.error(f"Error getting user context for MCP: {e}") + return {} + except DatabaseError as e: + logger.error(f"Unexpected error getting user context for MCP: {e}") + return {} + + def update_context_from_interaction(self, user_id: int, interaction_data: dict[str, Any]) -> bool: + """Update user context based on AI interaction results""" + try: + # Log the interaction + self.db.log_pipeline_execution(user_id, interaction_data) + + # Update usage if provided + if "duration_seconds" in interaction_data: + self.db.update_user_usage(user_id, interaction_data["duration_seconds"]) + + # Save generated content if provided + if "content" in interaction_data: + self.db.save_content(user_id, interaction_data["content"]) + + return True + except APIError as e: + logger.error(f"Error updating context from interaction: {e}") + return False + except DatabaseError as e: + logger.error(f"Unexpected error updating context from interaction: {e}") + return False + + +# Global instance +_mcp_integration = None + + +def get_mcp_integration() -> MCPSupabaseIntegration: + """Get or create MCP integration instance""" + global _mcp_integration + if _mcp_integration is None: + # Lazy import to avoid circular imports + from .supabase_integration import get_supabase_client + + _mcp_integration = MCPSupabaseIntegration(get_supabase_client()) + return _mcp_integration diff --git a/core/supabase_integration.py b/core/supabase_integration.py index 9258c38..514942e 100644 --- 
a/core/supabase_integration.py +++ b/core/supabase_integration.py @@ -16,6 +16,7 @@ from supabase import Client, create_client from .constants import DEFAULT_ANALYTICS_DAYS, DEFAULT_CONTENT_QUERY_LIMIT, DEFAULT_USAGE_QUOTA_MINUTES +from .exceptions import DatabaseError from .utils import hash_password # Load environment variables @@ -60,7 +61,7 @@ def test_connection(self) -> bool: except APIError as e: logger.error(f"Supabase connection test failed: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error in Supabase connection test: {e}") return False @@ -90,7 +91,7 @@ def create_user(self, email: str, password: str, metadata: dict[str, Any] = None except APIError as e: logger.error(f"Error creating user: {e}") raise - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error creating user: {e}") raise @@ -102,7 +103,7 @@ def get_user(self, user_id: int) -> dict[str, Any] | None: except APIError as e: logger.error(f"Error fetching user: {e}") return None - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching user: {e}") return None @@ -114,7 +115,7 @@ def get_user_by_email(self, email: str) -> dict[str, Any] | None: except APIError as e: logger.error(f"Error fetching user by email: {e}") return None - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching user by email: {e}") return None @@ -130,7 +131,7 @@ def update_user_usage(self, user_id: int, usage_seconds: int) -> bool: except APIError as e: logger.error(f"Error updating user usage: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error updating user usage: {e}") return False @@ -162,7 +163,7 @@ def save_content(self, user_id: int, content_data: dict[str, Any]) -> str | None except APIError as e: logger.error(f"Error saving content: {e}") return None - except Exception as e: + except DatabaseError as e: 
logger.error(f"Unexpected error saving content: {e}") return None @@ -181,7 +182,7 @@ def get_user_content(self, user_id: int, limit: int = DEFAULT_CONTENT_QUERY_LIMI except APIError as e: logger.error(f"Error fetching user content: {e}") return [] - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching user content: {e}") return [] @@ -201,7 +202,7 @@ def save_user_api_keys(self, user_id: int, api_keys: dict[str, str]) -> bool: except APIError as e: logger.error(f"Error saving API keys: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error saving API keys: {e}") return False @@ -215,7 +216,7 @@ def get_user_api_keys(self, user_id: int) -> dict[str, str]: except APIError as e: logger.error(f"Error fetching API keys: {e}") return {} - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching API keys: {e}") return {} @@ -257,7 +258,7 @@ def save_knowledge_base_file(self, user_id: int, filename: str, content: str) -> except APIError as e: logger.error(f"Error saving knowledge base file: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error saving knowledge base file: {e}") return False @@ -276,7 +277,7 @@ def get_user_knowledge_base(self, user_id: int) -> dict[str, str]: except APIError as e: logger.error(f"Error fetching knowledge base: {e}") return {} - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching knowledge base: {e}") return {} @@ -318,7 +319,7 @@ def save_custom_prompt(self, user_id: int, prompt_type: str, content: str) -> bo except APIError as e: logger.error(f"Error saving custom prompt: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error saving custom prompt: {e}") return False @@ -335,7 +336,7 @@ def get_user_prompts(self, user_id: int) -> dict[str, str]: except APIError as e: 
logger.error(f"Error fetching custom prompts: {e}") return {} - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching custom prompts: {e}") return {} @@ -360,7 +361,7 @@ def log_pipeline_execution(self, user_id: int, pipeline_data: dict[str, Any]) -> except APIError as e: logger.error(f"Error logging pipeline execution: {e}") return False - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error logging pipeline execution: {e}") return False @@ -392,7 +393,7 @@ def get_user_analytics(self, user_id: int, days: int = DEFAULT_ANALYTICS_DAYS) - except APIError as e: logger.error(f"Error fetching user analytics: {e}") return {} - except Exception as e: + except DatabaseError as e: logger.error(f"Unexpected error fetching user analytics: {e}") return {} @@ -407,89 +408,8 @@ def _get_most_frequent(self, logs: list[dict], field: str) -> str: return Counter(values).most_common(1)[0][0] -# MCP Integration Functions -class MCPSupabaseIntegration: - """ - Model Context Protocol integration for Supabase - Provides AI models with context about user data and preferences - """ - - def __init__(self, supabase_client: SupabaseClient): - self.db = supabase_client - - def get_user_context(self, user_id: int) -> dict[str, Any]: - """Get comprehensive user context for AI models""" - try: - # Get user profile - user = self.db.get_user(user_id) - if not user: - return {} - - # Get user's knowledge base - knowledge_base = self.db.get_user_knowledge_base(user_id) - - # Get user's custom prompts - custom_prompts = self.db.get_user_prompts(user_id) - - # Get recent content history (for style learning) - recent_content = self.db.get_user_content(user_id, limit=10) - - # Get user analytics - analytics = self.db.get_user_analytics(user_id, days=30) - - context = { - "user_profile": { - "subscription_tier": user.get("subscription_tier", "free"), - "usage_quota": user.get("usage_quota", 60), - "usage_current": 
user.get("usage_current", 0), - "created_at": user.get("created_at"), - }, - "knowledge_base": knowledge_base, - "custom_prompts": custom_prompts, - "content_history": recent_content, - "analytics": analytics, - "preferences": { - "preferred_ai_provider": analytics.get("ai_providers_used", ["openai"])[0] - if analytics.get("ai_providers_used") - else "openai", - "most_used_model": analytics.get("most_used_model", "gpt-3.5-turbo"), - }, - } - - return context - except APIError as e: - logger.error(f"Error getting user context for MCP: {e}") - return {} - except Exception as e: - logger.error(f"Unexpected error getting user context for MCP: {e}") - return {} - - def update_context_from_interaction(self, user_id: int, interaction_data: dict[str, Any]) -> bool: - """Update user context based on AI interaction results""" - try: - # Log the interaction - self.db.log_pipeline_execution(user_id, interaction_data) - - # Update usage if provided - if "duration_seconds" in interaction_data: - self.db.update_user_usage(user_id, interaction_data["duration_seconds"]) - - # Save generated content if provided - if "content" in interaction_data: - self.db.save_content(user_id, interaction_data["content"]) - - return True - except APIError as e: - logger.error(f"Error updating context from interaction: {e}") - return False - except Exception as e: - logger.error(f"Unexpected error updating context from interaction: {e}") - return False - - # Global instance _supabase_client = None -_mcp_integration = None def get_supabase_client() -> SupabaseClient: @@ -500,9 +420,12 @@ def get_supabase_client() -> SupabaseClient: return _supabase_client -def get_mcp_integration() -> MCPSupabaseIntegration: - """Get or create MCP integration instance""" - global _mcp_integration - if _mcp_integration is None: - _mcp_integration = MCPSupabaseIntegration(get_supabase_client()) - return _mcp_integration +# Re-exports for backward compatibility (extracted to core.mcp_integration) +from .mcp_integration 
import MCPSupabaseIntegration, get_mcp_integration # noqa: E402 + +__all__ = [ + "SupabaseClient", + "get_supabase_client", + "MCPSupabaseIntegration", + "get_mcp_integration", +] From 4c26de3ae04bc94c30ec523c7da88b884520f5c3 Mon Sep 17 00:00:00 2001 From: MrScripty Date: Thu, 19 Feb 2026 20:01:28 -0800 Subject: [PATCH 46/46] refactor(core): route API key handling through auth persistence layer Settings page now reads keys from auth session cache when authenticated (falling back to os.environ for unauthenticated/local use) and persists updates via auth_wrapper.update_api_key() to the database, instead of only setting os.environ directly. Co-Authored-By: Claude Opus 4.6 --- core/pages.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/core/pages.py b/core/pages.py index e8d5596..aef222a 100644 --- a/core/pages.py +++ b/core/pages.py @@ -10,12 +10,31 @@ import streamlit as st from postgrest.exceptions import APIError +from .auth_wrapper import get_auth from .content_display import create_enhanced_aurora_content_card from .exceptions import APIClientError, DatabaseError, FileProcessingError from .supabase_integration import get_supabase_client from .utils import safe_path +def _get_api_key(env_var: str) -> str: + """Get API key from auth session cache (if authenticated) or environment.""" + auth = get_auth() + if auth.is_authenticated(): + stored = auth.get_api_keys() + if env_var in stored: + return stored[env_var] + return os.getenv(env_var, "") + + +def _set_api_key(env_var: str, value: str) -> None: + """Persist API key via auth wrapper and set in environment for SDK clients.""" + os.environ[env_var] = value + auth = get_auth() + if auth.is_authenticated(): + auth.update_api_key(env_var, value) + + def show_settings_page(): """Settings and configuration page.""" st.markdown("### Settings & Configuration") @@ -46,10 +65,10 @@ def show_settings_page(): with col1: st.markdown("**OpenAI**") openai_key = 
st.text_input( - "OpenAI API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""), help="Your OpenAI API key" + "OpenAI API Key", type="password", value=_get_api_key("OPENAI_API_KEY"), help="Your OpenAI API key" ) if openai_key: - os.environ["OPENAI_API_KEY"] = openai_key + _set_api_key("OPENAI_API_KEY", openai_key) st.success("OpenAI key configured") openai_model = st.selectbox( @@ -64,11 +83,11 @@ def show_settings_page(): anthropic_key = st.text_input( "Anthropic API Key", type="password", - value=os.getenv("ANTHROPIC_API_KEY", ""), + value=_get_api_key("ANTHROPIC_API_KEY"), help="Your Anthropic API key", ) if anthropic_key: - os.environ["ANTHROPIC_API_KEY"] = anthropic_key + _set_api_key("ANTHROPIC_API_KEY", anthropic_key) st.success("Anthropic key configured") anthropic_model = st.selectbox( @@ -83,11 +102,11 @@ def show_settings_page(): grok_key = st.text_input( "Grok API Key", type="password", - value=os.getenv("GROK_API_KEY", ""), + value=_get_api_key("GROK_API_KEY"), help="Your xAI / Grok API key", ) if grok_key: - os.environ["GROK_API_KEY"] = grok_key + _set_api_key("GROK_API_KEY", grok_key) st.success("Grok key configured") grok_model = st.selectbox( @@ -102,15 +121,15 @@ def show_settings_page(): notion_key = st.text_input( "Notion API Key", type="password", - value=os.getenv("NOTION_API_KEY", ""), + value=_get_api_key("NOTION_API_KEY"), help="Your Notion integration token", ) if notion_key: - os.environ["NOTION_API_KEY"] = notion_key + _set_api_key("NOTION_API_KEY", notion_key) - notion_db = st.text_input("Notion Database ID", value=os.getenv("NOTION_DATABASE_ID", "")) + notion_db = st.text_input("Notion Database ID", value=_get_api_key("NOTION_DATABASE_ID")) if notion_db: - os.environ["NOTION_DATABASE_ID"] = notion_db + _set_api_key("NOTION_DATABASE_ID", notion_db) if notion_key and notion_db: st.success("Notion configured")