diff --git a/.jules/bolt.md b/.jules/bolt.md index 956273fc..5dc1ba63 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -89,3 +89,7 @@ ## 2026-05-18 - Jaccard Similarity Optimization via Set Arithmetic **Learning:** In retrieval loops calculating Jaccard similarity (e.g. RAG), explicitly building a union set `A.union(B)` is expensive due to memory allocation and population. **Action:** Use the inclusion-exclusion principle $|A \cup B| = |A| + |B| - |A \cap B|$ to calculate union size in O(1) arithmetic time after calculating the intersection. Pre-calculate $|B|$ (token count) to further reduce overhead. Use `isdisjoint()` for fast early-exit. + +## 2026-05-18 - Aggregation Query Optimization +**Learning:** In SQLite/Postgres workloads, when aggregating counts over a categorical column (like `confirmation_type`), a standard `GROUP BY` query is measurably faster than selecting multiple `func.sum(case(...))` expressions. Each `case` expression must be evaluated against every row, once per requested column, whereas `GROUP BY` lets the database's grouping engine bucket each row in a single pass. Additionally, an inline import inside a frequently called function (like `from sqlalchemy import case`) re-executes the import statement on every call (a `sys.modules` lookup plus a name binding), adding small but recurring overhead to every request. +**Action:** Prefer `GROUP BY` for simple categorical aggregations instead of building complex `sum(case())` constructs. Because `GROUP BY` returns no row for a category with zero matches, default missing categories to 0 when reading the result (e.g. `counts.get("confirmed", 0)`). Move inline imports to the top of the file so hot paths do not pay the import cost on each call. 
diff --git a/backend/closure_service.py b/backend/closure_service.py index f4ecf984..e7328161 100644 --- a/backend/closure_service.py +++ b/backend/closure_service.py @@ -139,14 +139,15 @@ def check_and_finalize_closure(grievance_id: int, db: Session) -> dict: ).scalar() # Get all confirmation counts in a single query instead of multiple round-trips - from sqlalchemy import case - stats = db.query( - func.sum(case((ClosureConfirmation.confirmation_type == 'confirmed', 1), else_=0)).label('confirmed'), - func.sum(case((ClosureConfirmation.confirmation_type == 'disputed', 1), else_=0)).label('disputed') - ).filter(ClosureConfirmation.grievance_id == grievance_id).first() - - confirmations_count = stats.confirmed or 0 - disputes_count = stats.disputed or 0 + # Optimized: Group by is faster than sum(case()) in SQLite/Postgres for this workload + counts = db.query( + ClosureConfirmation.confirmation_type, + func.count(ClosureConfirmation.id) + ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all() + + counts_dict = {ctype: count for ctype, count in counts} + confirmations_count = counts_dict.get("confirmed", 0) + disputes_count = counts_dict.get("disputed", 0) required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD)) diff --git a/backend/routers/grievances.py b/backend/routers/grievances.py index 9aa24312..6f4c66a1 100644 --- a/backend/routers/grievances.py +++ b/backend/routers/grievances.py @@ -435,14 +435,15 @@ def get_closure_status( ).scalar() # Get all confirmation counts in a single query instead of multiple round-trips - from sqlalchemy import case - stats = db.query( - func.sum(case((ClosureConfirmation.confirmation_type == 'confirmed', 1), else_=0)).label('confirmed'), - func.sum(case((ClosureConfirmation.confirmation_type == 'disputed', 1), else_=0)).label('disputed') - ).filter(ClosureConfirmation.grievance_id == grievance_id).first() + # Optimized: Group by is 
faster than sum(case()) in SQLite/Postgres for this workload + counts = db.query( + ClosureConfirmation.confirmation_type, + func.count(ClosureConfirmation.id) + ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all() - confirmations_count = stats.confirmed or 0 - disputes_count = stats.disputed or 0 + counts_dict = {ctype: count for ctype, count in counts} + confirmations_count = counts_dict.get("confirmed", 0) + disputes_count = counts_dict.get("disputed", 0) required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD))