From 95d1ecf77b9ca26830e9b214a1c01134ed9efb83 Mon Sep 17 00:00:00 2001 From: Milos Marinkovic Date: Tue, 14 Apr 2026 22:20:37 +0200 Subject: [PATCH 1/3] Make scheduled data cleanups broader --- src/db/crud/chat_message.py | 8 ++ src/db/crud/chat_message_attachment.py | 13 +++ src/db/crud/price_alert.py | 8 ++ src/db/crud/sponsorship.py | 9 +++ src/db/crud/tools_cache.py | 2 +- src/di/di.py | 10 +++ .../accounting/usage/usage_record_repo.py | 7 ++ src/features/cleanup/cleanup_service.py | 63 +++++++++++++++ src/main.py | 12 +-- src/util/config.py | 9 +++ test/db/crud/test_chat_message.py | 42 ++++++++++ test/db/crud/test_chat_message_attachment.py | 44 ++++++++++ test/db/crud/test_price_alert.py | 37 +++++++++ test/db/crud/test_sponsorship.py | 41 +++++++++- test/db/crud/test_tools_cache_crud.py | 8 +- .../usage/test_usage_record_repo.py | 24 +++++- test/features/cleanup/test_cleanup_service.py | 81 +++++++++++++++++++ test/util/test_config.py | 9 +++ 18 files changed, 413 insertions(+), 14 deletions(-) create mode 100644 src/features/cleanup/cleanup_service.py create mode 100644 test/features/cleanup/test_cleanup_service.py diff --git a/src/db/crud/chat_message.py b/src/db/crud/chat_message.py index 21be290c..49452715 100644 --- a/src/db/crud/chat_message.py +++ b/src/db/crud/chat_message.py @@ -1,3 +1,4 @@ +from datetime import datetime from uuid import UUID from sqlalchemy import desc @@ -64,3 +65,10 @@ def delete(self, chat_id: UUID, message_id: str) -> ChatMessageDB | None: self._db.delete(chat_message) self._db.commit() return chat_message + + def delete_older_than(self, cutoff: datetime) -> int: + deleted = self._db.query(ChatMessageDB).filter( + ChatMessageDB.sent_at < cutoff, + ).delete(synchronize_session = False) + self._db.commit() + return deleted diff --git a/src/db/crud/chat_message_attachment.py b/src/db/crud/chat_message_attachment.py index a7cc2540..837a3636 100644 --- a/src/db/crud/chat_message_attachment.py +++ 
b/src/db/crud/chat_message_attachment.py @@ -1,7 +1,10 @@ +from datetime import datetime from uuid import UUID +from sqlalchemy import select, tuple_ from sqlalchemy.orm import Session +from db.model.chat_message import ChatMessageDB from db.model.chat_message_attachment import ChatMessageAttachmentDB from db.schema.chat_message_attachment import ChatMessageAttachmentSave from util.functions import generate_short_uuid @@ -67,3 +70,13 @@ def delete(self, attachment_id: str) -> ChatMessageAttachmentDB | None: self._db.delete(attachment) self._db.commit() return attachment + + def delete_by_old_messages(self, cutoff: datetime) -> int: + old_message_pairs = select(ChatMessageDB.chat_id, ChatMessageDB.message_id).where( + ChatMessageDB.sent_at < cutoff, + ) + deleted = self._db.query(ChatMessageAttachmentDB).filter( + tuple_(ChatMessageAttachmentDB.chat_id, ChatMessageAttachmentDB.message_id).in_(old_message_pairs), + ).delete(synchronize_session = False) + self._db.commit() + return deleted diff --git a/src/db/crud/price_alert.py b/src/db/crud/price_alert.py index 1357e852..117fb7da 100644 --- a/src/db/crud/price_alert.py +++ b/src/db/crud/price_alert.py @@ -1,3 +1,4 @@ +from datetime import datetime from uuid import UUID from sqlalchemy.orm import Session @@ -58,3 +59,10 @@ def delete(self, chat_id: UUID, base_currency: str, desired_currency: str) -> Pr self._db.delete(price_alert) self._db.commit() return price_alert + + def delete_stale(self, cutoff: datetime) -> int: + deleted = self._db.query(PriceAlertDB).filter( + PriceAlertDB.last_price_time < cutoff, + ).delete(synchronize_session = False) + self._db.commit() + return deleted diff --git a/src/db/crud/sponsorship.py b/src/db/crud/sponsorship.py index eae07b09..4e2b20b4 100644 --- a/src/db/crud/sponsorship.py +++ b/src/db/crud/sponsorship.py @@ -1,3 +1,4 @@ +from datetime import datetime from uuid import UUID from sqlalchemy.orm import Session @@ -70,3 +71,11 @@ def delete_all_by_receiver(self, receiver_id: 
UUID) -> int: ).delete(synchronize_session = False) # optimizes by assuming no other session will use these objects self._db.commit() return result + + def delete_unaccepted_older_than(self, cutoff: datetime) -> int: + deleted = self._db.query(SponsorshipDB).filter( + SponsorshipDB.accepted_at.is_(None), + SponsorshipDB.sponsored_at < cutoff, + ).delete(synchronize_session = False) + self._db.commit() + return deleted diff --git a/src/db/crud/tools_cache.py b/src/db/crud/tools_cache.py index b119b1e7..5ec02ac8 100644 --- a/src/db/crud/tools_cache.py +++ b/src/db/crud/tools_cache.py @@ -58,7 +58,7 @@ def delete(self, key: str) -> ToolsCacheDB | None: def delete_expired(self) -> int: expired = self._db.query(ToolsCacheDB).filter( ToolsCacheDB.expires_at < datetime.now(), - ).delete() + ).delete(synchronize_session = False) self._db.commit() return expired diff --git a/src/di/di.py b/src/di/di.py index f581a971..181d123f 100644 --- a/src/di/di.py +++ b/src/di/di.py @@ -68,6 +68,7 @@ from features.chat.whatsapp.sdk.whatsapp_bot_sdk import WhatsAppBotSDK from features.chat.whatsapp.whatsapp_data_resolver import WhatsAppDataResolver from features.chat.whatsapp.whatsapp_domain_mapper import WhatsAppDomainMapper + from features.cleanup.cleanup_service import CleanupService from features.connect.profile_connect_service import ProfileConnectService from features.currencies.exchange_rate_fetcher import ExchangeRateFetcher from features.documents.document_search import DocumentSearch @@ -115,6 +116,7 @@ class DI: _usage_record_repo: "UsageRecordRepository | None" _purchase_record_repo: "PurchaseRecordRepository | None" # Services + _cleanup_service: "CleanupService | None" _sponsorship_service: "SponsorshipService | None" _credit_transfer_service: "CreditTransferService | None" _profile_connect_service: "ProfileConnectService | None" @@ -171,6 +173,7 @@ def __init__( self._usage_record_repo = None self._purchase_record_repo = None # Services + self._cleanup_service = None 
self._sponsorship_service = None self._credit_transfer_service = None self._profile_connect_service = None @@ -394,6 +397,13 @@ def purchase_record_repo(self) -> "PurchaseRecordRepository": # === Services === + @property + def cleanup_service(self) -> "CleanupService": + if self._cleanup_service is None: + from features.cleanup.cleanup_service import CleanupService + self._cleanup_service = CleanupService(self) + return self._cleanup_service + @property def sponsorship_service(self) -> "SponsorshipService": if self._sponsorship_service is None: diff --git a/src/features/accounting/usage/usage_record_repo.py b/src/features/accounting/usage/usage_record_repo.py index 1a35e27d..bb841f7e 100644 --- a/src/features/accounting/usage/usage_record_repo.py +++ b/src/features/accounting/usage/usage_record_repo.py @@ -228,3 +228,10 @@ def _build_user_query( base_query = base_query.filter(UsageRecordDB.timestamp <= end_date) return base_query + + def delete_older_than(self, cutoff: datetime) -> int: + deleted = self._db.query(UsageRecordDB).filter( + UsageRecordDB.timestamp < cutoff, + ).delete(synchronize_session = False) + self._db.commit() + return deleted diff --git a/src/features/cleanup/cleanup_service.py b/src/features/cleanup/cleanup_service.py new file mode 100644 index 00000000..94356f64 --- /dev/null +++ b/src/features/cleanup/cleanup_service.py @@ -0,0 +1,63 @@ +from dataclasses import dataclass, field +from datetime import datetime, timedelta + +from util import log +from util.config import config + + +@dataclass +class CleanupResult: + attachments_deleted: int = field(default = 0) + messages_deleted: int = field(default = 0) + cache_entries_cleared: int = field(default = 0) + usage_records_deleted: int = field(default = 0) + price_alerts_deleted: int = field(default = 0) + sponsorships_deleted: int = field(default = 0) + + +class CleanupService: + + def __init__(self, di): + self.__di = di + + def run(self) -> CleanupResult: + result = CleanupResult() + 
log.i("Cleanup starting...") + + message_cutoff = datetime.now() - timedelta(days = config.cleanup_message_retention_days) + try: + result.attachments_deleted = self.__di.chat_message_attachment_crud.delete_by_old_messages(message_cutoff) + log.i(f" Cleanup phase 1A: deleted {result.attachments_deleted} attachments") + result.messages_deleted = self.__di.chat_message_crud.delete_older_than(message_cutoff) + log.i(f" Cleanup phase 1B: deleted {result.messages_deleted} messages") + except Exception as e: + log.e(f" Cleanup phase 1 (messages / attachments) failed: {e}") + + try: + result.cache_entries_cleared = self.__di.tools_cache_crud.delete_expired() + log.i(f" Cleanup phase 2: cleared {result.cache_entries_cleared} cache entries") + except Exception as e: + log.e(f" Cleanup phase 2 (cache) failed: {e}") + + try: + result.usage_records_deleted = self.__di.usage_record_repo.delete_older_than(message_cutoff) + log.i(f" Cleanup phase 3: deleted {result.usage_records_deleted} usage records") + except Exception as e: + log.e(f" Cleanup phase 3 (usage records) failed: {e}") + + try: + price_alert_cutoff = datetime.now() - timedelta(days = config.cleanup_price_alert_staleness_days) + result.price_alerts_deleted = self.__di.price_alert_crud.delete_stale(price_alert_cutoff) + log.i(f" Cleanup phase 4: deleted {result.price_alerts_deleted} price alerts") + except Exception as e: + log.e(f" Cleanup phase 4 (price alerts) failed: {e}") + + try: + sponsorship_cutoff = datetime.now() - timedelta(days = config.cleanup_sponsorship_staleness_days) + result.sponsorships_deleted = self.__di.sponsorship_crud.delete_unaccepted_older_than(sponsorship_cutoff) + log.i(f" Cleanup phase 5: deleted {result.sponsorships_deleted} sponsorships") + except Exception as e: + log.e(f" Cleanup phase 5 (sponsorships) failed: {e}") + + log.i("Cleanup completed.") + return result diff --git a/src/main.py b/src/main.py index 5c8c5d91..050dbc61 100644 --- a/src/main.py +++ b/src/main.py @@ -1,3 +1,4 @@ 
+import dataclasses import multiprocessing import os import random @@ -185,14 +186,15 @@ def notify_of_release( return respond_with_summary(payload, di) -@app.post("/task/clear-expired-cache") -def clear_expired_cache( +@app.post("/task/cleanup") +def cleanup( db = Depends(get_session), _ = Depends(verify_api_key), ) -> dict: - cleared_count = DI(db).tools_cache_crud.delete_expired() - log.i(f"Cleared expired cache entries: {cleared_count}") - return {"cleared_entries_count": cleared_count} + result = DI(db).cleanup_service.run() + result_map = dataclasses.asdict(result) + log.i("Responding to cleanup request", result_map) + return result_map @app.get("/settings/{settings_type}/{resource_id}") diff --git a/src/util/config.py b/src/util/config.py index 04a26aaa..576b494c 100644 --- a/src/util/config.py +++ b/src/util/config.py @@ -51,6 +51,9 @@ class Config(metaclass = Singleton): whatsapp_bot_phone_number: str chat_history_depth: int chat_debounce_delay_s: float + cleanup_message_retention_days: int + cleanup_price_alert_staleness_days: int + cleanup_sponsorship_staleness_days: int github_issues_repo: str issue_templates_abs_path: str jwt_expires_in_minutes: int @@ -147,6 +150,9 @@ def __init__( def_whatsapp_bot_phone_number: str = "11234567890", def_chat_history_depth: int = 30, def_chat_debounce_delay_s: float = 1.0, + def_cleanup_message_retention_days: int = 30, + def_cleanup_price_alert_staleness_days: int = 360, + def_cleanup_sponsorship_staleness_days: int = 30, def_github_issues_repo: str = "appifyhub/agent-backend", def_issue_templates_path: str = ".github/ISSUE_TEMPLATE", def_jwt_expires_in_minutes: int = 30, @@ -215,6 +221,9 @@ def __init__( self.whatsapp_bot_phone_number = self.__env("WHATSAPP_BOT_PHONE_NUMBER", lambda: def_whatsapp_bot_phone_number) self.chat_history_depth = int(self.__env("CHAT_HISTORY_DEPTH", lambda: str(def_chat_history_depth))) self.chat_debounce_delay_s = float(self.__env("CHAT_DEBOUNCE_DELAY_S", lambda: 
str(def_chat_debounce_delay_s))) + self.cleanup_message_retention_days = int(self.__env("CLEANUP_MESSAGE_RETENTION_DAYS", lambda: str(def_cleanup_message_retention_days))) + self.cleanup_price_alert_staleness_days = int(self.__env("CLEANUP_PRICE_ALERT_STALENESS_DAYS", lambda: str(def_cleanup_price_alert_staleness_days))) + self.cleanup_sponsorship_staleness_days = int(self.__env("CLEANUP_SPONSORSHIP_STALENESS_DAYS", lambda: str(def_cleanup_sponsorship_staleness_days))) self.github_issues_repo = self.__env("THE_AGENT_ISSUES_REPO", lambda: def_github_issues_repo) self.issue_templates_abs_path = self.__env("THE_AGENT_ISSUE_TEMPLATES_PATH", lambda: def_issue_templates_path) self.jwt_expires_in_minutes = int(self.__env("JWT_EXPIRES_IN_MINUTES", lambda: str(def_jwt_expires_in_minutes))) diff --git a/test/db/crud/test_chat_message.py b/test/db/crud/test_chat_message.py index 2ef53c08..8deb6519 100644 --- a/test/db/crud/test_chat_message.py +++ b/test/db/crud/test_chat_message.py @@ -303,3 +303,45 @@ def test_delete_chat_message(self): self.assertIsNone( self.sql.chat_message_crud().get(created_chat_message.chat_id, created_chat_message.message_id), ) + + def test_delete_older_than(self): + chat = self.sql.chat_config_crud().create( + ChatConfigSave(external_id = "chat1", chat_type = ChatConfigDB.ChatType.telegram), + ) + user = self.sql.user_crud().create( + UserSave( + full_name = "Test User", + telegram_username = "test-user", + telegram_chat_id = "123456", + telegram_user_id = 123456, + open_ai_key = SecretStr("test-key"), + group = UserDB.Group.standard, + ), + ) + old_message = self.sql.chat_message_crud().create( + ChatMessageSave( + chat_id = chat.chat_id, + message_id = "old_msg", + author_id = user.id, + sent_at = datetime.now() - timedelta(days = 31), + text = "Old message", + ), + ) + recent_message = self.sql.chat_message_crud().create( + ChatMessageSave( + chat_id = chat.chat_id, + message_id = "recent_msg", + author_id = user.id, + sent_at = datetime.now(), 
+ text = "Recent message", + ), + ) + old_chat_id, old_message_id = old_message.chat_id, old_message.message_id + recent_chat_id, recent_message_id = recent_message.chat_id, recent_message.message_id + cutoff = datetime.now() - timedelta(days = 30) + + deleted_count = self.sql.chat_message_crud().delete_older_than(cutoff) + + self.assertEqual(deleted_count, 1) + self.assertIsNone(self.sql.chat_message_crud().get(old_chat_id, old_message_id)) + self.assertIsNotNone(self.sql.chat_message_crud().get(recent_chat_id, recent_message_id)) diff --git a/test/db/crud/test_chat_message_attachment.py b/test/db/crud/test_chat_message_attachment.py index 6aff7b3d..572b004c 100644 --- a/test/db/crud/test_chat_message_attachment.py +++ b/test/db/crud/test_chat_message_attachment.py @@ -1,4 +1,5 @@ import unittest +from datetime import datetime, timedelta from uuid import UUID from db.sql_util import SQLUtil @@ -464,3 +465,46 @@ def test_integration_id_and_external_id_relationship(self): # Both lookups should return the same record self.assertEqual(by_id.id, "custom_id_123") self.assertEqual(by_id.external_id, "telegram_external_456") + + def test_delete_by_old_messages(self): + chat = self.sql.chat_config_crud().create( + ChatConfigSave(external_id = "chat1", chat_type = ChatConfigDB.ChatType.telegram), + ) + old_message = self.sql.chat_message_crud().create( + ChatMessageSave( + chat_id = chat.chat_id, + message_id = "old_msg", + sent_at = datetime.now() - timedelta(days = 31), + text = "Old", + ), + ) + recent_message = self.sql.chat_message_crud().create( + ChatMessageSave( + chat_id = chat.chat_id, + message_id = "recent_msg", + sent_at = datetime.now(), + text = "Recent", + ), + ) + old_attachment = self.sql.chat_message_attachment_crud().create( + ChatMessageAttachmentSave( + id = "attach_old", + chat_id = old_message.chat_id, + message_id = old_message.message_id, + ), + ) + recent_attachment = self.sql.chat_message_attachment_crud().create( + ChatMessageAttachmentSave( + 
id = "attach_recent", + chat_id = recent_message.chat_id, + message_id = recent_message.message_id, + ), + ) + old_attachment_id, recent_attachment_id = old_attachment.id, recent_attachment.id + cutoff = datetime.now() - timedelta(days = 30) + + deleted_count = self.sql.chat_message_attachment_crud().delete_by_old_messages(cutoff) + + self.assertEqual(deleted_count, 1) + self.assertIsNone(self.sql.chat_message_attachment_crud().get(old_attachment_id)) + self.assertIsNotNone(self.sql.chat_message_attachment_crud().get(recent_attachment_id)) diff --git a/test/db/crud/test_price_alert.py b/test/db/crud/test_price_alert.py index 20e54b86..a196af4e 100644 --- a/test/db/crud/test_price_alert.py +++ b/test/db/crud/test_price_alert.py @@ -266,3 +266,40 @@ def test_delete_price_alert(self): created_price_alert.desired_currency, ), ) + + def test_delete_stale(self): + chat = self.sql.chat_config_crud().create( + ChatConfigSave(external_id = "chat1", chat_type = ChatConfigDB.ChatType.telegram), + ) + user_id = self._create_test_user() + stale_alert = self.sql.price_alert_crud().create( + PriceAlertSave( + chat_id = chat.chat_id, + owner_id = user_id, + base_currency = "USD", + desired_currency = "EUR", + threshold_percent = 5, + last_price = 0.85, + last_price_time = datetime.now() - timedelta(days = 361), + ), + ) + fresh_alert = self.sql.price_alert_crud().create( + PriceAlertSave( + chat_id = chat.chat_id, + owner_id = user_id, + base_currency = "BTC", + desired_currency = "USD", + threshold_percent = 10, + last_price = 50000.0, + last_price_time = datetime.now(), + ), + ) + stale_chat_id, stale_base, stale_desired = stale_alert.chat_id, stale_alert.base_currency, stale_alert.desired_currency + fresh_chat_id, fresh_base, fresh_desired = fresh_alert.chat_id, fresh_alert.base_currency, fresh_alert.desired_currency + cutoff = datetime.now() - timedelta(days = 360) + + deleted_count = self.sql.price_alert_crud().delete_stale(cutoff) + + self.assertEqual(deleted_count, 1) + 
self.assertIsNone(self.sql.price_alert_crud().get(stale_chat_id, stale_base, stale_desired)) + self.assertIsNotNone(self.sql.price_alert_crud().get(fresh_chat_id, fresh_base, fresh_desired)) diff --git a/test/db/crud/test_sponsorship.py b/test/db/crud/test_sponsorship.py index 51a07acf..9a1ab1b3 100644 --- a/test/db/crud/test_sponsorship.py +++ b/test/db/crud/test_sponsorship.py @@ -1,8 +1,9 @@ import unittest -from datetime import datetime +from datetime import datetime, timedelta from db.sql_util import SQLUtil +from db.model.sponsorship import SponsorshipDB from db.schema.sponsorship import SponsorshipSave from db.schema.user import UserSave @@ -173,3 +174,41 @@ def test_delete_all_by_receiver(self): self.assertEqual(deleted_count, 2) remaining_sponsorships = self.sql.sponsorship_crud().get_all_by_receiver(receiver.id) self.assertEqual(len(remaining_sponsorships), 0) + + def test_delete_unaccepted_older_than(self): + sponsor = self.sql.user_crud().create(UserSave()) + receiver1 = self.sql.user_crud().create(UserSave()) + receiver2 = self.sql.user_crud().create(UserSave()) + receiver3 = self.sql.user_crud().create(UserSave()) + + old_unaccepted = self.sql.sponsorship_crud().create( + SponsorshipSave(sponsor_id = sponsor.id, receiver_id = receiver1.id), + ) + self.sql.sponsorship_crud().create( + SponsorshipSave(sponsor_id = sponsor.id, receiver_id = receiver2.id), + ) + old_accepted = self.sql.sponsorship_crud().create( + SponsorshipSave(sponsor_id = sponsor.id, receiver_id = receiver3.id, accepted_at = datetime.now()), + ) + + session = self.sql.get_session() + old_unaccepted_db = session.query(SponsorshipDB).filter( + SponsorshipDB.sponsor_id == old_unaccepted.sponsor_id, + SponsorshipDB.receiver_id == old_unaccepted.receiver_id, + ).first() + old_unaccepted_db.sponsored_at = datetime.now() - timedelta(days = 31) + old_accepted_db = session.query(SponsorshipDB).filter( + SponsorshipDB.sponsor_id == old_accepted.sponsor_id, + SponsorshipDB.receiver_id == 
old_accepted.receiver_id, + ).first() + old_accepted_db.sponsored_at = datetime.now() - timedelta(days = 31) + session.commit() + + cutoff = datetime.now() - timedelta(days = 30) + + deleted_count = self.sql.sponsorship_crud().delete_unaccepted_older_than(cutoff) + + self.assertEqual(deleted_count, 1) + self.assertIsNone(self.sql.sponsorship_crud().get(sponsor.id, receiver1.id)) + self.assertIsNotNone(self.sql.sponsorship_crud().get(sponsor.id, receiver2.id)) + self.assertIsNotNone(self.sql.sponsorship_crud().get(sponsor.id, receiver3.id)) diff --git a/test/db/crud/test_tools_cache_crud.py b/test/db/crud/test_tools_cache_crud.py index 05bcd721..babd0775 100644 --- a/test/db/crud/test_tools_cache_crud.py +++ b/test/db/crud/test_tools_cache_crud.py @@ -125,17 +125,17 @@ def test_delete_expired_tools_cache(self): ToolsCacheSave(key = "tool2", value = "value2", expires_at = datetime.now() - timedelta(days = 1)), ) + not_expired_key, expired_key = tools_cache_not_expired.key, tools_cache_expired.key + # Deleting expired cache entries count_deleted = self.sql.tools_cache_crud().delete_expired() self.assertEqual(count_deleted, 1) # Asserting non-expired entry still exists - fetched_non_expired = self.sql.tools_cache_crud().get(tools_cache_not_expired.key) - self.assertIsNotNone(fetched_non_expired) + self.assertIsNotNone(self.sql.tools_cache_crud().get(not_expired_key)) # Asserting expired entry does not exist - fetched_expired = self.sql.tools_cache_crud().get(tools_cache_expired.key) - self.assertIsNone(fetched_expired) + self.assertIsNone(self.sql.tools_cache_crud().get(expired_key)) def test_create_key(self): key = ToolsCacheCRUD.create_key("prefix", "identifier") diff --git a/test/features/accounting/usage/test_usage_record_repo.py b/test/features/accounting/usage/test_usage_record_repo.py index 4eaa3c4c..86514d4e 100644 --- a/test/features/accounting/usage/test_usage_record_repo.py +++ b/test/features/accounting/usage/test_usage_record_repo.py @@ -373,7 +373,9 @@ 
def test_get_by_user_only_transfers(self): def test_get_aggregates_includes_transfers_by_default(self): self.repo.create(self._create_record(tool = GPT_4O, tool_purpose = ToolType.chat, total_cost_credits = 10)) - self.repo.create(self._create_record(tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50)) + self.repo.create(self._create_record( + tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50, + )) stats = self.repo.get_aggregates_by_user(self.user.id) @@ -383,7 +385,9 @@ def test_get_aggregates_includes_transfers_by_default(self): def test_get_aggregates_exclude_transfers(self): self.repo.create(self._create_record(tool = GPT_4O, tool_purpose = ToolType.chat, total_cost_credits = 10)) - self.repo.create(self._create_record(tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50)) + self.repo.create(self._create_record( + tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50, + )) stats = self.repo.get_aggregates_by_user(self.user.id, include_transfers = False) @@ -393,7 +397,9 @@ def test_get_aggregates_exclude_transfers(self): def test_get_aggregates_only_transfers(self): self.repo.create(self._create_record(tool = GPT_4O, tool_purpose = ToolType.chat, total_cost_credits = 10)) - self.repo.create(self._create_record(tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50)) + self.repo.create(self._create_record( + tool = TRANSFER_TOOL, tool_purpose = ToolType.credit_transfer, total_cost_credits = 50, + )) stats = self.repo.get_aggregates_by_user(self.user.id, only_transfers = True) @@ -471,3 +477,15 @@ def test_get_aggregates_includes_incoming_transfer_as_counterpart(self): self.assertEqual(stats.total_records, 2) self.assertEqual(stats.total_cost_credits, 35.0) + + def test_delete_older_than(self): + self.repo.create(self._create_record(timestamp = datetime.now(timezone.utc) - timedelta(days = 
31))) + self.repo.create(self._create_record(timestamp = datetime.now(timezone.utc) - timedelta(days = 31))) + self.repo.create(self._create_record(timestamp = datetime.now(timezone.utc))) + cutoff = datetime.now(timezone.utc) - timedelta(days = 30) + + deleted_count = self.repo.delete_older_than(cutoff) + + self.assertEqual(deleted_count, 2) + remaining = self.repo.get_by_user(self.user.id) + self.assertEqual(len(remaining), 1) diff --git a/test/features/cleanup/test_cleanup_service.py b/test/features/cleanup/test_cleanup_service.py new file mode 100644 index 00000000..01bd02cc --- /dev/null +++ b/test/features/cleanup/test_cleanup_service.py @@ -0,0 +1,81 @@ +import unittest +from unittest.mock import MagicMock, patch + +from features.cleanup.cleanup_service import CleanupResult, CleanupService + + +class CleanupServiceTest(unittest.TestCase): + + def _make_service( + self, + attachments_deleted = 0, + messages_deleted = 0, + cache_cleared = 0, + usage_deleted = 0, + alerts_deleted = 0, + sponsorships_deleted = 0, + ) -> CleanupService: + di = MagicMock() + di.chat_message_attachment_crud.delete_by_old_messages.return_value = attachments_deleted + di.chat_message_crud.delete_older_than.return_value = messages_deleted + di.tools_cache_crud.delete_expired.return_value = cache_cleared + di.usage_record_repo.delete_older_than.return_value = usage_deleted + di.price_alert_crud.delete_stale.return_value = alerts_deleted + di.sponsorship_crud.delete_unaccepted_older_than.return_value = sponsorships_deleted + return CleanupService(di) + + @patch("features.cleanup.cleanup_service.log") + def test_run_returns_all_phase_counts(self, _): + service = self._make_service( + attachments_deleted = 5, + messages_deleted = 10, + cache_cleared = 3, + usage_deleted = 7, + alerts_deleted = 2, + sponsorships_deleted = 1, + ) + + result = service.run() + + self.assertIsInstance(result, CleanupResult) + self.assertEqual(result.attachments_deleted, 5) + 
self.assertEqual(result.messages_deleted, 10) + self.assertEqual(result.cache_entries_cleared, 3) + self.assertEqual(result.usage_records_deleted, 7) + self.assertEqual(result.price_alerts_deleted, 2) + self.assertEqual(result.sponsorships_deleted, 1) + + @patch("features.cleanup.cleanup_service.log") + def test_attachment_failure_skips_message_deletion(self, _): + service = self._make_service() + service._CleanupService__di.chat_message_attachment_crud.delete_by_old_messages.side_effect = RuntimeError("DB error") + + result = service.run() + + service._CleanupService__di.chat_message_crud.delete_older_than.assert_not_called() + self.assertEqual(result.attachments_deleted, 0) + self.assertEqual(result.messages_deleted, 0) + + @patch("features.cleanup.cleanup_service.log") + def test_phase_failure_does_not_block_subsequent_phases(self, _): + service = self._make_service( + cache_cleared = 3, + alerts_deleted = 2, + sponsorships_deleted = 1, + ) + service._CleanupService__di.usage_record_repo.delete_older_than.side_effect = RuntimeError("DB error") + + result = service.run() + + self.assertEqual(result.cache_entries_cleared, 3) + self.assertEqual(result.usage_records_deleted, 0) + self.assertEqual(result.price_alerts_deleted, 2) + self.assertEqual(result.sponsorships_deleted, 1) + + @patch("features.cleanup.cleanup_service.log") + def test_run_with_zero_deletions(self, _): + service = self._make_service() + + result = service.run() + + self.assertEqual(result, CleanupResult()) diff --git a/test/util/test_config.py b/test/util/test_config.py index 087c306f..25c2ad03 100644 --- a/test/util/test_config.py +++ b/test/util/test_config.py @@ -52,6 +52,9 @@ def test_default_config(self): self.assertEqual(config.whatsapp_bot_phone_number, "11234567890") self.assertEqual(config.chat_history_depth, 30) self.assertEqual(config.chat_debounce_delay_s, 1.0) + self.assertEqual(config.cleanup_message_retention_days, 30) + self.assertEqual(config.cleanup_price_alert_staleness_days, 
360) + self.assertEqual(config.cleanup_sponsorship_staleness_days, 30) self.assertEqual(config.github_issues_repo, "appifyhub/agent-backend") self.assertEqual(config.issue_templates_abs_path, ".github/ISSUE_TEMPLATE") self.assertEqual(config.jwt_expires_in_minutes, 30) @@ -117,6 +120,9 @@ def test_custom_config(self): os.environ["WHATSAPP_BOT_PHONE_NUMBER"] = "19876543210" os.environ["CHAT_HISTORY_DEPTH"] = "10" os.environ["CHAT_DEBOUNCE_DELAY_S"] = "2.5" + os.environ["CLEANUP_MESSAGE_RETENTION_DAYS"] = "60" + os.environ["CLEANUP_PRICE_ALERT_STALENESS_DAYS"] = "180" + os.environ["CLEANUP_SPONSORSHIP_STALENESS_DAYS"] = "14" os.environ["THE_AGENT_ISSUES_REPO"] = "appifyhub/the-new-agent" os.environ["THE_AGENT_ISSUE_TEMPLATES_PATH"] = "issue_templates" os.environ["JWT_EXPIRES_IN_MINUTES"] = "10" @@ -186,6 +192,9 @@ def test_custom_config(self): self.assertEqual(config.whatsapp_bot_phone_number, "19876543210") self.assertEqual(config.chat_history_depth, 10) self.assertEqual(config.chat_debounce_delay_s, 2.5) + self.assertEqual(config.cleanup_message_retention_days, 60) + self.assertEqual(config.cleanup_price_alert_staleness_days, 180) + self.assertEqual(config.cleanup_sponsorship_staleness_days, 14) self.assertEqual(config.github_issues_repo, "appifyhub/the-new-agent") self.assertEqual(config.issue_templates_abs_path, "issue_templates") self.assertEqual(config.jwt_expires_in_minutes, 10) From 4de789dca71ebd6d7704ce601ed27596f1f90728 Mon Sep 17 00:00:00 2001 From: Milos Marinkovic Date: Tue, 14 Apr 2026 22:28:50 +0200 Subject: [PATCH 2/3] Update openspec docs --- .../2026-04-14-unified-cleanup/design.md | 59 +++++++++++++++ .../2026-04-14-unified-cleanup/proposal.md | 42 +++++++++++ .../specs/cleanup-service/spec.md | 75 +++++++++++++++++++ .../2026-04-14-unified-cleanup/tasks.md | 37 +++++++++ openspec/specs/binary-search-resize/spec.md | 63 ---------------- 5 files changed, 213 insertions(+), 63 deletions(-) create mode 100644 
openspec/changes/archive/2026-04-14-unified-cleanup/design.md create mode 100644 openspec/changes/archive/2026-04-14-unified-cleanup/proposal.md create mode 100644 openspec/changes/archive/2026-04-14-unified-cleanup/specs/cleanup-service/spec.md create mode 100644 openspec/changes/archive/2026-04-14-unified-cleanup/tasks.md delete mode 100644 openspec/specs/binary-search-resize/spec.md diff --git a/openspec/changes/archive/2026-04-14-unified-cleanup/design.md b/openspec/changes/archive/2026-04-14-unified-cleanup/design.md new file mode 100644 index 00000000..212804f9 --- /dev/null +++ b/openspec/changes/archive/2026-04-14-unified-cleanup/design.md @@ -0,0 +1,59 @@ +## Context + +The backend currently has one cleanup endpoint (`POST /task/clear-expired-cache`) that calls `ToolsCacheCRUD.delete_expired()`. Other data — messages, attachments, usage records, price alerts, unaccepted sponsorships — has no expiry. All CRUD classes use individual `delete()` methods; only `ToolsCacheCRUD` has a bulk `delete_expired()`. + +The `chat_message_attachments` table has an FK to `chat_messages(chat_id, message_id)` with **no ON DELETE CASCADE**, meaning attachments must be deleted before their parent messages. + +Users are never deleted because their profiles carry phone number verification ("real human" proof) from Telegram/WhatsApp onboarding. 
+ +## Goals / Non-Goals + +**Goals:** + +- Single `POST /task/cleanup` endpoint replaces `/task/clear-expired-cache` +- Configurable retention periods via environment variables +- Bulk deletes for performance (SQLAlchemy `query.filter(...).delete()`) +- Per-phase error isolation — one phase failing doesn't block others +- Attachment-then-message ordering to respect FK constraints + +**Non-Goals:** + +- User deletion (identity anchors) +- Notifications on cleanup (decided against — too noisy) +- Background/async processing (start synchronous, revisit if needed) +- Purchase record cleanup (financial audit trail, kept forever) +- Chat config cleanup (tied to users, stay forever) + +## Decisions + +### Unified cleanup service with ordered phases + +A single `CleanupService` class runs 5 phases in a fixed order. Phases 1a/1b (attachments + messages) are wrapped in one try/except block because they have an FK dependency. Phases 2-5 are each independently wrapped. + +**Why ordered phases instead of parallel**: Simplicity. No threading complexity, no partial-failure coordination. The total volume per CRON run should be small if run regularly. If performance becomes an issue, backgrounding can be added later without changing the service interface. + +### Retention periods shared between messages and usage records + +Both messages and usage records use `cleanup_message_retention_days` (default 30). This keeps the config surface small. Usage records aren't financial records (purchase_records are), and 30 days matches message retention. + +**Alternative considered**: Separate `cleanup_usage_retention_days`. Rejected because usage records are internal activity logs, not regulated data. GDPR mandates data minimization. Purchase records (kept forever) are the financial audit trail. + +### Price alert staleness at 360 days + +`last_price_time` is updated only on alert creation and when the threshold is crossed (trigger). It is NOT updated on routine CRON price checks. 
This makes it a reliable "last relevant activity" indicator. 360 days gives wide-threshold alerts (e.g., "notify me if BTC drops 50%") nearly a full year to trigger before cleanup. + +### Bulk delete via SQLAlchemy filter + delete + +New CRUD methods use `query.filter(...).delete()` returning the count of deleted rows. This matches the existing pattern in `ToolsCacheCRUD.delete_expired()`. No need to load rows into memory. + +For the attachment cleanup, we use a subquery: delete attachments whose `(chat_id, message_id)` matches messages with `sent_at` older than the cutoff. This avoids a two-step load-then-delete. + +### No DB migration needed + +All cleanup is row deletion based on existing date columns. No new columns, no schema changes. The new config values are environment variables with defaults. + +## Risks / Trade-offs + +- **Bulk delete lock contention**: A large `DELETE` holds row-level locks on every affected row until it commits, which can stall concurrent writes to those rows on Postgres. → Mitigation: if the job runs regularly (e.g., daily), each batch is small. If the backlog is large on the first run, it may cause brief lock contention. Acceptable for a CRON-triggered maintenance task. +- **Attachment subquery complexity**: Joining attachments to messages for bulk delete is more complex than simple date filtering. → Mitigation: straightforward subquery on `sent_at`; Postgres handles this efficiently with the existing index. +- **Usage records at 30 days**: Users lose visibility into spending after 30 days. → Mitigation: the backoffice usage stats endpoint aggregates on-the-fly; users who want a longer history can export it, or the retention can be increased via config. 
diff --git a/openspec/changes/archive/2026-04-14-unified-cleanup/proposal.md b/openspec/changes/archive/2026-04-14-unified-cleanup/proposal.md new file mode 100644 index 00000000..defd8b33 --- /dev/null +++ b/openspec/changes/archive/2026-04-14-unified-cleanup/proposal.md @@ -0,0 +1,42 @@ +## Why + +The backend has a single cleanup endpoint (`/task/clear-expired-cache`) that only clears expired tool cache entries. Meanwhile, messages, attachments, usage records, price alerts, and unaccepted sponsorships accumulate indefinitely. This wastes storage, keeps stale data around, and means multiple CRON jobs would be needed as cleanup concerns grow. + +A unified `/task/cleanup` endpoint consolidates all data retention into one place, called by a single external CRON job. + +## What Changes + +- Replace `/task/clear-expired-cache` with `POST /task/cleanup` (same API key auth) +- Add a cleanup service that runs 5 phases in order: + 1. Delete attachments + messages older than 30 days (one error-handling block — attachments first because the FK has no ON DELETE CASCADE) + 2. Clear expired cache (reuse existing `ToolsCacheCRUD.delete_expired()`) + 3. Delete usage records older than 30 days + 4. Delete price alerts not created/triggered in 360 days + 5. 
Delete unaccepted sponsorships older than 30 days +- Add 3 new config values: `cleanup_message_retention_days`, `cleanup_price_alert_staleness_days`, `cleanup_sponsorship_staleness_days` +- Add bulk-delete methods to CRUDs that don't already have them +- No notifications on any cleanup action +- No background processing — runs synchronously +- Users are never deleted (identity anchors with phone number verification) + +## Capabilities + +### New Capabilities + +- `cleanup-service`: Unified data cleanup service with configurable retention periods, bulk deletes, and per-phase error isolation + +### Modified Capabilities + +## Impact + +- `src/main.py` — replace `/task/clear-expired-cache` with `/task/cleanup` +- `src/util/config.py` — add 3 new config values +- `src/db/crud/chat_message.py` — add `delete_older_than()` +- `src/db/crud/chat_message_attachment.py` — add `delete_by_old_messages()` +- `src/db/crud/usage_record_repo.py` or new CRUD — add `delete_older_than()` +- `src/db/crud/price_alert.py` — add `delete_stale()` +- `src/db/crud/sponsorship.py` — add `delete_unaccepted_older_than()` +- New cleanup service file under `src/features/` +- New test file(s) for the cleanup service and new CRUD methods +- No DB migrations (no schema changes) +- No API contract changes beyond the endpoint rename diff --git a/openspec/changes/archive/2026-04-14-unified-cleanup/specs/cleanup-service/spec.md b/openspec/changes/archive/2026-04-14-unified-cleanup/specs/cleanup-service/spec.md new file mode 100644 index 00000000..3220ec5f --- /dev/null +++ b/openspec/changes/archive/2026-04-14-unified-cleanup/specs/cleanup-service/spec.md @@ -0,0 +1,75 @@ +## ADDED Requirements + +### Requirement: Unified cleanup endpoint +The system SHALL expose `POST /task/cleanup` authenticated via API key. It SHALL replace the existing `POST /task/clear-expired-cache` endpoint. The response SHALL be a JSON object with counts of deleted items per phase. 
+ +#### Scenario: Successful cleanup run +- **WHEN** `POST /task/cleanup` is called with a valid API key +- **THEN** the system SHALL execute all cleanup phases and return a JSON response with keys: `messages_deleted`, `attachments_deleted`, `cache_entries_cleared`, `usage_records_deleted`, `price_alerts_deleted`, `sponsorships_deleted` + +#### Scenario: Unauthorized cleanup attempt +- **WHEN** `POST /task/cleanup` is called without a valid API key +- **THEN** the system SHALL return HTTP 401 + +### Requirement: Message and attachment cleanup (Phase 1) +The system SHALL delete chat message attachments and then chat messages where `sent_at` is older than `cleanup_message_retention_days` (default 30). Attachments SHALL be deleted before messages to satisfy FK constraints. Both deletions SHALL be wrapped in a single error handling block — if attachment deletion fails, message deletion SHALL be skipped. + +#### Scenario: Old messages and their attachments are deleted +- **WHEN** cleanup runs and messages exist with `sent_at` older than 30 days +- **THEN** attachments belonging to those messages SHALL be deleted first, then the messages themselves + +#### Scenario: Attachment deletion fails +- **WHEN** attachment deletion raises an exception +- **THEN** message deletion SHALL be skipped, remaining phases SHALL still execute + +### Requirement: Expired cache cleanup (Phase 2) +The system SHALL delete tool cache entries where `expires_at < now()`. This SHALL reuse the existing `ToolsCacheCRUD.delete_expired()` method. + +#### Scenario: Expired cache cleared +- **WHEN** cleanup runs and expired cache entries exist +- **THEN** those entries SHALL be deleted + +### Requirement: Usage record cleanup (Phase 3) +The system SHALL delete usage records where `timestamp` is older than `cleanup_message_retention_days` (default 30). This shares the message retention config value. 
+ +#### Scenario: Old usage records are deleted +- **WHEN** cleanup runs and usage records exist with `timestamp` older than 30 days +- **THEN** those records SHALL be deleted + +### Requirement: Price alert cleanup (Phase 4) +The system SHALL delete price alerts where `last_price_time` is older than `cleanup_price_alert_staleness_days` (default 360). + +#### Scenario: Stale price alerts are deleted +- **WHEN** cleanup runs and price alerts have `last_price_time` older than 360 days +- **THEN** those alerts SHALL be deleted + +### Requirement: Unaccepted sponsorship cleanup (Phase 5) +The system SHALL delete sponsorships where `accepted_at IS NULL` and `sponsored_at` is older than `cleanup_sponsorship_staleness_days` (default 30). + +#### Scenario: Stale unaccepted sponsorships are deleted +- **WHEN** cleanup runs and sponsorships exist where `accepted_at` is NULL and `sponsored_at` is older than 30 days +- **THEN** those sponsorships SHALL be deleted + +### Requirement: Per-phase error isolation +Phases 2, 3, 4, and 5 SHALL each be independently wrapped in error handling. A failure in any one phase SHALL NOT prevent execution of subsequent phases. Phase 1 (attachments + messages) SHALL be a single error-handling block: if attachment deletion fails, message deletion SHALL be skipped, and the remaining phases SHALL still execute. 
+ +#### Scenario: One independent phase fails +- **WHEN** phase 3 (usage records) raises an exception +- **THEN** phases 4 and 5 SHALL still execute, and the response SHALL include 0 for the failed phase's count + +### Requirement: Configurable retention periods +The system SHALL support 3 new environment-variable-backed config values with the following defaults: +- `CLEANUP_MESSAGE_RETENTION_DAYS` = 30 (used for messages, attachments, and usage records) +- `CLEANUP_PRICE_ALERT_STALENESS_DAYS` = 360 +- `CLEANUP_SPONSORSHIP_STALENESS_DAYS` = 30 + +#### Scenario: Custom retention via environment variable +- **WHEN** `CLEANUP_MESSAGE_RETENTION_DAYS` is set to 60 +- **THEN** messages, attachments, and usage records older than 60 days SHALL be deleted + +### Requirement: Bulk deletes +All cleanup phases SHALL use bulk `DELETE WHERE` queries (not row-by-row loading and deletion). This follows the existing pattern in `ToolsCacheCRUD.delete_expired()`. + +#### Scenario: Bulk message deletion +- **WHEN** 1000 messages are older than the retention period +- **THEN** they SHALL be deleted in a single SQL statement, not 1000 individual deletes diff --git a/openspec/changes/archive/2026-04-14-unified-cleanup/tasks.md b/openspec/changes/archive/2026-04-14-unified-cleanup/tasks.md new file mode 100644 index 00000000..356834e8 --- /dev/null +++ b/openspec/changes/archive/2026-04-14-unified-cleanup/tasks.md @@ -0,0 +1,37 @@ +## 1. Add config values + +- [x] 1.1 Add `cleanup_message_retention_days` (env `CLEANUP_MESSAGE_RETENTION_DAYS`, default 30) to `Config` +- [x] 1.2 Add `cleanup_price_alert_staleness_days` (env `CLEANUP_PRICE_ALERT_STALENESS_DAYS`, default 360) to `Config` +- [x] 1.3 Add `cleanup_sponsorship_staleness_days` (env `CLEANUP_SPONSORSHIP_STALENESS_DAYS`, default 30) to `Config` +- [x] 1.4 Update config test with new values + +## 2. 
Add bulk-delete CRUD methods + +- [x] 2.1 `ChatMessageAttachmentCRUD.delete_by_old_messages(cutoff: datetime) -> int` — delete attachments via subquery joining messages on `sent_at < cutoff` +- [x] 2.2 `ChatMessageCRUD.delete_older_than(cutoff: datetime) -> int` — bulk delete messages with `sent_at < cutoff` +- [x] 2.3 Add `delete_older_than(cutoff: datetime) -> int` to usage record repo — bulk delete usage records with `timestamp < cutoff` +- [x] 2.4 `PriceAlertCRUD.delete_stale(cutoff: datetime) -> int` — bulk delete alerts with `last_price_time < cutoff` +- [x] 2.5 `SponsorshipCRUD.delete_unaccepted_older_than(cutoff: datetime) -> int` — bulk delete where `accepted_at IS NULL AND sponsored_at < cutoff` +- [x] 2.6 Write tests for each new CRUD method + +## 3. Create cleanup service + +- [x] 3.1 Create `src/features/cleanup/cleanup_service.py` with `CleanupService` class +- [x] 3.2 Implement phase 1: attachments + messages (single error-handling block) +- [x] 3.3 Implement phase 2: expired cache (reuse `ToolsCacheCRUD.delete_expired()`) +- [x] 3.4 Implement phase 3: old usage records +- [x] 3.5 Implement phase 4: stale price alerts +- [x] 3.6 Implement phase 5: unaccepted sponsorships +- [x] 3.7 Return summary dict with per-phase counts +- [x] 3.8 Write tests for `CleanupService` + +## 4. Wire up endpoint + +- [x] 4.1 Replace `POST /task/clear-expired-cache` with `POST /task/cleanup` in `main.py` +- [x] 4.2 Endpoint calls `CleanupService`, returns summary JSON +- [x] 4.3 Verify API key auth works on the new endpoint + +## 5. 
Verify + +- [x] 5.1 Run full test suite — no regressions +- [x] 5.2 Run pre-commit linting diff --git a/openspec/specs/binary-search-resize/spec.md b/openspec/specs/binary-search-resize/spec.md deleted file mode 100644 index 612facd8..00000000 --- a/openspec/specs/binary-search-resize/spec.md +++ /dev/null @@ -1,63 +0,0 @@ -## Requirements - -### Requirement: Binary search convergence on scale factor -The system SHALL use binary search on scale factor to find an output size within the target band. The initial guess SHALL be computed as `sqrt(max_size_bytes / original_file_size)`, clamped to `[0.1, 1.0]`. The search space SHALL be `lo=0.1`, `hi=1.0`. - -#### Scenario: Large JPEG resized into target band -- **WHEN** a 2000x2000 JPEG image exceeding the size limit is resized with a given max_size_bytes -- **THEN** the output file size SHALL be between 90% and 100% of max_size_bytes - -#### Scenario: Large PNG resized into target band -- **WHEN** a 2000x2000 PNG image exceeding the size limit is resized with a given max_size_bytes -- **THEN** the output file size SHALL be between 90% and 100% of max_size_bytes - -#### Scenario: Large WEBP resized into target band -- **WHEN** a 2000x2000 WEBP image exceeding the size limit is resized with a given max_size_bytes -- **THEN** the output file size SHALL be between 90% and 100% of max_size_bytes - -### Requirement: Fixed compression quality for lossy formats -The system SHALL use a fixed quality of 90 for JPEG and WEBP encoding. PNG encoding SHALL NOT use a quality parameter (lossless). 
- -#### Scenario: JPEG encoded at quality 90 -- **WHEN** a JPEG image is resized -- **THEN** the output SHALL be encoded with quality=90 and optimize=True - -#### Scenario: PNG encoded without quality parameter -- **WHEN** a PNG image is resized -- **THEN** the output SHALL be encoded with optimize=True and no quality parameter - -### Requirement: Early return for under-limit files -The system SHALL return the original file path unchanged when the original file size is already within the size limit. - -#### Scenario: Small file returns original path -- **WHEN** resize_file is called with an image whose file size is already under max_size_bytes -- **THEN** the original input_path SHALL be returned without re-encoding - -### Requirement: Minimum dimension guard -The system SHALL stop searching and return best effort when either dimension of the scaled image would fall below 64 pixels. - -#### Scenario: Image hits minimum dimension during search -- **WHEN** binary search reaches a scale factor where either width or height would be below 64px -- **THEN** the system SHALL stop the search and return the best under-limit result seen so far, or the closest result overall - -### Requirement: Iteration safety cap -The system SHALL stop after a maximum of 10 iterations and return the best result available. - -#### Scenario: Safety cap reached -- **WHEN** the binary search has not converged after 10 iterations -- **THEN** the system SHALL return the best under-limit result, or the closest result overall, or raise a ValidationError if no result was produced - -### Requirement: Best-effort fallback -The system SHALL track the best under-limit result and the best overall result during the search. When the search ends without landing in the target band, the system SHALL prefer the best under-limit result, then the best overall result, and only raise ValidationError as a last resort. 
- -#### Scenario: No result in target band but under-limit result exists -- **WHEN** the search ends and no result landed in the 90-100% band but a result under the limit was found -- **THEN** the system SHALL return the best under-limit result - -#### Scenario: No under-limit result exists -- **WHEN** the search ends and no result was under the limit -- **THEN** the system SHALL return the smallest result seen overall - -#### Scenario: No result produced at all -- **WHEN** the search ends with no results (e.g., image too small to encode) -- **THEN** the system SHALL raise a ValidationError with error code INVALID_IMAGE_SIZE From cba614e37b504e2e09ebbf8b6fe607b633e00c56 Mon Sep 17 00:00:00 2001 From: Milos Marinkovic Date: Tue, 14 Apr 2026 22:29:17 +0200 Subject: [PATCH 3/3] Bump version --- docs/open-api-docs.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/open-api-docs.yaml b/docs/open-api-docs.yaml index bbcd9378..3ce4910c 100644 --- a/docs/open-api-docs.yaml +++ b/docs/open-api-docs.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: The Agent's user-facing API description: The user-facing parts of The Agent's API service (excluding system-level endpoints, chat completion, maintenance endpoints, etc.) - version: 5.8.1 + version: 5.9.0 license: name: MIT url: https://opensource.org/licenses/MIT diff --git a/pyproject.toml b/pyproject.toml index 2d96df62..67f04d3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "the-agent" -version = "5.8.1" +version = "5.9.0" [tool.setuptools] package-dir = {"" = "src"}