diff --git a/src/__tests__/integration/bill-processing.integration.test.ts b/src/__tests__/integration/bill-processing.integration.test.ts index c0b28e1..aa9a5ef 100644 --- a/src/__tests__/integration/bill-processing.integration.test.ts +++ b/src/__tests__/integration/bill-processing.integration.test.ts @@ -161,7 +161,6 @@ describe('Bill Processing Integration Tests', () => { }); afterEach(() => { - // Clean up temp directory fs.rmSync(tmpDir, { recursive: true, force: true }); container.clear(); eventEmitter.removeAllListeners(); @@ -241,14 +240,17 @@ Please do not reply to this email as it is auto generated`, }); describe('Unknown merchant: email → event → queue → notification', () => { - it('should emit categorization event for unknown merchant', async () => { - // Empty mappings — merchant is unknown + it('should emit categorization event when AI confidence is low', async () => { merchantMappingMock.__setMappings({}); + // AI returns low confidence — should trigger manual flow + mockOpenAIAdapter.processMessage.mockResolvedValue( + '{"category": "Expenses:Food:Dining", "confidence": 0.5}' + ); + const testEmail = createTestEmail(); mockGmailAdapter.fetchUnreadEmails.mockResolvedValue([testEmail]); - // Track events const categorizationEvents: unknown[] = []; eventEmitter.on(EventTypes.MERCHANT_NEEDS_CATEGORIZATION, (data) => { categorizationEvents.push(data); @@ -258,10 +260,10 @@ Please do not reply to this email as it is auto generated`, const automationService = container.getByClass(AutomationService); await automationService.scheduledCheck(); - // Merchant should be added to mapping with empty category - expect(addMerchantToMapping).toHaveBeenCalledWith('GRAB FOOD', undefined); + // No mapping written (removed auto-add with empty category) + expect(addMerchantToMapping).not.toHaveBeenCalled(); - // Event should have been emitted + // Event should have been emitted for manual categorization expect(categorizationEvents).toHaveLength(1); const event = 
categorizationEvents[0] as Record; expect(event).toMatchObject({ @@ -269,13 +271,49 @@ Please do not reply to this email as it is auto generated`, merchantId: 'grab_food', }); - // Email should NOT be marked as read (unknown merchant) + // Email should NOT be marked as read (needs manual categorization) expect(mockGmailAdapter.markAsRead).not.toHaveBeenCalled(); }); + it('should auto-categorize when AI confidence is high', async () => { + merchantMappingMock.__setMappings({}); + + // AI returns high confidence — should auto-categorize + mockOpenAIAdapter.processMessage.mockResolvedValue( + '{"category": "Expenses:Food:Dining", "confidence": 0.95}' + ); + + const testEmail = createTestEmail(); + mockGmailAdapter.fetchUnreadEmails.mockResolvedValue([testEmail]); + + // Register listener BEFORE running scheduledCheck + const categorizationEvents: unknown[] = []; + eventEmitter.on(EventTypes.MERCHANT_NEEDS_CATEGORIZATION, (data) => { + categorizationEvents.push(data); + }); + + container.registerClassFactory(AutomationService, () => new AutomationService()); + const automationService = container.getByClass(AutomationService); + await automationService.scheduledCheck(); + + // No manual categorization event (AI handled it) + expect(categorizationEvents).toHaveLength(0); + + // Transaction should be written (auto-categorized) + expect(mockGmailAdapter.markAsRead).toHaveBeenCalledWith('test-email-001'); + + // No mapping written (AI handles it each time) + expect(addMerchantToMapping).not.toHaveBeenCalled(); + }); + it('should send Telegram notification through event listener and queue', async () => { merchantMappingMock.__setMappings({}); + // AI returns low confidence to trigger manual flow + mockOpenAIAdapter.processMessage.mockResolvedValue( + '{"category": "Expenses:Food:Dining", "confidence": 0.3}' + ); + const testEmail = createTestEmail(); mockGmailAdapter.fetchUnreadEmails.mockResolvedValue([testEmail]); diff --git a/src/domain/services/nlp.service.ts 
b/src/domain/services/nlp.service.ts index 414d7be..353e5e6 100644 --- a/src/domain/services/nlp.service.ts +++ b/src/domain/services/nlp.service.ts @@ -10,6 +10,11 @@ export interface CategoryOptions { suggestedNewCategory: string; } +export interface AutoCategoryResult { + category: string; + confidence: number; +} + export interface ParsedExpenseData { amount: number; currency: string; @@ -66,6 +71,45 @@ Respond in exactly this format, with each option on a new line.`; } } + async autoCategorizeMerchant(merchant: string): Promise<AutoCategoryResult> { + try { + const expenseAccounts = this.accountingService.getAllAccountNames() + .filter(name => name.startsWith('Expenses:')); + + const prompt = `You are a financial transaction categorizer. Given a merchant name, pick the best matching category from the list below. + +Merchant Name: ${merchant} + +Available expense categories: +${expenseAccounts.join('\n')} + +Respond with ONLY a JSON object in this exact format, no other text: +{"category": "the best matching category from the list", "confidence": 0.95} + +Rules: +- confidence is a number between 0 and 1 +- Use 0.9+ for well-known merchants where the category is obvious +- Use 0.5-0.8 for ambiguous merchants where the category is a guess +- Use below 0.5 for completely unclear merchants +- The category MUST be exactly one of the values from the provided list above, copy it exactly as shown`; + + const response = await this.openaiAdapter.processMessage(prompt, ''); + const jsonMatch = response.match(/\{.*\}/s); + if (!jsonMatch) { + return { category: '', confidence: 0 }; + } + + const parsed = JSON.parse(jsonMatch[0]); + return { + category: parsed.category || '', + confidence: typeof parsed.confidence === 'number' ? 
parsed.confidence : 0, + }; + } catch (error) { + logger.error('Error auto-categorizing merchant:', error); + return { category: '', confidence: 0 }; + } + } + async parseExpenseInput(input: string): Promise { try { const prompt = `Please parse the following expense information and create a transaction record: diff --git a/src/infrastructure/email-parsers/__tests__/dbs-email-parser.test.ts b/src/infrastructure/email-parsers/__tests__/dbs-email-parser.test.ts index 34d2431..0a76952 100644 --- a/src/infrastructure/email-parsers/__tests__/dbs-email-parser.test.ts +++ b/src/infrastructure/email-parsers/__tests__/dbs-email-parser.test.ts @@ -45,7 +45,7 @@ describe('DBSEmailParser', () => { }); describe('parse', () => { - it('should correctly parse a DBS transaction alert email', () => { + it('should correctly parse a DBS transaction alert email', async () => { const emailBody = `Card Transaction Alert Transaction Ref: 510805332088 Dear Sir / Madam, @@ -66,7 +66,7 @@ Please do not reply to this email as it is auto generated`; body: emailBody }; - const result = parser.parse(email); + const result = await parser.parse(email); expect(result).not.toBeNull(); if (result) { @@ -104,7 +104,7 @@ Please do not reply to this email as it is auto generated`; } }); - it('should correctly parse a DBS transaction alert email with compact date format', () => { + it('should correctly parse a DBS transaction alert email with compact date format', async () => { const emailBody = `Card Transaction Alert Transaction Ref: 510805332088 Dear Sir / Madam, @@ -125,7 +125,7 @@ Please do not reply to this email as it is auto generated`; body: emailBody }; - const result = parser.parse(email); + const result = await parser.parse(email); expect(result).not.toBeNull(); if (result) { @@ -163,7 +163,7 @@ Please do not reply to this email as it is auto generated`; } }); - it('should correctly parse a DBS transaction alert email with tab-separated date format', () => { + it('should correctly parse a DBS 
transaction alert email with tab-separated date format', async () => { const emailBody = `Card Transaction Alert Transaction Ref: 510805332088 Dear Sir / Madam, @@ -180,7 +180,7 @@ Please do not reply to this email as it is auto generated`; body: emailBody }; - const result = parser.parse(email); + const result = await parser.parse(email); expect(result).not.toBeNull(); if (result) { @@ -218,7 +218,7 @@ Please do not reply to this email as it is auto generated`; } }); - it('should correctly parse a DBS transaction alert email with S$ currency format', () => { + it('should correctly parse a DBS transaction alert email with S$ currency format', async () => { const emailBody = `Card Transaction Alert Transaction Ref: 510805332088 Dear Sir / Madam, @@ -237,7 +237,7 @@ Please do not reply to this email as it is auto generated`; body: emailBody }; - const result = parser.parse(email); + const result = await parser.parse(email); expect(result).not.toBeNull(); if (result) { @@ -275,7 +275,7 @@ Please do not reply to this email as it is auto generated`; } }); - it('should return null for invalid email format', () => { + it('should return null for invalid email format', async () => { const email: Email = { id: 'test-id', subject: 'Card Transaction Alert', @@ -284,7 +284,7 @@ Please do not reply to this email as it is auto generated`; body: 'Invalid email body without required fields' }; - const result = parser.parse(email); + const result = await parser.parse(email); expect(result).toBeNull(); }); }); diff --git a/src/infrastructure/email-parsers/dbs-email-parser.ts b/src/infrastructure/email-parsers/dbs-email-parser.ts index 105429c..d8ea435 100644 --- a/src/infrastructure/email-parsers/dbs-email-parser.ts +++ b/src/infrastructure/email-parsers/dbs-email-parser.ts @@ -16,6 +16,7 @@ import { } from "./dbs-transaction-extractor"; import { EventTypes } from "../events/event-types"; import { getCardAccount } from "../utils/telegram"; +import { NLPService } from 
"../../domain/services/nlp.service"; /** * Interface for transaction creation parameters */ @@ -58,7 +59,7 @@ export class DBSEmailParser implements EmailParser { /** * Parses a DBS transaction alert email into a Transaction */ - parse(email: Email): Transaction | null { + async parse(email: Email): Promise<Transaction | null> { if (!this.canParse(email)) { return null; } @@ -71,13 +72,12 @@ export class DBSEmailParser implements EmailParser { const { amount, date, merchant, cardInfo, currency } = transactionData; - // Get merchant category from mapping - const category = this.getMerchantCategory(merchant, email); + // Get merchant category from mapping or AI + const category = await this.getMerchantCategory(merchant, email); if (!category) { return null; } - // Create transaction entries return this.createTransaction({ date, merchant, @@ -94,27 +94,52 @@ export class DBSEmailParser implements EmailParser { } } + private get nlpService(): NLPService { + return container.getByClass(NLPService); + } + + private static readonly AUTO_CATEGORIZE_CONFIDENCE_THRESHOLD = 0.8; + /** - * Gets category for merchant or handles new merchant categorization + * Gets category for merchant: mapping → AI auto → manual flow */ - private getMerchantCategory( + private async getMerchantCategory( merchant: string, email: Email - ): AccountName | null { - const category = this.accountingService.findCategoryForMerchant(merchant); + ): Promise<AccountName | null> { + // 1. Check mapping file (human-confirmed categories) + const mappedCategory = this.accountingService.findCategoryForMerchant(merchant); + if (mappedCategory) { + return mappedCategory as AccountName; + } - if (!category) { - // If merchant not found in mapping, add it to the config file and emit event - this.accountingService.addMerchantToCategory(merchant); + // 2. 
Try AI auto-categorization + const aiResult = await this.nlpService.autoCategorizeMerchant(merchant); + const validAccountNames = (Object.values(AccountName) as string[]) + .filter(name => name.startsWith('Expenses:')); + if ( + aiResult.confidence >= DBSEmailParser.AUTO_CATEGORIZE_CONFIDENCE_THRESHOLD && + aiResult.category && + validAccountNames.includes(aiResult.category) + ) { logger.info( - `Merchant "${merchant}" not found in category mapping. Added to config for manual categorization.` + `AI auto-categorized "${merchant}" → ${aiResult.category} (confidence: ${aiResult.confidence})` ); + return aiResult.category as AccountName; + } - this.emitMerchantCategorizationEvent(merchant, email); - return null; + if (aiResult.category && !validAccountNames.includes(aiResult.category)) { + logger.warn( + `AI returned invalid category "${aiResult.category}" for "${merchant}", falling back to manual` + ); } - return category as AccountName; + // 3. Low confidence — notify for manual categorization + logger.info( + `AI uncertain for "${merchant}" (confidence: ${aiResult.confidence}), requesting manual categorization` + ); + this.emitMerchantCategorizationEvent(merchant, email); + return null; } /** diff --git a/src/infrastructure/email-parsers/email-parser-factory.ts b/src/infrastructure/email-parsers/email-parser-factory.ts index 2673037..97e5bdb 100644 --- a/src/infrastructure/email-parsers/email-parser-factory.ts +++ b/src/infrastructure/email-parsers/email-parser-factory.ts @@ -31,7 +31,7 @@ export class EmailParserFactory { /** * Parse an email into a transaction using the appropriate parser */ - parseEmail(email: Email): Transaction | null { + async parseEmail(email: Email): Promise<Transaction | null> { const parser = this.findParser(email); if (!parser) { console.log(`No parser found for email: ${email.subject} from ${email.from}`); diff --git a/src/infrastructure/email-parsers/email-parser.interface.ts b/src/infrastructure/email-parsers/email-parser.interface.ts index 
b48441e..fa9b2a4 100644 --- a/src/infrastructure/email-parsers/email-parser.interface.ts +++ b/src/infrastructure/email-parsers/email-parser.interface.ts @@ -14,5 +14,5 @@ export interface EmailParser { * Parses the email into a transaction * Returns null if the email cannot be parsed */ - parse(email: Email): Transaction | null; + parse(email: Email): Transaction | null | Promise<Transaction | null>; } \ No newline at end of file diff --git a/src/infrastructure/events/message-queue.service.ts b/src/infrastructure/events/message-queue.service.ts index a2d2858..7151db7 100644 --- a/src/infrastructure/events/message-queue.service.ts +++ b/src/infrastructure/events/message-queue.service.ts @@ -113,7 +113,7 @@ export class MessageQueueService { this.clearTaskTimeout(); if (this.taskTimeoutMs <= 0) return; - this.timeoutTimer = setTimeout(() => { + const timer = setTimeout(() => { const item = this.queue[0]; if (!item?.taskId) return; @@ -124,6 +124,9 @@ export class MessageQueueService { } this.completeTask(item.taskId); }, this.taskTimeoutMs); + // Don't keep the process alive just for the timeout + timer.unref(); + this.timeoutTimer = timer; } private clearTaskTimeout(): void {