Screen history for chat: look up rewind frames by time range and add them to
the chat context.

Notes on this revision of the patch:
  * Line structure and indentation were rebuilt from a whitespace-collapsed
    copy. Unchanged context lines are reproduced as found (including the bare
    `Promise` / `Partial` types in the types.ts context) and their indentation
    is a best guess, so apply with `git apply --ignore-whitespace` and confirm
    the context against the target tree.
  * Blob ids on the `index` lines are carried over from the original export and
    do not describe the revised content of the files changed below.
  * db.ts: the search text is matched literally (LIKE wildcards are escaped).
  * screenHistory.ts: IPC arguments are validated and the frame count clamped;
    snippet matching uses the trimmed query; "..." is only added on truncation.
  * screenHistoryContext.ts: when the filtered search finds nothing and the
    message is explicitly about the screen, retry without the text filter;
    isAskingAboutScreenHistory no longer skips its patterns when a temporal
    phrase is present.
  * temporalParser.ts: extractSearchTerms drops sentence punctuation at word
    ends so a trailing "?" does not become part of the search text.

diff --git a/desktop/windows/src/main/index.ts b/desktop/windows/src/main/index.ts
index ab558764cdb..826e8294101 100644
--- a/desktop/windows/src/main/index.ts
+++ b/desktop/windows/src/main/index.ts
@@ -23,6 +23,7 @@ import { registerOverlayHandlers } from './overlay/ipc'
 import { seedUserAssistOnce } from './usage/userAssistSeed'
 import { registerRewindHandlers } from './ipc/rewind'
 import { registerScreenHandlers } from './ipc/screen'
+import { registerScreenHistoryHandlers } from './ipc/screenHistory'
 import { registerInsightHandlers } from './ipc/insight'
 import { createInsightToastWindow } from './insight/toastWindow'
 import { registerAutomationHandlers } from './ipc/automation'
@@ -290,6 +291,7 @@ app.whenReady().then(async () => {
   registerMemoryCleanupHandlers()
   registerRewindHandlers()
   registerScreenHandlers()
+  registerScreenHistoryHandlers()
   // Cross-window conversations refresh: any renderer that writes a local
   // conversation (main window OR overlay) notifies here; rebroadcast to every
   // window so each invalidates its own per-process conversations cache (e.g. an
diff --git a/desktop/windows/src/main/ipc/db.ts b/desktop/windows/src/main/ipc/db.ts
index 6d6d989b112..8cb0decd856 100644
--- a/desktop/windows/src/main/ipc/db.ts
+++ b/desktop/windows/src/main/ipc/db.ts
@@ -747,6 +747,52 @@ export function searchRewindFrames(query: string, limit = 500): RewindFrame[] {
   })
 }
 
+/**
+ * Return rewind frames whose `ts` falls within [startTime, endTime] (inclusive,
+ * per SQL BETWEEN), newest first, optionally narrowed by a free-text query.
+ *
+ * A non-blank `searchQuery` is trimmed and matched as a literal substring of
+ * the frame's OCR text, window title or app name. `%`, `_` and `\` in the query
+ * are escaped so user input is never interpreted as a LIKE wildcard.
+ *
+ * @param startTime   Lower bound compared against `ts`.
+ * @param endTime     Upper bound compared against `ts`.
+ * @param searchQuery Optional text filter; blank or undefined disables it.
+ * @param limit       Maximum number of rows to return (default 50).
+ */
+export function searchRewindFramesInTimeRange(
+  startTime: number,
+  endTime: number,
+  searchQuery?: string,
+  limit = 50
+): RewindFrame[] {
+  return timed('searchRewindFramesInTimeRange', () => {
+    const needle = searchQuery?.trim()
+    if (!needle) {
+      return get()
+        .prepare(
+          `SELECT ${REWIND_COLUMNS} FROM rewind_frames
+           WHERE ts BETWEEN ? AND ?
+           ORDER BY ts DESC LIMIT ?`
+        )
+        .all(startTime, endTime, limit) as RewindFrame[]
+    }
+
+    // Escape LIKE metacharacters so "100%" or "my_file" match literally.
+    const like = `%${needle.replace(/[\\%_]/g, '\\$&')}%`
+    return get()
+      .prepare(
+        `SELECT ${REWIND_COLUMNS} FROM rewind_frames
+         WHERE ts BETWEEN ? AND ?
+           AND (ocr_text LIKE ? ESCAPE '\\'
+             OR window_title LIKE ? ESCAPE '\\'
+             OR app LIKE ? ESCAPE '\\')
+         ORDER BY ts DESC LIMIT ?`
+      )
+      .all(startTime, endTime, like, like, like, limit) as RewindFrame[]
+  })
+}
+
 export function rewindDayBounds(): { min: number; max: number } | null {
   const row = get()
     .prepare('SELECT MIN(ts) AS min, MAX(ts) AS max FROM rewind_frames')
diff --git a/desktop/windows/src/main/ipc/screenHistory.ts b/desktop/windows/src/main/ipc/screenHistory.ts
new file mode 100644
index 00000000000..c9bd982fd1b
--- /dev/null
+++ b/desktop/windows/src/main/ipc/screenHistory.ts
@@ -0,0 +1,201 @@
+import { ipcMain } from 'electron'
+import { searchRewindFramesInTimeRange } from './db'
+import type { RewindFrame } from '../../shared/types'
+
+/** Upper bound on the number of frames a single IPC request may ask for. */
+const MAX_FRAMES_PER_REQUEST = 200
+
+interface ScreenHistorySearchParams {
+  startTime: number
+  endTime: number
+  searchQuery?: string
+  limit?: number
+}
+
+interface ScreenHistoryResult {
+  frames: RewindFrame[]
+  summary: string
+  timeRange: {
+    start: number
+    end: number
+    description: string
+  }
+}
+
+/**
+ * Reject time bounds that are not finite numbers. IPC arguments are only typed
+ * at compile time, so the renderer can send anything at runtime.
+ */
+function assertTimeRange(startTime: unknown, endTime: unknown): void {
+  if (!Number.isFinite(startTime) || !Number.isFinite(endTime)) {
+    throw new TypeError('screen history: startTime and endTime must be finite numbers')
+  }
+}
+
+/**
+ * Coerce a requested frame count into 1..MAX_FRAMES_PER_REQUEST, falling back
+ * to `fallback` when it is missing or not a finite number. A negative LIMIT
+ * would otherwise make SQLite return every row in the range.
+ */
+function clampFrameCount(requested: unknown, fallback: number): number {
+  if (typeof requested !== 'number' || !Number.isFinite(requested)) return fallback
+  return Math.min(MAX_FRAMES_PER_REQUEST, Math.max(1, Math.floor(requested)))
+}
+
+/**
+ * Summarize frames as text: the three most frequent apps, followed by either
+ * OCR excerpts around `query` (at most three) or, when there are none, up to
+ * five distinct window titles.
+ *
+ * @param frames Frames to summarize.
+ * @param query  Trimmed search text; '' when the caller did not search.
+ */
+function summarizeFrames(frames: RewindFrame[], query: string): string {
+  if (frames.length === 0) {
+    return 'No screen activity found in the specified time range.'
+  }
+
+  const appCounts = new Map<string, number>()
+  const windowTitles = new Set<string>()
+  const snippets: string[] = []
+  const lowerQuery = query.toLowerCase()
+
+  for (const frame of frames) {
+    const app = frame.app || 'Unknown'
+    appCounts.set(app, (appCounts.get(app) ?? 0) + 1)
+
+    if (frame.windowTitle) {
+      windowTitles.add(frame.windowTitle)
+    }
+
+    // Excerpt 50 characters either side of the first match in this frame.
+    if (lowerQuery && frame.ocrText && snippets.length < 3) {
+      const index = frame.ocrText.toLowerCase().indexOf(lowerQuery)
+      if (index !== -1) {
+        const contextStart = Math.max(0, index - 50)
+        const contextEnd = Math.min(frame.ocrText.length, index + lowerQuery.length + 50)
+        const snippet = frame.ocrText.substring(contextStart, contextEnd).trim()
+        // Label with the normalized app name so a frame without one reads "Unknown".
+        snippets.push(`[${app}] ...${snippet}...`)
+      }
+    }
+  }
+
+  const appSummary = Array.from(appCounts.entries())
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 3)
+    .map(([name, count]) => `${name} (${count})`)
+    .join(', ')
+
+  let summary = `Found ${frames.length} screen captures. Apps: ${appSummary}.`
+
+  if (snippets.length > 0) {
+    summary += '\n\nRelevant text snippets:\n' + snippets.join('\n')
+  } else if (windowTitles.size > 0) {
+    // No excerpts to show, so list window titles instead.
+    const titleList = Array.from(windowTitles).slice(0, 5).join(', ')
+    summary += `\nWindows: ${titleList}`
+  }
+
+  return summary
+}
+
+/**
+ * Find frames in a time range and describe them.
+ *
+ * @param params Time bounds plus optional search text and frame limit
+ *               (default 20, capped at MAX_FRAMES_PER_REQUEST).
+ * @returns The matching frames, a text summary and the echoed time range.
+ * @throws TypeError when the time bounds are not finite numbers.
+ */
+async function searchScreenHistory(
+  params: ScreenHistorySearchParams
+): Promise<ScreenHistoryResult> {
+  const { startTime, endTime, limit } = params
+  assertTimeRange(startTime, endTime)
+  // Trim once so excerpt matching uses exactly the text the SQL filter used.
+  const query = typeof params.searchQuery === 'string' ? params.searchQuery.trim() : ''
+
+  const frames = searchRewindFramesInTimeRange(
+    startTime,
+    endTime,
+    query || undefined,
+    clampFrameCount(limit, 20)
+  )
+
+  return {
+    frames,
+    summary: summarizeFrames(frames, query),
+    timeRange: {
+      start: startTime,
+      end: endTime,
+      description: `${new Date(startTime).toLocaleString()} - ${new Date(endTime).toLocaleString()}`
+    }
+  }
+}
+
+/**
+ * Render the newest frames of a time range as plain text: a header line, then
+ * per frame its time, app, window title and up to 200 characters of OCR text.
+ *
+ * @param maxFrames Frame cap (default 5, capped at MAX_FRAMES_PER_REQUEST).
+ * @returns The formatted text, or '' when the range contains no frames.
+ * @throws TypeError when the time bounds are not finite numbers.
+ */
+async function getScreenContextForTimeRange(
+  startTime: number,
+  endTime: number,
+  maxFrames = 5
+): Promise<string> {
+  assertTimeRange(startTime, endTime)
+  const frames = searchRewindFramesInTimeRange(
+    startTime,
+    endTime,
+    undefined,
+    clampFrameCount(maxFrames, 5)
+  )
+
+  if (frames.length === 0) {
+    return ''
+  }
+
+  const lines: string[] = [
+    `[Screen history from ${new Date(startTime).toLocaleTimeString()} to ${new Date(
+      endTime
+    ).toLocaleTimeString()}]`
+  ]
+
+  for (const frame of frames) {
+    const time = new Date(frame.ts).toLocaleTimeString()
+    const app = frame.app || 'Unknown'
+    const title = frame.windowTitle ? ` - ${frame.windowTitle}` : ''
+
+    lines.push(`\n${time} | ${app}${title}`)
+
+    const ocr = frame.ocrText ? frame.ocrText.trim() : ''
+    if (ocr) {
+      // Measure the trimmed text so "..." appears only when something was cut.
+      lines.push(ocr.length > 200 ? `${ocr.substring(0, 200)}...` : ocr)
+    }
+  }
+
+  return lines.join('\n')
+}
+
+/**
+ * Register the IPC handlers behind `screenSearchHistory`
+ * ('screen:searchHistory') and `screenGetHistoryContext`
+ * ('screen:getHistoryContext').
+ */
+export function registerScreenHistoryHandlers(): void {
+  ipcMain.handle('screen:searchHistory', async (_e, params: ScreenHistorySearchParams) => {
+    return searchScreenHistory(params)
+  })
+
+  ipcMain.handle(
+    'screen:getHistoryContext',
+    async (_e, startTime: number, endTime: number, maxFrames?: number) => {
+      return getScreenContextForTimeRange(startTime, endTime, maxFrames)
+    }
+  )
+}
diff --git a/desktop/windows/src/preload/index.ts b/desktop/windows/src/preload/index.ts
index fd2263cef2c..e3b8febaa4e 100644
--- a/desktop/windows/src/preload/index.ts
+++ b/desktop/windows/src/preload/index.ts
@@ -98,6 +98,14 @@ const omi: OmiBridgeApi = {
   rewindPrimarySourceId: () => ipcRenderer.invoke('rewind:primarySourceId'),
   rewindSaveFrame: (data: Uint8Array) => ipcRenderer.invoke('rewind:saveFrame', data),
   screenReadText: () => ipcRenderer.invoke('screen:readNow'),
+  screenSearchHistory: (params: {
+    startTime: number
+    endTime: number
+    searchQuery?: string
+    limit?: number
+  }) => ipcRenderer.invoke('screen:searchHistory', params),
+  screenGetHistoryContext: (startTime: number, endTime: number, maxFrames?: number) =>
+    ipcRenderer.invoke('screen:getHistoryContext', startTime, endTime, maxFrames),
   screenSynthFramesSince: () => ipcRenderer.invoke('screenSynth:framesSince'),
   screenSynthGetState: () => ipcRenderer.invoke('screenSynth:getState'),
   screenSynthSetState: (patch) => ipcRenderer.invoke('screenSynth:setState', patch),
diff --git a/desktop/windows/src/renderer/src/hooks/useChat.ts b/desktop/windows/src/renderer/src/hooks/useChat.ts
index 8f3698fb50e..31e229b0248 100644
--- a/desktop/windows/src/renderer/src/hooks/useChat.ts
+++ b/desktop/windows/src/renderer/src/hooks/useChat.ts
@@ -3,6 +3,7 @@ import { auth } from '../lib/firebase'
 import { invalidateConversationsCache } from '../lib/pageCache'
 import { gatherLocalContext } from '../lib/localAgent'
 import { readCurrentScreen } from '../lib/screenContext'
+import { getScreenHistoryContext } from '../lib/screenHistoryContext'
 import { looksLikeAction, looksLikeRawPlan, planActions } from '../lib/actionPlanner'
 import { callAgentLLM } from '../lib/agentLLM'
 import type { AutomationPlan } from '../../../shared/types'
@@ -249,12 +250,17 @@ export function useChat(opts?: { surface?: 'main' | 'overlay' }): UseChat {
       // context to EVERY message. It's framed so the model ignores it unless the
       // message is actually about the screen, so it doesn't bloat answers. This
       // is an instant hot-cache read, so normal messages don't pay a capture cost;
+      // • screen history — if asking about past screens ("5 minutes ago", "that error")
       // • local KG/file context — apps/projects/tech the chat is grounded in.
-      const [screenContext, localContext] = await Promise.all([
+      const [screenContext, historyContext, localContext] = await Promise.all([
         readCurrentScreen(),
+        Promise.race([
+          getScreenHistoryContext(userMsg.content),
+          new Promise<string>((resolve) => setTimeout(() => resolve(''), 1500))
+        ]),
         gatherLocalContext(userMsg.content)
       ])
-      const contextParts = [screenContext, localContext].filter(Boolean)
+      const contextParts = [screenContext, historyContext, localContext].filter(Boolean)
       const textToSend = contextParts.length
         ? `${contextParts.join('\n\n')}\n\n${userMsg.content}`
         : userMsg.content
diff --git a/desktop/windows/src/renderer/src/lib/screenHistoryContext.ts b/desktop/windows/src/renderer/src/lib/screenHistoryContext.ts
new file mode 100644
index 00000000000..77c7c36e9a5
--- /dev/null
+++ b/desktop/windows/src/renderer/src/lib/screenHistoryContext.ts
@@ -0,0 +1,162 @@
+import { parseTemporalReference, extractSearchTerms } from './temporalParser'
+
+/** Look-back used when a message hints at screen history but names no time. */
+const DEFAULT_LOOKBACK_MS = 30 * 60 * 1000
+
+/** Substrings that suggest the user means something seen on screen before. */
+const HISTORY_KEYWORDS = [
+  'saw', 'seen', 'was on', 'showed', 'displayed',
+  'error', 'message', 'screen', 'window',
+  'earlier', 'before', 'previously'
+]
+
+/**
+ * Build a screen-history context block for a chat message.
+ *
+ * The time range comes from a temporal phrase in the message ("5 minutes ago",
+ * "yesterday", ...). Without one, the last 30 minutes are used, but only if the
+ * message contains one of HISTORY_KEYWORDS. The message minus its temporal
+ * phrases is used as the search text.
+ *
+ * @returns Formatted context, or '' when the message does not qualify, no
+ *          frames are found, or the lookup fails.
+ */
+export async function getScreenHistoryContext(message: string): Promise<string> {
+  const temporalRef = parseTemporalReference(message)
+  const searchTerms = extractSearchTerms(message) || undefined
+  // Only an explicit question about the screen may fall back to unfiltered
+  // frames; otherwise any message containing "today" would pull in history.
+  const allowUnfiltered = isAskingAboutScreenHistory(message)
+
+  if (temporalRef) {
+    return fetchScreenContext(
+      temporalRef.startTime,
+      temporalRef.endTime,
+      searchTerms,
+      allowUnfiltered
+    )
+  }
+
+  const lowerMessage = message.toLowerCase()
+  if (!HISTORY_KEYWORDS.some((keyword) => lowerMessage.includes(keyword))) {
+    return ''
+  }
+
+  const now = Date.now()
+  return fetchScreenContext(now - DEFAULT_LOOKBACK_MS, now, searchTerms, allowUnfiltered)
+}
+
+/**
+ * Query the main process for frames and format them for the model.
+ *
+ * @param searchQuery             Optional text filter.
+ * @param allowUnfilteredFallback When the filtered search finds nothing, retry
+ *        without the filter. The search text is usually a whole phrase
+ *        ("on my screen") that never appears verbatim in OCR text, so without
+ *        this a question like "what was on my screen 5 minutes ago" returns ''.
+ * @returns Formatted context, or '' when nothing is found or the call fails.
+ */
+async function fetchScreenContext(
+  startTime: number,
+  endTime: number,
+  searchQuery?: string,
+  allowUnfilteredFallback = false
+): Promise<string> {
+  try {
+    let activeQuery = searchQuery || undefined
+    let result = await window.omi.screenSearchHistory({
+      startTime,
+      endTime,
+      searchQuery: activeQuery,
+      limit: 10
+    })
+
+    if (activeQuery && allowUnfilteredFallback && (!result || result.frames.length === 0)) {
+      activeQuery = undefined
+      result = await window.omi.screenSearchHistory({ startTime, endTime, limit: 10 })
+    }
+
+    if (!result || result.frames.length === 0) {
+      return ''
+    }
+
+    const lines: string[] = [
+      `[Screen history context from ${result.timeRange.description}]`,
+      result.summary,
+      '\nDetailed screen captures:'
+    ]
+
+    // Detail only the first five frames to keep the prompt small.
+    for (const frame of result.frames.slice(0, 5)) {
+      const time = new Date(frame.ts).toLocaleTimeString()
+      const app = frame.app || 'Unknown app'
+      const title = frame.windowTitle || 'No title'
+
+      lines.push(`\n[${time}] ${app} - ${title}`)
+
+      if (frame.ocrText) {
+        lines.push(buildOcrPreview(frame.ocrText, activeQuery))
+      }
+    }
+
+    return lines.join('\n')
+  } catch (error) {
+    console.error('Failed to fetch screen history context:', error)
+    return ''
+  }
+}
+
+/**
+ * Shorten OCR text for the prompt: 100 characters either side of the first
+ * case-insensitive match of `query` when there is one, otherwise the start of
+ * the text; capped at 300 characters plus "...".
+ */
+function buildOcrPreview(ocrText: string, query?: string): string {
+  let preview = ocrText.trim()
+
+  if (query) {
+    const index = preview.toLowerCase().indexOf(query.toLowerCase())
+    if (index !== -1) {
+      const contextStart = Math.max(0, index - 100)
+      const contextEnd = Math.min(preview.length, index + query.length + 100)
+      preview = '...' + preview.substring(contextStart, contextEnd) + '...'
+    }
+  }
+
+  if (preview.length > 300) {
+    preview = preview.substring(0, 300) + '...'
+  }
+
+  return preview
+}
+
+/**
+ * Heuristic: does the message ask about something previously shown on screen?
+ *
+ * True when it has a temporal reference plus a screen-related keyword, or when
+ * it matches one of the direct question patterns.
+ */
+export function isAskingAboutScreenHistory(message: string): boolean {
+  const lowerMessage = message.toLowerCase()
+
+  const screenKeywords = ['saw', 'seen', 'screen', 'showed', 'error', 'message', 'window', 'displayed']
+  if (
+    parseTemporalReference(message) &&
+    screenKeywords.some((keyword) => lowerMessage.includes(keyword))
+  ) {
+    return true
+  }
+
+  // Checked even when a temporal reference is present: "what did I see
+  // earlier" has no keyword above but is clearly about screen history.
+  const historyPatterns = [
+    /what.*(was|were).*on.*screen/,
+    /what.*did.*i.*see/,
+    /show.*me.*what.*was/,
+    /error.*message/,
+    /that.*(error|message|screen)/,
+    /find.*what.*showed/
+  ]
+
+  return historyPatterns.some((pattern) => pattern.test(lowerMessage))
+}
diff --git a/desktop/windows/src/renderer/src/lib/temporalParser.ts b/desktop/windows/src/renderer/src/lib/temporalParser.ts
new file mode 100644
index 00000000000..4d4557deb5d
--- /dev/null
+++ b/desktop/windows/src/renderer/src/lib/temporalParser.ts
@@ -0,0 +1,190 @@
+interface TimeRange {
+  startTime: number
+  endTime: number
+  description: string
+}
+
+const MINUTE_MS = 60 * 1000
+const HOUR_MS = 60 * MINUTE_MS
+
+/** Named parts of the current day as inclusive local hour ranges, in match order. */
+const DAY_PARTS = [
+  { phrase: 'this evening', startHour: 18, endHour: 23 },
+  { phrase: 'this morning', startHour: 6, endHour: 11 },
+  { phrase: 'this afternoon', startHour: 12, endHour: 17 }
+]
+
+/** Phrases dropped by extractSearchTerms: temporal references plus filler words. */
+const NON_SEARCH_PHRASES = [
+  /\d+\s*minutes?\s*ago/gi,
+  /\d+\s*hours?\s*ago/gi,
+  /\byesterday\b/gi,
+  /\btoday\b/gi,
+  /\bthis morning\b/gi,
+  /\bthis afternoon\b/gi,
+  /\bthis evening\b/gi,
+  /\blast week\b/gi,
+  /\brecently\b/gi,
+  /\bjust now\b/gi,
+  /\bjust saw\b/gi,
+  /\bearlier\b/gi,
+  /\bwhat was\b/gi,
+  /\bwhat were\b/gi,
+  /\bi saw\b/gi,
+  /\bthat\b/gi,
+  /\bthe\b/gi
+]
+
+/** Timestamp on the local calendar day of `base` at the given wall-clock time. */
+function atLocalTime(
+  base: number,
+  hours: number,
+  minutes: number,
+  seconds: number,
+  ms: number
+): number {
+  const date = new Date(base)
+  date.setHours(hours, minutes, seconds, ms)
+  return date.getTime()
+}
+
+/** Local midnight at the start of the day containing `base`. */
+function startOfLocalDay(base: number): number {
+  return atLocalTime(base, 0, 0, 0, 0)
+}
+
+/** Last millisecond (23:59:59.999 local time) of the day containing `base`. */
+function endOfLocalDay(base: number): number {
+  return atLocalTime(base, 23, 59, 59, 999)
+}
+
+/**
+ * Turn the first recognized temporal phrase in `query` into a time range.
+ *
+ * Phrases are tried in this order: "N minutes ago", "N hours ago", "today",
+ * "yesterday", "this evening" / "this morning" / "this afternoon",
+ * "last week", "recently" / "just now" / "just saw", "earlier". Matching is
+ * case-insensitive and day boundaries use the local time zone.
+ *
+ * @param query       Free-text message.
+ * @param currentTime Reference "now" in epoch milliseconds (default Date.now()).
+ * @returns The range and a short description, or null when nothing matches.
+ */
+export function parseTemporalReference(
+  query: string,
+  currentTime: number = Date.now()
+): TimeRange | null {
+  const lowerQuery = query.toLowerCase()
+
+  const minutesAgoMatch = lowerQuery.match(/(\d+)\s*minutes?\s*ago/)
+  if (minutesAgoMatch) {
+    const minutes = parseInt(minutesAgoMatch[1], 10)
+    const targetTime = currentTime - minutes * MINUTE_MS
+    // A 5-minute window centred on the target time.
+    return {
+      startTime: targetTime - 2.5 * MINUTE_MS,
+      endTime: targetTime + 2.5 * MINUTE_MS,
+      description: `${minutes} minute${minutes === 1 ? '' : 's'} ago`
+    }
+  }
+
+  const hoursAgoMatch = lowerQuery.match(/(\d+)\s*hours?\s*ago/)
+  if (hoursAgoMatch) {
+    const hours = parseInt(hoursAgoMatch[1], 10)
+    // The whole clock hour containing the target time.
+    const startOfHour = new Date(currentTime - hours * HOUR_MS)
+    startOfHour.setMinutes(0, 0, 0)
+    const endOfHour = new Date(startOfHour)
+    endOfHour.setMinutes(59, 59, 999)
+    return {
+      startTime: startOfHour.getTime(),
+      endTime: endOfHour.getTime(),
+      description: `${hours} hour${hours === 1 ? '' : 's'} ago`
+    }
+  }
+
+  if (lowerQuery.includes('today')) {
+    return {
+      startTime: startOfLocalDay(currentTime),
+      endTime: endOfLocalDay(currentTime),
+      description: 'today'
+    }
+  }
+
+  if (lowerQuery.includes('yesterday')) {
+    const dayBefore = new Date(currentTime)
+    dayBefore.setDate(dayBefore.getDate() - 1)
+    return {
+      startTime: startOfLocalDay(dayBefore.getTime()),
+      endTime: endOfLocalDay(dayBefore.getTime()),
+      description: 'yesterday'
+    }
+  }
+
+  for (const part of DAY_PARTS) {
+    if (lowerQuery.includes(part.phrase)) {
+      return {
+        startTime: atLocalTime(currentTime, part.startHour, 0, 0, 0),
+        endTime: atLocalTime(currentTime, part.endHour, 59, 59, 999),
+        description: part.phrase
+      }
+    }
+  }
+
+  // "last week" means the trailing seven days, starting at local midnight.
+  if (lowerQuery.includes('last week')) {
+    const weekAgo = new Date(currentTime)
+    weekAgo.setDate(weekAgo.getDate() - 7)
+    return {
+      startTime: startOfLocalDay(weekAgo.getTime()),
+      endTime: currentTime,
+      description: 'last week'
+    }
+  }
+
+  if (
+    lowerQuery.includes('recently') ||
+    lowerQuery.includes('just now') ||
+    lowerQuery.includes('just saw')
+  ) {
+    // The last 10 minutes.
+    return {
+      startTime: currentTime - 10 * MINUTE_MS,
+      endTime: currentTime,
+      description: 'recently'
+    }
+  }
+
+  // "earlier" covers today so far.
+  if (lowerQuery.includes('earlier')) {
+    return {
+      startTime: startOfLocalDay(currentTime),
+      endTime: currentTime,
+      description: 'earlier today'
+    }
+  }
+
+  return null
+}
+
+/**
+ * Strip temporal phrases and a few filler words from `query`, leaving the text
+ * to search screen history for.
+ *
+ * Sentence punctuation at the end of a word ("?", "!", ".", ",") is removed as
+ * well: a trailing "?" would otherwise become part of the substring searched
+ * for and prevent any match. Punctuation inside a token ("index.ts") is kept.
+ *
+ * @returns The cleaned text with whitespace collapsed, or '' if nothing is left.
+ */
+export function extractSearchTerms(query: string): string {
+  let cleanedQuery = query
+  for (const phrase of NON_SEARCH_PHRASES) {
+    cleanedQuery = cleanedQuery.replace(phrase, ' ')
+  }
+
+  return cleanedQuery
+    .replace(/[?!.,]+(?=\s|$)/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim()
+}
diff --git a/desktop/windows/src/shared/types.ts b/desktop/windows/src/shared/types.ts
index 3f01fc59eb6..225c3a00b1f 100644
--- a/desktop/windows/src/shared/types.ts
+++ b/desktop/windows/src/shared/types.ts
@@ -242,6 +242,21 @@ export type OmiBridgeApi = {
   /** Capture the primary screen once and OCR it, returning the recognized text
    * (or '' on failure/timeout). Used by the chat to read the screen at send time. */
   screenReadText: () => Promise
+  screenSearchHistory: (params: {
+    startTime: number
+    endTime: number
+    searchQuery?: string
+    limit?: number
+  }) => Promise<{
+    frames: RewindFrame[]
+    summary: string
+    timeRange: {
+      start: number
+      end: number
+      description: string
+    }
+  }>
+  screenGetHistoryContext: (startTime: number, endTime: number, maxFrames?: number) => Promise<string>
   insightGetSettings: () => Promise
   insightSetSettings: (patch: Partial) => Promise
   insightAdd: (p: InsightPayload) => Promise