Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions desktop/windows/src/main/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { registerOverlayHandlers } from './overlay/ipc'
import { seedUserAssistOnce } from './usage/userAssistSeed'
import { registerRewindHandlers } from './ipc/rewind'
import { registerScreenHandlers } from './ipc/screen'
import { registerScreenHistoryHandlers } from './ipc/screenHistory'
import { registerInsightHandlers } from './ipc/insight'
import { createInsightToastWindow } from './insight/toastWindow'
import { registerAutomationHandlers } from './ipc/automation'
Expand Down Expand Up @@ -290,6 +291,7 @@ app.whenReady().then(async () => {
registerMemoryCleanupHandlers()
registerRewindHandlers()
registerScreenHandlers()
registerScreenHistoryHandlers()
// Cross-window conversations refresh: any renderer that writes a local
// conversation (main window OR overlay) notifies here; rebroadcast to every
// window so each invalidates its own per-process conversations cache (e.g. an
Expand Down
29 changes: 29 additions & 0 deletions desktop/windows/src/main/ipc/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,35 @@ export function searchRewindFrames(query: string, limit = 500): RewindFrame[] {
})
}

/**
 * Returns rewind frames whose `ts` lies between `startTime` and `endTime`
 * (both inclusive), newest first.
 *
 * When `searchQuery` contains non-whitespace text, only frames whose OCR text,
 * window title or app contains that text are returned. The text is matched
 * literally: `%`, `_` and `\` typed by the user are escaped so they are not
 * interpreted as LIKE wildcards.
 *
 * @param startTime - lower bound compared against the `ts` column
 * @param endTime - upper bound compared against the `ts` column
 * @param searchQuery - optional text filter; blank or whitespace-only means "no filter"
 * @param limit - maximum number of rows to return (default 50)
 * @returns the matching frames ordered by `ts` descending
 */
export function searchRewindFramesInTimeRange(
  startTime: number,
  endTime: number,
  searchQuery?: string,
  limit = 50
): RewindFrame[] {
  return timed('searchRewindFramesInTimeRange', () => {
    const term = searchQuery ? searchQuery.trim() : ''

    if (!term) {
      return get()
        .prepare(
          `SELECT ${REWIND_COLUMNS} FROM rewind_frames
           WHERE ts BETWEEN ? AND ?
           ORDER BY ts DESC LIMIT ?`
        )
        .all(startTime, endTime, limit) as RewindFrame[]
    }

    // Prefix every LIKE metacharacter (and the escape character itself) with a
    // backslash; the ESCAPE clause below tells SQLite to treat them literally.
    const like = `%${term.replace(/[\\%_]/g, '\\$&')}%`

    return get()
      .prepare(
        `SELECT ${REWIND_COLUMNS} FROM rewind_frames
         WHERE ts BETWEEN ? AND ?
           AND (ocr_text LIKE ? ESCAPE '\\'
             OR window_title LIKE ? ESCAPE '\\'
             OR app LIKE ? ESCAPE '\\')
         ORDER BY ts DESC LIMIT ?`
      )
      .all(startTime, endTime, like, like, like, limit) as RewindFrame[]
  })
}

export function rewindDayBounds(): { min: number; max: number } | null {
const row = get()
.prepare('SELECT MIN(ts) AS min, MAX(ts) AS max FROM rewind_frames')
Expand Down
141 changes: 141 additions & 0 deletions desktop/windows/src/main/ipc/screenHistory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { ipcMain } from 'electron'
import { searchRewindFramesInTimeRange } from './db'
import type { RewindFrame } from '../../shared/types'

/**
 * Request payload for the `screen:searchHistory` IPC channel.
 */
interface ScreenHistorySearchParams {
// Start of the time window. Forwarded to the frame query and rendered with
// `new Date(...)` — presumably epoch milliseconds; confirm against the capture code.
startTime: number
// End of the time window; same unit as `startTime`.
endTime: number
// Optional text filter, forwarded to the frame query and used to pick OCR snippets.
searchQuery?: string
// Maximum number of frames to return; `searchScreenHistory` defaults it to 20.
limit?: number
}

/**
 * Value returned by `searchScreenHistory` (and therefore by the
 * `screen:searchHistory` IPC channel).
 */
interface ScreenHistoryResult {
// Frames returned by the time-range query, in the order the query produced them.
frames: RewindFrame[]
// Human-readable digest of the frames: apps, window titles and/or OCR snippets.
summary: string
// Echo of the requested time window.
timeRange: {
// The `startTime` that was requested.
start: number
// The `endTime` that was requested.
end: number
// Both bounds formatted with `Date#toLocaleString()`, joined by " - ".
description: string
}
}

/**
 * Searches captured screen frames in a time window and condenses them into a
 * short text summary.
 *
 * The summary always names the three most frequent apps among the returned
 * frames. When a search query is given and found in a frame's OCR text, up to
 * three snippets of surrounding text are quoted; when no snippet was found, up
 * to five distinct window titles are listed instead.
 *
 * @param params - time window, optional search text and optional frame limit (default 20)
 * @returns the matching frames, the summary text and an echo of the requested range
 */
async function searchScreenHistory(params: ScreenHistorySearchParams): Promise<ScreenHistoryResult> {
  const maxSnippets = 3
  const snippetPadding = 50
  const { startTime, endTime, searchQuery, limit = 20 } = params

  // Search the database for frames in the time range
  const frames = searchRewindFramesInTimeRange(startTime, endTime, searchQuery, limit)

  // The frame query matches on the trimmed query text, so snippets are located
  // with the same trimmed text; otherwise a query with surrounding whitespace
  // would return frames but never produce a snippet.
  const needle = searchQuery ? searchQuery.trim().toLowerCase() : ''

  let summary: string
  if (frames.length === 0) {
    summary = 'No screen activity found in the specified time range.'
  } else {
    const appCounts = new Map<string, number>()
    const windowTitles = new Set<string>()
    const snippets: string[] = []

    for (const frame of frames) {
      const app = frame.app || 'Unknown'
      appCounts.set(app, (appCounts.get(app) || 0) + 1)

      if (frame.windowTitle) {
        windowTitles.add(frame.windowTitle)
      }

      // Only scan OCR text while there is still room for another snippet.
      if (needle && frame.ocrText && snippets.length < maxSnippets) {
        const index = frame.ocrText.toLowerCase().indexOf(needle)
        if (index !== -1) {
          // Extract context around the match
          const contextStart = Math.max(0, index - snippetPadding)
          const contextEnd = Math.min(
            frame.ocrText.length,
            index + needle.length + snippetPadding
          )
          const snippet = frame.ocrText.substring(contextStart, contextEnd).trim()
          // Label with the same fallback name used for the app counts so a
          // frame without an app never renders as "[null]" / "[undefined]".
          snippets.push(`[${app}] ...${snippet}...`)
        }
      }
    }

    // Most frequent apps first, capped at three.
    const appSummary = Array.from(appCounts.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([app, count]) => `${app} (${count})`)
      .join(', ')

    summary = `Found ${frames.length} screen captures. Apps: ${appSummary}.`

    // Window titles are the fallback detail when no OCR snippet was found.
    if (windowTitles.size > 0 && snippets.length === 0) {
      const titleList = Array.from(windowTitles).slice(0, 5).join(', ')
      summary += `\nWindows: ${titleList}`
    }

    if (snippets.length > 0) {
      summary += '\n\nRelevant text snippets:\n' + snippets.join('\n')
    }
  }

  return {
    frames,
    summary,
    timeRange: {
      start: startTime,
      end: endTime,
      description: `${new Date(startTime).toLocaleString()} - ${new Date(endTime).toLocaleString()}`
    }
  }
}

/**
 * Renders the most recent frames of a time window as a plain-text block.
 *
 * The block starts with a header naming the window, followed by one
 * "time | app - title" line per frame and, when the frame has OCR text, a
 * preview of at most 200 characters of that text.
 *
 * @param startTime - start of the time window
 * @param endTime - end of the time window
 * @param maxFrames - maximum number of frames to include (default 5)
 * @returns the formatted block, or an empty string when no frame was found
 */
async function getScreenContextForTimeRange(
  startTime: number,
  endTime: number,
  maxFrames = 5
): Promise<string> {
  const previewLength = 200
  const frames = searchRewindFramesInTimeRange(startTime, endTime, undefined, maxFrames)

  if (frames.length === 0) {
    return ''
  }

  const lines: string[] = [
    `[Screen history from ${new Date(startTime).toLocaleTimeString()} to ${new Date(
      endTime
    ).toLocaleTimeString()}]`
  ]

  for (const frame of frames) {
    const time = new Date(frame.ts).toLocaleTimeString()
    const app = frame.app || 'Unknown'
    const title = frame.windowTitle ? ` - ${frame.windowTitle}` : ''

    lines.push(`\n${time} | ${app}${title}`)

    const text = frame.ocrText ? frame.ocrText.trim() : ''
    if (text) {
      // Decide on the ellipsis from the trimmed text, the same string the
      // preview is cut from, so whitespace padding alone never adds "...".
      lines.push(text.length > previewLength ? `${text.substring(0, previewLength)}...` : text)
    }
  }

  return lines.join('\n')
}

/**
 * Registers the screen-history IPC channels on `ipcMain`:
 *
 * - `screen:searchHistory` — takes one search-params object and resolves with
 *   the result of `searchScreenHistory`.
 * - `screen:getHistoryContext` — takes start time, end time and an optional
 *   frame cap and resolves with the text from `getScreenContextForTimeRange`.
 */
export function registerScreenHistoryHandlers(): void {
  ipcMain.handle(
    'screen:searchHistory',
    async (_event, query: ScreenHistorySearchParams) => searchScreenHistory(query)
  )

  ipcMain.handle(
    'screen:getHistoryContext',
    async (_event, rangeStart: number, rangeEnd: number, frameCap?: number) =>
      getScreenContextForTimeRange(rangeStart, rangeEnd, frameCap)
  )
}
8 changes: 8 additions & 0 deletions desktop/windows/src/preload/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ const omi: OmiBridgeApi = {
rewindPrimarySourceId: () => ipcRenderer.invoke('rewind:primarySourceId'),
rewindSaveFrame: (data: Uint8Array) => ipcRenderer.invoke('rewind:saveFrame', data),
screenReadText: () => ipcRenderer.invoke('screen:readNow'),
screenSearchHistory: (params: {
startTime: number
endTime: number
searchQuery?: string
limit?: number
}) => ipcRenderer.invoke('screen:searchHistory', params),
screenGetHistoryContext: (startTime: number, endTime: number, maxFrames?: number) =>
ipcRenderer.invoke('screen:getHistoryContext', startTime, endTime, maxFrames),
screenSynthFramesSince: () => ipcRenderer.invoke('screenSynth:framesSince'),
screenSynthGetState: () => ipcRenderer.invoke('screenSynth:getState'),
screenSynthSetState: (patch) => ipcRenderer.invoke('screenSynth:setState', patch),
Expand Down
10 changes: 8 additions & 2 deletions desktop/windows/src/renderer/src/hooks/useChat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { auth } from '../lib/firebase'
import { invalidateConversationsCache } from '../lib/pageCache'
import { gatherLocalContext } from '../lib/localAgent'
import { readCurrentScreen } from '../lib/screenContext'
import { getScreenHistoryContext } from '../lib/screenHistoryContext'
import { looksLikeAction, looksLikeRawPlan, planActions } from '../lib/actionPlanner'
import { callAgentLLM } from '../lib/agentLLM'
import type { AutomationPlan } from '../../../shared/types'
Expand Down Expand Up @@ -249,12 +250,17 @@ export function useChat(opts?: { surface?: 'main' | 'overlay' }): UseChat {
// context to EVERY message. It's framed so the model ignores it unless the
// message is actually about the screen, so it doesn't bloat answers. This
// is an instant hot-cache read, so normal messages don't pay a capture cost;
// • screen history — if asking about past screens ("5 minutes ago", "that error")
// • local KG/file context — apps/projects/tech the chat is grounded in.
const [screenContext, localContext] = await Promise.all([
const [screenContext, historyContext, localContext] = await Promise.all([
readCurrentScreen(),
Promise.race([
getScreenHistoryContext(userMsg.content),
new Promise<string>((resolve) => setTimeout(() => resolve(''), 1500))
]),
gatherLocalContext(userMsg.content)
])
const contextParts = [screenContext, localContext].filter(Boolean)
const contextParts = [screenContext, historyContext, localContext].filter(Boolean)
const textToSend = contextParts.length
? `${contextParts.join('\n\n')}\n\n${userMsg.content}`
: userMsg.content
Expand Down
133 changes: 133 additions & 0 deletions desktop/windows/src/renderer/src/lib/screenHistoryContext.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import { parseTemporalReference, extractSearchTerms } from './temporalParser'

/**
 * Builds screen-history context text for a chat message.
 *
 * - If the message contains a temporal reference, history is fetched for that
 *   range using the message's extracted search terms.
 * - Otherwise, if the message contains one of the history cue words, history
 *   is fetched for the last 30 minutes.
 * - Otherwise an empty string is returned and nothing is fetched.
 *
 * @param message - the raw user message
 * @returns the formatted context, or `''` when the message is not about screen history
 */
export async function getScreenHistoryContext(message: string): Promise<string> {
  const range = parseTemporalReference(message)

  // Explicit time reference: use exactly the range the parser produced.
  if (range) {
    const terms = extractSearchTerms(message)
    return fetchScreenContext(range.startTime, range.endTime, terms)
  }

  // No time reference: only proceed when a cue word hints at past screen content.
  const cueWords = [
    'saw', 'seen', 'was on', 'showed', 'displayed',
    'error', 'message', 'screen', 'window',
    'earlier', 'before', 'previously'
  ]
  const haystack = message.toLowerCase()
  let mentionsHistory = false
  for (const cue of cueWords) {
    if (haystack.includes(cue)) {
      mentionsHistory = true
      break
    }
  }
  if (!mentionsHistory) {
    return ''
  }

  // Fall back to the most recent half hour.
  const windowStart = Date.now() - 30 * 60 * 1000
  const windowEnd = Date.now()

  const terms = extractSearchTerms(message)
  return fetchScreenContext(windowStart, windowEnd, terms || undefined)
}

/**
 * Produces the OCR excerpt shown for one frame: the trimmed text, narrowed to
 * 100 characters either side of the first case-insensitive match of `needle`
 * (wrapped in "..."), and finally capped at 300 characters plus "...".
 */
function excerptOcrText(rawText: string, needle: string | undefined): string {
  let excerpt = rawText.trim()

  if (needle) {
    const matchAt = excerpt.toLowerCase().indexOf(needle.toLowerCase())
    if (matchAt !== -1) {
      const from = Math.max(0, matchAt - 100)
      const to = Math.min(excerpt.length, matchAt + needle.length + 100)
      excerpt = '...' + excerpt.substring(from, to) + '...'
    }
  }

  return excerpt.length > 300 ? excerpt.substring(0, 300) + '...' : excerpt
}

/**
 * Queries screen history through `window.omi.screenSearchHistory` (at most 10
 * frames) and formats the answer as context text for the AI: a header with the
 * range description, the summary, then details for the first five frames.
 *
 * @param startTime - start of the time window
 * @param endTime - end of the time window
 * @param searchQuery - optional text to search for; an empty string is sent as `undefined`
 * @returns the formatted context, or `''` when nothing was found or the call failed
 */
async function fetchScreenContext(
  startTime: number,
  endTime: number,
  searchQuery?: string
): Promise<string> {
  try {
    const result = await window.omi.screenSearchHistory({
      startTime,
      endTime,
      searchQuery: searchQuery || undefined,
      limit: 10
    })

    if (!result || result.frames.length === 0) {
      return ''
    }

    const output: string[] = []
    output.push(`[Screen history context from ${result.timeRange.description}]`)
    output.push(result.summary)
    output.push('\nDetailed screen captures:')

    // Only the first five frames get a detailed entry.
    for (const frame of result.frames.slice(0, 5)) {
      const clock = new Date(frame.ts).toLocaleTimeString()
      const appName = frame.app || 'Unknown app'
      const windowName = frame.windowTitle || 'No title'
      output.push(`\n[${clock}] ${appName} - ${windowName}`)

      if (frame.ocrText) {
        output.push(excerptOcrText(frame.ocrText, searchQuery))
      }
    }

    return output.join('\n')
  } catch (error) {
    // Best effort: a failed history lookup must not break the chat message.
    console.error('Failed to fetch screen history context:', error)
    return ''
  }
}

/**
 * Heuristically decides whether a message asks about past screen content.
 *
 * With a temporal reference in the message, it returns true only if the
 * lower-cased message also contains one of the screen-related cue words.
 * Without one, it returns true if the lower-cased message matches one of the
 * direct "what was on my screen"-style patterns.
 *
 * @param message - the raw user message
 * @returns whether the message looks like a screen-history question
 */
export function isAskingAboutScreenHistory(message: string): boolean {
  const text = message.toLowerCase()

  if (parseTemporalReference(message)) {
    // A time reference alone is not enough; it must also be about visual content.
    const visualCues = ['saw', 'seen', 'screen', 'showed', 'error', 'message', 'window', 'displayed']
    for (const cue of visualCues) {
      if (text.includes(cue)) {
        return true
      }
    }
    return false
  }

  // No time reference: look for phrasing that directly asks about the screen.
  const directQuestions = [
    /what.*(was|were).*on.*screen/,
    /what.*did.*i.*see/,
    /show.*me.*what.*was/,
    /error.*message/,
    /that.*(error|message|screen)/,
    /find.*what.*showed/
  ]
  for (const question of directQuestions) {
    if (question.test(text)) {
      return true
    }
  }
  return false
}
Loading