diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..5a42571 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,66 @@ +name: release + +# Triggered by the tags that scripts/release.sh pushes (vX.Y.Z). Builds +# installers on each OS and publishes them to a GitHub Release, then +# flips that release from draft to public once every OS has finished. +on: + push: + tags: + - 'v*' + +permissions: + contents: write # create the release and upload assets + +concurrency: + group: release-${{ github.ref }} + cancel-in-progress: false + +jobs: + build: + name: build (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest, windows-latest, ubuntu-latest] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node 22 + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Build and publish to the draft release + run: npm run release:ci + env: + # Lets electron-builder create/upload to the GitHub Release. + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Optional macOS code signing + notarization. These only take + # effect when the secrets exist; without them the macOS build + # is unsigned (and macOS auto-update stays disabled until they + # are added). Windows/Linux publish and auto-update regardless. + CSC_LINK: ${{ secrets.CSC_LINK }} + CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }} + APPLE_ID: ${{ secrets.APPLE_ID }} + APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + + publish: + name: publish release + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + steps: + # All OS builds succeeded and uploaded their assets to the draft + # release — flip it public and mark it the latest. 
+ - name: Promote draft to published + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh release edit "${GITHUB_REF_NAME}" --repo "${GITHUB_REPOSITORY}" --draft=false --latest diff --git a/CHANGELOG.md b/CHANGELOG.md index aaff195..7c13df0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### AI (new) +- **Ask AI on a snip** — capture a region and get it *solved/answered* + (math, code, questions, errors), not just described. A dockable chat + panel streams the response. +- **Local-first routing** — on-device models via [Ollama](https://ollama.com) + with a first-run setup wizard; optional cloud providers as fallback. +- **Providers**: Ollama (local), Anthropic Claude, OpenAI, Google Gemini, + DeepSeek (text), and Sarvam AI (Indic-strong Vision OCR → LLM solve). +- **Autocorrect** for typed text and recognized handwriting (per-kind + toggles), **handwriting recognition** (drawn ink → text via a vision + model), and **trader chart analysis** from drawn levels. +- **On-device learning (RAG)** — accepted corrections are remembered + locally to personalize suggestions; nothing leaves the machine. +- **Per-profile** system prompts and model overrides in Settings → AI. +- Follow-up questions retain full conversation context (image/OCR carried + across turns) until a new snip starts a fresh conversation. + +### Auto-update (new) +- Background auto-update from GitHub Releases via `electron-updater`: + downloads in the background and applies on quit. Settings → Updates adds + an automatic-updates toggle, a manual check, and restart-to-update. + (macOS auto-update activates once the build is signed + notarized.) + +### Build & release (new) +- Tag-driven release automation: `npm run release[:minor|:major]` bumps + the version, rolls the changelog, commits, tags, and pushes. 
+- GitHub Actions builds macOS / Windows / Linux on a `v*` tag and + publishes installers + update manifests to GitHub Releases. +- macOS builds now also emit a `.zip` (for Squirrel.Mac auto-update). + ## [1.0.0] — 2026-05-20 Initial open-source release of Lekhini, by diff --git a/README.md b/README.md index 329aa7c..460e6a9 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,11 @@ content without switching apps. by default with a hotkey to toggle drawing. - **Undo/redo, clear, screenshot**, all from the toolbar or global hotkeys. +- **AI, local-first.** Snip anything and **Ask AI** to solve/explain it, + autocorrect handwriting and typed text, and analyze trader charts — + running on-device via Ollama by default, with optional cloud providers. + Everything is opt-in and configured in **Settings → AI**. See + [docs/AI.md](./docs/AI.md). ## Tech stack @@ -48,8 +53,12 @@ content without switching apps. - **Zustand** vanilla store with snapshot-based undo/redo history - **`electron-store`** for persisted orientation / theme / per-tool widths / active tool / color, with schema-tolerant hydration -- **`electron-builder`** for notarized `.dmg` (and `nsis` / - `AppImage` for Windows / Linux) +- **Local-first AI** — [Ollama](https://ollama.com) for on-device models, + with optional cloud providers (Anthropic, OpenAI, Gemini, DeepSeek, + Sarvam AI). See [docs/AI.md](./docs/AI.md). +- **`electron-builder`** for `.dmg` + `.zip` (macOS), `nsis` (Windows), + `AppImage` (Linux), and **`electron-updater`** for background + auto-updates from GitHub Releases ## Architecture @@ -154,29 +163,95 @@ After granting, quit and relaunch the app. Switch profile from **Settings → Profile**. The choice is remembered. +## AI + +Lekhini's AI is **local-first and entirely opt-in**. With **Local AI** +on, snips and text never leave your machine; cloud providers are an +optional fallback you configure with your own API key. Nothing AI-related +is enabled until you set one of them up. 
+ +**What you can do** + +- **Ask AI about a snip** — drag a region, click **Ask AI**, and a chat + panel opens. It *solves/answers* what's in the image (math, code, + questions, errors), not just describes it. Follow-up questions keep the + full conversation context until you start a new snip. +- **Autocorrect** — typed text and recognized handwriting can be cleaned + up automatically (toggle per kind in Settings). +- **Handwriting recognition** — drawn ink is transcribed to text on + device via a vision model. +- **Trader analysis** — the Trader profile can hand your drawn levels to + the AI for a written read. +- **On-device learning (RAG)** — accepted corrections are remembered + locally to personalize future suggestions. Stored only on your machine. + +**Providers** + +| Provider | Kind | Vision | Notes | +| --- | --- | --- | --- | +| **Ollama (Local)** | on-device | yes | Default. Private, free, no key. | +| **Anthropic Claude** | cloud | yes | API key required | +| **OpenAI** | cloud | yes | API key required | +| **Google Gemini** | cloud | yes | API key required | +| **DeepSeek** | cloud | no (text) | Strong reasoning; image snips answer from text | +| **Sarvam AI** | cloud | yes (OCR→LLM) | Indic-strong document OCR, then solves | + +**Configure** in **Settings → AI**: enable Local AI (a first-run wizard +installs Ollama + recommended models), or pick a cloud provider and paste +its key. Routing is local-first — if Local AI is on and a suitable model +is installed it's used; otherwise the configured cloud provider is. + +Full details — architecture, the resolver, per-profile prompts/models, +privacy, and how each provider is wired — are in +**[docs/AI.md](./docs/AI.md)**. + +## Updates + +Installed builds **auto-update from GitHub Releases** via +`electron-updater`. By default new versions download in the background +and apply on the next quit/relaunch. 
Manage this in **Settings → +Updates**: toggle **Automatic updates**, **Check for updates** on demand, +or **Restart to update** once a version is downloaded. + +> macOS auto-update requires a signed + notarized build. Until signing +> is configured, macOS users update manually (Settings → Updates links to +> the latest GitHub Release); Windows and Linux auto-update out of the box. + ## Building installers +Build for the **current OS** (most reliable locally): + ```bash -# macOS — set these in your shell for signed/notarized builds -export APPLE_ID="you@example.com" -export APPLE_APP_SPECIFIC_PASSWORD="xxxx-xxxx-xxxx-xxxx" -export APPLE_TEAM_ID="ABCDE12345" -export CSC_LINK="path/to/DeveloperIDApplication.p12" -export CSC_KEY_PASSWORD="..." +npm run build # installers for this OS → release/ +npm run build:unpacked # unpacked app dir, no installer (fastest) +``` -npm run build:mac # produces release/Lekhini-1.0.0-arm64.dmg (+ x64) -npm run build:win # produces release/Lekhini Setup 1.0.0.exe -npm run build:linux # produces release/Lekhini-1.0.0.AppImage +Per-OS targets (cross-OS locally needs the right toolchains — CI is the +supported path for all three at once): + +```bash +npm run build:mac # release/Lekhini-<version>-arm64.dmg (+ x64) + .zip +npm run build:win # release/Lekhini Setup <version>.exe +npm run build:linux # release/Lekhini-<version>.AppImage +npm run build:all # attempt mac + win + linux (-mwl) ``` -Unsigned local builds (no notarization): +Optional **macOS signing + notarization** — set these in your shell (or +as CI secrets) and the build signs automatically; omit them for an +unsigned build: ```bash -npm run build:unpacked +export APPLE_ID="you@example.com" +export APPLE_APP_SPECIFIC_PASSWORD="xxxx-xxxx-xxxx-xxxx" +export APPLE_TEAM_ID="ABCDE12345" +export CSC_LINK="path/to/DeveloperIDApplication.p12" +export CSC_KEY_PASSWORD="..." ``` -GitHub Actions on `macos-14` is the recommended CI target — same -`npm run build` command, with the secrets above set as repo secrets. 
+**Automated multi-OS builds + releases** run in CI — pushing a `vX.Y.Z` +tag builds macOS / Windows / Linux in parallel and publishes them to +GitHub Releases. See [RELEASING.md](./RELEASING.md); the one-liner is +`npm run release` (patch) / `release:minor` / `release:major`. ## Hard constraint: macOS fullscreen Spaces diff --git a/RELEASING.md b/RELEASING.md index 2778a0e..68030af 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -45,42 +45,61 @@ Bug fixes and small polish that don't change behavior intentionally: - Dependency bumps that don't change behavior. - Documentation-only changes. -## Cutting a release - -1. Make sure `main` is green: `npm run typecheck` and `npm run build` - succeed locally. CI on the release commit must also be green. -2. Decide the version bump (major / minor / patch) per the policy above. -3. Update `CHANGELOG.md`: - - Move items from `[Unreleased]` into a new versioned section. - - Add a dated heading: `## [X.Y.Z] — YYYY-MM-DD`. - - Update the link references at the bottom of the file. -4. Bump `package.json`'s `version` field to the new version. Do NOT - use `npm version` if your workflow doesn't also tag — keep these - steps explicit. -5. Commit: +## Cutting a release (automated) + +Releases are **tag-driven**. One command bumps the version, rolls the +changelog, commits, tags, and pushes — then CI builds every OS and +publishes the GitHub Release. You do not build or upload anything by hand. + +1. Make sure the branch is clean and green (`npm run typecheck`, and CI + on the latest commit is passing). Land all release-worthy changes + first, with notes under `## [Unreleased]` in `CHANGELOG.md`. +2. Run the release script with the bump type: + ```bash + npm run release # patch (X.Y.Z+1) — the default + npm run release:minor # X.Y+1.0 + npm run release:major # X+1.0.0 + # or an exact version: + bash scripts/release.sh 1.4.0 ``` - git add CHANGELOG.md package.json package-lock.json - git commit -m "chore: release vX.Y.Z" - ``` -6. 
Tag the commit: - ``` - git tag -a vX.Y.Z -m "Lekhini vX.Y.Z" - ``` -7. Push commit and tag: - ``` - git push origin main - git push origin vX.Y.Z - ``` -8. Build the installers (signed where applicable): - ``` - npm run build:mac # produces release/Lekhini-X.Y.Z-arm64.dmg - npm run build:win # produces release/Lekhini Setup X.Y.Z.exe - npm run build:linux # produces release/Lekhini-X.Y.Z.AppImage - ``` -9. Create a GitHub Release from the `vX.Y.Z` tag: - - Title: `vX.Y.Z` - - Body: copy the relevant CHANGELOG section. - - Attach the installers from step 8. + This (see `scripts/release.sh`): + - refuses to run on a dirty tree, + - validates with `npm run prebuild` (typecheck + build), + - bumps `package.json` + `package-lock.json` (no tag yet), + - rolls `CHANGELOG.md`: `[Unreleased]` → a dated `[X.Y.Z]` section and + updates the link refs (`scripts/update-changelog.mjs`), + - commits `chore(release): vX.Y.Z`, tags `vX.Y.Z`, and pushes both. +3. The pushed tag triggers **`.github/workflows/release.yml`**, which: + - builds installers on macOS, Windows, and Linux in parallel + (`npm run release:ci` → `electron-builder --publish always`), + - uploads them plus the `latest*.yml` update manifests to a **draft** + GitHub Release for the tag, + - flips the release **public** once all three OSes succeed. +4. Watch it at <https://github.com/opensourcebharat/lekhini/actions>. + When green, the release is live and installed apps will auto-update. + +### macOS signing (optional) + +The workflow signs + notarizes macOS builds **only when** these repo +secrets exist; otherwise the macOS build is unsigned (and macOS +auto-update stays disabled until they're added — Windows/Linux are +unaffected): `CSC_LINK`, `CSC_KEY_PASSWORD`, `APPLE_ID`, +`APPLE_APP_SPECIFIC_PASSWORD`, `APPLE_TEAM_ID`. + +### Local build (optional) + +To produce installers without releasing, use `npm run build` (current +OS) or `npm run build:mac|win|linux`. These write to `release/` and do +**not** publish. 
+ +## Auto-update + +Installed apps check GitHub Releases via `electron-updater` +(`src/main/updater.ts`), download in the background, and apply on quit. +Users control this in **Settings → Updates** (toggle, manual check, +restart-to-update). Because the feed is GitHub Releases, **every public +release is automatically an update** for existing installs — so prefer +small, frequent patch releases in the early stage. ## Tag naming diff --git a/docs/AI.md b/docs/AI.md new file mode 100644 index 0000000..b511f06 --- /dev/null +++ b/docs/AI.md @@ -0,0 +1,136 @@ +# AI in Lekhini + +Lekhini's AI is **local-first, private, and entirely opt-in**. Nothing +AI-related runs until you turn it on, and with Local AI enabled your +snips and text never leave your machine. Cloud providers are an optional +fallback you configure with your own API key. + +This document explains what the AI features do, how routing works, how to +configure each provider, where keys live, and how it's wired in code. + +--- + +## What you can do + +| Feature | Where | What it does | +| --- | --- | --- | +| **Ask AI** | Snip → **Ask AI** | Opens a chat about the captured region and **solves/answers** it (math, code, a question, an error) rather than just describing it. | +| **Follow-up chat** | Chat composer | Keeps the full conversation — the image (or its OCR text) is carried across turns — until a new snip starts a fresh conversation. | +| **Autocorrect (typed)** | Settings → AI | Cleans grammar/spelling of typed text. | +| **Autocorrect (drawn)** | Settings → AI | Cleans recognized handwriting. | +| **Handwriting recognition** | Draw, then pause | Transcribes drawn ink to a text shape via a vision model. | +| **Trader analysis** | Trader profile → Analyze | Sends your drawn levels (as numbers) for a written read. | +| **On-device learning** | automatic | Remembers accepted corrections locally (RAG) to personalize future suggestions. 
| + +--- + +## How routing works (local-first) + +A single resolver — `resolveProvider()` in +[`src/main/ai/ipc.ts`](../src/main/ai/ipc.ts) — decides who serves each +request: + +1. **Local first.** If **Local AI** is on, the Ollama service is running, + and a suitable model is installed, the request goes to Ollama. For an + image request it picks a **vision** model; for text, a **text** model. + Selection order: per-profile override → global default → the + profile's catalogue default → any installed model of the right kind. +2. **Cloud fallback.** Otherwise, if a cloud provider is configured (has a + saved API key and is the active provider), the request goes there. +3. **Nothing configured → no AI.** If neither is available, AI entry + points stay hidden/disabled and the chat shows a "set something up" + message. You must configure a provider before any AI feature works. + +The renderer stays provider-agnostic: it sends a request and subscribes +to streamed chunks; the resolver picks local-vs-cloud and the concrete +model server-side. + +--- + +## Providers + +| Provider | Kind | Vision | Notes | +| --- | --- | --- | --- | +| **Ollama (Local)** | on-device | ✅ | Default. Private, free, no key. Models run via the local [Ollama](https://ollama.com) service. | +| **Anthropic Claude** | cloud | ✅ | Native vision. | +| **OpenAI** | cloud | ✅ | Native vision (`image_url`). | +| **Google Gemini** | cloud | ✅ | Native vision (`inlineData`). | +| **DeepSeek** | cloud | ❌ text-only | Strong reasoning; image snips are answered from text alone. Use a vision provider for image Q&A. | +| **Sarvam AI** | cloud | ✅ (OCR→LLM) | Runs **Sarvam Vision** document OCR on the image, then solves with Sarvam's own chat model. Excellent for Indic + dense text. 
| + +**Sarvam's two-step pipeline** ([`src/main/ai/sarvam.ts`](../src/main/ai/sarvam.ts)): +the snip PNG is wrapped in a zip and sent to Sarvam's job-based Document +Intelligence (Vision) OCR; the extracted text is then embedded into the +chat and solved by `sarvam-m` / `sarvam-30b` / `sarvam-105b`. OCR runs +**once per conversation** (cached by session), so follow-ups are fast. + +--- + +## Configuring it (Settings → AI) + +### Local AI (recommended) + +1. Toggle **Local AI (Ollama)** on. A first-run wizard checks for Ollama, + links you to install it if missing, starts the service, and downloads + a recommended model set (a small text model + a vision model + the + embedding model for learning). +2. Optionally pin a **text** and **vision** model per profile, or install + extra models from the catalogue. + +Everything here stays on your device. + +### Cloud provider (optional fallback) + +1. Pick a provider under **Cloud fallback** and paste its API key, then + **Save**. **Test** confirms the round-trip. +2. The provider/model you save becomes the active fallback used when + Local AI is off or has no suitable model installed. + +Get a key: Anthropic / OpenAI / Gemini / DeepSeek / Sarvam consoles are +linked from each provider's row. + +### Other settings + +- **Autocorrect typed / drawn** — independent toggles (default off). +- **Profile prompts** — override the built-in system prompt per profile. +- **Default text font** — for newly created text shapes. +- **Learning** — view/reset the on-device example store per profile. + +--- + +## Privacy & where keys live + +- **Local AI**: text and images stay on your machine — nothing is sent to + a server. +- **Cloud providers**: content goes directly to the provider you chose, + under its own data policy. Lekhini does not proxy or log it. +- **API keys** are never stored in the plaintext settings file. 
They're + encrypted with the OS keychain (macOS Keychain / Windows DPAPI / + libsecret) via [`src/main/ai/credentials.ts`](../src/main/ai/credentials.ts), + in a sidecar `ai-credentials.json` decryptable only by your OS user. +- **Learning (RAG)** examples are stored only on this device. + +--- + +## Code map + +| Concern | File | +| --- | --- | +| Provider interface | [`src/main/ai/types.ts`](../src/main/ai/types.ts) | +| Per-provider adapters | `src/main/ai/{anthropic,openai,gemini,deepseek,sarvam,ollama}.ts` | +| Adapter registry (models, labels, key URLs) | [`src/main/ai/registry.ts`](../src/main/ai/registry.ts) | +| Resolver + IPC + session image cache | [`src/main/ai/ipc.ts`](../src/main/ai/ipc.ts) | +| Shared message assembly (history + first-turn image) | [`src/main/ai/messages.ts`](../src/main/ai/messages.ts) | +| Local Ollama service + model catalogue | `src/main/ai/ollama*.ts` | +| On-device learning (RAG) | `src/main/ai/rag.ts`, `ragIpc.ts` | +| Encrypted API key store | [`src/main/ai/credentials.ts`](../src/main/ai/credentials.ts) | +| Chat UI | [`src/renderer/toolbar/ChatPanel.tsx`](../src/renderer/toolbar/ChatPanel.tsx) | +| Settings → AI UI | [`src/renderer/toolbar/App.tsx`](../src/renderer/toolbar/App.tsx) | + +**Adding a provider**: implement `ProviderAdapter.ask()` in a new +`src/main/ai/<id>.ts`, add the id to `ProviderId` in +[`src/shared/types.ts`](../src/shared/types.ts), register it in +`registry.ts` (adapter + models + label + key URL), and add it to the +cloud-provider checks in `ipc.ts`, the persistence validation in +`hub.ts`, and the Settings maps in the toolbar `App.tsx`. The renderer, +preload, and message assembly need no changes. diff --git a/electron-builder.yml b/electron-builder.yml index edb8b6a..1ea4c66 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -3,6 +3,18 @@ productName: Lekhini copyright: Copyright © 2026 Open Source Bharat — https://opensourcebharat.org asar: true +# Auto-update feed + release target. 
electron-builder bakes this into +# app-update.yml inside the build and, when run with --publish, uploads +# the installers plus the latest*.yml manifests to GitHub Releases. +# electron-updater reads the same feed at runtime to discover updates. +publish: + provider: github + owner: opensourcebharat + repo: lekhini + # Upload to a DRAFT release; the release workflow flips it public only + # after every OS finishes, so users never see a half-uploaded release. + releaseType: draft + # Single source-of-truth for the app icon. electron-builder auto- # generates the platform-specific .icns (macOS) and .ico (Windows) # from this PNG. Recommended: 1024×1024, transparent background. @@ -17,6 +29,12 @@ files: - dist-electron - package.json +# Bundled per-profile RAG seed examples ("intent files"), read at +# runtime from process.resourcesPath/intent. +extraResources: + - from: resources/intent + to: intent + mac: category: public.app-category.productivity target: @@ -24,6 +42,14 @@ mac: arch: - arm64 - x64 + # zip is what Squirrel.Mac consumes for auto-update; ship it + # alongside the dmg so updates work once the build is signed + + # notarized. (Unsigned builds still produce it; macOS just won't + # auto-apply until signing is configured.) 
+ - target: zip + arch: + - arm64 + - x64 hardenedRuntime: true gatekeeperAssess: false entitlements: build/entitlements.mac.plist diff --git a/package-lock.json b/package-lock.json index 09e8166..99fb3fa 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,9 +9,16 @@ "version": "1.0.0", "license": "MIT", "dependencies": { + "@anthropic-ai/sdk": "^0.97.1", + "@google/generative-ai": "^0.24.1", "active-win": "^8.2.1", "electron-store": "^10.0.0", + "electron-updater": "^6.8.3", + "jszip": "^3.10.1", + "marked": "^18.0.4", + "openai": "^6.38.0", "perfect-freehand": "^1.2.2", + "sarvamai": "^1.1.7", "solid-js": "^1.8.22", "zustand": "^4.5.5" }, @@ -27,6 +34,27 @@ "vite-plugin-solid": "^2.10.2" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.97.1", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.97.1.tgz", + "integrity": "sha512-wOf7AUeJPitcVpvKO4UMu63mWH5SaVipkGd7OOQJt/G6VYGlV8D2Gp9dLxOrttDJh/9gqPqdaBwDGcBevumeAg==", + "license": "MIT", + "dependencies": { + "json-schema-to-ts": "^3.1.1", + "standardwebhooks": "^1.0.0" + }, + "bin": { + "anthropic-ai-sdk": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, "node_modules/@babel/code-frame": { "version": "7.29.0", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", @@ -282,6 +310,15 @@ "@babel/core": "^7.0.0-0" } }, + "node_modules/@babel/runtime": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.2.tgz", + "integrity": "sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@babel/template": { "version": "7.28.6", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", @@ -1111,6 +1148,15 @@ "devOptional": true, "license": "MIT" }, + 
"node_modules/@google/generative-ai": { + "version": "0.24.1", + "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", + "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==", + "license": "Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -1805,6 +1851,12 @@ "url": "https://github.com/sindresorhus/is?sponsor=1" } }, + "node_modules/@stablelib/base64": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@stablelib/base64/-/base64-1.0.1.tgz", + "integrity": "sha512-1bnPQqSxSuc3Ii6MhBysoWCg58j97aUjuCSZrGSmDxNqtytIi0k8utUenAwTZN4V5mXXYGsVUI9zeBqy+jBOSQ==", + "license": "MIT" + }, "node_modules/@szmarczak/http-timer": { "version": "4.0.6", "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz", @@ -2365,7 +2417,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true, "license": "Python-2.0" }, "node_modules/assert-plus": { @@ -3280,7 +3331,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==", - "dev": true, "license": "MIT" }, "node_modules/crc": { @@ -3363,7 +3413,6 @@ "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "devOptional": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -3924,6 +3973,82 @@ "dev": true, "license": "ISC" }, + "node_modules/electron-updater": { + "version": "6.8.3", + "resolved": 
"https://registry.npmjs.org/electron-updater/-/electron-updater-6.8.3.tgz", + "integrity": "sha512-Z6sgw3jgbikWKXei1ENdqFOxBP0WlXg3TtKfz0rgw2vIZFJUyI4pD7ZN7jrkm7EoMK+tcm/qTnPUdqfZukBlBQ==", + "license": "MIT", + "dependencies": { + "builder-util-runtime": "9.5.1", + "fs-extra": "^10.1.0", + "js-yaml": "^4.1.0", + "lazy-val": "^1.0.5", + "lodash.escaperegexp": "^4.1.2", + "lodash.isequal": "^4.5.0", + "semver": "~7.7.3", + "tiny-typed-emitter": "^2.1.0" + } + }, + "node_modules/electron-updater/node_modules/builder-util-runtime": { + "version": "9.5.1", + "resolved": "https://registry.npmjs.org/builder-util-runtime/-/builder-util-runtime-9.5.1.tgz", + "integrity": "sha512-qt41tMfgHTllhResqM5DcnHyDIWNgzHvuY2jDcYP9iaGpkWxTUzV6GQjDeLnlR1/DtdlcsWQbA7sByMpmJFTLQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.3.4", + "sax": "^1.2.4" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/electron-updater/node_modules/fs-extra": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz", + "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/electron-updater/node_modules/jsonfile": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.2.1.tgz", + "integrity": "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==", + "license": "MIT", + "dependencies": { + "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" + } + }, + "node_modules/electron-updater/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": 
"sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/electron-updater/node_modules/universalify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz", + "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==", + "license": "MIT", + "engines": { + "node": ">= 10.0.0" + } + }, "node_modules/electron/node_modules/@types/node": { "version": "20.19.41", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.41.tgz", @@ -4163,6 +4288,12 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-sha256": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/fast-sha256/-/fast-sha256-1.3.0.tgz", + "integrity": "sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ==", + "license": "Unlicense" + }, "node_modules/fast-uri": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", @@ -4574,7 +4705,6 @@ "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "devOptional": true, "license": "ISC" }, "node_modules/has-flag": { @@ -4802,6 +4932,12 @@ ], "license": "BSD-3-Clause" }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, "node_modules/imurmurhash": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", @@ -4845,7 +4981,6 @@ "version": "2.0.4", "resolved": 
"https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "devOptional": true, "license": "ISC" }, "node_modules/ip-address": { @@ -4928,9 +5063,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", - "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/isbinaryfile": { "version": "5.0.7", @@ -4997,7 +5130,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", - "dev": true, "license": "MIT", "dependencies": { "argparse": "^2.0.1" @@ -5026,6 +5158,19 @@ "dev": true, "license": "MIT" }, + "node_modules/json-schema-to-ts": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz", + "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "ts-algebra": "^2.0.0" + }, + "engines": { + "node": ">=16" + } + }, "node_modules/json-schema-traverse": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", @@ -5070,6 +5215,48 @@ "graceful-fs": "^4.1.6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + 
"node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -5084,7 +5271,6 @@ "version": "1.0.5", "resolved": "https://registry.npmjs.org/lazy-val/-/lazy-val-1.0.5.tgz", "integrity": "sha512-0/BnGCCfyUMkBpeDgWihanIAF9JmZhHBgUhEqzvf+adhNGLoP6TaiI5oF8oyb3I45P+PcnrqihSf01M0l0G5+Q==", - "dev": true, "license": "MIT" }, "node_modules/lazystream": { @@ -5137,6 +5323,15 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lodash": { "version": "4.18.1", "resolved": 
"https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", @@ -5160,6 +5355,12 @@ "license": "MIT", "peer": true }, + "node_modules/lodash.escaperegexp": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz", + "integrity": "sha512-TM9YBvyC84ZxE3rgfefxUWiQKLilstD6k7PTGt6wfbtXF8ixIJLOL3VYyV/z+ZiPLsVxAsKAFVwWlWeb2Y8Yyw==", + "license": "MIT" + }, "node_modules/lodash.flatten": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", @@ -5168,6 +5369,13 @@ "license": "MIT", "peer": true }, + "node_modules/lodash.isequal": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz", + "integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==", + "deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead.", + "license": "MIT" + }, "node_modules/lodash.isplainobject": { "version": "4.0.6", "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", @@ -5316,6 +5524,18 @@ "node": ">=12" } }, + "node_modules/marked": { + "version": "18.0.4", + "resolved": "https://registry.npmjs.org/marked/-/marked-18.0.4.tgz", + "integrity": "sha512-c/BTaKzg0G6ezQx97DAkYU7k0HM6ys0FqYeKBL6hlBByZwy+ycA1+f0vDdjMHKKeEjdgkx0GOv9Il6D+85cOqA==", + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 20" + } + }, "node_modules/matcher": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", @@ -5564,7 +5784,6 @@ "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "devOptional": true, "license": "MIT" }, "node_modules/nanoid": { @@ -5850,6 +6069,27 @@ "url": "https://github.com/sponsors/sindresorhus" } }, 
+ "node_modules/openai": { + "version": "6.38.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.38.0.tgz", + "integrity": "sha512-AoMplt2UalrpgUDMh3L09QWjNRlgJPipclQvA6sYAaeF6nHNBMgmikAZGmcYLn8on4d9sQY9Q8bOLfrBS7Lc8g==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, "node_modules/ora": { "version": "5.4.1", "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz", @@ -5923,6 +6163,12 @@ "dev": true, "license": "BlueOak-1.0.0" }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, "node_modules/parse5": { "version": "7.3.0", "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", @@ -6089,9 +6335,7 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", - "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/progress": { "version": "2.0.3", @@ -6440,11 +6684,21 @@ "truncate-utf8-bytes": "^1.0.0" } }, + "node_modules/sarvamai": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/sarvamai/-/sarvamai-1.1.7.tgz", + "integrity": "sha512-xROqWjLB2jajV+CFzMUAS913poGgpVP0t/ELwn4cAVNXX4dju/LcP8VrOWzIeU0LTK9n2M00UQEriJTYNrIrJw==", + "dependencies": { + "ws": "^8.16.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/sax": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/sax/-/sax-1.6.0.tgz", "integrity": "sha512-6R3J5M4AcbtLUdZmRv2SygeVaM7IhrLXu9BmnOGmmACak8fiUtOsYNWUS4uK7upbmHIBbLBeFeI//477BKLBzA==", - 
"dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=11.0.0" @@ -6529,6 +6783,12 @@ "devOptional": true, "license": "ISC" }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -6720,6 +6980,16 @@ "node": "^12.13.0 || ^14.15.0 || >=16.0.0" } }, + "node_modules/standardwebhooks": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/standardwebhooks/-/standardwebhooks-1.0.0.tgz", + "integrity": "sha512-BbHGOQK9olHPMvQNHWul6MYlrRTAOKn03rOe4A8O3CLWhNf4YHBqq2HJKKC+sfqpxiBY52pNeesD6jIiLDz8jg==", + "license": "MIT", + "dependencies": { + "@stablelib/base64": "^1.0.0", + "fast-sha256": "^1.3.0" + } + }, "node_modules/stat-mode": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/stat-mode/-/stat-mode-1.0.0.tgz", @@ -6935,6 +7205,12 @@ "node": ">= 10.0.0" } }, + "node_modules/tiny-typed-emitter": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/tiny-typed-emitter/-/tiny-typed-emitter-2.1.0.tgz", + "integrity": "sha512-qVtvMxeXbVej0cQWKqVSSAHmKZEHAvxdF8HEUBFWts8h+xEo5m/lEiPakuyZ3BnCBjOD8i24kzNOiOLLgsSxhA==", + "license": "MIT" + }, "node_modules/tmp": { "version": "0.2.5", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", @@ -6972,6 +7248,12 @@ "utf8-byte-length": "^1.0.1" } }, + "node_modules/ts-algebra": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz", + "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", + "license": "MIT" + }, "node_modules/type-fest": { "version": "4.41.0", "resolved": 
"https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", @@ -7114,7 +7396,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "devOptional": true, "license": "MIT" }, "node_modules/verror": { @@ -7364,6 +7645,27 @@ "devOptional": true, "license": "ISC" }, + "node_modules/ws": { + "version": "8.21.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.21.0.tgz", + "integrity": "sha512-Vsp28b7DRcimFQvrqu2Wek3z1iYxDCWqHYB8Qsnk/S4RfaCQzPGPyBNuVjJV3cd6UiKtUtp6sNM77gWvzcCH+g==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/xmlbuilder": { "version": "15.1.1", "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-15.1.1.tgz", diff --git a/package.json b/package.json index 1de772f..aadac90 100644 --- a/package.json +++ b/package.json @@ -22,18 +22,29 @@ "prebuild": "tsc --noEmit && vite build", "build": "npm run prebuild && electron-builder", "build:unpacked": "npm run prebuild && electron-builder --dir", - "build:mac": "npm run prebuild && electron-builder --mac", - "build:win": "npm run prebuild && electron-builder --win", + "build:mac": "npm run prebuild && electron-builder --mac", + "build:win": "npm run prebuild && electron-builder --win", "build:linux": "npm run prebuild && electron-builder --linux", - "build:all": "npm run prebuild && electron-builder -mwl", + "build:all": "npm run prebuild && electron-builder -mwl", + "release:ci": "npm run prebuild && electron-builder --publish always", + "release": "bash scripts/release.sh", + "release:minor": "bash scripts/release.sh minor", + "release:major": "bash scripts/release.sh major", "typecheck": "tsc 
--noEmit", "format": "prettier --write .", "fix:electron": "bash scripts/fix-electron.sh" }, "dependencies": { + "@anthropic-ai/sdk": "^0.97.1", + "@google/generative-ai": "^0.24.1", "active-win": "^8.2.1", "electron-store": "^10.0.0", + "electron-updater": "^6.8.3", + "jszip": "^3.10.1", + "marked": "^18.0.4", + "openai": "^6.38.0", "perfect-freehand": "^1.2.2", + "sarvamai": "^1.1.7", "solid-js": "^1.8.22", "zustand": "^4.5.5" }, diff --git a/resources/intent/general.jsonl b/resources/intent/general.jsonl new file mode 100644 index 0000000..62e75ed --- /dev/null +++ b/resources/intent/general.jsonl @@ -0,0 +1,6 @@ +{"original": "i dont no where its at", "corrected": "I don't know where it is."} +{"original": "she dont have no time for this", "corrected": "She doesn't have any time for this."} +{"original": "their going to they're house later", "corrected": "They're going to their house later."} +{"original": "recieve the package tommorow", "corrected": "Receive the package tomorrow."} +{"original": "me and him went to the store", "corrected": "He and I went to the store."} +{"original": "its definately gonna happen", "corrected": "It's definitely going to happen."} diff --git a/resources/intent/teacher.jsonl b/resources/intent/teacher.jsonl new file mode 100644 index 0000000..9752c62 --- /dev/null +++ b/resources/intent/teacher.jsonl @@ -0,0 +1,5 @@ +{"original": "the mitochondria is the power house of cell", "corrected": "The mitochondria are the powerhouse of the cell."} +{"original": "photosynthesis make food in plants", "corrected": "Photosynthesis makes food in plants."} +{"original": "gravity pull things down to the ground", "corrected": "Gravity pulls things down toward the ground."} +{"original": "their are three states of matter", "corrected": "There are three states of matter."} +{"original": "the equation balance on both side", "corrected": "The equation balances on both sides."} diff --git a/resources/intent/trader.jsonl 
b/resources/intent/trader.jsonl new file mode 100644 index 0000000..d4c8bd5 --- /dev/null +++ b/resources/intent/trader.jsonl @@ -0,0 +1,5 @@ +{"original": "price broke the resistence and going up", "corrected": "Price broke the resistance and is moving up."} +{"original": "its a bullish trend with higher high", "corrected": "It's a bullish trend with higher highs."} +{"original": "support hold at the fib level", "corrected": "Support held at the Fibonacci level."} +{"original": "the candle close above the moving avg", "corrected": "The candle closed above the moving average."} +{"original": "volume is increasing on the breakout", "corrected": "Volume is increasing on the breakout."} diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100755 index 0000000..e7834f3 --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Cut a release: validate, bump the version, roll the CHANGELOG, commit, +# tag, and push. The pushed tag triggers .github/workflows/release.yml, +# which builds macOS / Windows / Linux installers and publishes them to +# GitHub Releases. This script does NOT build installers itself. +# +# Usage: +# scripts/release.sh [patch|minor|major|<version>] (default: patch) +# Or via npm: +# npm run release # patch +# npm run release:minor +# npm run release:major + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT" + +BUMP="${1:-patch}" + +# 1. Preconditions — clean tree so the release commit is exactly the +# version + changelog change and nothing else. +if [ -n "$(git status --porcelain)" ]; then + echo "✗ Working tree not clean. Commit or stash changes first." >&2 + exit 1 +fi +BRANCH="$(git rev-parse --abbrev-ref HEAD)" +echo "→ Releasing from '$BRANCH' (bump: $BUMP)" + +# 2. Validate before tagging anything (typecheck + full vite build). +echo "→ Validating build (npm run prebuild)…" +npm run prebuild + +# 3.
Bump package.json + lockfile WITHOUT committing or tagging — we do +# those explicitly so the CHANGELOG lands in the same commit. +npm version "$BUMP" --no-git-tag-version >/dev/null +VERSION="$(node -p "require('./package.json').version")" +TAG="v$VERSION" +echo "→ New version: $VERSION" + +if git rev-parse "$TAG" >/dev/null 2>&1; then + echo "✗ Tag $TAG already exists. Never re-use a tag — bump again." >&2 + exit 1 +fi + +# 4. Roll CHANGELOG: [Unreleased] → dated [VERSION] section + links. +node scripts/update-changelog.mjs "$VERSION" + +# 5. Commit, tag, push. CI takes it from here. +git add package.json package-lock.json CHANGELOG.md +git commit -m "chore(release): $TAG" +git tag -a "$TAG" -m "Lekhini $TAG" +echo "→ Pushing '$BRANCH' and tag '$TAG'…" +git push origin "$BRANCH" +git push origin "$TAG" + +echo "✓ $TAG pushed. GitHub Actions will build + publish the release." +echo " Watch: https://github.com/opensourcebharat/lekhini/actions" diff --git a/scripts/update-changelog.mjs b/scripts/update-changelog.mjs new file mode 100644 index 0000000..5be598f --- /dev/null +++ b/scripts/update-changelog.mjs @@ -0,0 +1,47 @@ +// Roll CHANGELOG.md for a release: move the "[Unreleased]" heading down +// into a new dated "[VERSION]" section (leaving Unreleased empty for the +// next cycle) and update the link references at the bottom of the file. +// Invoked by scripts/release.sh with the freshly-bumped version. 
+// +// Usage: node scripts/update-changelog.mjs + +import { readFileSync, writeFileSync } from 'node:fs'; + +const version = process.argv[2]; +if (!version) { + console.error('usage: node scripts/update-changelog.mjs '); + process.exit(1); +} + +const REPO = 'https://github.com/opensourcebharat/lekhini'; +const path = new URL('../CHANGELOG.md', import.meta.url); +let md = readFileSync(path, 'utf8'); + +const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD (UTC) + +const UNRELEASED = '## [Unreleased]'; +if (!md.includes(UNRELEASED)) { + console.error('CHANGELOG.md has no "## [Unreleased]" section.'); + process.exit(1); +} + +// 1. Insert the dated version heading just below [Unreleased], keeping +// an empty Unreleased section at the top. +md = md.replace(UNRELEASED, `${UNRELEASED}\n\n## [${version}] — ${today}`); + +// 2. Find the previous version from the existing Unreleased compare link +// so we can build a proper compare range for the new one. +const prevMatch = md.match( + /\[Unreleased\]:\s*\S+\/compare\/v(\d+\.\d+\.\d+(?:-[\w.]+)?)\.\.\.HEAD/, +); +const prev = prevMatch ? prevMatch[1] : null; + +// 3. Repoint [Unreleased] at the new version and add the version link. +md = md.replace(/\[Unreleased\]:.*$/m, `[Unreleased]: ${REPO}/compare/v${version}...HEAD`); +const versionLink = prev + ? 
`[${version}]: ${REPO}/compare/v${prev}...v${version}` + : `[${version}]: ${REPO}/releases/tag/v${version}`; +md = md.replace(/(\[Unreleased\]:.*$)/m, `$1\n${versionLink}`); + +writeFileSync(path, md); +console.log(`CHANGELOG.md rolled for v${version} (${today}).`); diff --git a/src/main/ai/anthropic.ts b/src/main/ai/anthropic.ts new file mode 100644 index 0000000..dc0c36d --- /dev/null +++ b/src/main/ai/anthropic.ts @@ -0,0 +1,70 @@ +import Anthropic from '@anthropic-ai/sdk'; +import type { AskInput } from '../../shared/types'; +import type { ProviderAdapter } from './types'; +import { assembleTurns } from './messages'; + +// The Anthropic SDK's MessageParam type is stricter than what's +// useful at our boundary (media_type is a literal union; content is +// a discriminated union per role). We build the array structurally +// and cast at the call site — the runtime shape matches the SDK +// expectations exactly. Stream shape documented at +// https://docs.anthropic.com/en/api/messages-streaming. + +const MAX_TOKENS = 2048; + +// Anthropic only accepts these image MIME types — coerce so the SDK +// doesn't reject. The user can only produce PNGs from snip today, so +// the runtime path is always 'image/png'. +function normaliseMime(mime: string): 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp' { + if (mime === 'image/jpeg' || mime === 'image/gif' || mime === 'image/webp') return mime; + return 'image/png'; +} + +function buildMessages(input: AskInput): Anthropic.MessageParam[] { + // The image rides the FIRST user turn so it stays in context across + // follow-ups; all other turns are plain text. 
+ const { turns, firstUserIdx } = assembleTurns(input); + return turns.map((t, i): Anthropic.MessageParam => { + if (input.image && i === firstUserIdx) { + return { + role: 'user', + content: [ + { + type: 'image', + source: { + type: 'base64', + media_type: normaliseMime(input.image.mime), + data: input.image.base64, + }, + }, + { type: 'text', text: t.content }, + ], + }; + } + return { role: t.role, content: t.content }; + }); +} + +export const anthropic: ProviderAdapter = { + id: 'anthropic', + async *ask(input, apiKey, signal) { + const client = new Anthropic({ apiKey }); + const stream = client.messages.stream( + { + model: input.model, + max_tokens: MAX_TOKENS, + system: input.systemPrompt, + messages: buildMessages(input), + }, + { signal }, + ); + for await (const event of stream) { + if ( + event.type === 'content_block_delta' && + event.delta.type === 'text_delta' + ) { + yield event.delta.text; + } + } + }, +}; diff --git a/src/main/ai/credentials.ts b/src/main/ai/credentials.ts new file mode 100644 index 0000000..74f26af --- /dev/null +++ b/src/main/ai/credentials.ts @@ -0,0 +1,106 @@ +import { app, safeStorage } from 'electron'; +import fs from 'node:fs'; +import path from 'node:path'; +import type { ProviderId } from '../../shared/types'; + +// API keys live OUTSIDE PersistedState (which is plaintext electron-store +// JSON). Each key is encrypted with Electron's safeStorage and stashed +// in a tiny sidecar file in userData/. safeStorage uses the platform +// keychain underneath: macOS Keychain, Windows DPAPI, libsecret on +// Linux. Decryption only succeeds for the same OS user account — so a +// stolen config.json doesn't yield the keys. 
+// +// File format on disk: +// <userData>/ai-credentials.json +// { +// "anthropic": "<base64 ciphertext>", +// "openai": "<base64 ciphertext>", +// "gemini": "<base64 ciphertext>" +// } +// +// In-memory fallback: if safeStorage.isEncryptionAvailable() returns +// false (rare — would happen on a freshly-installed Linux without +// libsecret), keys live in process memory only and are LOST when the +// app quits. We log a clear warning and the renderer surfaces that +// state in the AI settings UI. + +const FILE_NAME = 'ai-credentials.json'; + +let memoryFallback: Partial<Record<ProviderId, string>> | null = null; + +function filePath(): string { + return path.join(app.getPath('userData'), FILE_NAME); +} + +function readStore(): Record<string, string> { + try { + const raw = fs.readFileSync(filePath(), 'utf-8'); + const parsed = JSON.parse(raw); + return parsed && typeof parsed === 'object' ? parsed : {}; + } catch { + return {}; + } +} + +function writeStore(store: Record<string, string>): void { + try { + fs.writeFileSync(filePath(), JSON.stringify(store), { mode: 0o600 }); + } catch (err) { + console.warn('[pen] failed to persist AI credentials store', err); + } +} + +export function encryptionAvailable(): boolean { + return safeStorage.isEncryptionAvailable(); +} + +export function setKey(provider: ProviderId, key: string): void { + const trimmed = key.trim(); + if (!encryptionAvailable()) { + if (!memoryFallback) memoryFallback = {}; + memoryFallback[provider] = trimmed; + console.warn( + '[pen] safeStorage unavailable; AI key for', + provider, + 'held in process memory only (will be lost on quit)', + ); + return; + } + const cipher = safeStorage.encryptString(trimmed).toString('base64'); + const store = readStore(); + store[provider] = cipher; + writeStore(store); +} + +export function getKey(provider: ProviderId): string | null { + if (!encryptionAvailable()) { + return memoryFallback?.[provider] ??
null; + } + const store = readStore(); + const cipher = store[provider]; + if (!cipher) return null; + try { + return safeStorage.decryptString(Buffer.from(cipher, 'base64')); + } catch (err) { + console.warn('[pen] failed to decrypt AI key for', provider, err); + return null; + } +} + +export function hasKey(provider: ProviderId): boolean { + if (!encryptionAvailable()) { + return Boolean(memoryFallback?.[provider]); + } + const store = readStore(); + return typeof store[provider] === 'string' && store[provider].length > 0; +} + +export function deleteKey(provider: ProviderId): void { + if (!encryptionAvailable()) { + if (memoryFallback) delete memoryFallback[provider]; + return; + } + const store = readStore(); + delete store[provider]; + writeStore(store); +} diff --git a/src/main/ai/deepseek.ts b/src/main/ai/deepseek.ts new file mode 100644 index 0000000..422e44f --- /dev/null +++ b/src/main/ai/deepseek.ts @@ -0,0 +1,53 @@ +import OpenAI from 'openai'; +import type { AskInput } from '../../shared/types'; +import type { ProviderAdapter } from './types'; +import { assembleTurns } from './messages'; + +// DeepSeek exposes an OpenAI-compatible API at api.deepseek.com, so we +// reuse the OpenAI SDK with a different baseURL. Its chat models +// (deepseek-chat = V3, deepseek-reasoner = R1) are TEXT-ONLY — they +// reject image inputs — so we never attach the snip here. An image +// session that resolves to DeepSeek is answered from the text alone; +// for image Q&A the resolver prefers a local vision model or a +// vision-capable cloud provider (Claude / GPT-4o / Gemini). 
+ +const MAX_TOKENS = 2048; +const BASE_URL = 'https://api.deepseek.com'; + +type Message = + | { role: 'system'; content: string } + | { role: 'user'; content: string } + | { role: 'assistant'; content: string }; + +function buildMessages(input: AskInput): Message[] { + // No image part — DeepSeek can't see it — but we still replay the full + // conversation so follow-ups keep their textual context. Empty opening + // turns fall back to the solve-oriented default inside assembleTurns. + const out: Message[] = [{ role: 'system', content: input.systemPrompt }]; + const { turns } = assembleTurns(input); + for (const t of turns) out.push({ role: t.role, content: t.content }); + return out; +} + +export const deepseek: ProviderAdapter = { + id: 'deepseek', + async *ask(input, apiKey, signal) { + const client = new OpenAI({ apiKey, baseURL: BASE_URL }); + const stream = await client.chat.completions.create( + { + model: input.model, + max_tokens: MAX_TOKENS, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + messages: buildMessages(input) as any, + stream: true, + }, + { signal }, + ); + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta?.content; + if (typeof delta === 'string' && delta.length > 0) { + yield delta; + } + } + }, +}; diff --git a/src/main/ai/gemini.ts b/src/main/ai/gemini.ts new file mode 100644 index 0000000..4b0287c --- /dev/null +++ b/src/main/ai/gemini.ts @@ -0,0 +1,50 @@ +import { GoogleGenerativeAI } from '@google/generative-ai'; +import type { AskInput } from '../../shared/types'; +import type { ProviderAdapter } from './types'; +import { assembleTurns } from './messages'; + +// Gemini's generateContentStream API takes content parts as either +// text or inlineData (base64 with mimeType). The streaming response +// gives chunks where each .text() returns the new delta. The system +// prompt is passed via `systemInstruction` on the model — separate +// from the messages. 
+ +type GeminiPart = { text: string } | { inlineData: { data: string; mimeType: string } }; + +type GeminiContent = { role: 'user' | 'model'; parts: GeminiPart[] }; + +function roleFor(role: 'user' | 'assistant'): 'user' | 'model' { + return role === 'assistant' ? 'model' : 'user'; +} + +function buildContents(input: AskInput): GeminiContent[] { + // Image attaches to the FIRST user turn so follow-ups keep it in view. + const { turns, firstUserIdx } = assembleTurns(input); + return turns.map((t, i): GeminiContent => { + const parts: GeminiPart[] = []; + if (input.image && i === firstUserIdx) { + parts.push({ inlineData: { data: input.image.base64, mimeType: input.image.mime } }); + } + parts.push({ text: t.content }); + return { role: roleFor(t.role), parts }; + }); +} + +export const gemini: ProviderAdapter = { + id: 'gemini', + async *ask(input, apiKey, signal) { + const client = new GoogleGenerativeAI(apiKey); + const model = client.getGenerativeModel({ + model: input.model, + systemInstruction: input.systemPrompt, + }); + const result = await model.generateContentStream( + { contents: buildContents(input) }, + { signal }, + ); + for await (const chunk of result.stream) { + const text = chunk.text(); + if (text.length > 0) yield text; + } + }, +}; diff --git a/src/main/ai/ipc.ts b/src/main/ai/ipc.ts new file mode 100644 index 0000000..cab8023 --- /dev/null +++ b/src/main/ai/ipc.ts @@ -0,0 +1,450 @@ +import { BrowserWindow, ipcMain, shell } from 'electron'; +import type { + AiStatus, + AskInput, + ChatSessionPayload, + ConnectionTestResult, + OllamaPullProgress, + ProfileId, + ProviderId, + StreamChunk, +} from '../../shared/types'; +import { deleteKey, getKey, hasKey, setKey } from './credentials'; +import { defaultModelFor, getAdapter } from './registry'; +import { patch as patchHub, getState as getHubState } from '../hub'; +import { + OLLAMA_INSTALL_URL, + cancelPull, + deleteModel, + freeDiskBytes, + getStatus as getOllamaStatus, + listCatalog, + 
listInstalled, + pull as ollamaPull, + start as startOllama, +} from './ollamaService'; +import { MODEL_CATALOG, PROFILE_MODELS } from './ollamaModels'; +import { capture as ragCapture, maybeSeed, retrieve as ragRetrieve } from './rag'; + +// System prompts for the one-shot correction calls. +const RECOGNIZE_PROMPT = + 'You are a strict OCR engine for handwriting. Output ONLY the exact words ' + + 'written in the image, transcribed verbatim and then lightly corrected for ' + + 'spelling and grammar. Hard rules: do NOT describe the image; never say it is ' + + 'a signature, handwriting, a drawing, or refer to "the image" or "the user"; ' + + 'no quotes, labels, commentary, apologies, or markdown — just the words. If ' + + 'you cannot read any actual words, output nothing at all (an empty response).'; +const AUTOCORRECT_PROMPT = + 'You are an automated text-correction engine. Fix all grammar, spelling, ' + + "typos, and awkward phrasing in the user's input. Return ONLY the corrected " + + 'text — no quotes, commentary, or explanation. Preserve the original meaning.'; + +const isCloudProvider = (v: unknown): v is ProviderId => + v === 'anthropic' || + v === 'openai' || + v === 'gemini' || + v === 'deepseek' || + v === 'sarvam'; + +const tagInstalled = (tag: string | null | undefined, installed: string[]): boolean => + !!tag && + (installed.includes(tag) || installed.includes(tag.includes(':') ? tag : `${tag}:latest`)); + +// Single chokepoint deciding which provider/model actually serves a +// request. Local-first: if Local AI is enabled, the service is up, and +// a suitable model is installed, route to Ollama; otherwise fall back +// to a configured cloud provider; otherwise return a friendly error. 
+type Resolved = { provider: ProviderId; model: string; key: string } | { error: string }; + +async function resolveProvider(input: AskInput): Promise<Resolved> { + const hub = getHubState(); + const wantsVision = !!input.image; + if (hub.aiLocalEnabled) { + const svc = await getOllamaStatus(); + if (svc.running) { + const installed = await listInstalled(); + const profile = input.profile ?? hub.profile; + const kind: 'text' | 'vision' = wantsVision ? 'vision' : 'text'; + // Preference order: per-profile override → global default → + // catalogue default for the profile → any installed model of the + // right kind. First one that's actually installed wins. + const candidates = [ + hub.aiProfileModels[profile]?.[kind], + wantsVision ? hub.aiLocalVisionModel : hub.aiLocalModel, + PROFILE_MODELS[profile]?.[kind], + ]; + let model: string | null = candidates.find((c) => tagInstalled(c, installed)) ?? null; + if (!model) { + const cand = MODEL_CATALOG.find((m) => m.kind === kind && tagInstalled(m.tag, installed)); + model = cand?.tag ?? null; + } + if (model) return { provider: 'ollama', model, key: '' }; + // Local on but nothing usable → fall through to cloud. + } + } + // Cloud fallback — prefer the configured active provider, else honour + // an explicitly-requested cloud provider. getKey() is null both when no + // key is stored and when decryption fails, so gate on it, not hasKey(). + const active = hub.aiActiveProvider; + const activeKey = isCloudProvider(active) ? getKey(active) : null; + if (isCloudProvider(active) && activeKey) { + return { provider: active, model: hub.aiActiveModel ?? defaultModelFor(active), key: activeKey }; + } + const wanted = input.provider; + const wantedKey = isCloudProvider(wanted) ? getKey(wanted) : null; + if (isCloudProvider(wanted) && wantedKey) { + return { provider: wanted, model: input.model || defaultModelFor(wanted), key: wantedKey }; + } + return { + error: 'No AI available. Enable Local AI and install a model, or add a cloud provider key.', + }; +} + +// Active in-flight requests, keyed by the requestId we hand back to +// the renderer. Lets the chat panel cancel a stream cleanly via +// ai:cancel.
Removed on completion / error / cancellation. +const inFlight = new Map<string, AbortController>(); + +// Per-session snip cache. The renderer attaches the image only on the +// first turn; we stash it by sessionId and re-attach it on follow-up +// turns of the same conversation so context isn't lost. Bounded to the +// active conversation — a new session evicts the previous one. +const sessionImages = new Map<string, NonNullable<AskInput['image']>>(); + +let requestSeq = 0; +function nextRequestId(): string { + return `ai-${Date.now()}-${++requestSeq}`; +} + +function broadcastChunk(chunk: StreamChunk): void { + for (const win of BrowserWindow.getAllWindows()) { + if (!win.isDestroyed()) win.webContents.send('ai:chunk', chunk); + } +} + +function isProviderId(value: unknown): value is ProviderId { + return ( + value === 'anthropic' || + value === 'openai' || + value === 'gemini' || + value === 'deepseek' || + value === 'sarvam' || + value === 'ollama' + ); +} + +export function registerAiIpc(): void { + ipcMain.handle('ai:set-key', (_evt, payload: { provider: ProviderId; key: string }) => { + if (!isProviderId(payload.provider)) return; + if (typeof payload.key !== 'string' || payload.key.trim().length === 0) { + deleteKey(payload.provider); + return; + } + setKey(payload.provider, payload.key); + }); + + ipcMain.handle('ai:delete-key', (_evt, payload: { provider: ProviderId }) => { + if (!isProviderId(payload.provider)) return; + deleteKey(payload.provider); + }); + + ipcMain.handle('ai:get-status', async (): Promise => { + const cloud = ( + ['anthropic', 'openai', 'gemini', 'deepseek', 'sarvam'] as ProviderId[] + ).map((provider) => ({ + provider, + configured: hasKey(provider), + })); + // Local is "configured" when the service is up AND ≥1 model is installed. + const svc = await getOllamaStatus(); + const installed = svc.running ?
await listInstalled() : []; + cloud.push({ provider: 'ollama', configured: svc.running && installed.length > 0 }); + return cloud; + }); + + // Tiny request that confirms the key reaches the provider and the + // model exists. We use the default model for each provider and ask + // it to reply with a single character — cheapest possible probe. + ipcMain.handle( + 'ai:test-connection', + async (_evt, payload: { provider: ProviderId; model: string }): Promise => { + const provider = payload.provider; + const model = payload.model; + if (!isProviderId(provider)) return { ok: false, message: 'Unknown provider' }; + // Local (ollama) needs no key; cloud providers do. + const key = provider === 'ollama' ? '' : getKey(provider); + if (provider !== 'ollama' && !key) return { ok: false, message: 'No API key configured' }; + const adapter = getAdapter(provider); + const ctrl = new AbortController(); + const started = Date.now(); + try { + const stream = adapter.ask( + { + provider, + model, + systemPrompt: 'You are a connection test. Reply with a single dot.', + history: [], + userMessage: 'ping', + }, + key ?? '', + ctrl.signal, + ); + let total = ''; + for await (const chunk of stream) { + total += chunk; + // First chunk is enough to confirm the round-trip. + if (total.length > 0) { + ctrl.abort(); + break; + } + } + return { ok: true, latencyMs: Date.now() - started }; + } catch (err) { + // AbortError on success-with-early-break is expected + const msg = (err as Error)?.message ?? String(err); + if (msg.toLowerCase().includes('abort')) { + return { ok: true, latencyMs: Date.now() - started }; + } + return { ok: false, message: msg }; + } + }, + ); + + ipcMain.handle( + 'ai:ask', + async (_evt, input: AskInput): Promise<{ requestId: string }> => { + const requestId = nextRequestId(); + // Carry the snip across follow-ups: cache it on first sight, + // re-attach it on later turns of the same conversation. 
Done + // BEFORE resolveProvider so wantsVision stays true and a vision + // conversation keeps routing to its vision model. + const sid = input.sessionId; + if (sid) { + if (input.image) { + sessionImages.clear(); + sessionImages.set(sid, input.image); + } else { + const cached = sessionImages.get(sid); + if (cached) input.image = cached; + } + } + // The resolver decides local-vs-cloud and the concrete model, + // so the renderer can stay provider-agnostic. + const resolved = await resolveProvider(input); + if ('error' in resolved) { + broadcastChunk({ requestId, error: resolved.error, done: true }); + return { requestId }; + } + const adapter = getAdapter(resolved.provider); + const finalInput: AskInput = { + ...input, + provider: resolved.provider, + model: resolved.model, + }; + const ctrl = new AbortController(); + inFlight.set(requestId, ctrl); + // Stream in the background so the IPC invoke can return the + // requestId immediately. The renderer subscribes to 'ai:chunk' + // events and matches by requestId. + void (async () => { + try { + for await (const delta of adapter.ask(finalInput, resolved.key, ctrl.signal)) { + if (ctrl.signal.aborted) break; + broadcastChunk({ requestId, delta }); + } + broadcastChunk({ requestId, done: true }); + } catch (err) { + const msg = (err as Error)?.message ?? String(err); + // User-initiated abort isn't an error; just close cleanly. + if (ctrl.signal.aborted || msg.toLowerCase().includes('abort')) { + broadcastChunk({ requestId, done: true }); + } else { + broadcastChunk({ requestId, error: msg, done: true }); + } + } finally { + inFlight.delete(requestId); + } + })(); + return { requestId }; + }, + ); + + ipcMain.handle('ai:cancel', (_evt, payload: { requestId: string }) => { + const ctrl = inFlight.get(payload.requestId); + if (ctrl) ctrl.abort(); + inFlight.delete(payload.requestId); + }); + + // One-shot, non-streaming: drain the adapter into a single string. 
+ // Both go through the resolver, so local-first + cloud-fallback apply. + async function runOneShot(input: AskInput): Promise<{ text: string; error?: string }> { + const resolved = await resolveProvider(input); + if ('error' in resolved) return { text: '', error: resolved.error }; + const adapter = getAdapter(resolved.provider); + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), 30_000); + try { + let out = ''; + for await (const delta of adapter.ask( + { ...input, provider: resolved.provider, model: resolved.model }, + resolved.key, + ctrl.signal, + )) { + out += delta; + } + return { text: out.trim() }; + } catch (err) { + const msg = (err as Error)?.message ?? String(err); + if (ctrl.signal.aborted || msg.toLowerCase().includes('abort')) { + return { text: '', error: 'Timed out' }; + } + return { text: '', error: msg }; + } finally { + clearTimeout(timer); + } + } + + // Handwriting recognition: image → corrected plain text (vision model). + ipcMain.handle( + 'ai:recognize', + ( + _evt, + payload: { png: Uint8Array; mime?: string; profile?: ProfileId }, + ): Promise<{ text: string; error?: string }> => + runOneShot({ + provider: 'ollama', + model: '', + systemPrompt: RECOGNIZE_PROMPT, + image: { mime: payload.mime ?? 'image/png', base64: Buffer.from(payload.png).toString('base64') }, + history: [], + userMessage: 'Transcribe and correct the handwriting in this image.', + profile: payload.profile, + }), + ); + + // Typed-text autocorrect: text → corrected text (text model). Pulls + // the user's most similar accepted corrections (RAG) into the prompt + // as few-shot examples, and records the resulting pair to learn from. + ipcMain.handle( + 'ai:autocorrect', + async ( + _evt, + payload: { text: string; profile?: ProfileId }, + ): Promise<{ text: string; error?: string }> => { + const text = (payload.text ?? '').toString(); + if (text.trim().length === 0) return { text }; + const profile = payload.profile ?? 
getHubState().profile; + + const examples = await ragRetrieve(profile, text, 3); + const systemPrompt = + examples.length > 0 + ? AUTOCORRECT_PROMPT + + '\n\nExamples of corrections this user prefers:\n' + + examples.map((e) => `"${e.original}" → "${e.corrected}"`).join('\n') + : AUTOCORRECT_PROMPT; + + const result = await runOneShot({ + provider: 'ollama', + model: '', + systemPrompt, + history: [], + userMessage: text, + profile, + }); + + // Learn from the applied correction (best-effort, non-blocking). + if (result.text && result.text.trim() !== text.trim()) { + void ragCapture({ profile, kind: 'typed', original: text, corrected: result.text }); + } + // Self-heal seeding once embeddings are reachable. + void maybeSeed(); + return result; + }, + ); + + // ── Local Ollama service management ── + const broadcastPull = (p: OllamaPullProgress): void => { + for (const win of BrowserWindow.getAllWindows()) { + if (!win.isDestroyed()) win.webContents.send('ollama:pull-progress', p); + } + }; + const refreshInstalled = async (): Promise => { + patchHub({ aiInstalledModels: await listInstalled() }); + }; + + ipcMain.handle('ollama:status', () => getOllamaStatus()); + ipcMain.handle('ollama:start', () => startOllama()); + ipcMain.handle('ollama:list-models', () => listCatalog()); + ipcMain.handle('ollama:disk-space', () => freeDiskBytes()); + ipcMain.handle('ollama:pull', async (_evt, payload: { model: string }) => { + await ollamaPull(payload.model, broadcastPull); + await refreshInstalled(); + return { ok: true }; + }); + ipcMain.handle('ollama:cancel-pull', (_evt, payload: { model: string }) => { + cancelPull(payload.model); + }); + ipcMain.handle('ollama:delete-model', async (_evt, payload: { model: string }) => { + await deleteModel(payload.model); + await refreshInstalled(); + }); + ipcMain.handle('ollama:install-help', () => { + void shell.openExternal(OLLAMA_INSTALL_URL); + }); + + // Renderer-facing chat:start handler. 
Calls startChatSession with + // the bytes the renderer hands over. Equivalent to the in-process + // startChatSession call that capture.ts makes for the snip-ask path. + ipcMain.handle( + 'chat:start', + (_evt, payload: { png: Uint8Array; mime: string; profile: ProfileId }) => { + const sessionId = startChatSession( + Buffer.from(payload.png), + payload.mime, + payload.profile, + ); + return { sessionId }; + }, + ); + + // Text-only chat session — no image. Used by the trader numeric + // analysis flow: the overlay computes its levels and hands the text + // here; the panel auto-fires it as the first user message. + ipcMain.handle( + 'chat:start-text', + (_evt, payload: { text: string; profile: ProfileId }) => { + const sessionId = startTextChatSession(payload.text, payload.profile); + return { sessionId }; + }, + ); +} + +// Shared helper: broadcast a new chat session to every renderer and +// open the dock-slot chat panel. Called by the chat:start IPC and +// also by capture.ts when Ask AI is triggered from the snip menu. +let chatSeq = 0; +export function startChatSession( + png: Buffer, + mime: string, + profile: ProfileId, +): string { + const sessionId = `chat-${Date.now()}-${++chatSeq}`; + const session: ChatSessionPayload = { sessionId, png, mime, profile }; + for (const win of BrowserWindow.getAllWindows()) { + if (!win.isDestroyed()) win.webContents.send('chat:session', session); + } + patchHub({ chatOpen: true }); + return sessionId; +} + +// Text-only counterpart of startChatSession — broadcasts a session with +// no image and an initial user message to auto-send. 
/**
 * Start a chat session that has no image, only an opening message.
 *
 * @param initialText First user message; carried in the session payload.
 * @param profile Profile the conversation runs under.
 * @returns The newly minted session id.
 */
export function startTextChatSession(initialText: string, profile: ProfileId): string {
  chatSeq += 1;
  const sessionId = `chat-${Date.now()}-${chatSeq}`;
  const session: ChatSessionPayload = { sessionId, initialText, profile };
  for (const win of BrowserWindow.getAllWindows()) {
    if (!win.isDestroyed()) win.webContents.send('chat:session', session);
  }
  patchHub({ chatOpen: true });
  return sessionId;
}

// ── patch boundary: new file src/main/ai/messages.ts ──
import type { AskInput } from '../../shared/types';
import { SOLVE_FIRST_TURN } from '../../shared/constants';

export interface AssembledTurn {
  role: 'user' | 'assistant';
  content: string;
}

// Assemble the full ordered turn list for one request — prior history
// plus the current user message — and report the index of the FIRST
// user turn. That index is where every adapter attaches the image, so
// follow-up turns keep the original visual/OCR context instead of only
// replaying the assistant's earlier answer.
//
// A blank user turn (the auto-fired opening turn carries no text — the
// image + system prompt are the request) falls back to SOLVE_FIRST_TURN
// so the model gets a clear instruction and the conversation always
// starts with a non-empty user message (Anthropic requires the first
// message to be a user turn). Whitespace-only text counts as blank: it
// carries no instruction either.
export function assembleTurns(input: AskInput): {
  turns: AssembledTurn[];
  firstUserIdx: number;
} {
  const userText = (text: string): string => (text.trim().length === 0 ? SOLVE_FIRST_TURN : text);

  const turns: AssembledTurn[] = input.history.map((t) => ({
    role: t.role,
    // Assistant turns are replayed verbatim; only user turns get the fallback.
    content: t.role === 'user' ? userText(t.content) : t.content,
  }));
  turns.push({ role: 'user', content: userText(input.userMessage) });

  // Never -1: the current user message was just appended.
  const firstUserIdx = turns.findIndex((t) => t.role === 'user');
  return { turns, firstUserIdx };
}

// ── patch boundary: new file src/main/ai/ollama.ts ──
import type { AskInput } from '../../shared/types';
import type { ProviderAdapter } from './types';
import { OLLAMA_HOST } from './ollamaService';
import { assembleTurns } from './messages';

// Ollama's /api/chat message shape. Vision models accept raw base64
// strings in `images` (NO `data:` prefix, unlike OpenAI's data URL).
interface OllamaMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
  images?: string[];
}

/** Build the /api/chat message list: system prompt, then every turn. */
function buildMessages(input: AskInput): OllamaMessage[] {
  // Image rides the FIRST user turn so follow-ups keep it in view.
  const { turns, firstUserIdx } = assembleTurns(input);
  const conversation = turns.map((turn, i): OllamaMessage => {
    const msg: OllamaMessage = { role: turn.role, content: turn.content };
    if (input.image && i === firstUserIdx) msg.images = [input.image.base64];
    return msg;
  });
  return [{ role: 'system', content: input.systemPrompt }, ...conversation];
}

/** First 200 characters of a response body, or '' if it cannot be read. */
async function safeText(res: Response): Promise<string> {
  try {
    return (await res.text()).slice(0, 200);
  } catch {
    return '';
  }
}

// Local provider. `apiKey` is ignored — models run on-device via the
// Ollama service. Streams NDJSON from /api/chat, yielding content
// deltas exactly like the cloud adapters so the IPC layer is unchanged.
export const ollama: ProviderAdapter = {
  id: 'ollama',
  /**
   * Stream a chat completion from the local Ollama service.
   *
   * Yields each non-empty `message.content` delta from the NDJSON
   * response and stops at the record flagged `done`.
   *
   * @throws Error when the HTTP response is not OK / has no body, or
   *   when a streamed record carries an `error` field.
   */
  async *ask(input, _apiKey, signal) {
    const res = await fetch(`${OLLAMA_HOST}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: input.model,
        messages: buildMessages(input),
        stream: true,
      }),
      signal,
    });
    if (!res.ok || !res.body) {
      throw new Error(`Ollama ${res.status}: ${(await safeText(res)) || res.statusText}`);
    }

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let pending = '';
    try {
      for (;;) {
        const { value, done } = await reader.read();
        // On end-of-stream, flush the decoder and treat whatever is
        // left as a final record even without a trailing newline.
        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = pending.split('\n');
        pending = done ? '' : (lines.pop() ?? '');

        for (const raw of lines) {
          const line = raw.trim();
          if (!line) continue;
          let parsed: unknown;
          try {
            parsed = JSON.parse(line);
          } catch {
            continue; // skip lines that are not JSON
          }
          if (typeof parsed !== 'object' || parsed === null) continue;
          const record = parsed as {
            message?: { content?: unknown };
            error?: unknown;
            done?: boolean;
          };
          if (record.error) throw new Error(String(record.error));
          const delta = record.message?.content;
          if (typeof delta === 'string' && delta.length > 0) yield delta;
          if (record.done) return;
        }
        if (done) return;
      }
    } finally {
      // Runs on normal end, on error, and when the consumer stops
      // iterating early: release the HTTP body instead of leaving it
      // open. Cancelling a finished or errored stream is harmless.
      await reader.cancel().catch(() => undefined);
    }
  },
};

// ── patch boundary: new file src/main/ai/ollamaModels.ts ──
import type { ProfileId } from '../../shared/types';

// The local model catalogue surfaced in the installer. Sizes are the
// approximate on-disk footprint of the Q4_K_M quant Ollama pulls for
// each tag — used only for the disk-space pre-check and the UI, so a
// rough estimate is fine. All tags below are official Ollama library
// tags (no Modelfile needed) at time of writing; `qwen2.5vl` should be
// re-verified against the user's Ollama version (fallback: moondream).
const GB = 1024 * 1024 * 1024;

// Whole-byte size for a footprint given in GiB (avoids fractional bytes).
const gb = (n: number): number => Math.round(n * GB);

export interface CatalogEntry {
  tag: string;
  label: string;
  kind: 'text' | 'vision' | 'embed';
  approxBytes: number;
  // A short "why pick this" note for the installer row.
  note?: string;
}

export const MODEL_CATALOG: CatalogEntry[] = [
  // ── Text ──
  { tag: 'llama3.2:1b', label: 'Llama 3.2 1B', kind: 'text', approxBytes: gb(1.3), note: 'Tiny, fast — grammar / formatting' },
  { tag: 'qwen2.5:1.5b', label: 'Qwen 2.5 1.5B', kind: 'text', approxBytes: gb(1.0), note: 'Great text cleanup' },
  { tag: 'qwen2.5:3b', label: 'Qwen 2.5 3B', kind: 'text', approxBytes: gb(2.0), note: 'Stronger reasoning / analysis' },
  { tag: 'smollm2:1.7b', label: 'SmolLM2 1.7B', kind: 'text', approxBytes: gb(1.0), note: 'Lite on-device utility' },
  // ── Vision ──
  { tag: 'moondream', label: 'Moondream 2 (2B)', kind: 'vision', approxBytes: gb(1.7), note: 'Fast OCR / screenshot Q&A' },
  { tag: 'qwen2.5vl:3b', label: 'Qwen 2.5-VL 3B', kind: 'vision', approxBytes: gb(3.2), note: 'Best small vision (verify tag)' },
  // ── Embeddings (RAG, Phase 4) ──
  { tag: 'nomic-embed-text', label: 'Nomic Embed Text', kind: 'embed', approxBytes: gb(0.27), note: 'Embeddings for learning' },
];

/**
 * Look up a catalogue entry by tag.
 *
 * A bare tag and its `:latest` form are treated as the same model, so
 * `moondream` and `moondream:latest` both resolve to the same entry.
 *
 * @returns The matching entry, or `undefined` when the tag is unknown.
 */
export function catalogEntry(tag: string): CatalogEntry | undefined {
  const canonical = (t: string): string => (t.includes(':') ? t : `${t}:latest`);
  const wanted = canonical(tag);
  return MODEL_CATALOG.find((m) => canonical(m.tag) === wanted);
}

// Per-profile, per-task default model tags. The resolver uses these
// when the user hasn't set an explicit override and falls back to the
// first installed model of the right kind if the default isn't pulled.
export interface ProfileModels {
  text: string;
  vision: string;
}

export const PROFILE_MODELS: Record<ProfileId, ProfileModels> = {
  // Qwen2.5-VL is the vision default everywhere — it's far better at
  // reading handwriting / dense text than moondream, which matters most
  // for the drawn-ink recognition path. moondream stays in the catalogue
  // as a lighter option for low-RAM machines.
  general: { text: 'llama3.2:1b', vision: 'qwen2.5vl:3b' },
  teacher: { text: 'qwen2.5:3b', vision: 'qwen2.5vl:3b' },
  trader: { text: 'qwen2.5:3b', vision: 'qwen2.5vl:3b' },
};

// Default set pulled on first run: a fast tiny text model for autocorrect,
// a capable vision model for screenshot Q&A + handwriting OCR, and the
// embedding model for the learning loop. Heavier per-profile text models
// (e.g. qwen2.5:3b) are opt-in. Low-RAM users can swap the vision model
// to the lighter `moondream` from the catalogue.
export const DEFAULT_PULL_SET: string[] = ['llama3.2:1b', 'qwen2.5vl:3b', 'nomic-embed-text'];

// Embedding model tag used by the RAG layer (Phase 4).
export const EMBED_MODEL = 'nomic-embed-text';

// ── patch boundary: new file src/main/ai/ollamaService.ts ──
import { spawn, execFile, type ChildProcess } from 'node:child_process';
import { existsSync } from 'node:fs';
import { statfs } from 'node:fs/promises';
import { promisify } from 'node:util';
import os from 'node:os';
import type {
  LocalModelInfo,
  OllamaPullProgress,
  OllamaServiceStatus,
} from '../../shared/types';
import { DEFAULT_PULL_SET, MODEL_CATALOG } from './ollamaModels';

const execFileP = promisify(execFile);

const DEFAULT_OLLAMA_HOST = 'http://127.0.0.1:11434';

// Turn an OLLAMA_HOST value into a base URL with no trailing slash.
// Accepts full http(s) URLs as well as the scheme-less `host[:port]`
// form; an unset/blank value or an unsupported scheme yields the
// default. A scheme-less wildcard bind address (0.0.0.0) is dialled via
// loopback. NOTE(review): confirm that mapping suits remote-daemon setups.
function resolveOllamaHost(raw: string | undefined): string {
  const value = (raw ?? '').trim().replace(/\/+$/, '');
  if (!value) return DEFAULT_OLLAMA_HOST;
  if (/^https?:\/\//i.test(value)) return value;
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(value)) return DEFAULT_OLLAMA_HOST;
  const hostPort = /:\d+$/.test(value) ? value : `${value}:11434`;
  return `http://${hostPort.replace(/^0\.0\.0\.0(?=:)/, '127.0.0.1')}`;
}

// Ollama's local HTTP endpoint. Honour OLLAMA_HOST if the user has
// pointed their daemon elsewhere, else the documented default.
export const OLLAMA_HOST = resolveOllamaHost(process.env.OLLAMA_HOST);

export const OLLAMA_INSTALL_URL = 'https://ollama.com/download';

// The daemon we spawned (if any). We only kill what we started — a
// pre-existing user daemon is left running on quit.
let spawned: ChildProcess | null = null;

// In-flight pulls keyed by model tag, so a pull can be cancelled.
const pulls = new Map<string, AbortController>();

const sleep = (ms: number): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms));

/** Version string of a reachable daemon, or null if none answers in 1.5 s. */
async function probeVersion(): Promise<string | null> {
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/version`, {
      signal: AbortSignal.timeout(1500),
    });
    if (!res.ok) return null;
    const body = (await res.json()) as { version?: unknown };
    return typeof body.version === 'string' ? body.version : 'unknown';
  } catch {
    return null;
  }
}

/** Install locations checked per platform before consulting PATH. */
function candidatePaths(): string[] {
  switch (process.platform) {
    case 'darwin':
      return [
        '/usr/local/bin/ollama',
        '/opt/homebrew/bin/ollama',
        '/Applications/Ollama.app/Contents/Resources/ollama',
      ];
    case 'win32': {
      const la = process.env.LOCALAPPDATA ?? '';
      return [`${la}\\Programs\\Ollama\\ollama.exe`];
    }
    default:
      return ['/usr/local/bin/ollama', '/usr/bin/ollama', '/bin/ollama'];
  }
}

/** Path of the ollama binary: known locations first, then PATH. */
async function findBinary(): Promise<string | null> {
  const known = candidatePaths().find((p) => existsSync(p));
  if (known) return known;
  try {
    const cmd = process.platform === 'win32' ? 'where' : 'which';
    const { stdout } = await execFileP(cmd, ['ollama']);
    const first = stdout
      .split(/\r?\n/)
      .map((l) => l.trim())
      .find((l) => l.length > 0);
    if (first && existsSync(first)) return first;
  } catch {
    /* not on PATH */
  }
  return null;
}

/** Report whether Ollama is installed and whether a daemon is answering. */
export async function getStatus(): Promise<OllamaServiceStatus> {
  const version = await probeVersion();
  if (version) return { installed: true, running: true, version };
  const bin = await findBinary();
  return { installed: bin != null, running: false };
}

// Poll the daemon with growing delays (200 ms → 1.5 s cap) until it
// answers, `timeoutMs` elapses, or `giveUp` reports there is no point
// waiting any longer.
async function waitForReady(timeoutMs: number, giveUp: () => boolean = () => false): Promise<boolean> {
  const started = Date.now();
  let delay = 200;
  while (Date.now() - started < timeoutMs) {
    if (await probeVersion()) return true;
    if (giveUp()) return false;
    await sleep(delay);
    delay = Math.min(Math.round(delay * 1.5), 1500);
  }
  return false;
}

/**
 * Ensure a daemon is serving, spawning `ollama serve` if necessary.
 *
 * @returns The resulting service status; `error` is set when the
 *   binary is missing, the spawn fails, or the daemon never answers.
 */
export async function start(): Promise<OllamaServiceStatus> {
  // Already serving (possibly a daemon the user started themselves) —
  // attach, never double-spawn.
  if (await probeVersion()) return getStatus();
  const bin = await findBinary();
  if (!bin) return { installed: false, running: false, error: 'Ollama is not installed' };

  const launch: { error?: Error } = {};
  try {
    const child = spawn(bin, ['serve'], { stdio: 'ignore' });
    spawned = child;
    // Only clear the handle if it still points at THIS child, so a late
    // event from an older process cannot drop a newer one.
    const forget = (): void => {
      if (spawned === child) spawned = null;
    };
    // A failed spawn is reported through 'error'. With no listener,
    // that event would surface as an uncaught exception.
    child.once('error', (err) => {
      launch.error = err;
      forget();
    });
    child.once('exit', forget);
  } catch (err) {
    return { installed: true, running: false, error: (err as Error).message };
  }

  const ready = await waitForReady(15000, () => launch.error !== undefined);
  if (ready) return getStatus();
  return {
    installed: true,
    running: false,
    error: launch.error?.message ?? 'Ollama did not become ready',
  };
}

/** Names of installed models; empty when the daemon is unreachable. */
export async function listInstalled(): Promise<string[]> {
  try {
    // Bounded so a hung daemon cannot stall callers.
    const res = await fetch(`${OLLAMA_HOST}/api/tags`, { signal: AbortSignal.timeout(5000) });
    if (!res.ok) return [];
    const body = (await res.json()) as { models?: Array<{ name?: unknown }> };
    if (!Array.isArray(body.models)) return [];
    return body.models.map((m) => m?.name).filter((n): n is string => typeof n === 'string');
  } catch {
    return [];
  }
}

/** The model catalogue annotated with install state and default-pull flag. */
export async function listCatalog(): Promise<LocalModelInfo[]> {
  const installed = new Set(await listInstalled());
  // Ollama records bare tags as ':latest'; match both forms.
  const isInstalled = (tag: string): boolean =>
    installed.has(tag) || installed.has(tag.includes(':') ? tag : `${tag}:latest`);
  return MODEL_CATALOG.map((entry) => ({
    tag: entry.tag,
    label: entry.label,
    kind: entry.kind,
    approxBytes: entry.approxBytes,
    installed: isInstalled(entry.tag),
    defaultPull: DEFAULT_PULL_SET.includes(entry.tag),
  }));
}

/**
 * Pull `model`, reporting progress through `onProgress`.
 *
 * Never rejects: failure and cancellation arrive as a final progress
 * event with `done: true` and status 'error' / 'cancelled'. A second
 * pull for a model that is already being pulled returns immediately.
 */
export async function pull(
  model: string,
  onProgress: (p: OllamaPullProgress) => void,
): Promise<void> {
  if (pulls.has(model)) return; // already pulling
  const ctrl = new AbortController();
  pulls.set(model, ctrl);

  // Forward one NDJSON line. Returns true when it carried a terminal
  // error (already reported), so the caller should stop.
  const forward = (raw: string): boolean => {
    const line = raw.trim();
    if (!line) return false;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      return false; // ignore non-JSON keepalive lines
    }
    if (typeof parsed !== 'object' || parsed === null) return false;
    const o = parsed as { status?: unknown; completed?: number; total?: number; error?: unknown };
    if (o.error) {
      onProgress({ model, status: 'error', error: String(o.error), done: true });
      return true;
    }
    onProgress({ model, status: String(o.status ?? ''), completed: o.completed, total: o.total });
    return false;
  };

  try {
    const res = await fetch(`${OLLAMA_HOST}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: model, stream: true }),
      signal: ctrl.signal,
    });
    if (!res.ok || !res.body) {
      onProgress({ model, status: 'error', error: `HTTP ${res.status}`, done: true });
      return;
    }
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let pending = '';
    try {
      for (;;) {
        const { value, done } = await reader.read();
        // At end-of-stream, flush and process a last unterminated line.
        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = pending.split('\n');
        pending = done ? '' : (lines.pop() ?? '');
        for (const line of lines) {
          if (forward(line)) return;
        }
        if (done) break;
      }
    } finally {
      await reader.cancel().catch(() => undefined);
    }
    onProgress({ model, status: 'success', done: true });
  } catch (err) {
    const aborted = ctrl.signal.aborted;
    onProgress({
      model,
      status: aborted ? 'cancelled' : 'error',
      error: aborted ? undefined : (err as Error).message,
      done: true,
    });
  } finally {
    // Guarded: after cancelPull() a new pull for the same tag may have
    // registered its own controller already.
    if (pulls.get(model) === ctrl) pulls.delete(model);
  }
}

/** Abort the in-flight pull for `model`, if there is one. */
export function cancelPull(model: string): void {
  pulls.get(model)?.abort();
  pulls.delete(model);
}

/**
 * Delete an installed model.
 *
 * @throws Error when the daemon is unreachable or answers with a
 *   non-OK status, so callers do not mistake a failure for a deletion.
 */
export async function deleteModel(model: string): Promise<void> {
  const res = await fetch(`${OLLAMA_HOST}/api/delete`, {
    method: 'DELETE',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name: model }),
  });
  if (!res.ok) {
    const detail = await res.text().catch(() => '');
    throw new Error(`Ollama ${res.status}: ${detail.slice(0, 200) || res.statusText}`);
  }
}

// Free bytes on the volume where models are stored (~ user home). -1
// when it can't be determined, so callers can skip the pre-check
// rather than block on a bad reading.
export async function freeDiskBytes(): Promise<number> {
  try {
    const s = await statfs(os.homedir());
    return s.bavail * s.bsize;
  } catch {
    return -1;
  }
}

// Kill only a daemon WE spawned; abort any in-flight pulls. Wired to
// app 'before-quit'.
export function shutdown(): void {
  for (const ctrl of pulls.values()) ctrl.abort();
  pulls.clear();
  if (spawned && !spawned.killed) {
    try {
      spawned.kill();
    } catch {
      /* already gone */
    }
    spawned = null;
  }
}

// ── patch boundary: new file src/main/ai/openai.ts ──
import OpenAI from 'openai';
import type { AskInput } from '../../shared/types';
import type { ProviderAdapter } from './types';
import { assembleTurns } from './messages';

// OpenAI's chat.completions API takes vision via `image_url` content
// parts on user messages. The URL can be a data: URL so we don't need
// to host the image anywhere. Stream chunks arrive with deltas under
// choices[0].delta.content as strings (null when the message starts).
const MAX_TOKENS = 2048;

type ContentPart =
  | { type: 'text'; text: string }
  | { type: 'image_url'; image_url: { url: string } };

type OpenAIMessage =
  | { role: 'system'; content: string }
  | { role: 'user'; content: string | ContentPart[] }
  | { role: 'assistant'; content: string };

/**
 * Build the chat.completions message list: the system prompt followed
 * by every assembled turn. When the request has an image, the first
 * user turn becomes a two-part message (image data URL, then its text)
 * so follow-ups keep the image in view.
 */
function buildMessages(input: AskInput): OpenAIMessage[] {
  const { turns, firstUserIdx } = assembleTurns(input);
  const image = input.image;

  const conversation = turns.map((turn, idx): OpenAIMessage => {
    if (!image || idx !== firstUserIdx) {
      return { role: turn.role, content: turn.content };
    }
    const dataUrl = `data:${image.mime};base64,${image.base64}`;
    return {
      role: 'user',
      content: [
        { type: 'image_url', image_url: { url: dataUrl } },
        { type: 'text', text: turn.content },
      ],
    };
  });

  return [{ role: 'system', content: input.systemPrompt }, ...conversation];
}

export const openai: ProviderAdapter = {
  id: 'openai',
  /** Stream a completion, yielding each non-empty text delta. */
  async *ask(input, apiKey, signal) {
    const client = new OpenAI({ apiKey });
    const request = {
      model: input.model,
      max_tokens: MAX_TOKENS,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      messages: buildMessages(input) as any,
      stream: true as const,
    };
    const stream = await client.chat.completions.create(request, { signal });
    for await (const chunk of stream) {
      const delta = chunk.choices?.[0]?.delta?.content;
      if (typeof delta !== 'string' || delta.length === 0) continue;
      yield delta;
    }
  },
};

// ── patch boundary: new file src/main/ai/rag.ts ──
import { app } from 'electron';
import { promises as fs } from 'node:fs';
import path from 'node:path';
import type { ProfileId } from '../../shared/types';
import { OLLAMA_HOST } from './ollamaService';
import { EMBED_MODEL } from './ollamaModels';

// Local, privacy-preserving "self-learning" via RAG.
// Accepted corrections + bundled per-profile intent examples are
// embedded with a local model and stored on disk; the closest ones are
// retrieved as few-shot context for future corrections. No data leaves
// the device.
//
// The store is a single JSON file with brute-force cosine search. At
// the realistic scale (hundreds–few thousand examples) this is fast
// and dependency-free; swapping in sqlite-vec later is a drop-in for
// the same retrieve()/capture() surface.

export type RagKind = 'typed' | 'drawn' | 'analysis' | 'chat';

interface RagEntry {
  id: number;
  profile: ProfileId;
  kind: RagKind;
  original: string;
  corrected: string;
  accepted: boolean;
  source: 'user' | 'seed';
  createdAt: number;
  embedding: number[];
}

export interface CaptureInput {
  profile: ProfileId;
  kind: RagKind;
  original: string;
  corrected: string;
  accepted?: boolean;
  source?: 'user' | 'seed';
}

const PROFILES: ProfileId[] = ['general', 'teacher', 'trader'];
// Keep brute-force search snappy: cap stored user examples per profile
// (seeds are exempt). Oldest user entries are evicted first.
const MAX_USER_PER_PROFILE = 500;
const SIM_FLOOR = 0.55; // ignore weak matches

let entries: RagEntry[] = [];
let nextId = 1;
// The single in-flight/finished read of the store. Sharing the promise
// means concurrent callers all wait for the same read.
let loading: Promise<void> | null = null;
let dbPath = '';
let saveTimer: ReturnType<typeof setTimeout> | null = null;
let tmpSeq = 0;

/** Absolute path of the store file inside the app's userData directory. */
function file(): string {
  if (!dbPath) dbPath = path.join(app.getPath('userData'), 'lekhini-rag.json');
  return dbPath;
}

// Minimal shape check for a persisted entry: the fields search and
// de-duplication read must have the right types.
function isRagEntry(value: unknown): value is RagEntry {
  if (typeof value !== 'object' || value === null) return false;
  const e = value as Partial<RagEntry>;
  return (
    typeof e.id === 'number' &&
    typeof e.profile === 'string' &&
    typeof e.original === 'string' &&
    typeof e.corrected === 'string' &&
    Array.isArray(e.embedding) &&
    e.embedding.every((n) => typeof n === 'number')
  );
}

/** Read the store from disk; a missing or unreadable file yields an empty store. */
async function readStore(): Promise<void> {
  try {
    const raw = await fs.readFile(file(), 'utf8');
    const data = JSON.parse(raw) as { entries?: unknown } | null;
    const stored: unknown[] = Array.isArray(data?.entries) ? data.entries : [];
    entries = stored.filter(isRagEntry);
    nextId = entries.reduce((max, e) => Math.max(max, e.id), 0) + 1;
  } catch {
    entries = [];
  }
}

/** Load the store once; every caller awaits the same read. */
function load(): Promise<void> {
  if (!loading) loading = readStore();
  return loading;
}

/** Coalesce writes: persist at most once per 500 ms burst of changes. */
function scheduleSave(): void {
  if (saveTimer) return;
  saveTimer = setTimeout(() => {
    saveTimer = null;
    void persist();
  }, 500);
}

// Best-effort save. Written to a temp file and renamed into place so an
// interrupted write cannot leave a truncated store behind.
async function persist(): Promise<void> {
  const target = file();
  tmpSeq += 1;
  const tmp = `${target}.${process.pid}.${tmpSeq}.tmp`;
  try {
    await fs.writeFile(tmp, JSON.stringify({ entries }), 'utf8');
    await fs.rename(tmp, target);
  } catch {
    /* best-effort */
    await fs.rm(tmp, { force: true }).catch(() => undefined);
  }
}

// Embed text with the local embedding model. Returns null whenever the
// model/service isn't available — callers treat that as "RAG off".
async function embed(text: string): Promise<number[] | null> {
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/embeddings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model: EMBED_MODEL, prompt: text }),
      signal: AbortSignal.timeout(8000),
    });
    if (!res.ok) return null;
    const body = (await res.json()) as { embedding?: number[] };
    return Array.isArray(body.embedding) && body.embedding.length > 0 ? body.embedding : null;
  } catch {
    return null;
  }
}

/**
 * Cosine similarity of two vectors.
 *
 * @returns 0 when either vector is all-zero, or when the dimensions
 *   differ; comparing only a shared prefix would give a meaningless
 *   score.
 */
function cosine(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

/** Drop the oldest user entries of `profile` beyond MAX_USER_PER_PROFILE. */
function evictIfNeeded(profile: ProfileId): void {
  const mine = entries.filter((e) => e.profile === profile && e.source === 'user');
  const excess = mine.length - MAX_USER_PER_PROFILE;
  if (excess <= 0) return;
  const doomed = new Set(
    [...mine]
      .sort((x, y) => x.createdAt - y.createdAt)
      .slice(0, excess)
      .map((e) => e.id),
  );
  entries = entries.filter((e) => !doomed.has(e.id));
}

/**
 * Remember one correction pair.
 *
 * Silently does nothing when the pair is blank or unchanged, when the
 * same original already exists for the profile, or when no embedding
 * can be produced.
 */
export async function capture(input: CaptureInput): Promise<void> {
  await load();
  const original = input.original.trim();
  const corrected = input.corrected.trim();
  if (!original || !corrected || original === corrected) return;
  // De-dupe identical originals within a profile.
  const seen = (): boolean =>
    entries.some((e) => e.profile === input.profile && e.original === original);
  if (seen()) return;
  const embedding = await embed(original);
  if (!embedding) return; // embeddings unavailable → silently skip
  // Re-check: another capture of the same text may have landed while
  // this one was waiting on the embedding.
  if (seen()) return;
  entries.push({
    id: nextId++,
    profile: input.profile,
    kind: input.kind,
    original,
    corrected,
    accepted: input.accepted ?? true,
    source: input.source ?? 'user',
    createdAt: Date.now(),
    embedding,
  });
  evictIfNeeded(input.profile);
  scheduleSave();
}

/**
 * Find up to `k` accepted corrections for `profile` most similar to
 * `query`, best first. Matches below SIM_FLOOR are ignored.
 *
 * @returns An empty list when the query is blank, the store is empty,
 *   or embeddings are unavailable.
 */
export async function retrieve(
  profile: ProfileId,
  query: string,
  k = 3,
): Promise<{ original: string; corrected: string }[]> {
  await load();
  const q = query.trim();
  if (!q || entries.length === 0) return [];
  const probe = await embed(q);
  if (!probe) return [];
  return entries
    .filter((e) => e.profile === profile && e.accepted)
    .map((e) => ({ entry: e, score: cosine(probe, e.embedding) }))
    .filter((hit) => hit.score >= SIM_FLOOR)
    .sort((x, y) => y.score - x.score)
    .slice(0, k)
    .map(({ entry }) => ({ original: entry.original, corrected: entry.corrected }));
}

/** Number of stored entries per profile (user and seed entries alike). */
export async function stats(): Promise<Record<ProfileId, number>> {
  await load();
  const out: Record<ProfileId, number> = { general: 0, teacher: 0, trader: 0 };
  for (const e of entries) out[e.profile] = (out[e.profile] ?? 0) + 1;
  return out;
}

/** Forget every entry of `profile` and persist immediately. */
export async function resetProfile(profile: ProfileId): Promise<void> {
  await load();
  entries = entries.filter((e) => e.profile !== profile);
  await persist();
}

// ── Intent-file seeding ─────────────────────────────────────────────

/** Directories searched, in order, for the bundled intent files. */
function intentDirs(): string[] {
  const dirs: string[] = [];
  if (process.resourcesPath) dirs.push(path.join(process.resourcesPath, 'intent'));
  dirs.push(path.join(app.getAppPath(), 'resources', 'intent'));
  dirs.push(path.join(app.getAppPath(), '..', 'resources', 'intent'));
  return dirs;
}

// Read `<profile>.jsonl` from the first directory that has it. Lines
// that are not JSON, or lack string `original`/`corrected`, are dropped.
async function loadIntentFile(profile: ProfileId): Promise<{ original: string; corrected: string }[]> {
  const isPair = (o: unknown): o is { original: string; corrected: string } => {
    if (typeof o !== 'object' || o === null) return false;
    const pair = o as { original?: unknown; corrected?: unknown };
    return (
      typeof pair.original === 'string' &&
      pair.original.length > 0 &&
      typeof pair.corrected === 'string' &&
      pair.corrected.length > 0
    );
  };
  for (const base of intentDirs()) {
    let raw: string;
    try {
      raw = await fs.readFile(path.join(base, `${profile}.jsonl`), 'utf8');
    } catch {
      continue; // try next dir
    }
    return raw
      .split(/\r?\n/)
      .filter((line) => line.trim().length > 0)
      .map((line): unknown => {
        try {
          return JSON.parse(line);
        } catch {
          return null;
        }
      })
      .filter(isPair);
  }
  return [];
}

let seeding = false;
// Ingest the bundled per-profile examples once embeddings are available.
// Self-healing: if Ollama/the embed model isn't up yet, nothing is
// recorded and a later call retries. Fire-and-forget.
+export async function maybeSeed(): Promise<void> {
+  // Re-entrancy guard: registerRagIpc() fires this without awaiting it.
+  if (seeding) return;
+  seeding = true;
+  try {
+    await load();
+    for (const profile of PROFILES) {
+      // A profile counts as seeded as soon as ONE seed entry exists.
+      // NOTE(review): if embeddings drop out part-way through a profile,
+      // the remaining pairs look like they are never retried — confirm.
+      if (entries.some((e) => e.profile === profile && e.source === 'seed')) continue;
+      const pairs = await loadIntentFile(profile);
+      for (const p of pairs) {
+        // capture() trims, de-dupes and embeds; it returns without
+        // storing anything when no embedding is available.
+        await capture({
+          profile,
+          kind: 'typed',
+          original: p.original,
+          corrected: p.corrected,
+          source: 'seed',
+        });
+      }
+    }
+  } finally {
+    seeding = false;
+  }
+}
diff --git a/src/main/ai/ragIpc.ts b/src/main/ai/ragIpc.ts
new file mode 100644
index 0000000..a32dec8
--- /dev/null
+++ b/src/main/ai/ragIpc.ts
@@ -0,0 +1,39 @@
+import { ipcMain } from 'electron';
+import type { ProfileId } from '../../shared/types';
+import { capture, maybeSeed, resetProfile, stats, type RagKind } from './rag';
+
+// Runtime guard for the profile id carried by renderer payloads.
+const isProfile = (v: unknown): v is ProfileId =>
+  v === 'general' || v === 'teacher' || v === 'trader';
+
+// Registers the 'rag:*' IPC handlers and kicks off background seeding.
+// Handlers resolve to undefined (a no-op) when the payload is malformed.
+export function registerRagIpc(): void {
+  // Examples-learned counts per profile (drives the Learning settings).
+  ipcMain.handle('rag:stats', () => stats());
+
+  // Forget everything learned for one profile (seed examples included).
+  ipcMain.handle('rag:reset-profile', (_evt, payload: { profile: ProfileId }) => {
+    if (!isProfile(payload?.profile)) return;
+    return resetProfile(payload.profile);
+  });
+
+  // Record a correction the user accepted/edited, so it becomes
+  // few-shot context for future corrections.
+  ipcMain.handle(
+    'rag:capture',
+    (
+      _evt,
+      payload: { profile: ProfileId; kind: RagKind; original: string; corrected: string },
+    ) => {
+      if (!isProfile(payload?.profile)) return;
+      // IPC payloads are only typed at compile time; capture() calls
+      // .trim() on both strings, so reject anything else up front.
+      if (typeof payload.original !== 'string' || typeof payload.corrected !== 'string') return;
+      return capture({
+        profile: payload.profile,
+        kind: payload.kind,
+        original: payload.original,
+        corrected: payload.corrected,
+      });
+    },
+  );
+
+  // Background-seed the bundled intent examples once embeddings are
+  // available (self-heals on later launches if Ollama is down now).
+  void maybeSeed();
+}
diff --git a/src/main/ai/registry.ts b/src/main/ai/registry.ts
new file mode 100644
index 0000000..daf161a
--- /dev/null
+++ b/src/main/ai/registry.ts
@@ -0,0 +1,93 @@
+import type { ProviderId } from '../../shared/types';
+import type { ModelOption, ProviderAdapter } from './types';
+import { anthropic } from './anthropic';
+import { openai } from './openai';
+import { gemini } from './gemini';
+import { deepseek } from './deepseek';
+import { sarvam } from './sarvam';
+import { ollama } from './ollama';
+import { MODEL_CATALOG } from './ollamaModels';
+import { OLLAMA_INSTALL_URL } from './ollamaService';
+
+// One adapter per provider id; getAdapter() is the only reader.
+const ADAPTERS: Record<ProviderId, ProviderAdapter> = {
+  anthropic,
+  openai,
+  gemini,
+  deepseek,
+  sarvam,
+  ollama,
+};
+
+// Look up the adapter for a provider id. Throws on an id that is not in
+// ADAPTERS (e.g. a stale value that bypassed the type system).
+export function getAdapter(id: ProviderId): ProviderAdapter {
+  const adapter = ADAPTERS[id];
+  if (!adapter) throw new Error(`Unknown AI provider: ${id}`);
+  return adapter;
+}
+
+// Models exposed in the Settings dropdown (vision-capable unless the
+// provider's own note below says otherwise). The first
+// `recommended: true` entry is the default when the user picks a new
+// provider. Keep this list small — every model adds a row to the
+// dropdown and a maintenance line as providers rotate IDs.
+export const MODELS_BY_PROVIDER: Record<ProviderId, ModelOption[]> = {
+  anthropic: [
+    { id: 'claude-sonnet-4-5', label: 'Claude Sonnet 4.5', recommended: true },
+    { id: 'claude-opus-4-5', label: 'Claude Opus 4.5' },
+    { id: 'claude-haiku-4-5', label: 'Claude Haiku 4.5 (fast / cheap)' },
+  ],
+  openai: [
+    { id: 'gpt-4o', label: 'GPT-4o', recommended: true },
+    { id: 'gpt-4o-mini', label: 'GPT-4o mini (fast / cheap)' },
+  ],
+  gemini: [
+    { id: 'gemini-2.0-flash', label: 'Gemini 2.0 Flash', recommended: true },
+    { id: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro' },
+  ],
+  // DeepSeek is text-only (no vision); great for reasoning / math on
+  // typed questions and text follow-ups. deepseek-reasoner (R1) shows
+  // its chain-of-thought; deepseek-chat (V3) is the faster general model.
+  deepseek: [
+    { id: 'deepseek-chat', label: 'DeepSeek V3 (chat)', recommended: true },
+    { id: 'deepseek-reasoner', label: 'DeepSeek R1 (reasoner)' },
+  ],
+  // Sarvam solves from its own Vision OCR. sarvam-m is the confirmed
+  // stable default; the larger models reason better on complex problems.
+  sarvam: [
+    { id: 'sarvam-m', label: 'Sarvam-M (24B)', recommended: true },
+    { id: 'sarvam-30b', label: 'Sarvam-30B' },
+    { id: 'sarvam-105b', label: 'Sarvam-105B (strongest)' },
+  ],
+  // For local, the real source of truth is which tags are installed
+  // (the Local AI settings query Ollama directly). This static list
+  // just gives defaultModelFor() a sane fallback.
+  ollama: MODEL_CATALOG.filter((m) => m.kind !== 'embed').map((m) => ({
+    id: m.tag,
+    label: m.label,
+    recommended: m.tag === 'llama3.2:1b',
+  })),
+};
+
+// Default model id for a provider: the first `recommended` entry, else
+// the first entry. Throws when the provider has no models at all (only
+// possible for the catalog-derived ollama list) instead of crashing on
+// `undefined.id`.
+export function defaultModelFor(provider: ProviderId): string {
+  const list = MODELS_BY_PROVIDER[provider];
+  const chosen = list.find((m) => m.recommended) ?? list[0];
+  if (!chosen) throw new Error(`No models registered for AI provider: ${provider}`);
+  return chosen.id;
+}
+
+// Human-readable provider names.
+export const PROVIDER_LABELS: Record<ProviderId, string> = {
+  anthropic: 'Anthropic Claude',
+  openai: 'OpenAI ChatGPT',
+  gemini: 'Google Gemini',
+  deepseek: 'DeepSeek',
+  sarvam: 'Sarvam AI',
+  ollama: 'Local (Ollama)',
+};
+
+// The Settings UI uses this to render Set up → links to provider
+// console pages for users to grab an API key. Local has no key — its
+// link points at the Ollama install/download page instead.
+export const PROVIDER_KEY_URLS: Record<ProviderId, string> = {
+  anthropic: 'https://console.anthropic.com/settings/keys',
+  openai: 'https://platform.openai.com/api-keys',
+  gemini: 'https://aistudio.google.com/app/apikey',
+  deepseek: 'https://platform.deepseek.com/api_keys',
+  sarvam: 'https://dashboard.sarvam.ai',
+  ollama: OLLAMA_INSTALL_URL,
+};
diff --git a/src/main/ai/sarvam.ts b/src/main/ai/sarvam.ts
new file mode 100644
index 0000000..cb375ca
--- /dev/null
+++ b/src/main/ai/sarvam.ts
@@ -0,0 +1,220 @@
+import OpenAI from 'openai';
+import JSZip from 'jszip';
+import { SarvamAIClient } from 'sarvamai';
+import type { AskInput } from '../../shared/types';
+import type { ProviderAdapter } from './types';
+import { assembleTurns } from './messages';
+
+// Sarvam AI is integrated as a self-contained vision provider: when a
+// snip image rides the request we first run it through Sarvam's
+// Document Intelligence (Vision) OCR to extract the text, then hand
+// that text to Sarvam's chat model to actually SOLVE the problem. With
+// no image it's a plain chat call.
+//
+// Two transports:
+//   • OCR   — the official `sarvamai` SDK orchestrates the async,
+//             job-based Document Intelligence flow (createJob → upload
+//             → start → poll → download). Upload accepts only PDF/ZIP,
+//             so the PNG snip is wrapped in a single-entry ZIP; output
+//             comes back as a ZIP we unpack in memory with JSZip.
+//   • solve — Sarvam's chat endpoint is OpenAI-compatible, so we reuse
+//             the `openai` SDK with a baseURL override (same trick as
+//             deepseek.ts) to stream the answer.
+
+// Sent as `max_tokens` on every chat completion request.
+const MAX_TOKENS = 2048;
+// Base URL handed to the `openai` SDK so it talks to Sarvam's chat API.
+const CHAT_BASE_URL = 'https://api.sarvam.ai/v1';
+
+// Keep the interactive snip flow snappy: poll a bit faster than the
+// SDK default (2s) and cap total wait so a stuck job surfaces an error
+// instead of hanging the chat. ~1.5s × 30 ≈ 45s ceiling.
+const OCR_POLL_INTERVAL_MS = 1500;
+const OCR_MAX_POLLS = 30;
+
+// Chat message shapes sent to the OpenAI-compatible endpoint; content
+// is always a plain string (image text is inlined by buildMessages).
+type Message =
+  | { role: 'system'; content: string }
+  | { role: 'user'; content: string }
+  | { role: 'assistant'; content: string };
+
+// Per-session OCR cache. The snip is transcribed exactly once per
+// conversation; follow-up turns reuse the text instead of re-running
+// the (slow, job-based) Vision job. Bounded to the active session —
+// any new sessionId evicts the rest.
+// Keyed by sessionId; the value is the extracted text (possibly '').
+const ocrCache = new Map<string, string>();
+
+// Build the chat message list. The OCR'd image text is embedded into
+// the FIRST user turn (not the latest follow-up) so every replayed turn
+// carries the original problem; the model solves from the transcription.
+// A null or whitespace-only ocrText leaves every turn untouched.
+function buildMessages(input: AskInput, ocrText: string | null): Message[] {
+  const out: Message[] = [{ role: 'system', content: input.systemPrompt }];
+  const { turns, firstUserIdx } = assembleTurns(input);
+  turns.forEach((t, i) => {
+    if (ocrText && ocrText.trim().length > 0 && i === firstUserIdx) {
+      out.push({
+        role: 'user',
+        content: `Text extracted from the image:\n\n${ocrText.trim()}\n\n${t.content}`,
+      });
+    } else {
+      out.push({ role: t.role, content: t.content });
+    }
+  });
+  return out;
+}
+
+// PK\x03\x04 — local file header magic that starts a (non-empty) ZIP
+// archive. Inputs shorter than four bytes compare against undefined
+// and therefore return false.
+function isZip(bytes: Uint8Array): boolean {
+  return bytes[0] === 0x50 && bytes[1] === 0x4b && bytes[2] === 0x03 && bytes[3] === 0x04;
+}
+
+// Very small HTML→text fallback for when the only output is .html.
+// Not a sanitizer: the result is only used as prompt text.
+function stripHtml(html: string): string {
+  return (
+    html
+      // Drop script/style elements together with their contents, so code
+      // and CSS never leak into the extracted text.
+      .replace(/<script[\s\S]*?<\/script>/gi, '')
+      .replace(/<style[\s\S]*?<\/style>/gi, '')
+      // Every remaining tag becomes a space so adjacent words stay apart.
+      .replace(/<[^>]+>/g, ' ')
+      // The one entity worth decoding for spacing purposes.
+      .replace(/&nbsp;/g, ' ')
+      // Trim trailing whitespace before newlines, then collapse runs of
+      // spaces/tabs left behind by the removed tags.
+      .replace(/\s+\n/g, '\n')
+      .replace(/[ \t]{2,}/g, ' ')
+      .trim()
+  );
+}
+
+// Pull human-readable text out of one downloaded output payload. The
+// Document Intelligence output is delivered as a ZIP of per-page files;
+// we prefer Markdown, then plain text, then HTML, then JSON.
+async function textFromPayload(bytes: Uint8Array): Promise<string> {
+  // Anything that is not a ZIP is decoded as text as-is.
+  if (!isZip(bytes)) {
+    return new TextDecoder().decode(bytes);
+  }
+  const zip = await JSZip.loadAsync(bytes);
+  const files = Object.values(zip.files).filter((f) => !f.dir);
+  // Entries with one of the given extensions, in natural name order so
+  // that e.g. "page_10" sorts after "page_2".
+  const pick = (exts: string[]): typeof files =>
+    files
+      .filter((f) => exts.some((e) => f.name.toLowerCase().endsWith(e)))
+      .sort((a, b) => a.name.localeCompare(b.name, undefined, { numeric: true }));
+
+  // Markdown first, then plain text. They are separate tiers so a ZIP
+  // that carries both renderings does not return the same text twice.
+  for (const exts of [['.md', '.markdown'], ['.txt']]) {
+    const group = pick(exts);
+    if (group.length) {
+      return (await Promise.all(group.map((f) => f.async('string')))).join('\n\n');
+    }
+  }
+
+  const html = pick(['.html', '.htm']);
+  if (html.length) {
+    return (await Promise.all(html.map((f) => f.async('string').then(stripHtml)))).join('\n\n');
+  }
+
+  const json = pick(['.json']);
+  if (json.length) {
+    // Structured page data — flatten any string leaves into text.
+    const texts: string[] = [];
+    for (const f of json) {
+      try {
+        collectStrings(JSON.parse(await f.async('string')), texts);
+      } catch {
+        /* skip unparseable */
+      }
+    }
+    return texts.join('\n');
+  }
+  // No recognised file type in the archive.
+  return '';
+}
+
+// Recursively gather string values from the structured JSON output.
+// Appends to `out` in traversal order; whitespace-only strings and all
+// non-string leaves (numbers, booleans, null) are ignored.
+function collectStrings(node: unknown, out: string[]): void {
+  if (typeof node === 'string') {
+    if (node.trim().length > 0) out.push(node);
+  } else if (Array.isArray(node)) {
+    for (const v of node) collectStrings(v, out);
+  } else if (node && typeof node === 'object') {
+    for (const v of Object.values(node)) collectStrings(v, out);
+  }
+}
+
+// Run the snip PNG through Sarvam Document Intelligence and return the
+// extracted text. Throws with a clear message on failure so the chat
+// panel surfaces it. Resolves to '' only when the output was fetched
+// but contained no text.
+async function runOcr(
+  image: { mime: string; base64: string },
+  apiKey: string,
+  signal: AbortSignal,
+): Promise<string> {
+  const client = new SarvamAIClient({ apiSubscriptionKey: apiKey });
+
+  // Upload requires PDF or ZIP. Wrap the PNG in a flat single-entry ZIP
+  // and hand it over as a File so the SDK keeps the .zip name (a bare
+  // Blob would be uploaded as "document.pdf" and rejected).
+  const zip = new JSZip();
+  zip.file('snip.png', Buffer.from(image.base64, 'base64'));
+  const zipBuf = await zip.generateAsync({ type: 'arraybuffer' });
+  const zipFile = new File([zipBuf], 'snip.zip', { type: 'application/zip' });
+
+  const job = await client.documentIntelligence.createJob({
+    language: 'en-IN',
+    outputFormat: 'md',
+    pollingIntervalMs: OCR_POLL_INTERVAL_MS,
+    maxPollingAttempts: OCR_MAX_POLLS,
+  });
+  // The SDK calls take no AbortSignal here, so cancellation is checked
+  // manually between the slow steps.
+  if (signal.aborted) throw new Error('aborted');
+
+  await job.uploadFile(zipFile);
+  await job.start();
+  const status = await job.waitUntilComplete();
+  if (signal.aborted) throw new Error('aborted');
+  // 'Completed' and 'PartiallyCompleted' both yield usable output; only
+  // a hard failure (or a still-running job that hit the poll ceiling)
+  // is an error.
+  if (status.job_state !== 'Completed' && status.job_state !== 'PartiallyCompleted') {
+    throw new Error(`Sarvam OCR ${status.job_state}: ${status.error_message ?? 'failed'}`);
+  }
+
+  const links = await job.getDownloadLinks();
+  const urls = Object.values(links.download_urls ?? {})
+    .map((d) => d.file_url)
+    .filter((u): u is string => typeof u === 'string' && u.length > 0);
+  if (urls.length === 0) throw new Error('Sarvam OCR returned no output files');
+
+  const parts: string[] = [];
+  let downloaded = 0;
+  for (const url of urls) {
+    const res = await fetch(url, { signal });
+    // One bad link is tolerated as long as another one delivers.
+    if (!res.ok) continue;
+    downloaded += 1;
+    const text = await textFromPayload(new Uint8Array(await res.arrayBuffer()));
+    if (text.trim().length > 0) parts.push(text);
+  }
+  // Every download failing is an OCR failure, not "the image had no
+  // text" — surface it instead of returning '' (which the adapter would
+  // cache for the rest of the conversation).
+  if (downloaded === 0) throw new Error('Sarvam OCR output could not be downloaded');
+  return parts.join('\n\n');
+}
+
+// Sarvam provider adapter: optional OCR of the snip, then a streamed
+// chat completion whose text deltas are yielded one by one.
+export const sarvam: ProviderAdapter = {
+  id: 'sarvam',
+  async *ask(input, apiKey, signal) {
+    // Main re-injects the cached snip on every turn of an image
+    // conversation, so OCR once and reuse the text on follow-ups rather
+    // than re-running the slow job each time.
+    let ocrText: string | null = null;
+    if (input.image) {
+      const sid = input.sessionId;
+      if (sid && ocrCache.has(sid)) {
+        ocrText = ocrCache.get(sid) ?? null;
+      } else {
+        // Without a sessionId there is nothing to key on, so the result
+        // is used for this request only.
+        ocrText = await runOcr(input.image, apiKey, signal);
+        if (sid) {
+          ocrCache.clear(); // bound the cache to the active conversation
+          ocrCache.set(sid, ocrText);
+        }
+      }
+    }
+
+    const client = new OpenAI({ apiKey, baseURL: CHAT_BASE_URL });
+    const stream = await client.chat.completions.create(
+      {
+        model: input.model,
+        max_tokens: MAX_TOKENS,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        messages: buildMessages(input, ocrText) as any,
+        stream: true,
+      },
+      { signal },
+    );
+    for await (const chunk of stream) {
+      // Skip chunks that carry no text (missing or empty delta content).
+      const delta = chunk.choices?.[0]?.delta?.content;
+      if (typeof delta === 'string' && delta.length > 0) {
+        yield delta;
+      }
+    }
+  },
+};
diff --git a/src/main/ai/types.ts b/src/main/ai/types.ts
new file mode 100644
index 0000000..63e7fac
--- /dev/null
+++ b/src/main/ai/types.ts
@@ -0,0 +1,23 @@
+import type { AskInput, ProviderId } from '../../shared/types';
+
+export type { ProviderId, AskInput } from '../../shared/types';
+
+// Each provider implements this interface in a separate file. The
+// async iterable yields plain text deltas; the IPC layer pipes them
+// to the renderer as 'ai:chunk' events. AbortSignal is honoured by
+// all three SDKs (Anthropic / OpenAI / Gemini) and lets the renderer
+// cancel an in-flight stream from the chat panel.
+export interface ProviderAdapter { + id: ProviderId; + ask( + input: AskInput, + apiKey: string, + signal: AbortSignal, + ): AsyncIterable; +} + +export interface ModelOption { + id: string; + label: string; + recommended?: boolean; +} diff --git a/src/main/capture.ts b/src/main/capture.ts index 0ef37f8..b58b06f 100644 --- a/src/main/capture.ts +++ b/src/main/capture.ts @@ -15,6 +15,8 @@ import { getOverlays } from './windows/overlay'; import { notifyStatus, onFocusRecheck, screenStatus } from './permissions'; import { persisted } from './persistence'; import { patch as patchHub } from './hub'; +import { startChatSession } from './ai/ipc'; +import type { ProfileId } from '../shared/types'; interface Rect { x: number; @@ -121,6 +123,33 @@ export async function copyFocusedSnipToClipboard(): Promise { clipboard.writeImage(img); } +// Start an AI chat about the user's current snip selection. Same +// capture + composite path Save / Copy use; the bytes are handed to +// startChatSession which broadcasts chat:session and opens the dock +// chat panel. +export async function askAiAboutFocusedSnip(profile: ProfileId): Promise { + if (!gateScreenForCapture('clipboard')) return; + const displayId = getFocusedDisplayId(); + const rect = snipSelections.get(displayId); + if (!rect) return; + const display = screen.getAllDisplays().find((d) => d.id === displayId); + if (!display) return; + const overlay = getOverlays().get(displayId); + if (!overlay || overlay.isDestroyed()) return; + + // Hide the dashed selection so it isn't baked into the PNG sent + // to the AI (the existing crop rect is already in hand). 
+ setSnipSelection(displayId, null); + await waitMs(60); + + const png = await captureCroppedComposite(overlay, display, rect); + if (!png) { + handleCaptureFailure(); + return; + } + startChatSession(png, 'image/png', profile); +} + export async function captureFocusedDisplay(): Promise { if (!gateScreenForCapture('capture')) return; @@ -317,6 +346,9 @@ export function registerCaptureIpc() { ipcMain.handle('snip:clear', (_evt, payload: { displayId: number }) => { setSnipSelection(payload.displayId, null); }); + ipcMain.handle('snip:ask-ai', async (_evt, payload: { profile: ProfileId }) => { + await askAiAboutFocusedSnip(payload.profile); + }); // Renderer-triggered folder picker, used by the "Change…" button in // Settings → File save. Returns the chosen path so the renderer can // patch the hub with it (which is what persists + broadcasts to diff --git a/src/main/hotkeys.ts b/src/main/hotkeys.ts index 8f11eb4..c740460 100644 --- a/src/main/hotkeys.ts +++ b/src/main/hotkeys.ts @@ -23,6 +23,7 @@ export function registerHotkeys() { ipcMain.handle('relay:undo', () => sendToFocusedOverlay('overlay:undo')); ipcMain.handle('relay:redo', () => sendToFocusedOverlay('overlay:redo')); ipcMain.handle('relay:clear', () => sendToAllOverlays('overlay:clear')); + ipcMain.handle('relay:analyze', () => sendToFocusedOverlay('overlay:analyze')); ipcMain.handle('capture:trigger', () => captureFocusedDisplay()); ipcMain.handle('snip:copy', () => copyFocusedSnipToClipboard()); } diff --git a/src/main/hub.ts b/src/main/hub.ts index db099dc..1814025 100644 --- a/src/main/hub.ts +++ b/src/main/hub.ts @@ -3,11 +3,13 @@ import { DEFAULT_SETTINGS, GRAPHITE_COLOR } from '../shared/constants'; import { DEFAULT_PROFILE } from '../shared/profiles'; import { persisted, PERSISTED_DEFAULTS, save } from './persistence'; import type { + AiProfileModels, Calibration, HubStateUpdate, Orientation, PerToolWidth, ProfileId, + ProviderId, Theme, ToolId, ToolSettings, @@ -34,6 +36,26 @@ export interface 
HubState { // in hub so main can grow the toolbar window to fit, the same way // it does for settingsOpen. statusPanelOpen: boolean; + // AI chat panel visibility — transient like statusPanelOpen. + // Mutually exclusive with settingsOpen + statusPanelOpen at the + // dock slot level. + chatOpen: boolean; + // Persisted AI configuration mirrored into the hub so renderers + // can subscribe via the existing hub.onBroadcast pipe. + aiActiveProvider: ProviderId | null; + aiActiveModel: string | null; + aiProfilePrompts: Partial>; + // Local-first (Ollama) AI configuration, mirrored from persistence. + aiLocalEnabled: boolean; + aiInstalledModels: string[]; + aiLocalModel: string | null; + aiLocalVisionModel: string | null; + aiProfileModels: AiProfileModels; + autocorrectTyped: boolean; + autocorrectDrawn: boolean; + defaultTextFont: string; + aiOnboarded: boolean; + autoUpdate: boolean; } const state: HubState = { @@ -52,6 +74,20 @@ const state: HubState = { saveDir: null, alwaysAskSavePath: false, statusPanelOpen: false, + chatOpen: false, + aiActiveProvider: null, + aiActiveModel: null, + aiProfilePrompts: {}, + aiLocalEnabled: false, + aiInstalledModels: [], + aiLocalModel: null, + aiLocalVisionModel: null, + aiProfileModels: {}, + autocorrectTyped: false, + autocorrectDrawn: false, + defaultTextFont: 'system-ui, -apple-system, sans-serif', + aiOnboarded: false, + autoUpdate: true, }; const subscribers = new Set(); @@ -114,6 +150,39 @@ export function hydrateFromPersistence(): void { // older installs without this key fall through to the default. state.saveDir = typeof p.saveDir === 'string' ? p.saveDir : null; state.alwaysAskSavePath = typeof p.alwaysAskSavePath === 'boolean' ? p.alwaysAskSavePath : false; + // AI config — schema-tolerant: missing fields fall back to null / + // empty so old installs upgrade cleanly when they first launch the + // build with AI integration. 
+ state.aiActiveProvider = + p.aiActiveProvider === 'anthropic' || + p.aiActiveProvider === 'openai' || + p.aiActiveProvider === 'gemini' || + p.aiActiveProvider === 'deepseek' || + p.aiActiveProvider === 'sarvam' || + p.aiActiveProvider === 'ollama' + ? p.aiActiveProvider + : null; + state.aiActiveModel = typeof p.aiActiveModel === 'string' ? p.aiActiveModel : null; + state.aiProfilePrompts = + p.aiProfilePrompts && typeof p.aiProfilePrompts === 'object' ? p.aiProfilePrompts : {}; + // Local AI — schema-tolerant for installs that predate it. + state.aiLocalEnabled = typeof p.aiLocalEnabled === 'boolean' ? p.aiLocalEnabled : false; + state.aiInstalledModels = Array.isArray(p.aiInstalledModels) + ? p.aiInstalledModels.filter((m): m is string => typeof m === 'string') + : []; + state.aiLocalModel = typeof p.aiLocalModel === 'string' ? p.aiLocalModel : null; + state.aiLocalVisionModel = typeof p.aiLocalVisionModel === 'string' ? p.aiLocalVisionModel : null; + state.aiProfileModels = + p.aiProfileModels && typeof p.aiProfileModels === 'object' ? p.aiProfileModels : {}; + state.autocorrectTyped = typeof p.autocorrectTyped === 'boolean' ? p.autocorrectTyped : false; + state.autocorrectDrawn = typeof p.autocorrectDrawn === 'boolean' ? p.autocorrectDrawn : false; + state.defaultTextFont = + typeof p.defaultTextFont === 'string' && p.defaultTextFont.length > 0 + ? p.defaultTextFont + : PERSISTED_DEFAULTS.defaultTextFont; + state.aiOnboarded = typeof p.aiOnboarded === 'boolean' ? p.aiOnboarded : false; + // Default ON when absent (older installs predate auto-update). + state.autoUpdate = typeof p.autoUpdate === 'boolean' ? p.autoUpdate : true; // If the active tool is pencil, the canonical color is graphite — // don't restore a stray non-graphite value from a previous session. 
const colorForTool = @@ -244,10 +313,21 @@ export function patch(update: HubStateUpdate) { if (update.settingsOpen !== undefined && update.settingsOpen !== state.settingsOpen) { state.settingsOpen = update.settingsOpen; changed.add('settingsOpen'); - // Settings and flyout share the side panel slot; only one open at once. - if (state.settingsOpen && state.thicknessFlyoutOpen) { - state.thicknessFlyoutOpen = false; - changed.add('thicknessFlyoutOpen'); + // The dock slot holds AT MOST ONE of: settings, status panel, + // chat panel, thickness flyout. Opening settings closes the rest. + if (state.settingsOpen) { + if (state.thicknessFlyoutOpen) { + state.thicknessFlyoutOpen = false; + changed.add('thicknessFlyoutOpen'); + } + if (state.statusPanelOpen) { + state.statusPanelOpen = false; + changed.add('statusPanelOpen'); + } + if (state.chatOpen) { + state.chatOpen = false; + changed.add('chatOpen'); + } } } if ( @@ -280,13 +360,128 @@ export function patch(update: HubStateUpdate) { ) { state.statusPanelOpen = update.statusPanelOpen; changed.add('statusPanelOpen'); - // Status panel and settings are mutually exclusive panels in the - // same dock slot — opening one closes the other so the renderer - // and main agree on what's showing. - if (state.statusPanelOpen && state.settingsOpen) { - state.settingsOpen = false; - changed.add('settingsOpen'); + // Mutex with the other dock-slot panels. + if (state.statusPanelOpen) { + if (state.settingsOpen) { + state.settingsOpen = false; + changed.add('settingsOpen'); + } + if (state.chatOpen) { + state.chatOpen = false; + changed.add('chatOpen'); + } + } + } + if (update.chatOpen !== undefined && update.chatOpen !== state.chatOpen) { + state.chatOpen = update.chatOpen; + changed.add('chatOpen'); + // Mutex with the other dock-slot panels. 
+ if (state.chatOpen) { + if (state.settingsOpen) { + state.settingsOpen = false; + changed.add('settingsOpen'); + } + if (state.statusPanelOpen) { + state.statusPanelOpen = false; + changed.add('statusPanelOpen'); + } + } + } + if ( + update.aiActiveProvider !== undefined && + update.aiActiveProvider !== state.aiActiveProvider + ) { + state.aiActiveProvider = update.aiActiveProvider; + changed.add('aiActiveProvider'); + save('aiActiveProvider', state.aiActiveProvider); + } + if (update.aiActiveModel !== undefined && update.aiActiveModel !== state.aiActiveModel) { + state.aiActiveModel = update.aiActiveModel; + changed.add('aiActiveModel'); + save('aiActiveModel', state.aiActiveModel); + } + if (update.aiProfilePrompts !== undefined) { + // Merge — caller can patch a single profile's override without + // wiping the others. Empty-string entry removes the override. + const merged = { ...state.aiProfilePrompts, ...update.aiProfilePrompts }; + for (const key of Object.keys(merged) as ProfileId[]) { + const v = merged[key]; + if (typeof v !== 'string' || v.length === 0) delete merged[key]; } + state.aiProfilePrompts = merged; + changed.add('aiProfilePrompts'); + save('aiProfilePrompts', state.aiProfilePrompts); + } + if (update.aiLocalEnabled !== undefined && update.aiLocalEnabled !== state.aiLocalEnabled) { + state.aiLocalEnabled = update.aiLocalEnabled; + changed.add('aiLocalEnabled'); + save('aiLocalEnabled', state.aiLocalEnabled); + } + if (update.aiInstalledModels !== undefined) { + state.aiInstalledModels = update.aiInstalledModels.filter( + (m): m is string => typeof m === 'string', + ); + changed.add('aiInstalledModels'); + save('aiInstalledModels', state.aiInstalledModels); + } + if (update.aiLocalModel !== undefined && update.aiLocalModel !== state.aiLocalModel) { + state.aiLocalModel = update.aiLocalModel; + changed.add('aiLocalModel'); + save('aiLocalModel', state.aiLocalModel); + } + if ( + update.aiLocalVisionModel !== undefined && + 
update.aiLocalVisionModel !== state.aiLocalVisionModel + ) { + state.aiLocalVisionModel = update.aiLocalVisionModel; + changed.add('aiLocalVisionModel'); + save('aiLocalVisionModel', state.aiLocalVisionModel); + } + if (update.aiProfileModels !== undefined) { + // Deep-merge per profile so a single profile's text/vision pick can + // be patched without wiping the rest. An empty string clears that + // slot back to the catalogue default. + const merged: AiProfileModels = { ...state.aiProfileModels }; + for (const key of Object.keys(update.aiProfileModels) as ProfileId[]) { + const incoming = update.aiProfileModels[key] ?? {}; + const slot = { ...merged[key], ...incoming }; + if (slot.text === '') delete slot.text; + if (slot.vision === '') delete slot.vision; + if (slot.text === undefined && slot.vision === undefined) delete merged[key]; + else merged[key] = slot; + } + state.aiProfileModels = merged; + changed.add('aiProfileModels'); + save('aiProfileModels', state.aiProfileModels); + } + if (update.autocorrectTyped !== undefined && update.autocorrectTyped !== state.autocorrectTyped) { + state.autocorrectTyped = update.autocorrectTyped; + changed.add('autocorrectTyped'); + save('autocorrectTyped', state.autocorrectTyped); + } + if (update.autocorrectDrawn !== undefined && update.autocorrectDrawn !== state.autocorrectDrawn) { + state.autocorrectDrawn = update.autocorrectDrawn; + changed.add('autocorrectDrawn'); + save('autocorrectDrawn', state.autocorrectDrawn); + } + if ( + update.defaultTextFont !== undefined && + update.defaultTextFont !== state.defaultTextFont && + update.defaultTextFont.length > 0 + ) { + state.defaultTextFont = update.defaultTextFont; + changed.add('defaultTextFont'); + save('defaultTextFont', state.defaultTextFont); + } + if (update.aiOnboarded !== undefined && update.aiOnboarded !== state.aiOnboarded) { + state.aiOnboarded = update.aiOnboarded; + changed.add('aiOnboarded'); + save('aiOnboarded', state.aiOnboarded); + } + if 
(update.autoUpdate !== undefined && update.autoUpdate !== state.autoUpdate) { + state.autoUpdate = update.autoUpdate; + changed.add('autoUpdate'); + save('autoUpdate', state.autoUpdate); } broadcast(changed); } diff --git a/src/main/main.ts b/src/main/main.ts index 58515a7..04f1608 100644 --- a/src/main/main.ts +++ b/src/main/main.ts @@ -10,6 +10,10 @@ import { import { createToolbar, getToolbar, registerToolbarIpc, resizeToolbar } from './windows/toolbar'; import { registerPermissionsIpc } from './permissions'; import { registerCaptureIpc } from './capture'; +import { registerAiIpc } from './ai/ipc'; +import { registerRagIpc } from './ai/ragIpc'; +import { shutdown as shutdownOllama } from './ai/ollamaService'; +import { initAutoUpdates, registerUpdaterIpc } from './updater'; import { registerDrawingHotkeys, registerEscapeWhileDrawing, @@ -36,6 +40,9 @@ app.whenReady().then(async () => { registerPermissionsIpc(); registerCaptureIpc(); registerToolbarIpc(); + registerAiIpc(); + registerRagIpc(); + registerUpdaterIpc(); for (const display of screen.getAllDisplays()) { console.log('[pen] creating overlay for display', display.id, display.bounds); @@ -50,6 +57,10 @@ app.whenReady().then(async () => { registerHotkeys(); + // Kick off background update checks once windows exist to receive the + // 'updater:status' broadcasts. No-op (→ 'unsupported') in dev / unsigned. + initAutoUpdates(); + onChange((state, changed) => { if (changed.has('drawMode')) { console.log('[pen] drawMode ->', state.drawMode); @@ -59,16 +70,18 @@ app.whenReady().then(async () => { registerEscapeWhileDrawing(state.drawMode); registerDrawingHotkeys(state.drawMode); } - // The status panel (permission / save error) occupies the same - // dock slot as Settings in the toolbar, so we treat either being - // open as "the side panel is showing" for window-resize purposes. 
- const sidePanelOpen = state.settingsOpen || state.statusPanelOpen; + // Three panels share the dock slot: settings, status (permission + // / save error), and AI chat. Any of them being open means the + // toolbar window should grow to fit a side panel. + const sidePanelOpen = + state.settingsOpen || state.statusPanelOpen || state.chatOpen; if (changed.has('orientation')) { resizeToolbar(state.orientation, state.minimized, sidePanelOpen, 'default'); } else if ( changed.has('minimized') || changed.has('settingsOpen') || - changed.has('statusPanelOpen') + changed.has('statusPanelOpen') || + changed.has('chatOpen') ) { resizeToolbar(state.orientation, state.minimized, sidePanelOpen, 'keep'); } @@ -79,6 +92,8 @@ app.whenReady().then(async () => { app.on('will-quit', () => { unregisterHotkeys(); + // Stop only an Ollama daemon we spawned; abort any in-flight pulls. + shutdownOllama(); }); app.on('window-all-closed', () => { diff --git a/src/main/persistence.ts b/src/main/persistence.ts index 677e740..3ed27b0 100644 --- a/src/main/persistence.ts +++ b/src/main/persistence.ts @@ -1,5 +1,12 @@ import { GRAPHITE_COLOR } from '../shared/constants'; -import type { Orientation, ProfileId, Theme, ToolId } from '../shared/types'; +import type { + AiProfileModels, + Orientation, + ProfileId, + ProviderId, + Theme, + ToolId, +} from '../shared/types'; export interface PersistedState { orientation: Orientation; @@ -17,6 +24,35 @@ export interface PersistedState { // Off by default — the "remember + auto-save" UX is the recommended // path. Lives in Settings → File save. alwaysAskSavePath: boolean; + // AI integration. The provider/model pair the "Ask AI" button will + // use; `null` until the user has configured at least one provider. + // API keys themselves are NEVER in PersistedState — they live behind + // OS keychain via src/main/ai/credentials.ts. 
+ aiActiveProvider: ProviderId | null; + aiActiveModel: string | null; + // Per-profile user overrides for the default AI system prompt. + // Falls back to the profile's built-in prompt (see profiles.ts) + // when a profile isn't present here. + aiProfilePrompts: Partial>; + // Local-first (Ollama) AI. When aiLocalEnabled and a model is + // installed, the resolver prefers local over any configured cloud + // provider. Keys are never needed for local. + aiLocalEnabled: boolean; + aiInstalledModels: string[]; + aiLocalModel: string | null; + aiLocalVisionModel: string | null; + // Per-profile local model overrides (Phase 2 routing). + aiProfileModels: AiProfileModels; + // Autocorrect toggles (default OFF — raw stays raw). + autocorrectTyped: boolean; + autocorrectDrawn: boolean; + // CSS font-family for newly created text shapes. + defaultTextFont: string; + // First-run setup wizard completed (or skipped). + aiOnboarded: boolean; + // Background auto-update preference. Default ON — new versions + // download silently and apply on quit. Toggle in Settings → Updates. 
+ autoUpdate: boolean; } export const PERSISTED_DEFAULTS: PersistedState = { @@ -30,6 +66,19 @@ export const PERSISTED_DEFAULTS: PersistedState = { activeTool: 'pencil', saveDir: null, alwaysAskSavePath: false, + aiActiveProvider: null, + aiActiveModel: null, + aiProfilePrompts: {}, + aiLocalEnabled: false, + aiInstalledModels: [], + aiLocalModel: null, + aiLocalVisionModel: null, + aiProfileModels: {}, + autocorrectTyped: false, + autocorrectDrawn: false, + defaultTextFont: 'system-ui, -apple-system, sans-serif', + aiOnboarded: false, + autoUpdate: true, }; interface MinimalStore { diff --git a/src/main/preload.ts b/src/main/preload.ts index a1f1ef4..120dbfa 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -1,5 +1,19 @@ import { contextBridge, ipcRenderer } from 'electron'; -import type { HubStateUpdate, IpcChannel } from '../shared/types'; +import type { + AiStatus, + AskInput, + ChatSessionPayload, + ConnectionTestResult, + HubStateUpdate, + IpcChannel, + LocalModelInfo, + OllamaPullProgress, + OllamaServiceStatus, + ProfileId, + ProviderId, + StreamChunk, + UpdateStatus, +} from '../shared/types'; const api = { hub: { @@ -16,6 +30,7 @@ const api = { onUndo: (cb: () => void) => bind('overlay:undo', cb), onRedo: (cb: () => void) => bind('overlay:redo', cb), onClear: (cb: () => void) => bind('overlay:clear', cb), + onAnalyze: (cb: () => void) => bind('overlay:analyze', cb), onScreenshot: (cb: (payload: { png: Uint8Array }) => void) => bind('overlay:screenshot', cb as (v: unknown) => void), onSnip: ( @@ -43,11 +58,14 @@ const api = { clear: (payload: { displayId: number }) => ipcRenderer.invoke('snip:clear' satisfies IpcChannel, payload), copy: () => ipcRenderer.invoke('snip:copy' satisfies IpcChannel), + askAi: (profile: ProfileId) => + ipcRenderer.invoke('snip:ask-ai' satisfies IpcChannel, { profile }), }, relay: { undo: () => ipcRenderer.invoke('relay:undo' satisfies IpcChannel), redo: () => ipcRenderer.invoke('relay:redo' satisfies IpcChannel), 
clear: () => ipcRenderer.invoke('relay:clear' satisfies IpcChannel), + analyze: () => ipcRenderer.invoke('relay:analyze' satisfies IpcChannel), screenshot: () => ipcRenderer.invoke('capture:trigger' satisfies IpcChannel), }, win: { @@ -91,6 +109,86 @@ const api = { openPath: (p: string) => ipcRenderer.invoke('shell:open-path' satisfies IpcChannel, p), }, + ai: { + setKey: (provider: ProviderId, key: string) => + ipcRenderer.invoke('ai:set-key' satisfies IpcChannel, { provider, key }), + deleteKey: (provider: ProviderId) => + ipcRenderer.invoke('ai:delete-key' satisfies IpcChannel, { provider }), + getStatus: () => + ipcRenderer.invoke('ai:get-status' satisfies IpcChannel) as Promise, + testConnection: (provider: ProviderId, model: string) => + ipcRenderer.invoke('ai:test-connection' satisfies IpcChannel, { + provider, + model, + }) as Promise, + ask: (input: AskInput) => + ipcRenderer.invoke('ai:ask' satisfies IpcChannel, input) as Promise<{ + requestId: string; + }>, + cancel: (requestId: string) => + ipcRenderer.invoke('ai:cancel' satisfies IpcChannel, { requestId }), + onChunk: (cb: (c: StreamChunk) => void) => + bind('ai:chunk', cb as (v: unknown) => void), + // One-shot correction calls (non-streaming). 
+ recognize: (payload: { png: Uint8Array; mime?: string; profile?: ProfileId }) => + ipcRenderer.invoke('ai:recognize' satisfies IpcChannel, payload) as Promise<{ + text: string; + error?: string; + }>, + autocorrect: (payload: { text: string; profile?: ProfileId }) => + ipcRenderer.invoke('ai:autocorrect' satisfies IpcChannel, payload) as Promise<{ + text: string; + error?: string; + }>, + }, + ollama: { + status: () => + ipcRenderer.invoke('ollama:status' satisfies IpcChannel) as Promise, + start: () => + ipcRenderer.invoke('ollama:start' satisfies IpcChannel) as Promise, + listModels: () => + ipcRenderer.invoke('ollama:list-models' satisfies IpcChannel) as Promise, + diskSpace: () => + ipcRenderer.invoke('ollama:disk-space' satisfies IpcChannel) as Promise, + pull: (model: string) => + ipcRenderer.invoke('ollama:pull' satisfies IpcChannel, { model }) as Promise<{ ok: boolean }>, + cancelPull: (model: string) => + ipcRenderer.invoke('ollama:cancel-pull' satisfies IpcChannel, { model }) as Promise, + deleteModel: (model: string) => + ipcRenderer.invoke('ollama:delete-model' satisfies IpcChannel, { model }) as Promise, + installHelp: () => + ipcRenderer.invoke('ollama:install-help' satisfies IpcChannel) as Promise, + onPullProgress: (cb: (p: OllamaPullProgress) => void) => + bind('ollama:pull-progress', cb as (v: unknown) => void), + }, + rag: { + stats: () => + ipcRenderer.invoke('rag:stats' satisfies IpcChannel) as Promise< + Record + >, + resetProfile: (profile: ProfileId) => + ipcRenderer.invoke('rag:reset-profile' satisfies IpcChannel, { profile }) as Promise, + capture: (payload: { + profile: ProfileId; + kind: 'typed' | 'drawn' | 'analysis' | 'chat'; + original: string; + corrected: string; + }) => ipcRenderer.invoke('rag:capture' satisfies IpcChannel, payload) as Promise, + }, + chat: { + // Called by SnipActions in the overlay to hand a snip off to the + // toolbar's ChatPanel. Main relays via chat:session. 
+ start: (payload: { png: Uint8Array; mime: string; profile: ProfileId }) => + ipcRenderer.invoke('chat:start' satisfies IpcChannel, payload) as Promise<{ + sessionId: string; + }>, + startText: (payload: { text: string; profile: ProfileId }) => + ipcRenderer.invoke('chat:start-text' satisfies IpcChannel, payload) as Promise<{ + sessionId: string; + }>, + onSession: (cb: (s: ChatSessionPayload) => void) => + bind('chat:session', cb as (v: unknown) => void), + }, app: { info: () => ipcRenderer.invoke('app:info' satisfies IpcChannel) as Promise<{ @@ -99,6 +197,15 @@ const api = { }>, relaunch: () => ipcRenderer.invoke('app:relaunch' satisfies IpcChannel), }, + updater: { + get: () => ipcRenderer.invoke('updater:get' satisfies IpcChannel) as Promise, + check: () => ipcRenderer.invoke('updater:check' satisfies IpcChannel) as Promise, + install: () => ipcRenderer.invoke('updater:install' satisfies IpcChannel) as Promise, + openReleases: () => + ipcRenderer.invoke('updater:open-releases' satisfies IpcChannel) as Promise, + onStatus: (cb: (s: UpdateStatus) => void) => + bind('updater:status', cb as (v: unknown) => void), + }, env: { displayId: () => ipcRenderer.sendSync('overlay:display-id'), }, diff --git a/src/main/updater.ts b/src/main/updater.ts new file mode 100644 index 0000000..65d023d --- /dev/null +++ b/src/main/updater.ts @@ -0,0 +1,134 @@ +import { app, BrowserWindow, ipcMain, shell } from 'electron'; +// electron-updater is CommonJS; import the default and destructure so the +// ESM↔CJS interop is stable across bundlers. +import electronUpdater from 'electron-updater'; +import type { UpdateStatus } from '../shared/types'; +import { getState, onChange } from './hub'; + +const { autoUpdater } = electronUpdater; + +// Where users go to grab a build by hand — the manual fallback when +// auto-update can't run (dev, or unsigned macOS where Squirrel refuses +// to apply an update). Derived from package.json's repository field. 
+const RELEASES_URL = 'https://github.com/opensourcebharat/lekhini/releases/latest'; + +// Re-check this often while the app stays open, so a long-running +// session still notices a release without a restart. +const CHECK_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours + +let status: UpdateStatus = { state: 'idle', currentVersion: '' }; + +function broadcast(): void { + for (const win of BrowserWindow.getAllWindows()) { + if (!win.isDestroyed()) win.webContents.send('updater:status', status); + } +} + +function setStatus(next: Partial): void { + status = { ...status, ...next }; + broadcast(); +} + +// Auto-update only works in a packaged build (a dev run has no +// app-update.yml feed) — and on macOS only when the app is signed + +// notarized. We can't cheaply detect signing, so we attempt the check +// and map a signature error to 'unsupported' (the UI then offers a +// manual download link instead of looking broken). +function canUpdate(): boolean { + return app.isPackaged; +} + +function isSignatureError(message: string): boolean { + return /code sign|signature|not signed|not been signed/i.test(message); +} + +async function check(): Promise { + if (!canUpdate()) return; + try { + await autoUpdater.checkForUpdates(); + } catch (err) { + const msg = (err as Error)?.message ?? String(err); + setStatus({ state: isSignatureError(msg) ? 'unsupported' : 'error', message: msg }); + } +} + +function wireEvents(): void { + autoUpdater.on('checking-for-update', () => setStatus({ state: 'checking', message: undefined })); + autoUpdater.on('update-available', (info) => + // With autoDownload on, electron-updater is already fetching; reflect + // that. With it off, we sit at 'available' until the user acts. + setStatus({ + state: autoUpdater.autoDownload ? 
'downloading' : 'available', + version: info.version, + percent: 0, + }), + ); + autoUpdater.on('update-not-available', () => setStatus({ state: 'none', version: undefined })); + autoUpdater.on('download-progress', (p) => + setStatus({ state: 'downloading', percent: Math.round(p.percent) }), + ); + autoUpdater.on('update-downloaded', (info) => + setStatus({ state: 'downloaded', version: info.version, percent: 100 }), + ); + autoUpdater.on('error', (err) => { + const msg = (err as Error)?.message ?? String(err); + setStatus({ state: isSignatureError(msg) ? 'unsupported' : 'error', message: msg }); + }); +} + +export function initAutoUpdates(): void { + status = { state: 'idle', currentVersion: app.getVersion() }; + + autoUpdater.autoDownload = getState().autoUpdate; + autoUpdater.autoInstallOnAppQuit = true; + autoUpdater.allowPrerelease = false; + + wireEvents(); + + // Live-react to the Settings toggle: flip autoDownload, and if the user + // just enabled it while an update is already known, start fetching. + onChange((s, changed) => { + if (!changed.has('autoUpdate')) return; + autoUpdater.autoDownload = s.autoUpdate; + if (s.autoUpdate && status.state === 'available') void check(); + }); + + if (!canUpdate()) { + setStatus({ + state: 'unsupported', + message: 'Updates apply to installed builds only (you are running from source).', + }); + return; + } + + void check(); + setInterval(() => void check(), CHECK_INTERVAL_MS); +} + +export function registerUpdaterIpc(): void { + ipcMain.handle('updater:get', () => status); + ipcMain.handle('updater:check', async () => { + await check(); + return status; + }); + // Apply an update. If it's downloaded, quit + install now; if it's only + // been detected (autoDownload off), kick off the download — the UI then + // flips to "Restart to update" once 'update-downloaded' fires. 
+ ipcMain.handle('updater:install', async () => { + if (!canUpdate()) return; + if (status.state === 'downloaded') { + // Defer so the IPC reply flushes before the app tears down. + setImmediate(() => autoUpdater.quitAndInstall()); + return; + } + try { + await autoUpdater.downloadUpdate(); + } catch (err) { + const msg = (err as Error)?.message ?? String(err); + setStatus({ state: isSignatureError(msg) ? 'unsupported' : 'error', message: msg }); + } + }); + ipcMain.handle('updater:open-releases', () => { + void shell.openExternal(RELEASES_URL); + }); +} diff --git a/src/renderer/overlay/App.tsx b/src/renderer/overlay/App.tsx index 9804517..a7e9390 100644 --- a/src/renderer/overlay/App.tsx +++ b/src/renderer/overlay/App.tsx @@ -5,7 +5,27 @@ import { attachPointerPipeline } from './canvas/pointerPipeline'; import { cursorFor } from './cursors'; import { store, type SnipRect } from './store'; import { buildRegistry } from './tools/registry'; -import type { Item, Theme, ToolSettings, Whiteboard } from '../../shared/types'; +import { nextId } from './tools/types'; +import { + dominantColor, + groupBounds, + HANDWRITING_FONT, + isDescriptiveJunk, + isLikelyQuestion, + isRecognizableStroke, + rasterizeGroup, +} from './canvas/recognize'; +import { buildTradeAnalysisText } from './canvas/ta'; +import type { + Calibration, + Item, + ProfileId, + StrokeItem, + TextShape, + Theme, + ToolSettings, + Whiteboard, +} from '../../shared/types'; import type { Tool, ToolContext } from './tools/types'; export function OverlayApp() { @@ -19,7 +39,21 @@ export function OverlayApp() { // can re-render on Solid's signal cycle. Synced inside the store // subscriber below. const [snipRectSig, setSnipRectSig] = createSignal(null); + // AI-configuration mirror + current profile, used by the SnipActions + // Ask AI button. Updated from hub.onBroadcast below. 
+ const [aiConfigured, setAiConfigured] = createSignal(false); + const [activeProfile, setActiveProfile] = createSignal('general'); + // Autocorrect + default-font settings, mirrored from the hub. + const [autocorrectTyped, setAutocorrectTyped] = createSignal(false); + const [autocorrectDrawn, setAutocorrectDrawn] = createSignal(false); + const [defaultFont, setDefaultFont] = createSignal('system-ui, -apple-system, sans-serif'); + // Local AI usable = enabled with at least one model installed. + const [aiLocalReady, setAiLocalReady] = createSignal(false); + // Any AI path available (local or a configured cloud provider). + const aiAvailable = () => aiLocalReady() || aiConfigured(); let currentTheme: Theme = 'dark'; + // Latest pixel↔price calibration from the hub (null until set). + let currentCalibration: Calibration | null = null; const applyCursor = () => { if (!surface) return; @@ -47,10 +81,134 @@ export function OverlayApp() { store.getState().snipRect, ); + // ── Handwriting recognition controller ───────────────────────── + // After the user FINISHES drawing pen/pencil ink (long idle, and + // never while a stroke is in progress), the recent strokes are + // rasterized and sent to the AI for transcription + correction, then + // swapped for a single TextShape in one undo step. Gated by the + // autocorrectDrawn setting and AI availability. + // + // The idle must be generous: people pause between letters/words, so + // a short timer fires mid-word and overwrites half-written ink. We + // wait ~2.2s of no drawing AND cancel any pending pass the moment a + // new stroke starts (cancelRecognition on pointer-down). 
+ const RECOGNIZE_IDLE_MS = 2200; + const recog = { + pending: new Set(), + recognized: new Set(), + timer: null as ReturnType | null, + inFlight: false, + }; + + const cancelRecognition = () => { + if (recog.timer !== null) { + clearTimeout(recog.timer); + recog.timer = null; + } + }; + + const scheduleRecognition = () => { + cancelRecognition(); + recog.timer = setTimeout(() => { + recog.timer = null; + void runRecognition(); + }, RECOGNIZE_IDLE_MS); + }; + + const onStrokeCommitted = (item: Item) => { + if (!autocorrectDrawn() || !aiAvailable()) return; + if (!isRecognizableStroke(item)) return; + recog.pending.add(item.id); + scheduleRecognition(); + }; + + const runRecognition = async () => { + if (recog.inFlight) { + scheduleRecognition(); + return; + } + if (!autocorrectDrawn()) { + recog.pending.clear(); + return; + } + const byId = new Map(store.getState().items.map((i) => [i.id, i] as const)); + const group: StrokeItem[] = []; + for (const id of recog.pending) { + const it = byId.get(id); + if (it && isRecognizableStroke(it) && !recog.recognized.has(id)) group.push(it); + } + recog.pending.clear(); + if (group.length === 0) return; + const bounds = groupBounds(group); + // Require a real bit of writing — a single tiny mark is almost + // always an accidental tap, not a word worth transcribing. + if (bounds.w < 24 || bounds.h < 10) return; + // Claim these ids before the await so strokes drawn during the + // request form a fresh batch and these are never re-sent. + const ids = group.map((g) => g.id); + ids.forEach((id) => recog.recognized.add(id)); + + const dpr = Math.max(window.devicePixelRatio || 1, 2); + const png = await canvasToPng(rasterizeGroup(group, bounds, dpr)); + + recog.inFlight = true; + let text = ''; + try { + const res = await window.pen.ai.recognize({ + png, + mime: 'image/png', + profile: activeProfile(), + }); + text = (res.text ?? 
'').trim(); + } catch { + text = ''; + } finally { + recog.inFlight = false; + } + // Strip wrapping quotes the model sometimes adds. + text = text.replace(/^["'“”‘’`]+|["'“”‘’`]+$/g, '').trim(); + // Reject non-transcriptions: small vision models often DESCRIBE the + // image ("a signature", "the user wrote…", "this appears to be + // handwriting") instead of transcribing. Replacing the user's ink + // with that is worse than doing nothing, so keep the ink instead. + if (!text || isDescriptiveJunk(text)) return; + + // The user may have undone or erased the ink during the request — + // only replace strokes that still exist. + const live = new Set(store.getState().items.map((i) => i.id)); + const survivors = ids.filter((id) => live.has(id)); + if (survivors.length === 0) return; + + const textItem: TextShape = { + kind: 'text', + id: nextId('text'), + at: { x: bounds.x, y: bounds.y }, + text, + color: dominantColor(group), + // Match the size the user actually drew (ink height), so the + // replacement neither balloons nor shrinks. Rendered in a + // handwriting font to stay realistic at that spot. + fontSize: Math.min(200, Math.max(12, Math.round(bounds.h * 0.8))), + fontFamily: HANDWRITING_FONT, + }; + store.getState().replaceMany(survivors, [textItem]); + + // If the user hand-wrote a question/request, also answer it in + // the chat panel — profile-aware (teacher explains, trader + // analyzes) via the profile system prompt. The tidy text stays + // on the canvas; the answer opens in the dock chat. 
+ if (isLikelyQuestion(text)) { + void window.pen.chat.startText({ text, profile: activeProfile() }); + } + }; + const ctx: ToolContext = { get settings() { return currentSettings; }, + profile: () => activeProfile(), + defaultFont: () => defaultFont(), + autocorrectTyped: () => autocorrectTyped(), items: () => store.getState().items, selectedId: () => store.getState().selectedId, setDraft(item: Item | null) { @@ -58,6 +216,7 @@ export function OverlayApp() { }, commit(item: Item) { store.getState().commit(item); + onStrokeCommitted(item); }, commitShapeAndSelect(item: Item) { store.getState().commit(item); @@ -91,6 +250,10 @@ export function OverlayApp() { onDown(s, e) { if (!drawMode()) return; e.preventDefault(); + // Starting a new stroke means the user isn't done writing — + // cancel any pending recognition so it never fires mid-word and + // overwrites half-finished ink. + cancelRecognition(); // The user is starting an actual stroke — close any thickness // popup that was left open on the toolbar so it doesn't hover // over the drawing surface. Cheap; only fires when the popup @@ -155,6 +318,13 @@ export function OverlayApp() { const unUndo = window.pen.overlay.onUndo(() => store.getState().undo()); const unRedo = window.pen.overlay.onRedo(() => store.getState().redo()); const unClear = window.pen.overlay.onClear(() => store.getState().clear()); + // Trader hybrid: compute the drawn fib / trendline levels as text + // and open a text-only analysis chat (no chart image is sent). 
+ const unAnalyze = window.pen.overlay.onAnalyze(() => { + const text = buildTradeAnalysisText(store.getState().items, currentCalibration); + if (!text) return; // nothing drawn to analyze + void window.pen.chat.startText({ text, profile: activeProfile() }); + }); const unShot = window.pen.overlay.onScreenshot(async ({ png }) => { const out = await composite(png, committed.getCanvas()); await window.pen.overlay.sendScreenshotResult(out); @@ -167,6 +337,27 @@ export function OverlayApp() { store.getState().setSnipRect(rect); }); + // Mirror the AI / autocorrect / font fields from a hub snapshot. + type AiHubFields = { + aiActiveProvider?: string | null; + aiLocalEnabled?: boolean; + aiInstalledModels?: string[]; + autocorrectTyped?: boolean; + autocorrectDrawn?: boolean; + defaultTextFont?: string; + }; + const applyAiFields = (s: AiHubFields) => { + if ('aiActiveProvider' in s) setAiConfigured(s.aiActiveProvider != null); + if (typeof s.aiLocalEnabled === 'boolean' || Array.isArray(s.aiInstalledModels)) { + setAiLocalReady(!!s.aiLocalEnabled && (s.aiInstalledModels?.length ?? 0) > 0); + } + if (typeof s.autocorrectTyped === 'boolean') setAutocorrectTyped(s.autocorrectTyped); + if (typeof s.autocorrectDrawn === 'boolean') setAutocorrectDrawn(s.autocorrectDrawn); + if (typeof s.defaultTextFont === 'string' && s.defaultTextFont.length > 0) { + setDefaultFont(s.defaultTextFont); + } + }; + const unBroadcast = window.pen.hub.onBroadcast((state: unknown) => { const s = state as { activeTool?: string; @@ -175,7 +366,10 @@ export function OverlayApp() { whiteboard?: Whiteboard; theme?: Theme; thicknessFlyoutOpen?: boolean; - }; + profile?: ProfileId; + calibration?: Calibration | null; + } & AiHubFields; + if ('calibration' in s) currentCalibration = s.calibration ?? 
null; if (s.activeTool) store.getState().setActiveTool(s.activeTool as never); if (typeof s.drawMode === 'boolean') store.getState().setDrawMode(s.drawMode); if (s.settings) store.getState().setSettings(s.settings); @@ -187,6 +381,8 @@ export function OverlayApp() { if (typeof s.thicknessFlyoutOpen === 'boolean') { toolbarFlyoutOpen = s.thicknessFlyoutOpen; } + applyAiFields(s); + if (s.profile) setActiveProfile(s.profile); }); void window.pen.hub.get().then((state) => { @@ -197,7 +393,10 @@ export function OverlayApp() { whiteboard: Whiteboard; theme?: Theme; thicknessFlyoutOpen?: boolean; - }; + profile?: ProfileId; + calibration?: Calibration | null; + } & AiHubFields; + if ('calibration' in s) currentCalibration = s.calibration ?? null; store.getState().setActiveTool(s.activeTool as never); store.getState().setDrawMode(s.drawMode); store.getState().setSettings(s.settings); @@ -206,6 +405,8 @@ export function OverlayApp() { if (typeof s.thicknessFlyoutOpen === 'boolean') { toolbarFlyoutOpen = s.thicknessFlyoutOpen; } + applyAiFields(s); + if (s.profile) setActiveProfile(s.profile); applyCursor(); }); @@ -216,6 +417,7 @@ export function OverlayApp() { unUndo(); unRedo(); unClear(); + unAnalyze(); unShot(); unSnip(); unSnipSel(); @@ -263,7 +465,13 @@ export function OverlayApp() { (Order matters: snipRectSig() goes last so the && chain resolves to the SnipRect itself for Show's accessor.) */} - {(rect) => } + {(rect) => ( + + )} ); @@ -273,15 +481,19 @@ export function OverlayApp() { // Anchored at the bottom-right corner of the rect with a small offset. // Falls back to inside-rect-bottom-right if the rect is too close to // the screen edge to fit the menu below it. -function SnipActions(props: { rect: SnipRect }) { - const MENU_W = 168; +function SnipActions(props: { + rect: SnipRect; + aiConfigured: boolean; + profile: ProfileId; +}) { + // Wider menu when the Ask AI button is showing so the four buttons + // fit in one row without wrapping. 
+ const MENU_W = () => (props.aiConfigured ? 232 : 168); const MENU_H = 32; const GAP = 8; - // Tracks an in-flight Copy so the button can show 'Copying…' and - // block double-clicks. Save is fire-and-forget (capture goes - // through the toolbar's save flow), so it doesn't get a busy state - // — the menu just dismisses immediately. - const [busy, setBusy] = createSignal<'copy' | null>(null); + // Tracks an in-flight Copy / AskAi so the button can show its + // busy label and block double-clicks. Save is fire-and-forget. + const [busy, setBusy] = createSignal<'copy' | 'ask' | null>(null); const clearSnip = (): void => { const displayId = window.pen.env.displayId(); @@ -316,21 +528,38 @@ function SnipActions(props: { rect: SnipRect }) { void window.pen.relay.screenshot(); exitToIdle(); }; + const onAskAi = async (): Promise => { + if (busy()) return; + setBusy('ask'); + try { + // Main captures + composites + broadcasts chat:session → + // toolbar's ChatPanel picks it up and fires the first AI turn. + // Selection is cleared by capture.ts during the capture (same + // path Save / Copy use). + await window.pen.snip.askAi(props.profile); + } finally { + setBusy(null); + // Don't exitToIdle here — the user might want to keep snipping + // while chatting. The chat panel is in the toolbar window; the + // overlay stays interactive. + } + }; const onCancel = (): void => clearSnip(); const positioned = (): { left: string; top: string } => { const r = props.rect; const winW = window.innerWidth; const winH = window.innerHeight; + const menuW = MENU_W(); // Default: below the rect, right-aligned to its right edge. - let left = r.x + r.w - MENU_W; + let left = r.x + r.w - menuW; let top = r.y + r.h + GAP; // If it would overflow the bottom of the screen, place ABOVE the rect. if (top + MENU_H > winH - 4) top = r.y - MENU_H - GAP; // If still off-screen (very tall rect near top), tuck inside the rect. 
if (top < 4) top = Math.min(r.y + r.h - MENU_H - GAP, winH - MENU_H - 4); // Horizontal clamping: never let the menu fall off either edge. - left = Math.max(4, Math.min(left, winW - MENU_W - 4)); + left = Math.max(4, Math.min(left, winW - menuW - 4)); return { left: `${left}px`, top: `${top}px` }; }; @@ -356,6 +585,16 @@ function SnipActions(props: { rect: SnipRect }) { > Save + + + + + + + {/* ── First-run setup wizard ── */} + +
+ + + +
Checking for Ollama…
+
+ +
+ Lekhini runs AI privately on your device using Ollama. Install + it once, then come back and re-check. +
+
+ + +
+
+ +
Ollama is installed but not running.
+ +
+ +
+ Download the recommended models (~{defaultModelsTotalGB()} GB + total). One-time — it runs in the background. +
+ + {(m) => ( +
+ + {m.label} · {(m.approxBytes / 1e9).toFixed(1)} GB + + + {m.installed ? '● Installed' : '—'} + + } + > + {(p) => ( + + {pullPct(p()) != null + ? `${pullPct(p())}%` + : p().status || 'pulling…'} + + )} + +
+ )} +
+ +
+ +
✓ You're ready — local AI is set up.
+ +
+
+ { + e.preventDefault(); + skipOnboarding(); + }} + > + Skip for now + +
+
+
@@ -904,6 +1411,365 @@ export function ToolbarApp() {
+
+ + + {/* ── Local-first AI (Ollama) ── */} +
+ Local AI (Ollama) + +
+ +
+ + Service + + + ● Running{ollamaStatus()?.version ? ` ${ollamaStatus()!.version}` : ''} + + + + + + + + + +
+ + +
+ Models per profile + + {(pid) => ( +
+
+ {PROFILES[pid].label} +
+ + +
+ )} +
+
+
+ Models + + {(m) => ( +
+
+ + {m.label} · {(m.approxBytes / 1e9).toFixed(1)} GB + + void pullModel(m.tag)} + > + Install + + } + > + + + } + > + {(p) => ( + + {pullPct(p()) != null + ? `${pullPct(p())}%` + : p().status || 'pulling…'} + + + )} + +
+
+ )} +
+
+
+
+ + {/* ── Autocorrect + default font ── */} +
+ Autocorrect typed text + +
+
+ Autocorrect drawn text + +
+
+ Default text font + +
+ + {/* ── Cloud provider (optional fallback) ── */} + +
+ Provider + +
+
+ Model + +
+
+ + API key + + ● Configured + + + + setAiKeyInput((e.currentTarget as HTMLInputElement).value) + } + /> +
+ + + + + +
+ + {(r) => ( +
+ {r().ok + ? `✓ ${r().message ?? 'OK'}${ + r().latencyMs ? ` · ${r().latencyMs}ms` : '' + }` + : `✗ ${r().message ?? 'Failed'}`} +
+ )} +
+ { + e.preventDefault(); + void window.pen.shell.openPath(PROVIDER_KEY_URLS[aiSelectedProvider()]); + }} + > + Get a key → + +
+
+ Profile prompts + + {(pid) => ( +
+
+ {PROFILES[pid].label} + + + +
+