From b9d43f30d7cadaed66b179135f0770e742014eca Mon Sep 17 00:00:00 2001
From: cce <51567+cce@users.noreply.github.com>
Date: Tue, 24 Feb 2026 22:05:08 -0500
Subject: [PATCH 1/2] fix: use node-easyocr's bundled venv python instead of requiring global pip install

node-easyocr has two bugs that break OCR for most users:

1. Its preinstall script creates a venv with easyocr/torch installed, but the runtime hardcodes `pythonPath = 'python3'`, ignoring the venv entirely. This means OCR only works if easyocr is installed globally, which Homebrew Python on macOS actively blocks.

2. On first run, easyocr downloads models and prints a progress bar to stdout. node-easyocr's IPC tries to JSON.parse every stdout chunk, so it chokes on "Progress: |...| 45% Complete" and rejects the init promise. The models download successfully in the background, but the cached rejected promise means all subsequent OCR calls in that session also fail.

Fix 1: Resolve node-easyocr's package location at runtime via createRequire and point pythonPath at its bundled venv/bin/python. This works regardless of where node_modules lives (local, global, or npx cache under ~/.npm/_npx/). Uses platform-aware paths (venv/bin/python on POSIX, venv/Scripts/python.exe on Windows), matching the same convention in node-easyocr's own setup-python-env.js.

Fix 2: Before calling node-easyocr's init, pre-download models via a separate subprocess with verbose=False. This runs with the full set of configured languages (not just English) so all needed recognition models are cached before node-easyocr's init runs. The subprocess exit code is checked so download failures surface clearly instead of falling through to a misleading error.

Also clears easyOCRInitPromise on failure so transient errors don't poison the process for the rest of the session, and updates error messages and README to reflect automatic setup (no more "pip install easyocr" instructions).
--- README.md | 12 +++------- src/core/ocr.ts | 60 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index eb37c18..a019eb4 100644 --- a/README.md +++ b/README.md @@ -1093,17 +1093,11 @@ ios_tap with x=187 y=420 The tool uses EasyOCR (Python-based) for text recognition. It provides excellent accuracy on colored backgrounds and stylized text common in mobile UIs. -### Installing EasyOCR (Required for OCR) +### EasyOCR Setup -```bash -# Install Python 3.10+ if not already installed -brew install python@3.11 - -# Install EasyOCR -pip3 install easyocr -``` +EasyOCR and its Python dependencies are installed automatically by the `node-easyocr` package. You just need Python 3.6+ available on your system (e.g. `brew install python@3.11`). -First run will download models (~100MB for English). Additional language models are downloaded automatically when configured. +The first OCR call will download language models (~100MB for English). Additional language models are downloaded automatically when configured. ### OCR Language Configuration diff --git a/src/core/ocr.ts b/src/core/ocr.ts index 333d390..0fd1afc 100644 --- a/src/core/ocr.ts +++ b/src/core/ocr.ts @@ -1,7 +1,10 @@ import { tmpdir } from "os"; -import { join } from "path"; +import { join, dirname } from "path"; import { writeFile, unlink } from "fs/promises"; +import { existsSync } from "fs"; import { randomUUID } from "crypto"; +import { createRequire } from "module"; +import { spawn } from "child_process"; export interface OCRWord { text: string; @@ -107,7 +110,7 @@ function getOCRLanguages(): string[] { /** * Initialize EasyOCR (Python-based, better for colored backgrounds) - * Requires Python and easyocr package: pip install easyocr + * Requires Python 3.6+ on the system; easyocr is provided by node-easyocr's bundled venv. 
*/ async function getEasyOCR(): Promise { if (easyOCRInstance) { @@ -119,12 +122,49 @@ async function getEasyOCR(): Promise { } easyOCRInitPromise = (async () => { - const languages = getOCRLanguages(); - const { EasyOCR } = await import("node-easyocr"); - const ocr = new EasyOCR(); - await withTimeout(ocr.init(languages), 30000, "EasyOCR init timeout - ensure Python and easyocr are installed: pip install easyocr"); - easyOCRInstance = ocr; - return ocr; + try { + const languages = getOCRLanguages(); + const { EasyOCR } = await import("node-easyocr"); + const ocr = new EasyOCR(); + + // node-easyocr's preinstall creates a venv with easyocr, but the + // runtime hardcodes pythonPath to system 'python3' which won't have it. + // Resolve the package location and point at its bundled venv instead. + const require = createRequire(import.meta.url); + const easyocrPkgDir = dirname(require.resolve("node-easyocr")); + // Matches the convention in node-easyocr's own setup-python-env.js + const venvBin = process.platform === "win32" ? "Scripts" : "bin"; + const venvExe = process.platform === "win32" ? "python.exe" : "python"; + const venvPython = join(easyocrPkgDir, "..", "venv", venvBin, venvExe); + if (existsSync(venvPython)) { + (ocr as any).pythonPath = venvPython; + } + + // node-easyocr's JSON parser chokes on easyocr's progress bar output + // during model downloads. Pre-download models with verbose=False so + // node-easyocr's init never sees non-JSON output on stdout. + // Always run with the configured languages since each language has its + // own recognition model that may need downloading. 
+ const pythonPath = (ocr as any).pythonPath || "python3"; + const langArg = JSON.stringify(languages); + await new Promise((resolve, reject) => { + const proc = spawn(pythonPath, [ + "-c", `import easyocr; easyocr.Reader(${langArg}, verbose=False)` + ]); + proc.on("close", (code) => { + if (code === 0) resolve(); + else reject(new Error(`EasyOCR model setup exited with code ${code}. Ensure Python 3.6+ is installed.`)); + }); + proc.on("error", reject); + }); + + await withTimeout(ocr.init(languages), 30000, "EasyOCR init timeout. Ensure Python 3.6+ is available on your system."); + easyOCRInstance = ocr; + return ocr; + } catch (error) { + easyOCRInitPromise = null; + throw error; + } })(); return easyOCRInitPromise; @@ -177,7 +217,7 @@ function toTapCoord( /** * Run OCR using EasyOCR - * Requires Python and easyocr package: pip install easyocr + * Requires Python 3.6+ on the system; easyocr is provided by node-easyocr's bundled venv. */ export async function recognizeText(imageBuffer: Buffer, options?: OCROptions): Promise { const scaleFactor = options?.scaleFactor ?? 1; @@ -190,7 +230,7 @@ export async function recognizeText(imageBuffer: Buffer, options?: OCROptions): ocr = await getEasyOCR(); } catch (error) { const message = error instanceof Error ? error.message : String(error); - throw new Error(`EasyOCR initialization failed. Ensure Python and easyocr are installed: pip install easyocr. Error: ${message}`); + throw new Error(`EasyOCR initialization failed. Ensure Python 3.6+ is available on your system. 
Error: ${message}`); } // Write buffer to temp file (EasyOCR requires file path) From 8cb49af8a0e17cb471978f450df190b56ea24bf6 Mon Sep 17 00:00:00 2001 From: cce <51567+cce@users.noreply.github.com> Date: Thu, 5 Mar 2026 21:42:39 -0500 Subject: [PATCH 2/2] fix: add timeout and stderr capture to model pre-download subprocess --- src/core/ocr.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/ocr.ts b/src/core/ocr.ts index 0fd1afc..b730b4f 100644 --- a/src/core/ocr.ts +++ b/src/core/ocr.ts @@ -147,16 +147,18 @@ async function getEasyOCR(): Promise { // own recognition model that may need downloading. const pythonPath = (ocr as any).pythonPath || "python3"; const langArg = JSON.stringify(languages); - await new Promise((resolve, reject) => { + await withTimeout(new Promise((resolve, reject) => { const proc = spawn(pythonPath, [ "-c", `import easyocr; easyocr.Reader(${langArg}, verbose=False)` ]); + let stderr = ""; + proc.stderr.on("data", (d: Buffer) => { stderr += d; }); proc.on("close", (code) => { if (code === 0) resolve(); - else reject(new Error(`EasyOCR model setup exited with code ${code}. Ensure Python 3.6+ is installed.`)); + else reject(new Error(`EasyOCR model setup failed (code ${code}): ${stderr.trim() || "unknown error"}. Ensure Python 3.6+ is installed.`)); }); proc.on("error", reject); - }); + }), 120000, "EasyOCR model download timeout -- check your network connection"); await withTimeout(ocr.init(languages), 30000, "EasyOCR init timeout. Ensure Python 3.6+ is available on your system."); easyOCRInstance = ocr;