Skip to content

Commit 569fd8b

Browse files
committed
fix: upgrade Transformers.js to 4.0.0-next.7 for GLM-OCR Glm46VImageProcessor support
1 parent dd889c4 commit 569fd8b

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

ai-worker-glm-ocr.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* ping/pong → health check
1313
*/
1414

15-
const TRANSFORMERS_URL = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.6";
15+
const TRANSFORMERS_URL = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.7";
1616

1717
// Model host — downloads ONNX models from textagent HuggingFace org
1818
const MODEL_ORG_FALLBACK = "onnx-community";
@@ -23,7 +23,7 @@ let MODEL_LABEL = "GLM-OCR (1.5B)";
2323

2424
// Dynamically loaded modules
2525
let AutoProcessor = null;
26-
let AutoModelForVision2Seq = null;
26+
let AutoModelForImageTextToText = null;
2727
let load_image = null;
2828
let TextStreamer = null;
2929

@@ -43,7 +43,7 @@ async function loadModel() {
4343
try {
4444
const transformers = await import(TRANSFORMERS_URL);
4545
AutoProcessor = transformers.AutoProcessor;
46-
AutoModelForVision2Seq = transformers.AutoModelForVision2Seq;
46+
AutoModelForImageTextToText = transformers.AutoModelForImageTextToText;
4747
load_image = transformers.load_image;
4848
TextStreamer = transformers.TextStreamer;
4949
} catch (importError) {
@@ -97,7 +97,7 @@ async function loadModel() {
9797
});
9898

9999
self.postMessage({ type: "status", message: `Loading ${MODEL_LABEL} model (${device.toUpperCase()})...` });
100-
model = await AutoModelForVision2Seq.from_pretrained(MODEL_ID, {
100+
model = await AutoModelForImageTextToText.from_pretrained(MODEL_ID, {
101101
dtype: {
102102
embed_tokens: "q4f16",
103103
vision_encoder: "q4f16",
@@ -184,8 +184,8 @@ async function processDocument({ imageData, outputFormat = 'text', doImageSplitt
184184

185185
// Apply chat template and process inputs
186186
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
187-
const inputs = await processor(text, [image], {
188-
do_image_splitting: doImageSplitting,
187+
const inputs = await processor(text, image, {
188+
add_special_tokens: false,
189189
});
190190

191191
// Generate with streaming
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# CHANGELOG — GLM-OCR Transformers.js Compatibility Fix
2+
3+
## 2026-03-18
4+
5+
Fixed `Unknown image_processor_type: 'Glm46VImageProcessor'` error that prevented GLM-OCR model from loading.
6+
7+
### Root Cause
8+
Transformers.js `4.0.0-next.6` did not include support for the `Glm46VImageProcessor` class required by the GLM-OCR ONNX model. Support was added in [PR #1582](https://github.com/huggingface/transformers.js/pull/1582).
9+
10+
### Changes
11+
12+
- **`ai-worker-glm-ocr.js`** + **`public/ai-worker-glm-ocr.js`** (both copies):
13+
- Upgraded Transformers.js from `4.0.0-next.6` → `4.0.0-next.7`
14+
- Switched model class from `AutoModelForVision2Seq` → `AutoModelForImageTextToText`
15+
- Fixed processor call: `processor(text, image, { add_special_tokens: false })` (was `processor(text, [image], { do_image_splitting })`)

public/ai-worker-glm-ocr.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* ping/pong → health check
1313
*/
1414

15-
const TRANSFORMERS_URL = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.6";
15+
const TRANSFORMERS_URL = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.7";
1616

1717
// Model host — downloads ONNX models from textagent HuggingFace org
1818
const MODEL_ORG_FALLBACK = "onnx-community";
@@ -23,7 +23,7 @@ let MODEL_LABEL = "GLM-OCR (1.5B)";
2323

2424
// Dynamically loaded modules
2525
let AutoProcessor = null;
26-
let AutoModelForVision2Seq = null;
26+
let AutoModelForImageTextToText = null;
2727
let load_image = null;
2828
let TextStreamer = null;
2929

@@ -43,7 +43,7 @@ async function loadModel() {
4343
try {
4444
const transformers = await import(TRANSFORMERS_URL);
4545
AutoProcessor = transformers.AutoProcessor;
46-
AutoModelForVision2Seq = transformers.AutoModelForVision2Seq;
46+
AutoModelForImageTextToText = transformers.AutoModelForImageTextToText;
4747
load_image = transformers.load_image;
4848
TextStreamer = transformers.TextStreamer;
4949
} catch (importError) {
@@ -97,7 +97,7 @@ async function loadModel() {
9797
});
9898

9999
self.postMessage({ type: "status", message: `Loading ${MODEL_LABEL} model (${device.toUpperCase()})...` });
100-
model = await AutoModelForVision2Seq.from_pretrained(MODEL_ID, {
100+
model = await AutoModelForImageTextToText.from_pretrained(MODEL_ID, {
101101
dtype: {
102102
embed_tokens: "q4f16",
103103
vision_encoder: "q4f16",
@@ -184,8 +184,8 @@ async function processDocument({ imageData, outputFormat = 'text', doImageSplitt
184184

185185
// Apply chat template and process inputs
186186
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
187-
const inputs = await processor(text, [image], {
188-
do_image_splitting: doImageSplitting,
187+
const inputs = await processor(text, image, {
188+
add_special_tokens: false,
189189
});
190190

191191
// Generate with streaming

0 commit comments

Comments
 (0)