From 1495c9c99908e8255925cf9548a43c7d52961161 Mon Sep 17 00:00:00 2001 From: u8array Date: Fri, 8 May 2026 16:26:37 +0200 Subject: [PATCH 1/4] fix(parser): decode multi-byte UTF-8 in ^FH hex escapes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously decodeFH() converted each hex pair to a character via String.fromCharCode, mangling multi-byte UTF-8 sequences (e.g. _C3_A4 became 'Ã¤' instead of 'ä'). Since the generator emits ^CI28, third-party ZPL using ^FH for non-ASCII glyphs round-tripped through mojibake. Collect contiguous escape pairs into a Uint8Array and decode the run via TextDecoder('utf-8'); invalid byte sequences fall back to U+FFFD. --- src/lib/zplParser.test.ts | 30 ++++++++++++++++++++++++++++++ src/lib/zplParser.ts | 19 +++++++++++++++---- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts index d54a3ced..49248f55 100644 --- a/src/lib/zplParser.test.ts +++ b/src/lib/zplParser.test.ts @@ -257,6 +257,36 @@ describe('parseZPL — ^FH hex escape', () => { const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_41BC^FS^XZ', 8); expect(props(objects[0]).content).toBe('ABC'); }); + + it('decodes UTF-8 multibyte escapes (German umlauts)', () => { + // _C3_A4 = ä, _C3_B6 = ö, _C3_BC = ü + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4_C3_B6_C3_BC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('äöü'); + }); + + it('decodes UTF-8 multibyte escapes (Nordic)', () => { + // _C3_A6 = æ, _C3_B8 = ø, _C3_A5 = å + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A6_C3_B8_C3_A5^FS^XZ', 8); + expect(props(objects[0]).content).toBe('æøå'); + }); + + it('decodes 3-byte UTF-8 escapes (Euro sign)', () => { + // _E2_82_AC = € + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_E2_82_AC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('€'); + }); + + it('decodes mixed ASCII and UTF-8 escapes in one field', () => { + // _48 
= H, _69 = i, then ä + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_48_69 _C3_A4^FS^XZ', 8); + expect(props(objects[0]).content).toBe('Hi ä'); + }); + + it('replaces invalid UTF-8 byte sequences with U+FFFD', () => { + // _C3 alone is a truncated 2-byte sequence + const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8); + expect(props(objects[0]).content).toBe('�'); + }); }); // ── ^FB field block ─────────────────────────────────────────────────────────── diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts index d6050562..fed7b3aa 100644 --- a/src/lib/zplParser.ts +++ b/src/lib/zplParser.ts @@ -89,12 +89,23 @@ function makeObj( } as unknown as LabelObject; } -/** Decode ^FH hex escapes: replaces {delimiter}XX with the character for hex XX */ +/** + * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the UTF-8 string + * for the byte sequence XX XX … . The generator emits ^CI28 (UTF-8), so a single + * non-ASCII glyph spans multiple escape pairs (e.g. `_C3_A4` → `ä`). Decoding + * pair-by-pair via fromCharCode would yield mojibake; we collect contiguous + * pairs into a Uint8Array and run TextDecoder on the whole run. Invalid byte + * sequences are replaced with U+FFFD by the decoder's default behaviour. + */ +const fhDecoder = new TextDecoder("utf-8"); function decodeFH(text: string, delimiter: string): string { const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); - return text.replace(new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"), (_, hex) => - String.fromCharCode(parseInt(hex, 16)), - ); + const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g"); + return text.replace(runRe, (run) => { + const pairRe = new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"); + const bytes = Array.from(run.matchAll(pairRe), ([, hex]) => parseInt(hex ?? 
"0", 16)); + return fhDecoder.decode(new Uint8Array(bytes)); + }); } /** From 46662f05df5d73c8f09ef39abacb9ccae8ee4bb8 Mon Sep 17 00:00:00 2001 From: u8array Date: Fri, 8 May 2026 16:29:25 +0200 Subject: [PATCH 2/4] refactor(parser): single-regex stride decode in decodeFH Replace inner matchAll + impossible-case fallback with a stride loop: the outer regex already guarantees the run is a sequence of fixed-width {delim}XX pairs, so byte offsets are computable directly. One regex allocation per call, fixed Uint8Array, no defensive ?? fallback. --- src/lib/zplParser.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts index fed7b3aa..41f3d49f 100644 --- a/src/lib/zplParser.ts +++ b/src/lib/zplParser.ts @@ -101,10 +101,13 @@ const fhDecoder = new TextDecoder("utf-8"); function decodeFH(text: string, delimiter: string): string { const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g"); + const stride = delimiter.length + 2; return text.replace(runRe, (run) => { - const pairRe = new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"); - const bytes = Array.from(run.matchAll(pairRe), ([, hex]) => parseInt(hex ?? 
"0", 16)); - return fhDecoder.decode(new Uint8Array(bytes)); + const bytes = new Uint8Array(run.length / stride); + for (let i = 0, b = 0; i < run.length; i += stride, b++) { + bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16); + } + return fhDecoder.decode(bytes); }); } From cc89d4432ef0b747b11275ec6771df744ae10f33 Mon Sep 17 00:00:00 2001 From: u8array Date: Fri, 8 May 2026 17:42:57 +0200 Subject: [PATCH 3/4] feat(parser): track ^CI for ^FH byte decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ^FH decoder was hardcoded to UTF-8, which broke single-byte encodings (^CI27 / ^CI0..13) where bytes like 0xE4 (= ä in CP1252) are valid but invalid as standalone UTF-8 → U+FFFD. Track ^CI state in the parser, map known values to TextDecoder labels: - ^CI28 → utf-8 - ^CI27 → windows-1252 - ^CI0..^CI13 → windows-1252 (ASCII-compatible legacy variants) - others → keep current decoder, surface as partial import Decoders are cached to avoid per-field allocation. Default remains UTF-8 to preserve round-trip fidelity for this app's own generator output. 
--- src/lib/zplParser.test.ts | 22 +++++++++++++++ src/lib/zplParser.ts | 58 +++++++++++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts index 49248f55..dbdac92d 100644 --- a/src/lib/zplParser.test.ts +++ b/src/lib/zplParser.test.ts @@ -287,6 +287,28 @@ describe('parseZPL — ^FH hex escape', () => { const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8); expect(props(objects[0]).content).toBe('�'); }); + + it('decodes ^CI27 (Windows-1252) single-byte escapes', () => { + // _E4 = 0xE4 = ä in CP1252 (in UTF-8 this would be invalid → U+FFFD) + const { objects } = parseZPL('^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4_F6_FC^FS^XZ', 8); + expect(props(objects[0]).content).toBe('äöü'); + }); + + it('switches encoding mid-label on ^CI', () => { + // first field UTF-8 (default), second field CP1252 + const zpl = + '^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4^FS' + + '^CI27^FH_^FO0,50^A0N,30,0^FD_E4^FS^XZ'; + const { objects } = parseZPL(zpl, 8); + expect(props(objects[0]).content).toBe('ä'); + expect(props(objects[1]).content).toBe('ä'); + }); + + it('reports unsupported ^CI N as partial import', () => { + // ^CI50 is not a real Zebra encoding — falls back to current decoder + const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8); + expect(importReport.partial).toContain('^CI50'); + }); }); // ── ^FB field block ─────────────────────────────────────────────────────────── diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts index 41f3d49f..1ae97996 100644 --- a/src/lib/zplParser.ts +++ b/src/lib/zplParser.ts @@ -90,15 +90,38 @@ function makeObj( } /** - * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the UTF-8 string - * for the byte sequence XX XX … . The generator emits ^CI28 (UTF-8), so a single - * non-ASCII glyph spans multiple escape pairs (e.g. `_C3_A4` → `ä`). 
Decoding - * pair-by-pair via fromCharCode would yield mojibake; we collect contiguous - * pairs into a Uint8Array and run TextDecoder on the whole run. Invalid byte - * sequences are replaced with U+FFFD by the decoder's default behaviour. + * Map a ^CI N parameter to a TextDecoder label. Most labels printed by this + * app use ^CI28 (UTF-8); ^CI27 is Windows-1252 (Zebra default for many EU + * setups); legacy ^CI0..13 are 7-bit-ASCII-compatible code-page variants for + * which Windows-1252 is a safe superset for the purposes of `^FH` decoding. + * Unsupported encodings (multi-byte UTF-16/32 variants, code page 850, …) + * fall back to UTF-8 with the command surfaced via importReport.partial. */ -const fhDecoder = new TextDecoder("utf-8"); -function decodeFH(text: string, delimiter: string): string { +function ciToEncoding(n: number): { label: string; supported: boolean } { + if (n === 28) return { label: "utf-8", supported: true }; + if (n === 27) return { label: "windows-1252", supported: true }; + if (n >= 0 && n <= 13) return { label: "windows-1252", supported: true }; + return { label: "utf-8", supported: false }; +} + +const decoderCache = new Map(); +function getDecoder(label: string): TextDecoder { + let dec = decoderCache.get(label); + if (!dec) { + dec = new TextDecoder(label); + decoderCache.set(label, dec); + } + return dec; +} + +/** + * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the string for + * the byte sequence XX XX … under the active ^CI encoding. A single non-ASCII + * glyph may span multiple escape pairs (e.g. `_C3_A4` → `ä` under UTF-8), so + * we collect contiguous pairs into a Uint8Array and run one TextDecoder pass + * per run. Invalid byte sequences become U+FFFD (decoder default). 
+ */ +function decodeFH(text: string, delimiter: string, decoder: TextDecoder): string { const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g"); const stride = delimiter.length + 2; @@ -107,7 +130,7 @@ function decodeFH(text: string, delimiter: string): string { for (let i = 0, b = 0; i < run.length; i += stride, b++) { bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16); } - return fhDecoder.decode(bytes); + return decoder.decode(bytes); }); } @@ -256,6 +279,11 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { let fhActive = false; let fhDelimiter = "_"; + // ^CI state (character set / encoding for ^FH byte decoding). Default UTF-8 + // matches our generator output; legacy ZPL using ^CI27 / ^CI0..13 sets a + // single-byte decoder before ^FH escapes are processed. + let fhDecoder = getDecoder("utf-8"); + // ^FT vs ^FO: store position type so we can reproduce exactly in re-export. let positionIsFT = false; @@ -299,7 +327,7 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { const flushField = () => { if (!fieldType || pendingFD === null) return; - const content = fhActive ? decodeFH(pendingFD, fhDelimiter) : pendingFD; + const content = fhActive ? decodeFH(pendingFD, fhDelimiter, fhDecoder) : pendingFD; const posType: "FT" | "FO" = positionIsFT ? "FT" : "FO"; const comment = takeComment(); @@ -1184,9 +1212,17 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { // assembled text reaches the next field object as one multi-line comment. FX: appendComment, + // ^CI N: character set / encoding for ^FH byte decoding. Mapped to a + // TextDecoder; unsupported variants (UTF-16/32, code page 850) keep the + // current decoder and surface as a partial import. 
+ CI: (p) => { + const enc = ciToEncoding(int(p[0])); + if (enc.supported) fhDecoder = getDecoder(enc.label); + else partialCmds.add(`^CI${int(p[0])}`); + }, + // These commands carry no canvas-design information and are silently // discarded so they do not pollute importReport.unknown. - CI: noop, // character set encoding (^CI28 = UTF-8 is the browser default) FN: noop, // field number — variable data placeholder (template feature) FV: noop, // field variable — supplies data for ^FN at print time FC: noop, // field clock — inserts date/time (requires printer RTC) From cbf06a93bc297e3d3b304a4b2c6277f388ac660d Mon Sep 17 00:00:00 2001 From: u8array Date: Fri, 8 May 2026 17:46:54 +0200 Subject: [PATCH 4/4] fix(parser): reset ^FH decoder to UTF-8 on unsupported ^CI Previously an unknown ^CI N kept whatever decoder was active before, contradicting the comment that promised a UTF-8 fallback. If ^CI27 preceded the unknown command, CP1252 stayed silently active. Always rebind to ciToEncoding's label (which is 'utf-8' for the unsupported branch) so behaviour matches the documentation and is predictable regardless of prior state. 
--- src/lib/zplParser.test.ts | 13 ++++++++++++- src/lib/zplParser.ts | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts index dbdac92d..712bc7c2 100644 --- a/src/lib/zplParser.test.ts +++ b/src/lib/zplParser.test.ts @@ -305,10 +305,21 @@ describe('parseZPL — ^FH hex escape', () => { }); it('reports unsupported ^CI N as partial import', () => { - // ^CI50 is not a real Zebra encoding — falls back to current decoder + // ^CI50 is not a real Zebra encoding — falls back to UTF-8 default const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8); expect(importReport.partial).toContain('^CI50'); }); + + it('resets decoder to UTF-8 default on unsupported ^CI', () => { + // After ^CI27 sets CP1252, an unknown ^CI50 must fall back to UTF-8 + // (not keep CP1252) so behaviour is predictable. + const zpl = + '^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4^FS' + + '^CI50^FH_^FO0,50^A0N,30,0^FD_C3_A4^FS^XZ'; + const { objects } = parseZPL(zpl, 8); + expect(props(objects[0]).content).toBe('ä'); // CP1252 + expect(props(objects[1]).content).toBe('ä'); // UTF-8 (after reset) + }); }); // ── ^FB field block ─────────────────────────────────────────────────────────── diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts index 1ae97996..02a6c1fa 100644 --- a/src/lib/zplParser.ts +++ b/src/lib/zplParser.ts @@ -1217,8 +1217,8 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL { // current decoder and surface as a partial import. CI: (p) => { const enc = ciToEncoding(int(p[0])); - if (enc.supported) fhDecoder = getDecoder(enc.label); - else partialCmds.add(`^CI${int(p[0])}`); + fhDecoder = getDecoder(enc.label); + if (!enc.supported) partialCmds.add(`^CI${int(p[0])}`); }, // These commands carry no canvas-design information and are silently