From 1495c9c99908e8255925cf9548a43c7d52961161 Mon Sep 17 00:00:00 2001
From: u8array <alexanderleinich1@gmail.com>
Date: Fri, 8 May 2026 16:26:37 +0200
Subject: [PATCH 1/4] fix(parser): decode multi-byte UTF-8 in ^FH hex escapes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously decodeFH() converted each hex pair to a character via
String.fromCharCode, mangling multi-byte UTF-8 sequences (e.g. _C3_A4
became 'Ã¤' instead of 'ä'). Since the generator emits ^CI28, third-party
ZPL using ^FH for non-ASCII glyphs round-tripped through mojibake.

Collect contiguous escape pairs into a Uint8Array and decode the run via
TextDecoder('utf-8'); invalid byte sequences fall back to U+FFFD.
---
 src/lib/zplParser.test.ts | 30 ++++++++++++++++++++++++++++++
 src/lib/zplParser.ts      | 19 +++++++++++++++----
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts
index d54a3ced..49248f55 100644
--- a/src/lib/zplParser.test.ts
+++ b/src/lib/zplParser.test.ts
@@ -257,6 +257,36 @@ describe('parseZPL — ^FH hex escape', () => {
     const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_41BC^FS^XZ', 8);
     expect(props(objects[0]).content).toBe('ABC');
   });
+
+  it('decodes UTF-8 multibyte escapes (German umlauts)', () => {
+    // _C3_A4 = ä, _C3_B6 = ö, _C3_BC = ü
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4_C3_B6_C3_BC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('äöü');
+  });
+
+  it('decodes UTF-8 multibyte escapes (Nordic)', () => {
+    // _C3_A6 = æ, _C3_B8 = ø, _C3_A5 = å
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3_A6_C3_B8_C3_A5^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('æøå');
+  });
+
+  it('decodes 3-byte UTF-8 escapes (Euro sign)', () => {
+    // _E2_82_AC = €
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_E2_82_AC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('€');
+  });
+
+  it('decodes mixed ASCII and UTF-8 escapes in one field', () => {
+    // _48 = H, _69 = i, then ä
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_48_69 _C3_A4^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('Hi ä');
+  });
+
+  it('replaces invalid UTF-8 byte sequences with U+FFFD', () => {
+    // _C3 alone is a truncated 2-byte sequence
+    const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('�');
+  });
 });
 
 // ── ^FB field block ───────────────────────────────────────────────────────────
diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts
index d6050562..fed7b3aa 100644
--- a/src/lib/zplParser.ts
+++ b/src/lib/zplParser.ts
@@ -89,12 +89,23 @@ function makeObj(
   } as unknown as LabelObject;
 }
 
-/** Decode ^FH hex escapes: replaces {delimiter}XX with the character for hex XX */
+/**
+ * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the UTF-8 string
+ * for the byte sequence XX XX … . The generator emits ^CI28 (UTF-8), so a single
+ * non-ASCII glyph spans multiple escape pairs (e.g. `_C3_A4` → `ä`). Decoding
+ * pair-by-pair via fromCharCode would yield mojibake; we collect contiguous
+ * pairs into a Uint8Array and run TextDecoder on the whole run. Invalid byte
+ * sequences are replaced with U+FFFD by the decoder's default behaviour.
+ */
+const fhDecoder = new TextDecoder("utf-8");
 function decodeFH(text: string, delimiter: string): string {
   const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-  return text.replace(new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g"), (_, hex) =>
-    String.fromCharCode(parseInt(hex, 16)),
-  );
+  const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g");
+  return text.replace(runRe, (run) => {
+    const pairRe = new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g");
+    const bytes = Array.from(run.matchAll(pairRe), ([, hex]) => parseInt(hex ?? "0", 16));
+    return fhDecoder.decode(new Uint8Array(bytes));
+  });
 }
 
 /**

From 46662f05df5d73c8f09ef39abacb9ccae8ee4bb8 Mon Sep 17 00:00:00 2001
From: u8array <alexanderleinich1@gmail.com>
Date: Fri, 8 May 2026 16:29:25 +0200
Subject: [PATCH 2/4] refactor(parser): single-regex stride decode in decodeFH

Replace inner matchAll + impossible-case fallback with a stride loop:
the outer regex already guarantees the run is a sequence of fixed-width
{delim}XX pairs, so byte offsets are computable directly. One regex
allocation per call, fixed Uint8Array, no defensive ?? fallback.
---
 src/lib/zplParser.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts
index fed7b3aa..41f3d49f 100644
--- a/src/lib/zplParser.ts
+++ b/src/lib/zplParser.ts
@@ -101,10 +101,13 @@ const fhDecoder = new TextDecoder("utf-8");
 function decodeFH(text: string, delimiter: string): string {
   const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
   const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g");
+  const stride = delimiter.length + 2;
   return text.replace(runRe, (run) => {
-    const pairRe = new RegExp(`${escaped}([0-9A-Fa-f]{2})`, "g");
-    const bytes = Array.from(run.matchAll(pairRe), ([, hex]) => parseInt(hex ?? "0", 16));
-    return fhDecoder.decode(new Uint8Array(bytes));
+    const bytes = new Uint8Array(run.length / stride);
+    for (let i = 0, b = 0; i < run.length; i += stride, b++) {
+      bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16);
+    }
+    return fhDecoder.decode(bytes);
   });
 }
 

From cc89d4432ef0b747b11275ec6771df744ae10f33 Mon Sep 17 00:00:00 2001
From: u8array <alexanderleinich1@gmail.com>
Date: Fri, 8 May 2026 17:42:57 +0200
Subject: [PATCH 3/4] feat(parser): track ^CI for ^FH byte decoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ^FH decoder was hardcoded to UTF-8, which broke single-byte
encodings (^CI27 / ^CI0..13) where bytes like 0xE4 (= ä in CP1252) are
valid but invalid as standalone UTF-8 → U+FFFD.

Track ^CI state in the parser, map known values to TextDecoder labels:
  - ^CI28        → utf-8
  - ^CI27        → windows-1252
  - ^CI0..^CI13  → windows-1252 (best-effort stand-in for legacy CP850-based sets)
  - others       → keep current decoder, surface as partial import

Decoders are cached to avoid per-field allocation. Default remains UTF-8
to preserve round-trip fidelity for this app's own generator output.
---
 src/lib/zplParser.test.ts | 22 +++++++++++++++
 src/lib/zplParser.ts      | 58 +++++++++++++++++++++++++++++++--------
 2 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts
index 49248f55..dbdac92d 100644
--- a/src/lib/zplParser.test.ts
+++ b/src/lib/zplParser.test.ts
@@ -287,6 +287,28 @@ describe('parseZPL — ^FH hex escape', () => {
     const { objects } = parseZPL('^XA^FH_^FO0,0^A0N,30,0^FD_C3^FS^XZ', 8);
     expect(props(objects[0]).content).toBe('�');
   });
+
+  it('decodes ^CI27 (Windows-1252) single-byte escapes', () => {
+    // _E4 = 0xE4 = ä in CP1252 (in UTF-8 this would be invalid → U+FFFD)
+    const { objects } = parseZPL('^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4_F6_FC^FS^XZ', 8);
+    expect(props(objects[0]).content).toBe('äöü');
+  });
+
+  it('switches encoding mid-label on ^CI', () => {
+    // first field UTF-8 (default), second field CP1252
+    const zpl =
+      '^XA^FH_^FO0,0^A0N,30,0^FD_C3_A4^FS' +
+      '^CI27^FH_^FO0,50^A0N,30,0^FD_E4^FS^XZ';
+    const { objects } = parseZPL(zpl, 8);
+    expect(props(objects[0]).content).toBe('ä');
+    expect(props(objects[1]).content).toBe('ä');
+  });
+
+  it('reports unsupported ^CI N as partial import', () => {
+    // ^CI50 is not a real Zebra encoding — falls back to current decoder
+    const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8);
+    expect(importReport.partial).toContain('^CI50');
+  });
 });
 
 // ── ^FB field block ───────────────────────────────────────────────────────────
diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts
index 41f3d49f..1ae97996 100644
--- a/src/lib/zplParser.ts
+++ b/src/lib/zplParser.ts
@@ -90,15 +90,38 @@ function makeObj(
 }
 
 /**
- * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the UTF-8 string
- * for the byte sequence XX XX … . The generator emits ^CI28 (UTF-8), so a single
- * non-ASCII glyph spans multiple escape pairs (e.g. `_C3_A4` → `ä`). Decoding
- * pair-by-pair via fromCharCode would yield mojibake; we collect contiguous
- * pairs into a Uint8Array and run TextDecoder on the whole run. Invalid byte
- * sequences are replaced with U+FFFD by the decoder's default behaviour.
+ * Map a ^CI N parameter to a TextDecoder label. Most labels printed by this
+ * app use ^CI28 (UTF-8); ^CI27 is Windows-1252 (Zebra default for many EU
+ * setups); legacy ^CI0..13 (^CI13 is Zebra Code Page 850) are decoded as
+ * Windows-1252 on a best-effort basis: bytes >= 0x80 may map to other glyphs.
+ * Unsupported encodings (multi-byte UTF-16/32 variants, other code pages, …)
+ * fall back to UTF-8 with the command surfaced via importReport.partial.
  */
-const fhDecoder = new TextDecoder("utf-8");
-function decodeFH(text: string, delimiter: string): string {
+function ciToEncoding(n: number): { label: string; supported: boolean } {
+  if (n === 28) return { label: "utf-8", supported: true };
+  if (n === 27) return { label: "windows-1252", supported: true };
+  if (n >= 0 && n <= 13) return { label: "windows-1252", supported: true };
+  return { label: "utf-8", supported: false };
+}
+
+const decoderCache = new Map<string, TextDecoder>();
+function getDecoder(label: string): TextDecoder {
+  let dec = decoderCache.get(label);
+  if (!dec) {
+    dec = new TextDecoder(label);
+    decoderCache.set(label, dec);
+  }
+  return dec;
+}
+
+/**
+ * Decode ^FH hex escapes: replaces runs of {delimiter}XX with the string for
+ * the byte sequence XX XX … under the active ^CI encoding. A single non-ASCII
+ * glyph may span multiple escape pairs (e.g. `_C3_A4` → `ä` under UTF-8), so
+ * we collect contiguous pairs into a Uint8Array and run one TextDecoder pass
+ * per run. Invalid byte sequences become U+FFFD (decoder default).
+ */
+function decodeFH(text: string, delimiter: string, decoder: TextDecoder): string {
   const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
   const runRe = new RegExp(`(?:${escaped}[0-9A-Fa-f]{2})+`, "g");
   const stride = delimiter.length + 2;
@@ -107,7 +130,7 @@ function decodeFH(text: string, delimiter: string): string {
     for (let i = 0, b = 0; i < run.length; i += stride, b++) {
       bytes[b] = parseInt(run.slice(i + delimiter.length, i + stride), 16);
     }
-    return fhDecoder.decode(bytes);
+    return decoder.decode(bytes);
   });
 }
 
@@ -256,6 +279,11 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
   let fhActive = false;
   let fhDelimiter = "_";
 
+  // ^CI state (character set / encoding for ^FH byte decoding). Default UTF-8
+  // matches our generator output; legacy ZPL using ^CI27 / ^CI0..13 sets a
+  // single-byte decoder before ^FH escapes are processed.
+  let fhDecoder = getDecoder("utf-8");
+
   // ^FT vs ^FO: store position type so we can reproduce exactly in re-export.
   let positionIsFT = false;
 
@@ -299,7 +327,7 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
 
   const flushField = () => {
     if (!fieldType || pendingFD === null) return;
-    const content = fhActive ? decodeFH(pendingFD, fhDelimiter) : pendingFD;
+    const content = fhActive ? decodeFH(pendingFD, fhDelimiter, fhDecoder) : pendingFD;
     const posType: "FT" | "FO" = positionIsFT ? "FT" : "FO";
     const comment = takeComment();
 
@@ -1184,9 +1212,17 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
     // assembled text reaches the next field object as one multi-line comment.
     FX: appendComment,
 
+    // ^CI N: character set / encoding for ^FH bytes, mapped to a TextDecoder.
+    // Unsupported values (UTF-16/32, code page 850) fall back to UTF-8 as the
+    // current decoder and surface as a partial import.
+    CI: (p) => {
+      const enc = ciToEncoding(int(p[0]));
+      if (enc.supported) fhDecoder = getDecoder(enc.label);
+      else partialCmds.add(`^CI${int(p[0])}`);
+    },
+
     // These commands carry no canvas-design information and are silently
     // discarded so they do not pollute importReport.unknown.
-    CI: noop, // character set encoding (^CI28 = UTF-8 is the browser default)
     FN: noop, // field number — variable data placeholder (template feature)
     FV: noop, // field variable — supplies data for ^FN at print time
     FC: noop, // field clock — inserts date/time (requires printer RTC)

From cbf06a93bc297e3d3b304a4b2c6277f388ac660d Mon Sep 17 00:00:00 2001
From: u8array <alexanderleinich1@gmail.com>
Date: Fri, 8 May 2026 17:46:54 +0200
Subject: [PATCH 4/4] fix(parser): reset ^FH decoder to UTF-8 on unsupported
 ^CI

Previously an unknown ^CI N kept whatever decoder was active before,
contradicting the comment that promised a UTF-8 fallback. If ^CI27
preceded the unknown command, CP1252 stayed silently active.

Always rebind to ciToEncoding's label (which is 'utf-8' for the
unsupported branch) so behaviour matches the documentation and is
predictable regardless of prior state.
---
 src/lib/zplParser.test.ts | 13 ++++++++++++-
 src/lib/zplParser.ts      |  4 ++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/lib/zplParser.test.ts b/src/lib/zplParser.test.ts
index dbdac92d..712bc7c2 100644
--- a/src/lib/zplParser.test.ts
+++ b/src/lib/zplParser.test.ts
@@ -305,10 +305,21 @@ describe('parseZPL — ^FH hex escape', () => {
   });
 
   it('reports unsupported ^CI N as partial import', () => {
-    // ^CI50 is not a real Zebra encoding — falls back to current decoder
+    // ^CI50 is not a real Zebra encoding — falls back to UTF-8 default
     const { importReport } = parseZPL('^XA^CI50^FH_^FO0,0^A0N,30,0^FDx^FS^XZ', 8);
     expect(importReport.partial).toContain('^CI50');
   });
+
+  it('resets decoder to UTF-8 default on unsupported ^CI', () => {
+    // After ^CI27 sets CP1252, an unknown ^CI50 must fall back to UTF-8
+    // (not keep CP1252) so behaviour is predictable.
+    const zpl =
+      '^XA^CI27^FH_^FO0,0^A0N,30,0^FD_E4^FS' +
+      '^CI50^FH_^FO0,50^A0N,30,0^FD_C3_A4^FS^XZ';
+    const { objects } = parseZPL(zpl, 8);
+    expect(props(objects[0]).content).toBe('ä');  // CP1252
+    expect(props(objects[1]).content).toBe('ä');  // UTF-8 (after reset)
+  });
 });
 
 // ── ^FB field block ───────────────────────────────────────────────────────────
diff --git a/src/lib/zplParser.ts b/src/lib/zplParser.ts
index 1ae97996..02a6c1fa 100644
--- a/src/lib/zplParser.ts
+++ b/src/lib/zplParser.ts
@@ -1217,8 +1217,8 @@ export function parseZPL(zpl: string, dpmm = 8): ParsedZPL {
     // current decoder and surface as a partial import.
     CI: (p) => {
       const enc = ciToEncoding(int(p[0]));
-      if (enc.supported) fhDecoder = getDecoder(enc.label);
-      else partialCmds.add(`^CI${int(p[0])}`);
+      fhDecoder = getDecoder(enc.label);
+      if (!enc.supported) partialCmds.add(`^CI${int(p[0])}`);
     },
 
     // These commands carry no canvas-design information and are silently