From 5fe77772d36978f67c3db3a86268310c9cfcb795 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 17 Feb 2026 11:15:11 -0500
Subject: [PATCH 1/3] handle whatwg encoding standard overrides

---
 compile_flags.txt                           |   1 +
 deps/rust/Cargo.lock                        |   1 +
 deps/rust/cargo.bzl                         |   1 +
 deps/rust/crates/BUILD.bazel                |  12 +
 deps/rust/crates/defs.bzl                   |   2 +
 src/rust/encoding/BUILD.bazel               |  11 +
 src/rust/encoding/lib.rs                    | 150 +++++++++++
 src/workerd/api/BUILD.bazel                 |  13 +-
 src/workerd/api/encoding-legacy.c++         |  68 +++++
 src/workerd/api/encoding-legacy.h           |  51 ++++
 src/workerd/api/encoding-shared.h           |  81 ++++++
 src/workerd/api/encoding.c++                | 166 +++---------
 src/workerd/api/encoding.h                  | 107 +-------
 src/workerd/api/tests/encoding-test.js      | 263 ++++++++++++++++++++
 src/workerd/api/tests/encoding-test.wd-test |   2 +-
 src/workerd/io/compatibility-date.capnp     |   7 +
 src/wpt/BUILD.bazel                         |   5 +-
 src/wpt/encoding-test.ts                    | 234 ++++++++++++++---
 18 files changed, 895 insertions(+), 280 deletions(-)
 create mode 100644 src/rust/encoding/BUILD.bazel
 create mode 100644 src/rust/encoding/lib.rs
 create mode 100644 src/workerd/api/encoding-legacy.c++
 create mode 100644 src/workerd/api/encoding-legacy.h
 create mode 100644 src/workerd/api/encoding-shared.h

diff --git a/compile_flags.txt b/compile_flags.txt
index 68e865fbfa7..a9b5d8088d1 100644
--- a/compile_flags.txt
+++ b/compile_flags.txt
@@ -63,6 +63,7 @@
 -isystembazel-bin/src/rust/jsg/_virtual_includes/lib.rs@cxx
 -isystembazel-bin/src/rust/jsg/_virtual_includes/v8.rs@cxx
 -isystembazel-bin/src/rust/jsg-test/_virtual_includes/ffi-hdrs
+-isystembazel-bin/src/rust/encoding/_virtual_includes/lib.rs@cxx
 -isystembazel-bin/src/rust/jsg-test/_virtual_includes/lib.rs@cxx
 -isystembazel-bin/src/rust/gen-compile-cache/_virtual_includes/cxx-bridge
 -isystembazel-bin/src/rust/gen-compile-cache/_virtual_includes/gen-compile-cache@cxx
diff --git a/deps/rust/Cargo.lock b/deps/rust/Cargo.lock
index efd1e052306..4229c1b1e60 100644
--- a/deps/rust/Cargo.lock
+++ b/deps/rust/Cargo.lock
@@ -451,6 +451,7 @@ dependencies = [
  "clang-ast",
  "clap",
  "codespan-reporting",
+ "encoding_rs",
  "flate2",
  "foldhash",
  "futures",
diff --git a/deps/rust/cargo.bzl b/deps/rust/cargo.bzl
index 904afe40f3c..28e46227e8a 100644
--- a/deps/rust/cargo.bzl
+++ b/deps/rust/cargo.bzl
@@ -17,6 +17,7 @@ PACKAGES = WORKERD_CXX_PACKAGES | {
     "clang-ast": crate.spec(version = "0"),
     "clap": crate.spec(version = "4", default_features = False, features = ["derive", "std", "help"]),
     "codespan-reporting": crate.spec(version = "0"),
+    "encoding_rs": crate.spec(version = "0"),
     "flate2": crate.spec(version = "1"),
     "futures": crate.spec(version = "0"),
     "lol_html_c_api": crate.spec(git = "https://github.com/cloudflare/lol-html", tag = "v2.7.1"),
diff --git a/deps/rust/crates/BUILD.bazel b/deps/rust/crates/BUILD.bazel
index 6836d2e47e7..71c72b11088 100644
--- a/deps/rust/crates/BUILD.bazel
+++ b/deps/rust/crates/BUILD.bazel
@@ -151,6 +151,18 @@ alias(
     tags = ["manual"],
 )
 
+alias(
+    name = "encoding_rs-0.8.35",
+    actual = "@crates_vendor__encoding_rs-0.8.35//:encoding_rs",
+    tags = ["manual"],
+)
+
+alias(
+    name = "encoding_rs",
+    actual = "@crates_vendor__encoding_rs-0.8.35//:encoding_rs",
+    tags = ["manual"],
+)
+
 alias(
     name = "flate2-1.1.9",
     actual = "@crates_vendor__flate2-1.1.9//:flate2",
diff --git a/deps/rust/crates/defs.bzl b/deps/rust/crates/defs.bzl
index 4c647a91c27..a6cd505691c 100644
--- a/deps/rust/crates/defs.bzl
+++ b/deps/rust/crates/defs.bzl
@@ -305,6 +305,7 @@ _NORMAL_DEPENDENCIES = {
             "clang-ast": Label("@crates_vendor//:clang-ast-0.1.35"),
             "clap": Label("@crates_vendor//:clap-4.5.58"),
             "codespan-reporting": Label("@crates_vendor//:codespan-reporting-0.13.1"),
+            "encoding_rs": Label("@crates_vendor//:encoding_rs-0.8.35"),
             "flate2": Label("@crates_vendor//:flate2-1.1.9"),
             "foldhash": Label("@crates_vendor//:foldhash-0.2.0"),
             "futures": Label("@crates_vendor//:futures-0.3.31"),
@@ -2957,6 +2958,7 @@ def crate_repositories():
         struct(repo = "crates_vendor__clang-ast-0.1.35", is_dev_dep = False),
         struct(repo = "crates_vendor__clap-4.5.58", is_dev_dep = False),
         struct(repo = "crates_vendor__codespan-reporting-0.13.1", is_dev_dep = False),
+        struct(repo = "crates_vendor__encoding_rs-0.8.35", is_dev_dep = False),
         struct(repo = "crates_vendor__flate2-1.1.9", is_dev_dep = False),
         struct(repo = "crates_vendor__foldhash-0.2.0", is_dev_dep = False),
         struct(repo = "crates_vendor__futures-0.3.31", is_dev_dep = False),
diff --git a/src/rust/encoding/BUILD.bazel b/src/rust/encoding/BUILD.bazel
new file mode 100644
index 00000000000..0c332064b7b
--- /dev/null
+++ b/src/rust/encoding/BUILD.bazel
@@ -0,0 +1,11 @@
+load("//:build/wd_rust_crate.bzl", "wd_rust_crate")
+
+wd_rust_crate(
+    name = "encoding",
+    cxx_bridge_src = "lib.rs",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//src/rust/cxx-integration",
+        "@crates_vendor//:encoding_rs",
+    ],
+)
diff --git a/src/rust/encoding/lib.rs b/src/rust/encoding/lib.rs
new file mode 100644
index 00000000000..b6b112156ff
--- /dev/null
+++ b/src/rust/encoding/lib.rs
@@ -0,0 +1,150 @@
+// Copyright (c) 2017-2022 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+//! WHATWG Encoding Standard legacy decoders via `encoding_rs`.
+//!
+//! Exposes a streaming decoder to C++ via CXX bridge. All legacy encodings
+//! (CJK multi-byte, single-byte windows-1252, and x-user-defined) are handled
+//! by a single opaque `Decoder` type backed by `encoding_rs::Decoder`.
+
+#[cxx::bridge(namespace = "workerd::rust::encoding")]
+mod ffi {
+    /// Legacy encoding types supported by the Rust decoder.
+    /// Shared between C++ and Rust.
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    #[repr(u16)]
+    enum Encoding {
+        Big5,
+        EucJp,
+        EucKr,
+        Gb18030,
+        Gbk,
+        Iso2022Jp,
+        ShiftJis,
+        Windows1252,
+        XUserDefined,
+    }
+
+    /// Result of a decode operation.
+    struct DecodeResult {
+        /// UTF-16 output.
+        output: Vec<u16>,
+        /// True if a fatal decoding error was encountered. Only meaningful
+        /// when the caller requested fatal mode — in replacement mode errors
+        /// are silently replaced with U+FFFD and this flag is not set.
+        had_error: bool,
+    }
+
+    extern "Rust" {
+        type Decoder;
+
+        /// Create a new streaming decoder for the given encoding.
+        // CXX bridge requires Box for opaque types.
+        #[expect(clippy::unnecessary_box_returns)]
+        fn new_decoder(encoding: Encoding) -> Box<Decoder>;
+
+        /// Decode a chunk of bytes. Set `flush` to true on the final chunk.
+        /// When `fatal` is true and an error is encountered, `had_error` is
+        /// set, the output may be incomplete, and the decoder is reset.
+        fn decode(decoder: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> DecodeResult;
+
+        /// Reset the decoder to its initial state.
+        fn reset(decoder: &mut Decoder);
+    }
+}
+
+/// Opaque decoder state exposed to C++ via `Box<Decoder>`.
+pub struct Decoder {
+    encoding: &'static encoding_rs::Encoding,
+    inner: encoding_rs::Decoder,
+}
+
+/// Map a CXX-shared `Encoding` variant to the corresponding `encoding_rs`
+/// static. Panics (`unreachable!`) on a value outside the listed variants.
+fn to_encoding(enc: ffi::Encoding) -> &'static encoding_rs::Encoding {
+    match enc {
+        ffi::Encoding::Big5 => encoding_rs::BIG5,
+        ffi::Encoding::EucJp => encoding_rs::EUC_JP,
+        ffi::Encoding::EucKr => encoding_rs::EUC_KR,
+        ffi::Encoding::Gb18030 => encoding_rs::GB18030,
+        ffi::Encoding::Gbk => encoding_rs::GBK,
+        ffi::Encoding::Iso2022Jp => encoding_rs::ISO_2022_JP,
+        ffi::Encoding::ShiftJis => encoding_rs::SHIFT_JIS,
+        ffi::Encoding::Windows1252 => encoding_rs::WINDOWS_1252,
+        ffi::Encoding::XUserDefined => encoding_rs::X_USER_DEFINED,
+        _ => unreachable!(),
+    }
+}
+
+pub fn new_decoder(encoding: ffi::Encoding) -> Box<Decoder> {
+    let encoding = to_encoding(encoding);
+    Box::new(Decoder {
+        inner: encoding.new_decoder_without_bom_handling(),
+        encoding,
+    })
+}
+
+/// Decodes one chunk to UTF-16; resets the decoder after a flush or fatal error.
+pub fn decode(state: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> ffi::DecodeResult {
+    let max_len = state
+        .inner
+        .max_utf16_buffer_length(input.len())
+        .unwrap_or(input.len() + 4);
+    let mut output = vec![0u16; max_len];
+    let mut total_read = 0usize;
+    let mut total_written = 0usize;
+    let mut had_error = false;
+
+    if fatal {
+        loop {
+            let (result, read, written) = state.inner.decode_to_utf16_without_replacement(
+                &input[total_read..],
+                &mut output[total_written..],
+                flush,
+            );
+            total_read += read;
+            total_written += written;
+
+            match result {
+                encoding_rs::DecoderResult::InputEmpty => break,
+                encoding_rs::DecoderResult::OutputFull => {
+                    output.resize(output.len() * 2, 0);
+                }
+                encoding_rs::DecoderResult::Malformed(_, _) => {
+                    had_error = true;
+                    break;
+                }
+            }
+        }
+    } else {
+        loop {
+            let (result, read, written, _had_errors) = state.inner.decode_to_utf16(
+                &input[total_read..],
+                &mut output[total_written..],
+                flush,
+            );
+            total_read += read;
+            total_written += written;
+
+            match result {
+                encoding_rs::CoderResult::InputEmpty => break,
+                encoding_rs::CoderResult::OutputFull => {
+                    output.resize(output.len() * 2, 0);
+                }
+            }
+        }
+    }
+
+    // An `encoding_rs::Decoder` must not be reused once its stream has ended (it
+    // panics), so start fresh after a flush as well as after a fatal error.
+    if flush || had_error {
+        state.inner = state.encoding.new_decoder_without_bom_handling();
+    }
+    output.truncate(total_written);
+    ffi::DecodeResult { output, had_error }
+}
+
+pub fn reset(state: &mut Decoder) {
+    state.inner = state.encoding.new_decoder_without_bom_handling();
+}
diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index bd3b81e18e7..e47e0426e05 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -430,8 +430,15 @@ wd_cc_library(
 
 wd_cc_library(
     name = "encoding",
-    srcs = ["encoding.c++"],
-    hdrs = ["encoding.h"],
+    srcs = [
+        "encoding.c++",
+        "encoding-legacy.c++",
+    ],
+    hdrs = [
+        "encoding.h",
+        "encoding-legacy.h",
+        "encoding-shared.h",
+    ],
     implementation_deps = [
         "//src/workerd/io:features",
         "//src/workerd/util:strings",
@@ -439,10 +446,12 @@ wd_cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":util",
+        "//src/rust/encoding",
         "//src/workerd/io:compatibility-date_capnp",
         "//src/workerd/jsg",
         "@capnp-cpp//src/kj",
         "@simdutf",
+        "@workerd-cxx//kj-rs",
     ],
 )
 
diff --git a/src/workerd/api/encoding-legacy.c++ b/src/workerd/api/encoding-legacy.c++
new file mode 100644
index 00000000000..378afec91b2
--- /dev/null
+++ b/src/workerd/api/encoding-legacy.c++
@@ -0,0 +1,68 @@
+// Copyright (c) 2017-2022 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+#include "encoding-legacy.h"
+
+#include <kj-rs/convert.h>
+#include <rust/cxx.h>
+
+#include <kj/common.h>
+
+namespace workerd::api {
+
+namespace {
+
+// Map workerd::api::Encoding to the Rust-side ::workerd::rust::encoding::Encoding enum.
+::workerd::rust::encoding::Encoding toRustEncoding(Encoding encoding) {
+  using RE = ::workerd::rust::encoding::Encoding;
+  switch (encoding) {
+    case Encoding::Big5:
+      return RE::Big5;
+    case Encoding::Euc_Jp:
+      return RE::EucJp;
+    case Encoding::Euc_Kr:
+      return RE::EucKr;
+    case Encoding::Gb18030:
+      return RE::Gb18030;
+    case Encoding::Gbk:
+      return RE::Gbk;
+    case Encoding::Iso2022_Jp:
+      return RE::Iso2022Jp;
+    case Encoding::Shift_Jis:
+      return RE::ShiftJis;
+    case Encoding::Windows_1252:
+      return RE::Windows1252;
+    case Encoding::X_User_Defined:
+      return RE::XUserDefined;
+    default:
+      KJ_UNREACHABLE;
+  }
+}
+
+}  // namespace
+
+LegacyDecoder::LegacyDecoder(Encoding encoding, DecoderFatal fatal)
+    : encoding(encoding),
+      fatal(fatal),
+      state(::workerd::rust::encoding::new_decoder(toRustEncoding(encoding))) {}
+
+void LegacyDecoder::reset() {
+  ::workerd::rust::encoding::reset(*state);
+}
+
+kj::Maybe<jsg::JsString> LegacyDecoder::decode(
+    jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
+  auto result = ::workerd::rust::encoding::decode(
+      *state, buffer.as<kj_rs::RustMutable>(), flush, fatal.toBool());
+
+  if (fatal.toBool() && result.had_error) {
+    // Decoder state already reset by the Rust side on fatal error.
+    return kj::none;
+  }
+
+  auto output = kj::from<kj_rs::Rust>(result.output);
+  return js.str(output);
+}
+
+}  // namespace workerd::api
diff --git a/src/workerd/api/encoding-legacy.h b/src/workerd/api/encoding-legacy.h
new file mode 100644
index 00000000000..9e91e8d275e
--- /dev/null
+++ b/src/workerd/api/encoding-legacy.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2017-2022 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+// WHATWG-compliant legacy decoders (CJK multi-byte, windows-1252,
+// x-user-defined) implemented via the encoding_rs Rust crate through
+// a CXX bridge. A single LegacyDecoder class wraps an opaque Rust-side
+// decoder that handles all the encoding-specific state machines.
+
+#pragma once
+
+#include "encoding-shared.h"
+
+#include <workerd/rust/encoding/lib.rs.h>
+
+#include <rust/cxx.h>
+
+#include <kj/common.h>
+
+namespace workerd::api {
+
+// Unified legacy decoder using encoding_rs via Rust CXX bridge.
+// encoding_rs implements the full WHATWG decoder algorithms for all
+// legacy encodings, including streaming, error recovery, and ASCII
+// byte pushback.
+//
+// According to WHATWG spec, any encoding except UTF-8 and UTF-16 is considered legacy.
+class LegacyDecoder final: public Decoder {
+ public:
+  LegacyDecoder(Encoding encoding, DecoderFatal fatal);
+  ~LegacyDecoder() noexcept = default;
+  LegacyDecoder(LegacyDecoder&&) noexcept = default;
+  LegacyDecoder& operator=(LegacyDecoder&&) noexcept = default;
+  KJ_DISALLOW_COPY(LegacyDecoder);
+
+  Encoding getEncoding() override {
+    return encoding;
+  }
+
+  kj::Maybe<jsg::JsString> decode(
+      jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush = false) override;
+
+  void reset() override;
+
+ private:
+  Encoding encoding;
+  DecoderFatal fatal;
+  ::rust::Box<::workerd::rust::encoding::Decoder> state;
+};
+
+}  // namespace workerd::api
diff --git a/src/workerd/api/encoding-shared.h b/src/workerd/api/encoding-shared.h
new file mode 100644
index 00000000000..77aa8cfd702
--- /dev/null
+++ b/src/workerd/api/encoding-shared.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2017-2022 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+// Shared types used by encoding.h and encoding-legacy.h.
+// Extracted to break circular dependencies between the two headers.
+
+#pragma once
+
+#include <workerd/jsg/jsg.h>
+#include <workerd/util/strong-bool.h>
+
+namespace workerd::api {
+
+WD_STRONG_BOOL(DecoderFatal);
+WD_STRONG_BOOL(DecoderIgnoreBom);
+
+// The encodings listed here are defined as required by the Encoding spec.
+// The first label is the enum name we use to identify the encoding in code,
+// while the second label is the public identifier.
+#define EW_ENCODINGS(V)                                                                            \
+  V(Utf8, "utf-8")                                                                                 \
+  V(Ibm866, "ibm866")                                                                              \
+  V(Iso8859_2, "iso-8859-2")                                                                       \
+  V(Iso8859_3, "iso-8859-3")                                                                       \
+  V(Iso8859_4, "iso-8859-4")                                                                       \
+  V(Iso8859_5, "iso-8859-5")                                                                       \
+  V(Iso8859_6, "iso-8859-6")                                                                       \
+  V(Iso8859_7, "iso-8859-7")                                                                       \
+  V(Iso8859_8, "iso-8859-8")                                                                       \
+  V(Iso8859_8i, "iso-8859-8-i")                                                                    \
+  V(Iso8859_10, "iso-8859-10")                                                                     \
+  V(Iso8859_13, "iso-8859-13")                                                                     \
+  V(Iso8859_14, "iso-8859-14")                                                                     \
+  V(Iso8859_15, "iso-8859-15")                                                                     \
+  V(Iso8859_16, "iso-8859-16")                                                                     \
+  V(Ko18_r, "koi8-r")                                                                              \
+  V(Koi8_u, "koi8-u")                                                                              \
+  V(Macintosh, "macintosh")                                                                        \
+  V(Windows_874, "windows-874")                                                                    \
+  V(Windows_1250, "windows-1250")                                                                  \
+  V(Windows_1251, "windows-1251")                                                                  \
+  V(Windows_1252, "windows-1252")                                                                  \
+  V(Windows_1253, "windows-1253")                                                                  \
+  V(Windows_1254, "windows-1254")                                                                  \
+  V(Windows_1255, "windows-1255")                                                                  \
+  V(Windows_1256, "windows-1256")                                                                  \
+  V(Windows_1257, "windows-1257")                                                                  \
+  V(Windows_1258, "windows-1258")                                                                  \
+  V(X_Mac_Cyrillic, "x-mac-cyrillic")                                                              \
+  V(Gbk, "gbk")                                                                                    \
+  V(Gb18030, "gb18030")                                                                            \
+  V(Big5, "big5")                                                                                  \
+  V(Euc_Jp, "euc-jp")                                                                              \
+  V(Iso2022_Jp, "iso-2022-jp")                                                                     \
+  V(Shift_Jis, "shift_jis")                                                                        \
+  V(Euc_Kr, "euc-kr")                                                                              \
+  V(Replacement, "replacement")                                                                    \
+  V(Utf16be, "utf-16be")                                                                           \
+  V(Utf16le, "utf-16le")                                                                           \
+  V(X_User_Defined, "x-user-defined")
+
+enum class Encoding {
+  INVALID,
+#define V(name, _) name,
+  EW_ENCODINGS(V)
+#undef V
+};
+
+// A Decoder provides the underlying implementation of a TextDecoder.
+class Decoder {
+ public:
+  virtual ~Decoder() noexcept(true) {}
+  virtual Encoding getEncoding() = 0;
+  virtual kj::Maybe<jsg::JsString> decode(
+      jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush = false) = 0;
+
+  virtual void reset() {}
+};
+
+}  // namespace workerd::api
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index f44b1215fb3..e4590818a66 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -14,7 +14,7 @@
 #include <unicode/ucnv.h>
 #include <unicode/utf8.h>
 
-#include <algorithm>
+#include <kj/array.h>
 
 namespace workerd::api {
 
@@ -303,6 +303,7 @@ kj::Maybe<IcuDecoder> IcuDecoder::create(Encoding encoding, bool fatal, bool ign
 kj::Maybe<jsg::JsString> IcuDecoder::decode(
     jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
   UErrorCode status = U_ZERO_ERROR;
+  kj::Maybe<kj::Array<kj::byte>> merged;
   const auto maxCharSize = [this]() { return ucnv_getMaxCharSize(inner.get()); };
 
   const auto isUnicode = [this]() {
@@ -318,8 +319,6 @@ kj::Maybe<jsg::JsString> IcuDecoder::decode(
     KJ_UNREACHABLE;
   };
 
-  const auto isUsAscii = [](const auto& b) { return b <= 0x7f; };
-
   KJ_DEFER({
     if (flush) reset();
   });
@@ -329,7 +328,8 @@ kj::Maybe<jsg::JsString> IcuDecoder::decode(
   // conversions are being handled by v8 directly rather than by the ICU converter).
   if (buffer.size() > 0 && ucnv_toUCountPending(inner.get(), &status) == 0) {
     KJ_ASSERT(U_SUCCESS(status));
-    if (encoding == Encoding::Utf8 && std::all_of(buffer.begin(), buffer.end(), isUsAscii)) {
+    if (encoding == Encoding::Utf8 &&
+        simdutf::validate_ascii(buffer.asChars().begin(), buffer.size())) {
       // This is a fast-path option for UTF-8 that can be taken when there
       // are no buffered inputs and the non-empty input buffer contains only
       // codepoints <= 0x7f. This path is safe because with ASCII range codepoints
@@ -391,7 +391,7 @@ kj::Maybe<jsg::JsString> IcuDecoder::decode(
   status = U_ZERO_ERROR;
   auto limit = 2 * maxCharSize() *
       (!flush ? buffer.size()
-              : std::max(buffer.size(),
+              : kj::max(buffer.size(),
                     static_cast<size_t>(ucnv_toUCountPending(inner.get(), &status))));
 
   KJ_STACK_ARRAY(UChar, result, limit, 512, 4096);
@@ -414,121 +414,6 @@ kj::Maybe<jsg::JsString> IcuDecoder::decode(
   return js.str(result.slice(omitInitialBom ? 1 : 0, length));
 }
 
-// Full 256-entry windows-1252 byte-to-Unicode lookup table.
-// For most entries table[i] == i (identity mapping). Bytes 0x80-0x9F
-// differ from Latin-1 and map to their correct windows-1252 code points.
-// Undefined bytes (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to 0x0000 as a sentinel.
-// See: https://encoding.spec.whatwg.org/index-windows-1252.txt
-// clang-format off
-static constexpr uint16_t WIN1252_TABLE[256] = {
-    // 0x00-0x0F
-    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
-    0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
-    // 0x10-0x1F
-    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
-    0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
-    // 0x20-0x2F
-    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
-    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
-    // 0x30-0x3F
-    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
-    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
-    // 0x40-0x4F
-    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
-    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
-    // 0x50-0x5F
-    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
-    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
-    // 0x60-0x6F
-    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
-    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
-    // 0x70-0x7F
-    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
-    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
-    // 0x80-0x8F — windows-1252 diverges from Latin-1 here
-    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
-    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
-    // 0x90-0x9F
-    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
-    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
-    // 0xA0-0xAF
-    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
-    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
-    // 0xB0-0xBF
-    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
-    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
-    // 0xC0-0xCF
-    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
-    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
-    // 0xD0-0xDF
-    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
-    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
-    // 0xE0-0xEF
-    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
-    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
-    // 0xF0-0xFF
-    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
-    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
-};
-// clang-format on
-
-kj::Maybe<jsg::JsString> AsciiDecoder::decode(
-    jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
-  // Single branchless scan: accumulate whether any byte maps to a
-  // different code point than its raw value. For bytes outside 0x80-0x9F
-  // the table is identity so the XOR is zero and contributes nothing.
-  uint16_t diff = 0;
-  for (auto byte: buffer) {
-    diff |= WIN1252_TABLE[byte] ^ byte;
-  }
-
-  if (diff == 0) {
-    // Fast path: all bytes mapped to their own value (pure ASCII or
-    // 0xA0-0xFF range), so Latin-1 identity decoding is correct.
-    return js.str(buffer);
-  }
-
-  // Slow path: at least one byte in 0x80-0x9F needs remapping.
-  // Since some windows-1252 code points are > 0xFF we use uint16_t.
-  auto result = kj::heapArray<uint16_t>(buffer.size());
-  for (size_t i = 0; i < buffer.size(); i++) {
-    result[i] = WIN1252_TABLE[buffer[i]];
-  }
-
-  return js.str(result.asPtr());
-}
-
-kj::Maybe<jsg::JsString> XUserDefinedDecoder::decode(
-    jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
-  // x-user-defined encoding per WHATWG spec:
-  // https://encoding.spec.whatwg.org/#x-user-defined-decoder
-  // - 0x00-0x7F: code point = byte (ASCII identity)
-  // - 0x80-0xFF: code point = 0xF780 + (byte - 0x80) = 0xF700 + byte
-
-  // Check if we have any high bytes that need remapping
-  bool hasHighBytes =
-      !simdutf::validate_ascii(reinterpret_cast<const char*>(buffer.begin()), buffer.size());
-
-  if (!hasHighBytes) {
-    // Fast path: all ASCII bytes, identity mapping
-    return js.str(buffer);
-  }
-
-  // Slow path: at least one byte >= 0x80, need uint16_t for PUA mapping
-  auto result = kj::heapArray<uint16_t>(buffer.size());
-  for (size_t i = 0; i < buffer.size(); i++) {
-    auto byte = buffer[i];
-    if (byte < 0x80) {
-      result[i] = byte;
-    } else {
-      // Map 0x80-0xFF to U+F780-U+F7FF (Private Use Area)
-      result[i] = 0xF700 + byte;
-    }
-  }
-
-  return js.str(result.asPtr());
-}
-
 void IcuDecoder::reset() {
   bomSeen = false;
   return ucnv_reset(inner.get());
@@ -536,15 +421,12 @@ void IcuDecoder::reset() {
 
 Decoder& TextDecoder::getImpl() {
   KJ_SWITCH_ONEOF(decoder) {
-    KJ_CASE_ONEOF(dec, AsciiDecoder) {
+    KJ_CASE_ONEOF(dec, LegacyDecoder) {
       return dec;
     }
     KJ_CASE_ONEOF(dec, IcuDecoder) {
       return dec;
     }
-    KJ_CASE_ONEOF(dec, XUserDefinedDecoder) {
-      return dec;
-    }
   }
   KJ_UNREACHABLE;
 }
@@ -566,12 +448,19 @@ jsg::Ref<TextDecoder> TextDecoder::constructor(jsg::Lock& js,
         errorMessage(label));
   }
 
-  if (encoding == Encoding::Windows_1252) {
-    return js.alloc<TextDecoder>(AsciiDecoder(), options);
-  }
-
-  if (encoding == Encoding::X_User_Defined) {
-    return js.alloc<TextDecoder>(XUserDefinedDecoder(), options);
+  switch (encoding) {
+    case Encoding::Big5:
+    case Encoding::Euc_Jp:
+    case Encoding::Euc_Kr:
+    case Encoding::Gb18030:
+    case Encoding::Gbk:
+    case Encoding::Iso2022_Jp:
+    case Encoding::Shift_Jis:
+    case Encoding::Windows_1252:
+    case Encoding::X_User_Defined:
+      return js.alloc<TextDecoder>(LegacyDecoder(encoding, DecoderFatal(options.fatal)), options);
+    default:
+      break;
   }
 
   return js.alloc<TextDecoder>(
@@ -588,23 +477,28 @@ jsg::JsString TextDecoder::decode(jsg::Lock& js,
     jsg::Optional<kj::Array<const kj::byte>> maybeInput,
     jsg::Optional<DecodeOptions> maybeOptions) {
   auto options = maybeOptions.orDefault(DEFAULT_OPTIONS);
+  // Per spec, omitting input is end-of-queue, so we must flush pending bytes.
+  const auto flush = maybeInput == kj::none || !options.stream;
   auto& input = maybeInput.orDefault(EMPTY);
-  return JSG_REQUIRE_NONNULL(
-      getImpl().decode(js, input, !options.stream), TypeError, "Failed to decode input.");
+  auto result =
+      JSG_REQUIRE_NONNULL(getImpl().decode(js, input, flush), TypeError, "Failed to decode input.");
+  // Per WHATWG spec, when flush is set the decoder is reset to a new instance
+  // so subsequent calls start with clean state.
+  if (flush) {
+    getImpl().reset();
+  }
+  return kj::mv(result);
 }
 
 kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
     jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
   KJ_SWITCH_ONEOF(decoder) {
-    KJ_CASE_ONEOF(dec, AsciiDecoder) {
+    KJ_CASE_ONEOF(dec, LegacyDecoder) {
       return dec.decode(js, buffer, flush);
     }
     KJ_CASE_ONEOF(dec, IcuDecoder) {
       return dec.decode(js, buffer, flush);
     }
-    KJ_CASE_ONEOF(dec, XUserDefinedDecoder) {
-      return dec.decode(js, buffer, flush);
-    }
   }
   KJ_UNREACHABLE;
 }
diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h
index 37720124a6f..adaae247e89 100644
--- a/src/workerd/api/encoding.h
+++ b/src/workerd/api/encoding.h
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include "encoding-legacy.h"
+#include "encoding-shared.h"
+
 #include <workerd/io/compatibility-date.capnp.h>
 #include <workerd/jsg/jsg.h>
 
@@ -11,108 +14,6 @@
 
 namespace workerd::api {
 
-// The encodings listed here are defined as required by the Encoding spec.
-// The first label is enum we use to identify the encoding in code, while
-// the second label is the public identifier.
-#define EW_ENCODINGS(V)                                                                            \
-  V(Utf8, "utf-8")                                                                                 \
-  V(Ibm866, "ibm866")                                                                              \
-  V(Iso8859_2, "iso-8859-2")                                                                       \
-  V(Iso8859_3, "iso-8859-3")                                                                       \
-  V(Iso8859_4, "iso-8859-4")                                                                       \
-  V(Iso8859_5, "iso-8859-5")                                                                       \
-  V(Iso8859_6, "iso-8859-6")                                                                       \
-  V(Iso8859_7, "iso-8859-7")                                                                       \
-  V(Iso8859_8, "iso-8859-8")                                                                       \
-  V(Iso8859_8i, "iso-8859-8-i")                                                                    \
-  V(Iso8859_10, "iso-8859-10")                                                                     \
-  V(Iso8859_13, "iso-8859-13")                                                                     \
-  V(Iso8859_14, "iso-8859-14")                                                                     \
-  V(Iso8859_15, "iso-8859-15")                                                                     \
-  V(Iso8859_16, "iso-8859-16")                                                                     \
-  V(Ko18_r, "koi8-r")                                                                              \
-  V(Koi8_u, "koi8-u")                                                                              \
-  V(Macintosh, "macintosh")                                                                        \
-  V(Windows_874, "windows-874")                                                                    \
-  V(Windows_1250, "windows-1250")                                                                  \
-  V(Windows_1251, "windows-1251")                                                                  \
-  V(Windows_1252, "windows-1252")                                                                  \
-  V(Windows_1253, "windows-1253")                                                                  \
-  V(Windows_1254, "windows-1254")                                                                  \
-  V(Windows_1255, "windows-1255")                                                                  \
-  V(Windows_1256, "windows-1256")                                                                  \
-  V(Windows_1257, "windows-1257")                                                                  \
-  V(Windows_1258, "windows-1258")                                                                  \
-  V(X_Mac_Cyrillic, "x-mac-cyrillic")                                                              \
-  V(Gbk, "gbk")                                                                                    \
-  V(Gb18030, "gb18030")                                                                            \
-  V(Big5, "big5")                                                                                  \
-  V(Euc_Jp, "euc-jp")                                                                              \
-  V(Iso2022_Jp, "iso-2022-jp")                                                                     \
-  V(Shift_Jis, "shift_jis")                                                                        \
-  V(Euc_Kr, "euc-kr")                                                                              \
-  V(Replacement, "replacement")                                                                    \
-  V(Utf16be, "utf-16be")                                                                           \
-  V(Utf16le, "utf-16le")                                                                           \
-  V(X_User_Defined, "x-user-defined")
-
-enum class Encoding {
-  INVALID,
-#define V(name, _) name,
-  EW_ENCODINGS(V)
-#undef V
-};
-
-// A Decoder provides the underlying implementation of a TextDecoder.
-class Decoder {
- public:
-  virtual ~Decoder() noexcept(true) {}
-  virtual Encoding getEncoding() = 0;
-  virtual kj::Maybe<jsg::JsString> decode(
-      jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush = false) = 0;
-
-  virtual void reset() {}
-};
-
-// Decoder implementation that provides a fast-track for windows-1252.
-// When the input contains only bytes <= 0x7F or >= 0xA0, these are
-// identical between Latin-1 and windows-1252 so we can use V8's
-// efficient NewFromOneByte. For bytes in 0x80-0x9F, we remap them
-// to the correct windows-1252 code points using NewFromTwoByte.
-class AsciiDecoder final: public Decoder {
- public:
-  AsciiDecoder() = default;
-  AsciiDecoder(AsciiDecoder&&) = default;
-  AsciiDecoder& operator=(AsciiDecoder&&) = default;
-  KJ_DISALLOW_COPY(AsciiDecoder);
-
-  Encoding getEncoding() override {
-    return Encoding::Windows_1252;
-  }
-
-  kj::Maybe<jsg::JsString> decode(
-      jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush = false) override;
-};
-
-// Decoder implementation for x-user-defined encoding.
-// Per WHATWG spec (https://encoding.spec.whatwg.org/#x-user-defined-decoder):
-// - Bytes 0x00-0x7F map to themselves (ASCII identity)
-// - Bytes 0x80-0xFF map to U+F780 + (byte - 0x80) = U+F700 + byte
-class XUserDefinedDecoder final: public Decoder {
- public:
-  XUserDefinedDecoder() = default;
-  XUserDefinedDecoder(XUserDefinedDecoder&&) = default;
-  XUserDefinedDecoder& operator=(XUserDefinedDecoder&&) = default;
-  KJ_DISALLOW_COPY(XUserDefinedDecoder);
-
-  Encoding getEncoding() override {
-    return Encoding::X_User_Defined;
-  }
-
-  kj::Maybe<jsg::JsString> decode(
-      jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush = false) override;
-};
-
 // Decoder implementation that uses ICU's built-in conversion APIs.
 // ICU's decoder is fairly comprehensive, covering the full range
 // of encodings required by the Encoding specification.
@@ -157,7 +58,7 @@ class IcuDecoder final: public Decoder {
 // https://encoding.spec.whatwg.org/#interface-textdecoder
 class TextDecoder final: public jsg::Object {
  public:
-  using DecoderImpl = kj::OneOf<AsciiDecoder, IcuDecoder, XUserDefinedDecoder>;
+  using DecoderImpl = kj::OneOf<LegacyDecoder, IcuDecoder>;
 
   struct ConstructorOptions {
     bool fatal = false;
diff --git a/src/workerd/api/tests/encoding-test.js b/src/workerd/api/tests/encoding-test.js
index bcbff6f199d..c5940d429e6 100644
--- a/src/workerd/api/tests/encoding-test.js
+++ b/src/workerd/api/tests/encoding-test.js
@@ -15,6 +15,8 @@ function decodeStreaming(decoder, input) {
   return x;
 }
 
+const u = (...args) => Uint8Array.of(...args);
+
 // From https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
 const windows1252Labels = [
   'ansi_x3.4-1968',
@@ -764,6 +766,161 @@ export const gbkDecoderIsGb18030Decoder = {
   },
 };
 
+const gbVersionAndRangesTest = (encoding) => {
+  const loose = new TextDecoder(encoding);
+  const checkAll = (...list) => list.forEach((x) => check(...x));
+  const check = (bytes, str, invalid = false) => {
+    const fatal = new TextDecoder(encoding, { fatal: true });
+    const u8 = Uint8Array.from(bytes);
+    strictEqual(loose.decode(u8), str);
+    if (!invalid) strictEqual(fatal.decode(u8), str);
+    if (invalid) throws(() => fatal.decode(u8));
+  };
+
+  check([0x84, 0x31, 0xa4, 0x36], '\uFFFC');
+  check([0x84, 0x31, 0xa4, 0x37], '\uFFFD');
+  check([0x84, 0x31, 0xa4, 0x38], '\uFFFE');
+  check([0x84, 0x31, 0xa4, 0x39], '\uFFFF');
+  check([0x84, 0x31, 0xa5, 0x30], '\uFFFD', true);
+  check([0x8f, 0x39, 0xfe, 0x39], '\uFFFD', true);
+  check([0x90, 0x30, 0x81, 0x30], String.fromCodePoint(0x1_00_00));
+  check([0x90, 0x30, 0x81, 0x31], String.fromCodePoint(0x1_00_01));
+
+  check([0xe3, 0x32, 0x9a, 0x35], String.fromCodePoint(0x10_ff_ff));
+  check([0xe3, 0x32, 0x9a, 0x36], '\uFFFD', true);
+  check([0xe3, 0x32, 0x9a, 0x37], '\uFFFD', true);
+
+  check([0xfe, 0x39, 0xfe, 0x39], '\uFFFD', true);
+  check([0xff, 0x39, 0xfe, 0x39], '\uFFFD9\uFFFD', true);
+  check([0xfe, 0x40, 0xfe, 0x39], '\uFA0C\uFFFD', true);
+  check([0xfe, 0x39, 0xff, 0x39], '\uFFFD9\uFFFD9', true);
+  check([0xfe, 0x39, 0xfe, 0x40], '\uFFFD9\uFA0C', true);
+
+  checkAll(
+    [[0xa8, 0xbb], '\u0251'],
+    [[0xa8, 0xbc], '\u1E3F'],
+    [[0xa8, 0xbd], '\u0144']
+  );
+  check([0x81, 0x35, 0xf4, 0x36], '\u1E3E');
+  check([0x81, 0x35, 0xf4, 0x37], '\uE7C7');
+  check([0x81, 0x35, 0xf4, 0x38], '\u1E40');
+
+  checkAll(
+    [[0xa6, 0xd9], '\uFE10'],
+    [[0xa6, 0xed], '\uFE18'],
+    [[0xa6, 0xf3], '\uFE19']
+  );
+  checkAll([[0xfe, 0x59], '\u9FB4'], [[0xfe, 0xa0], '\u9FBB']);
+};
+
+export const gb18030VersionAndRanges = {
+  test() {
+    gbVersionAndRangesTest('gb18030');
+  },
+};
+
+export const gbkVersionAndRanges = {
+  test() {
+    gbVersionAndRangesTest('gbk');
+  },
+};
+
+// Verify that the WHATWG-required mapping corrections also produce the
+// correct output when the corrected byte sequences appear inside a larger
+// buffer (surrounded by ASCII), not only when they are the entire input.
+export const gb18030OverridesEmbedded = {
+  test() {
+    const d = new TextDecoder('gb18030');
+
+    // 0x80 → U+20AC (Euro sign) surrounded by ASCII
+    strictEqual(d.decode(Uint8Array.of(0x41, 0x80, 0x42)), 'A\u20ACB');
+
+    // Two-byte mapping corrections surrounded by ASCII
+    strictEqual(d.decode(Uint8Array.of(0x41, 0xa8, 0xbb, 0x42)), 'A\u0251B');
+    strictEqual(d.decode(Uint8Array.of(0x41, 0xa8, 0xbc, 0x42)), 'A\u1E3FB');
+    strictEqual(d.decode(Uint8Array.of(0x41, 0xa8, 0xbd, 0x42)), 'A\u0144B');
+
+    // Vertical form corrections surrounded by ASCII
+    strictEqual(d.decode(Uint8Array.of(0x41, 0xa6, 0xd9, 0x42)), 'A\uFE10B');
+
+    // CJK extension corrections surrounded by ASCII
+    strictEqual(d.decode(Uint8Array.of(0x41, 0xfe, 0x59, 0x42)), 'A\u9FB4B');
+  },
+};
+
+export const replacementPushbackAsciiCharactersLoose = {
+  test() {
+    const vectors = {
+      big5: [
+        [[0x80], '\uFFFD'],
+        [[0x81, 0x40], '\uFFFD@'],
+        [[0x83, 0x5c], '\uFFFD\\'],
+        [[0x87, 0x87, 0x40], '\uFFFD@'],
+        [[0x81, 0x81], '\uFFFD'],
+      ],
+      'iso-2022-jp': [
+        [[0x1b, 0x24], '\uFFFD$'],
+        [[0x1b, 0x24, 0x40, 0x1b, 0x24], '\uFFFD\uFFFD'],
+      ],
+      'euc-jp': [
+        [[0x80], '\uFFFD'],
+        [[0x8d, 0x8d], '\uFFFD\uFFFD'],
+        [[0x8e, 0x8e], '\uFFFD'],
+      ],
+    };
+
+    for (const [encoding, list] of Object.entries(vectors)) {
+      const d = new TextDecoder(encoding);
+      for (const [bytes, text] of list) {
+        strictEqual(d.decode(Uint8Array.from(bytes)), text);
+      }
+    }
+  },
+};
+
+export const stickyMultibyteStateIso2022JpLoose = {
+  test() {
+    const vectors = [
+      [[27], '\uFFFD'],
+      [[27, 0x28], '\uFFFD('],
+      [[0x1b, 0x28, 0x49], ''],
+    ];
+
+    const d = new TextDecoder('iso-2022-jp');
+    for (const [bytes, text] of vectors) {
+      strictEqual(d.decode(u(0x40)), '@');
+      strictEqual(d.decode(Uint8Array.from(bytes)), text);
+      strictEqual(d.decode(u(0x40)), '@');
+      strictEqual(d.decode(u(0x2a)), '*');
+      strictEqual(d.decode(u(0x42)), 'B');
+    }
+  },
+};
+
+export const fatalStreamGb18030Gbk = {
+  test() {
+    for (const encoding of ['gb18030', 'gbk']) {
+      {
+        const d = new TextDecoder(encoding, { fatal: true });
+        strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC');
+        throws(() =>
+          d.decode(u(0x81, 0x30, 0x21, 0x21, 0x21), { stream: true })
+        );
+        strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC');
+      }
+
+      {
+        const d = new TextDecoder(encoding, { fatal: true });
+        strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC');
+        throws(() =>
+          d.decode(u(0x81, 0x30, 0x81, 0x42, 0x42), { stream: true })
+        );
+        strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC');
+      }
+    }
+  },
+};
+
 export const textDecoderStream = {
   test() {
     const stream = new TextDecoderStream('utf-16', {
@@ -779,6 +936,55 @@ export const textDecoderStream = {
   },
 };
 
+// Per WHATWG Big5 decoder step 1, when end-of-queue is reached with a
+// pending lead byte, the decoder must return error (U+FFFD in replacement
+// mode, throw in fatal mode). This tests the streaming case where a lead
+// byte is buffered in one call and then flushed without a trail byte.
+export const big5OrphanedLeadOnFlush = {
+  test() {
+    // 0xA4 is a valid Big5 lead byte (e.g., first byte of 中 = 0xA4 0xA4).
+    // Streaming it alone, then flushing, must produce U+FFFD.
+    {
+      const dec = new TextDecoder('big5');
+      strictEqual(dec.decode(Uint8Array.of(0xa4), { stream: true }), '');
+      strictEqual(dec.decode(), '\uFFFD');
+    }
+
+    // Fatal mode must throw on the orphaned lead.
+    {
+      const dec = new TextDecoder('big5', { fatal: true });
+      strictEqual(dec.decode(Uint8Array.of(0xa4), { stream: true }), '');
+      throws(() => dec.decode());
+    }
+
+    // Orphaned lead followed by an invalid trail byte on flush: the lead
+    // must produce U+FFFD. 0x20 (space) is not a valid Big5 trail byte
+    // (valid trails are 0x40-0x7E and 0xA1-0xFE).
+    {
+      const dec = new TextDecoder('big5');
+      strictEqual(dec.decode(Uint8Array.of(0xa4), { stream: true }), '');
+      const result = dec.decode(Uint8Array.of(0x20));
+      // The orphaned lead must produce at least one U+FFFD.
+      ok(
+        result.includes('\uFFFD'),
+        `expected U+FFFD in output, got: ${JSON.stringify(result)}`
+      );
+      // The space byte must not be swallowed.
+      ok(
+        result.includes(' '),
+        `expected space in output, got: ${JSON.stringify(result)}`
+      );
+    }
+
+    // Streaming a complete pair across two calls must still work.
+    {
+      const dec = new TextDecoder('big5');
+      strictEqual(dec.decode(Uint8Array.of(0xa4), { stream: true }), '');
+      strictEqual(dec.decode(Uint8Array.of(0xa4)), '中');
+    }
+  },
+};
+
 // Test x-user-defined encoding per WHATWG spec
 // https://encoding.spec.whatwg.org/#x-user-defined-decoder
 export const xUserDefinedDecode = {
@@ -840,3 +1046,60 @@ export const xUserDefinedFatal = {
     }
   },
 };
+
+// Verify that streaming with zero-length input works for every legacy
+// encoding handled by the Rust LegacyDecoder. An empty chunk in streaming
+// mode must produce an empty string and leave the decoder in a valid state
+// for subsequent calls.
+export const legacyStreamEmptyInput = {
+  test() {
+    const encodings = [
+      'big5',
+      'euc-jp',
+      'euc-kr',
+      'gb18030',
+      'gbk',
+      'iso-2022-jp',
+      'shift_jis',
+      'windows-1252',
+      'x-user-defined',
+    ];
+
+    const empty = new Uint8Array(0);
+
+    for (const label of encodings) {
+      for (const fatal of [false, true]) {
+        const dec = new TextDecoder(label, { fatal });
+
+        // Empty stream chunk must produce empty string.
+        strictEqual(
+          dec.decode(empty, { stream: true }),
+          '',
+          `${label} (fatal=${fatal}): empty stream chunk should be ''`
+        );
+
+        // A second empty stream chunk must also be fine.
+        strictEqual(
+          dec.decode(empty, { stream: true }),
+          '',
+          `${label} (fatal=${fatal}): second empty stream chunk should be ''`
+        );
+
+        // Final flush with no pending bytes must produce empty string.
+        strictEqual(
+          dec.decode(),
+          '',
+          `${label} (fatal=${fatal}): flush after empty chunks should be ''`
+        );
+
+        // Decoder must still work normally after the empty-stream sequence.
+        // Feed a single ASCII byte to verify.
+        strictEqual(
+          dec.decode(Uint8Array.of(0x41)),
+          'A',
+          `${label} (fatal=${fatal}): decode 'A' after empty stream should work`
+        );
+      }
+    }
+  },
+};
diff --git a/src/workerd/api/tests/encoding-test.wd-test b/src/workerd/api/tests/encoding-test.wd-test
index 6ec29f86389..016035b74e7 100644
--- a/src/workerd/api/tests/encoding-test.wd-test
+++ b/src/workerd/api/tests/encoding-test.wd-test
@@ -7,7 +7,7 @@ const unitTests :Workerd.Config = (
         modules = [
           (name = "worker", esModule = embed "encoding-test.js")
         ],
-        compatibilityFlags = ["nodejs_compat"]
+        compatibilityFlags = ["nodejs_compat", "text_decoder_cjk_decoder"]
       )
     ),
   ],
diff --git a/src/workerd/io/compatibility-date.capnp b/src/workerd/io/compatibility-date.capnp
index 83bba3ca604..c73f9480369 100644
--- a/src/workerd/io/compatibility-date.capnp
+++ b/src/workerd/io/compatibility-date.capnp
@@ -1403,4 +1403,11 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef {
     $compatEnableDate("2026-03-03");
   # When enabled, unhandledrejection processing is deferred until the microtask
   # checkpoint completes, avoiding misfires on multi-tick promise chains.
+
+  textDecoderCjkDecoder @163 :Bool
+    $compatEnableFlag("text_decoder_cjk_decoder")
+    $compatDisableFlag("disable_text_decoder_cjk_decoder")
+    $compatEnableDate("2026-03-03");
+  # Enables the dedicated CJK TextDecoder implementation, which applies the WHATWG
+  # Encoding Standard overrides and Big5 lead-byte handling, instead of the ICU-only path.
 }
diff --git a/src/wpt/BUILD.bazel b/src/wpt/BUILD.bazel
index a7382c9ba7d..6914ac125a7 100644
--- a/src/wpt/BUILD.bazel
+++ b/src/wpt/BUILD.bazel
@@ -75,7 +75,10 @@ wpt_test(
 
 wpt_test(
     name = "encoding",
-    compat_flags = ["pedantic_wpt"],
+    compat_flags = [
+        "pedantic_wpt",
+        "text_decoder_cjk_decoder",
+    ],
     config = "encoding-test.ts",
     wpt_directory = "@wpt//:encoding@module",
 )
diff --git a/src/wpt/encoding-test.ts b/src/wpt/encoding-test.ts
index b73f7d43660..29186e73001 100644
--- a/src/wpt/encoding-test.ts
+++ b/src/wpt/encoding-test.ts
@@ -2,8 +2,24 @@
 // Licensed under the Apache 2.0 license found in the LICENSE file or at:
 //     https://opensource.org/licenses/Apache-2.0
 
+import path from 'node:path';
+import { getBindingPath } from 'harness/common';
 import { type TestRunnerConfig } from 'harness/harness';
 
+function loadWptResource(relativePath: string): void {
+  const bindingPath = getBindingPath(
+    path.dirname(globalThis.state.testFileName),
+    relativePath
+  );
+  const code = globalThis.state.env[bindingPath];
+  if (typeof code !== 'string') {
+    throw new Error(
+      `Test file ${bindingPath} not found. Update wpt_test.bzl to handle this case.`
+    );
+  }
+  globalThis.state.env.unsafe.eval(code);
+}
+
 export default {
   'api-basics.any.js': {},
   'api-invalid-label.any.js': {},
@@ -68,53 +84,44 @@ export default {
       'TextEncoderStream interface: attribute encoding',
     ],
   },
-  'iso-2022-jp-decoder.any.js': {
-    comment: 'TODO investigate this',
-    expectedFailures: [
-      'iso-2022-jp decoder: Error ESC',
-      'iso-2022-jp decoder: Katakana ESC, SO / SI',
-      'iso-2022-jp decoder: character, error ESC #2',
-    ],
-  },
+  'iso-2022-jp-decoder.any.js': {},
   'legacy-mb-japanese/euc-jp/eucjp-decoder.js': {},
   'legacy-mb-japanese/euc-jp/eucjp-encoder.js': {
-    comment: 'ReferenceError: jis0208 is not defined',
-    omittedTests: true,
+    before: (): void => {
+      loadWptResource('./jis0208_index.js');
+      loadWptResource('./jis0212_index.js');
+    },
   },
   'legacy-mb-japanese/euc-jp/jis0208_index.js': {},
   'legacy-mb-japanese/euc-jp/jis0212_index.js': {},
   'legacy-mb-japanese/iso-2022-jp/iso2022jp-decoder.js': {},
   'legacy-mb-japanese/iso-2022-jp/iso2022jp-encoder.js': {
-    comment:
-      'This file is meant to be included by tests and cannot run on its own',
-    omittedTests: true,
+    before: (): void => {
+      loadWptResource('./jis0208_index.js');
+    },
   },
   'legacy-mb-japanese/iso-2022-jp/jis0208_index.js': {},
   'legacy-mb-japanese/shift_jis/jis0208_index.js': {},
   'legacy-mb-japanese/shift_jis/sjis-decoder.js': {},
   'legacy-mb-japanese/shift_jis/sjis-encoder.js': {
-    comment:
-      'This file is meant to be included by tests and cannot run on its own',
-    omittedTests: true,
+    before: (): void => {
+      loadWptResource('./jis0208_index.js');
+    },
   },
   'legacy-mb-korean/euc-kr/euckr-decoder.js': {},
   'legacy-mb-korean/euc-kr/euckr-encoder.js': {
-    comment: 'ReferenceError: euckr is not defined',
-    omittedTests: true,
+    before: (): void => {
+      loadWptResource('./euckr_index.js');
+    },
   },
   'legacy-mb-korean/euc-kr/euckr_index.js': {},
-  'legacy-mb-schinese/gb18030/gb18030-decoder.any.js': {
-    comment: 'Too many failures to list individually',
-    omittedTests: true,
-  },
-  'legacy-mb-schinese/gbk/gbk-decoder.any.js': {
-    comment: 'Too many failures to list individually',
-    omittedTests: true,
-  },
+  'legacy-mb-schinese/gb18030/gb18030-decoder.any.js': {},
+  'legacy-mb-schinese/gbk/gbk-decoder.any.js': {},
   'legacy-mb-tchinese/big5/big5-decoder.js': {},
   'legacy-mb-tchinese/big5/big5-encoder.js': {
-    comment: 'big5 is not defined',
-    omittedTests: true,
+    before: (): void => {
+      loadWptResource('./big5_index.js');
+    },
   },
   'legacy-mb-tchinese/big5/big5_index.js': {},
   'replacement-encodings.any.js': {
@@ -511,18 +518,12 @@ export default {
   'streams/readable-writable-properties.any.js': {},
   'streams/realms.window.js': {
     comment: 'ReferenceError: window is not defined',
-    disabledTests: true,
+    omittedTests: true,
   },
   'textdecoder-arguments.any.js': {},
   'textdecoder-byte-order-marks.any.js': {},
   'textdecoder-copy.any.js': {},
-  'textdecoder-eof.any.js': {
-    comment: 'TextDecoder end-of-queue handling differs from spec',
-    expectedFailures: [
-      'TextDecoder end-of-queue handling',
-      'TextDecoder end-of-queue handling using stream: true',
-    ],
-  },
+  'textdecoder-eof.any.js': {},
   'textdecoder-fatal-single-byte.any.js': {},
   'textdecoder-fatal-streaming.any.js': {},
   'textdecoder-fatal.any.js': {},
@@ -553,6 +554,165 @@ export default {
   },
   'unsupported-labels.window.js': {
     comment: 'Too many failures to list by name',
-    disabledTests: true,
+    expectedFailures: [
+      '437 is not supported by the Encoding Standard',
+      'adobe-standard-encoding is not supported by the Encoding Standard',
+      'armscii-8 is not supported by the Encoding Standard',
+      'bocu-1 is not supported by the Encoding Standard',
+      'cesu-8 is not supported by the Encoding Standard',
+      'cp1025 is not supported by the Encoding Standard',
+      'cp437 is not supported by the Encoding Standard',
+      'cp737 is not supported by the Encoding Standard',
+      'cp851 is not supported by the Encoding Standard',
+      'cp858 is not supported by the Encoding Standard',
+      'cp862 is not supported by the Encoding Standard',
+      'cp864 is not supported by the Encoding Standard',
+      'cp869 is not supported by the Encoding Standard',
+      'cp875 is not supported by the Encoding Standard',
+      'cp950 is not supported by the Encoding Standard',
+      'csiso103t618bit is not supported by the Encoding Standard',
+      'csiso111ecmacyrillic is not supported by the Encoding Standard',
+      'cspc8codepage437 is not supported by the Encoding Standard',
+      'csviscii is not supported by the Encoding Standard',
+      'dos-720 is not supported by the Encoding Standard',
+      'dos-862 is not supported by the Encoding Standard',
+      'ecma-cyrillic is not supported by the Encoding Standard',
+      'euc-tw is not supported by the Encoding Standard',
+      'german is not supported by the Encoding Standard',
+      'geostd8 is not supported by the Encoding Standard',
+      'hp-roman8 is not supported by the Encoding Standard',
+      'ibm-thai is not supported by the Encoding Standard',
+      'ibm00858 is not supported by the Encoding Standard',
+      'ibm00924 is not supported by the Encoding Standard',
+      'ibm01047 is not supported by the Encoding Standard',
+      'ibm01140 is not supported by the Encoding Standard',
+      'ibm01141 is not supported by the Encoding Standard',
+      'ibm01142 is not supported by the Encoding Standard',
+      'ibm01143 is not supported by the Encoding Standard',
+      'ibm01144 is not supported by the Encoding Standard',
+      'ibm01145 is not supported by the Encoding Standard',
+      'ibm01146 is not supported by the Encoding Standard',
+      'ibm01147 is not supported by the Encoding Standard',
+      'ibm01148 is not supported by the Encoding Standard',
+      'ibm01149 is not supported by the Encoding Standard',
+      'ibm037 is not supported by the Encoding Standard',
+      'ibm1026 is not supported by the Encoding Standard',
+      'ibm1047 is not supported by the Encoding Standard',
+      'ibm273 is not supported by the Encoding Standard',
+      'ibm277 is not supported by the Encoding Standard',
+      'ibm278 is not supported by the Encoding Standard',
+      'ibm280 is not supported by the Encoding Standard',
+      'ibm284 is not supported by the Encoding Standard',
+      'ibm285 is not supported by the Encoding Standard',
+      'ibm290 is not supported by the Encoding Standard',
+      'ibm297 is not supported by the Encoding Standard',
+      'ibm367 is not supported by the Encoding Standard',
+      'ibm420 is not supported by the Encoding Standard',
+      'ibm423 is not supported by the Encoding Standard',
+      'ibm424 is not supported by the Encoding Standard',
+      'ibm437 is not supported by the Encoding Standard',
+      'ibm500 is not supported by the Encoding Standard',
+      'ibm737 is not supported by the Encoding Standard',
+      'ibm775 is not supported by the Encoding Standard',
+      'ibm850 is not supported by the Encoding Standard',
+      'ibm852 is not supported by the Encoding Standard',
+      'ibm855 is not supported by the Encoding Standard',
+      'ibm857 is not supported by the Encoding Standard',
+      'ibm860 is not supported by the Encoding Standard',
+      'ibm861 is not supported by the Encoding Standard',
+      'ibm862 is not supported by the Encoding Standard',
+      'ibm863 is not supported by the Encoding Standard',
+      'ibm864 is not supported by the Encoding Standard',
+      'ibm864i is not supported by the Encoding Standard',
+      'ibm865 is not supported by the Encoding Standard',
+      'ibm868 is not supported by the Encoding Standard',
+      'ibm869 is not supported by the Encoding Standard',
+      'ibm870 is not supported by the Encoding Standard',
+      'ibm871 is not supported by the Encoding Standard',
+      'ibm880 is not supported by the Encoding Standard',
+      'ibm905 is not supported by the Encoding Standard',
+      'ibm918 is not supported by the Encoding Standard',
+      'iso-2022-jp-1 is not supported by the Encoding Standard',
+      'iso-2022-jp-2 is not supported by the Encoding Standard',
+      'iso-2022-jp-3 is not supported by the Encoding Standard',
+      'iso-8859-8 visual is not supported by the Encoding Standard',
+      'jis_c6226-1978 is not supported by the Encoding Standard',
+      'jis_x0208-1983 is not supported by the Encoding Standard',
+      'jis_x0208-1990 is not supported by the Encoding Standard',
+      'jis_x0212-1990 is not supported by the Encoding Standard',
+      'johab is not supported by the Encoding Standard',
+      'latin9 is not supported by the Encoding Standard',
+      'norwegian is not supported by the Encoding Standard',
+      'sami-ws2 is not supported by the Encoding Standard',
+      'scsu is not supported by the Encoding Standard',
+      'shift_jis_x0213-2000 is not supported by the Encoding Standard',
+      'swedish is not supported by the Encoding Standard',
+      'tcvn is not supported by the Encoding Standard',
+      'tis-620-2533 is not supported by the Encoding Standard',
+      'utf-7 is not supported by the Encoding Standard',
+      'utf-32 is not supported by the Encoding Standard',
+      'viscii is not supported by the Encoding Standard',
+      'windows-936-2000 is not supported by the Encoding Standard',
+      'windows-sami-2 is not supported by the Encoding Standard',
+      'ws2 is not supported by the Encoding Standard',
+      'x-chinese-cns is not supported by the Encoding Standard',
+      'x-chinese-eten is not supported by the Encoding Standard',
+      'x-cp20001 is not supported by the Encoding Standard',
+      'x-cp20003 is not supported by the Encoding Standard',
+      'x-cp20004 is not supported by the Encoding Standard',
+      'x-cp20005 is not supported by the Encoding Standard',
+      'x-cp20261 is not supported by the Encoding Standard',
+      'x-cp20269 is not supported by the Encoding Standard',
+      'x-cp20936 is not supported by the Encoding Standard',
+      'x-cp20949 is not supported by the Encoding Standard',
+      'x-cp21027 is not supported by the Encoding Standard',
+      'x-cp50227 is not supported by the Encoding Standard',
+      'x-cp50229 is not supported by the Encoding Standard',
+      'x-ebcdic-koreanextended is not supported by the Encoding Standard',
+      'x-europa is not supported by the Encoding Standard',
+      'x-ia5 is not supported by the Encoding Standard',
+      'x-ia5-german is not supported by the Encoding Standard',
+      'x-ia5-norwegian is not supported by the Encoding Standard',
+      'x-ia5-swedish is not supported by the Encoding Standard',
+      'x-iscii-as is not supported by the Encoding Standard',
+      'x-iscii-be is not supported by the Encoding Standard',
+      'x-iscii-de is not supported by the Encoding Standard',
+      'x-iscii-gu is not supported by the Encoding Standard',
+      'x-iscii-ka is not supported by the Encoding Standard',
+      'x-iscii-ma is not supported by the Encoding Standard',
+      'x-iscii-or is not supported by the Encoding Standard',
+      'x-iscii-pa is not supported by the Encoding Standard',
+      'x-iscii-t is not supported by the Encoding Standard',
+      'x-iscii-ta is not supported by the Encoding Standard',
+      'x-iscii-te is not supported by the Encoding Standard',
+      'x-mac-arabic is not supported by the Encoding Standard',
+      'x-mac-ce is not supported by the Encoding Standard',
+      'x-mac-centraleurroman is not supported by the Encoding Standard',
+      'x-mac-chinesesimp is not supported by the Encoding Standard',
+      'x-mac-chinesetrad is not supported by the Encoding Standard',
+      'x-mac-croatian is not supported by the Encoding Standard',
+      'x-mac-devanagari is not supported by the Encoding Standard',
+      'x-mac-dingbats is not supported by the Encoding Standard',
+      'x-mac-farsi is not supported by the Encoding Standard',
+      'x-mac-greek is not supported by the Encoding Standard',
+      'x-mac-gujarati is not supported by the Encoding Standard',
+      'x-mac-gurmukhi is not supported by the Encoding Standard',
+      'x-mac-hebrew is not supported by the Encoding Standard',
+      'x-mac-icelandic is not supported by the Encoding Standard',
+      'x-mac-japanese is not supported by the Encoding Standard',
+      'x-mac-korean is not supported by the Encoding Standard',
+      'x-mac-roman-latin1 is not supported by the Encoding Standard',
+      'x-mac-romanian is not supported by the Encoding Standard',
+      'x-mac-symbol is not supported by the Encoding Standard',
+      'x-mac-thai is not supported by the Encoding Standard',
+      'x-mac-tibetan is not supported by the Encoding Standard',
+      'x-mac-turkish is not supported by the Encoding Standard',
+      'x-mac-vt100 is not supported by the Encoding Standard',
+      'x-nextstep is not supported by the Encoding Standard',
+      'x-vps is not supported by the Encoding Standard',
+      '_autodetect is not supported by the Encoding Standard',
+      '_autodetect_all is not supported by the Encoding Standard',
+      '_autodetect_kr is not supported by the Encoding Standard',
+    ],
   },
 } satisfies TestRunnerConfig;

From 79b3f4b6a7f9d24908bbde01d7891001cd55d5e2 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 20 Feb 2026 17:12:59 -0500
Subject: [PATCH 2/3] put textdecoder behind compat flag

---
 src/workerd/api/encoding.c++ | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index e4590818a66..6b5c84693e5 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -455,9 +455,17 @@ jsg::Ref<TextDecoder> TextDecoder::constructor(jsg::Lock& js,
     case Encoding::Gb18030:
     case Encoding::Gbk:
     case Encoding::Iso2022_Jp:
-    case Encoding::Shift_Jis:
-    case Encoding::Windows_1252:
+    case Encoding::Shift_Jis: {
+      // If the feature flag is disabled, we use the ICU decoder.
+      if (!FeatureFlags::get(js).getTextDecoderCjkDecoder()) {
+        break;
+      }
+
+      // We fall through to LegacyDecoder in order to avoid breaking changes.
+      [[fallthrough]];
+    }
     case Encoding::X_User_Defined:
+    case Encoding::Windows_1252:
       return js.alloc<TextDecoder>(LegacyDecoder(encoding, DecoderFatal(options.fatal)), options);
     default:
       break;

From 6460a2727a385933dc1a553509c63bb9967240e8 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 24 Feb 2026 12:54:20 -0500
Subject: [PATCH 3/3] improve code consistency

---
 src/rust/encoding/lib.rs               | 24 ++++++++++++++++++------
 src/workerd/api/encoding-legacy.c++    | 12 ++++++++++--
 src/workerd/api/encoding.c++           | 13 ++-----------
 src/workerd/api/tests/encoding-test.js | 18 ++++++++++--------
 4 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/src/rust/encoding/lib.rs b/src/rust/encoding/lib.rs
index b6b112156ff..3a89198de4a 100644
--- a/src/rust/encoding/lib.rs
+++ b/src/rust/encoding/lib.rs
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2022 Cloudflare, Inc.
+// Copyright (c) 2026 Cloudflare, Inc.
 // Licensed under the Apache 2.0 license found in the LICENSE file or at:
 //     https://opensource.org/licenses/Apache-2.0
 
@@ -36,6 +36,11 @@ mod ffi {
         had_error: bool,
     }
 
+    struct DecodeOptions { // Per-call settings passed by reference to `decode`.
+        flush: bool, // True on the final chunk; forwarded to the inner decoder's decode calls.
+        fatal: bool, // True selects the without-replacement path; errors set `had_error`.
+    }
+
     extern "Rust" {
         type Decoder;
 
@@ -47,7 +52,7 @@ mod ffi {
         /// Decode a chunk of bytes. Set `flush` to true on the final chunk.
         /// When `fatal` is true and an error is encountered, `had_error` is
         /// set and the output may be incomplete.
-        fn decode(decoder: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> DecodeResult;
+        fn decode(decoder: &mut Decoder, input: &[u8], options: &DecodeOptions) -> DecodeResult;
 
         /// Reset the decoder to its initial state.
         fn reset(decoder: &mut Decoder);
@@ -85,7 +90,14 @@ pub fn new_decoder(encoding: ffi::Encoding) -> Box<Decoder> {
     })
 }
 
-pub fn decode(state: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> ffi::DecodeResult {
+pub fn decode(
+    state: &mut Decoder,
+    input: &[u8],
+    options: &ffi::DecodeOptions,
+) -> ffi::DecodeResult {
+    // max_utf16_buffer_length() returns None on usize overflow. The +4 covers extra
+    // UTF-16 code units from decoder state. Safe even if slightly short since the decode loop
+    // below resizes on OutputFull.
     let max_len = state
         .inner
         .max_utf16_buffer_length(input.len())
@@ -94,12 +106,12 @@ pub fn decode(state: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> ff
     let mut total_read = 0usize;
     let mut total_written = 0usize;
 
-    if fatal {
+    if options.fatal {
         loop {
             let (result, read, written) = state.inner.decode_to_utf16_without_replacement(
                 &input[total_read..],
                 &mut output[total_written..],
-                flush,
+                options.flush,
             );
             total_read += read;
             total_written += written;
@@ -124,7 +136,7 @@ pub fn decode(state: &mut Decoder, input: &[u8], flush: bool, fatal: bool) -> ff
             let (result, read, written, _had_errors) = state.inner.decode_to_utf16(
                 &input[total_read..],
                 &mut output[total_written..],
-                flush,
+                options.flush,
             );
             total_read += read;
             total_written += written;
diff --git a/src/workerd/api/encoding-legacy.c++ b/src/workerd/api/encoding-legacy.c++
index 378afec91b2..aef0562ff53 100644
--- a/src/workerd/api/encoding-legacy.c++
+++ b/src/workerd/api/encoding-legacy.c++
@@ -53,8 +53,16 @@ void LegacyDecoder::reset() {
 
 kj::Maybe<jsg::JsString> LegacyDecoder::decode(
     jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
-  auto result = ::workerd::rust::encoding::decode(
-      *state, buffer.as<kj_rs::RustMutable>(), flush, fatal.toBool());
+  // Reset decoder state after flush, matching IcuDecoder's KJ_DEFER contract.
+  // This ensures decodePtr() (used by TextDecoderStream) resets correctly on flush.
+  KJ_DEFER({
+    if (flush) reset();
+  });
+
+  ::workerd::rust::encoding::DecodeOptions options{.flush = flush, .fatal = fatal.toBool()};
+  // kj_rs::RustMutable is used to avoid a copy of the underlying buffer.
+  auto result =
+      ::workerd::rust::encoding::decode(*state, buffer.as<kj_rs::RustMutable>(), kj::mv(options));
 
   if (fatal.toBool() && result.had_error) {
     // Decoder state already reset by the Rust side on fatal error.
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 6b5c84693e5..6ef6b46589a 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -303,7 +303,6 @@ kj::Maybe<IcuDecoder> IcuDecoder::create(Encoding encoding, bool fatal, bool ign
 kj::Maybe<jsg::JsString> IcuDecoder::decode(
     jsg::Lock& js, kj::ArrayPtr<const kj::byte> buffer, bool flush) {
   UErrorCode status = U_ZERO_ERROR;
-  kj::Maybe<kj::Array<kj::byte>> merged;
   const auto maxCharSize = [this]() { return ucnv_getMaxCharSize(inner.get()); };
 
   const auto isUnicode = [this]() {
@@ -485,17 +484,9 @@ jsg::JsString TextDecoder::decode(jsg::Lock& js,
     jsg::Optional<kj::Array<const kj::byte>> maybeInput,
     jsg::Optional<DecodeOptions> maybeOptions) {
   auto options = maybeOptions.orDefault(DEFAULT_OPTIONS);
-  // Per spec, omitting input is end-of-queue, so we must flush pending bytes.
-  const auto flush = maybeInput == kj::none || !options.stream;
   auto& input = maybeInput.orDefault(EMPTY);
-  auto result =
-      JSG_REQUIRE_NONNULL(getImpl().decode(js, input, flush), TypeError, "Failed to decode input.");
-  // Per WHATWG spec, when flush is set the decoder is reset to a new instance
-  // so subsequent calls start with clean state.
-  if (flush) {
-    getImpl().reset();
-  }
-  return kj::mv(result);
+  return JSG_REQUIRE_NONNULL(
+      getImpl().decode(js, input, !options.stream), TypeError, "Failed to decode input.");
 }
 
 kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
diff --git a/src/workerd/api/tests/encoding-test.js b/src/workerd/api/tests/encoding-test.js
index c5940d429e6..7fd8ae9f02c 100644
--- a/src/workerd/api/tests/encoding-test.js
+++ b/src/workerd/api/tests/encoding-test.js
@@ -15,8 +15,6 @@ function decodeStreaming(decoder, input) {
   return x;
 }
 
-const u = (...args) => Uint8Array.of(...args);
-
 // From https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
 const windows1252Labels = [
   'ansi_x3.4-1968',
@@ -888,11 +886,11 @@ export const stickyMultibyteStateIso2022JpLoose = {
 
     const d = new TextDecoder('iso-2022-jp');
     for (const [bytes, text] of vectors) {
-      strictEqual(d.decode(u(0x40)), '@');
+      strictEqual(d.decode(Uint8Array.of(0x40)), '@');
       strictEqual(d.decode(Uint8Array.from(bytes)), text);
-      strictEqual(d.decode(u(0x40)), '@');
-      strictEqual(d.decode(u(0x2a)), '*');
-      strictEqual(d.decode(u(0x42)), 'B');
+      strictEqual(d.decode(Uint8Array.of(0x40)), '@');
+      strictEqual(d.decode(Uint8Array.of(0x2a)), '*');
+      strictEqual(d.decode(Uint8Array.of(0x42)), 'B');
     }
   },
 };
@@ -904,7 +902,9 @@ export const fatalStreamGb18030Gbk = {
         const d = new TextDecoder(encoding, { fatal: true });
         strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC');
         throws(() =>
-          d.decode(u(0x81, 0x30, 0x21, 0x21, 0x21), { stream: true })
+          d.decode(Uint8Array.of(0x81, 0x30, 0x21, 0x21, 0x21), {
+            stream: true,
+          })
         );
         strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC');
       }
@@ -913,7 +913,9 @@ export const fatalStreamGb18030Gbk = {
         const d = new TextDecoder(encoding, { fatal: true });
         strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC');
         throws(() =>
-          d.decode(u(0x81, 0x30, 0x81, 0x42, 0x42), { stream: true })
+          d.decode(Uint8Array.of(0x81, 0x30, 0x81, 0x42, 0x42), {
+            stream: true,
+          })
         );
         strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC');
       }