From 350519165ff083ce59181972ee8cf2e243e4ff9d Mon Sep 17 00:00:00 2001
From: Hugo Heuzard <hugo.heuzard@gmail.com>
Date: Sat, 2 May 2026 15:57:46 +0200
Subject: [PATCH 1/5] quickjs-libc: add TextEncoder and TextDecoder

Implements the WHATWG Encoding API's TextEncoder and TextDecoder
classes and installs them on the global object from js_std_add_helpers,
alongside `console`, `print`, and `scriptArgs`. Only UTF-8 is
supported: it is the only encoding TextEncoder defines, and the legacy
encodings the spec also lists for TextDecoder are not implemented.

TextEncoder:
  * encode(string?)         -> Uint8Array
  * encodeInto(string, dst) -> { read, written }
  * encoding                -> "utf-8"

TextDecoder:
  * new TextDecoder(label?, { fatal?, ignoreBOM? })
  * decode(input?, { stream? }) -> string
  * encoding / fatal / ignoreBOM accessors

decode() handles:
  * any TypedArray view or ArrayBuffer (BufferSource) as input,
  * UTF-8 BOM stripping (suppressed by ignoreBOM),
  * stream mode by saving up to 3 trailing bytes of an incomplete
    sequence and prepending them on the next call,
  * fatal mode by throwing TypeError on any encoding error
    (including a trailing partial sequence in non-stream mode),
  * non-fatal mode by emitting U+FFFD for each invalid byte.

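The label parser accepts the WHATWG list of UTF-8 aliases
(case-insensitive, ASCII-whitespace trimmed). Every other label throws
RangeError: the spec requires this for unknown labels, and this
implementation extends it to the legacy encodings it does not provide.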

UTF-8 decoding reuses the existing utf8_decode / utf8_decode_len
helpers in cutils.h, so no new UTF-8 logic is introduced.
---
 quickjs-libc.c | 445 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 445 insertions(+)

diff --git a/quickjs-libc.c b/quickjs-libc.c
index 1fe3cf5c1..ded1d31b5 100644
--- a/quickjs-libc.c
+++ b/quickjs-libc.c
@@ -197,6 +197,8 @@ typedef struct JSThreadState {
 #endif // USE_WORKER
     JSClassID std_file_class_id;
     JSClassID worker_class_id;
+    JSClassID text_encoder_class_id;
+    JSClassID text_decoder_class_id;
 } JSThreadState;
 
 static uint64_t os_pending_signals;
@@ -4599,6 +4601,447 @@ static JSValue js_print(JSContext *ctx, JSValueConst this_val,
     return JS_UNDEFINED;
 }
 
+/**********************************************************/
+/* WHATWG Encoding: TextEncoder / TextDecoder (UTF-8 only) */
+
+typedef struct {
+    bool fatal;
+    bool ignore_bom;
+    /* Once we've decoded any input (or skipped a BOM), we stop treating
+       a leading U+FEFF as a BOM. Reset on non-stream decode(). */
+    bool bom_seen;
+    /* Up to 3 trailing bytes of an incomplete UTF-8 sequence saved
+       across stream decode() calls. */
+    uint8_t pending[4];
+    int pending_len;
+} JSTextDecoder;
+
+static void js_text_decoder_finalizer(JSRuntime *rt, JSValue val)
+{
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSTextDecoder *td = JS_GetOpaque(val, ts->text_decoder_class_id);
+    js_free_rt(rt, td);
+}
+
+static JSClassDef js_text_encoder_class = {
+    "TextEncoder",
+};
+
+static JSClassDef js_text_decoder_class = {
+    "TextDecoder",
+    .finalizer = js_text_decoder_finalizer,
+};
+
+/* Lead-byte length of a UTF-8 sequence, or 0 for invalid/continuation. */
+static int js_utf8_seq_len(uint8_t b)
+{
+    if (b < 0x80) return 1;
+    if (b < 0xC2) return 0;
+    if (b < 0xE0) return 2;
+    if (b < 0xF0) return 3;
+    if (b < 0xF5) return 4;
+    return 0;
+}
+
+/* TextEncoder ------------------------------------------------------------ */
+
+static JSValue js_text_encoder_constructor(JSContext *ctx,
+                                           JSValueConst new_target,
+                                           int argc, JSValueConst *argv)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSValue proto, obj;
+
+    proto = JS_GetPropertyStr(ctx, new_target, "prototype");
+    if (JS_IsException(proto))
+        return proto;
+    obj = JS_NewObjectProtoClass(ctx, proto, ts->text_encoder_class_id);
+    JS_FreeValue(ctx, proto);
+    if (JS_IsException(obj))
+        return obj;
+    /* Stateless; opaque is just a brand. */
+    JS_SetOpaque(obj, (void *)1);
+    return obj;
+}
+
+static JSValue js_text_encoder_encode(JSContext *ctx, JSValueConst this_val,
+                                      int argc, JSValueConst *argv)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    const char *str;
+    size_t len;
+    JSValue ret;
+
+    if (!JS_GetOpaque(this_val, ts->text_encoder_class_id))
+        return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder");
+    if (argc < 1 || JS_IsUndefined(argv[0]))
+        return JS_NewUint8ArrayCopy(ctx, NULL, 0);
+    str = JS_ToCStringLen(ctx, &len, argv[0]);
+    if (!str)
+        return JS_EXCEPTION;
+    ret = JS_NewUint8ArrayCopy(ctx, (const uint8_t *)str, len);
+    JS_FreeCString(ctx, str);
+    return ret;
+}
+
+static JSValue js_text_encoder_encode_into(JSContext *ctx, JSValueConst this_val,
+                                           int argc, JSValueConst *argv)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    const char *src;
+    size_t src_len, dst_len;
+    uint8_t *dst;
+    int read = 0, written = 0;
+    const uint8_t *p, *end, *next;
+    uint32_t cp;
+    size_t enc_len;
+    JSValue ret;
+
+    if (!JS_GetOpaque(this_val, ts->text_encoder_class_id))
+        return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder");
+    if (argc < 2)
+        return JS_ThrowTypeError(ctx, "TextEncoder.encodeInto requires two arguments");
+    if (JS_GetTypedArrayType(argv[1]) != JS_TYPED_ARRAY_UINT8)
+        return JS_ThrowTypeError(ctx,
+            "TextEncoder.encodeInto: destination must be a Uint8Array");
+    dst = JS_GetUint8Array(ctx, &dst_len, argv[1]);
+    if (!dst)
+        return JS_EXCEPTION;
+    src = JS_ToCStringLen(ctx, &src_len, argv[0]);
+    if (!src)
+        return JS_EXCEPTION;
+
+    p = (const uint8_t *)src;
+    end = p + src_len;
+    while (p < end) {
+        cp = utf8_decode(p, &next);
+        enc_len = utf8_encode_len(cp);
+        if ((size_t)written + enc_len > dst_len)
+            break;
+        utf8_encode(dst + written, cp);
+        written += (int)enc_len;
+        /* Spec: read counts UTF-16 code units consumed from the input. */
+        read += (cp > 0xFFFF) ? 2 : 1;
+        p = next;
+    }
+    JS_FreeCString(ctx, src);
+
+    ret = JS_NewObject(ctx);
+    if (JS_IsException(ret))
+        return ret;
+    JS_DefinePropertyValueStr(ctx, ret, "read",
+                              JS_NewInt32(ctx, read), JS_PROP_C_W_E);
+    JS_DefinePropertyValueStr(ctx, ret, "written",
+                              JS_NewInt32(ctx, written), JS_PROP_C_W_E);
+    return ret;
+}
+
+static JSValue js_text_encoder_get_encoding(JSContext *ctx, JSValueConst this_val)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    if (!JS_GetOpaque(this_val, ts->text_encoder_class_id))
+        return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder");
+    return JS_NewString(ctx, "utf-8");
+}
+
+static const JSCFunctionListEntry js_text_encoder_proto_funcs[] = {
+    JS_PROP_STRING_DEF("[Symbol.toStringTag]", "TextEncoder", JS_PROP_CONFIGURABLE),
+    JS_CFUNC_DEF("encode", 1, js_text_encoder_encode),
+    JS_CFUNC_DEF("encodeInto", 2, js_text_encoder_encode_into),
+    JS_CGETSET_DEF("encoding", js_text_encoder_get_encoding, NULL),
+};
+
+/* TextDecoder ------------------------------------------------------------ */
+
+/* Match a label against the WHATWG list of UTF-8 aliases (case-insensitive,
+   ASCII-whitespace trimmed). Returns 0 on match, -1 otherwise. */
+static int js_text_decoder_label_is_utf8(const char *label, size_t len)
+{
+    static const char * const aliases[] = {
+        "unicode-1-1-utf-8", "unicode11utf8", "unicode20utf8",
+        "utf-8", "utf8", "x-unicode20utf8",
+    };
+    size_t i, j;
+    while (len > 0 && (*label == ' ' || *label == '\t' || *label == '\n'
+                       || *label == '\r' || *label == '\f')) {
+        label++; len--;
+    }
+    while (len > 0 && (label[len-1] == ' ' || label[len-1] == '\t'
+                       || label[len-1] == '\n' || label[len-1] == '\r'
+                       || label[len-1] == '\f')) {
+        len--;
+    }
+    for (i = 0; i < countof(aliases); i++) {
+        size_t alen = strlen(aliases[i]);
+        if (alen != len) continue;
+        for (j = 0; j < len; j++) {
+            int c = (unsigned char)label[j];
+            if (c >= 'A' && c <= 'Z') c += 32;
+            if (c != aliases[i][j]) break;
+        }
+        if (j == len) return 0;
+    }
+    return -1;
+}
+
+static JSValue js_text_decoder_constructor(JSContext *ctx,
+                                           JSValueConst new_target,
+                                           int argc, JSValueConst *argv)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSValue proto, obj;
+    JSTextDecoder *td;
+    bool fatal = false, ignore_bom = false;
+
+    if (argc >= 1 && !JS_IsUndefined(argv[0])) {
+        size_t llen;
+        const char *label = JS_ToCStringLen(ctx, &llen, argv[0]);
+        if (!label)
+            return JS_EXCEPTION;
+        if (js_text_decoder_label_is_utf8(label, llen) < 0) {
+            JSValue err = JS_ThrowRangeError(ctx,
+                "The \"%s\" encoding is not supported", label);
+            JS_FreeCString(ctx, label);
+            return err;
+        }
+        JS_FreeCString(ctx, label);
+    }
+    if (argc >= 2 && JS_IsObject(argv[1])) {
+        JSValue v = JS_GetPropertyStr(ctx, argv[1], "fatal");
+        if (JS_IsException(v)) return v;
+        fatal = JS_ToBool(ctx, v);
+        JS_FreeValue(ctx, v);
+        v = JS_GetPropertyStr(ctx, argv[1], "ignoreBOM");
+        if (JS_IsException(v)) return v;
+        ignore_bom = JS_ToBool(ctx, v);
+        JS_FreeValue(ctx, v);
+    }
+
+    proto = JS_GetPropertyStr(ctx, new_target, "prototype");
+    if (JS_IsException(proto))
+        return proto;
+    obj = JS_NewObjectProtoClass(ctx, proto, ts->text_decoder_class_id);
+    JS_FreeValue(ctx, proto);
+    if (JS_IsException(obj))
+        return obj;
+    td = js_mallocz(ctx, sizeof(*td));
+    if (!td) {
+        JS_FreeValue(ctx, obj);
+        return JS_EXCEPTION;
+    }
+    td->fatal = fatal;
+    td->ignore_bom = ignore_bom;
+    JS_SetOpaque(obj, td);
+    return obj;
+}
+
+/* Get the byte view of a BufferSource (ArrayBuffer or any TypedArray view).
+   On success returns 0 with bytes/len populated; on failure returns -1
+   with a TypeError pending. JS_UNDEFINED yields the empty input. */
+static int js_text_decoder_get_bytes(JSContext *ctx, JSValueConst v,
+                                     const uint8_t **bytes, size_t *len)
+{
+    if (JS_IsUndefined(v)) {
+        *bytes = NULL; *len = 0;
+        return 0;
+    }
+    if (JS_IsArrayBuffer(v)) {
+        size_t l;
+        uint8_t *p = JS_GetArrayBuffer(ctx, &l, v);
+        if (!p) return -1;
+        *bytes = p; *len = l;
+        return 0;
+    }
+    if (JS_GetTypedArrayType(v) >= 0) {
+        size_t off, blen, bpe, ablen;
+        JSValue ab = JS_GetTypedArrayBuffer(ctx, v, &off, &blen, &bpe);
+        uint8_t *p;
+        if (JS_IsException(ab)) return -1;
+        p = JS_GetArrayBuffer(ctx, &ablen, ab);
+        JS_FreeValue(ctx, ab);
+        if (!p) return -1;
+        *bytes = p + off; *len = blen;
+        return 0;
+    }
+    JS_ThrowTypeError(ctx,
+        "TextDecoder.decode: input must be an ArrayBuffer or TypedArray");
+    return -1;
+}
+
+/* TextDecoder.prototype.decode(input?, { stream? }): UTF-8 bytes -> string.
+   Ill-formed input yields U+FFFD, or a TypeError in fatal mode; that covers
+   ED A0..BF xx, which utf8_decode() would turn into a surrogate code point.
+   Without ignoreBOM, a U+FEFF is dropped only if it starts the stream. */
+static JSValue js_text_decoder_decode(JSContext *ctx, JSValueConst this_val,
+                                      int argc, JSValueConst *argv)
+{
+    JSThreadState *ts = js_get_thread_state(JS_GetRuntime(ctx));
+    JSTextDecoder *td;
+    const uint8_t *src, *p, *p_end, *next;
+    size_t src_len, out_len = 0, out_cap;
+    bool stream = false;
+    uint8_t *combined = NULL, *out = NULL;
+    uint32_t cp;
+    JSValue ret;
+    JSValueConst input = argc > 0 ? argv[0] : JS_UNDEFINED;
+
+    td = JS_GetOpaque(this_val, ts->text_decoder_class_id);
+    if (!td)
+        return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder");
+    if (argc >= 2 && JS_IsObject(argv[1])) {
+        JSValue v = JS_GetPropertyStr(ctx, argv[1], "stream");
+        if (JS_IsException(v)) return v;
+        stream = JS_ToBool(ctx, v);
+        JS_FreeValue(ctx, v);
+    }
+    if (js_text_decoder_get_bytes(ctx, input, &src, &src_len) < 0)
+        return JS_EXCEPTION;
+
+    if (td->pending_len > 0) {
+        size_t total = (size_t)td->pending_len + src_len;
+        combined = js_malloc(ctx, total ? total : 1);
+        if (!combined) return JS_EXCEPTION;
+        memcpy(combined, td->pending, td->pending_len);
+        if (src_len > 0) memcpy(combined + td->pending_len, src, src_len);
+        src = combined;
+        src_len = total;
+        td->pending_len = 0;
+    }
+
+    /* Worst case output: each byte expands to 3-byte U+FFFD replacement. */
+    out_cap = src_len * 3 + 4;
+    out = js_malloc(ctx, out_cap);
+    if (!out) {
+        if (combined) js_free(ctx, combined);
+        return JS_EXCEPTION;
+    }
+
+    p = src;
+    p_end = src + src_len;
+    while (p < p_end) {
+        int seq_len = js_utf8_seq_len(*p);
+        if (seq_len == 0) {
+            if (td->fatal) goto invalid;
+            out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD;
+            p++;
+            continue;
+        }
+        if (p + seq_len > p_end) {
+            /* Incomplete trailing sequence. */
+            if (stream) {
+                int rem = (int)(p_end - p);
+                memcpy(td->pending, p, rem);
+                td->pending_len = rem;
+                p = p_end;
+                break;
+            }
+            if (td->fatal) goto invalid;
+            out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD;
+            p = p_end;
+            break;
+        }
+        cp = utf8_decode_len(p, p_end - p, &next);
+        if ((cp == 0xFFFD && next == p + 1 && *p >= 0x80)
+            || (cp >= 0xD800 && cp <= 0xDFFF)) { /* surrogates are ill-formed */
+            if (td->fatal) goto invalid;
+            out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD;
+            p++; /* one error for the lead; rescan the bytes after it */
+            continue;
+        }
+        if (!td->bom_seen) {
+            td->bom_seen = true;
+            if (!td->ignore_bom && cp == 0xFEFF && out_len == 0) {
+                p = next;
+                continue;
+            }
+        }
+        out_len += utf8_encode(out + out_len, cp);
+        p = next;
+    }
+
+    /* Streaming: any output (even U+FFFD) ends BOM detection; else reset. */
+    td->bom_seen = stream && (td->bom_seen || out_len > 0);
+    if (!stream) td->pending_len = 0;
+    ret = JS_NewStringLen(ctx, (const char *)out, out_len);
+    js_free(ctx, out);
+    if (combined) js_free(ctx, combined);
+    return ret;
+
+invalid:
+    js_free(ctx, out);
+    if (combined) js_free(ctx, combined);
+    return JS_ThrowTypeError(ctx, "The encoded data was not valid");
+}
+
+static JSValue js_text_decoder_get_encoding(JSContext *ctx, JSValueConst this_val)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    if (!JS_GetOpaque(this_val, ts->text_decoder_class_id))
+        return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder");
+    return JS_NewString(ctx, "utf-8");
+}
+
+static JSValue js_text_decoder_get_fatal(JSContext *ctx, JSValueConst this_val)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSTextDecoder *td = JS_GetOpaque(this_val, ts->text_decoder_class_id);
+    if (!td) return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder");
+    return JS_NewBool(ctx, td->fatal);
+}
+
+static JSValue js_text_decoder_get_ignore_bom(JSContext *ctx, JSValueConst this_val)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSTextDecoder *td = JS_GetOpaque(this_val, ts->text_decoder_class_id);
+    if (!td) return JS_ThrowTypeError(ctx, "'this' is not a TextDecoder");
+    return JS_NewBool(ctx, td->ignore_bom);
+}
+
+static const JSCFunctionListEntry js_text_decoder_proto_funcs[] = {
+    JS_PROP_STRING_DEF("[Symbol.toStringTag]", "TextDecoder", JS_PROP_CONFIGURABLE),
+    JS_CFUNC_DEF("decode", 1, js_text_decoder_decode),
+    JS_CGETSET_DEF("encoding", js_text_decoder_get_encoding, NULL),
+    JS_CGETSET_DEF("fatal", js_text_decoder_get_fatal, NULL),
+    JS_CGETSET_DEF("ignoreBOM", js_text_decoder_get_ignore_bom, NULL),
+};
+
+static void js_std_install_text_codecs(JSContext *ctx, JSValue global_obj)
+{
+    JSRuntime *rt = JS_GetRuntime(ctx);
+    JSThreadState *ts = js_get_thread_state(rt);
+    JSValue proto, ctor;
+
+    JS_NewClassID(rt, &ts->text_encoder_class_id);
+    JS_NewClass(rt, ts->text_encoder_class_id, &js_text_encoder_class);
+    proto = JS_NewObject(ctx);
+    JS_SetPropertyFunctionList(ctx, proto, js_text_encoder_proto_funcs,
+                               countof(js_text_encoder_proto_funcs));
+    JS_SetClassProto(ctx, ts->text_encoder_class_id, proto);
+    ctor = JS_NewCFunction2(ctx, js_text_encoder_constructor, "TextEncoder", 0,
+                            JS_CFUNC_constructor, 0);
+    JS_SetConstructor(ctx, ctor, proto);
+    JS_SetPropertyStr(ctx, global_obj, "TextEncoder", ctor);
+
+    JS_NewClassID(rt, &ts->text_decoder_class_id);
+    JS_NewClass(rt, ts->text_decoder_class_id, &js_text_decoder_class);
+    proto = JS_NewObject(ctx);
+    JS_SetPropertyFunctionList(ctx, proto, js_text_decoder_proto_funcs,
+                               countof(js_text_decoder_proto_funcs));
+    JS_SetClassProto(ctx, ts->text_decoder_class_id, proto);
+    ctor = JS_NewCFunction2(ctx, js_text_decoder_constructor, "TextDecoder", 2,
+                            JS_CFUNC_constructor, 0);
+    JS_SetConstructor(ctx, ctor, proto);
+    JS_SetPropertyStr(ctx, global_obj, "TextDecoder", ctor);
+}
+
 void js_std_add_helpers(JSContext *ctx, int argc, char **argv)
 {
     JSValue global_obj, console, args;
@@ -4624,6 +5067,8 @@ void js_std_add_helpers(JSContext *ctx, int argc, char **argv)
     JS_SetPropertyStr(ctx, global_obj, "print",
                       JS_NewCFunction(ctx, js_print, "print", 1));
 
+    js_std_install_text_codecs(ctx, global_obj);
+
     JS_FreeValue(ctx, global_obj);
 }
 

From d1e7882b8074ba99360bb6afff7b945b97b0c2d7 Mon Sep 17 00:00:00 2001
From: Hugo Heuzard <hugo.heuzard@gmail.com>
Date: Thu, 7 May 2026 16:55:16 +0200
Subject: [PATCH 2/5] quickjs-libc: stringify TextEncoder.encodeInto source
 before validating destination

Per the WHATWG Encoding spec, encodeInto's first argument is converted
to a USVString before the second is checked for being a Uint8Array, so
the source's toString side effects must be observable even when the
destination is invalid.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quickjs-libc.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/quickjs-libc.c b/quickjs-libc.c
index ded1d31b5..9c1fb819c 100644
--- a/quickjs-libc.c
+++ b/quickjs-libc.c
@@ -4704,15 +4704,19 @@ static JSValue js_text_encoder_encode_into(JSContext *ctx, JSValueConst this_val
         return JS_ThrowTypeError(ctx, "'this' is not a TextEncoder");
     if (argc < 2)
         return JS_ThrowTypeError(ctx, "TextEncoder.encodeInto requires two arguments");
-    if (JS_GetTypedArrayType(argv[1]) != JS_TYPED_ARRAY_UINT8)
+    src = JS_ToCStringLen(ctx, &src_len, argv[0]);
+    if (!src)
+        return JS_EXCEPTION;
+    if (JS_GetTypedArrayType(argv[1]) != JS_TYPED_ARRAY_UINT8) {
+        JS_FreeCString(ctx, src);
         return JS_ThrowTypeError(ctx,
             "TextEncoder.encodeInto: destination must be a Uint8Array");
+    }
     dst = JS_GetUint8Array(ctx, &dst_len, argv[1]);
-    if (!dst)
-        return JS_EXCEPTION;
-    src = JS_ToCStringLen(ctx, &src_len, argv[0]);
-    if (!src)
+    if (!dst) {
+        JS_FreeCString(ctx, src);
         return JS_EXCEPTION;
+    }
 
     p = (const uint8_t *)src;
     end = p + src_len;

From b4b6c873bb4121eb049a5db1982decdb31c96887 Mon Sep 17 00:00:00 2001
From: Hugo Heuzard <hugo.heuzard@gmail.com>
Date: Thu, 7 May 2026 16:59:30 +0200
Subject: [PATCH 3/5] quickjs-libc: distinguish partial vs invalid trailing
 UTF-8 in TextDecoder

The "incomplete trailing sequence" branch only checked the lead byte's
declared length against remaining bytes, so a lead followed by an
out-of-range continuation (e.g. E0 41, E0 80, F0 80, F4 90) silently
dropped the offending byte and emitted a single U+FFFD. Per WHATWG, an
out-of-range continuation must produce U+FFFD and be re-read as a fresh
lead, yielding e.g. "U+FFFD U+0041" for E0 41 and two U+FFFD for E0 80.
Stream mode had the same issue: it would buffer bytes already known to
violate the continuation bounds.

Add a small helper that returns the first-continuation-byte bounds for
each lead (matching utf8_decode's acceptance set) and use it to walk the
available bytes; emit eagerly on the first out-of-range byte, and only
defer or flush when every available byte is a valid continuation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quickjs-libc.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/quickjs-libc.c b/quickjs-libc.c
index 9c1fb819c..94f2aabdc 100644
--- a/quickjs-libc.c
+++ b/quickjs-libc.c
@@ -4643,6 +4643,17 @@ static int js_utf8_seq_len(uint8_t b)
     return 0;
 }
 
+/* Bounds for the first continuation byte after `lead` (WHATWG UTF-8 decoder;
+   ED excludes surrogates). Later continuation bytes are always [0x80, 0xBF]. */
+static void js_utf8_first_cont_bounds(uint8_t lead, uint8_t *lo, uint8_t *hi)
+{
+    if (lead == 0xE0)      { *lo = 0xA0; *hi = 0xBF; }
+    else if (lead == 0xED) { *lo = 0x80; *hi = 0x9F; }
+    else if (lead == 0xF0) { *lo = 0x90; *hi = 0xBF; }
+    else if (lead == 0xF4) { *lo = 0x80; *hi = 0x8F; }
+    else                   { *lo = 0x80; *hi = 0xBF; }
+}
+
 /* TextEncoder ------------------------------------------------------------ */
 
 static JSValue js_text_encoder_constructor(JSContext *ctx,
@@ -4936,11 +4947,34 @@ static JSValue js_text_decoder_decode(JSContext *ctx, JSValueConst this_val,
             continue;
         }
         if (p + seq_len > p_end) {
-            /* Incomplete trailing sequence. */
+            /* Sequence is incomplete by length. Check the bytes we do have
+               against the per-lead continuation bounds: a byte that's out
+               of range is a known error and must be re-read as a fresh
+               lead, not buffered. */
+            int avail = (int)(p_end - p);
+            int k = 1;
+            if (avail >= 2) {
+                uint8_t lo, hi;
+                js_utf8_first_cont_bounds(*p, &lo, &hi);
+                if (p[1] >= lo && p[1] <= hi) {
+                    for (k = 2; k < avail; k++) {
+                        if (p[k] < 0x80 || p[k] > 0xBF) break;
+                    }
+                }
+            }
+            if (k < avail) {
+                /* p[k] violates the continuation rules: emit one error,
+                   advance past the lead and any valid continuations, and
+                   leave p[k] for the next iteration. */
+                if (td->fatal) goto invalid;
+                out[out_len++] = 0xEF; out[out_len++] = 0xBF; out[out_len++] = 0xBD;
+                p += k;
+                continue;
+            }
+            /* Truly partial: defer in stream mode, otherwise flush as one error. */
             if (stream) {
-                int rem = (int)(p_end - p);
-                memcpy(td->pending, p, rem);
-                td->pending_len = rem;
+                memcpy(td->pending, p, avail);
+                td->pending_len = avail;
                 p = p_end;
                 break;
             }

From cd3f535c3ff591e702ad21252759ff30ef1c29f9 Mon Sep 17 00:00:00 2001
From: Hugo Heuzard <hugo.heuzard@gmail.com>
Date: Thu, 7 May 2026 17:03:33 +0200
Subject: [PATCH 4/5] quickjs-libc: replace lone surrogates with U+FFFD in
 TextEncoder

WHATWG Encoding's encode/encodeInto operate on USVStrings: lone
surrogates in the input are replaced with U+FFFD before UTF-8 encoding.
JS_ToCStringLen, however, keeps lone surrogates and emits them as their
3-byte CESU-8-like encoding (ED A0..BF XX), which is invalid UTF-8 and
not what the spec mandates.

In encode(), scan the JS_ToCStringLen output for ED A0..BF XX (a triple
that valid UTF-8 never produces) and rewrite each occurrence to
EF BF BD; the replacement is the same length so the output size is
unchanged. The common ASCII/BMP path stays a single allocation+copy.

In encodeInto(), the loop already calls utf8_decode per code point;
clamp surrogate code points (D800..DFFF) to U+FFFD before re-encoding.
The read counter naturally still credits 1 UTF-16 code unit for a lone
surrogate and 2 for a matched pair.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quickjs-libc.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/quickjs-libc.c b/quickjs-libc.c
index 94f2aabdc..07a71ba04 100644
--- a/quickjs-libc.c
+++ b/quickjs-libc.c
@@ -4692,7 +4692,42 @@ static JSValue js_text_encoder_encode(JSContext *ctx, JSValueConst this_val,
     str = JS_ToCStringLen(ctx, &len, argv[0]);
     if (!str)
         return JS_EXCEPTION;
-    ret = JS_NewUint8ArrayCopy(ctx, (const uint8_t *)str, len);
+    /* JS_ToCStringLen keeps lone surrogates as their 3-byte CESU-8-like
+       encoding (ED A0..BF XX). USVString conversion in the WHATWG Encoding
+       spec replaces them with U+FFFD before UTF-8 encoding. Valid UTF-8
+       never produces ED A0..BF, so any such triple comes from a lone
+       surrogate. The replacement is 3 bytes, so output length is unchanged. */
+    {
+        const uint8_t *s = (const uint8_t *)str;
+        size_t i;
+        for (i = 0; i + 2 < len; i++) {
+            if (s[i] == 0xED && s[i+1] >= 0xA0 && s[i+1] <= 0xBF)
+                break;
+        }
+        if (i + 2 >= len) {
+            ret = JS_NewUint8ArrayCopy(ctx, s, len);
+        } else {
+            uint8_t *buf = js_malloc(ctx, len);
+            size_t j;
+            if (!buf) {
+                JS_FreeCString(ctx, str);
+                return JS_EXCEPTION;
+            }
+            memcpy(buf, s, i);
+            for (j = i; i < len; ) {
+                if (i + 2 < len && s[i] == 0xED
+                    && s[i+1] >= 0xA0 && s[i+1] <= 0xBF
+                    && s[i+2] >= 0x80 && s[i+2] <= 0xBF) {
+                    buf[j++] = 0xEF; buf[j++] = 0xBF; buf[j++] = 0xBD;
+                    i += 3;
+                } else {
+                    buf[j++] = s[i++];
+                }
+            }
+            ret = JS_NewUint8ArrayCopy(ctx, buf, j);
+            js_free(ctx, buf);
+        }
+    }
     JS_FreeCString(ctx, str);
     return ret;
 }
@@ -4733,6 +4768,11 @@ static JSValue js_text_encoder_encode_into(JSContext *ctx, JSValueConst this_val
     end = p + src_len;
     while (p < end) {
         cp = utf8_decode(p, &next);
+        /* JS_ToCStringLen keeps lone surrogates as ED A0..BF XX, which
+           utf8_decode happily decodes back to a surrogate code point. The
+           USVString conversion in the spec replaces them with U+FFFD. */
+        if (cp >= 0xD800 && cp <= 0xDFFF)
+            cp = 0xFFFD;
         enc_len = utf8_encode_len(cp);
         if ((size_t)written + enc_len > dst_len)
             break;

From 58df657d8dd3381be957f0c0552dcbde8529be20 Mon Sep 17 00:00:00 2001
From: Hugo Heuzard <hugo.heuzard@gmail.com>
Date: Thu, 7 May 2026 17:21:19 +0200
Subject: [PATCH 5/5] quickjs-libc: add tests for TextEncoder/TextDecoder

Cover the WHATWG Encoding behaviors the existing implementation aims at:
encoder ToString coercion, lone-surrogate replacement, encodeInto's
read/written semantics and partial-write rule, decoder label parsing,
BOM handling (default/ignoreBOM/middle/split), per-lead continuation
bounds and the "incomplete vs invalid trailing" distinction, fatal
mode, and stream split/flush behavior.

The classes are installed by js_std_add_helpers, which run-test262 does
not call, so `make test` (run-test262 in local mode) didn't see them.
Expose the install as a public js_std_add_text_codecs(ctx) and call it
from JS_NewCustomContext's local-mode setup so the new test runs under
the same harness as the rest of tests/.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 quickjs-libc.c           |  10 +-
 quickjs-libc.h           |   1 +
 run-test262.c            |   1 +
 tests/test_text_codec.js | 304 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_text_codec.js

diff --git a/quickjs-libc.c b/quickjs-libc.c
index 07a71ba04..2bb0b1734 100644
--- a/quickjs-libc.c
+++ b/quickjs-libc.c
@@ -5091,11 +5091,13 @@ static const JSCFunctionListEntry js_text_decoder_proto_funcs[] = {
     JS_CGETSET_DEF("ignoreBOM", js_text_decoder_get_ignore_bom, NULL),
 };
 
-static void js_std_install_text_codecs(JSContext *ctx, JSValue global_obj)
+void js_std_add_text_codecs(JSContext *ctx)
 {
     JSRuntime *rt = JS_GetRuntime(ctx);
     JSThreadState *ts = js_get_thread_state(rt);
-    JSValue proto, ctor;
+    JSValue global_obj, proto, ctor;
+
+    global_obj = JS_GetGlobalObject(ctx);
 
     JS_NewClassID(rt, &ts->text_encoder_class_id);
     JS_NewClass(rt, ts->text_encoder_class_id, &js_text_encoder_class);
@@ -5118,6 +5120,8 @@ static void js_std_install_text_codecs(JSContext *ctx, JSValue global_obj)
                             JS_CFUNC_constructor, 0);
     JS_SetConstructor(ctx, ctor, proto);
     JS_SetPropertyStr(ctx, global_obj, "TextDecoder", ctor);
+
+    JS_FreeValue(ctx, global_obj);
 }
 
 void js_std_add_helpers(JSContext *ctx, int argc, char **argv)
@@ -5145,7 +5149,7 @@ void js_std_add_helpers(JSContext *ctx, int argc, char **argv)
     JS_SetPropertyStr(ctx, global_obj, "print",
                       JS_NewCFunction(ctx, js_print, "print", 1));
 
-    js_std_install_text_codecs(ctx, global_obj);
+    js_std_add_text_codecs(ctx);
 
     JS_FreeValue(ctx, global_obj);
 }
diff --git a/quickjs-libc.h b/quickjs-libc.h
index fd91a2f68..847c6d58f 100644
--- a/quickjs-libc.h
+++ b/quickjs-libc.h
@@ -45,6 +45,7 @@ JS_LIBC_EXTERN JSModuleDef *js_init_module_os(JSContext *ctx,
 JS_LIBC_EXTERN JSModuleDef *js_init_module_bjson(JSContext *ctx,
                                                  const char *module_name);
 JS_LIBC_EXTERN void js_std_add_helpers(JSContext *ctx, int argc, char **argv);
+JS_LIBC_EXTERN void js_std_add_text_codecs(JSContext *ctx);
 JS_LIBC_EXTERN int js_std_loop(JSContext *ctx);
 JS_LIBC_EXTERN int js_std_loop_once(JSContext *ctx);
 JS_LIBC_EXTERN int js_std_poll_io(JSContext *ctx, int timeout_ms);
diff --git a/run-test262.c b/run-test262.c
index 4c996fb06..ba2f3ff1c 100644
--- a/run-test262.c
+++ b/run-test262.c
@@ -1729,6 +1729,7 @@ JSContext *JS_NewCustomContext(JSRuntime *rt)
         js_init_module_std(ctx, "qjs:std");
         js_init_module_os(ctx, "qjs:os");
         js_init_module_bjson(ctx, "qjs:bjson");
+        js_std_add_text_codecs(ctx);
         obj = JS_GetGlobalObject(ctx);
         JS_SetPropertyFunctionList(ctx, obj, &qjs_object, 1);
         JS_FreeValue(ctx, obj);
diff --git a/tests/test_text_codec.js b/tests/test_text_codec.js
new file mode 100644
index 000000000..ab5710d8e
--- /dev/null
+++ b/tests/test_text_codec.js
@@ -0,0 +1,304 @@
+import { assert, assertThrows, assertArrayEquals } from "./assert.js";
+
+function bytes(list) { return new Uint8Array(list); } // byte values -> Uint8Array
+function arr(view) { return Array.from(view); } // array-like/iterable -> plain Array
+
+function test_encoder_basic() {
+    const enc = new TextEncoder();
+    assert(enc.encoding, "utf-8");
+    assert(Object.prototype.toString.call(enc), "[object TextEncoder]");
+
+    assertArrayEquals(arr(enc.encode()), []);
+    assertArrayEquals(arr(enc.encode(undefined)), []);
+    assertArrayEquals(arr(enc.encode("")), []);
+    assertArrayEquals(arr(enc.encode("hi")), [0x68, 0x69]);
+    // Three-byte encoding: U+2603 SNOWMAN.
+    assertArrayEquals(arr(enc.encode("☃")), [0xE2, 0x98, 0x83]);
+    // Four-byte encoding: U+10000, stored as a surrogate pair in the string.
+    assertArrayEquals(arr(enc.encode("𐀀")), [0xF0, 0x90, 0x80, 0x80]);
+    // Non-string arguments are converted with ToString first.
+    assertArrayEquals(arr(enc.encode(null)), [0x6E, 0x75, 0x6C, 0x6C]); // "null"
+    assertArrayEquals(arr(enc.encode(42)), [0x34, 0x32]);                // "42"
+}
+
+function test_encoder_lone_surrogates() {
+    // encode() takes a USVString, so an unpaired surrogate is encoded as U+FFFD.
+    const enc = new TextEncoder();
+    assertArrayEquals(arr(enc.encode("\uD800")), [0xEF, 0xBF, 0xBD]);
+    assertArrayEquals(arr(enc.encode("\uDFFF")), [0xEF, 0xBF, 0xBD]);
+    assertArrayEquals(arr(enc.encode("\uDC00")), [0xEF, 0xBF, 0xBD]);
+    assertArrayEquals(arr(enc.encode("a\uD800b")),
+                      [0x61, 0xEF, 0xBF, 0xBD, 0x62]);
+    // Back-to-back unpaired high surrogates are each replaced on their own.
+    assertArrayEquals(arr(enc.encode("\uD800\uD800")),
+                      [0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD]);
+    // Low-then-high does not form a pair: both halves are unpaired.
+    assertArrayEquals(arr(enc.encode("\uDC00\uD800")),
+                      [0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD]);
+    // Unpaired high surrogate, then ASCII, then a real pair: only the
+    // unpaired one turns into U+FFFD.
+    assertArrayEquals(arr(enc.encode("\uD800a😀")),
+                      [0xEF, 0xBF, 0xBD, 0x61, 0xF0, 0x9F, 0x98, 0x80]);
+}
+
+function test_encode_into_basic() {
+    const enc = new TextEncoder();
+
+    let out = new Uint8Array(8);
+    let res = enc.encodeInto("hi", out);
+    assert(res.read, 2);
+    assert(res.written, 2);
+    assertArrayEquals(arr(out.subarray(0, 2)), [0x68, 0x69]);
+
+    // A surrogate pair is 2 UTF-16 code units read and 4 bytes written.
+    out = new Uint8Array(8);
+    res = enc.encodeInto("😀", out);
+    assert(res.read, 2);
+    assert(res.written, 4);
+    assertArrayEquals(arr(out.subarray(0, 4)), [0xF0, 0x9F, 0x98, 0x80]);
+
+    // "a" + lone surrogate: 2 units read; "a" (1 byte) + U+FFFD (3 bytes) written.
+    out = new Uint8Array(8);
+    res = enc.encodeInto("a\uD800", out);
+    assert(res.read, 2);
+    assert(res.written, 4);
+    assertArrayEquals(arr(out.subarray(0, 4)), [0x61, 0xEF, 0xBF, 0xBD]);
+
+    // Nothing to encode: destination is left untouched.
+    out = new Uint8Array(4); out.fill(0xAA);
+    res = enc.encodeInto("", out);
+    assert(res.read, 0); assert(res.written, 0);
+    assertArrayEquals(arr(out), [0xAA, 0xAA, 0xAA, 0xAA]);
+
+    // Zero-length destination: nothing is read or written.
+    res = enc.encodeInto("abc", new Uint8Array(0));
+    assert(res.read, 0); assert(res.written, 0);
+}
+
+function test_encode_into_partial() {
+    const enc = new TextEncoder();
+
+    // When the next character's whole encoding does not fit, encodeInto
+    // must write none of it rather than a truncated sequence.
+    let out = new Uint8Array(2); out.fill(0xAA);
+    let res = enc.encodeInto("☃hi", out);   // snowman is 3 bytes
+    assert(res.read, 0); assert(res.written, 0);
+    assertArrayEquals(arr(out), [0xAA, 0xAA]);
+
+    // The 3-byte U+FFFD standing in for a lone surrogate obeys the same rule.
+    out = new Uint8Array(2); out.fill(0xAA);
+    res = enc.encodeInto("\uD800X", out);
+    assert(res.read, 0); assert(res.written, 0);
+    assertArrayEquals(arr(out), [0xAA, 0xAA]);
+
+    // Leading characters that fit are written; encoding stops before the snowman.
+    out = new Uint8Array(4); out.fill(0xAA);
+    res = enc.encodeInto("ab☃c", out);
+    assert(res.read, 2); assert(res.written, 2);
+    assertArrayEquals(arr(out), [0x61, 0x62, 0xAA, 0xAA]);
+}
+
+function test_encode_into_argument_errors() {
+    const enc = new TextEncoder();
+
+    assertThrows(TypeError, () => enc.encodeInto());
+    assertThrows(TypeError, () => enc.encodeInto("x"));
+    assertThrows(TypeError, () => enc.encodeInto("x", "not a buffer"));
+    assertThrows(TypeError, () => enc.encodeInto("x", new Int8Array(4)));
+    assertThrows(TypeError, () => enc.encodeInto("x", new Uint16Array(4)));
+    assertThrows(TypeError, () => enc.encodeInto("x", new Uint8ClampedArray(4)));
+    assertThrows(TypeError, () => enc.encodeInto("x", new ArrayBuffer(4)));
+
+    // The source's toString() must run before the bad destination is rejected.
+    const seen = [];
+    const source = { toString() { seen.push("src"); return "x"; } };
+    assertThrows(TypeError, () => enc.encodeInto(source, "not a buffer"));
+    assertArrayEquals(seen, ["src"]);
+}
+
+function test_encoder_brand() {
+    const proto = TextEncoder.prototype; // methods are called on a plain {}
+    assertThrows(TypeError, () => proto.encode.call({}, "x"));
+    assertThrows(TypeError, () =>
+        proto.encodeInto.call({}, "x", new Uint8Array(4)));
+    assertThrows(TypeError, () => TextEncoder()); // called without `new`
+}
+
+function test_decoder_basic() {
+    const dec = new TextDecoder();
+    assert(dec.encoding, "utf-8");
+    assert(dec.fatal, false);
+    assert(dec.ignoreBOM, false);
+    assert(Object.prototype.toString.call(dec), "[object TextDecoder]");
+
+    assert(dec.decode(), "");
+    assert(dec.decode(undefined), "");
+    assert(dec.decode(bytes([])), "");
+    assert(dec.decode(bytes([0x68, 0x69])), "hi");
+    assert(dec.decode(bytes([0xE2, 0x98, 0x83])), "☃");
+    assert(dec.decode(bytes([0xF0, 0x9F, 0x98, 0x80])), "😀"); // U+1F600
+}
+
+function test_decoder_input_types() {
+    const dec = new TextDecoder();
+    const raw = [0x61, 0x62, 0x63];
+
+    assert(dec.decode(new Uint8Array(raw)), "abc");
+    assert(dec.decode(new Uint8Array(raw).buffer), "abc");
+    assert(dec.decode(new Int8Array(new Uint8Array(raw).buffer)), "abc");
+
+    // Only the bytes covered by an offset subarray view may be decoded.
+    const padded = new Uint8Array([0xFF, 0x61, 0x62, 0x63, 0xFF]);
+    assert(dec.decode(padded.subarray(1, 4)), "abc");
+
+    assertThrows(TypeError, () => dec.decode("not a buffer"));
+    assertThrows(TypeError, () => dec.decode({}));
+    assertThrows(TypeError, () => dec.decode(null));
+    assertThrows(TypeError, () => dec.decode(123));
+}
+
+function test_decoder_label() {
+    for (const alias of [
+        "utf-8", "UTF-8", "utf8", "UTF8", "Utf-8",
+        "  utf-8\t", "\nutf-8\r\f", "\fUTF-8 ",
+        "unicode-1-1-utf-8", "unicode11utf8",
+        "unicode20utf8", "x-unicode20utf8",
+    ]) {
+        assert(new TextDecoder(alias).encoding, "utf-8");
+    }
+    for (const other of ["latin1", "iso-8859-1", "utf-16", "windows-1252",
+                         "utf-7", "ascii", ""]) {
+        assertThrows(RangeError, () => new TextDecoder(other));
+    }
+}
+
+function test_decoder_options() {
+    let dec = new TextDecoder("utf-8", { fatal: true });
+    assert(dec.fatal, true); assert(dec.ignoreBOM, false);
+
+    dec = new TextDecoder("utf-8", { ignoreBOM: true });
+    assert(dec.fatal, false); assert(dec.ignoreBOM, true);
+
+    dec = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
+    assert(dec.fatal, true); assert(dec.ignoreBOM, true);
+
+    // Option values are coerced to booleans (1 -> true, 0 -> false).
+    dec = new TextDecoder("utf-8", { fatal: 1, ignoreBOM: 0 });
+    assert(dec.fatal, true); assert(dec.ignoreBOM, false);
+
+    // Options argument omitted: both flags default to false.
+    dec = new TextDecoder("utf-8");
+    assert(dec.fatal, false); assert(dec.ignoreBOM, false);
+}
+
+function test_decoder_bom() {
+    const bom = [0xEF, 0xBB, 0xBF]; // UTF-8 encoding of U+FEFF
+
+    // Default: BOM at start is stripped.
+    let d = new TextDecoder();
+    assert(d.decode(bytes([...bom, 0x68, 0x69])), "hi");
+    // BOM in the middle is kept as U+FEFF (escaped: a raw U+FEFF is invisible).
+    assert(d.decode(bytes([0x68, ...bom, 0x69])), "h\uFEFFi");
+    // ignoreBOM=true: BOM is kept.
+    d = new TextDecoder("utf-8", { ignoreBOM: true });
+    assert(d.decode(bytes([...bom, 0x68])), "\uFEFFh");
+    // Decoder state is reset on non-stream call: a fresh BOM is honored.
+    d = new TextDecoder();
+    assert(d.decode(bytes([...bom, 0x61])), "a");
+    assert(d.decode(bytes([...bom, 0x62])), "b");
+    // BOM split across stream calls is still recognized.
+    d = new TextDecoder();
+    assert(d.decode(bytes([0xEF, 0xBB]), { stream: true }), "");
+    assert(d.decode(bytes([0xBF, 0x68])), "h");
+}
+
+function test_decoder_invalid_sequences() {
+    const d = new TextDecoder();
+
+    // Stray continuation byte.
+    assert(d.decode(bytes([0x80])), "�");
+
+    // Lead byte followed by an out-of-range continuation: emit U+FFFD AND
+    // re-process the offending byte (0x7F below re-decodes as U+007F DEL).
+    assert(d.decode(bytes([0xE0, 0x41])), "�A");
+    assert(d.decode(bytes([0xE0, 0x80])), "��");
+    assert(d.decode(bytes([0xF0, 0x80])), "��");
+    assert(d.decode(bytes([0xF4, 0x90])), "��");
+    assert(d.decode(bytes([0xF0, 0x90, 0x7F])), "�\x7F");
+
+    // Truly partial sequences (valid prefix, no following byte): single U+FFFD.
+    assert(d.decode(bytes([0xE0])), "�");
+    assert(d.decode(bytes([0xE0, 0xA0])), "�");
+    assert(d.decode(bytes([0xF0, 0x90])), "�");
+    assert(d.decode(bytes([0xF0, 0x90, 0x80])), "�");
+
+    // Bytes that can never start a UTF-8 sequence.
+    assert(d.decode(bytes([0xC0])), "�");
+    assert(d.decode(bytes([0xC1])), "�");
+    assert(d.decode(bytes([0xF5])), "�");
+    assert(d.decode(bytes([0xFF])), "�");
+}
+
+function test_decoder_fatal() {
+    const strict = new TextDecoder("utf-8", { fatal: true });
+    assert(strict.decode(bytes([0x68, 0x69])), "hi");
+    assertThrows(TypeError, () => strict.decode(bytes([0x80])));
+    assertThrows(TypeError, () => strict.decode(bytes([0xE0, 0x41])));
+    assertThrows(TypeError, () => strict.decode(bytes([0xE0])));
+    assertThrows(TypeError, () => strict.decode(bytes([0xC0])));
+
+    // Streaming: a valid prefix is held back without error, then completed.
+    const resumed = new TextDecoder("utf-8", { fatal: true });
+    assert(resumed.decode(bytes([0xE2, 0x98]), { stream: true }), "");
+    assert(resumed.decode(bytes([0x83])), "☃");
+
+    // Streaming: flushing while a prefix is still held back must throw.
+    const dangling = new TextDecoder("utf-8", { fatal: true });
+    assert(dangling.decode(bytes([0xE2, 0x98]), { stream: true }), "");
+    assertThrows(TypeError, () => dangling.decode());
+}
+
+function test_decoder_stream() {
+    // Cut a 4-byte sequence after 1, 2 and 3 bytes; two calls must rejoin it.
+    const grin = [0xF0, 0x9F, 0x98, 0x80]; // U+1F600
+    for (let cut = 1; cut < 4; cut++) {
+        const dec = new TextDecoder();
+        let text = dec.decode(bytes(grin.slice(0, cut)), { stream: true });
+        text += dec.decode(bytes(grin.slice(cut)));
+        assert(text, "😀");
+    }
+
+    // A lone E0 is held back; the next call starts with 0x41, which cannot
+    // continue it, so U+FFFD is emitted and 0x41 is then decoded as ASCII.
+    const held = new TextDecoder();
+    assert(held.decode(bytes([0xE0]), { stream: true }), "");
+    assert(held.decode(bytes([0x41])), "�A");
+}
+
+function test_decoder_brand() {
+    assertThrows(TypeError, () => TextDecoder.prototype.decode.call({}));
+    const encoding_getter =
+        Object.getOwnPropertyDescriptor(TextDecoder.prototype, "encoding").get;
+    assertThrows(TypeError, () => encoding_getter.call({}));
+    const fatal_getter =
+        Object.getOwnPropertyDescriptor(TextDecoder.prototype, "fatal").get;
+    assertThrows(TypeError, () => fatal_getter.call({}));
+    // A plain call (no `new`) of the constructor must throw.
+    assertThrows(TypeError, () => TextDecoder());
+}
+
+test_encoder_basic(); // run every test function once, in definition order
+test_encoder_lone_surrogates();
+test_encode_into_basic();
+test_encode_into_partial();
+test_encode_into_argument_errors();
+test_encoder_brand();
+test_decoder_basic();
+test_decoder_input_types();
+test_decoder_label();
+test_decoder_options();
+test_decoder_bom();
+test_decoder_invalid_sequences();
+test_decoder_fatal();
+test_decoder_stream();
+test_decoder_brand();