diff --git a/Cargo.lock b/Cargo.lock index 4b27306fcf7..0e4ba418d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -394,6 +394,8 @@ dependencies = [ "cow-utils", "criterion", "dashmap", + "data-encoding", + "data-encoding-macro", "dynify", "either", "fast-float2", @@ -1137,6 +1139,32 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "data-encoding-macro" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3259c913752a86488b501ed8680446a5ed2d5aeac6e596cb23ba3800768ea32c" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" +dependencies = [ + "data-encoding", + "syn", +] + [[package]] name = "databake" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 4645a317722..8984f32c7b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,6 +147,8 @@ strum = { version = "0.28", features = ["derive"] } unsend = { version = "0.2.1", default-features = false } husky-rs = "0.3.2" async-channel = "2.5.0" +data-encoding = "2.11.0" +data-encoding-macro = "0.1.20" # ICU4X core diff --git a/core/engine/Cargo.toml b/core/engine/Cargo.toml index d31abc302c8..e366edd09b5 100644 --- a/core/engine/Cargo.toml +++ b/core/engine/Cargo.toml @@ -142,6 +142,8 @@ dynify = { workspace = true, features = ["macros"] } futures-concurrency.workspace = true oneshot = { workspace = true, features = ["async"] } async-channel.workspace = true +data-encoding.workspace = true +data-encoding-macro.workspace = true # intl deps boa_icu_provider = { workspace = true, features = ["std"], optional = true } diff --git a/core/engine/src/builtins/typed_array/base64.rs b/core/engine/src/builtins/typed_array/base64.rs new file mode 100644 index 00000000000..dea010464f2 --- /dev/null +++ b/core/engine/src/builtins/typed_array/base64.rs @@ -0,0 +1,256 @@ +//! Base64 helpers for `Uint8Array` proposal methods. +//! +//! This is adapted from the `ecma262` helper in the `data-encoding` repository so that +//! `Uint8Array.{fromBase64,setFromBase64}` follow the proposal's partial-decoding rules. + +use data_encoding::{Character, DecodeError, DecodeKind, DecodePartial, Encoding}; +use data_encoding_macro::new_encoding; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum Alphabet { + Base64, + Base64Url, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum LastChunkHandling { + Loose, + Strict, + StopBeforePartial, +} +use LastChunkHandling::{Loose, StopBeforePartial, Strict}; + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct DecodeMutResult { + pub(crate) read: usize, + pub(crate) written: usize, + pub(crate) error: Option, +} + +/// Decodes `input` in `output` according to the given parameters. +/// +/// # Panics +/// +/// Panics if `output.len() < 6 * input.len() / 8`. It is not an error if `max_length` is smaller +/// than `output.len()`. This function will however not optimize those cases. +pub(crate) fn decode_mut( + input: &[u8], + output: &mut [u8], + alphabet: Alphabet, + last_chunk_handling: LastChunkHandling, + max_length: Option, +) -> DecodeMutResult { + // Select the appropriate encoding. + let base = match alphabet { + Alphabet::Base64 => &BASE64, + Alphabet::Base64Url => &BASE64URL, + }; + let max_length = max_length.unwrap_or(usize::MAX); + + // Decode as much as possible. + let (mut read, mut written) = match base.decode_mut(input, &mut output[..6 * input.len() / 8]) { + Ok(olen) => (input.len(), olen), + Err(DecodePartial { read, written, .. }) => (read, written), + }; + + // Backtrack to the last complete chunk that fits below the maximum output length. + let extra_output = written - core::cmp::min(written, max_length) / 3 * 3; + let mut extra_input = (8 * extra_output).div_ceil(6); + written -= extra_output; + loop { + // Backtrack white-spaces. + while 0 < read && base.interpret_byte(input[read - 1]).is_ignored() { + read -= 1; + } + if extra_input == 0 { + break; + } + // Backtrack one symbol. + read -= 1; + extra_input -= 1; + debug_assert!(base.interpret_byte(input[read]).is_symbol().is_some()); + } + + // Parse the next chunk manually. + let mut index = [0; 4]; // maps to index in input + let mut index_len = 0; + let mut index_pad = 4; + let mut ipos = read; + let remaining = max_length - written; + if remaining == 0 { + return DecodeMutResult { + read, + written, + error: None, + }; + } + while ipos < input.len() { + let byte = input[ipos]; + let position = ipos; + ipos += 1; + let kind = match base.interpret_byte(byte) { + Character::Padding => unreachable!(), + Character::Ignored => continue, + Character::Symbol { .. } if index_pad < 4 => Some(DecodeKind::Padding), + Character::Symbol { .. } => None, + Character::Invalid if byte != b'=' => Some(DecodeKind::Symbol), + Character::Invalid if index_len < 2 => Some(DecodeKind::Padding), + Character::Invalid => { + index_pad = core::cmp::min(index_pad, index_len); + None + } + }; + if let Some(kind) = kind { + return DecodeMutResult { + read, + written, + error: Some(DecodeError { position, kind }), + }; + } + if index_len == 4 { + debug_assert!(index_pad < 4); + let error = Some(DecodeError { + position, + kind: DecodeKind::Padding, + }); + return DecodeMutResult { + read, + written, + error, + }; + } + index[index_len] = position; + index_len += 1; + if matches!( + (core::cmp::min(index_len, index_pad), remaining), + (3, 1) | (4, 2) + ) { + return DecodeMutResult { + read, + written, + error: None, + }; + } + } + debug_assert!(index_len <= 4 && index_pad <= 4); + debug_assert!(index_len < 4 || index_pad < 4); + + // Process the last chunk. + if index_len == 0 { + return DecodeMutResult { + read: input.len(), + written, + error: None, + }; + } + let check = match (last_chunk_handling, index_len, index_pad) { + (Loose, 1, _) | (Loose, 0..4, 0..4) | (Strict, 0..4, _) => { + let error = Some(DecodeError { + position: ipos, + kind: DecodeKind::Length, + }); + return DecodeMutResult { + read, + written, + error, + }; + } + (Strict, _, _) => true, + (StopBeforePartial, 0..4, _) => { + return DecodeMutResult { + read, + written, + error: None, + }; + } + (Loose | StopBeforePartial, _, _) => false, + }; + let iend = core::cmp::min(index_len, index_pad); + let oend = iend - 1; + let mut ichunk = [b'A'; 4]; + for i in 0..iend { + ichunk[i] = input[index[i]]; + } + let mut ochunk = [0; 3]; + let rchunk = base.decode_mut(&ichunk, &mut ochunk); + debug_assert_eq!(rchunk, Ok(3)); + if check && iend < 4 && ochunk[oend] != 0 { + let error = Some(DecodeError { + position: index[iend], + kind: DecodeKind::Trailing, + }); + return DecodeMutResult { + read, + written, + error, + }; + } + output[written..][..oend].copy_from_slice(&ochunk[..oend]); + read = input.len(); + written += oend; + DecodeMutResult { + read, + written, + error: None, + } +} + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct DecodeResult { + pub(crate) read: usize, + pub(crate) output: Vec, + pub(crate) error: Option, +} + +/// Decodes `input` in `output` according to the given parameters. +pub(crate) fn decode( + input: &[u8], + alphabet: Alphabet, + last_chunk_handling: LastChunkHandling, + max_length: Option, +) -> DecodeResult { + let mut output = vec![0; 6 * input.len() / 8]; + let DecodeMutResult { + read, + written, + error, + } = decode_mut( + input, + &mut output, + alphabet, + last_chunk_handling, + max_length, + ); + debug_assert!(written <= output.len()); + output.truncate(written); + DecodeResult { + read, + output, + error, + } +} + +pub(crate) fn encode(input: &[u8], alphabet: Alphabet, omit_padding: bool) -> String { + let base = match (alphabet, omit_padding) { + (Alphabet::Base64, false) => &data_encoding::BASE64, + (Alphabet::Base64, true) => &data_encoding::BASE64_NOPAD, + (Alphabet::Base64Url, false) => &data_encoding::BASE64URL, + (Alphabet::Base64Url, true) => &data_encoding::BASE64URL_NOPAD, + }; + + let mut output = String::with_capacity(base.encode_len(input.len())); + base.encode_append(input, &mut output); + output +} + +const BASE64: Encoding = new_encoding! { + symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", + ignore: " \t\n\x0C\r", + check_trailing_bits: false, +}; + +const BASE64URL: Encoding = new_encoding! { + symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", + ignore: " \t\n\x0C\r", + check_trailing_bits: false, +}; diff --git a/core/engine/src/builtins/typed_array/builtin_uint8.rs b/core/engine/src/builtins/typed_array/builtin_uint8.rs new file mode 100644 index 00000000000..6709581640e --- /dev/null +++ b/core/engine/src/builtins/typed_array/builtin_uint8.rs @@ -0,0 +1,672 @@ +//! `Uint8Array`-specific base64 and hex encoding/decoding methods. +//! +//! Implements the [proposal-arraybuffer-base64](https://tc39.es/proposal-arraybuffer-base64/) methods: +//! - [`Uint8Array.fromBase64()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.frombase64) +//! - [`Uint8Array.prototype.setFromBase64()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.setfrombase64) +//! - [`Uint8Array.prototype.toBase64()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.tobase64) +//! - [`Uint8Array.fromHex()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.fromhex) +//! - [`Uint8Array.prototype.setFromHex()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.setfromhex) +//! - [`Uint8Array.prototype.toHex()`](https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.tohex) + +use std::{cmp::min, sync::atomic::Ordering}; + +use super::{ + TypedArray, TypedArrayKind, + base64::{self, Alphabet as Base64Alphabet, LastChunkHandling as Base64LastChunkHandling}, + hex, +}; +use crate::{ + Context, JsArgs, JsNativeError, JsObject, JsResult, JsString, JsValue, + builtins::array_buffer::{ArrayBuffer, BufferObject, utils::SliceRefMut}, + js_string, +}; + +/// Boa's implementation of `Uint8Array`-specific base64 and hex proposal methods. +pub(crate) struct BuiltinUint8Array; + +impl BuiltinUint8Array { + /// `Uint8Array.fromBase64 ( string, options )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.frombase64 + pub(crate) fn from_base64( + _: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. If string is not a String, throw a TypeError exception. + let input = args.get_or_undefined(0); + // Check if input is a string (not a string object) - match V8's behavior + let Some(input_string) = input.as_string() else { + return Err(JsNativeError::typ() + .with_message("input must be a string") + .into()); + }; + + // 2. Let opts be ? GetOptionsObject(options). + let options = args.get_or_undefined(1); + let (alphabet, last_chunk_handling) = Self::get_base64_options(options, context)?; + + let input_bytes = Self::encoded_input_bytes(&input_string); + let decoded = base64::decode(&input_bytes, alphabet, last_chunk_handling, None); + + if decoded.error.is_some() { + return Err(JsNativeError::syntax() + .with_message("Invalid base64 string") + .into()); + } + + let output_len = decoded.output.len(); + + // Create Uint8Array from decoded data + let buffer = ArrayBuffer::allocate( + &context + .intrinsics() + .constructors() + .array_buffer() + .constructor() + .into(), + output_len as u64, + None, + context, + )?; + + // Copy data to buffer + { + let mut buffer_data = buffer.borrow_mut(); + if let Some(bytes) = buffer_data.data_mut().bytes_mut() { + bytes[..output_len].copy_from_slice(&decoded.output); + } + } + + // Create Uint8Array + let uint8_array = JsObject::from_proto_and_data_with_shared_shape( + context.root_shape(), + context + .intrinsics() + .constructors() + .typed_uint8_array() + .prototype(), + TypedArray::new( + BufferObject::Buffer(buffer), + TypedArrayKind::Uint8, + 0, + Some(output_len as u64), + Some(output_len as u64), + ), + ); + + Ok(uint8_array.upcast().into()) + } + + /// `Uint8Array.prototype.setFromBase64 ( string, options )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.setfrombase64 + pub(crate) fn set_from_base64( + this: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. Let into be the this value. + // 2. Perform ? ValidateUint8Array(into). + let uint8array = this + .as_object() + .and_then(|o| o.clone().downcast::().ok()) + .ok_or_else(|| JsNativeError::typ().with_message("Value is not a Uint8Array object"))?; + + let uint8array_borrow = uint8array.borrow(); + let uint8array_data = uint8array_borrow.data(); + + // Verify it's a Uint8Array + if uint8array_data.kind() != TypedArrayKind::Uint8 { + return Err(JsNativeError::typ() + .with_message("Value is not a Uint8Array object") + .into()); + } + + // Check if detached + let buffer = uint8array_data.viewed_array_buffer(); + let Some(buf_len) = buffer.as_buffer().bytes(Ordering::SeqCst).map(|s| s.len()) else { + return Err(JsNativeError::typ() + .with_message("TypedArray is detached") + .into()); + }; + + if uint8array_data.is_out_of_bounds(buf_len) { + return Err(JsNativeError::typ() + .with_message("TypedArray is out of bounds") + .into()); + } + + let array_length = uint8array_data.array_length(buf_len); + let byte_offset = uint8array_data.byte_offset() as usize; + drop(uint8array_borrow); + + // 3. If string is not a String, throw a TypeError exception. + let input = args.get_or_undefined(0); + // Check if input is a string (not a string object) - match V8's behavior + let Some(input_string) = input.as_string() else { + return Err(JsNativeError::typ() + .with_message("input must be a string") + .into()); + }; + + // 4. Let opts be ? GetOptionsObject(options). + let options = args.get_or_undefined(1); + let (alphabet, last_chunk_handling) = Self::get_base64_options(options, context)?; + + // If array length is 0, return early + if array_length == 0 { + let read = JsValue::from(0); + let written = JsValue::from(0); + return Self::create_set_from_result(read, written, context); + } + + let input_bytes = Self::encoded_input_bytes(&input_string); + let mut output = vec![0; 6 * input_bytes.len() / 8]; + let result = base64::decode_mut( + &input_bytes, + &mut output, + alphabet, + last_chunk_handling, + Some(array_length as usize), + ); + + // FromBase64 does not invoke user code, so the backing buffer cannot be detached or shrunk + // between the bounds check above and the copy below. + { + let uint8array_mut = uint8array.borrow_mut(); + let uint8array_data = uint8array_mut.data(); + let mut buffer = uint8array_data.viewed_array_buffer().as_buffer_mut(); + let Some(mut data) = buffer.bytes(Ordering::SeqCst) else { + return Err(JsNativeError::typ() + .with_message("Cannot access buffer data") + .into()); + }; + + let mut subslice = data.subslice_mut(byte_offset..byte_offset + array_length as usize); + match &mut subslice { + SliceRefMut::Slice(slice) => { + slice[..result.written].copy_from_slice(&output[..result.written]); + } + SliceRefMut::AtomicSlice(slice) => { + for (dst, src) in slice.iter().zip(&output[..result.written]) { + dst.store(*src, Ordering::SeqCst); + } + } + } + } + + if result.error.is_some() { + return Err(JsNativeError::syntax() + .with_message("Invalid base64 string") + .into()); + } + + let read = JsValue::from(result.read as u64); + let written = JsValue::from(result.written as u64); + Self::create_set_from_result(read, written, context) + } + + /// `Uint8Array.prototype.toBase64 ( options )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.tobase64 + pub(crate) fn to_base64( + this: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. Let O be the this value. + // 2. Perform ? ValidateUint8Array(O). + let uint8array = this + .as_object() + .and_then(|o| o.clone().downcast::().ok()) + .ok_or_else(|| JsNativeError::typ().with_message("Value is not a Uint8Array object"))?; + + let uint8array_borrow = uint8array.borrow(); + let uint8array_data = uint8array_borrow.data(); + + // Verify it's a Uint8Array + if uint8array_data.kind() != TypedArrayKind::Uint8 { + return Err(JsNativeError::typ() + .with_message("Value is not a Uint8Array object") + .into()); + } + + // Get buffer info but don't check detached yet + let byte_offset = uint8array_data.byte_offset() as usize; + drop(uint8array_borrow); + + // 3. Let opts be ? GetOptionsObject(options). + // Get options first (this may trigger side effects that detach the buffer) + let options = args.get_or_undefined(0); + let (alphabet, omit_padding) = Self::get_base64_encode_options(options, context)?; + + // After getting options, check if buffer is detached + let uint8array_borrow = uint8array.borrow(); + let uint8array_data = uint8array_borrow.data(); + let buffer = uint8array_data.viewed_array_buffer(); + let Some(buf_len) = buffer.as_buffer().bytes(Ordering::SeqCst).map(|s| s.len()) else { + return Err(JsNativeError::typ() + .with_message("TypedArray is detached") + .into()); + }; + + if uint8array_data.is_out_of_bounds(buf_len) { + return Err(JsNativeError::typ() + .with_message("TypedArray is out of bounds") + .into()); + } + + let byte_length = uint8array_data.array_length(buf_len) as usize; + + // Get the data + let buffer_data = buffer.as_buffer(); + let Some(data) = buffer_data.bytes(Ordering::SeqCst) else { + return Err(JsNativeError::typ() + .with_message("Cannot access buffer data") + .into()); + }; + + let input = data + .subslice(byte_offset..byte_offset + byte_length) + .to_vec(); + Ok(JsString::from(base64::encode(&input, alphabet, omit_padding)).into()) + } + + /// `Uint8Array.fromHex ( string )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.fromhex + pub(crate) fn from_hex( + _: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. If string is not a String, throw a TypeError exception. + let input = args.get_or_undefined(0); + // Check if input is a string (not a string object) - match V8's behavior + let Some(input_string) = input.as_string() else { + return Err(JsNativeError::typ() + .with_message("input must be a string") + .into()); + }; + + let input_bytes = Self::encoded_input_bytes(&input_string); + + // Check if length is even + if !input_bytes.len().is_multiple_of(2) { + return Err(JsNativeError::syntax() + .with_message("Invalid hex string: odd length") + .into()); + } + + let decoded = hex::decode(&input_bytes, None); + if decoded.error.is_some() { + return Err(JsNativeError::syntax() + .with_message("Invalid hex character") + .into()); + } + let output_len = decoded.output.len(); + + // Create Uint8Array from decoded data + let buffer = ArrayBuffer::allocate( + &context + .intrinsics() + .constructors() + .array_buffer() + .constructor() + .into(), + output_len as u64, + None, + context, + )?; + + // Copy data to buffer + { + let mut buffer_data = buffer.borrow_mut(); + if let Some(bytes) = buffer_data.data_mut().bytes_mut() { + bytes[..output_len].copy_from_slice(&decoded.output); + } + } + + // Create Uint8Array + let uint8_array = JsObject::from_proto_and_data_with_shared_shape( + context.root_shape(), + context + .intrinsics() + .constructors() + .typed_uint8_array() + .prototype(), + TypedArray::new( + BufferObject::Buffer(buffer), + TypedArrayKind::Uint8, + 0, + Some(output_len as u64), + Some(output_len as u64), + ), + ); + + Ok(uint8_array.upcast().into()) + } + + /// `Uint8Array.prototype.setFromHex ( string )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.setfromhex + pub(crate) fn set_from_hex( + this: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. Let into be the this value. + // 2. Perform ? ValidateUint8Array(into). + let uint8array = this + .as_object() + .and_then(|o| o.clone().downcast::().ok()) + .ok_or_else(|| JsNativeError::typ().with_message("Value is not a Uint8Array object"))?; + + let uint8array_borrow = uint8array.borrow(); + let uint8array_data = uint8array_borrow.data(); + + // Verify it's a Uint8Array + if uint8array_data.kind() != TypedArrayKind::Uint8 { + return Err(JsNativeError::typ() + .with_message("Value is not a Uint8Array object") + .into()); + } + + // Check if detached + let buffer = uint8array_data.viewed_array_buffer(); + let Some(buf_len) = buffer.as_buffer().bytes(Ordering::SeqCst).map(|s| s.len()) else { + return Err(JsNativeError::typ() + .with_message("TypedArray is detached") + .into()); + }; + + if uint8array_data.is_out_of_bounds(buf_len) { + return Err(JsNativeError::typ() + .with_message("TypedArray is out of bounds") + .into()); + } + + let array_length = uint8array_data.array_length(buf_len); + let byte_offset = uint8array_data.byte_offset() as usize; + drop(uint8array_borrow); + + // 3. If string is not a String, throw a TypeError exception. + let input = args.get_or_undefined(0); + // Check if input is a string (not a string object) - match V8's behavior + let Some(input_string) = input.as_string() else { + return Err(JsNativeError::typ() + .with_message("input must be a string") + .into()); + }; + + let input_bytes = Self::encoded_input_bytes(&input_string); + let input_len = input_bytes.len(); + + // Check if length is odd first - this must be done even if array_length is 0 + // Per spec: FromHex checks length before checking maxLength + if !input_len.is_multiple_of(2) { + return Err(JsNativeError::syntax() + .with_message("Invalid hex string: odd length") + .into()); + } + + // If array length is 0, return early (after checking for odd length) + if array_length == 0 { + let read = JsValue::from(0); + let written = JsValue::from(0); + return Self::create_set_from_result(read, written, context); + } + + // 4. Let taRecord be MakeTypedArrayWithBufferWitnessRecord(into, seq-cst). + // 5. If IsTypedArrayOutOfBounds(taRecord) is true, throw a TypeError exception. + // 6. Let byteLength be TypedArrayLength(taRecord). + // 7. Let result be FromHex(string, byteLength). + // 8. Let bytes be result.[[Bytes]]. + // 9. Let written be the length of bytes. + // 10. NOTE: FromHex does not invoke any user code, so the ArrayBuffer backing + // into cannot have been detached or shrunk. + // 11. Assert: written ≤ byteLength. + // 12. Perform SetUint8ArrayBytes(into, bytes). + // 13. If result.[[Error]] is not none, then + // a. Throw result.[[Error]]. + // 14. Let resultObject be OrdinaryObjectCreate(%Object.prototype%). + // 15. Perform ! CreateDataPropertyOrThrow(resultObject, "read", 𝔽(result.[[Read]])). + // 16. Perform ! CreateDataPropertyOrThrow(resultObject, "written", 𝔽(written)). + // 17. Return resultObject. + + let output_len = min(input_len / 2, array_length as usize); + let mut output = vec![0; output_len]; + let result = hex::decode_mut(&input_bytes, &mut output, Some(array_length as usize)); + + { + let uint8array_mut = uint8array.borrow_mut(); + let uint8array_data = uint8array_mut.data(); + let mut buffer = uint8array_data.viewed_array_buffer().as_buffer_mut(); + let Some(mut data) = buffer.bytes(Ordering::SeqCst) else { + return Err(JsNativeError::typ() + .with_message("Cannot access buffer data") + .into()); + }; + + let mut subslice = data.subslice_mut(byte_offset..byte_offset + array_length as usize); + match &mut subslice { + SliceRefMut::Slice(slice) => { + slice[..result.written].copy_from_slice(&output[..result.written]); + } + SliceRefMut::AtomicSlice(slice) => { + for (dst, src) in slice.iter().zip(&output[..result.written]) { + dst.store(*src, Ordering::SeqCst); + } + } + } + } + + if result.error.is_some() { + return Err(JsNativeError::syntax() + .with_message("Invalid hex character") + .into()); + } + + let read = JsValue::from(result.read as u64); + let written = JsValue::from(result.written as u64); + Self::create_set_from_result(read, written, context) + } + + /// `Uint8Array.prototype.toHex ( )` + /// + /// More information: + /// - [proposal-arraybuffer-base64][spec] + /// + /// [spec]: https://tc39.es/proposal-arraybuffer-base64/spec/#sec-uint8array.prototype.tohex + pub(crate) fn to_hex( + this: &JsValue, + _: &[JsValue], + _context: &mut Context, + ) -> JsResult { + // 1. Let O be the this value. + // 2. Perform ? ValidateUint8Array(O). + let uint8array = this + .as_object() + .and_then(|o| o.clone().downcast::().ok()) + .ok_or_else(|| JsNativeError::typ().with_message("Value is not a Uint8Array object"))?; + + let uint8array_borrow = uint8array.borrow(); + let uint8array_data = uint8array_borrow.data(); + + // Verify it's a Uint8Array + if uint8array_data.kind() != TypedArrayKind::Uint8 { + return Err(JsNativeError::typ() + .with_message("Value is not a Uint8Array object") + .into()); + } + + // Check if detached + let buffer = uint8array_data.viewed_array_buffer(); + let Some(buf_len) = buffer.as_buffer().bytes(Ordering::SeqCst).map(|s| s.len()) else { + return Err(JsNativeError::typ() + .with_message("TypedArray is detached") + .into()); + }; + + if uint8array_data.is_out_of_bounds(buf_len) { + return Err(JsNativeError::typ() + .with_message("TypedArray is out of bounds") + .into()); + } + + let byte_offset = uint8array_data.byte_offset() as usize; + let byte_length = uint8array_data.array_length(buf_len) as usize; + + // Get the data + let buffer_data = buffer.as_buffer(); + let Some(data) = buffer_data.bytes(Ordering::SeqCst) else { + return Err(JsNativeError::typ() + .with_message("Cannot access buffer data") + .into()); + }; + + let input = data + .subslice(byte_offset..byte_offset + byte_length) + .to_vec(); + Ok(JsString::from(hex::encode(&input)).into()) + } + + // ===== Private helpers ===== + + fn encoded_input_bytes(input: &JsString) -> Vec { + if let Some(bytes) = input.as_str().as_latin1() { + return bytes.to_vec(); + } + + input + .iter() + .map(|code_unit| { + if u8::try_from(code_unit).is_ok() { + code_unit as u8 + } else { + u8::MAX + } + }) + .collect() + } + + fn get_base64_options( + options: &JsValue, + context: &mut Context, + ) -> JsResult<(Base64Alphabet, Base64LastChunkHandling)> { + let mut alphabet = Base64Alphabet::Base64; + let mut last_chunk_handling = Base64LastChunkHandling::Loose; + + if let Some(options_obj) = options.as_object() { + // Get alphabet option + let alphabet_value = options_obj.get(js_string!("alphabet"), context)?; + if !alphabet_value.is_undefined() { + // Check if it's a string (not a string object) + let Some(alphabet_str) = alphabet_value.as_string() else { + return Err(JsNativeError::typ() + .with_message("Invalid alphabet option") + .into()); + }; + if alphabet_str == js_string!("base64") { + alphabet = Base64Alphabet::Base64; + } else if alphabet_str == js_string!("base64url") { + alphabet = Base64Alphabet::Base64Url; + } else { + return Err(JsNativeError::typ() + .with_message("Invalid alphabet option") + .into()); + } + } + + // Get lastChunkHandling option + let last_chunk_value = options_obj.get(js_string!("lastChunkHandling"), context)?; + if !last_chunk_value.is_undefined() { + // Check if it's a string (not a string object) - match V8's behavior + let Some(last_chunk_str) = last_chunk_value.as_string() else { + return Err(JsNativeError::typ() + .with_message("Invalid lastChunkHandling option") + .into()); + }; + if last_chunk_str == js_string!("loose") { + last_chunk_handling = Base64LastChunkHandling::Loose; + } else if last_chunk_str == js_string!("strict") { + last_chunk_handling = Base64LastChunkHandling::Strict; + } else if last_chunk_str == js_string!("stop-before-partial") { + last_chunk_handling = Base64LastChunkHandling::StopBeforePartial; + } else { + return Err(JsNativeError::typ() + .with_message("Invalid lastChunkHandling option") + .into()); + } + } + } + + Ok((alphabet, last_chunk_handling)) + } + + fn get_base64_encode_options( + options: &JsValue, + context: &mut Context, + ) -> JsResult<(Base64Alphabet, bool)> { + let mut alphabet = Base64Alphabet::Base64; + let mut omit_padding = false; + + if let Some(options_obj) = options.as_object() { + // Get alphabet option + let alphabet_value = options_obj.get(js_string!("alphabet"), context)?; + if !alphabet_value.is_undefined() { + // Check if it's a string (not a string object) + let Some(alphabet_str) = alphabet_value.as_string() else { + return Err(JsNativeError::typ() + .with_message("Invalid alphabet option") + .into()); + }; + if alphabet_str == js_string!("base64") { + alphabet = Base64Alphabet::Base64; + } else if alphabet_str == js_string!("base64url") { + alphabet = Base64Alphabet::Base64Url; + } else { + return Err(JsNativeError::typ() + .with_message("Invalid alphabet option") + .into()); + } + } + + // Get omitPadding option + let omit_padding_value = options_obj.get(js_string!("omitPadding"), context)?; + if !omit_padding_value.is_undefined() { + omit_padding = omit_padding_value.to_boolean(); + } + } + + Ok((alphabet, omit_padding)) + } + + fn create_set_from_result( + read: JsValue, + written: JsValue, + context: &mut Context, + ) -> JsResult { + // Create { read, written } object + let obj = JsObject::with_object_proto(context.intrinsics()); + obj.set(js_string!("read"), read, false, context)?; + obj.set(js_string!("written"), written, false, context)?; + Ok(obj.into()) + } +} diff --git a/core/engine/src/builtins/typed_array/hex.rs b/core/engine/src/builtins/typed_array/hex.rs new file mode 100644 index 00000000000..927e59c8857 --- /dev/null +++ b/core/engine/src/builtins/typed_array/hex.rs @@ -0,0 +1,74 @@ +//! Hex helpers for `Uint8Array` proposal methods. + +use data_encoding::{DecodeError, DecodePartial, HEXLOWER, HEXLOWER_PERMISSIVE}; + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct DecodeMutResult { + pub(crate) read: usize, + pub(crate) written: usize, + pub(crate) error: Option, +} + +pub(crate) fn decode_mut( + input: &[u8], + output: &mut [u8], + max_length: Option, +) -> DecodeMutResult { + let max_length = max_length.unwrap_or(usize::MAX); + + if let Err(error) = HEXLOWER_PERMISSIVE.decode_len(input.len()) { + return DecodeMutResult { + read: 0, + written: 0, + error: Some(error), + }; + } + + let read = core::cmp::min(input.len(), max_length.saturating_mul(2)); + match HEXLOWER_PERMISSIVE.decode_mut(&input[..read], &mut output[..read / 2]) { + Ok(written) => DecodeMutResult { + read, + written, + error: None, + }, + Err(DecodePartial { + read, + written, + error, + }) => DecodeMutResult { + read, + written, + error: Some(error), + }, + } +} + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct DecodeResult { + pub(crate) read: usize, + pub(crate) output: Vec, + pub(crate) error: Option, +} + +pub(crate) fn decode(input: &[u8], max_length: Option) -> DecodeResult { + let output_len = core::cmp::min(input.len() / 2, max_length.unwrap_or(usize::MAX)); + let mut output = vec![0; output_len]; + let DecodeMutResult { + read, + written, + error, + } = decode_mut(input, &mut output, max_length); + debug_assert!(written <= output.len()); + output.truncate(written); + DecodeResult { + read, + output, + error, + } +} + +pub(crate) fn encode(input: &[u8]) -> String { + let mut output = String::with_capacity(HEXLOWER.encode_len(input.len())); + HEXLOWER.encode_append(input, &mut output); + output +} diff --git a/core/engine/src/builtins/typed_array/mod.rs b/core/engine/src/builtins/typed_array/mod.rs index d79c8d188c5..1a45003fc27 100644 --- a/core/engine/src/builtins/typed_array/mod.rs +++ b/core/engine/src/builtins/typed_array/mod.rs @@ -27,11 +27,15 @@ use crate::{ }; use boa_gc::{Finalize, Trace}; +mod base64; mod builtin; +mod builtin_uint8; mod element; +mod hex; mod object; pub(crate) use builtin::{BuiltinTypedArray, is_valid_integer_index}; +pub(crate) use builtin_uint8::BuiltinUint8Array; #[cfg(feature = "float16")] pub(crate) use element::Float16; pub(crate) use element::{Atomic, ClampedU8, Element}; @@ -52,7 +56,7 @@ impl IntrinsicObject for T { .name(js_string!("get [Symbol.species]")) .build(); - BuiltInBuilder::from_standard_constructor::(realm) + let mut builder = BuiltInBuilder::from_standard_constructor::(realm) .prototype( realm .intrinsics() @@ -78,8 +82,24 @@ impl IntrinsicObject for T { js_string!("BYTES_PER_ELEMENT"), size_of::(), Attribute::READONLY | Attribute::NON_ENUMERABLE | Attribute::PERMANENT, - ) - .build(); + ); + + // Uint8Array specific methods for base64 and hex encoding/decoding + if T::ERASED == TypedArrayKind::Uint8 { + builder = builder + .static_method(BuiltinUint8Array::from_base64, js_string!("fromBase64"), 1) + .static_method(BuiltinUint8Array::from_hex, js_string!("fromHex"), 1) + .method( + BuiltinUint8Array::set_from_base64, + js_string!("setFromBase64"), + 1, + ) + .method(BuiltinUint8Array::set_from_hex, js_string!("setFromHex"), 1) + .method(BuiltinUint8Array::to_base64, js_string!("toBase64"), 0) + .method(BuiltinUint8Array::to_hex, js_string!("toHex"), 0); + } + + builder.build(); } } @@ -92,8 +112,8 @@ impl BuiltInObject for T { impl BuiltInConstructor for T { const CONSTRUCTOR_ARGUMENTS: usize = 3; - const PROTOTYPE_STORAGE_SLOTS: usize = 1; - const CONSTRUCTOR_STORAGE_SLOTS: usize = 3; + const PROTOTYPE_STORAGE_SLOTS: usize = 5; + const CONSTRUCTOR_STORAGE_SLOTS: usize = 5; const STANDARD_CONSTRUCTOR: fn(&StandardConstructors) -> &StandardConstructor = ::ERASED.standard_constructor(); diff --git a/core/engine/src/builtins/typed_array/tests.rs b/core/engine/src/builtins/typed_array/tests.rs index 547dc4dff94..4fe4d21c059 100644 --- a/core/engine/src/builtins/typed_array/tests.rs +++ b/core/engine/src/builtins/typed_array/tests.rs @@ -182,3 +182,29 @@ fn typedarray_exotic_prevent_extensions() { TestAction::assert("!Object.isExtensible(fixedLengthWithOffset1)"), ]); } + +#[test] +fn uint8array_from_base64_rejects_concatenated_padded_input() { + run_test_actions([TestAction::assert_native_error( + "Uint8Array.fromBase64('AQ==AQ==')", + JsNativeErrorKind::Syntax, + "Invalid base64 string", + )]); +} + +#[test] +fn uint8array_from_base64_allows_trailing_whitespace_after_padding() { + run_test_actions([TestAction::assert_eq( + "Uint8Array.fromBase64('AQ== ')[0]", + 1, + )]); +} + +#[test] +fn uint8array_from_base64_rejects_trailing_non_whitespace_after_padding() { + run_test_actions([TestAction::assert_native_error( + "Uint8Array.fromBase64('AQ== AQ==')", + JsNativeErrorKind::Syntax, + "Invalid base64 string", + )]); +} diff --git a/test262_config.toml b/test262_config.toml index 01a3fecca09..6895fb3c2ff 100644 --- a/test262_config.toml +++ b/test262_config.toml @@ -37,10 +37,6 @@ features = [ # https://github.com/tc39/proposal-decorators "decorators", - # Uint8Array Base64 - # https://github.com/tc39/proposal-arraybuffer-base64 - "uint8array-base64", - # Source Phase Imports # test262 special specifier "source-phase-imports-module-source",