Skip to content

Commit 4db76b8

Browse files
authored
Start removing custom translation in wasm-mutate (#1794)
Now that `wasm_encoder::reencode` exists that's the better option to use for reencoding modules. This commit removes one of the major users of the equivalent `Translator` trait in `wasm_mutate` to start down the path of removing that trait entirely and instead relying on the support in `wasm_encoder`.
1 parent ebd4b68 commit 4db76b8

6 files changed

Lines changed: 322 additions & 452 deletions

File tree

crates/wasm-encoder/src/reencode.rs

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,14 @@ pub trait Reencode {
486486
) -> Result<(), Error<Self::Error>> {
487487
utils::parse_custom_name_subsection(self, names, section)
488488
}
489+
490+
fn data_count(&mut self, count: u32) -> u32 {
491+
count
492+
}
493+
494+
fn start_section(&mut self, start: u32) -> u32 {
495+
self.function_index(start)
496+
}
489497
}
490498

491499
/// An error when re-encoding from `wasmparser` to `wasm-encoder`.
@@ -498,6 +506,8 @@ pub enum Error<E = Infallible> {
498506
/// The const expression is invalid: not actually constant or something like
499507
/// that.
500508
InvalidConstExpr,
509+
/// The code section size listed was not valid for the wasm binary provided.
510+
InvalidCodeSectionSize,
501511
/// There was a section that does not belong into a core wasm module.
502512
UnexpectedNonCoreModuleSection,
503513
/// There was a section that does not belong into a compoennt module.
@@ -539,6 +549,7 @@ impl<E: std::fmt::Display> std::fmt::Display for Error<E> {
539549
Self::UnsupportedCoreTypeInComponent => {
540550
fmt.write_str("unsupported core type in a component")
541551
}
552+
Self::InvalidCodeSectionSize => fmt.write_str("invalid code section size"),
542553
}
543554
}
544555
}
@@ -552,7 +563,8 @@ impl<E: 'static + std::error::Error> std::error::Error for Error<E> {
552563
| Self::CanonicalizedHeapTypeReference
553564
| Self::UnexpectedNonCoreModuleSection
554565
| Self::UnexpectedNonComponentSection
555-
| Self::UnsupportedCoreTypeInComponent => None,
566+
| Self::UnsupportedCoreTypeInComponent
567+
| Self::InvalidCodeSectionSize => None,
556568
}
557569
}
558570
}
@@ -588,11 +600,10 @@ pub mod utils {
588600
reencoder.intersperse_section_hook(module, after, before)
589601
}
590602

591-
let mut sections = parser.parse_all(data);
592-
let mut next_section = sections.next();
603+
let orig_offset = parser.offset() as usize;
593604
let mut last_section = None;
594605

595-
'outer: while let Some(section) = next_section {
606+
for section in parser.parse_all(data) {
596607
match section? {
597608
wasmparser::Payload::Version {
598609
encoding: wasmparser::Encoding::Module,
@@ -697,7 +708,7 @@ pub mod utils {
697708
Some(crate::SectionId::Start),
698709
)?;
699710
module.section(&crate::StartSection {
700-
function_index: reencoder.function_index(func),
711+
function_index: reencoder.start_section(func),
701712
});
702713
}
703714
wasmparser::Payload::ElementSection(section) => {
@@ -718,6 +729,7 @@ pub mod utils {
718729
&mut last_section,
719730
Some(crate::SectionId::DataCount),
720731
)?;
732+
let count = reencoder.data_count(count);
721733
module.section(&crate::DataCountSection { count });
722734
}
723735
wasmparser::Payload::DataSection(section) => {
@@ -731,47 +743,37 @@ pub mod utils {
731743
reencoder.parse_data_section(&mut data, section)?;
732744
module.section(&data);
733745
}
734-
wasmparser::Payload::CodeSectionStart { count, .. } => {
746+
wasmparser::Payload::CodeSectionStart { range, .. } => {
735747
handle_intersperse_section_hook(
736748
reencoder,
737749
module,
738750
&mut last_section,
739751
Some(crate::SectionId::Code),
740752
)?;
741753
let mut codes = crate::CodeSection::new();
742-
for _ in 0..count {
743-
if let Some(Ok(wasmparser::Payload::CodeSectionEntry(section))) =
744-
sections.next()
745-
{
746-
reencoder.parse_function_body(&mut codes, section)?;
747-
} else {
748-
return Err(Error::UnexpectedNonCoreModuleSection);
749-
}
750-
}
751-
module.section(&codes);
752-
}
753-
wasmparser::Payload::CodeSectionEntry(section) => {
754-
handle_intersperse_section_hook(
755-
reencoder,
756-
module,
757-
&mut last_section,
758-
Some(crate::SectionId::Code),
759-
)?;
760-
// we can't do better than start a new code section here
761-
let mut codes = crate::CodeSection::new();
762-
reencoder.parse_function_body(&mut codes, section)?;
763-
while let Some(section) = sections.next() {
764-
let section = section?;
765-
if let wasmparser::Payload::CodeSectionEntry(section) = section {
766-
reencoder.parse_function_body(&mut codes, section)?;
767-
} else {
768-
module.section(&codes);
769-
next_section = Some(Ok(section));
770-
continue 'outer;
771-
}
772-
}
754+
755+
// Convert from `range` to a byte range within `data` while
756+
// accounting for various offsets. Then create a
757+
// `CodeSectionReader` (which notably the payload does not
758+
// give us here) and recurse with that. This means that
759+
// users overridding `parse_code_section` always get that
760+
// function called.
761+
let section = match data.get(range.start - orig_offset..range.end - orig_offset)
762+
{
763+
Some(section) => section,
764+
None => return Err(Error::InvalidCodeSectionSize),
765+
};
766+
let reader = wasmparser::BinaryReader::new(section, range.start);
767+
let section = wasmparser::CodeSectionReader::new(reader)?;
768+
reencoder.parse_code_section(&mut codes, section)?;
773769
module.section(&codes);
774770
}
771+
772+
// Parsing of code section entries (function bodies) happens
773+
// above during the handling of the code section. That means
774+
// that we just skip all these payloads.
775+
wasmparser::Payload::CodeSectionEntry(_) => {}
776+
775777
wasmparser::Payload::ModuleSection { .. }
776778
| wasmparser::Payload::InstanceSection(_)
777779
| wasmparser::Payload::CoreTypeSection(_)
@@ -795,8 +797,6 @@ pub mod utils {
795797
handle_intersperse_section_hook(reencoder, module, &mut last_section, None)?;
796798
}
797799
}
798-
799-
next_section = sections.next();
800800
}
801801

802802
Ok(())
@@ -1413,7 +1413,20 @@ pub mod utils {
14131413
table_index,
14141414
offset_expr,
14151415
} => elements.active(
1416-
table_index.map(|t| reencoder.table_index(t)),
1416+
// Inform the reencoder that a table index is being used even if
1417+
// it's not actually present here. That helps wasm-mutate for
1418+
// example which wants to track uses to know when it's ok to
1419+
// remove a table.
1420+
//
1421+
// If the table index is still zero though go ahead and pass
1422+
// `None` here since that implicitly references table 0. Note
1423+
// that this means that this does not round-trip the encoding of
1424+
// `Some(0)` since that reencodes to `None`, but that's seen as
1425+
// hopefully ok.
1426+
match reencoder.table_index(table_index.unwrap_or(0)) {
1427+
0 => None,
1428+
i => Some(i),
1429+
},
14171430
&reencoder.const_expr(offset_expr)?,
14181431
elems,
14191432
),

crates/wasm-mutate/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ anyhow = { workspace = true }
2424
wat = { path = "../wat" }
2525
wasmprinter = { path = "../wasmprinter" }
2626
env_logger = { workspace = true }
27+
wasmparser = { workspace = true, features = ['validate', 'features'] }

crates/wasm-mutate/src/error.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,16 @@ impl From<wasmparser::BinaryReaderError> for Error {
5656
}
5757
}
5858

59+
impl From<wasm_encoder::reencode::Error<Error>> for Error {
60+
fn from(e: wasm_encoder::reencode::Error<Error>) -> Self {
61+
match e {
62+
wasm_encoder::reencode::Error::ParseError(e) => Error::parse(e),
63+
wasm_encoder::reencode::Error::UserError(e) => e,
64+
other => Error::other(other.to_string()),
65+
}
66+
}
67+
}
68+
5969
/// The kind of error.
6070
#[derive(thiserror::Error, Debug)]
6171
pub enum ErrorKind {

0 commit comments

Comments
 (0)