From 19ed07708da5ca7e6cbc3dd629b4c81a82932722 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 12 Jun 2026 15:07:27 -0700 Subject: [PATCH 1/2] Compress the `.wasmtime.traps` section This commit scratches an itch I've had for a long time about how we encode traps into a final `*.cwasm`. This is frequently a pretty substantial portion of a `*.cwasm` hovering around ~10-15% of the size often. The goal of this commit is to shrink the size of this section by at least a factor of two, and this currently shrinks it by ~75%. The basic problem of this section is it's encoding 5 bytes of information per trap, the u32 pc offset and the u8 trap code. The previous encoding used all 5 bytes per trap, but this is generally not the most efficient method. The other constraint for this section, however, is that we want O(log N) search time to find a trap code for a particular trapping offset meaning that a linear scan is a bit too much to bite off here. The general idea of this new encoding is as follows: * Split the entire list of traps for a `*.cwasm` into fixed-width blocks, here defined as 128 traps-per-block. * A fixed-width index is created which maps from first-pc-in-block to where-block-is-encoded. This index is the O(log N) search. * Each block is encoded as: * First a trap code byte. Currently the most common trap in this block. * Next, for each entry in the block, `uleb((offset - prev_offset) << 1 | different_trap)` is encoded. This enables a delta-encoding of offsets which is the main source of compression, and the lowest bit, if present, means that the uleb is followed by a trap byte indicating what trap this offset corresponds to. Overall this gets the original 5-byte-per-trap overhead to roughly 1.5 bytes-per-trap which shaves off 75% of the size of this section. The lookup factor for traps is still O(log N) with a slightly higher constant factor than before. 
The 128 traps-per-block factor is relatively arbitrary at this time, but some analysis showed that it was a relatively good sweet spot of not being too big while still getting the lion's share of compression benefits. --- crates/environ/src/compile/trap_encoding.rs | 221 ++++++++++++++++++-- crates/environ/src/obj.rs | 28 +-- crates/environ/src/trap_encoding.rs | 167 ++++++++++++--- 3 files changed, 349 insertions(+), 67 deletions(-) diff --git a/crates/environ/src/compile/trap_encoding.rs b/crates/environ/src/compile/trap_encoding.rs index 139c85a7eed4..6ed423caa4d0 100644 --- a/crates/environ/src/compile/trap_encoding.rs +++ b/crates/environ/src/compile/trap_encoding.rs @@ -1,6 +1,7 @@ use crate::TrapInformation; use crate::obj::ELF_WASMTIME_TRAPS; use crate::prelude::*; +use crate::trap_encoding::TRAP_BLOCK_SIZE; use object::write::{Object, StandardSegment}; use object::{LittleEndian, SectionKind, U32}; use std::ops::Range; @@ -10,11 +11,91 @@ use std::ops::Range; /// /// This structure is incrementally fed the results of compiling individual /// functions and handles all the encoding internally, allowing usage of -/// `lookup_trap_code` below with the resulting section. +/// `lookup_trap_code` with the resulting section. +/// +/// # Section format +/// +/// The section encodes a sequence of `(text_offset, trap_code)` entries, +/// sorted by `text_offset`, where `text_offset` is the location of a +/// trapping instruction relative to the start of the text section and +/// `trap_code` is the byte encoding of its `CompiledTrap`. This format is +/// optimized to enable cheap (O(log n)) lookup given an offset to find a trap +/// code while also being relatively compact as this is included in all modules +/// by default. To satisfy this the section is encoded as two major pieces: an +/// index and a sequence of blocks. +/// +/// The index is used to perform a binary search given a particular +/// `text_offset` to find a particular block. 
The index stores text offsets as +/// well as byte offsets in the "block bodies" section. Once a block is found +/// each block contains up to `TRAP_BLOCK_SIZE` entries encoded next to each +/// other. Blocks take up a variable width of bytes to encode. More information +/// on decoding each block is below, but the general layout of the section looks +/// like: +/// +/// ```text +/// ┌───────────────────────────────────┐ +/// │ entry_count: u32 │ +/// │ block_count: u32 │ +/// ├───────────────────────────────────┤ +/// │ block index │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ first_offset: u32 │ │ one pair per block, sorted by +/// │ │ data_pos: u32 │ │ `first_offset`; `data_pos` is +/// │ ├───────────────────────────────┤ │ relative to the start of the +/// │ │ ... │ │ block bodies area below +/// │ └───────────────────────────────┘ │ +/// ├───────────────────────────────────┤ +/// │ block bodies │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ default_code: u8 │ │ +/// │ ├───────────────────────────────┤ │ +/// │ │ entry: uleb token │ │ one entry per trap in the +/// │ │ [trap_code: u8] │ │ block, `TRAP_BLOCK_SIZE` max +/// │ ├───────────────────────────────┤ │ +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// └───────────────────────────────────┘ +/// ``` +/// +/// * `entry_count` is the total number of entries (pc/trap combos) in the +/// section and `block_count` is the number of blocks, `ceil(entry_count / +/// TRAP_BLOCK_SIZE)`. +/// * In the block index, `first_offset` is the `text_offset` of the block's +/// first entry and `data_pos` is the position of the block's body, +/// relative to the start of the bodies area (i.e. the end of the index). +/// * Each block body starts with `default_code`, the block's "default" trap +/// code, chosen as the most common code among the block's entries. 
+/// * Each entry is a uleb-encoded token `(pc_delta << 1) | code_differs`. +/// Here `pc_delta` is this entry's `text_offset` minus the previous +/// entry's (the first entry's delta is relative to the block's +/// `first_offset` and is therefore 0). If `code_differs` is set the token +/// is followed by one byte holding this entry's trap code, otherwise the +/// entry has the block's default code. +/// +/// Lookup (`lookup_trap_code`) binary searches the fixed-width block index +/// for the last block whose `first_offset` is `<=` the pc in question, then +/// linearly decodes at most `TRAP_BLOCK_SIZE` entries of that block's body +/// looking for an exact match. +/// +/// This encoding leans on two properties of trap metadata: consecutive trap +/// sites are generally close together (pc deltas almost always fit in a +/// single-byte leb) and most entries share one trap code (typically +/// `MemoryOutOfBounds` for gc-less wasm), making explicit code bytes rare. This +/// is all in service of shrinking the minimum 5 bytes per entry (u32 offset, u8 +/// code), to a bit more than one byte per entry in practice. +/// +/// Note that at this time this section has an alignment of 1. Additionally +/// due to the 32-bit offsets in the block index this doesn't support images +/// >= 4GB. #[derive(Default)] pub struct TrapEncodingBuilder { - offsets: Vec>, - traps: Vec, + entries: usize, + block_index: Vec<[U32; 2]>, + block_bodies: Vec, + pending: Vec<(u32, u8)>, last_offset: u32, } @@ -37,22 +118,56 @@ impl TrapEncodingBuilder { let func_end = u32::try_from(func.end).unwrap(); // Sanity-check to ensure that functions are pushed in-order, otherwise - // the `offsets` array won't be sorted which is our goal. + // the encoded blocks won't be sorted which is our goal. 
assert!(func_start >= self.last_offset); - self.offsets.reserve(traps.len()); - self.traps.reserve(traps.len()); for info in traps { let pos = func_start + info.code_offset; assert!(pos >= self.last_offset); - self.offsets.push(U32::new(LittleEndian, pos)); - self.traps.push(info.trap_code.as_u8()); + self.pending.push((pos, info.trap_code.as_u8())); + self.entries += 1; self.last_offset = pos; + if self.pending.len() == TRAP_BLOCK_SIZE { + self.seal_block(); + } } self.last_offset = func_end; } + /// Flushes `self.pending` into one encoded block, appending to the index + /// and data arrays. + fn seal_block(&mut self) { + let first_offset = match self.pending.first() { + Some((offset, _)) => *offset, + None => return, + }; + let body_pos = u32::try_from(self.block_bodies.len()).unwrap(); + self.block_index.push([ + U32::new(LittleEndian, first_offset), + U32::new(LittleEndian, body_pos), + ]); + + // The block's default code is its most common one, making the common + // case of a run of identical codes free to encode. + let default_code = most_common_code(&self.pending); + self.block_bodies.push(default_code); + + let mut prev = first_offset; + for (pc, code) in self.pending.drain(..) { + let delta = pc - prev; + prev = pc; + let differs = code != default_code; + write_uleb( + &mut self.block_bodies, + (u64::from(delta) << 1) | u64::from(differs), + ); + if differs { + self.block_bodies.push(code); + } + } + } + /// Encodes this section into the object provided. pub fn append_to(self, obj: &mut Object) { let section = obj.add_section( @@ -61,10 +176,90 @@ impl TrapEncodingBuilder { SectionKind::ReadOnlyData, ); - // NB: this matches the encoding expected by `lookup` below. 
- let amt = u32::try_from(self.traps.len()).unwrap(); - obj.append_section_data(section, &amt.to_le_bytes(), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1); - obj.append_section_data(section, &self.traps, 1); + obj.append_section_data(section, &self.finish(), 1); + } + + /// Finishes encoding and returns the raw section contents, as decoded by + /// `lookup_trap_code` and `iterate_traps`. + fn finish(mut self) -> Vec { + self.seal_block(); + let entries = u32::try_from(self.entries).unwrap(); + let num_blocks = u32::try_from(self.block_index.len()).unwrap(); + let mut ret = Vec::with_capacity(8 + self.block_index.len() * 8 + self.block_bodies.len()); + ret.extend_from_slice(&entries.to_le_bytes()); + ret.extend_from_slice(&num_blocks.to_le_bytes()); + ret.extend_from_slice(object::bytes_of_slice(&self.block_index)); + ret.extend_from_slice(&self.block_bodies); + ret + } +} + +fn most_common_code(entries: &[(u32, u8)]) -> u8 { + let mut counts = [0u16; 256]; + let mut best = entries[0].1; + for (_, code) in entries { + let count = &mut counts[usize::from(*code)]; + *count += 1; + if *count > counts[usize::from(best)] { + best = *code; + } + } + best +} + +fn write_uleb(data: &mut Vec, mut value: u64) { + while value >= 0x80 { + data.push(0x80 | (value as u8 & 0x7f)); + value >>= 7; + } + data.push(value as u8); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Trap, iterate_traps, lookup_trap_code}; + + fn encode(funcs: &[(Range, &[TrapInformation])]) -> Vec { + let mut builder = TrapEncodingBuilder::default(); + for (func, traps) in funcs { + builder.push(func.clone(), traps); + } + builder.finish() + } + + fn info(code_offset: u32, trap: Trap) -> TrapInformation { + TrapInformation { + code_offset, + trap_code: trap.into(), + } + } + + #[test] + fn smoke() { + let section = encode(&[]); + assert_eq!(lookup_trap_code(&section, 0), None); + assert_eq!(iterate_traps(&section).unwrap().count(), 0); + + let section = 
encode(&[(0..0x100, &[])]); + assert_eq!(lookup_trap_code(&section, 0x50), None); + assert_eq!(iterate_traps(&section).unwrap().count(), 0); + + let section = encode(&[( + 0..0x100, + &[ + info(10, Trap::MemoryOutOfBounds), + info(20, Trap::StackOverflow), + ], + )]); + assert_eq!(lookup_trap_code(&section, 0x50), None); + assert_eq!( + lookup_trap_code(&section, 10), + Some(Trap::MemoryOutOfBounds.into()) + ); + assert_eq!( + lookup_trap_code(&section, 20), + Some(Trap::StackOverflow.into()) + ); } } diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index 1c3dc478e3a3..3ecf38107891 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -75,27 +75,13 @@ pub const ELF_WASMTIME_STACK_MAP: &str = ".wasmtime.stackmap"; /// encodes the ability to map an offset in the text section to the trap code /// that it corresponds to. /// -/// This section is used at runtime to determine what flavor of trap happened to -/// ensure that embedders and debuggers know the reason for the wasm trap. The -/// encoding of this section is custom to Wasmtime and managed with helpers in -/// the `object` crate: -/// -/// * First the section has a 32-bit little endian integer indicating how many -/// trap entries are in the section. -/// * Next is an array, of the same length as read before, of 32-bit -/// little-endian integers. These integers are offsets into the text section -/// of the compilation image. -/// * Finally is the same count number of bytes. Each of these bytes corresponds -/// to a trap code. -/// -/// This section is decoded by `lookup_trap_code` below which will read the -/// section count, slice some bytes to get the various arrays, and then perform -/// a binary search on the offsets array to find the index corresponding to -/// the pc being looked up. If found the same index in the trap array (the array -/// of bytes) is the trap code for that offset. -/// -/// Note that at this time this section has an alignment of 1. 
Additionally due -/// to the 32-bit encodings for offsets this doesn't support images >=4gb. +/// This section is used at runtime to determine what flavor of trap happened +/// to ensure that embedders and debuggers know the reason for the wasm trap. +/// +/// This section's format is defined by the documentation of the +/// `crate::compile::TrapEncodingBuilder` data structure, which builds it. It +/// is decoded by `lookup_trap_code`. Its offsets are relative to the start of +/// the text section. pub const ELF_WASMTIME_TRAPS: &str = ".wasmtime.traps"; /// A custom binary-encoded section of the wasmtime compilation diff --git a/crates/environ/src/trap_encoding.rs b/crates/environ/src/trap_encoding.rs index 85a52828a9d2..232431151ed0 100644 --- a/crates/environ/src/trap_encoding.rs +++ b/crates/environ/src/trap_encoding.rs @@ -285,54 +285,155 @@ generate_trap_type! { impl core::error::Error for Trap {} +/// Number of trap entries packed into one block of the trap section. +/// +/// See `TrapEncodingBuilder` in `crate::compile` for the full section format. +/// Chosen as a balance between the fixed-width index overhead per block (8 +/// bytes, amortized across entries) and the amount of linear decoding required +/// to look up a single pc within a block. +pub(crate) const TRAP_BLOCK_SIZE: usize = 128; + /// Decodes the provided trap information section and attempts to find the trap /// code corresponding to the `offset` specified. /// /// The `section` provided is expected to have been built by -/// `TrapEncodingBuilder` above. Additionally the `offset` should be a relative +/// `TrapEncodingBuilder` in `crate::compile`, whose documentation describes +/// the format decoded here. Additionally the `offset` should be a relative /// offset within the text section of the compilation image. 
pub fn lookup_trap_code(section: &[u8], offset: usize) -> Option { - let (offsets, traps) = parse(section)?; + let section = parse(section)?; + let offset = u32::try_from(offset).ok()?; - // The `offsets` table is sorted in the trap section so perform a binary - // search of the contents of this section to find whether `offset` is an - // entry in the section. Note that this is a precise search because trap pcs + // Find the last block whose first pc is `<= offset`; only that block can + // contain `offset`. Note that this is a precise search because trap pcs // should always be precise as well as our metadata about them, which means // we expect an exact match to correspond to a trap opcode. - // - // Once an index is found within the `offsets` array then that same index is - // used to lookup from the `traps` list of bytes to get the trap code byte - // corresponding to this offset. - let offset = u32::try_from(offset).ok()?; - let index = offsets - .binary_search_by_key(&offset, |val| val.get(LittleEndian)) - .ok()?; - debug_assert!(index < traps.len()); - let byte = *traps.get(index)?; - - let trap = CompiledTrap::from_u8(byte); - debug_assert!(trap.is_some(), "missing mapping for {byte}"); - trap + let block = section + .block_index + .partition_point(|[first_offset, _]| first_offset.get(LittleEndian) <= offset) + .checked_sub(1)?; + + for (pc, byte) in section.block_entries(block)? { + if pc == offset { + let trap = CompiledTrap::from_u8(byte); + debug_assert!(trap.is_some(), "missing mapping for {byte}"); + return trap; + } + if pc > offset { + break; + } + } + None +} + +/// A parsed view of the trap section. +/// +/// The fields here correspond to the pieces of the section layout described +/// on `TrapEncodingBuilder` in `crate::compile`. +#[derive(Clone, Copy)] +struct TrapSection<'a> { + /// Total number of trap entries in this section. + entries: usize, + /// One `(first_offset, block_pos)` pair per block. 
+ block_index: &'a [[U32; 2]], + /// Variable-length block bodies, indexed by `block_pos` in the `block_index` + /// table above. + block_bodies: &'a [u8], +} + +impl<'a> TrapSection<'a> { + /// Returns an iterator of `(text_offset, trap_code_byte)` for all entries + /// in `block`, or `None` if the section is malformed. + fn block_entries(&self, block_index: usize) -> Option> { + let [first_offset, block_pos] = self.block_index.get(block_index)?; + let first_offset = first_offset.get(LittleEndian); + let block_pos = block_pos.get(LittleEndian); + let mut block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?; + let default_code = pop(&mut block)?; + let remaining = core::cmp::min( + TRAP_BLOCK_SIZE, + self.entries.checked_sub(block_index * TRAP_BLOCK_SIZE)?, + ); + Some(BlockEntries { + block, + prev_offset: first_offset, + default_code, + remaining, + }) + } +} + +/// Iterator over the entries of a single block, decoding the +/// delta-and-code-flag varints described in the "block body" portion of the +/// section format on `TrapEncodingBuilder` in `crate::compile`. +struct BlockEntries<'a> { + block: &'a [u8], + prev_offset: u32, + default_code: u8, + remaining: usize, +} + +impl Iterator for BlockEntries<'_> { + type Item = (u32, u8); + + fn next(&mut self) -> Option<(u32, u8)> { + self.remaining = self.remaining.checked_sub(1)?; + let token = read_uleb(&mut self.block)?; + let delta = u32::try_from(token >> 1).ok()?; + let cur_offset = self.prev_offset.checked_add(delta)?; + self.prev_offset = cur_offset; + let code = if token & 1 != 0 { + pop(&mut self.block)? 
+ } else { + self.default_code + }; + Some((cur_offset, code)) + } +} + +fn read_uleb(data: &mut &[u8]) -> Option { + let mut result = 0; + let mut shift = 0; + while shift < 64 { + let byte = pop(data)?; + result |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some(result); + } + shift += 7; + } + None +} + +fn pop(data: &mut &[u8]) -> Option { + let (&byte, rest) = data.split_first()?; + *data = rest; + Some(byte) } -fn parse(section: &[u8]) -> Option<(&[U32], &[u8])> { +fn parse(section: &[u8]) -> Option> { let mut section = Bytes(section); - // NB: this matches the encoding written by `append_to` above. - let count = section.read::>().ok()?; - let count = usize::try_from(count.get(LittleEndian)).ok()?; - let (offsets, traps) = object::slice_from_bytes::>(section.0, count).ok()?; - debug_assert_eq!(traps.len(), count); - Some((offsets, traps)) + // NB: this matches the encoding written by `TrapEncodingBuilder`. + let entries = section.read::>().ok()?; + let entries = usize::try_from(entries.get(LittleEndian)).ok()?; + let num_blocks = section.read::>().ok()?; + let num_blocks = usize::try_from(num_blocks.get(LittleEndian)).ok()?; + let (block_index, block_bodies) = + object::slice_from_bytes::<[U32; 2]>(section.0, num_blocks).ok()?; + Some(TrapSection { + entries, + block_index, + block_bodies, + }) } /// Returns an iterator over all of the traps encoded in `section`, which should /// have been produced by `TrapEncodingBuilder`. 
pub fn iterate_traps(section: &[u8]) -> Option + '_> { - let (offsets, traps) = parse(section)?; - Some(offsets.iter().zip(traps).map(|(offset, trap)| { - ( - offset.get(LittleEndian), - CompiledTrap::from_u8(*trap).unwrap(), - ) - })) + let section = parse(section)?; + Some( + (0..section.block_index.len()) + .flat_map(move |block| section.block_entries(block).into_iter().flatten()) + .map(|(pc, byte)| (pc, CompiledTrap::from_u8(byte).unwrap())), + ) } From c5b00f8ca97666b2700e92a238a75c73ffdda2b9 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 12 Jun 2026 15:38:16 -0700 Subject: [PATCH 2/2] Compress the `.wasmtime.addrmap` section This commit mirrors the previous commit for the `.wasmtime.addrmap` section of binaries. The encoding is similar in structure but the encoding of each block is slightly different where it handles the different nature of the address map section. Notably the lowest bit of each entry's pc-delta token indicates whether this is a "none" position or not. If a position is available then it's sleb-encoded as a delta from the previous position. The goal is to compress the 8-bytes-per-entry to ~2 bytes-per-entry which is largely achieved with this commit. Each entry tends to be pretty close pc-wise to the previous entry and pretty close source-wise from the previous entry as well. Overall this shrinks the `.wasmtime.addrmap` section by ~75% locally. In sum for a `libpython.so` this shaves off 8M of a 25M binary, saving ~30% in total file size between this optimization and the previous. cc #3547 - note though this doesn't close the issue because this only compresses the section better, it doesn't remove extraneous entries which won't ever be needed. 
--- crates/environ/src/address_map.rs | 167 +++++++++---- crates/environ/src/bytes.rs | 74 ++++++ crates/environ/src/compile/address_map.rs | 245 ++++++++++++++++++-- crates/environ/src/compile/trap_encoding.rs | 9 +- crates/environ/src/lib.rs | 1 + crates/environ/src/obj.rs | 27 +-- crates/environ/src/trap_encoding.rs | 27 +-- 7 files changed, 442 insertions(+), 108 deletions(-) create mode 100644 crates/environ/src/bytes.rs diff --git a/crates/environ/src/address_map.rs b/crates/environ/src/address_map.rs index 8d1fd2d48bfd..4c02ff4e8046 100644 --- a/crates/environ/src/address_map.rs +++ b/crates/environ/src/address_map.rs @@ -1,5 +1,6 @@ //! Data structures to provide transformation of the source +use crate::bytes::{read_sleb, read_uleb}; use core::fmt; use object::{Bytes, LittleEndian, U32}; use serde_derive::{Deserialize, Serialize}; @@ -141,76 +142,160 @@ impl fmt::Display for ModulePC { } } -/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets -/// and the slice of associated file positions for each offset. -fn parse_address_map(section: &[u8]) -> Option<(&[U32], &[U32])> { +/// Number of address-mapping entries packed into one block of the address map +/// section. +/// +/// See `AddressMapSection` in `crate::compile` for the full section format. +/// Chosen as a balance between the fixed-width index overhead per block (8 +/// bytes, amortized across entries) and the amount of linear decoding required +/// to look up a single pc within a block. +pub(crate) const ADDRMAP_BLOCK_SIZE: usize = 128; + +/// A parsed view of the address map section. +/// +/// The fields here correspond to the pieces of the section layout described +/// on `AddressMapSection` in `crate::compile`. +#[derive(Clone, Copy)] +struct AddressMap<'a> { + /// Total number of address-mapping entries in this section. + entries: usize, + /// One `(first_offset, block_pos)` pair per block. 
+ block_index: &'a [[U32; 2]], + /// Variable-length block bodies, indexed by `block_pos` in the + /// `block_index` table above. + block_bodies: &'a [u8], +} + +impl<'a> AddressMap<'a> { + /// Returns an iterator of `(text_offset, FilePos)` for all entries in + /// `block`, or `None` if the section is malformed. + fn block_entries(&self, block_index: usize) -> Option> { + let [first_offset, block_pos] = self.block_index.get(block_index)?; + let first_offset = first_offset.get(LittleEndian); + let block_pos = block_pos.get(LittleEndian); + let block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?; + let remaining = core::cmp::min( + ADDRMAP_BLOCK_SIZE, + self.entries.checked_sub(block_index * ADDRMAP_BLOCK_SIZE)?, + ); + Some(BlockEntries { + block, + prev_offset: first_offset, + prev_pos: None, + remaining, + }) + } +} + +/// Iterator over the entries of a single block, decoding the delta-and-flag +/// varints described in the "block body" portion of the section format on +/// `AddressMapSection` in `crate::compile`. +struct BlockEntries<'a> { + block: &'a [u8], + prev_offset: u32, + prev_pos: Option, + remaining: usize, +} + +impl Iterator for BlockEntries<'_> { + type Item = (u32, FilePos); + + fn next(&mut self) -> Option<(u32, FilePos)> { + self.remaining = self.remaining.checked_sub(1)?; + let token = read_uleb(&mut self.block)?; + let delta = u32::try_from(token >> 1).ok()?; + let cur_offset = self.prev_offset.checked_add(delta)?; + self.prev_offset = cur_offset; + if token & 1 != 0 { + return Some((cur_offset, FilePos::none())); + } + let pos = match self.prev_pos { + // The first non-none position of a block is encoded absolutely... + None => u32::try_from(read_uleb(&mut self.block)?).ok()?, + // ... and subsequent positions are sleb deltas from the previous + // non-none position. + Some(prev) => { + let delta = read_sleb(&mut self.block)?; + prev.checked_add_signed(i32::try_from(delta).ok()?)? 
+ } + }; + self.prev_pos = Some(pos); + Some((cur_offset, FilePos(pos))) + } +} + +/// Parse an `ELF_WASMTIME_ADDRMAP` section into its header, block index, and +/// block bodies. +fn parse(section: &[u8]) -> Option> { let mut section = Bytes(section); - // NB: this matches the encoding written by `append_to` in the + // NB: this matches the encoding written by `AddressMapSection` in the // `compile::address_map` module. - let count = section.read::>().ok()?; - let count = usize::try_from(count.get(LittleEndian)).ok()?; - let (offsets, section) = - object::slice_from_bytes::>(section.0, count).ok()?; - let (positions, section) = - object::slice_from_bytes::>(section, count).ok()?; - debug_assert!(section.is_empty()); - Some((offsets, positions)) + let entries = section.read::>().ok()?; + let entries = usize::try_from(entries.get(LittleEndian)).ok()?; + let num_blocks = section.read::>().ok()?; + let num_blocks = usize::try_from(num_blocks.get(LittleEndian)).ok()?; + let (block_index, block_bodies) = + object::slice_from_bytes::<[U32; 2]>(section.0, num_blocks).ok()?; + Some(AddressMap { + entries, + block_index, + block_bodies, + }) } /// Lookup an `offset` within an encoded address map section, returning the /// original `FilePos` that corresponds to the offset, if found. /// /// This function takes a `section` as its first argument which must have been -/// created with `AddressMapSection` above. This is intended to be the raw +/// created with `AddressMapSection` in `crate::compile`, whose documentation +/// describes the format decoded here. This is intended to be the raw /// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. /// /// The `offset` provided is a relative offset from the start of the text /// section of the pc that is being looked up. If `offset` is out of range or /// doesn't correspond to anything in this file then `None` is returned. 
pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option { - let (offsets, positions) = parse_address_map(section)?; - - // First perform a binary search on the `offsets` array. This is a sorted - // array of offsets within the text section, which is conveniently what our - // `offset` also is. Note that we are somewhat unlikely to find a precise - // match on the element in the array, so we're largely interested in which - // "bucket" the `offset` falls into. + let section = parse(section)?; let offset = u32::try_from(offset).ok()?; - let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) { - // Exact hit! - Ok(i) => i, - - // This *would* be at the first slot in the array, so no - // instructions cover `pc`. - Err(0) => return None, - // This would be at the `nth` slot, so we're at the `n-1`th slot. - Err(n) => n - 1, - }; + // Find the last block whose first pc is `<= offset`. Note that, unlike the + // trap section, this is a bucket-style search: each entry covers addresses + // from its own `text_offset` until the next entry's, so `offset` need not + // match an entry exactly. The covering entry is wholly contained in this + // block since the next block only takes over at its own `first_offset`. + let block = section + .block_index + .partition_point(|[first_offset, _]| first_offset.get(LittleEndian) <= offset) + .checked_sub(1)?; - // Using the `index` we found of which bucket `offset` corresponds to we can - // lookup the actual `FilePos` value in the `positions` array. - let pos = positions.get(index)?; - Some(FilePos(pos.get(LittleEndian))) + // Find the last entry within this block whose offset is `<= offset`; that + // entry's bucket covers `offset`. At least the block's first entry always + // qualifies due to the index search above. + let mut pos = None; + for (entry_offset, entry_pos) in section.block_entries(block)? 
{ + if entry_offset > offset { + break; + } + pos = Some(entry_pos); + } + pos } /// Iterate over the address map contained in the given address map section. /// /// This function takes a `section` as its first argument which must have been -/// created with `AddressMapSection` above. This is intended to be the raw -/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. +/// created with `AddressMapSection` in `crate::compile`. This is intended to +/// be the raw `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. /// /// The yielded offsets are relative to the start of the text section for this /// map's code object. pub fn iterate_address_map<'a>( section: &'a [u8], ) -> Option + 'a> { - let (offsets, positions) = parse_address_map(section)?; + let section = parse(section)?; Some( - offsets - .iter() - .map(|o| o.get(LittleEndian)) - .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))), + (0..section.block_index.len()) + .flat_map(move |block| section.block_entries(block).into_iter().flatten()), ) } diff --git a/crates/environ/src/bytes.rs b/crates/environ/src/bytes.rs new file mode 100644 index 000000000000..104dfc7fd552 --- /dev/null +++ b/crates/environ/src/bytes.rs @@ -0,0 +1,74 @@ +//! Helpers for working with encoding/decoding bytes w.r.t. wasmtime's +//! custom-encoded sections. + +use alloc::vec::Vec; + +/// Writes the uleb-encoded `value` to `data`. +pub fn write_uleb(data: &mut Vec, mut value: u64) { + while value >= 0x80 { + data.push(0x80 | (value as u8 & 0x7f)); + value >>= 7; + } + data.push(value as u8); +} + +/// Writes the sleb-encoded `value` to `data`. +pub fn write_sleb(data: &mut Vec, mut value: i64) { + loop { + let byte = value.cast_unsigned() as u8 & 0x7f; + value >>= 7; + // Termination requires that the remaining bits of `value` all match + // the encoded sign bit, i.e. that sign extension of what's been + // written reproduces `value` exactly. 
+ let done = (value == 0 && byte & 0x40 == 0) || (value == -1 && byte & 0x40 != 0); + if done { + data.push(byte); + return; + } + data.push(byte | 0x80); + } +} + +/// Reads a uleb-encoded value from `data`, returning the value and consuming +/// the bytes read from `data`. Returns `None` if the encoding is invalid. +pub fn read_uleb(data: &mut &[u8]) -> Option { + let mut result = 0; + let mut shift = 0; + while shift < 64 { + let byte = pop(data)?; + result |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some(result); + } + shift += 7; + } + None +} + +/// Reads a sleb-encoded value from `data`, returning the value and consuming +/// the bytes read from `data`. Returns `None` if the encoding is invalid. +pub fn read_sleb(data: &mut &[u8]) -> Option { + let mut result = 0; + let mut shift = 0; + while shift < 64 { + let byte = pop(data)?; + result |= i64::from(byte & 0x7f) << shift; + shift += 7; + if byte & 0x80 == 0 { + // Sign-extend from the topmost bit that was encoded. + if shift < 64 && byte & 0x40 != 0 { + result |= -1 << shift; + } + return Some(result); + } + } + None +} + +/// Pops a single byte from the front of `data`, returning it and consuming it +/// from `data`. Returns `None` if `data` is empty. +pub fn pop(data: &mut &[u8]) -> Option { + let (&byte, rest) = data.split_first()?; + *data = rest; + Some(byte) +} diff --git a/crates/environ/src/compile/address_map.rs b/crates/environ/src/compile/address_map.rs index 8620a4840ebd..0c382c98ff64 100644 --- a/crates/environ/src/compile/address_map.rs +++ b/crates/environ/src/compile/address_map.rs @@ -1,6 +1,8 @@ //! 
Data structures to provide transformation of the source use crate::InstructionAddressMap; +use crate::address_map::ADDRMAP_BLOCK_SIZE; +use crate::bytes::{write_sleb, write_uleb}; use crate::obj::ELF_WASMTIME_ADDRMAP; use crate::prelude::*; use object::write::{Object, StandardSegment}; @@ -9,13 +11,99 @@ use std::ops::Range; /// Builder for the address map section of a wasmtime compilation image. /// -/// This builder is used to conveniently built the `ELF_WASMTIME_ADDRMAP` +/// This builder is used to conveniently build the `ELF_WASMTIME_ADDRMAP` /// section by compilers, and provides utilities to directly insert the results /// into an `Object`. +/// +/// # Section format +/// +/// The section encodes a sequence of `(text_offset, file_pos)` entries, sorted +/// by `text_offset`, where `text_offset` is the location of an instruction +/// relative to the start of the text section and `file_pos` is the offset +/// within the original wasm file of the instruction it was compiled from, or +/// the `FilePos::none()` sentinel for generated code with no wasm-level +/// source. Unlike the trap section each entry here describes a range of pcs: +/// an entry covers addresses from its own `text_offset` up to the next +/// entry's. This format is optimized to enable cheap (O(log n)) lookup given +/// an offset to find a source location while also being relatively compact as +/// this is included in all modules by default and is, uncompressed, the +/// largest of Wasmtime's metadata sections. To satisfy this the section is +/// encoded as two major pieces: an index and a sequence of blocks. +/// +/// The index is used to perform a binary search given a particular +/// `text_offset` to find a particular block. The index stores text offsets as +/// well as byte offsets in the "block bodies" section. Once a block is found +/// each block contains up to `ADDRMAP_BLOCK_SIZE` entries encoded next to each +/// other. Blocks take up a variable width of bytes to encode. 
More information +/// on decoding each block is below, but the general layout of the section looks +/// like: +/// +/// ```text +/// ┌───────────────────────────────────┐ +/// │ entry_count: u32 │ +/// │ block_count: u32 │ +/// ├───────────────────────────────────┤ +/// │ block index │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ first_offset: u32 │ │ one pair per block, sorted by +/// │ │ block_pos: u32 │ │ `first_offset`; `block_pos` is +/// │ ├───────────────────────────────┤ │ relative to the start of the +/// │ │ ... │ │ block bodies area below +/// │ └───────────────────────────────┘ │ +/// ├───────────────────────────────────┤ +/// │ block bodies │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ entry: uleb token │ │ one entry per instruction +/// │ │ [file_pos: uleb] │ │ mapping in the block, +/// │ ├───────────────────────────────┤ │ `ADDRMAP_BLOCK_SIZE` max +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// └───────────────────────────────────┘ +/// ``` +/// +/// * `entry_count` is the total number of entries (pc/srcloc combos) in the +/// section and `block_count` is the number of blocks, `ceil(entry_count / +/// ADDRMAP_BLOCK_SIZE)`. +/// * In the block index, `first_offset` is the `text_offset` of the block's +/// first entry and `block_pos` is the position of the block's body, +/// relative to the start of the bodies area (i.e. the end of the index). +/// * Each entry is a uleb-encoded token `(pc_delta << 1) | pos_is_none`. +/// Here `pc_delta` is this entry's `text_offset` minus the previous +/// entry's (the first entry's delta is relative to the block's +/// `first_offset` and is therefore 0). If `pos_is_none` is set this entry's +/// file position is `FilePos::none()` and nothing else follows the token. 
+/// Otherwise the token is followed by the entry's file position: the first +/// non-none position in a block is uleb-encoded absolutely and subsequent +/// positions are sleb-encoded deltas from the previous non-none position. +/// Delta chains restart at each block so blocks can be decoded +/// independently. +/// +/// Lookup (`lookup_file_pos`) binary searches the fixed-width block index for +/// the last block whose `first_offset` is `<=` the pc in question, then +/// linearly decodes at most `ADDRMAP_BLOCK_SIZE` entries of that block's body +/// looking for the last entry whose `text_offset` is `<=` the pc. +/// +/// This encoding leans on a few properties of address map metadata: +/// consecutive instructions are close together (pc deltas almost always fit in +/// a single-byte leb), consecutive source locations are close together and +/// mostly increasing (position deltas almost always fit in a single-byte +/// sleb), and entries with no source location are common (a quarter of all +/// entries) and cost only the token's flag bit. This is all in service of shrinking the +/// previous 8 bytes per entry (u32 offset, u32 file position) to roughly 2 +/// bytes per entry in practice. +/// +/// Note that at this time this section has an alignment of 1. Additionally +/// due to the 32-bit offsets in the block index this doesn't support images +/// >= 4GB. #[derive(Default)] pub struct AddressMapSection { - offsets: Vec>, - positions: Vec>, + entries: usize, + block_index: Vec<[U32; 2]>, + block_bodies: Vec, + pending: Vec<(u32, u32)>, last_offset: u32, } @@ -39,12 +127,10 @@ impl AddressMapSection { let func_start = u32::try_from(func.start).unwrap(); let func_end = u32::try_from(func.end).unwrap(); - self.offsets.reserve(instrs.len()); - self.positions.reserve(instrs.len()); let mut last_srcloc = None; for map in instrs { // Sanity-check to ensure that functions are pushed in-order, otherwise - // the `offsets` array won't be sorted which is our goal. 
+ // the encoded blocks won't be sorted which is our goal. let pos = func_start + map.code_offset; assert!(pos >= self.last_offset); self.last_offset = pos; @@ -58,12 +144,52 @@ impl AddressMapSection { } last_srcloc = Some(srcloc); - self.offsets.push(U32::new(LittleEndian, pos)); - self.positions.push(U32::new(LittleEndian, srcloc)); + self.pending.push((pos, srcloc)); + self.entries += 1; + if self.pending.len() == ADDRMAP_BLOCK_SIZE { + self.seal_block(); + } } self.last_offset = func_end; } + /// Flushes `self.pending` into one encoded block, appending to the index + /// and data arrays. + fn seal_block(&mut self) { + let first_offset = match self.pending.first() { + Some((offset, _)) => *offset, + None => return, + }; + let block_pos = u32::try_from(self.block_bodies.len()).unwrap(); + self.block_index.push([ + U32::new(LittleEndian, first_offset), + U32::new(LittleEndian, block_pos), + ]); + + let mut prev_offset = first_offset; + let mut prev_pos = None; + for (offset, pos) in self.pending.drain(..) { + let delta = offset - prev_offset; + prev_offset = offset; + let is_none = pos == u32::MAX; + write_uleb( + &mut self.block_bodies, + (u64::from(delta) << 1) | u64::from(is_none), + ); + if is_none { + continue; + } + match prev_pos { + // The first non-none position of a block is encoded absolutely + // and subsequent positions are deltas from the previous one, + // ensuring each block can be decoded independently. + None => write_uleb(&mut self.block_bodies, u64::from(pos)), + Some(prev) => write_sleb(&mut self.block_bodies, i64::from(pos) - i64::from(prev)), + } + prev_pos = Some(pos); + } + } + /// Finishes encoding this section into the `Object` provided. pub fn append_to(self, obj: &mut Object) { let section = obj.add_section( @@ -72,10 +198,103 @@ impl AddressMapSection { SectionKind::ReadOnlyData, ); - // NB: this matches the encoding expected by `lookup` below. 
- let amt = u32::try_from(self.offsets.len()).unwrap(); - obj.append_section_data(section, &amt.to_le_bytes(), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.positions), 1); + obj.append_section_data(section, &self.finish(), 1); + } + + /// Finishes encoding and returns the raw section contents, as decoded by + /// `lookup_file_pos` and `iterate_address_map`. + fn finish(mut self) -> Vec { + self.seal_block(); + let entries = u32::try_from(self.entries).unwrap(); + let num_blocks = u32::try_from(self.block_index.len()).unwrap(); + let mut ret = Vec::with_capacity(8 + self.block_index.len() * 8 + self.block_bodies.len()); + ret.extend_from_slice(&entries.to_le_bytes()); + ret.extend_from_slice(&num_blocks.to_le_bytes()); + ret.extend_from_slice(object::bytes_of_slice(&self.block_index)); + ret.extend_from_slice(&self.block_bodies); + ret + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{FilePos, iterate_address_map, lookup_file_pos}; + + fn encode(funcs: &[(Range, &[InstructionAddressMap])]) -> Vec { + let mut builder = AddressMapSection::default(); + for (func, instrs) in funcs { + builder.push(func.clone(), instrs); + } + builder.finish() + } + + fn map(code_offset: u32, srcloc: FilePos) -> InstructionAddressMap { + InstructionAddressMap { + srcloc, + code_offset, + } + } + + #[test] + fn smoke() { + let section = encode(&[]); + assert_eq!(lookup_file_pos(&section, 0), None); + assert_eq!(iterate_address_map(&section).unwrap().count(), 0); + + let section = encode(&[(0..0x100, &[])]); + assert_eq!(lookup_file_pos(&section, 0x50), None); + assert_eq!(iterate_address_map(&section).unwrap().count(), 0); + + let section = encode(&[( + 0..0x100, + &[ + map(10, FilePos::new(100)), + map(20, FilePos::none()), + map(30, FilePos::new(90)), + ], + )]); + // pcs before the first entry have no mapping + assert_eq!(lookup_file_pos(&section, 9), None); + // each entry covers pcs from 
its own offset until the next entry + assert_eq!(lookup_file_pos(&section, 10), Some(FilePos::new(100))); + assert_eq!(lookup_file_pos(&section, 19), Some(FilePos::new(100))); + assert_eq!(lookup_file_pos(&section, 20), Some(FilePos::none())); + assert_eq!(lookup_file_pos(&section, 29), Some(FilePos::none())); + assert_eq!(lookup_file_pos(&section, 30), Some(FilePos::new(90))); + // ... with the last entry covering everything afterwards + assert_eq!(lookup_file_pos(&section, 0x1000), Some(FilePos::new(90))); + } + + #[test] + fn many_blocks() { + // Enough entries to span multiple blocks, mixing forward and backward + // source-position movement with `FilePos::none()` entries, including + // at block boundaries. + let maps = (0..1000) + .map(|i| { + let srcloc = match i % 3 { + 0 => FilePos::none(), + 1 => FilePos::new(20_000 + i), + _ => FilePos::new(20_000 - i), + }; + map(i * 3, srcloc) + }) + .collect::>(); + let section = encode(&[(0..0x10000, &maps)]); + + let decoded = iterate_address_map(&section).unwrap().collect::>(); + assert_eq!(decoded.len(), maps.len()); + for (map, (offset, pos)) in maps.iter().zip(&decoded) { + assert_eq!(*offset, map.code_offset); + assert_eq!(*pos, map.srcloc); + } + + // Both an entry's exact pc and a pc inside its bucket resolve to it. 
+ for map in &maps { + let offset = usize::try_from(map.code_offset).unwrap(); + assert_eq!(lookup_file_pos(&section, offset), Some(map.srcloc)); + assert_eq!(lookup_file_pos(&section, offset + 1), Some(map.srcloc)); + } } } diff --git a/crates/environ/src/compile/trap_encoding.rs b/crates/environ/src/compile/trap_encoding.rs index 6ed423caa4d0..5c3c6452456b 100644 --- a/crates/environ/src/compile/trap_encoding.rs +++ b/crates/environ/src/compile/trap_encoding.rs @@ -1,4 +1,5 @@ use crate::TrapInformation; +use crate::bytes::write_uleb; use crate::obj::ELF_WASMTIME_TRAPS; use crate::prelude::*; use crate::trap_encoding::TRAP_BLOCK_SIZE; @@ -207,14 +208,6 @@ fn most_common_code(entries: &[(u32, u8)]) -> u8 { best } -fn write_uleb(data: &mut Vec, mut value: u64) { - while value >= 0x80 { - data.push(0x80 | (value as u8 & 0x7f)); - value >>= 7; - } - data.push(value as u8); -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 7153600c8df6..cb7b11bd2513 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -24,6 +24,7 @@ mod address_map; mod frame_table; #[macro_use] mod builtin; +pub mod bytes; mod demangling; mod ext; mod gc; diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index 3ecf38107891..718c7873d908 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -35,29 +35,10 @@ pub const SH_WASMTIME_NOT_EXECUTED: u64 = 1 << 0; /// mapping data from offsets in the image to offset in the original wasm /// binary. /// -/// This section has a custom binary encoding. Currently its encoding is: -/// -/// * The section starts with a 32-bit little-endian integer. This integer is -/// how many entries are in the following two arrays. -/// * Next is an array with the previous count number of 32-bit little-endian -/// integers. This array is a sorted list of relative offsets within the text -/// section. 
This is intended to be a lookup array to perform a binary search -/// on an offset within the text section on this array. -/// * Finally there is another array, with the same count as before, also of -/// 32-bit little-endian integers. These integers map 1:1 with the previous -/// array of offsets, and correspond to what the original offset was in the -/// wasm file. -/// -/// Decoding this section is intentionally simple, it only requires loading a -/// 32-bit little-endian integer plus some bounds checks. Reading this section -/// is done with the `lookup_file_pos` function below. Reading involves -/// performing a binary search on the first array using the index found for the -/// native code offset to index into the second array and find the wasm code -/// offset. -/// -/// At this time this section has an alignment of 1, which means all reads of it -/// are unaligned. Additionally at this time the 32-bit encodings chosen here -/// mean that >=4gb text sections are not supported. +/// This section's format is defined by the documentation of the +/// `crate::compile::AddressMapSection` data structure, which builds it. It is +/// decoded by `lookup_file_pos`. Its offsets are relative to the start of the +/// text section. 
pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap"; /// A custom Wasmtime-specific section of compilation which store information diff --git a/crates/environ/src/trap_encoding.rs b/crates/environ/src/trap_encoding.rs index 232431151ed0..692238a4a462 100644 --- a/crates/environ/src/trap_encoding.rs +++ b/crates/environ/src/trap_encoding.rs @@ -1,3 +1,4 @@ +use crate::bytes; use core::fmt; use object::{Bytes, LittleEndian, U32}; @@ -349,7 +350,7 @@ impl<'a> TrapSection<'a> { let first_offset = first_offset.get(LittleEndian); let block_pos = block_pos.get(LittleEndian); let mut block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?; - let default_code = pop(&mut block)?; + let default_code = bytes::pop(&mut block)?; let remaining = core::cmp::min( TRAP_BLOCK_SIZE, self.entries.checked_sub(block_index * TRAP_BLOCK_SIZE)?, @@ -378,12 +379,12 @@ impl Iterator for BlockEntries<'_> { fn next(&mut self) -> Option<(u32, u8)> { self.remaining = self.remaining.checked_sub(1)?; - let token = read_uleb(&mut self.block)?; + let token = bytes::read_uleb(&mut self.block)?; let delta = u32::try_from(token >> 1).ok()?; let cur_offset = self.prev_offset.checked_add(delta)?; self.prev_offset = cur_offset; let code = if token & 1 != 0 { - pop(&mut self.block)? + bytes::pop(&mut self.block)? } else { self.default_code }; @@ -391,26 +392,6 @@ impl Iterator for BlockEntries<'_> { } } -fn read_uleb(data: &mut &[u8]) -> Option { - let mut result = 0; - let mut shift = 0; - while shift < 64 { - let byte = pop(data)?; - result |= u64::from(byte & 0x7f) << shift; - if byte & 0x80 == 0 { - return Some(result); - } - shift += 7; - } - None -} - -fn pop(data: &mut &[u8]) -> Option { - let (&byte, rest) = data.split_first()?; - *data = rest; - Some(byte) -} - fn parse(section: &[u8]) -> Option> { let mut section = Bytes(section); // NB: this matches the encoding written by `TrapEncodingBuilder`.