diff --git a/crates/environ/src/address_map.rs b/crates/environ/src/address_map.rs index 8d1fd2d48bfd..4c02ff4e8046 100644 --- a/crates/environ/src/address_map.rs +++ b/crates/environ/src/address_map.rs @@ -1,5 +1,6 @@ //! Data structures to provide transformation of the source +use crate::bytes::{read_sleb, read_uleb}; use core::fmt; use object::{Bytes, LittleEndian, U32}; use serde_derive::{Deserialize, Serialize}; @@ -141,76 +142,160 @@ impl fmt::Display for ModulePC { } } -/// Parse an `ELF_WASMTIME_ADDRMAP` section, returning the slice of code offsets -/// and the slice of associated file positions for each offset. -fn parse_address_map(section: &[u8]) -> Option<(&[U32], &[U32])> { +/// Number of address-mapping entries packed into one block of the address map +/// section. +/// +/// See `AddressMapSection` in `crate::compile` for the full section format. +/// Chosen as a balance between the fixed-width index overhead per block (8 +/// bytes, amortized across entries) and the amount of linear decoding required +/// to look up a single pc within a block. +pub(crate) const ADDRMAP_BLOCK_SIZE: usize = 128; + +/// A parsed view of the address map section. +/// +/// The fields here correspond to the pieces of the section layout described +/// on `AddressMapSection` in `crate::compile`. +#[derive(Clone, Copy)] +struct AddressMap<'a> { + /// Total number of address-mapping entries in this section. + entries: usize, + /// One `(first_offset, block_pos)` pair per block. + block_index: &'a [[U32; 2]], + /// Variable-length block bodies, indexed by `block_pos` in the + /// `block_index` table above. + block_bodies: &'a [u8], +} + +impl<'a> AddressMap<'a> { + /// Returns an iterator of `(text_offset, FilePos)` for all entries in + /// `block`, or `None` if the section is malformed. 
+ fn block_entries(&self, block_index: usize) -> Option> { + let [first_offset, block_pos] = self.block_index.get(block_index)?; + let first_offset = first_offset.get(LittleEndian); + let block_pos = block_pos.get(LittleEndian); + let block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?; + let remaining = core::cmp::min( + ADDRMAP_BLOCK_SIZE, + self.entries.checked_sub(block_index * ADDRMAP_BLOCK_SIZE)?, + ); + Some(BlockEntries { + block, + prev_offset: first_offset, + prev_pos: None, + remaining, + }) + } +} + +/// Iterator over the entries of a single block, decoding the delta-and-flag +/// varints described in the "block body" portion of the section format on +/// `AddressMapSection` in `crate::compile`. +struct BlockEntries<'a> { + block: &'a [u8], + prev_offset: u32, + prev_pos: Option, + remaining: usize, +} + +impl Iterator for BlockEntries<'_> { + type Item = (u32, FilePos); + + fn next(&mut self) -> Option<(u32, FilePos)> { + self.remaining = self.remaining.checked_sub(1)?; + let token = read_uleb(&mut self.block)?; + let delta = u32::try_from(token >> 1).ok()?; + let cur_offset = self.prev_offset.checked_add(delta)?; + self.prev_offset = cur_offset; + if token & 1 != 0 { + return Some((cur_offset, FilePos::none())); + } + let pos = match self.prev_pos { + // The first non-none position of a block is encoded absolutely... + None => u32::try_from(read_uleb(&mut self.block)?).ok()?, + // ... and subsequent positions are sleb deltas from the previous + // non-none position. + Some(prev) => { + let delta = read_sleb(&mut self.block)?; + prev.checked_add_signed(i32::try_from(delta).ok()?)? + } + }; + self.prev_pos = Some(pos); + Some((cur_offset, FilePos(pos))) + } +} + +/// Parse an `ELF_WASMTIME_ADDRMAP` section into its header, block index, and +/// block bodies. 
+fn parse(section: &[u8]) -> Option> { let mut section = Bytes(section); - // NB: this matches the encoding written by `append_to` in the + // NB: this matches the encoding written by `AddressMapSection` in the // `compile::address_map` module. - let count = section.read::>().ok()?; - let count = usize::try_from(count.get(LittleEndian)).ok()?; - let (offsets, section) = - object::slice_from_bytes::>(section.0, count).ok()?; - let (positions, section) = - object::slice_from_bytes::>(section, count).ok()?; - debug_assert!(section.is_empty()); - Some((offsets, positions)) + let entries = section.read::>().ok()?; + let entries = usize::try_from(entries.get(LittleEndian)).ok()?; + let num_blocks = section.read::>().ok()?; + let num_blocks = usize::try_from(num_blocks.get(LittleEndian)).ok()?; + let (block_index, block_bodies) = + object::slice_from_bytes::<[U32; 2]>(section.0, num_blocks).ok()?; + Some(AddressMap { + entries, + block_index, + block_bodies, + }) } /// Lookup an `offset` within an encoded address map section, returning the /// original `FilePos` that corresponds to the offset, if found. /// /// This function takes a `section` as its first argument which must have been -/// created with `AddressMapSection` above. This is intended to be the raw +/// created with `AddressMapSection` in `crate::compile`, whose documentation +/// describes the format decoded here. This is intended to be the raw /// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. /// /// The `offset` provided is a relative offset from the start of the text /// section of the pc that is being looked up. If `offset` is out of range or /// doesn't correspond to anything in this file then `None` is returned. pub fn lookup_file_pos(section: &[u8], offset: usize) -> Option { - let (offsets, positions) = parse_address_map(section)?; - - // First perform a binary search on the `offsets` array. 
This is a sorted - // array of offsets within the text section, which is conveniently what our - // `offset` also is. Note that we are somewhat unlikely to find a precise - // match on the element in the array, so we're largely interested in which - // "bucket" the `offset` falls into. + let section = parse(section)?; let offset = u32::try_from(offset).ok()?; - let index = match offsets.binary_search_by_key(&offset, |v| v.get(LittleEndian)) { - // Exact hit! - Ok(i) => i, - - // This *would* be at the first slot in the array, so no - // instructions cover `pc`. - Err(0) => return None, - // This would be at the `nth` slot, so we're at the `n-1`th slot. - Err(n) => n - 1, - }; + // Find the last block whose first pc is `<= offset`. Note that, unlike the + // trap section, this is a bucket-style search: each entry covers addresses + // from its own `text_offset` until the next entry's, so `offset` need not + // match an entry exactly. The covering entry is wholly contained in this + // block since the next block only takes over at its own `first_offset`. + let block = section + .block_index + .partition_point(|[first_offset, _]| first_offset.get(LittleEndian) <= offset) + .checked_sub(1)?; - // Using the `index` we found of which bucket `offset` corresponds to we can - // lookup the actual `FilePos` value in the `positions` array. - let pos = positions.get(index)?; - Some(FilePos(pos.get(LittleEndian))) + // Find the last entry within this block whose offset is `<= offset`; that + // entry's bucket covers `offset`. At least the block's first entry always + // qualifies due to the index search above. + let mut pos = None; + for (entry_offset, entry_pos) in section.block_entries(block)? { + if entry_offset > offset { + break; + } + pos = Some(entry_pos); + } + pos } /// Iterate over the address map contained in the given address map section. /// /// This function takes a `section` as its first argument which must have been -/// created with `AddressMapSection` above. 
This is intended to be the raw -/// `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. +/// created with `AddressMapSection` in `crate::compile`. This is intended to +/// be the raw `ELF_WASMTIME_ADDRMAP` section from the compilation artifact. /// /// The yielded offsets are relative to the start of the text section for this /// map's code object. pub fn iterate_address_map<'a>( section: &'a [u8], ) -> Option + 'a> { - let (offsets, positions) = parse_address_map(section)?; + let section = parse(section)?; Some( - offsets - .iter() - .map(|o| o.get(LittleEndian)) - .zip(positions.iter().map(|pos| FilePos(pos.get(LittleEndian)))), + (0..section.block_index.len()) + .flat_map(move |block| section.block_entries(block).into_iter().flatten()), ) } diff --git a/crates/environ/src/bytes.rs b/crates/environ/src/bytes.rs new file mode 100644 index 000000000000..104dfc7fd552 --- /dev/null +++ b/crates/environ/src/bytes.rs @@ -0,0 +1,74 @@ +//! Helpers for working with encoding/decoding bytes w.r.t. wasmtime's +//! custom-encoded sections. + +use alloc::vec::Vec; + +/// Writes the uleb-encoded `value` to `data`. +pub fn write_uleb(data: &mut Vec, mut value: u64) { + while value >= 0x80 { + data.push(0x80 | (value as u8 & 0x7f)); + value >>= 7; + } + data.push(value as u8); +} + +/// Writes the sleb-encoded `value` to `data`. +pub fn write_sleb(data: &mut Vec, mut value: i64) { + loop { + let byte = value.cast_unsigned() as u8 & 0x7f; + value >>= 7; + // Termination requires that the remaining bits of `value` all match + // the encoded sign bit, i.e. that sign extension of what's been + // written reproduces `value` exactly. + let done = (value == 0 && byte & 0x40 == 0) || (value == -1 && byte & 0x40 != 0); + if done { + data.push(byte); + return; + } + data.push(byte | 0x80); + } +} + +/// Reads a uleb-encoded value from `data`, returning the value and consuming +/// the bytes read from `data`. Returns `None` if the encoding is invalid.
+pub fn read_uleb(data: &mut &[u8]) -> Option { + let mut result = 0; + let mut shift = 0; + while shift < 64 { + let byte = pop(data)?; + result |= u64::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some(result); + } + shift += 7; + } + None +} + +/// Reads a sleb-encoded value from `data`, returning the value and consuming +/// the bytes read from `data`. Returns `None` if the encoding is invalid. +pub fn read_sleb(data: &mut &[u8]) -> Option { + let mut result = 0; + let mut shift = 0; + while shift < 64 { + let byte = pop(data)?; + result |= i64::from(byte & 0x7f) << shift; + shift += 7; + if byte & 0x80 == 0 { + // Sign-extend from the topmost bit that was encoded. + if shift < 64 && byte & 0x40 != 0 { + result |= -1 << shift; + } + return Some(result); + } + } + None +} + +/// Pops a single byte from the front of `data`, returning it and consuming it +/// from `data`. Returns `None` if `data` is empty. +pub fn pop(data: &mut &[u8]) -> Option { + let (&byte, rest) = data.split_first()?; + *data = rest; + Some(byte) +} diff --git a/crates/environ/src/compile/address_map.rs b/crates/environ/src/compile/address_map.rs index 8620a4840ebd..0c382c98ff64 100644 --- a/crates/environ/src/compile/address_map.rs +++ b/crates/environ/src/compile/address_map.rs @@ -1,6 +1,8 @@ //! Data structures to provide transformation of the source use crate::InstructionAddressMap; +use crate::address_map::ADDRMAP_BLOCK_SIZE; +use crate::bytes::{write_sleb, write_uleb}; use crate::obj::ELF_WASMTIME_ADDRMAP; use crate::prelude::*; use object::write::{Object, StandardSegment}; @@ -9,13 +11,99 @@ use std::ops::Range; /// Builder for the address map section of a wasmtime compilation image. /// -/// This builder is used to conveniently built the `ELF_WASMTIME_ADDRMAP` +/// This builder is used to conveniently build the `ELF_WASMTIME_ADDRMAP` /// section by compilers, and provides utilities to directly insert the results /// into an `Object`. 
+/// +/// # Section format +/// +/// The section encodes a sequence of `(text_offset, file_pos)` entries, sorted +/// by `text_offset`, where `text_offset` is the location of an instruction +/// relative to the start of the text section and `file_pos` is the offset +/// within the original wasm file of the instruction it was compiled from, or +/// the `FilePos::none()` sentinel for generated code with no wasm-level +/// source. Unlike the trap section each entry here describes a range of pcs: +/// an entry covers addresses from its own `text_offset` up to the next +/// entry's. This format is optimized to enable cheap (O(log n)) lookup given +/// an offset to find a source location while also being relatively compact as +/// this is included in all modules by default and is, uncompressed, the +/// largest of Wasmtime's metadata sections. To satisfy this the section is +/// encoded as two major pieces: an index and a sequence of blocks. +/// +/// The index is used to perform a binary search given a particular +/// `text_offset` to find a particular block. The index stores text offsets as +/// well as byte offsets in the "block bodies" section. Once a block is found +/// each block contains up to `ADDRMAP_BLOCK_SIZE` entries encoded next to each +/// other. Blocks take up a variable width of bytes to encode. More information +/// on decoding each block is below, but the general layout of the section looks +/// like: +/// +/// ```text +/// ┌───────────────────────────────────┐ +/// │ entry_count: u32 │ +/// │ block_count: u32 │ +/// ├───────────────────────────────────┤ +/// │ block index │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ first_offset: u32 │ │ one pair per block, sorted by +/// │ │ block_pos: u32 │ │ `first_offset`; `block_pos` is +/// │ ├───────────────────────────────┤ │ relative to the start of the +/// │ │ ... 
│ │ block bodies area below +/// │ └───────────────────────────────┘ │ +/// ├───────────────────────────────────┤ +/// │ block bodies │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ entry: uleb token │ │ one entry per instruction +/// │ │ [file_pos: uleb] │ │ mapping in the block, +/// │ ├───────────────────────────────┤ │ `ADDRMAP_BLOCK_SIZE` max +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// └───────────────────────────────────┘ +/// ``` +/// +/// * `entry_count` is the total number of entries (pc/srcloc combos) in the +/// section and `block_count` is the number of blocks, `ceil(entry_count / +/// ADDRMAP_BLOCK_SIZE)`. +/// * In the block index, `first_offset` is the `text_offset` of the block's +/// first entry and `block_pos` is the position of the block's body, +/// relative to the start of the bodies area (i.e. the end of the index). +/// * Each entry is a uleb-encoded token `(pc_delta << 1) | pos_is_none`. +/// Here `pc_delta` is this entry's `text_offset` minus the previous +/// entry's (the first entry's delta is relative to the block's +/// `first_offset` and is therefore 0). If `pos_is_none` is set this entry's +/// file position is `FilePos::none()` and nothing else follows the token. +/// Otherwise the token is followed by the entry's file position: the first +/// non-none position in a block is uleb-encoded absolutely and subsequent +/// positions are sleb-encoded deltas from the previous non-none position. +/// Delta chains restart at each block so blocks can be decoded +/// independently. +/// +/// Lookup (`lookup_file_pos`) binary searches the fixed-width block index for +/// the last block whose `first_offset` is `<=` the pc in question, then +/// linearly decodes at most `ADDRMAP_BLOCK_SIZE` entries of that block's body +/// looking for the last entry whose `text_offset` is `<=` the pc. 
+/// +/// This encoding leans on a few properties of address map metadata: +/// consecutive instructions are close together (pc deltas almost always fit in +/// a single-byte leb), consecutive source locations are close together and +/// mostly increasing (position deltas almost always fit in a single-byte +/// sleb), and entries with no source location are common (a quarter of all +/// entries) and cost only the token's flag bit. This is all in service of shrinking the +/// previous 8 bytes per entry (u32 offset, u32 file position) to roughly 2 +/// bytes per entry in practice. +/// +/// Note that at this time this section has an alignment of 1. Additionally +/// due to the 32-bit offsets in the block index this doesn't support images +/// >= 4GB. #[derive(Default)] pub struct AddressMapSection { - offsets: Vec>, - positions: Vec>, + entries: usize, + block_index: Vec<[U32; 2]>, + block_bodies: Vec, + pending: Vec<(u32, u32)>, last_offset: u32, } @@ -39,12 +127,10 @@ impl AddressMapSection { let func_start = u32::try_from(func.start).unwrap(); let func_end = u32::try_from(func.end).unwrap(); - self.offsets.reserve(instrs.len()); - self.positions.reserve(instrs.len()); let mut last_srcloc = None; for map in instrs { // Sanity-check to ensure that functions are pushed in-order, otherwise - // the `offsets` array won't be sorted which is our goal. + // the encoded blocks won't be sorted which is our goal. let pos = func_start + map.code_offset; assert!(pos >= self.last_offset); self.last_offset = pos; @@ -58,12 +144,52 @@ impl AddressMapSection { } last_srcloc = Some(srcloc); - self.offsets.push(U32::new(LittleEndian, pos)); - self.positions.push(U32::new(LittleEndian, srcloc)); + self.pending.push((pos, srcloc)); + self.entries += 1; + if self.pending.len() == ADDRMAP_BLOCK_SIZE { + self.seal_block(); + } } self.last_offset = func_end; } + /// Flushes `self.pending` into one encoded block, appending to the index + /// and data arrays. 
+ fn seal_block(&mut self) { + let first_offset = match self.pending.first() { + Some((offset, _)) => *offset, + None => return, + }; + let block_pos = u32::try_from(self.block_bodies.len()).unwrap(); + self.block_index.push([ + U32::new(LittleEndian, first_offset), + U32::new(LittleEndian, block_pos), + ]); + + let mut prev_offset = first_offset; + let mut prev_pos = None; + for (offset, pos) in self.pending.drain(..) { + let delta = offset - prev_offset; + prev_offset = offset; + let is_none = pos == u32::MAX; + write_uleb( + &mut self.block_bodies, + (u64::from(delta) << 1) | u64::from(is_none), + ); + if is_none { + continue; + } + match prev_pos { + // The first non-none position of a block is encoded absolutely + // and subsequent positions are deltas from the previous one, + // ensuring each block can be decoded independently. + None => write_uleb(&mut self.block_bodies, u64::from(pos)), + Some(prev) => write_sleb(&mut self.block_bodies, i64::from(pos) - i64::from(prev)), + } + prev_pos = Some(pos); + } + } + /// Finishes encoding this section into the `Object` provided. pub fn append_to(self, obj: &mut Object) { let section = obj.add_section( @@ -72,10 +198,103 @@ impl AddressMapSection { SectionKind::ReadOnlyData, ); - // NB: this matches the encoding expected by `lookup` below. - let amt = u32::try_from(self.offsets.len()).unwrap(); - obj.append_section_data(section, &amt.to_le_bytes(), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.positions), 1); + obj.append_section_data(section, &self.finish(), 1); + } + + /// Finishes encoding and returns the raw section contents, as decoded by + /// `lookup_file_pos` and `iterate_address_map`. 
+ fn finish(mut self) -> Vec { + self.seal_block(); + let entries = u32::try_from(self.entries).unwrap(); + let num_blocks = u32::try_from(self.block_index.len()).unwrap(); + let mut ret = Vec::with_capacity(8 + self.block_index.len() * 8 + self.block_bodies.len()); + ret.extend_from_slice(&entries.to_le_bytes()); + ret.extend_from_slice(&num_blocks.to_le_bytes()); + ret.extend_from_slice(object::bytes_of_slice(&self.block_index)); + ret.extend_from_slice(&self.block_bodies); + ret + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{FilePos, iterate_address_map, lookup_file_pos}; + + fn encode(funcs: &[(Range, &[InstructionAddressMap])]) -> Vec { + let mut builder = AddressMapSection::default(); + for (func, instrs) in funcs { + builder.push(func.clone(), instrs); + } + builder.finish() + } + + fn map(code_offset: u32, srcloc: FilePos) -> InstructionAddressMap { + InstructionAddressMap { + srcloc, + code_offset, + } + } + + #[test] + fn smoke() { + let section = encode(&[]); + assert_eq!(lookup_file_pos(§ion, 0), None); + assert_eq!(iterate_address_map(§ion).unwrap().count(), 0); + + let section = encode(&[(0..0x100, &[])]); + assert_eq!(lookup_file_pos(§ion, 0x50), None); + assert_eq!(iterate_address_map(§ion).unwrap().count(), 0); + + let section = encode(&[( + 0..0x100, + &[ + map(10, FilePos::new(100)), + map(20, FilePos::none()), + map(30, FilePos::new(90)), + ], + )]); + // pcs before the first entry have no mapping + assert_eq!(lookup_file_pos(§ion, 9), None); + // each entry covers pcs from its own offset until the next entry + assert_eq!(lookup_file_pos(§ion, 10), Some(FilePos::new(100))); + assert_eq!(lookup_file_pos(§ion, 19), Some(FilePos::new(100))); + assert_eq!(lookup_file_pos(§ion, 20), Some(FilePos::none())); + assert_eq!(lookup_file_pos(§ion, 29), Some(FilePos::none())); + assert_eq!(lookup_file_pos(§ion, 30), Some(FilePos::new(90))); + // ... 
with the last entry covering everything afterwards + assert_eq!(lookup_file_pos(§ion, 0x1000), Some(FilePos::new(90))); + } + + #[test] + fn many_blocks() { + // Enough entries to span multiple blocks, mixing forward and backward + // source-position movement with `FilePos::none()` entries, including + // at block boundaries. + let maps = (0..1000) + .map(|i| { + let srcloc = match i % 3 { + 0 => FilePos::none(), + 1 => FilePos::new(20_000 + i), + _ => FilePos::new(20_000 - i), + }; + map(i * 3, srcloc) + }) + .collect::>(); + let section = encode(&[(0..0x10000, &maps)]); + + let decoded = iterate_address_map(§ion).unwrap().collect::>(); + assert_eq!(decoded.len(), maps.len()); + for (map, (offset, pos)) in maps.iter().zip(&decoded) { + assert_eq!(*offset, map.code_offset); + assert_eq!(*pos, map.srcloc); + } + + // Both an entry's exact pc and a pc inside its bucket resolve to it. + for map in &maps { + let offset = usize::try_from(map.code_offset).unwrap(); + assert_eq!(lookup_file_pos(§ion, offset), Some(map.srcloc)); + assert_eq!(lookup_file_pos(§ion, offset + 1), Some(map.srcloc)); + } } } diff --git a/crates/environ/src/compile/trap_encoding.rs b/crates/environ/src/compile/trap_encoding.rs index 139c85a7eed4..5c3c6452456b 100644 --- a/crates/environ/src/compile/trap_encoding.rs +++ b/crates/environ/src/compile/trap_encoding.rs @@ -1,6 +1,8 @@ use crate::TrapInformation; +use crate::bytes::write_uleb; use crate::obj::ELF_WASMTIME_TRAPS; use crate::prelude::*; +use crate::trap_encoding::TRAP_BLOCK_SIZE; use object::write::{Object, StandardSegment}; use object::{LittleEndian, SectionKind, U32}; use std::ops::Range; @@ -10,11 +12,91 @@ use std::ops::Range; /// /// This structure is incrementally fed the results of compiling individual /// functions and handles all the encoding internally, allowing usage of -/// `lookup_trap_code` below with the resulting section. +/// `lookup_trap_code` with the resulting section. 
+/// +/// # Section format +/// +/// The section encodes a sequence of `(text_offset, trap_code)` entries, +/// sorted by `text_offset`, where `text_offset` is the location of a +/// trapping instruction relative to the start of the text section and +/// `trap_code` is the byte encoding of its `CompiledTrap`. This format is +/// optimized to enable cheap (O(log n)) lookup given an offset to find a trap +/// code while also being relatively compact as this is included in all modules +/// by default. To satisfy this the section is encoded as two major pieces: an +/// index and a sequence of blocks. +/// +/// The index is used to perform a binary search given a particular +/// `text_offset` to find a particular block. The index stores text offsets as +/// well as byte offsets in the "block bodies" section. Once a block is found +/// each block contains up to `TRAP_BLOCK_SIZE` entries encoded next to each +/// other. Blocks take up a variable width of bytes to encode. More information +/// on decoding each block is below, but the general layout of the section looks +/// like: +/// +/// ```text +/// ┌───────────────────────────────────┐ +/// │ entry_count: u32 │ +/// │ block_count: u32 │ +/// ├───────────────────────────────────┤ +/// │ block index │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ first_offset: u32 │ │ one pair per block, sorted by +/// │ │ data_pos: u32 │ │ `first_offset`; `data_pos` is +/// │ ├───────────────────────────────┤ │ relative to the start of the +/// │ │ ... │ │ block bodies area below +/// │ └───────────────────────────────┘ │ +/// ├───────────────────────────────────┤ +/// │ block bodies │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ default_code: u8 │ │ +/// │ ├───────────────────────────────┤ │ +/// │ │ entry: uleb token │ │ one entry per trap in the +/// │ │ [trap_code: u8] │ │ block, `TRAP_BLOCK_SIZE` max +/// │ ├───────────────────────────────┤ │ +/// │ │ ... 
│ │ +/// │ └───────────────────────────────┘ │ +/// │ ┌───────────────────────────────┐ │ +/// │ │ ... │ │ +/// │ └───────────────────────────────┘ │ +/// └───────────────────────────────────┘ +/// ``` +/// +/// * `entry_count` is the total number of entries (pc/trap combos) in the +/// section and `block_count` is the number of blocks, `ceil(entry_count / +/// TRAP_BLOCK_SIZE)`. +/// * In the block index, `first_offset` is the `text_offset` of the block's +/// first entry and `data_pos` is the position of the block's body, +/// relative to the start of the bodies area (i.e. the end of the index). +/// * Each block body starts with `default_code`, the block's "default" trap +/// code, chosen as the most common code among the block's entries. +/// * Each entry is a uleb-encoded token `(pc_delta << 1) | code_differs`. +/// Here `pc_delta` is this entry's `text_offset` minus the previous +/// entry's (the first entry's delta is relative to the block's +/// `first_offset` and is therefore 0). If `code_differs` is set the token +/// is followed by one byte holding this entry's trap code, otherwise the +/// entry has the block's default code. +/// +/// Lookup (`lookup_trap_code`) binary searches the fixed-width block index +/// for the last block whose `first_offset` is `<=` the pc in question, then +/// linearly decodes at most `TRAP_BLOCK_SIZE` entries of that block's body +/// looking for an exact match. +/// +/// This encoding leans on two properties of trap metadata: consecutive trap +/// sites are generally close together (pc deltas almost always fit in a +/// single-byte leb) and most entries share one trap code (typically +/// `MemoryOutOfBounds` for gc-less wasm), making explicit code bytes rare. This +/// is all in service of shrinking the minimum 5 bytes per entry (u32 offset, u8 +/// code), to a bit more than one byte per entry in practice. +/// +/// Note that at this time this section has an alignment of 1. 
Additionally +/// due to the 32-bit offsets in the block index this doesn't support images +/// >= 4GB. #[derive(Default)] pub struct TrapEncodingBuilder { - offsets: Vec>, - traps: Vec, + entries: usize, + block_index: Vec<[U32; 2]>, + block_bodies: Vec, + pending: Vec<(u32, u8)>, last_offset: u32, } @@ -37,22 +119,56 @@ impl TrapEncodingBuilder { let func_end = u32::try_from(func.end).unwrap(); // Sanity-check to ensure that functions are pushed in-order, otherwise - // the `offsets` array won't be sorted which is our goal. + // the encoded blocks won't be sorted which is our goal. assert!(func_start >= self.last_offset); - self.offsets.reserve(traps.len()); - self.traps.reserve(traps.len()); for info in traps { let pos = func_start + info.code_offset; assert!(pos >= self.last_offset); - self.offsets.push(U32::new(LittleEndian, pos)); - self.traps.push(info.trap_code.as_u8()); + self.pending.push((pos, info.trap_code.as_u8())); + self.entries += 1; self.last_offset = pos; + if self.pending.len() == TRAP_BLOCK_SIZE { + self.seal_block(); + } } self.last_offset = func_end; } + /// Flushes `self.pending` into one encoded block, appending to the index + /// and data arrays. + fn seal_block(&mut self) { + let first_offset = match self.pending.first() { + Some((offset, _)) => *offset, + None => return, + }; + let body_pos = u32::try_from(self.block_bodies.len()).unwrap(); + self.block_index.push([ + U32::new(LittleEndian, first_offset), + U32::new(LittleEndian, body_pos), + ]); + + // The block's default code is its most common one, making the common + // case of a run of identical codes free to encode. + let default_code = most_common_code(&self.pending); + self.block_bodies.push(default_code); + + let mut prev = first_offset; + for (pc, code) in self.pending.drain(..) 
{ + let delta = pc - prev; + prev = pc; + let differs = code != default_code; + write_uleb( + &mut self.block_bodies, + (u64::from(delta) << 1) | u64::from(differs), + ); + if differs { + self.block_bodies.push(code); + } + } + } + /// Encodes this section into the object provided. pub fn append_to(self, obj: &mut Object) { let section = obj.add_section( @@ -61,10 +177,82 @@ impl TrapEncodingBuilder { SectionKind::ReadOnlyData, ); - // NB: this matches the encoding expected by `lookup` below. - let amt = u32::try_from(self.traps.len()).unwrap(); - obj.append_section_data(section, &amt.to_le_bytes(), 1); - obj.append_section_data(section, object::bytes_of_slice(&self.offsets), 1); - obj.append_section_data(section, &self.traps, 1); + obj.append_section_data(section, &self.finish(), 1); + } + + /// Finishes encoding and returns the raw section contents, as decoded by + /// `lookup_trap_code` and `iterate_traps`. + fn finish(mut self) -> Vec { + self.seal_block(); + let entries = u32::try_from(self.entries).unwrap(); + let num_blocks = u32::try_from(self.block_index.len()).unwrap(); + let mut ret = Vec::with_capacity(8 + self.block_index.len() * 8 + self.block_bodies.len()); + ret.extend_from_slice(&entries.to_le_bytes()); + ret.extend_from_slice(&num_blocks.to_le_bytes()); + ret.extend_from_slice(object::bytes_of_slice(&self.block_index)); + ret.extend_from_slice(&self.block_bodies); + ret + } +} + +fn most_common_code(entries: &[(u32, u8)]) -> u8 { + let mut counts = [0u16; 256]; + let mut best = entries[0].1; + for (_, code) in entries { + let count = &mut counts[usize::from(*code)]; + *count += 1; + if *count > counts[usize::from(best)] { + best = *code; + } + } + best +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Trap, iterate_traps, lookup_trap_code}; + + fn encode(funcs: &[(Range, &[TrapInformation])]) -> Vec { + let mut builder = TrapEncodingBuilder::default(); + for (func, traps) in funcs { + builder.push(func.clone(), traps); + } + 
builder.finish() + } + + fn info(code_offset: u32, trap: Trap) -> TrapInformation { + TrapInformation { + code_offset, + trap_code: trap.into(), + } + } + + #[test] + fn smoke() { + let section = encode(&[]); + assert_eq!(lookup_trap_code(§ion, 0), None); + assert_eq!(iterate_traps(§ion).unwrap().count(), 0); + + let section = encode(&[(0..0x100, &[])]); + assert_eq!(lookup_trap_code(§ion, 0x50), None); + assert_eq!(iterate_traps(§ion).unwrap().count(), 0); + + let section = encode(&[( + 0..0x100, + &[ + info(10, Trap::MemoryOutOfBounds), + info(20, Trap::StackOverflow), + ], + )]); + assert_eq!(lookup_trap_code(§ion, 0x50), None); + assert_eq!( + lookup_trap_code(§ion, 10), + Some(Trap::MemoryOutOfBounds.into()) + ); + assert_eq!( + lookup_trap_code(§ion, 20), + Some(Trap::StackOverflow.into()) + ); } } diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 7153600c8df6..cb7b11bd2513 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -24,6 +24,7 @@ mod address_map; mod frame_table; #[macro_use] mod builtin; +pub mod bytes; mod demangling; mod ext; mod gc; diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index 1c3dc478e3a3..718c7873d908 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -35,29 +35,10 @@ pub const SH_WASMTIME_NOT_EXECUTED: u64 = 1 << 0; /// mapping data from offsets in the image to offset in the original wasm /// binary. /// -/// This section has a custom binary encoding. Currently its encoding is: -/// -/// * The section starts with a 32-bit little-endian integer. This integer is -/// how many entries are in the following two arrays. -/// * Next is an array with the previous count number of 32-bit little-endian -/// integers. This array is a sorted list of relative offsets within the text -/// section. This is intended to be a lookup array to perform a binary search -/// on an offset within the text section on this array. 
-/// * Finally there is another array, with the same count as before, also of -/// 32-bit little-endian integers. These integers map 1:1 with the previous -/// array of offsets, and correspond to what the original offset was in the -/// wasm file. -/// -/// Decoding this section is intentionally simple, it only requires loading a -/// 32-bit little-endian integer plus some bounds checks. Reading this section -/// is done with the `lookup_file_pos` function below. Reading involves -/// performing a binary search on the first array using the index found for the -/// native code offset to index into the second array and find the wasm code -/// offset. -/// -/// At this time this section has an alignment of 1, which means all reads of it -/// are unaligned. Additionally at this time the 32-bit encodings chosen here -/// mean that >=4gb text sections are not supported. +/// This section's format is defined by the documentation of the +/// `crate::compile::AddressMapSection` data structure, which builds it. It is +/// decoded by `lookup_file_pos`. Its offsets are relative to the start of the +/// text section. pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap"; /// A custom Wasmtime-specific section of compilation which store information @@ -75,27 +56,13 @@ pub const ELF_WASMTIME_STACK_MAP: &str = ".wasmtime.stackmap"; /// encodes the ability to map an offset in the text section to the trap code /// that it corresponds to. /// -/// This section is used at runtime to determine what flavor of trap happened to -/// ensure that embedders and debuggers know the reason for the wasm trap. The -/// encoding of this section is custom to Wasmtime and managed with helpers in -/// the `object` crate: -/// -/// * First the section has a 32-bit little endian integer indicating how many -/// trap entries are in the section. -/// * Next is an array, of the same length as read before, of 32-bit -/// little-endian integers. 
These integers are offsets into the text section -/// of the compilation image. -/// * Finally is the same count number of bytes. Each of these bytes corresponds -/// to a trap code. -/// -/// This section is decoded by `lookup_trap_code` below which will read the -/// section count, slice some bytes to get the various arrays, and then perform -/// a binary search on the offsets array to find the index corresponding to -/// the pc being looked up. If found the same index in the trap array (the array -/// of bytes) is the trap code for that offset. -/// -/// Note that at this time this section has an alignment of 1. Additionally due -/// to the 32-bit encodings for offsets this doesn't support images >=4gb. +/// This section is used at runtime to determine what flavor of trap happened +/// to ensure that embedders and debuggers know the reason for the wasm trap. +/// +/// This section's format is defined by the documentation of the +/// `crate::compile::TrapEncodingBuilder` data structure, which builds it. It +/// is decoded by `lookup_trap_code`. Its offsets are relative to the start of +/// the text section. pub const ELF_WASMTIME_TRAPS: &str = ".wasmtime.traps"; /// A custom binary-encoded section of the wasmtime compilation diff --git a/crates/environ/src/trap_encoding.rs b/crates/environ/src/trap_encoding.rs index 85a52828a9d2..692238a4a462 100644 --- a/crates/environ/src/trap_encoding.rs +++ b/crates/environ/src/trap_encoding.rs @@ -1,3 +1,4 @@ +use crate::bytes; use core::fmt; use object::{Bytes, LittleEndian, U32}; @@ -285,54 +286,135 @@ generate_trap_type! { impl core::error::Error for Trap {} +/// Number of trap entries packed into one block of the trap section. +/// +/// See `TrapEncodingBuilder` in `crate::compile` for the full section format. +/// Chosen as a balance between the fixed-width index overhead per block (8 +/// bytes, amortized across entries) and the amount of linear decoding required +/// to look up a single pc within a block. 
+pub(crate) const TRAP_BLOCK_SIZE: usize = 128;
+
 /// Decodes the provided trap information section and attempts to find the trap
 /// code corresponding to the `offset` specified.
 ///
 /// The `section` provided is expected to have been built by
-/// `TrapEncodingBuilder` above. Additionally the `offset` should be a relative
+/// `TrapEncodingBuilder` in `crate::compile`, whose documentation describes
+/// the format decoded here. Additionally the `offset` should be a relative
 /// offset within the text section of the compilation image.
+///
+/// Returns `None` when the section cannot be parsed, when `offset` does not
+/// fit in a `u32`, when no block starts at or before `offset`, when the
+/// selected block is malformed, or when no entry matches `offset` exactly.
 pub fn lookup_trap_code(section: &[u8], offset: usize) -> Option {
-    let (offsets, traps) = parse(section)?;
+    let section = parse(section)?;
+    let offset = u32::try_from(offset).ok()?;

-    // The `offsets` table is sorted in the trap section so perform a binary
-    // search of the contents of this section to find whether `offset` is an
-    // entry in the section. Note that this is a precise search because trap pcs
+    // Find the last block whose first pc is `<= offset`; only that block can
+    // contain `offset`. Note that this is a precise search because trap pcs
     // should always be precise as well as our metadata about them, which means
     // we expect an exact match to correspond to a trap opcode.
-    //
-    // Once an index is found within the `offsets` array then that same index is
-    // used to lookup from the `traps` list of bytes to get the trap code byte
-    // corresponding to this offset.
-    let offset = u32::try_from(offset).ok()?;
-    let index = offsets
-        .binary_search_by_key(&offset, |val| val.get(LittleEndian))
-        .ok()?;
-    debug_assert!(index < traps.len());
-    let byte = *traps.get(index)?;
-
-    let trap = CompiledTrap::from_u8(byte);
-    debug_assert!(trap.is_some(), "missing mapping for {byte}");
-    trap
+    let block = section
+        .block_index
+        .partition_point(|[first_offset, _]| first_offset.get(LittleEndian) <= offset)
+        .checked_sub(1)?;
+
+    for (pc, byte) in section.block_entries(block)? {
+        if pc == offset {
+            let trap = CompiledTrap::from_u8(byte);
+            debug_assert!(trap.is_some(), "missing mapping for {byte}");
+            return trap;
+        }
+        // Offsets within a block never decrease (each one is the previous
+        // offset plus an unsigned delta), so nothing after this can match.
+        if pc > offset {
+            break;
+        }
+    }
+    None
+}
+
+/// A parsed view of the trap section.
+///
+/// The fields here correspond to the pieces of the section layout described
+/// on `TrapEncodingBuilder` in `crate::compile`.
+#[derive(Clone, Copy)]
+struct TrapSection<'a> {
+    /// Total number of trap entries in this section.
+    entries: usize,
+    /// One `(first_offset, block_pos)` pair per block.
+    block_index: &'a [[U32; 2]],
+    /// Variable-length block bodies, indexed by `block_pos` in the
+    /// `block_index` table above.
+    block_bodies: &'a [u8],
+}
+
+impl<'a> TrapSection<'a> {
+    /// Returns an iterator of `(text_offset, trap_code_byte)` for all entries
+    /// in the block at `block_index`, or `None` if the section is malformed.
+    ///
+    /// "Malformed" covers: `block_index` is out of range, the block's body
+    /// position lies outside `block_bodies`, the body is missing its leading
+    /// default-code byte, or the block would start past the section's total
+    /// entry count.
+    fn block_entries(&self, block_index: usize) -> Option> {
+        let [first_offset, block_pos] = self.block_index.get(block_index)?;
+        let first_offset = first_offset.get(LittleEndian);
+        let block_pos = block_pos.get(LittleEndian);
+        let mut block = self.block_bodies.get(usize::try_from(block_pos).ok()?..)?;
+        // The value taken off the front of the body is the code used for
+        // every entry that does not carry an explicit one.
+        let default_code = bytes::pop(&mut block)?;
+        // Every block holds `TRAP_BLOCK_SIZE` entries except possibly the
+        // last, which holds whatever is left of `entries`.
+        let remaining = core::cmp::min(
+            TRAP_BLOCK_SIZE,
+            // Checked so that an index too large to scale is reported as a
+            // malformed section instead of overflowing.
+            self.entries.checked_sub(block_index.checked_mul(TRAP_BLOCK_SIZE)?)?,
+        );
+        Some(BlockEntries {
+            block,
+            prev_offset: first_offset,
+            default_code,
+            remaining,
+        })
+    }
+}
+
+/// Iterator over the entries of a single block, decoding the
+/// delta-and-code-flag varints described in the "block body" portion of the
+/// section format on `TrapEncodingBuilder` in `crate::compile`.
+struct BlockEntries<'a> {
+    /// Bytes of the block body that have not been decoded yet.
+    block: &'a [u8],
+    /// Offset that the next delta is added to; updated to each decoded
+    /// entry's offset.
+    prev_offset: u32,
+    /// Code yielded for entries whose token does not flag an explicit code.
+    default_code: u8,
+    /// Number of entries this iterator may still yield.
+    remaining: usize,
+}
+
+impl Iterator for BlockEntries<'_> {
+    type Item = (u32, u8);
+
+    /// Decodes the next `(text_offset, trap_code_byte)` pair.
+    ///
+    /// Returns `None` once `remaining` reaches zero, and also as soon as
+    /// decoding fails: `bytes::read_uleb` or `bytes::pop` yields `None`, the
+    /// delta does not fit in a `u32`, or the accumulated offset overflows.
+    fn next(&mut self) -> Option<(u32, u8)> {
+        self.remaining = self.remaining.checked_sub(1)?;
+        // The token packs the offset delta in its upper bits and, in bit 0, a
+        // flag saying whether an explicit code byte follows.
+        let token = bytes::read_uleb(&mut self.block)?;
+        let delta = u32::try_from(token >> 1).ok()?;
+        let cur_offset = self.prev_offset.checked_add(delta)?;
+        self.prev_offset = cur_offset;
+        let code = if token & 1 != 0 {
+            bytes::pop(&mut self.block)?
+        } else {
+            self.default_code
+        };
+        Some((cur_offset, code))
+    }
 }

-fn parse(section: &[u8]) -> Option<(&[U32], &[u8])> {
+/// Splits a raw trap section into its entry count, block index, and block
+/// bodies.
+///
+/// Reads two values from the front of the section (total entries, then number
+/// of blocks, each decoded as little-endian), takes `num_blocks` index items
+/// from what follows, and treats the rest as block bodies. Returns `None` if
+/// any read fails or a count does not fit in a `usize`.
+fn parse(section: &[u8]) -> Option> {
     let mut section = Bytes(section);
-    // NB: this matches the encoding written by `append_to` above.
-    let count = section.read::>().ok()?;
-    let count = usize::try_from(count.get(LittleEndian)).ok()?;
-    let (offsets, traps) = object::slice_from_bytes::>(section.0, count).ok()?;
-    debug_assert_eq!(traps.len(), count);
-    Some((offsets, traps))
+    // NB: this matches the encoding written by `TrapEncodingBuilder`.
+    let entries = section.read::>().ok()?;
+    let entries = usize::try_from(entries.get(LittleEndian)).ok()?;
+    let num_blocks = section.read::>().ok()?;
+    let num_blocks = usize::try_from(num_blocks.get(LittleEndian)).ok()?;
+    let (block_index, block_bodies) =
+        object::slice_from_bytes::<[U32; 2]>(section.0, num_blocks).ok()?;
+    Some(TrapSection {
+        entries,
+        block_index,
+        block_bodies,
+    })
 }

 /// Returns an iterator over all of the traps encoded in `section`, which should
 /// have been produced by `TrapEncodingBuilder`.
 pub fn iterate_traps(section: &[u8]) -> Option + '_> {
-    let (offsets, traps) = parse(section)?;
-    Some(offsets.iter().zip(traps).map(|(offset, trap)| {
-        (
-            offset.get(LittleEndian),
-            CompiledTrap::from_u8(*trap).unwrap(),
-        )
-    }))
+    // `None` is returned only when the section itself fails to parse.
+    let section = parse(section)?;
+    Some(
+        // Visit the blocks in index order. A block for which `block_entries`
+        // returns `None` contributes no items rather than ending iteration.
+        (0..section.block_index.len())
+            .flat_map(move |block| section.block_entries(block).into_iter().flatten())
+            // The `unwrap` panics if `CompiledTrap::from_u8` returns `None`
+            // for a decoded byte.
+            .map(|(pc, byte)| (pc, CompiledTrap::from_u8(byte).unwrap())),
+    )
 }