diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index a31492722271..e9e427c0bb07 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -319,6 +319,8 @@ wasmtime_option_group! { pub inherit_stderr: Option, /// Maximum number of frames to capture in backtraces. pub max_backtrace: Option, + /// Whether or not `*.cwasm` files have symbols in them. + pub symbols: Option, } enum Debug { @@ -974,6 +976,9 @@ impl CommonOptions { } } } + if let Some(enable) = self.debug.symbols { + config.debug_symbols(enable); + } if let Some(enable) = self.opts.memory_init_cow { config.memory_init_cow(enable); } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 53c0c8fdcf1c..0966f7f7c401 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -694,7 +694,7 @@ impl wasmtime_environ::Compiler for Compiler { obj: &mut Object<'static>, funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, - ) -> Result> { + ) -> Result, FunctionLoc)>> { log::trace!( "appending functions to object file: {:#?}", funcs.iter().map(|(sym, _, _)| sym).collect::>() @@ -878,7 +878,7 @@ impl wasmtime_environ::Compiler for Compiler { get_func: &'a dyn Fn( StaticModuleIndex, DefinedFuncIndex, - ) -> (SymbolId, &'a (dyn Any + Send + Sync)), + ) -> (Option, &'a (dyn Any + Send + Sync)), dwarf_package_bytes: Option<&'a [u8]>, tunables: &'a Tunables, ) -> Result<()> { @@ -922,7 +922,7 @@ impl wasmtime_environ::Compiler for Compiler { let section_id = *dwarf_sections_ids.get(name).unwrap(); for reloc in relocs { let target_symbol = match reloc.target { - DwarfSectionRelocTarget::Func(id) => compilation.symbol_id(id), + DwarfSectionRelocTarget::Func(id) => compilation.symbol_id(id).unwrap(), DwarfSectionRelocTarget::Section(name) => { obj.section_symbol(dwarf_sections_ids[name]) } diff --git a/crates/cranelift/src/debug.rs b/crates/cranelift/src/debug.rs index 
aadc36682893..e2938ada26f3 100644 --- a/crates/cranelift/src/debug.rs +++ b/crates/cranelift/src/debug.rs @@ -56,8 +56,10 @@ pub struct Compilation<'a> { /// /// This returns the `object`-based-symbol for the function as well as the /// `&CompiledFunction`. - get_func: - &'a dyn Fn(StaticModuleIndex, DefinedFuncIndex) -> (SymbolId, &'a CompiledFunctionMetadata), + get_func: &'a dyn Fn( + StaticModuleIndex, + DefinedFuncIndex, + ) -> (Option, &'a CompiledFunctionMetadata), /// Optionally-specified `*.dwp` file, currently only supported for core /// wasm modules. @@ -68,7 +70,7 @@ pub struct Compilation<'a> { /// Translation between `SymbolId` and a `usize`-based symbol which gimli /// uses. - symbol_index_to_id: Vec, + symbol_index_to_id: Vec>, symbol_id_to_index: HashMap, /// The `ModuleMemoryOffset` for each module within `translations`. @@ -84,7 +86,7 @@ impl<'a> Compilation<'a> { get_func: &'a dyn Fn( StaticModuleIndex, DefinedFuncIndex, - ) -> (SymbolId, &'a CompiledFunctionMetadata), + ) -> (Option, &'a CompiledFunctionMetadata), dwarf_package_bytes: Option<&'a [u8]>, tunables: &'a Tunables, ) -> Compilation<'a> { @@ -127,7 +129,9 @@ impl<'a> Compilation<'a> { for (module, translation) in translations { for func in translation.module.defined_func_indices() { let (sym, _func) = get_func(module, func); - symbol_id_to_index.insert(sym, (symbol_index_to_id.len(), module, func)); + if let Some(sym) = sym { + symbol_id_to_index.insert(sym, (symbol_index_to_id.len(), module, func)); + } symbol_index_to_id.push(sym); } } @@ -157,7 +161,13 @@ impl<'a> Compilation<'a> { /// function metadata that were produced during compilation. 
fn functions( &self, - ) -> impl Iterator + '_ { + ) -> impl Iterator< + Item = ( + StaticModuleIndex, + Option, + &'a CompiledFunctionMetadata, + ), + > + '_ { self.indexes().map(move |(module, func)| { let (sym, func) = self.function(module, func); (module, sym, func) @@ -169,14 +179,14 @@ impl<'a> Compilation<'a> { &self, module: StaticModuleIndex, func: DefinedFuncIndex, - ) -> (usize, &'a CompiledFunctionMetadata) { + ) -> (Option, &'a CompiledFunctionMetadata) { let (sym, func) = (self.get_func)(module, func); - (self.symbol_id_to_index[&sym].0, func) + (sym.map(|sym| self.symbol_id_to_index[&sym].0), func) } /// Maps a `usize`-based symbol used by gimli to the object-based /// `SymbolId`. - pub fn symbol_id(&self, sym: usize) -> SymbolId { + pub fn symbol_id(&self, sym: usize) -> Option { self.symbol_index_to_id[sym] } } diff --git a/crates/cranelift/src/debug/transform/address_transform.rs b/crates/cranelift/src/debug/transform/address_transform.rs index 7de691c7f415..c3653a1cc619 100644 --- a/crates/cranelift/src/debug/transform/address_transform.rs +++ b/crates/cranelift/src/debug/transform/address_transform.rs @@ -19,7 +19,7 @@ pub struct AddressMap { /// length, and instructions addresses. #[derive(Debug)] pub struct FunctionMap { - pub symbol: usize, + pub symbol: Option, pub offset: GeneratedAddress, pub len: GeneratedAddress, pub wasm_start: WasmAddress, @@ -514,7 +514,7 @@ impl AddressTransform { object::Endianness::Little, ); let dummy_symbol = dummy_obj.add_file_symbol(Vec::new()); - let func_lookup = move |_, f| (dummy_symbol, module_map[f]); + let func_lookup = move |_, f| (Some(dummy_symbol), module_map[f]); let tunables = wasmtime_environ::Tunables::default_host(); let compile = Compilation::new( &*cranelift_codegen::isa::lookup(target_lexicon::Triple::host()) @@ -565,10 +565,10 @@ impl AddressTransform { if addr == func.end { // Clamp last address to the end to extend translation to the end // of the function. 
- return Some((map.symbol, map.len)); + return Some((map.symbol?, map.len)); } let first_result = TransformRangeStartIter::new(func, addr).next(); - first_result.map(|(address, _)| (map.symbol, address)) + first_result.and_then(|(address, _)| Some((map.symbol?, address))) } else { // Address was not found: function was not compiled? None @@ -598,7 +598,7 @@ impl AddressTransform { } if let Some(func) = self.find_func(start) { let result = TransformRangeIter::new(func, start, end); - let symbol = self.map[func.index].symbol; + let symbol = self.map[func.index].symbol?; return Some((symbol, result)); } // Address was not found: function was not compiled? diff --git a/crates/cranelift/src/debug/transform/line_program.rs b/crates/cranelift/src/debug/transform/line_program.rs index 10106123ac20..9174b9fff7b1 100644 --- a/crates/cranelift/src/debug/transform/line_program.rs +++ b/crates/cranelift/src/debug/transform/line_program.rs @@ -26,7 +26,9 @@ pub(crate) fn clone_line_program( let Some(map) = addr_tr.map().get(index) else { continue; // no code generated }; - let symbol = map.symbol; + let Some(symbol) = map.symbol else { + continue; + }; let base_addr = map.offset; transform.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 })); // TODO track and place function declaration line here diff --git a/crates/cranelift/src/debug/transform/range_info_builder.rs b/crates/cranelift/src/debug/transform/range_info_builder.rs index 2799123bf5cd..7a4986f1edac 100644 --- a/crates/cranelift/src/debug/transform/range_info_builder.rs +++ b/crates/cranelift/src/debug/transform/range_info_builder.rs @@ -149,7 +149,9 @@ impl RangeInfoBuilder { } } RangeInfoBuilder::Function(index) => { - let symbol = addr_tr.map()[*index].symbol; + let Some(symbol) = addr_tr.map()[*index].symbol else { + return; + }; let range = addr_tr.func_range(*index); let addr = write::Address::Symbol { symbol, diff --git a/crates/cranelift/src/debug/transform/simulate.rs 
b/crates/cranelift/src/debug/transform/simulate.rs index 24f6d6c7c8a0..dd39c2f54398 100644 --- a/crates/cranelift/src/debug/transform/simulate.rs +++ b/crates/cranelift/src/debug/transform/simulate.rs @@ -60,10 +60,11 @@ fn generate_line_info( let maps = addr_tr.iter().flat_map(|(_, transform)| { transform.map().iter().filter_map(|(_, map)| { - if translated.contains(&map.symbol) { + let sym = map.symbol?; + if translated.contains(&sym) { None } else { - Some((map.symbol, map)) + Some((sym, map)) } }) }); @@ -357,7 +358,9 @@ pub fn generate_simulated_dwarf( let wasm_types = add_wasm_types(unit, root_id, out_strings); let mut unit_ranges = vec![]; for (module, index) in compilation.indexes().collect::>() { - let (symbol, _) = compilation.function(module, index); + let (Some(symbol), _) = compilation.function(module, index) else { + continue; + }; if translated.contains(&symbol) { continue; } diff --git a/crates/cranelift/src/debug/transform/unit.rs b/crates/cranelift/src/debug/transform/unit.rs index 726478ce7cb3..5e0ab805fb52 100644 --- a/crates/cranelift/src/debug/transform/unit.rs +++ b/crates/cranelift/src/debug/transform/unit.rs @@ -344,9 +344,11 @@ pub(crate) fn clone_unit<'a>( let frame_info = compilation.function_frame_info(module, func); current_value_range.push(new_stack_len, frame_info); let (symbol, _) = compilation.function(module, func); - translated.insert(symbol); - current_scope_ranges.push(new_stack_len, range_builder.get_ranges(addr_tr)); - Some(range_builder) + symbol.map(|symbol| { + translated.insert(symbol); + current_scope_ranges.push(new_stack_len, range_builder.get_ranges(addr_tr)); + range_builder + }) } else { // FIXME current_scope_ranges.push() None diff --git a/crates/cranelift/src/debug/write_debuginfo.rs b/crates/cranelift/src/debug/write_debuginfo.rs index 2123f5c159ef..4c90cbcc8da8 100644 --- a/crates/cranelift/src/debug/write_debuginfo.rs +++ b/crates/cranelift/src/debug/write_debuginfo.rs @@ -146,6 +146,9 @@ fn 
create_frame_table( let cie_id = table.add_cie(isa.create_systemv_cie()?); for (_, symbol, metadata) in compilation.functions() { + let Some(symbol) = symbol else { + continue; + }; // The CFA-based unwind info will either be natively present, or we // have generated it and placed into the "cfa_unwind_info" auxiliary // field. We shouldn't emit both, though, it'd be wasteful. diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 64104c072cbe..bbb7f7235554 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -13,7 +13,7 @@ //! function body, the imported wasm function do not. The trampolines symbol //! names have format "_trampoline_N", where N is `SignatureIndex`. -use crate::CompiledFunction; +use crate::{CompiledFunction, Compiler}; use cranelift_codegen::TextSectionBuilder; use cranelift_codegen::isa::unwind::{UnwindInfo, systemv}; use cranelift_control::ControlPlane; @@ -23,12 +23,11 @@ use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, Symbol use object::{Architecture, SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope}; use std::ops::Range; use wasmtime_environ::error::Result; -use wasmtime_environ::{Compiler, TripleExt}; -use wasmtime_environ::{FuncKey, obj}; +use wasmtime_environ::{Compiler as _, FuncKey, TripleExt, obj}; const TEXT_SECTION_NAME: &[u8] = b".text"; -fn text_align(compiler: &dyn Compiler) -> u64 { +fn text_align(compiler: &Compiler) -> u64 { // text pages will not be made executable with pulley, so the section // doesn't need to be padded out to page alignment boundaries. if compiler.triple().is_pulley() { @@ -47,7 +46,7 @@ fn text_align(compiler: &dyn Compiler) -> u64 { pub struct ModuleTextBuilder<'a> { /// The target that we're compiling for, used to query target-specific /// information as necessary. - compiler: &'a dyn Compiler, + compiler: &'a Compiler, /// The object file that we're generating code into. 
obj: &'a mut Object<'static>, @@ -73,7 +72,7 @@ impl<'a> ModuleTextBuilder<'a> { /// be called. The `finish` function will panic if this contract is not met. pub fn new( obj: &'a mut Object<'static>, - compiler: &'a dyn Compiler, + compiler: &'a Compiler, text: Box, ) -> Self { // Entire code (functions and trampolines) will be placed @@ -121,7 +120,7 @@ impl<'a> ModuleTextBuilder<'a> { name: &str, compiled_func: &'a CompiledFunction, resolve_reloc_target: impl Fn(wasmtime_environ::FuncKey) -> usize, - ) -> (SymbolId, Range) { + ) -> (Option, Range) { let body = compiled_func.buffer.data(); let alignment = compiled_func.alignment; let body_len = body.len() as u64; @@ -129,16 +128,20 @@ impl<'a> ModuleTextBuilder<'a> { .text .append(true, &body, alignment, &mut self.ctrl_plane); - let symbol_id = self.obj.add_symbol(Symbol { - name: name.as_bytes().to_vec(), - value: off, - size: body_len, - kind: SymbolKind::Text, - scope: SymbolScope::Compilation, - weak: false, - section: SymbolSection::Section(self.text_section), - flags: SymbolFlags::None, - }); + let symbol_id = if self.compiler.tunables().debug_symbols { + Some(self.obj.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: off, + size: body_len, + kind: SymbolKind::Text, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(self.text_section), + flags: SymbolFlags::None, + })) + } else { + None + }; if let Some(info) = compiled_func.unwind_info() { self.unwind_info.push(off, body_len, info); @@ -375,12 +378,7 @@ impl<'a> UnwindInfoBuilder<'a> { /// section immediately. /// /// The `text_section`'s section identifier is passed into this function. - fn append_section( - &self, - compiler: &dyn Compiler, - obj: &mut Object<'_>, - text_section: SectionId, - ) { + fn append_section(&self, compiler: &Compiler, obj: &mut Object<'_>, text_section: SectionId) { // This write will align the text section to a page boundary and then // return the offset at that point. 
This gives us the full size of the // text section at that point, after alignment. @@ -513,7 +511,7 @@ impl<'a> UnwindInfoBuilder<'a> { /// bits. fn write_systemv_unwind_info( &self, - compiler: &dyn Compiler, + compiler: &Compiler, obj: &mut Object<'_>, section_id: SectionId, text_section_size: u64, diff --git a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index 4dc2fa6d01c3..d4f005d2daea 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -308,7 +308,7 @@ pub trait Compiler: Send + Sync { obj: &mut Object<'static>, funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, - ) -> Result>; + ) -> Result, FunctionLoc)>>; /// Creates a new `Object` file which is used to build the results of a /// compilation into. @@ -417,7 +417,7 @@ pub trait Compiler: Send + Sync { get_func: &'a dyn Fn( StaticModuleIndex, DefinedFuncIndex, - ) -> (SymbolId, &'a (dyn Any + Send + Sync)), + ) -> (Option, &'a (dyn Any + Send + Sync)), dwarf_package_bytes: Option<&'a [u8]>, tunables: &'a Tunables, ) -> Result<()>; diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 76123c17d1ec..d67f59fbccbe 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -82,6 +82,10 @@ define_tunables! { /// the guest. pub debug_guest: bool, + /// Whether we are enabling native symbols to get inserted into the + /// final `*.cwasm`. + pub debug_symbols: bool, + /// Whether or not to retain DWARF sections in compiled modules. 
pub parse_wasm_debuginfo: bool, @@ -278,6 +282,7 @@ impl Tunables { metadata_for_internal_asserts: false, metadata_for_gc_heap_corruption: true, branch_hinting: false, + debug_symbols: true, } } diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index e61fac35b9cb..d91715640c86 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -303,6 +303,7 @@ impl Config { cfg.codegen.parallel_compilation = Some(false); cfg.debug.address_map = Some(self.wasmtime.generate_address_map); + cfg.debug.symbols = Some(self.wasmtime.debug_symbols); cfg.opts.opt_level = Some(self.wasmtime.opt_level.to_wasmtime()); cfg.opts.regalloc_algorithm = Some(self.wasmtime.regalloc_algorithm.to_wasmtime()); cfg.opts.signals_based_traps = Some(self.wasmtime.signals_based_traps); @@ -584,6 +585,7 @@ pub struct WasmtimeConfig { opt_level: OptLevel, regalloc_algorithm: RegallocAlgorithm, debug_info: bool, + debug_symbols: bool, canonicalize_nans: bool, interruptible: bool, pub(crate) consume_fuel: bool, @@ -879,6 +881,10 @@ impl WasmtimeConfig { // must not conflict. Set it to None so the default matches. mcfg.gc_heap_may_move = None; } + + if !self.debug_symbols { + self.debug_info = false; + } } } diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 0eb4d2e16a76..fd4c838ce15e 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -439,6 +439,29 @@ impl Config { self } + /// Whether or not symbols are located in generated compiled module + /// artifacts. + /// + /// Wasmtime's currently representation of compiled artifacts is an ELF + /// file. ELF files have symbol tables and such and this option enables + /// whether symbols are emitted for wasm functions. This utility can be + /// useful when profiling wasm modules (many profilers work with + /// ELF-in-memory by default without futher configuration), introspection of + /// a `*.cwasm` (e.g. 
the symbol table is what `wasmtime objdump` reads), or + /// just general binary analysis of the resulting ELF file. Large wasm modules + /// can have large symbol tables, however, and the symbols serve no purpose + /// at runtime meaning that they are pure overhead for minimal modules as + /// well. This option can be used to disable these symbols which will reduce + /// the debuggability of modules but will also reduce their size. + /// + /// This option is `true` by default. + /// + /// This option is required if [`Config::debug_info`] is enabled. + pub fn debug_symbols(&mut self, enable: bool) -> &mut Self { + self.tunables.debug_symbols = Some(enable); + self + } + /// Configures whether compiled guest code will be instrumented to /// provide debugging at the Wasm VM level. /// @@ -2724,6 +2747,10 @@ impl Config { } } + if tunables.debug_native && !tunables.debug_symbols { + bail!("cannot enable native debug info while debug symbols are disabled"); + } + Ok((tunables, features)) } diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index 3eeb11c7e2f1..ceb9b154a0a2 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -298,6 +298,7 @@ impl Metadata<'_> { memory_guard_size, debug_native, debug_guest, + debug_symbols, parse_wasm_debuginfo, consume_fuel, ref operator_cost, @@ -365,6 +366,7 @@ impl Metadata<'_> { "native debug information support", )?; Self::check_bool(debug_guest, other.debug_guest, "guest debug")?; + Self::check_bool(debug_symbols, other.debug_symbols, "debug symbols")?; Self::check_bool( parse_wasm_debuginfo, other.parse_wasm_debuginfo, diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index ad34b8e2a9f5..08dae41dcb3b 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -182,7 +182,7 @@ impl wasmtime_environ::Compiler for Compiler { obj: &mut Object<'static>, funcs: &[(String, FuncKey, Box)],
resolve_reloc: &dyn Fn(usize, wasmtime_environ::FuncKey) -> usize, - ) -> Result> { + ) -> Result, FunctionLoc)>> { self.trampolines.append_code(obj, funcs, resolve_reloc) } @@ -213,7 +213,7 @@ impl wasmtime_environ::Compiler for Compiler { _get_func: &'a dyn Fn( StaticModuleIndex, DefinedFuncIndex, - ) -> (SymbolId, &'a (dyn Any + Send + Sync)), + ) -> (Option, &'a (dyn Any + Send + Sync)), _dwarf_package_bytes: Option<&'a [u8]>, _tunables: &'a Tunables, ) -> Result<()> { @@ -287,7 +287,7 @@ impl wasmtime_environ::Compiler for NoInlineCompiler { obj: &mut Object<'static>, funcs: &[(String, FuncKey, Box)], resolve_reloc: &dyn Fn(usize, FuncKey) -> usize, - ) -> Result> { + ) -> Result, FunctionLoc)>> { self.0.append_code(obj, funcs, resolve_reloc) } @@ -318,7 +318,7 @@ impl wasmtime_environ::Compiler for NoInlineCompiler { get_func: &'a dyn Fn( StaticModuleIndex, DefinedFuncIndex, - ) -> (SymbolId, &'a (dyn Any + Send + Sync)), + ) -> (Option, &'a (dyn Any + Send + Sync)), dwarf_package_bytes: Option<&'a [u8]>, tunables: &'a Tunables, ) -> Result<()> { diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 06f6253d3518..20369f903b8d 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -33,6 +33,7 @@ - [Profiling with `samply`](./examples-profiling-samply.md) - [Cross-platform Profiling](./examples-profiling-guest.md) - [Building a Minimal Embedding](./examples-minimal.md) + - [Building a Minimal `*.cwasm`](./examples-minimal-cwasm.md) - [Portable Interpretation](./examples-pulley.md) - [Pre-Compiling Wasm](./examples-pre-compiling-wasm.md) - [Fast Execution](./examples-fast-execution.md) diff --git a/docs/examples-minimal-cwasm.md b/docs/examples-minimal-cwasm.md new file mode 100644 index 000000000000..316c46448c67 --- /dev/null +++ b/docs/examples-minimal-cwasm.md @@ -0,0 +1,173 @@ +# Building a minimal `*.cwasm` + +In addition to building a [minimal embedding] embedders may also want to +minimize the size of their `*.cwasm` they're compiling as well. 
The size of a +`*.cwasm` affects the in-memory size of a compiled wasm module on a device, for +example, and thus minimizing that can lead to freeing up resources to use +elsewhere. + +As with building a [minimal embedding] wasmtime is not optimized for this use +case, so some knobs will need to be turned to enable this. The first step to +building a minimal `*.cwasm` is building a minimal wasm itself. To that end +many of the instructions on [minimal embedding] about recompiling code with +smaller options apply here too. This example will walk through compiling a Rust +"hello world" program and optimizing the size of the output `*.cwasm`. + +The source code we have here is: + +```rust +fn main() { + println!("Hello, world!"); +} +``` + +The defaults are: + +```bash +$ rustc foo.rs --target wasm32-wasip2 +$ wasmtime compile foo.wasm +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 284K Jun 12 16:58 foo.cwasm +-rw-rw-r-- 1 alex alex 2.5M Jun 12 16:58 foo.wasm +``` + +While this might look like Wasmtime is a super awesome optimizing compiler here +what's actually happening is that the Rust compiler is preserving DWARF debug +info by default, but Wasmtime strips guest-DWARF information by default. The +first step to minimizing a wasm is to strip out unnecessary custom sections like +this: + +```bash +$ rustc foo.rs --target wasm32-wasip2 +$ wasm-tools strip foo.wasm -o foo.wasm +$ wasmtime compile foo.wasm +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 284K Jun 12 16:59 foo.cwasm +-rw-rw-r-- 1 alex alex 78K Jun 12 16:59 foo.wasm +``` + +Here we can see a not-so-awesome property, which is that compiled `*.cwasm` +files are often larger than their corresponding `*.wasm` file. This is expected +and generally always going to be the case. First though let's apply many +learnings from a [minimal embedding] to shrink the size of this wasm module. +Note that here we're compiling with rustc manually, but for Cargo or other +projects it'll look similar.
+ +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasmtime compile foo.wasm +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 219K Jun 12 17:02 foo.cwasm +-rw-rw-r-- 1 alex alex 64K Jun 12 17:02 foo.wasm +``` + +Optimizations, LTO, reducing codegen units, etc, all reduce the size of this +input `*.wasm` file by ~14k in this case. This additionally reflects a general +trend where `*.cwasm` is proportional to the size of the input `*.wasm`, so it +shrank appropriately as well. At this point we'll assume that the input `*.wasm` +is as small as can be and shift to Wasmtime-specific optimizations. + +Like with the documentation of a [minimal embedding] the trend here is that by +removing features of Wasmtime or the compiled artifact you'll be able to shrink +the output. First what we can do is to disable Wasmtime's "address maps": + +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasmtime compile foo.wasm -Daddress-map=n +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 167K Jun 12 17:04 foo.cwasm +-rw-rw-r-- 1 alex alex 64K Jun 12 17:04 foo.wasm +``` + +Address maps are used by Wasmtime to generate a backtrace that refers to +WebAssembly program counter information in the output file. These counters can +be coupled with in-WebAssembly DWARF to generate backtraces with filenames and +line numbers pointing back to the source. For this use case though this can all +be safely stripped out as we won't be using it.
+ +The next optimization is to disable debug symbols in Wasmtime: + +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasmtime compile foo.wasm -Daddress-map=n -Dsymbols=n +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 143K Jun 12 17:06 foo.cwasm +-rw-rw-r-- 1 alex alex 64K Jun 12 17:06 foo.wasm +``` + +Wasmtime's `*.cwasm` artifacts are ELF files and by default they contain ELF +symbol information for debugging. This can help with native profilers and +`wasmtime objdump` for example, but this information isn't needed at runtime and +is safe to remove. + +The final optimization is noticing that the original wasm's `name` custom +section is actually still present. This section generally survives stripping +because of how useful it is for debugging, but for the absolutely minimal size +it can be stripped away: + +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasm-tools strip -a foo.wasm -o foo.wasm +$ wasmtime compile foo.wasm -Daddress-map=n -Dsymbols=n +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 132K Jun 12 17:08 foo.cwasm +-rw-rw-r-- 1 alex alex 50K Jun 12 17:08 foo.wasm +``` + +The final output has virtually no debugging information in it for when anything +goes wrong, so all you'll get are function indices and not much else. + +The next optimization is when runtime performance of this module will start +being affected. Wasmtime precomputes an image of linear memory for +initialization and page-aligns it, but this page-alignment and precomputation +can add a fair amount of empty space in the output file.
This can be disabled to +avoid CoW initialization and instead manually initialize all linear memories: + +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasm-tools strip -a foo.wasm -o foo.wasm +$ wasmtime compile foo.wasm -Daddress-map=n -Dsymbols=n -Omemory-init-cow=n +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 129K Jun 12 17:11 foo.cwasm +-rw-rw-r-- 1 alex alex 50K Jun 12 17:11 foo.wasm +``` + +The final optimization is that Wasmtime's interpreter, Pulley, can sometimes +have smaller output than native machine output. This is another hit on runtime +performance, but for the sake of example: + +```bash +$ rustc foo.rs --target wasm32-wasip2 -Copt-level=s -Clto -Ccodegen-units=1 -Cstrip=debuginfo +$ wasm-tools strip -a foo.wasm -o foo.wasm +$ wasmtime compile foo.wasm -Daddress-map=n -Dsymbols=n -Omemory-init-cow=n --target pulley64 +$ ls -lh foo.wasm foo.cwasm +-rw-rw-r-- 1 alex alex 90K Jun 12 17:12 foo.cwasm +-rw-rw-r-- 1 alex alex 50K Jun 12 17:12 foo.wasm +``` + +At this time this is the smallest binary that Wasmtime can generate. If this is +not small enough for you please feel free to file an issue and Wasmtime +maintainers can help debug if there's any more low-hanging fruit to pick. + +## Minimizing `*.cwasm`: Summary + +The steps you'll want to use when minimizing `*.cwasm` size are: + +* Minimize the size of the input `*.wasm`. + * Compile with optimizations. + * Strip debug info. + * Strip the `name` section. + * Apply language-specific optimizations like LTO, codegen units, rebuilding + Rust's libstd, etc. +* Pass `-Daddress-map=n` to disable the ability to generate backtraces with wasm + pc's in the backtrace. +* Pass `-Dsymbols=n` to disable ELF symbols used for debugging/profiling in the + output artifact. +* Pass `-Omemory-init-cow=n` to disable page-aligned data sections and + precomputation of a memory image that may have holes in it.
+* Pass `--target pulley64` to leverage "macro opcodes" in Pulley to compress + instructions a bit further. + +And, failing that, feel free to file an issue! + +[minimal embedding]: ./examples-minimal.md