From 8819cea6df80b19a80adc6a9564cf75b7aa91bf0 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 00:56:03 +1000 Subject: [PATCH 01/11] replace wasm-gen with wasm-encoder and add spec filtering --- Cargo.lock | 54 +- cmd/crates/soroban-spec-tools/Cargo.toml | 1 + cmd/crates/soroban-spec-tools/src/contract.rs | 247 ++++++++++ cmd/crates/soroban-spec-tools/src/filter.rs | 461 ++++++++++++++++++ cmd/crates/soroban-spec-tools/src/lib.rs | 1 + cmd/soroban-cli/Cargo.toml | 2 +- .../src/commands/contract/build.rs | 76 ++- 7 files changed, 813 insertions(+), 29 deletions(-) create mode 100644 cmd/crates/soroban-spec-tools/src/filter.rs diff --git a/Cargo.lock b/Cargo.lock index a02cf20c8f..ab93ec0e89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -827,12 +827,6 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" -[[package]] -name = "byteorder" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855" - [[package]] name = "byteorder" version = "1.5.0" @@ -2938,7 +2932,7 @@ version = "3.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eebcc3aff044e5944a8fbaf69eb277d11986064cba30c468730e8b9909fb551c" dependencies = [ - "byteorder 1.5.0", + "byteorder", "dbus-secret-service", "log", "secret-service", @@ -2995,10 +2989,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] -name = "leb128" -version = "0.2.5" +name = "leb128fmt" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" 
[[package]] name = "ledger-apdu" @@ -3027,7 +3021,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45ba81a1f5f24396b37211478aff7fbcd605dd4544df8dbed07b9da3c2057aee" dependencies = [ - "byteorder 1.5.0", + "byteorder", "cfg-if", "hex", "hidapi", @@ -3858,7 +3852,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.106", @@ -4995,9 +4989,9 @@ dependencies = [ "ulid", "url", "walkdir", - "wasm-gen", + "wasm-encoder", "wasm-opt", - "wasmparser", + "wasmparser 0.116.1", "which", "whoami", "zeroize", @@ -5019,7 +5013,7 @@ dependencies = [ "soroban-wasmi", "static_assertions", "stellar-xdr", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5065,7 +5059,7 @@ dependencies = [ "soroban-wasmi", "static_assertions", "stellar-strkey 0.0.13", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5153,7 +5147,7 @@ dependencies = [ "base64 0.22.1", "stellar-xdr", "thiserror 1.0.69", - "wasmparser", + "wasmparser 0.116.1", ] [[package]] @@ -5200,7 +5194,8 @@ dependencies = [ "stellar-xdr", "thiserror 1.0.69", "tokio", - "wasmparser", + "wasm-encoder", + "wasmparser 0.116.1", "which", ] @@ -5351,7 +5346,7 @@ version = "23.4.1" dependencies = [ "async-trait", "bollard", - "byteorder 1.5.0", + "byteorder", "ed25519-dalek", "env_logger", "hex", @@ -6529,13 +6524,13 @@ dependencies = [ ] [[package]] -name = "wasm-gen" -version = "0.1.4" +name = "wasm-encoder" +version = "0.235.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b854b1461005a7b3365742310f7faa3cac3add809d66928c64a40c7e9e842ebb" +checksum = "b3bc393c395cb621367ff02d854179882b9a351b4e0c93d1397e6090b53a5c2a" dependencies = [ - "byteorder 0.5.3", - "leb128", + "leb128fmt", + "wasmparser 0.235.0", ] [[package]] @@ -6619,6 +6614,17 @@ 
dependencies = [ "semver", ] +[[package]] +name = "wasmparser" +version = "0.235.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917" +dependencies = [ + "bitflags", + "indexmap 2.11.0", + "semver", +] + [[package]] name = "wasmparser-nostd" version = "0.100.2" diff --git a/cmd/crates/soroban-spec-tools/Cargo.toml b/cmd/crates/soroban-spec-tools/Cargo.toml index a17a5d31f1..d2b719eaec 100644 --- a/cmd/crates/soroban-spec-tools/Cargo.toml +++ b/cmd/crates/soroban-spec-tools/Cargo.toml @@ -27,6 +27,7 @@ hex = { workspace = true } wasmparser = { workspace = true } base64 = { workspace = true } thiserror = "1.0.31" +wasm-encoder = "0.235.0" [dev-dependencies] diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 0f6bef7a04..753a998bb1 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -119,6 +119,87 @@ impl Spec { ScSpecEntry::read_xdr_iter(&mut read).collect::, xdr::Error>>()?, )) } + + /// Returns a filtered version of the spec with unused types removed. + /// + /// This removes any type definitions that are not referenced (directly or + /// transitively) by any function in the contract. Functions and events are + /// always preserved. + #[must_use] + pub fn filter_unused_types(&self) -> Vec { + crate::filter::filter_unused_types(self.spec.clone()) + } + + /// Returns the filtered spec entries serialized as XDR bytes. + /// + /// This is useful for replacing the contractspecv0 custom section in a WASM + /// file with a smaller version that only contains used types. 
+ pub fn filtered_spec_xdr(&self) -> Result, Error> { + let filtered = self.filter_unused_types(); + let mut buffer = Vec::new(); + let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none()); + for entry in filtered { + entry.write_xdr(&mut writer)?; + } + Ok(buffer) + } +} + +/// Replaces a custom section in WASM bytes with new content. +/// +/// This function parses the WASM to find the target custom section, then rebuilds +/// the WASM by copying all other sections verbatim and appending the new custom +/// section at the end. +/// +/// # Arguments +/// +/// * `wasm_bytes` - The original WASM binary +/// * `section_name` - The name of the custom section to replace +/// * `new_content` - The new content for the custom section +/// +/// # Returns +/// +/// A new WASM binary with the custom section replaced. +pub fn replace_custom_section( + wasm_bytes: &[u8], + section_name: &str, + new_content: &[u8], +) -> Result, Error> { + use wasm_encoder::{CustomSection, Module, RawSection}; + use wasmparser::Payload; + + let mut module = Module::new(); + + let parser = wasmparser::Parser::new(0); + for payload in parser.parse_all(wasm_bytes) { + let payload = payload?; + + match &payload { + // Skip the target custom section - we'll append the new one at the end + Payload::CustomSection(section) if section.name() == section_name => { + continue; + } + // For all other payloads that represent sections, copy them verbatim + _ => { + if let Some((id, range)) = payload.as_section() { + let raw = RawSection { + id, + data: &wasm_bytes[range], + }; + module.section(&raw); + } + } + } + } + + // Append the new custom section + let custom = CustomSection { + name: section_name.into(), + data: new_content.into(), + }; + module.section(&custom); + + Ok(module.finish()) } impl Display for Spec { @@ -296,3 +377,169 @@ fn format_name(lib: &StringM<80>, name: &StringM<60>) -> String { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_filter_spec_on_empty_contract() { + // This test checks that filtering a contract with events but no UDT references + // keeps the events (as per design) but would filter any unused UDTs + + // Skip if the file doesn't exist (it's in a different repo) + let wasm_path = "/Users/leighmcculloch/Code/rs-soroban-sdk/tests/empty/out/test_empty.wasm"; + if !std::path::Path::new(wasm_path).exists() { + return; + } + + let wasm_bytes = std::fs::read(wasm_path).unwrap(); + let spec = Spec::new(&wasm_bytes).unwrap(); + + println!("Original spec entries: {}", spec.spec.len()); + for entry in &spec.spec { + match entry { + ScSpecEntry::FunctionV0(f) => { + println!(" Function: {}", f.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::EventV0(e) => { + println!(" Event: {}", e.name.to_utf8_string_lossy()); + } + } + } + + let filtered = spec.filter_unused_types(); + println!("\nFiltered spec entries: {}", filtered.len()); + for entry in &filtered { + match entry { + ScSpecEntry::FunctionV0(f) => { + println!(" Function: {}", f.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); + } + ScSpecEntry::EventV0(e) => { + println!(" Event: {}", e.name.to_utf8_string_lossy()); + } + } + } + + // The function should still be there + 
assert!(filtered + .iter() + .any(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + + // Events should be preserved + let event_count = filtered + .iter() + .filter(|e| matches!(e, ScSpecEntry::EventV0(_))) + .count(); + assert!(event_count > 0, "Events should be preserved"); + } + + #[test] + fn test_filter_on_custom_types_contract() { + // Test filtering on the custom_types contract wasm + let wasm_path = "/Users/leighmcculloch/Code/stellar-cli-spec-clean/target/wasm32v1-none/release/test_custom_types.wasm"; + if !std::path::Path::new(wasm_path).exists() { + eprintln!("Skipping test: wasm file not found at {}", wasm_path); + return; + } + + let wasm_bytes = std::fs::read(wasm_path).unwrap(); + let spec = Spec::new(&wasm_bytes).unwrap(); + + println!("\n=== CUSTOM TYPES CONTRACT ==="); + println!("Original spec entries: {}", spec.spec.len()); + + // Count functions and UDTs + let func_count = spec + .spec + .iter() + .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) + .count(); + let udt_count = spec.spec.len() - func_count; + + println!("Functions: {}", func_count); + println!("UDTs: {}", udt_count); + + // List all UDTs before filtering + for entry in &spec.spec { + match entry { + ScSpecEntry::UdtStructV0(s) => { + println!(" Struct: {}", s.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtUnionV0(u) => { + println!(" Union: {}", u.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtEnumV0(e) => { + println!(" Enum: {}", e.name.to_utf8_string_lossy()) + } + ScSpecEntry::UdtErrorEnumV0(e) => { + println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()) + } + ScSpecEntry::EventV0(e) => println!(" Event: {}", e.name.to_utf8_string_lossy()), + _ => {} + } + } + + let filtered = spec.filter_unused_types(); + println!("\nFiltered spec entries: {}", filtered.len()); + + // All types in this contract are used by functions, so nothing should be filtered + // Verify key types are preserved: + // - Test (used by strukt, strukt_hel, and transitively by ComplexEnum, 
TupleStruct) + // - SimpleEnum (used by simple, and transitively by ComplexEnum, TupleStruct) + // - RoyalCard (used by card) + // - ComplexEnum (used by complex) + // - TupleStruct (used by tuple_strukt) + // - RecursiveEnum (used by recursive_enum) + + let has_test = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtStructV0(s) if s.name.to_utf8_string_lossy() == "Test") + }); + let has_simple_enum = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "SimpleEnum") + }); + let has_complex_enum = filtered.iter().any(|entry| { + matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "ComplexEnum") + }); + + assert!(has_test, "Test struct should be preserved"); + assert!(has_simple_enum, "SimpleEnum should be preserved"); + assert!(has_complex_enum, "ComplexEnum should be preserved"); + + // All functions should be preserved + let filtered_func_count = filtered + .iter() + .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) + .count(); + assert_eq!( + filtered_func_count, func_count, + "All functions should be preserved" + ); + + println!("Filter test passed: all used types and functions preserved"); + } +} diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs new file mode 100644 index 0000000000..4bdc192a0d --- /dev/null +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -0,0 +1,461 @@ +//! Filter unused types from contract spec entries. +//! +//! This module provides functionality to remove type definitions that are not +//! referenced by any function in the contract spec. This helps reduce WASM size +//! by eliminating unnecessary spec entries. + +use std::collections::HashSet; + +use stellar_xdr::curr::{ + ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, +}; + +/// Extracts UDT (User Defined Type) names referenced by a type definition. 
+/// +/// This function recursively traverses the type structure to find all +/// references to user-defined types. +fn get_type_refs(type_def: &ScSpecTypeDef) -> HashSet { + let mut refs = HashSet::new(); + + match type_def { + // Primitive types have no UDT references + ScSpecTypeDef::Val + | ScSpecTypeDef::U64 + | ScSpecTypeDef::I64 + | ScSpecTypeDef::U128 + | ScSpecTypeDef::I128 + | ScSpecTypeDef::U32 + | ScSpecTypeDef::I32 + | ScSpecTypeDef::U256 + | ScSpecTypeDef::I256 + | ScSpecTypeDef::Bool + | ScSpecTypeDef::Symbol + | ScSpecTypeDef::Error + | ScSpecTypeDef::Bytes + | ScSpecTypeDef::BytesN(_) + | ScSpecTypeDef::Void + | ScSpecTypeDef::Timepoint + | ScSpecTypeDef::Duration + | ScSpecTypeDef::String + | ScSpecTypeDef::Address + | ScSpecTypeDef::MuxedAddress => {} + + // UDT reference - add the type name + ScSpecTypeDef::Udt(udt) => { + refs.insert(udt.name.to_utf8_string_lossy()); + } + + // Composite types - recurse into contained types + ScSpecTypeDef::Vec(vec_type) => { + refs.extend(get_type_refs(&vec_type.element_type)); + } + ScSpecTypeDef::Map(map_type) => { + refs.extend(get_type_refs(&map_type.key_type)); + refs.extend(get_type_refs(&map_type.value_type)); + } + ScSpecTypeDef::Option(opt_type) => { + refs.extend(get_type_refs(&opt_type.value_type)); + } + ScSpecTypeDef::Result(result_type) => { + refs.extend(get_type_refs(&result_type.ok_type)); + refs.extend(get_type_refs(&result_type.error_type)); + } + ScSpecTypeDef::Tuple(tuple_type) => { + for value_type in tuple_type.value_types.iter() { + refs.extend(get_type_refs(value_type)); + } + } + } + + refs +} + +/// Extracts all UDT names referenced by a spec entry. 
+///
+/// Functions contribute their input and output types, structs their field
+/// types, unions the payload types of their tuple cases, and events the types
+/// of their params. Plain enums and error enums reference no other types.
+fn get_entry_type_refs(entry: &ScSpecEntry) -> HashSet<String> {
+    let mut refs = HashSet::new();
+
+    match entry {
+        ScSpecEntry::FunctionV0(func) => {
+            // Collect types from inputs
+            for input in func.inputs.iter() {
+                refs.extend(get_type_refs(&input.type_));
+            }
+            // Collect types from outputs
+            for output in func.outputs.iter() {
+                refs.extend(get_type_refs(output));
+            }
+        }
+        ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { fields, .. }) => {
+            for field in fields.iter() {
+                refs.extend(get_type_refs(&field.type_));
+            }
+        }
+        ScSpecEntry::UdtUnionV0(ScSpecUdtUnionV0 { cases, .. }) => {
+            for case in cases.iter() {
+                // Void cases carry no payload, so only tuple cases can reference types
+                if let ScSpecUdtUnionCaseV0::TupleV0(tuple_case) = case {
+                    for type_def in tuple_case.type_.iter() {
+                        refs.extend(get_type_refs(type_def));
+                    }
+                }
+            }
+        }
+        // Enums and error enums don't reference other types
+        ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) => {}
+        // Events are always kept in the filtered spec, so the types of their
+        // params must be reported; otherwise a kept event could point at a
+        // type definition that the filter has removed.
+        ScSpecEntry::EventV0(event) => {
+            for param in event.params.iter() {
+                refs.extend(get_type_refs(&param.type_));
+            }
+        }
+    }
+
+    refs
+}
+
+/// Gets the name of a UDT entry, or None if it's not a UDT.
+///
+/// Functions and events are not UDTs and therefore yield `None`.
+fn get_udt_name(entry: &ScSpecEntry) -> Option<String> {
+    match entry {
+        ScSpecEntry::UdtStructV0(s) => Some(s.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtUnionV0(u) => Some(u.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtEnumV0(e) => Some(e.name.to_utf8_string_lossy()),
+        ScSpecEntry::UdtErrorEnumV0(e) => Some(e.name.to_utf8_string_lossy()),
+        ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => None,
+    }
+}
+
+/// Filters out unused types from contract spec entries.
+///
+/// This function performs a reachability analysis starting from all functions
+/// and all events, because both kinds of entry are always kept.
+/// It keeps:
+/// - All functions (FunctionV0)
+/// - All events (EventV0)
+/// - All UDTs that are directly or transitively referenced by functions or
+///   by event params
+///
+/// Types that are defined but never used by any function or event are removed.
+/// The relative order of the surviving entries is unchanged.
+///
+/// # Example
+///
+/// If a contract has:
+/// - Function `foo` that takes `TypeA` as input
+/// - `TypeA` which references `TypeB` in a field
+/// - Event `bar` with a param of type `TypeD`
+/// - `TypeC` which is defined but never used
+///
+/// The result will include `foo`, `bar`, `TypeA`, `TypeB`, and `TypeD`, but
+/// not `TypeC`.
+pub fn filter_unused_types(entries: Vec<ScSpecEntry>) -> Vec<ScSpecEntry> {
+    // Build a map from type name to entry for lookup
+    let type_entries: std::collections::HashMap<String, &ScSpecEntry> = entries
+        .iter()
+        .filter_map(|entry| get_udt_name(entry).map(|name| (name, entry)))
+        .collect();
+
+    // Collect initial references from every entry that is kept unconditionally:
+    // functions and events are the roots of the reachability analysis.
+    let mut reachable_types: HashSet<String> = HashSet::new();
+    for entry in &entries {
+        if matches!(
+            entry,
+            ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_)
+        ) {
+            reachable_types.extend(get_entry_type_refs(entry));
+        }
+    }
+
+    // Fixed-point iteration: keep adding types referenced by reachable types
+    // until no new types are found. Names that are already reachable are never
+    // re-added, so circular references terminate.
+    loop {
+        let mut new_types: HashSet<String> = HashSet::new();
+
+        for type_name in &reachable_types {
+            if let Some(entry) = type_entries.get(type_name) {
+                for referenced_type in get_entry_type_refs(entry) {
+                    if !reachable_types.contains(&referenced_type) {
+                        new_types.insert(referenced_type);
+                    }
+                }
+            }
+        }
+
+        if new_types.is_empty() {
+            break;
+        }
+
+        reachable_types.extend(new_types);
+    }
+
+    // Filter entries: keep functions, events, and reachable UDTs
+    entries
+        .into_iter()
+        .filter(|entry| {
+            match entry {
+                // Always keep functions
+                ScSpecEntry::FunctionV0(_) => true,
+                // Always keep events
+                ScSpecEntry::EventV0(_) => true,
+                // Keep UDTs only if they're reachable
+                _ => {
+                    if let Some(name) = get_udt_name(entry) {
+                        reachable_types.contains(&name)
+                    } else {
+                        true
+                    }
+                }
+            }
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use stellar_xdr::curr::{
+        ScSpecFunctionInputV0, ScSpecFunctionV0, ScSpecTypeUdt, ScSpecUdtEnumCaseV0,
+        ScSpecUdtEnumV0, ScSpecUdtErrorEnumCaseV0, ScSpecUdtErrorEnumV0, ScSpecUdtStructFieldV0,
+        StringM, VecM,
+    };
+
+    fn make_function(name: 
&str, input_types: Vec) -> ScSpecEntry { + let inputs: VecM = input_types + .into_iter() + .enumerate() + .map(|(i, type_)| ScSpecFunctionInputV0 { + doc: StringM::default(), + name: format!("arg{i}").try_into().unwrap(), + type_, + }) + .collect::>() + .try_into() + .unwrap(); + + ScSpecEntry::FunctionV0(ScSpecFunctionV0 { + doc: StringM::default(), + name: name.try_into().unwrap(), + inputs, + outputs: VecM::default(), + }) + } + + fn make_struct(name: &str, field_types: Vec<(&str, ScSpecTypeDef)>) -> ScSpecEntry { + let fields: VecM = field_types + .into_iter() + .map(|(field_name, type_)| ScSpecUdtStructFieldV0 { + doc: StringM::default(), + name: field_name.try_into().unwrap(), + type_, + }) + .collect::>() + .try_into() + .unwrap(); + + ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + fields, + }) + } + + fn make_enum(name: &str) -> ScSpecEntry { + ScSpecEntry::UdtEnumV0(ScSpecUdtEnumV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + cases: vec![ScSpecUdtEnumCaseV0 { + doc: StringM::default(), + name: "Variant".try_into().unwrap(), + value: 0, + }] + .try_into() + .unwrap(), + }) + } + + fn make_error_enum(name: &str) -> ScSpecEntry { + ScSpecEntry::UdtErrorEnumV0(ScSpecUdtErrorEnumV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + cases: vec![ScSpecUdtErrorEnumCaseV0 { + doc: StringM::default(), + name: "Error".try_into().unwrap(), + value: 1, + }] + .try_into() + .unwrap(), + }) + } + + fn udt(name: &str) -> ScSpecTypeDef { + ScSpecTypeDef::Udt(ScSpecTypeUdt { + name: name.try_into().unwrap(), + }) + } + + #[test] + fn test_removes_unused_type() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = 
filter_unused_types(entries); + + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_keeps_directly_referenced_type() { + let entries = vec![ + make_function("foo", vec![udt("UsedType")]), + make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + assert_eq!(filtered.len(), 2); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"UsedType".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_transitively_referenced_type() { + let entries = vec![ + make_function("foo", vec![udt("TypeA")]), + make_struct("TypeA", vec![("field", udt("TypeB"))]), + make_struct("TypeB", vec![("field", ScSpecTypeDef::U32)]), + make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"TypeA".to_string())); + assert!(names.contains(&"TypeB".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_all_functions() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_function("bar", vec![ScSpecTypeDef::Bool]), + ]; + + let filtered = filter_unused_types(entries); + + assert_eq!(filtered.len(), 2); + assert!(filtered + .iter() + .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + } + + #[test] + fn test_removes_unused_error_enum() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_error_enum("UsedError"), + make_error_enum("UnusedError"), + ]; + + let filtered = filter_unused_types(entries); + + // Only function should remain, no error enums are referenced + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], 
ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_keeps_error_enum_in_result() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Result(Box::new( + stellar_xdr::curr::ScSpecTypeResult { + ok_type: Box::new(ScSpecTypeDef::U32), + error_type: Box::new(udt("MyError")), + }, + ))], + ), + make_error_enum("MyError"), + make_error_enum("UnusedError"), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyError".to_string())); + assert!(!names.contains(&"UnusedError".to_string())); + } + + #[test] + fn test_handles_circular_references() { + // TypeA references TypeB, TypeB references TypeA + let entries = vec![ + make_function("foo", vec![udt("TypeA")]), + make_struct("TypeA", vec![("b", udt("TypeB"))]), + make_struct("TypeB", vec![("a", udt("TypeA"))]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"TypeA".to_string())); + assert!(names.contains(&"TypeB".to_string())); + } + + #[test] + fn test_handles_vec_of_udt() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Vec(Box::new( + stellar_xdr::curr::ScSpecTypeVec { + element_type: Box::new(udt("MyType")), + }, + ))], + ), + make_struct("MyType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyType".to_string())); + } + + #[test] + fn test_handles_map_with_udt() { + let entries = vec![ + make_function( + "foo", + vec![ScSpecTypeDef::Map(Box::new( + stellar_xdr::curr::ScSpecTypeMap { + key_type: Box::new(udt("KeyType")), + value_type: Box::new(udt("ValueType")), + }, + ))], + ), + make_struct("KeyType", vec![("field", ScSpecTypeDef::U32)]), + make_struct("ValueType", vec![("field", ScSpecTypeDef::U32)]), + 
make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"KeyType".to_string())); + assert!(names.contains(&"ValueType".to_string())); + assert!(!names.contains(&"UnusedType".to_string())); + } + + #[test] + fn test_keeps_enum_referenced_by_function() { + let entries = vec![ + make_function("foo", vec![udt("MyEnum")]), + make_enum("MyEnum"), + make_enum("UnusedEnum"), + ]; + + let filtered = filter_unused_types(entries); + + let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); + assert!(names.contains(&"MyEnum".to_string())); + assert!(!names.contains(&"UnusedEnum".to_string())); + } +} diff --git a/cmd/crates/soroban-spec-tools/src/lib.rs b/cmd/crates/soroban-spec-tools/src/lib.rs index e4dd8c659d..c2796e2195 100644 --- a/cmd/crates/soroban-spec-tools/src/lib.rs +++ b/cmd/crates/soroban-spec-tools/src/lib.rs @@ -16,6 +16,7 @@ use stellar_xdr::curr::{ }; pub mod contract; +pub mod filter; pub mod utils; #[derive(thiserror::Error, Debug)] diff --git a/cmd/soroban-cli/Cargo.toml b/cmd/soroban-cli/Cargo.toml index c3868c91c6..d08bfe612b 100644 --- a/cmd/soroban-cli/Cargo.toml +++ b/cmd/soroban-cli/Cargo.toml @@ -122,7 +122,7 @@ glob = "0.3.1" fqdn = "0.3.12" open = "5.3.0" url = "2.5.2" -wasm-gen = "0.1.4" +wasm-encoder = "0.235.0" zeroize = "1.8.1" keyring = { version = "3", features = ["apple-native", "windows-native", "sync-secret-service", "crypto-rust"], optional = true } whoami = "1.5.2" diff --git a/cmd/soroban-cli/src/commands/contract/build.rs b/cmd/soroban-cli/src/commands/contract/build.rs index 7cd33ab33c..aa25ddb2ce 100644 --- a/cmd/soroban-cli/src/commands/contract/build.rs +++ b/cmd/soroban-cli/src/commands/contract/build.rs @@ -162,6 +162,12 @@ pub enum Error { #[error(transparent)] Wasm(#[from] wasm::Error), + + #[error(transparent)] + SpecTools(#[from] 
soroban_spec_tools::contract::Error), + + #[error(transparent)] + WasmParsing(#[from] wasmparser::BinaryReaderError), } const WASM_TARGET: &str = "wasm32v1-none"; @@ -256,6 +262,7 @@ impl Cmd { .join(&file); self.inject_meta(&target_file_path)?; + Self::filter_spec(&target_file_path)?; let final_path = if let Some(out_dir) = &self.out_dir { fs::create_dir_all(out_dir).map_err(Error::CreatingOutDir)?; @@ -361,14 +368,75 @@ impl Cmd { } fn inject_meta(&self, target_file_path: &PathBuf) -> Result<(), Error> { - let mut wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?; - let xdr = self.encoded_new_meta()?; - wasm_gen::write_custom_section(&mut wasm_bytes, META_CUSTOM_SECTION_NAME, &xdr); + use wasm_encoder::{CustomSection, Module, RawSection}; + use wasmparser::Payload; + + let wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?; + + let mut module = Module::new(); + let mut existing_meta: Vec = Vec::new(); + + let parser = wasmparser::Parser::new(0); + for payload in parser.parse_all(&wasm_bytes) { + let payload = payload?; + + match &payload { + // Collect existing meta to merge with new meta + Payload::CustomSection(section) if section.name() == META_CUSTOM_SECTION_NAME => { + existing_meta.extend_from_slice(section.data()); + } + // Copy all other sections verbatim + _ => { + if let Some((id, range)) = payload.as_section() { + let raw = RawSection { + id, + data: &wasm_bytes[range], + }; + module.section(&raw); + } + } + } + } + + // Append new meta to existing meta + let new_meta = self.encoded_new_meta()?; + existing_meta.extend(new_meta); + + let meta_section = CustomSection { + name: META_CUSTOM_SECTION_NAME.into(), + data: existing_meta.into(), + }; + module.section(&meta_section); + + let updated_wasm = module.finish(); // Deleting .wasm file effectively unlinking it from /release/deps/.wasm preventing from overwrite // See https://github.com/stellar/stellar-cli/issues/1694#issuecomment-2709342205 
fs::remove_file(target_file_path).map_err(Error::DeletingArtifact)?; - fs::write(target_file_path, wasm_bytes).map_err(Error::WritingWasmFile) + fs::write(target_file_path, updated_wasm).map_err(Error::WritingWasmFile) + } + + /// Filters unused types from the contract spec. + /// + /// This removes type definitions that are not referenced by any function, + /// reducing the size of the WASM binary. + fn filter_spec(target_file_path: &PathBuf) -> Result<(), Error> { + use soroban_spec_tools::contract::{replace_custom_section, Spec}; + + let wasm_bytes = fs::read(target_file_path).map_err(Error::ReadingWasmFile)?; + + // Parse the spec from the wasm + let spec = Spec::new(&wasm_bytes)?; + + // Get the filtered spec as XDR bytes + let filtered_xdr = spec.filtered_spec_xdr()?; + + // Replace the contractspecv0 section with the filtered version + let new_wasm = replace_custom_section(&wasm_bytes, "contractspecv0", &filtered_xdr)?; + + // Write the modified wasm back + fs::remove_file(target_file_path).map_err(Error::DeletingArtifact)?; + fs::write(target_file_path, new_wasm).map_err(Error::WritingWasmFile) } fn encoded_new_meta(&self) -> Result, Error> { From ce91eb85b11627481bb57f86a60dd1719ce8c9fb Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:15:56 +1000 Subject: [PATCH 02/11] Remove tests with hard-coded local paths --- cmd/crates/soroban-spec-tools/src/contract.rs | 166 ------------------ 1 file changed, 166 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 753a998bb1..d52eba012d 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -377,169 +377,3 @@ fn format_name(lib: &StringM<80>, name: &StringM<60>) -> String { ) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_filter_spec_on_empty_contract() { - // This test checks that filtering a contract 
with events but no UDT references - // keeps the events (as per design) but would filter any unused UDTs - - // Skip if the file doesn't exist (it's in a different repo) - let wasm_path = "/Users/leighmcculloch/Code/rs-soroban-sdk/tests/empty/out/test_empty.wasm"; - if !std::path::Path::new(wasm_path).exists() { - return; - } - - let wasm_bytes = std::fs::read(wasm_path).unwrap(); - let spec = Spec::new(&wasm_bytes).unwrap(); - - println!("Original spec entries: {}", spec.spec.len()); - for entry in &spec.spec { - match entry { - ScSpecEntry::FunctionV0(f) => { - println!(" Function: {}", f.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::EventV0(e) => { - println!(" Event: {}", e.name.to_utf8_string_lossy()); - } - } - } - - let filtered = spec.filter_unused_types(); - println!("\nFiltered spec entries: {}", filtered.len()); - for entry in &filtered { - match entry { - ScSpecEntry::FunctionV0(f) => { - println!(" Function: {}", f.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()); - } - ScSpecEntry::EventV0(e) => { - println!(" Event: {}", e.name.to_utf8_string_lossy()); - } - } - } - - // The function should still be there - assert!(filtered - .iter() - .any(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); - - // Events 
should be preserved - let event_count = filtered - .iter() - .filter(|e| matches!(e, ScSpecEntry::EventV0(_))) - .count(); - assert!(event_count > 0, "Events should be preserved"); - } - - #[test] - fn test_filter_on_custom_types_contract() { - // Test filtering on the custom_types contract wasm - let wasm_path = "/Users/leighmcculloch/Code/stellar-cli-spec-clean/target/wasm32v1-none/release/test_custom_types.wasm"; - if !std::path::Path::new(wasm_path).exists() { - eprintln!("Skipping test: wasm file not found at {}", wasm_path); - return; - } - - let wasm_bytes = std::fs::read(wasm_path).unwrap(); - let spec = Spec::new(&wasm_bytes).unwrap(); - - println!("\n=== CUSTOM TYPES CONTRACT ==="); - println!("Original spec entries: {}", spec.spec.len()); - - // Count functions and UDTs - let func_count = spec - .spec - .iter() - .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) - .count(); - let udt_count = spec.spec.len() - func_count; - - println!("Functions: {}", func_count); - println!("UDTs: {}", udt_count); - - // List all UDTs before filtering - for entry in &spec.spec { - match entry { - ScSpecEntry::UdtStructV0(s) => { - println!(" Struct: {}", s.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtUnionV0(u) => { - println!(" Union: {}", u.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtEnumV0(e) => { - println!(" Enum: {}", e.name.to_utf8_string_lossy()) - } - ScSpecEntry::UdtErrorEnumV0(e) => { - println!(" ErrorEnum: {}", e.name.to_utf8_string_lossy()) - } - ScSpecEntry::EventV0(e) => println!(" Event: {}", e.name.to_utf8_string_lossy()), - _ => {} - } - } - - let filtered = spec.filter_unused_types(); - println!("\nFiltered spec entries: {}", filtered.len()); - - // All types in this contract are used by functions, so nothing should be filtered - // Verify key types are preserved: - // - Test (used by strukt, strukt_hel, and transitively by ComplexEnum, TupleStruct) - // - SimpleEnum (used by simple, and transitively by ComplexEnum, TupleStruct) - // - 
RoyalCard (used by card) - // - ComplexEnum (used by complex) - // - TupleStruct (used by tuple_strukt) - // - RecursiveEnum (used by recursive_enum) - - let has_test = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtStructV0(s) if s.name.to_utf8_string_lossy() == "Test") - }); - let has_simple_enum = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "SimpleEnum") - }); - let has_complex_enum = filtered.iter().any(|entry| { - matches!(entry, ScSpecEntry::UdtUnionV0(u) if u.name.to_utf8_string_lossy() == "ComplexEnum") - }); - - assert!(has_test, "Test struct should be preserved"); - assert!(has_simple_enum, "SimpleEnum should be preserved"); - assert!(has_complex_enum, "ComplexEnum should be preserved"); - - // All functions should be preserved - let filtered_func_count = filtered - .iter() - .filter(|e| matches!(e, ScSpecEntry::FunctionV0(_))) - .count(); - assert_eq!( - filtered_func_count, func_count, - "All functions should be preserved" - ); - - println!("Filter test passed: all used types and functions preserved"); - } -} From 39493abee2b4ab9e932e63c0c38c89d7baed768a Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:23:15 +1000 Subject: [PATCH 03/11] Fix clippy warnings for needless_continue and match_same_arms --- cmd/crates/soroban-spec-tools/src/contract.rs | 23 ++++++++----------- cmd/crates/soroban-spec-tools/src/filter.rs | 12 ++++------ 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index d52eba012d..3e2dc5be1c 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -174,20 +174,17 @@ pub fn replace_custom_section( for payload in parser.parse_all(wasm_bytes) { let payload = payload?; - match &payload { - // Skip the target custom section - we'll 
append the new one at the end - Payload::CustomSection(section) if section.name() == section_name => { - continue; - } + // Skip the target custom section - we'll append the new one at the end + let dominated = + matches!(&payload, Payload::CustomSection(section) if section.name() == section_name); + if !dominated { // For all other payloads that represent sections, copy them verbatim - _ => { - if let Some((id, range)) = payload.as_section() { - let raw = RawSection { - id, - data: &wasm_bytes[range], - }; - module.section(&raw); - } + if let Some((id, range)) = payload.as_section() { + let raw = RawSection { + id, + data: &wasm_bytes[range], + }; + module.section(&raw); } } } diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index 4bdc192a0d..db7434ad66 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -99,10 +99,8 @@ fn get_entry_type_refs(entry: &ScSpecEntry) -> HashSet { } } } - // Enums and error enums don't reference other types - ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) => {} - // Events are kept unconditionally - ScSpecEntry::EventV0(_) => {} + // Enums, error enums, and events don't reference other types + ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) | ScSpecEntry::EventV0(_) => {} } refs @@ -179,10 +177,8 @@ pub fn filter_unused_types(entries: Vec) -> Vec { .into_iter() .filter(|entry| { match entry { - // Always keep functions - ScSpecEntry::FunctionV0(_) => true, - // Always keep events - ScSpecEntry::EventV0(_) => true, + // Always keep functions and events + ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => true, // Keep UDTs only if they're reachable _ => { if let Some(name) = get_udt_name(entry) { From 6958e164da3ee257b5fbb67186dbaf66a10bb8a5 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 01:24:38 +1000 Subject: [PATCH 04/11] Rename 
variable to is_target_section for clarity --- cmd/crates/soroban-spec-tools/src/contract.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 3e2dc5be1c..1f44886ed6 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -175,9 +175,9 @@ pub fn replace_custom_section( let payload = payload?; // Skip the target custom section - we'll append the new one at the end - let dominated = + let is_target_section = matches!(&payload, Payload::CustomSection(section) if section.name() == section_name); - if !dominated { + if !is_target_section { // For all other payloads that represent sections, copy them verbatim if let Some((id, range)) = payload.as_section() { let raw = RawSection { From 3432f53abca6e1d61b1a206e0ced26974dbfd023 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 19:35:57 +1000 Subject: [PATCH 05/11] add marker-based spec filtering for types and events --- Cargo.lock | 1 + cmd/crates/soroban-spec-tools/Cargo.toml | 1 + cmd/crates/soroban-spec-tools/src/contract.rs | 33 ++ cmd/crates/soroban-spec-tools/src/filter.rs | 329 +++++++++++++++++- .../src/commands/contract/build.rs | 17 +- 5 files changed, 375 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab93ec0e89..6db08c7ee3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5189,6 +5189,7 @@ dependencies = [ "hex", "itertools 0.10.5", "serde_json", + "sha2 0.10.9", "soroban-spec", "stellar-strkey 0.0.15", "stellar-xdr", diff --git a/cmd/crates/soroban-spec-tools/Cargo.toml b/cmd/crates/soroban-spec-tools/Cargo.toml index d2b719eaec..e9938d7ff7 100644 --- a/cmd/crates/soroban-spec-tools/Cargo.toml +++ b/cmd/crates/soroban-spec-tools/Cargo.toml @@ -28,6 +28,7 @@ wasmparser = { workspace = true } base64 = { workspace = true } thiserror = "1.0.31" wasm-encoder = 
"0.235.0" +sha2 = { workspace = true } [dev-dependencies] diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 1f44886ed6..54a0a1e2b3 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -143,6 +143,39 @@ impl Spec { } Ok(buffer) } + + /// Returns the filtered spec entries serialized as XDR bytes, filtering + /// based on markers in the WASM data section. + /// + /// The SDK embeds markers in the data section for each type/event that is + /// actually used in the contract. These markers survive dead code elimination, + /// so we can filter out any spec entries that don't have corresponding markers. + /// + /// Functions are always kept as they define the contract's API. + /// + /// # Arguments + /// + /// * `wasm_bytes` - The WASM binary to extract markers from + /// + /// # Returns + /// + /// XDR bytes of the filtered spec entries. + pub fn filtered_spec_xdr_with_markers(&self, wasm_bytes: &[u8]) -> Result, Error> { + use crate::filter::{extract_spec_markers, filter_by_markers}; + + // Extract markers from the WASM data section + let markers = extract_spec_markers(wasm_bytes); + + // Filter all entries (types, events) based on markers + let filtered = filter_by_markers(self.spec.clone(), &markers); + + let mut buffer = Vec::new(); + let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none()); + for entry in filtered { + entry.write_xdr(&mut writer)?; + } + Ok(buffer) + } } /// Replaces a custom section in WASM bytes with new content. 
diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index db7434ad66..d4f50ed887 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -6,10 +6,122 @@ use std::collections::HashSet; +use sha2::{Digest, Sha256}; use stellar_xdr::curr::{ - ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, + Limits, ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, + WriteXdr, }; +/// Magic bytes that identify a spec marker: "SpEc" +pub const SPEC_MARKER_MAGIC: [u8; 4] = [b'S', b'p', b'E', b'c']; + +/// Length of the hash portion (truncated SHA256 - first 8 bytes / 64 bits). +pub const SPEC_MARKER_HASH_LEN: usize = 8; + +/// Length of the marker: 4-byte prefix + 8-byte truncated SHA256 hash. +pub const SPEC_MARKER_LEN: usize = 4 + SPEC_MARKER_HASH_LEN; + +/// A spec marker hash found in the WASM data section. +/// This is an 8-byte truncated SHA256 hash of the spec entry XDR bytes. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SpecMarkerHash(pub [u8; SPEC_MARKER_HASH_LEN]); + +/// Computes the marker hash for a spec entry. +/// +/// The hash is a truncated SHA256 (first 8 bytes) of the spec entry's XDR bytes. +pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { + let xdr_bytes = entry + .to_xdr(Limits::none()) + .expect("XDR encoding should not fail"); + let mut hasher = Sha256::new(); + hasher.update(&xdr_bytes); + let hash: [u8; 32] = hasher.finalize().into(); + let mut truncated = [0u8; SPEC_MARKER_HASH_LEN]; + truncated.copy_from_slice(&hash[..SPEC_MARKER_HASH_LEN]); + SpecMarkerHash(truncated) +} + +/// Extracts spec markers from the WASM data section. +/// +/// The SDK embeds markers in the data section for each spec entry that is +/// actually used in the contract. These markers survive dead code elimination +/// only if the corresponding type/event is used. 
+/// +/// Marker format: +/// - 4 bytes: "SpEc" magic +/// - 8 bytes: truncated SHA256 hash of the spec entry XDR bytes +pub fn extract_spec_markers(wasm_bytes: &[u8]) -> HashSet { + let mut markers = HashSet::new(); + + for payload in wasmparser::Parser::new(0).parse_all(wasm_bytes) { + let Ok(payload) = payload else { continue }; + + if let wasmparser::Payload::DataSection(reader) = payload { + for data in reader.into_iter().flatten() { + extract_markers_from_data(data.data, &mut markers); + } + } + } + + markers +} + +/// Extracts spec markers from a data segment. +fn extract_markers_from_data(data: &[u8], markers: &mut HashSet) { + // Marker size is exactly 12 bytes: 4 (magic) + 8 (hash) + if data.len() < SPEC_MARKER_LEN { + return; + } + + for i in 0..=data.len() - SPEC_MARKER_LEN { + // Look for magic bytes + if data[i..].starts_with(&SPEC_MARKER_MAGIC) { + let hash_start = i + 4; + let hash_end = hash_start + SPEC_MARKER_HASH_LEN; + let mut hash = [0u8; SPEC_MARKER_HASH_LEN]; + hash.copy_from_slice(&data[hash_start..hash_end]); + markers.insert(SpecMarkerHash(hash)); + } + } +} + +/// Filters spec entries based on markers found in the WASM data section. +/// +/// This removes any spec entries (types, events) that don't have corresponding +/// markers in the data section. The SDK embeds markers for types/events that +/// are actually used, and these markers survive dead code elimination. +/// +/// Functions are always kept as they define the contract's API. +/// +/// # Arguments +/// +/// * `entries` - The spec entries to filter +/// * `markers` - Marker hashes extracted from the WASM data section +/// +/// # Returns +/// +/// Filtered entries with only used types/events remaining. 
+pub fn filter_by_markers( + entries: Vec, + markers: &HashSet, +) -> Vec { + entries + .into_iter() + .filter(|entry| { + match entry { + // Always keep functions - they're the contract's API + ScSpecEntry::FunctionV0(_) => true, + + // For all other entries (types, events), check if marker exists + _ => { + let hash = compute_marker_hash(entry); + markers.contains(&hash) + } + } + }) + .collect() +} + /// Extracts UDT (User Defined Type) names referenced by a type definition. /// /// This function recursively traverses the type structure to find all @@ -454,4 +566,219 @@ mod tests { assert!(names.contains(&"MyEnum".to_string())); assert!(!names.contains(&"UnusedEnum".to_string())); } + + // Helper to encode a marker (matches SDK's spec_marker.rs format) + // Format: "SpEc" (4 bytes) + truncated SHA256 hash (8 bytes) + fn encode_marker(entry: &ScSpecEntry) -> Vec { + let hash = compute_marker_hash(entry); + let mut buf = Vec::new(); + buf.extend_from_slice(&SPEC_MARKER_MAGIC); + buf.extend_from_slice(&hash.0); + buf + } + + use stellar_xdr::curr::{ScSpecEventDataFormat, ScSpecEventV0}; + + fn make_event(name: &str) -> ScSpecEntry { + ScSpecEntry::EventV0(ScSpecEventV0 { + doc: StringM::default(), + lib: StringM::default(), + name: name.try_into().unwrap(), + prefix_topics: VecM::default(), + params: VecM::default(), + data_format: ScSpecEventDataFormat::SingleValue, + }) + } + + #[test] + fn test_compute_marker_hash() { + let entry = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); + let hash = compute_marker_hash(&entry); + + // Hash should be 8 bytes + assert_eq!(hash.0.len(), SPEC_MARKER_HASH_LEN); + + // Same entry produces same hash + let hash2 = compute_marker_hash(&entry); + assert_eq!(hash.0, hash2.0); + + // Different entry produces different hash + let entry2 = make_struct("DifferentStruct", vec![("field", ScSpecTypeDef::U32)]); + let hash3 = compute_marker_hash(&entry2); + assert_ne!(hash.0, hash3.0); + } + + #[test] + fn 
test_encode_marker_format() { + let entry = make_event("Transfer"); + let marker = encode_marker(&entry); + + // Marker should be 12 bytes: 4 (magic) + 8 (hash) + assert_eq!(marker.len(), SPEC_MARKER_LEN); + + // First 4 bytes should be magic + assert_eq!(&marker[..4], &SPEC_MARKER_MAGIC); + } + + #[test] + fn test_extract_markers_from_data() { + let entry1 = make_event("Transfer"); + let entry2 = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); + + let marker1 = encode_marker(&entry1); + let marker2 = encode_marker(&entry2); + + // Concatenate markers with some padding + let mut data = Vec::new(); + data.extend_from_slice(&[0u8; 16]); // Some leading bytes + data.extend_from_slice(&marker1); + data.extend_from_slice(&[0u8; 8]); // Some padding + data.extend_from_slice(&marker2); + data.extend_from_slice(&[0u8; 16]); // Some trailing bytes + + let mut markers = HashSet::new(); + extract_markers_from_data(&data, &mut markers); + + // Both markers should be found + assert!(markers.contains(&compute_marker_hash(&entry1))); + assert!(markers.contains(&compute_marker_hash(&entry2))); + } + + #[test] + fn test_filter_by_markers_keeps_used_events() { + let transfer_event = make_event("Transfer"); + let mint_event = make_event("Mint"); + + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + transfer_event.clone(), + mint_event.clone(), + make_event("Unused"), + ]; + + let mut markers = HashSet::new(); + markers.insert(compute_marker_hash(&transfer_event)); + markers.insert(compute_marker_hash(&mint_event)); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function + 2 used events + assert_eq!(filtered.len(), 3); + + let event_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::EventV0(event) = e { + Some(event.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + + assert!(event_names.contains(&"Transfer".to_string())); + 
assert!(event_names.contains(&"Mint".to_string())); + assert!(!event_names.contains(&"Unused".to_string())); + } + + #[test] + fn test_filter_by_markers_removes_all_events_if_no_markers() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_event("Transfer"), + make_event("Mint"), + ]; + + let markers = HashSet::new(); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function, 0 events + assert_eq!(filtered.len(), 1); + assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); + } + + #[test] + fn test_filter_by_markers_removes_all_if_no_markers() { + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]), + make_enum("MyEnum"), + make_event("Unused"), + ]; + + let markers = HashSet::new(); // No markers + + let filtered = filter_by_markers(entries, &markers); + + // Should have: only functions (always kept), no types or events + assert_eq!(filtered.len(), 1); + assert!(filtered + .iter() + .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); + } + + #[test] + fn test_filter_by_markers_keeps_types_with_markers() { + let used_struct = make_struct("UsedStruct", vec![("field", ScSpecTypeDef::U32)]); + let used_enum = make_enum("UsedEnum"); + let used_event = make_event("UsedEvent"); + + let entries = vec![ + make_function("foo", vec![ScSpecTypeDef::U32]), + used_struct.clone(), + make_struct("UnusedStruct", vec![("field", ScSpecTypeDef::U32)]), + used_enum.clone(), + make_enum("UnusedEnum"), + used_event.clone(), + make_event("UnusedEvent"), + ]; + + let mut markers = HashSet::new(); + markers.insert(compute_marker_hash(&used_struct)); + markers.insert(compute_marker_hash(&used_enum)); + markers.insert(compute_marker_hash(&used_event)); + + let filtered = filter_by_markers(entries, &markers); + + // Should have: 1 function + 1 struct + 1 enum + 1 event + assert_eq!(filtered.len(), 4); + + // Check specific entries + let struct_names: 
Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::UdtStructV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(struct_names, vec!["UsedStruct"]); + + let enum_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::UdtEnumV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(enum_names, vec!["UsedEnum"]); + + let event_names: Vec<_> = filtered + .iter() + .filter_map(|e| { + if let ScSpecEntry::EventV0(s) = e { + Some(s.name.to_utf8_string_lossy()) + } else { + None + } + }) + .collect(); + assert_eq!(event_names, vec!["UsedEvent"]); + } } diff --git a/cmd/soroban-cli/src/commands/contract/build.rs b/cmd/soroban-cli/src/commands/contract/build.rs index aa25ddb2ce..f72a40c129 100644 --- a/cmd/soroban-cli/src/commands/contract/build.rs +++ b/cmd/soroban-cli/src/commands/contract/build.rs @@ -416,10 +416,16 @@ impl Cmd { fs::write(target_file_path, updated_wasm).map_err(Error::WritingWasmFile) } - /// Filters unused types from the contract spec. + /// Filters unused types and events from the contract spec. /// - /// This removes type definitions that are not referenced by any function, - /// reducing the size of the WASM binary. + /// This removes: + /// - Type definitions that are not referenced by any function + /// - Events that don't have corresponding markers in the WASM data section + /// (events that are defined but never published) + /// + /// The SDK embeds markers in the data section for types/events that are + /// actually used. These markers survive dead code elimination, so we can + /// detect which spec entries are truly needed. 
fn filter_spec(target_file_path: &PathBuf) -> Result<(), Error> { use soroban_spec_tools::contract::{replace_custom_section, Spec}; @@ -428,8 +434,9 @@ impl Cmd { // Parse the spec from the wasm let spec = Spec::new(&wasm_bytes)?; - // Get the filtered spec as XDR bytes - let filtered_xdr = spec.filtered_spec_xdr()?; + // Get the filtered spec as XDR bytes, filtering both types and events + // based on markers in the WASM data section + let filtered_xdr = spec.filtered_spec_xdr_with_markers(&wasm_bytes)?; // Replace the contractspecv0 section with the filtered version let new_wasm = replace_custom_section(&wasm_bytes, "contractspecv0", &filtered_xdr)?; From c27baf89834794196864ee246e80b467d87fa167 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 9 Jan 2026 22:39:34 +1000 Subject: [PATCH 06/11] fix: address clippy lints in filter module --- cmd/crates/soroban-spec-tools/src/filter.rs | 29 ++++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index d4f50ed887..83747fa870 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -12,7 +12,7 @@ use stellar_xdr::curr::{ WriteXdr, }; -/// Magic bytes that identify a spec marker: "SpEc" +/// Magic bytes that identify a spec marker: `SpEc` pub const SPEC_MARKER_MAGIC: [u8; 4] = [b'S', b'p', b'E', b'c']; /// Length of the hash portion (truncated SHA256 - first 8 bytes / 64 bits). @@ -29,6 +29,11 @@ pub struct SpecMarkerHash(pub [u8; SPEC_MARKER_HASH_LEN]); /// Computes the marker hash for a spec entry. /// /// The hash is a truncated SHA256 (first 8 bytes) of the spec entry's XDR bytes. +/// +/// # Panics +/// +/// Panics if the spec entry cannot be encoded to XDR, which should never happen +/// for valid `ScSpecEntry` values. 
pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { let xdr_bytes = entry .to_xdr(Limits::none()) @@ -48,7 +53,7 @@ pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { /// only if the corresponding type/event is used. /// /// Marker format: -/// - 4 bytes: "SpEc" magic +/// - 4 bytes: `SpEc` magic /// - 8 bytes: truncated SHA256 hash of the spec entry XDR bytes pub fn extract_spec_markers(wasm_bytes: &[u8]) -> HashSet { let mut markers = HashSet::new(); @@ -101,6 +106,7 @@ fn extract_markers_from_data(data: &[u8], markers: &mut HashSet) /// # Returns /// /// Filtered entries with only used types/events remaining. +#[allow(clippy::implicit_hasher)] pub fn filter_by_markers( entries: Vec, markers: &HashSet, @@ -108,16 +114,13 @@ pub fn filter_by_markers( entries .into_iter() .filter(|entry| { - match entry { - // Always keep functions - they're the contract's API - ScSpecEntry::FunctionV0(_) => true, - - // For all other entries (types, events), check if marker exists - _ => { - let hash = compute_marker_hash(entry); - markers.contains(&hash) - } + // Always keep functions - they're the contract's API + if matches!(entry, ScSpecEntry::FunctionV0(_)) { + return true; } + // For all other entries (types, events), check if marker exists + let hash = compute_marker_hash(entry); + markers.contains(&hash) }) .collect() } @@ -233,8 +236,8 @@ fn get_udt_name(entry: &ScSpecEntry) -> Option { /// /// This function performs a reachability analysis starting from all functions. /// It keeps: -/// - All functions (FunctionV0) -/// - All events (EventV0) +/// - All functions (`FunctionV0`) +/// - All events (`EventV0`) /// - All UDTs that are directly or transitively referenced by functions /// /// Types that are defined but never used by any function are removed. 
From c55e029dbd125229a3a657fd08a4fb12b0559790 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Sat, 10 Jan 2026 01:29:16 +1000 Subject: [PATCH 07/11] fix: rename test variables to avoid similar_names lint --- cmd/crates/soroban-spec-tools/src/filter.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs index 83747fa870..d4edbb5734 100644 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ b/cmd/crates/soroban-spec-tools/src/filter.rs @@ -628,23 +628,23 @@ mod tests { let entry1 = make_event("Transfer"); let entry2 = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); - let marker1 = encode_marker(&entry1); - let marker2 = encode_marker(&entry2); + let encoded1 = encode_marker(&entry1); + let encoded2 = encode_marker(&entry2); // Concatenate markers with some padding let mut data = Vec::new(); data.extend_from_slice(&[0u8; 16]); // Some leading bytes - data.extend_from_slice(&marker1); + data.extend_from_slice(&encoded1); data.extend_from_slice(&[0u8; 8]); // Some padding - data.extend_from_slice(&marker2); + data.extend_from_slice(&encoded2); data.extend_from_slice(&[0u8; 16]); // Some trailing bytes - let mut markers = HashSet::new(); - extract_markers_from_data(&data, &mut markers); + let mut found = HashSet::new(); + extract_markers_from_data(&data, &mut found); // Both markers should be found - assert!(markers.contains(&compute_marker_hash(&entry1))); - assert!(markers.contains(&compute_marker_hash(&entry2))); + assert!(found.contains(&compute_marker_hash(&entry1))); + assert!(found.contains(&compute_marker_hash(&entry2))); } #[test] From c3949e9cc421d43b34d8dc6725153425dcf1f281 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:15:05 +1000 Subject: [PATCH 08/11] add cfg flag for spec optimization using data markers --- 
cmd/soroban-cli/src/commands/contract/build.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmd/soroban-cli/src/commands/contract/build.rs b/cmd/soroban-cli/src/commands/contract/build.rs index d5c8968f46..f8fcced513 100644 --- a/cmd/soroban-cli/src/commands/contract/build.rs +++ b/cmd/soroban-cli/src/commands/contract/build.rs @@ -237,6 +237,11 @@ impl Cmd { cmd.env("CARGO_BUILD_RUSTFLAGS", rustflags); } + // Pass cfg flag to rustc to inform the SDK that this CLI supports + // spec optimization using markers. + cmd.arg("--"); + cmd.arg("--cfg=soroban_sdk_build_system_supports_optimising_specs_using_data_markers"); + let mut cmd_str_parts = Vec::::new(); cmd_str_parts.extend(cmd.get_envs().map(|(key, val)| { format!( From 15af743b4f1426bf64038b0475d7bdb10dee757b Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 30 Jan 2026 20:46:28 +1000 Subject: [PATCH 09/11] add cfg flag for spec optimization using data markers --- cmd/crates/soroban-test/tests/it/build.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/crates/soroban-test/tests/it/build.rs b/cmd/crates/soroban-test/tests/it/build.rs index f9046615df..7795fbfb5b 100644 --- a/cmd/crates/soroban-test/tests/it/build.rs +++ b/cmd/crates/soroban-test/tests/it/build.rs @@ -327,15 +327,20 @@ fn with_flags(expected: &str) -> String { #[cfg(windows)] let registry_prefix = registry_prefix.replace('\\', "/"); + const CFG_FLAG: &str = + "-- --cfg=soroban_sdk_build_system_supports_optimising_specs_using_data_markers"; let vec: Vec<_> = if env::var("RUSTFLAGS").is_ok() { - expected.split('\n').map(ToString::to_string).collect() + expected + .split('\n') + .map(|x| format!("{x} {CFG_FLAG}")) + .collect() } else { expected .split('\n') .map(|x| { let rustflags_value = format!("--remap-path-prefix={registry_prefix}="); let escaped_value = escape(std::borrow::Cow::Borrowed(&rustflags_value)); - format!("CARGO_BUILD_RUSTFLAGS={escaped_value} 
{x}") + format!("CARGO_BUILD_RUSTFLAGS={escaped_value} {x} {CFG_FLAG}") }) .collect() }; From 4bdde5e239f14c3f189c5144e081ee31b84d5af8 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 30 Jan 2026 21:20:09 +1000 Subject: [PATCH 10/11] move CFG_FLAG const to top of with_flags fn --- cmd/crates/soroban-test/tests/it/build.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/crates/soroban-test/tests/it/build.rs b/cmd/crates/soroban-test/tests/it/build.rs index 7795fbfb5b..73f4a6ac03 100644 --- a/cmd/crates/soroban-test/tests/it/build.rs +++ b/cmd/crates/soroban-test/tests/it/build.rs @@ -321,14 +321,15 @@ fn parent_path() -> String { } fn with_flags(expected: &str) -> String { + const CFG_FLAG: &str = + "-- --cfg=soroban_sdk_build_system_supports_optimising_specs_using_data_markers"; + let cargo_home = home::cargo_home().unwrap(); let registry_prefix = cargo_home.join("registry").join("src"); let registry_prefix = registry_prefix.display().to_string(); #[cfg(windows)] let registry_prefix = registry_prefix.replace('\\', "/"); - const CFG_FLAG: &str = - "-- --cfg=soroban_sdk_build_system_supports_optimising_specs_using_data_markers"; let vec: Vec<_> = if env::var("RUSTFLAGS").is_ok() { expected .split('\n') From 2373e76a6135bf89a4ee60f1bb08a80e0ecb1474 Mon Sep 17 00:00:00 2001 From: Leigh <351529+leighmcculloch@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:46:47 +1000 Subject: [PATCH 11/11] use marker module from soroban-spec for filtering --- Cargo.lock | 4 +- Cargo.toml | 2 + cmd/crates/soroban-spec-tools/Cargo.toml | 1 - cmd/crates/soroban-spec-tools/src/contract.rs | 30 +- cmd/crates/soroban-spec-tools/src/filter.rs | 787 ------------------ cmd/crates/soroban-spec-tools/src/lib.rs | 1 - 6 files changed, 6 insertions(+), 819 deletions(-) delete mode 100644 cmd/crates/soroban-spec-tools/src/filter.rs diff --git a/Cargo.lock b/Cargo.lock index ee0c105ff7..f772cf82ba 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -5194,10 +5194,9 @@ dependencies = [ [[package]] name = "soroban-spec" version = "25.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c79501d0636f86fe2c9b1dd7e88b9397415b3493a59b34f466abd7758c84b92b" dependencies = [ "base64 0.22.1", + "sha2 0.10.9", "stellar-xdr", "thiserror 1.0.69", "wasmparser 0.116.1", @@ -5242,7 +5241,6 @@ dependencies = [ "hex", "itertools 0.10.5", "serde_json", - "sha2 0.10.9", "soroban-spec", "stellar-strkey 0.0.15", "stellar-xdr", diff --git a/Cargo.toml b/Cargo.toml index d1016a0f4a..b7d641b91d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -138,3 +138,5 @@ lto = true inherits = "release" panic = "unwind" +[patch.crates-io.soroban-spec] +path = "../rs-soroban-sdk-spec-markers/soroban-spec" diff --git a/cmd/crates/soroban-spec-tools/Cargo.toml b/cmd/crates/soroban-spec-tools/Cargo.toml index e9938d7ff7..d2b719eaec 100644 --- a/cmd/crates/soroban-spec-tools/Cargo.toml +++ b/cmd/crates/soroban-spec-tools/Cargo.toml @@ -28,7 +28,6 @@ wasmparser = { workspace = true } base64 = { workspace = true } thiserror = "1.0.31" wasm-encoder = "0.235.0" -sha2 = { workspace = true } [dev-dependencies] diff --git a/cmd/crates/soroban-spec-tools/src/contract.rs b/cmd/crates/soroban-spec-tools/src/contract.rs index 54a0a1e2b3..deb61468bf 100644 --- a/cmd/crates/soroban-spec-tools/src/contract.rs +++ b/cmd/crates/soroban-spec-tools/src/contract.rs @@ -120,30 +120,6 @@ impl Spec { )) } - /// Returns a filtered version of the spec with unused types removed. - /// - /// This removes any type definitions that are not referenced (directly or - /// transitively) by any function in the contract. Functions and events are - /// always preserved. - #[must_use] - pub fn filter_unused_types(&self) -> Vec { - crate::filter::filter_unused_types(self.spec.clone()) - } - - /// Returns the filtered spec entries serialized as XDR bytes. 
- /// - /// This is useful for replacing the contractspecv0 custom section in a WASM - /// file with a smaller version that only contains used types. - pub fn filtered_spec_xdr(&self) -> Result, Error> { - let filtered = self.filter_unused_types(); - let mut buffer = Vec::new(); - let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none()); - for entry in filtered { - entry.write_xdr(&mut writer)?; - } - Ok(buffer) - } - /// Returns the filtered spec entries serialized as XDR bytes, filtering /// based on markers in the WASM data section. /// @@ -161,13 +137,13 @@ impl Spec { /// /// XDR bytes of the filtered spec entries. pub fn filtered_spec_xdr_with_markers(&self, wasm_bytes: &[u8]) -> Result, Error> { - use crate::filter::{extract_spec_markers, filter_by_markers}; + use soroban_spec::marker; // Extract markers from the WASM data section - let markers = extract_spec_markers(wasm_bytes); + let markers = marker::find_all(wasm_bytes); // Filter all entries (types, events) based on markers - let filtered = filter_by_markers(self.spec.clone(), &markers); + let filtered = marker::filter(self.spec.clone(), &markers); let mut buffer = Vec::new(); let mut writer = Limited::new(Cursor::new(&mut buffer), Limits::none()); diff --git a/cmd/crates/soroban-spec-tools/src/filter.rs b/cmd/crates/soroban-spec-tools/src/filter.rs deleted file mode 100644 index 42fde8db0a..0000000000 --- a/cmd/crates/soroban-spec-tools/src/filter.rs +++ /dev/null @@ -1,787 +0,0 @@ -//! Filter unused types from contract spec entries. -//! -//! This module provides functionality to remove type definitions that are not -//! referenced by any function in the contract spec. This helps reduce WASM size -//! by eliminating unnecessary spec entries. 
- -use std::collections::HashSet; - -use sha2::{Digest, Sha256}; -use stellar_xdr::curr::{ - Limits, ScSpecEntry, ScSpecTypeDef, ScSpecUdtStructV0, ScSpecUdtUnionCaseV0, ScSpecUdtUnionV0, - WriteXdr, -}; - -/// Magic bytes that identify a spec marker: `SpEc` -pub const SPEC_MARKER_MAGIC: [u8; 4] = [b'S', b'p', b'E', b'c']; - -/// Length of the hash portion (truncated SHA256 - first 8 bytes / 64 bits). -pub const SPEC_MARKER_HASH_LEN: usize = 8; - -/// Length of the marker: 4-byte prefix + 8-byte truncated SHA256 hash. -pub const SPEC_MARKER_LEN: usize = 4 + SPEC_MARKER_HASH_LEN; - -/// A spec marker hash found in the WASM data section. -/// This is an 8-byte truncated SHA256 hash of the spec entry XDR bytes. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SpecMarkerHash(pub [u8; SPEC_MARKER_HASH_LEN]); - -/// Computes the marker hash for a spec entry. -/// -/// The hash is a truncated SHA256 (first 8 bytes) of the spec entry's XDR bytes. -/// -/// # Panics -/// -/// Panics if the spec entry cannot be encoded to XDR, which should never happen -/// for valid `ScSpecEntry` values. -pub fn compute_marker_hash(entry: &ScSpecEntry) -> SpecMarkerHash { - let xdr_bytes = entry - .to_xdr(Limits::none()) - .expect("XDR encoding should not fail"); - let mut hasher = Sha256::new(); - hasher.update(&xdr_bytes); - let hash: [u8; 32] = hasher.finalize().into(); - let mut truncated = [0u8; SPEC_MARKER_HASH_LEN]; - truncated.copy_from_slice(&hash[..SPEC_MARKER_HASH_LEN]); - SpecMarkerHash(truncated) -} - -/// Extracts spec markers from the WASM data section. -/// -/// The SDK embeds markers in the data section for each spec entry that is -/// actually used in the contract. These markers survive dead code elimination -/// only if the corresponding type/event is used. 
-/// -/// Marker format: -/// - 4 bytes: `SpEc` magic -/// - 8 bytes: truncated SHA256 hash of the spec entry XDR bytes -pub fn extract_spec_markers(wasm_bytes: &[u8]) -> HashSet { - let mut markers = HashSet::new(); - - for payload in wasmparser::Parser::new(0).parse_all(wasm_bytes) { - let Ok(payload) = payload else { continue }; - - if let wasmparser::Payload::DataSection(reader) = payload { - for data in reader.into_iter().flatten() { - extract_markers_from_data(data.data, &mut markers); - } - } - } - - markers -} - -/// Extracts spec markers from a data segment. -fn extract_markers_from_data(data: &[u8], markers: &mut HashSet) { - // Marker size is exactly 12 bytes: 4 (magic) + 8 (hash) - if data.len() < SPEC_MARKER_LEN { - return; - } - - for i in 0..=data.len() - SPEC_MARKER_LEN { - // Look for magic bytes - if data[i..].starts_with(&SPEC_MARKER_MAGIC) { - let hash_start = i + 4; - let hash_end = hash_start + SPEC_MARKER_HASH_LEN; - let mut hash = [0u8; SPEC_MARKER_HASH_LEN]; - hash.copy_from_slice(&data[hash_start..hash_end]); - markers.insert(SpecMarkerHash(hash)); - } - } -} - -/// Filters spec entries based on markers found in the WASM data section. -/// -/// This removes any spec entries (types, events) that don't have corresponding -/// markers in the data section. The SDK embeds markers for types/events that -/// are actually used, and these markers survive dead code elimination. -/// -/// Functions are always kept as they define the contract's API. -/// -/// # Arguments -/// -/// * `entries` - The spec entries to filter -/// * `markers` - Marker hashes extracted from the WASM data section -/// -/// # Returns -/// -/// Filtered entries with only used types/events remaining. 
-#[allow(clippy::implicit_hasher)] -pub fn filter_by_markers( - entries: Vec, - markers: &HashSet, -) -> Vec { - entries - .into_iter() - .filter(|entry| { - // Always keep functions - they're the contract's API - if matches!(entry, ScSpecEntry::FunctionV0(_)) { - return true; - } - // For all other entries (types, events), check if marker exists - let hash = compute_marker_hash(entry); - markers.contains(&hash) - }) - .collect() -} - -/// Extracts UDT (User Defined Type) names referenced by a type definition. -/// -/// This function recursively traverses the type structure to find all -/// references to user-defined types. -fn get_type_refs(type_def: &ScSpecTypeDef) -> HashSet { - let mut refs = HashSet::new(); - - match type_def { - // Primitive types have no UDT references - ScSpecTypeDef::Val - | ScSpecTypeDef::U64 - | ScSpecTypeDef::I64 - | ScSpecTypeDef::U128 - | ScSpecTypeDef::I128 - | ScSpecTypeDef::U32 - | ScSpecTypeDef::I32 - | ScSpecTypeDef::U256 - | ScSpecTypeDef::I256 - | ScSpecTypeDef::Bool - | ScSpecTypeDef::Symbol - | ScSpecTypeDef::Error - | ScSpecTypeDef::Bytes - | ScSpecTypeDef::BytesN(_) - | ScSpecTypeDef::Void - | ScSpecTypeDef::Timepoint - | ScSpecTypeDef::Duration - | ScSpecTypeDef::String - | ScSpecTypeDef::Address - | ScSpecTypeDef::MuxedAddress => {} - - // UDT reference - add the type name - ScSpecTypeDef::Udt(udt) => { - refs.insert(udt.name.to_utf8_string_lossy()); - } - - // Composite types - recurse into contained types - ScSpecTypeDef::Vec(vec_type) => { - refs.extend(get_type_refs(&vec_type.element_type)); - } - ScSpecTypeDef::Map(map_type) => { - refs.extend(get_type_refs(&map_type.key_type)); - refs.extend(get_type_refs(&map_type.value_type)); - } - ScSpecTypeDef::Option(opt_type) => { - refs.extend(get_type_refs(&opt_type.value_type)); - } - ScSpecTypeDef::Result(result_type) => { - refs.extend(get_type_refs(&result_type.ok_type)); - refs.extend(get_type_refs(&result_type.error_type)); - } - ScSpecTypeDef::Tuple(tuple_type) => { 
- for value_type in tuple_type.value_types.iter() { - refs.extend(get_type_refs(value_type)); - } - } - } - - refs -} - -/// Extracts all UDT names referenced by a spec entry. -fn get_entry_type_refs(entry: &ScSpecEntry) -> HashSet { - let mut refs = HashSet::new(); - - match entry { - ScSpecEntry::FunctionV0(func) => { - // Collect types from inputs - for input in func.inputs.iter() { - refs.extend(get_type_refs(&input.type_)); - } - // Collect types from outputs - for output in func.outputs.iter() { - refs.extend(get_type_refs(output)); - } - } - ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { fields, .. }) => { - for field in fields.iter() { - refs.extend(get_type_refs(&field.type_)); - } - } - ScSpecEntry::UdtUnionV0(ScSpecUdtUnionV0 { cases, .. }) => { - for case in cases.iter() { - if let ScSpecUdtUnionCaseV0::TupleV0(tuple_case) = case { - for type_def in tuple_case.type_.iter() { - refs.extend(get_type_refs(type_def)); - } - } - } - } - // Enums, error enums, and events don't reference other types - ScSpecEntry::UdtEnumV0(_) | ScSpecEntry::UdtErrorEnumV0(_) | ScSpecEntry::EventV0(_) => {} - } - - refs -} - -/// Gets the name of a UDT entry, or None if it's not a UDT. -fn get_udt_name(entry: &ScSpecEntry) -> Option { - match entry { - ScSpecEntry::UdtStructV0(s) => Some(s.name.to_utf8_string_lossy()), - ScSpecEntry::UdtUnionV0(u) => Some(u.name.to_utf8_string_lossy()), - ScSpecEntry::UdtEnumV0(e) => Some(e.name.to_utf8_string_lossy()), - ScSpecEntry::UdtErrorEnumV0(e) => Some(e.name.to_utf8_string_lossy()), - ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => None, - } -} - -/// Filters out unused types from contract spec entries. -/// -/// This function performs a reachability analysis starting from all functions. -/// It keeps: -/// - All functions (`FunctionV0`) -/// - All events (`EventV0`) -/// - All UDTs that are directly or transitively referenced by functions -/// -/// Types that are defined but never used by any function are removed. 
-/// -/// # Example -/// -/// If a contract has: -/// - Function `foo` that takes `TypeA` as input -/// - `TypeA` which references `TypeB` in a field -/// - `TypeC` which is defined but never used -/// -/// The result will include `foo`, `TypeA`, and `TypeB`, but not `TypeC`. -pub fn filter_unused_types(entries: Vec) -> Vec { - // Build a map from type name to entry for lookup - let type_entries: std::collections::HashMap = entries - .iter() - .filter_map(|entry| get_udt_name(entry).map(|name| (name, entry))) - .collect(); - - // Collect initial references from all functions - let mut reachable_types: HashSet = HashSet::new(); - for entry in &entries { - if matches!(entry, ScSpecEntry::FunctionV0(_)) { - reachable_types.extend(get_entry_type_refs(entry)); - } - } - - // Fixed-point iteration: keep adding types referenced by reachable types - // until no new types are found - loop { - let mut new_types: HashSet = HashSet::new(); - - for type_name in &reachable_types { - if let Some(entry) = type_entries.get(type_name) { - for referenced_type in get_entry_type_refs(entry) { - if !reachable_types.contains(&referenced_type) { - new_types.insert(referenced_type); - } - } - } - } - - if new_types.is_empty() { - break; - } - - reachable_types.extend(new_types); - } - - // Filter entries: keep functions, events, and reachable UDTs - entries - .into_iter() - .filter(|entry| { - match entry { - // Always keep functions and events - ScSpecEntry::FunctionV0(_) | ScSpecEntry::EventV0(_) => true, - // Keep UDTs only if they're reachable - _ => { - if let Some(name) = get_udt_name(entry) { - reachable_types.contains(&name) - } else { - true - } - } - } - }) - .collect() -} - -#[cfg(test)] -mod tests { - use super::*; - use stellar_xdr::curr::{ - ScSpecFunctionInputV0, ScSpecFunctionV0, ScSpecTypeUdt, ScSpecUdtEnumCaseV0, - ScSpecUdtEnumV0, ScSpecUdtErrorEnumCaseV0, ScSpecUdtErrorEnumV0, ScSpecUdtStructFieldV0, - StringM, VecM, - }; - - fn make_function(name: &str, input_types: 
Vec) -> ScSpecEntry { - let inputs = input_types - .into_iter() - .enumerate() - .map(|(i, type_)| ScSpecFunctionInputV0 { - doc: StringM::default(), - name: format!("arg{i}").try_into().unwrap(), - type_, - }) - .collect::>() - .try_into() - .unwrap(); - - ScSpecEntry::FunctionV0(ScSpecFunctionV0 { - doc: StringM::default(), - name: name.try_into().unwrap(), - inputs, - outputs: VecM::default(), - }) - } - - fn make_struct(name: &str, field_types: Vec<(&str, ScSpecTypeDef)>) -> ScSpecEntry { - let fields = field_types - .into_iter() - .map(|(field_name, type_)| ScSpecUdtStructFieldV0 { - doc: StringM::default(), - name: field_name.try_into().unwrap(), - type_, - }) - .collect::>() - .try_into() - .unwrap(); - - ScSpecEntry::UdtStructV0(ScSpecUdtStructV0 { - doc: StringM::default(), - lib: StringM::default(), - name: name.try_into().unwrap(), - fields, - }) - } - - fn make_enum(name: &str) -> ScSpecEntry { - ScSpecEntry::UdtEnumV0(ScSpecUdtEnumV0 { - doc: StringM::default(), - lib: StringM::default(), - name: name.try_into().unwrap(), - cases: vec![ScSpecUdtEnumCaseV0 { - doc: StringM::default(), - name: "Variant".try_into().unwrap(), - value: 0, - }] - .try_into() - .unwrap(), - }) - } - - fn make_error_enum(name: &str) -> ScSpecEntry { - ScSpecEntry::UdtErrorEnumV0(ScSpecUdtErrorEnumV0 { - doc: StringM::default(), - lib: StringM::default(), - name: name.try_into().unwrap(), - cases: vec![ScSpecUdtErrorEnumCaseV0 { - doc: StringM::default(), - name: "Error".try_into().unwrap(), - value: 1, - }] - .try_into() - .unwrap(), - }) - } - - fn udt(name: &str) -> ScSpecTypeDef { - ScSpecTypeDef::Udt(ScSpecTypeUdt { - name: name.try_into().unwrap(), - }) - } - - #[test] - fn test_removes_unused_type() { - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), - make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), - ]; - - let filtered = filter_unused_types(entries); - - 
assert_eq!(filtered.len(), 1); - assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); - } - - #[test] - fn test_keeps_directly_referenced_type() { - let entries = vec![ - make_function("foo", vec![udt("UsedType")]), - make_struct("UsedType", vec![("field", ScSpecTypeDef::U32)]), - make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), - ]; - - let filtered = filter_unused_types(entries); - - assert_eq!(filtered.len(), 2); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"UsedType".to_string())); - assert!(!names.contains(&"UnusedType".to_string())); - } - - #[test] - fn test_keeps_transitively_referenced_type() { - let entries = vec![ - make_function("foo", vec![udt("TypeA")]), - make_struct("TypeA", vec![("field", udt("TypeB"))]), - make_struct("TypeB", vec![("field", ScSpecTypeDef::U32)]), - make_struct("UnusedType", vec![("field", ScSpecTypeDef::U32)]), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"TypeA".to_string())); - assert!(names.contains(&"TypeB".to_string())); - assert!(!names.contains(&"UnusedType".to_string())); - } - - #[test] - fn test_keeps_all_functions() { - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - make_function("bar", vec![ScSpecTypeDef::Bool]), - ]; - - let filtered = filter_unused_types(entries); - - assert_eq!(filtered.len(), 2); - assert!(filtered - .iter() - .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); - } - - #[test] - fn test_removes_unused_error_enum() { - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - make_error_enum("UsedError"), - make_error_enum("UnusedError"), - ]; - - let filtered = filter_unused_types(entries); - - // Only function should remain, no error enums are referenced - assert_eq!(filtered.len(), 1); - assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); - } - - #[test] - fn 
test_keeps_error_enum_in_result() { - let entries = vec![ - make_function( - "foo", - vec![ScSpecTypeDef::Result(Box::new( - stellar_xdr::curr::ScSpecTypeResult { - ok_type: Box::new(ScSpecTypeDef::U32), - error_type: Box::new(udt("MyError")), - }, - ))], - ), - make_error_enum("MyError"), - make_error_enum("UnusedError"), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"MyError".to_string())); - assert!(!names.contains(&"UnusedError".to_string())); - } - - #[test] - fn test_handles_circular_references() { - // TypeA references TypeB, TypeB references TypeA - let entries = vec![ - make_function("foo", vec![udt("TypeA")]), - make_struct("TypeA", vec![("b", udt("TypeB"))]), - make_struct("TypeB", vec![("a", udt("TypeA"))]), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"TypeA".to_string())); - assert!(names.contains(&"TypeB".to_string())); - } - - #[test] - fn test_handles_vec_of_udt() { - let entries = vec![ - make_function( - "foo", - vec![ScSpecTypeDef::Vec(Box::new( - stellar_xdr::curr::ScSpecTypeVec { - element_type: Box::new(udt("MyType")), - }, - ))], - ), - make_struct("MyType", vec![("field", ScSpecTypeDef::U32)]), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"MyType".to_string())); - } - - #[test] - fn test_handles_map_with_udt() { - let entries = vec![ - make_function( - "foo", - vec![ScSpecTypeDef::Map(Box::new( - stellar_xdr::curr::ScSpecTypeMap { - key_type: Box::new(udt("KeyType")), - value_type: Box::new(udt("ValueType")), - }, - ))], - ), - make_struct("KeyType", vec![("field", ScSpecTypeDef::U32)]), - make_struct("ValueType", vec![("field", ScSpecTypeDef::U32)]), - make_struct("UnusedType", vec![("field", 
ScSpecTypeDef::U32)]), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"KeyType".to_string())); - assert!(names.contains(&"ValueType".to_string())); - assert!(!names.contains(&"UnusedType".to_string())); - } - - #[test] - fn test_keeps_enum_referenced_by_function() { - let entries = vec![ - make_function("foo", vec![udt("MyEnum")]), - make_enum("MyEnum"), - make_enum("UnusedEnum"), - ]; - - let filtered = filter_unused_types(entries); - - let names: Vec<_> = filtered.iter().filter_map(get_udt_name).collect(); - assert!(names.contains(&"MyEnum".to_string())); - assert!(!names.contains(&"UnusedEnum".to_string())); - } - - // Helper to encode a marker (matches SDK's spec_marker.rs format) - // Format: "SpEc" (4 bytes) + truncated SHA256 hash (8 bytes) - fn encode_marker(entry: &ScSpecEntry) -> Vec { - let hash = compute_marker_hash(entry); - let mut buf = Vec::new(); - buf.extend_from_slice(&SPEC_MARKER_MAGIC); - buf.extend_from_slice(&hash.0); - buf - } - - use stellar_xdr::curr::{ScSpecEventDataFormat, ScSpecEventV0}; - - fn make_event(name: &str) -> ScSpecEntry { - ScSpecEntry::EventV0(ScSpecEventV0 { - doc: StringM::default(), - lib: StringM::default(), - name: name.try_into().unwrap(), - prefix_topics: VecM::default(), - params: VecM::default(), - data_format: ScSpecEventDataFormat::SingleValue, - }) - } - - #[test] - fn test_compute_marker_hash() { - let entry = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); - let hash = compute_marker_hash(&entry); - - // Hash should be 8 bytes - assert_eq!(hash.0.len(), SPEC_MARKER_HASH_LEN); - - // Same entry produces same hash - let hash2 = compute_marker_hash(&entry); - assert_eq!(hash.0, hash2.0); - - // Different entry produces different hash - let entry2 = make_struct("DifferentStruct", vec![("field", ScSpecTypeDef::U32)]); - let hash3 = compute_marker_hash(&entry2); - assert_ne!(hash.0, hash3.0); - 
} - - #[test] - fn test_encode_marker_format() { - let entry = make_event("Transfer"); - let marker = encode_marker(&entry); - - // Marker should be 12 bytes: 4 (magic) + 8 (hash) - assert_eq!(marker.len(), SPEC_MARKER_LEN); - - // First 4 bytes should be magic - assert_eq!(&marker[..4], &SPEC_MARKER_MAGIC); - } - - #[test] - fn test_extract_markers_from_data() { - let entry1 = make_event("Transfer"); - let entry2 = make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]); - - let encoded1 = encode_marker(&entry1); - let encoded2 = encode_marker(&entry2); - - // Concatenate markers with some padding - let mut data = Vec::new(); - data.extend_from_slice(&[0u8; 16]); // Some leading bytes - data.extend_from_slice(&encoded1); - data.extend_from_slice(&[0u8; 8]); // Some padding - data.extend_from_slice(&encoded2); - data.extend_from_slice(&[0u8; 16]); // Some trailing bytes - - let mut found = HashSet::new(); - extract_markers_from_data(&data, &mut found); - - // Both markers should be found - assert!(found.contains(&compute_marker_hash(&entry1))); - assert!(found.contains(&compute_marker_hash(&entry2))); - } - - #[test] - fn test_filter_by_markers_keeps_used_events() { - let transfer_event = make_event("Transfer"); - let mint_event = make_event("Mint"); - - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - transfer_event.clone(), - mint_event.clone(), - make_event("Unused"), - ]; - - let mut markers = HashSet::new(); - markers.insert(compute_marker_hash(&transfer_event)); - markers.insert(compute_marker_hash(&mint_event)); - - let filtered = filter_by_markers(entries, &markers); - - // Should have: 1 function + 2 used events - assert_eq!(filtered.len(), 3); - - let event_names: Vec<_> = filtered - .iter() - .filter_map(|e| { - if let ScSpecEntry::EventV0(event) = e { - Some(event.name.to_utf8_string_lossy()) - } else { - None - } - }) - .collect(); - - assert!(event_names.contains(&"Transfer".to_string())); - 
assert!(event_names.contains(&"Mint".to_string())); - assert!(!event_names.contains(&"Unused".to_string())); - } - - #[test] - fn test_filter_by_markers_removes_all_events_if_no_markers() { - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - make_event("Transfer"), - make_event("Mint"), - ]; - - let markers = HashSet::new(); - - let filtered = filter_by_markers(entries, &markers); - - // Should have: 1 function, 0 events - assert_eq!(filtered.len(), 1); - assert!(matches!(filtered[0], ScSpecEntry::FunctionV0(_))); - } - - #[test] - fn test_filter_by_markers_removes_all_if_no_markers() { - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - make_struct("MyStruct", vec![("field", ScSpecTypeDef::U32)]), - make_enum("MyEnum"), - make_event("Unused"), - ]; - - let markers = HashSet::new(); // No markers - - let filtered = filter_by_markers(entries, &markers); - - // Should have: only functions (always kept), no types or events - assert_eq!(filtered.len(), 1); - assert!(filtered - .iter() - .all(|e| matches!(e, ScSpecEntry::FunctionV0(_)))); - } - - #[test] - fn test_filter_by_markers_keeps_types_with_markers() { - let used_struct = make_struct("UsedStruct", vec![("field", ScSpecTypeDef::U32)]); - let used_enum = make_enum("UsedEnum"); - let used_event = make_event("UsedEvent"); - - let entries = vec![ - make_function("foo", vec![ScSpecTypeDef::U32]), - used_struct.clone(), - make_struct("UnusedStruct", vec![("field", ScSpecTypeDef::U32)]), - used_enum.clone(), - make_enum("UnusedEnum"), - used_event.clone(), - make_event("UnusedEvent"), - ]; - - let mut markers = HashSet::new(); - markers.insert(compute_marker_hash(&used_struct)); - markers.insert(compute_marker_hash(&used_enum)); - markers.insert(compute_marker_hash(&used_event)); - - let filtered = filter_by_markers(entries, &markers); - - // Should have: 1 function + 1 struct + 1 enum + 1 event - assert_eq!(filtered.len(), 4); - - // Check specific entries - let struct_names: 
Vec<_> = filtered - .iter() - .filter_map(|e| { - if let ScSpecEntry::UdtStructV0(s) = e { - Some(s.name.to_utf8_string_lossy()) - } else { - None - } - }) - .collect(); - assert_eq!(struct_names, vec!["UsedStruct"]); - - let enum_names: Vec<_> = filtered - .iter() - .filter_map(|e| { - if let ScSpecEntry::UdtEnumV0(s) = e { - Some(s.name.to_utf8_string_lossy()) - } else { - None - } - }) - .collect(); - assert_eq!(enum_names, vec!["UsedEnum"]); - - let event_names: Vec<_> = filtered - .iter() - .filter_map(|e| { - if let ScSpecEntry::EventV0(s) = e { - Some(s.name.to_utf8_string_lossy()) - } else { - None - } - }) - .collect(); - assert_eq!(event_names, vec!["UsedEvent"]); - } -} diff --git a/cmd/crates/soroban-spec-tools/src/lib.rs b/cmd/crates/soroban-spec-tools/src/lib.rs index c2796e2195..e4dd8c659d 100644 --- a/cmd/crates/soroban-spec-tools/src/lib.rs +++ b/cmd/crates/soroban-spec-tools/src/lib.rs @@ -16,7 +16,6 @@ use stellar_xdr::curr::{ }; pub mod contract; -pub mod filter; pub mod utils; #[derive(thiserror::Error, Debug)]