diff --git a/crates/fuzzing/src/generators/gc_ops/limits.rs b/crates/fuzzing/src/generators/gc_ops/limits.rs index 4600fc9f52dd..12bdb069508a 100644 --- a/crates/fuzzing/src/generators/gc_ops/limits.rs +++ b/crates/fuzzing/src/generators/gc_ops/limits.rs @@ -19,7 +19,7 @@ pub const MAX_FIELDS_RANGE: RangeInclusive = 0..=8; pub const MAX_OPS: usize = 100; /// Limits controlling the structure of a generated Wasm module. -#[derive(Debug, Default, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct GcOpsLimits { pub(crate) num_params: u32, pub(crate) num_globals: u32, @@ -29,11 +29,22 @@ pub struct GcOpsLimits { pub(crate) max_fields: u32, } +impl Default for GcOpsLimits { + fn default() -> Self { + Self { + num_params: 5, + num_globals: 5, + table_size: 5, + max_rec_groups: 5, + max_types: 5, + max_fields: 5, + } + } +} + impl GcOpsLimits { /// Fixup the limits to ensure they are within the valid range. pub(crate) fn fixup(&mut self) { - // NB: Exhaustively match so that we remember to fixup any other new - // limits we add in the future. let Self { num_params, num_globals, diff --git a/crates/fuzzing/src/generators/gc_ops/mutator.rs b/crates/fuzzing/src/generators/gc_ops/mutator.rs index 8f8f47d3e23b..bf16b1b4a619 100644 --- a/crates/fuzzing/src/generators/gc_ops/mutator.rs +++ b/crates/fuzzing/src/generators/gc_ops/mutator.rs @@ -391,9 +391,14 @@ impl TypesMutator { c: &mut Candidates<'_>, types: &mut Types, ) -> mutatis::Result<()> { + // Snapshot target types up front so fields can reference any type (incl. self) without borrowing `types`. 
+ let candidates: Vec = types.type_defs.keys().copied().collect(); for (_, def) in types.type_defs.iter_mut() { let CompositeType::Struct(ref mut st) = def.composite_type; - m::vec(StructFieldMutator).mutate(c, &mut st.fields)?; + m::vec(StructFieldMutator { + candidates: &candidates, + }) + .mutate(c, &mut st.fields)?; } Ok(()) } @@ -419,16 +424,17 @@ impl TypesMutator { } } -/// Mutator for [`StructField`]: used by `m::vec` to add, remove, and -/// modify fields within a struct type. -#[derive(Debug, Default)] -pub struct StructFieldMutator; +/// Mutator for [`StructField`] (add/remove/modify fields). +#[derive(Debug)] +pub struct StructFieldMutator<'a> { + candidates: &'a [TypeId], +} -impl Mutate for StructFieldMutator { +impl Mutate for StructFieldMutator<'_> { fn mutate(&mut self, c: &mut Candidates<'_>, field: &mut StructField) -> MutResult<()> { c.mutation(|ctx| { let old = format!("{field:?}"); - field.field_type = FieldType::random(ctx.rng()); + field.field_type = FieldType::generate(ctx.rng(), self.candidates); field.mutable = (ctx.rng().gen_u32() % 2) == 0; log::debug!("Mutated field {old} -> {field:?}"); Ok(()) @@ -437,10 +443,10 @@ impl Mutate for StructFieldMutator { } } -impl Generate for StructFieldMutator { +impl Generate for StructFieldMutator<'_> { fn generate(&mut self, ctx: &mut Context) -> MutResult { let field = StructField { - field_type: FieldType::random(ctx.rng()), + field_type: FieldType::generate(ctx.rng(), self.candidates), mutable: (ctx.rng().gen_u32() % 2) == 0, }; log::debug!("Generated field {field:?}"); @@ -448,10 +454,6 @@ impl Generate for StructFieldMutator { } } -impl DefaultMutate for StructField { - type DefaultMutate = StructFieldMutator; -} - /// Mutator for [`GcOps`]. 
/// /// Also implements [`Mutate`] / [`Generate`] for [`GcOp`] so `m::vec` can mutate diff --git a/crates/fuzzing/src/generators/gc_ops/ops.rs b/crates/fuzzing/src/generators/gc_ops/ops.rs index 6f2fced4f035..fd9257463dfe 100644 --- a/crates/fuzzing/src/generators/gc_ops/ops.rs +++ b/crates/fuzzing/src/generators/gc_ops/ops.rs @@ -154,7 +154,7 @@ impl GcOps { .fields .iter() .map(|f| wasm_encoder::FieldType { - element_type: f.field_type.to_storage_type(), + element_type: f.field_type.to_storage_type(&type_ids_to_index), mutable: f.mutable, }) .collect(); @@ -425,6 +425,7 @@ impl GcOps { storage_bases, &self.types, &encoding_order, + &type_ids_to_index, ); } func.instruction(&Instruction::Br(0)); @@ -1021,6 +1022,7 @@ impl GcOp { encoding_bases: WasmEncodingBases, types: &Types, encoding_order: &[TypeId], + type_ids_to_index: &BTreeMap, ) { let gc_func_idx = 0; let take_refs_func_idx = 1; @@ -1115,7 +1117,7 @@ impl GcOp { if let Some(def) = types.type_defs.get(tid) { let CompositeType::Struct(ref st) = def.composite_type; for field in &st.fields { - field.field_type.emit_default_const(func); + field.field_type.emit_default_const(func, type_ids_to_index); } } } @@ -1326,7 +1328,9 @@ impl GcOp { func.instruction(&Instruction::If(wasm_encoder::BlockType::Empty)); func.instruction(&Instruction::Else); func.instruction(&Instruction::LocalGet(typed_local)); - fields[idx].field_type.emit_default_const(func); + fields[idx] + .field_type + .emit_default_const(func, type_ids_to_index); let idx = u32::try_from(idx).unwrap(); func.instruction(&Instruction::StructSet { struct_type_index: wasm_type, diff --git a/crates/fuzzing/src/generators/gc_ops/tests.rs b/crates/fuzzing/src/generators/gc_ops/tests.rs index 2ea13407f16e..4a44c42796c1 100644 --- a/crates/fuzzing/src/generators/gc_ops/tests.rs +++ b/crates/fuzzing/src/generators/gc_ops/tests.rs @@ -6,6 +6,7 @@ use crate::generators::gc_ops::{ use mutatis; use rand::rngs::StdRng; use rand::{RngExt, SeedableRng}; +use 
std::collections::BTreeSet; use wasmparser; use wasmprinter; @@ -118,7 +119,6 @@ fn test_ops(num_params: u32, num_globals: u32, table_size: u32) -> GcOps { fn assert_valid_wasm(ops: &mut GcOps) { let wasm = ops.to_wasm_binary(); let feats = wasmparser::WasmFeatures::default(); - eprintln!("wat: {}", wasmprinter::print_bytes(&wasm).unwrap()); feats.reference_types(); feats.gc(); let mut validator = wasmparser::Validator::new_with_features(feats); @@ -220,7 +220,7 @@ fn every_op_generated() -> mutatis::Result<()> { let mut res = empty_test_ops(); let mut session = mutatis::Session::new().seed(0xC0FFEE); - 'outer: for _ in 0..=1024 { + 'outer: for _ in 0..=8192 { session.mutate(&mut res)?; for op in &res.ops { unseen_ops.remove(op.name()); @@ -528,7 +528,7 @@ fn sort_rec_groups_topo_orders_dependencies_first() { } #[test] -fn break_rec_group_cycles() { +fn merge_rec_group_cycles() { let _ = env_logger::try_init(); let mut types = Types::new(); @@ -568,11 +568,15 @@ fn break_rec_group_cycles() { // -----------| g3 | // +----+ // - // After: back edges dropped, clean chain + // All four groups are mutually reachable (g0->g1->g2->g0 and + // g1->g2->g3->g1), so they form a single strongly-connected component. // - // +----+ +----+ +----+ +----+ - // | g0 |------>| g1 |------>| g2 |------>| g3 | - // +----+ +----+ +----+ +----+ + // After: the whole SCC is merged into one rec group, with every supertype + // edge preserved (nothing is dropped). + // + // +-------------------------+ + // | g0 ∪ g1 ∪ g2 ∪ g3 | + // +-------------------------+ types.insert_struct(a0, g0, false, Some(b0), Vec::new()); // g0 -> g1 types.insert_struct(a1, g0, false, None, Vec::new()); @@ -598,32 +602,28 @@ fn break_rec_group_cycles() { assert_eq!(types.rec_groups.len(), 4); let type_to_group = types.type_to_group_map(); - types.break_rec_group_cycles(&type_to_group); - - // All four groups preserved. 
- assert_eq!(types.rec_groups.len(), 4); - assert!(types.rec_groups.contains_key(&g0)); - assert!(types.rec_groups.contains_key(&g1)); - assert!(types.rec_groups.contains_key(&g2)); - assert!(types.rec_groups.contains_key(&g3)); + types.merge_rec_group_cycles(&type_to_group); - // Back edge (g2->g0): c1's supertype cleared. - assert_eq!(types.type_defs.get(&c1).unwrap().supertype, None); + // The whole cycle collapses into a single rec group... + assert_eq!(types.rec_groups.len(), 1); - // Back edge (g3->g1): d1's supertype cleared. - assert_eq!(types.type_defs.get(&d1).unwrap().supertype, None); + // ...which contains every type from all four original groups. + let merged: BTreeSet = types.rec_groups.values().flatten().copied().collect(); + let expected: BTreeSet = [a0, a1, b0, b1, c0, c1, c2, d0, d1].into_iter().collect(); + assert_eq!(merged, expected); - // All other cross-group supertypes preserved. + // Merging drops no edges: every supertype relationship is preserved. assert_eq!(types.type_defs.get(&a0).unwrap().supertype, Some(b0)); assert_eq!(types.type_defs.get(&b1).unwrap().supertype, Some(c0)); + assert_eq!(types.type_defs.get(&c1).unwrap().supertype, Some(a1)); assert_eq!(types.type_defs.get(&c2).unwrap().supertype, Some(d0)); + assert_eq!(types.type_defs.get(&d1).unwrap().supertype, Some(b0)); - // Result is a clean chain: g0 -> g1 -> g2 -> g3 + // With a single group there are no cross-group ordering constraints left. 
let type_to_group = types.type_to_group_map(); let mut topo = Vec::new(); types.sort_rec_groups_topo(&mut topo, &type_to_group); - assert_eq!(topo.len(), 4); - assert_eq!(topo, vec![g3, g2, g1, g0]); + assert_eq!(topo.len(), 1); } #[test] diff --git a/crates/fuzzing/src/generators/gc_ops/types.rs b/crates/fuzzing/src/generators/gc_ops/types.rs index c31ed1dd3574..46d86b13f03b 100644 --- a/crates/fuzzing/src/generators/gc_ops/types.rs +++ b/crates/fuzzing/src/generators/gc_ops/types.rs @@ -5,7 +5,8 @@ use crate::generators::gc_ops::ops::GcOp; use serde::{Deserialize, Serialize}; use std::collections::btree_map::Entry; use std::collections::{BTreeMap, BTreeSet, HashMap}; -use wasmtime_environ::graphs::{Dfs, DfsEvent, Graph}; +use wasmtime_environ::graphs::{Dfs, DfsEvent, Graph, StronglyConnectedComponents}; +use wasmtime_environ::{EntityRef, entity_impl}; /// Identifies a `(rec ...)` group. #[derive( @@ -72,22 +73,55 @@ macro_rules! define_field_type_enum { #[allow(missing_docs, reason = "self-describing")] pub enum FieldType { $( $variant, )* + /// Abstract `(ref null? struct)`. + StructRef { nullable: bool }, + /// Concrete `(ref null? $t)` referencing a defined struct type. + Ref { nullable: bool, type_id: TypeId }, } impl FieldType { - /// All possible field type variants, for random selection. + /// All scalar/abstract-leaf field type variants, for random selection. pub const ALL: &[FieldType] = &[ $( FieldType::$variant, )* ]; - /// Pick a random field type. + /// Pick a random scalar/abstract-leaf field type. pub fn random(rng: &mut mutatis::Rng) -> FieldType { let idx = rng.gen_index(FieldType::ALL.len()).unwrap(); FieldType::ALL[idx] } /// Convert to a `wasm_encoder::StorageType`. 
- pub fn to_storage_type(self) -> wasm_encoder::StorageType { + pub fn to_storage_type( + self, + type_ids_to_index: &BTreeMap, + ) -> wasm_encoder::StorageType { + use wasm_encoder::{AbstractHeapType, HeapType, RefType, StorageType, ValType}; match self { $( FieldType::$variant => $storage, )* + FieldType::StructRef { nullable } => StorageType::Val(ValType::Ref(RefType { + nullable, + heap_type: HeapType::Abstract { + shared: false, + ty: AbstractHeapType::Struct, + }, + })), + FieldType::Ref { nullable, type_id } => { + // Fixup guarantees every concrete reference target is a + // live type that gets a Wasm index, so a miss here is a + // bug in fixup, not a recoverable case. Panic loudly + // rather than silently emitting a `(ref null struct)`, + // which would not validate against this field's + // concrete type. + let &idx = type_ids_to_index.get(&type_id).unwrap_or_else(|| { + unreachable!( + "concrete struct reference to {type_id:?} missing from \ + index map; fixup should keep all reference targets" + ) + }); + StorageType::Val(ValType::Ref(RefType { + nullable, + heap_type: HeapType::Concrete(idx), + })) + } } } @@ -97,10 +131,37 @@ macro_rules! define_field_type_enum { matches!(self, FieldType::I8 | FieldType::I16) } - /// Emit an iconic default constant for this field type onto the Wasm stack. - pub fn emit_default_const(self, func: &mut wasm_encoder::Function) { + /// Emit a default constant for this field type onto the stack + pub fn emit_default_const( + self, + func: &mut wasm_encoder::Function, + type_ids_to_index: &BTreeMap, + ) { match self { $( FieldType::$variant => { func.instruction(&$default_val); } )* + FieldType::StructRef { .. } => { + func.instruction(&wasm_encoder::Instruction::RefNull( + wasm_encoder::HeapType::Abstract { + shared: false, + ty: wasm_encoder::AbstractHeapType::Struct, + }, + )); + } + FieldType::Ref { type_id, .. } => { + // See `to_storage_type`: a missing index is a fixup bug. 
+ // Emitting `ref.null struct` here would produce a value + // that does not match the field's concrete `(ref null + // $t)` type and yield an invalid module, so panic. + let &idx = type_ids_to_index.get(&type_id).unwrap_or_else(|| { + unreachable!( + "concrete struct reference to {type_id:?} missing from \ + index map; fixup should keep all reference targets" + ) + }); + func.instruction(&wasm_encoder::Instruction::RefNull( + wasm_encoder::HeapType::Concrete(idx), + )); + } } } } @@ -108,6 +169,26 @@ macro_rules! define_field_type_enum { } for_each_field_type!(define_field_type_enum); +impl FieldType { + /// Generate a random field type, including reference types. + pub fn generate(rng: &mut mutatis::Rng, candidates: &[TypeId]) -> FieldType { + match rng.gen_u32() % 4 { + // Abstract `structref`. + 0 => FieldType::StructRef { nullable: true }, + // Concrete `(ref null $t)`, when we have a type to point at. + 1 => match rng.choose(candidates).copied() { + Some(type_id) => FieldType::Ref { + nullable: true, + type_id, + }, + None => FieldType::random(rng), + }, + // Scalar / abstract-leaf type. + _ => FieldType::random(rng), + } + } +} + /// A single field within a struct type. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct StructField { @@ -195,13 +276,32 @@ impl Graph for RecGroupGraph<'_> { if let Some(type_ids) = self.rec_groups.get(&group) { for &ty in type_ids { - if let Some(super_ty) = self.type_defs.get(&ty).and_then(|d| d.supertype) { + let Some(def) = self.type_defs.get(&ty) else { + continue; + }; + + // Supertype edge: the supertype's group must encode first. + if let Some(super_ty) = def.supertype { if let Some(&super_group) = self.type_to_group.get(&super_ty) { if super_group != group { deps.insert(super_group); } } } + + // Field-reference edges: a concrete `(ref null $t)` field means + // `$t`'s group must encode first (references *within* a group are + // always legal and impose no ordering constraint). 
+ let CompositeType::Struct(ref st) = def.composite_type; + for field in &st.fields { + if let FieldType::Ref { type_id, .. } = field.field_type { + if let Some(&ref_group) = self.type_to_group.get(&type_id) { + if ref_group != group { + deps.insert(ref_group); + } + } + } + } } } @@ -209,6 +309,37 @@ impl Graph for RecGroupGraph<'_> { } } +/// A dense [`EntityRef`] node used to run strongly-connected-component analysis. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct RecGroupNode(u32); +entity_impl!(RecGroupNode); + +/// A densely-indexed view of the rec-group dependency graph, suitable for SCC analysis. +struct DenseRecGroupGraph { + adjacency: Vec>, +} + +impl Graph for DenseRecGroupGraph { + type NodesIter<'a> + = std::iter::Map, fn(u32) -> RecGroupNode> + where + Self: 'a; + + fn nodes(&self) -> Self::NodesIter<'_> { + let len = u32::try_from(self.adjacency.len()).unwrap(); + (0..len).map(RecGroupNode as fn(u32) -> RecGroupNode) + } + + type SuccessorsIter<'a> + = std::iter::Copied> + where + Self: 'a; + + fn successors(&self, node: RecGroupNode) -> Self::SuccessorsIter<'_> { + self.adjacency[node.index()].iter().copied() + } +} + /// All type and rec-group state. /// /// Rec groups own sets of [`TypeId`]s; moving a type between groups is @@ -451,57 +582,53 @@ impl Types { .collect() } - /// Break cycles in the rec-group dependency graph by dropping cross-group - /// supertype edges that are DFS back edges. - pub fn break_rec_group_cycles(&mut self, type_to_group: &BTreeMap) { - let graph = RecGroupGraph { + /// Resolve cross-group cycles in the rec-group dependency graph by + /// **merging** each strongly-connected component of mutually-dependent rec + /// groups into a single rec group. 
+ pub fn merge_rec_group_cycles(&mut self, type_to_group: &BTreeMap) { + if self.rec_groups.len() < 2 { + return; + } + + let groups: Vec = self.rec_groups.keys().copied().collect(); + let group_to_dense: BTreeMap = groups + .iter() + .enumerate() + .map(|(i, &g)| (g, u32::try_from(i).unwrap())) + .collect(); + + let rec_graph = RecGroupGraph { type_defs: &self.type_defs, rec_groups: &self.rec_groups, type_to_group, }; + let adjacency: Vec> = groups + .iter() + .map(|&g| { + rec_graph + .successors(g) + .map(|succ| RecGroupNode(group_to_dense[&succ])) + .collect() + }) + .collect(); - let mut seen = BTreeSet::new(); - let mut back_edges: BTreeSet<(RecGroupId, RecGroupId)> = BTreeSet::new(); - let mut dfs = Dfs::default(); + let dense = DenseRecGroupGraph { adjacency }; + let sccs = StronglyConnectedComponents::new(&dense); - for &root in self.rec_groups.keys() { - if seen.contains(&root) { + for (_, nodes) in sccs.iter() { + if nodes.len() < 2 { continue; } - dfs.add_root(root); - let mut active = BTreeSet::new(); - - while let Some(event) = dfs.next(&graph, |id| seen.contains(&id)) { - match event { - DfsEvent::Pre(id) => { - seen.insert(id); - active.insert(id); - } - DfsEvent::Post(id) => { - active.remove(&id); - } - DfsEvent::AfterEdge(from, to) => { - if active.contains(&to) { - back_edges.insert((from, to)); - } - } - } - } - } - - // Drop supertype edges that correspond to back edges. - if !back_edges.is_empty() { - for (&tid, def) in self.type_defs.iter_mut() { - if let Some(st) = def.supertype { - if let (Some(&sg), Some(&spg)) = - (type_to_group.get(&tid), type_to_group.get(&st)) - { - if back_edges.contains(&(sg, spg)) { - def.supertype = None; - } - } + // Fold every group in this component into the first one. 
+ let group_ids: Vec = nodes.iter().map(|n| groups[n.index()]).collect(); + let representative = group_ids[0]; + let mut merged = BTreeSet::new(); + for gid in &group_ids { + if let Some(members) = self.rec_groups.remove(gid) { + merged.extend(members); } } + self.rec_groups.insert(representative, merged); } } @@ -588,12 +715,35 @@ impl Types { st.fields.truncate(max_fields); } - // 9. Break supertype cycles and rec-group dependency cycles. + // 9. Normalize reference fields. + let valid_type_ids: BTreeSet = self.type_defs.keys().copied().collect(); + for def in self.type_defs.values_mut() { + let CompositeType::Struct(ref mut st) = def.composite_type; + for field in &mut st.fields { + match &mut field.field_type { + FieldType::StructRef { nullable } => *nullable = true, + FieldType::Ref { nullable, type_id } => { + if valid_type_ids.contains(type_id) { + *nullable = true; + } else { + field.field_type = FieldType::StructRef { nullable: true }; + } + } + _ => {} + } + } + } + + // 10. Break supertype cycles and merge rec-group reference cycles, so + // the type graph is well-founded before we encode it. self.break_supertype_cycles(); let type_to_group = self.type_to_group_map(); - self.break_rec_group_cycles(&type_to_group); + self.merge_rec_group_cycles(&type_to_group); + // Merging changes group membership, so recompute the reverse map for + // the encoding-order computation below. + let type_to_group = self.type_to_group_map(); - // 10. Ensure subtype fields are prefix-compatible with supertype fields. + // 11. Ensure subtype fields are prefix-compatible with supertype fields. // Process in topological order (supertype before subtype). let mut topo_order = Vec::new(); self.sort_types_topo(&mut topo_order); @@ -627,7 +777,7 @@ impl Types { debug_assert!(self.is_well_formed(limits)); - // 11. Compute encoding order (reuses type_to_group from step 9). + // 12. Compute encoding order (reuses type_to_group from step 10). 
self.encoding_order_grouped(encoding_order_grouped, &type_to_group); } @@ -674,6 +824,24 @@ impl Types { return false; } + // Reference fields must be nullable (non-nullable references are + // deferred), and concrete references must target an existing type. + for field in &st.fields { + match field.field_type { + FieldType::StructRef { nullable } | FieldType::Ref { nullable, .. } + if !nullable => + { + log::debug!("[-] Failed: type {tid:?} has a non-nullable reference field"); + return false; + } + FieldType::Ref { type_id, .. } if !self.type_defs.contains_key(&type_id) => { + log::debug!("[-] Failed: type {tid:?} references missing type {type_id:?}"); + return false; + } + _ => {} + } + } + if let Some(super_id) = def.supertype { match self.type_defs.get(&super_id) { None => {