From 9d8deeafbfab818eb099f2705c27f1a35c7a1a3c Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 16:23:51 -0600 Subject: [PATCH 1/7] feat(vm): Add NotImplemented sentinel and dunder dispatch primitives. --- compiler/src/modules/vm/handlers/dunder.rs | 31 +++++++++++++++++++++ compiler/src/modules/vm/handlers/methods.rs | 11 ++++++++ compiler/src/modules/vm/handlers/mod.rs | 1 + compiler/src/modules/vm/mod.rs | 5 ++++ compiler/src/modules/vm/ops.rs | 3 ++ compiler/src/modules/vm/types/mod.rs | 28 ++++++++++++++++++- compiler/tests/cases/vm.json | 10 ++++++- 7 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 compiler/src/modules/vm/handlers/dunder.rs diff --git a/compiler/src/modules/vm/handlers/dunder.rs b/compiler/src/modules/vm/handlers/dunder.rs new file mode 100644 index 0000000..051bc39 --- /dev/null +++ b/compiler/src/modules/vm/handlers/dunder.rs @@ -0,0 +1,31 @@ +/* +Dunder dispatch protocol: probe an instance method, invoke with `self` prepended, treat `NotImplemented` as a miss so reflected ops / generic fallbacks take over. +*/ + +use super::*; +use super::methods::AttrLookup; + +impl<'a> VM<'a> { + /* `recv.(*args)`: `Some(v)` on return, `None` on miss / `NotImplemented`, `Err` only on a raised dunder. */ + #[allow(dead_code)] // Consumed by per-operator handlers in the next phase. + pub(crate) fn try_call_dunder(&mut self, recv: Val, name: &str, args: &[Val], chunk: &SSAChunk, slots: &mut [Val]) -> Result, VmErr> { + // Built-in types route through their native handlers; dunder dispatch only fires on user instances. + if !recv.is_heap() { return Ok(None); } + if !matches!(self.heap.get(recv), HeapObj::Instance(..)) { return Ok(None); } + + let Some(AttrLookup::InstanceMethod { recv, func }) = self.resolve_attr_silent(recv, name)? else { return Ok(None); }; + + // Mirror `__init__` dispatch: depth guard before pushing so a recursive blow-up leaves no half-built frame. 
+ if self.depth >= self.max_calls { return Err(cold_depth()); } + + self.push(func); + self.push(recv); + for &a in args { self.push(a); } + let argc = (1 + args.len()) as u16; + self.exec_call(argc, chunk, slots)?; + + let result = self.pop()?; + if self.heap.is_not_implemented(result) { return Ok(None); } + Ok(Some(result)) + } +} diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index efa31e1..00fa627 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -72,6 +72,17 @@ impl<'a> VM<'a> { .ok_or_else(|| VmErr::Attribute(s!("'", str ty, "' object has no attribute '", str name, "'"))) } + // `resolve_attr` that swallows `AttributeError` into `None`; other VmErrs still propagate — dunder probes need a miss to be silent. + // Allow: consumed by `try_call_dunder` and the per-operator handlers wired in the next phase. + #[allow(dead_code)] + pub(crate) fn resolve_attr_silent(&self, obj: Val, name: &str) -> Result, VmErr> { + match self.resolve_attr(obj, name) { + Ok(lookup) => Ok(Some(lookup)), + Err(VmErr::Attribute(_)) => Ok(None), + Err(other) => Err(other), + } + } + pub(crate) fn handle_load_attr(&mut self, name_idx: u16, chunk: &SSAChunk) -> Result<(), VmErr> { let name = chunk.names.get(name_idx as usize).ok_or(VmErr::Runtime("LoadAttr: bad name index"))?.clone(); let obj = self.pop()?; diff --git a/compiler/src/modules/vm/handlers/mod.rs b/compiler/src/modules/vm/handlers/mod.rs index 33d07e6..154c5ac 100644 --- a/compiler/src/modules/vm/handlers/mod.rs +++ b/compiler/src/modules/vm/handlers/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod arith; pub(crate) mod data; +pub(crate) mod dunder; pub(crate) mod format; pub(crate) mod function; pub(crate) mod methods; diff --git a/compiler/src/modules/vm/mod.rs b/compiler/src/modules/vm/mod.rs index a08d2ab..1123bd6 100644 --- a/compiler/src/modules/vm/mod.rs +++ b/compiler/src/modules/vm/mod.rs @@ -283,6 +283,11 @@ 
impl<'a> VM<'a> { vm.globals.insert("__name__".to_string(), main_name); vm.globals.insert("__name___0".to_string(), main_name); } + // `NotImplemented` singleton; dunders return it to delegate to the reflected operator. + if let Ok(ni) = vm.heap.alloc(HeapObj::NotImplemented) { + vm.globals.insert("NotImplemented".to_string(), ni); + vm.globals.insert("NotImplemented_0".to_string(), ni); + } // Builtins as first-class NativeFn values so they can be rebound/passed around. let builtin_fns: &[NativeFnId] = &[ NativeFnId::Print, NativeFnId::Len, NativeFnId::Abs, NativeFnId::Str, diff --git a/compiler/src/modules/vm/ops.rs b/compiler/src/modules/vm/ops.rs index 953369b..4392079 100644 --- a/compiler/src/modules/vm/ops.rs +++ b/compiler/src/modules/vm/ops.rs @@ -82,6 +82,7 @@ impl<'a> VM<'a> { HeapObj::Extern(_) => true, HeapObj::ExcInstance(..) => true, HeapObj::Ellipsis => true, + HeapObj::NotImplemented => true, } } @@ -155,6 +156,7 @@ impl<'a> VM<'a> { HeapObj::Extern(_) => "builtin_function_or_method", HeapObj::ExcInstance(..) => "exception", HeapObj::Ellipsis => "ellipsis", + HeapObj::NotImplemented => "NotImplementedType", }} } @@ -237,6 +239,7 @@ impl<'a> VM<'a> { out } HeapObj::Ellipsis => "Ellipsis".into(), + HeapObj::NotImplemented => "NotImplemented".into(), } } diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index e5fcd29..2f8fb01 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -143,6 +143,8 @@ pub enum HeapObj { // True `...` singleton, distinct from any string. Ellipsis, Type(String), + // `NotImplemented` singleton; dunder return sentinel that triggers the reflected operator fallback. + NotImplemented, /* Wide-int slow path (i128); `int_to_val` canonicalises so 47-bit values stay inline. */ LongInt(i128), /* Exception instance: type name + ctor args (exposed via `.args`). 
*/ @@ -302,7 +304,7 @@ pub(crate) fn for_each_val(obj: &HeapObj, mut f: impl FnMut(Val)) { // Variants without Val payloads — terminal, nothing to trace. HeapObj::Str(_) | HeapObj::Bytes(_) | HeapObj::LongInt(_) | HeapObj::Type(_) | HeapObj::NativeFn(_) | HeapObj::Range(..) - | HeapObj::Extern(_) | HeapObj::Ellipsis => {} + | HeapObj::Extern(_) | HeapObj::Ellipsis | HeapObj::NotImplemented => {} } } @@ -326,6 +328,8 @@ pub struct HeapPool { longints: HashMap, // Cached Ellipsis slot index so `... is ...` is True (singleton parity). ellipsis_idx: Option, + // Same singleton invariant as `ellipsis_idx`, but for `NotImplemented`. + notimpl_idx: Option, /* Reused across mark() calls; cleared not freed, so GC never re-allocates under pressure. */ mark_worklist: Vec, } @@ -343,6 +347,7 @@ impl HeapPool { bytes_intern: HashMap::default(), longints: HashMap::default(), ellipsis_idx: None, + notimpl_idx: None, mark_worklist: Vec::with_capacity(64), } } @@ -367,6 +372,11 @@ impl HeapPool { && let Some(idx) = self.ellipsis_idx { return Ok(Val::heap(idx)); } + // `NotImplemented` follows the same singleton rule so `is` and dunder checks agree. + if matches!(obj, HeapObj::NotImplemented) + && let Some(idx) = self.notimpl_idx { + return Ok(Val::heap(idx)); + } if self.live >= self.limit { return Err(cold_heap()); } if self.slots.len() >= (1 << 28) { return Err(VmErr::Heap); } @@ -384,6 +394,7 @@ impl HeapPool { HeapObj::Bytes(b) if b.len() <= 128 => { self.bytes_intern.insert(b.clone(), idx); } HeapObj::LongInt(i) => { self.longints.insert(*i, idx); } HeapObj::Ellipsis => { self.ellipsis_idx = Some(idx); } + HeapObj::NotImplemented => { self.notimpl_idx = Some(idx); } _ => {} } @@ -438,6 +449,13 @@ impl HeapPool { self.free_list.push(idx as u32); self.live -= 1; } + Some(HeapObj::NotImplemented) => { + // Singleton index becomes stale when its slot is freed. 
+ if self.notimpl_idx == Some(idx as u32) { self.notimpl_idx = None; } + slot.obj = None; + self.free_list.push(idx as u32); + self.live -= 1; + } Some(_) => { slot.obj = None; self.free_list.push(idx as u32); @@ -503,10 +521,18 @@ impl HeapPool { Some(HeapObj::Bytes(_)) => 22, Some(HeapObj::ExcInstance(..)) => 24, Some(HeapObj::Ellipsis) => 26, + Some(HeapObj::NotImplemented) => 27, None => 0, } } else { 0 } } + + /* Identity probe for the `NotImplemented` singleton; consumed by the dunder dispatch protocol. */ + #[inline(always)] + pub fn is_not_implemented(&self, v: Val) -> bool { + v.is_heap() + && matches!(self.slots[v.as_heap() as usize].obj.as_ref(), Some(HeapObj::NotImplemented)) + } } /* Single-write SSA store after register coalescing. */ diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index 62e6f59..cbef6bd 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1548,5 +1548,13 @@ {"src": "print(pow(2, 100, 2 ** 100))", "output": [], "error": "modulus too large"}, {"src": "print(2 ** 128)", "output": [], "error": "OverflowError"}, {"src": "print(170141183460469231731687303715884105727 + 1)", "output": [], "error": "OverflowError"}, - {"src": "print(99999999999999999999999999999999999999999)", "output": [], "error": "integer literal too large"} + {"src": "print(99999999999999999999999999999999999999999)", "output": [], "error": "integer literal too large"}, + {"src": "print(NotImplemented)", "output": ["NotImplemented"]}, + {"src": "print(type(NotImplemented))", "output": [""]}, + {"src": "print(NotImplemented is NotImplemented)", "output": ["True"]}, + {"src": "x = NotImplemented\ny = NotImplemented\nprint(x is y)", "output": ["True"]}, + {"src": "print(NotImplemented is None)", "output": ["False"]}, + {"src": "print(NotImplemented == None)", "output": ["False"]}, + {"src": "print(bool(NotImplemented))", "output": ["True"]}, + {"src": "def f():\n return NotImplemented\nprint(f() is NotImplemented)", 
"output": ["True"]} ] From 62cf410f6dff918a7877e4f93bbab5e0b62fd2bf Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 17:14:39 -0600 Subject: [PATCH 2/7] feat(vm): Add single-level inheritance with super() and isinstance walks. --- compiler/src/modules/parser/literals.rs | 8 ++- compiler/src/modules/parser/stmt.rs | 7 ++- compiler/src/modules/vm/builtins/identity.rs | 22 +++++++- compiler/src/modules/vm/dispatch.rs | 21 ++++++-- compiler/src/modules/vm/handlers/dunder.rs | 3 +- compiler/src/modules/vm/handlers/function.rs | 31 +++++++----- compiler/src/modules/vm/handlers/methods.rs | 53 ++++++++++++++++---- compiler/src/modules/vm/mod.rs | 4 ++ compiler/src/modules/vm/ops.rs | 13 +++-- compiler/src/modules/vm/types/coro.rs | 3 ++ compiler/src/modules/vm/types/mod.rs | 27 ++++++++-- compiler/tests/cases/parser.json | 28 +++++------ compiler/tests/cases/vm.json | 19 ++++++- 13 files changed, 181 insertions(+), 58 deletions(-) diff --git a/compiler/src/modules/parser/literals.rs b/compiler/src/modules/parser/literals.rs index 6a0dc35..e23cc48 100644 --- a/compiler/src/modules/parser/literals.rs +++ b/compiler/src/modules/parser/literals.rs @@ -396,9 +396,12 @@ impl<'src, I: Iterator> Parser<'src, I> { "".to_string() }; + // Bases are pushed left-to-right; `MakeClass` pops `num_bases` and stores them in the Class. + let mut num_bases: u16 = 0; if self.eat_if(TokenType::Lpar) { while !matches!(self.peek(), Some(TokenType::Rpar) | None) { self.expr(); + num_bases = num_bases.saturating_add(1); if !self.eat_if(TokenType::Comma) { break; } } self.eat(TokenType::Rpar); @@ -409,8 +412,11 @@ impl<'src, I: Iterator> Parser<'src, I> { let body = self.with_fresh_chunk(|s| s.compile_block()); let ci = self.chunk.classes.len() as u16; + // Operand packs `(num_bases << 8) | class_idx`; each field is one byte to keep the dispatch decode cheap. 
+ if ci > 0xFF { self.error("too many classes in this scope (limit 255)"); return; } + if num_bases > 0xFF { self.error("too many base classes (limit 255)"); return; } self.chunk.classes.push(body); - self.chunk.emit(OpCode::MakeClass, ci); + self.chunk.emit(OpCode::MakeClass, (num_bases << 8) | ci); // Each decorator Calls with the previous result, same as for functions. for _ in 0..decorators { diff --git a/compiler/src/modules/parser/stmt.rs b/compiler/src/modules/parser/stmt.rs index ec19fcb..be2b05c 100644 --- a/compiler/src/modules/parser/stmt.rs +++ b/compiler/src/modules/parser/stmt.rs @@ -526,7 +526,12 @@ impl<'src, I: Iterator> Parser<'src, I> { true } } - Some(TokenType::Lpar) => self.call(name), + Some(TokenType::Lpar) => { + // `name(...)` at statement level: allow postfix chains like `super().__init__(x)`. + let leaves = self.call(name); + if leaves { self.expr_tails(); } + leaves + } _ => { self.emit_load_ssa(name); self.expr_tails(); diff --git a/compiler/src/modules/vm/builtins/identity.rs b/compiler/src/modules/vm/builtins/identity.rs index 5265e26..a49da55 100644 --- a/compiler/src/modules/vm/builtins/identity.rs +++ b/compiler/src/modules/vm/builtins/identity.rs @@ -6,6 +6,18 @@ use super::matches_exc_class; impl<'a> VM<'a> { + // `super()` zero-arg: reads the running method's `(class, self)` off the top frame and returns a Super proxy. + pub fn call_super(&mut self) -> Result<(), VmErr> { + let binding = self.call_stack.last() + .and_then(|f| f.current_class.zip(f.current_self)); + let Some((class, recv)) = binding else { + return Err(VmErr::Runtime("super() must be called inside a method")); + }; + let proxy = self.heap.alloc(HeapObj::Super(class, recv))?; + self.push(proxy); + Ok(()) + } + pub fn call_repr(&mut self) -> Result<(), VmErr> { let o = self.pop()?; self.alloc_and_push_str(self.repr(o)) @@ -51,7 +63,7 @@ impl<'a> VM<'a> { Ok(()) } - /* Type-name based isinstance check. 
Accepts Type or NativeFn (for the builtins-as-types case) on the right; allows int↔bool aliasing. */ + /* Type-name based isinstance check. Accepts Type / NativeFn (builtin types) / user Class on the right; allows int↔bool aliasing and walks user inheritance via `is_subclass`. */ pub fn call_isinstance(&mut self) -> Result<(), VmErr> { let (arg2, obj) = (self.pop()?, self.pop()?); let obj_ty = self.type_name(obj); @@ -65,6 +77,11 @@ impl<'a> VM<'a> { } } else { None }; + // User-class membership uses heap identity, not type names, so capture the instance's class up-front. + let obj_class: Option = if obj.is_heap() { + if let HeapObj::Instance(cls, _) = self.heap.get(obj) { Some(*cls) } else { None } + } else { None }; + let check_one = |t: Val, heap: &HeapPool| -> Result { if !t.is_heap() { return Err(VmErr::Type("isinstance() arg 2 must be a type or tuple of types")); @@ -90,6 +107,7 @@ impl<'a> VM<'a> { || (obj_ty == "bool" && name == "int") ) } + HeapObj::Class(..) => Ok(obj_class.is_some_and(|c| heap.is_subclass(c, t))), _ => Err(VmErr::Type("isinstance() arg 2 must be a type or tuple of types")), } }; @@ -99,7 +117,7 @@ impl<'a> VM<'a> { } let result = match self.heap.get(arg2) { - HeapObj::Type(_) | HeapObj::NativeFn(_) => check_one(arg2, &self.heap)?, + HeapObj::Type(_) | HeapObj::NativeFn(_) | HeapObj::Class(..) => check_one(arg2, &self.heap)?, HeapObj::Tuple(items) => { let items: Vec = items.clone(); items.iter().any(|&t| check_one(t, &self.heap).unwrap_or(false)) diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index 09e19c8..03d12d8 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -217,8 +217,9 @@ impl<'a> VM<'a> { let encoded = ((kw_flat.len() as u16 / 2) << 8) | argc; self.exec_call(encoded, chunk, slots) } - handlers::methods::AttrLookup::InstanceMethod { recv, func } => { - // Prepend `self`; kwargs aren't forwarded (preserved behaviour). 
+ handlers::methods::AttrLookup::InstanceMethod { recv, func, class } => { + // Prepend `self`; kwargs aren't forwarded (preserved behaviour). `super()` reads the binding off `pending`. + self.pending.method_binding = Some((class, recv)); self.push(func); self.push(recv); for a in &positional { self.push(*a); } @@ -413,8 +414,18 @@ impl<'a> VM<'a> { } OpCode::MakeClass => { - let ci = op as usize; - let body = &chunk.classes[ci]; + // Operand layout mirrors `class_def_with`: low byte = class chunk index, high byte = base count. + let class_idx = (op & 0xFF) as usize; + let num_bases = (op >> 8) as usize; + // Pop bases first so a misencoded operand fails before we touch the body. + let bases = self.pop_n(num_bases)?; + // Reject non-class bases up-front; otherwise inherited lookups silently miss. + for &b in &bases { + if !b.is_heap() || !matches!(self.heap.get(b), HeapObj::Class(..)) { + return Err(cold_type("base class must be a class object")); + } + } + let body = &chunk.classes[class_idx]; let mut class_slots = self.fill_builtins(&body.names); self.exec(body, &mut class_slots)?; // Every defined slot becomes a class member (methods + class-level constants). 
@@ -435,7 +446,7 @@ impl<'a> VM<'a> { } let next_op = cache.fused_ref().get(*ip).map(|i| i.operand).unwrap_or(0); let name_str = chunk.names.get(next_op as usize).map(|n| ssa_strip(n)).unwrap_or("?").to_string(); - let cls = self.heap.alloc(HeapObj::Class(name_str, methods))?; + let cls = self.heap.alloc(HeapObj::Class(name_str, bases, methods))?; self.push(cls); } OpCode::StoreAttr => { diff --git a/compiler/src/modules/vm/handlers/dunder.rs b/compiler/src/modules/vm/handlers/dunder.rs index 051bc39..1985984 100644 --- a/compiler/src/modules/vm/handlers/dunder.rs +++ b/compiler/src/modules/vm/handlers/dunder.rs @@ -13,11 +13,12 @@ impl<'a> VM<'a> { if !recv.is_heap() { return Ok(None); } if !matches!(self.heap.get(recv), HeapObj::Instance(..)) { return Ok(None); } - let Some(AttrLookup::InstanceMethod { recv, func }) = self.resolve_attr_silent(recv, name)? else { return Ok(None); }; + let Some(AttrLookup::InstanceMethod { recv, func, class }) = self.resolve_attr_silent(recv, name)? else { return Ok(None); }; // Mirror `__init__` dispatch: depth guard before pushing so a recursive blow-up leaves no half-built frame. if self.depth >= self.max_calls { return Err(cold_depth()); } + self.pending.method_binding = Some((class, recv)); self.push(func); self.push(recv); for &a in args { self.push(a); } diff --git a/compiler/src/modules/vm/handlers/function.rs b/compiler/src/modules/vm/handlers/function.rs index 7f6ef65..4662815 100644 --- a/compiler/src/modules/vm/handlers/function.rs +++ b/compiler/src/modules/vm/handlers/function.rs @@ -230,23 +230,21 @@ impl<'a> VM<'a> { return Ok(true); } - // Calling a class: create an instance and run __init__ if defined. - if let HeapObj::Class(_, methods) = self.heap.get(callee) { + // Calling a class: create an instance and run __init__ if defined (walks bases). + if let HeapObj::Class(..) 
= self.heap.get(callee) { // The recursive `exec_call` below only encodes positional count — kwargs would silently disappear before reaching `__init__`, so reject them here. if !kw_flat.is_empty() { return Err(cold_type("class constructor takes no keyword arguments")); } - let methods = methods.clone(); let instance = self.heap.alloc(HeapObj::Instance(callee, Rc::new(RefCell::new(DictMap::new()))))?; - if let Some((_, init_fn)) = methods.iter().find(|(n, _)| n == "__init__") { + if let Some((init_fn, defining)) = self.lookup_class_member(callee, "__init__") { // Fail-fast before pushing — the inner check fires only after parse_call_args pops. if self.depth >= self.max_calls { return Err(cold_depth()); } - let init_fn = *init_fn; + self.pending.method_binding = Some((defining, instance)); self.push(init_fn); - let mut args = vec![instance]; - args.extend_from_slice(positional); - for a in &args { self.push(*a); } - let argc = args.len() as u16; + self.push(instance); + for a in positional { self.push(*a); } + let argc = (1 + positional.len()) as u16; self.exec_call(argc, chunk, slots)?; // Discard `__init__` return value. self.pop()?; @@ -256,10 +254,11 @@ impl<'a> VM<'a> { } // Bound user method: prepend `self` to the arg list and re-dispatch. - if let HeapObj::BoundUserMethod(recv, func) = self.heap.get(callee) { + if let HeapObj::BoundUserMethod(recv, func, class) = self.heap.get(callee) { // Same as Class branch: depth check before mutating the stack. if self.depth >= self.max_calls { return Err(cold_depth()); } - let (recv, func) = (*recv, *func); + let (recv, func, class) = (*recv, *func, *class); + self.pending.method_binding = Some((class, recv)); self.push(func); self.push(recv); for a in positional { self.push(*a); } @@ -431,11 +430,18 @@ impl<'a> VM<'a> { // Frame snapshots caller's source/path so render doesn't borrow live chunk pointers. 
let call_byte_pos = self.pending.call_byte_pos.take().unwrap_or(0); + // Method-call paths set `method_binding` immediately before invoking `exec_call`; plain function calls leave it `None`. + let (current_class, current_self) = match self.pending.method_binding.take() { + Some((c, s)) => (Some(c), Some(s)), + None => (None, None), + }; self.call_stack.push(super::super::types::CallFrame { fi, call_byte_pos, caller_source: chunk.source.clone(), caller_path: chunk.path.clone(), + current_class, + current_self, }); self.observed_impure.push(false); @@ -535,7 +541,7 @@ impl<'a> VM<'a> { BytesFromHex => Some(1), IntFromBytes => Some(2), IntToBytes => Some(3), - Globals | Locals => Some(0), + Globals | Locals | Super => Some(0), Bytes => None, // 0/1/2-arg: bytes() | bytes(n|iter) | bytes(str, "utf-8") Slice => None, // 1/2/3-arg Gather => None, // variadic @@ -618,6 +624,7 @@ impl<'a> VM<'a> { FrozenSet => self.call_frozenset(argc), Globals => self.call_globals(chunk, slots), Locals => self.call_locals(chunk, slots), + Super => self.call_super(), } } } diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index 00fa627..3dd2d43 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -12,13 +12,36 @@ pub(crate) enum AttrLookup { ModuleAttr(Val), ClassMember(Val), InstanceField(Val), - InstanceMethod { recv: Val, func: Val }, + // `class` is where `func` was found; the called frame needs it so `super()` knows where to resume. + InstanceMethod { recv: Val, func: Val, class: Val }, BuiltinMethod(BuiltinMethodId), // `e.args` on ExcInstance — caller picks: LoadAttr materialises the tuple, CallMethod errors. ExcArgs(Vec), } impl<'a> VM<'a> { + // Direct-then-DFS member lookup; first hit wins. Cycles are impossible: bases are validated at `MakeClass` time and `HeapObj::Class` is immutable, so the class graph is a static DAG. 
+ // Returns `(value, defining_class)` so callers building `BoundUserMethod` / `InstanceMethod` can record where the method came from for `super()`. + pub(crate) fn lookup_class_member(&self, cls: Val, name: &str) -> Option<(Val, Val)> { + if !cls.is_heap() { return None; } + let HeapObj::Class(_, bases, members) = self.heap.get(cls) else { return None; }; + if let Some((_, v)) = members.iter().find(|(n, _)| n == name) { return Some((*v, cls)); } + for &b in bases { + if let Some(found) = self.lookup_class_member(b, name) { return Some(found); } + } + None + } + + // Same lookup but skipping `cls` itself; powers `super()` which must search strictly above the current class. + pub(crate) fn lookup_class_member_after(&self, cls: Val, name: &str) -> Option<(Val, Val)> { + if !cls.is_heap() { return None; } + let HeapObj::Class(_, bases, _) = self.heap.get(cls) else { return None; }; + for &b in bases { + if let Some(found) = self.lookup_class_member(b, name) { return Some(found); } + } + None + } + // `obj.` resolution shared by `handle_load_attr` and `exec_call_method`. pub(crate) fn resolve_attr(&self, obj: Val, name: &str) -> Result { let bare = crate::modules::parser::ssa_strip(name); @@ -42,13 +65,13 @@ impl<'a> VM<'a> { // Class attr: `MyClass.method` returns the unbound function (no `self` prepended). if obj.is_heap() - && let HeapObj::Class(cls_name, members) = self.heap.get(obj) { - if let Some((_, v)) = members.iter().find(|(n, _)| n == bare) { return Ok(AttrLookup::ClassMember(*v)); } + && let HeapObj::Class(cls_name, _, _) = self.heap.get(obj) { + if let Some((v, _)) = self.lookup_class_member(obj, bare) { return Ok(AttrLookup::ClassMember(v)); } let cls_name = cls_name.clone(); return Err(VmErr::Attribute(s!("type object '", str &cls_name, "' has no attribute '", str bare, "'"))); } - // Instance attribute lookup: check `__dict__` first, then class methods. + // Instance attribute lookup: check `__dict__` first, then the class chain (direct + bases). 
if obj.is_heap() && let HeapObj::Instance(cls_val, attrs) = self.heap.get(obj) { let cls_val = *cls_val; @@ -56,15 +79,23 @@ impl<'a> VM<'a> { .find(|(k, _)| k.is_heap() && matches!(self.heap.get(*k), HeapObj::Str(s) if s == name)) .map(|(_, v)| *v); if let Some(v) = found { return Ok(AttrLookup::InstanceField(v)); } - if cls_val.is_heap() - && let HeapObj::Class(_, methods) = self.heap.get(cls_val) - && let Some((_, mv)) = methods.iter().find(|(n, _)| n == name) { - return Ok(AttrLookup::InstanceMethod { recv: obj, func: *mv }); - } + if let Some((mv, defining)) = self.lookup_class_member(cls_val, name) { + return Ok(AttrLookup::InstanceMethod { recv: obj, func: mv, class: defining }); + } let ty = self.type_name(obj); return Err(VmErr::Attribute(s!("'", str ty, "' object has no attribute '", str name, "'"))); } + // `super().`: search strictly above the proxy's stored class; methods bind to the proxy's `recv`. + if obj.is_heap() + && let HeapObj::Super(cls_val, recv) = self.heap.get(obj) { + let (cls_val, recv) = (*cls_val, *recv); + if let Some((mv, defining)) = self.lookup_class_member_after(cls_val, name) { + return Ok(AttrLookup::InstanceMethod { recv, func: mv, class: defining }); + } + return Err(VmErr::Attribute(s!("'super' object has no attribute '", str name, "'"))); + } + // Builtin type method. 
let ty = self.type_name(obj); lookup_method(ty, name) @@ -93,8 +124,8 @@ impl<'a> VM<'a> { self.push(v); Ok(()) } - AttrLookup::InstanceMethod { recv, func } => { - let bound = self.heap.alloc(HeapObj::BoundUserMethod(recv, func))?; + AttrLookup::InstanceMethod { recv, func, class } => { + let bound = self.heap.alloc(HeapObj::BoundUserMethod(recv, func, class))?; self.push(bound); Ok(()) } diff --git a/compiler/src/modules/vm/mod.rs b/compiler/src/modules/vm/mod.rs index 1123bd6..aa7a2c2 100644 --- a/compiler/src/modules/vm/mod.rs +++ b/compiler/src/modules/vm/mod.rs @@ -42,6 +42,8 @@ pub(crate) struct Pending { pub sleep_until_ns: Option, /* Lifted ExcInstance from `raise X(...)` so `except X as e` binds the real instance. */ pub exc_val: Option, + /* `(class, self)` for the next user-function call when it's invoked as a method; populated by method-dispatch paths and consumed by `run_body_with_frame`. */ + pub method_binding: Option<(Val, Val)>, } impl Pending { @@ -52,6 +54,7 @@ impl Pending { call_byte_pos: None, sleep_until_ns: None, exc_val: None, + method_binding: None, } } } @@ -309,6 +312,7 @@ impl<'a> VM<'a> { NativeFnId::BytesFromHex, NativeFnId::IntFromBytes, NativeFnId::IntToBytes, NativeFnId::FrozenSet, NativeFnId::Globals, NativeFnId::Locals, + NativeFnId::Super, ]; for &id in builtin_fns { if let Ok(v) = vm.heap.alloc(HeapObj::NativeFn(id)) { diff --git a/compiler/src/modules/vm/ops.rs b/compiler/src/modules/vm/ops.rs index 4392079..f22df93 100644 --- a/compiler/src/modules/vm/ops.rs +++ b/compiler/src/modules/vm/ops.rs @@ -76,6 +76,7 @@ impl<'a> VM<'a> { HeapObj::NativeFn(_) => true, HeapObj::Class(..) => true, HeapObj::BoundUserMethod(..) => true, + HeapObj::Super(..) => true, HeapObj::Instance(..) => true, HeapObj::Coroutine(..) => true, HeapObj::Module(..) => true, @@ -148,8 +149,9 @@ impl<'a> VM<'a> { HeapObj::Slice(..) => "slice", HeapObj::BoundMethod(..) 
=> "builtin_function_or_method", HeapObj::NativeFn(_) => "builtin_function_or_method", - HeapObj::Class(_name, _) => "type", - HeapObj::BoundUserMethod(_, _) => "", + HeapObj::Class(_name, _, _) => "type", + HeapObj::BoundUserMethod(..) => "", + HeapObj::Super(..) => "super", HeapObj::Instance(..) => "object", HeapObj::Coroutine(..) => "coroutine", HeapObj::Module(..) => "module", @@ -197,12 +199,13 @@ impl<'a> VM<'a> { HeapObj::Dict(d) => { let mut o = s!(cap: 32; "{"); for (i,(k,v)) in d.borrow().iter().enumerate() { if i>0 { o.push_str(", "); } o.push_str(&self.repr(k)); o.push_str(": "); o.push_str(&self.repr(v)); } o.push('}'); o }, HeapObj::BoundMethod(_, id) => s!(""), HeapObj::NativeFn(id) => s!(""), - HeapObj::Class(name, _) => crate::s!("" ), + HeapObj::Class(name, _, _) => crate::s!("" ), HeapObj::Instance(cls, _) => { - if cls.is_heap() && let HeapObj::Class(name, _) = self.heap.get(*cls) { return crate::s!("<", str name, " instance>"); } + if cls.is_heap() && let HeapObj::Class(name, _, _) = self.heap.get(*cls) { return crate::s!("<", str name, " instance>"); } "".into() } - HeapObj::BoundUserMethod(_, _) => "".into(), + HeapObj::BoundUserMethod(..) => "".into(), + HeapObj::Super(..) => "".into(), HeapObj::Coroutine(..) => "".into(), HeapObj::Module(name, _) => s!(""), HeapObj::Extern(f) => s!(""), diff --git a/compiler/src/modules/vm/types/coro.rs b/compiler/src/modules/vm/types/coro.rs index 5fe0eb0..12f1c7a 100644 --- a/compiler/src/modules/vm/types/coro.rs +++ b/compiler/src/modules/vm/types/coro.rs @@ -34,6 +34,9 @@ pub struct CallFrame { pub call_byte_pos: u32, pub caller_source: alloc::sync::Arc, pub caller_path: alloc::sync::Arc, + // Class where the running method was found and its implicit `self`; consumed by `super()` to walk one level up. `None` for plain function calls. + pub current_class: Option, + pub current_self: Option, } /* ForIter state, consumed one item per `next_item`. 
*/ diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 2f8fb01..7ef43f9 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -151,9 +151,13 @@ pub enum HeapObj { ExcInstance(String, Vec), BoundMethod(Val, BuiltinMethodId), NativeFn(NativeFnId), - Class(String, Vec<(String, Val)>), + // `bases` lists direct parents in declared order; `resolve_attr` DFS-walks them on miss. + Class(String, Vec, Vec<(String, Val)>), Instance(Val, Rc>), - BoundUserMethod(Val, Val), + // `(recv, func, class)`; `class` is where `func` was found so the called frame knows what `super()` should skip past. + BoundUserMethod(Val, Val, Val), + // `super()` proxy: attribute access walks the bases of `cls` (skipping `cls` itself); methods bind to `recv`. + Super(Val, Val), Coroutine(usize, Vec, Vec, usize, Vec), /* Produced by `import m`; attr access via LoadAttr, calls fuse through CallMethod. */ Module(String, Vec<(String, Val)>), @@ -175,6 +179,7 @@ pub enum NativeFnId { Gather, WithTimeout, Cancel, BytesFromHex, IntFromBytes, IntToBytes, FrozenSet, Globals, Locals, + Super, } impl NativeFnId { @@ -192,6 +197,7 @@ impl NativeFnId { "gather", "with_timeout", "cancel", "bytes_fromhex", "int_from_bytes", "int_to_bytes", "frozenset", "globals", "locals", + "super", ]; NAMES[self as usize] } @@ -280,8 +286,12 @@ pub(crate) fn for_each_val(obj: &HeapObj, mut f: impl FnMut(Val)) { HeapObj::Set(rc) => for &v in rc.borrow().iter() { f(v); }, HeapObj::FrozenSet(rc) => for &v in rc.iter() { f(v); }, HeapObj::BoundMethod(recv, _) => f(*recv), - HeapObj::Class(_, methods) => for (_, v) in methods { f(*v); }, - HeapObj::BoundUserMethod(r, fu) => { f(*r); f(*fu); } + HeapObj::Class(_, bases, methods) => { + for &v in bases { f(v); } + for (_, v) in methods { f(*v); } + } + HeapObj::BoundUserMethod(r, fu, cls) => { f(*r); f(*fu); f(*cls); } + HeapObj::Super(cls, recv) => { f(*cls); f(*recv); } HeapObj::Instance(cls, attrs) 
=> { f(*cls); for (k, v) in attrs.borrow().iter() { f(k); f(v); } @@ -522,6 +532,7 @@ impl HeapPool { Some(HeapObj::ExcInstance(..)) => 24, Some(HeapObj::Ellipsis) => 26, Some(HeapObj::NotImplemented) => 27, + Some(HeapObj::Super(..)) => 28, None => 0, } } else { 0 } @@ -533,6 +544,14 @@ impl HeapPool { v.is_heap() && matches!(self.slots[v.as_heap() as usize].obj.as_ref(), Some(HeapObj::NotImplemented)) } + + /* `child` is `ancestor` or has it in its transitive bases. Identity on heap idx — classes are interned per-MakeClass and never mutated, so direct equality suffices. */ + pub fn is_subclass(&self, child: Val, ancestor: Val) -> bool { + if child.0 == ancestor.0 { return true; } + if !child.is_heap() { return false; } + let HeapObj::Class(_, bases, _) = self.get(child) else { return false; }; + bases.iter().any(|&b| self.is_subclass(b, ancestor)) + } } /* Single-write SSA store after register coalescing. */ diff --git a/compiler/tests/cases/parser.json b/compiler/tests/cases/parser.json index 3ba45b4..0484d73 100644 --- a/compiler/tests/cases/parser.json +++ b/compiler/tests/cases/parser.json @@ -518,10 +518,10 @@ "classes": 1 }, { - "src": "class Dog(Animal):\n pass", - "constants": [], - "names": ["Animal_0", "Dog_1"], - "instructions": [["LoadName",0], ["MakeClass",0], ["StoreName",1], ["ReturnValue",0]], + "src": "class Dog(Animal):\n pass", + "constants": [], + "names": ["Animal_0", "Dog_1"], + "instructions": [["LoadName",0], ["MakeClass",256], ["StoreName",1], ["ReturnValue",0]], "annotations": {}, "classes": 1 }, @@ -980,19 +980,19 @@ "annotations": {} }, { - "src": "class C(A, B):\n pass", - "constants": [], - "names": ["A_0", "B_0", "C_1"], - "instructions": [["LoadName",0], ["LoadName",1], ["MakeClass",0], ["StoreName",2], ["ReturnValue",0]], - "annotations": {}, + "src": "class C(A, B):\n pass", + "constants": [], + "names": ["A_0", "B_0", "C_1"], + "instructions": [["LoadName",0], ["LoadName",1], ["MakeClass",512], ["StoreName",2], 
["ReturnValue",0]], + "annotations": {}, "classes": 1 }, { - "src": "class C(A, B, D):\n pass", - "constants": [], - "names": ["A_0", "B_0", "D_0", "C_1"], - "instructions": [["LoadName",0], ["LoadName",1], ["LoadName",2], ["MakeClass",0], ["StoreName",3], ["ReturnValue",0]], - "annotations": {}, + "src": "class C(A, B, D):\n pass", + "constants": [], + "names": ["A_0", "B_0", "D_0", "C_1"], + "instructions": [["LoadName",0], ["LoadName",1], ["LoadName",2], ["MakeClass",768], ["StoreName",3], ["ReturnValue",0]], + "annotations": {}, "classes": 1 }, { diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index cbef6bd..8180ffb 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1516,7 +1516,7 @@ {"src": "def deco(c):\n print('decorated')\n return c\n@deco\nclass A: pass", "output": ["decorated"]}, {"src": "class A:\n def __add__(self, o):\n return 'magic'\nprint(A() + A())", "output": [], "error": "unsupported operand"}, {"src": "class CM:\n def __enter__(self):\n print('enter')\n return self\n def __exit__(self, *a):\n print('exit')\nwith CM() as c:\n print('body')", "output": ["body"]}, - {"src": "class A:\n def m(self):\n return 'A'\nclass B(A):\n def m(self):\n return super().m() + 'B'\nprint(B().m())", "output": [], "error": "name 'super' is not defined"}, + {"src": "class A:\n def m(self):\n return 'A'\nclass B(A):\n def m(self):\n return super().m() + 'B'\nprint(B().m())", "output": ["AB"]}, {"src": "class A: pass", "output": []}, {"src": "print(140737488355328)", "output": ["140737488355328"]}, {"src": "print(-140737488355329)", "output": ["-140737488355329"]}, @@ -1556,5 +1556,20 @@ {"src": "print(NotImplemented is None)", "output": ["False"]}, {"src": "print(NotImplemented == None)", "output": ["False"]}, {"src": "print(bool(NotImplemented))", "output": ["True"]}, - {"src": "def f():\n return NotImplemented\nprint(f() is NotImplemented)", "output": ["True"]} + {"src": "def f():\n return 
NotImplemented\nprint(f() is NotImplemented)", "output": ["True"]}, + {"src": "class A:\n def greet(self):\n return 'hi'\nclass B(A):\n pass\nprint(B().greet())", "output": ["hi"]}, + {"src": "class A:\n def greet(self):\n return 'A'\nclass B(A):\n def greet(self):\n return 'B'\nprint(B().greet())", "output": ["B"]}, + {"src": "class A:\n def x(self):\n return 'A.x'\nclass B(A):\n pass\nclass C(B):\n pass\nprint(C().x())", "output": ["A.x"]}, + {"src": "class A:\n def __init__(self, v):\n self.v = v\nclass B(A):\n pass\nprint(B(7).v)", "output": ["7"]}, + {"src": "class A:\n label = 'A'\nclass B(A):\n pass\nprint(B.label)", "output": ["A"]}, + {"src": "class A:\n pass\nclass B(A):\n pass\nb = B()\nprint(b.missing)", "output": [], "error": "has no attribute 'missing'"}, + {"src": "class A:\n def __init__(self, x):\n self.x = x\nclass B(A):\n def __init__(self, x, y):\n super().__init__(x)\n self.y = y\nb = B(3, 4)\nprint(b.x)\nprint(b.y)", "output": ["3", "4"]}, + {"src": "class A:\n def label(self):\n return 'A'\nclass B(A):\n def label(self):\n return super().label() + 'B'\nclass C(B):\n def label(self):\n return super().label() + 'C'\nprint(C().label())", "output": ["ABC"]}, + {"src": "super()", "output": [], "error": "super() must be called inside a method"}, + {"src": "def f():\n return super()\nf()", "output": [], "error": "super() must be called inside a method"}, + {"src": "class A:\n pass\nclass B(A):\n pass\nprint(isinstance(B(), A))", "output": ["True"]}, + {"src": "class A:\n pass\nclass B(A):\n pass\nprint(isinstance(A(), B))", "output": ["False"]}, + {"src": "class A:\n pass\nclass B:\n pass\nprint(isinstance(A(), (B, A)))", "output": ["True"]}, + {"src": "class A:\n pass\nclass B(A):\n pass\nclass C(B):\n pass\nprint(isinstance(C(), A))", "output": ["True"]}, + {"src": "class A:\n pass\nprint(isinstance(42, A))", "output": ["False"]} ] From 93f9780c894ebce587489a01e59b21a5fd851418 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 
2026 18:49:59 -0600 Subject: [PATCH 3/7] feat(vm): Wire dunder dispatch protocol across operators and protocols. --- compiler/src/modules/parser/control.rs | 35 ++- compiler/src/modules/parser/types.rs | 2 +- .../src/modules/vm/builtins/conversion.rs | 12 +- compiler/src/modules/vm/builtins/identity.rs | 38 +++- compiler/src/modules/vm/builtins/index.rs | 30 ++- compiler/src/modules/vm/builtins/io.rs | 12 +- compiler/src/modules/vm/builtins/sequence.rs | 23 +- compiler/src/modules/vm/dispatch.rs | 132 ++++++++--- compiler/src/modules/vm/gc.rs | 1 + compiler/src/modules/vm/handlers/arith.rs | 39 +++- compiler/src/modules/vm/handlers/data.rs | 23 +- compiler/src/modules/vm/handlers/dunder.rs | 205 +++++++++++++++++- compiler/src/modules/vm/handlers/function.rs | 51 +++-- compiler/src/modules/vm/handlers/methods.rs | 25 ++- compiler/src/modules/vm/helpers.rs | 7 +- compiler/src/modules/vm/types/coro.rs | 5 +- compiler/src/modules/vm/types/err.rs | 16 ++ compiler/src/modules/vm/types/mod.rs | 1 + compiler/tests/cases/parser.json | 29 ++- compiler/tests/cases/vm.json | 51 ++++- 20 files changed, 611 insertions(+), 126 deletions(-) diff --git a/compiler/src/modules/parser/control.rs b/compiler/src/modules/parser/control.rs index 7e0a671..e4cd4af 100644 --- a/compiler/src/modules/parser/control.rs +++ b/compiler/src/modules/parser/control.rs @@ -522,16 +522,18 @@ impl<'src, I: Iterator> Parser<'src, I> { self.commit_block(); } - /* with / async with: SetupWith per CM, ExitWith 1:1 on unwind. */ + /* with / async with: each CM gets its own implicit `SetupExcept` so the per-CM cleanup pad can run `__exit__(exc_type, exc, None)` and honour the suppression contract. Normal exit pops the except frame before running `__exit__(None, None, None)`. 
*/ pub(super) fn with_stmt_inner(&mut self, is_async: bool) { self.advance(); let operand = is_async as u16; - let mut cm_count: u16 = 0; + let mut setup_except_idxs: Vec = Vec::new(); loop { self.expr(); self.chunk.emit(OpCode::SetupWith, operand); - cm_count += 1; + // Implicit `SetupExcept` per CM; handler IP patched once the cleanup pad is emitted. + setup_except_idxs.push(self.chunk.instructions.len()); + self.chunk.emit(OpCode::SetupExcept, 0); if self.eat_if(TokenType::As) { let name = self.advance_text(); self.store_name(name); @@ -540,10 +542,33 @@ impl<'src, I: Iterator> Parser<'src, I> { } self.eat(TokenType::Colon); self.compile_block(); - // Paired ExitWith for each SetupWith. - for _ in 0..cm_count { + + // Normal exit: innermost first. PopExcept BEFORE ExitWith so a raising `__exit__(None,...)` propagates to the outer CM's cleanup, matching CPython. + let n = setup_except_idxs.len(); + let normal_exit_start = self.chunk.instructions.len(); + for _ in 0..n { + self.chunk.emit(OpCode::PopExcept, 0); self.chunk.emit(OpCode::ExitWith, operand); } + let skip_cleanup_jump = self.chunk.instructions.len(); + self.chunk.emit(OpCode::Jump, 0); + + // Cleanup pads: per-CM in source order (outermost first). Each runs `WithCleanup` then jumps into the normal-exit sequence at the point right after its own slot, so outer CMs get their `__exit__(None, None, None)` on a suppression path. + let mut cleanup_pad_positions: Vec = Vec::with_capacity(n); + for i in 0..n { + cleanup_pad_positions.push(self.chunk.instructions.len()); + self.chunk.emit(OpCode::WithCleanup, 0); + // `normal_exit_start + 2*(n-i)` lands past the PopExcept+ExitWith pairs for CMs i..n-1 (innermost). i == 0 lands at the `Jump @end` which falls through to `end`. + let target = (normal_exit_start + 2 * (n - i)) as u16; + self.chunk.emit(OpCode::Jump, target); + } + let end_label = self.chunk.instructions.len(); + + // Patch SetupExcept handler IPs and the skip-cleanup jump. 
+ for (i, &se_idx) in setup_except_idxs.iter().enumerate() { + self.chunk.instructions[se_idx].operand = cleanup_pad_positions[i] as u16; + } + self.chunk.instructions[skip_cleanup_jump].operand = end_label as u16; } /* Delegates to imports.rs; compile-time only — no import opcodes reach the VM. */ diff --git a/compiler/src/modules/parser/types.rs b/compiler/src/modules/parser/types.rs index f015509..d055644 100644 --- a/compiler/src/modules/parser/types.rs +++ b/compiler/src/modules/parser/types.rs @@ -16,7 +16,7 @@ pub enum OpCode { CallOrd, BuildDict, BuildList, NotEq, Lt, Gt, LtEq, GtEq, And, Or, Not, JumpIfFalse, Jump, GetIter, ForIter, GetItem, Mod, Pow, FloorDiv, LoadTrue, LoadFalse, LoadNone, LoadAttr, StoreAttr, BuildSlice, MakeClass, SetupExcept, PopExcept, Raise, BitAnd, BitOr, BitXor, - BitNot, Shl, Shr, In, NotIn, Is, IsNot, UnpackSequence, BuildTuple, SetupWith, ExitWith, Yield, + BitNot, Shl, Shr, In, NotIn, Is, IsNot, UnpackSequence, BuildTuple, SetupWith, ExitWith, WithCleanup, Yield, Del, Assert, Global, Nonlocal, UnpackArgs, ListAppend, SetAdd, MapAdd, BuildSet, RaiseFrom, UnpackEx, LoadEllipsis, Await, MakeCoroutine, StoreItem, Dup2, JumpIfFalseOrPop, JumpIfTrueOrPop, Dup, CallMethod, CallMethodArgs, CallAll, CallAny, CallBin, diff --git a/compiler/src/modules/vm/builtins/conversion.rs b/compiler/src/modules/vm/builtins/conversion.rs index 95f3b0d..5b21bc0 100644 --- a/compiler/src/modules/vm/builtins/conversion.rs +++ b/compiler/src/modules/vm/builtins/conversion.rs @@ -5,13 +5,17 @@ use super::super::types::*; impl<'a> VM<'a> { - pub fn call_str(&mut self) -> Result<(), VmErr> { + pub fn call_str(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let o = self.pop()?; - self.alloc_and_push_str(self.display(o)) + let s = self.display_op(o, chunk, slots)?; + self.alloc_and_push_str(s) } - pub fn call_bool(&mut self) -> Result<(), VmErr> { - let o = self.pop()?; self.push(Val::bool(self.truthy(o))); 
Ok(()) + pub fn call_bool(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { + let o = self.pop()?; + let t = self.truthy_op(o, chunk, slots)?; + self.push(Val::bool(t)); + Ok(()) } pub fn call_type(&mut self) -> Result<(), VmErr> { diff --git a/compiler/src/modules/vm/builtins/identity.rs b/compiler/src/modules/vm/builtins/identity.rs index a49da55..18dca6c 100644 --- a/compiler/src/modules/vm/builtins/identity.rs +++ b/compiler/src/modules/vm/builtins/identity.rs @@ -18,17 +18,24 @@ impl<'a> VM<'a> { Ok(()) } - pub fn call_repr(&mut self) -> Result<(), VmErr> { + pub fn call_repr(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let o = self.pop()?; - self.alloc_and_push_str(self.repr(o)) + let s = self.repr_op(o, chunk, slots)?; + self.alloc_and_push_str(s) } pub fn call_callable(&mut self) -> Result<(), VmErr> { let o = self.pop()?; let result = if o.is_heap() { - matches!(self.heap.get(o), + match self.heap.get(o) { HeapObj::Func(..) | HeapObj::BoundMethod(..) - | HeapObj::Type(_) | HeapObj::NativeFn(_)) + | HeapObj::Type(_) | HeapObj::NativeFn(_) + | HeapObj::Class(..) | HeapObj::BoundUserMethod(..) + | HeapObj::Extern(_) => true, + // F2.5: instance is callable iff its class chain defines `__call__`. + HeapObj::Instance(cls, _) => self.lookup_class_member(*cls, "__call__").is_some(), + _ => false, + } } else { false }; self.push(Val::bool(result)); Ok(()) @@ -42,9 +49,30 @@ impl<'a> VM<'a> { Ok(()) } - pub fn call_hash(&mut self) -> Result<(), VmErr> { + pub fn call_hash(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { use core::hash::{Hash, Hasher}; let o = self.pop()?; + + // F2.7: instance dispatch — user `__hash__` wins; `__eq__` without `__hash__` makes the instance unhashable. 
+ if o.is_heap() && let HeapObj::Instance(cls, _) = self.heap.get(o) { + let cls = *cls; + let has_hash = self.lookup_class_member(cls, "__hash__").is_some(); + let has_eq = self.lookup_class_member(cls, "__eq__").is_some(); + if has_hash { + let r = self.try_call_dunder(o, "__hash__", &[], chunk, slots)? + .ok_or_else(|| cold_type("__hash__ returned NotImplemented"))?; + if !r.is_int() { + return Err(cold_type("__hash__ must return int")); + } + self.push(Val::int(r.as_int() & Val::INT_MAX)); + return Ok(()); + } + if has_eq { + return Err(cold_type("unhashable type: instance defines __eq__ without __hash__")); + } + // Default fallback: pointer identity, mirroring Python's `object.__hash__`. + } + let mut h = crate::util::fx::FxHasher::default(); if o.is_int() { o.as_int().hash(&mut h); } else if o.is_float() { o.as_float().to_bits().hash(&mut h); } diff --git a/compiler/src/modules/vm/builtins/index.rs b/compiler/src/modules/vm/builtins/index.rs index cf79fed..af06dae 100644 --- a/compiler/src/modules/vm/builtins/index.rs +++ b/compiler/src/modules/vm/builtins/index.rs @@ -23,10 +23,16 @@ impl SliceSource { impl<'a> VM<'a> { - pub fn get_item(&mut self) -> Result { + pub fn get_item(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result { let idx = self.pop()?; let obj = self.pop()?; + // F2.4: instance `__getitem__` runs before built-in indexing; slices pass through as a single Slice arg. + if let Some(r) = self.try_call_dunder(obj, "__getitem__", &[idx], chunk, slots)? { + self.push(r); + return Ok(true); + } + if idx.is_heap() && let HeapObj::Slice(start, stop, step) = self.heap.get(idx).clone() { let v = self.slice_val(obj, start, stop, step)?; @@ -134,24 +140,36 @@ impl<'a> VM<'a> { } } - /* Reject mutable types (list/dict/set) used as dict/set keys. Called wherever a Val crosses into a hash-keyed container. 
*/ + /* Reject mutable types (list/dict/set) used as dict/set keys, plus instances that override `__eq__` without `__hash__`. */ pub(in crate::modules::vm) fn require_hashable(&self, v: Val) -> Result<(), VmErr> { if v.is_heap() { match self.heap.get(v) { HeapObj::List(_) => return Err(cold_type("unhashable type: 'list'")), HeapObj::Dict(_) => return Err(cold_type("unhashable type: 'dict'")), HeapObj::Set(_) => return Err(cold_type("unhashable type: 'set'")), + HeapObj::Instance(cls, _) => { + // Same eq-hash invariant as `call_hash`; defining one without the other voids hashability. + let cls = *cls; + if self.lookup_class_member(cls, "__eq__").is_some() + && self.lookup_class_member(cls, "__hash__").is_none() { + return Err(cold_type("unhashable type: instance defines __eq__ without __hash__")); + } + } _ => {} } } Ok(()) } - pub fn store_item(&mut self) -> Result<(), VmErr> { + pub fn store_item(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let value = self.pop()?; let idx_val = self.pop()?; let cont = self.pop()?; if !cont.is_heap() { return Err(cold_type("object does not support item assignment")); } + // F2.4: instance `__setitem__(idx, value)` short-circuits the built-in dispatch. + if self.try_call_dunder(cont, "__setitem__", &[idx_val, value], chunk, slots)?.is_some() { + return Ok(()); + } // Slice assignment: `xs[a:b] = iterable` (step must be 1 for resize). Resolves the target range, materialises RHS, and splices in place. 
if idx_val.is_heap() && let HeapObj::Slice(start, stop, step) = self.heap.get(idx_val).clone() @@ -179,10 +197,14 @@ impl<'a> VM<'a> { Ok(()) } - pub fn del_item(&mut self) -> Result<(), VmErr> { + pub fn del_item(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let idx_val = self.pop()?; let cont = self.pop()?; if !cont.is_heap() { return Err(cold_type("object does not support item deletion")); } + // F2.4: instance `__delitem__(idx)` short-circuits the built-in dispatch. + if self.try_call_dunder(cont, "__delitem__", &[idx_val], chunk, slots)?.is_some() { + return Ok(()); + } // Slice deletion: `del xs[a:b]` — same step=1 restriction as `store_slice`. Reuses `store_slice` with an empty replacement vec. if idx_val.is_heap() && let HeapObj::Slice(start, stop, step) = self.heap.get(idx_val).clone() diff --git a/compiler/src/modules/vm/builtins/io.rs b/compiler/src/modules/vm/builtins/io.rs index 9aff471..8ace9a6 100644 --- a/compiler/src/modules/vm/builtins/io.rs +++ b/compiler/src/modules/vm/builtins/io.rs @@ -6,12 +6,13 @@ use super::super::types::*; impl<'a> VM<'a> { /* Pops N args, joins with single spaces. Calls `print_hook` if set (streaming), otherwise buffers into `output`. */ - pub fn call_print(&mut self, op: u16) -> Result<(), VmErr> { + pub fn call_print(&mut self, op: u16, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let args = self.pop_n(op as usize)?; let mut out = String::new(); for (i, v) in args.iter().enumerate() { if i > 0 { out.push(' '); } - out.push_str(&self.display(*v)); + // F2.8: each arg goes through `display_op` so user `__str__` / `__repr__` are honoured. + out.push_str(&self.display_op(*v, chunk, slots)?); } match self.print_hook { Some(hook) => hook(&out), @@ -42,7 +43,7 @@ impl<'a> VM<'a> { } // `format(value [, spec])`. 
- pub fn call_format(&mut self, op: u16) -> Result<(), VmErr> { + pub fn call_format(&mut self, op: u16, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { if op != 1 && op != 2 { return Err(cold_type("format() takes 1 or 2 arguments")); } @@ -54,10 +55,9 @@ impl<'a> VM<'a> { HeapObj::Str(s) => s.clone(), _ => return Err(cold_type("format() spec must be a string")), }; - super::super::handlers::format::format_value(val, &spec, &self.heap) - .map_err(cold_value)? + self.format_op(val, &spec, chunk, slots)? } - None => self.display(val), + None => self.display_op(val, chunk, slots)?, }; self.alloc_and_push_str(result) } diff --git a/compiler/src/modules/vm/builtins/sequence.rs b/compiler/src/modules/vm/builtins/sequence.rs index db13ad3..b304e6b 100644 --- a/compiler/src/modules/vm/builtins/sequence.rs +++ b/compiler/src/modules/vm/builtins/sequence.rs @@ -49,8 +49,18 @@ impl IterCursor { impl<'a> VM<'a> { - pub fn call_len(&mut self) -> Result<(), VmErr> { + pub fn call_len(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let o = self.pop()?; + // F2.3: instance `__len__` takes precedence over built-in length rules. + if let Some(r) = self.try_call_dunder(o, "__len__", &[], chunk, slots)? { + let n = if r.is_int() { r.as_int() as i128 } + else if let Some(i) = crate::modules::vm::types::as_i128(r, &self.heap) { i } + else { return Err(cold_type("__len__ must return int")); }; + if n < 0 { return Err(cold_value("__len__() should return >= 0")); } + let v = self.int_to_val(Some(n))?; + self.push(v); + return Ok(()); + } let n: i64 = if o.is_heap() { match self.heap.get(o) { HeapObj::Str(s) => s.chars().count() as i64, HeapObj::Bytes(b) => b.len() as i64, @@ -332,8 +342,12 @@ impl<'a> VM<'a> { } // Materialise an iterable to a list — strings -> chars, ranges eager, coroutines drained. 
- pub fn call_list(&mut self) -> Result<(), VmErr> { + pub fn call_list(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let o = self.pop()?; + // F2.6: user-defined iterable wins over the built-in dispatch. + if let Some(items) = self.iter_to_vec_op(o, chunk, slots)? { + return self.alloc_and_push_list(items); + } if o.is_heap() { match self.heap.get(o) { HeapObj::Str(s) => { @@ -358,8 +372,11 @@ impl<'a> VM<'a> { self.alloc_and_push_list(items) } - pub fn call_tuple(&mut self) -> Result<(), VmErr> { + pub fn call_tuple(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let o = self.pop()?; + if let Some(items) = self.iter_to_vec_op(o, chunk, slots)? { + return self.alloc_and_push_tuple(items); + } let items = self.extract_iter(o, true)?; self.alloc_and_push_tuple(items) } diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index 03d12d8..a238705 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -148,21 +148,7 @@ impl<'a> VM<'a> { self.error_byte_pos = None; // Drop partial traceback so a later error doesn't inherit stale frames. self.call_stack.clear(); - // Class-name lookup key; `Raised` carries the user-supplied name. - let msg: String = match &e { - VmErr::ZeroDiv => "ZeroDivisionError".into(), - VmErr::Overflow => "OverflowError".into(), - VmErr::Type(_) => "TypeError".into(), - VmErr::TypeMsg(_) => "TypeError".into(), - VmErr::Value(_) => "ValueError".into(), - VmErr::Attribute(_)=> "AttributeError".into(), - VmErr::Name(_) => "NameError".into(), - VmErr::CallDepth => "RecursionError".into(), - VmErr::Heap => "MemoryError".into(), - VmErr::Budget => "RuntimeError".into(), - VmErr::Runtime(_) => "RuntimeError".into(), - VmErr::Raised(s) => s.clone(), - }; + let msg = e.class_name(); // Prefer the user-raised instance; synthesize one for native errors (CPython parity). 
let exc = if let Some(v) = self.pending.exc_val.take() { v @@ -206,7 +192,23 @@ impl<'a> VM<'a> { let obj = self.pop()?; let name = chunk.names.get(attr_idx as usize).ok_or(VmErr::Runtime("CallMethod: bad name index"))?.clone(); - match self.resolve_attr(obj, &name)? { + let lookup = match self.resolve_attr(obj, &name) { + Ok(l) => l, + Err(VmErr::Attribute(msg)) => { + // F2.10: if `__getattr__` resolves the name to a callable, invoke it with the positional args. + if let Some(v) = self.try_getattr_fallback(obj, &name, chunk, slots)? { + self.push(v); + for a in &positional { self.push(*a); } + for a in &kw_flat { self.push(*a); } + let argc = positional.len() as u16; + let encoded = ((kw_flat.len() as u16 / 2) << 8) | argc; + return self.exec_call(encoded, chunk, slots); + } + return Err(VmErr::Attribute(msg)); + } + Err(other) => return Err(other), + }; + match lookup { handlers::methods::AttrLookup::ModuleAttr(callee) | handlers::methods::AttrLookup::ClassMember(callee) => { // Direct call on the resolved value, no `self` prepended. @@ -253,15 +255,15 @@ impl<'a> VM<'a> { *ip += 1; match ins.opcode { - // Short-circuit jumps. + // Short-circuit jumps; instance `__bool__` / `__len__` may run via `truthy_op`. OpCode::JumpIfFalseOrPop => { let v = *self.stack.last().ok_or(cold_runtime("stack underflow"))?; - if !self.truthy(v) { *ip = op as usize; } + if !self.truthy_op(v, chunk, slots)? { *ip = op as usize; } else { self.pop()?; } } OpCode::JumpIfTrueOrPop => { let v = *self.stack.last().ok_or(cold_runtime("stack underflow"))?; - if self.truthy(v) { *ip = op as usize; } + if self.truthy_op(v, chunk, slots)? 
{ *ip = op as usize; } else { self.pop()?; } } @@ -311,19 +313,19 @@ impl<'a> VM<'a> { if matches!(ins.opcode, OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq) { - self.handle_compare(ins.opcode, rip, cache)?; + self.handle_compare(ins.opcode, rip, cache, chunk, slots)?; } else { - self.handle_arith(ins.opcode, rip, cache)?; + self.handle_arith(ins.opcode, rip, cache, chunk, slots)?; } } OpCode::Div | OpCode::Pow | OpCode::Minus => { - self.handle_arith(ins.opcode, rip, cache)?; + self.handle_arith(ins.opcode, rip, cache, chunk, slots)?; } OpCode::Jump => { *ip = self.checked_jump(op as usize, n)?; } OpCode::JumpIfFalse => { let v = self.pop()?; - if !self.truthy(v) { *ip = self.checked_jump(op as usize, n)?; } + if !self.truthy_op(v, chunk, slots)? { *ip = self.checked_jump(op as usize, n)?; } } OpCode::ForIter => { if !self.sandbox_off { @@ -345,6 +347,25 @@ impl<'a> VM<'a> { } return Ok(None); } + // F2.6: user-defined iterator calls `__next__`; `StopIteration` ends the loop without propagating, other exceptions surface. + if let Some(IterFrame::UserDefined(iter_val)) = self.iter_stack.last() { + let iter = *iter_val; + match self.try_call_dunder(iter, "__next__", &[], chunk, slots) { + Ok(Some(item)) => { self.push(item); } + Ok(None) => { + self.iter_stack.pop(); + if op as usize > n { return Err(cold_runtime("jump target out of bounds")); } + *ip = op as usize; + } + Err(VmErr::Raised(m)) if m == "StopIteration" => { + self.iter_stack.pop(); + if op as usize > n { return Err(cold_runtime("jump target out of bounds")); } + *ip = op as usize; + } + Err(e) => return Err(e), + } + return Ok(None); + } match self.iter_stack.last_mut().and_then(|f| f.next_item()) { Some(item) => self.push(item), None => { @@ -361,7 +382,7 @@ impl<'a> VM<'a> { } // Warm opcodes. 
- OpCode::GetItem => { self.get_item()?; } + OpCode::GetItem => { self.get_item(chunk, slots)?; } OpCode::Call | OpCode::CallPrint | OpCode::CallLen | OpCode::CallAbs | OpCode::CallStr | OpCode::CallInt | OpCode::CallFloat | OpCode::CallBool @@ -381,19 +402,19 @@ impl<'a> VM<'a> { OpCode::GetIter => { let obj = self.pop()?; - let frame = self.make_iter_frame(obj)?; + let frame = self.make_iter_frame(obj, chunk, slots)?; self.iter_stack.push(frame); } OpCode::LoadTrue => self.push(Val::bool(true)), OpCode::LoadFalse => self.push(Val::bool(false)), OpCode::LoadNone => self.push(Val::none()), - OpCode::Not => self.handle_logic(OpCode::Not)?, + OpCode::Not => self.handle_logic(OpCode::Not, chunk, slots)?, OpCode::Phi => { Self::exec_phi(op, rip, &chunk.phi_map, slots, &chunk.phi_sources); } - OpCode::LoadAttr => { self.handle_load_attr(op, chunk)?; } + OpCode::LoadAttr => { self.handle_load_attr(op, chunk, slots)?; } // Fused method call. OpCode::CallMethod => { @@ -499,23 +520,23 @@ impl<'a> VM<'a> { self.push(m); } - other => self.dispatch_generic(other, op, slots)?, + other => self.dispatch_generic(other, op, chunk, slots)?, } Ok(None) } - fn dispatch_generic(&mut self, opcode: OpCode, operand: u16, slots: &mut [Val]) -> Result<(), VmErr> { + fn dispatch_generic(&mut self, opcode: OpCode, operand: u16, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { match opcode { OpCode::BitAnd | OpCode::BitOr | OpCode::BitXor | OpCode::BitNot | OpCode::Shl | OpCode::Shr => self.handle_bitwise(opcode)?, - OpCode::In | OpCode::NotIn | OpCode::Is | OpCode::IsNot => self.handle_identity(opcode)?, + OpCode::In | OpCode::NotIn | OpCode::Is | OpCode::IsNot => self.handle_identity(opcode, chunk, slots)?, OpCode::BuildList | OpCode::BuildTuple | OpCode::BuildDict | OpCode::BuildString | OpCode::BuildSet | OpCode::BuildSlice => self.handle_build(opcode, operand)?, - OpCode::StoreItem => { self.mark_impure(); self.store_item()?; } - OpCode::DelItem => { self.mark_impure(); 
self.del_item()?; } - OpCode::UnpackSequence | OpCode::UnpackEx | OpCode::FormatValue => self.handle_container(opcode, operand)?, + OpCode::StoreItem => { self.mark_impure(); self.store_item(chunk, slots)?; } + OpCode::DelItem => { self.mark_impure(); self.del_item(chunk, slots)?; } + OpCode::UnpackSequence | OpCode::UnpackEx | OpCode::FormatValue => self.handle_container(opcode, operand, chunk, slots)?, OpCode::ListAppend | OpCode::SetAdd | OpCode::MapAdd => self.handle_comprehension(opcode)?, @@ -534,7 +555,7 @@ impl<'a> VM<'a> { } OpCode::Assert | OpCode::Del | OpCode::Global | OpCode::Nonlocal | OpCode::Raise | OpCode::RaiseFrom | OpCode::Await => { - self.handle_side(opcode, operand, slots)?; + self.handle_side(opcode, operand, chunk, slots)?; } OpCode::SetupExcept => { self.exception_stack.push(ExceptionFrame { @@ -547,8 +568,10 @@ impl<'a> VM<'a> { OpCode::SetupWith => { let _ = operand; let cm = self.pop()?; + // F2.9: instance `__enter__` runs at setup; its return value feeds the `as` target. + let bound = if let Some(r) = self.try_call_dunder(cm, "__enter__", &[], chunk, slots)? { r } else { cm }; self.with_stack.push(cm); - self.push(cm); + self.push(bound); } OpCode::ExitWith => { let _ = operand; @@ -556,6 +579,45 @@ impl<'a> VM<'a> { .ok_or(cold_runtime("ExitWith without matching SetupWith"))?; if let Some(&top) = self.stack.last() && top.0 == cm.0 { self.pop()?; } + // F2.9: normal-flow cleanup passes `(None, None, None)` to signal "no exception". + if cm.is_heap() && matches!(self.heap.get(cm), HeapObj::Instance(..)) { + let n = Val::none(); + let _ = self.try_call_dunder(cm, "__exit__", &[n, n, n], chunk, slots)?; + } + } + OpCode::WithCleanup => { + let _ = operand; + // Reached when a `with` body raised: the SetupExcept unwind has pushed the synthesised exception. We consume it + the matching CM and dispatch `__exit__(type, exc, None)`; truthy return suppresses, falsy or absent re-raises with identity preserved via `pending.exc_val`. 
+ let exc = self.pop()?; + let cm = self.with_stack.pop().ok_or(cold_runtime("WithCleanup without matching SetupWith"))?; + let exc_name: String = if exc.is_heap() { + match self.heap.get(exc) { + HeapObj::ExcInstance(n, _) => n.clone(), + HeapObj::Instance(cls, _) => { + if cls.is_heap() && let HeapObj::Class(name, _, _) = self.heap.get(*cls) { name.clone() } else { "Exception".into() } + } + _ => "Exception".into(), + } + } else { "Exception".into() }; + if cm.is_heap() && matches!(self.heap.get(cm), HeapObj::Instance(..)) { + let exc_type = self.heap.alloc(HeapObj::Type(exc_name.clone()))?; + let n = Val::none(); + match self.try_call_dunder(cm, "__exit__", &[exc_type, exc, n], chunk, slots)? { + Some(r) if self.truthy(r) => { + // Suppressed: drop the pending exc identity so a later `raise` doesn't reuse it. + self.pending.exc_val = None; + } + _ => { + // Re-raise: preserve identity via `pending.exc_val` so an outer handler sees the same instance. + self.pending.exc_val = Some(exc); + return Err(VmErr::Raised(exc_name)); + } + } + } else { + // No `__exit__` (or non-instance CM): re-raise unconditionally. + self.pending.exc_val = Some(exc); + return Err(VmErr::Raised(exc_name)); + } } OpCode::UnpackArgs => { let val = self.pop()?; diff --git a/compiler/src/modules/vm/gc.rs b/compiler/src/modules/vm/gc.rs index fbe89e8..5125ee5 100644 --- a/compiler/src/modules/vm/gc.rs +++ b/compiler/src/modules/vm/gc.rs @@ -21,6 +21,7 @@ impl<'a> VM<'a> { for &v in items { self.heap.mark(v); } } IterFrame::Coroutine(v) => self.heap.mark(*v), + IterFrame::UserDefined(v) => self.heap.mark(*v), IterFrame::Range { .. 
} => {} } } diff --git a/compiler/src/modules/vm/handlers/arith.rs b/compiler/src/modules/vm/handlers/arith.rs index 64454b7..8f6276c 100644 --- a/compiler/src/modules/vm/handlers/arith.rs +++ b/compiler/src/modules/vm/handlers/arith.rs @@ -5,13 +5,20 @@ use ops::cached_binop; impl<'a> VM<'a> { - /* Add/Sub/Mul/Div with IC; Mod/Pow/FloorDiv on i64 with overflow trap; Minus is unary. */ - pub(crate) fn handle_arith(&mut self, op: OpCode, rip: usize, cache: &mut OpcodeCache) -> Result<(), VmErr> { + /* Add/Sub/Mul/Div with IC; Mod/Pow/FloorDiv on i128 with overflow trap; Minus is unary. */ + pub(crate) fn handle_arith(&mut self, op: OpCode, rip: usize, cache: &mut OpcodeCache, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { if op == OpCode::Minus { - return self.exec_neg(); + return self.exec_neg(chunk, slots); } let (a, b) = self.pop2()?; + + // F2.1: instance dunder protocol — try user-defined operator before any builtin coercion. + if let Some(r) = self.try_binary_dunder(op, a, b, chunk, slots)? { + self.push(r); + return Ok(()); + } + // Register-based FastOps (Add/Sub/Mul/Mod/FloorDiv) are cached; Div/Pow are not. if matches!(op, OpCode::Add | OpCode::Sub | OpCode::Mul | OpCode::Mod | OpCode::FloorDiv) { cached_binop!(self.heap, rip, &op, a, b, cache); @@ -31,8 +38,13 @@ impl<'a> VM<'a> { Ok(()) } - fn exec_neg(&mut self) -> Result<(), VmErr> { + fn exec_neg(&mut self, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let v = self.pop()?; + // F2.1: instance `__neg__` takes precedence over numeric coercion. + if let Some(r) = self.try_call_dunder(v, "__neg__", &[], chunk, slots)? 
{ + self.push(r); + return Ok(()); + } let result = if v.is_float() { Val::float(-v.as_float()) } else if let Some(i) = self.as_i128(v) { @@ -130,11 +142,17 @@ impl<'a> VM<'a> { self.int_to_val(Some(ai >> shift.min(127))) } - pub(crate) fn handle_compare(&mut self, op: OpCode, rip: usize, cache: &mut OpcodeCache) -> Result<(), VmErr> { + pub(crate) fn handle_compare(&mut self, op: OpCode, rip: usize, cache: &mut OpcodeCache, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let (a, b) = self.pop2()?; // Record type-key for every compare op; `cache::specialize` picks the FastOp variant. cached_binop!(self.heap, rip, &op, a, b, cache); + // F2.2: try the user-defined comparison dunder before falling back to numeric/string compare. + if let Some(r) = self.try_compare_dunder(op, a, b, chunk, slots)? { + self.push(Val::bool(r)); + return Ok(()); + } + // Set/Set uses subset/superset, NOT total order — the numeric `LtEq = !lt_vals(b, a)` identity is wrong here ({1,2} <= {2,3} would come back True), so we bypass `lt_vals`. if a.is_heap() && b.is_heap() && matches!(self.heap.get(a), HeapObj::Set(_)) @@ -156,11 +174,12 @@ impl<'a> VM<'a> { } // Only plain `not`; And/Or are short-circuited by the parser via Jump-If-Or-Pop. - pub(crate) fn handle_logic(&mut self, op: OpCode) -> Result<(), VmErr> { + pub(crate) fn handle_logic(&mut self, op: OpCode, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { match op { OpCode::Not => { let v = self.pop()?; - self.push(Val::bool(!self.truthy(v))); + let t = self.truthy_op(v, chunk, slots)?; + self.push(Val::bool(!t)); } _ => return Err(cold_runtime("non-logic opcode in handle_logic")), } @@ -168,11 +187,11 @@ impl<'a> VM<'a> { } /* `is` / `is not` compare tag bits inline; `in` / `not in` delegate to contains(). 
*/ - pub(crate) fn handle_identity(&mut self, op: OpCode) -> Result<(), VmErr> { + pub(crate) fn handle_identity(&mut self, op: OpCode, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let (a, b) = self.pop2()?; let result = match op { - OpCode::In => self.contains(b, a), - OpCode::NotIn => !self.contains(b, a), + OpCode::In => self.contains_op(b, a, chunk, slots)?, + OpCode::NotIn => !self.contains_op(b, a, chunk, slots)?, OpCode::Is => a.0 == b.0, OpCode::IsNot => a.0 != b.0, _ => return Err(cold_runtime("non-identity opcode in handle_identity")), diff --git a/compiler/src/modules/vm/handlers/data.rs b/compiler/src/modules/vm/handlers/data.rs index 0001b5c..333e2bb 100644 --- a/compiler/src/modules/vm/handlers/data.rs +++ b/compiler/src/modules/vm/handlers/data.rs @@ -43,16 +43,16 @@ impl<'a> VM<'a> { } /* Indexed access/store, unpacking, and `{value!s:spec}` formatting. */ - pub(crate) fn handle_container(&mut self, op: OpCode, operand: u16) -> Result<(), VmErr> { + pub(crate) fn handle_container(&mut self, op: OpCode, operand: u16, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { match op { - OpCode::GetItem => { self.get_item()?; } + OpCode::GetItem => { self.get_item(chunk, slots)?; } OpCode::StoreItem => { self.mark_impure(); - self.store_item()?; + self.store_item(chunk, slots)?; } OpCode::DelItem => { self.mark_impure(); - self.del_item()?; + self.del_item(chunk, slots)?; } OpCode::UnpackSequence => self.exec_unpack_seq(operand as usize)?, OpCode::UnpackEx => self.unpack_ex(operand)?, @@ -63,9 +63,10 @@ impl<'a> VM<'a> { let spec_val = if has_spec { Some(self.pop()?) } else { None }; let v = self.pop()?; + // F2.8: conversion flags consult the dunder-aware helpers so `f"{x!s}"` honours `__str__`. let converted = match conv { - 1 => self.heap.alloc(HeapObj::Str(self.repr(v)))?, - 2 => self.heap.alloc(HeapObj::Str(self.display(v)))?, + 1 => { let s = self.repr_op(v, chunk, slots)?; self.heap.alloc(HeapObj::Str(s))? 
} + 2 => { let s = self.display_op(v, chunk, slots)?; self.heap.alloc(HeapObj::Str(s))? } 3 => self.heap.alloc(HeapObj::Str(super::format::display_inline(v, &self.heap).escape_default().collect::()))?, _ => v, }; @@ -76,14 +77,14 @@ impl<'a> VM<'a> { HeapObj::Str(s) => s.clone(), _ => return Err(cold_type("format spec must be a string")), }; - super::format::format_value(converted, &spec, &self.heap) - .map_err(cold_value)? + // F2.11: instance `__format__(spec)` runs through `format_op`; built-ins fall through to the spec engine. + self.format_op(converted, &spec, chunk, slots)? } None => { if conv != 0 && let HeapObj::Str(s) = self.heap.get(converted) { s.clone() } else { - self.display(converted) + self.display_op(converted, chunk, slots)? } } }; @@ -133,11 +134,11 @@ impl<'a> VM<'a> { } /* Side-effecting / impure ops: assert, del, global/nonlocal, import, type alias, raise, await. */ - pub(crate) fn handle_side(&mut self, op: OpCode, operand: u16, slots: &mut [Val]) -> Result<(), VmErr> { + pub(crate) fn handle_side(&mut self, op: OpCode, operand: u16, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { match op { OpCode::Assert => { let v = self.pop()?; - if !self.truthy(v) { return Err(VmErr::Runtime("AssertionError")); } + if !self.truthy_op(v, chunk, slots)? { return Err(VmErr::Runtime("AssertionError")); } } OpCode::Del => { let slot = operand as usize; diff --git a/compiler/src/modules/vm/handlers/dunder.rs b/compiler/src/modules/vm/handlers/dunder.rs index 1985984..ce8f6de 100644 --- a/compiler/src/modules/vm/handlers/dunder.rs +++ b/compiler/src/modules/vm/handlers/dunder.rs @@ -7,7 +7,6 @@ use super::methods::AttrLookup; impl<'a> VM<'a> { /* `recv.(*args)`: `Some(v)` on return, `None` on miss / `NotImplemented`, `Err` only on a raised dunder. */ - #[allow(dead_code)] // Consumed by per-operator handlers in the next phase. 
pub(crate) fn try_call_dunder(&mut self, recv: Val, name: &str, args: &[Val], chunk: &SSAChunk, slots: &mut [Val]) -> Result, VmErr> { // Built-in types route through their native handlers; dunder dispatch only fires on user instances. if !recv.is_heap() { return Ok(None); } @@ -29,4 +28,208 @@ impl<'a> VM<'a> { if self.heap.is_not_implemented(result) { return Ok(None); } Ok(Some(result)) } + + /* Class of an Instance, or `None` for built-in operands; powers the subclass-first ordering rule. */ + fn instance_class(&self, v: Val) -> Option { + if !v.is_heap() { return None; } + match self.heap.get(v) { HeapObj::Instance(c, _) => Some(*c), _ => None } + } + + /* Binary arithmetic dunder dispatch with Python's subclass-first ordering: if `type(b)` is a strict subclass of `type(a)`, the reflected op runs first so overrides win. */ + pub(crate) fn try_binary_dunder(&mut self, op: OpCode, a: Val, b: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result, VmErr> { + let a_cls = self.instance_class(a); + let b_cls = self.instance_class(b); + if a_cls.is_none() && b_cls.is_none() { return Ok(None); } + + let (lname, rname) = match op { + OpCode::Add => ("__add__", "__radd__"), + OpCode::Sub => ("__sub__", "__rsub__"), + OpCode::Mul => ("__mul__", "__rmul__"), + OpCode::Div => ("__truediv__", "__rtruediv__"), + OpCode::FloorDiv => ("__floordiv__", "__rfloordiv__"), + OpCode::Mod => ("__mod__", "__rmod__"), + OpCode::Pow => ("__pow__", "__rpow__"), + _ => return Ok(None), + }; + + let b_overrides = match (a_cls, b_cls) { + (Some(ac), Some(bc)) => ac.0 != bc.0 && self.heap.is_subclass(bc, ac), + _ => false, + }; + + if b_overrides { + if let Some(r) = self.try_call_dunder(b, rname, &[a], chunk, slots)? { return Ok(Some(r)); } + if let Some(r) = self.try_call_dunder(a, lname, &[b], chunk, slots)? { return Ok(Some(r)); } + } else { + if let Some(r) = self.try_call_dunder(a, lname, &[b], chunk, slots)? 
{ return Ok(Some(r)); } + if let Some(r) = self.try_call_dunder(b, rname, &[a], chunk, slots)? { return Ok(Some(r)); } + } + Ok(None) + } + + /* Comparison dunder dispatch. `__eq__` reflects to itself; `__ne__` falls back to `not __eq__`; `<` reflects to `>` and vice-versa. */ + pub(crate) fn try_compare_dunder(&mut self, op: OpCode, a: Val, b: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result, VmErr> { + let a_cls = self.instance_class(a); + let b_cls = self.instance_class(b); + if a_cls.is_none() && b_cls.is_none() { return Ok(None); } + + let (lname, rname, negate) = match op { + OpCode::Eq => ("__eq__", "__eq__", false), + OpCode::NotEq => ("__eq__", "__eq__", true), + OpCode::Lt => ("__lt__", "__gt__", false), + OpCode::LtEq => ("__le__", "__ge__", false), + OpCode::Gt => ("__gt__", "__lt__", false), + OpCode::GtEq => ("__ge__", "__le__", false), + _ => return Ok(None), + }; + + let b_overrides = match (a_cls, b_cls) { + (Some(ac), Some(bc)) => ac.0 != bc.0 && self.heap.is_subclass(bc, ac), + _ => false, + }; + + let raw = if b_overrides { + match self.try_call_dunder(b, rname, &[a], chunk, slots)? { + Some(r) => Some(r), + None => self.try_call_dunder(a, lname, &[b], chunk, slots)?, + } + } else { + match self.try_call_dunder(a, lname, &[b], chunk, slots)? { + Some(r) => Some(r), + None => self.try_call_dunder(b, rname, &[a], chunk, slots)?, + } + }; + + let Some(r) = raw else { return Ok(None); }; + let truthy = self.truthy(r); + Ok(Some(if negate { !truthy } else { truthy })) + } + + /* Python `bool()` semantics: try `__bool__`, then `__len__` (0 = False), else default True for instances. Pass-through for built-in types. */ + pub(crate) fn truthy_op(&mut self, v: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if !v.is_heap() || !matches!(self.heap.get(v), HeapObj::Instance(..)) { + return Ok(self.truthy(v)); + } + if let Some(r) = self.try_call_dunder(v, "__bool__", &[], chunk, slots)? 
{ + if !matches!(r, x if x.is_bool()) { + return Err(cold_type("__bool__ should return bool")); + } + return Ok(r.as_bool()); + } + if let Some(r) = self.try_call_dunder(v, "__len__", &[], chunk, slots)? { + return Ok(self.len_to_bool(r)?); + } + Ok(true) + } + + /* `in` operator: prefer the container's `__contains__`; for built-in sequences with an instance item, iterate using `__eq__` so user equality is honoured. */ + pub(crate) fn contains_op(&mut self, container: Val, item: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if let Some(r) = self.try_call_dunder(container, "__contains__", &[item], chunk, slots)? { + return Ok(self.truthy(r)); + } + + let item_is_instance = item.is_heap() && matches!(self.heap.get(item), HeapObj::Instance(..)); + + // Built-in sequence container + instance item: walk and compare with `__eq__` so user equality wins over pointer eq. + if item_is_instance && container.is_heap() { + let items: Option> = match self.heap.get(container) { + HeapObj::List(v) => Some(v.borrow().clone()), + HeapObj::Tuple(v) => Some(v.clone()), + HeapObj::Set(s) => Some(s.borrow().iter().copied().collect()), + HeapObj::FrozenSet(s) => Some(s.iter().copied().collect()), + _ => None, + }; + if let Some(items) = items { + for x in items { + if self.eq_op(item, x, chunk, slots)? { return Ok(true); } + } + return Ok(false); + } + } + + // User instance container with `__iter__`: walk via the iterator protocol, comparing items with `__eq__`. + if container.is_heap() && matches!(self.heap.get(container), HeapObj::Instance(..)) + && let Some(iter) = self.try_call_dunder(container, "__iter__", &[], chunk, slots)? { + loop { + match self.try_call_dunder(iter, "__next__", &[], chunk, slots) { + Ok(Some(v)) => { + if self.eq_op(item, v, chunk, slots)? 
{ return Ok(true); } + } + Ok(None) => return Ok(false), + Err(VmErr::Raised(ref m)) if m == "StopIteration" => return Ok(false), + Err(e) => return Err(e), + } + } + } + + Ok(self.contains(container, item)) + } + + /* `==` with dunder dispatch and pointer-eq fallback; used wherever `contains_op` walks a sequence. */ + pub(crate) fn eq_op(&mut self, a: Val, b: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if let Some(r) = self.try_compare_dunder(OpCode::Eq, a, b, chunk, slots)? { return Ok(r); } + Ok(eq_vals_with_heap(a, b, &self.heap)) + } + + /* Drive a user-defined iterator to a Vec; treats a missing or non-Instance receiver as "no protocol" by returning `None`. Used by `list(custom)`, `tuple(custom)`, etc. */ + pub(crate) fn iter_to_vec_op(&mut self, obj: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result>, VmErr> { + if !obj.is_heap() || !matches!(self.heap.get(obj), HeapObj::Instance(..)) { return Ok(None); } + let Some(iter) = self.try_call_dunder(obj, "__iter__", &[], chunk, slots)? else { return Ok(None); }; + let mut out = Vec::new(); + loop { + match self.try_call_dunder(iter, "__next__", &[], chunk, slots) { + Ok(Some(v)) => out.push(v), + Ok(None) => return Ok(Some(out)), + Err(VmErr::Raised(ref m)) if m == "StopIteration" => return Ok(Some(out)), + Err(e) => return Err(e), + } + } + } + + /* `str(v)` semantics: instance `__str__` wins, then `__repr__`, else the built-in display. */ + pub(crate) fn display_op(&mut self, v: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if v.is_heap() && matches!(self.heap.get(v), HeapObj::Instance(..)) { + if let Some(r) = self.try_call_dunder(v, "__str__", &[], chunk, slots)? { + return self.require_str(r, "__str__"); + } + if let Some(r) = self.try_call_dunder(v, "__repr__", &[], chunk, slots)? { + return self.require_str(r, "__repr__"); + } + } + Ok(self.display(v)) + } + + /* `repr(v)` semantics: instance `__repr__` wins; otherwise the built-in repr (which adds quotes for strings, etc.). 
*/ + pub(crate) fn repr_op(&mut self, v: Val, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if v.is_heap() && matches!(self.heap.get(v), HeapObj::Instance(..)) + && let Some(r) = self.try_call_dunder(v, "__repr__", &[], chunk, slots)? { + return self.require_str(r, "__repr__"); + } + Ok(self.repr(v)) + } + + fn require_str(&self, v: Val, name: &str) -> Result { + if v.is_heap() && let HeapObj::Str(s) = self.heap.get(v) { return Ok(s.clone()); } + Err(VmErr::TypeMsg(crate::s!("'", str name, "' did not return a string"))) + } + + /* `format(v, spec)` dispatch: instance `__format__(spec)` wins; otherwise the built-in spec engine runs. Empty spec on an instance still goes through `__format__` so user formatting can opt in. */ + pub(crate) fn format_op(&mut self, v: Val, spec: &str, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + if v.is_heap() && matches!(self.heap.get(v), HeapObj::Instance(..)) { + let spec_val = self.heap.alloc(HeapObj::Str(spec.to_string()))?; + if let Some(r) = self.try_call_dunder(v, "__format__", &[spec_val], chunk, slots)? { + return self.require_str(r, "__format__"); + } + } + super::format::format_value(v, spec, &self.heap).map_err(cold_value) + } + + /* Coerce a `__len__` / `__length_hint__` return value to bool semantics; rejects negatives like CPython. 
*/ + fn len_to_bool(&self, v: Val) -> Result { + let n = if v.is_int() { v.as_int() as i128 } + else if let Some(i) = crate::modules::vm::types::as_i128(v, &self.heap) { i } + else { return Err(cold_type("__len__ must return int")); }; + if n < 0 { return Err(cold_value("__len__() should return >= 0")); } + Ok(n != 0) + } } diff --git a/compiler/src/modules/vm/handlers/function.rs b/compiler/src/modules/vm/handlers/function.rs index 4662815..6133956 100644 --- a/compiler/src/modules/vm/handlers/function.rs +++ b/compiler/src/modules/vm/handlers/function.rs @@ -10,18 +10,18 @@ impl<'a> VM<'a> { match op { OpCode::Call => self.exec_call(operand, chunk, slots), OpCode::MakeFunction | OpCode::MakeCoroutine => self.exec_make_function(op, operand, chunk, slots), - OpCode::CallLen => self.call_len(), + OpCode::CallLen => self.call_len(chunk, slots), OpCode::CallAbs => self.call_abs(), - OpCode::CallStr => self.call_str(), + OpCode::CallStr => self.call_str(chunk, slots), OpCode::CallInt => self.call_int(), OpCode::CallFloat => self.call_float(), - OpCode::CallBool => self.call_bool(), + OpCode::CallBool => self.call_bool(chunk, slots), OpCode::CallType => self.call_type(), OpCode::CallChr => self.call_chr(), OpCode::CallOrd => self.call_ord(), OpCode::CallSorted => self.call_sorted(), - OpCode::CallList => self.call_list(), - OpCode::CallTuple => self.call_tuple(), + OpCode::CallList => self.call_list(chunk, slots), + OpCode::CallTuple => self.call_tuple(chunk, slots), OpCode::CallEnumerate => self.call_enumerate(), OpCode::CallIsInstance => self.call_isinstance(), OpCode::CallRange => self.call_range(operand), @@ -32,7 +32,7 @@ impl<'a> VM<'a> { OpCode::CallZip => self.call_zip(operand), OpCode::CallDict => self.call_dict(operand), OpCode::CallSet => self.call_set(operand), - OpCode::CallPrint => { self.mark_impure(); self.call_print(operand) } + OpCode::CallPrint => { self.mark_impure(); self.call_print(operand, chunk, slots) } OpCode::CallInput => { self.mark_impure(); 
self.call_input() } OpCode::CallAll => self.call_all(operand), OpCode::CallAny => self.call_any(operand), @@ -41,11 +41,11 @@ impl<'a> VM<'a> { OpCode::CallHex => self.call_hex(), OpCode::CallDivmod => self.call_divmod(), OpCode::CallPow => self.call_pow(operand), - OpCode::CallRepr => self.call_repr(), + OpCode::CallRepr => self.call_repr(chunk, slots), OpCode::CallReversed => self.call_reversed(), OpCode::CallCallable => self.call_callable(), OpCode::CallId => self.call_id(), - OpCode::CallHash => self.call_hash(), + OpCode::CallHash => self.call_hash(chunk, slots), OpCode::CallExtern => self.call_extern(operand, chunk), _ => Err(cold_runtime("non-function opcode in handle_function")), } @@ -268,6 +268,23 @@ impl<'a> VM<'a> { return Ok(true); } + // F2.5: instance with `__call__` — bind and dispatch through `BoundUserMethod`-style flow. + if let HeapObj::Instance(..) = self.heap.get(callee) + && let Some((func, class)) = self.lookup_class_member( + match self.heap.get(callee) { HeapObj::Instance(c, _) => *c, _ => unreachable!() }, + "__call__") + { + if !kw_flat.is_empty() { return Err(cold_type("__call__ does not accept keyword arguments")); } + if self.depth >= self.max_calls { return Err(cold_depth()); } + self.pending.method_binding = Some((class, callee)); + self.push(func); + self.push(callee); + for a in positional { self.push(*a); } + let argc = (positional.len() + 1) as u16; + self.exec_call(argc, chunk, slots)?; + return Ok(true); + } + // Resume a suspended coroutine; the inner yield must NOT propagate to the caller. if let HeapObj::Coroutine(..) = self.heap.get(callee) { let result = self.resume_coroutine(callee)?; @@ -559,7 +576,7 @@ impl<'a> VM<'a> { // Variadic Print => { // CallPrint is statement-shaped (no trailing Pop); when reached via Call the parser emits Pop, so push None to keep the stack balanced. 
- self.call_print(argc)?; + self.call_print(argc, chunk, slots)?; self.push(Val::none()); Ok(()) } @@ -575,30 +592,30 @@ impl<'a> VM<'a> { All => self.call_all(argc), Any => self.call_any(argc), GetAttr => self.call_getattr(argc), - Format => self.call_format(argc), + Format => self.call_format(argc, chunk, slots), // 0/1/2-arg Input => self.call_input(), - Len => self.call_len(), + Len => self.call_len(chunk, slots), Abs => self.call_abs(), - Str => self.call_str(), + Str => self.call_str(chunk, slots), Int => self.call_int(), Float => self.call_float(), - Bool => self.call_bool(), + Bool => self.call_bool(chunk, slots), Type => self.call_type(), Chr => self.call_chr(), Ord => self.call_ord(), Sorted => self.call_sorted_with_key(sort_key, chunk, slots), Enumerate => self.call_enumerate(), - List => self.call_list(), - Tuple => self.call_tuple(), + List => self.call_list(chunk, slots), + Tuple => self.call_tuple(chunk, slots), Bin => self.call_bin(), Oct => self.call_oct(), Hex => self.call_hex(), - Repr => self.call_repr(), + Repr => self.call_repr(chunk, slots), Reversed => self.call_reversed(), Callable => self.call_callable(), Id => self.call_id(), - Hash => self.call_hash(), + Hash => self.call_hash(chunk, slots), Divmod => self.call_divmod(), IsInstance => self.call_isinstance(), HasAttr => self.call_hasattr(), diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index 3dd2d43..3064e3c 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -104,8 +104,6 @@ impl<'a> VM<'a> { } // `resolve_attr` that swallows `AttributeError` into `None`; other VmErrs still propagate — dunder probes need a miss to be silent. - // Allow: consumed by `try_call_dunder` and the per-operator handlers wired in the next phase. 
- #[allow(dead_code)] pub(crate) fn resolve_attr_silent(&self, obj: Val, name: &str) -> Result, VmErr> { match self.resolve_attr(obj, name) { Ok(lookup) => Ok(Some(lookup)), @@ -114,10 +112,29 @@ impl<'a> VM<'a> { } } - pub(crate) fn handle_load_attr(&mut self, name_idx: u16, chunk: &SSAChunk) -> Result<(), VmErr> { + /* F2.10: instance fallback via `__getattr__(name)`. Called by `LoadAttr` / `CallMethod` after the normal lookup raises `AttributeError`. */ + pub(crate) fn try_getattr_fallback(&mut self, obj: Val, name: &str, chunk: &SSAChunk, slots: &mut [Val]) -> Result, VmErr> { + if !obj.is_heap() || !matches!(self.heap.get(obj), HeapObj::Instance(..)) { return Ok(None); } + let bare = crate::modules::parser::ssa_strip(name); + let name_val = self.heap.alloc(HeapObj::Str(bare.to_string()))?; + self.try_call_dunder(obj, "__getattr__", &[name_val], chunk, slots) + } + + pub(crate) fn handle_load_attr(&mut self, name_idx: u16, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let name = chunk.names.get(name_idx as usize).ok_or(VmErr::Runtime("LoadAttr: bad name index"))?.clone(); let obj = self.pop()?; - match self.resolve_attr(obj, &name)? { + let lookup = match self.resolve_attr(obj, &name) { + Ok(l) => l, + Err(VmErr::Attribute(msg)) => { + if let Some(v) = self.try_getattr_fallback(obj, &name, chunk, slots)? 
{ + self.push(v); + return Ok(()); + } + return Err(VmErr::Attribute(msg)); + } + Err(other) => return Err(other), + }; + match lookup { AttrLookup::ModuleAttr(v) | AttrLookup::ClassMember(v) | AttrLookup::InstanceField(v) => { diff --git a/compiler/src/modules/vm/helpers.rs b/compiler/src/modules/vm/helpers.rs index 2e86380..65b446a 100644 --- a/compiler/src/modules/vm/helpers.rs +++ b/compiler/src/modules/vm/helpers.rs @@ -100,10 +100,15 @@ impl<'a> VM<'a> { s.chars().map(|c| self.heap.alloc(HeapObj::Str(c.to_string()))).collect() } - pub(crate) fn make_iter_frame(&mut self, obj: Val) -> Result { + pub(crate) fn make_iter_frame(&mut self, obj: Val, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result { if !obj.is_heap() { return Err(VmErr::TypeMsg(s!("'", str self.type_name(obj), "' object is not iterable"))); } + // F2.6: instance `__iter__` produces a user-defined iterator that drives `ForIter` via `__next__`. + if matches!(self.heap.get(obj), HeapObj::Instance(..)) + && let Some(iter) = self.try_call_dunder(obj, "__iter__", &[], chunk, slots)? { + return Ok(IterFrame::UserDefined(iter)); + } Ok(match self.heap.get(obj) { HeapObj::Range(s, e, st) => IterFrame::Range { cur: *s, end: *e, step: *st }, HeapObj::List(v) => IterFrame::Seq { items: v.borrow().clone(), idx: 0 }, diff --git a/compiler/src/modules/vm/types/coro.rs b/compiler/src/modules/vm/types/coro.rs index 12f1c7a..52c05b1 100644 --- a/compiler/src/modules/vm/types/coro.rs +++ b/compiler/src/modules/vm/types/coro.rs @@ -45,12 +45,15 @@ pub enum IterFrame { Seq { items: Vec, idx: usize }, Range { cur: i64, end: i64, step: i64 }, Coroutine(Val), + // User-defined iterator: holds the value returned by `__iter__`; each step calls its `__next__`. + UserDefined(Val), } impl IterFrame { + /* Stateless steps only — built-in Seq/Range. User-defined iterators step in `dispatch.rs` because they need the VM to invoke `__next__`. 
*/ pub fn next_item(&mut self) -> Option { match self { - Self::Coroutine(_) => None, + Self::Coroutine(_) | Self::UserDefined(_) => None, Self::Seq { items, idx } => { if *idx < items.len() { let v = items[*idx]; *idx += 1; Some(v) } else { None } } diff --git a/compiler/src/modules/vm/types/err.rs b/compiler/src/modules/vm/types/err.rs index 858d34d..43bea63 100644 --- a/compiler/src/modules/vm/types/err.rs +++ b/compiler/src/modules/vm/types/err.rs @@ -16,6 +16,22 @@ pub enum VmErr { } impl VmErr { + /* Class-name lookup used by the exception unwind path and the `with` cleanup opcode; native errors get their CPython-equivalent name, `Raised` keeps the user-supplied name. */ + pub fn class_name(&self) -> alloc::string::String { + match self { + Self::ZeroDiv => "ZeroDivisionError".into(), + Self::Overflow => "OverflowError".into(), + Self::Type(_) | Self::TypeMsg(_) => "TypeError".into(), + Self::Value(_) => "ValueError".into(), + Self::Attribute(_) => "AttributeError".into(), + Self::Name(_) => "NameError".into(), + Self::CallDepth => "RecursionError".into(), + Self::Heap => "MemoryError".into(), + Self::Budget | Self::Runtime(_) => "RuntimeError".into(), + Self::Raised(s) => s.clone(), + } + } + pub fn as_str(&self) -> &'static str { match self { Self::CallDepth => "RecursionError: max depth", diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 7ef43f9..29b36f3 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -302,6 +302,7 @@ pub(crate) fn for_each_val(obj: &HeapObj, mut f: impl FnMut(Val)) { for fr in iters { match fr { IterFrame::Seq { items, .. } => for &v in items { f(v); }, IterFrame::Coroutine(v) => f(*v), + IterFrame::UserDefined(v) => f(*v), IterFrame::Range { .. 
} => {} }} } diff --git a/compiler/tests/cases/parser.json b/compiler/tests/cases/parser.json index 0484d73..c69e97e 100644 --- a/compiler/tests/cases/parser.json +++ b/compiler/tests/cases/parser.json @@ -735,17 +735,17 @@ "functions": 1 }, { - "src": "with x as f:\n pass", - "constants": [], - "names": ["x_0", "f_1"], - "instructions": [["LoadName",0], ["SetupWith",0], ["StoreName",1], ["ExitWith",0], ["ReturnValue",0]], + "src": "with x as f:\n pass", + "constants": [], + "names": ["x_0", "f_1"], + "instructions": [["LoadName",0], ["SetupWith",0], ["SetupExcept",7], ["StoreName",1], ["PopExcept",0], ["ExitWith",0], ["Jump",9], ["WithCleanup",0], ["Jump",6], ["ReturnValue",0]], "annotations": {} }, { - "src": "with x:\n pass", - "constants": [], - "names": ["x_0"], - "instructions": [["LoadName",0], ["SetupWith",0], ["ExitWith",0], ["ReturnValue",0]], + "src": "with x:\n pass", + "constants": [], + "names": ["x_0"], + "instructions": [["LoadName",0], ["SetupWith",0], ["SetupExcept",6], ["PopExcept",0], ["ExitWith",0], ["Jump",8], ["WithCleanup",0], ["Jump",5], ["ReturnValue",0]], "annotations": {} }, { @@ -1062,15 +1062,14 @@ "src": "with a, b:\n pass", "constants": [], "names": ["a_0", "b_0"], - "instructions": [["LoadName", 0], ["SetupWith", 0],["LoadName", 1], ["SetupWith", 0],["ExitWith", 0], ["ExitWith", 0],["ReturnValue", 0]], + "instructions": [["LoadName",0], ["SetupWith",0], ["SetupExcept",11], ["LoadName",1], ["SetupWith",0], ["SetupExcept",13], ["PopExcept",0], ["ExitWith",0], ["PopExcept",0], ["ExitWith",0], ["Jump",15], ["WithCleanup",0], ["Jump",10], ["WithCleanup",0], ["Jump",8], ["ReturnValue",0]], "annotations": {} }, { "src": "with a as x, b as y:\n pass", "constants": [], "names": ["a_0", "x_1", "b_0", "y_1"], - "instructions": [["LoadName", 0], ["SetupWith", 0], ["StoreName", 1],["LoadName", 2], ["SetupWith", 0], ["StoreName", 3],["ExitWith", 0], ["ExitWith", 0],["ReturnValue", 0] - ], + "instructions": [["LoadName",0], ["SetupWith",0], 
["SetupExcept",13], ["StoreName",1], ["LoadName",2], ["SetupWith",0], ["SetupExcept",15], ["StoreName",3], ["PopExcept",0], ["ExitWith",0], ["PopExcept",0], ["ExitWith",0], ["Jump",17], ["WithCleanup",0], ["Jump",12], ["WithCleanup",0], ["Jump",10], ["ReturnValue",0]], "annotations": {} }, { @@ -1269,10 +1268,10 @@ "annotations": {} }, { - "src": "async with a as x:\n pass", - "constants": [], - "names": ["a_0", "x_1"], - "instructions": [["LoadName",0], ["SetupWith",1], ["StoreName",1], ["ExitWith",1], ["ReturnValue",0]], + "src": "async with a as x:\n pass", + "constants": [], + "names": ["a_0", "x_1"], + "instructions": [["LoadName",0], ["SetupWith",1], ["SetupExcept",7], ["StoreName",1], ["PopExcept",0], ["ExitWith",1], ["Jump",9], ["WithCleanup",0], ["Jump",6], ["ReturnValue",0]], "annotations": {} }, { diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index 8180ffb..a46288b 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1514,8 +1514,8 @@ {"src": "import asyncio", "output": [], "error": "asyncio"}, {"src": "print(2 ** 127)", "output": [], "error": "OverflowError"}, {"src": "def deco(c):\n print('decorated')\n return c\n@deco\nclass A: pass", "output": ["decorated"]}, - {"src": "class A:\n def __add__(self, o):\n return 'magic'\nprint(A() + A())", "output": [], "error": "unsupported operand"}, - {"src": "class CM:\n def __enter__(self):\n print('enter')\n return self\n def __exit__(self, *a):\n print('exit')\nwith CM() as c:\n print('body')", "output": ["body"]}, + {"src": "class A:\n def __add__(self, o):\n return 'magic'\nprint(A() + A())", "output": ["magic"]}, + {"src": "class CM:\n def __enter__(self):\n print('enter')\n return self\n def __exit__(self, *a):\n print('exit')\nwith CM() as c:\n print('body')", "output": ["enter", "body", "exit"]}, {"src": "class A:\n def m(self):\n return 'A'\nclass B(A):\n def m(self):\n return super().m() + 'B'\nprint(B().m())", "output": ["AB"]}, {"src": "class A: 
pass", "output": []}, {"src": "print(140737488355328)", "output": ["140737488355328"]}, @@ -1571,5 +1571,50 @@ {"src": "class A:\n pass\nclass B(A):\n pass\nprint(isinstance(A(), B))", "output": ["False"]}, {"src": "class A:\n pass\nclass B:\n pass\nprint(isinstance(A(), (B, A)))", "output": ["True"]}, {"src": "class A:\n pass\nclass B(A):\n pass\nclass C(B):\n pass\nprint(isinstance(C(), A))", "output": ["True"]}, - {"src": "class A:\n pass\nprint(isinstance(42, A))", "output": ["False"]} + {"src": "class A:\n pass\nprint(isinstance(42, A))", "output": ["False"]}, + {"src": "class M:\n def __init__(self, n):\n self.n = n\n def __add__(self, o):\n if isinstance(o, M): return M(self.n + o.n)\n return M(self.n + o)\n def __radd__(self, o):\n return M(o + self.n)\nprint((M(10) + M(5)).n)\nprint((5 + M(10)).n)\nprint((M(10) + 5).n)", "output": ["15", "15", "15"]}, + {"src": "class M:\n def __sub__(self, o):\n return self.n - o\n def __init__(self, n):\n self.n = n\nprint(M(10) - 3)", "output": ["7"]}, + {"src": "class M:\n def __init__(self, n):\n self.n = n\n def __mul__(self, o):\n return self.n * o\nprint(M(4) * 3)", "output": ["12"]}, + {"src": "class M:\n def __init__(self, n):\n self.n = n\n def __truediv__(self, o):\n return self.n / o\n def __floordiv__(self, o):\n return self.n // o\n def __mod__(self, o):\n return self.n % o\n def __pow__(self, o):\n return self.n ** o\nm = M(10)\nprint(m / 4)\nprint(m // 3)\nprint(m % 3)\nprint(m ** 2)", "output": ["2.5", "3", "1", "100"]}, + {"src": "class M:\n def __init__(self, n):\n self.n = n\n def __neg__(self):\n return M(-self.n)\nprint((-M(7)).n)", "output": ["-7"]}, + {"src": "class Base:\n def __add__(self, o):\n return 'Base.add'\n def __radd__(self, o):\n return 'Base.radd'\nclass Sub(Base):\n def __radd__(self, o):\n return 'Sub.radd'\nprint(Base() + Sub())", "output": ["Sub.radd"]}, + {"src": "class M:\n def __add__(self, o):\n return NotImplemented\nprint(M() + 'x')", "output": [], "error": "unsupported 
operand"}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __eq__(self, o):\n return isinstance(o, V) and self.n == o.n\nprint(V(3) == V(3))\nprint(V(3) == V(4))\nprint(V(3) != V(4))", "output": ["True", "False", "True"]}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __lt__(self, o):\n return self.n < o.n\nprint(V(3) < V(5))\nprint(V(5) > V(3))", "output": ["True", "True"]}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __lt__(self, o):\n return self.n < o\nprint(V(1) < 5)\nprint(V(10) < 5)", "output": ["True", "False"]}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __eq__(self, o):\n return isinstance(o, V) and self.n == o.n\n def __hash__(self):\n return self.n\nprint(V(2) in [V(1), V(2), V(3)])\nprint(V(9) in [V(1), V(2)])", "output": ["True", "False"]}, + {"src": "class Empty:\n def __bool__(self):\n return False\nprint(bool(Empty()))\nif Empty():\n print('truthy')\nelse:\n print('falsy')", "output": ["False", "falsy"]}, + {"src": "class C:\n def __init__(self, items):\n self.items = items\n def __len__(self):\n return len(self.items)\nprint(len(C([1,2,3])))\nprint(bool(C([])))\nprint(bool(C([1])))", "output": ["3", "False", "True"]}, + {"src": "class Always:\n pass\nprint(bool(Always()))", "output": ["True"]}, + {"src": "class S:\n def __init__(self, data):\n self.data = data\n def __getitem__(self, i):\n return self.data[i]\nprint(S([10, 20, 30])[1])", "output": ["20"]}, + {"src": "class S:\n def __init__(self, data):\n self.data = data\n def __getitem__(self, i):\n return type(i)\nprint(S([1,2,3,4])[1:3])\nprint(S([1,2,3])[0])", "output": ["", ""]}, + {"src": "class S:\n def __init__(self):\n self.calls = []\n def __setitem__(self, k, v):\n self.calls.append((k, v))\ns = S()\ns[3] = 'x'\ns['k'] = 42\nprint(s.calls)", "output": ["[(3, 'x'), ('k', 42)]"]}, + {"src": "class S:\n def __init__(self):\n self.deleted = []\n def __delitem__(self, k):\n self.deleted.append(k)\ns = S()\ndel 
s[5]\ndel s['x']\nprint(s.deleted)", "output": ["[5, 'x']"]}, + {"src": "class S:\n def __init__(self):\n self.items = [1, 2, 3]\n def __contains__(self, v):\n return v in self.items\nprint(2 in S())\nprint(99 in S())", "output": ["True", "False"]}, + {"src": "class D:\n def __call__(self, x):\n return x * 2\nd = D()\nprint(d(5))\nprint(callable(d))", "output": ["10", "True"]}, + {"src": "class N:\n pass\nprint(callable(N()))", "output": ["False"]}, + {"src": "class C:\n def __init__(self):\n self.i = 0\n def __iter__(self):\n return self\n def __next__(self):\n if self.i >= 3:\n raise StopIteration\n self.i = self.i + 1\n return self.i\nfor x in C():\n print(x)", "output": ["1", "2", "3"]}, + {"src": "class C:\n def __init__(self):\n self.i = 0\n def __iter__(self):\n return self\n def __next__(self):\n if self.i >= 4:\n raise StopIteration\n self.i = self.i + 1\n return self.i * 10\nprint(list(C()))", "output": ["[10, 20, 30, 40]"]}, + {"src": "class Outer:\n def __init__(self):\n self.i = 0\n def __iter__(self):\n return self\n def __next__(self):\n if self.i >= 2:\n raise StopIteration\n self.i = self.i + 1\n return self.i\ntotal = 0\nfor a in Outer():\n for b in Outer():\n total = total + 1\nprint(total)", "output": ["4"]}, + {"src": "class P:\n def __init__(self, n):\n self.n = n\n def __hash__(self):\n return self.n * 31\nprint(hash(P(7)) == 7 * 31)", "output": ["True"]}, + {"src": "class K:\n def __init__(self, n):\n self.n = n\n def __hash__(self):\n return self.n\nk = K(5)\nd = {k: 'v'}\nprint(d[k])", "output": ["v"]}, + {"src": "class Bad:\n def __eq__(self, o):\n return True\nprint({Bad(): 1})", "output": [], "error": "unhashable"}, + {"src": "class Bad:\n def __eq__(self, o):\n return True\nprint(hash(Bad()))", "output": [], "error": "unhashable"}, + {"src": "class P:\n def __init__(self, n):\n self.n = n\n def __repr__(self):\n return 'P(' + str(self.n) + ')'\nprint(repr(P(7)))\nprint(P(3))", "output": ["P(7)", "P(3)"]}, + {"src": "class P:\n def 
__init__(self, n):\n self.n = n\n def __str__(self):\n return 'str:' + str(self.n)\n def __repr__(self):\n return 'repr:' + str(self.n)\nprint(str(P(1)))\nprint(repr(P(1)))\nprint(P(1))", "output": ["str:1", "repr:1", "str:1"]}, + {"src": "class P:\n def __init__(self, n):\n self.n = n\n def __repr__(self):\n return 'P/' + str(self.n)\nprint(str(P(8)))", "output": ["P/8"]}, + {"src": "class P:\n def __str__(self):\n return 'A'\nprint(f'{P()}')", "output": ["A"]}, + {"src": "class CM:\n def __enter__(self):\n return 'value'\n def __exit__(self, *a):\n pass\nwith CM() as x:\n print(x)", "output": ["value"]}, + {"src": "class CM:\n def __enter__(self):\n print('e')\n def __exit__(self, t, v, tb):\n print('x:', t)\ntry:\n with CM():\n raise ValueError('boom')\nexcept ValueError:\n print('caught')", "output": ["e", "x: ", "caught"]}, + {"src": "class Outer:\n def __enter__(self):\n print('O.e')\n def __exit__(self, *a):\n print('O.x')\nclass Inner:\n def __enter__(self):\n print('I.e')\n def __exit__(self, *a):\n print('I.x')\nwith Outer():\n with Inner():\n print('body')", "output": ["O.e", "I.e", "body", "I.x", "O.x"]}, + {"src": "class P:\n def __format__(self, spec):\n return 'fmt:' + spec\nprint(f'{P():>10}')\nprint(format(P(), 'x'))", "output": ["fmt:>10", "fmt:x"]}, + {"src": "class Proxy:\n def __getattr__(self, name):\n return 'attr:' + name\np = Proxy()\nprint(p.foo)\nprint(p.bar)", "output": ["attr:foo", "attr:bar"]}, + {"src": "class P:\n def __init__(self):\n self.x = 1\n def __getattr__(self, name):\n return 'fallback'\np = P()\nprint(p.x)\nprint(p.y)", "output": ["1", "fallback"]}, + {"src": "class P:\n def __getattr__(self, name):\n raise AttributeError('no ' + name)\nprint(P().missing)", "output": [], "error": "AttributeError"}, + {"src": "class Suppress:\n def __enter__(self):\n return self\n def __exit__(self, *a):\n return True\nwith Suppress():\n raise ValueError('x')\nprint('after')", "output": ["after"]}, + {"src": "class Pass:\n def 
__enter__(self):\n return self\n def __exit__(self, *a):\n return False\ntry:\n with Pass():\n raise ValueError('x')\nexcept ValueError:\n print('caught')", "output": ["caught"]}, + {"src": "class S:\n def __init__(self, suppress):\n self.suppress = suppress\n def __enter__(self):\n return self\n def __exit__(self, t, v, tb):\n print('exit', self.suppress)\n return self.suppress\nwith S(False):\n try:\n with S(True):\n raise ValueError('x')\n print('inner-after')\n except ValueError:\n print('outer-caught')", "output": ["exit True", "inner-after", "exit False"]}, + {"src": "class CM:\n def __enter__(self):\n return self\n def __exit__(self, t, v, tb):\n raise RuntimeError('exit-fail')\ntry:\n with CM():\n pass\nexcept RuntimeError:\n print('caught-exit')", "output": ["caught-exit"]}, + {"src": "class Chain:\n def __init__(self, n):\n self.n = n\n def __enter__(self):\n return self\n def __exit__(self, *a):\n print('exit', self.n)\nwith Chain(1), Chain(2):\n print('body')", "output": ["body", "exit 2", "exit 1"]}, + {"src": "class Chain:\n def __init__(self, n, suppress):\n self.n = n\n self.suppress = suppress\n def __enter__(self):\n return self\n def __exit__(self, *a):\n print('exit', self.n)\n return self.suppress\nwith Chain(1, False), Chain(2, True):\n raise ValueError('x')\nprint('after')", "output": ["exit 2", "exit 1", "after"]} ] From d2b9519f76285ac0c5423e1caaab80e644bcdc80 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 19:14:09 -0600 Subject: [PATCH 4/7] feat(vm): Add @property and @x.setter descriptor support. 
--- compiler/src/modules/vm/builtins/identity.rs | 13 ++++++ compiler/src/modules/vm/dispatch.rs | 46 +++++++++++++++++++- compiler/src/modules/vm/handlers/function.rs | 18 ++++++++ compiler/src/modules/vm/handlers/methods.rs | 32 ++++++++++++-- compiler/src/modules/vm/mod.rs | 1 + compiler/src/modules/vm/ops.rs | 6 +++ compiler/src/modules/vm/types/mod.rs | 10 +++++ compiler/tests/cases/vm.json | 8 +++- 8 files changed, 129 insertions(+), 5 deletions(-) diff --git a/compiler/src/modules/vm/builtins/identity.rs b/compiler/src/modules/vm/builtins/identity.rs index 18dca6c..b1aea00 100644 --- a/compiler/src/modules/vm/builtins/identity.rs +++ b/compiler/src/modules/vm/builtins/identity.rs @@ -6,6 +6,19 @@ use super::matches_exc_class; impl<'a> VM<'a> { + /* `property(fget)` / `property(fget, fset)` — captures the descriptor pair the class chain hands to `LoadAttr` / `StoreAttr`. The `@x.setter` decorator builds the second form via `PropertySetter`. */ + pub fn call_property(&mut self, argc: u16) -> Result<(), VmErr> { + let args = self.pop_n(argc as usize)?; + let (getter, setter) = match args.as_slice() { + [g] => (*g, Val::none()), + [g, s] => (*g, *s), + _ => return Err(cold_type("property() takes 1 or 2 arguments")), + }; + let prop = self.heap.alloc(HeapObj::Property(getter, setter))?; + self.push(prop); + Ok(()) + } + // `super()` zero-arg: reads the running method's `(class, self)` off the top frame and returns a Super proxy. 
pub fn call_super(&mut self) -> Result<(), VmErr> { let binding = self.call_stack.last() diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index a238705..f1349cb 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -242,6 +242,29 @@ impl<'a> VM<'a> { let ty = self.type_name(obj); Err(VmErr::Attribute(s!("'", str ty, "' object has no attribute '", str &name, "'"))) } + handlers::methods::AttrLookup::PropertyGet { recv, getter } => { + // Materialise the value first, then call it with the user's args — `foo.prop(arg)` where `prop` returns a callable. + if self.depth >= self.max_calls { return Err(cold_depth()); } + self.push(getter); + self.push(recv); + self.exec_call(1, chunk, slots)?; + let result = self.pop()?; + self.push(result); + for a in &positional { self.push(*a); } + for a in &kw_flat { self.push(*a); } + let argc = positional.len() as u16; + let encoded = ((kw_flat.len() as u16 / 2) << 8) | argc; + self.exec_call(encoded, chunk, slots) + } + handlers::methods::AttrLookup::PropertySetterRef(prop) => { + let v = self.heap.alloc(HeapObj::PropertySetter(prop))?; + self.push(v); + for a in &positional { self.push(*a); } + for a in &kw_flat { self.push(*a); } + let argc = positional.len() as u16; + let encoded = ((kw_flat.len() as u16 / 2) << 8) | argc; + self.exec_call(encoded, chunk, slots) + } } } @@ -460,7 +483,10 @@ impl<'a> VM<'a> { && matches!(self.heap.get(v), HeapObj::NativeFn(_)) && self.globals.get(base).copied() == Some(v); if is_builtin_shadow { continue; } - if !methods.iter().any(|(n, _)| n == base) { + // A later definition of the same name wins (e.g. `@property` + `@x.setter` rebinds `x` to the descriptor with both halves). 
+ if let Some(pos) = methods.iter().position(|(n, _)| n == base) { + methods[pos].1 = v; + } else { methods.push((base.to_string(), v)); } } @@ -476,6 +502,24 @@ impl<'a> VM<'a> { if !obj.is_heap() { return Err(cold_type("cannot set attribute")); } let name = chunk.names.get(op as usize) .ok_or(cold_runtime("StoreAttr: bad name index"))?.clone(); + // F3: a class-chain `Property` overrides plain dict insertion. Setter `none()` means read-only. + if let HeapObj::Instance(cls_val, _) = self.heap.get(obj) { + let cls_val = *cls_val; + if let Some((member, _)) = self.lookup_class_member(cls_val, ssa_strip(&name)) + && let HeapObj::Property(_, setter) = self.heap.get(member) { + let setter = *setter; + if setter.is_none() { + return Err(VmErr::Attribute(s!("can't set attribute '", str ssa_strip(&name), "'"))); + } + if self.depth >= self.max_calls { return Err(cold_depth()); } + self.push(setter); + self.push(obj); + self.push(value); + self.exec_call(2, chunk, slots)?; + self.pop()?; + return Ok(None); + } + } let key = self.heap.alloc(HeapObj::Str(name))?; match self.heap.get_mut(obj) { HeapObj::Instance(_, attrs) => { diff --git a/compiler/src/modules/vm/handlers/function.rs b/compiler/src/modules/vm/handlers/function.rs index 6133956..2d32dbe 100644 --- a/compiler/src/modules/vm/handlers/function.rs +++ b/compiler/src/modules/vm/handlers/function.rs @@ -268,6 +268,22 @@ impl<'a> VM<'a> { return Ok(true); } + // F3: `prop.setter(fn)` returns a new `Property` carrying the original getter plus the supplied setter. 
+ if let HeapObj::PropertySetter(prop_val) = self.heap.get(callee) { + if positional.len() != 1 || !kw_flat.is_empty() { + return Err(cold_type("property.setter takes exactly 1 argument")); + } + let prop_val = *prop_val; + let getter = match self.heap.get(prop_val) { + HeapObj::Property(g, _) => *g, + _ => return Err(cold_runtime("PropertySetter wraps a non-Property value")), + }; + let new_setter = positional[0]; + let new_prop = self.heap.alloc(HeapObj::Property(getter, new_setter))?; + self.push(new_prop); + return Ok(true); + } + // F2.5: instance with `__call__` — bind and dispatch through `BoundUserMethod`-style flow. if let HeapObj::Instance(..) = self.heap.get(callee) && let Some((func, class)) = self.lookup_class_member( @@ -559,6 +575,7 @@ impl<'a> VM<'a> { IntFromBytes => Some(2), IntToBytes => Some(3), Globals | Locals | Super => Some(0), + Property => None, // 1 or 2 args, validated in `call_property`. Bytes => None, // 0/1/2-arg: bytes() | bytes(n|iter) | bytes(str, "utf-8") Slice => None, // 1/2/3-arg Gather => None, // variadic @@ -642,6 +659,7 @@ impl<'a> VM<'a> { Globals => self.call_globals(chunk, slots), Locals => self.call_locals(chunk, slots), Super => self.call_super(), + Property => self.call_property(argc), } } } diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index 3064e3c..b277275 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -17,11 +17,14 @@ pub(crate) enum AttrLookup { BuiltinMethod(BuiltinMethodId), // `e.args` on ExcInstance — caller picks: LoadAttr materialises the tuple, CallMethod errors. ExcArgs(Vec), + // Property descriptor on an instance — `LoadAttr` invokes `getter(recv)`. + PropertyGet { recv: Val, getter: Val }, + // `prop.setter` access — `LoadAttr` materialises a `PropertySetter` value bound to the source property. 
+ PropertySetterRef(Val), } impl<'a> VM<'a> { - // Direct-then-DFS member lookup; first hit wins. Cycles are impossible: bases are validated at `MakeClass` time and `HeapObj::Class` is immutable, so the class graph is a static DAG. - // Returns `(value, defining_class)` so callers building `BoundUserMethod` / `InstanceMethod` can record where the method came from for `super()`. + // Direct-then-DFS member lookup; first hit wins. Cycles are impossible: bases are validated at `MakeClass` time and `HeapObj::Class` is immutable, so the class graph is a static DAG. Returns `(value, defining_class)` so callers building `BoundUserMethod` / `InstanceMethod` can record where the method came from for `super()`. pub(crate) fn lookup_class_member(&self, cls: Val, name: &str) -> Option<(Val, Val)> { if !cls.is_heap() { return None; } let HeapObj::Class(_, bases, members) = self.heap.get(cls) else { return None; }; @@ -79,7 +82,11 @@ impl<'a> VM<'a> { .find(|(k, _)| k.is_heap() && matches!(self.heap.get(*k), HeapObj::Str(s) if s == name)) .map(|(_, v)| *v); if let Some(v) = found { return Ok(AttrLookup::InstanceField(v)); } - if let Some((mv, defining)) = self.lookup_class_member(cls_val, name) { + if let Some((mv, defining)) = self.lookup_class_member(cls_val, bare) { + // F3: a Property member triggers getter invocation in `handle_load_attr`; plain methods stay bound to the receiver. + if let HeapObj::Property(getter, _) = self.heap.get(mv) { + return Ok(AttrLookup::PropertyGet { recv: obj, getter: *getter }); + } return Ok(AttrLookup::InstanceMethod { recv: obj, func: mv, class: defining }); } let ty = self.type_name(obj); @@ -96,6 +103,13 @@ impl<'a> VM<'a> { return Err(VmErr::Attribute(s!("'super' object has no attribute '", str name, "'"))); } + // `prop.setter` produces a callable that re-builds the property with a new setter (powers `@x.setter`). 
+ if obj.is_heap() + && matches!(self.heap.get(obj), HeapObj::Property(..)) + && bare == "setter" { + return Ok(AttrLookup::PropertySetterRef(obj)); + } + // Builtin type method. let ty = self.type_name(obj); lookup_method(ty, name) @@ -156,6 +170,18 @@ impl<'a> VM<'a> { self.push(v); Ok(()) } + AttrLookup::PropertyGet { recv, getter } => { + // Inline getter call: matches `BoundUserMethod` dispatch (push func, push self, call). + if self.depth >= self.max_calls { return Err(cold_depth()); } + self.push(getter); + self.push(recv); + self.exec_call(1, chunk, slots) + } + AttrLookup::PropertySetterRef(prop) => { + let v = self.heap.alloc(HeapObj::PropertySetter(prop))?; + self.push(v); + Ok(()) + } } } } diff --git a/compiler/src/modules/vm/mod.rs b/compiler/src/modules/vm/mod.rs index aa7a2c2..2f619ce 100644 --- a/compiler/src/modules/vm/mod.rs +++ b/compiler/src/modules/vm/mod.rs @@ -313,6 +313,7 @@ impl<'a> VM<'a> { NativeFnId::IntToBytes, NativeFnId::FrozenSet, NativeFnId::Globals, NativeFnId::Locals, NativeFnId::Super, + NativeFnId::Property, ]; for &id in builtin_fns { if let Ok(v) = vm.heap.alloc(HeapObj::NativeFn(id)) { diff --git a/compiler/src/modules/vm/ops.rs b/compiler/src/modules/vm/ops.rs index f22df93..6c55774 100644 --- a/compiler/src/modules/vm/ops.rs +++ b/compiler/src/modules/vm/ops.rs @@ -77,6 +77,8 @@ impl<'a> VM<'a> { HeapObj::Class(..) => true, HeapObj::BoundUserMethod(..) => true, HeapObj::Super(..) => true, + HeapObj::Property(..) => true, + HeapObj::PropertySetter(..) => true, HeapObj::Instance(..) => true, HeapObj::Coroutine(..) => true, HeapObj::Module(..) => true, @@ -152,6 +154,8 @@ impl<'a> VM<'a> { HeapObj::Class(_name, _, _) => "type", HeapObj::BoundUserMethod(..) => "", HeapObj::Super(..) => "super", + HeapObj::Property(..) => "property", + HeapObj::PropertySetter(..) => "property", HeapObj::Instance(..) => "object", HeapObj::Coroutine(..) => "coroutine", HeapObj::Module(..) 
=> "module", @@ -206,6 +210,8 @@ impl<'a> VM<'a> { } HeapObj::BoundUserMethod(..) => "".into(), HeapObj::Super(..) => "".into(), + HeapObj::Property(..) => "".into(), + HeapObj::PropertySetter(..) => "".into(), HeapObj::Coroutine(..) => "".into(), HeapObj::Module(name, _) => s!(""), HeapObj::Extern(f) => s!(""), diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 29b36f3..8595284 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -158,6 +158,10 @@ pub enum HeapObj { BoundUserMethod(Val, Val, Val), // `super()` proxy: attribute access walks the bases of `cls` (skipping `cls` itself); methods bind to `recv`. Super(Val, Val), + // `(getter, setter)`; `setter == none()` for getter-only properties — written via `@property` / `@x.setter`. + Property(Val, Val), + // Intermediate produced by `prop.setter`: callable that takes a function and returns a new `Property` with the setter attached. + PropertySetter(Val), Coroutine(usize, Vec, Vec, usize, Vec), /* Produced by `import m`; attr access via LoadAttr, calls fuse through CallMethod. 
*/ Module(String, Vec<(String, Val)>), @@ -180,6 +184,7 @@ pub enum NativeFnId { BytesFromHex, IntFromBytes, IntToBytes, FrozenSet, Globals, Locals, Super, + Property, } impl NativeFnId { @@ -198,6 +203,7 @@ impl NativeFnId { "bytes_fromhex", "int_from_bytes", "int_to_bytes", "frozenset", "globals", "locals", "super", + "property", ]; NAMES[self as usize] } @@ -292,6 +298,8 @@ pub(crate) fn for_each_val(obj: &HeapObj, mut f: impl FnMut(Val)) { } HeapObj::BoundUserMethod(r, fu, cls) => { f(*r); f(*fu); f(*cls); } HeapObj::Super(cls, recv) => { f(*cls); f(*recv); } + HeapObj::Property(g, s) => { f(*g); f(*s); } + HeapObj::PropertySetter(p) => f(*p), HeapObj::Instance(cls, attrs) => { f(*cls); for (k, v) in attrs.borrow().iter() { f(k); f(v); } @@ -534,6 +542,8 @@ impl HeapPool { Some(HeapObj::Ellipsis) => 26, Some(HeapObj::NotImplemented) => 27, Some(HeapObj::Super(..)) => 28, + Some(HeapObj::Property(..)) => 29, + Some(HeapObj::PropertySetter(..)) => 30, None => 0, } } else { 0 } diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index a46288b..21f0c57 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1616,5 +1616,11 @@ {"src": "class S:\n def __init__(self, suppress):\n self.suppress = suppress\n def __enter__(self):\n return self\n def __exit__(self, t, v, tb):\n print('exit', self.suppress)\n return self.suppress\nwith S(False):\n try:\n with S(True):\n raise ValueError('x')\n print('inner-after')\n except ValueError:\n print('outer-caught')", "output": ["exit True", "inner-after", "exit False"]}, {"src": "class CM:\n def __enter__(self):\n return self\n def __exit__(self, t, v, tb):\n raise RuntimeError('exit-fail')\ntry:\n with CM():\n pass\nexcept RuntimeError:\n print('caught-exit')", "output": ["caught-exit"]}, {"src": "class Chain:\n def __init__(self, n):\n self.n = n\n def __enter__(self):\n return self\n def __exit__(self, *a):\n print('exit', self.n)\nwith Chain(1), Chain(2):\n print('body')", 
"output": ["body", "exit 2", "exit 1"]}, - {"src": "class Chain:\n def __init__(self, n, suppress):\n self.n = n\n self.suppress = suppress\n def __enter__(self):\n return self\n def __exit__(self, *a):\n print('exit', self.n)\n return self.suppress\nwith Chain(1, False), Chain(2, True):\n raise ValueError('x')\nprint('after')", "output": ["exit 2", "exit 1", "after"]} + {"src": "class Chain:\n def __init__(self, n, suppress):\n self.n = n\n self.suppress = suppress\n def __enter__(self):\n return self\n def __exit__(self, *a):\n print('exit', self.n)\n return self.suppress\nwith Chain(1, False), Chain(2, True):\n raise ValueError('x')\nprint('after')", "output": ["exit 2", "exit 1", "after"]}, + {"src": "class P:\n def __init__(self, v):\n self._v = v\n @property\n def v(self):\n return self._v\nprint(P(42).v)", "output": ["42"]}, + {"src": "class P:\n def __init__(self):\n self._v = 0\n @property\n def v(self):\n return self._v\n @v.setter\n def v(self, x):\n self._v = x * 2\np = P()\np.v = 7\nprint(p.v)", "output": ["14"]}, + {"src": "class P:\n @property\n def x(self):\n return 1\np = P()\np.x = 5", "output": [], "error": "can't set attribute"}, + {"src": "class Base:\n @property\n def kind(self):\n return 'base'\nclass Sub(Base):\n pass\nprint(Sub().kind)", "output": ["base"]}, + {"src": "class P:\n def __init__(self):\n self.calls = []\n @property\n def hits(self):\n self.calls.append('get')\n return len(self.calls)\np = P()\nprint(p.hits)\nprint(p.hits)\nprint(p.calls)", "output": ["1", "2", "['get', 'get']"]}, + {"src": "class P:\n @property\n def v(self):\n return 'getter-only'\n @v.setter\n def v(self, x):\n self._v = x\np = P()\np.v = 5\nprint(p._v)\nprint(p.v)", "output": ["5", "getter-only"]} ] From a185693ed727b9cfc30d69106c7adbbfddcd72d6 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 19:55:03 -0600 Subject: [PATCH 5/7] feat(vm): promote monomorphic instance dunders through the IC. 
--- compiler/src/modules/vm/builtins/index.rs | 4 +- compiler/src/modules/vm/cache.rs | 63 +++++++++++++++++- compiler/src/modules/vm/dispatch.rs | 80 ++++++++++++++++++++++- compiler/src/modules/vm/gc.rs | 2 + compiler/src/modules/vm/handlers/arith.rs | 40 +++++++++++- compiler/src/modules/vm/handlers/data.rs | 3 +- compiler/tests/cases/vm.json | 7 +- 7 files changed, 189 insertions(+), 10 deletions(-) diff --git a/compiler/src/modules/vm/builtins/index.rs b/compiler/src/modules/vm/builtins/index.rs index af06dae..cff57b9 100644 --- a/compiler/src/modules/vm/builtins/index.rs +++ b/compiler/src/modules/vm/builtins/index.rs @@ -23,12 +23,14 @@ impl SliceSource { impl<'a> VM<'a> { - pub fn get_item(&mut self, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val]) -> Result { + pub fn get_item(&mut self, ip: usize, chunk: &crate::modules::parser::SSAChunk, slots: &mut [Val], cache: &mut crate::modules::vm::cache::OpcodeCache) -> Result { let idx = self.pop()?; let obj = self.pop()?; // F2.4: instance `__getitem__` runs before built-in indexing; slices pass through as a single Slice arg. if let Some(r) = self.try_call_dunder(obj, "__getitem__", &[idx], chunk, slots)? { + // F4: record monomorphic hit so the next iteration skips `resolve_attr_silent`. + self.record_dunder_hit(ip, cache, obj, "__getitem__", 2); self.push(r); return Ok(true); } diff --git a/compiler/src/modules/vm/cache.rs b/compiler/src/modules/vm/cache.rs index 0d944bd..5ca4b96 100644 --- a/compiler/src/modules/vm/cache.rs +++ b/compiler/src/modules/vm/cache.rs @@ -19,11 +19,23 @@ pub enum FastOp { /* Promote to `fast` after this many hits with a stable type key. */ const QUICK_THRESH: u8 = 4; +/* F4: per-site monomorphic instance-dunder cache. Records the receiver's class heap idx and the pre-resolved method Val; once `hits >= QUICK_THRESH` the slot promotes and the hot dispatch skips `resolve_attr_silent` entirely. 
`arity` is the total operand count consumed from the stack (1 for unary, 2 for binary like `__add__`/`__getitem__`). */ +#[derive(Clone, Copy)] +pub struct InstanceCache { + pub class: u32, + pub method_bits: u64, + pub arity: u8, + hits: u8, + promoted: bool, +} + #[derive(Clone, Default)] struct CacheSlot { type_key: u8, hits: u8, fast: Option, + // F4: instance-dunder cache; orthogonal to `fast`, dispatch checks it after scalar specialisation misses. + inst: Option, } pub struct OpcodeCache { @@ -107,7 +119,10 @@ impl OpcodeCache { s.fast = Self::specialize(opcode, ta, tb); } } else { - *s = CacheSlot { type_key: key, hits: 1, fast: None }; + // Preserve `inst` — its lifecycle is independent of scalar specialisation. + s.type_key = key; + s.hits = 1; + s.fast = None; } } @@ -117,7 +132,51 @@ impl OpcodeCache { } pub fn invalidate(&mut self, ip: usize) { - if let Some(s) = self.slots.get_mut(ip) { *s = CacheSlot::default(); } + // Preserve `inst` so the instance-dunder cache survives a scalar specialisation miss at the same site. + if let Some(s) = self.slots.get_mut(ip) { + s.type_key = 0; + s.hits = 0; + s.fast = None; + } + } + + /* F4: monomorphic instance-dunder hit counter — promotes after `QUICK_THRESH` consecutive hits with the same class + method pair. Polymorphic sites churn (`record_inst` overwrites on mismatch) but never wedge. 
*/ + pub fn record_inst(&mut self, ip: usize, class: u32, method: Val, arity: u8) { + let Some(s) = self.slots.get_mut(ip) else { return }; + match s.inst.as_mut() { + Some(c) if c.class == class && c.method_bits == method.0 && c.arity == arity => { + c.hits = c.hits.saturating_add(1); + if c.hits >= QUICK_THRESH { c.promoted = true; } + } + _ => { + s.inst = Some(InstanceCache { + class, + method_bits: method.0, + arity, + hits: 1, + promoted: false, + }); + } + } + } + + #[inline] + pub fn get_inst(&self, ip: usize) -> Option { + self.slots.get(ip).and_then(|s| s.inst).filter(|c| c.promoted) + } + + pub fn invalidate_inst(&mut self, ip: usize) { + if let Some(s) = self.slots.get_mut(ip) { s.inst = None; } + } + + /* GC root iterator for `InstanceCache` entries: yields the cached method Val and class Val so the collector keeps both alive while the cache holds them. */ + pub fn inst_roots(&self) -> impl Iterator + '_ { + self.slots.iter().filter_map(|s| s.inst).flat_map(|c| { + // SAFETY: `method_bits` was recorded from a live `Val`; class Val is reconstructed from the stored heap idx. + let method = unsafe { Val::from_raw(c.method_bits) }; + let class = Val::heap(c.class); + [method, class].into_iter() + }) } fn specialize(opcode: &OpCode, ta: u8, tb: u8) -> Option { diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index f1349cb..adf4c98 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -5,7 +5,7 @@ use crate::modules::parser::{OpCode, SSAChunk, Instruction, ssa_strip}; use super::{ExceptionFrame, VM, handlers}; use super::types::*; -use super::cache::{OpcodeCache, FastOp}; +use super::cache::{OpcodeCache, FastOp, InstanceCache}; /* Three-way result of a fast-path attempt; see exec_fast for semantics. */ enum FastOutcome { Done, TypeMiss, Overflow } @@ -82,6 +82,56 @@ impl<'a> VM<'a> { Ok(FastOutcome::Done) } + /* F4: instance-dunder fast path. 
Guards the receiver's class identity, invokes the pre-resolved method bypassing `resolve_attr_silent`, and treats `NotImplemented` as a deopt so reflected dispatch can take over via the slow path. Restores the stack on miss so the slow handler reads its operands unchanged. */ + #[inline] + fn exec_inst(&mut self, inst: InstanceCache, chunk: &SSAChunk, slots: &mut [Val]) -> Result { + let arity = inst.arity as usize; + let len = self.stack.len(); + if len < arity { return Ok(FastOutcome::TypeMiss); } + + let recv_idx = len - arity; + let recv = self.stack[recv_idx]; + if !recv.is_heap() { return Ok(FastOutcome::TypeMiss); } + let class_val = match self.heap.get(recv) { + HeapObj::Instance(c, _) => *c, + _ => return Ok(FastOutcome::TypeMiss), + }; + if class_val.as_heap() != inst.class { return Ok(FastOutcome::TypeMiss); } + + if self.depth >= self.max_calls { return Err(cold_depth()); } + + // Snapshot the operand window before mutating; reused to roll back on deopt. + let mut operands: Vec = Vec::with_capacity(arity); + operands.extend_from_slice(&self.stack[recv_idx..len]); + self.stack.truncate(recv_idx); + + // SAFETY: `method_bits` was recorded from a live `Val` and `Class` references are immutable, so the function still lives on the heap. + let method = unsafe { Val::from_raw(inst.method_bits) }; + self.pending.method_binding = Some((class_val, recv)); + self.push(method); + for &v in &operands { self.push(v); } + self.exec_call(arity as u16, chunk, slots)?; + + let result = self.pop()?; + if self.heap.is_not_implemented(result) { + // Deopt: restore the original stack window so the slow handler sees its operands. + for &v in &operands { self.push(v); } + return Ok(FastOutcome::TypeMiss); + } + self.push(result); + Ok(FastOutcome::Done) + } + + /* Post-success recording for the instance-dunder IC; ignored when the receiver isn't an instance or the method isn't on its class. 
*/ + #[inline] + pub(crate) fn record_dunder_hit(&self, ip: usize, cache: &mut OpcodeCache, recv: Val, name: &str, arity: u8) { + if !recv.is_heap() { return; } + let HeapObj::Instance(cls, _) = self.heap.get(recv) else { return; }; + let cls = *cls; + let Some((method, _)) = self.lookup_class_member(cls, name) else { return; }; + cache.record_inst(ip, cls.as_heap(), method, arity); + } + /* Main dispatch loop. Walks the fused instruction stream (LoadAttr+Call already collapsed to CallMethod+CallMethodArgs); checks the IC inline for hot arith/compare opcodes. */ pub(crate) fn exec(&mut self, chunk: &SSAChunk, slots: &mut [Val]) -> Result { @@ -333,6 +383,14 @@ impl<'a> VM<'a> { FastOutcome::TypeMiss => cache.invalidate(rip), } } + // F4: instance-dunder fast path — orthogonal to scalar specialisation, only fires for monomorphic instance sites. + if let Some(inst) = cache.get_inst(rip) { + match self.exec_inst(inst, chunk, slots)? { + FastOutcome::Done => return Ok(None), + FastOutcome::Overflow => {} + FastOutcome::TypeMiss => cache.invalidate_inst(rip), + } + } if matches!(ins.opcode, OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq) { @@ -342,6 +400,14 @@ impl<'a> VM<'a> { } } OpCode::Div | OpCode::Pow | OpCode::Minus => { + // F4: Div/Pow/Minus skip scalar IC (Float-only / overflow-prone) but still benefit from the instance-dunder fast path. + if let Some(inst) = cache.get_inst(rip) { + match self.exec_inst(inst, chunk, slots)? { + FastOutcome::Done => return Ok(None), + FastOutcome::Overflow => {} + FastOutcome::TypeMiss => cache.invalidate_inst(rip), + } + } self.handle_arith(ins.opcode, rip, cache, chunk, slots)?; } @@ -405,7 +471,17 @@ impl<'a> VM<'a> { } // Warm opcodes. - OpCode::GetItem => { self.get_item(chunk, slots)?; } + OpCode::GetItem => { + // F4: `Series[i]`-style hot loop — bypass `resolve_attr_silent("__getitem__")` once the site is monomorphic. 
+ if let Some(inst) = cache.get_inst(rip) { + match self.exec_inst(inst, chunk, slots)? { + FastOutcome::Done => return Ok(None), + FastOutcome::Overflow => {} + FastOutcome::TypeMiss => cache.invalidate_inst(rip), + } + } + self.get_item(rip, chunk, slots, cache)?; + } OpCode::Call | OpCode::CallPrint | OpCode::CallLen | OpCode::CallAbs | OpCode::CallStr | OpCode::CallInt | OpCode::CallFloat | OpCode::CallBool diff --git a/compiler/src/modules/vm/gc.rs b/compiler/src/modules/vm/gc.rs index 5125ee5..cc8bd62 100644 --- a/compiler/src/modules/vm/gc.rs +++ b/compiler/src/modules/vm/gc.rs @@ -29,6 +29,8 @@ impl<'a> VM<'a> { if let Some(consts) = cache.const_vals_opt() { for &v in consts { self.heap.mark(v); } } + // F4: keep the IC's cached class + method Vals alive so a promoted slot can't reference a swept-and-reused slot. + for v in cache.inst_roots() { self.heap.mark(v); } } // SAFETY: each ptr is live for its exec() frame and the Vec's alloc is move-stable. for i in 0..self.active_const_pools.len() { diff --git a/compiler/src/modules/vm/handlers/arith.rs b/compiler/src/modules/vm/handlers/arith.rs index 8f6276c..926335e 100644 --- a/compiler/src/modules/vm/handlers/arith.rs +++ b/compiler/src/modules/vm/handlers/arith.rs @@ -3,18 +3,48 @@ use super::*; use cache::OpcodeCache; use ops::cached_binop; +/* IC: maps an arithmetic opcode to the dunder name it dispatches to. Only the cacheable forward dunder — reflected ops are handled by the slow path's `NotImplemented` deopt. */ +fn binary_dunder_name(op: OpCode) -> Option<&'static str> { + match op { + OpCode::Add => Some("__add__"), + OpCode::Sub => Some("__sub__"), + OpCode::Mul => Some("__mul__"), + OpCode::Div => Some("__truediv__"), + OpCode::FloorDiv => Some("__floordiv__"), + OpCode::Mod => Some("__mod__"), + OpCode::Pow => Some("__pow__"), + _ => None, + } +} + +/* IC: same mapping for comparison opcodes. `Eq`/`NotEq` share `__eq__`; reflected pairs (`Lt`/`Gt`, `LtEq`/`GtEq`) collapse to the forward name. 
*/ +fn compare_dunder_name(op: OpCode) -> Option<&'static str> { + match op { + OpCode::Eq | OpCode::NotEq => Some("__eq__"), + OpCode::Lt => Some("__lt__"), + OpCode::LtEq => Some("__le__"), + OpCode::Gt => Some("__gt__"), + OpCode::GtEq => Some("__ge__"), + _ => None, + } +} + impl<'a> VM<'a> { /* Add/Sub/Mul/Div with IC; Mod/Pow/FloorDiv on i128 with overflow trap; Minus is unary. */ pub(crate) fn handle_arith(&mut self, op: OpCode, rip: usize, cache: &mut OpcodeCache, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { if op == OpCode::Minus { - return self.exec_neg(chunk, slots); + return self.exec_neg(rip, cache, chunk, slots); } let (a, b) = self.pop2()?; // F2.1: instance dunder protocol — try user-defined operator before any builtin coercion. if let Some(r) = self.try_binary_dunder(op, a, b, chunk, slots)? { + // F4: record the resolved class+method so the IC can fire on subsequent iterations of a hot loop. + if let Some(name) = binary_dunder_name(op) { + self.record_dunder_hit(rip, cache, a, name, 2); + } self.push(r); return Ok(()); } @@ -38,10 +68,12 @@ impl<'a> VM<'a> { Ok(()) } - fn exec_neg(&mut self, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { + fn exec_neg(&mut self, rip: usize, cache: &mut OpcodeCache, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { let v = self.pop()?; // F2.1: instance `__neg__` takes precedence over numeric coercion. if let Some(r) = self.try_call_dunder(v, "__neg__", &[], chunk, slots)? { + // F4: monomorphic `-instance` sites promote like binary ops. + self.record_dunder_hit(rip, cache, v, "__neg__", 1); self.push(r); return Ok(()); } @@ -149,6 +181,10 @@ impl<'a> VM<'a> { // F2.2: try the user-defined comparison dunder before falling back to numeric/string compare. if let Some(r) = self.try_compare_dunder(op, a, b, chunk, slots)? { + // F4: monomorphic comparison sites cache the resolved method like arithmetic ones. 
+ if let Some(name) = compare_dunder_name(op) { + self.record_dunder_hit(rip, cache, a, name, 2); + } self.push(Val::bool(r)); return Ok(()); } diff --git a/compiler/src/modules/vm/handlers/data.rs b/compiler/src/modules/vm/handlers/data.rs index 333e2bb..9d5426b 100644 --- a/compiler/src/modules/vm/handlers/data.rs +++ b/compiler/src/modules/vm/handlers/data.rs @@ -42,10 +42,9 @@ impl<'a> VM<'a> { Ok(()) } - /* Indexed access/store, unpacking, and `{value!s:spec}` formatting. */ + /* Indexed access/store, unpacking, and `{value!s:spec}` formatting. `GetItem`/`StoreItem`/`DelItem` are dispatched directly from the hot loop; the arms below cover legacy callers that may route through here. */ pub(crate) fn handle_container(&mut self, op: OpCode, operand: u16, chunk: &SSAChunk, slots: &mut [Val]) -> Result<(), VmErr> { match op { - OpCode::GetItem => { self.get_item(chunk, slots)?; } OpCode::StoreItem => { self.mark_impure(); self.store_item(chunk, slots)?; diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index 21f0c57..a1478a4 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1622,5 +1622,10 @@ {"src": "class P:\n @property\n def x(self):\n return 1\np = P()\np.x = 5", "output": [], "error": "can't set attribute"}, {"src": "class Base:\n @property\n def kind(self):\n return 'base'\nclass Sub(Base):\n pass\nprint(Sub().kind)", "output": ["base"]}, {"src": "class P:\n def __init__(self):\n self.calls = []\n @property\n def hits(self):\n self.calls.append('get')\n return len(self.calls)\np = P()\nprint(p.hits)\nprint(p.hits)\nprint(p.calls)", "output": ["1", "2", "['get', 'get']"]}, - {"src": "class P:\n @property\n def v(self):\n return 'getter-only'\n @v.setter\n def v(self, x):\n self._v = x\np = P()\np.v = 5\nprint(p._v)\nprint(p.v)", "output": ["5", "getter-only"]} + {"src": "class P:\n @property\n def v(self):\n return 'getter-only'\n @v.setter\n def v(self, x):\n self._v = x\np = P()\np.v = 
5\nprint(p._v)\nprint(p.v)", "output": ["5", "getter-only"]}, + {"src": "class S:\n def __init__(self, data):\n self.data = data\n def __getitem__(self, i):\n return self.data[i]\ns = S([10, 20, 30, 40, 50])\ntotal = 0\nfor i in range(5):\n total = total + s[i]\nprint(total)", "output": ["150"]}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __add__(self, o):\n if isinstance(o, V):\n return V(self.n + o.n)\n return NotImplemented\n def __radd__(self, o):\n return V(o + self.n)\ntotal = V(0)\nfor i in range(5):\n total = total + V(i)\nprint(total.n)\nprint((10 + V(5)).n)", "output": ["10", "15"]}, + {"src": "class A:\n def __eq__(self, o):\n return isinstance(o, A)\nclass B:\n def __eq__(self, o):\n return False\nhits = 0\nfor i in range(10):\n if A() == A(): hits = hits + 1\n if B() == A(): hits = hits + 100\nprint(hits)", "output": ["10"]}, + {"src": "class C:\n def __init__(self, n):\n self.n = n\n def __getitem__(self, i):\n return self.n + i\n def __setitem__(self, i, v):\n self.n = v\n def __neg__(self):\n return C(-self.n)\nc = C(10)\nresults = []\nfor i in range(3):\n results.append(c[i])\nc[0] = 99\nresults.append(c[0])\nresults.append((-c).n)\nprint(results)", "output": ["[10, 11, 12, 99, -99]"]}, + {"src": "class S:\n def __init__(self):\n self.data = list(range(20))\n def __getitem__(self, i):\n garbage = [j * j for j in range(50)]\n return self.data[i] + garbage[0]\ns = S()\nacc = 0\nfor i in range(20):\n acc = acc + s[i]\nprint(acc)", "output": ["190"]} ] From c31ef557340516dceed2c2186a8a15fd0f1f74f1 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 20:20:06 -0600 Subject: [PATCH 6/7] test(vm): Cover protocol matrix dispatch ordering rules. 
--- compiler/tests/cases/vm.json | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/compiler/tests/cases/vm.json b/compiler/tests/cases/vm.json index a1478a4..810241b 100644 --- a/compiler/tests/cases/vm.json +++ b/compiler/tests/cases/vm.json @@ -1627,5 +1627,39 @@ {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __add__(self, o):\n if isinstance(o, V):\n return V(self.n + o.n)\n return NotImplemented\n def __radd__(self, o):\n return V(o + self.n)\ntotal = V(0)\nfor i in range(5):\n total = total + V(i)\nprint(total.n)\nprint((10 + V(5)).n)", "output": ["10", "15"]}, {"src": "class A:\n def __eq__(self, o):\n return isinstance(o, A)\nclass B:\n def __eq__(self, o):\n return False\nhits = 0\nfor i in range(10):\n if A() == A(): hits = hits + 1\n if B() == A(): hits = hits + 100\nprint(hits)", "output": ["10"]}, {"src": "class C:\n def __init__(self, n):\n self.n = n\n def __getitem__(self, i):\n return self.n + i\n def __setitem__(self, i, v):\n self.n = v\n def __neg__(self):\n return C(-self.n)\nc = C(10)\nresults = []\nfor i in range(3):\n results.append(c[i])\nc[0] = 99\nresults.append(c[0])\nresults.append((-c).n)\nprint(results)", "output": ["[10, 11, 12, 99, -99]"]}, - {"src": "class S:\n def __init__(self):\n self.data = list(range(20))\n def __getitem__(self, i):\n garbage = [j * j for j in range(50)]\n return self.data[i] + garbage[0]\ns = S()\nacc = 0\nfor i in range(20):\n acc = acc + s[i]\nprint(acc)", "output": ["190"]} + {"src": "class S:\n def __init__(self):\n self.data = list(range(20))\n def __getitem__(self, i):\n garbage = [j * j for j in range(50)]\n return self.data[i] + garbage[0]\ns = S()\nacc = 0\nfor i in range(20):\n acc = acc + s[i]\nprint(acc)", "output": ["190"]}, + {"src": "class Base:\n def __add__(self, o):\n return 'Base.add'\n def __radd__(self, o):\n return 'Base.radd'\nclass Sub(Base):\n def __radd__(self, o):\n return 'Sub.radd'\nprint(Base() + Sub())", 
"output": ["Sub.radd"]}, + {"src": "class Base:\n def __add__(self, o):\n return 'Base.add'\nclass Sub(Base):\n pass\nprint(Sub() + Base())", "output": ["Base.add"]}, + {"src": "class A:\n def __add__(self, o):\n return 'A.add'\nclass B:\n def __radd__(self, o):\n return 'B.radd'\nprint(A() + B())", "output": ["A.add"]}, + {"src": "class A:\n def __add__(self, o):\n return NotImplemented\nclass B:\n def __radd__(self, o):\n return 'B.radd'\nprint(A() + B())", "output": ["B.radd"]}, + {"src": "class A:\n def __add__(self, o):\n return NotImplemented\nclass B:\n def __radd__(self, o):\n return NotImplemented\nA() + B()", "output": [], "error": "unsupported operand"}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __lt__(self, o):\n return self.n < o.n\nprint(V(1) < V(2))\nprint(V(5) > V(2))", "output": ["True", "True"]}, + {"src": "class A:\n def __lt__(self, o):\n return NotImplemented\nclass B:\n def __gt__(self, o):\n return True\nprint(A() < B())", "output": ["True"]}, + {"src": "class V:\n def __init__(self, n):\n self.n = n\n def __eq__(self, o):\n return self.n == o.n\nprint(V(3) != V(4))\nprint(V(3) != V(3))", "output": ["True", "False"]}, + {"src": "class Suppress:\n def __enter__(self):\n pass\n def __exit__(self, *a):\n return True\nwith Suppress():\n raise ValueError('x')\nprint('after')", "output": ["after"]}, + {"src": "class Pass:\n def __enter__(self):\n pass\n def __exit__(self, *a):\n return False\ntry:\n with Pass():\n raise ValueError('x')\nexcept ValueError:\n print('caught')", "output": ["caught"]}, + {"src": "class Replace:\n def __enter__(self):\n pass\n def __exit__(self, *a):\n raise RuntimeError('replaced')\ntry:\n with Replace():\n raise ValueError('original')\nexcept RuntimeError:\n print('got-replaced')\nexcept ValueError:\n print('got-original')", "output": ["got-replaced"]}, + {"src": "class Inner:\n def __enter__(self):\n pass\n def __exit__(self, *a):\n print('inner-exit')\n return True\nclass Outer:\n def 
__enter__(self):\n pass\n def __exit__(self, *a):\n print('outer-exit', a[0])\nwith Outer():\n with Inner():\n raise ValueError('x')\nprint('after')", "output": ["inner-exit", "outer-exit None", "after"]}, + {"src": "class Pass:\n def __enter__(self):\n pass\n def __exit__(self, *a):\n print('exit', a[0])\nwith Pass():\n try:\n raise ValueError('x')\n except ValueError:\n print('caught')\nprint('after')", "output": ["caught", "exit None", "after"]}, + {"src": "class A:\n def __getitem__(self, i):\n return 'A:' + str(i)\nclass B:\n def __getitem__(self, i):\n return 'B:' + str(i)\nout = []\nfor i in range(6):\n if i < 3:\n out.append(A()[i])\n else:\n out.append(B()[i])\nprint(out)", "output": ["['A:0', 'A:1', 'A:2', 'B:3', 'B:4', 'B:5']"]}, + {"src": "class C:\n def __init__(self, k):\n self.k = k\n def __add__(self, o):\n if isinstance(o, int):\n return self.k + o\n return NotImplemented\nacc = 0\nfor i in range(20):\n acc = acc + C(i).__add__(1)\nprint(acc)", "output": ["210"]}, + {"src": "class S:\n def __init__(self, base):\n self.base = base\n def __getitem__(self, i):\n leak = [j for j in range(64)]\n return self.base + i + leak[0]\ns = S(100)\nresults = []\nfor i in range(30):\n results.append(s[i])\nprint(sum(results))", "output": ["3435"]}, + {"src": "class P:\n @property\n def v(self):\n return 7\np = P()\np.v = 5", "output": [], "error": "can't set attribute"}, + {"src": "class Base:\n @property\n def v(self):\n return 'base'\nclass Sub(Base):\n pass\nprint(Sub().v)", "output": ["base"]}, + {"src": "class P:\n def __init__(self):\n self._v = 0\n @property\n def v(self):\n return self._v\n @v.setter\n def v(self, x):\n self._v = x + 1\np = P()\np.v = 9\nprint(p.v)", "output": ["10"]}, + {"src": "class A:\n def label(self):\n return 'A'\nclass B(A):\n def label(self):\n return super().label() + 'B'\nclass C(B):\n def label(self):\n return super().label() + 'C'\nprint(C().label())", "output": ["ABC"]}, + {"src": "class A:\n pass\nclass B(A):\n pass\nclass 
C(B):\n pass\nprint(isinstance(C(), A))\nprint(isinstance(A(), C))", "output": ["True", "False"]}, + {"src": "class P:\n def __init__(self):\n self.real = 'set'\n def __getattr__(self, name):\n return 'fallback:' + name\np = P()\nprint(p.real)\nprint(p.missing)", "output": ["set", "fallback:missing"]}, + {"src": "class Fail:\n def __getattr__(self, name):\n raise AttributeError('nope')\nFail().anything", "output": [], "error": "AttributeError"}, + {"src": "class B:\n def __bool__(self):\n return False\n def __len__(self):\n return 99\nprint(bool(B()))", "output": ["False"]}, + {"src": "class L:\n def __len__(self):\n return 0\nprint(bool(L()))", "output": ["False"]}, + {"src": "class N:\n pass\nprint(bool(N()))", "output": ["True"]}, + {"src": "class Range3:\n def __init__(self):\n self.i = 0\n def __iter__(self):\n return self\n def __next__(self):\n if self.i >= 3:\n raise StopIteration\n self.i = self.i + 1\n return self.i\nout = []\nfor a in Range3():\n for b in Range3():\n out.append((a, b))\nprint(out)", "output": ["[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)]"]}, + {"src": "class K:\n def __init__(self, n):\n self.n = n\n def __hash__(self):\n return self.n * 31\nk = K(3)\nd = {k: 'v'}\nprint(d[k])\nprint(hash(k))", "output": ["v", "93"]}, + {"src": "class Bad:\n def __eq__(self, o):\n return True\n{Bad(): 1}", "output": [], "error": "unhashable"}, + {"src": "class D:\n def __call__(self, x, y):\n return x * y\nprint(D()(6, 7))\nprint(callable(D()))", "output": ["42", "True"]}, + {"src": "class NotCallable:\n pass\nNotCallable()(1)", "output": [], "error": "not callable"}, + {"src": "class P:\n def __repr__(self):\n return 'r'\nprint(P())\nprint(repr(P()))", "output": ["r", "r"]}, + {"src": "class P:\n def __str__(self):\n return 's'\n def __repr__(self):\n return 'r'\nprint(str(P()))\nprint(repr(P()))\nprint(f'{P()}')", "output": ["s", "r", "s"]}, + {"src": "class P:\n def __format__(self, spec):\n return 'fmt[' + spec + 
']'\nprint(f'{P():>10}')\nprint(format(P(), 'x'))", "output": ["fmt[>10]", "fmt[x]"]} ] From eb41b6a9cc5c6e4ea7a5fe62fcc7f143464175dd Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Mon, 11 May 2026 20:32:38 -0600 Subject: [PATCH 7/7] docs: Align reference with inheritance, dunders, and property support. --- README.md | 2 +- compiler/README.md | 5 +- documentation/docs.json | 1 + documentation/getting-started/quickstart.md | 2 +- documentation/getting-started/what-it-is.md | 8 +- documentation/implementation/design.md | 5 +- documentation/language/async.md | 2 +- documentation/language/classes.md | 92 ++++++- documentation/language/control-flow.md | 2 +- documentation/language/dunders.md | 258 +++++++++++++++++++ documentation/reference/builtins.md | 54 +++- documentation/reference/limits-and-errors.md | 2 +- 12 files changed, 409 insertions(+), 24 deletions(-) create mode 100644 documentation/language/dunders.md diff --git a/README.md b/README.md index 44603c7..e0503b1 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ There is no built-in CLI binary. If you need one for local development, embed `c ## What it is -Edge Python targets functional edge computing: first-class functions, lambdas, closures, decorators (including class decorators), generators, async/await with a built-in cooperative scheduler, comprehensions, structural pattern matching, and pure-function memoization. Classes exist as flat state containers with `__init__`, attributes, and methods — no inheritance, no `super()`, no descriptor protocol, and no dunder-method dispatch (operators and protocols dispatch on type tag, not user-class methods). Integers are 47-bit inline (overflow raises `OverflowError`); there is no bignum. 
+Edge Python targets functional edge computing: first-class functions, lambdas, closures, decorators (including class decorators), generators, async/await with a built-in cooperative scheduler, comprehensions, structural pattern matching, and pure-function memoization. Classes support single inheritance (one base per class), `super()`, dunder-method dispatch (operators, indexing, iteration, context managers, etc.), and `@property` / `@x.setter`. Integers are 47-bit inline with automatic promotion to i128 LongInt on overflow; the hard cap is ±2^127. Imports resolve at compile time through a host-injected resolver. Bare names walk up `packages.json` manifests; quoted specs (`"./util.py"`, `"https://..."`) are loaded verbatim and may carry a `#sha256-` integrity fragment. `.py` modules are compiled and run once; native modules dispatch via the `CallExtern` opcode (either a `.wasm` loaded by URL per the public ABI, or in-process Rust closures from the embedder). There is no bundled stdlib — modules are external artifacts. diff --git a/compiler/README.md b/compiler/README.md index 5b8b350..c2ce55a 100644 --- a/compiler/README.md +++ b/compiler/README.md @@ -9,7 +9,7 @@ A compact, single-pass SSA-style bytecode compiler and stack VM for a functional ## 1. Paradigm -Edge Python targets functional edge computing. The language treats functions as first-class values: lambdas, higher-order functions, closures, comprehensions, decorators (including class decorators), generators, async/await, pattern matching, and pure-function memoization. Classes exist as flat state containers with `__init__`, instance attributes, and methods — no inheritance walking, no MRO, no `super()`, no descriptor protocol, and no dunder-method dispatch (operators, `with`, iteration, `len`, equality, etc. all dispatch on type tag, not on user-class methods). `__init__` is the only honoured magic method. +Edge Python targets functional edge computing. 
The language treats functions as first-class values: lambdas, higher-order functions, closures, comprehensions, decorators (including class decorators), generators, async/await, pattern matching, and pure-function memoization. Classes support single-level inheritance, `super()`, dunder protocol dispatch (operators, indexing, iteration, context managers, hashing, etc.), and `@property` / `@x.setter`. `import` and `from import names` resolve at compile time through a host-injected resolver (see `modules/packages/`, manifest = `packages.json`). Each module is compiled and initialised once: the parser registers it in the importing chunk's `imports` list, the VM runs every imported module's top level in dependency order, and importers reach the resulting `HeapObj::Module` value via `OpCode::LoadModule`. Native modules dispatch via `CallExtern` for fast call-site fusion. Quoted specs may carry a `#sha256-` integrity fragment. @@ -25,7 +25,7 @@ What this leaves is a small, fast, deterministic core: 47-bit inline integers + * **VM**: Stack-based interpreter over `Vec`, where each `Instruction` is `(opcode: OpCode, operand: u16)`. The hot loop lives in `modules/vm/dispatch.rs` as a flat `match` on the opcode (Rust lowers it to a jump table); the VM struct and constructor live in `modules/vm/mod.rs`, with `init.rs` / `helpers.rs` / `gc.rs` covering module init, stack/iter primitives, and the collector. The hot path is split across handler modules (`handlers/{arith,data,format,function,methods,methods_helpers,mod}.rs`). `LoadAttr + Call(0)` is fused into a `CallMethod` / `CallMethodArgs` super-instruction at first execution and cached per call site. * **Inline Caching**: Per-instruction type-recording cache (`modules/vm/cache.rs`) for arithmetic and comparisons. After 4 stable hits the IC promotes the slot to a typed `FastOp` (`AddInt`, `AddFloat`, `LtFloat`, `EqStr`, ...); the fast path keeps a type-tag guard so a miss falls back to the generic handler. 
* **Template Memoization**: Pure functions called with the same arguments return a cached result after 2 hits, bypassing full execution. Functions are tagged impure on first observed side effect (`StoreItem`, `StoreAttr`, `print`, `input`, `raise`, `yield`). -* **Memory**: NaN-boxed 64-bit `Val` (47-bit signed inline int, IEEE-754 float, bool, None, 28-bit heap index). Heap is an arena of `HeapObj` slots managed by a mark-and-sweep GC. Strings and bytes ≤ 128 bytes are interned. **Integers are a hard 47 bits** (±140,737,488,355,327); overflow raises `OverflowError`. There is no bignum fallback — this is paradigm-level, not a TODO. +* **Memory**: NaN-boxed 64-bit `Val` (47-bit signed inline int, IEEE-754 float, bool, None, 28-bit heap index). Heap is an arena of `HeapObj` slots managed by a mark-and-sweep GC. Strings and bytes ≤ 128 bytes are interned. **Integers are 47-bit inline with automatic i128 (`LongInt`) promotion on overflow**, hard-capped at ±2^127. --- @@ -141,6 +141,7 @@ Mark-and-sweep with roots: operand stack, with-stack, pending yields, event queu │ ├── mod.rs │ ├── arith.rs │ ├── data.rs +│ ├── dunder.rs │ ├── format.rs │ ├── function.rs │ ├── methods.rs diff --git a/documentation/docs.json b/documentation/docs.json index 5e7a585..bb54216 100644 --- a/documentation/docs.json +++ b/documentation/docs.json @@ -36,6 +36,7 @@ "language/control-flow", "language/functions", "language/classes", + "language/dunders", "language/async" ] }, diff --git a/documentation/getting-started/quickstart.md b/documentation/getting-started/quickstart.md index 719db28..4306d83 100644 --- a/documentation/getting-started/quickstart.md +++ b/documentation/getting-started/quickstart.md @@ -46,7 +46,7 @@ Hello, python! ## Language overview -Edge Python is a functional subset of Python 3.13. Functions are first-class values; lambdas, currying, higher-order functions, and comprehensions are central. 
Classes exist as flat state containers — no inheritance, no `super()`, no dunder dispatch. +Edge Python is a functional subset of Python 3.13. Functions are first-class values; lambdas, currying, higher-order functions, and comprehensions are central. Classes support single inheritance with `super()` and dunder dispatch for operators, indexing, iteration, and context managers. ```python # First-class functions diff --git a/documentation/getting-started/what-it-is.md b/documentation/getting-started/what-it-is.md index 514535e..1804d88 100644 --- a/documentation/getting-started/what-it-is.md +++ b/documentation/getting-started/what-it-is.md @@ -17,7 +17,8 @@ The language reads like Python because it parses Python's syntax. It runs differ - **Comprehensions**: list, dict, and set, with multiple `for` clauses and `if` guards. - **Pattern matching**: `match` / `case` with literals, captures, OR-patterns, guards, and sequence patterns (one star permitted). - **Exceptions**: `try` / `except` / `else` / `finally`, named handlers, `raise X from Y` (chain info discarded but `X` is what propagates), and subclass-aware matching (`except Exception` catches `RuntimeError`). -- **Context managers**: `with` blocks, single and multi-target — they save and restore VM state but do not invoke `__enter__` / `__exit__`. Use `try` / `finally` for resource cleanup. +- **Context managers**: `with` and `async with` invoke `__enter__` / `__exit__` on the context-manager value; a truthy return from `__exit__` suppresses the raised exception, matching CPython. `async with` reuses these synchronous hooks — `__aenter__` / `__aexit__` are not dispatched. +- **Protocol dunders**: operator overloading, indexing, iteration, hashing, and `repr` / `str` / `format` dispatch through user-defined dunders — see [Dunders](/language/dunders) for the full matrix. - **Numbers**: 47-bit signed integers inline (NaN-boxed, no allocation) with automatic promotion to a 128-bit `LongInt` heap slot when arithmetic overflows. Range up to `±2^127`; beyond that raises `OverflowError`. 
Full IEEE-754 floats. No `complex`, `Decimal`, `Fraction`, or arbitrary precision beyond 128 bits. - **Sequences**: lists, tuples, dicts (insertion-ordered), sets, frozensets, ranges, strings (UTF-8, codepoint-indexed), and bytes. - **f-strings**: full grammar — embedded expressions, `{expr=}` self-doc, `!r` / `!s` / `!a` conversions, and format specs covering `s d b o x X f F e E g G n % c` plus fill / align / sign / `#` / `0` / width / `,` / precision. @@ -30,18 +31,17 @@ The language reads like Python because it parses Python's syntax. It runs differ These parse for syntactic compatibility but raise at runtime, or simply don't exist: -- **Inheritance and protocol dispatch**: classes carry `__init__`, attributes, and methods, but there is no base-class chain, no `super()`, no method resolution order, and no dunder dispatch (`__add__`, `__eq__`, `__iter__`, `__enter__`, `__getitem__`, etc. are never consulted on user instances). Operators dispatch on the built-in type tag, not on user classes. - **Standard library**: there is no bundled stdlib. Modules are external — `.py` files distributed via URL or filesystem, `.wasm` binaries published per the public [WASM ABI](/reference/wasm-abi), or in-process Rust bindings provided by the embedder. See [Imports](/reference/imports) and [Writing modules](/reference/writing-modules). - **I/O**: `input()` reads from a host-provided buffer. There is no file system, no network, no `os`, no `sys` — *unless* the host registers a native module that provides those capabilities. - **Async surface**: `async def` creates real coroutines and the VM runs a cooperative scheduler, but there is no `asyncio` module — primitives are top-level builtins (`run`, `sleep`, `gather`, `with_timeout`, `cancel`, `receive`). Coroutines do not expose `.send()` / `.throw()` / `.close()`. -- **Metaclasses, descriptors, properties, `__slots__`**: not modeled. +- **Metaclasses, descriptor protocol, `__slots__`**: not modeled. 
- **Dynamic code**: no `exec`, no `eval`, no `compile`, no `__import__` (use the `import_module(name)` builtin to look up an already-imported module by alias). - **Reflection beyond `type`, `id`, `hash`, `repr`, `callable`, `getattr`, `hasattr`, `vars`, `globals`, `locals`, `isinstance`**. `type(x)` returns a string like `""`, not a type object. `issubclass` and `dir` are absent. - **Relative imports**: `from . import x` is not supported; use the resolver-aware `import` / `from import` forms. ## Design philosophy -Edge Python is **functional-first**. Classes exist as basic state containers, not as the primary abstraction. Inheritance, descriptor protocols, `super()`, `__slots__`, and dunder method dispatch are intentionally omitted to keep the VM small and fast — behaviour reuse goes through function composition, not method overriding. +Edge Python is **functional-first**. Classes exist as basic state containers, not as the primary abstraction. Single-level inheritance, `super()`, and dunder dispatch are supported where they pay their way, but behaviour reuse is expected to go through function composition rather than deep class hierarchies. A functional core gives Edge Python: diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index 9cfef3b..48e1477 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -9,7 +9,7 @@ Edge Python is a compact bytecode compiler and stack VM for a functional-first s There is no AST and no IR: bytecode is the only intermediate representation between source and execution. The whole compiler is roughly 13,800 lines of Rust; production dependencies are `hashbrown` and `itoa` (SHA-256 is implemented in-tree). The WASM build adds `lol_alloc` for a single-threaded leaking bump allocator. -Classes are state containers, not the primary abstraction. 
Inheritance, descriptor protocols, `super()`, `__slots__`, and dunder dispatch (other than `__init__`) are intentionally omitted to keep the VM small and the dispatch loop fast. +Classes support single inheritance (one base per class, chains of any depth), `super()`, dunder dispatch for the synchronous protocols (the async forms `__aenter__` / `__aexit__` / `__aiter__` / `__anext__` are not dispatched), and `@property` / `@x.setter`. The paradigm remains functional-first: behaviour reuse via composition is still preferred, and the VM optimises the monomorphic case via inline caching on instance dunders. ## Concepts @@ -73,7 +73,6 @@ The heap is a `Vec` arena with a free list (capped at 524,288 slots an - No IR — there is exactly one representation between source and dispatch. - No JIT. Edge Python stays single-tier and pure Rust. Method JITs need per-architecture stencils; trace JITs duplicate the execution model and complicate the GC contract. - No runtime module system. `import` and `from ... import` resolve at parse time through a host-injected `Resolver`; the VM never learns what a module is. See [Imports](/reference/imports). -- No dunder dispatch (other than `__init__`). Operators dispatch on the value's type tag, not on user-class methods. `__add__`, `__eq__`, `__iter__`, `__enter__`, `__getitem__`, etc. on user classes are never consulted; behaviour reuse is via free functions, not method overriding. `super()` is not registered as a builtin and there is no MRO machinery. - No bigints, complex numbers, `bytearray`, `memoryview`, `Decimal`, or `Fraction`. No generator `send` / `throw` / `close`. No `asyncio` module — `run`, `sleep`, `gather`, `with_timeout`, `cancel`, `receive` are top-level builtins. ## Architecture @@ -163,7 +162,7 @@ compiler/src/ `async def` and `yield`-bearing `def` both produce a `HeapObj::Coroutine` (one variant covers both). `run()` drives the cooperative scheduler with `sleep()`, `gather()`, `with_timeout()`, `cancel()`, and `receive()` as top-level builtins. There is no `asyncio` module. 
-`with` is a stack-save scope: `SetupWith` and `ExitWith` save and restore VM state, but they do **not** invoke `__enter__` or `__exit__` on the context-manager value (same for `async with`). For deterministic resource cleanup, use explicit `try` / `finally`. +`with` invokes `__enter__` on entry and `__exit__(exc_type, exc_val, traceback)` on exit, supporting suppression via a truthy `__exit__` return. `async with` still uses the sync `__enter__` / `__exit__` (no `__aenter__` / `__aexit__` dispatch). ## References diff --git a/documentation/language/async.md b/documentation/language/async.md index 630c8db..4286427 100644 --- a/documentation/language/async.md +++ b/documentation/language/async.md @@ -169,7 +169,7 @@ Both are in the built-in exception namespace and match `except` clauses normally - **Cancellation is silent.** `cancel(coro)` stops the coroutine; the body does not see `CancelledError`. Use `with_timeout` if you need deadline semantics that propagate as exceptions. - **No host event loop integration.** Edge Python's scheduler is in-process. Real I/O concurrency requires the host to expose async-shaped externs (e.g. `await fetch_json(url)` where `fetch_json` is a host function that queues a callback and yields). - **`async for`** works against any iterable accepted by `for`, *plus* coroutines and async generators (functions defined with both `async def` and `yield`). Each iteration resumes the coroutine to its next yield. There is no `__aiter__` / `__anext__` dispatch on user-defined classes — define an `async def` generator instead. Behavior over plain lists/tuples/dicts is identical to a regular `for`. -- **`async with`** is a stack-save scope just like `with`. The runtime does not call `__aenter__` / `__aexit__` (no dunder dispatch on either form). Use `try` / `finally` and explicit `await` calls for setup/teardown. +- **`async with`** reuses the sync `with` dispatch path, invoking `__enter__` / `__exit__` on the context manager. 
`__aenter__` / `__aexit__` are not consulted (the async dunder forms are not dispatched). For async setup/teardown that needs `await`, use `try` / `finally` with explicit `await` calls. - **No async comprehensions.** `[x async for x in it]` is not supported. - **No `gen.send` / `.throw` / `.close`.** Generators and coroutines are one-way producers. For bidirectional flow, structure the work with `run` / `gather` and pass messages through call arguments. - **`receive()` is unbounded.** Without an external producer pushing into the event queue, `receive()` yields `None` forever. Pair with `with_timeout` if you need a deadline. diff --git a/documentation/language/classes.md b/documentation/language/classes.md index 6afb521..c6d08e3 100644 --- a/documentation/language/classes.md +++ b/documentation/language/classes.md @@ -5,7 +5,7 @@ description: "User-defined classes as state machines and library namespaces." Edge Python is **functional-first**: classes are state containers and library namespaces, not the primary abstraction. They cover two patterns: **state machines** (a small number of methods that mutate the receiver) and **namespaces** (a bundle of related functions and constants). -By design, the class system omits inheritance chains, `super()`, MRO walking, descriptor protocols, properties, metaclasses, slots, and dunder dispatch. The only dunder the VM consults on user instances is `__init__`. This keeps the object model in ~300 LOC of VM code; programs that need a richer object system are a poor fit for the target. +Classes support single inheritance (one base per class; chains may be any depth) with `super()`, `@property` / `@x.setter` for managed attributes, and the dunder protocol for operators, indexing, iteration, context managers, and the rest (see [Dunder methods](/language/dunders) for the matrix). Multi-base inheritance with C3 MRO, descriptors, metaclasses, and `__slots__` remain out of scope. 
## State-machine pattern @@ -59,6 +59,38 @@ print(Math.cube(3)) 27 ``` +## Inheritance and super() + +A class can declare a single base with `class Sub(Base):`. Methods not defined on the subclass are looked up on the base, and the lookup walks the chain linearly — there is no C3 MRO. `isinstance(x, Base)` walks the same chain, so an instance of `Sub` is also an instance of every ancestor. + +`super()` in zero-argument form delegates to the next class up the chain, bound to the current `self`. It is most commonly used in `__init__` to extend a base constructor. + +```python +class Animal: + def __init__(self, name): + self.name = name + def describe(self): + return self.name + +class Dog(Animal): + def __init__(self, name, breed): + super().__init__(name) + self.breed = breed + def describe(self): + return super().describe() + " (" + self.breed + ")" + +d = Dog("Rex", "lab") +print(d.describe()) +print(isinstance(d, Animal)) +``` + +```text Output +Rex (lab) +True +``` + +A multi-base declaration `class C(A, B):` is parsed and both bases are stored, but resolution is a simple left-to-right depth-first walk, not Python's C3 MRO. Prefer single inheritance. + ## Attribute access on classes vs instances | Access form | Resolves to | @@ -93,12 +125,58 @@ tagged 7 ``` +## Properties + +`@property` turns a method into a read-only attribute. `@x.setter` (built via the `property.setter` factory on the existing property) makes the same attribute writable. Properties are looked up on the class, so subclasses inherit them and may override either side. 
+ +```python +class Temp: + def __init__(self, c): + self._c = c + @property + def celsius(self): + return self._c + @celsius.setter + def celsius(self, value): + self._c = value + @property + def fahrenheit(self): + return self._c * 9 / 5 + 32 + +t = Temp(20) +print(t.celsius) +print(t.fahrenheit) +t.celsius = 100 +print(t.fahrenheit) +``` + +```text Output +20 +68.0 +212.0 +``` + +The two-argument form `property(fget, fset)` is also available for building properties without decorator syntax. + +## Operator overloading and protocols + +Operators, indexing, iteration, context managers, hashing, and `repr` / `str` / `format` all dispatch through dunder methods defined on the class. Define `__add__` for `+`, `__eq__` for `==`, `__getitem__` for `x[i]`, `__iter__` / `__next__` for `for`, `__enter__` / `__exit__` for `with`, and so on. + +```python +class Vec: + def __init__(self, x, y): + self.x, self.y = x, y + def __add__(self, other): + return Vec(self.x + other.x, self.y + other.y) +``` + +See [Dunder methods](/language/dunders) for the full matrix. + ## What is *not* supported -- `class Sub(Super):` — parsed but the base list has no MRO; methods are not inherited from a base class. There is no `super()`, no method resolution order, and no inheritance chain. Reuse comes from composition (hold a field of another class) or free functions. -- `__eq__`, `__hash__`, `__repr__`, `__add__`, `__getitem__`, `__iter__`, `__len__`, `__call__`, `__bool__`, ... — none of these dunders are dispatched. Operators and built-ins resolve on the type tag, not on user-class methods. `==` on instances compares by identity. -- `__enter__` / `__exit__` and `__aenter__` / `__aexit__` — `with` and `async with` are stack-save scopes; the runtime does **not** invoke entry or exit hooks. Use `try` / `finally` for resource cleanup. -- `@property`, `@staticmethod`, `@classmethod` — the namespace pattern above replaces `@staticmethod`. The other two have no equivalent. 
-- Slots, descriptors, metaclasses, abstract base classes, `__slots__`, `__init_subclass__`. +- Multi-base inheritance with proper C3 MRO. `class C(A, B):` is parsed and both bases are stored, but resolution is a linear depth-first walk, not Python's C3 algorithm. Prefer single inheritance. +- Metaclasses, descriptors (`__get__` / `__set__`), `__slots__`, abstract base classes, `__init_subclass__`. +- `@staticmethod` and `@classmethod`. Use the namespace pattern above or free functions instead. +- Async dunders: `__aenter__` / `__aexit__` / `__aiter__` / `__anext__`. `async with` and `async for` do not dispatch these hooks. -When you need behaviour reuse, write a free function that takes the value rather than a method on the class. That keeps dispatch fast (one ALU instruction per op rather than a dunder lookup) and aligns with the functional-first identity. +Behaviour reuse via free functions and composition is still the preferred default — it keeps dispatch fast and aligns with the functional-first identity. Reach for operator overloading and inheritance when the abstraction genuinely calls for them. diff --git a/documentation/language/control-flow.md b/documentation/language/control-flow.md index 92182ee..ce529b5 100644 --- a/documentation/language/control-flow.md +++ b/documentation/language/control-flow.md @@ -299,7 +299,7 @@ These are pre-bound type names you can match against, with their parent links sh ## with -`with` is a stack-save scope: the expression value is bound to the `as` name and the body runs. The runtime does **not** call `__enter__` or `__exit__` — there is no protocol invocation. Use `try` / `finally` when you need real teardown semantics. +`with` drives the context-manager protocol: the expression is evaluated, its `__enter__` is called, and the return value is bound to the `as` name before the body runs. 
On exit, `__exit__(exc_type, exc_value, traceback)` is invoked — with `(None, None, None)` on normal completion, or the live exception info if the body raised. A truthy return from `__exit__` suppresses the exception; otherwise it propagates. See [`/language/dunders`](/language/dunders) for the full protocol. ```python x = [1, 2] diff --git a/documentation/language/dunders.md b/documentation/language/dunders.md new file mode 100644 index 0000000..f0af4cc --- /dev/null +++ b/documentation/language/dunders.md @@ -0,0 +1,258 @@ +--- +title: "Dunder methods" +description: "Protocol methods Edge Python invokes on user classes — operators, indexing, iteration, hashing, context managers, attribute fallback." +--- + +Dunders ("double-underscore" methods like `__add__`, `__eq__`, `__getitem__`) are how a class plugs into the language's protocols. Define them on the class body and the VM calls them when the corresponding operator, builtin, or syntax form runs. + +```python +class V: + def __init__(self, n): + self.n = n + def __add__(self, o): + return V(self.n + o.n) + def __eq__(self, o): + return self.n == o.n + +print((V(3) + V(4)).n) +print(V(3) == V(3)) +``` + +```text Output +7 +True +``` + +Dunders are looked up on the class chain (instance dict is skipped). A subclass inherits every dunder defined on its bases and can override any of them — operator overloading composes naturally with [single-level inheritance](/language/classes#inheritance-and-super). A monomorphic site — same class for both operands across iterations — promotes through the inline cache after four hits and bypasses the lookup entirely on subsequent calls. 
+ +## Arithmetic + +| Operator | Forward | Reflected | +|----------|-----------------|------------------| +| `a + b` | `__add__` | `__radd__` | +| `a - b` | `__sub__` | `__rsub__` | +| `a * b` | `__mul__` | `__rmul__` | +| `a / b` | `__truediv__` | `__rtruediv__` | +| `a // b` | `__floordiv__` | `__rfloordiv__` | +| `a % b` | `__mod__` | `__rmod__` | +| `a ** b` | `__pow__` | `__rpow__` | +| `-a` | `__neg__` | — | + +Returning `NotImplemented` from the forward op tells the VM to try the reflected op on the other operand. If both return `NotImplemented` (or neither is defined), the VM raises `TypeError`. + +Subclass-first ordering: when `type(b)` is a strict subclass of `type(a)`, `b.__radd__` runs **before** `a.__add__`. This is the standard CPython rule and lets a subclass override an inherited reflected op without touching the base. + +```python +class Money: + def __init__(self, n): self.n = n + def __add__(self, o): + return Money(self.n + (o.n if isinstance(o, Money) else o)) + def __radd__(self, o): + return Money(o + self.n) + +print((Money(10) + Money(5)).n) +print((3 + Money(7)).n) +``` + +```text Output +15 +10 +``` + +## Comparison + +| Operator | Forward | Reflected | +|------------|-------------|------------| +| `a == b` | `__eq__` | `__eq__` | +| `a != b` | `__eq__` | `__eq__` | +| `a < b` | `__lt__` | `__gt__` | +| `a <= b` | `__le__` | `__ge__` | +| `a > b` | `__gt__` | `__lt__` | +| `a >= b` | `__ge__` | `__le__` | + +`!=` falls back to `not __eq__` when `__ne__` is absent. Comparison results are coerced to `bool` — returning `'A.lt'` from `__lt__` yields `True` in `a < b`, not the string. + +## Truth and length + +`bool(x)` (and any boolean context like `if x:`) consults: + +1. `__bool__` if defined → cast to bool. +2. `__len__` if defined → `False` when length is 0, `True` otherwise. +3. Default `True`. + +`len(x)` calls `__len__` directly; the return must be a non-negative integer. 
+ +```python +class Empty: + def __bool__(self): + return False + +class Container: + def __init__(self, n): self.n = n + def __len__(self): + return self.n + +print(bool(Empty())) +print(bool(Container(0)), bool(Container(3))) +print(len(Container(5))) +``` + +```text Output +False +False True +5 +``` + +## Indexing and containment + +| Form | Dunder | Arguments | +|---------------------|------------------|------------------------| +| `obj[i]` | `__getitem__` | `(self, i)` | +| `obj[i] = v` | `__setitem__` | `(self, i, value)` | +| `del obj[i]` | `__delitem__` | `(self, i)` | +| `v in obj` | `__contains__` | `(self, value)` | + +Slices are passed as a single `slice` object: `obj[1:3]` calls `__getitem__(self, slice(1, 3, None))`. + +When `__contains__` is absent, `v in obj` falls back to iterating `obj` and comparing each item with `__eq__`. + +## Iteration + +| Method | Role | +|---------------|----------------------------------------------------------------------| +| `__iter__` | Returns an iterator (often `self`). | +| `__next__` | Returns the next item, or raises `StopIteration` to end the loop. | + +```python +class Up: + def __init__(self, stop): + self.i = 0 + self.stop = stop + def __iter__(self): + return self + def __next__(self): + if self.i >= self.stop: + raise StopIteration + self.i += 1 + return self.i + +print(list(Up(3))) +``` + +```text Output +[1, 2, 3] +``` + +`for` loops, `list(x)`, and `tuple(x)` all honour the protocol. + +## Callable + +`__call__` makes instances invocable. + +```python +class Double: + def __call__(self, x): + return x * 2 + +d = Double() +print(d(7)) +print(callable(d)) +``` + +```text Output +14 +True +``` + +## Hashing + +`hash(x)` calls `__hash__`. The result must be an `int`; the VM masks it to fit `INT_MAX`. + +Eq/hash invariant: a class that defines `__eq__` **without** `__hash__` is unhashable — `hash(x)` and `{x: 1}` raise `TypeError`. This matches CPython and prevents inconsistent dict keys. 
+ +```python +class K: + def __init__(self, n): self.n = n + def __hash__(self): + return self.n + def __eq__(self, o): + return self.n == o.n + +print(hash(K(5))) +print({K(1): 'one'}[K(1)] if K(1).__hash__() == K(1).__hash__() else 'unhashable') +``` + +```text Output +5 +one +``` + +Built-in dict and set still compare instance keys by identity (`Val` bit pattern); the user `__hash__` is returned by `hash()` but doesn't change containment semantics in built-in containers. Use the same instance reference as the key to look up reliably. + +## Representation + +| Function / form | Dunder | Fallback | +|---------------------|---------------|-----------------------------| +| `repr(x)` | `__repr__` | default instance repr | +| `str(x)`, `print(x)`| `__str__` | `__repr__`, then default | +| `f"{x}"` (no spec) | `__str__` | same as `str(x)` | +| `f"{x:spec}"` | `__format__` | built-in format spec engine | +| `f"{x!r}"` | `__repr__` | — | + +`__format__(spec)` receives the format spec string and must return a `str`. + +## Attribute access fallback + +`__getattr__(self, name)` runs only when the normal lookup (instance dict → class chain) misses. It receives the attribute name as a string and returns the value to use, or raises `AttributeError` to surface a real miss. + +```python +class Proxy: + def __getattr__(self, name): + return f"computed:{name}" + +p = Proxy() +print(p.anything) +print(p.foo) +``` + +```text Output +computed:anything +computed:foo +``` + +Existing attributes bypass `__getattr__` — only misses trigger it. + +## Context managers + +`with cm() as x:` invokes `__enter__` on entry; its return value binds to the `as` target. On exit, `__exit__(exc_type, exc_value, traceback)` runs — receiving `(None, None, None)` for normal exit, or the live exception info if the body raised. Returning truthy from `__exit__` suppresses the exception; falsy (or no return) propagates it. 
+ +```python +class Suppress: + def __enter__(self): + return self + def __exit__(self, t, v, tb): + return True # swallow whatever raised + +with Suppress(): + raise ValueError("boom") +print("after") +``` + +```text Output +after +``` + +Multiple managers in one `with` (`with a(), b() as x:`) nest LIFO: `b` enters last and exits first. Each manager has its own implicit exception handler, so an inner suppression still lets outer managers run their normal `__exit__(None, None, None)`. + +If `__exit__` itself raises a new exception, the new exception replaces the original — matching CPython. + +## What's not dispatched + +These dunders are parsed for syntactic compatibility but the VM doesn't invoke them on user classes: + +- `__init_subclass__`, `__set_name__`, descriptor protocol (`__get__` / `__set__` / `__delete__`) +- `__new__` (instances are constructed by the VM; `__init__` runs the user logic) +- Augmented assignment dunders (`__iadd__`, `__imul__`, ...) — `a += b` desugars to `a = a + b`, so `__add__` covers it +- Async dunders (`__aenter__` / `__aexit__` / `__aiter__` / `__anext__`) — `async with` and `async for` use the sync `__enter__` / `__iter__` paths + +For class basics (constructors, inheritance, properties), see [Classes](/language/classes). diff --git a/documentation/reference/builtins.md b/documentation/reference/builtins.md index 083eaa5..2adf614 100644 --- a/documentation/reference/builtins.md +++ b/documentation/reference/builtins.md @@ -19,7 +19,7 @@ p("aliased") aliased ``` -Edge Python is functional-first. Introspection helpers (`eval`, `exec`, `compile`, `dir`, `ascii`, `help`, `__import__`, `breakpoint`, `open`, `issubclass`) are intentionally absent — the static-import contract and the lack of a writable global module table make them either impossible to implement or inconsistent with the paradigm. 
Class-machinery builtins (`super`, `staticmethod`, `classmethod`, `property`) are also omitted; classes are flat state containers, behavior reuse is via free functions. +Edge Python is functional-first. Introspection helpers (`eval`, `exec`, `compile`, `dir`, `ascii`, `help`, `__import__`, `breakpoint`, `open`, `issubclass`) are intentionally absent — the static-import contract and the lack of a writable global module table make them either impossible to implement or inconsistent with the paradigm. `staticmethod` and `classmethod` are omitted (use the namespace pattern or free functions); `super` and `property` are supported — see [`/language/classes`](/language/classes) and [`/language/dunders`](/language/dunders). ## Output @@ -538,7 +538,7 @@ print(type(print)) ### isinstance -Type-name based check. The second argument must be a type object or a tuple of type objects — passing a string (`isinstance(x, "str")`) raises `TypeError`. `bool` is a subtype of `int`. For exception classes, the standard subclass hierarchy is consulted (e.g. `isinstance(e, Exception)` is true for any built-in exception); user classes do not participate. +`isinstance(obj, X)` accepts built-in types, exception classes, user-defined `Class` objects, and tuples of any of those. The second argument must be one of those — passing a string (`isinstance(x, "str")`) raises `TypeError`. `bool` is a subtype of `int`. For exception classes, the standard subclass hierarchy is consulted (e.g. `isinstance(e, Exception)` is true for any built-in exception). For user classes, it walks the inheritance chain — `isinstance(sub_instance, Base)` is `True` when `Sub` derives from `Base`. ```python print(isinstance(42, int)) @@ -760,6 +760,52 @@ print(vars(p)) {'x': 1, 'y': 2} ``` +## Classes + +### super + +`super()` — zero-arg form only. Returns a proxy that resolves attribute access against the bases of the currently-running method's class, starting one step up the MRO. 
Calling it outside a method raises `TypeError`. + +```python +class A: + def m(self): + return "a" + +class B(A): + def m(self): + return super().m() + "b" + +print(B().m()) +``` + +```text Output +ab +``` + +### property + +`property(fget, fset=None)` — builds a descriptor for use as a class member. Usually applied via the `@property` decorator, with an optional `@<name>.setter` decorator to attach the setter. + +```python +class C: + def __init__(self, x): + self._x = x + @property + def x(self): + return self._x + @x.setter + def x(self, v): + self._x = v + +c = C(1) +c.x = 9 +print(c.x) +``` + +```text Output +9 +``` + ## Async These primitives are top-level builtins, not under `asyncio` — there is no `asyncio` module to import. @@ -854,7 +900,9 @@ timed out | `all` | 1 | logical AND; `all([])` is `True` | | `any` | 1 | logical OR; `any([])` is `False` | | `type` | 1 | display string `` | -| `isinstance` | 2 | type or tuple of types | +| `isinstance` | 2 | type, user class, exception, or tuple | +| `super` | 0 | proxy to current method's class bases | +| `property` | 1 or 2 | descriptor; usually via `@property` | | `callable` | 1 | True for fn / lambda / type / built-in | | `id` | 1 | stable identifier | | `hash` | 1 | hash for hashable values | diff --git a/documentation/reference/limits-and-errors.md b/documentation/reference/limits-and-errors.md index 11a64d2..76c9524 100644 --- a/documentation/reference/limits-and-errors.md +++ b/documentation/reference/limits-and-errors.md @@ -155,7 +155,7 @@ caught via parent: oops caught IndexError as Exception ``` -User-defined classes do not participate in the hierarchy — they're flat state containers, caught only by their own name or by a bare `except`. `raise X from Y` raises `X`; the cause is currently discarded (no `__cause__` / `__context__` chaining). 
+User-defined classes do not auto-extend the built-in `BaseException` / `Exception` tree, but they support single-level inheritance among themselves — so `except UserBase` catches a raised `UserSub` instance when `UserSub` inherits from `UserBase`, alongside catches by exact name or a bare `except`. `raise X from Y` raises `X`; the cause is currently discarded (no `__cause__` / `__context__` chaining). ### Exception arguments