From f3df4462d415746cf2acb19a486e792e6951ebc5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 3 Mar 2026 23:35:40 +0900 Subject: [PATCH 1/6] Object header slimming: prefix allocation for ObjExt Extract dict, weak_list, and slots fields from PyInner into a separate ObjExt struct allocated as a prefix before PyInner using Layout::extend(). Objects that don't need these fields (int, str, float, list, tuple, dict, etc.) skip the prefix entirely. - Add HAS_WEAKREF flag to PyTypeFlags for per-type weakref control - Add HAS_EXT bit to GcBits indicating prefix presence - Define ObjExt struct with dict, weak_list, slots - Shrink PyInner header from ~80-88 bytes to ~32 bytes for lightweight objects - Update all accessor methods to go through ext_ref() - Update bootstrap type hierarchy to use prefix allocation - Add __weakref__ getset descriptor for heap types - Set HAS_WEAKREF on builtin types that support weak references - Remove test_weak_keyed_bad_delitem expectedFailure (now passes) --- Lib/test/test_weakref.py | 1 - crates/vm/src/builtins/asyncgenerator.rs | 5 +- crates/vm/src/builtins/builtin_func.rs | 4 +- crates/vm/src/builtins/classmethod.rs | 2 +- crates/vm/src/builtins/coroutine.rs | 2 +- crates/vm/src/builtins/function.rs | 4 +- crates/vm/src/builtins/generator.rs | 2 +- crates/vm/src/builtins/genericalias.rs | 2 +- crates/vm/src/builtins/memory.rs | 2 +- crates/vm/src/builtins/module.rs | 5 +- crates/vm/src/builtins/namespace.rs | 2 +- crates/vm/src/builtins/property.rs | 5 +- crates/vm/src/builtins/set.rs | 4 +- crates/vm/src/builtins/staticmethod.rs | 2 +- crates/vm/src/builtins/type.rs | 271 +++++++++++--------- crates/vm/src/object/core.rs | 300 ++++++++++++++++++----- crates/vm/src/object/traverse_object.rs | 17 +- crates/vm/src/types/slot.rs | 1 + 18 files changed, 433 insertions(+), 198 deletions(-) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index e04afbb1af5..27bbcf7048d 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1862,7 +1862,6 @@ def test_weak_valued_delitem(self): self.assertEqual(len(d), 1) self.assertEqual(list(d.items()), [('something else', o2)]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weak_keyed_bad_delitem(self): d = weakref.WeakKeyDictionary() o = Object('1') diff --git a/crates/vm/src/builtins/asyncgenerator.rs b/crates/vm/src/builtins/asyncgenerator.rs index 6b7cfea9c29..dcb1c6d6f81 100644 --- a/crates/vm/src/builtins/asyncgenerator.rs +++ b/crates/vm/src/builtins/asyncgenerator.rs @@ -40,7 +40,10 @@ impl PyPayload for PyAsyncGen { } } -#[pyclass(flags(DISALLOW_INSTANTIATION), with(PyRef, Representable, Destructor))] +#[pyclass( + flags(DISALLOW_INSTANTIATION, HAS_WEAKREF), + with(PyRef, Representable, Destructor) +)] impl PyAsyncGen { pub const fn as_coro(&self) -> &Coro { &self.inner diff --git a/crates/vm/src/builtins/builtin_func.rs b/crates/vm/src/builtins/builtin_func.rs index bc72b1ad533..de5ff1d6878 100644 --- a/crates/vm/src/builtins/builtin_func.rs +++ b/crates/vm/src/builtins/builtin_func.rs @@ -126,7 +126,7 @@ impl Representable for PyNativeFunction { #[pyclass( with(Callable, Comparable, Representable), - flags(HAS_DICT, DISALLOW_INSTANTIATION) + flags(HAS_DICT, HAS_WEAKREF, DISALLOW_INSTANTIATION) )] impl PyNativeFunction { #[pygetset] @@ -210,7 +210,7 @@ pub struct PyNativeMethod { // All Python-visible behavior (getters, slots) is registered by PyNativeFunction::extend_class. // PyNativeMethod only extends the Rust-side struct with the defining class reference. // The func field at offset 0 (#[repr(C)]) allows NativeFunctionOrMethod to read it safely. -#[pyclass(flags(HAS_DICT, DISALLOW_INSTANTIATION))] +#[pyclass(flags(HAS_DICT, HAS_WEAKREF, DISALLOW_INSTANTIATION))] impl PyNativeMethod {} impl fmt::Debug for PyNativeMethod { diff --git a/crates/vm/src/builtins/classmethod.rs b/crates/vm/src/builtins/classmethod.rs index 3ec1085abc4..f42bdcc23d2 100644 --- a/crates/vm/src/builtins/classmethod.rs +++ b/crates/vm/src/builtins/classmethod.rs @@ -125,7 +125,7 @@ impl PyClassMethod { #[pyclass( with(GetDescriptor, Constructor, Initializer, Representable), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl PyClassMethod { #[pygetset] diff --git a/crates/vm/src/builtins/coroutine.rs b/crates/vm/src/builtins/coroutine.rs index 5b29570b2f8..9746dddda87 100644 --- a/crates/vm/src/builtins/coroutine.rs +++ b/crates/vm/src/builtins/coroutine.rs @@ -32,7 +32,7 @@ impl PyPayload for PyCoroutine { } #[pyclass( - flags(DISALLOW_INSTANTIATION), + flags(DISALLOW_INSTANTIATION, HAS_WEAKREF), with(Py, IterNext, Representable, Destructor) )] impl PyCoroutine { diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 03663d22e5d..099fd92e667 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -717,7 +717,7 @@ impl PyPayload for PyFunction { #[pyclass( with(GetDescriptor, Callable, Representable, Constructor), - flags(HAS_DICT, METHOD_DESCRIPTOR) + flags(HAS_DICT, HAS_WEAKREF, METHOD_DESCRIPTOR) )] impl PyFunction { #[pygetset] @@ -1170,7 +1170,7 @@ impl PyBoundMethod { #[pyclass( with(Callable, Comparable, GetAttr, Constructor, Representable), - flags(IMMUTABLETYPE) + flags(IMMUTABLETYPE, HAS_WEAKREF) )] impl PyBoundMethod { #[pymethod] diff --git a/crates/vm/src/builtins/generator.rs b/crates/vm/src/builtins/generator.rs index fd822e9fbfe..2eee2fecd0d 100644 --- a/crates/vm/src/builtins/generator.rs +++ b/crates/vm/src/builtins/generator.rs @@ -34,7 +34,7 @@ impl PyPayload for PyGenerator { } #[pyclass( - flags(DISALLOW_INSTANTIATION), + flags(DISALLOW_INSTANTIATION, HAS_WEAKREF), with(Py, IterNext, Iterable, Representable, Destructor) )] impl PyGenerator { diff --git a/crates/vm/src/builtins/genericalias.rs b/crates/vm/src/builtins/genericalias.rs index 96da93dd8ef..607236abfce 100644 --- a/crates/vm/src/builtins/genericalias.rs +++ b/crates/vm/src/builtins/genericalias.rs @@ -84,7 +84,7 @@ impl Constructor for PyGenericAlias { Iterable, Representable ), - flags(BASETYPE) + flags(BASETYPE, HAS_WEAKREF) )] impl PyGenericAlias { pub fn new( diff --git a/crates/vm/src/builtins/memory.rs b/crates/vm/src/builtins/memory.rs index a3403287dae..cc7a7d02a6d 100644 --- a/crates/vm/src/builtins/memory.rs +++ b/crates/vm/src/builtins/memory.rs @@ -549,7 +549,7 @@ impl Py { Iterable, Representable ), - flags(SEQUENCE) + flags(SEQUENCE, HAS_WEAKREF) )] impl PyMemoryView { #[pyclassmethod] diff --git a/crates/vm/src/builtins/module.rs b/crates/vm/src/builtins/module.rs index 0dc2b571eae..b8609aae965 100644 --- a/crates/vm/src/builtins/module.rs +++ b/crates/vm/src/builtins/module.rs @@ -286,7 +286,10 @@ impl Py { } } -#[pyclass(with(GetAttr, Initializer, Representable), flags(BASETYPE, HAS_DICT))] +#[pyclass( + with(GetAttr, Initializer, Representable), + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) +)] impl PyModule { #[pyslot] fn slot_new(cls: PyTypeRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/builtins/namespace.rs b/crates/vm/src/builtins/namespace.rs index a32dda14586..4e872a172a4 100644 --- a/crates/vm/src/builtins/namespace.rs +++ b/crates/vm/src/builtins/namespace.rs @@ -28,7 +28,7 @@ impl PyPayload for PyNamespace { impl DefaultConstructor for PyNamespace {} #[pyclass( - flags(BASETYPE, HAS_DICT), + flags(BASETYPE, HAS_DICT, HAS_WEAKREF), with(Constructor, Initializer, Comparable, Representable) )] impl PyNamespace { diff --git a/crates/vm/src/builtins/property.rs b/crates/vm/src/builtins/property.rs index 509307c7b00..2a7d451069e 100644 --- a/crates/vm/src/builtins/property.rs +++ b/crates/vm/src/builtins/property.rs @@ -65,7 +65,10 @@ impl GetDescriptor for PyProperty { } } -#[pyclass(with(Constructor, Initializer, GetDescriptor), flags(BASETYPE))] +#[pyclass( + with(Constructor, Initializer, GetDescriptor), + flags(BASETYPE, HAS_WEAKREF) +)] impl PyProperty { // Helper method to get property name // Returns the name if available, None if not found, or propagates errors diff --git a/crates/vm/src/builtins/set.rs b/crates/vm/src/builtins/set.rs index 2b1e9c82e60..85e6b37fab0 100644 --- a/crates/vm/src/builtins/set.rs +++ b/crates/vm/src/builtins/set.rs @@ -531,7 +531,7 @@ fn reduce_set( AsNumber, Representable ), - flags(BASETYPE, _MATCH_SELF) + flags(BASETYPE, _MATCH_SELF, HAS_WEAKREF) )] impl PySet { fn __len__(&self) -> usize { @@ -996,7 +996,7 @@ impl Constructor for PyFrozenSet { } #[pyclass( - flags(BASETYPE, _MATCH_SELF), + flags(BASETYPE, _MATCH_SELF, HAS_WEAKREF), with( Constructor, AsSequence, diff --git a/crates/vm/src/builtins/staticmethod.rs b/crates/vm/src/builtins/staticmethod.rs index a06267650a2..2554fa816aa 100644 --- a/crates/vm/src/builtins/staticmethod.rs +++ b/crates/vm/src/builtins/staticmethod.rs @@ -88,7 +88,7 @@ impl Initializer for PyStaticMethod { #[pyclass( with(Callable, GetDescriptor, Constructor, Initializer, Representable), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl PyStaticMethod { #[pygetset] diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index cca8c4692e6..1978ad4e363 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -562,6 +562,14 @@ impl PyType { slots.flags |= PyTypeFlags::HAS_DICT } + // Inherit HAS_WEAKREF from any base in MRO that has it + if mro + .iter() + .any(|b| b.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF)) + { + slots.flags |= PyTypeFlags::HAS_WEAKREF + } + // Inherit SEQUENCE and MAPPING flags from base classes Self::inherit_patma_flags(&mut slots, &bases); @@ -623,6 +631,9 @@ impl PyType { if base.slots.flags.has_feature(PyTypeFlags::HAS_DICT) { slots.flags |= PyTypeFlags::HAS_DICT } + if base.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF) { + slots.flags |= PyTypeFlags::HAS_WEAKREF + } // Inherit SEQUENCE and MAPPING flags from base class // For static types, we only have a single base @@ -1083,7 +1094,7 @@ impl Py { AsNumber, Representable ), - flags(BASETYPE) + flags(BASETYPE, HAS_WEAKREF) )] impl PyType { #[pygetset] @@ -1709,141 +1720,125 @@ impl Constructor for PyType { attributes.insert(identifier!(vm, __hash__), vm.ctx.none.clone().into()); } - let (heaptype_slots, add_dict): (Option>>, bool) = - if let Some(x) = attributes.get(identifier!(vm, __slots__)) { - // Check if __slots__ is bytes - not allowed - if x.class().is(vm.ctx.types.bytes_type) { - return Err(vm.new_type_error("__slots__ items must be strings, not 'bytes'")); - } - - let slots = if x.class().is(vm.ctx.types.str_type) { - let x = unsafe { x.downcast_unchecked_ref::() }; - PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx) - } else { - let iter = x.get_iter(vm)?; - let elements = { - let mut elements = Vec::new(); - while let PyIterReturn::Return(element) = iter.next(vm)? { - // Check if any slot item is bytes - if element.class().is(vm.ctx.types.bytes_type) { - return Err(vm.new_type_error( - "__slots__ items must be strings, not 'bytes'", - )); - } - elements.push(element); - } - elements - }; - let tuple = elements.into_pytuple(vm); - tuple.try_into_typed(vm)? - }; - - // Check if base has itemsize > 0 - can't add arbitrary slots to variable-size types - // Types like int, bytes, tuple have itemsize > 0 and don't allow custom slots - // But types like weakref.ref have itemsize = 0 and DO allow slots - let has_custom_slots = slots - .iter() - .any(|s| !matches!(s.as_bytes(), b"__dict__" | b"__weakref__")); - if has_custom_slots && base.slots.itemsize > 0 { - return Err(vm.new_type_error(format!( - "nonempty __slots__ not supported for subtype of '{}'", - base.name() - ))); - } - - // Validate slot names and track duplicates - let mut seen_dict = false; - let mut seen_weakref = false; - for slot in slots.iter() { - // Use isidentifier for validation (handles Unicode properly) - if !slot.isidentifier() { - return Err(vm.new_type_error("__slots__ must be identifiers")); - } - - let slot_name = slot.as_bytes(); + let (heaptype_slots, add_dict, add_weakref): ( + Option>>, + bool, + bool, + ) = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { + // Check if __slots__ is bytes - not allowed + if x.class().is(vm.ctx.types.bytes_type) { + return Err(vm.new_type_error("__slots__ items must be strings, not 'bytes'")); + } - // Check for duplicate __dict__ - if slot_name == b"__dict__" { - if seen_dict { + let slots = if x.class().is(vm.ctx.types.str_type) { + let x = unsafe { x.downcast_unchecked_ref::() }; + PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx) + } else { + let iter = x.get_iter(vm)?; + let elements = { + let mut elements = Vec::new(); + while let PyIterReturn::Return(element) = iter.next(vm)? { + // Check if any slot item is bytes + if element.class().is(vm.ctx.types.bytes_type) { return Err( - vm.new_type_error("__dict__ slot disallowed: we already got one") + vm.new_type_error("__slots__ items must be strings, not 'bytes'") ); } - seen_dict = true; + elements.push(element); } + elements + }; + let tuple = elements.into_pytuple(vm); + tuple.try_into_typed(vm)? + }; - // Check for duplicate __weakref__ - if slot_name == b"__weakref__" { - if seen_weakref { - return Err(vm.new_type_error( - "__weakref__ slot disallowed: we already got one", - )); - } - seen_weakref = true; - } + // Check if base has itemsize > 0 - can't add arbitrary slots to variable-size types + // Types like int, bytes, tuple have itemsize > 0 and don't allow custom slots + // But types like weakref.ref have itemsize = 0 and DO allow slots + let has_custom_slots = slots + .iter() + .any(|s| !matches!(s.as_bytes(), b"__dict__" | b"__weakref__")); + if has_custom_slots && base.slots.itemsize > 0 { + return Err(vm.new_type_error(format!( + "nonempty __slots__ not supported for subtype of '{}'", + base.name() + ))); + } - // Check if slot name conflicts with class attributes - if attributes.contains_key(vm.ctx.intern_str(slot.as_wtf8())) { - return Err(vm.new_value_error(format!( - "'{}' in __slots__ conflicts with a class variable", - slot.as_wtf8() - ))); - } + // Validate slot names and track duplicates + let mut seen_dict = false; + let mut seen_weakref = false; + for slot in slots.iter() { + // Use isidentifier for validation (handles Unicode properly) + if !slot.isidentifier() { + return Err(vm.new_type_error("__slots__ must be identifiers")); } - // Check if base class already has __dict__ - can't redefine it - if seen_dict && base.slots.flags.has_feature(PyTypeFlags::HAS_DICT) { - return Err(vm.new_type_error("__dict__ slot disallowed: we already got one")); - } + let slot_name = slot.as_bytes(); - // Check if base class already has __weakref__ - can't redefine it - // A base has weakref support if: - // 1. It's a heap type without explicit __slots__ (automatic weakref), OR - // 2. It's a heap type with __weakref__ in its __slots__ - if seen_weakref { - let base_has_weakref = if let Some(ref ext) = base.heaptype_ext { - match &ext.slots { - // Heap type without __slots__ - has automatic weakref - None => true, - // Heap type with __slots__ - check if __weakref__ is in slots - Some(base_slots) => { - base_slots.iter().any(|s| s.as_bytes() == b"__weakref__") - } - } - } else { - // Builtin type - check if it has __weakref__ descriptor - let weakref_name = vm.ctx.intern_str("__weakref__"); - base.attributes.read().contains_key(weakref_name) - }; + // Check for duplicate __dict__ + if slot_name == b"__dict__" { + if seen_dict { + return Err( + vm.new_type_error("__dict__ slot disallowed: we already got one") + ); + } + seen_dict = true; + } - if base_has_weakref { + // Check for duplicate __weakref__ + if slot_name == b"__weakref__" { + if seen_weakref { return Err( vm.new_type_error("__weakref__ slot disallowed: we already got one") ); } + seen_weakref = true; } - // Check if __dict__ is in slots - let dict_name = "__dict__"; - let has_dict = slots.iter().any(|s| s.as_wtf8() == dict_name); - - // Filter out __dict__ from slots - let filtered_slots = if has_dict { - let filtered: Vec = slots - .iter() - .filter(|s| s.as_wtf8() != dict_name) - .cloned() - .collect(); - PyTuple::new_ref_typed(filtered, &vm.ctx) - } else { - slots - }; + // Check if slot name conflicts with class attributes + if attributes.contains_key(vm.ctx.intern_str(slot.as_wtf8())) { + return Err(vm.new_value_error(format!( + "'{}' in __slots__ conflicts with a class variable", + slot.as_wtf8() + ))); + } + } - (Some(filtered_slots), has_dict) + // Check if base class already has __dict__ - can't redefine it + if seen_dict && base.slots.flags.has_feature(PyTypeFlags::HAS_DICT) { + return Err(vm.new_type_error("__dict__ slot disallowed: we already got one")); + } + + // Check if base class already has __weakref__ - can't redefine it + if seen_weakref && base.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF) { + return Err(vm.new_type_error("__weakref__ slot disallowed: we already got one")); + } + + // Check if __dict__ or __weakref__ is in slots + let dict_name = "__dict__"; + let weakref_name = "__weakref__"; + let has_dict = slots.iter().any(|s| s.as_wtf8() == dict_name); + let add_weakref = seen_weakref; + + // Filter out __dict__ and __weakref__ from slots + // (they become descriptors, not member slots) + let filtered_slots = if has_dict || add_weakref { + let filtered: Vec = slots + .iter() + .filter(|s| s.as_wtf8() != dict_name && s.as_wtf8() != weakref_name) + .cloned() + .collect(); + PyTuple::new_ref_typed(filtered, &vm.ctx) } else { - (None, false) + slots }; + (Some(filtered_slots), has_dict, add_weakref) + } else { + (None, false, false) + }; + // FIXME: this is a temporary fix. multi bases with multiple slots will break object let base_member_count = bases .iter() @@ -1867,6 +1862,14 @@ impl Constructor for PyType { flags |= PyTypeFlags::HAS_DICT | PyTypeFlags::MANAGED_DICT; } + // Add HAS_WEAKREF if: + // 1. __slots__ is not defined (automatic weakref support), OR + // 2. __weakref__ is in __slots__ + let may_add_weakref = !base.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF); + if (heaptype_slots.is_none() && may_add_weakref) || add_weakref { + flags |= PyTypeFlags::HAS_WEAKREF; + } + let (slots, heaptype_ext) = { let slots = PyTypeSlots { flags, @@ -1965,6 +1968,29 @@ impl Constructor for PyType { } } + // Add __weakref__ descriptor for types with HAS_WEAKREF + if typ.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF) { + let __weakref__ = vm.ctx.intern_str("__weakref__"); + let has_inherited_weakref = typ + .mro + .read() + .iter() + .any(|base| base.attributes.read().contains_key(&__weakref__)); + if !typ.attributes.read().contains_key(&__weakref__) && !has_inherited_weakref { + unsafe { + let descriptor = vm.ctx.new_getset( + "__weakref__", + &typ, + subtype_getweakref, + subtype_setweakref, + ); + typ.attributes + .write() + .insert(__weakref__, descriptor.into()); + } + } + } + // Set __doc__ to None if not already present in the type's dict // This matches CPython's behavior in type_dict_set_doc (typeobject.c) // which ensures every type has a __doc__ entry in its dict @@ -2400,6 +2426,21 @@ fn subtype_set_dict(obj: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) - } } +// subtype_getweakref +fn subtype_getweakref(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // Return the first weakref in the weakref list, or None + let weakref = obj.get_weakrefs(); + Ok(weakref.unwrap_or_else(|| vm.ctx.none())) +} + +// subtype_setweakref: __weakref__ is read-only +fn subtype_setweakref(obj: PyObjectRef, _value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + Err(vm.new_attribute_error(format!( + "attribute '__weakref__' of '{}' objects is not writable", + obj.class().name() + ))) +} + /* * The magical type type */ diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 13927952604..f2708e2160d 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -203,7 +203,8 @@ pub(super) unsafe fn default_dealloc(obj: *mut PyObject) { false }; if !pushed { - drop(unsafe { Box::from_raw(obj as *mut PyInner) }); + // Deallocate the object memory (handles ObjExt prefix if present) + unsafe { PyInner::dealloc(obj as *mut PyInner) }; } // Drop child references - may trigger recursive destruction. @@ -256,6 +257,8 @@ bitflags::bitflags! { const SHARED_INLINE = 1 << 5; /// Use deferred reference counting const DEFERRED = 1 << 6; + /// Object has ObjExt prefix (dict, weak_list, slots) + const HAS_EXT = 1 << 7; } } @@ -286,6 +289,40 @@ unsafe impl Link for GcLink { } } +/// Extension fields for objects that need dict, weakref list, or member slots. +/// Allocated as a prefix before PyInner when needed (prefix allocation pattern). +/// Access via `PyInner::ext_ref()` using negative offset from the object pointer. +#[repr(C)] +pub(super) struct ObjExt { + pub(super) dict: Option, + pub(super) weak_list: WeakRefList, + pub(super) slots: Box<[PyRwLock>]>, +} + +impl ObjExt { + fn new(dict: Option, member_count: usize) -> Self { + Self { + dict: dict.map(InstanceDict::new), + weak_list: WeakRefList::new(), + slots: core::iter::repeat_with(|| PyRwLock::new(None)) + .take(member_count) + .collect_vec() + .into_boxed_slice(), + } + } +} + +impl fmt::Debug for ObjExt { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[ObjExt]") + } +} + +/// Precomputed offset from PyInner pointer back to ObjExt prefix. +/// Both ObjExt and PyInner are #[repr(C)] with 8-byte alignment, +/// so the offset equals size_of::() with no padding. +const EXT_OFFSET: usize = core::mem::size_of::(); + /// This is an actual python object. It consists of a `typ` which is the /// python class, and carries some rust payload optionally. This rust /// payload can be a rust float or rust int in case of float and int objects. @@ -302,14 +339,31 @@ pub(super) struct PyInner { pub(super) gc_pointers: Pointers, pub(super) typ: PyAtomicRef, // __class__ member - pub(super) dict: Option, - pub(super) weak_list: WeakRefList, - pub(super) slots: Box<[PyRwLock>]>, pub(super) payload: T, } pub(crate) const SIZEOF_PYOBJECT_HEAD: usize = core::mem::size_of::>(); +impl PyInner { + /// Access the ObjExt prefix at a negative offset from this PyInner. + /// Returns None if this object was allocated without the prefix. + /// + /// Uses exposed provenance to reconstruct a pointer covering the entire + /// allocation (ObjExt prefix + PyInner). The allocation pointer's provenance + /// is exposed at allocation time via `expose_provenance()`. + #[inline(always)] + pub(super) fn ext_ref(&self) -> Option<&ObjExt> { + if !GcBits::from_bits_retain(self.gc_bits.load(Ordering::Relaxed)).contains(GcBits::HAS_EXT) + { + return None; + } + let self_addr = (self as *const Self as *const u8).addr(); + let ext_ptr = + core::ptr::with_exposed_provenance::(self_addr.wrapping_sub(EXT_OFFSET)); + Some(unsafe { &*ext_ptr }) + } +} + impl fmt::Debug for PyInner { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "[PyObject {:?}]", &self.payload) @@ -764,7 +818,8 @@ impl PyWeak { } let obj = unsafe { &*obj_ptr }; - let wrl = &obj.0.weak_list; + // Safety: if a weakref exists pointing to this object, ext must be present + let wrl = &obj.0.ext_ref().unwrap().weak_list; // Compute our Py node pointer from payload address let offset = std::mem::offset_of!(PyInner, payload); @@ -839,24 +894,88 @@ impl InstanceDict { } } +impl PyInner { + /// Deallocate a PyInner, handling the optional ObjExt prefix. + /// + /// # Safety + /// `ptr` must be a valid pointer from `PyInner::new` and must not be used after this call. + unsafe fn dealloc(ptr: *mut Self) { + unsafe { + let has_ext = GcBits::from_bits_retain((*ptr).gc_bits.load(Ordering::Relaxed)) + .contains(GcBits::HAS_EXT); + + if has_ext { + let ext_layout = core::alloc::Layout::new::(); + let inner_layout = core::alloc::Layout::new::(); + let (combined, inner_offset) = ext_layout.extend(inner_layout).unwrap(); + let combined = combined.pad_to_align(); + + let alloc_ptr = (ptr as *mut u8).sub(inner_offset); + + core::ptr::drop_in_place(ptr); + core::ptr::drop_in_place(alloc_ptr as *mut ObjExt); + + alloc::alloc::dealloc(alloc_ptr, combined); + } else { + drop(Box::from_raw(ptr)); + } + } + } +} + impl PyInner { - fn new(payload: T, typ: PyTypeRef, dict: Option) -> Box { + /// Allocate a new PyInner, optionally with an ObjExt prefix. + /// Returns a raw pointer to the PyInner (NOT the allocation start). + /// For objects with ext, the allocation layout is: [ObjExt][PyInner] + fn new(payload: T, typ: PyTypeRef, dict: Option) -> *mut Self { let member_count = typ.slots.member_count; - Box::new(Self { - ref_count: RefCount::new(), - vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), - gc_generation: Radium::new(GC_UNTRACKED), - gc_pointers: Pointers::new(), - typ: PyAtomicRef::from(typ), - dict: dict.map(InstanceDict::new), - weak_list: WeakRefList::new(), - payload, - slots: core::iter::repeat_with(|| PyRwLock::new(None)) - .take(member_count) - .collect_vec() - .into_boxed_slice(), - }) + let needs_ext = dict.is_some() + || typ + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_WEAKREF) + || member_count > 0; + + if needs_ext { + let ext_layout = core::alloc::Layout::new::(); + let inner_layout = core::alloc::Layout::new::(); + let (combined, inner_offset) = ext_layout.extend(inner_layout).unwrap(); + let combined = combined.pad_to_align(); + + let alloc_ptr = unsafe { alloc::alloc::alloc(combined) }; + if alloc_ptr.is_null() { + alloc::alloc::handle_alloc_error(combined); + } + // Expose provenance so ext_ref() can reconstruct via with_exposed_provenance + alloc_ptr.expose_provenance(); + + unsafe { + let ext_ptr = alloc_ptr as *mut ObjExt; + ext_ptr.write(ObjExt::new(dict, member_count)); + + let inner_ptr = alloc_ptr.add(inner_offset) as *mut Self; + inner_ptr.write(Self { + ref_count: RefCount::new(), + vtable: PyObjVTable::of::(), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_generation: Radium::new(GC_UNTRACKED), + gc_pointers: Pointers::new(), + typ: PyAtomicRef::from(typ), + payload, + }); + inner_ptr + } + } else { + Box::into_raw(Box::new(Self { + ref_count: RefCount::new(), + vtable: PyObjVTable::of::(), + gc_bits: Radium::new(0), + gc_generation: Radium::new(GC_UNTRACKED), + gc_pointers: Pointers::new(), + typ: PyAtomicRef::from(typ), + payload, + })) + } } } @@ -1075,9 +1194,29 @@ impl PyObjectRef { } impl PyObject { + /// Returns the WeakRefList if the object has ext (prefix allocation). + /// Note: This does NOT check HAS_WEAKREF flag. Callers creating weakrefs + /// must check HAS_WEAKREF themselves. This method is used in drop/GC paths + /// where the type may already be deallocated. #[inline(always)] - const fn weak_ref_list(&self) -> Option<&WeakRefList> { - Some(&self.0.weak_list) + fn weak_ref_list(&self) -> Option<&WeakRefList> { + self.0.ext_ref().map(|ext| &ext.weak_list) + } + + /// Returns the first weakref in the weakref list, if any. + pub(crate) fn get_weakrefs(&self) -> Option { + let wrl = self.weak_ref_list()?; + let head_ptr = wrl.head.load(Ordering::Relaxed); + if head_ptr.is_null() { + None + } else { + let head = unsafe { &*head_ptr }; + if head.0.ref_count.safe_inc() { + Some(unsafe { PyRef::from_raw(head_ptr) }.into()) + } else { + None + } + } } pub(crate) fn downgrade_with_weakref_typ_opt( @@ -1096,6 +1235,18 @@ impl PyObject { typ: PyTypeRef, vm: &VirtualMachine, ) -> PyResult> { + // Check HAS_WEAKREF flag first + if !self + .class() + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_WEAKREF) + { + return Err(vm.new_type_error(format!( + "cannot create weak reference to '{}' object", + self.class().name() + ))); + } let dict = if typ .slots .flags @@ -1180,8 +1331,8 @@ impl PyObject { } #[inline(always)] - const fn instance_dict(&self) -> Option<&InstanceDict> { - self.0.dict.as_ref() + fn instance_dict(&self) -> Option<&InstanceDict> { + self.0.ext_ref().and_then(|ext| ext.dict.as_ref()) } #[inline(always)] @@ -1396,11 +1547,11 @@ impl PyObject { } pub(crate) fn get_slot(&self, offset: usize) -> Option { - self.0.slots[offset].read().clone() + self.0.ext_ref().unwrap().slots[offset].read().clone() } pub(crate) fn set_slot(&self, offset: usize, value: Option) { - *self.0.slots[offset].write() = value; + *self.0.ext_ref().unwrap().slots[offset].write() = value; } /// _PyObject_GC_IS_TRACKED @@ -1487,9 +1638,11 @@ impl PyObject { } // 2. Clear member slots (subtype_clear) - for slot in obj.0.slots.iter() { - if let Some(val) = slot.write().take() { - result.push(val); + if let Some(ext) = obj.0.ext_ref() { + for slot in ext.slots.iter() { + if let Some(val) = slot.write().take() { + result.push(val); + } } } @@ -1513,7 +1666,11 @@ impl PyObject { /// Check if this object has clear capability (tp_clear) // Py_TPFLAGS_HAVE_GC types have tp_clear pub fn gc_has_clear(&self) -> bool { - self.0.vtable.clear.is_some() || self.0.dict.is_some() || !self.0.slots.is_empty() + self.0.vtable.clear.is_some() + || self + .0 + .ext_ref() + .is_some_and(|ext| ext.dict.is_some() || !ext.slots.is_empty()) } } @@ -1902,7 +2059,7 @@ impl PyRef { drop(typ); unsafe { NonNull::new_unchecked(inner.cast::>()) } } else { - let inner = Box::into_raw(PyInner::new(payload, typ, dict)); + let inner = PyInner::new(payload, typ, dict); unsafe { NonNull::new_unchecked(inner.cast::>()) } }; @@ -2122,34 +2279,57 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { heaptype_ext: None, tp_version_tag: core::sync::atomic::AtomicU32::new(0), }; - let type_type_ptr = Box::into_raw(Box::new(partially_init!( - PyInner:: { - ref_count: RefCount::new(), - vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), - gc_generation: Radium::new(GC_UNTRACKED), - gc_pointers: Pointers::new(), - dict: None, - weak_list: WeakRefList::new(), - payload: type_payload, - slots: Box::new([]), - }, - Uninit { typ } - ))); - let object_type_ptr = Box::into_raw(Box::new(partially_init!( - PyInner:: { - ref_count: RefCount::new(), - vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), - gc_generation: Radium::new(GC_UNTRACKED), - gc_pointers: Pointers::new(), - dict: None, - weak_list: WeakRefList::new(), - payload: object_payload, - slots: Box::new([]), - }, - Uninit { typ }, - ))); + // Both type_type and object_type are instances of `type`, which has HAS_WEAKREF, + // so they need prefix allocation with ObjExt. + let alloc_type_with_ext = || -> *mut MaybeUninit> { + let ext_layout = core::alloc::Layout::new::(); + let inner_layout = core::alloc::Layout::new::>>(); + let (combined, inner_offset) = ext_layout.extend(inner_layout).unwrap(); + let combined = combined.pad_to_align(); + + let alloc_ptr = unsafe { alloc::alloc::alloc(combined) }; + if alloc_ptr.is_null() { + alloc::alloc::handle_alloc_error(combined); + } + // Expose provenance so ext_ref() can reconstruct via with_exposed_provenance + alloc_ptr.expose_provenance(); + + unsafe { + let ext_ptr = alloc_ptr as *mut ObjExt; + ext_ptr.write(ObjExt::new(None, 0)); + alloc_ptr.add(inner_offset) as *mut MaybeUninit> + } + }; + + let type_type_ptr = alloc_type_with_ext(); + unsafe { + type_type_ptr.write(partially_init!( + PyInner:: { + ref_count: RefCount::new(), + vtable: PyObjVTable::of::(), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_generation: Radium::new(GC_UNTRACKED), + gc_pointers: Pointers::new(), + payload: type_payload, + }, + Uninit { typ } + )); + } + + let object_type_ptr = alloc_type_with_ext(); + unsafe { + object_type_ptr.write(partially_init!( + PyInner:: { + ref_count: RefCount::new(), + vtable: PyObjVTable::of::(), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_generation: Radium::new(GC_UNTRACKED), + gc_pointers: Pointers::new(), + payload: object_payload, + }, + Uninit { typ }, + )); + } let object_type_ptr = object_type_ptr as *mut PyInner; let type_type_ptr = type_type_ptr as *mut PyInner; diff --git a/crates/vm/src/object/traverse_object.rs b/crates/vm/src/object/traverse_object.rs index 3f88c6b7481..de8d2d5f53e 100644 --- a/crates/vm/src/object/traverse_object.rs +++ b/crates/vm/src/object/traverse_object.rs @@ -65,9 +65,12 @@ unsafe impl Traverse for PyInner { let typ_obj: &PyObject = unsafe { &*(typ as *const _ as *const PyObject) }; tracer_fn(typ_obj); } - self.dict.traverse(tracer_fn); - // weak_list is inline atomic pointers, no heap allocation, no trace - self.slots.traverse(tracer_fn); + // Traverse ObjExt prefix fields (dict and slots) if present + if let Some(ext) = self.ext_ref() { + ext.dict.traverse(tracer_fn); + // weak_list is atomic pointers, no trace needed + ext.slots.traverse(tracer_fn); + } if let Some(f) = self.vtable.trace { unsafe { @@ -87,9 +90,11 @@ unsafe impl Traverse for PyInner { let typ_obj: &PyObject = unsafe { &*(typ as *const _ as *const PyObject) }; tracer_fn(typ_obj); } - self.dict.traverse(tracer_fn); - // weak_list is inline atomic pointers, no heap allocation, no trace - self.slots.traverse(tracer_fn); + // Traverse ObjExt prefix fields (dict and slots) if present + if let Some(ext) = self.ext_ref() { + ext.dict.traverse(tracer_fn); + ext.slots.traverse(tracer_fn); + } T::try_traverse(&self.payload, tracer_fn); } } diff --git a/crates/vm/src/types/slot.rs b/crates/vm/src/types/slot.rs index 58040f7928c..60d10194f30 100644 --- a/crates/vm/src/types/slot.rs +++ b/crates/vm/src/types/slot.rs @@ -228,6 +228,7 @@ bitflags! { // This is not a stable API const _MATCH_SELF = 1 << 22; const HAS_DICT = 1 << 40; + const HAS_WEAKREF = 1 << 41; #[cfg(debug_assertions)] const _CREATED_WITH_FLAGS = 1 << 63; From 75d8a3a60c30dcecf2b439c27e7cf2a7ce13ac9b Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 00:46:59 +0900 Subject: [PATCH 2/6] Add HAS_WEAKREF to _asyncio Future/Task, rename weakref helpers - Add HAS_WEAKREF flag to PyFuture and PyTask (matches CPython) - Rename subtype_getweakref/setweakref to subtype_get_weakref/set_weakref to fix cspell unknown word lint --- crates/stdlib/src/_asyncio.rs | 4 ++-- crates/vm/src/builtins/type.rs | 12 ++++++------ crates/vm/src/object/core.rs | 7 ++++++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/stdlib/src/_asyncio.rs b/crates/stdlib/src/_asyncio.rs index 2733e801251..abcd56a74d3 100644 --- a/crates/stdlib/src/_asyncio.rs +++ b/crates/stdlib/src/_asyncio.rs @@ -160,7 +160,7 @@ pub(crate) mod _asyncio { } #[pyclass( - flags(BASETYPE, HAS_DICT), + flags(BASETYPE, HAS_DICT, HAS_WEAKREF), with(Constructor, Initializer, Destructor, Representable, Iterable) )] impl PyFuture { @@ -1169,7 +1169,7 @@ pub(crate) mod _asyncio { } #[pyclass( - flags(BASETYPE, HAS_DICT), + flags(BASETYPE, HAS_DICT, HAS_WEAKREF), with(Constructor, Initializer, Destructor, Representable, Iterable) )] impl PyTask { diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 1978ad4e363..ae97ce6520e 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1981,8 +1981,8 @@ impl Constructor for PyType { let descriptor = vm.ctx.new_getset( "__weakref__", &typ, - subtype_getweakref, - subtype_setweakref, + subtype_get_weakref, + subtype_set_weakref, ); typ.attributes .write() @@ -2426,15 +2426,15 @@ fn subtype_set_dict(obj: PyObjectRef, value: PyObjectRef, vm: &VirtualMachine) - } } -// subtype_getweakref -fn subtype_getweakref(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { +// subtype_get_weakref +fn subtype_get_weakref(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { // Return the first weakref in the weakref list, or None let weakref = obj.get_weakrefs(); Ok(weakref.unwrap_or_else(|| vm.ctx.none())) } -// subtype_setweakref: __weakref__ is read-only -fn subtype_setweakref(obj: PyObjectRef, _value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { +// subtype_set_weakref: __weakref__ is read-only +fn subtype_set_weakref(obj: PyObjectRef, _value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { Err(vm.new_attribute_error(format!( "attribute '__weakref__' of '{}' objects is not writable", obj.class().name() diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index f2708e2160d..0486df56acc 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -292,7 +292,12 @@ unsafe impl Link for GcLink { /// Extension fields for objects that need dict, weakref list, or member slots. /// Allocated as a prefix before PyInner when needed (prefix allocation pattern). /// Access via `PyInner::ext_ref()` using negative offset from the object pointer. -#[repr(C)] +/// +/// align(8) ensures size_of::() is always a multiple of 8, +/// so the offset from Layout::extend equals size_of::() for any +/// PyInner alignment (important on wasm32 where pointers are 4 bytes +/// but some payloads like PyWeak have align 8 due to i64 fields). +#[repr(C, align(8))] pub(super) struct ObjExt { pub(super) dict: Option, pub(super) weak_list: WeakRefList, From 2ee043e50287f15703de2345c10bdfe8a7dfc293 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 01:39:50 +0900 Subject: [PATCH 3/6] Add HAS_WEAKREF to array, deque, _grouper; remove expectedFailure markers - Add HAS_WEAKREF to PyArray and PyDeque (matches CPython) - Add HAS_WEAKREF to PyItertoolsGrouper (internal use by groupby) - Remove 6 expectedFailure markers from test_dataclasses for weakref/slots tests that now pass --- Lib/test/test_dataclasses/__init__.py | 6 ------ crates/stdlib/src/array.rs | 2 +- crates/vm/src/stdlib/collections.rs | 2 +- crates/vm/src/stdlib/itertools.rs | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index 12db84a1209..dfe6b89f1ed 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -3672,7 +3672,6 @@ class A: self.assertEqual(obj.a, 'a') self.assertEqual(obj.b, 'b') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_no_weakref(self): @dataclass(slots=True) class A: @@ -3687,7 +3686,6 @@ class A: with self.assertRaises(AttributeError): a.__weakref__ - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_weakref(self): @dataclass(slots=True, weakref_slot=True) class A: @@ -3748,7 +3746,6 @@ def test_weakref_slot_make_dataclass(self): "weakref_slot is True but slots is False"): B = make_dataclass('B', [('a', int),], weakref_slot=True) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_subclass_weakref_slot(self): @dataclass(slots=True, weakref_slot=True) class Base: @@ -3767,7 +3764,6 @@ class A(Base): a_ref = weakref.ref(a) self.assertIs(a.__weakref__, a_ref) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_subclass_no_weakref_slot(self): @dataclass(slots=True, weakref_slot=True) class Base: @@ -3785,7 +3781,6 @@ class A(Base): a_ref = weakref.ref(a) self.assertIs(a.__weakref__, a_ref) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakref_slot_normal_base_weakref_slot(self): class Base: __slots__ = ('__weakref__',) @@ -3830,7 +3825,6 @@ class B[T2]: self.assertTrue(B.__weakref__) B() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_dataclass_derived_generic_from_base(self): T = typing.TypeVar('T') diff --git a/crates/stdlib/src/array.rs b/crates/stdlib/src/array.rs index 656b5028623..34dec6f8356 100644 --- a/crates/stdlib/src/array.rs +++ b/crates/stdlib/src/array.rs @@ -698,7 +698,7 @@ mod array { } #[pyclass( - flags(BASETYPE), + flags(BASETYPE, HAS_WEAKREF), with( Comparable, AsBuffer, diff --git a/crates/vm/src/stdlib/collections.rs b/crates/vm/src/stdlib/collections.rs index 80f80e2d28f..2807e171777 100644 --- a/crates/vm/src/stdlib/collections.rs +++ b/crates/vm/src/stdlib/collections.rs @@ -56,7 +56,7 @@ mod _collections { } #[pyclass( - flags(BASETYPE), + flags(BASETYPE, HAS_WEAKREF), with( Constructor, Initializer, diff --git a/crates/vm/src/stdlib/itertools.rs b/crates/vm/src/stdlib/itertools.rs index d1af433d7dc..763c3ddce76 100644 --- a/crates/vm/src/stdlib/itertools.rs +++ b/crates/vm/src/stdlib/itertools.rs @@ -667,7 +667,7 @@ mod decl { groupby: PyRef, } - #[pyclass(with(IterNext, Iterable))] + #[pyclass(with(IterNext, Iterable), flags(HAS_WEAKREF))] impl PyItertoolsGrouper {} impl SelfIter for PyItertoolsGrouper {} From de37c016eda3bcb635ebf4573abda1e182c63a61 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 04:46:34 +0900 Subject: [PATCH 4/6] Add HAS_WEAKREF to code, union, partial, lock, IO, mmap, sre, sqlite3, typevar types Add HAS_WEAKREF flag to built-in types that support weakref: - PyCode, PyUnion, PyPartial, Lock, RLock - All IO base/concrete classes (_IOBase, _RawIOBase, _BufferedIOBase, _TextIOBase, BufferedReader, BufferedWriter, BufferedRandom, BufferedRWPair, TextIOWrapper, StringIO, BytesIO, FileIO, WindowsConsoleIO) - PyMmap, sre Pattern, sqlite3 Connection/Cursor - TypeVar, ParamSpec, ParamSpecArgs, ParamSpecKwargs, TypeVarTuple Remove 3 expectedFailure markers from test_descr for now-passing tests. --- Lib/test/test_descr.py | 3 -- crates/stdlib/src/_sqlite3.rs | 7 +++- crates/stdlib/src/mmap.rs | 2 +- crates/stdlib/src/re.rs | 2 +- crates/vm/src/builtins/code.rs | 2 +- crates/vm/src/builtins/object.rs | 4 ++ crates/vm/src/builtins/union.rs | 2 +- crates/vm/src/object/core.rs | 62 +++++++++++++++++++++++-------- crates/vm/src/object/ext.rs | 15 +++++++- crates/vm/src/stdlib/functools.rs | 2 +- crates/vm/src/stdlib/io.rs | 32 +++++++++------- crates/vm/src/stdlib/sre.rs | 2 +- crates/vm/src/stdlib/thread.rs | 4 +- crates/vm/src/stdlib/typevar.rs | 19 +++++++--- 14 files changed, 111 insertions(+), 47 deletions(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index c948d156cdb..1f7c5452c4d 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1321,7 +1321,6 @@ class X(object): with self.assertRaisesRegex(AttributeError, "'X' object has no attribute 'a'"): X().a - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_special(self): # Testing __dict__ and __weakref__ in __slots__... class D(object): @@ -2294,7 +2293,6 @@ def __contains__(self, value): self.assertIn(i, p10) self.assertNotIn(10, p10) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_weakrefs(self): # Testing weak references... import weakref @@ -3976,7 +3974,6 @@ def __init__(self, x): o = trash(o) del o - @unittest.expectedFailure # TODO: RUSTPYTHON def test_slots_multiple_inheritance(self): # SF bug 575229, multiple inheritance w/ slots dumps core class A(object): diff --git a/crates/stdlib/src/_sqlite3.rs b/crates/stdlib/src/_sqlite3.rs index 971c4ec13ac..029883e2eb8 100644 --- a/crates/stdlib/src/_sqlite3.rs +++ b/crates/stdlib/src/_sqlite3.rs @@ -976,7 +976,7 @@ mod _sqlite3 { } } - #[pyclass(with(Constructor, Callable, Initializer), flags(BASETYPE))] + #[pyclass(with(Constructor, Callable, Initializer), flags(BASETYPE, HAS_WEAKREF))] impl Connection { fn drop_db(&self) { self.db.lock().take(); @@ -1629,7 +1629,10 @@ mod _sqlite3 { size: Option, } - #[pyclass(with(Constructor, Initializer, IterNext, Iterable), flags(BASETYPE))] + #[pyclass( + with(Constructor, Initializer, IterNext, Iterable), + flags(BASETYPE, HAS_WEAKREF) + )] impl Cursor { fn new( connection: PyRef, diff --git a/crates/stdlib/src/mmap.rs b/crates/stdlib/src/mmap.rs index c9a6be3b392..6924b700caf 100644 --- a/crates/stdlib/src/mmap.rs +++ b/crates/stdlib/src/mmap.rs @@ -851,7 +851,7 @@ mod mmap { #[pyclass( with(Constructor, AsMapping, AsSequence, AsBuffer, Representable), - flags(BASETYPE) + flags(BASETYPE, HAS_WEAKREF) )] impl PyMmap { fn as_bytes_mut(&self) -> BorrowedValueMut<'_, [u8]> { diff --git a/crates/stdlib/src/re.rs b/crates/stdlib/src/re.rs index fdb14d427fc..c72039f10c5 100644 --- a/crates/stdlib/src/re.rs +++ b/crates/stdlib/src/re.rs @@ -317,7 +317,7 @@ mod re { #[pyfunction] fn purge(_vm: &VirtualMachine) {} - #[pyclass] + #[pyclass(flags(HAS_WEAKREF))] impl PyPattern { #[pymethod(name = "match")] fn match_(&self, text: PyStrRef) -> Option { diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index 3a9ccc35637..a41bc5b03b0 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -597,7 +597,7 @@ impl Constructor for PyCode { } } -#[pyclass(with(Representable, Constructor))] +#[pyclass(with(Representable, Constructor), flags(HAS_WEAKREF))] impl PyCode { #[pygetset] const fn co_posonlyargcount(&self) -> usize { diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index 8fed43cd5d7..8ed4c5a7ff1 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -464,6 +464,8 @@ impl PyBaseObject { if both_mutable || both_module { let has_dict = |typ: &Py| typ.slots.flags.has_feature(PyTypeFlags::HAS_DICT); + let has_weakref = + |typ: &Py| typ.slots.flags.has_feature(PyTypeFlags::HAS_WEAKREF); // Compare slots tuples let slots_equal = match ( current_cls @@ -484,6 +486,8 @@ impl PyBaseObject { if current_cls.slots.basicsize != cls.slots.basicsize || !slots_equal || has_dict(current_cls) != has_dict(&cls) + || has_weakref(current_cls) != has_weakref(&cls) + || current_cls.slots.member_count != cls.slots.member_count { return Err(vm.new_type_error(format!( "__class__ assignment: '{}' object layout differs from '{}'", diff --git a/crates/vm/src/builtins/union.rs b/crates/vm/src/builtins/union.rs index 830465e49f5..639ea3036e3 100644 --- a/crates/vm/src/builtins/union.rs +++ b/crates/vm/src/builtins/union.rs @@ -98,7 +98,7 @@ impl PyUnion { } #[pyclass( - flags(DISALLOW_INSTANTIATION), + flags(DISALLOW_INSTANTIATION, HAS_WEAKREF), with(Hashable, Comparable, AsMapping, AsNumber, Representable) )] impl PyUnion { diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 0486df56acc..240ea7059bf 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -257,8 +257,6 @@ bitflags::bitflags! { const SHARED_INLINE = 1 << 5; /// Use deferred reference counting const DEFERRED = 1 << 6; - /// Object has ObjExt prefix (dict, weak_list, slots) - const HAS_EXT = 1 << 7; } } @@ -324,9 +322,15 @@ impl fmt::Debug for ObjExt { } /// Precomputed offset from PyInner pointer back to ObjExt prefix. -/// Both ObjExt and PyInner are #[repr(C)] with 8-byte alignment, -/// so the offset equals size_of::() with no padding. +/// ObjExt is #[repr(C, align(8))] and PyInner is #[repr(C)], so as long as +/// ObjExt's alignment >= PyInner's alignment, Layout::extend adds no padding +/// and the offset equals size_of::(). const EXT_OFFSET: usize = core::mem::size_of::(); +// Guarantee: ObjExt size is a multiple of its alignment, and its alignment +// is >= any PyInner alignment, so Layout::extend produces no inter-padding. +const _: () = + assert!(core::mem::size_of::().is_multiple_of(core::mem::align_of::())); +const _: () = assert!(core::mem::align_of::() >= core::mem::align_of::>()); /// This is an actual python object. It consists of a `typ` which is the /// python class, and carries some rust payload optionally. This rust @@ -350,16 +354,39 @@ pub(super) struct PyInner { pub(crate) const SIZEOF_PYOBJECT_HEAD: usize = core::mem::size_of::>(); impl PyInner { + /// Check if this object has an ObjExt prefix by examining type flags. + /// Equivalent to Py_TPFLAGS_PREHEADER (MANAGED_DICT | MANAGED_WEAKREF) + /// plus member slots. + /// + /// Uses raw pointer operations to read type flags without creating a + /// shared reference. This avoids Stacked Borrows violations during + /// bootstrap when type objects are mutated through raw pointers. + #[inline(always)] + fn has_ext(&self) -> bool { + use crate::types::PyTypeFlags; + let typ_ptr = self.typ.load_raw(); + unsafe { + let inner_ptr = typ_ptr as *const PyInner; + let flags = core::ptr::addr_of!((*inner_ptr).payload.slots.flags).read(); + let member_count = core::ptr::addr_of!((*inner_ptr).payload.slots.member_count).read(); + flags.has_feature(PyTypeFlags::HAS_DICT) + || flags.has_feature(PyTypeFlags::HAS_WEAKREF) + || member_count > 0 + } + } + /// Access the ObjExt prefix at a negative offset from this PyInner. /// Returns None if this object was allocated without the prefix. /// + /// Uses type flags (HAS_DICT, HAS_WEAKREF, member_count) to determine + /// if the prefix exists, matching CPython's Py_TPFLAGS_PREHEADER approach. + /// /// Uses exposed provenance to reconstruct a pointer covering the entire /// allocation (ObjExt prefix + PyInner). The allocation pointer's provenance /// is exposed at allocation time via `expose_provenance()`. #[inline(always)] pub(super) fn ext_ref(&self) -> Option<&ObjExt> { - if !GcBits::from_bits_retain(self.gc_bits.load(Ordering::Relaxed)).contains(GcBits::HAS_EXT) - { + if !self.has_ext() { return None; } let self_addr = (self as *const Self as *const u8).addr(); @@ -906,10 +933,7 @@ impl PyInner { /// `ptr` must be a valid pointer from `PyInner::new` and must not be used after this call. unsafe fn dealloc(ptr: *mut Self) { unsafe { - let has_ext = GcBits::from_bits_retain((*ptr).gc_bits.load(Ordering::Relaxed)) - .contains(GcBits::HAS_EXT); - - if has_ext { + if (*ptr).has_ext() { let ext_layout = core::alloc::Layout::new::(); let inner_layout = core::alloc::Layout::new::(); let (combined, inner_offset) = ext_layout.extend(inner_layout).unwrap(); @@ -934,7 +958,10 @@ impl PyInner { /// For objects with ext, the allocation layout is: [ObjExt][PyInner] fn new(payload: T, typ: PyTypeRef, dict: Option) -> *mut Self { let member_count = typ.slots.member_count; - let needs_ext = dict.is_some() + let needs_ext = typ + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_DICT) || typ .slots .flags @@ -962,7 +989,7 @@ impl PyInner { inner_ptr.write(Self { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), typ: PyAtomicRef::from(typ), @@ -1211,6 +1238,7 @@ impl PyObject { /// Returns the first weakref in the weakref list, if any. pub(crate) fn get_weakrefs(&self) -> Option { let wrl = self.weak_ref_list()?; + let _lock = weakref_lock::lock(self as *const PyObject as usize); let head_ptr = wrl.head.load(Ordering::Relaxed); if head_ptr.is_null() { None @@ -1642,8 +1670,12 @@ impl PyObject { unsafe { clear_fn(ptr, &mut result) }; } - // 2. Clear member slots (subtype_clear) + // 2. Clear dict and member slots (subtype_clear) if let Some(ext) = obj.0.ext_ref() { + if let Some(dict) = ext.dict.as_ref() { + let dict_ref = dict.get(); + result.push(dict_ref.into()); + } for slot in ext.slots.iter() { if let Some(val) = slot.write().take() { result.push(val); @@ -2312,7 +2344,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: type_payload, @@ -2327,7 +2359,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: object_payload, diff --git a/crates/vm/src/object/ext.rs b/crates/vm/src/object/ext.rs index 0fd251499f1..cc3a47c2dbd 100644 --- a/crates/vm/src/object/ext.rs +++ b/crates/vm/src/object/ext.rs @@ -289,8 +289,12 @@ impl fmt::Debug for PyAtomicRef { impl From> for PyAtomicRef { fn from(pyref: PyRef) -> Self { let py = PyRef::leak(pyref); + let ptr = py as *const _ as *mut u8; + // Expose provenance so we can re-derive via with_exposed_provenance + // without Stacked Borrows tag restrictions during bootstrap + ptr.expose_provenance(); Self { - inner: Radium::new(py as *const _ as *mut _), + inner: Radium::new(ptr), _phantom: Default::default(), } } @@ -311,6 +315,15 @@ impl Deref for PyAtomicRef { } impl PyAtomicRef { + /// Load the raw pointer without creating a reference. + /// Uses exposed provenance to avoid Stacked Borrows violations + /// when the pointed-to object may have been mutated through raw pointers. + #[inline(always)] + pub(crate) fn load_raw(&self) -> *const Py { + let addr = self.inner.load(Ordering::Relaxed).addr(); + core::ptr::with_exposed_provenance(addr) + } + /// # Safety /// The caller is responsible to keep the returned PyRef alive /// until no more reference can be used via PyAtomicRef::deref() diff --git a/crates/vm/src/stdlib/functools.rs b/crates/vm/src/stdlib/functools.rs index 2c3f70ab52a..76012deb9ff 100644 --- a/crates/vm/src/stdlib/functools.rs +++ b/crates/vm/src/stdlib/functools.rs @@ -140,7 +140,7 @@ mod _functools { #[pyclass( with(Constructor, Callable, GetDescriptor, Representable), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl PyPartial { #[pygetset] diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index 945042bc9e4..052937f9f8d 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -413,7 +413,10 @@ mod _io { #[derive(Debug, Default, PyPayload)] pub struct _IOBase; - #[pyclass(with(IterNext, Iterable, Destructor), flags(BASETYPE, HAS_DICT))] + #[pyclass( + with(IterNext, Iterable, Destructor), + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) + )] impl _IOBase { #[pymethod] fn seek( @@ -634,7 +637,7 @@ mod _io { #[repr(transparent)] pub(super) struct _RawIOBase(_IOBase); - #[pyclass(flags(BASETYPE, HAS_DICT))] + #[pyclass(flags(BASETYPE, HAS_DICT, HAS_WEAKREF))] impl _RawIOBase { #[pymethod] fn read(instance: PyObjectRef, size: OptionalSize, vm: &VirtualMachine) -> PyResult { @@ -720,7 +723,7 @@ mod _io { #[repr(transparent)] struct _BufferedIOBase(_IOBase); - #[pyclass(flags(BASETYPE))] + #[pyclass(flags(BASETYPE, HAS_WEAKREF))] impl _BufferedIOBase { #[pymethod] fn read(zelf: PyObjectRef, _size: OptionalArg, vm: &VirtualMachine) -> PyResult { @@ -785,7 +788,7 @@ mod _io { #[repr(transparent)] struct _TextIOBase(_IOBase); - #[pyclass(flags(BASETYPE))] + #[pyclass(flags(BASETYPE, HAS_WEAKREF))] impl _TextIOBase { #[pygetset] fn encoding(_zelf: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef { @@ -1981,7 +1984,7 @@ mod _io { #[pyclass( with(Constructor, BufferedMixin, BufferedReadable, Destructor), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl BufferedReader {} @@ -2085,7 +2088,7 @@ mod _io { #[pyclass( with(Constructor, BufferedMixin, BufferedWritable, Destructor), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl BufferedWriter {} @@ -2159,7 +2162,7 @@ mod _io { BufferedWritable, Destructor ), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl BufferedRandom {} @@ -2229,7 +2232,7 @@ mod _io { BufferedWritable, Destructor ), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl BufferedRWPair { #[pymethod] @@ -3015,7 +3018,7 @@ mod _io { IterNext, Representable ), - flags(BASETYPE) + flags(BASETYPE, HAS_WEAKREF) )] impl TextIOWrapper { #[pymethod] @@ -4376,7 +4379,7 @@ mod _io { } } - #[pyclass(flags(BASETYPE, HAS_DICT), with(Constructor, Initializer))] + #[pyclass(flags(BASETYPE, HAS_DICT, HAS_WEAKREF), with(Constructor, Initializer))] impl StringIO { #[pymethod] const fn readable(&self) -> bool { @@ -4593,7 +4596,10 @@ mod _io { } } - #[pyclass(flags(BASETYPE, HAS_DICT), with(PyRef, Constructor, Initializer))] + #[pyclass( + flags(BASETYPE, HAS_DICT, HAS_WEAKREF), + with(PyRef, Constructor, Initializer) + )] impl BytesIO { #[pymethod] const fn readable(&self) -> bool { @@ -5634,7 +5640,7 @@ mod fileio { #[pyclass( with(Constructor, Initializer, Representable, Destructor), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl FileIO { fn io_error( @@ -6390,7 +6396,7 @@ mod winconsoleio { #[pyclass( with(Constructor, Initializer, Representable, Destructor), - flags(BASETYPE, HAS_DICT) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl WindowsConsoleIO { #[allow(dead_code)] diff --git a/crates/vm/src/stdlib/sre.rs b/crates/vm/src/stdlib/sre.rs index 2c18bab4ba1..ba7044fb5a9 100644 --- a/crates/vm/src/stdlib/sre.rs +++ b/crates/vm/src/stdlib/sre.rs @@ -212,7 +212,7 @@ mod _sre { }; } - #[pyclass(with(Hashable, Comparable, Representable))] + #[pyclass(with(Hashable, Comparable, Representable), flags(HAS_WEAKREF))] impl Pattern { fn with_str(string: &PyObject, vm: &VirtualMachine, f: F) -> PyResult where diff --git a/crates/vm/src/stdlib/thread.rs b/crates/vm/src/stdlib/thread.rs index 45be328dc1e..bf22cb3c9c6 100644 --- a/crates/vm/src/stdlib/thread.rs +++ b/crates/vm/src/stdlib/thread.rs @@ -132,7 +132,7 @@ pub(crate) mod _thread { } } - #[pyclass(with(Constructor, Representable))] + #[pyclass(with(Constructor, Representable), flags(HAS_WEAKREF))] impl Lock { #[pymethod] #[pymethod(name = "acquire_lock")] @@ -205,7 +205,7 @@ pub(crate) mod _thread { } } - #[pyclass(with(Representable), flags(BASETYPE))] + #[pyclass(with(Representable), flags(BASETYPE, HAS_WEAKREF))] impl RLock { #[pyslot] fn slot_new(cls: PyTypeRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult { diff --git a/crates/vm/src/stdlib/typevar.rs b/crates/vm/src/stdlib/typevar.rs index d0bd3f5666d..0993d466e74 100644 --- a/crates/vm/src/stdlib/typevar.rs +++ b/crates/vm/src/stdlib/typevar.rs @@ -94,7 +94,10 @@ pub(crate) mod typevar { contravariant: bool, infer_variance: bool, } - #[pyclass(flags(HAS_DICT), with(AsNumber, Constructor, Representable))] + #[pyclass( + flags(HAS_DICT, HAS_WEAKREF), + with(AsNumber, Constructor, Representable) + )] impl TypeVar { #[pymethod] fn __mro_entries__(&self, _bases: PyObjectRef, vm: &VirtualMachine) -> PyResult { @@ -461,7 +464,10 @@ pub(crate) mod typevar { infer_variance: bool, } - #[pyclass(flags(HAS_DICT), with(AsNumber, Constructor, Representable))] + #[pyclass( + flags(HAS_DICT, HAS_WEAKREF), + with(AsNumber, Constructor, Representable) + )] impl ParamSpec { #[pymethod] fn __mro_entries__(&self, _bases: PyObjectRef, vm: &VirtualMachine) -> PyResult { @@ -713,7 +719,10 @@ pub(crate) mod typevar { default_value: PyMutex, evaluate_default: PyMutex, } - #[pyclass(flags(HAS_DICT), with(Constructor, Representable, Iterable))] + #[pyclass( + flags(HAS_DICT, HAS_WEAKREF), + with(Constructor, Representable, Iterable) + )] impl TypeVarTuple { #[pygetset] fn __name__(&self) -> PyObjectRef { @@ -883,7 +892,7 @@ pub(crate) mod typevar { pub struct ParamSpecArgs { __origin__: PyObjectRef, } - #[pyclass(with(Constructor, Representable, Comparable))] + #[pyclass(with(Constructor, Representable, Comparable), flags(HAS_WEAKREF))] impl ParamSpecArgs { #[pymethod] fn __mro_entries__(&self, _bases: PyObjectRef, vm: &VirtualMachine) -> PyResult { @@ -946,7 +955,7 @@ pub(crate) mod typevar { pub struct ParamSpecKwargs { __origin__: PyObjectRef, } - #[pyclass(with(Constructor, Representable, Comparable))] + #[pyclass(with(Constructor, Representable, Comparable), flags(HAS_WEAKREF))] impl ParamSpecKwargs { #[pymethod] fn __mro_entries__(&self, _bases: PyObjectRef, vm: &VirtualMachine) -> PyResult { From fb20298bfea2b586a3b17bc8f876b78755255c6e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 21:39:37 +0900 Subject: [PATCH 5/6] Add HAS_DICT to type flags and handle non-METHOD/CLASS in descr_get - Add HAS_DICT flag to PyType (type metaclass) alongside existing HAS_WEAKREF. All type objects are instances of type and need dict support, matching CPython's PyType_Type. - Replace unimplemented!() in PyMethodDescriptor::descr_get with fallback to bind obj directly, matching CPython's method_get which uses PyCFunction_NewEx for non-METH_METHOD methods. --- crates/vm/src/builtins/descriptor.rs | 3 +-- crates/vm/src/builtins/type.rs | 2 +- crates/vm/src/object/core.rs | 40 +++++++++------------------- crates/vm/src/object/ext.rs | 9 ------- 4 files changed, 15 insertions(+), 39 deletions(-) diff --git a/crates/vm/src/builtins/descriptor.rs b/crates/vm/src/builtins/descriptor.rs index 05e819a56e9..fe07017f9aa 100644 --- a/crates/vm/src/builtins/descriptor.rs +++ b/crates/vm/src/builtins/descriptor.rs @@ -88,13 +88,12 @@ impl GetDescriptor for PyMethodDescriptor { } else if descr.method.flags.contains(PyMethodFlags::CLASS) { obj.class().to_owned().into() } else { - unimplemented!() + obj } } None if descr.method.flags.contains(PyMethodFlags::CLASS) => cls.unwrap(), None => return Ok(zelf), }; - // Ok(descr.method.build_bound_method(&vm.ctx, bound, class).into()) Ok(descr.bind(bound, &vm.ctx).into()) } } diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index ae97ce6520e..72eebe9f30f 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -1094,7 +1094,7 @@ impl Py { AsNumber, Representable ), - flags(BASETYPE, HAS_WEAKREF) + flags(BASETYPE, HAS_DICT, HAS_WEAKREF) )] impl PyType { #[pygetset] diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 240ea7059bf..9d9cbbf2839 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -257,6 +257,8 @@ bitflags::bitflags! { const SHARED_INLINE = 1 << 5; /// Use deferred reference counting const DEFERRED = 1 << 6; + /// Object has ObjExt prefix allocation + const HAS_EXT = 1 << 7; } } @@ -354,33 +356,16 @@ pub(super) struct PyInner { pub(crate) const SIZEOF_PYOBJECT_HEAD: usize = core::mem::size_of::>(); impl PyInner { - /// Check if this object has an ObjExt prefix by examining type flags. - /// Equivalent to Py_TPFLAGS_PREHEADER (MANAGED_DICT | MANAGED_WEAKREF) - /// plus member slots. - /// - /// Uses raw pointer operations to read type flags without creating a - /// shared reference. This avoids Stacked Borrows violations during - /// bootstrap when type objects are mutated through raw pointers. + /// Check if this object has an ObjExt prefix. + /// Uses the per-instance HAS_EXT bit in gc_bits, set at allocation time. #[inline(always)] fn has_ext(&self) -> bool { - use crate::types::PyTypeFlags; - let typ_ptr = self.typ.load_raw(); - unsafe { - let inner_ptr = typ_ptr as *const PyInner; - let flags = core::ptr::addr_of!((*inner_ptr).payload.slots.flags).read(); - let member_count = core::ptr::addr_of!((*inner_ptr).payload.slots.member_count).read(); - flags.has_feature(PyTypeFlags::HAS_DICT) - || flags.has_feature(PyTypeFlags::HAS_WEAKREF) - || member_count > 0 - } + GcBits::from_bits_retain(self.gc_bits.load(Ordering::Relaxed)).contains(GcBits::HAS_EXT) } /// Access the ObjExt prefix at a negative offset from this PyInner. /// Returns None if this object was allocated without the prefix. /// - /// Uses type flags (HAS_DICT, HAS_WEAKREF, member_count) to determine - /// if the prefix exists, matching CPython's Py_TPFLAGS_PREHEADER approach. - /// /// Uses exposed provenance to reconstruct a pointer covering the entire /// allocation (ObjExt prefix + PyInner). The allocation pointer's provenance /// is exposed at allocation time via `expose_provenance()`. @@ -958,10 +943,11 @@ impl PyInner { /// For objects with ext, the allocation layout is: [ObjExt][PyInner] fn new(payload: T, typ: PyTypeRef, dict: Option) -> *mut Self { let member_count = typ.slots.member_count; - let needs_ext = typ - .slots - .flags - .has_feature(crate::types::PyTypeFlags::HAS_DICT) + let needs_ext = dict.is_some() + || typ + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_DICT) || typ .slots .flags @@ -989,7 +975,7 @@ impl PyInner { inner_ptr.write(Self { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), typ: PyAtomicRef::from(typ), @@ -2344,7 +2330,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: type_payload, @@ -2359,7 +2345,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(0), + gc_bits: Radium::new(GcBits::HAS_EXT.bits()), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: object_payload, diff --git a/crates/vm/src/object/ext.rs b/crates/vm/src/object/ext.rs index cc3a47c2dbd..933ed4d61d1 100644 --- a/crates/vm/src/object/ext.rs +++ b/crates/vm/src/object/ext.rs @@ -315,15 +315,6 @@ impl Deref for PyAtomicRef { } impl PyAtomicRef { - /// Load the raw pointer without creating a reference. - /// Uses exposed provenance to avoid Stacked Borrows violations - /// when the pointed-to object may have been mutated through raw pointers. - #[inline(always)] - pub(crate) fn load_raw(&self) -> *const Py { - let addr = self.inner.load(Ordering::Relaxed).addr(); - core::ptr::with_exposed_provenance(addr) - } - /// # Safety /// The caller is responsible to keep the returned PyRef alive /// until no more reference can be used via PyAtomicRef::deref() From a2fe158a9734781f26d448218f4bc803646763f9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 4 Mar 2026 22:38:15 +0900 Subject: [PATCH 6/6] Fix ext detection, HeapMethodDef ownership, WASM error - Remove HAS_EXT gc_bits flag; detect ext from type flags using raw pointer reads to avoid Stacked Borrows violations - Store HeapMethodDef owner in payload instead of dict hack - Clear dict entries in gc_clear_raw to break cycles - Add WASM error fallback when exception serialization fails --- .cspell.dict/rust-more.txt | 4 +-- crates/vm/src/builtins/builtin_func.rs | 2 ++ crates/vm/src/builtins/descriptor.rs | 3 ++ crates/vm/src/function/method.rs | 20 ++++++------- crates/vm/src/object/core.rs | 41 +++++++++++++++++--------- crates/vm/src/object/ext.rs | 8 +++++ crates/wasm/src/convert.rs | 10 ++++++- 7 files changed, 61 insertions(+), 27 deletions(-) diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt index c3ebd61833a..af20aef568d 100644 --- a/.cspell.dict/rust-more.txt +++ b/.cspell.dict/rust-more.txt @@ -6,6 +6,7 @@ bindgen bitand bitflags bitor +bitvec bitxor bstr byteorder @@ -58,6 +59,7 @@ powi prepended punct replacen +retag rmatch rposition rsplitn @@ -89,5 +91,3 @@ widestring winapi winresource winsock -bitvec -Bitvec diff --git a/crates/vm/src/builtins/builtin_func.rs b/crates/vm/src/builtins/builtin_func.rs index de5ff1d6878..1326febd000 100644 --- a/crates/vm/src/builtins/builtin_func.rs +++ b/crates/vm/src/builtins/builtin_func.rs @@ -16,6 +16,8 @@ pub struct PyNativeFunction { pub(crate) value: &'static PyMethodDef, pub(crate) zelf: Option, pub(crate) module: Option<&'static PyStrInterned>, // None for bound method + /// Prevent HeapMethodDef from being freed while this function references it + pub(crate) _method_def_owner: Option, } impl PyPayload for PyNativeFunction { diff --git a/crates/vm/src/builtins/descriptor.rs b/crates/vm/src/builtins/descriptor.rs index fe07017f9aa..45a62b7a8e7 100644 --- a/crates/vm/src/builtins/descriptor.rs +++ b/crates/vm/src/builtins/descriptor.rs @@ -37,6 +37,8 @@ pub struct PyMethodDescriptor { pub method: &'static PyMethodDef, // vectorcall: vector_call_func, pub objclass: &'static Py, // TODO: move to tp_members + /// Prevent HeapMethodDef from being freed while this descriptor references it + pub(crate) _method_def_owner: Option, } impl PyMethodDescriptor { @@ -49,6 +51,7 @@ impl PyMethodDescriptor { }, method, objclass: typ, + _method_def_owner: None, } } } diff --git a/crates/vm/src/function/method.rs b/crates/vm/src/function/method.rs index 211f7e3adc5..f2affef905d 100644 --- a/crates/vm/src/function/method.rs +++ b/crates/vm/src/function/method.rs @@ -123,6 +123,7 @@ impl PyMethodDef { zelf: None, value: self, module: None, + _method_def_owner: None, } } @@ -144,6 +145,7 @@ impl PyMethodDef { zelf: Some(obj), value: self, module: None, + _method_def_owner: None, }, class, } @@ -162,6 +164,7 @@ impl PyMethodDef { zelf: Some(obj), value: self, module: None, + _method_def_owner: None, }; PyRef::new_ref( function, @@ -217,6 +220,7 @@ impl PyMethodDef { zelf: Some(class.to_owned().into()), value: self, module: None, + _method_def_owner: None, }; PyNativeMethod { func, class }.into_ref(ctx) } @@ -293,14 +297,12 @@ impl Py { } pub fn build_function(&self, vm: &VirtualMachine) -> PyRef { - let function = unsafe { self.method() }.to_function(); - let dict = vm.ctx.new_dict(); - dict.set_item("__method_def__", self.to_owned().into(), vm) - .unwrap(); + let mut function = unsafe { self.method() }.to_function(); + function._method_def_owner = Some(self.to_owned().into()); PyRef::new_ref( function, vm.ctx.types.builtin_function_or_method_type.to_owned(), - Some(dict), + None, ) } @@ -309,14 +311,12 @@ impl Py { class: &'static Py, vm: &VirtualMachine, ) -> PyRef { - let function = unsafe { self.method() }.to_method(class, &vm.ctx); - let dict = vm.ctx.new_dict(); - dict.set_item("__method_def__", self.to_owned().into(), vm) - .unwrap(); + let mut function = unsafe { self.method() }.to_method(class, &vm.ctx); + function._method_def_owner = Some(self.to_owned().into()); PyRef::new_ref( function, vm.ctx.types.method_descriptor_type.to_owned(), - Some(dict), + None, ) } } diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index 9d9cbbf2839..4198c00be74 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -17,7 +17,7 @@ use super::{ }; use crate::object::traverse_object::PyObjVTable; use crate::{ - builtins::{PyDictRef, PyType, PyTypeRef}, + builtins::{PyDict, PyDictRef, PyType, PyTypeRef}, common::{ atomic::{Ordering, PyAtomic, Radium}, linked_list::{Link, Pointers}, @@ -257,8 +257,6 @@ bitflags::bitflags! { const SHARED_INLINE = 1 << 5; /// Use deferred reference counting const DEFERRED = 1 << 6; - /// Object has ObjExt prefix allocation - const HAS_EXT = 1 << 7; } } @@ -356,11 +354,20 @@ pub(super) struct PyInner { pub(crate) const SIZEOF_PYOBJECT_HEAD: usize = core::mem::size_of::>(); impl PyInner { - /// Check if this object has an ObjExt prefix. - /// Uses the per-instance HAS_EXT bit in gc_bits, set at allocation time. + /// Check if this object has an ObjExt prefix based on type flags. + /// Uses raw pointer reads to avoid Stacked Borrows violations during bootstrap, + /// where type objects have self-referential typ pointers that may be mutated. #[inline(always)] fn has_ext(&self) -> bool { - GcBits::from_bits_retain(self.gc_bits.load(Ordering::Relaxed)).contains(GcBits::HAS_EXT) + // Read slots via raw pointers only — creating a &Py reference + // would retag the entire object, conflicting with &mut writes during bootstrap. + let typ_ptr = self.typ.load_raw(); + let slots = unsafe { core::ptr::addr_of!((*typ_ptr).0.payload.slots) }; + let flags = unsafe { core::ptr::addr_of!((*slots).flags).read() }; + let member_count = unsafe { core::ptr::addr_of!((*slots).member_count).read() }; + flags.has_feature(crate::types::PyTypeFlags::HAS_DICT) + || flags.has_feature(crate::types::PyTypeFlags::HAS_WEAKREF) + || member_count > 0 } /// Access the ObjExt prefix at a negative offset from this PyInner. @@ -943,16 +950,20 @@ impl PyInner { /// For objects with ext, the allocation layout is: [ObjExt][PyInner] fn new(payload: T, typ: PyTypeRef, dict: Option) -> *mut Self { let member_count = typ.slots.member_count; - let needs_ext = dict.is_some() - || typ - .slots - .flags - .has_feature(crate::types::PyTypeFlags::HAS_DICT) + let needs_ext = typ + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_DICT) || typ .slots .flags .has_feature(crate::types::PyTypeFlags::HAS_WEAKREF) || member_count > 0; + debug_assert!( + needs_ext || dict.is_none(), + "dict passed to type '{}' without HAS_DICT flag", + typ.name() + ); if needs_ext { let ext_layout = core::alloc::Layout::new::(); @@ -975,7 +986,7 @@ impl PyInner { inner_ptr.write(Self { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), typ: PyAtomicRef::from(typ), @@ -1660,6 +1671,8 @@ impl PyObject { if let Some(ext) = obj.0.ext_ref() { if let Some(dict) = ext.dict.as_ref() { let dict_ref = dict.get(); + // Clear dict entries to break cycles, then collect the dict itself + PyDict::clear(&dict_ref); result.push(dict_ref.into()); } for slot in ext.slots.iter() { @@ -2330,7 +2343,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: type_payload, @@ -2345,7 +2358,7 @@ pub(crate) fn init_type_hierarchy() -> (PyTypeRef, PyTypeRef, PyTypeRef) { PyInner:: { ref_count: RefCount::new(), vtable: PyObjVTable::of::(), - gc_bits: Radium::new(GcBits::HAS_EXT.bits()), + gc_bits: Radium::new(0), gc_generation: Radium::new(GC_UNTRACKED), gc_pointers: Pointers::new(), payload: object_payload, diff --git a/crates/vm/src/object/ext.rs b/crates/vm/src/object/ext.rs index 933ed4d61d1..11cee6af3ec 100644 --- a/crates/vm/src/object/ext.rs +++ b/crates/vm/src/object/ext.rs @@ -315,6 +315,14 @@ impl Deref for PyAtomicRef { } impl PyAtomicRef { + /// Load the raw pointer without creating a reference. + /// Avoids Stacked Borrows retag, safe for use during bootstrap + /// when type objects have self-referential pointers being mutated. + #[inline(always)] + pub(super) fn load_raw(&self) -> *const Py { + self.inner.load(Ordering::Relaxed).cast::>() + } + /// # Safety /// The caller is responsible to keep the returned PyRef alive /// until no more reference can be used via PyAtomicRef::deref() diff --git a/crates/wasm/src/convert.rs b/crates/wasm/src/convert.rs index a0186ce2834..bbf263975f3 100644 --- a/crates/wasm/src/convert.rs +++ b/crates/wasm/src/convert.rs @@ -49,7 +49,15 @@ pub fn py_err_to_js_err(vm: &VirtualMachine, py_err: &Py) -> Js serde_wasm_bindgen::to_value(&exceptions::SerializeException::new(vm, py_err)); match res { Ok(err_info) => PyError::new(err_info).into(), - Err(e) => e.into(), + Err(_) => { + // Fallback: create a basic JS Error with the exception type and message + let exc_type = py_err.class().name().to_string(); + let msg = match py_err.as_object().str(vm) { + Ok(s) => format!("{exc_type}: {s}"), + Err(_) => exc_type, + }; + js_sys::Error::new(&msg).into() + } } } }