diff --git a/dev/modules/moose_support.md b/dev/modules/moose_support.md index d2409d792..721e9a4f3 100644 --- a/dev/modules/moose_support.md +++ b/dev/modules/moose_support.md @@ -1991,6 +1991,113 @@ Tests fixed: - A handful of cmop/method introspection edge cases (constants, forward declarations, eval-defined subs). +## Phase D-W6.2: refcount drift investigation log (2026-04-29) + +This investigation builds on PR #599's "no class-name dispatch" rule +and PR #600's "remove the gate, find the drift sources" plan. + +### Reproducers shipped + +`src/test/resources/unit/refcount/drift/`: + +- **`sub_install.t`** — five sub-installation patterns (glob assign, + named sub, loop install, temp drop, nested install). All pass on + master AND with the walker gate disabled. +- **`closure_capture.t`** — five closure-capture patterns (single, + two-, three-, five-layer wrap, plus a 20-closure chain). All pass + on master AND with the walker gate disabled. +- **`hash_slot.t`** — four hash-slot patterns (direct, package + global, 50-entry registry, slot overwrite). All pass on master + AND with the walker gate disabled. + +`PJ_DESTROY_TRACE=1` is also wired into `DestroyDispatch.callDestroy` +to print every destroy with `Pkg::subname` for `RuntimeCode` (and +class name for blessed objects). Off by default; zero cost. + +### What the simple patterns prove + +The basic shapes of sub-install, closure-capture, and hash-slot all +have correct cooperative refCount semantics in PerlOnJava — strong +holds from package stashes, hash slots, and closure captures all +keep their referents alive without the walker gate. + +### Where the drift actually is + +`PJ_DESTROY_TRACE=1 ./jperl -e 'use Class::MOP'` (gate disabled) +fails with: + +``` +Can't call method "get_method" on an undefined value + at jar:PERL5LIB/Class/MOP/Attribute.pm line 475. +``` + +`Class::MOP::Attribute._remove_accessor` calls +`$class->get_method($accessor)` where `$class = $self->associated_class()`. +`associated_class` is a *weakened* ref. The weak ref reads as undef, +which means the metaclass it pointed at was destroyed. + +The trace shows `Class::MOP::Class@1424108509` destroyed **twice**: + +``` +[DESTROY] Class::MOP::Class@1424108509 refCount=-2147483648 + at MortalList.flush(line 585) + at anon1205.apply(.../Class/MOP/Class.pm:260) + ... +[DESTROY] Class::MOP::Class@1424108509 refCount=-2147483648 + at MortalList.drainPendingSince(line 659) + at DestroyDispatch.doCallDestroy(line 373) + at DestroyDispatch.callDestroy(line 266) + at MortalList.flush(line 585) + at anon1205.apply(.../Class/MOP/Class.pm:260) +``` + +Same identity-hash, two destroys. The metaclass instance was added +to the deferred-decrement queue twice (or processed twice during +cascading flush). + +### Conclusion + +D-W6.2 is **not** in the simple closure-capture path. The drift is +specifically in the **metaclass-instance lifecycle during `Class::MOP` +load**, where a Class::MOP::Class instance built up by +`_construct_class_instance` ends up double-decremented when: + +1. its `attach_to_class` path weakens an attribute's `associated_class` + ref to itself, AND +2. some intermediate scope-exit cleanup queues the instance for + deferred decrement that the cascading flush from another + destroy already drained. + +### Next concrete leads + +1. **Audit `MortalList.deferDecrementIfTracked`** for double-add: a + single `RuntimeBase` should never appear twice in the `pending` + list. Add an `IdentityHashMap`-based dedup at the deferred-add + point, or detect the second add and drop it. +2. **Audit `MortalList.drainPendingSince`** — the second destroy of + `Class::MOP::Class@1424108509` came through this path. If the + pending list contains an entry whose refCount has already been + zeroed (or marked MIN_VALUE), `drainPendingSince` should skip it. +3. **Audit `Class::MOP::Class.pm:260`** (the line emitting the + first destroy) — that's likely + `_construct_class_instance`'s last statement; figure out which + scope-exit puts the metaclass on the deferred queue. + +### What's deferred + +- D-W6.1 (sub-install drift): closed — the simple patterns work; the + observed Sub::Install destroys during bootstrap are *symptoms*, + not the root cause. +- D-W6.2 (closure-capture drift): closed — the simple patterns work + here too. +- **D-W6.4 (NEW) — pending-list double-add / metaclass lifecycle**: + the actual drift identified by the investigation. This is what + needs the next round of debugging. +- D-W6.3 (`@_` argument promotion): still pending; reproducer not + yet written. + + + ## Related Documents - [xs_fallback.md](xs_fallback.md) — XS fallback mechanism diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/DestroyDispatch.java b/src/main/java/org/perlonjava/runtime/runtimetypes/DestroyDispatch.java index 92400b081..f2a589662 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/DestroyDispatch.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/DestroyDispatch.java @@ -17,6 +17,12 @@ */ public class DestroyDispatch { + /** Phase D-W6 debug: enable destroy tracing via -Dperlonjava.destroyTrace=1 + * or env PJ_DESTROY_TRACE=1. */ + private static final boolean DESTROY_TRACE = + "1".equals(System.getProperty("perlonjava.destroyTrace")) + || "1".equals(System.getenv("PJ_DESTROY_TRACE")); + // BitSet indexed by |blessId| — set if the class defines DESTROY (or AUTOLOAD) private static final BitSet destroyClasses = new BitSet(); @@ -146,6 +152,24 @@ public static void invalidateCache() { public static void callDestroy(RuntimeBase referent) { // refCount is already MIN_VALUE (set by caller) + // Phase D-W6 debug: optional trace of every destroy call. + // Enable with -Dperlonjava.destroyTrace=1 (or env PJ_DESTROY_TRACE=1) + // to find refCount-drift sources. + if (DESTROY_TRACE) { + String klass = referent.blessId != 0 + ? NameNormalizer.getBlessStr(referent.blessId) + : referent.getClass().getSimpleName(); + String extra = ""; + if (referent instanceof RuntimeCode rc) { + extra = " name=" + (rc.packageName != null ? rc.packageName : "?") + + "::" + (rc.subName != null ? rc.subName : "(anon)"); + } + System.err.println("[DESTROY] " + klass + "@" + + System.identityHashCode(referent) + + " refCount=" + referent.refCount + extra); + new RuntimeException("destroy trace").printStackTrace(System.err); + } + // Phase 3 (refcount_alignment_plan.md): Re-entry guard. // If this object is already inside its own DESTROY body, a transient // decrement-to-0 (local temp release, deferred MortalList flush, diff --git a/src/test/resources/unit/refcount/drift/closure_capture.t b/src/test/resources/unit/refcount/drift/closure_capture.t new file mode 100644 index 000000000..aa41ecd46 --- /dev/null +++ b/src/test/resources/unit/refcount/drift/closure_capture.t @@ -0,0 +1,132 @@ +# D-W6.2 — Closure-capture drift reproducer. +# +# Tracing `PJ_DESTROY_TRACE=1 ./jperl -e 'use Class::MOP::Class'` showed +# anonymous CVs from Sub::Install being destroyed prematurely. The +# pattern is Sub::Install's nested closure wrappers: +# +# *install_sub = _build_public_installer(_ignore_warnings(_installer)); +# +# Each layer is `sub { ... my $code = shift; sub { $code->(@_) } }` — +# a closure that captures a CODE-ref my-var and returns a new closure +# using it. Three layers stack three levels of capture. +# +# The hypothesis (D-W6.2): when a closure captures a my-var holding a +# CODE ref, and the my-var's outer scope exits, PerlOnJava decrements +# the CODE ref's cooperative refCount even though the closure still +# references it. The walker gate masks this; without the gate the +# CODE ref's refCount goes negative and DESTROY fires. +use strict; +use warnings; +use Test::More; + +# ---- Pattern A: single-layer wrap (baseline) ----------------------------- +sub wrap_one { + my $code = shift; + sub { $code->(@_) }; +} + +{ + my $cv = sub { 'A-result' }; + my $wrapped = wrap_one($cv); + $cv = undef; # drop outer reference + + is $wrapped->(), 'A-result', + 'A: single-layer wrapped closure callable after outer ref dropped'; +} + +# ---- Pattern B: two-layer wrap ------------------------------------------- +sub wrap_two_a { + my $code = shift; + sub { $code->(@_) }; +} +sub wrap_two_b { + my $code = shift; + sub { my $r = $code->(@_); $r }; +} + +{ + my $cv = sub { 'B-result' }; + my $wrapped = wrap_two_b(wrap_two_a($cv)); + $cv = undef; + + is $wrapped->(), 'B-result', + 'B: two-layer wrapped closure callable'; +} + +# ---- Pattern C: three-layer wrap (Sub::Install shape) -------------------- +# This is the precise install_sub pattern. +sub _installer { + sub { + my ($pkg, $name, $code) = @_; + no strict 'refs'; + *{"${pkg}::${name}"} = $code; + return $code; + } +} + +sub _ignore_warnings { + my $code = shift; + sub { + local $SIG{__WARN__} = sub {}; + $code->(@_); + }; +} + +sub _build_public_installer { + my $installer = shift; + sub { + my $arg = shift; + $installer->(@{$arg}{qw(into as code)}); + }; +} + +# Build the install function the way Sub::Install does it. +my $install_sub = _build_public_installer(_ignore_warnings(_installer())); + +# The build helpers' temp lexicals (`$code`, `$installer`) are now out of +# scope — the only ref to each layer's CV is the next outer closure's +# capture. + +$install_sub->({ + into => 'D_W6_2_C', + as => 'method', + code => sub { 'C-result' }, +}); + +ok exists &D_W6_2_C::method, 'C: three-layer install put method in stash'; +is D_W6_2_C->method, 'C-result', + 'C: three-layer-installed method callable'; + +# ---- Pattern D: deep capture chain (5 levels) ---------------------------- +sub make_layer { + my $depth = shift; + return sub { @_ } if $depth == 0; + my $inner = make_layer($depth - 1); + return sub { $inner->(@_) }; +} + +{ + my $top = make_layer(5); + is_deeply [$top->('deep-1', 'deep-2')], ['deep-1', 'deep-2'], + 'D: 5-layer deep capture chain returns args'; +} + +# ---- Pattern E: closure captures a CV that captures a CV ----------------- +# Each level captures the level below — refCount on each captured CV +# must not decay. +sub make_chain { + my $tag = shift; + my $inner = sub { "$tag-result" }; + return sub { + my $extra = shift; + return $inner->() . " ($extra)"; + }; +} + +my @chained = map { make_chain("E$_") } 1 .. 20; +my @results = map { $chained[$_]->("call$_") } 0 .. 19; +is scalar @results, 20, 'E: 20 chained closures all callable'; +is $results[0], 'E1-result (call0)', 'E: first closure result'; +is $results[19], 'E20-result (call19)', 'E: last closure result'; + +done_testing; diff --git a/src/test/resources/unit/refcount/drift/hash_slot.t b/src/test/resources/unit/refcount/drift/hash_slot.t new file mode 100644 index 000000000..2ed830d50 --- /dev/null +++ b/src/test/resources/unit/refcount/drift/hash_slot.t @@ -0,0 +1,96 @@ +# D-W6.2 — Hash-slot refCount drift reproducer. +# +# `RuntimeHash.put` does a plain HashMap.put without any refCount +# tracking. So when we do `$h{key} = $obj`: +# - $obj's referent's refCount is NOT incremented (the hash doesn't +# register as an owner) +# - the previous slot value's referent's refCount is NOT decremented +# +# When the right-hand-side scalar's scope exits, refCount drops to 0 +# even though the hash still strongly holds the value, and DESTROY +# fires on a live object. +# +# This is the core drift behind the Class::MOP failure: `our %METAS` +# stores metaclass instances, but their refCount goes to 0 the moment +# the my-var that built them goes out of scope. +use strict; +use warnings; +use Test::More; + +my $destroyed = 0; +package Probe; +sub new { bless { id => ++$Probe::N }, shift } +sub DESTROY { $destroyed++ } + +package main; + +# ---- Pattern A: direct hash slot ---------------------------------------- +# Place a blessed object in a hash slot, drop the lexical, expect the +# object to live (held by the hash). +{ + $destroyed = 0; + my %h; + { + my $obj = Probe->new; + $h{key} = $obj; + # $obj scope ends here — but %h still holds a strong ref + } + is $destroyed, 0, 'A: hash-held blessed object survives my-var exit'; + ok defined $h{key}, 'A: hash slot still defined'; + ok defined $h{key}{id}, 'A: hash slot still has data'; + %h = (); # explicit clear + is $destroyed, 1, 'A: blessed object destroyed after hash cleared'; +} + +# ---- Pattern B: package-global hash (the %METAS shape) ------------------- +# The exact shape Class::MOP uses for `our %METAS`. +{ + $destroyed = 0; + { + package Registry; + our %METAS; + package main; + my $obj = Probe->new; + $Registry::METAS{Foo} = $obj; + } + is $destroyed, 0, + 'B: package-global hash holds blessed object after my-var exit'; + ok defined $Registry::METAS{Foo}, 'B: registered slot still defined'; + %Registry::METAS = (); + is $destroyed, 1, 'B: blessed object destroyed after %METAS cleared'; +} + +# ---- Pattern C: many objects in a hash ---------------------------------- +{ + $destroyed = 0; + my %registry; + for my $i (1 .. 50) { + $registry{$i} = Probe->new; + } + is scalar(keys %registry), 50, 'C: 50 entries in registry'; + is $destroyed, 0, 'C: no premature destroys'; + + my $live_count = 0; + for my $k (keys %registry) { + $live_count++ if defined $registry{$k} && defined $registry{$k}{id}; + } + is $live_count, 50, 'C: all 50 live with valid {id}'; + + %registry = (); + is $destroyed, 50, 'C: all 50 destroyed after clear'; +} + +# ---- Pattern D: replace then drop --------------------------------------- +# Slot overwrite must release the OLD value (Perl 5 refcount semantics). +{ + $destroyed = 0; + my %h; + $h{key} = Probe->new; # obj1 + is $destroyed, 0, 'D: obj1 alive after install'; + $h{key} = Probe->new; # obj2 — obj1 should be destroyed + is $destroyed, 1, 'D: obj1 destroyed when slot overwritten'; + %h = (); + is $destroyed, 2, 'D: obj2 destroyed after clear'; +} + +done_testing; diff --git a/src/test/resources/unit/refcount/drift/sub_install.t b/src/test/resources/unit/refcount/drift/sub_install.t new file mode 100644 index 000000000..ded233777 --- /dev/null +++ b/src/test/resources/unit/refcount/drift/sub_install.t @@ -0,0 +1,104 @@ +# D-W6.1 — Sub-installation drift reproducer. +# +# Tracing `PJ_DESTROY_TRACE=1 ./jperl -e 'use Class::MOP::Class'` revealed +# two specific patterns where anonymous CVs are getting refCount=0 +# transiently with the walker gate disabled: +# +# 1. `Sub::Install`'s anon CVs during `install_sub({ code => $cv, ... })`. +# 2. `Module::Implementation`'s `try { ... } catch { ... }` block CVs. +# +# Both patterns share a shape: an anonymous CV is created, passed through +# `@_` to a subroutine, the subroutine stores or invokes it, and the +# original CV's container scope completes — and at that point the CV's +# cooperative refCount drops to zero even though the receiver's structure +# (a closure-captured array, a hash slot, a glob slot) still holds it. +# +# This file recreates each pattern in bare Perl. +use strict; +use warnings; +use Test::More; + +# ---- Pattern A: install_sub-shaped pass-through -------------------------- +# Mimics Sub::Install's `install_sub({ code => sub { ... }, ... })`. +# A hashref containing the anonymous CV is built, passed to a function, +# the function stores the CV in a package stash, the hashref scope ends. +sub install_via_args { + my $args = shift; + no strict 'refs'; + *{ $args->{into} . '::' . $args->{as} } = $args->{code}; +} + +install_via_args({ + code => sub { 'A-result' }, + into => 'D_W6_1_A', + as => 'method', +}); + +ok exists &D_W6_1_A::method, 'A: install_sub-shaped CV present in stash'; +is D_W6_1_A->method, 'A-result', + 'A: install_sub-shaped CV callable after caller scope ends'; + +# ---- Pattern B: try/catch-shaped block invocation ------------------------ +# Mimics `Try::Tiny`'s `try { ... } catch { ... }`. Two CVs are passed by +# argument; the receiver eval-runs the first, optionally calls the second. +sub mini_try { + my ($try_cv, $catch_cv) = @_; + my $r = eval { $try_cv->() }; + if (!defined $r && $catch_cv) { + $r = $catch_cv->($@); + } + return $r; +} + +is mini_try(sub { 'no-error' }), 'no-error', + 'B: try-shaped success path returns CV result'; +is mini_try(sub { die "boom\n" }, sub { my $e = shift; "caught: $e" }), + "caught: boom\n", + 'B: try-shaped error path runs catch CV'; + +# Loop variant — Module::Implementation does this in a list of candidates. +my @candidates = map { + my $i = $_; + sub { "try-$i" }; +} (1 .. 10); +my $hit = 0; +for my $cv (@candidates) { + $hit++ if mini_try($cv) =~ /^try-/; +} +is $hit, 10, 'B: 10 try-shaped CVs all callable through pass-through'; + +# ---- Pattern C: temp lexical drop, then call through stash --------------- +# This is the precise shape of Sub::Install's failure: the original lexical +# holding the CV is dropped after install_sub returns, leaving the stash +# slot as the only strong holder. +{ + no strict 'refs'; + my $temp_cv = sub { 'C-from-temp' }; + install_via_args({ + code => $temp_cv, + into => 'D_W6_1_C', + as => 'method', + }); + $temp_cv = undef; # explicit drop +} +ok exists &D_W6_1_C::method, 'C: stash holds CV after temp dropped'; +is D_W6_1_C->method, 'C-from-temp', 'C: stash CV still callable'; + +# ---- Pattern D: pass CV through @_ then return it ------------------------ +# `Sub::Install` and many other frameworks pass a CV through one or more +# layers of indirection before installing it. Each layer's `shift`/`return` +# must preserve the refCount. +sub return_arg { return $_[0] } +sub indirect_return { return return_arg(shift) } +sub deep_return { return indirect_return(shift) } + +{ + no strict 'refs'; + my $cv = sub { 'D-deep' }; + *{"D_W6_1_D::method"} = deep_return($cv); + $cv = undef; +} +ok exists &D_W6_1_D::method, 'D: deeply-passed CV present in stash'; +is D_W6_1_D->method, 'D-deep', 'D: deeply-passed CV callable'; + +done_testing;