From 782b62294571b885f179eaaf34b065e948cd2ca4 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 18 Mar 2026 17:55:24 +0100 Subject: [PATCH 1/4] Fix spurious 'my variable masks earlier declaration' warning for if/unless conditions Variables declared in if (my $x = ...) conditions should be in a new scope, not the enclosing scope. This matches Perl behavior where: - my $x = 1; if (my $x = 2) {...} - no warning (different scopes) - if (my $x = 1) {...} elsif (my $x = 2) {...} - warns (same if-chain scope) The fix adds enterScope/exitScope around if/unless statements during parsing, similar to how while/for statements already handle this. Fixes TAP::Formatter::Session warnings during jcpan test runs. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../frontend/parser/StatementParser.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/StatementParser.java b/src/main/java/org/perlonjava/frontend/parser/StatementParser.java index 9122e25ed..9055e8ff1 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StatementParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/StatementParser.java @@ -271,7 +271,28 @@ private static Node parseThreeArgumentForLoop(Parser parser, String label, Node * @return An IfNode representing the if/unless/elsif statement */ public static Node parseIfStatement(Parser parser) { + return parseIfStatementInternal(parser, true); + } + + /** + * Internal helper for parsing if/unless/elsif statements. + * The enterNewScope parameter controls whether to enter a new scope. + * For 'if' and 'unless', we enter a new scope; for 'elsif', we don't + * (to match Perl's behavior where elsif conditions are in the same scope as the if condition). + * + * @param parser The Parser instance + * @param enterNewScope Whether to enter a new scope before parsing + * @return An IfNode representing the if/unless/elsif statement + */ + private static Node parseIfStatementInternal(Parser parser, boolean enterNewScope) { LexerToken operator = TokenUtils.consume(parser, LexerTokenType.IDENTIFIER); // "if", "unless", "elsif" + + // Enter a new scope for 'if' and 'unless' (but not for 'elsif' which is part of the same chain) + int scopeIndex = -1; + if (enterNewScope) { + scopeIndex = parser.ctx.symbolTable.enterScope(); + } + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "("); Node condition = parser.parseExpression(0); TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")"); @@ -286,7 +307,13 @@ public static Node parseIfStatement(Parser parser) { elseBranch = ParseBlock.parseBlock(parser); TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); } else if (token.text.equals("elsif")) { - elseBranch = parseIfStatement(parser); + // Don't enter new scope for elsif - it's in the same scope as the if condition + elseBranch = parseIfStatementInternal(parser, false); + } + + // Exit the scope if we entered one + if (enterNewScope) { + parser.ctx.symbolTable.exitScope(scopeIndex); } // Use a macro to emulate Test::More SKIP blocks From 58847d1520aa3486e5f3522fac003799f0354b23 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 18 Mar 2026 18:07:44 +0100 Subject: [PATCH 2/4] Document 'local' package variable bug as root cause of Log4perl issues Investigation revealed that 'local $Foo::X' from outside package Foo doesn't affect '$X' accessed inside Foo subroutines. Root cause: - 'our' variables are loaded once at declaration time and cached in registers - When 'local' replaces the global with a new object, cached registers still point to the old object - Proper fix requires 'our' variables to re-fetch from symbol table on each access This affects $Carp::CarpLevel and likely many other modules that use 'local $Module::Variable' pattern. Added detailed reproduction case and updated priority order in design doc. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/log4perl-compatibility.md | 54 +++++++++++++++++----------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/dev/design/log4perl-compatibility.md b/dev/design/log4perl-compatibility.md index 0fc7f7e65..f6c0d518a 100644 --- a/dev/design/log4perl-compatibility.md +++ b/dev/design/log4perl-compatibility.md @@ -169,40 +169,52 @@ my $m = Carp::longmess(); # Sometimes fails with undef GLOB ## Remaining Issues -### Issue 1: Carp.pm / warnings.pm Interaction +### Issue 1: `local` Package Variable Bug (ROOT CAUSE) -**Symptom:** t/020Easy.t tests 17-21 - error after %T logging: -``` -Can't use an undefined value as a GLOB reference at jar:PERL5LIB/Carp.pm line 755 -``` +**Symptom:** `$Carp::CarpLevel` set via `local` from outside the Carp package is not visible when accessed inside Carp.pm using the short name `$CarpLevel`. -**Root Cause:** PerlOnJava's warnings.pm lacks `$VERSION`, causing Carp.pm to execute a workaround code path (line 752) that fails in certain contexts. +**Reproduction:** +```perl +package Foo; +our $X = 0; +sub check { print "X=$X\n"; } # Uses short name -**Proposed Fix:** Add `our $VERSION = "1.78";` to PerlOnJava's warnings.pm to skip the problematic code path. +package main; +local $Foo::X = 1; +Foo::check(); # jperl prints "X=0", Perl prints "X=1" +``` -**Note:** 4 of the 5 failing tests in t/020Easy.t are just filename pattern mismatches (test expects "020Easy.t" but gets "-" from stdin). Only 1 failure is the Carp.pm error. +**Root Cause:** When compiling `$X` inside `package Foo`, jperl resolves it to a different storage location than `$Foo::X`. The `local` modifier only affects the fully-qualified name, not the short name used inside the package. + +**Impact:** This breaks any module that uses `local $Module::Variable` to temporarily modify package state, including: +- `$Carp::CarpLevel` - affects all Carp error reporting +- Many other modules that use this pattern **Affected Tests:** -- t/020Easy.t (tests 17-21) +- t/020Easy.t (tests 18-20) - line numbers off due to CarpLevel not working +- t/022Wrap.t (2 failures) - same root cause +- t/024WarnDieCarp.t (11 failures) - same root cause +- t/051Extra.t (2 failures) - same root cause + +**Workaround:** Modules can use fully-qualified variable names (`$Carp::CarpLevel` instead of `$CarpLevel`) but this requires patching every affected module. -### Issue 2: caller() Stack Trace Format +**Fix Needed:** Investigate how package variables are resolved during compilation. The short name `$X` inside `package Foo` should resolve to the same glob as `$Foo::X`. -**Symptom:** Stack traces from `Carp::shortmess` include internal PerlOnJava frames. +### Issue 2: Carp.pm / warnings.pm Interaction -**Example from t/022Wrap.t:** +**Symptom:** t/020Easy.t tests 17-21 - error after %T logging: ``` -Expected: 'File: 022Wrap.t Line number: 70 package: main trace: at 022Wrap.t line 70' -Got: 'File: 022Wrap.t Line number: 70 package: main trace: Log::Log4perl::Appender::log() called at ... line 1115, ...' +Can't use an undefined value as a GLOB reference at jar:PERL5LIB/Carp.pm line 755 ``` -**Root Cause:** The `caller()` implementation is exposing internal call frames that Perl would filter out. +**Root Cause:** PerlOnJava's warnings.pm lacks `$VERSION`, causing Carp.pm to execute a workaround code path (line 752) that fails in certain contexts. -**Fix Needed:** Review `ExceptionFormatter.formatException()` and filter out internal frames from Log::Log4perl's perspective. +**Proposed Fix:** Add `our $VERSION = "1.78";` to PerlOnJava's warnings.pm to skip the problematic code path. + +**Note:** 4 of the 5 failing tests in t/020Easy.t are just filename pattern mismatches (test expects "020Easy.t" but gets "-" from stdin). Only 1 failure is the Carp.pm error. **Affected Tests:** -- t/022Wrap.t (2 failures) -- t/024WarnDieCarp.t (11 failures) - tests 51-53, 58-62, 67, 69-70 -- t/051Extra.t (2 failures) - line number reporting +- t/020Easy.t (tests 17-21) ### Issue 3: File Permissions (stat/chmod) @@ -254,8 +266,8 @@ Got: 'File: 022Wrap.t Line number: 70 package: main trace: Log::Log4perl::A ## Priority Order -1. **Carp.pm undef GLOB** - Blocks 5 tests in t/020Easy.t -2. **caller() stack trace format** - Affects 15 tests across multiple files +1. **`local` package variable bug** - ROOT CAUSE affecting 16+ tests across multiple files +2. **Carp.pm/warnings.pm interaction** - Separate issue, quick fix possible 3. **DESTROY message** - May be a minor timing/output issue 4. **File permissions** - Likely straightforward fix 5. **Safe.pm** - May require significant work From dfa6e59186bbc479139e17468242fc3474c73fb0 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 18 Mar 2026 21:41:13 +0100 Subject: [PATCH 3/4] Add design doc for fixing 'local' package variable bug Detailed plan to fix the issue where 'our' variables in subroutines don't see 'local'ized values set from outside the package. Recommends Option A: re-fetch 'our' variables from global symbol table on each access instead of using cached register values. Includes: - Root cause analysis - Three solution options with pros/cons - Detailed 5-phase implementation plan - Risk assessment and timeline estimate Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/local-package-variable-fix.md | 270 +++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 dev/design/local-package-variable-fix.md diff --git a/dev/design/local-package-variable-fix.md b/dev/design/local-package-variable-fix.md new file mode 100644 index 000000000..818719eb5 --- /dev/null +++ b/dev/design/local-package-variable-fix.md @@ -0,0 +1,270 @@ +# Fix Plan: `local` Package Variable Bug + +## Problem Statement + +When `local $Foo::X` is executed from outside package `Foo`, subroutines inside `Foo` that access `$X` via the short name don't see the localized value. + +### Reproduction + +```perl +package Foo; +our $X = 0; +sub check { print "X=$X\n"; } # Uses short name + +package main; +local $Foo::X = 1; +Foo::check(); # jperl: "X=0", Perl: "X=1" +``` + +### Impact + +- Breaks `$Carp::CarpLevel` (affects error reporting in all modules using Carp) +- Affects 16+ Log4perl tests +- Likely affects many CPAN modules that use `local $Module::Variable` pattern + +## Root Cause Analysis + +### Current Implementation + +1. **`our` declaration** (`our $X` in package Foo): + ```java + int reg = addVariable(varName, "our"); + emit(Opcodes.LOAD_GLOBAL_SCALAR); // Load from symbol table + emitReg(reg); // Store in register + emit(nameIdx); // Using name "Foo::X" + ``` + +2. **Subsequent access** (`$X` in subroutine): + ```java + if (hasVariable(varName)) { + return getVariableRegister(varName); // Returns cached register! + } + ``` + +3. **`local` execution** (`local $Foo::X = 1`): + - Creates a new RuntimeScalar object + - Replaces the entry in GlobalVariable.globalVariables map + - The subroutine's cached register still points to the OLD object + +### Why Perl Works Differently + +In Perl, `our` creates a lexical alias to the **glob slot**, not to the scalar value. When `local` replaces the scalar in the glob slot, the alias still points to the slot (which now contains the new value). + +In jperl, `our` caches a **reference to the scalar object** in a register. When `local` puts a new object in the symbol table, the register still holds the old reference. + +## Proposed Solution + +### Option A: Re-fetch `our` Variables on Every Access (Recommended) + +**Approach:** When accessing an `our` variable inside a subroutine, always emit `LOAD_GLOBAL_*` instead of using the cached register. + +**Changes Required:** + +1. **Track `our` variable declarations separately from lexicals** + - Modify `ScopedSymbolTable` to track `our` declarations differently + - Store the normalized global name with each `our` declaration + +2. **Modify variable access emission** + - In `BytecodeCompiler.visit(OperatorNode)` for `$` sigil: + - If variable is `our`-declared, emit `LOAD_GLOBAL_SCALAR` instead of register access + - Same for `@` and `%` sigils + +3. **Affected files:** + - `src/main/java/org/perlonjava/frontend/semantic/ScopedSymbolTable.java` + - `src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java` + - `src/main/java/org/perlonjava/backend/jvm/EmitVariable.java` + +**Pros:** +- Semantically correct - matches Perl behavior exactly +- Simple conceptually + +**Cons:** +- Performance impact: extra indirection on every `our` variable access +- May need optimization pass later (e.g., caching when provably safe) + +### Option B: Make GlobalVariable Entries Indirect References + +**Approach:** Instead of storing RuntimeScalar directly in globalVariables map, store an indirection object that holds the current scalar. + +**Changes Required:** + +1. **Create `GlobalScalarSlot` class:** + ```java + public class GlobalScalarSlot { + private RuntimeScalar current; + public RuntimeScalar get() { return current; } + public void set(RuntimeScalar s) { current = s; } + } + ``` + +2. **Modify `GlobalVariable.globalVariables`:** + - Store `GlobalScalarSlot` instead of `RuntimeScalar` + - All accesses go through the slot + +3. **Modify `local` implementation:** + - `local` calls `slot.set(newScalar)` instead of replacing map entry + +**Pros:** +- `our` variables automatically see changes (no emission changes needed) +- Potentially better performance (register still valid) + +**Cons:** +- Significant refactoring of GlobalVariable infrastructure +- Breaking change to RuntimeScalar usage patterns +- More complex memory model + +### Option C: Hybrid - Invalidate Cached Registers on `local` + +**Approach:** Track which registers hold `our` variable references. When `local` is executed, invalidate those registers so they're re-fetched. + +**Cons:** +- Complex to implement correctly +- Cross-cutting concern (local affects unrelated code) +- Hard to handle dynamic scopes correctly + +## Recommended Implementation: Option A + +Option A is recommended because: +1. Conceptually simple and correct +2. Minimal risk of introducing new bugs +3. Performance impact is acceptable for correctness +4. Can be optimized later if profiling shows it's a bottleneck + +## Detailed Implementation Plan + +### Phase 1: Modify Symbol Table (Low Risk) + +**File:** `src/main/java/org/perlonjava/frontend/semantic/ScopedSymbolTable.java` + +1. Add new tracking for `our` declarations: + ```java + // Map from short variable name to normalized global name + private Map ourVariableGlobalNames = new HashMap<>(); + + public void addOurVariable(String varName, String globalName) { + ourVariableGlobalNames.put(varName, globalName); + } + + public String getOurVariableGlobalName(String varName) { + return ourVariableGlobalNames.get(varName); + } + + public boolean isOurVariable(String varName) { + return ourVariableGlobalNames.containsKey(varName); + } + ``` + +2. Modify `addVariable()` to populate this map when `declarationType.equals("our")` + +### Phase 2: Modify Bytecode Compiler (Medium Risk) + +**File:** `src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java` + +1. In `visit(OperatorNode node)` for sigil operators (`$`, `@`, `%`): + + Current code path for variable access: + ```java + if (hasVariable(varName)) { + int reg = getVariableRegister(varName); + // Use register directly + } + ``` + + Modified code path: + ```java + if (hasVariable(varName)) { + String globalName = symbolTable.getOurVariableGlobalName(varName); + if (globalName != null) { + // It's an 'our' variable - always fetch from global + int reg = allocateRegister(); + int nameIdx = addToStringPool(globalName); + emit(Opcodes.LOAD_GLOBAL_SCALAR); // or ARRAY/HASH + emitReg(reg); + emit(nameIdx); + lastResultReg = reg; + } else { + // Regular lexical - use cached register + int reg = getVariableRegister(varName); + lastResultReg = reg; + } + } + ``` + +### Phase 3: Modify JVM Emitter (Medium Risk) + +**File:** `src/main/java/org/perlonjava/backend/jvm/EmitVariable.java` + +Apply similar changes to `emitVariable()` method: +- Check if variable is `our`-declared +- If so, emit code to fetch from GlobalVariable instead of using cached reference + +### Phase 4: Testing + +1. **Unit test for the specific bug:** + ```perl + package Foo; + our $X = 0; + sub check { return $X; } + + package main; + use Test::More; + is(Foo::check(), 0, 'before local'); + { + local $Foo::X = 1; + is(Foo::check(), 1, 'inside local'); # Currently fails + } + is(Foo::check(), 0, 'after local'); + done_testing(); + ``` + +2. **Run Log4perl tests:** Should see improvement in t/020Easy.t, t/022Wrap.t, t/024WarnDieCarp.t, t/051Extra.t + +3. **Run full test suite:** Ensure no regressions + +4. **Performance testing:** Benchmark before/after on code-heavy `our` variable usage + +### Phase 5: Optimization (Future) + +If Phase 4 shows performance issues: + +1. **Static analysis:** If we can prove `local` is never called on a variable within a compilation unit, use the cached register + +2. **Inline caching:** First access fetches from global and caches; subsequent accesses use cache but have a guard that invalidates on `local` + +## Risk Assessment + +| Risk | Likelihood | Impact | Mitigation | +|------|------------|--------|------------| +| Performance regression | Medium | Medium | Phase 5 optimizations | +| Breaking existing code | Low | High | Comprehensive testing | +| Incomplete fix | Low | Medium | Thorough test coverage | +| Interpreter backend inconsistency | Medium | Medium | Apply same changes to both backends | + +## Timeline Estimate + +- Phase 1: 1-2 hours +- Phase 2: 2-4 hours +- Phase 3: 2-4 hours +- Phase 4: 2-4 hours +- Phase 5: Future work + +**Total: 1-2 days** + +## Success Criteria + +1. Reproduction case above works correctly +2. `$Carp::CarpLevel` works correctly with `local` +3. Log4perl test failures related to CarpLevel are resolved +4. No regressions in existing test suite +5. Performance impact is acceptable (< 5% on typical workloads) + +## Related Issues + +- Log4perl compatibility: `dev/design/log4perl-compatibility.md` +- Affects: t/020Easy.t, t/022Wrap.t, t/024WarnDieCarp.t, t/051Extra.t + +## References + +- Perl `our` documentation: https://perldoc.perl.org/functions/our +- Perl `local` documentation: https://perldoc.perl.org/functions/local +- PerlGuts on symbol tables: https://perldoc.perl.org/perlguts#Stashes-and-Globs From f6accd6ab6f90f980e8d82e92aa05407a3463ba6 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 18 Mar 2026 21:49:43 +0100 Subject: [PATCH 4/4] Add constraint: our aliases must persist across package changes The global name for an 'our' variable is captured at declaration time, not access time. This ensures 'our $x' in main still refers to $main::x even after 'package ZZZ;'. Added test case and key insight to Phase 1 of implementation plan. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/local-package-variable-fix.md | 27 +++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/dev/design/local-package-variable-fix.md b/dev/design/local-package-variable-fix.md index 818719eb5..f7bdc59bd 100644 --- a/dev/design/local-package-variable-fix.md +++ b/dev/design/local-package-variable-fix.md @@ -16,6 +16,29 @@ local $Foo::X = 1; Foo::check(); # jperl: "X=0", Perl: "X=1" ``` +### Important Constraint: `our` Aliases Persist Across Package Changes + +The `our` declaration creates a **lexical** alias that must persist even when the package changes: + +```perl +our $x = 10; # Creates alias $x -> $main::x +package ZZZ; +print $x; # Must still print 10 (referring to $main::x) +``` + +This already works in jperl. The fix must preserve this behavior - the `our` variable's **target package** is fixed at declaration time, not at access time. + +Combined test case (currently broken): +```perl +our $x = 10; +sub check { print "x=$x\n"; } +package ZZZ; +{ + local $main::x = 99; + main::check(); # jperl: "x=10", Perl: "x=99" +} +``` + ### Impact - Breaks `$Carp::CarpLevel` (affects error reporting in all modules using Carp) @@ -136,9 +159,11 @@ Option A is recommended because: **File:** `src/main/java/org/perlonjava/frontend/semantic/ScopedSymbolTable.java` +**Key Insight:** The global name must be captured at `our` declaration time, not at access time. This ensures that after `package ZZZ;`, the `$x` still refers to `$main::x` (the package where `our $x` was declared). + 1. Add new tracking for `our` declarations: ```java - // Map from short variable name to normalized global name + // Map from short variable name to normalized global name (captured at declaration time) private Map ourVariableGlobalNames = new HashMap<>(); public void addOurVariable(String varName, String globalName) {