diff --git a/dev/import-perl5/config.yaml b/dev/import-perl5/config.yaml index f75849d60..f752ee00d 100644 --- a/dev/import-perl5/config.yaml +++ b/dev/import-perl5/config.yaml @@ -788,6 +788,13 @@ imports: - perlvos.pod - perlwin32.pod + # diagnostics - Verbose warning/error diagnostics using perldiag + # Required by Math::Base::Convert t/overload.t and other CPAN test suites + # Protected: has PerlOnJava-specific patch to search @INC for perldiag.pod + - source: perl5/lib/diagnostics.pm + target: src/main/perl/lib/diagnostics.pm + protected: true + # Add more imports below as needed # Example with minimal fields: # - source: perl5/lib/SomeModule.pm diff --git a/dev/modules/math_base_convert.md b/dev/modules/math_base_convert.md new file mode 100644 index 000000000..886ce8b08 --- /dev/null +++ b/dev/modules/math_base_convert.md @@ -0,0 +1,382 @@ +# Math::Base::Convert — CPAN Compatibility Plan + +## Module Info + +- **CPAN**: Math-Base-Convert-0.13 +- **Author**: MIKER +- **Type**: Pure Perl (no XS) +- **Purpose**: Arbitrary base-to-base number conversion + +## Current Status + +After fixing the MakeMaker root-level `.pm` install issue (PR #500), the module +loads and **15/20 test programs pass**. The 5 remaining failures trace back to +exactly **two jperl bugs** plus one missing bundled module. + +## Test Results Summary + +| Test file | Result | Root cause | +|-----------|--------|------------| +| t/ascii.t | PASS | — | +| t/backend.t | FAIL (132/133) | Bug 1: `\@{&func}` pattern | +| t/basefunct.t | PASS | — | +| t/basemap.t | PASS | — | +| t/basemethods.t | PASS | — | +| t/benchmarkcalc.t | PASS | — | +| t/benchmarkcnv.t | FAIL (156/157) | Bug 1: `\@{&func}` pattern | +| t/convert.t | PASS | — | +| t/frontend.t | FAIL (138/139) | Bug 1: `\@{&func}` pattern | +| t/isnotp2.t | PASS | — | +| t/longmultiply.t | PASS | — | +| t/overload.t | FAIL (0 subtests) | Issue 3: Missing `diagnostics.pm` | +| t/shiftright.t | PASS | — | +| t/useFROMbaseShortcuts.t | PASS | — | +| t/useFROMbaseto32wide.t | PASS | — | +| t/useTObaseShortcuts.t | PASS | — | +| t/validbase.t | PASS | — | +| t/vet.t | PASS | — | +| t/vetcontext.t | PASS | — | +| t/zstrings.t | FAIL (198/4357) | Bug 2: `caller` hasargs flag | + +--- + +## Bug 1: `\@{&func}` — `parsingTakeReference` flag leaks into block context + +**Affects**: t/backend.t, t/benchmarkcnv.t, t/frontend.t (3 test files, ~426 subtests) + +### Symptom + +The expression `\@{&func}` throws "Not an ARRAY reference" even though +`&func` returns a valid blessed arrayref, and `@{&func}` (without the +backslash) works fine. + +### Reproducer + +```perl +my $ref = bless ["a", "b", "c"], "Test::Class"; +sub mysub { return $ref } + +\@{mysub()}; # OK — works in both perl and jperl +@{&mysub}; # OK — works in both (array deref, no backslash) +\@{&mysub}; # BUG — "Not an ARRAY reference" in jperl, works in perl +``` + +### Root cause: parser flag leak + +When the parser encounters `\`, it sets `parser.parsingTakeReference = true` +(in `ParsePrimary.java` line 348) to prevent `&sub` from being auto-called. +This is correct for `\&sub` (which should produce a code reference), but the +flag **leaks** into the inner block of `@{...}` because `parseBracedVariable()` +does not save/restore it before parsing the block contents. + +**AST comparison:** + +`@{&mysub}` (correct — flag is `false`): +``` +OperatorNode: @ + BlockNode: + BinaryOperatorNode: ( ← &mysub is called with @_ + shareCallerArgs: true + OperatorNode: & + IdentifierNode: 'mysub' + OperatorNode: @ + IdentifierNode: '_' +``` + +`\@{&mysub}` (broken — flag is `true`): +``` +OperatorNode: \ + OperatorNode: @ + BlockNode: + OperatorNode: & ← &mysub treated as CODE ref, not called! + IdentifierNode: 'mysub' +``` + +**The critical check in the parser** is at `Variable.java` line 747–749: +```java +if (parser.parsingTakeReference && !peek(parser).text.equals("(")) { + return node; // Returns &func as CODE reference, no auto-call +} +``` + +When `parsingTakeReference` is `true` (leaked from the outer `\`), `&mysub` is +returned as a bare code reference. The `@{}` dereference then tries to use this +code reference as an array reference, which fails. + +### Code path walkthrough + +1. `\` is parsed → `ParsePrimary.java:348` sets `parser.parsingTakeReference = true` +2. `parseExpression(22)` is called to parse the operand at higher precedence +3. `@` is encountered → dispatches to `Variable.parseVariable("@")` +4. `{` follows → `parseBracedVariable("@", false)` at `Variable.java:133` +5. Inside `parseBracedVariable`, tries simple identifier parse, fails (it's `&func`) +6. Falls through to **block parsing** at `Variable.java:1029` +7. `parseBracedVariable` saves/restores `insideBracedDereference` but **does NOT + save/restore `parsingTakeReference`** +8. `ParseBlock.parseBlock()` is called with `parsingTakeReference` still `true` +9. Inside the block, `&func` → `parseCoderefVariable()` at `Variable.java:747`: + - `parser.parsingTakeReference` is `true` ← **leaked from step 1** + - next token is `}` (not `(`), so condition matches + - Returns `&func` as bare CODE ref — **no call, no `@_` pass-through** + +### Fix + +Save/restore `parsingTakeReference` in `parseBracedVariable()` before the block +parsing fallback, following the same pattern as `insideBracedDereference`: + +**File**: `src/main/java/org/perlonjava/frontend/parser/Variable.java` (~line 1025) + +```java +boolean savedInsideBracedDereference = parser.insideBracedDereference; +boolean savedParsingTakeReference = parser.parsingTakeReference; // ADD +if (sigil.equals("%")) { + parser.insideBracedDereference = true; +} +parser.parsingTakeReference = false; // ADD +try { + BlockNode block = ParseBlock.parseBlock(parser); + // ... existing code ... +} finally { + parser.insideBracedDereference = savedInsideBracedDereference; + parser.parsingTakeReference = savedParsingTakeReference; // ADD +} +``` + +This pattern is already used elsewhere: +- `OperatorParser.java:44-47` (`parseDoOperator`) +- `OperatorParser.java:804-807` (`parseDefined`) +- `OperatorParser.java:825-828` (`parseUndef`) +- `PrototypeArgs.java:821-844` + +### Module code that triggers this + +All three failing tests have the same pattern at line 114: +```perl +my @bases = ( \@{&bin}, \@{&dna}, \@{&oct}, \@{&hex}, \@{&bas32}, \@{&b64}, ... ); +``` + +The `&bin`, `&dna` etc. are imported from `Math::Base::Convert` via `:base` tag. +They return blessed arrayrefs. `\@{&func}` should dereference the arrayref and +create a new plain array ref from it. + +--- + +## Bug 2: `caller(0)[4]` hasargs flag always returns 1 + +**Affects**: t/zstrings.t (198/4357 subtests) + +### Symptom + +The `hasargs` field of `caller()` (index 4) always returns `1`, even when a +function is called via `&func` (ampersand, no parens) where `@_` is inherited +rather than explicitly passed. + +### Reproducer + +```perl +sub inner { print "hasargs: ", (caller(0))[4], "\n" } +sub outer { &inner } # should inherit @_, hasargs = false +outer("arg"); +# jperl: hasargs: 1 ← WRONG +# perl: hasargs: ← correct (empty/false) +``` + +### Perl 5 semantics + +| Call style | @_ behavior | hasargs | +|------------|-------------|---------| +| `func(args)` | Fresh @_ created from args | 1 (true) | +| `&func(args)` | Fresh @_ created from args | 1 (true) | +| `&func` (no parens) | Inherits caller's @_ | empty (false) | + +### How this breaks Math::Base::Convert + +`Bases.pm` overrides `hex()` and `oct()` to serve dual purpose: + +```perl +sub oct { + unless (ref($_[0]) && ...) { + if ( defined $_[0] && (caller(0))[4] ) { # ← checks hasargs + return CORE::oct $_[0]; # ← delegate to CORE::oct + } + } + return ocT(); # ← return base array +} +``` + +When called as `&Math::Base::Convert::oct` (via `getref("oct")` in the tests), +`@_` is inherited from the caller (containing the string `"oct"`). The function +checks `(caller(0))[4]` to distinguish: +- `hasargs = true` → called as `oct("string")` → delegate to `CORE::oct` +- `hasargs = false` → called as `&oct` with inherited @_ → return base array + +When jperl always returns `hasargs = 1`, the function always takes the CORE path, +so `CORE::oct("oct")` returns `0` instead of the expected base array reference. + +The test `zstrings.t` calls `getref("oct")` which uses `&{$sub}` (symbolic ref, +no parens): +```perl +sub getref { + return $_[0] if ref $_[0]; + my $sub = "Math::Base::Convert::" . $_[0]; + no strict; + &{$sub}; # ← inherits @_ = ("oct"), but hasargs should be false +} +``` + +All 198 failures are: any base → `oct` or `hex` target, `"data got: |0|, exp: ||"`. + +### Current implementation + +**File**: `RuntimeCode.java` lines 1973–1978: +```java +// hasargs is always 1 for any subroutine frame (no distinction) +boolean hasArgs = subName != null && !subName.isEmpty() && + !subName.equals("(eval)") && !subName.endsWith("::(eval)"); +res.add(hasArgs ? RuntimeScalarCache.scalarTrue : RuntimeScalarCache.scalarUndef); +``` + +The `shareCallerArgs` annotation IS already tracked at parse time and used +by both backends (JVM and bytecode interpreter) to decide how to pass `@_`: +- JVM: `EmitSubroutine.java` lines 613–643 passes caller's slot 1 directly +- Interpreter: `CALL_SUB_SHARE_ARGS` opcode (401) vs `CALL_SUB` (57) + +But the information is **not propagated** to `caller()`. + +### How both backends handle `&func` vs `func()` calls + +**Parser stage** (`Variable.java` `parseCoderefVariable()`): +- `&func` (no parens): builds `&func(@_)` with annotation `shareCallerArgs=true` +- `func()` or `&func()`: builds normal call, no annotation + +**JVM backend** (`EmitSubroutine.java`): +- `shareCallerArgs=true`: calls `RuntimeCode.apply(codeRef, callerArgs, ctx)` (3-arg) + - Passes caller's `RuntimeArray` (slot 1) directly — no copy +- Normal call: calls `RuntimeCode.apply(codeRef, name, argsArray, ctx)` (4-arg) + - Creates new `RuntimeArray` from args + +**Bytecode interpreter** (`BytecodeInterpreter.java`): +- `CALL_SUB_SHARE_ARGS`: calls `RuntimeCode.apply(codeRef, callArgs, ctx)` (3-arg) +- `CALL_SUB`: calls `RuntimeCode.apply(codeRef, "", callArgs, ctx)` (4-arg) + +### Fix approach: thread-local `hasArgsStack` + +Add a `ThreadLocal>` parallel to the existing `argsStack`. + +**Existing infrastructure** (`RuntimeCode.java`): +```java +// Already exists — tracks @_ per call frame +private static final ThreadLocal> argsStack = ...; +public static void pushArgs(RuntimeArray args) { ... } +public static void popArgs() { ... } +``` + +**Add:** +```java +// NEW — tracks whether each frame created fresh @_ +private static final ThreadLocal> hasArgsStack = + ThreadLocal.withInitial(ArrayDeque::new); +``` + +**Push** in both `apply()` overloads: +- 3-arg `apply(scalar, array, ctx)` — shared args: push `false` +- 4-arg `apply(scalar, name, args, ctx)` — fresh args: push `true` + +**Pop** in the existing `finally` blocks of both `apply()` methods, alongside +the existing `WarningBitsRegistry.popCallerBits()` etc. + +**Read** in `callerWithSub()` at the hasargs section (~line 1976): +```java +// Replace the current heuristic with actual stack lookup +Boolean hasArgs = getHasArgsAt(callerDepth); +res.add(hasArgs != null && hasArgs + ? RuntimeScalarCache.scalarTrue + : RuntimeScalarCache.scalarUndef); +``` + +### Key files to modify + +| File | Change | +|------|--------| +| `RuntimeCode.java` ~line 150 | Add `hasArgsStack` declaration | +| `RuntimeCode.java` ~line 200 | Add push/pop/getter helpers | +| `RuntimeCode.java` 3-arg `apply()` | Push `false` to `hasArgsStack` | +| `RuntimeCode.java` 4-arg `apply()` | Push `true` to `hasArgsStack` | +| `RuntimeCode.java` both `finally` blocks | Pop `hasArgsStack` | +| `RuntimeCode.java` `callerWithSub()` ~line 1976 | Read from `hasArgsStack` | + +### Depth mapping challenge + +The `hasArgsStack` depth does not directly correspond to the `caller(N)` depth, +because `caller()` counts Perl-visible frames from the JVM stack trace while +`hasArgsStack` counts `apply()` calls. However, both stacks grow/shrink in lockstep +for normal subroutine calls, so the Nth entry from the top of `hasArgsStack` +corresponds to the Nth Perl subroutine frame. + +A simpler approach: since `callerWithSub()` iterates through JVM stack frames +counting Perl-visible frames, it can maintain a counter of how many subroutine +frames it has passed through, and use that as the depth index into `hasArgsStack`. + +--- + +## Issue 3: Missing `diagnostics.pm` + +**Affects**: t/overload.t (1 test file, ~23 subtests) + +### Symptom + +`Can't locate diagnostics.pm in @INC` + +### Resolution + +Bundle `diagnostics.pm` from `perl5/lib/diagnostics.pm` via `sync.pl`, with a +PerlOnJava-specific patch to also search `@INC` for `Pod/perldiag.pod` (the JAR +bundles it as `lib/Pod/perldiag.pod` with capital P, not in `$Config{privlibexp}`). + +**Config entry** (`dev/import-perl5/config.yaml`): +```yaml +- source: perl5/lib/diagnostics.pm + target: src/main/perl/lib/diagnostics.pm + protected: true # has @INC search patch for perldiag.pod +``` + +**Patch** (in `diagnostics.pm` around line 207): +```perl +# PerlOnJava: also search @INC for pod/perldiag.pod and Pod/perldiag.pod +for my $inc (@INC) { + push @trypod, "$inc/pod/perldiag.pod", "$inc/Pod/perldiag.pod"; +} +``` + +Dependencies: `Text::Tabs` (already bundled), `Config` (already available), +`perldiag.pod` (already in JAR via `perl5/pod` directory import). + +### Status: DONE + +diagnostics.pm has been imported, patched, and verified working. + +--- + +## Implementation Status + +| Item | Status | Details | +|------|--------|---------| +| PR #500: MakeMaker root-level .pm fix | MERGED | `ExtUtils/MakeMaker.pm` | +| Issue 3: diagnostics.pm | DONE | Imported, patched, protected | +| Bug 1: `\@{&func}` parser fix | DONE | `Variable.java` — save/restore `parsingTakeReference` | +| Bug 2: caller hasargs | IN PROGRESS | `RuntimeCode.java` — `hasArgsStack` added, wiring needed | + +## Verification + +```bash +# Clean previous build +rm -rf ~/.perlonjava/cpan/build/Math-Base-Convert-* +rm -f ~/.perlonjava/lib/Math/Base/Convert.pm +rm -f ~/.perlonjava/lib/Math/Base/Convert/*.pm + +# Rebuild and test +make +./jcpan -t Math::Base::Convert + +# Expected after all fixes: 20/20 test programs pass +``` diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 1359fd2ac..7fd7161c1 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "582f96865"; + public static final String gitCommitId = "7bf93f44f"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 13 2026 13:52:26"; + public static final String buildTimestamp = "Apr 13 2026 14:15:09"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/Variable.java b/src/main/java/org/perlonjava/frontend/parser/Variable.java index b9b5bfb5a..a7329934c 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Variable.java +++ b/src/main/java/org/perlonjava/frontend/parser/Variable.java @@ -1023,9 +1023,13 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt // When the sigil is %, inner {} should be treated as hash constructor (not block) // to match Perl 5's behavior: %{ {map { $_ => 1 } @_} } should parse the inner {} as hashref. boolean savedInsideBracedDereference = parser.insideBracedDereference; + // Reset parsingTakeReference inside block context: the block inside @{...} is an + // independent evaluation context, so \@{&func} should call &func (not take a reference). + boolean savedParsingTakeReference = parser.parsingTakeReference; if (sigil.equals("%")) { parser.insideBracedDereference = true; } + parser.parsingTakeReference = false; try { BlockNode block = ParseBlock.parseBlock(parser); if (!TokenUtils.peek(parser).text.equals("}")) { @@ -1055,6 +1059,7 @@ public static Node parseBracedVariable(Parser parser, String sigil, boolean isSt throw new PerlParserException(multiLineError); } finally { parser.insideBracedDereference = savedInsideBracedDereference; + parser.parsingTakeReference = savedParsingTakeReference; } } diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 387a73ff6..f6764b33b 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -150,6 +150,17 @@ protected boolean removeEldestEntry(Map.Entry, MethodHandle> eldest) { private static final ThreadLocal> argsStack = ThreadLocal.withInitial(ArrayDeque::new); + /** + * Thread-local stack tracking whether each call frame created a fresh @_ (hasargs). + * In Perl 5, caller()[4] (hasargs) is 1 when the subroutine was called with explicit + * arguments (func() or &func()), and false/empty when called via &func (no parens) + * which inherits the caller's @_. + * + * Push/pop is handled alongside argsStack in the apply() methods. + */ + private static final ThreadLocal> hasArgsStack = + ThreadLocal.withInitial(ArrayDeque::new); + /** * Get the current subroutine's @_ array. * Used by Java-implemented functions (like List::Util::any) that need to pass @@ -192,7 +203,8 @@ public static void pushArgs(RuntimeArray args) { } /** - * Pop @_ from the args stack when exiting a subroutine. + * Pop @_ and hasargs flag from their respective stacks when exiting a subroutine. + * Both stacks are pushed in the instance apply() methods and must be popped together. * Public so BytecodeInterpreter can use it when calling InterpretedCode directly. */ public static void popArgs() { @@ -200,6 +212,33 @@ public static void popArgs() { if (!stack.isEmpty()) { stack.pop(); } + Deque haStack = hasArgsStack.get(); + if (!haStack.isEmpty()) { + haStack.pop(); + } + } + + /** + * Get the hasargs flag for a given call depth. + * depth=0 is the current (innermost) frame, depth=1 is its caller, etc. + * + * This depth maps directly to the user-supplied argument of caller(N): + * caller(0) queries depth 0, caller(1) queries depth 1, etc. + * The mapping works because hasArgsStack has one entry per Perl subroutine + * call (pushed in the instance apply() methods), and the Deque iteration + * order is LIFO (most recent first), matching the call stack order. + * + * @return true if the frame at that depth created fresh @_, false if it + * inherited @_ (via &func with no parens), null if depth is out of range + */ + public static Boolean getHasArgsAt(int depth) { + Deque stack = hasArgsStack.get(); + int i = 0; + for (Boolean b : stack) { + if (i == depth) return b; + i++; + } + return null; } /** @@ -1878,6 +1917,13 @@ public static RuntimeList callerWithSub(RuntimeList args, int ctx, RuntimeScalar frame = args.getFirst().getInt(); } + // Save the original user-supplied frame before the JVM skip adjustment. + // This value maps directly to hasArgsStack depth: caller(0) → depth 0 (current frame), + // caller(1) → depth 1 (caller's frame), etc. The hasArgsStack is pushed/popped in the + // instance apply() methods, one entry per Perl subroutine call, so the Nth entry from + // the top corresponds to the Nth caller() frame. + int originalFrame = frame; + Throwable t = new Throwable(); ExceptionFormatter.StackTraceResult result = ExceptionFormatter.formatExceptionDetailed(t); ArrayList> stackTrace = result.frames(); @@ -1970,12 +2016,23 @@ public static RuntimeList callerWithSub(RuntimeList args, int ctx, RuntimeScalar } } - // Add hasargs (element 4): 1 if @_ was populated for this sub - // Subroutines always have @_ available, so this is 1 for subs - // Check the subroutine name to determine if this is a sub call - boolean hasArgs = subName != null && !subName.isEmpty() && - !subName.equals("(eval)") && !subName.endsWith("::(eval)"); - res.add(hasArgs ? RuntimeScalarCache.scalarTrue : RuntimeScalarCache.scalarUndef); + // Add hasargs (element 4): whether @_ was freshly created for this call. + // In Perl 5, this is 1 for func(args) and &func(args), but false/empty + // for &func (no parens) which inherits the caller's @_. + // We consult hasArgsStack which is pushed in the instance apply() methods: + // - apply(RuntimeArray, int) pushes false (shared args / &func) + // - apply(String, RuntimeArray, int) pushes true (fresh args / func()) + // Fall back to the name-based heuristic for frames outside our tracking + // (e.g., top-level code, eval frames). + Boolean hasArgsFromStack = getHasArgsAt(originalFrame); + if (hasArgsFromStack != null) { + res.add(hasArgsFromStack ? RuntimeScalarCache.scalarTrue : RuntimeScalarCache.scalarUndef); + } else { + // Fallback: assume hasargs=true for named subs, false for eval + boolean hasArgs = subName != null && !subName.isEmpty() && + !subName.equals("(eval)") && !subName.endsWith("::(eval)"); + res.add(hasArgs ? RuntimeScalarCache.scalarTrue : RuntimeScalarCache.scalarUndef); + } // Add wantarray (element 5): undef for void, 0 for scalar, 1 for list // We don't currently track this per-frame, so return undef @@ -3064,7 +3121,15 @@ public RuntimeList apply(RuntimeArray a, int callContext) { } // Always push args for getCurrentArgs() support (used by List::Util::any/all/etc.) pushArgs(a); - + + // hasArgs tracking for caller()[4]: + // This is the 2-arg instance method, called from the 3-arg static apply(scalar, array, ctx). + // That static method is the "shared args" path — used when Perl code calls &func (no parens), + // which inherits the caller's @_ instead of creating a fresh one. + // Perl's caller()[4] (hasargs) should be false/empty for these calls. + // See also: the 3-arg instance method apply(name, array, ctx) which pushes true. + hasArgsStack.get().push(false); + // Push warning bits for FATAL warnings support String warningBits = getWarningBitsForCode(this); if (warningBits != null) { @@ -3085,7 +3150,7 @@ public RuntimeList apply(RuntimeArray a, int callContext) { if (warningBits != null) { WarningBitsRegistry.popCurrent(); } - popArgs(); + popArgs(); // also pops hasArgsStack — see popArgs() implementation if (DebugState.debugMode) { DebugHooks.exitSubroutine(); DebugState.popArgs(); @@ -3161,7 +3226,15 @@ public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) } // Always push args for getCurrentArgs() support (used by List::Util::any/all/etc.) pushArgs(a); - + + // hasArgs tracking for caller()[4]: + // This is the 3-arg instance method, called from the 4-arg static apply(scalar, name, args[], ctx). + // That static method is the "fresh args" path — used for normal func(args) and &func(args) calls, + // which create a new @_ from the supplied arguments. + // Perl's caller()[4] (hasargs) should be true (1) for these calls. + // See also: the 2-arg instance method apply(array, ctx) which pushes false. + hasArgsStack.get().push(true); + // Push warning bits for FATAL warnings support String warningBits = getWarningBitsForCode(this); if (warningBits != null) { @@ -3182,7 +3255,7 @@ public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) if (warningBits != null) { WarningBitsRegistry.popCurrent(); } - popArgs(); + popArgs(); // also pops hasArgsStack — see popArgs() implementation if (DebugState.debugMode) { DebugHooks.exitSubroutine(); DebugState.popArgs(); diff --git a/src/main/perl/lib/ExtUtils/MakeMaker.pm b/src/main/perl/lib/ExtUtils/MakeMaker.pm index 95be5fc3d..7e0ecc52c 100644 --- a/src/main/perl/lib/ExtUtils/MakeMaker.pm +++ b/src/main/perl/lib/ExtUtils/MakeMaker.pm @@ -233,14 +233,16 @@ sub _install_pure_perl { }, 'blib/lib'); } - # Fallback: scan current directory for .pm files (flat layout) - # Some CPAN distributions (e.g. Crypt::RC4) have .pm files at the - # root level instead of in lib/. Standard MakeMaker handles this via - # PMLIBDIRS which defaults to ['lib', $self->{BASEEXT}]. - # We derive the install subdirectory from the NAME parameter. - if (!%pm && $name) { + # Scan root-level .pm files and BASEEXT directory. + # Standard MakeMaker maps: ./*.pm => $(INST_LIBDIR)/*.pm + # where INST_LIBDIR = INST_LIB/Parent/Path (derived from NAME). + # PMLIBDIRS defaults to ['lib', $BASEEXT], so both lib/ (handled + # above) and root .pm files / BASEEXT dir are always scanned. + # This handles distributions like Math::Base::Convert where the + # main .pm lives at the root alongside sub-modules in lib/. + if ($name) { my @parts = split /::/, $name; - my $baseext = pop @parts; # Remove BASEEXT (e.g. XML::Parser -> Parser) + my $baseext = pop @parts; # BASEEXT (e.g. XML::Parser -> Parser) my $parent_dir = @parts ? File::Spec->catdir(@parts) : ''; # Scan flat .pm files in current directory @@ -251,7 +253,8 @@ sub _install_pure_perl { my $dest_rel = $parent_dir ? File::Spec->catfile($parent_dir, $file) : $file; - $pm{$file} = File::Spec->catfile($INSTALL_BASE, $dest_rel); + $pm{$file} = File::Spec->catfile($INSTALL_BASE, $dest_rel) + unless exists $pm{$file}; } closedir($dh); } @@ -266,7 +269,8 @@ sub _install_pure_perl { my $rel = $parent_dir ? File::Spec->catfile($parent_dir, $src) : $src; - $pm{$src} = File::Spec->catfile($INSTALL_BASE, $rel); + $pm{$src} = File::Spec->catfile($INSTALL_BASE, $rel) + unless exists $pm{$src}; }, no_chdir => 1, }, $baseext); diff --git a/src/main/perl/lib/diagnostics.pm b/src/main/perl/lib/diagnostics.pm new file mode 100644 index 000000000..8ba7c1b07 --- /dev/null +++ b/src/main/perl/lib/diagnostics.pm @@ -0,0 +1,723 @@ +package diagnostics; + +=head1 NAME + +diagnostics, splain - produce verbose warning diagnostics + +=head1 SYNOPSIS + +Using the C pragma: + + use diagnostics; + use diagnostics -verbose; + + diagnostics->enable; + diagnostics->disable; + +Using the C standalone filter program: + + perl program 2>diag.out + splain [-v] [-p] diag.out + +Using diagnostics to get stack traces from a misbehaving script: + + perl -Mdiagnostics=-traceonly my_script.pl + +=head1 DESCRIPTION + +=head2 The C Pragma + +This module extends the terse diagnostics normally emitted by both the +perl compiler and the perl interpreter (from running perl with a -w +switch or C), augmenting them with the more +explicative and endearing descriptions found in L. Like the +other pragmata, it affects the compilation phase of your program rather +than merely the execution phase. + +To use in your program as a pragma, merely invoke + + use diagnostics; + +at the start (or near the start) of your program. (Note +that this I enable perl's B<-w> flag.) Your whole +compilation will then be subject(ed :-) to the enhanced diagnostics. +These still go out B. + +Due to the interaction between runtime and compiletime issues, +and because it's probably not a very good idea anyway, +you may not use C to turn them off at compiletime. +However, you may control their behaviour at runtime using the +C and C methods to turn them off and on respectively. + +The B<-verbose> flag first prints out the L introduction before +any other diagnostics. The $diagnostics::PRETTY variable can generate nicer +escape sequences for pagers. + +Warnings dispatched from perl itself (or more accurately, those that match +descriptions found in L) are only displayed once (no duplicate +descriptions). User code generated warnings a la warn() are unaffected, +allowing duplicate user messages to be displayed. + +This module also adds a stack trace to the error message when perl dies. +This is useful for pinpointing what +caused the death. The B<-traceonly> (or +just B<-t>) flag turns off the explanations of warning messages leaving just +the stack traces. So if your script is dieing, run it again with + + perl -Mdiagnostics=-traceonly my_bad_script + +to see the call stack at the time of death. By supplying the B<-warntrace> +(or just B<-w>) flag, any warnings emitted will also come with a stack +trace. + +=head2 The I Program + +Another program, I is actually nothing +more than a link to the (executable) F module, as well as +a link to the F documentation. The B<-v> flag is like +the C directive. +The B<-p> flag is like the +$diagnostics::PRETTY variable. Since you're post-processing with +I, there's no sense in being able to enable() or disable() processing. + +Output from I is directed to B, unlike the pragma. + +=head1 EXAMPLES + +The following file is certain to trigger a few errors at both +runtime and compiletime: + + use diagnostics; + print NOWHERE "nothing\n"; + print STDERR "\n\tThis message should be unadorned.\n"; + warn "\tThis is a user warning"; + print "\nDIAGNOSTIC TESTER: Please enter a here: "; + my $a, $b = scalar ; + print "\n"; + print $x/$y; + +If you prefer to run your program first and look at its problem +afterwards, do this: + + perl -w test.pl 2>test.out + ./splain < test.out + +Note that this is not in general possible in shells of more dubious heritage, +as the theoretical + + (perl -w test.pl >/dev/tty) >& test.out + ./splain < test.out + +Because you just moved the existing B to somewhere else. + +If you don't want to modify your source code, but still have on-the-fly +warnings, do this: + + exec 3>&1; perl -w test.pl 2>&1 1>&3 3>&- | splain 1>&2 3>&- + +Nifty, eh? + +If you want to control warnings on the fly, do something like this. +Make sure you do the C first, or you won't be able to get +at the enable() or disable() methods. + + use diagnostics; # checks entire compilation phase + print "\ntime for 1st bogus diags: SQUAWKINGS\n"; + print BOGUS1 'nada'; + print "done with 1st bogus\n"; + + diagnostics->disable; # only turns off runtime warnings + print "\ntime for 2nd bogus: (squelched)\n"; + print BOGUS2 'nada'; + print "done with 2nd bogus\n"; + + diagnostics->enable; # turns back on runtime warnings + print "\ntime for 3rd bogus: SQUAWKINGS\n"; + print BOGUS3 'nada'; + print "done with 3rd bogus\n"; + + diagnostics->disable; + print "\ntime for 4th bogus: (squelched)\n"; + print BOGUS4 'nada'; + print "done with 4th bogus\n"; + +=head1 INTERNALS + +Diagnostic messages derive from the F file when available at +runtime. Otherwise, they may be embedded in the file itself when the +splain package is built. See the F for details. + +If an extant $SIG{__WARN__} handler is discovered, it will continue +to be honored, but only after the diagnostics::splainthis() function +(the module's $SIG{__WARN__} interceptor) has had its way with your +warnings. + +There is a $diagnostics::DEBUG variable you may set if you're desperately +curious what sorts of things are being intercepted. + + BEGIN { $diagnostics::DEBUG = 1 } + + +=head1 BUGS + +Not being able to say "no diagnostics" is annoying, but may not be +insurmountable. + +The C<-pretty> directive is called too late to affect matters. +You have to do this instead, and I you load the module. + + BEGIN { $diagnostics::PRETTY = 1 } + +I could start up faster by delaying compilation until it should be +needed, but this gets a "panic: top_level" when using the pragma form +in Perl 5.001e. + +While it's true that this documentation is somewhat subserious, if you use +a program named I, you should expect a bit of whimsy. + +=head1 AUTHOR + +Tom Christiansen >, 25 June 1995. + +=cut + +use strict; +use 5.009001; +use Carp; +$Carp::Internal{__PACKAGE__.""}++; + +our $VERSION = '1.40'; +our $DEBUG; +our $VERBOSE; +our $PRETTY; +our $TRACEONLY = 0; +our $WARNTRACE = 0; + +use Config; +use Text::Tabs 'expand'; +my $privlib = $Config{privlibexp}; +if ($^O eq 'VMS') { + require VMS::Filespec; + $privlib = VMS::Filespec::unixify($privlib); +} +my @trypod = ( + "$privlib/pod/perldiag.pod", + "$privlib/pods/perldiag.pod", + ); +# PerlOnJava: also search @INC for pod/perldiag.pod and Pod/perldiag.pod +# (the JAR bundles it as Pod/perldiag.pod with capital P) +for my $inc (@INC) { + push @trypod, "$inc/pod/perldiag.pod", "$inc/Pod/perldiag.pod"; +} +# handy for development testing of new warnings etc +unshift @trypod, "./pod/perldiag.pod" if -e "pod/perldiag.pod"; +(my $PODFILE) = ((grep { -e } @trypod), $trypod[$#trypod])[0]; + +$DEBUG ||= 0; + +local $| = 1; +local $_; +local $.; + +my $standalone; +my(%HTML_2_Troff, %HTML_2_Latin_1, %HTML_2_ASCII_7); + +CONFIG: { + our $opt_p = our $opt_d = our $opt_v = our $opt_f = ''; + + unless (caller) { + $standalone++; + require Getopt::Std; + Getopt::Std::getopts('pdvf:') + or die "Usage: $0 [-v] [-p] [-f splainpod]"; + $PODFILE = $opt_f if $opt_f; + $DEBUG = 2 if $opt_d; + $VERBOSE = $opt_v; + $PRETTY = $opt_p; + } + + if (open(POD_DIAG, '<', $PODFILE)) { + warn "Happy happy podfile from real $PODFILE\n" if $DEBUG; + last CONFIG; + } + + if (caller) { + INCPATH: { + for my $file ( (map { "$_/".__PACKAGE__.".pm" } @INC), $0) { + warn "Checking $file\n" if $DEBUG; + if (open(POD_DIAG, '<', $file)) { + while () { + next unless + /^__END__\s*# wish diag dbase were more accessible/; + print STDERR "podfile is $file\n" if $DEBUG; + last INCPATH; + } + } + } + } + } else { + print STDERR "podfile is \n" if $DEBUG; + *POD_DIAG = *main::DATA; + } +} +if (eof(POD_DIAG)) { + die "couldn't find diagnostic data in $PODFILE @INC $0"; +} + + +%HTML_2_Troff = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # forward slash / solidus + 'verbar' => '|', # vertical bar + + "Aacute" => "A\\*'", # capital A, acute accent + # etc + +); + +%HTML_2_Latin_1 = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # Forward slash / solidus + 'verbar' => '|', # vertical bar + + # # capital A, acute accent + "Aacute" => chr utf8::unicode_to_native(0xC1) + + # etc +); + +%HTML_2_ASCII_7 = ( + 'amp' => '&', # ampersand + 'lt' => '<', # left chevron, less-than + 'gt' => '>', # right chevron, greater-than + 'quot' => '"', # double quote + 'sol' => '/', # Forward slash / solidus + 'verbar' => '|', # vertical bar + + "Aacute" => "A" # capital A, acute accent + # etc +); + +our %HTML_Escapes; +*HTML_Escapes = do { + if ($standalone) { + $PRETTY ? \%HTML_2_Latin_1 : \%HTML_2_ASCII_7; + } else { + \%HTML_2_Latin_1; + } +}; + +*THITHER = $standalone ? *STDOUT : *STDERR; + +my %transfmt = (); +my $transmo = <) { + + sub _split_pod_link { + $_[0] =~ m'(?:([^|]*)\|)?([^/]*)(?:/("?)(.*)\3)?'s; + ($1,$2,$4); + } + + unescape(); + if ($PRETTY) { + sub noop { return $_[0] } # spensive for a noop + sub bold { my $str =$_[0]; $str =~ s/(.)/$1\b$1/g; return $str; } + sub italic { my $str = $_[0]; $str =~ s/(.)/_\b$1/g; return $str; } + s/C<<< (.*?) >>>|C<< (.*?) >>|[BC]<(.*?)>/bold($+)/ges; + s/[IF]<(.*?)>/italic($1)/ges; + s/L<(.*?)>/ + my($text,$page,$sect) = _split_pod_link($1); + defined $text + ? $text + : defined $sect + ? italic($sect) . ' in ' . italic($page) + : italic($page) + /ges; + s/S<(.*?)>/ + $1 + /ges; + } else { + s/C<<< (.*?) >>>|C<< (.*?) >>|[BC]<(.*?)>/$+/gs; + s/[IF]<(.*?)>/$1/gs; + s/L<(.*?)>/ + my($text,$page,$sect) = _split_pod_link($1); + defined $text + ? $text + : defined $sect + ? qq '"$sect" in $page' + : $page + /ges; + s/S<(.*?)>/ + $1 + /ges; + } + unless (/^=/) { + if (defined $header) { + if ( $header eq 'DESCRIPTION' && + ( /Optional warnings are enabled/ + || /Some of these messages are generic./ + ) ) + { + next; + } + $_ = expand $_; + s/^/ /gm; + $msg{$header} .= $_; + for my $h(@headers) { $msg{$h} .= $_ } + ++$seen_body; + undef $for_item; + } + next; + } + + # If we have not come across the body of the description yet, then + # the previous header needs to share the same description. + if ($seen_body) { + @headers = (); + } + else { + push @headers, $header if defined $header; + } + + if ( ! s/=item (.*?)\s*\z//s || $over_level != 1) { + + if ( s/=head1\sDESCRIPTION//) { + $msg{$header = 'DESCRIPTION'} = ''; + undef $for_item; + } + elsif( s/^=for\s+diagnostics\s*\n(.*?)\s*\z// ) { + $for_item = $1; + } + elsif( /^=over\b/ ) { + $over_level++; + } + elsif( /^=back\b/ ) { # Stop processing body here + $over_level--; + if ($over_level == 0) { + undef $header; + undef $for_item; + $seen_body = 0; + next; + } + } + next; + } + + if( $for_item ) { $header = $for_item; undef $for_item } + else { + $header = $1; + + $header =~ s/\n/ /gs; # Allow multi-line headers + } + + # strip formatting directives from =item line + $header =~ s/[A-Z]<(.*?)>/$1/g; + + # Since we strip "(\.\s*)\n" when we search a warning, strip it here as well + $header =~ s/(\.\s*)?$//; + + my @toks = split( /(%l?[dxX]|%[ucp]|%(?:\.\d+)?[fs])/, $header ); + if (@toks > 1) { + my $conlen = 0; + for my $i (0..$#toks){ + if( $i % 2 ){ + if( $toks[$i] eq '%c' ){ + $toks[$i] = '.'; + } elsif( $toks[$i] =~ /^%(?:d|u)$/ ){ + $toks[$i] = '\d+'; + } elsif( $toks[$i] =~ '^%(?:s|.*f)$' ){ + $toks[$i] = $i == $#toks ? '.*' : '.*?'; + } elsif( $toks[$i] =~ '%.(\d+)s' ){ + $toks[$i] = ".{$1}"; + } elsif( $toks[$i] =~ '^%l*([pxX])$' ){ + $toks[$i] = $1 eq 'X' ? '[\dA-F]+' : '[\da-f]+'; + } + } elsif( length( $toks[$i] ) ){ + $toks[$i] = quotemeta $toks[$i]; + $conlen += length( $toks[$i] ); + } + } + my $lhs = join( '', @toks ); + $lhs =~ s/(\\\s)+/\\s+/g; # Replace lit space with multi-space match + $transfmt{$header}{pat} = + " s^\\s*$lhs\\s*\Q$header\Es\n\t&& return 1;\n"; + $transfmt{$header}{len} = $conlen; + } else { + my $lhs = "\Q$header\E"; + $lhs =~ s/(\\\s)+/\\s+/g; # Replace lit space with multi-space match + $transfmt{$header}{pat} = + " s^\\s*$lhs\\s*\Q$header\E\n\t && return 1;\n"; + $transfmt{$header}{len} = length( $header ); + } + + print STDERR __PACKAGE__.": Duplicate entry: \"$header\"\n" + if $msg{$header}; + + $msg{$header} = ''; + $seen_body = 0; + } + + + close POD_DIAG unless *main::DATA eq *POD_DIAG; + + die "No diagnostics?" unless %msg; + + # Apply patterns in order of decreasing sum of lengths of fixed parts + # Seems the best way of hitting the right one. + for my $hdr ( sort { $transfmt{$b}{len} <=> $transfmt{$a}{len} } + keys %transfmt ){ + $transmo .= $transfmt{$hdr}{pat}; + } + $transmo .= " return 0;\n}\n"; + print STDERR $transmo if $DEBUG; + eval $transmo; + die $@ if $@; +} + +if ($standalone) { + if (!@ARGV and -t STDIN) { print STDERR "$0: Reading from STDIN\n" } + while (defined (my $error = <>)) { + splainthis($error) || print THITHER $error; + } + exit; +} + +my $olddie; +my $oldwarn; + +sub import { + shift; + $^W = 1; # yup, clobbered the global variable; + # tough, if you want diags, you want diags. + return if defined $SIG{__WARN__} && ($SIG{__WARN__} eq \&warn_trap); + + for (@_) { + + /^-d(ebug)?$/ && do { + $DEBUG++; + next; + }; + + /^-v(erbose)?$/ && do { + $VERBOSE++; + next; + }; + + /^-p(retty)?$/ && do { + print STDERR "$0: I'm afraid it's too late for prettiness.\n"; + $PRETTY++; + next; + }; + # matches trace and traceonly for legacy doc mixup reasons + /^-t(race(only)?)?$/ && do { + $TRACEONLY++; + next; + }; + /^-w(arntrace)?$/ && do { + $WARNTRACE++; + next; + }; + + warn "Unknown flag: $_"; + } + + $oldwarn = $SIG{__WARN__}; + $olddie = $SIG{__DIE__}; + $SIG{__WARN__} = \&warn_trap; + $SIG{__DIE__} = \&death_trap; +} + +sub enable { &import } + +sub disable { + shift; + return unless $SIG{__WARN__} eq \&warn_trap; + $SIG{__WARN__} = $oldwarn || ''; + $SIG{__DIE__} = $olddie || ''; +} + +sub warn_trap { + my $warning = $_[0]; + if (caller eq __PACKAGE__ or !splainthis($warning)) { + if ($WARNTRACE) { + print STDERR Carp::longmess($warning); + } else { + print STDERR $warning; + } + } + goto &$oldwarn if defined $oldwarn and $oldwarn and $oldwarn ne \&warn_trap; +}; + +sub death_trap { + my $exception = $_[0]; + + # See if we are coming from anywhere within an eval. If so we don't + # want to explain the exception because it's going to get caught. + my $in_eval = 0; + my $i = 0; + while (my $caller = (caller($i++))[3]) { + if ($caller eq '(eval)') { + $in_eval = 1; + last; + } + } + + splainthis($exception) unless $in_eval; + if (caller eq __PACKAGE__) { + print STDERR "INTERNAL EXCEPTION: $exception"; + } + &$olddie if defined $olddie and $olddie and $olddie ne \&death_trap; + + return if $in_eval; + + # We don't want to unset these if we're coming from an eval because + # then we've turned off diagnostics. + + # Switch off our die/warn handlers so we don't wind up in our own + # traps. + $SIG{__DIE__} = $SIG{__WARN__} = ''; + + $exception =~ s/\n(?=.)/\n\t/gas; + + die Carp::longmess("__diagnostics__") + =~ s/^__diagnostics__.*?line \d+\.?\n/ + "Uncaught exception from user code:\n\t$exception" + /re; + # up we go; where we stop, nobody knows, but i think we die now + # but i'm deeply afraid of the &$olddie guy reraising and us getting + # into an indirect recursion loop +}; + +my %exact_duplicate; +my %old_diag; +my $count; +my $wantspace; +sub splainthis { + return 0 if $TRACEONLY; + for (my $tmp = shift) { + local $\; + local $!; + ### &finish_compilation unless %msg; + s/(\.\s*)?\n+$//; + my $orig = $_; + # return unless defined; + + # get rid of the where-are-we-in-input part + s/, <.*?> (?:line|chunk).*$//; + + # Discard 1st " at line " and all text beyond + # but be aware of messages containing " at this-or-that" + my $real = 0; + my @secs = split( / at / ); + return unless @secs; + $_ = $secs[0]; + for my $i ( 1..$#secs ){ + if( $secs[$i] =~ /.+? (?:line|chunk) \d+/ ){ + $real = 1; + last; + } else { + $_ .= ' at ' . $secs[$i]; + } + } + + # remove parenthesis occurring at the end of some messages + s/^\((.*)\)$/$1/; + + if ($exact_duplicate{$orig}++) { + return &transmo; + } else { + return 0 unless &transmo; + } + + my $short = shorten($orig); + if ($old_diag{$_}) { + autodescribe(); + print THITHER "$short (#$old_diag{$_})\n"; + $wantspace = 1; + } elsif (!$msg{$_} && $orig =~ /\n./s) { + # A multiline message, like "Attempt to reload / + # Compilation failed" + my $found; + for (split /^/, $orig) { + splainthis($_) and $found = 1; + } + return $found; + } else { + autodescribe(); + $old_diag{$_} = ++$count; + print THITHER "\n" if $wantspace; + $wantspace = 0; + print THITHER "$short (#$old_diag{$_})\n"; + if ($msg{$_}) { + print THITHER $msg{$_}; + } else { + if (0 and $standalone) { + print THITHER " **** Error #$old_diag{$_} ", + ($real ? "is" : "appears to be"), + " an unknown diagnostic message.\n\n"; + } + return 0; + } + } + return 1; + } +} + +sub autodescribe { + if ($VERBOSE and not $count) { + print THITHER &{$PRETTY ? \&bold : \&noop}("DESCRIPTION OF DIAGNOSTICS"), + "\n$msg{DESCRIPTION}\n"; + } +} + +sub unescape { + s { + E< + ( [A-Za-z]+ ) + > + } { + do { + exists $HTML_Escapes{$1} + ? do { $HTML_Escapes{$1} } + : do { + warn "Unknown escape: E<$1> in $_"; + "E<$1>"; + } + } + }egx; +} + +sub shorten { + my $line = $_[0]; + if (length($line) > 79 and index($line, "\n") == -1) { + my $space_place = rindex($line, ' ', 79); + if ($space_place != -1) { + substr($line, $space_place, 1) = "\n\t"; + } + } + return $line; +} + + +1 unless $standalone; # or it'll complain about itself +__END__ # wish diag dbase were more accessible