From f8b1f952f944e6b0b236203da3a25cc98297de88 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 14:53:49 +0100 Subject: [PATCH 1/9] Fix AUTOLOAD dispatch order: search full MRO before AUTOLOAD In Perl, AUTOLOAD is only checked after the entire class hierarchy (including UNIVERSAL) has been searched for the method. Previously, AUTOLOAD was checked per-class during the hierarchy walk, causing UNIVERSAL methods like can(), isa() to be intercepted by AUTOLOAD in classes that define it (e.g. Image::ExifTool). Split findMethodInHierarchy into two passes: 1. Search all classes for the actual method 2. Only then search for AUTOLOAD Also fix can() to return coderef for forward declarations (sub foo;) which exist in the stash but are not yet defined. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/mro/InheritanceResolver.java | 28 +++++++------------ .../runtime/perlmodule/Universal.java | 8 ++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/mro/InheritanceResolver.java b/src/main/java/org/perlonjava/runtime/mro/InheritanceResolver.java index dae5aa00b..14cf9b03b 100644 --- a/src/main/java/org/perlonjava/runtime/mro/InheritanceResolver.java +++ b/src/main/java/org/perlonjava/runtime/mro/InheritanceResolver.java @@ -301,7 +301,8 @@ public static RuntimeScalar findMethodInHierarchy(String methodName, String perl System.err.flush(); } - // Search through the class hierarchy starting from the specified index + // Perl MRO: first pass — search all classes (including UNIVERSAL) for the method. + // AUTOLOAD is only checked after the entire hierarchy has been searched. 
for (int i = startFromIndex; i < linearizedClasses.size(); i++) { String className = linearizedClasses.get(i); String effectiveClassName = GlobalVariable.resolveStashAlias(className); @@ -314,42 +315,33 @@ public static RuntimeScalar findMethodInHierarchy(String methodName, String perl System.err.flush(); } - // Check if method exists in current class if (GlobalVariable.existsGlobalCodeRef(normalizedClassMethodName)) { RuntimeScalar codeRef = GlobalVariable.getGlobalCodeRef(normalizedClassMethodName); - // Perl method lookup should ignore undefined CODE slots (e.g. after `undef *pkg::method`). if (!codeRef.getDefinedBoolean()) { continue; } - // Cache the found method cacheMethod(cacheKey, codeRef); - if (TRACE_METHOD_RESOLUTION) { System.err.println(" FOUND method!"); System.err.flush(); } - return codeRef; } + } - // Method not found in current class, check AUTOLOAD - if (!autoloadEnabled || methodName.startsWith("(")) { - // refuse to AUTOLOAD tie() flags and overload markers (all start with "(") - } else { - // Check for AUTOLOAD in current class + // Second pass — method not found anywhere, check AUTOLOAD in class hierarchy. + // This matches Perl semantics: AUTOLOAD is only tried after the full MRO + // search (including UNIVERSAL) fails to find the method. + if (autoloadEnabled && !methodName.startsWith("(")) { + for (int i = startFromIndex; i < linearizedClasses.size(); i++) { + String className = linearizedClasses.get(i); + String effectiveClassName = GlobalVariable.resolveStashAlias(className); String autoloadName = (effectiveClassName.endsWith("::") ? 
effectiveClassName : effectiveClassName + "::") + "AUTOLOAD"; if (GlobalVariable.existsGlobalCodeRef(autoloadName)) { RuntimeScalar autoload = GlobalVariable.getGlobalCodeRef(autoloadName); if (autoload.getDefinedBoolean()) { - // System.out.println("AUTOLOAD: " + autoloadName + " looking for " + methodName); - - // The caller will need to set $AUTOLOAD before calling ((RuntimeCode) autoload.value).autoloadVariableName = autoloadName; - - // Cache the found method; - // In case AUTOLOAD creates the missing method, it will invalidate the cache cacheMethod(cacheKey, autoload); - return autoload; } } diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Universal.java b/src/main/java/org/perlonjava/runtime/perlmodule/Universal.java index d35d1d91d..5879e68b9 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Universal.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Universal.java @@ -130,6 +130,14 @@ public static RuntimeList can(RuntimeArray args, int ctx) { return method.getList(); } + // Forward declarations (sub foo;) exist in the stash but are not "defined" in the + // Perl sense, so findMethodInHierarchy skips them (falling through to AUTOLOAD). + // However, can() should still return the coderef for forward declarations. + String normalizedName = NameNormalizer.normalizeVariableName(methodName, perlClassName); + if (GlobalVariable.existsGlobalCodeRef(normalizedName)) { + return GlobalVariable.getGlobalCodeRef(normalizedName).getList(); + } + // Fallback: if either the class name or method name was stored as UTF-8 octets // (common when source/strings are treated as raw bytes), retry using a decoded form. String decodedMethodName = tryDecodeUtf8Octets(methodName); From 516a72897ae1ea854648080bfe50ce1e79d9036b Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sat, 28 Feb 2026 15:05:33 +0100 Subject: [PATCH 2/9] Fix file test operators on filehandles and stat _ caching - Store file path in CustomFileChannel so -f $fh and stat($fh) can resolve the underlying path from LayeredIOHandle delegates - Fix stat _ parser: move underscore check before bareword filehandle handler which was intercepting _ as a glob reference - Add STAT_LASTHANDLE/LSTAT_LASTHANDLE opcodes for bytecode interpreter so stat _ uses cached stat buffer instead of re-statting Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeInterpreter.java | 8 +++++ .../backend/bytecode/CompileOperator.java | 32 ++++++++++++------- .../backend/bytecode/InterpretedCode.java | 10 ++++++ .../bytecode/OpcodeHandlerExtended.java | 14 ++++++++ .../frontend/parser/OperatorParser.java | 18 +++++------ .../runtime/io/CustomFileChannel.java | 13 +++++--- .../runtime/operators/FileTestOperator.java | 16 +++++++++- .../perlonjava/runtime/operators/Stat.java | 23 ++++++++++++- 8 files changed, 107 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index f327efb6d..6f92505a2 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -1285,6 +1285,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c pc = OpcodeHandlerExtended.executeLstat(bytecode, pc, registers); break; + case Opcodes.STAT_LASTHANDLE: + pc = OpcodeHandlerExtended.executeStatLastHandle(bytecode, pc, registers); + break; + + case Opcodes.LSTAT_LASTHANDLE: + pc = OpcodeHandlerExtended.executeLstatLastHandle(bytecode, pc, registers); + break; + // File test operations (opcodes 190-216) - delegated to handler case Opcodes.FILETEST_R: case Opcodes.FILETEST_W: diff --git 
a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java index 4374be4fe..a8a763a91 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java @@ -554,21 +554,31 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode } } else if (op.equals("stat") || op.equals("lstat")) { // stat FILE or lstat FILE - int savedContext = bytecodeCompiler.currentCallContext; - bytecodeCompiler.currentCallContext = RuntimeContextType.SCALAR; - try { - node.operand.accept(bytecodeCompiler); - int operandReg = bytecodeCompiler.lastResultReg; + boolean isUnderscoreOperand = (node.operand instanceof IdentifierNode) + && ((IdentifierNode) node.operand).name.equals("_"); + if (isUnderscoreOperand) { int rd = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emit(op.equals("stat") ? Opcodes.STAT : Opcodes.LSTAT); + bytecodeCompiler.emit(op.equals("stat") ? Opcodes.STAT_LASTHANDLE : Opcodes.LSTAT_LASTHANDLE); bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(operandReg); - bytecodeCompiler.emit(savedContext); // Pass calling context - + bytecodeCompiler.emit(bytecodeCompiler.currentCallContext); bytecodeCompiler.lastResultReg = rd; - } finally { - bytecodeCompiler.currentCallContext = savedContext; + } else { + int savedContext = bytecodeCompiler.currentCallContext; + bytecodeCompiler.currentCallContext = RuntimeContextType.SCALAR; + try { + node.operand.accept(bytecodeCompiler); + int operandReg = bytecodeCompiler.lastResultReg; + + int rd = bytecodeCompiler.allocateRegister(); + bytecodeCompiler.emit(op.equals("stat") ? 
Opcodes.STAT : Opcodes.LSTAT); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(operandReg); + bytecodeCompiler.emit(savedContext); + bytecodeCompiler.lastResultReg = rd; + } finally { + bytecodeCompiler.currentCallContext = savedContext; + } } } else if (op.startsWith("-") && op.length() == 2) { // File test operators: -r, -w, -x, etc. diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index b39ab962f..a9352ffb3 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -655,6 +655,16 @@ public String disassemble() { int lstatCtx = bytecode[pc++]; sb.append("LSTAT r").append(rd).append(" = lstat(r").append(rs).append(", ctx=").append(lstatCtx).append(")\n"); break; + case Opcodes.STAT_LASTHANDLE: + rd = bytecode[pc++]; + int slhCtx = bytecode[pc++]; + sb.append("STAT_LASTHANDLE r").append(rd).append(" = stat(_, ctx=").append(slhCtx).append(")\n"); + break; + case Opcodes.LSTAT_LASTHANDLE: + rd = bytecode[pc++]; + int llhCtx = bytecode[pc++]; + sb.append("LSTAT_LASTHANDLE r").append(rd).append(" = lstat(_, ctx=").append(llhCtx).append(")\n"); + break; case Opcodes.FILETEST_R: rd = bytecode[pc++]; rs = bytecode[pc++]; diff --git a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java index 4849678ec..5b543dc6f 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java +++ b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java @@ -513,6 +513,20 @@ public static int executeLstat(int[] bytecode, int pc, RuntimeBase[] registers) return pc; } + public static int executeStatLastHandle(int[] bytecode, int pc, RuntimeBase[] registers) { + int rd = bytecode[pc++]; + int ctx = bytecode[pc++]; + registers[rd] = Stat.statLastHandle(ctx); 
+ return pc; + } + + public static int executeLstatLastHandle(int[] bytecode, int pc, RuntimeBase[] registers) { + int rd = bytecode[pc++]; + int ctx = bytecode[pc++]; + registers[rd] = Stat.lstatLastHandle(ctx); + return pc; + } + /** * Execute print operation. * Format: PRINT contentReg filehandleReg diff --git a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java index 6a8a059de..57aea0258 100644 --- a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java @@ -694,6 +694,15 @@ static OperatorNode parseStat(Parser parser, LexerToken token, int currentIndex) paren = true; } + if (nextToken.text.equals("_")) { + TokenUtils.consume(parser); + if (paren) { + TokenUtils.consume(parser, OPERATOR, ")"); + } + return new OperatorNode(token.text, + new IdentifierNode("_", parser.tokenIndex), parser.tokenIndex); + } + // stat/lstat: bareword filehandle (typically ALLCAPS) should be treated as a typeglob. // Consume it here, before generic expression parsing can turn it into a subroutine call. if (nextToken.type == IDENTIFIER) { @@ -710,15 +719,6 @@ static OperatorNode parseStat(Parser parser, LexerToken token, int currentIndex) return new OperatorNode(token.text, operand, currentIndex); } } - if (nextToken.text.equals("_")) { - // Handle `stat _` - TokenUtils.consume(parser); - if (paren) { - TokenUtils.consume(parser, OPERATOR, ")"); - } - return new OperatorNode(token.text, - new IdentifierNode("_", parser.tokenIndex), parser.tokenIndex); - } // Parse optional single argument (or default to $_) // If we've already consumed '(', we must parse a full expression up to ')'. 
diff --git a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java index cb9e8dc84..ecb1d0bd4 100644 --- a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java @@ -61,9 +61,8 @@ public class CustomFileChannel implements IOHandle { */ private final FileChannel fileChannel; - /** - * Tracks whether end-of-file has been reached during reading - */ + private final Path filePath; + private boolean isEOF; // When true, writes should always occur at end-of-file (Perl's append semantics). @@ -82,6 +81,7 @@ public class CustomFileChannel implements IOHandle { * @throws IOException if an I/O error occurs opening the file */ public CustomFileChannel(Path path, Set options) throws IOException { + this.filePath = path; this.fileChannel = FileChannel.open(path, options); this.isEOF = false; this.appendMode = false; @@ -99,11 +99,10 @@ public CustomFileChannel(Path path, Set options) throws IOEx * @throws IllegalArgumentException if options don't contain READ or WRITE */ public CustomFileChannel(FileDescriptor fd, Set options) throws IOException { + this.filePath = null; if (options.contains(StandardOpenOption.READ)) { - // Create a read channel from the file descriptor this.fileChannel = new FileInputStream(fd).getChannel(); } else if (options.contains(StandardOpenOption.WRITE)) { - // Create a write channel from the file descriptor this.fileChannel = new FileOutputStream(fd).getChannel(); } else { throw new IllegalArgumentException("Invalid options for FileDescriptor"); @@ -112,6 +111,10 @@ public CustomFileChannel(FileDescriptor fd, Set options) thr this.appendMode = false; } + public Path getFilePath() { + return filePath; + } + public void setAppendMode(boolean appendMode) { this.appendMode = appendMode; } diff --git a/src/main/java/org/perlonjava/runtime/operators/FileTestOperator.java 
b/src/main/java/org/perlonjava/runtime/operators/FileTestOperator.java index 5037fc64a..bef36c78c 100644 --- a/src/main/java/org/perlonjava/runtime/operators/FileTestOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/FileTestOperator.java @@ -1,6 +1,9 @@ package org.perlonjava.runtime.operators; import org.perlonjava.runtime.io.ClosedIOHandle; +import org.perlonjava.runtime.io.CustomFileChannel; +import org.perlonjava.runtime.io.IOHandle; +import org.perlonjava.runtime.io.LayeredIOHandle; import org.perlonjava.runtime.runtimetypes.RuntimeGlob; import org.perlonjava.runtime.runtimetypes.PerlCompilerException; import org.perlonjava.runtime.runtimetypes.RuntimeCode; @@ -259,7 +262,18 @@ public static RuntimeScalar fileTest(String operator, RuntimeScalar fileHandle) return scalarUndef; } - // For file test operators on file handles, return undef and set EBADF + // Try to get the file path from the handle for stat-based file tests + IOHandle innerHandle = fh.ioHandle; + while (innerHandle instanceof LayeredIOHandle lh) { + innerHandle = lh.getDelegate(); + } + if (innerHandle instanceof CustomFileChannel cfc) { + Path path = cfc.getFilePath(); + if (path != null) { + return fileTest(operator, new RuntimeScalar(path.toString())); + } + } + // Fallback for non-file handles (pipes, sockets, etc.) 
getGlobalVariable("main::!").set(9); updateLastStat(fileHandle, false, 9); return scalarUndef; diff --git a/src/main/java/org/perlonjava/runtime/operators/Stat.java b/src/main/java/org/perlonjava/runtime/operators/Stat.java index cfce73aa6..225c38b78 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Stat.java +++ b/src/main/java/org/perlonjava/runtime/operators/Stat.java @@ -1,6 +1,9 @@ package org.perlonjava.runtime.operators; import org.perlonjava.runtime.io.ClosedIOHandle; +import org.perlonjava.runtime.io.CustomFileChannel; +import org.perlonjava.runtime.io.IOHandle; +import org.perlonjava.runtime.io.LayeredIOHandle; import org.perlonjava.runtime.runtimetypes.RuntimeBase; import org.perlonjava.runtime.runtimetypes.RuntimeContextType; import org.perlonjava.runtime.runtimetypes.RuntimeIO; @@ -67,10 +70,18 @@ public static RuntimeList statLastHandle() { return stat(lastFileHandle); } + public static RuntimeBase statLastHandle(int ctx) { + return stat(lastFileHandle, ctx); + } + public static RuntimeList lstatLastHandle() { return lstat(lastFileHandle); } + public static RuntimeBase lstatLastHandle(int ctx) { + return lstat(lastFileHandle, ctx); + } + /** * stat with context awareness * @param arg the file or filehandle to stat @@ -126,8 +137,18 @@ public static RuntimeList stat(RuntimeScalar arg) { return res; // Return empty list } + // Try to get the file path from the handle + IOHandle innerHandle = fh.ioHandle; + while (innerHandle instanceof LayeredIOHandle lh) { + innerHandle = lh.getDelegate(); + } + if (innerHandle instanceof CustomFileChannel cfc) { + Path path = cfc.getFilePath(); + if (path != null) { + return stat(new RuntimeScalar(path.toString())); + } + } // For in-memory file handles (like PerlIO::scalar), we can't stat them - // They should return EBADF getGlobalVariable("main::!").set(9); updateLastStat(arg, false, 9, false); return res; From 348624a61d6ad7d04f06c512ab47fa53a7762631 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sat, 28 Feb 2026 15:23:18 +0100 Subject: [PATCH 3/9] Fix prototype @ argument parsing with parenthesized expressions f((expr) * y) was wrongly rejected as "too many arguments" when the subroutine had an @ prototype. The parseZeroOrMoreList call was using obeyParentheses=true, which mistook inner grouping parens for an argument list boundary. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java index bb45a56c7..b3c5b424e 100644 --- a/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java +++ b/src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java @@ -482,7 +482,7 @@ private static void handleListOrHashArgument(Parser parser, ListNode args, boole parser.tokenIndex = saveIndex; } - ListNode argList = ListParser.parseZeroOrMoreList(parser, 0, false, true, false, false); + ListNode argList = ListParser.parseZeroOrMoreList(parser, 0, false, false, false, false); // @ and % consume remaining arguments in LIST context // for (Node element : argList.elements) { // element.setAnnotation("context", "LIST"); From 24b880edf590eb3bd62e6826482d3946054563ee Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 15:27:11 +0100 Subject: [PATCH 4/9] Add goto LABEL support in bytecode interpreter Emit CREATE_GOTO + RETURN opcodes for goto LABEL statements, matching the JVM backend non-local goto control flow via RuntimeControlFlowList with ControlFlowType.GOTO. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeInterpreter.java | 11 +++++++++++ .../backend/bytecode/CompileOperator.java | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 6f92505a2..17d020b5b 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -1113,6 +1113,17 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.CREATE_GOTO: { + int rd = bytecode[pc++]; + int labelIdx = bytecode[pc++]; + String label = labelIdx == 255 ? null : code.stringPool[labelIdx]; + registers[rd] = new RuntimeControlFlowList( + ControlFlowType.GOTO, label, + code.sourceName, code.sourceLine + ); + break; + } + case Opcodes.IS_CONTROL_FLOW: { // Check if value is control flow: rd = (rs instanceof RuntimeControlFlowList) int rd = bytecode[pc++]; diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java index a8a763a91..e6b20866a 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileOperator.java @@ -2905,6 +2905,25 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode bytecodeCompiler.emitReg(fileReg); bytecodeCompiler.emit(bytecodeCompiler.currentCallContext); bytecodeCompiler.lastResultReg = rd; + } else if (op.equals("goto")) { + String labelStr = null; + if (node.operand instanceof ListNode labelNode && !labelNode.elements.isEmpty()) { + Node arg = labelNode.elements.getFirst(); + if (arg instanceof IdentifierNode) { + labelStr = ((IdentifierNode) arg).name; + } + } + if (labelStr == null) { + 
bytecodeCompiler.throwCompilerException("goto must be given label"); + } + int rd = bytecodeCompiler.allocateRegister(); + bytecodeCompiler.emit(Opcodes.CREATE_GOTO); + bytecodeCompiler.emitReg(rd); + int labelIdx = bytecodeCompiler.addToStringPool(labelStr); + bytecodeCompiler.emitReg(labelIdx); + bytecodeCompiler.emit(Opcodes.RETURN); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.lastResultReg = -1; } else { bytecodeCompiler.throwCompilerException("Unsupported operator: " + op); } From 104ff91f7f5d3fca3ed778105c5d4209c18b02fc Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 15:31:40 +0100 Subject: [PATCH 5/9] Fix unpack float/double for non-UTF8 binary data The UnpackState constructor was using UTF-8 byte encoding when the source scalar had the utf8 flag set, even when all code points were <= 255. This caused bytes like 0x80 to be encoded as 0xC2 0x80, corrupting numeric unpack formats (f, d, N, etc.). Split isUTF8Data (controls byte encoding) from isUTF8Flagged (controls A* whitespace trimming) so binary data always uses ISO-8859-1 encoding. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../org/perlonjava/runtime/operators/UnpackState.java | 10 ++++++---- .../runtime/operators/unpack/StringFormatHandler.java | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/operators/UnpackState.java b/src/main/java/org/perlonjava/runtime/operators/UnpackState.java index 8368b02a1..67b85e4f6 100644 --- a/src/main/java/org/perlonjava/runtime/operators/UnpackState.java +++ b/src/main/java/org/perlonjava/runtime/operators/UnpackState.java @@ -63,6 +63,7 @@ */ public class UnpackState { public final boolean isUTF8Data; + public final boolean isUTF8Flagged; private final String dataString; private final byte[] originalBytes; private final int[] codePoints; @@ -126,10 +127,11 @@ public UnpackState(String dataString, boolean startsWithU, boolean utf8Flagged) } // If we have Unicode characters beyond Latin-1, use extended UTF-8 (Perl semantics). - // Also, if the original scalar was UTF-8 flagged, treat it as UTF-8 data even when - // all code points are <= 255. This matches Perl behavior and is required for - // A* trimming of Unicode whitespace. - this.isUTF8Data = utf8Flagged || hasHighUnicode || hasSurrogates || hasBeyondUnicode; + // Only use UTF-8 byte encoding when there are actual high-Unicode characters. + // The utf8Flagged hint only affects character-mode operations (like A* trimming), + // not the byte representation used by numeric formats (f, d, N, V, etc.). 
+ this.isUTF8Flagged = utf8Flagged || hasHighUnicode || hasSurrogates || hasBeyondUnicode; + this.isUTF8Data = hasHighUnicode || hasSurrogates || hasBeyondUnicode; if (isUTF8Data) { this.originalBytes = encodeUtf8Extended(this.codePoints); } else { diff --git a/src/main/java/org/perlonjava/runtime/operators/unpack/StringFormatHandler.java b/src/main/java/org/perlonjava/runtime/operators/unpack/StringFormatHandler.java index 91e9281f9..1d6ded5ce 100644 --- a/src/main/java/org/perlonjava/runtime/operators/unpack/StringFormatHandler.java +++ b/src/main/java/org/perlonjava/runtime/operators/unpack/StringFormatHandler.java @@ -54,7 +54,7 @@ public void unpack(UnpackState state, List output, int count, boole // Perl's behavior depends on whether the source scalar is UTF-8 flagged. // For non-UTF8 (byte) strings, 'A' trims only ASCII whitespace and must // not treat \xA0 (NBSP) as whitespace. - str = state.isUTF8Data ? processString(str) : processStringByteMode(str); + str = state.isUTF8Flagged ? processString(str) : processStringByteMode(str); } // Pad if needed and not star count From f7ec9b71a3eeacc7eeabd1d91de6a791b064c64a Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 15:34:37 +0100 Subject: [PATCH 6/9] Fix regex octal escapes in character classes In Perl, \4 and \17 inside [] are octal, but Java treats \N as a backreference. Prepend 0 to make them unambiguous Java octal escapes. Handle both 1-digit and 2-digit octal sequences correctly. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/regex/RegexPreprocessorHelper.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java index 761e905ff..5bdc9da6d 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java @@ -626,9 +626,14 @@ static int handleRegexCharacterClassEscape(int offset, String s, StringBuilder s sb.append(Character.toChars(c2)); lastChar = octalValue; } else { - // Short octal or single digit, pass through - sb.append(Character.toChars(c2)); - lastChar = c2; + // Short octal (1-2 digits) — prepend 0 for Java + // In Perl, \1-\7 inside [] are octal; in Java, \N is a backreference + sb.append('0'); + for (int j = 0; j < octalLength; j++) { + sb.append(Character.toChars(s.codePointAt(offset + j))); + } + offset += octalLength - 1; + lastChar = octalValue; } } else if (c2 == '8' || c2 == '9') { // \8 and \9 are not valid octals - treat as literal digits From bc756d613075abf53552410c08f71b4abf90fdb8 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 16:32:45 +0100 Subject: [PATCH 7/9] Fix foreach with pre-declared lexical variable in bytecode interpreter When using `foreach $var (...)` where $var was declared earlier with `my`, the bytecode compiler allocated a fresh register instead of reusing the existing one. FOREACH_NEXT_OR_EXIT wrote to the new register while the loop body read from the original, causing $var to appear empty. Now checks for pre-existing lexical variables and reuses their register. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index b8c8fe0c6..3d752fe18 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -4085,8 +4085,18 @@ public void visit(For1Node node) { emitReg(listReg); // Step 3: Allocate loop variable register BEFORE entering scope - // This ensures both iterReg and varReg are protected from recycling - int varReg = allocateRegister(); + // For pre-existing lexical variables (e.g., `my $k; foreach $k (...)`), + // reuse the existing register so FOREACH_NEXT_OR_EXIT writes to the same + // slot the loop body reads from. + int varReg = -1; + if (globalLoopVarName == null && node.variable instanceof OperatorNode varOp + && varOp.operator.equals("$") && varOp.operand instanceof IdentifierNode idNode) { + String varName = "$" + idNode.name; + varReg = getVariableRegister(varName); + } + if (varReg == -1) { + varReg = allocateRegister(); + } // Step 3b: For global loop variable: emit LOCAL_SCALAR_SAVE_LEVEL. // This atomically saves getLocalLevel() into levelReg (pre-push), then calls makeLocal. 
@@ -4106,9 +4116,9 @@ public void visit(For1Node node) { // Step 5: If we have a named lexical loop variable, add it to the scope now if (node.variable != null && node.variable instanceof OperatorNode) { - OperatorNode varOp = (OperatorNode) node.variable; - if (varOp.operator.equals("my") && varOp.operand instanceof OperatorNode) { - OperatorNode sigilOp = (OperatorNode) varOp.operand; + OperatorNode varOp2 = (OperatorNode) node.variable; + if (varOp2.operator.equals("my") && varOp2.operand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) varOp2.operand; if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) sigilOp.operand).name; variableScopes.peek().put(varName, varReg); From f8624705ed59949f630e9b537902c9d57dca1534 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 19:19:09 +0100 Subject: [PATCH 8/9] Fix eval STRING corrupting outer scope my variables during recursive calls Clean up BEGIN aliases for captured variables in evalStringHelper finally block after compilation completes. These aliases (inserted into GlobalVariable so BEGIN blocks can access outer lexicals during parsing) were persisting beyond their useful lifetime, causing retrieveBeginScalar to return stale shared objects instead of fresh variables on recursive function calls. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/runtimetypes/RuntimeCode.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 43f16e70b..83623ab9e 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -413,6 +413,7 @@ public static Class evalStringHelper(RuntimeScalar code, String evalTag, Obje // We create: globalArrays["BEGIN_PKG_x::@arr"] = (the runtime @arr object) // Then when "say @arr" is parsed in the BEGIN, it resolves to BEGIN_PKG_x::@arr // which is aliased to the runtime array with values (a, b). + List<String> evalAliasKeys = new ArrayList<>(); Map capturedVars = capturedSymbolTable.getAllVisibleVariables(); for (SymbolTable.SymbolEntry entry : capturedVars.values()) { if (!entry.name().equals("@_") && !entry.decl().isEmpty() && !entry.name().startsWith("&")) { @@ -442,6 +443,7 @@ public static Class evalStringHelper(RuntimeScalar code, String evalTag, Obje } else if (runtimeValue instanceof RuntimeScalar) { GlobalVariable.globalVariables.put(fullName, (RuntimeScalar) runtimeValue); } + evalAliasKeys.add(entry.name().substring(0, 1) + fullName); } } } @@ -565,6 +567,21 @@ public static Class evalStringHelper(RuntimeScalar code, String evalTag, Obje setCurrentScope(capturedSymbolTable); + // Clean up BEGIN aliases for captured variables after compilation. + // These aliases were only needed during parsing (for BEGIN blocks to access + // outer lexicals). Leaving them in GlobalVariable would cause corruption + // if a recursive call re-enters the same function and its `my` declaration + // calls retrieveBeginScalar, finding the stale alias instead of creating + // a fresh variable. 
+ for (String key : evalAliasKeys) { + String fullName = key.substring(1); + switch (key.charAt(0)) { + case '$' -> GlobalVariable.globalVariables.remove(fullName); + case '@' -> GlobalVariable.globalArrays.remove(fullName); + case '%' -> GlobalVariable.globalHashes.remove(fullName); + } + } + // Store source lines in symbol table if $^P flags are set // Do this on both success and failure paths when flags require retention // Use the original evalString and actualFileName; AST may be null on failure From 4f6b37d485f6c674e20eeda8b84c5e5e5a4f4947 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sat, 28 Feb 2026 19:57:13 +0100 Subject: [PATCH 9/9] Fix stat _/lstat _ to use cached stat buffer instead of re-statting statLastHandle/lstatLastHandle were re-statting the last file argument instead of returning the cached stat result. This caused stat _ after a failed stat to return the wrong errno (e.g. ENOENT instead of EBADF). Also ensure $! is correctly set in scalar context paths. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../perlonjava/runtime/operators/Stat.java | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/operators/Stat.java b/src/main/java/org/perlonjava/runtime/operators/Stat.java index 225c38b78..5feb1b9af 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Stat.java +++ b/src/main/java/org/perlonjava/runtime/operators/Stat.java @@ -24,6 +24,8 @@ import static org.perlonjava.runtime.operators.FileTestOperator.lastBasicAttr; import static org.perlonjava.runtime.operators.FileTestOperator.lastFileHandle; import static org.perlonjava.runtime.operators.FileTestOperator.lastPosixAttr; +import static org.perlonjava.runtime.operators.FileTestOperator.lastStatOk; +import static org.perlonjava.runtime.operators.FileTestOperator.lastStatErrno; import static org.perlonjava.runtime.operators.FileTestOperator.updateLastStat; import static 
org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; import static org.perlonjava.runtime.runtimetypes.RuntimeIO.resolvePath; @@ -67,19 +69,49 @@ private static int getPermissionsOctal(BasicFileAttributes basicAttr, PosixFileA } public static RuntimeList statLastHandle() { - return stat(lastFileHandle); + if (!lastStatOk) { + getGlobalVariable("main::!").set(9); // EBADF + return new RuntimeList(); + } + RuntimeList res = new RuntimeList(); + statInternal(res, lastBasicAttr, lastPosixAttr); + getGlobalVariable("main::!").set(0); + return res; } public static RuntimeBase statLastHandle(int ctx) { - return stat(lastFileHandle, ctx); + if (ctx == RuntimeContextType.SCALAR) { + if (!lastStatOk) { + getGlobalVariable("main::!").set(9); // EBADF + return new RuntimeScalar(""); + } + getGlobalVariable("main::!").set(0); + return scalarTrue; + } + return statLastHandle(); } public static RuntimeList lstatLastHandle() { - return lstat(lastFileHandle); + if (!lastStatOk) { + getGlobalVariable("main::!").set(9); // EBADF + return new RuntimeList(); + } + RuntimeList res = new RuntimeList(); + statInternal(res, lastBasicAttr, lastPosixAttr); + getGlobalVariable("main::!").set(0); + return res; } public static RuntimeBase lstatLastHandle(int ctx) { - return lstat(lastFileHandle, ctx); + if (ctx == RuntimeContextType.SCALAR) { + if (!lastStatOk) { + getGlobalVariable("main::!").set(9); // EBADF + return new RuntimeScalar(""); + } + getGlobalVariable("main::!").set(0); + return scalarTrue; + } + return lstatLastHandle(); } /**