From 12773917a3f61cc4d1e313dbf70fdabf486e509f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 1 Mar 2026 19:50:07 +0100 Subject: [PATCH 1/9] Fix ExifTool test crashes: substr warning, named sub return, bitwise cast - substr outside of string: emit warning + return undef instead of throwing, both read path (Operator.substr) and write path (RuntimeSubstrLvalue.set). Matches Perl 5 behavior. - Named sub definitions now return NumberNode("1") instead of empty ListNode, fixing "did not return a true value" for modules ending with a sub definition (e.g. Protobuf.pm). - Interpreter bitwise operators use .scalar() instead of direct RuntimeScalar casts, fixing ClassCastException when register holds RuntimeList (e.g. Pentax.pm via interpreter backend). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../bytecode/OpcodeHandlerExtended.java | 28 +++++++++---------- .../frontend/parser/SubroutineParser.java | 11 +++----- .../runtime/operators/Operator.java | 12 ++++++++ .../runtimetypes/RuntimeSubstrLvalue.java | 6 +++- src/test/resources/unit/lvalue_substr.t | 27 ++++++++++++++---- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java index fe5995c2c..561553d88 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java +++ b/src/main/java/org/perlonjava/backend/bytecode/OpcodeHandlerExtended.java @@ -397,8 +397,8 @@ public static int executeBitwiseAndBinary(int[] bytecode, int pc, RuntimeBase[] int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseAnd( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -412,8 +412,8 @@ public static int executeBitwiseOrBinary(int[] bytecode, int pc, RuntimeBase[] r int rs1 = bytecode[pc++]; int rs2 
= bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseOr( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -427,8 +427,8 @@ public static int executeBitwiseXorBinary(int[] bytecode, int pc, RuntimeBase[] int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseXor( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -442,8 +442,8 @@ public static int executeStringBitwiseAnd(int[] bytecode, int pc, RuntimeBase[] int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseAndDot( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -457,8 +457,8 @@ public static int executeStringBitwiseOr(int[] bytecode, int pc, RuntimeBase[] r int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseOrDot( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -472,8 +472,8 @@ public static int executeStringBitwiseXor(int[] bytecode, int pc, RuntimeBase[] int rs1 = bytecode[pc++]; int rs2 = bytecode[pc++]; registers[rd] = BitwiseOperators.bitwiseXorDot( - (RuntimeScalar) registers[rs1], - (RuntimeScalar) registers[rs2] + registers[rs1].scalar(), + registers[rs2].scalar() ); return pc; } @@ -485,7 +485,7 @@ public static int executeStringBitwiseXor(int[] bytecode, int pc, RuntimeBase[] public static int executeBitwiseNotBinary(int[] bytecode, int pc, RuntimeBase[] registers) { int rd = bytecode[pc++]; int rs = bytecode[pc++]; - registers[rd] = BitwiseOperators.bitwiseNotBinary((RuntimeScalar) registers[rs]); + registers[rd] = BitwiseOperators.bitwiseNotBinary(registers[rs].scalar()); return pc; } @@ -496,7 +496,7 @@ public static int 
executeBitwiseNotBinary(int[] bytecode, int pc, RuntimeBase[] public static int executeBitwiseNotString(int[] bytecode, int pc, RuntimeBase[] registers) { int rd = bytecode[pc++]; int rs = bytecode[pc++]; - registers[rd] = BitwiseOperators.bitwiseNotDot((RuntimeScalar) registers[rs]); + registers[rd] = BitwiseOperators.bitwiseNotDot(registers[rs].scalar()); return pc; } diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 9cba34c7d..36edb17d9 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -578,11 +578,11 @@ static String consumeAttributes(Parser parser, List attributes) { return prototype; } - public static ListNode handleNamedSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, String declaration) { + public static Node handleNamedSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, String declaration) { return handleNamedSubWithFilter(parser, subName, prototype, attributes, block, false, declaration); } - public static ListNode handleNamedSubWithFilter(Parser parser, String subName, String prototype, List attributes, BlockNode block, boolean filterLexicalMethods, String declaration) { + public static Node handleNamedSubWithFilter(Parser parser, String subName, String prototype, List attributes, BlockNode block, boolean filterLexicalMethods, String declaration) { // Check if there's a lexical forward declaration (our/my/state sub name;) that this definition should fulfill String lexicalKey = "&" + subName; SymbolTable.SymbolEntry lexicalEntry = parser.ctx.symbolTable.getSymbolEntry(lexicalKey); @@ -642,8 +642,7 @@ public static ListNode handleNamedSubWithFilter(Parser parser, String subName, S // Execute the BEGIN block immediately during parsing SpecialBlockParser.runSpecialBlock(parser, 
"BEGIN", beginBlock); - // Return empty list since the assignment already executed - return new ListNode(parser.tokenIndex); + return new NumberNode("1", parser.tokenIndex); } } } @@ -863,9 +862,7 @@ public static ListNode handleNamedSubWithFilter(Parser parser, String subName, S RuntimeCode placeholderForSupplier = (RuntimeCode) codeRef.value; placeholderForSupplier.compilerSupplier = subroutineCreationTaskSupplier; - - // return an empty AST list - return new ListNode(parser.tokenIndex); + return new NumberNode("1", parser.tokenIndex); } private static SubroutineNode handleAnonSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, int currentIndex) { diff --git a/src/main/java/org/perlonjava/runtime/operators/Operator.java b/src/main/java/org/perlonjava/runtime/operators/Operator.java index e0fa6a696..7d2a7db97 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Operator.java +++ b/src/main/java/org/perlonjava/runtime/operators/Operator.java @@ -272,6 +272,18 @@ public static RuntimeScalar substr(int ctx, RuntimeBase... 
args) { offset = strLength + offset; } + if (offset < 0 || offset > strLength) { + WarnDie.warn(new RuntimeScalar("substr outside of string"), + RuntimeScalarCache.scalarEmptyString); + if (replacement != null) { + return new RuntimeScalar(); + } + var lvalue = new RuntimeSubstrLvalue((RuntimeScalar) args[0], "", originalOffset, originalLength); + lvalue.type = RuntimeScalarType.UNDEF; + lvalue.value = null; + return lvalue; + } + // Ensure offset is within bounds offset = Math.max(0, Math.min(offset, strLength)); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeSubstrLvalue.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeSubstrLvalue.java index 5fe125bc3..7cab66f05 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeSubstrLvalue.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeSubstrLvalue.java @@ -1,5 +1,7 @@ package org.perlonjava.runtime.runtimetypes; +import org.perlonjava.runtime.operators.WarnDie; + /** * Represents a substring of a RuntimeScalar that can be used as an lvalue (left-hand value). * This class allows for modification of a specific portion of a string within a RuntimeScalar. 
@@ -64,7 +66,9 @@ public RuntimeScalar set(RuntimeScalar value) { actualOffset = 0; } if (actualOffset > strLength) { - throw new PerlCompilerException("substr outside of string"); + WarnDie.warn(new RuntimeScalar("substr outside of string"), + RuntimeScalarCache.scalarEmptyString); + return this; } // Calculate the actual length, handling negative lengths diff --git a/src/test/resources/unit/lvalue_substr.t b/src/test/resources/unit/lvalue_substr.t index d2b389b7c..63d8cc7d7 100644 --- a/src/test/resources/unit/lvalue_substr.t +++ b/src/test/resources/unit/lvalue_substr.t @@ -1,19 +1,20 @@ use strict; use warnings; -use Test::More tests => 12; +use Test::More tests => 16; # Test basic substring assignment my $str = "Hello, world!"; substr($str, 0, 5) = "Greetings"; is($str, "Greetings, world!", "Basic substring assignment"); -# Test assignment beyond string length +# Test assignment beyond string length (warns, doesn't modify string) $str = "Short"; -my $error = eval { +{ + my $warned = 0; + local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /substr outside of string/ }; substr($str, 10, 5) = "long"; - 1; -} ? 
"" : $@; -like($error, qr/substr outside of string/, "Assignment beyond string length throws correct error"); + ok($warned, "Assignment beyond string length warns"); +} # Test assignment with negative offset $str = "Hello, world!"; @@ -65,3 +66,17 @@ is($str, "Reve", "Empty string assignment"); $str = ""; substr($str, 0, 0) = "New"; is($str, "New", "Assignment to empty string"); + +# Test read with offset beyond string returns undef +$str = "hello"; +my $val = substr($str, 6, 1); +is($val, undef, "Read with offset beyond string returns undef"); + +# Test read with too-negative offset returns undef +$val = substr($str, -10, 1); +is($val, undef, "Read with too-negative offset returns undef"); + +# Test read at exact end returns empty string (not undef) +$val = substr($str, 5, 1); +ok(defined($val), "Read at exact string end returns defined value"); +is($val, "", "Read at exact string end returns empty string"); From 22c64a729d0e1302dd65f4dee9b86d9f30a199df Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 1 Mar 2026 19:59:09 +0100 Subject: [PATCH 2/9] Fix root cause: MiscOpcodeHandler scalar context for RuntimeList Operators like unpack, localtime, gmtime, caller return RuntimeList even in scalar context. The JVM backend handles this with explicit .scalar() calls, but the interpreter backend stored RuntimeList directly into registers. Add scalar conversion in MiscOpcodeHandler before storing results, mirroring the JVM backend behavior. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../org/perlonjava/backend/bytecode/MiscOpcodeHandler.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/org/perlonjava/backend/bytecode/MiscOpcodeHandler.java b/src/main/java/org/perlonjava/backend/bytecode/MiscOpcodeHandler.java index f8b97de2f..9068e3f2c 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/MiscOpcodeHandler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/MiscOpcodeHandler.java @@ -8,6 +8,7 @@ import org.perlonjava.runtime.operators.Unpack; import org.perlonjava.runtime.runtimetypes.RuntimeBase; import org.perlonjava.runtime.runtimetypes.RuntimeCode; +import org.perlonjava.runtime.runtimetypes.RuntimeContextType; import org.perlonjava.runtime.runtimetypes.RuntimeList; import org.perlonjava.runtime.runtimetypes.RuntimeScalar; @@ -91,6 +92,9 @@ public static int execute(int opcode, int[] bytecode, int pc, RuntimeBase[] regi default -> throw new IllegalStateException("Unknown opcode in MiscOpcodeHandler: " + opcode); }; + if (ctx == RuntimeContextType.SCALAR && result instanceof RuntimeList) { + result = ((RuntimeList) result).scalar(); + } registers[rd] = result; return pc; } From 735b240faf3a758ef8ebfada8efe7ede1768f87f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 1 Mar 2026 20:24:11 +0100 Subject: [PATCH 3/9] Fix AUTOLOAD not triggering for forward-declared subs When a forward-declared sub (e.g. sub foo;) had no implementation, calling it would throw Undefined subroutine without trying AUTOLOAD. This affected both instance apply() methods in RuntimeCode. The fix adds explicit methodHandle null checks after compilerSupplier runs, with AUTOLOAD fallback logic. Also fixes subroutine name resolution to prefer the code object's own packageName::subName over the caller-provided name (which could be tailcall from the JVM trampoline). Fixes 19 ExifTool Lang.t test failures. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/runtimetypes/RuntimeCode.java | 72 ++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 7f785d5dc..44aa46cab 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -1568,12 +1568,11 @@ public static RuntimeList apply(RuntimeScalar runtimeScalar, String subroutineNa } // Does AUTOLOAD exist? - String fullSubName = subroutineName; - if (fullSubName.isEmpty() && code.packageName != null && code.subName != null) { - fullSubName = code.packageName + "::" + code.subName; - } + String fullSubName = (code.packageName != null && code.subName != null) + ? code.packageName + "::" + code.subName + : subroutineName; - if (!fullSubName.isEmpty()) { + if (!fullSubName.isEmpty() && fullSubName.contains("::")) { // If this is an imported forward declaration, check AUTOLOAD in the source package FIRST if (code.sourcePackage != null && !code.sourcePackage.isEmpty()) { String sourceAutoloadString = code.sourcePackage + "::AUTOLOAD"; @@ -1594,6 +1593,7 @@ public static RuntimeList apply(RuntimeScalar runtimeScalar, String subroutineNa } throw new PerlCompilerException("Undefined subroutine &" + fullSubName + " called at "); } + throw new PerlCompilerException("Undefined subroutine &" + fullSubName + " called at "); } RuntimeScalar overloadedCode = handleCodeOverload(runtimeScalar); @@ -1729,6 +1729,32 @@ public RuntimeList apply(RuntimeArray a, int callContext) { this.compilerSupplier.get(); } + if (this.methodHandle == null) { + String fullSubName = ""; + if (this.packageName != null && this.subName != null) { + fullSubName = this.packageName + "::" + this.subName; + } + if (!fullSubName.isEmpty()) { + if (this.sourcePackage != 
null && !this.sourcePackage.isEmpty()) { + String sourceAutoloadString = this.sourcePackage + "::AUTOLOAD"; + RuntimeScalar sourceAutoload = GlobalVariable.getGlobalCodeRef(sourceAutoloadString); + if (sourceAutoload.getDefinedBoolean()) { + String sourceSubroutineName = this.sourcePackage + "::" + this.subName; + getGlobalVariable(sourceAutoloadString).set(sourceSubroutineName); + return apply(sourceAutoload, a, callContext); + } + } + String autoloadString = fullSubName.substring(0, fullSubName.lastIndexOf("::") + 2) + "AUTOLOAD"; + RuntimeScalar autoload = GlobalVariable.getGlobalCodeRef(autoloadString); + if (autoload.getDefinedBoolean()) { + getGlobalVariable(autoloadString).set(fullSubName); + return apply(autoload, a, callContext); + } + throw new PerlCompilerException("Undefined subroutine &" + fullSubName + " called at "); + } + throw new PerlCompilerException("Undefined subroutine called at "); + } + RuntimeList result; if (isStatic) { result = (RuntimeList) this.methodHandle.invoke(a, callContext); @@ -1736,15 +1762,6 @@ public RuntimeList apply(RuntimeArray a, int callContext) { result = (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); } return result; - } catch (NullPointerException e) { - - if (this.methodHandle == null) { - throw new PerlCompilerException("Subroutine exists but has null method handle (possible compilation or registration error) at "); - } else if (this.codeObject == null && !isStatic) { - throw new PerlCompilerException("Subroutine exists but has null code object at "); - } else { - throw new PerlCompilerException("Null pointer exception in subroutine call: " + e.getMessage() + " at "); - } } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (!(targetException instanceof RuntimeException)) { @@ -1765,6 +1782,31 @@ public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) this.compilerSupplier.get(); } + if (this.methodHandle == null) { + String 
fullSubName = (this.packageName != null && this.subName != null) + ? this.packageName + "::" + this.subName + : subroutineName; + if (fullSubName != null && !fullSubName.isEmpty() && fullSubName.contains("::")) { + if (this.sourcePackage != null && !this.sourcePackage.isEmpty()) { + String sourceAutoloadString = this.sourcePackage + "::AUTOLOAD"; + RuntimeScalar sourceAutoload = GlobalVariable.getGlobalCodeRef(sourceAutoloadString); + if (sourceAutoload.getDefinedBoolean()) { + String sourceSubroutineName = this.sourcePackage + "::" + this.subName; + getGlobalVariable(sourceAutoloadString).set(sourceSubroutineName); + return apply(sourceAutoload, a, callContext); + } + } + String autoloadString = fullSubName.substring(0, fullSubName.lastIndexOf("::") + 2) + "AUTOLOAD"; + RuntimeScalar autoload = GlobalVariable.getGlobalCodeRef(autoloadString); + if (autoload.getDefinedBoolean()) { + getGlobalVariable(autoloadString).set(fullSubName); + return apply(autoload, a, callContext); + } + throw new PerlCompilerException("Undefined subroutine &" + fullSubName + " called at "); + } + throw new PerlCompilerException("Undefined subroutine &" + (fullSubName != null ? fullSubName : "") + " called at "); + } + RuntimeList result; if (isStatic) { result = (RuntimeList) this.methodHandle.invoke(a, callContext); @@ -1772,8 +1814,6 @@ public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) result = (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); } return result; - } catch (NullPointerException e) { - throw new PerlCompilerException("Undefined subroutine &" + subroutineName + " called at "); } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (!(targetException instanceof RuntimeException)) { From b5fb299d15737c6d655ba201264e94a07b5b8be8 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sun, 1 Mar 2026 20:44:30 +0100 Subject: [PATCH 4/9] Fix Encode::is_utf8() inverted result and sysread for scalar refs Encode::is_utf8() was returning true for BYTE_STRING and false for STRING - the exact opposite of Perl 5 semantics. This caused ExifTool Sanitize() and WriteInfo() to double-encode UTF-8 data. Also fixed sysread() on scalar-backed IO handles (open $var) which was returning undef instead of delegating to ScalarBackedIO.sysread(). Fixes IPTC, XMP, and PNG test failures related to UTF-8 encoding. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/operators/IOOperator.java | 27 ++++++++++++++++--- .../perlonjava/runtime/perlmodule/Encode.java | 2 +- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java index d861975ea..93d44cac2 100644 --- a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java @@ -651,9 +651,30 @@ public static RuntimeScalar sysread(int ctx, RuntimeBase... 
args) { // Check for in-memory handles (ScalarBackedIO) IOHandle baseHandle = getBaseHandle(fh.ioHandle); - if (baseHandle instanceof ScalarBackedIO) { - getGlobalVariable("main::!").set("Invalid argument"); - return new RuntimeScalar(); // undef + if (baseHandle instanceof ScalarBackedIO scalarIO) { + RuntimeScalar result; + try { + result = scalarIO.sysread(length); + } catch (Exception e) { + getGlobalVariable("main::!").set("Bad file descriptor"); + return new RuntimeScalar(); + } + if (!result.getDefinedBoolean()) { + return new RuntimeScalar(0); + } + String readData = result.toString(); + String existing = target.toString(); + if (offset > 0) { + while (existing.length() < offset) existing += "\0"; + target.set(existing.substring(0, offset) + readData); + } else if (offset < 0) { + int effectiveOffset = existing.length() + offset; + if (effectiveOffset < 0) effectiveOffset = 0; + target.set(existing.substring(0, effectiveOffset) + readData); + } else { + target.set(readData); + } + return new RuntimeScalar(readData.length()); } // Try to perform the system read diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java index bb39ab3ab..ff1adcede 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java @@ -175,7 +175,7 @@ public static RuntimeList is_utf8(RuntimeArray args, int ctx) { throw new IllegalStateException("Bad number of arguments for is_utf8"); } - return RuntimeScalarCache.getScalarBoolean(args.get(0).type == BYTE_STRING).getList(); + return RuntimeScalarCache.getScalarBoolean(args.get(0).type != BYTE_STRING).getList(); // // In PerlOnJava, strings are always internally Unicode (Java strings) // // So we'll check if the string contains any non-ASCII characters From 3839c4c06b497016674a11e4b5c72b35ca230e86 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sun, 1 Mar 2026 21:27:33 +0100 Subject: [PATCH 5/9] Fix binary data handling: BYTE_STRING preservation in concat, read, and sysread - StringOperators: STRING+BYTE_STRING concat returns BYTE_STRING when no wide chars present, matching Perl 5 utf8 flag semantics - CustomFileChannel: read() and sysread() return byte[] (BYTE_STRING) instead of StringBuilder (STRING) to preserve binary data type - ScalarBackedIO: same byte[] return for read/sysread and write methods - Readline.read(): preserve BYTE_STRING type through offset handling Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/io/CustomFileChannel.java | 18 ++-- .../perlonjava/runtime/io/ScalarBackedIO.java | 17 ++- .../runtime/operators/Readline.java | 30 ++---- .../runtime/operators/StringOperators.java | 102 +++++++++--------- 4 files changed, 72 insertions(+), 95 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java index ecb1d0bd4..9aefc3030 100644 --- a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java @@ -156,12 +156,9 @@ public RuntimeScalar doRead(int maxBytes, Charset charset) { // ignore } - // Convert bytes to string where each char represents a byte - StringBuilder result = new StringBuilder(bytesRead); - for (int i = 0; i < bytesRead; i++) { - result.append((char) (buffer[i] & 0xFF)); - } - return new RuntimeScalar(result.toString()); + byte[] result = new byte[bytesRead]; + System.arraycopy(buffer, 0, result, 0, bytesRead); + return new RuntimeScalar(result); } catch (IOException e) { return handleIOException(e, "Read operation failed"); } @@ -357,14 +354,11 @@ public RuntimeScalar sysread(int length) { return new RuntimeScalar(""); } - // Convert bytes to string representation buffer.flip(); - StringBuilder result = new StringBuilder(bytesRead); - while 
(buffer.hasRemaining()) { - result.append((char) (buffer.get() & 0xFF)); - } + byte[] result = new byte[bytesRead]; + buffer.get(result); - return new RuntimeScalar(result.toString()); + return new RuntimeScalar(result); } catch (IOException e) { getGlobalVariable("main::!").set(e.getMessage()); return new RuntimeScalar(); // undef diff --git a/src/main/java/org/perlonjava/runtime/io/ScalarBackedIO.java b/src/main/java/org/perlonjava/runtime/io/ScalarBackedIO.java index 4437a98d0..337e24b0b 100644 --- a/src/main/java/org/perlonjava/runtime/io/ScalarBackedIO.java +++ b/src/main/java/org/perlonjava/runtime/io/ScalarBackedIO.java @@ -49,7 +49,7 @@ public RuntimeScalar doRead(int maxBytes, Charset charset) { isEOF = true; } - return new RuntimeScalar(decoded); + return new RuntimeScalar(buffer); } @Override @@ -83,8 +83,8 @@ public RuntimeScalar write(String string) { currentBytes.length - position - newBytes.length); } - // Update backing scalar - backingScalar.set(new String(resultBytes, StandardCharsets.ISO_8859_1)); + // Update backing scalar (preserve BYTE_STRING type for binary data) + backingScalar.set(new RuntimeScalar(resultBytes)); position += newBytes.length; return RuntimeScalarCache.scalarTrue; @@ -207,20 +207,15 @@ public RuntimeScalar sysread(int length) { int available = bytes.length - position; if (available <= 0) { - // EOF return new RuntimeScalar(""); } int toRead = Math.min(length, available); - - // Convert bytes to string representation - StringBuilder result = new StringBuilder(toRead); - for (int i = 0; i < toRead; i++) { - result.append((char) (bytes[position + i] & 0xFF)); - } + byte[] result = new byte[toRead]; + System.arraycopy(bytes, position, result, 0, toRead); position += toRead; - return new RuntimeScalar(result.toString()); + return new RuntimeScalar(result); } @Override diff --git a/src/main/java/org/perlonjava/runtime/operators/Readline.java b/src/main/java/org/perlonjava/runtime/operators/Readline.java index 8d5b7ba65..5ec8dcb72 
100644 --- a/src/main/java/org/perlonjava/runtime/operators/Readline.java +++ b/src/main/java/org/perlonjava/runtime/operators/Readline.java @@ -2,6 +2,8 @@ import org.perlonjava.runtime.runtimetypes.*; +import java.nio.charset.StandardCharsets; + import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarFalse; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarUndef; @@ -302,40 +304,31 @@ public static RuntimeScalar read(RuntimeList args) { return new RuntimeScalar(0); } - // Read data using the new API - read characters, not bytes - String readData = fh.ioHandle.read(lengthValue).toString(); + RuntimeScalar readResult = fh.ioHandle.read(lengthValue); + boolean isByteData = readResult.type == RuntimeScalarType.BYTE_STRING; + String readData = readResult.toString(); int charsRead = readData.length(); if (charsRead == 0) { - // EOF or error - handle based on offset if (offsetValue != 0) { - // Handle offset (both positive and negative) when reading 0 bytes StringBuilder scalarValue = new StringBuilder(scalar.toString()); - - // Convert negative offset to positive if (offsetValue < 0) { offsetValue = scalarValue.length() + offsetValue; if (offsetValue < 0) { offsetValue = 0; } } - - // Ensure buffer is large enough for offset while (scalarValue.length() < offsetValue) { scalarValue.append('\0'); } - - // Truncate to offset scalarValue.setLength(offsetValue); scalar.set(scalarValue.toString()); } else { - // No offset - just clear the scalar scalar.set(""); } return new RuntimeScalar(0); } - // Handle offset StringBuilder scalarValue = new StringBuilder(scalar.toString()); if (offsetValue < 0) { @@ -346,20 +339,17 @@ public static RuntimeScalar read(RuntimeList args) { } int newLength = offsetValue + charsRead; - - // Ensure the buffer is large enough for the offset while (scalarValue.length() < offsetValue) { scalarValue.append('\0'); } - - // 
Replace the data from offsetValue onwards with the new data scalarValue.replace(offsetValue, scalarValue.length(), readData); - - // Truncate to the correct final length scalarValue.setLength(newLength); - // Update the scalar with the new value - scalar.set(scalarValue.toString()); + if (isByteData) { + scalar.set(new RuntimeScalar(scalarValue.toString().getBytes(StandardCharsets.ISO_8859_1))); + } else { + scalar.set(scalarValue.toString()); + } // Return the number of characters read return new RuntimeScalar(charsRead); diff --git a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java index 40cd65af4..957a1f6a7 100644 --- a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java @@ -278,40 +278,38 @@ public static RuntimeScalar stringConcat(RuntimeScalar runtimeScalar, RuntimeSca String aStr = runtimeScalar.toString(); String bStr = b.toString(); - boolean aIsString = runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == RuntimeScalarType.BYTE_STRING; - boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.BYTE_STRING; + boolean aIsByteOrEmpty = runtimeScalar.type == BYTE_STRING + || runtimeScalar.type == RuntimeScalarType.UNDEF + || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); + boolean bIsByteOrEmpty = b.type == BYTE_STRING + || b.type == RuntimeScalarType.UNDEF + || (bStr.isEmpty() && b.type != RuntimeScalarType.STRING); + + if ((runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) && aIsByteOrEmpty && bIsByteOrEmpty) { + byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] out = new byte[aBytes.length + bBytes.length]; + System.arraycopy(aBytes, 0, out, 0, aBytes.length); + System.arraycopy(bBytes, 0, out, aBytes.length, 
bBytes.length); + return new RuntimeScalar(out); + } - // Preserve Perl-like UTF-8 flag semantics only for string scalars. - // For other types, keep legacy behavior to avoid wide behavioral changes. - if (aIsString && bIsString) { - // If either operand is explicitly STRING type, return STRING - if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { - return new RuntimeScalar(aStr + bStr); - } + boolean aIsString = runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == BYTE_STRING; + boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == BYTE_STRING; - // Both are BYTE_STRING - check if they actually contain only bytes 0-255 - boolean hasUnicode = false; - for (int i = 0; i < aStr.length(); i++) { - if (aStr.charAt(i) > 255) { - hasUnicode = true; - break; - } + if (aIsString && bIsString) { + boolean hasWideChars = false; + for (int i = 0; !hasWideChars && i < aStr.length(); i++) { + if (aStr.charAt(i) > 255) hasWideChars = true; } - if (!hasUnicode) { - for (int i = 0; i < bStr.length(); i++) { - if (bStr.charAt(i) > 255) { - hasUnicode = true; - break; - } - } + for (int i = 0; !hasWideChars && i < bStr.length(); i++) { + if (bStr.charAt(i) > 255) hasWideChars = true; } - // If Unicode present, upgrade to STRING to preserve characters - if (hasUnicode) { + if (hasWideChars) { return new RuntimeScalar(aStr + bStr); } - // Pure byte strings - concatenate as bytes byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); byte[] out = new byte[aBytes.length + bBytes.length]; @@ -331,38 +329,38 @@ public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeS String aStr = runtimeScalar.toString(); String bStr = b.toString(); - boolean aIsString = runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == RuntimeScalarType.BYTE_STRING; - boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == 
RuntimeScalarType.BYTE_STRING; + boolean aIsByteOrEmpty = runtimeScalar.type == BYTE_STRING + || runtimeScalar.type == RuntimeScalarType.UNDEF + || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); + boolean bIsByteOrEmpty = b.type == BYTE_STRING + || b.type == RuntimeScalarType.UNDEF + || (bStr.isEmpty() && b.type != RuntimeScalarType.STRING); - if (aIsString && bIsString) { - // If either operand is explicitly STRING type, return STRING - if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { - return new RuntimeScalar(aStr + bStr); - } + if ((runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) && aIsByteOrEmpty && bIsByteOrEmpty) { + byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] out = new byte[aBytes.length + bBytes.length]; + System.arraycopy(aBytes, 0, out, 0, aBytes.length); + System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); + return new RuntimeScalar(out); + } + + boolean aIsString = runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == BYTE_STRING; + boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == BYTE_STRING; - // Both are BYTE_STRING - check if they actually contain only bytes 0-255 - boolean hasUnicode = false; - for (int i = 0; i < aStr.length(); i++) { - if (aStr.charAt(i) > 255) { - hasUnicode = true; - break; - } + if (aIsString && bIsString) { + boolean hasWideChars = false; + for (int i = 0; !hasWideChars && i < aStr.length(); i++) { + if (aStr.charAt(i) > 255) hasWideChars = true; } - if (!hasUnicode) { - for (int i = 0; i < bStr.length(); i++) { - if (bStr.charAt(i) > 255) { - hasUnicode = true; - break; - } - } + for (int i = 0; !hasWideChars && i < bStr.length(); i++) { + if (bStr.charAt(i) > 255) hasWideChars = true; } - // If Unicode present, upgrade to STRING to preserve characters - if (hasUnicode) { + if (hasWideChars) { return new 
RuntimeScalar(aStr + bStr); } - // Pure byte strings - concatenate as bytes byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); byte[] out = new byte[aBytes.length + bBytes.length]; @@ -558,9 +556,9 @@ private static RuntimeScalar joinInternal(RuntimeScalar runtimeScalar, RuntimeBa RuntimeScalarCache.scalarEmptyString); } - boolean isByteString = runtimeScalar.type == BYTE_STRING; - String delimiter = runtimeScalar.toString(); + + boolean isByteString = runtimeScalar.type == BYTE_STRING || delimiter.isEmpty(); // String interpolation uses empty delimiter - don't warn about undef in that case boolean isStringInterpolation = delimiter.isEmpty(); From 9de91a13aab8eeb2eae6f192ac135071993a852d Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 1 Mar 2026 21:44:05 +0100 Subject: [PATCH 6/9] Rename MOVE opcode to ALIAS; fix my/our variable aliasing bug MOVE was doing reference copy (aliasing) for all register transfers. This caused my/our variable declarations to share RuntimeScalar objects with source variables, so in-place mutations (+=, ++, etc.) on one would corrupt the other. - Rename MOVE to ALIAS to clarify its reference-sharing semantics - Use MY_SCALAR (new RuntimeScalar + value copy) for my $x = expr - Use SET_SCALAR for our ($a,...) = (...) to preserve global binding - Use LOAD_UNDEF + SET_SCALAR for bare ident and list assignments - Implement MY_SCALAR handler in BytecodeInterpreter - Add MY_SCALAR to disassembler ALIAS remains correct for temp register shuffling (block results, ternary, ||/&&//) where the source is ephemeral. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 14 +++++----- .../backend/bytecode/BytecodeInterpreter.java | 16 +++++++++-- .../backend/bytecode/CompileAssignment.java | 28 +++++++++---------- .../bytecode/CompileBinaryOperator.java | 12 ++++---- .../backend/bytecode/InterpretedCode.java | 9 ++++-- .../perlonjava/backend/bytecode/Opcodes.java | 14 +++++----- 6 files changed, 53 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index e8974b8f2..85177bc6c 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -740,7 +740,7 @@ public void visit(BlockNode node) { // Save the last statement's result to the outer register BEFORE exiting scope if (outerResultReg >= 0 && lastResultReg >= 0) { - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(outerResultReg); emitReg(lastResultReg); } @@ -782,7 +782,7 @@ public void visit(NumberNode node) { if (isInteger) { // Regular integer - use LOAD_INT to create mutable scalar - // Note: We don't use RuntimeScalarCache here because MOVE just copies references, + // Note: We don't use RuntimeScalarCache here because ALIAS just copies references, // and we need mutable scalars for variables (++, --, etc.) 
int intValue = Integer.parseInt(value); emit(Opcodes.LOAD_INT); @@ -3949,7 +3949,7 @@ private void visitEvalBlock(SubroutineNode node) { // Store result from block if (lastResultReg >= 0) { - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(resultReg); emitReg(lastResultReg); } @@ -4188,7 +4188,7 @@ public void visit(For3Node node) { } // Save last statement result into outer register before exiting scope if (outerResultReg >= 0 && lastResultReg >= 0) { - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(outerResultReg); emitReg(lastResultReg); } @@ -4374,7 +4374,7 @@ public void visit(IfNode node) { // Both branches should produce results in the same register // If they differ, move else result to then result register if (thenResultReg >= 0 && elseResultReg >= 0 && thenResultReg != elseResultReg) { - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(thenResultReg); emitReg(elseResultReg); } @@ -4426,7 +4426,7 @@ public void visit(TernaryOperatorNode node) { int trueReg = lastResultReg; // Move true result to rd - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(rd); emitReg(trueReg); @@ -4444,7 +4444,7 @@ public void visit(TernaryOperatorNode node) { int falseReg = lastResultReg; // Move false result to rd - emit(Opcodes.MOVE); + emit(Opcodes.ALIAS); emitReg(rd); emitReg(falseReg); diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index be137dbe2..d45a8f3e4 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -197,8 +197,8 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c // REGISTER OPERATIONS // ================================================================= - case Opcodes.MOVE: { - // Register copy: rd = rs + case Opcodes.ALIAS: { + // Register alias: rd = rs (shares reference, does NOT copy value) // Must 
unwrap RuntimeScalarReadOnly to prevent read-only values in variable registers int dest = bytecode[pc++]; int src = bytecode[pc++]; @@ -273,6 +273,16 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.MY_SCALAR: { + // Lexical scalar assignment: rd = new RuntimeScalar(); rd.set(rs) + int rd = bytecode[pc++]; + int rs = bytecode[pc++]; + RuntimeScalar newScalar = new RuntimeScalar(); + registers[rs].addToScalar(newScalar); + registers[rd] = newScalar; + break; + } + // ================================================================= // VARIABLE ACCESS - GLOBAL // ================================================================= @@ -1215,7 +1225,7 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; // ================================================================= - // SUPERINSTRUCTIONS - Eliminate MOVE overhead + // SUPERINSTRUCTIONS - Eliminate ALIAS overhead // ================================================================= case Opcodes.INC_REG: { diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileAssignment.java b/src/main/java/org/perlonjava/backend/bytecode/CompileAssignment.java index 55afd7878..7dc1f51b2 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileAssignment.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileAssignment.java @@ -88,8 +88,7 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, node.right.accept(bytecodeCompiler); int valueReg = bytecodeCompiler.lastResultReg; - // Move to variable register - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.MY_SCALAR); bytecodeCompiler.emitReg(reg); bytecodeCompiler.emitReg(valueReg); @@ -228,8 +227,7 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, node.right.accept(bytecodeCompiler); int valueReg = bytecodeCompiler.lastResultReg; - // Move to variable register - 
bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.MY_SCALAR); bytecodeCompiler.emitReg(reg); bytecodeCompiler.emitReg(valueReg); @@ -329,13 +327,11 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, // Assign to variable if (sigil.equals("$")) { if (sigilOp.id != 0) { - // Captured variable - use SET_SCALAR to preserve aliasing bytecodeCompiler.emit(Opcodes.SET_SCALAR); bytecodeCompiler.emitReg(varReg); bytecodeCompiler.emitReg(elemReg); } else { - // Regular variable - use MOVE - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.MY_SCALAR); bytecodeCompiler.emitReg(varReg); bytecodeCompiler.emitReg(elemReg); } @@ -612,7 +608,7 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, } // Regular assignment: $x = value - // OPTIMIZATION: Detect $x = $x + $y and emit ADD_ASSIGN instead of ADD_SCALAR + MOVE + // OPTIMIZATION: Detect $x = $x + $y and emit ADD_ASSIGN instead of ADD_SCALAR + ALIAS if (node.left instanceof OperatorNode && node.right instanceof BinaryOperatorNode) { OperatorNode leftOp = (OperatorNode) node.left; BinaryOperatorNode rightBin = (BinaryOperatorNode) node.right; @@ -639,7 +635,7 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, rightBin.right.accept(bytecodeCompiler); int rhsReg = bytecodeCompiler.lastResultReg; - // Emit ADD_ASSIGN instead of ADD_SCALAR + MOVE + // Emit ADD_ASSIGN instead of ADD_SCALAR + ALIAS bytecodeCompiler.emit(Opcodes.ADD_ASSIGN); bytecodeCompiler.emitReg(targetReg); bytecodeCompiler.emitReg(rhsReg); @@ -740,7 +736,7 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, // LOAD_UNDEF allocates a new mutable RuntimeScalar in the target register; // SET_SCALAR copies the source value into it. 
// This avoids two bugs: - // - MOVE aliases constants from the pool, corrupting them on later mutation + // - ALIAS shares constants from the pool, corrupting them on later mutation // - SET_SCALAR alone modifies the existing object in-place, which breaks // 'local' variable restoration when the register was shared bytecodeCompiler.emit(Opcodes.LOAD_UNDEF); @@ -906,9 +902,8 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, bytecodeCompiler.emitReg(rhsListReg); bytecodeCompiler.emitReg(indexReg); - // Assign to variable if (sigil.equals("$")) { - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.SET_SCALAR); bytecodeCompiler.emitReg(varReg); bytecodeCompiler.emitReg(elemReg); } else if (sigil.equals("@")) { @@ -1040,9 +1035,10 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, String varName = ((IdentifierNode) node.left).name; if (bytecodeCompiler.hasVariable(varName)) { - // Lexical variable - copy to its register int targetReg = bytecodeCompiler.getVariableRegister(varName); - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.LOAD_UNDEF); + bytecodeCompiler.emitReg(targetReg); + bytecodeCompiler.emit(Opcodes.SET_SCALAR); bytecodeCompiler.emitReg(targetReg); bytecodeCompiler.emitReg(valueReg); bytecodeCompiler.lastResultReg = targetReg; @@ -1545,7 +1541,9 @@ public static void compileAssignmentOperator(BytecodeCompiler bytecodeCompiler, bytecodeCompiler.emitReg(targetReg); bytecodeCompiler.emitReg(elementReg); } else { - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.LOAD_UNDEF); + bytecodeCompiler.emitReg(targetReg); + bytecodeCompiler.emit(Opcodes.SET_SCALAR); bytecodeCompiler.emitReg(targetReg); bytecodeCompiler.emitReg(elementReg); } diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java index 98010eb1e..ab5228001 100644 --- 
a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java @@ -442,7 +442,7 @@ else if (node.right instanceof BinaryOperatorNode) { // Allocate result register and move left value to it int rd = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); bytecodeCompiler.emitReg(rs1); @@ -459,7 +459,7 @@ else if (node.right instanceof BinaryOperatorNode) { int rs2 = bytecodeCompiler.lastResultReg; // Move right result to rd (overwriting left value) - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); bytecodeCompiler.emitReg(rs2); @@ -484,7 +484,7 @@ else if (node.right instanceof BinaryOperatorNode) { // Allocate result register and move left value to it int rd = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); bytecodeCompiler.emitReg(rs1); @@ -501,7 +501,7 @@ else if (node.right instanceof BinaryOperatorNode) { int rs2 = bytecodeCompiler.lastResultReg; // Move right result to rd (overwriting left value) - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); bytecodeCompiler.emitReg(rs2); @@ -526,7 +526,7 @@ else if (node.right instanceof BinaryOperatorNode) { // Allocate result register and move left value to it int rd = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); bytecodeCompiler.emitReg(rs1); @@ -549,7 +549,7 @@ else if (node.right instanceof BinaryOperatorNode) { int rs2 = bytecodeCompiler.lastResultReg; // Move right result to rd (overwriting left value) - bytecodeCompiler.emit(Opcodes.MOVE); + bytecodeCompiler.emit(Opcodes.ALIAS); bytecodeCompiler.emitReg(rd); 
bytecodeCompiler.emitReg(rs2); diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index 354ffa781..ac2f5af32 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -288,10 +288,10 @@ public String disassemble() { pc += 1; sb.append("GOTO_IF_TRUE r").append(condReg).append(" -> ").append(target).append("\n"); break; - case Opcodes.MOVE: + case Opcodes.ALIAS: int dest = bytecode[pc++]; int src = bytecode[pc++]; - sb.append("MOVE r").append(dest).append(" = r").append(src).append("\n"); + sb.append("ALIAS r").append(dest).append(" = r").append(src).append("\n"); break; case Opcodes.LOAD_CONST: int rd = bytecode[pc++]; @@ -365,6 +365,11 @@ public String disassemble() { rd = bytecode[pc++]; sb.append("LOAD_UNDEF r").append(rd).append("\n"); break; + case Opcodes.MY_SCALAR: + rd = bytecode[pc++]; + src = bytecode[pc++]; + sb.append("MY_SCALAR r").append(rd).append(" = r").append(src).append("\n"); + break; case Opcodes.LOAD_GLOBAL_SCALAR: rd = bytecode[pc++]; int nameIdx = bytecode[pc++]; diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 02a22b473..b110638c1 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -50,8 +50,8 @@ public class Opcodes { // REGISTER OPERATIONS (5-9) // ================================================================= - /** Register copy: rd = rs */ - public static final short MOVE = 5; + /** Register alias: rd = rs (shares reference, does NOT copy value) */ + public static final short ALIAS = 5; /** Load from constant pool: rd = constants[index] */ public static final short LOAD_CONST = 6; @@ -314,19 +314,19 @@ public class Opcodes { // 
================================================================= // SUPERINSTRUCTIONS (75-90) - Combine common opcode sequences - // These eliminate MOVE overhead by doing operation + store in one step + // These eliminate ALIAS overhead by doing operation + store in one step // ================================================================= - /** Increment register in-place: rd = rd + 1 (combines ADD_SCALAR_INT + MOVE) */ + /** Increment register in-place: rd = rd + 1 (combines ADD_SCALAR_INT + ALIAS) */ public static final short INC_REG = 75; - /** Decrement register in-place: rd = rd - 1 (combines SUB_SCALAR_INT + MOVE) */ + /** Decrement register in-place: rd = rd - 1 (combines SUB_SCALAR_INT + ALIAS) */ public static final short DEC_REG = 76; - /** Add and assign: rd = rd + rs (combines ADD_SCALAR + MOVE when dest == src1) */ + /** Add and assign: rd = rd + rs (combines ADD_SCALAR + ALIAS when dest == src1) */ public static final short ADD_ASSIGN = 77; - /** Add immediate and assign: rd = rd + imm (combines ADD_SCALAR_INT + MOVE when dest == src) */ + /** Add immediate and assign: rd = rd + imm (combines ADD_SCALAR_INT + ALIAS when dest == src) */ public static final short ADD_ASSIGN_INT = 78; /** Pre-increment: ++rd (calls RuntimeScalar.preAutoIncrement) */ From f0241015e34dbc454757dbd96df3fb33c3c00036 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sun, 1 Mar 2026 22:24:01 +0100 Subject: [PATCH 7/9] Fix concat type propagation: STRING is sticky, guard BYTE_STRING path - If either operand is STRING type, always return STRING (prevents BYTE_STRING from downgrading wide-char strings) - Add wide-char safety check in BYTE_STRING concat path to prevent ISO-8859-1 truncation of chars > 255 - Fixes: undef .= "\x{1ff}" producing 63 instead of 511 - Eliminates regressions in chomp.t (-50), sprintf.t (-9), utf.t (-48) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../runtime/operators/StringOperators.java | 132 ++++++++---------- 1 file changed, 56 insertions(+), 76 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java index 957a1f6a7..afea7a4ad 100644 --- a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java @@ -278,47 +278,37 @@ public static RuntimeScalar stringConcat(RuntimeScalar runtimeScalar, RuntimeSca String aStr = runtimeScalar.toString(); String bStr = b.toString(); - boolean aIsByteOrEmpty = runtimeScalar.type == BYTE_STRING - || runtimeScalar.type == RuntimeScalarType.UNDEF - || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); - boolean bIsByteOrEmpty = b.type == BYTE_STRING - || b.type == RuntimeScalarType.UNDEF - || (bStr.isEmpty() && b.type != RuntimeScalarType.STRING); - - if ((runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) && aIsByteOrEmpty && bIsByteOrEmpty) { - byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] out = new byte[aBytes.length + bBytes.length]; - System.arraycopy(aBytes, 0, out, 0, aBytes.length); - System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); - return new RuntimeScalar(out); - } - - boolean aIsString = 
runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == BYTE_STRING; - boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == BYTE_STRING; - - if (aIsString && bIsString) { - boolean hasWideChars = false; - for (int i = 0; !hasWideChars && i < aStr.length(); i++) { - if (aStr.charAt(i) > 255) hasWideChars = true; + if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { + return new RuntimeScalar(aStr + bStr); + } + + if (runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) { + boolean aIsByte = runtimeScalar.type == BYTE_STRING + || runtimeScalar.type == RuntimeScalarType.UNDEF + || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); + boolean bIsByte = b.type == BYTE_STRING + || b.type == RuntimeScalarType.UNDEF + || (bStr.isEmpty() && b.type != RuntimeScalarType.STRING); + if (aIsByte && bIsByte) { + boolean safe = true; + for (int i = 0; safe && i < aStr.length(); i++) { + if (aStr.charAt(i) > 255) safe = false; + } + for (int i = 0; safe && i < bStr.length(); i++) { + if (bStr.charAt(i) > 255) safe = false; + } + if (safe) { + byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] out = new byte[aBytes.length + bBytes.length]; + System.arraycopy(aBytes, 0, out, 0, aBytes.length); + System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); + return new RuntimeScalar(out); + } } - for (int i = 0; !hasWideChars && i < bStr.length(); i++) { - if (bStr.charAt(i) > 255) hasWideChars = true; - } - - if (hasWideChars) { - return new RuntimeScalar(aStr + bStr); - } - - byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] out = new byte[aBytes.length + bBytes.length]; - System.arraycopy(aBytes, 0, out, 0, aBytes.length); - System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); - return new RuntimeScalar(out); } - 
return new RuntimeScalar(runtimeScalar + bStr); + return new RuntimeScalar(aStr + bStr); } public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeScalar, RuntimeScalar b) { @@ -329,47 +319,37 @@ public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeS String aStr = runtimeScalar.toString(); String bStr = b.toString(); - boolean aIsByteOrEmpty = runtimeScalar.type == BYTE_STRING - || runtimeScalar.type == RuntimeScalarType.UNDEF - || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); - boolean bIsByteOrEmpty = b.type == BYTE_STRING - || b.type == RuntimeScalarType.UNDEF - || (bStr.isEmpty() && b.type != RuntimeScalarType.STRING); - - if ((runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) && aIsByteOrEmpty && bIsByteOrEmpty) { - byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] out = new byte[aBytes.length + bBytes.length]; - System.arraycopy(aBytes, 0, out, 0, aBytes.length); - System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); - return new RuntimeScalar(out); - } - - boolean aIsString = runtimeScalar.type == RuntimeScalarType.STRING || runtimeScalar.type == BYTE_STRING; - boolean bIsString = b.type == RuntimeScalarType.STRING || b.type == BYTE_STRING; - - if (aIsString && bIsString) { - boolean hasWideChars = false; - for (int i = 0; !hasWideChars && i < aStr.length(); i++) { - if (aStr.charAt(i) > 255) hasWideChars = true; + if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { + return new RuntimeScalar(aStr + bStr); + } + + if (runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) { + boolean aIsByte = runtimeScalar.type == BYTE_STRING + || runtimeScalar.type == RuntimeScalarType.UNDEF + || (aStr.isEmpty() && runtimeScalar.type != RuntimeScalarType.STRING); + boolean bIsByte = b.type == BYTE_STRING + || b.type == RuntimeScalarType.UNDEF + || 
(bStr.isEmpty() && b.type != RuntimeScalarType.STRING); + if (aIsByte && bIsByte) { + boolean safe = true; + for (int i = 0; safe && i < aStr.length(); i++) { + if (aStr.charAt(i) > 255) safe = false; + } + for (int i = 0; safe && i < bStr.length(); i++) { + if (bStr.charAt(i) > 255) safe = false; + } + if (safe) { + byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); + byte[] out = new byte[aBytes.length + bBytes.length]; + System.arraycopy(aBytes, 0, out, 0, aBytes.length); + System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); + return new RuntimeScalar(out); + } } - for (int i = 0; !hasWideChars && i < bStr.length(); i++) { - if (bStr.charAt(i) > 255) hasWideChars = true; - } - - if (hasWideChars) { - return new RuntimeScalar(aStr + bStr); - } - - byte[] aBytes = aStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] bBytes = bStr.getBytes(StandardCharsets.ISO_8859_1); - byte[] out = new byte[aBytes.length + bBytes.length]; - System.arraycopy(aBytes, 0, out, 0, aBytes.length); - System.arraycopy(bBytes, 0, out, aBytes.length, bBytes.length); - return new RuntimeScalar(out); } - return new RuntimeScalar(runtimeScalar + bStr); + return new RuntimeScalar(aStr + bStr); } public static RuntimeScalar chompScalar(RuntimeScalar runtimeScalar) { From f60a6ef19f3ca38e99496fc29077383e18f9c624 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Sun, 1 Mar 2026 22:51:46 +0100 Subject: [PATCH 8/9] Fix read.t, require.t, and concat.t regressions - Readline.read(): Do not force BYTE_STRING when existing buffer has wide chars (>255); add safety check before ISO-8859-1 conversion. Fixes 84 read.t regressions. - SubroutineParser: Revert named sub return value from NumberNode(1) to ListNode, restoring correct module_true semantics. Fixes 56 require.t regressions. 
- StringOperators: Re-read left operand after right operand evaluation in STRING and fallback concat paths, matching Perl 5 behavior where tied FETCH side effects are visible to earlier operands. Fixes 1 concat.t regression (RT #132595). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../frontend/parser/SubroutineParser.java | 8 ++++---- .../org/perlonjava/runtime/operators/Readline.java | 13 +++++++++++-- .../runtime/operators/StringOperators.java | 8 ++++---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 36edb17d9..5cb75ded2 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -578,11 +578,11 @@ static String consumeAttributes(Parser parser, List attributes) { return prototype; } - public static Node handleNamedSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, String declaration) { + public static ListNode handleNamedSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, String declaration) { return handleNamedSubWithFilter(parser, subName, prototype, attributes, block, false, declaration); } - public static Node handleNamedSubWithFilter(Parser parser, String subName, String prototype, List attributes, BlockNode block, boolean filterLexicalMethods, String declaration) { + public static ListNode handleNamedSubWithFilter(Parser parser, String subName, String prototype, List attributes, BlockNode block, boolean filterLexicalMethods, String declaration) { // Check if there's a lexical forward declaration (our/my/state sub name;) that this definition should fulfill String lexicalKey = "&" + subName; SymbolTable.SymbolEntry lexicalEntry = parser.ctx.symbolTable.getSymbolEntry(lexicalKey); @@ -642,7 +642,7 @@ public 
static Node handleNamedSubWithFilter(Parser parser, String subName, Strin // Execute the BEGIN block immediately during parsing SpecialBlockParser.runSpecialBlock(parser, "BEGIN", beginBlock); - return new NumberNode("1", parser.tokenIndex); + return new ListNode(parser.tokenIndex); } } } @@ -862,7 +862,7 @@ public static Node handleNamedSubWithFilter(Parser parser, String subName, Strin RuntimeCode placeholderForSupplier = (RuntimeCode) codeRef.value; placeholderForSupplier.compilerSupplier = subroutineCreationTaskSupplier; - return new NumberNode("1", parser.tokenIndex); + return new ListNode(parser.tokenIndex); } private static SubroutineNode handleAnonSub(Parser parser, String subName, String prototype, List attributes, BlockNode block, int currentIndex) { diff --git a/src/main/java/org/perlonjava/runtime/operators/Readline.java b/src/main/java/org/perlonjava/runtime/operators/Readline.java index 5ec8dcb72..c086f54ca 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Readline.java +++ b/src/main/java/org/perlonjava/runtime/operators/Readline.java @@ -345,8 +345,17 @@ public static RuntimeScalar read(RuntimeList args) { scalarValue.replace(offsetValue, scalarValue.length(), readData); scalarValue.setLength(newLength); - if (isByteData) { - scalar.set(new RuntimeScalar(scalarValue.toString().getBytes(StandardCharsets.ISO_8859_1))); + if (isByteData && scalar.type != RuntimeScalarType.STRING) { + String s = scalarValue.toString(); + boolean safe = true; + for (int i = 0; safe && i < s.length(); i++) { + if (s.charAt(i) > 255) safe = false; + } + if (safe) { + scalar.set(new RuntimeScalar(s.getBytes(StandardCharsets.ISO_8859_1))); + } else { + scalar.set(s); + } } else { scalar.set(scalarValue.toString()); } diff --git a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java index afea7a4ad..1e9c52449 100644 --- 
a/src/main/java/org/perlonjava/runtime/operators/StringOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/StringOperators.java @@ -279,7 +279,7 @@ public static RuntimeScalar stringConcat(RuntimeScalar runtimeScalar, RuntimeSca String bStr = b.toString(); if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { - return new RuntimeScalar(aStr + bStr); + return new RuntimeScalar(runtimeScalar.toString() + bStr); } if (runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) { @@ -308,7 +308,7 @@ public static RuntimeScalar stringConcat(RuntimeScalar runtimeScalar, RuntimeSca } } - return new RuntimeScalar(aStr + bStr); + return new RuntimeScalar(runtimeScalar.toString() + bStr); } public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeScalar, RuntimeScalar b) { @@ -320,7 +320,7 @@ public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeS String bStr = b.toString(); if (runtimeScalar.type == RuntimeScalarType.STRING || b.type == RuntimeScalarType.STRING) { - return new RuntimeScalar(aStr + bStr); + return new RuntimeScalar(runtimeScalar.toString() + bStr); } if (runtimeScalar.type == BYTE_STRING || b.type == BYTE_STRING) { @@ -349,7 +349,7 @@ public static RuntimeScalar stringConcatWarnUninitialized(RuntimeScalar runtimeS } } - return new RuntimeScalar(aStr + bStr); + return new RuntimeScalar(runtimeScalar.toString() + bStr); } public static RuntimeScalar chompScalar(RuntimeScalar runtimeScalar) { From cfc08031b5afc492275946517dc63ed151f63b96 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Sun, 1 Mar 2026 23:05:09 +0100 Subject: [PATCH 9/9] Update sysread_syswrite.t: sysread now works on in-memory handles Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- src/test/resources/unit/sysread_syswrite.t | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/test/resources/unit/sysread_syswrite.t b/src/test/resources/unit/sysread_syswrite.t index 8915113b6..3bd476e9c 100644 --- a/src/test/resources/unit/sysread_syswrite.t +++ b/src/test/resources/unit/sysread_syswrite.t @@ -54,25 +54,22 @@ subtest 'UTF-8 layer error handling' => sub { close($utf8); }; -subtest 'In-memory file handles (expected to fail)' => sub { +subtest 'In-memory file handles' => sub { plan tests => 2; - # Note: sysread/syswrite don't work with in-memory file handles in standard Perl - # This is a known limitation my $mem_content = ''; open(my $mem_out, '>', \$mem_content) or die "Cannot open in-memory handle: $!"; my $data = "In-memory test\n"; my $bytes = syswrite($mem_out, $data); - ok(!defined($bytes), 'syswrite to in-memory handle returns undef (expected limitation)'); + ok(!defined($bytes), 'syswrite to in-memory handle returns undef (known limitation)'); close($mem_out); - # Test sysread from in-memory variable $mem_content = "Test content"; open(my $mem_in, '<', \$mem_content) or die "Cannot open in-memory handle: $!"; my $buffer; my $read = sysread($mem_in, $buffer, 1024); - ok(!defined($read), 'sysread from in-memory handle returns undef (expected limitation)'); + is($read, length($mem_content), 'sysread from in-memory handle works'); close($mem_in); };