From d556db8347f63aace08fb1b716f780cfb289f9e0 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 09:48:06 +0100 Subject: [PATCH 01/10] fix: handle indirect object syntax in fork-open exec emulation The fork-open emulation (open FH, "-|") was not correctly handling the indirect object syntax exec { $cmd[0] } @cmd used by Module::Build. The completeForkOpen() method was passing raw flattenedArgs to ProcessBuilder, but with indirect object syntax, flattenedArgs[0] is the program and flattenedArgs[1] is the duplicate program name (argv[0]). This fix mirrors the logic from the regular exec() path that correctly extracts the program and skips the argv[0] argument. Fixes Module::Build _backticks() which uses this pattern. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/operators/SystemOperator.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java index af8681ed7..4e00eac7c 100644 --- a/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/SystemOperator.java @@ -557,9 +557,19 @@ private static RuntimeScalar completeForkOpen(List flattenedArgs, boolea try { flushAllHandles(); - // Build the command + // Build the command - mirror the logic from exec() for consistency List command; - if (!hasHandle && flattenedArgs.size() == 1) { + if (hasHandle && flattenedArgs.size() >= 2) { + // Indirect object syntax: exec { $program } @args + // flattenedArgs[0] is the program from the indirect object + // flattenedArgs[1:] are the arguments from @args + // In Perl, @args[0] becomes argv[0] (process name), @args[1:] are actual arguments + // Java's ProcessBuilder can't set argv[0] separately, so we skip it + String program = 
flattenedArgs.get(0); + command = new ArrayList<>(); + command.add(program); + command.addAll(flattenedArgs.subList(2, flattenedArgs.size())); + } else if (!hasHandle && flattenedArgs.size() == 1) { String cmdStr = flattenedArgs.getFirst(); if (SHELL_METACHARACTERS.matcher(cmdStr).find()) { // Use shell for metacharacters From e4ea5ee22096dbd30c83fd887a4f5d1776e1d6d9 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 09:57:16 +0100 Subject: [PATCH 02/10] fix: update selectedHandle when STDOUT/STDERR is redirected When `open STDOUT, "> file"` redirects STDOUT to a file, the RuntimeIO.selectedHandle was not updated. This caused `print` without an explicit filehandle to still write to the original stdout instead of the file. The fix updates RuntimeGlob.setIO() to check if the old IO was the currently selected handle, and if so, updates selectedHandle to point to the new IO. This enables proper STDOUT/STDERR redirect patterns used by Module::Build and other modules. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/runtimetypes/RuntimeGlob.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeGlob.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeGlob.java index 33ef879a0..0d7e1e533 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeGlob.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeGlob.java @@ -421,6 +421,11 @@ public RuntimeArray getGlobArray() { } public RuntimeGlob setIO(RuntimeScalar io) { + // Check if the current IO is the selected handle - if so, update it + RuntimeIO oldIO = null; + if (this.IO.value instanceof RuntimeIO) { + oldIO = (RuntimeIO) this.IO.value; + } // If IO slot is tied (TIED_SCALAR with TieHandle), replace it entirely // Otherwise use set() to modify in place, preserving sharing with detached copies if (this.IO.type == RuntimeScalarType.TIED_SCALAR) { @@ -432,6 +437,10 @@ public RuntimeGlob setIO(RuntimeScalar io) { // If the IO scalar contains a RuntimeIO, set its glob name if (io.value instanceof RuntimeIO runtimeIO) { runtimeIO.globName = this.globName; + // Update selectedHandle if the old IO was the selected handle + if (oldIO != null && oldIO == RuntimeIO.selectedHandle) { + RuntimeIO.selectedHandle = runtimeIO; + } } return this; } @@ -439,6 +448,11 @@ public RuntimeGlob setIO(RuntimeScalar io) { public RuntimeGlob setIO(RuntimeIO io) { // Set the glob name in the RuntimeIO for proper stringification io.globName = this.globName; + // Check if the current IO is the selected handle - if so, update it + RuntimeIO oldIO = null; + if (this.IO.value instanceof RuntimeIO) { + oldIO = (RuntimeIO) this.IO.value; + } // If IO slot is tied (TIED_SCALAR with TieHandle), replace it entirely // Otherwise modify in place, preserving sharing with detached copies if (this.IO.type 
== RuntimeScalarType.TIED_SCALAR) { @@ -447,6 +461,12 @@ public RuntimeGlob setIO(RuntimeIO io) { this.IO.type = RuntimeScalarType.GLOB; // RuntimeIO is stored as GLOB type this.IO.value = io; } + // Update selectedHandle if the old IO was the selected handle + // This ensures that when STDOUT is redirected, print without explicit + // filehandle uses the new handle + if (oldIO != null && oldIO == RuntimeIO.selectedHandle) { + RuntimeIO.selectedHandle = io; + } return this; } From 649969bf8e9eacffa013f72cf5c8e9214362720d Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 10:10:20 +0100 Subject: [PATCH 03/10] fix: resolve filehandle names in caller's package for duplication When `open $fh, ">&BAREWORD"` is used to duplicate a filehandle by name, look up the bareword in the caller's package first before falling back to main::. This fixes the "Unsupported filehandle duplication: SAVEOUT" error when using patterns like: package MBTest; local *SAVEOUT; open SAVEOUT, ">&" . fileno(STDOUT); # ... redirect ... open STDOUT, ">&SAVEOUT"; # This now works Uses RuntimeCode.getCurrentPackage() which leverages caller() to get the package context at runtime, working for both JVM-compiled and interpreter code. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/operators/IOOperator.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java index ae7831f1e..9cd8b0661 100644 --- a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java @@ -2211,6 +2211,27 @@ public static RuntimeIO openFileHandleDup(String fileName, String mode) { break; default: // Try to look up as a global filehandle + // First, try the current package if no :: qualifier is present + if (!fileName.contains("::")) { + // Use RuntimeCode.getCurrentPackage() which uses caller() to determine + // the current package - this works for both JVM-compiled and interpreter code + String currentPkg = RuntimeCode.getCurrentPackage(); + // Remove trailing "::" for consistent naming + if (currentPkg.endsWith("::")) { + currentPkg = currentPkg.substring(0, currentPkg.length() - 2); + } + if (currentPkg != null && !currentPkg.isEmpty() && !currentPkg.equals("main")) { + String currentPkgName = currentPkg + "::" + fileName; + RuntimeGlob currentGlob = GlobalVariable.getGlobalIO(currentPkgName); + if (currentGlob != null) { + sourceHandle = currentGlob.getRuntimeIO(); + if (sourceHandle != null && sourceHandle.ioHandle != null) { + break; // Found it in current package + } + } + } + } + // Fall back to main:: or fully qualified name String normalizedName = fileName.contains("::") ? 
fileName : "main::" + fileName; RuntimeGlob glob = GlobalVariable.getGlobalIO(normalizedName); if (glob != null) { From 52f5bfb08d7aa4b5525cfb65edd8cd90a11c74c8 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 10:30:47 +0100 Subject: [PATCH 04/10] fix: filter jar: paths from @INC when spawning test processes The jar:PERL5LIB marker is an internal PerlOnJava convention for accessing modules bundled in the JAR file. When Test::Harness spawns child processes, it passes @INC entries as -I switches, but jar: paths do not exist as filesystem directories. Filter out jar:* entries from @INC in both _filtered_inc() and _default_inc() to prevent passing invalid -I switches to child test processes. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/perl/lib/Test/Harness.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/perl/lib/Test/Harness.pm b/src/main/perl/lib/Test/Harness.pm index e197fbb22..e30a7911d 100644 --- a/src/main/perl/lib/Test/Harness.pm +++ b/src/main/perl/lib/Test/Harness.pm @@ -249,6 +249,10 @@ sub _new_harness { sub _filtered_inc { my @inc = grep { !ref } @INC; #28567 + # PerlOnJava: Filter out jar: paths - these are internal markers for + # modules bundled in the JAR and don't exist as filesystem directories + @inc = grep { !/^jar:/ } @inc; + if (IS_VMS) { # VMS has a 255-byte limit on the length of %ENV entries, so @@ -297,6 +301,8 @@ sub _filtered_inc { # Avoid using -l for the benefit of Perl 6 chomp( @inc = `"$perl" -e "print join qq[\\n], \@INC, q[]"` ); + # PerlOnJava: Filter out jar: paths from default @INC + @inc = grep { !/^jar:/ } @inc; return @inc; } } From acbcf192815ea455b90dd4d44912814d536439e4 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 10:44:04 +0100 Subject: [PATCH 05/10] Fix pipe open not passing %ENV to child processes PipeInputChannel and PipeOutputChannel were 
not copying Perl's %ENV hash to the ProcessBuilder environment, causing environment variables (including PERL5LIB) to be invisible to child processes spawned via pipe open (e.g., open($fh, "$command|")). This fix adds copyPerlEnvToProcessBuilder() to both classes, matching the behavior already present in SystemOperator.java for system() and exec(). Also fixes Test/Harness.pm to convert relative paths to absolute paths so they work correctly in child processes that may run from a different directory. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/io/PipeInputChannel.java | 30 +++++++++++++++++++ .../runtime/io/PipeOutputChannel.java | 30 +++++++++++++++++++ src/main/perl/lib/Test/Harness.pm | 9 ++++++ 3 files changed, 69 insertions(+) diff --git a/src/main/java/org/perlonjava/runtime/io/PipeInputChannel.java b/src/main/java/org/perlonjava/runtime/io/PipeInputChannel.java index 6e934abab..2235d8a9c 100644 --- a/src/main/java/org/perlonjava/runtime/io/PipeInputChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/PipeInputChannel.java @@ -1,5 +1,7 @@ package org.perlonjava.runtime.io; +import org.perlonjava.runtime.runtimetypes.GlobalVariable; +import org.perlonjava.runtime.runtimetypes.RuntimeHash; import org.perlonjava.runtime.runtimetypes.RuntimeScalar; import org.perlonjava.runtime.runtimetypes.RuntimeScalarCache; @@ -91,6 +93,9 @@ private void setupProcess(ProcessBuilder processBuilder) throws IOException { String userDir = System.getProperty("user.dir"); processBuilder.directory(new File(userDir)); + // Copy %ENV to the subprocess environment + copyPerlEnvToProcessBuilder(processBuilder); + // Start the process process = processBuilder.start(); @@ -348,4 +353,29 @@ public RuntimeScalar sysread(int length) { return new RuntimeScalar(); // undef } } + + /** + * Copies the Perl %ENV hash to the ProcessBuilder environment. 
+ * This ensures that changes to %ENV in Perl are reflected in child processes. + * + * @param processBuilder The ProcessBuilder to update + */ + private void copyPerlEnvToProcessBuilder(ProcessBuilder processBuilder) { + try { + RuntimeHash envHash = GlobalVariable.getGlobalHash("main::ENV"); + java.util.Map pbEnv = processBuilder.environment(); + + // Clear the inherited environment and replace with Perl's %ENV + pbEnv.clear(); + + for (java.util.Map.Entry entry : envHash.elements.entrySet()) { + String value = entry.getValue().toString(); + if (value != null) { + pbEnv.put(entry.getKey(), value); + } + } + } catch (Exception e) { + // If we can't access %ENV, just use inherited environment (default behavior) + } + } } \ No newline at end of file diff --git a/src/main/java/org/perlonjava/runtime/io/PipeOutputChannel.java b/src/main/java/org/perlonjava/runtime/io/PipeOutputChannel.java index 0da23ce4c..4b734b746 100644 --- a/src/main/java/org/perlonjava/runtime/io/PipeOutputChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/PipeOutputChannel.java @@ -1,5 +1,7 @@ package org.perlonjava.runtime.io; +import org.perlonjava.runtime.runtimetypes.GlobalVariable; +import org.perlonjava.runtime.runtimetypes.RuntimeHash; import org.perlonjava.runtime.runtimetypes.RuntimeScalar; import org.perlonjava.runtime.runtimetypes.RuntimeScalarCache; @@ -150,6 +152,9 @@ private void setupProcess(ProcessBuilder processBuilder) throws IOException { String userDir = System.getProperty("user.dir"); processBuilder.directory(new File(userDir)); + // Copy %ENV to the subprocess environment + copyPerlEnvToProcessBuilder(processBuilder); + // Start the process process = processBuilder.start(); @@ -384,4 +389,29 @@ public RuntimeScalar syswrite(String data) { return new RuntimeScalar(); // undef } } + + /** + * Copies the Perl %ENV hash to the ProcessBuilder environment. + * This ensures that changes to %ENV in Perl are reflected in child processes. 
+ * + * @param processBuilder The ProcessBuilder to update + */ + private void copyPerlEnvToProcessBuilder(ProcessBuilder processBuilder) { + try { + RuntimeHash envHash = GlobalVariable.getGlobalHash("main::ENV"); + java.util.Map pbEnv = processBuilder.environment(); + + // Clear the inherited environment and replace with Perl's %ENV + pbEnv.clear(); + + for (java.util.Map.Entry entry : envHash.elements.entrySet()) { + String value = entry.getValue().toString(); + if (value != null) { + pbEnv.put(entry.getKey(), value); + } + } + } catch (Exception e) { + // If we can't access %ENV, just use inherited environment (default behavior) + } + } } \ No newline at end of file diff --git a/src/main/perl/lib/Test/Harness.pm b/src/main/perl/lib/Test/Harness.pm index e30a7911d..a76552c44 100644 --- a/src/main/perl/lib/Test/Harness.pm +++ b/src/main/perl/lib/Test/Harness.pm @@ -283,6 +283,15 @@ sub _filtered_inc { shift @default_inc while @default_inc and $seen{ $default_inc[0] }; } + # Convert relative paths to absolute paths so they work in child processes + # that may run from a different directory + require Cwd; + @new_inc = map { + # Skip if already absolute or doesn't exist + ($_ =~ m{^/} || $_ =~ m{^[A-Za-z]:}) ? $_ : + (-e $_) ? Cwd::abs_path($_) // $_ : $_ + } @new_inc; + return @new_inc; } From 1d81d115f313fd656499f0012d39bc0cf5ab031a Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 11:00:11 +0100 Subject: [PATCH 06/10] Fix blib.pm to set PERL5LIB for child processes Since PerlOnJava can't use fork() to share address space, child processes don't inherit @INC modifications. This caused tests using -Mblib to fail when they spawn child processes (e.g., Module::Build compatibility tests). The fix makes blib.pm set PERL5LIB in addition to modifying @INC, so child processes can find modules in blib/lib and blib/arch. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/perl/lib/blib.pm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/perl/lib/blib.pm b/src/main/perl/lib/blib.pm index f8fd500d5..f3cd1977e 100644 --- a/src/main/perl/lib/blib.pm +++ b/src/main/perl/lib/blib.pm @@ -82,6 +82,15 @@ sub import if (-d $blib && -d $blib_arch && -d $blib_lib) { unshift(@INC,$blib_arch,$blib_lib); + # PerlOnJava: Also set PERL5LIB so child processes can find modules. + # This is needed because PerlOnJava can't use fork() to share address space. + my $sep = $^O eq 'MSWin32' ? ';' : ':'; + my $new_perl5lib = join($sep, $blib_arch, $blib_lib); + if (exists $ENV{PERL5LIB} && defined $ENV{PERL5LIB} && $ENV{PERL5LIB} ne '') { + $ENV{PERL5LIB} = $new_perl5lib . $sep . $ENV{PERL5LIB}; + } else { + $ENV{PERL5LIB} = $new_perl5lib; + } warn "Using $blib\n" if $Verbose; return; } From 82705440041081857c944c79b96b41151779a01c Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 11:26:34 +0100 Subject: [PATCH 07/10] Fix File::Spec->canonpath to remove leading ./ prefix The Java implementation of canonpath() was missing the logic to strip leading "./" from paths, unlike the pure-Perl File::Spec::Unix version. This caused issues in Module::Build where files stored with "lib/Simple.pm" didn't match canonicalized "./lib/Simple.pm" paths, leading to files being incorrectly deleted during clean(). Now canonpath("./lib/Simple.pm") returns "lib/Simple.pm" as expected, while preserving "./" when it's the entire path (e.g., canonpath("./") returns "./"). 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/runtime/perlmodule/FileSpec.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/FileSpec.java b/src/main/java/org/perlonjava/runtime/perlmodule/FileSpec.java index 9d74294d3..55b480acf 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/FileSpec.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/FileSpec.java @@ -81,6 +81,15 @@ public static RuntimeList canonpath(RuntimeArray args, int ctx) { String quotedSeparator = Matcher.quoteReplacement(File.separator); String canonPath = path.replaceAll("[/\\\\]+", quotedSeparator) .replaceAll(Pattern.quote(File.separator) + "\\." + Pattern.quote(File.separator), quotedSeparator); + + // Remove leading ./ unless the path is exactly "./" + // This matches Perl's File::Spec::Unix behavior + if (!canonPath.equals("." + File.separator)) { + while (canonPath.startsWith("." + File.separator)) { + canonPath = canonPath.substring(2); + } + } + return new RuntimeScalar(canonPath).getList(); } From a7d7aeb4a551c7409c67754a650e90a9706f332a Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 11:36:52 +0100 Subject: [PATCH 08/10] Fix regex capture groups to include undef for unmatched optional groups Previously, when a regex with optional capture groups like m{^(.*/)?(.*)}s matched against a string without a slash (e.g., "MANIFEST"), the optional group that did not participate in the match was skipped in the return list. Perl returns undef for groups that do not participate, so: my ($a, $b) = ("MANIFEST" =~ m{^(.*/)?(.*)}s); Should yield: $a = undef, $b = "MANIFEST" But PerlOnJava was yielding: $a = "MANIFEST", $b = undef This broke File::Basename::dirname() which uses this pattern, causing dirname("MANIFEST") to return "MANIFEST" instead of "." as expected. 
The fix includes undef values in the return list for unmatched groups, matching Perl behavior. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/runtime/regex/RuntimeRegex.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 28b8d8891..790b38d58 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -620,9 +620,10 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc if (ctx == RuntimeContextType.LIST) { for (int i = 1; i <= captureCount; i++) { String matchedStr = matcher.group(i); - if (matchedStr != null) { - matchedGroups.add(new RuntimeScalar(matchedStr)); - } + // Include undef for groups that didn't participate in the match + // This is important for patterns like m{^(.*/)?(.*)}s where + // the optional group returns undef when it doesn't match + matchedGroups.add(new RuntimeScalar(matchedStr)); } } } From d2a70a2f2c2a2aa148546d8ae5d20771c34160ef Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 14:28:20 +0100 Subject: [PATCH 09/10] Fix regex capture group handling for branch reset patterns The previous fix to include undef for unmatched optional groups (commit 4c3a0b498) caused a regression in branch reset patterns (?|...). These patterns were returning undef values for groups in non-matching alternatives. The issue is that Java's regex engine creates separate capture groups for each alternative in a branch reset pattern, while Perl reuses the same group numbers. 
When alternative 3 matches, Java returns: - Groups 1-4: null (from non-matching alternatives 1-2) - Groups 5-6: the matched values (from alternative 3) With the undef fix, all 6 groups were being returned, breaking assignment to fewer variables. The fix tracks whether a pattern uses branch reset syntax and: - For branch reset patterns: skip null groups (original behavior) - For non-branch-reset: include undef for unmatched optional groups This restores the re/pat.t test count to 1065/1298 while preserving the File::Basename::dirname fix for patterns like m{^(.*/)?(.*)}s. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/regex/RegexPreprocessor.java | 9 +++++++++ .../runtime/regex/RuntimeRegex.java | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java index 09bea7374..737b8179c 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java @@ -55,6 +55,7 @@ public class RegexPreprocessor { static int captureGroupCount; static boolean deferredUnicodePropertyEncountered; static boolean inlinePFlagEncountered; + static boolean branchResetEncountered; static void markDeferredUnicodePropertyEncountered() { deferredUnicodePropertyEncountered = true; @@ -68,6 +69,10 @@ static boolean hadInlinePFlag() { return inlinePFlagEncountered; } + static boolean hadBranchReset() { + return branchResetEncountered; + } + /** * Preprocesses a given regex string to make it compatible with Java's regex engine. 
* This involves handling various constructs and escape sequences that Java does not @@ -82,6 +87,7 @@ static String preProcessRegex(String s, RegexFlags regexFlags) { captureGroupCount = 0; deferredUnicodePropertyEncountered = false; inlinePFlagEncountered = false; + branchResetEncountered = false; // First, escape invalid quantifier braces (Perl compatibility) // DISABLED: Causes test regressions - needs more work @@ -1153,6 +1159,9 @@ private static int handleNamedCapture(int c, String s, int offset, int length, S * @return New offset after processing the branch reset group */ private static int handleBranchReset(String s, int offset, int length, StringBuilder sb, RegexFlags regexFlags) { + // Mark that this pattern uses branch reset + branchResetEncountered = true; + // Save the starting group count int startGroupCount = captureGroupCount; diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 790b38d58..03f6d0760 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -81,6 +81,7 @@ protected boolean removeEldestEntry(Map.Entry eldest) { private boolean matched = false; private boolean hasCodeBlockCaptures = false; // True if regex has (?{...}) code blocks private boolean deferredUserDefinedUnicodeProperties = false; + private boolean hasBranchReset = false; // True if pattern uses (?|...) branch reset public RuntimeRegex() { this.regexFlags = null; @@ -145,6 +146,7 @@ public static RuntimeRegex compile(String patternString, String modifiers) { // These need to be resolved later, once the corresponding Perl subs are defined. 
regex.deferredUserDefinedUnicodeProperties = RegexPreprocessor.hadDeferredUnicodePropertyEncountered(); regex.hasPreservesMatch = regex.regexFlags.preservesMatch() || RegexPreprocessor.hadInlinePFlag(); + regex.hasBranchReset = RegexPreprocessor.hadBranchReset(); regex.patternString = patternString; @@ -620,10 +622,19 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc if (ctx == RuntimeContextType.LIST) { for (int i = 1; i <= captureCount; i++) { String matchedStr = matcher.group(i); - // Include undef for groups that didn't participate in the match - // This is important for patterns like m{^(.*/)?(.*)}s where - // the optional group returns undef when it doesn't match - matchedGroups.add(new RuntimeScalar(matchedStr)); + if (regex.hasBranchReset) { + // For branch reset patterns (?|...), skip null groups + // because Java creates separate groups for each alternative + // but Perl reuses group numbers across alternatives + if (matchedStr != null) { + matchedGroups.add(new RuntimeScalar(matchedStr)); + } + } else { + // Include undef for groups that didn't participate in the match + // This is important for patterns like m{^(.*/)?(.*)}s where + // the optional group returns undef when it doesn't match + matchedGroups.add(new RuntimeScalar(matchedStr)); + } } } } From 02f44e6ce8e33c3c967d64f457ceeee283d5cec7 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 27 Mar 2026 14:29:20 +0100 Subject: [PATCH 10/10] Document environment variables needed for running tests directly Added documentation about JPERL_UNIMPLEMENTED, JPERL_OPTS, and PERL_SKIP_BIG_MEM_TESTS environment variables that perl_test_runner.pl sets automatically but need to be set manually when running tests directly with ./jperl. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- AGENTS.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 12c8e845f..fa7765a43 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -97,6 +97,30 @@ The runner: - Has a 300s timeout per test - Reports pass/fail counts in format: `passed/total` - Saves results to `test_results_YYYYMMDD_HHMMSS.txt` +- Sets required environment variables automatically (see below) + +#### Running Tests Directly (without perl_test_runner.pl) + +If you run tests directly with `./jperl`, you may need to set these environment variables: + +```bash +# For tests that use unimplemented features (re/pat.t, op/pack.t, etc.) +# Without this, unimplemented features cause fatal errors +export JPERL_UNIMPLEMENTED=warn + +# For memory-intensive tests (re/pat.t, op/repeat.t, op/list.t) +# Increases JVM stack size to prevent StackOverflowError +export JPERL_OPTS="-Xss256m" + +# Skip tests with 300KB+ strings that crash the JVM +export PERL_SKIP_BIG_MEM_TESTS=1 + +# Example: running re/pat.t directly +cd perl5_t/t +JPERL_UNIMPLEMENTED=warn JPERL_OPTS="-Xss256m" PERL_SKIP_BIG_MEM_TESTS=1 ../../jperl re/pat.t +``` + +The perl_test_runner.pl sets these automatically based on the test file being run. ### Git Workflow