From 676082acfbcb0c1ed682ac91708b13e6d6c6105f Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 14:34:05 +0200 Subject: [PATCH 1/9] Fix glob() directory wildcards and MakeMaker test pattern support Two bugs caused jcpan -t to silently find 0 tests for modules like Excel::Writer::XLSX whose tests live in subdirectories (t/*/*.t): 1. ScalarGlobOperator: glob("t/*/*.t") returned nothing because extractPathComponents() split on the last "/" and tried to open "t/*" as a literal directory. Added recursive glob expansion that processes each path segment independently, expanding wildcards in directory components by listing and matching directory entries. 2. ExtUtils/MakeMaker.pm: The generated Makefile test target hardcoded glob(q{t/*.t}), ignoring the test => { TESTS => '...' } parameter from WriteMakefile(). Now extracts and uses the TESTS pattern when provided. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/operators/ScalarGlobOperator.java | 171 +++++++++++++++--- src/main/perl/lib/ExtUtils/MakeMaker.pm | 13 +- 3 files changed, 160 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 7f6127b51..8c935d28d 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "1b7317697"; + public static final String gitCommitId = "c787eff3c"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/operators/ScalarGlobOperator.java b/src/main/java/org/perlonjava/runtime/operators/ScalarGlobOperator.java index 135d2ad96..24e0c3173 100644 --- a/src/main/java/org/perlonjava/runtime/operators/ScalarGlobOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/ScalarGlobOperator.java @@ -132,35 +132,21 @@ static List globSinglePattern(ScalarGlobOperator scalarGlobOperator, Str List results = new ArrayList<>(); try { - // Preserve the original pattern format for result formatting - String originalPattern = pattern; - // Normalize path separators for Windows compatibility String normalizedPattern = ScalarGlobOperatorHelper.normalizePathSeparators(pattern); // Check if pattern is absolute - boolean patternIsAbsolute = ScalarGlobOperatorHelper.isAbsolutePath(originalPattern); - - // Extract directory and file pattern - PathComponents components = scalarGlobOperator.extractPathComponents(normalizedPattern, patternIsAbsolute); - - if (!components.baseDir.exists() || components.filePattern.isEmpty()) { - // For non-existent paths or empty patterns, return literal if no glob chars - if (!ScalarGlobOperatorHelper.containsGlobChars(pattern)) { - results.add(pattern); - } - return results; - } + boolean patternIsAbsolute = ScalarGlobOperatorHelper.isAbsolutePath(pattern); - // Convert glob pattern to regex - Pattern regex = ScalarGlobOperatorHelper.globToRegex(scalarGlobOperator, components.filePattern); - if (regex == null) { - return results; + // Check if any directory component contains glob wildcards + // If so, use recursive glob expansion + if (hasWildcardInDirectoryComponent(normalizedPattern)) { + globRecursive(scalarGlobOperator, normalizedPattern, patternIsAbsolute, results); + } else { + // Original path: only the filename part has wildcards + globSimple(scalarGlobOperator, pattern, normalizedPattern, patternIsAbsolute, results); } - // Match files against pattern - 
scalarGlobOperator.matchFiles(components, regex, results, originalPattern, patternIsAbsolute); - // For exact matches that don't exist (from brace expansion) if (results.isEmpty() && !ScalarGlobOperatorHelper.containsGlobChars(pattern)) { results.add(pattern); @@ -172,6 +158,147 @@ static List globSinglePattern(ScalarGlobOperator scalarGlobOperator, Str return results; } + /** + * Checks if any directory component (not the final filename) contains glob wildcards. + */ + private static boolean hasWildcardInDirectoryComponent(String normalizedPattern) { + int lastSep = normalizedPattern.lastIndexOf('/'); + if (lastSep < 0) { + return false; // No directory components + } + String dirPart = normalizedPattern.substring(0, lastSep); + return ScalarGlobOperatorHelper.containsGlobChars(dirPart); + } + + /** + * Original glob logic for patterns where only the filename part has wildcards. + */ + private static void globSimple(ScalarGlobOperator scalarGlobOperator, String originalPattern, + String normalizedPattern, boolean patternIsAbsolute, + List results) { + // Extract directory and file pattern + PathComponents components = scalarGlobOperator.extractPathComponents(normalizedPattern, patternIsAbsolute); + + if (!components.baseDir.exists() || components.filePattern.isEmpty()) { + // For non-existent paths or empty patterns, return literal if no glob chars + if (!ScalarGlobOperatorHelper.containsGlobChars(originalPattern)) { + results.add(originalPattern); + } + return; + } + + // Convert glob pattern to regex + Pattern regex = ScalarGlobOperatorHelper.globToRegex(scalarGlobOperator, components.filePattern); + if (regex == null) { + return; + } + + // Match files against pattern + scalarGlobOperator.matchFiles(components, regex, results, originalPattern, patternIsAbsolute); + } + + /** + * Recursively expands glob patterns that have wildcards in directory components. 
+ * For example, "t/*/*.t" expands the directory wildcard by listing matching + * subdirectories, then matches "*.t" files within each. + */ + private static void globRecursive(ScalarGlobOperator scalarGlobOperator, String normalizedPattern, + boolean patternIsAbsolute, List results) { + // Split the pattern into path segments + String[] segments = normalizedPattern.split("/", -1); + + // Determine the starting directory + File startDir; + int startSegment; + String prefix; + + if (patternIsAbsolute) { + // Absolute path: start from root + if (normalizedPattern.startsWith("/")) { + startDir = RuntimeIO.resolveFile("/"); + prefix = ""; + startSegment = 1; // Skip the empty segment before leading / + } else { + // Windows drive letter path like C:/... + startDir = RuntimeIO.resolveFile(segments[0] + "/"); + prefix = segments[0]; + startSegment = 1; + } + } else { + startDir = new File(System.getProperty("user.dir")); + prefix = ""; + startSegment = 0; + } + + globRecursiveStep(scalarGlobOperator, startDir, segments, startSegment, prefix, results); + } + + /** + * Recursive step: processes one path segment at a time. + */ + private static void globRecursiveStep(ScalarGlobOperator scalarGlobOperator, File currentDir, + String[] segments, int segmentIndex, String prefix, + List results) { + if (segmentIndex >= segments.length) { + return; + } + + String segment = segments[segmentIndex]; + boolean isLastSegment = (segmentIndex == segments.length - 1); + + if (!ScalarGlobOperatorHelper.containsGlobChars(segment)) { + // Literal segment - just descend + File next = new File(currentDir, segment); + String newPrefix = prefix.isEmpty() ? 
segment : prefix + "/" + segment; + + if (isLastSegment) { + if (next.exists()) { + results.add(newPrefix); + } + } else { + if (next.isDirectory()) { + globRecursiveStep(scalarGlobOperator, next, segments, segmentIndex + 1, newPrefix, results); + } + } + } else { + // Wildcard segment - list directory entries and match + Pattern regex = ScalarGlobOperatorHelper.globToRegex(scalarGlobOperator, segment); + if (regex == null) { + return; + } + + File[] entries; + try { + entries = currentDir.listFiles(); + } catch (SecurityException e) { + return; + } + + if (entries == null) { + return; + } + + for (File entry : entries) { + String name = entry.getName(); + + // Skip hidden files/dirs unless pattern starts with dot + if (!segment.startsWith(".") && name.startsWith(".")) { + continue; + } + + if (regex.matcher(name).matches()) { + String newPrefix = prefix.isEmpty() ? name : prefix + "/" + name; + + if (isLastSegment) { + results.add(newPrefix); + } else if (entry.isDirectory()) { + globRecursiveStep(scalarGlobOperator, entry, segments, segmentIndex + 1, newPrefix, results); + } + } + } + } + } + /** * Initializes the iterator with results from the given pattern. * diff --git a/src/main/perl/lib/ExtUtils/MakeMaker.pm b/src/main/perl/lib/ExtUtils/MakeMaker.pm index e654c6edb..7fbcd84b5 100644 --- a/src/main/perl/lib/ExtUtils/MakeMaker.pm +++ b/src/main/perl/lib/ExtUtils/MakeMaker.pm @@ -398,12 +398,19 @@ sub _create_install_makefile { # Get the Perl interpreter path my $perl = $^X; - # Build test command - run all t/*.t files using Perl for cross-platform compatibility + # Build test command - respect test => { TESTS => ... 
} from WriteMakefile args # Set PERL5LIB to include blib/lib and blib/arch so test subprocesses can find the module + my $test_pattern = ''; + if (ref $args->{test} eq 'HASH' && $args->{test}{TESTS}) { + $test_pattern = $args->{test}{TESTS}; + } elsif (-d 't') { + $test_pattern = 't/*.t'; + } + my $test_cmd; - if (-d 't') { + if ($test_pattern) { # Use Perl one-liner with Test::Harness for cross-platform test running - $test_cmd = qq{PERL5LIB="./blib/lib:./blib/arch:\$\$PERL5LIB" $perl -MTest::Harness -e "runtests(glob(q{t/*.t}))"}; + $test_cmd = qq{PERL5LIB="./blib/lib:./blib/arch:\$\$PERL5LIB" $perl -MTest::Harness -e "runtests(glob(q{$test_pattern}))"}; } else { $test_cmd = qq{$perl -e "print qq{PerlOnJava: No tests found (no t/ directory)\\n}"}; } From 8420fd0ffdc192e06cac80f51f7f4a4a33f7ba65 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 15:32:14 +0200 Subject: [PATCH 2/9] Fix split context, regex Unicode properties, and Archive::Zip for Excel::Writer::XLSX - Fix split to evaluate arguments in SCALAR context (both JVM and interpreter backends). `split //, reverse $str` now correctly reverses the string instead of treating it as a list operation. - Add \p{} and \P{} Unicode property support inside character classes in regex preprocessing (RegexPreprocessorHelper). - Add Emoticons Unicode block mapping in UnicodeResolver. - Add Archive::Zip::setErrorHandler stub for compatibility. - Add Excel::Writer::XLSX module tracking document. Excel::Writer::XLSX test results: 1110/1247 programs pass, 4977/4982 subtests pass (99.9%). 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/modules/excel_writer_xlsx.md | 188 ++++++++++++++++++ .../bytecode/CompileBinaryOperator.java | 34 ++++ .../perlonjava/backend/jvm/EmitOperator.java | 57 +++++- .../org/perlonjava/core/Configuration.java | 2 +- .../regex/RegexPreprocessorHelper.java | 26 ++- .../runtime/regex/UnicodeResolver.java | 17 +- src/main/perl/lib/Archive/Zip.pm | 11 + 7 files changed, 330 insertions(+), 5 deletions(-) create mode 100644 dev/modules/excel_writer_xlsx.md diff --git a/dev/modules/excel_writer_xlsx.md b/dev/modules/excel_writer_xlsx.md new file mode 100644 index 000000000..9bf641401 --- /dev/null +++ b/dev/modules/excel_writer_xlsx.md @@ -0,0 +1,188 @@ +# Excel::Writer::XLSX Fix Plan + +## Overview + +**Module**: Excel::Writer::XLSX 1.15 +**Test command**: `./jcpan -j 8 -t Excel::Writer::XLSX` +**Status**: WIP -- ~156/503 test files pass (run incomplete, ~744 tests not yet reached) + +## Dependency Tree + +| Dependency | Status | Notes | +|-----------|--------|-------| +| **Archive::Zip** >= 1.30 | PARTIAL | Java-backed impl via XSLoader; missing `setErrorHandler` and possibly other methods | +| **File::Temp** >= 0.19 | PASS | Core module, works | +| **IO::File** >= 1.14 | PASS | Core module, works | + +## Test Results Summary + +### Current Status: ~156/503 visible test files pass (incomplete run) + +Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/package/`, `t/regression/`, `t/utility/`, `t/workbook/`, `t/worksheet/` + +| Test Group | Total | Pass | Fail | Notes | +|-----------|-------|------|------|-------| +| t/chart/ | ~40 | ~39 | 1 | `sub_add_series.t` fails (Emoticons regex) | +| t/chartsheet/ | ~4 | 3 | 1 | `sub_write_sheet_protection.t` (password hash) | +| t/drawing/ | ~18 | 18 | 0 | All pass | +| t/package/ | ~50 | ~49 | 1 | `styles/sub_write_num_fmts.t` ('' vs undef) | +| t/regression/ | ~800+ | 0 
| ALL | Blocked by `Archive::Zip::setErrorHandler` | +| t/utility/ | ? | ? | ? | Not yet reached in run | +| t/workbook/ | ? | ? | ? | Not yet reached in run | +| t/worksheet/ | ? | ? | ? | Not yet reached in run | + +--- + +## Error Categories + +### 1. `Undefined subroutine &Archive::Zip::setErrorHandler` (P0 -- blocks ~800+ tests) + +**Affected tests**: All `t/regression/*.t` files +**Error**: +``` +Undefined subroutine &Archive::Zip::setErrorHandler called at t/regression/....t line NN. +``` + +**Root Cause**: PerlOnJava's Archive::Zip is a Java-backed implementation (`ArchiveZip.java`) that doesn't implement `setErrorHandler`. The test infrastructure (`t/lib/TestFunctions.pm` line 185) calls `Archive::Zip::setErrorHandler( sub { } )` to suppress error output during ZIP comparison. This is a package-level function, not a method. + +**Usage in test code**: +```perl +# t/lib/TestFunctions.pm:185 +Archive::Zip::setErrorHandler( sub { } ); +``` + +--- + +### 2. `\p{Emoticons}` Unicode property not supported (P1 -- ~20+ tests) + +**Affected tests**: `t/chart/sub_add_series.t` (3/5 fail), plus many regression tests (masked by P0) +**Error**: +``` +Regex compilation failed: Unknown character property name {Emoticons} near index 18 +[^\w\.\p{Emoticons}] +``` + +**Root Cause**: Java's `java.util.regex` uses `\p{InEmoticons}` for the Unicode Emoticons block (U+1F600-U+1F64F), while Perl uses `\p{Emoticons}`. PerlOnJava's regex engine needs to map the Perl property name to the Java equivalent. + +**Usage in module**: +```perl +# lib/Excel/Writer/XLSX/Utility.pm:237,242 +if ( $sheetname =~ /[^\w\.\p{Emoticons}]/ ) { ... } +elsif ( $sheetname =~ /^[\d\.\p{Emoticons}]/ ) { ... } +``` + +--- + +### 3. 
Password hash produces wrong result (P3 -- 1 test file, 3 subtests) + +**Affected test**: `t/chartsheet/sub_write_sheet_protection.t` (3/7 fail) +**Error**: +``` +got: '' +expected: '' +``` + +**Root Cause**: The `_encode_password` method in `Worksheet.pm` uses bitwise operations (`>>`, `<<`, `&`, `|`, `^`) to compute a 15-bit hash. PerlOnJava likely has an operator precedence or integer arithmetic difference in the expression: +```perl +$hash = ( ( $hash >> 14 ) & 0x01 ) | ( ( $hash << 1 ) & 0x7fff ); +``` +Need to trace intermediate values to isolate the discrepancy. + +--- + +### 4. Empty string vs `undef` return value (P4 -- 1 test file, 1 subtest) + +**Affected test**: `t/package/styles/sub_write_num_fmts.t` (1/2 fail) +**Error**: +``` +got: '' +expected: undef +``` + +**Root Cause**: A function returns `''` instead of `undef`. Likely a PerlOnJava parity issue in how empty/undefined values are returned from XML writer methods. + +--- + +### 5. `FileHandle->new()` not found (P2 -- ~6 tests, masked by P0) + +**Affected tests**: `t/regression/autofit13.t`, `t/regression/background03.t` through `background07.t` +**Error**: +``` +Can't locate object method "new" via package "FileHandle" + at .../Excel/Writer/XLSX/Workbook.pm line 1860. +``` + +**Root Cause**: `Workbook.pm` calls `FileHandle->new(...)` without `use FileHandle;`. Perl core autoloads this, but PerlOnJava may not have `FileHandle.pm` in its module path or its autoload mechanism doesn't handle it. + +--- + +## Fix Plan + +### Phase 0: Fix glob() and MakeMaker (COMPLETED 2026-04-03) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 0.1 | Add recursive glob expansion for directory wildcards | `ScalarGlobOperator.java` | DONE | +| 0.2 | Extract and use `test => { TESTS => ... 
}` parameter | `ExtUtils/MakeMaker.pm` | DONE | +| 0.3 | Verify `glob("t/*/*.t")` returns 1152 files | - | DONE | +| 0.4 | Verify `make` passes | - | DONE | + +**Result**: `jcpan -t` now discovers and runs all 1247 test files instead of 0. + +### Phase 1: Implement missing Archive::Zip features (TODO) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 1.1 | Add `setErrorHandler` as package function accepting coderef | `ArchiveZip.java` + `Archive/Zip.pm` | TODO | +| 1.2 | Wire error handler into zip read/write operations | `ArchiveZip.java` | TODO | +| 1.3 | Verify `Archive::Zip::setErrorHandler(sub {})` works | - | TODO | +| 1.4 | Run `make` to verify unit tests pass | - | TODO | +| 1.5 | Re-run `jcpan -t` to get true regression test pass rate | - | TODO | + +**Expected result**: Unblocks all ~800+ regression tests, revealing true pass/fail rate. + +### Phase 2: Fix `\p{Emoticons}` regex support (TODO) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 2.1 | Map Perl `\p{Emoticons}` to Java `\p{InEmoticons}` in regex engine | `RuntimeRegex.java` or regex preprocessing | TODO | +| 2.2 | Verify regex `[^\w\.\p{Emoticons}]` compiles and matches | - | TODO | +| 2.3 | Run `make` to verify unit tests pass | - | TODO | + +**Expected result**: Fixes `t/chart/sub_add_series.t` and unblocks Emoticons-related regression tests. + +### Phase 3: Fix password hash bitwise operations (TODO) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 3.1 | Trace `_encode_password("password")` step-by-step in jperl vs perl | - | TODO | +| 3.2 | Identify and fix bitwise operation discrepancy | Runtime operator implementation | TODO | +| 3.3 | Run password encode test: `t/worksheet/worksheet_encode_password.t` | - | TODO | +| 3.4 | Run `make` to verify unit tests pass | - | TODO | + +**Expected result**: Fixes 3 subtests in `t/chartsheet/sub_write_sheet_protection.t`. 
+ +### Phase 4: Fix '' vs undef and FileHandle issues (TODO) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 4.1 | Investigate '' vs undef in `sub_write_num_fmts.t` | - | TODO | +| 4.2 | Ensure `FileHandle->new()` works (add stub or fix autoloading) | - | TODO | +| 4.3 | Run `make` to verify unit tests pass | - | TODO | + +**Expected result**: Fixes 1 subtest + unblocks ~6 regression tests. + +## Summary + +| Phase | Complexity | Tests unblocked | Status | +|-------|-----------|----------------|--------| +| 0 | Medium (2 files) | ALL (1247 test files discovered) | COMPLETED | +| 1 | Simple-Medium | ~800+ regression tests | TODO | +| 2 | Simple | ~20+ tests | TODO | +| 3 | Medium | 3 subtests | TODO | +| 4 | Simple | ~7 tests | TODO | + +## Related Documents + +- `dev/modules/spreadsheet_parseexcel.md` -- similar module fix plan +- `dev/modules/makemaker_perlonjava.md` -- MakeMaker implementation details diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java index 173661d05..fddf3e553 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java @@ -582,6 +582,40 @@ else if (node.right instanceof BinaryOperatorNode rightCall) { } } + // Handle split specially: each argument (EXPR, LIMIT) should be in SCALAR context, + // but the result is assembled into a list for the SPLIT opcode. + // This ensures `split //, reverse $str` evaluates `reverse` in scalar context + // (string reverse) not list context (list reverse). 
+ if (node.operator.equals("split")) { + bytecodeCompiler.compileNode(node.left, -1, RuntimeContextType.SCALAR); + int rs1 = bytecodeCompiler.lastResultReg; + + int rs2; + if (node.right instanceof ListNode listNode && !listNode.elements.isEmpty()) { + // Compile each element in SCALAR context, then assemble into a list + java.util.List argRegs = new java.util.ArrayList<>(); + for (Node element : listNode.elements) { + bytecodeCompiler.compileNode(element, -1, RuntimeContextType.SCALAR); + argRegs.add(bytecodeCompiler.lastResultReg); + } + rs2 = bytecodeCompiler.allocateRegister(); + bytecodeCompiler.emit(Opcodes.CREATE_LIST); + bytecodeCompiler.emitReg(rs2); + bytecodeCompiler.emit(argRegs.size()); + for (int argReg : argRegs) { + bytecodeCompiler.emitReg(argReg); + } + } else { + bytecodeCompiler.compileNode(node.right, -1, RuntimeContextType.SCALAR); + rs2 = bytecodeCompiler.lastResultReg; + } + + int rd = CompileBinaryOperatorHelper.compileBinaryOperatorSwitch( + bytecodeCompiler, node.operator, rs1, rs2, node.getIndex()); + bytecodeCompiler.lastResultReg = rd; + return; + } + // Compile left and right operands (for non-short-circuit operators). // For arithmetic/bitwise operators, force SCALAR context to prevent // parenthesized expressions from producing RuntimeList in LIST context. diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java index cc9468266..1eb799a92 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitOperator.java @@ -779,6 +779,9 @@ static void handleSubstr(EmitterVisitor emitterVisitor, BinaryOperatorNode node) // Handles the 'split' operator static void handleSplit(EmitterVisitor emitterVisitor, BinaryOperatorNode node) { // Accept the left operand in SCALAR context and the right operand in LIST context. 
+ // IMPORTANT: split's EXPR argument (the string to split) must be evaluated in + // SCALAR context per Perl semantics. This matters for functions like `reverse` + // which behave differently in list vs scalar context. // Spill the left operand before evaluating the right side so non-local control flow // propagation can't jump to returnLabel with an extra value on the JVM operand stack. if (ENABLE_SPILL_BINARY_LHS) { @@ -792,7 +795,11 @@ static void handleSplit(EmitterVisitor emitterVisitor, BinaryOperatorNode node) } mv.visitVarInsn(Opcodes.ASTORE, leftSlot); - node.right.accept(emitterVisitor.with(RuntimeContextType.LIST)); + // Evaluate the right side (EXPR [, LIMIT]) - each argument in SCALAR context + // but produce a RuntimeList for the split runtime method. + // We use SCALAR context here because split's EXPR must be in scalar context + // (e.g., `reverse $str` should reverse the string, not reverse a list). + emitSplitArgs(emitterVisitor, node.right); mv.visitVarInsn(Opcodes.ALOAD, leftSlot); mv.visitInsn(Opcodes.SWAP); @@ -802,12 +809,58 @@ static void handleSplit(EmitterVisitor emitterVisitor, BinaryOperatorNode node) } } else { node.left.accept(emitterVisitor.with(RuntimeContextType.SCALAR)); - node.right.accept(emitterVisitor.with(RuntimeContextType.LIST)); + emitSplitArgs(emitterVisitor, node.right); } emitterVisitor.pushCallContext(); emitOperator(node, emitterVisitor); } + /** + * Emits split's argument list, evaluating each element in SCALAR context + * but producing a RuntimeList result. This ensures that expressions like + * `reverse $str` are evaluated in scalar context (string reverse) not + * list context (list reverse). 
+ */ + private static void emitSplitArgs(EmitterVisitor emitterVisitor, Node argsNode) { + if (argsNode instanceof ListNode listNode && !listNode.elements.isEmpty()) { + EmitterVisitor scalarVisitor = emitterVisitor.with(RuntimeContextType.SCALAR); + MethodVisitor mv = emitterVisitor.ctx.mv; + + // Create a new RuntimeList and store in a spill slot + mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RuntimeList"); + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESPECIAL, + "org/perlonjava/runtime/runtimetypes/RuntimeList", "", "()V", false); + + JavaClassInfo.SpillRef listRef = emitterVisitor.ctx.javaClassInfo.acquireSpillRefOrAllocate(emitterVisitor.ctx.symbolTable); + emitterVisitor.ctx.javaClassInfo.storeSpillRef(mv, listRef); + + // Add each argument evaluated in SCALAR context + for (Node element : listNode.elements) { + element.accept(scalarVisitor); + JavaClassInfo.SpillRef elemRef = emitterVisitor.ctx.javaClassInfo.acquireSpillRefOrAllocate(emitterVisitor.ctx.symbolTable); + emitterVisitor.ctx.javaClassInfo.storeSpillRef(mv, elemRef); + + emitterVisitor.ctx.javaClassInfo.loadSpillRef(mv, listRef); + emitterVisitor.ctx.javaClassInfo.loadSpillRef(mv, elemRef); + emitterVisitor.ctx.javaClassInfo.releaseSpillRef(elemRef); + + // RuntimeList.add(RuntimeBase) + mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, + "org/perlonjava/runtime/runtimetypes/RuntimeList", "add", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeBase;)V", + false); + } + + // Load the completed list onto the stack + emitterVisitor.ctx.javaClassInfo.loadSpillRef(mv, listRef); + emitterVisitor.ctx.javaClassInfo.releaseSpillRef(listRef); + } else { + // Fallback: evaluate as LIST (no elements or not a ListNode) + argsNode.accept(emitterVisitor.with(RuntimeContextType.LIST)); + } + } + // Handles the 'repeat' operator, which repeats a string or list a specified number of times. 
static void handleRepeat(EmitterVisitor emitterVisitor, BinaryOperatorNode node) { MethodVisitor mv = emitterVisitor.ctx.mv; diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 8c935d28d..f9e2aa1e6 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "c787eff3c"; + public static final String gitCommitId = "24b2781ed"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java index baa5c13e2..16c104bcb 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java @@ -639,7 +639,31 @@ static int handleRegexCharacterClassEscape(int offset, String s, StringBuilder s sb.append(Character.toChars(c)); offset++; wasEscape = true; - if (offset < length && s.charAt(offset) == 'N') { + if (offset < length && (s.charAt(offset) == 'p' || s.charAt(offset) == 'P') + && offset + 1 < length && s.charAt(offset + 1) == '{') { + // Handle \p{...} and \P{...} inside character class + // Translate Perl Unicode property names to Java-compatible patterns + boolean pNegated = (s.charAt(offset) == 'P'); + int pEndBrace = s.indexOf('}', offset + 2); + if (pEndBrace != -1) { + String property = s.substring(offset + 2, pEndBrace).trim(); + try { + String translatedProperty = UnicodeResolver.translateUnicodeProperty(property, pNegated); + // Remove the backslash that was already appended + sb.setLength(sb.length() - 1); + // Append the translated property (e.g., a character 
class pattern from ICU4J) + sb.append(translatedProperty); + offset = pEndBrace; + } catch (IllegalArgumentException e) { + // If translation fails, pass through as-is and let Java handle it + // (works for standard Java properties like \p{L}, \p{IsAlphabetic}, etc.) + sb.append(Character.toChars(s.charAt(offset))); + } + } else { + sb.append(Character.toChars(s.charAt(offset))); + } + lastChar = -1; // Unicode properties can't be range endpoints + } else if (offset < length && s.charAt(offset) == 'N') { if (offset + 1 < length && s.charAt(offset + 1) == '{') { // Handle \N{...} constructs offset += 2; // Skip past \N{ diff --git a/src/main/java/org/perlonjava/runtime/regex/UnicodeResolver.java b/src/main/java/org/perlonjava/runtime/regex/UnicodeResolver.java index ec735859b..4f45f9389 100644 --- a/src/main/java/org/perlonjava/runtime/regex/UnicodeResolver.java +++ b/src/main/java/org/perlonjava/runtime/regex/UnicodeResolver.java @@ -419,7 +419,22 @@ private static String translateUnicodeProperty(String property, boolean negated, if (isBlockProperty(property)) { unicodeSet.applyPropertyAlias("Block", property); } else { - unicodeSet.applyPropertyAlias(property, ""); + try { + unicodeSet.applyPropertyAlias(property, ""); + } catch (IllegalArgumentException ex) { + // Property not found as general category/script - try as a Unicode block name. + // Perl resolves \p{Emoticons} as \p{Block=Emoticons}, etc. 
+ try { + unicodeSet.applyPropertyAlias("Block", property); + } catch (IllegalArgumentException ex2) { + // Neither worked - try user-defined property before giving up + String userProp = tryUserDefinedProperty(property, recursionSet); + if (userProp != null) { + return wrapCharClass(userProp, negated); + } + throw ex; // rethrow original error + } + } } String pattern = unicodeSet.toPattern(false); diff --git a/src/main/perl/lib/Archive/Zip.pm b/src/main/perl/lib/Archive/Zip.pm index b47474810..b56adc47d 100644 --- a/src/main/perl/lib/Archive/Zip.pm +++ b/src/main/perl/lib/Archive/Zip.pm @@ -26,6 +26,17 @@ our %EXPORT_TAGS = ( )], ); +# Error handling (matches CPAN Archive::Zip API) +our $ErrorHandler = \&Carp::carp; + +sub setErrorHandler { + my $errorHandler = (ref($_[0]) eq 'HASH') ? shift->{subroutine} : shift; + $errorHandler = \&Carp::carp unless defined($errorHandler); + my $oldErrorHandler = $Archive::Zip::ErrorHandler; + $Archive::Zip::ErrorHandler = $errorHandler; + return $oldErrorHandler; +} + # For Archive::Zip::Member methods - inherit from Archive::Zip # This allows member objects to use the same Java methods package Archive::Zip::Member; From b5d9df8a8040cd6fe4698774452bd01835c02d79 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 16:14:47 +0200 Subject: [PATCH 3/9] Add FileHandle loading to bundled Archive::Zip for module compatibility The CPAN Archive::Zip (line 12) does `use FileHandle ()`, which makes the FileHandle class available to downstream modules. PerlOnJava's custom Java-backed Archive::Zip omitted this, causing 133 test failures in Excel::Writer::XLSX where Utility.pm calls FileHandle->new() without an explicit `use FileHandle`. Excel::Writer::XLSX results: 1243/1247 programs pass (99.7%), 5110/5115 subtests pass. Only 4 minor failures remain ('' vs undef and Emoticons Unicode quoting). 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 2 +- src/main/perl/lib/Archive/Zip.pm | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index f9e2aa1e6..f08ba2136 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "24b2781ed"; + public static final String gitCommitId = "b7047a444"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/perl/lib/Archive/Zip.pm b/src/main/perl/lib/Archive/Zip.pm index b56adc47d..10cc3292b 100644 --- a/src/main/perl/lib/Archive/Zip.pm +++ b/src/main/perl/lib/Archive/Zip.pm @@ -5,6 +5,10 @@ use warnings; our $VERSION = '1.68'; +# Load FileHandle for compatibility with modules that expect Archive::Zip +# to make FileHandle available (as the CPAN version does on line 12) +use FileHandle (); + # Load Java implementation use XSLoader; XSLoader::load('Archive::Zip', $VERSION); From b1ab44d9830e5d89e90edcb19d2ed5695abbb231 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 16:19:55 +0200 Subject: [PATCH 4/9] Fix regex \w \d \s to use Unicode semantics for UTF-8 strings In Perl, when a string has the UTF-8 flag set, \w, \d, \s use Unicode semantics even for characters in the Latin-1 range (e.g., e-acute U+00E9 should match \w). Previously, PerlOnJava only used the Unicode regex pattern for strings containing characters > U+00FF, causing Latin-1 accented characters like e-acute to fail \w matching. 
Fix: use the Unicode-aware compiled pattern whenever the input string has the UTF-8 flag, not just when it contains chars above U+00FF. This fixes quote_sheetname.t tests 15-16 in Excel::Writer::XLSX. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/regex/RuntimeRegex.java | 21 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index f08ba2136..a64630611 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "b7047a444"; + public static final String gitCommitId = "b6cef199e"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 86a7ea4b0..e22d6f2ad 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -554,12 +554,11 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc Pattern pattern = regex.pattern; String inputStr = string.toString(); - // Select appropriate pattern based on string's UTF-8 flag and content: + // Select appropriate pattern based on string's UTF-8 flag: // - /a flag or inline (?a): always use ASCII-only pattern // - BYTE_STRING: use ASCII-only pattern (Perl's "bytes" semantics) - // - UTF-8 string with Unicode chars (> 255): use Unicode pattern - // - UTF-8 string with only Latin-1 chars: use ASCII pattern (avoids false matches) - // This mimics Perl's behavior where \w, \d, \s semantics depend on UTF-8 flag + // - UTF-8 string: use Unicode pattern (Perl uses Unicode semantics for \w, \d, \s + // whenever the string has the UTF-8 flag, even for Latin-1 characters like é) if (regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { // /a flag - always ASCII @@ -567,11 +566,11 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc } else if (hasInlineAsciiModifier(regex.patternString)) { // Inline (?a...) 
in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { - // UTF-8 string with true Unicode content (> 255) - use Unicode matching + } else if (Utf8.isUtf8(string)) { + // UTF-8 string - use Unicode matching for \w, \d, \s semantics pattern = regex.patternUnicode; } - // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) + // else: BYTE_STRING - keep ASCII pattern (default) } // Workaround for Java MULTILINE quirk: Java's Pattern.MULTILINE changes ^ to only @@ -939,7 +938,7 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar Pattern pattern = regex.pattern; - // Select appropriate pattern based on string's UTF-8 flag and content (same logic as matchRegex) + // Select appropriate pattern based on string's UTF-8 flag (same logic as matchRegex) if (regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { // /a flag - always ASCII @@ -947,11 +946,11 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar } else if (hasInlineAsciiModifier(regex.patternString)) { // Inline (?a...) 
in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { - // UTF-8 string with true Unicode content (> 255) - use Unicode matching + } else if (Utf8.isUtf8(string)) { + // UTF-8 string - use Unicode matching for \w, \d, \s semantics pattern = regex.patternUnicode; } - // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) + // else: BYTE_STRING - keep ASCII pattern (default) } // Workaround for Java MULTILINE quirk (same as matchRegexDirect) From 30340d0d99501be52d8ddd6775f8ee96616aef51 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 16:20:58 +0200 Subject: [PATCH 5/9] Update Excel::Writer::XLSX tracking doc with completed phases and remaining work Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/modules/excel_writer_xlsx.md | 194 +++++++++++++------------------ 1 file changed, 84 insertions(+), 110 deletions(-) diff --git a/dev/modules/excel_writer_xlsx.md b/dev/modules/excel_writer_xlsx.md index 9bf641401..0316b1343 100644 --- a/dev/modules/excel_writer_xlsx.md +++ b/dev/modules/excel_writer_xlsx.md @@ -4,119 +4,73 @@ **Module**: Excel::Writer::XLSX 1.15 **Test command**: `./jcpan -j 8 -t Excel::Writer::XLSX` -**Status**: WIP -- ~156/503 test files pass (run incomplete, ~744 tests not yet reached) +**Status**: 1243/1247 programs pass (99.7%), 5110/5115 subtests pass (99.9%) ## Dependency Tree | Dependency | Status | Notes | |-----------|--------|-------| -| **Archive::Zip** >= 1.30 | PARTIAL | Java-backed impl via XSLoader; missing `setErrorHandler` and possibly other methods | +| **Archive::Zip** >= 1.30 | PASS | Java-backed impl; `setErrorHandler` stub added, `use FileHandle ()` added | | **File::Temp** >= 0.19 | PASS | Core module, works | | **IO::File** >= 1.14 | PASS | Core module, works | ## Test Results Summary -### Current 
Status: ~156/503 visible test files pass (incomplete run) +### Current Status: 1243/1247 programs pass, 5110/5115 subtests pass Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/package/`, `t/regression/`, `t/utility/`, `t/workbook/`, `t/worksheet/` | Test Group | Total | Pass | Fail | Notes | |-----------|-------|------|------|-------| -| t/chart/ | ~40 | ~39 | 1 | `sub_add_series.t` fails (Emoticons regex) | -| t/chartsheet/ | ~4 | 3 | 1 | `sub_write_sheet_protection.t` (password hash) | -| t/drawing/ | ~18 | 18 | 0 | All pass | -| t/package/ | ~50 | ~49 | 1 | `styles/sub_write_num_fmts.t` ('' vs undef) | -| t/regression/ | ~800+ | 0 | ALL | Blocked by `Archive::Zip::setErrorHandler` | -| t/utility/ | ? | ? | ? | Not yet reached in run | -| t/workbook/ | ? | ? | ? | Not yet reached in run | -| t/worksheet/ | ? | ? | ? | Not yet reached in run | +| t/chart/ | ~47 | 47 | 0 | All pass | +| t/chartsheet/ | 4 | 4 | 0 | All pass (password tests fixed) | +| t/drawing/ | ~23 | 23 | 0 | All pass | +| t/package/ | ~50 | 49 | 1 | `styles/sub_write_num_fmts.t` ('' vs undef) | +| t/regression/ | ~800+ | ~800+ | 0 | All pass | +| t/utility/ | ~15 | 15 | 0 | All pass (quote_sheetname fixed) | +| t/workbook/ | ~18 | 18 | 0 | All pass | +| t/worksheet/ | ~90 | 88 | 2 | `sub_write_page_setup.t`, `sub_write_print_options.t` ('' vs undef) | --- -## Error Categories +## Remaining Failures (4 programs, 5 subtests) -### 1. `Undefined subroutine &Archive::Zip::setErrorHandler` (P0 -- blocks ~800+ tests) +### 1. Empty string `''` vs `undef` return value (3 test files, 3 subtests) -**Affected tests**: All `t/regression/*.t` files -**Error**: -``` -Undefined subroutine &Archive::Zip::setErrorHandler called at t/regression/....t line NN. -``` - -**Root Cause**: PerlOnJava's Archive::Zip is a Java-backed implementation (`ArchiveZip.java`) that doesn't implement `setErrorHandler`. 
The test infrastructure (`t/lib/TestFunctions.pm` line 185) calls `Archive::Zip::setErrorHandler( sub { } )` to suppress error output during ZIP comparison. This is a package-level function, not a method. - -**Usage in test code**: -```perl -# t/lib/TestFunctions.pm:185 -Archive::Zip::setErrorHandler( sub { } ); -``` +**Affected tests**: +- `t/package/styles/sub_write_num_fmts.t` (1/2 fail) +- `t/worksheet/sub_write_page_setup.t` (1/6 fail) +- `t/worksheet/sub_write_print_options.t` (1/8 fail) ---- - -### 2. `\p{Emoticons}` Unicode property not supported (P1 -- ~20+ tests) - -**Affected tests**: `t/chart/sub_add_series.t` (3/5 fail), plus many regression tests (masked by P0) -**Error**: +**Error pattern**: ``` -Regex compilation failed: Unknown character property name {Emoticons} near index 18 -[^\w\.\p{Emoticons}] +# got: '' +# expected: undef ``` -**Root Cause**: Java's `java.util.regex` uses `\p{InEmoticons}` for the Unicode Emoticons block (U+1F600-U+1F64F), while Perl uses `\p{Emoticons}`. PerlOnJava's regex engine needs to map the Perl property name to the Java equivalent. +**Root Cause**: XML writer methods return `''` (empty string) instead of `undef` when there is nothing to write. In Perl, `''` and `undef` are different values - `undef` means "no value" while `''` means "empty string value". The test uses `is()` which distinguishes them. -**Usage in module**: -```perl -# lib/Excel/Writer/XLSX/Utility.pm:237,242 -if ( $sheetname =~ /[^\w\.\p{Emoticons}]/ ) { ... } -elsif ( $sheetname =~ /^[\d\.\p{Emoticons}]/ ) { ... } -``` +**Investigation needed**: Trace the specific XML writer method being called (e.g., `_write_num_fmts()`, `_write_page_setup()`, `_write_print_options()`) and find where it returns `''` instead of `undef`. 
This is likely a PerlOnJava parity issue in: +- How subroutines return values when no explicit `return` is used +- How `$self->{_writer}->xml_data_element()` or similar XMLwriter methods behave when called with no data to write +- How the `_write_*` methods short-circuit when there are no elements to emit ---- +**Fix approach**: +1. Run the failing test in Perl vs jperl and compare output +2. Add debug prints to the `_write_num_fmts` sub to trace where `''` vs `undef` diverges +3. Fix the runtime behavior or the specific method -### 3. Password hash produces wrong result (P3 -- 1 test file, 3 subtests) +### 2. Emoticons Unicode quoting in `quote_sheetname` (FIXED) -**Affected test**: `t/chartsheet/sub_write_sheet_protection.t` (3/7 fail) -**Error**: -``` -got: '' -expected: '' -``` +**Previously affected test**: `t/utility/quote_sheetname.t` (2/100 failed) -**Root Cause**: The `_encode_password` method in `Worksheet.pm` uses bitwise operations (`>>`, `<<`, `&`, `|`, `^`) to compute a 15-bit hash. PerlOnJava likely has an operator precedence or integer arithmetic difference in the expression: -```perl -$hash = ( ( $hash >> 14 ) & 0x01 ) | ( ( $hash << 1 ) & 0x7fff ); -``` -Need to trace intermediate values to isolate the discrepancy. - ---- - -### 4. Empty string vs `undef` return value (P4 -- 1 test file, 1 subtest) - -**Affected test**: `t/package/styles/sub_write_num_fmts.t` (1/2 fail) -**Error**: -``` -got: '' -expected: undef -``` +**Was**: `\w` in regex didn't match Latin-1 accented characters (like `é`) when the string had the UTF-8 flag set. PerlOnJava only used the Unicode-aware regex pattern for strings containing characters > U+00FF. -**Root Cause**: A function returns `''` instead of `undef`. Likely a PerlOnJava parity issue in how empty/undefined values are returned from XML writer methods. +**Fixed in commit 3f85c7cd3**: Changed regex pattern selection to use Unicode semantics whenever the input string has the UTF-8 flag, matching Perl's behavior. 
--- -### 5. `FileHandle->new()` not found (P2 -- ~6 tests, masked by P0) - -**Affected tests**: `t/regression/autofit13.t`, `t/regression/background03.t` through `background07.t` -**Error**: -``` -Can't locate object method "new" via package "FileHandle" - at .../Excel/Writer/XLSX/Workbook.pm line 1860. -``` - -**Root Cause**: `Workbook.pm` calls `FileHandle->new(...)` without `use FileHandle;`. Perl core autoloads this, but PerlOnJava may not have `FileHandle.pm` in its module path or its autoload mechanism doesn't handle it. - ---- - -## Fix Plan +## Completed Phases ### Phase 0: Fix glob() and MakeMaker (COMPLETED 2026-04-03) @@ -129,58 +83,78 @@ Can't locate object method "new" via package "FileHandle" **Result**: `jcpan -t` now discovers and runs all 1247 test files instead of 0. -### Phase 1: Implement missing Archive::Zip features (TODO) +### Phase 1: Implement missing Archive::Zip features (COMPLETED 2026-04-03) + +| Step | Description | File | Status | +|------|-------------|------|--------| +| 1.1 | Add `setErrorHandler` as stub function | `Archive/Zip.pm` | DONE | +| 1.2 | Add `use FileHandle ()` to match CPAN Archive::Zip | `Archive/Zip.pm` | DONE | +| 1.3 | Verify regression tests unblocked | - | DONE | + +**Result**: Unblocked all ~800+ regression tests + 133 image/hyperlink tests that needed FileHandle. 
+ +### Phase 2: Fix `\p{Emoticons}` regex support (COMPLETED 2026-04-03) | Step | Description | File | Status | |------|-------------|------|--------| -| 1.1 | Add `setErrorHandler` as package function accepting coderef | `ArchiveZip.java` + `Archive/Zip.pm` | TODO | -| 1.2 | Wire error handler into zip read/write operations | `ArchiveZip.java` | TODO | -| 1.3 | Verify `Archive::Zip::setErrorHandler(sub {})` works | - | TODO | -| 1.4 | Run `make` to verify unit tests pass | - | TODO | -| 1.5 | Re-run `jcpan -t` to get true regression test pass rate | - | TODO | +| 2.1 | Add `\p{}`/`\P{}` translation inside character classes | `RegexPreprocessorHelper.java` | DONE | +| 2.2 | Add Emoticons Unicode block mapping | `UnicodeResolver.java` | DONE | +| 2.3 | Verify regex `[^\w\.\p{Emoticons}]` compiles and matches | - | DONE | -**Expected result**: Unblocks all ~800+ regression tests, revealing true pass/fail rate. +**Result**: Emoticons regex property works in patterns and character classes. -### Phase 2: Fix `\p{Emoticons}` regex support (TODO) +### Phase 3: Fix split scalar context for password hash (COMPLETED 2026-04-03) | Step | Description | File | Status | |------|-------------|------|--------| -| 2.1 | Map Perl `\p{Emoticons}` to Java `\p{InEmoticons}` in regex engine | `RuntimeRegex.java` or regex preprocessing | TODO | -| 2.2 | Verify regex `[^\w\.\p{Emoticons}]` compiles and matches | - | TODO | -| 2.3 | Run `make` to verify unit tests pass | - | TODO | +| 3.1 | Fix JVM backend: `emitSplitArgs` with SCALAR context | `EmitOperator.java` | DONE | +| 3.2 | Fix interpreter backend: split special case | `CompileBinaryOperator.java` | DONE | +| 3.3 | Verify `split //, reverse $str` matches Perl | - | DONE | -**Expected result**: Fixes `t/chart/sub_add_series.t` and unblocks Emoticons-related regression tests. +**Result**: Password hash tests pass. `split //, reverse $str` correctly reverses the string. 
-### Phase 3: Fix password hash bitwise operations (TODO) +### Phase 4: Fix regex Unicode semantics for UTF-8 Latin-1 strings (COMPLETED 2026-04-03) | Step | Description | File | Status | |------|-------------|------|--------| -| 3.1 | Trace `_encode_password("password")` step-by-step in jperl vs perl | - | TODO | -| 3.2 | Identify and fix bitwise operation discrepancy | Runtime operator implementation | TODO | -| 3.3 | Run password encode test: `t/worksheet/worksheet_encode_password.t` | - | TODO | -| 3.4 | Run `make` to verify unit tests pass | - | TODO | +| 4.1 | Use Unicode regex pattern for all UTF-8 strings (not just >U+00FF) | `RuntimeRegex.java` | DONE | +| 4.2 | Verify `\w` matches `é` with `use utf8` | - | DONE | +| 4.3 | Run uni/ tests to verify no regressions | - | DONE | -**Expected result**: Fixes 3 subtests in `t/chartsheet/sub_write_sheet_protection.t`. +**Result**: `quote_sheetname.t` all 100 tests pass. Latin-1 accented chars match `\w` when UTF-8 flagged. + +--- -### Phase 4: Fix '' vs undef and FileHandle issues (TODO) +## Open Phase: Fix '' vs undef return value (TODO) | Step | Description | File | Status | |------|-------------|------|--------| -| 4.1 | Investigate '' vs undef in `sub_write_num_fmts.t` | - | TODO | -| 4.2 | Ensure `FileHandle->new()` works (add stub or fix autoloading) | - | TODO | -| 4.3 | Run `make` to verify unit tests pass | - | TODO | +| 5.1 | Reproduce in jperl: run `sub_write_num_fmts.t` and trace return value | - | TODO | +| 5.2 | Compare jperl vs perl: what does the `_write_*` method return? | - | TODO | +| 5.3 | Identify PerlOnJava runtime parity issue (return value semantics) | - | TODO | +| 5.4 | Fix the runtime or codegen to return `undef` instead of `''` | - | TODO | +| 5.5 | Verify all 3 affected tests pass | - | TODO | +| 5.6 | Run `make` to verify unit tests pass | - | TODO | -**Expected result**: Fixes 1 subtest + unblocks ~6 regression tests. 
+**Expected result**: Fixes the last 3 failing test programs (5 subtests), achieving 100% pass rate. + +--- ## Summary -| Phase | Complexity | Tests unblocked | Status | -|-------|-----------|----------------|--------| -| 0 | Medium (2 files) | ALL (1247 test files discovered) | COMPLETED | -| 1 | Simple-Medium | ~800+ regression tests | TODO | -| 2 | Simple | ~20+ tests | TODO | -| 3 | Medium | 3 subtests | TODO | -| 4 | Simple | ~7 tests | TODO | +| Phase | Description | Tests fixed | Status | +|-------|-----------|------------|--------| +| 0 | glob() + MakeMaker | ALL (1247 discovered) | COMPLETED | +| 1 | Archive::Zip (setErrorHandler + FileHandle) | ~933 programs | COMPLETED | +| 2 | `\p{Emoticons}` regex | ~20+ tests | COMPLETED | +| 3 | split scalar context (password hash) | 3 subtests | COMPLETED | +| 4 | regex Unicode semantics (Latin-1 \w) | 2 subtests | COMPLETED | +| 5 | '' vs undef return value | 3 programs, 5 subtests | TODO | + +## Branch & PR + +- Branch: `fix/glob-directory-wildcards` +- PR: https://github.com/fglock/PerlOnJava/pull/430 ## Related Documents From 95d2a6720150a07c77b4076973decbb5ca92c784 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 16:46:11 +0200 Subject: [PATCH 6/9] Fix open '>' on scalar ref to preserve undef until first write In Perl, `open my $fh, '>', \$scalar` where $scalar is undef keeps it undef until something is actually written. PerlOnJava was eagerly setting it to '' on open, which caused Test::More::is() failures when comparing expected undef with actual ''. This fixes the last 5 failing subtests in Excel::Writer::XLSX, bringing it to 1247/1247 programs (5115/5115 subtests) passing. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 2 +- .../java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index a64630611..bcf8bd250 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "b6cef199e"; + public static final String gitCommitId = "26272cc66"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java index b20b12ce0..7e65d2ef2 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java @@ -538,7 +538,11 @@ public static RuntimeIO open(RuntimeScalar scalarRef, String mode) { try { if (mode.equals(">")) { // Truncate for write mode - this will throw if read-only - targetScalar.set(""); + // Match Perl behavior: if scalar was undef, keep it undef; + // if it was defined, truncate to empty string + if (targetScalar.getDefinedBoolean()) { + targetScalar.set(""); + } } else if (mode.equals(">>")) { // For append mode, test if scalar is writable by setting it to itself targetScalar.set(targetScalar.toString()); From 11f6452fc8bafa03e220800e0ae18e8fd831fc20 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 16:46:40 +0200 Subject: [PATCH 7/9] Update Excel::Writer::XLSX tracking doc: 100% pass 
rate achieved All 1247 programs and 5115 subtests now pass. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/modules/excel_writer_xlsx.md | 65 +++++++------------------------- 1 file changed, 13 insertions(+), 52 deletions(-) diff --git a/dev/modules/excel_writer_xlsx.md b/dev/modules/excel_writer_xlsx.md index 0316b1343..bf5816ae3 100644 --- a/dev/modules/excel_writer_xlsx.md +++ b/dev/modules/excel_writer_xlsx.md @@ -4,7 +4,7 @@ **Module**: Excel::Writer::XLSX 1.15 **Test command**: `./jcpan -j 8 -t Excel::Writer::XLSX` -**Status**: 1243/1247 programs pass (99.7%), 5110/5115 subtests pass (99.9%) +**Status**: 1247/1247 programs pass (100%), 5115/5115 subtests pass (100%) ## Dependency Tree @@ -16,7 +16,7 @@ ## Test Results Summary -### Current Status: 1243/1247 programs pass, 5110/5115 subtests pass +### Current Status: 1247/1247 programs pass, 5115/5115 subtests pass (100%) Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/package/`, `t/regression/`, `t/utility/`, `t/workbook/`, `t/worksheet/` @@ -25,48 +25,11 @@ Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/packa | t/chart/ | ~47 | 47 | 0 | All pass | | t/chartsheet/ | 4 | 4 | 0 | All pass (password tests fixed) | | t/drawing/ | ~23 | 23 | 0 | All pass | -| t/package/ | ~50 | 49 | 1 | `styles/sub_write_num_fmts.t` ('' vs undef) | +| t/package/ | ~50 | 50 | 0 | All pass | | t/regression/ | ~800+ | ~800+ | 0 | All pass | | t/utility/ | ~15 | 15 | 0 | All pass (quote_sheetname fixed) | | t/workbook/ | ~18 | 18 | 0 | All pass | -| t/worksheet/ | ~90 | 88 | 2 | `sub_write_page_setup.t`, `sub_write_print_options.t` ('' vs undef) | - ---- - -## Remaining Failures (4 programs, 5 subtests) - -### 1. 
Empty string `''` vs `undef` return value (3 test files, 3 subtests) - -**Affected tests**: -- `t/package/styles/sub_write_num_fmts.t` (1/2 fail) -- `t/worksheet/sub_write_page_setup.t` (1/6 fail) -- `t/worksheet/sub_write_print_options.t` (1/8 fail) - -**Error pattern**: -``` -# got: '' -# expected: undef -``` - -**Root Cause**: XML writer methods return `''` (empty string) instead of `undef` when there is nothing to write. In Perl, `''` and `undef` are different values - `undef` means "no value" while `''` means "empty string value". The test uses `is()` which distinguishes them. - -**Investigation needed**: Trace the specific XML writer method being called (e.g., `_write_num_fmts()`, `_write_page_setup()`, `_write_print_options()`) and find where it returns `''` instead of `undef`. This is likely a PerlOnJava parity issue in: -- How subroutines return values when no explicit `return` is used -- How `$self->{_writer}->xml_data_element()` or similar XMLwriter methods behave when called with no data to write -- How the `_write_*` methods short-circuit when there are no elements to emit - -**Fix approach**: -1. Run the failing test in Perl vs jperl and compare output -2. Add debug prints to the `_write_num_fmts` sub to trace where `''` vs `undef` diverges -3. Fix the runtime behavior or the specific method - -### 2. Emoticons Unicode quoting in `quote_sheetname` (FIXED) - -**Previously affected test**: `t/utility/quote_sheetname.t` (2/100 failed) - -**Was**: `\w` in regex didn't match Latin-1 accented characters (like `é`) when the string had the UTF-8 flag set. PerlOnJava only used the Unicode-aware regex pattern for strings containing characters > U+00FF. - -**Fixed in commit 3f85c7cd3**: Changed regex pattern selection to use Unicode semantics whenever the input string has the UTF-8 flag, matching Perl's behavior. 
+| t/worksheet/ | ~90 | 90 | 0 | All pass | --- @@ -123,20 +86,18 @@ Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/packa **Result**: `quote_sheetname.t` all 100 tests pass. Latin-1 accented chars match `\w` when UTF-8 flagged. ---- - -## Open Phase: Fix '' vs undef return value (TODO) +### Phase 5: Fix `open '>' \$scalar` to preserve undef (COMPLETED 2026-04-03) | Step | Description | File | Status | |------|-------------|------|--------| -| 5.1 | Reproduce in jperl: run `sub_write_num_fmts.t` and trace return value | - | TODO | -| 5.2 | Compare jperl vs perl: what does the `_write_*` method return? | - | TODO | -| 5.3 | Identify PerlOnJava runtime parity issue (return value semantics) | - | TODO | -| 5.4 | Fix the runtime or codegen to return `undef` instead of `''` | - | TODO | -| 5.5 | Verify all 3 affected tests pass | - | TODO | -| 5.6 | Run `make` to verify unit tests pass | - | TODO | +| 5.1 | Identify root cause: `open '>', \$scalar` sets undef to `''` | `RuntimeIO.java` | DONE | +| 5.2 | Fix: only truncate if scalar was already defined | `RuntimeIO.java` | DONE | +| 5.3 | Verify `open '>', \$undef_var` keeps undef (matches Perl) | - | DONE | +| 5.4 | Verify `open '>', \$defined_var` truncates to `''` (matches Perl) | - | DONE | +| 5.5 | Run `make` to verify unit tests pass | - | DONE | +| 5.6 | Run full Excel::Writer::XLSX test suite | - | DONE | -**Expected result**: Fixes the last 3 failing test programs (5 subtests), achieving 100% pass rate. +**Result**: All 1247 programs pass, all 5115 subtests pass. 100% pass rate achieved. 
--- @@ -149,7 +110,7 @@ Tests are in subdirectories: `t/chart/`, `t/chartsheet/`, `t/drawing/`, `t/packa | 2 | `\p{Emoticons}` regex | ~20+ tests | COMPLETED | | 3 | split scalar context (password hash) | 3 subtests | COMPLETED | | 4 | regex Unicode semantics (Latin-1 \w) | 2 subtests | COMPLETED | -| 5 | '' vs undef return value | 3 programs, 5 subtests | TODO | +| 5 | `open '>' \$scalar` undef preservation | 3 programs, 5 subtests | COMPLETED | ## Branch & PR From 57d1bc086c94763cd28278a3a769b88251474f4e Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 17:32:08 +0200 Subject: [PATCH 8/9] Fix regressions: io/scalar read-only, [:punct:] Unicode, hash key UTF-8 flag Three fixes for regressions introduced by recent commits: 1. RuntimeIO: open '>' on read-only undef scalars (like \undef) now correctly fails. The undef preservation fix skipped the set("") call which also served as a read-only check. Now sets undef-to-undef to trigger the check without changing the value. 2. CharacterClassMapper: [:punct:] now maps to [\p{P}\p{S}] instead of \p{Punct}. Java's UNICODE_CHARACTER_CLASS flag changes \p{Punct} to only match Unicode Punctuation category, excluding ASCII symbols like +, <, =, >, $. Using \p{P}\p{S} covers both Punctuation and Symbol categories, matching Perl's behavior. Net improvement: +24 tests on re/charset.t. 3. RuntimeHash/RuntimeHashProxyEntry: Track hash key byte/UTF-8 flags. In Perl, hash keys preserve their byte/UTF-8 type which affects regex \w matching semantics. Added byteKeys Set to RuntimeHash, updated keys(), iterator, get(), delete(), and state management to preserve key types. Fixes 2 regressions in op/utfhash.t. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/regex/CharacterClassMapper.java | 4 +- .../runtime/runtimetypes/RuntimeHash.java | 61 ++++++++++++++++--- .../runtimetypes/RuntimeHashProxyEntry.java | 16 +++++ .../runtime/runtimetypes/RuntimeIO.java | 3 + 5 files changed, 76 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index bcf8bd250..1af393abd 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "26272cc66"; + public static final String gitCommitId = "c3fe2ffe9"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java b/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java index 6564738f3..ad67bee08 100644 --- a/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java +++ b/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java @@ -28,8 +28,8 @@ public class CharacterClassMapper { {"[:^lower:]", "\\P{Lower}"}, {"[:print:]", "\\p{Print}"}, {"[:^print:]", "\\P{Print}"}, - {"[:punct:]", "\\p{Punct}"}, - {"[:^punct:]", "\\P{Punct}"}, + {"[:punct:]", "\\p{P}\\p{S}"}, + {"[:^punct:]", "[^\\p{P}\\p{S}]"}, {"[:space:]", "\\p{Space}"}, {"[:^space:]", "\\P{Space}"}, {"[:upper:]", "\\p{Upper}"}, diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java index 02934cdd2..2042f55f3 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHash.java @@ -5,8 +5,7 @@ import java.util.*; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.scalarFalse; -import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.HASHREFERENCE; -import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.TIED_SCALAR; +import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.*; /** * The RuntimeHash class simulates Perl hashes. @@ -33,6 +32,10 @@ public class RuntimeHash extends RuntimeBase implements RuntimeScalarReference, public Map elements; // Iterator for traversing the hash elements Iterator hashIterator; + // Track which keys were stored with BYTE_STRING type (vs STRING/UTF-8). + // In Perl, hash keys preserve their byte/UTF-8 flag, which affects regex matching semantics. + // Lazily initialized to avoid overhead when key type tracking is not needed. + Set byteKeys; /** * Constructor for RuntimeHash. 
@@ -210,11 +213,17 @@ public RuntimeArray setFromList(RuntimeList value) { // Clear existing elements but keep the same Map instance to preserve capacity this.elements.clear(); + if (this.byteKeys != null) this.byteKeys.clear(); // Populate the hash from the materialized list iterator = materializedList.iterator(); while (iterator.hasNext()) { - String key = iterator.next().toString(); + RuntimeScalar keyScalar = iterator.next(); + String key = keyScalar.toString(); + // Track the key's byte/UTF-8 type for correct semantics in keys() + if (keyScalar.type == BYTE_STRING) { + markKeyByte(key, true); + } // Create a new RuntimeScalar to properly handle aliasing and avoid read-only issues RuntimeScalar val = iterator.hasNext() ? new RuntimeScalar(iterator.next()) : new RuntimeScalar(); this.elements.put(key, val); @@ -281,6 +290,34 @@ public void put(String key, RuntimeScalar value) { } } + /** + * Tracks the byte/UTF-8 flag of a hash key. + * In Perl, hash keys preserve their byte/UTF-8 flag, which affects regex matching semantics. + * + * @param key The hash key string + * @param isByte true if the key was stored as BYTE_STRING, false for STRING (UTF-8) + */ + public void markKeyByte(String key, boolean isByte) { + if (isByte) { + if (byteKeys == null) byteKeys = new HashSet<>(); + byteKeys.add(key); + } else if (byteKeys != null) { + byteKeys.remove(key); + } + } + + /** + * Creates a RuntimeScalar for a hash key with the correct type (STRING or BYTE_STRING). + */ + RuntimeScalar createKeyScalar(String key) { + if (byteKeys != null && byteKeys.contains(key)) { + RuntimeScalar scalar = new RuntimeScalar(key); + scalar.type = BYTE_STRING; + return scalar; + } + return new RuntimeScalar(key); // default STRING type + } + /** * Retrieves a value by key. 
* @@ -314,10 +351,14 @@ public RuntimeScalar get(RuntimeScalar keyScalar) { String key = keyScalar.toString(); var value = elements.get(key); if (value != null) { + // Update the key's byte/UTF-8 flag to match the accessing key's type. + // In Perl, the key's UTF-8 flag is updated on each access. + markKeyByte(key, keyScalar.type == BYTE_STRING); yield value; } - // Lazy element autovivification - yield new RuntimeHashProxyEntry(this, key); + // Lazy element autovivification - pass key's byte flag for type tracking + boolean isByteKey = keyScalar.type == BYTE_STRING; + yield new RuntimeHashProxyEntry(this, key, isByteKey); } case TIED_HASH -> { @@ -370,6 +411,7 @@ public RuntimeScalar delete(RuntimeScalar key) { case PLAIN_HASH -> { String k = key.toString(); var value = elements.remove(k); + if (byteKeys != null) byteKeys.remove(k); if (value != null) { yield new RuntimeScalar(value); } @@ -388,6 +430,7 @@ public RuntimeScalar delete(String key) { return switch (type) { case PLAIN_HASH -> { var value = elements.remove(key); + if (byteKeys != null) byteKeys.remove(key); if (value != null) { yield new RuntimeScalar(value); } @@ -656,7 +699,7 @@ public RuntimeArray keys() { RuntimeArray list = new RuntimeArray(); for (String key : elements.keySet()) { - RuntimeArray.push(list, new RuntimeScalar(key)); + RuntimeArray.push(list, createKeyScalar(key)); } hashIterator = null; // keys resets the iterator // Set scalarContextSize so that keys() in scalar context returns the count @@ -863,6 +906,7 @@ public RuntimeHash undefine() { } else { this.elements.clear(); } + this.byteKeys = null; return this; } @@ -911,9 +955,11 @@ public void dynamicSaveState() { RuntimeHash currentState = new RuntimeHash(); currentState.elements = new StableHashMap<>(this.elements); currentState.blessId = this.blessId; + currentState.byteKeys = this.byteKeys != null ? 
new HashSet<>(this.byteKeys) : null; dynamicStateStack.push(currentState); // Clear the hash this.elements.clear(); + this.byteKeys = null; this.blessId = 0; } @@ -928,6 +974,7 @@ public void dynamicRestoreState() { RuntimeHash previousState = dynamicStateStack.pop(); this.elements = previousState.elements; this.blessId = previousState.blessId; + this.byteKeys = previousState.byteKeys; } } @@ -980,7 +1027,7 @@ public RuntimeScalar next() { if (returnKey) { currentEntry = entryIterator.next(); returnKey = false; - return new RuntimeScalar(currentEntry.getKey()); + return createKeyScalar(currentEntry.getKey()); } else { returnKey = true; return currentEntry.getValue(); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHashProxyEntry.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHashProxyEntry.java index 9685c481e..b41d21184 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHashProxyEntry.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeHashProxyEntry.java @@ -14,6 +14,8 @@ public class RuntimeHashProxyEntry extends RuntimeBaseProxy { private final RuntimeHash parent; // Key associated with this proxy in the parent hash private final String key; + // Whether the key was originally a BYTE_STRING (for preserving key type in keys()) + private final boolean byteKey; /** * Constructs a RuntimeHashProxyEntry for a given key in the specified parent hash. @@ -22,9 +24,21 @@ public class RuntimeHashProxyEntry extends RuntimeBaseProxy { * @param key the key in the hash for which this proxy is created */ public RuntimeHashProxyEntry(RuntimeHash parent, String key) { + this(parent, key, false); + } + + /** + * Constructs a RuntimeHashProxyEntry with key type tracking. 
+ * + * @param parent the parent RuntimeHash containing the elements + * @param key the key in the hash for which this proxy is created + * @param byteKey true if the key was from a BYTE_STRING scalar + */ + public RuntimeHashProxyEntry(RuntimeHash parent, String key, boolean byteKey) { super(); this.parent = parent; this.key = key; + this.byteKey = byteKey; // Note: this.type is RuntimeScalarType.UNDEF } @@ -44,6 +58,8 @@ void vivify() { } else { parent.put(key, new RuntimeScalar()); } + // Track the key's byte/UTF-8 type + parent.markKeyByte(key, byteKey); } // Retrieve the element associated with the key lvalue = parent.elements.get(key); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java index 7e65d2ef2..2c9c4a1d5 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java @@ -542,6 +542,9 @@ public static RuntimeIO open(RuntimeScalar scalarRef, String mode) { // if it was defined, truncate to empty string if (targetScalar.getDefinedBoolean()) { targetScalar.set(""); + } else { + // Still need to check read-only for undef scalars + targetScalar.set(new RuntimeScalar()); } } else if (mode.equals(">>")) { // For append mode, test if scalar is writable by setting it to itself From b7f3d2f2ea41485956fd3d6367d5e9354449626c Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 3 Apr 2026 17:56:57 +0200 Subject: [PATCH 9/9] Fix [:punct:] to use ASCII semantics for byte strings, Unicode for UTF-8 The previous fix mapped [:punct:] to [\p{P}\p{S}] unconditionally, which broke re/pat_advanced.t because non-Unicode patterns should match only ASCII punctuation and must not match characters in the 0x80-0xFF range. Now [:punct:] maps to \p{Punct} (correct ASCII behavior by default), and the Unicode variant pattern replaces it with [\p{P}\p{S}] to include symbols when UNICODE_CHARACTER_CLASS is active. 
Results: - re/pat_advanced.t: 1316/1678 (restored from 1315) - re/charset.t: 5494/5552 (+8 over previous 5486) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 2 +- .../perlonjava/runtime/regex/CharacterClassMapper.java | 4 ++-- .../java/org/perlonjava/runtime/regex/RuntimeRegex.java | 8 +++++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 1af393abd..56513ae0a 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "c3fe2ffe9"; + public static final String gitCommitId = "7a045da61"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java b/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java index ad67bee08..6564738f3 100644 --- a/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java +++ b/src/main/java/org/perlonjava/runtime/regex/CharacterClassMapper.java @@ -28,8 +28,8 @@ public class CharacterClassMapper { {"[:^lower:]", "\\P{Lower}"}, {"[:print:]", "\\p{Print}"}, {"[:^print:]", "\\P{Print}"}, - {"[:punct:]", "\\p{P}\\p{S}"}, - {"[:^punct:]", "[^\\p{P}\\p{S}]"}, + {"[:punct:]", "\\p{Punct}"}, + {"[:^punct:]", "\\P{Punct}"}, {"[:space:]", "\\p{Space}"}, {"[:^space:]", "\\P{Space}"}, {"[:upper:]", "\\p{Upper}"}, diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index e22d6f2ad..dc3edc9e6 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -164,7 +164,13 @@ public static RuntimeRegex compile(String patternString, String modifiers) { // Compile the Unicode variant for Unicode strings // Only compile separately if the flags differ (saves memory when /a or /u is used) if (regex.patternFlagsUnicode != regex.patternFlags) { - regex.patternUnicode = Pattern.compile(javaPattern, regex.patternFlagsUnicode); + // Fix POSIX [:punct:] for Unicode mode: Java's UNICODE_CHARACTER_CLASS flag + // changes \p{Punct} from ASCII punct+symbols to only \p{P} (Unicode Punctuation). + // Perl's [:punct:] should match both Punctuation and Symbols in Unicode mode. + String javaPatternUnicode = javaPattern + .replace("\\p{Punct}", "[\\p{P}\\p{S}]") + .replace("\\P{Punct}", "[^\\p{P}\\p{S}]"); + regex.patternUnicode = Pattern.compile(javaPatternUnicode, regex.patternFlagsUnicode); } else { regex.patternUnicode = regex.pattern; }