diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index f3ec8ca8a..191cd4b23 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "f7bbbc40b"; + public static final String gitCommitId = "26d03a093"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 28 2026 14:48:58"; + public static final String buildTimestamp = "Apr 28 2026 14:22:49"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/CoreOperatorResolver.java b/src/main/java/org/perlonjava/frontend/parser/CoreOperatorResolver.java index b15ab093a..1a2990c16 100644 --- a/src/main/java/org/perlonjava/frontend/parser/CoreOperatorResolver.java +++ b/src/main/java/org/perlonjava/frontend/parser/CoreOperatorResolver.java @@ -87,7 +87,8 @@ public static Node parseCoreOperator(Parser parser, LexerToken token, int startI case "sort" -> ParseMapGrepSort.parseSort(parser, token); case "map", "grep", "all", "any" -> ParseMapGrepSort.parseMapGrep(parser, token); case "pack" -> OperatorParser.parsePack(parser, token, currentIndex); - case "chomp", "chop", "splice", "mkdir" -> OperatorParser.parseReverse(parser, token, currentIndex); + case "chomp", "chop" -> OperatorParser.parseChompChop(parser, token, currentIndex); + case "splice", "mkdir" -> OperatorParser.parseReverse(parser, token, currentIndex); case "die", "warn" -> OperatorParser.parseDieWarn(parser, token, currentIndex); case "system", "exec" -> OperatorParser.parseSystem(parser, token, currentIndex); case "readline", "eof", "tell" -> OperatorParser.parseReadline(parser, token, currentIndex); diff --git a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java index 1526dcf57..1edc622ad 100644 --- a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java @@ -1198,6 +1198,34 @@ static OperatorNode parseReverse(Parser parser, LexerToken token, int currentInd return new OperatorNode(token.text, operand, currentIndex); } + /** + * Parse {@code chomp} / {@code chop}. Both behave like list operators + * with parentheses (any number of args) but as named-unary + * operators without parentheses (one term, stop at the comma). + * That mirrors upstream Perl, where {@code is(chomp @a, 3, "ok")} parses + * as {@code is(chomp(@a), 3, "ok")} rather than gobbling the rest of the + * comma-separated list. + */ + static OperatorNode parseChompChop(Parser parser, LexerToken token, int currentIndex) { + ListNode operand; + LexerToken next = TokenUtils.peek(parser); + if (next.text.equals("(")) { + // Parenthesized form: chomp(LIST) + TokenUtils.consume(parser); + operand = new ListNode(ListParser.parseList(parser, ")", 0), parser.tokenIndex); + } else if (next.type == org.perlonjava.frontend.lexer.LexerTokenType.EOF + || ListParser.isListTerminator(parser, next) + || next.text.equals(",")) { + // No argument: chomp / chop without args means chomp $_ + operand = new ListNode(parser.tokenIndex); + } else { + // Bareword form without parens: take exactly one expression and + // stop before any "," at the same precedence level. + operand = ListNode.makeList(parser.parseExpression(parser.getPrecedence(",") + 1)); + } + return new OperatorNode(token.text, operand, currentIndex); + } + static OperatorNode parseDieWarn(Parser parser, LexerToken token, int currentIndex) { int dieKeywordIndex = currentIndex; // Capture token position BEFORE parsing args ListNode operand = ListParser.parseZeroOrMoreList(parser, 0, false, true, false, false); diff --git a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java index 157dbb928..46e6a3199 100644 --- a/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java +++ b/src/main/java/org/perlonjava/runtime/io/CustomFileChannel.java @@ -7,6 +7,7 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.lang.ref.Cleaner; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; @@ -85,6 +86,19 @@ public class CustomFileChannel implements IOHandle { private static final java.util.Map sharedLockRegistry = new java.util.concurrent.ConcurrentHashMap<>(); + /** + * Cleaner used to drop any flock() we still hold when this channel is GC'd + * without an explicit Perl-level {@code close($fh)}. Path::Tiny's + * {@code slurp}/{@code append} idiom returns from a sub while the locked + * filehandle is still in scope, then immediately reopens the same path and + * tries to take an EXCLUSIVE lock — which previously failed with + * {@code Resource deadlock avoided} because the SHARED lock entry from the + * abandoned channel was still in {@link #sharedLockRegistry}. The Cleaner + * action releases registry/native locks deterministically once the JVM + * notices the {@link CustomFileChannel} is unreachable. + */ + private static final Cleaner LOCK_CLEANER = Cleaner.create(); + /** * State for a JVM-wide shared flock() on a file path. Contains the owning * FileLock (from the first acquirer) and a count of how many channels in this @@ -111,6 +125,56 @@ private static final class SharedLockState { */ private boolean holdsSharedLockViaRegistry; + /** + * Mutable state shared with this channel's Cleaner action. Lives in a + * separate object so the Cleaner can run it without retaining a reference + * to {@code this} (a Cleaner action that captured the outer instance would + * never trigger). Updated whenever this channel acquires or releases a + * lock; the Cleaner runs at most once, when the channel is GC'd. + */ + private final CleanupState cleanupState = new CleanupState(); + + private final Cleaner.Cleanable cleanable = LOCK_CLEANER.register(this, cleanupState); + + /** + * Cleaner action: runs when the {@link CustomFileChannel} becomes + * unreachable without an explicit Perl-level {@code close($fh)}. Releases + * any flock() entry the channel still owns so Path::Tiny's + * {@code slurp}-then-{@code append({truncate=>1})} pattern doesn't get + * stuck on a stale SHARED lock from the abandoned read handle. + */ + private static final class CleanupState implements Runnable { + volatile String lockKey; + volatile boolean viaRegistry; + volatile FileLock nioLock; + + @Override + public void run() { + try { + if (viaRegistry && lockKey != null) { + synchronized (sharedLockRegistry) { + SharedLockState state = sharedLockRegistry.get(lockKey); + if (state != null) { + state.refCount--; + if (state.refCount <= 0) { + if (state.nioLock != null && state.nioLock.isValid()) { + state.nioLock.release(); + } + sharedLockRegistry.remove(lockKey); + } + } + } + } else if (nioLock != null && nioLock.isValid()) { + nioLock.release(); + } + } catch (IOException ignored) { + // Best-effort cleanup; nothing useful to do on failure. + } + viaRegistry = false; + nioLock = null; + } + } + /** * The underlying Java NIO FileChannel for actual I/O operations */ @@ -497,6 +561,9 @@ public RuntimeScalar flock(int operation) { // shared lock on this file — piggyback on it. state.refCount++; holdsSharedLockViaRegistry = true; + cleanupState.lockKey = lockKey; + cleanupState.viaRegistry = true; + cleanupState.nioLock = null; return scalarTrue; } // No existing shared lock. Acquire one on our channel and @@ -515,6 +582,9 @@ public RuntimeScalar flock(int operation) { sharedLockRegistry.put(lockKey, newState); currentLock = lock; holdsSharedLockViaRegistry = true; + cleanupState.lockKey = lockKey; + cleanupState.viaRegistry = true; + cleanupState.nioLock = null; return scalarTrue; } catch (OverlappingFileLockException e) { // Same JVM already holds a lock on this region that @@ -535,8 +605,29 @@ public RuntimeScalar flock(int operation) { return RuntimeScalarCache.scalarFalse; } } else { - currentLock = fileChannel.lock(0, Long.MAX_VALUE, isShared); + try { + currentLock = fileChannel.lock(0, Long.MAX_VALUE, isShared); + } catch (OverlappingFileLockException e) { + // The same JVM already holds a lock on this region — most + // commonly a SHARED lock from a sibling CustomFileChannel + // whose Perl-level handle has gone out of scope but whose + // underlying RuntimeIO/lock hasn't been released yet + // (Path::Tiny's slurp() pattern: returns from a sub while + // the locked $fh is still in scope, then immediately calls + // append({truncate=>1}) which wants LOCK_EX). Try to clean + // up abandoned handles via the existing fd-recycling + // pathway, then retry once. + if (lockKey != null + && reclaimAbandonedSharedLock(lockKey)) { + currentLock = fileChannel.lock(0, Long.MAX_VALUE, isShared); + } else { + throw e; + } + } } + cleanupState.lockKey = null; + cleanupState.viaRegistry = false; + cleanupState.nioLock = currentLock; return scalarTrue; } @@ -553,6 +644,43 @@ public RuntimeScalar flock(int operation) { } } + /** + * Try to reclaim a SHARED-lock registry entry whose holder has been + * abandoned at the Perl level. Triggers the IO fd-recycling sweep + * ({@link org.perlonjava.runtime.runtimetypes.RuntimeIO#processAbandonedGlobs()}) + * — and, if that doesn't drop the entry, gives the JVM a hint via + * {@code System.gc()} so any pending {@link Cleaner} actions and + * {@link java.lang.ref.PhantomReference}s for unreachable handles get + * processed before we retry the lock acquisition. + * + * @return {@code true} if the registry entry for {@code key} was removed + * (so the caller should retry); {@code false} otherwise. + */ + private static boolean reclaimAbandonedSharedLock(String key) { + org.perlonjava.runtime.runtimetypes.RuntimeIO.processAbandonedGlobs(); + if (!sharedLockRegistry.containsKey(key)) { + return true; + } + // Nudge the JVM to clean up any handles that are unreachable but + // haven't yet been enqueued for collection (e.g. a Path::Tiny `slurp` + // returned but its lexical $fh hasn't been GC'd in this microbench + // window). System.gc() is a hint; on a normal JVM this is enough to + // let the Cleaner action and PhantomReference for the abandoned + // handle run before we retry. We block briefly to give those + // background mechanisms a chance to actually fire. + System.gc(); + for (int i = 0; i < 5 && sharedLockRegistry.containsKey(key); i++) { + try { + Thread.sleep(2); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + break; + } + org.perlonjava.runtime.runtimetypes.RuntimeIO.processAbandonedGlobs(); + } + return !sharedLockRegistry.containsKey(key); + } + /** * Release whatever lock this channel currently holds, whether directly via * {@link #currentLock} or via the shared-lock registry. Safe to call when @@ -578,6 +706,8 @@ private void releaseCurrentLock() throws IOException { // way, we must not call release() on it ourselves a second time. currentLock = null; holdsSharedLockViaRegistry = false; + cleanupState.viaRegistry = false; + cleanupState.nioLock = null; return; } if (currentLock != null) { @@ -586,6 +716,8 @@ private void releaseCurrentLock() throws IOException { } currentLock = null; } + cleanupState.viaRegistry = false; + cleanupState.nioLock = null; } @Override diff --git a/src/main/java/org/perlonjava/runtime/operators/Operator.java b/src/main/java/org/perlonjava/runtime/operators/Operator.java index bfec7ad3e..2c31e6df5 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Operator.java +++ b/src/main/java/org/perlonjava/runtime/operators/Operator.java @@ -38,15 +38,23 @@ public static RuntimeScalar chmod(RuntimeList runtimeList) { return new RuntimeScalar(0); } + // Flatten remaining args into a list of scalars. Without this, an + // unflattened RuntimeArray (e.g., `chmod $mode, @paths`) would end up + // as a single element whose toString() looks like "ARRAY(0x...)". + // UnlinkOperator/UtimeOperator do the same. int mode = runtimeList.elements.getFirst().scalar().getInt(); + RuntimeList fileList = new RuntimeList(); + for (int i = 1; i < runtimeList.elements.size(); i++) { + runtimeList.elements.get(i).addToList(fileList); + } int successCount = 0; // Detect platform boolean isWindows = NativeUtils.IS_WINDOWS; - // Process each file in the list - for (int i = 1; i < runtimeList.size(); i++) { - String fileName = runtimeList.elements.get(i).toString(); + // Process each file in the flattened list + for (RuntimeScalar fileScalar : fileList) { + String fileName = fileScalar.toString(); Path resolved = RuntimeIO.resolvePath(fileName, "chmod"); if (resolved == null) { continue; diff --git a/src/main/java/org/perlonjava/runtime/operators/Readline.java b/src/main/java/org/perlonjava/runtime/operators/Readline.java index d4fab91f0..e52c2d09a 100644 --- a/src/main/java/org/perlonjava/runtime/operators/Readline.java +++ b/src/main/java/org/perlonjava/runtime/operators/Readline.java @@ -80,6 +80,13 @@ public static RuntimeScalar readline(RuntimeIO runtimeIO) { boolean isSlurp = (rs != null && rs.isSlurpMode()) || (rs == null && rsScalar.type == RuntimeScalarType.UNDEF); if (isSlurp) { + // Match Perl's semantics: a slurp call on a fresh handle returns + // the file contents (possibly the empty string) and leaves the + // handle at EOF; the *next* call returns undef. If we are already + // at EOF on entry, this is that "next" call -> undef. + if (runtimeIO.eof().getBoolean()) { + return scalarUndef; + } StringBuilder content = new StringBuilder(); boolean isByteData = true; RuntimeScalar chunk; @@ -91,19 +98,13 @@ public static RuntimeScalar readline(RuntimeIO runtimeIO) { content.append(chunkStr); } - if (content.length() > 0) { - String contentStr = content.toString(); - // In Perl 5, slurp mode increments $. by 1 (not per line) - runtimeIO.currentLineNumber++; - RuntimeScalar result = new RuntimeScalar(contentStr); - if (isByteData) { - result.type = RuntimeScalarType.BYTE_STRING; - } - return result; - } else if (runtimeIO.eof().getBoolean()) { - return scalarUndef; + // In Perl 5, slurp mode increments $. by 1 (not per line) + runtimeIO.currentLineNumber++; + RuntimeScalar result = new RuntimeScalar(content.toString()); + if (isByteData) { + result.type = RuntimeScalarType.BYTE_STRING; } - return new RuntimeScalar(content.toString()); + return result; } if (rs != null && rs.isParagraphMode()) { diff --git a/src/main/java/org/perlonjava/runtime/operators/ReferenceOperators.java b/src/main/java/org/perlonjava/runtime/operators/ReferenceOperators.java index 7ea3e46b3..b1640c286 100644 --- a/src/main/java/org/perlonjava/runtime/operators/ReferenceOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/ReferenceOperators.java @@ -23,6 +23,16 @@ public class ReferenceOperators { */ public static RuntimeScalar bless(RuntimeScalar runtimeScalar, RuntimeScalar className) { if (RuntimeScalarType.isReference(runtimeScalar)) { + // Match Perl's diagnostics for `bless`: + // - undef class name produces "Use of uninitialized value $class in bless" + // - empty class name produces "Explicit blessing to '' (assuming package main)" + // Both are emitted before defaulting to package "main". + if (!className.getDefinedBoolean()) { + WarnDie.warnWithCategory( + new RuntimeScalar("Use of uninitialized value $class in bless"), + RuntimeScalarCache.scalarEmptyString, + "uninitialized"); + } // Use toString() which invokes "" overloading for blessed objects. // Perl 5 throws "Attempt to bless into a reference" for non-overloaded // refs, but callers like IO::Handle already handle this via @@ -30,6 +40,10 @@ public static RuntimeScalar bless(RuntimeScalar runtimeScalar, RuntimeScalar cla String str = className.toString(); // Default to "main" if className is empty if (str.isEmpty()) { + WarnDie.warnWithCategory( + new RuntimeScalar("Explicit blessing to '' (assuming package main)"), + RuntimeScalarCache.scalarEmptyString, + "misc"); str = "main"; } // Canonicalise the class-name argument through diff --git a/src/main/perl/lib/File/Spec.pm b/src/main/perl/lib/File/Spec.pm index 87780de3a..ae620efb5 100644 --- a/src/main/perl/lib/File/Spec.pm +++ b/src/main/perl/lib/File/Spec.pm @@ -28,6 +28,28 @@ our $VERSION = '3.95'; # Match perl5 PathTools version # but File::Spec::Unix must also be loaded for compatibility. require File::Spec::Unix; +# Set @ISA to the platform-specific File::Spec subclass, matching the +# behaviour of the upstream PathTools File::Spec.pm. Some CPAN modules +# (Directory::Scratch, Path::Class helpers, etc.) inspect +# C<$File::Spec::ISA[0]> to discover the current platform; without this +# they get C and break. +my %_module_for = ( + MacOS => 'Mac', + MSWin32 => 'Win32', + os2 => 'OS2', + VMS => 'VMS', + epoc => 'Epoc', + NetWare => 'Win32', + symbian => 'Win32', + dos => 'OS2', + cygwin => 'Cygwin', + amigaos => 'AmigaOS', +); + +my $_module = $_module_for{$^O} || 'Unix'; +require "File/Spec/$_module.pm"; +our @ISA = ("File::Spec::$_module"); + # NOTE: The rest of the code is in file: # src/main/java/org/perlonjava/perlmodule/FileSpec.java diff --git a/src/main/perl/lib/MIME/QuotedPrint.pm b/src/main/perl/lib/MIME/QuotedPrint.pm index d24454e2f..7efe7dc41 100644 --- a/src/main/perl/lib/MIME/QuotedPrint.pm +++ b/src/main/perl/lib/MIME/QuotedPrint.pm @@ -11,9 +11,14 @@ package MIME::QuotedPrint; # The implementation is in: src/main/java/org/perlonjava/perlmodule/MimeQuotedPrint.java # +our $VERSION = '3.16'; + use XSLoader; XSLoader::load( 'MIME::QuotedPrint' ); +*encode = \&encode_qp; +*decode = \&decode_qp; + 1; __END__ diff --git a/src/main/perl/lib/Unicode/LineBreak.pm b/src/main/perl/lib/Unicode/LineBreak.pm new file mode 100644 index 000000000..46c781d17 --- /dev/null +++ b/src/main/perl/lib/Unicode/LineBreak.pm @@ -0,0 +1,109 @@ +package Unicode::LineBreak; + +# Minimal pure-Perl shim of Unicode::LineBreak for PerlOnJava. +# +# The original module is XS-based and provides UAX #14 line breaking +# plus the Unicode::GCString grapheme-cluster API. PerlOnJava ships +# only the tiny subset of GCString used by modules like Text::vCard. +# +# If you need the full functionality, please open an issue. + +use strict; +use warnings; + +our $VERSION = '2019.001'; + +# Constants commonly imported from Unicode::LineBreak +use constant { + MANDATORY => 0, + DIRECT => 1, + INDIRECT => 2, + PROHIBITED => 3, +}; + +require Exporter; +our @ISA = qw(Exporter); +our @EXPORT_OK = qw(MANDATORY DIRECT INDIRECT PROHIBITED context); +our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK ); + +sub new { + my ($class, %opts) = @_; + return bless { %opts }, $class; +} + +sub context { return 'NONEASTASIAN'; } + +# Minimal break(): just returns the input unchanged as a single chunk. +sub break { + my ($self, $str) = @_; + return defined $str ? $str : ''; +} + +package Unicode::GCString; + +# Minimal grapheme-cluster string class. Uses \X to split the string +# into grapheme clusters. Only the methods used by Text::vCard et al +# are implemented: new, length, substr, as_string, columns. + +use strict; +use warnings; + +sub new { + my ($class, $str) = @_; + $str = '' unless defined $str; + my @clusters = ($str =~ /(\X)/gs); + return bless { str => $str, clusters => \@clusters }, $class; +} + +sub length { return scalar @{ $_[0]->{clusters} }; } + +sub as_string { return $_[0]->{str}; } + +# String overload would be nice, but keep it explicit. +sub substr { + my ($self, $start, $len) = @_; + my @c = @{ $self->{clusters} }; + my $total = scalar @c; + $start = 0 if !defined $start; + if ($start < 0) { $start = $total + $start; } + $start = 0 if $start < 0; + $start = $total if $start > $total; + my $end; + if (!defined $len) { + $end = $total; + } elsif ($len < 0) { + $end = $total + $len; + } else { + $end = $start + $len; + } + $end = $start if $end < $start; + $end = $total if $end > $total; + my $piece = join '', @c[$start .. $end - 1]; + return Unicode::GCString->new($piece); +} + +# Approximate column width (1 per grapheme cluster). +sub columns { return scalar @{ $_[0]->{clusters} }; } + +use overload + '""' => \&as_string, + 'bool' => sub { CORE::length( $_[0]->{str} ) > 0 }, + '0+' => \&length, + fallback => 1; + +1; + +__END__ + +=head1 NAME + +Unicode::LineBreak - Minimal PerlOnJava shim + +=head1 DESCRIPTION + +Provides just enough of L and L +for modules like L that only need basic grapheme cluster +splitting. The full UAX #14 line-breaking algorithm is not +implemented. + +=cut