diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 1484eb6f8..65ed65e52 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "25b6fa935"; + public static final String gitCommitId = "52d34a6f8"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 28 2026 18:03:29"; + public static final String buildTimestamp = "Apr 28 2026 19:44:33"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java index 6100fdbd1..e65cfada6 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java @@ -222,18 +222,24 @@ public static void initialize() { Encode.class, "encoding_decode", RuntimeCode.methodType); java.lang.invoke.MethodHandle nameHandle = RuntimeCode.lookup.findStatic( Encode.class, "encoding_name", RuntimeCode.methodType); + java.lang.invoke.MethodHandle mimeNameHandle = RuntimeCode.lookup.findStatic( + Encode.class, "encoding_mime_name", RuntimeCode.methodType); RuntimeCode encodeCode = new RuntimeCode(encodeHandle, null, null); encodeCode.isStatic = true; RuntimeCode decodeCode = new RuntimeCode(decodeHandle, null, null); decodeCode.isStatic = true; RuntimeCode nameCode = new RuntimeCode(nameHandle, null, null); nameCode.isStatic = true; + RuntimeCode mimeNameCode = new RuntimeCode(mimeNameHandle, null, null); + mimeNameCode.isStatic = true; GlobalVariable.getGlobalCodeRef("Encode::Encoding::encode").set( new RuntimeScalar(encodeCode)); GlobalVariable.getGlobalCodeRef("Encode::Encoding::decode").set( new RuntimeScalar(decodeCode)); GlobalVariable.getGlobalCodeRef("Encode::Encoding::name").set( new RuntimeScalar(nameCode)); + GlobalVariable.getGlobalCodeRef("Encode::Encoding::mime_name").set( + new RuntimeScalar(mimeNameCode)); } catch (NoSuchMethodException | IllegalAccessException e) { System.err.println("Warning: Missing Encode::Encoding method: " + e.getMessage()); } @@ -863,9 +869,11 @@ public static RuntimeList find_encoding(RuntimeArray args, int ctx) { try { Charset charset = getCharset(encodingName); - // Create a blessed hash with the charset name + // Create a blessed hash with both the Perl-canonical Name + // (used by ->name) and the IANA MimeName (used by ->mime_name). RuntimeHash encObj = new RuntimeHash(); - encObj.put("Name", new RuntimeScalar(charset.name())); + encObj.put("Name", new RuntimeScalar(perlCanonicalName(charset.name()))); + encObj.put("MimeName", new RuntimeScalar(charset.name())); RuntimeScalar ref = encObj.createReference(); ReferenceOperators.bless(ref, new RuntimeScalar("Encode::Encoding")); return ref.getList(); @@ -875,6 +883,20 @@ public static RuntimeList find_encoding(RuntimeArray args, int ctx) { } } + /** + * Maps a Java canonical charset name to Perl Encode's canonical + * encoding name (as returned by C<< $enc->name >>). For encodings + * we don't have a special mapping for, the Java name is returned + * unchanged. + */ + private static String perlCanonicalName(String javaName) { + switch (javaName) { + case "US-ASCII": return "ascii"; + case "UTF-8": return "utf-8-strict"; + default: return javaName; + } + } + /** * find_mime_encoding($mime_name) * Looks up an encoding by its MIME name. Delegates to find_encoding @@ -1004,6 +1026,23 @@ public static RuntimeList encoding_name(RuntimeArray args, int ctx) { return hash.get("Name").getList(); } + /** + * Encode::Encoding->mime_name() + * Returns the IANA-registered MIME name of this encoding. Java's + * canonical Charset name matches the IANA preferred MIME name for + * the common encodings used here, so we reuse it. + */ + public static RuntimeList encoding_mime_name(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + throw new IllegalStateException("Bad number of arguments for Encode::Encoding::mime_name"); + } + + RuntimeScalar self = args.get(0); + RuntimeHash hash = (RuntimeHash) self.value; + RuntimeScalar mime = hash.get("MimeName"); + return (mime != null && mime.getDefinedBoolean() ? mime : hash.get("Name")).getList(); + } + /** * from_to($octets, $from_enc, $to_enc [, $check]) * Converts in-place the octet sequence from one encoding to another. diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java b/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java index 121e1ef57..cb21901e2 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java @@ -79,6 +79,10 @@ public Set> entrySet() { Map namedGroups = matcher.pattern().namedGroups(); // Collect entries by decoded Perl name so that duplicate-name // captures (e.g. `(?a)|(?b)`) merge into a single key. + // Note: Java's Pattern.namedGroups() returns an unordered map + // (ImmutableCollections.MapN), so we must explicitly sort each + // bucket by group number so that the *leftmost* alternative + // wins (Perl semantics for $+{name}). java.util.Map> byPerlName = new java.util.LinkedHashMap<>(); for (String name : namedGroups.keySet()) { if (CaptureNameEncoder.isInternalCapture(name)) { @@ -87,6 +91,9 @@ public Set> entrySet() { String perlName = CaptureNameEncoder.decodeGroupName(name); byPerlName.computeIfAbsent(perlName, k -> new java.util.ArrayList<>()).add(name); } + for (java.util.List jns : byPerlName.values()) { + jns.sort(java.util.Comparator.comparingInt(namedGroups::get)); + } for (Map.Entry> e : byPerlName.entrySet()) { String perlName = e.getKey(); java.util.List javaNames = e.getValue(); @@ -288,6 +295,10 @@ private static java.util.List collectJavaNamesFor(Map n out.add(jn); } } + // Java's Pattern.namedGroups() doesn't preserve insertion order, so sort + // by group number to match Perl's source order. This makes `$+{name}` + // return the *leftmost* alternative for duplicate-named captures. + out.sort(java.util.Comparator.comparingInt(namedGroups::get)); return out; }