From 52d34a6f86413a8639864fd0882e357bb1d489be Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 28 Apr 2026 18:34:55 +0200 Subject: [PATCH 1/2] fix: jcpan -t HTTP::Response::Encoding Add Encode::Encoding mime_name() and a Perl-canonical Name mapping so HTTP::Message->content_charset (via IO::HTML::find_charset_in) and $res->encoding work for ASCII/UTF-8 the way real Perl Encode behaves. - Register Encode::Encoding::mime_name in the runtime. - Store separate Name (Perl-canonical, e.g. "ascii", "utf-8-strict") and MimeName (IANA, e.g. "US-ASCII", "UTF-8") on the blessed object. - mime_name() returns MimeName (falling back to Name); name() returns Name unchanged. Result: jcpan -t HTTP::Response::Encoding now passes 18/18. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +- .../perlonjava/runtime/perlmodule/Encode.java | 43 ++++++++++++++++++- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 1484eb6f8..df11b1b4a 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "25b6fa935"; + public static final String gitCommitId = "6d622d7c2"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 28 2026 18:03:29"; + public static final String buildTimestamp = "Apr 28 2026 18:33:30"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java index 6100fdbd1..e65cfada6 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Encode.java @@ -222,18 +222,24 @@ public static void initialize() { Encode.class, "encoding_decode", RuntimeCode.methodType); java.lang.invoke.MethodHandle nameHandle = RuntimeCode.lookup.findStatic( Encode.class, "encoding_name", RuntimeCode.methodType); + java.lang.invoke.MethodHandle mimeNameHandle = RuntimeCode.lookup.findStatic( + Encode.class, "encoding_mime_name", RuntimeCode.methodType); RuntimeCode encodeCode = new RuntimeCode(encodeHandle, null, null); encodeCode.isStatic = true; RuntimeCode decodeCode = new RuntimeCode(decodeHandle, null, null); decodeCode.isStatic = true; RuntimeCode nameCode = new RuntimeCode(nameHandle, null, null); nameCode.isStatic = true; + RuntimeCode mimeNameCode = new RuntimeCode(mimeNameHandle, null, null); + mimeNameCode.isStatic = true; GlobalVariable.getGlobalCodeRef("Encode::Encoding::encode").set( new RuntimeScalar(encodeCode)); GlobalVariable.getGlobalCodeRef("Encode::Encoding::decode").set( new RuntimeScalar(decodeCode)); GlobalVariable.getGlobalCodeRef("Encode::Encoding::name").set( new RuntimeScalar(nameCode)); + GlobalVariable.getGlobalCodeRef("Encode::Encoding::mime_name").set( + new RuntimeScalar(mimeNameCode)); } catch (NoSuchMethodException | IllegalAccessException e) { System.err.println("Warning: Missing Encode::Encoding method: " + e.getMessage()); } @@ -863,9 +869,11 @@ public static RuntimeList find_encoding(RuntimeArray args, int ctx) { try { Charset charset = getCharset(encodingName); - // Create a blessed hash with the charset name 
+ // Create a blessed hash with both the Perl-canonical Name + // (used by ->name) and the IANA MimeName (used by ->mime_name). RuntimeHash encObj = new RuntimeHash(); - encObj.put("Name", new RuntimeScalar(charset.name())); + encObj.put("Name", new RuntimeScalar(perlCanonicalName(charset.name()))); + encObj.put("MimeName", new RuntimeScalar(charset.name())); RuntimeScalar ref = encObj.createReference(); ReferenceOperators.bless(ref, new RuntimeScalar("Encode::Encoding")); return ref.getList(); @@ -875,6 +883,20 @@ public static RuntimeList find_encoding(RuntimeArray args, int ctx) { } } + /** + * Maps a Java canonical charset name to Perl Encode's canonical + * encoding name (as returned by C<< $enc->name >>). For encodings + * we don't have a special mapping for, the Java name is returned + * unchanged. + */ + private static String perlCanonicalName(String javaName) { + switch (javaName) { + case "US-ASCII": return "ascii"; + case "UTF-8": return "utf-8-strict"; + default: return javaName; + } + } + /** * find_mime_encoding($mime_name) * Looks up an encoding by its MIME name. Delegates to find_encoding @@ -1004,6 +1026,23 @@ public static RuntimeList encoding_name(RuntimeArray args, int ctx) { return hash.get("Name").getList(); } + /** + * Encode::Encoding->mime_name() + * Returns the IANA-registered MIME name of this encoding. Java's + * canonical Charset name matches the IANA preferred MIME name for + * the common encodings used here, so we reuse it. + */ + public static RuntimeList encoding_mime_name(RuntimeArray args, int ctx) { + if (args.isEmpty()) { + throw new IllegalStateException("Bad number of arguments for Encode::Encoding::mime_name"); + } + + RuntimeScalar self = args.get(0); + RuntimeHash hash = (RuntimeHash) self.value; + RuntimeScalar mime = hash.get("MimeName"); + return (mime != null && mime.getDefinedBoolean() ? 
mime : hash.get("Name")).getList(); + } + /** * from_to($octets, $from_enc, $to_enc [, $check]) * Converts in-place the octet sequence from one encoding to another. From cd8e103b1d46dea1cbc45362a89b5fc279235424 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 28 Apr 2026 19:46:22 +0200 Subject: [PATCH 2/2] fix(regex): leftmost-wins order for duplicate-named %+/%- captures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Java 21's Pattern.namedGroups() returns an ImmutableCollections.MapN which does not preserve insertion order. HashSpecialVariable was iterating that map's keySet() to group duplicate-named captures (e.g. (?<name>a)|(?<name>b)), so the order in which alternatives were seen was effectively random — making `values %+` return the rightmost capture instead of Perl's leftmost. Sort each per-Perl-name list by group number (= source order) in both entrySet() and collectJavaNamesFor(). This restores the 3 regressed re/pat_advanced.t subtests (1317, 1320, 1321 in the "Test keys in %+ and %-" block) without affecting any other behavior. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 4 ++-- .../runtime/runtimetypes/HashSpecialVariable.java | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index df11b1b4a..65ed65e52 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String gitCommitId = "6d622d7c2"; + public static final String gitCommitId = "52d34a6f8"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 28 2026 18:33:30"; + public static final String buildTimestamp = "Apr 28 2026 19:44:33"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java b/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java index 121e1ef57..cb21901e2 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/HashSpecialVariable.java @@ -79,6 +79,10 @@ public Set> entrySet() { Map<String, Integer> namedGroups = matcher.pattern().namedGroups(); // Collect entries by decoded Perl name so that duplicate-name // captures (e.g. `(?<name>a)|(?<name>b)`) merge into a single key. + // Note: Java's Pattern.namedGroups() returns an unordered map + // (ImmutableCollections.MapN), so we must explicitly sort each + // bucket by group number so that the *leftmost* alternative + // wins (Perl semantics for $+{name}). 
java.util.Map<String, java.util.List<String>> byPerlName = new java.util.LinkedHashMap<>(); for (String name : namedGroups.keySet()) { if (CaptureNameEncoder.isInternalCapture(name)) { @@ -87,6 +91,9 @@ public Set> entrySet() { String perlName = CaptureNameEncoder.decodeGroupName(name); byPerlName.computeIfAbsent(perlName, k -> new java.util.ArrayList<>()).add(name); } + for (java.util.List<String> jns : byPerlName.values()) { + jns.sort(java.util.Comparator.comparingInt(namedGroups::get)); + } for (Map.Entry<String, java.util.List<String>> e : byPerlName.entrySet()) { String perlName = e.getKey(); java.util.List<String> javaNames = e.getValue(); @@ -288,6 +295,10 @@ private static java.util.List<String> collectJavaNamesFor(Map<String, Integer> n out.add(jn); } } + // Java's Pattern.namedGroups() doesn't preserve insertion order, so sort + // by group number to match Perl's source order. This makes `$+{name}` + // return the *leftmost* alternative for duplicate-named captures. + out.sort(java.util.Comparator.comparingInt(namedGroups::get)); return out; }