From 0b0352af10949e7af202266ea2ade53d273e424f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 25 Mar 2026 10:04:38 +0100 Subject: [PATCH 1/7] Fix $^H scope leakage causing use locale to enable strict The $^H (compile-time hints) variable was using a shared lvalue field to cache values. When Test::More enabled strict mode, the strict bits were cached in lvalue. Later when locale.pm read $^H, it got the stale cached value instead of the current scope's value, causing strict mode to incorrectly leak into outer scopes. Fix: - Remove lvalue caching for HINTS in vivify() - return without storing - Change set() to only update the scope's strict options, not the cache - Change getValueAsScalar() HINTS case to always read from the current scope's symbol table via SpecialBlockParser.getCurrentScope() This ensures $^H respects lexical scoping - each scope has its own value. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtimetypes/ScalarSpecialVariable.java | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java index 71ffe0203..a202abed8 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java @@ -60,12 +60,16 @@ public ScalarSpecialVariable(Id variableId, int position) { */ @Override void vivify() { - if (variableId == Id.INPUT_LINE_NUMBER || variableId == Id.HINTS) { + if (variableId == Id.INPUT_LINE_NUMBER) { if (lvalue == null) { lvalue = new RuntimeScalar(0); } return; } + // HINTS doesn't need lvalue - it always reads/writes from the symbol table + if (variableId == Id.HINTS) { + return; + } throw new PerlCompilerException("Modification of a read-only value 
attempted"); } @@ -85,19 +89,14 @@ public RuntimeScalar set(RuntimeScalar value) { } if (variableId == Id.HINTS) { int hints = value.getInt(); - // Update the symbol table's strict options + // Update the symbol table's strict options directly + // No need to store in lvalue since reading always uses the symbol table ScopedSymbolTable symbolTable = SpecialBlockParser.getCurrentScope(); if (symbolTable != null) { - // Clear all strict options and set the new ones - // The hints value contains the strict flags directly symbolTable.setStrictOptions(hints); } - // Also store in lvalue for reading back - vivify(); - lvalue.set(hints); - this.type = lvalue.type; - this.value = lvalue.value; - return lvalue; + // Return a scalar with the hints value + return getScalarInt(hints); } return super.set(value); } @@ -212,11 +211,8 @@ public RuntimeScalar getValueAsScalar() { yield scalarUndef; } case HINTS -> { - // $^H - Return stored lvalue first (preserves custom hint bits like 0x04000000) - // Only fall back to symbol table strict options if no lvalue stored - if (lvalue != null) { - yield lvalue; - } + // $^H - Always read from the current scope's symbol table + // This ensures lexical scoping - each scope has its own $^H value ScopedSymbolTable symbolTable = SpecialBlockParser.getCurrentScope(); if (symbolTable != null) { yield getScalarInt(symbolTable.getStrictOptions()); From 67a4521cd16a659097e86f978f7f6a57c42488f6 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 25 Mar 2026 10:16:02 +0100 Subject: [PATCH 2/7] Add I18N::Langinfo module and update constant.pm - Add I18NLanginfo.java: Java XS implementation with langinfo() function and ~70 locale constants (CODESET, RADIXCHAR, DAY_1-7, MON_1-12, etc.) - Import I18N/Langinfo.pm from Perl 5 distribution - Update constant.pm to full Perl 5 version (was simplified stub) - Add I18N::Langinfo to import config This fixes 07locale.t test (was 1/8, now 8/8 passing). 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/import-perl5/config.yaml | 4 + .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/perlmodule/I18NLanginfo.java | 351 ++++++++++++ src/main/perl/lib/I18N/Langinfo.pm | 521 ++++++++++++++++++ src/main/perl/lib/constant.pm | 454 ++++++++++++++- 5 files changed, 1313 insertions(+), 19 deletions(-) create mode 100644 src/main/java/org/perlonjava/runtime/perlmodule/I18NLanginfo.java create mode 100644 src/main/perl/lib/I18N/Langinfo.pm diff --git a/dev/import-perl5/config.yaml b/dev/import-perl5/config.yaml index 123184d3d..1bae669ae 100644 --- a/dev/import-perl5/config.yaml +++ b/dev/import-perl5/config.yaml @@ -328,6 +328,10 @@ imports: target: perl5_t/version type: directory + # I18N::Langinfo - locale information (XS implementation in Java) + - source: perl5/ext/I18N-Langinfo/Langinfo.pm + target: src/main/perl/lib/I18N/Langinfo.pm + # From core distribution - source: perl5/dist/Attribute-Handlers/lib/Attribute/Handlers.pm target: src/main/perl/lib/Attribute/Handlers.pm diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 648d45671..8c4cfa0fd 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "1d09334fe"; + public static final String gitCommitId = "0b0352af1"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/I18NLanginfo.java b/src/main/java/org/perlonjava/runtime/perlmodule/I18NLanginfo.java
new file mode 100644
index 000000000..bd3085cf7
--- /dev/null
+++ b/src/main/java/org/perlonjava/runtime/perlmodule/I18NLanginfo.java
@@ -0,0 +1,326 @@
+package org.perlonjava.runtime.perlmodule;
+
+import org.perlonjava.runtime.runtimetypes.RuntimeArray;
+import org.perlonjava.runtime.runtimetypes.RuntimeList;
+import org.perlonjava.runtime.runtimetypes.RuntimeScalar;
+
+import java.nio.charset.Charset;
+import java.text.DateFormatSymbols;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.util.Locale;
+
+/**
+ * Java XS implementation for I18N::Langinfo.
+ * Provides locale information similar to the POSIX nl_langinfo() function,
+ * answered from the JVM default locale.
+ *
+ * <p>Perl code obtains the numeric item codes through the constant subs
+ * registered in {@link #initialize()} and passes them back to
+ * {@link #langinfo(RuntimeArray, int)}.
+ *
+ * <p>Known gap: Langinfo.pm also lists {@code _NL_*} names in
+ * {@code @EXPORT_OK}; no constant sub is registered for them here.
+ */
+public class I18NLanginfo extends PerlModuleBase {
+
+    // Item codes accepted by langinfo(). Each group is numbered contiguously
+    // so that a code can be turned into an array index by subtraction.
+
+    // Abbreviated day names (ABDAY_1 is Sunday)
+    public static final int ABDAY_1 = 1;   // Sun
+    public static final int ABDAY_2 = 2;   // Mon
+    public static final int ABDAY_3 = 3;   // Tue
+    public static final int ABDAY_4 = 4;   // Wed
+    public static final int ABDAY_5 = 5;   // Thu
+    public static final int ABDAY_6 = 6;   // Fri
+    public static final int ABDAY_7 = 7;   // Sat
+
+    // Full day names (DAY_1 is Sunday)
+    public static final int DAY_1 = 8;     // Sunday
+    public static final int DAY_2 = 9;     // Monday
+    public static final int DAY_3 = 10;    // Tuesday
+    public static final int DAY_4 = 11;    // Wednesday
+    public static final int DAY_5 = 12;    // Thursday
+    public static final int DAY_6 = 13;    // Friday
+    public static final int DAY_7 = 14;    // Saturday
+
+    // Abbreviated month names
+    public static final int ABMON_1 = 15;  // Jan
+    public static final int ABMON_2 = 16;  // Feb
+    public static final int ABMON_3 = 17;  // Mar
+    public static final int ABMON_4 = 18;  // Apr
+    public static final int ABMON_5 = 19;  // May
+    public static final int ABMON_6 = 20;  // Jun
+    public static final int ABMON_7 = 21;  // Jul
+    public static final int ABMON_8 = 22;  // Aug
+    public static final int ABMON_9 = 23;  // Sep
+    public static final int ABMON_10 = 24; // Oct
+    public static final int ABMON_11 = 25; // Nov
+    public static final int ABMON_12 = 26; // Dec
+
+    // Full month names
+    public static final int MON_1 = 27;    // January
+    public static final int MON_2 = 28;    // February
+    public static final int MON_3 = 29;    // March
+    public static final int MON_4 = 30;    // April
+    public static final int MON_5 = 31;    // May
+    public static final int MON_6 = 32;    // June
+    public static final int MON_7 = 33;    // July
+    public static final int MON_8 = 34;    // August
+    public static final int MON_9 = 35;    // September
+    public static final int MON_10 = 36;   // October
+    public static final int MON_11 = 37;   // November
+    public static final int MON_12 = 38;   // December
+
+    // Time/Date formats
+    public static final int D_T_FMT = 39;    // Date and time format
+    public static final int D_FMT = 40;      // Date format
+    public static final int T_FMT = 41;      // Time format
+    public static final int T_FMT_AMPM = 42; // 12-hour time format
+    public static final int AM_STR = 43;     // AM string
+    public static final int PM_STR = 44;     // PM string
+
+    // Numeric
+    public static final int RADIXCHAR = 45;  // Decimal point character
+    public static final int THOUSEP = 46;    // Thousands separator
+
+    // Yes/No
+    public static final int YESEXPR = 47;    // Regex for yes
+    public static final int NOEXPR = 48;     // Regex for no
+    public static final int YESSTR = 49;     // Yes string
+    public static final int NOSTR = 50;      // No string
+
+    // Character set
+    public static final int CODESET = 51;    // Character encoding
+
+    // Currency
+    public static final int CRNCYSTR = 52;   // Currency symbol
+
+    // Era (for locales with era-based dating)
+    public static final int ERA = 53;
+    public static final int ERA_D_FMT = 54;
+    public static final int ERA_D_T_FMT = 55;
+    public static final int ERA_T_FMT = 56;
+
+    // Alternate digits
+    public static final int ALT_DIGITS = 57;
+
+    /**
+     * Names of the constant subs, in registration order. The Perl sub NAME is
+     * backed by the static method {@code const_NAME} declared below.
+     */
+    private static final String[] CONSTANT_NAMES = {
+            "ABDAY_1", "ABDAY_2", "ABDAY_3", "ABDAY_4", "ABDAY_5", "ABDAY_6", "ABDAY_7",
+            "DAY_1", "DAY_2", "DAY_3", "DAY_4", "DAY_5", "DAY_6", "DAY_7",
+            "ABMON_1", "ABMON_2", "ABMON_3", "ABMON_4", "ABMON_5", "ABMON_6",
+            "ABMON_7", "ABMON_8", "ABMON_9", "ABMON_10", "ABMON_11", "ABMON_12",
+            "MON_1", "MON_2", "MON_3", "MON_4", "MON_5", "MON_6",
+            "MON_7", "MON_8", "MON_9", "MON_10", "MON_11", "MON_12",
+            "D_T_FMT", "D_FMT", "T_FMT", "T_FMT_AMPM", "AM_STR", "PM_STR",
+            "RADIXCHAR", "THOUSEP",
+            "YESEXPR", "NOEXPR", "YESSTR", "NOSTR",
+            "CODESET", "CRNCYSTR",
+            "ERA", "ERA_D_FMT", "ERA_D_T_FMT", "ERA_T_FMT", "ALT_DIGITS",
+    };
+
+    public I18NLanginfo() {
+        super("I18N::Langinfo", false);
+    }
+
+    /**
+     * Registers langinfo() and one constant sub (empty prototype) per entry
+     * of {@link #CONSTANT_NAMES}. A name whose backing method is missing is
+     * reported on stderr and skipped; the remaining names are still
+     * registered.
+     */
+    public static void initialize() {
+        I18NLanginfo module = new I18NLanginfo();
+        try {
+            // Main function
+            module.registerMethod("langinfo", null);
+        } catch (NoSuchMethodException e) {
+            warnMissing(e);
+        }
+        for (String name : CONSTANT_NAMES) {
+            try {
+                module.registerMethod(name, "const_" + name, "");
+            } catch (NoSuchMethodException e) {
+                warnMissing(e);
+            }
+        }
+    }
+
+    /** Reports a sub whose backing Java method could not be found. */
+    private static void warnMissing(NoSuchMethodException e) {
+        System.err.println("Warning: Missing I18N::Langinfo method: " + e.getMessage());
+    }
+
+    /**
+     * langinfo(item)
+     * Returns locale information for the specified item, using the JVM
+     * default locale. With no argument the item code is read from $_.
+     * Unknown item codes yield the empty string.
+     */
+    public static RuntimeList langinfo(RuntimeArray args, int ctx) {
+        int item;
+        if (args.isEmpty()) {
+            // Use $_ if no argument provided
+            item = org.perlonjava.runtime.runtimetypes.GlobalVariable
+                    .getGlobalVariable("main::_").getInt();
+        } else {
+            item = args.get(0).getInt();
+        }
+        return new RuntimeScalar(lookup(item, Locale.getDefault())).getList();
+    }
+
+    /** Resolves one item code against {@code locale}; "" when there is no data. */
+    private static String lookup(int item, Locale locale) {
+        DateFormatSymbols dateSymbols = DateFormatSymbols.getInstance(locale);
+        DecimalFormatSymbols decimalSymbols = DecimalFormatSymbols.getInstance(locale);
+
+        // The weekday arrays are indexed by Calendar.SUNDAY (1) through
+        // Calendar.SATURDAY (7); slot 0 is unused, hence the "+ 1".
+        if (item >= ABDAY_1 && item <= ABDAY_7) {
+            return dateSymbols.getShortWeekdays()[item - ABDAY_1 + 1];
+        }
+        if (item >= DAY_1 && item <= DAY_7) {
+            return dateSymbols.getWeekdays()[item - DAY_1 + 1];
+        }
+        // The month arrays are 0-based (January is slot 0).
+        if (item >= ABMON_1 && item <= ABMON_12) {
+            return dateSymbols.getShortMonths()[item - ABMON_1];
+        }
+        if (item >= MON_1 && item <= MON_12) {
+            return dateSymbols.getMonths()[item - MON_1];
+        }
+
+        String[] ampm = dateSymbols.getAmPmStrings();
+        return switch (item) {
+            // strftime conversions that expand to the locale's own format
+            case D_T_FMT -> "%c";
+            case D_FMT -> "%x";
+            case T_FMT -> "%X";
+            case T_FMT_AMPM -> "%r";
+            case AM_STR -> ampm.length > 0 ? ampm[0] : "AM";
+            case PM_STR -> ampm.length > 1 ? ampm[1] : "PM";
+            case RADIXCHAR -> String.valueOf(decimalSymbols.getDecimalSeparator());
+            case THOUSEP -> String.valueOf(decimalSymbols.getGroupingSeparator());
+            // Only the English answers are provided for yes/no
+            case YESEXPR -> "^[yY]";
+            case NOEXPR -> "^[nN]";
+            case YESSTR -> "yes";
+            case NOSTR -> "no";
+            case CODESET -> Charset.defaultCharset().name();
+            case CRNCYSTR -> currencyString(locale, decimalSymbols);
+            // ERA*, ALT_DIGITS and unknown codes: no data available
+            default -> "";
+        };
+    }
+
+    /**
+     * Builds the CRNCYSTR answer: the currency symbol prefixed with '-'
+     * when it precedes the amount or '+' when it follows it. Returns ""
+     * when the locale has no specific currency symbol.
+     */
+    private static String currencyString(Locale locale, DecimalFormatSymbols decimalSymbols) {
+        String symbol = decimalSymbols.getCurrencySymbol();
+        // U+00A4 is the generic currency sign, used as a placeholder when no
+        // specific currency applies; treat it like an empty symbol.
+        if (symbol == null || symbol.isEmpty() || symbol.equals("\u00A4")) {
+            return "";
+        }
+        return (currencyPrecedesAmount(locale) ? "-" : "+") + symbol;
+    }
+
+    /**
+     * Tells whether the locale's currency pattern places the currency sign
+     * before the first digit placeholder. Answers true when the pattern
+     * cannot be inspected.
+     */
+    private static boolean currencyPrecedesAmount(Locale locale) {
+        NumberFormat format = NumberFormat.getCurrencyInstance(locale);
+        if (!(format instanceof DecimalFormat)) {
+            return true;
+        }
+        String pattern = ((DecimalFormat) format).toPattern();
+        int sign = pattern.indexOf('\u00A4');
+        if (sign < 0) {
+            return true;
+        }
+        for (int i = 0; i < sign; i++) {
+            char c = pattern.charAt(i);
+            if (c == '#' || c == '0') {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Constant methods (looked up by name from initialize())
+    public static RuntimeList const_ABDAY_1(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_1).getList(); }
+    public static RuntimeList const_ABDAY_2(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_2).getList(); }
+    public static RuntimeList const_ABDAY_3(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_3).getList(); }
+    public static RuntimeList const_ABDAY_4(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_4).getList(); }
+    public static RuntimeList const_ABDAY_5(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_5).getList(); }
+    public static RuntimeList const_ABDAY_6(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_6).getList(); }
+    public static RuntimeList const_ABDAY_7(RuntimeArray args, int ctx) { return new RuntimeScalar(ABDAY_7).getList(); }
+
+    public static RuntimeList const_DAY_1(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_1).getList(); }
+    public static RuntimeList const_DAY_2(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_2).getList(); }
+    public static RuntimeList const_DAY_3(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_3).getList(); }
+    public static RuntimeList const_DAY_4(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_4).getList(); }
+    public static RuntimeList const_DAY_5(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_5).getList(); }
+    public static RuntimeList const_DAY_6(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_6).getList(); }
+    public static RuntimeList const_DAY_7(RuntimeArray args, int ctx) { return new RuntimeScalar(DAY_7).getList(); }
+
+    public static RuntimeList const_ABMON_1(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_1).getList(); }
+    public static RuntimeList const_ABMON_2(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_2).getList(); }
+    public static RuntimeList const_ABMON_3(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_3).getList(); }
+    public static RuntimeList const_ABMON_4(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_4).getList(); }
+    public static RuntimeList const_ABMON_5(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_5).getList(); }
+    public static RuntimeList const_ABMON_6(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_6).getList(); }
+    public static RuntimeList const_ABMON_7(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_7).getList(); }
+    public static RuntimeList const_ABMON_8(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_8).getList(); }
+    public static RuntimeList const_ABMON_9(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_9).getList(); }
+    public static RuntimeList const_ABMON_10(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_10).getList(); }
+    public static RuntimeList const_ABMON_11(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_11).getList(); }
+    public static RuntimeList const_ABMON_12(RuntimeArray args, int ctx) { return new RuntimeScalar(ABMON_12).getList(); }
+
+    public static RuntimeList const_MON_1(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_1).getList(); }
+    public static RuntimeList const_MON_2(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_2).getList(); }
+    public static RuntimeList const_MON_3(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_3).getList(); }
+    public static RuntimeList const_MON_4(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_4).getList(); }
+    public static RuntimeList const_MON_5(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_5).getList(); }
+    public static RuntimeList const_MON_6(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_6).getList(); }
+    public static RuntimeList const_MON_7(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_7).getList(); }
+    public static RuntimeList const_MON_8(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_8).getList(); }
+    public static RuntimeList const_MON_9(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_9).getList(); }
+    public static RuntimeList const_MON_10(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_10).getList(); }
+    public static RuntimeList const_MON_11(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_11).getList(); }
+    public static RuntimeList const_MON_12(RuntimeArray args, int ctx) { return new RuntimeScalar(MON_12).getList(); }
+
+    public static RuntimeList const_D_T_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(D_T_FMT).getList(); }
+    public static RuntimeList const_D_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(D_FMT).getList(); }
+    public static RuntimeList const_T_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(T_FMT).getList(); }
+    public static RuntimeList const_T_FMT_AMPM(RuntimeArray args, int ctx) { return new RuntimeScalar(T_FMT_AMPM).getList(); }
+    public static RuntimeList const_AM_STR(RuntimeArray args, int ctx) { return new RuntimeScalar(AM_STR).getList(); }
+    public static RuntimeList const_PM_STR(RuntimeArray args, int ctx) { return new RuntimeScalar(PM_STR).getList(); }
+
+    public static RuntimeList const_RADIXCHAR(RuntimeArray args, int ctx) { return new RuntimeScalar(RADIXCHAR).getList(); }
+    public static RuntimeList const_THOUSEP(RuntimeArray args, int ctx) { return new RuntimeScalar(THOUSEP).getList(); }
+
+    public static RuntimeList const_YESEXPR(RuntimeArray args, int ctx) { return new RuntimeScalar(YESEXPR).getList(); }
+    public static RuntimeList const_NOEXPR(RuntimeArray args, int ctx) { return new RuntimeScalar(NOEXPR).getList(); }
+    public static RuntimeList const_YESSTR(RuntimeArray args, int ctx) { return new RuntimeScalar(YESSTR).getList(); }
+    public static RuntimeList const_NOSTR(RuntimeArray args, int ctx) { return new RuntimeScalar(NOSTR).getList(); }
+
+    public static RuntimeList const_CODESET(RuntimeArray args, int ctx) { return new RuntimeScalar(CODESET).getList(); }
+    public static RuntimeList const_CRNCYSTR(RuntimeArray args, int ctx) { return new RuntimeScalar(CRNCYSTR).getList(); }
+    public static RuntimeList const_ERA(RuntimeArray args, int ctx) { return new RuntimeScalar(ERA).getList(); }
+    public static RuntimeList const_ERA_D_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(ERA_D_FMT).getList(); }
+    public static RuntimeList const_ERA_D_T_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(ERA_D_T_FMT).getList(); }
+    public static RuntimeList const_ERA_T_FMT(RuntimeArray args, int ctx) { return new RuntimeScalar(ERA_T_FMT).getList(); }
+    public static RuntimeList const_ALT_DIGITS(RuntimeArray args, int ctx) { return new RuntimeScalar(ALT_DIGITS).getList(); }
+}
diff --git a/src/main/perl/lib/I18N/Langinfo.pm b/src/main/perl/lib/I18N/Langinfo.pm
new file mode 100644
index 000000000..d0a8b00c4
--- /dev/null
+++
b/src/main/perl/lib/I18N/Langinfo.pm @@ -0,0 +1,521 @@ +package I18N::Langinfo; + +use 5.006; +use strict; +use warnings; +use Carp; + +use Exporter 'import'; +require XSLoader; + +our @EXPORT = qw(langinfo); + +our @EXPORT_OK = qw( + ABDAY_1 + ABDAY_2 + ABDAY_3 + ABDAY_4 + ABDAY_5 + ABDAY_6 + ABDAY_7 + ABMON_1 + ABMON_2 + ABMON_3 + ABMON_4 + ABMON_5 + ABMON_6 + ABMON_7 + ABMON_8 + ABMON_9 + ABMON_10 + ABMON_11 + ABMON_12 + ALT_DIGITS + AM_STR + CODESET + CRNCYSTR + DAY_1 + DAY_2 + DAY_3 + DAY_4 + DAY_5 + DAY_6 + DAY_7 + D_FMT + D_T_FMT + ERA + ERA_D_FMT + ERA_D_T_FMT + ERA_T_FMT + MON_1 + MON_2 + MON_3 + MON_4 + MON_5 + MON_6 + MON_7 + MON_8 + MON_9 + MON_10 + MON_11 + MON_12 + NOEXPR + NOSTR + PM_STR + RADIXCHAR + THOUSEP + T_FMT + T_FMT_AMPM + YESEXPR + YESSTR + _NL_ADDRESS_POSTAL_FMT + _NL_ADDRESS_COUNTRY_NAME + _NL_ADDRESS_COUNTRY_POST + _NL_ADDRESS_COUNTRY_AB2 + _NL_ADDRESS_COUNTRY_AB3 + _NL_ADDRESS_COUNTRY_CAR + _NL_ADDRESS_COUNTRY_NUM + _NL_ADDRESS_COUNTRY_ISBN + _NL_ADDRESS_LANG_NAME + _NL_ADDRESS_LANG_AB + _NL_ADDRESS_LANG_TERM + _NL_ADDRESS_LANG_LIB + _NL_IDENTIFICATION_TITLE + _NL_IDENTIFICATION_SOURCE + _NL_IDENTIFICATION_ADDRESS + _NL_IDENTIFICATION_CONTACT + _NL_IDENTIFICATION_EMAIL + _NL_IDENTIFICATION_TEL + _NL_IDENTIFICATION_FAX + _NL_IDENTIFICATION_LANGUAGE + _NL_IDENTIFICATION_TERRITORY + _NL_IDENTIFICATION_AUDIENCE + _NL_IDENTIFICATION_APPLICATION + _NL_IDENTIFICATION_ABBREVIATION + _NL_IDENTIFICATION_REVISION + _NL_IDENTIFICATION_DATE + _NL_IDENTIFICATION_CATEGORY + _NL_MEASUREMENT_MEASUREMENT + _NL_NAME_NAME_FMT + _NL_NAME_NAME_GEN + _NL_NAME_NAME_MR + _NL_NAME_NAME_MRS + _NL_NAME_NAME_MISS + _NL_NAME_NAME_MS + _NL_PAPER_HEIGHT + _NL_PAPER_WIDTH + _NL_TELEPHONE_TEL_INT_FMT + _NL_TELEPHONE_TEL_DOM_FMT + _NL_TELEPHONE_INT_SELECT + _NL_TELEPHONE_INT_PREFIX + ); + +our $VERSION = '0.24'; + +XSLoader::load(); + +1; +__END__ + +=encoding utf8 + +=head1 NAME + +I18N::Langinfo - query locale information + +=head1 SYNOPSIS + + use I18N::Langinfo; + 
+=head1 DESCRIPTION + +The langinfo() function queries various locale information that can be +used to localize output and user interfaces. It uses the current underlying +locale, regardless of whether or not it was called from within the scope of +S<C<use locale>>. The langinfo() function requires +one numeric argument that identifies the locale constant to query: +if no argument is supplied, C<$_> is used. The numeric constants +appropriate to be used as arguments are exportable from I18N::Langinfo. + +The following example will import the langinfo() function itself and +three constants to be used as arguments to langinfo(): a constant for +the abbreviated first day of the week (the numbering starts from +Sunday = 1) and two more constants for the affirmative and negative +answers for a yes/no question in the current locale. + + use I18N::Langinfo qw(langinfo ABDAY_1 YESSTR NOSTR); + + my ($abday_1, $yesstr, $nostr) = + map { langinfo($_) } (ABDAY_1, YESSTR, NOSTR); + + print "$abday_1? [$yesstr/$nostr] "; + +In other words, in the "C" (or English) locale the above will probably +print something like: + + Sun? [yes/no] + +but under a French locale + + dim? [oui/non] + +The usually available constants are as follows.
+ +=over 4 + +=item * + +For abbreviated and full length days of the week and months of the year: + + ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7 + ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6 + ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12 + DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7 + MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 + MON_7 MON_8 MON_9 MON_10 MON_11 MON_12 + +=item * + +For the date-time, date, and time formats used by the strftime() function +(see L<POSIX>): + + D_T_FMT D_FMT T_FMT + +=item * + +For the locales for which it makes sense to have ante meridiem and post +meridiem time formats: + + AM_STR PM_STR T_FMT_AMPM + +=item * + +For the character code set being used (such as "ISO8859-1", "cp850", +"koi8-r", "sjis", "utf8", etc.): + + CODESET + +=item * + +For the symbol or string of characters that indicates a number is a monetary +value: + + CRNCYSTR + +An example is the dollar sign C<$>. Some locales not associated with +particular locations may have an empty currency string. (The C locale is +one.) Otherwise, the return of this is always prefixed by one of these three +characters: + +=over + +=item C<-> + +indicates that in this locale, the string precedes the numeric value, as in a +U.S. locale: C<$9.95>. + +=item C<+> + +indicates that in this locale, the string follows the numeric value, like +C<9.95USD>. + +=item C<.> + +indicates that in this locale, the string replaces the radix character, like +C<9$95>. + +=back + +=item * + +For the radix character used between the integer and the fractional part of +decimal numbers, and the group separator string for large-ish floating point +numbers (yes, these are redundant with +L<POSIX/localeconv>): + + RADIXCHAR THOUSEP + +=item * + +For any alternate digits used in this locale besides the standard C<0..9>: + + ALT_DIGITS + +This returns a sequence of alternate numeric representations for the numbers +C<0> ... up to C<99>.
The representations are returned in a single string, +with a semi-colon C<;> used to separate the individual ones. + +Most locales don't have alternate digits, so the string will be empty. + +To access this data conveniently, you could do something like + + use I18N::Langinfo qw(langinfo ALT_DIGITS); + my @alt_digits = split ';', langinfo(ALT_DIGITS); + +The array C<@alt_digits> will contain 0 elements if the current locale doesn't +have alternate digits specified for it. Otherwise, it will have as many +elements as the locale defines, with C<[0]> containing the alternate digit for +zero; C<[1]> for one; and so forth, up to potentially C<[99]> for the +alternate representation of ninety-nine. + +Be aware that the alternate representation in some locales for the numbers +0..9 will have a leading alternate-zero, so would look like the equivalent of +00..09. + +Running this program + + use I18N::Langinfo qw(langinfo ALT_DIGITS); + my @alt_digits = split ';', langinfo(ALT_DIGITS); + splice @alt_digits, 15; + print join " ", @alt_digits, "\n"; + +on a Japanese locale yields + +S<C<〇 一 二 三 四 五 六 七 八 九 十 十一 十二 十三 十四>> + +on some platforms. + +=item * + +For the affirmative and negative responses and expressions: + + YESSTR YESEXPR NOSTR NOEXPR + +=item * + +For the eras based on typically some ruler, such as the Japanese Emperor +(naturally only defined in the appropriate locales): + + ERA ERA_D_FMT ERA_D_T_FMT ERA_T_FMT + +=back + +In addition, Linux boxes have extra items, as follows. (When called from +other platform types, these return a stub value, of not much use.)
+ +=over + +=item C<_NL_ADDRESS_POSTAL_FMT> + +=item C<_NL_ADDRESS_COUNTRY_NAME> + +=item C<_NL_ADDRESS_COUNTRY_POST> + +=item C<_NL_ADDRESS_COUNTRY_AB2> + +=item C<_NL_ADDRESS_COUNTRY_AB3> + +=item C<_NL_ADDRESS_COUNTRY_CAR> + +=item C<_NL_ADDRESS_COUNTRY_NUM> + +=item C<_NL_ADDRESS_COUNTRY_ISBN> + +=item C<_NL_ADDRESS_LANG_NAME> + +=item C<_NL_ADDRESS_LANG_AB> + +=item C<_NL_ADDRESS_LANG_TERM> + +=item C<_NL_ADDRESS_LANG_LIB> + +On Linux boxes, these return information about the country for the current +locale. Further information is found in F<langinfo.h> + +=item C<_NL_IDENTIFICATION_TITLE> + +=item C<_NL_IDENTIFICATION_SOURCE> + +=item C<_NL_IDENTIFICATION_ADDRESS> + +=item C<_NL_IDENTIFICATION_CONTACT> + +=item C<_NL_IDENTIFICATION_EMAIL> + +=item C<_NL_IDENTIFICATION_TEL> + +=item C<_NL_IDENTIFICATION_FAX> + +=item C<_NL_IDENTIFICATION_LANGUAGE> + +=item C<_NL_IDENTIFICATION_TERRITORY> + +=item C<_NL_IDENTIFICATION_AUDIENCE> + +=item C<_NL_IDENTIFICATION_APPLICATION> + +=item C<_NL_IDENTIFICATION_ABBREVIATION> + +=item C<_NL_IDENTIFICATION_REVISION> + +=item C<_NL_IDENTIFICATION_DATE> + +=item C<_NL_IDENTIFICATION_CATEGORY> + +On Linux boxes, these return meta information about the current locale, +such as how to get in touch with its maintainers. +Further information is found in F<langinfo.h> + +=item C<_NL_MEASUREMENT_MEASUREMENT> + +On Linux boxes, it returns 1 if the metric system of measurement prevails in +the locale; or 2 if US customary units prevail. + +=item C<_NL_NAME_NAME_FMT> + +=item C<_NL_NAME_NAME_GEN> + +=item C<_NL_NAME_NAME_MR> + +=item C<_NL_NAME_NAME_MRS> + +=item C<_NL_NAME_NAME_MISS> + +=item C<_NL_NAME_NAME_MS> + +On Linux boxes, these return information about how names are formatted and +the personal salutations used in the current locale. Further information +is found in L<locale(7)> and F<langinfo.h> + +=item C<_NL_PAPER_HEIGHT> + +=item C<_NL_PAPER_WIDTH> + +On Linux boxes, these return the standard size of sheets of paper (in +millimeters) in the current locale.
+ +=item C<_NL_TELEPHONE_TEL_INT_FMT> + +=item C<_NL_TELEPHONE_TEL_DOM_FMT> + +=item C<_NL_TELEPHONE_INT_SELECT> + +=item C<_NL_TELEPHONE_INT_PREFIX> + +On Linux boxes, these return information about how telephone numbers are +formatted (both domestically and international calling) in the current locale. +Further information is found in F<langinfo.h> + +=back + +=head2 For systems without C<nl_langinfo> + +This module originally was just a wrapper for the libc C<nl_langinfo> +function, and did not work on systems lacking it, such as Windows. + +Starting in Perl 5.28, this module works on all platforms. When +C<nl_langinfo> is not available, it uses various methods to construct +what that function, if present, would return. But there are potential +glitches. These are the items that could be different: + +=over + +=item C<ERA> + +Unimplemented, so returns C<"">. + +=item C<CODESET> + +This should work properly for Windows platforms. On almost all other modern +platforms, it will reliably return "UTF-8" if that is the code set. +Otherwise, it depends on the locale's name. If that is of the form +C<foo.bar>, it will assume C<bar> is the code set; and it also knows about the +two locales "C" and "POSIX". If none of those apply it returns C<"">. + +=item C<YESEXPR> + +=item C<YESSTR> + +=item C<NOEXPR> + +=item C<NOSTR> + +Only the values for English are returned. C<YESSTR> and C<NOSTR> have been +removed from POSIX 2008, and are retained here for backwards compatibility. +Your platform's C<nl_langinfo> may not support them. + +=item C<ALT_DIGITS> + +On systems with a C<L<strftime(3)>> that recognizes the POSIX-defined C<%O> +format modifier (not Windows), perl tries hard to return these. The result +likely will go as high as what C<nl_langinfo()> would return, but not +necessarily; and the numbers from C<0..9> will always be stripped of leading +zeros. + +Without C<%O>, an empty string is always returned. + +=item C<D_FMT> + +Always evaluates to C<%x>, the locale's appropriate date representation. + +=item C<T_FMT> + +Always evaluates to C<%X>, the locale's appropriate time representation.
+ +=item C<D_T_FMT> + +Always evaluates to C<%c>, the locale's appropriate date and time +representation. + +=item C<CRNCYSTR> + +The return may be incorrect for those rare locales where the currency symbol +replaces the radix character. If you have examples of it needing to work +differently, please file a report at L<https://github.com/Perl/perl5/issues>. + +=item C<ERA_D_FMT> + +=item C<ERA_T_FMT> + +=item C<ERA_D_T_FMT> + +=item C<T_FMT_AMPM> + +These are derived by using C<strftime()>, and not all versions of that function +know about them. C<""> is returned for these on such systems. + +=item All C<_NL_I<foo>> items + +These return the same values as they do on boxes that don't have the +appropriate underlying locale categories. + +=back + +See your L<nl_langinfo(3)> for more information about the available +constants. (Often this means having to look directly at the +F<langinfo.h> C header file.) + +=head2 EXPORT + +By default only the C<langinfo()> function is exported. + +=head1 BUGS + +Before Perl 5.28, the returned values are unreliable for the C<RADIXCHAR> and +C<THOUSEP> locale constants. + +Starting in 5.28, changing locales on threaded builds is supported on systems +that offer thread-safe locale functions. These include POSIX 2008 systems and +Windows starting with Visual Studio 2005, and this module will work properly +in such situations. However, on threaded builds on Windows prior to Visual +Studio 2015, retrieving the items C<CRNCYSTR> and C<THOUSEP> can result in a +race with a thread that has converted to use the global locale. It is quite +uncommon for a thread to have done this. It would be possible to construct a +workaround for this; patches welcome: see L<perlapi/switch_to_global_locale>. + +=head1 SEE ALSO + +L<perllocale>, L<POSIX/localeconv>, L<POSIX/setlocale>, L<nl_langinfo(3)>. + +=head1 AUTHOR + +Jarkko Hietaniemi, E<lt>jhi@hut.fiE<gt>. Now maintained by Perl 5 porters. + +=head1 COPYRIGHT AND LICENSE + +Copyright 2001 by Jarkko Hietaniemi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. 
+ +=cut diff --git a/src/main/perl/lib/constant.pm b/src/main/perl/lib/constant.pm index dd86be2b2..e4b8fd2d0 100644 --- a/src/main/perl/lib/constant.pm +++ b/src/main/perl/lib/constant.pm @@ -1,32 +1,450 @@ package constant; - +use 5.008; use strict; +use warnings::register; + +our $VERSION = '1.33'; +our %declared; + +#======================================================================= + +# Some names are evil choices. +my %keywords = map +($_, 1), qw{ BEGIN INIT CHECK END DESTROY AUTOLOAD }; +$keywords{UNITCHECK}++ if $] > 5.009; + +my %forced_into_main = map +($_, 1), + qw{ STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG }; + +my %forbidden = (%keywords, %forced_into_main); +my $normal_constant_name = qr/^_?[^\W_0-9]\w*\z/; +my $tolerable = qr/^[A-Za-z_]\w*\z/; +my $boolean = qr/^[01]?\z/; + +BEGIN { + # We'd like to do use constant _CAN_PCS => $] > 5.009002 + # but that's a bit tricky before we load the constant module :-) + # By doing this, we save several run time checks for *every* call + # to import. + my $const = $] > 5.009002; + my $downgrade = $] < 5.015004; # && $] >= 5.008 + my $constarray = exists &_make_const; + if ($const) { + Internals::SvREADONLY($const, 1); + Internals::SvREADONLY($downgrade, 1); + $constant::{_CAN_PCS} = \$const; + $constant::{_DOWNGRADE} = \$downgrade; + $constant::{_CAN_PCS_FOR_ARRAY} = \$constarray; + } + else { + no strict 'refs'; + *{"_CAN_PCS"} = sub () {$const}; + *{"_DOWNGRADE"} = sub () { $downgrade }; + *{"_CAN_PCS_FOR_ARRAY"} = sub () { $constarray }; + } +} + +#======================================================================= +# import() - import symbols into user's namespace +# +# What we actually do is define a function in the caller's namespace +# which returns the value. The function we create will normally +# be inlined as a constant, thereby avoiding further sub calling +# overhead. 
+#======================================================================= sub import { my $class = shift; + return unless @_; # Ignore 'use constant;' + my $constants; + my $multiple = ref $_[0]; my $caller = caller; + my $flush_mro; + my $symtab; + + if (_CAN_PCS) { + no strict 'refs'; + $symtab = \%{$caller . '::'}; + }; - if (@_ == 1 && ref $_[0] eq 'HASH') { - my $constants = shift; - while (my ($name, $value) = each %$constants) { - _define_constant($caller, $name, $value); - } + if ( $multiple ) { + if (ref $_[0] ne 'HASH') { + require Carp; + Carp::croak("Invalid reference type '".ref(shift)."' not 'HASH'"); + } + $constants = shift; } else { - while (@_) { - my $name = shift; - my $value = shift; - _define_constant($caller, $name, $value); - } + unless (defined $_[0]) { + require Carp; + Carp::croak("Can't use undef as constant name"); + } + $constants->{+shift} = undef; } -} -sub _define_constant { - my ($package, $name, $value) = @_; - no strict 'refs'; - # Store directly in stash as a reference - this creates a proper constant - # that RuntimeStashEntry recognizes and sets constantValue on the RuntimeCode - ${"${package}::"}{$name} = \$value; + foreach my $name ( keys %$constants ) { + my $pkg; + my $symtab = $symtab; + my $orig_name = $name; + if ($name =~ s/(.*)(?:::|')(?=.)//s) { + $pkg = $1; + if (_CAN_PCS && $pkg ne $caller) { + no strict 'refs'; + $symtab = \%{$pkg . '::'}; + } + } + else { + $pkg = $caller; + } + + # Normal constant name + if ($name =~ $normal_constant_name and !$forbidden{$name}) { + # Everything is okay + + # Name forced into main, but we're not in main. Fatal. + } elsif ($forced_into_main{$name} and $pkg ne 'main') { + require Carp; + Carp::croak("Constant name '$name' is forced into main::"); + + # Starts with double underscore. Fatal. 
+ } elsif ($name =~ /^__/) { + require Carp; + Carp::croak("Constant name '$name' begins with '__'"); + + # Maybe the name is tolerable + } elsif ($name =~ $tolerable) { + # Then we'll warn only if you've asked for warnings + if (warnings::enabled()) { + if ($keywords{$name}) { + warnings::warn("Constant name '$name' is a Perl keyword"); + } elsif ($forced_into_main{$name}) { + warnings::warn("Constant name '$name' is " . + "forced into package main::"); + } + } + + # Looks like a boolean + # use constant FRED == fred; + } elsif ($name =~ $boolean) { + require Carp; + if (@_) { + Carp::croak("Constant name '$name' is invalid"); + } else { + Carp::croak("Constant name looks like boolean value"); + } + + } else { + # Must have bad characters + require Carp; + Carp::croak("Constant name '$name' has invalid characters"); + } + + { + no strict 'refs'; + my $full_name = "${pkg}::$name"; + $declared{$full_name}++; + if ($multiple || @_ == 1) { + my $scalar = $multiple ? $constants->{$orig_name} : $_[0]; + + if (_DOWNGRADE) { # for 5.8 to 5.14 + # Work around perl bug #31991: Sub names (actually glob + # names in general) ignore the UTF8 flag. So we have to + # turn it off to get the "right" symbol table entry. + utf8::is_utf8 $name and utf8::encode $name; + } + + # The constant serves to optimise this entire block out on + # 5.8 and earlier. + if (_CAN_PCS) { + # Use a reference as a proxy for a constant subroutine. + # If this is not a glob yet, it saves space. If it is + # a glob, we must still create it this way to get the + # right internal flags set, as constants are distinct + # from subroutines created with sub(){...}. + # The check in Perl_ck_rvconst knows that inlinable + # constants from cv_const_sv are read only. 
So we have to: + Internals::SvREADONLY($scalar, 1); + if (!exists $symtab->{$name}) { + $symtab->{$name} = \$scalar; + ++$flush_mro->{$pkg}; + } + else { + local $constant::{_dummy} = \$scalar; + *$full_name = \&{"_dummy"}; + } + } else { + *$full_name = sub () { $scalar }; + } + } elsif (@_) { + my @list = @_; + if (_CAN_PCS_FOR_ARRAY) { + _make_const($list[$_]) for 0..$#list; + _make_const(@list); + if (!exists $symtab->{$name}) { + $symtab->{$name} = \@list; + $flush_mro->{$pkg}++; + } + else { + local $constant::{_dummy} = \@list; + *$full_name = \&{"_dummy"}; + } + } + else { *$full_name = sub () { @list }; } + } else { + *$full_name = sub () { }; + } + } + } + # Flush the cache exactly once if we make any direct symbol table changes. + if (_CAN_PCS && $flush_mro) { + mro::method_changed_in($_) for keys %$flush_mro; + } } 1; +__END__ + +=head1 NAME + +constant - Perl pragma to declare constants + +=head1 SYNOPSIS + + use constant PI => 4 * atan2(1, 1); + use constant DEBUG => 0; + + print "Pi equals ", PI, "...\n" if DEBUG; + + use constant { + SEC => 0, + MIN => 1, + HOUR => 2, + MDAY => 3, + MON => 4, + YEAR => 5, + WDAY => 6, + YDAY => 7, + ISDST => 8, + }; + + use constant WEEKDAYS => qw( + Sunday Monday Tuesday Wednesday Thursday Friday Saturday + ); + + print "Today is ", (WEEKDAYS)[ (localtime)[WDAY] ], ".\n"; + +=head1 DESCRIPTION + +This pragma allows you to declare constants at compile-time. + +When you declare a constant such as C using the method shown +above, each machine your script runs upon can have as many digits +of accuracy as it can use. Also, your program will be easier to +read, more likely to be maintained (and maintained correctly), and +far less likely to send a space probe to the wrong planet because +nobody noticed the one equation in which you wrote C<3.14195>. + +When a constant is used in an expression, Perl replaces it with its +value at compile time, and may then optimize the expression further. 
+In particular, any code in an C<if (CONSTANT)> block will be optimized +away if the constant is false. + +=head1 NOTES + +As with all C<use> directives, defining a constant happens at +compile time. Thus, it's probably not correct to put a constant +declaration inside of a conditional statement (like C<if ($foo) { use constant ... }>). + +Constants defined using this module cannot be interpolated into +strings like variables. However, concatenation works just fine: + + print "Pi equals PI...\n"; # WRONG: does not expand "PI" + print "Pi equals ".PI."...\n"; # right + +Even though a reference may be declared as a constant, the reference may +point to data which may be changed, as this code shows. + + use constant ARRAY => [ 1,2,3,4 ]; + print ARRAY->[1]; + ARRAY->[1] = " be changed"; + print ARRAY->[1]; + +Constants belong to the package they are defined in. To refer to a +constant defined in another package, specify the full package name, as +in C<Some::Package::CONSTANT>. Constants may be exported by modules, +and may also be called as either class or instance methods, that is, +as C<< Some::Package->CONSTANT >> or as C<< $obj->CONSTANT >> where +C<$obj> is an instance of C<Some::Package>. Subclasses may define +their own constants to override those in their base class. + +As of version 1.32 of this module, constants can be defined in packages +other than the caller, by including the package name in the name of the +constant: + + use constant "OtherPackage::FWIBBLE" => 7865; + constant->import("Other::FWOBBLE",$value); # dynamically at run time + +The use of all caps for constant names is merely a convention, +although it is recommended in order to make constants stand out +and to help avoid collisions with other barewords, keywords, and +subroutine names. Constant names must begin with a letter or +underscore. Names beginning with a double underscore are reserved. Some +poor choices for names will generate warnings, if warnings are enabled at +compile time. + +=head2 List constants + +Constants may be lists of more (or less) than one value. 
A constant +with no values evaluates to C<undef> in scalar context. Note that +constants with more than one value do I<not> return their last value in +scalar context as one might expect. They currently return the number +of values, but B<this may change in the future>. Do not use constants +with multiple values in scalar context. + +B<NOTE:> This implies that the expression defining the value of a +constant is evaluated in list context. This may produce surprises: + + use constant TIMESTAMP => localtime; # WRONG! + use constant TIMESTAMP => scalar localtime; # right + +The first line above defines C<TIMESTAMP> as a 9-element list, as +returned by C<localtime()> in list context. To set it to the string +returned by C<localtime()> in scalar context, an explicit C<scalar> +keyword is required. + +List constants are lists, not arrays. To index or slice them, they +must be placed in parentheses. + + my @workdays = WEEKDAYS[1 .. 5]; # WRONG! + my @workdays = (WEEKDAYS)[1 .. 5]; # right + +=head2 Defining multiple constants at once + +Instead of writing multiple C<use constant> statements, you may define +multiple constants in a single statement by giving, instead of the +constant name, a reference to a hash where the keys are the names of +the constants to be defined. Obviously, all constants defined using +this method must have a single value. + + use constant { + FOO => "A single value", + BAR => "This", "won't", "work!", # Error! + }; + +This is a fundamental limitation of the way hashes are constructed in +Perl. The error messages produced when this happens will often be +quite cryptic -- in the worst case there may be none at all, and +you'll only later find that something is broken. + +When defining multiple constants, you cannot use the values of other +constants defined in the same declaration. This is because the +calling package doesn't know about any constant within that group +until I<after> the C<use> statement is finished. + + use constant { + BITMASK => 0xAFBAEBA8, + NEGMASK => ~BITMASK, # Error! 
+ }; + +=head2 Magic constants + +Magical values and references can be made into constants at compile +time, allowing for way cool stuff like this. (These error numbers +aren't totally portable, alas.) + + use constant E2BIG => ($! = 7); + print E2BIG, "\n"; # something like "Arg list too long" + print 0+E2BIG, "\n"; # "7" + +You can't produce a tied constant by giving a tied scalar as the +value. References to tied variables, however, can be used as +constants without any problems. + +=head1 TECHNICAL NOTES + +In the current implementation, scalar constants are actually +inlinable subroutines. As of version 5.004 of Perl, the appropriate +scalar constant is inserted directly in place of some subroutine +calls, thereby saving the overhead of a subroutine call. See +L<perlsub/"Constant Functions"> for details about how and when this +happens. + +In the rare case in which you need to discover at run time whether a +particular constant has been declared via this module, you may use +this function to examine the hash C<%constant::declared>. If the given +constant name does not include a package name, the current package is +used. + + sub declared ($) { + use constant 1.01; # don't omit this! + my $name = shift; + $name =~ s/^::/main::/; + my $pkg = caller; + my $full_name = $name =~ /::/ ? $name : "${pkg}::$name"; + $constant::declared{$full_name}; + } + +=head1 CAVEATS + +List constants are not inlined unless you are using Perl v5.20 or higher. +In v5.20 or higher, they are still not read-only, but that may change in +future versions. + +It is not possible to have a subroutine or a keyword with the same +name as a constant in the same package. This is probably a Good Thing. + +A constant with a name in the list C<STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG> is not allowed anywhere but in package C<main::>, for +technical reasons. + +Unlike constants in some languages, these cannot be overridden +on the command line or via environment variables. 
+ +You can get into trouble if you use constants in a context which +automatically quotes barewords (as is true for any subroutine call). +For example, you can't say C<$hash{CONSTANT}> because C will +be interpreted as a string. Use C<$hash{CONSTANT()}> or +C<$hash{+CONSTANT}> to prevent the bareword quoting mechanism from +kicking in. Similarly, since the C<< => >> operator quotes a bareword +immediately to its left, you have to say C<< CONSTANT() => 'value' >> +(or simply use a comma in place of the big arrow) instead of +C<< CONSTANT => 'value' >>. + +=head1 SEE ALSO + +L - Facility for creating read-only scalars, arrays, hashes. + +L - Make read-only variables via attribute + +L - Perl extension to the C scalar flag + +L - A selection of general-utility hash subroutines (mostly +to lock/unlock keys and values) + +=head1 BUGS + +Please report any bugs or feature requests via the perlbug(1) utility. + +=head1 AUTHORS + +Tom Phoenix, EFE, with help from +many other folks. + +Multiple constant declarations at once added by Casey West, +EFE. + +Documentation mostly rewritten by Ilmari Karonen, +EFE. + +This program is maintained by the Perl 5 Porters. +The CPAN distribution is maintained by SEbastien Aperghis-Tramoni +EFE. + +=head1 COPYRIGHT & LICENSE + +Copyright (C) 1997, 1999 Tom Phoenix + +This module is free software; you can redistribute it or modify it +under the same terms as Perl itself. + +=cut From d812adc5aee55b7f1df798ef54dbaa4dc8ca2c5d Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 25 Mar 2026 09:23:14 +0100 Subject: [PATCH 3/7] Fix version->parse() to handle undef values Perl's version->parse(undef) and version->parse("undef") both return version 0. PerlOnJava was throwing "Invalid version format (non-numeric data)" instead. This fix is needed for CPAN module testing where ExtUtils::MakeMaker's MM->parse_version() returns the literal string "undef" for modules without version numbers (e.g., Dist::CheckConflicts, warnings::register). 
Changes: - Handle RuntimeScalarType.UNDEF by treating as version "0" - Handle literal string "undef" by treating as version "0" - Restructure validation to skip when handling undef cases Fixes DateTime test suite t/00-report-prereqs.t failure. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../runtime/perlmodule/Version.java | 82 +++++++++++-------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Version.java b/src/main/java/org/perlonjava/runtime/perlmodule/Version.java index 8ea6142c0..bc1d5de98 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Version.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Version.java @@ -7,6 +7,7 @@ import static org.perlonjava.runtime.runtimetypes.GlobalVariable.getGlobalVariable; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarCache.*; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.DOUBLE; +import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.UNDEF; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.VSTRING; // TODO - create test cases @@ -87,8 +88,13 @@ private static RuntimeList parseInternal(RuntimeArray args, int ctx, boolean for // Track whether the original input was a v-string boolean isVString = false; + // Handle undef - treat as version 0 (Perl behavior) + if (versionStr.type == UNDEF) { + version = "0"; + originalVersionStr = new RuntimeScalar("0"); + } // Handle VSTRING type (bare v-strings like v1.2.3) - if (versionStr.type == VSTRING) { + else if (versionStr.type == VSTRING) { isVString = true; // Convert VSTRING to dotted format String vstringValue = versionStr.value.toString(); @@ -102,47 +108,53 @@ private static RuntimeList parseInternal(RuntimeArray args, int ctx, boolean for } else { version = versionStr.toString().trim(); - if (version.isEmpty()) { - throw new 
PerlCompilerException("Invalid version format (version required)"); - } + // Handle literal string "undef" - treat as version 0 (Perl behavior) + if (version.equals("undef")) { + version = "0"; + originalVersionStr = new RuntimeScalar("0"); + } else { + if (version.isEmpty()) { + throw new PerlCompilerException("Invalid version format (version required)"); + } - // Check if original starts with 'v' - isVString = version.startsWith("v"); + // Check if original starts with 'v' + isVString = version.startsWith("v"); - // Validate version format - check for multiple underscores - int underscoreCount = 0; - for (char c : version.toCharArray()) { - if (c == '_') underscoreCount++; - } - if (underscoreCount > 1) { - throw new PerlCompilerException("Invalid version format (multiple underscores)"); - } + // Validate version format - check for multiple underscores + int underscoreCount = 0; + for (char c : version.toCharArray()) { + if (c == '_') underscoreCount++; + } + if (underscoreCount > 1) { + throw new PerlCompilerException("Invalid version format (multiple underscores)"); + } - // Validate version format - must contain at least one digit - // and be a valid version pattern (digits, dots, underscores, optional v prefix) - String checkVersion = isVString ? version.substring(1) : version; - checkVersion = checkVersion.replace("_", ""); + // Validate version format - must contain at least one digit + // and be a valid version pattern (digits, dots, underscores, optional v prefix) + String checkVersion = isVString ? 
version.substring(1) : version; + checkVersion = checkVersion.replace("_", ""); - // Version must start with a digit and only contain digits and dots - // (after removing v prefix and underscores) - if (!checkVersion.matches("\\d+(\\.\\d+)*")) { - throw new PerlCompilerException("Invalid version format (non-numeric data)"); - } + // Version must start with a digit and only contain digits and dots + // (after removing v prefix and underscores) + if (!checkVersion.matches("\\d+(\\.\\d+)*")) { + throw new PerlCompilerException("Invalid version format (non-numeric data)"); + } - if (versionStr.type == DOUBLE) { - // Format with enough precision but strip trailing zeros - version = String.format("%.6f", versionStr.getDouble()); - // Remove trailing zeros after decimal point - if (version.contains(".")) { - version = version.replaceAll("0+$", ""); - // Remove trailing dot if all decimals were zeros (e.g., "1." -> "1") - if (version.endsWith(".")) { - version = version.substring(0, version.length() - 1); + if (versionStr.type == DOUBLE) { + // Format with enough precision but strip trailing zeros + version = String.format("%.6f", versionStr.getDouble()); + // Remove trailing zeros after decimal point + if (version.contains(".")) { + version = version.replaceAll("0+$", ""); + // Remove trailing dot if all decimals were zeros (e.g., "1." -> "1") + if (version.endsWith(".")) { + version = version.substring(0, version.length() - 1); + } } + originalVersionStr = new RuntimeScalar(version); + } else { + originalVersionStr = versionStr; } - originalVersionStr = new RuntimeScalar(version); - } else { - originalVersionStr = versionStr; } } From abba6b9b901fe5d978d3e1546d9949eda72fb68d Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Wed, 25 Mar 2026 10:38:38 +0100 Subject: [PATCH 4/7] Fix $^H propagation between BEGIN blocks and Unicode regex matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - PerlLanguageProvider: Propagate $^H (strictOptions) bi-directionally between caller scope and BEGIN block execution scope. This ensures that: 1. BEGIN blocks inherit $^H from the enclosing lexical scope 2. Changes to $^H in BEGIN blocks persist for subsequent code in the same scope - RuntimeRegex: Use Unicode pattern (UNICODE_CHARACTER_CLASS) when the string has the UTF-8 flag set, regardless of whether characters are > 255. This fixes \\w matching Latin-1 characters like è (U+00E8) in UTF-8 strings. - Warnings: Allow warnings::enabled() to be called without arguments (checks if warnings are enabled for the calling package). Required by constant.pm. Test results vs master: - comp/hints.t: 23/31 (was 21/31) - improved by 2 tests - uni/gv.t: 158/206 (same as master) - uni/stash.t: 33/49 (same as master) - op/blocks.t: 9/26 (same as master) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../scriptengine/PerlLanguageProvider.java | 10 +++++++++- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/perlmodule/Warnings.java | 11 ++++++++-- .../runtime/regex/RuntimeRegex.java | 20 +++++++++---------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java index 65d16a6fc..56d3b5177 100644 --- a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java +++ b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java @@ -241,6 +241,12 @@ public static RuntimeList executePerlAST(Node ast, globalSymbolTable.addVariable("@_", "our", null); 
globalSymbolTable.addVariable("wantarray", "", null); + // Inherit $^H (strictOptions) from the caller's scope so BEGIN blocks + // can see and modify the enclosing scope's compile-time hints + if (savedCurrentScope != null) { + globalSymbolTable.setStrictOptions(savedCurrentScope.getStrictOptions()); + } + EmitterContext ctx = new EmitterContext( new JavaClassInfo(), globalSymbolTable.snapShot(), @@ -274,8 +280,10 @@ public static RuntimeList executePerlAST(Node ast, return executeCode(runtimeCode, ctx, false, contextType); } finally { - // Restore the caller's scope + // Propagate $^H changes back to the caller's scope so subsequent + // code in the same lexical block sees the updated hints if (savedCurrentScope != null) { + savedCurrentScope.setStrictOptions(ctx.symbolTable.getStrictOptions()); SpecialBlockParser.setCurrentScope(savedCurrentScope); } } diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 8c4cfa0fd..a34539bb2 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "0b0352af1"; + public static final String gitCommitId = "d812adc5a"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Warnings.java b/src/main/java/org/perlonjava/runtime/perlmodule/Warnings.java index ddf7c51ad..cbc97c59b 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Warnings.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Warnings.java @@ -140,10 +140,17 @@ public static boolean warningExists(String category) { * @return A RuntimeList containing a boolean value. 
*/ public static RuntimeList enabled(RuntimeArray args, int ctx) { - if (args.size() < 1 || args.size() > 2) { + if (args.size() > 2) { throw new IllegalStateException("Bad number of arguments for warnings::enabled()"); } - String category = args.get(0).toString(); + String category; + if (args.size() < 1) { + // No category specified - check if warnings are enabled for calling package + // Use "all" as the category to check general warning state + category = "all"; + } else { + category = args.get(0).toString(); + } boolean isEnabled = warningManager.isWarningEnabled(category); return new RuntimeScalar(isEnabled).getList(); } diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 28b8d8891..f42757051 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -518,11 +518,10 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc Pattern pattern = regex.pattern; String inputStr = string.toString(); - // Select appropriate pattern based on string's UTF-8 flag and content: + // Select appropriate pattern based on string's UTF-8 flag: // - /a flag or inline (?a): always use ASCII-only pattern // - BYTE_STRING: use ASCII-only pattern (Perl's "bytes" semantics) - // - UTF-8 string with Unicode chars (> 255): use Unicode pattern - // - UTF-8 string with only Latin-1 chars: use ASCII pattern (avoids false matches) + // - UTF-8 string: use Unicode pattern (Perl's Unicode semantics for \w, \d, \s) // This mimics Perl's behavior where \w, \d, \s semantics depend on UTF-8 flag if (regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { @@ -531,11 +530,12 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc } else if (hasInlineAsciiModifier(regex.patternString)) { // 
Inline (?a...) in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { - // UTF-8 string with true Unicode content (> 255) - use Unicode matching + } else if (Utf8.isUtf8(string)) { + // UTF-8 string - use Unicode matching for \w, \d, \s + // This ensures Latin-1 characters like è (U+00E8) are matched by \w pattern = regex.patternUnicode; } - // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) + // else: BYTE_STRING - keep ASCII pattern (default) } CharSequence matchInput = new RegexTimeoutCharSequence(inputStr); @@ -857,7 +857,7 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar Pattern pattern = regex.pattern; - // Select appropriate pattern based on string's UTF-8 flag and content (same logic as matchRegex) + // Select appropriate pattern based on string's UTF-8 flag (same logic as matchRegex) if (regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { // /a flag - always ASCII @@ -865,11 +865,11 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar } else if (hasInlineAsciiModifier(regex.patternString)) { // Inline (?a...) in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { - // UTF-8 string with true Unicode content (> 255) - use Unicode matching + } else if (Utf8.isUtf8(string)) { + // UTF-8 string - use Unicode matching for \w, \d, \s pattern = regex.patternUnicode; } - // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) + // else: BYTE_STRING - keep ASCII pattern (default) } CharSequence matchInput = new RegexTimeoutCharSequence(inputStr); From 217705588e85ad8925cdeb0bbf19478965dc35a6 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Wed, 25 Mar 2026 11:02:15 +0100 Subject: [PATCH 5/7] Fix ${qr//} dereference with strict refs enabled Add REGEX case to scalarDeref() to handle dereferencing Regexp objects. In Perl, ${qr/foo/} returns the stringified form "(?^:foo)". Previously this threw "Not a SCALAR reference" under strict refs. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 2 +- .../perlonjava/runtime/runtimetypes/RuntimeScalar.java | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index a34539bb2..cd74e4ba0 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "d812adc5a"; + public static final String gitCommitId = "abba6b9b9"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index ccc97ffef..ee6433b6d 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -1121,6 +1121,14 @@ public RuntimeScalar scalarDeref() { yield newScalar; } case REFERENCE -> (RuntimeScalar) value; + case REGEX -> { + // Dereferencing a Regexp (qr//) returns its stringified form + // In Perl, ${qr/foo/} returns "(?^:foo)" + RuntimeScalar result = new RuntimeScalar(); + result.type = RuntimeScalarType.STRING; + result.value = this.value.toString(); + yield result; + } case GLOB -> { // Dereferencing a glob as scalar returns the scalar slot // e.g., ${*Foo::VERSION} or ${$glob} where $glob is a glob From 03308c996bd6eb4411c0aed0f585de38b0af889a Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 25 Mar 2026 11:25:06 +0100 Subject: [PATCH 6/7] Revert RuntimeRegex Unicode pattern change Revert the Unicode pattern change from commit abba6b9b9. The change exposed a pre-existing bug where hash keys lose their byte/UTF-8 flag, causing op/utfhash.t to regress from 91/99 to 89/99. Keep the hasUnicodeChars check to avoid the regression. The underlying hash key UTF-8 flag issue is a separate bug to fix later. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/regex/RuntimeRegex.java | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index cd74e4ba0..7fd9c27ab 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "abba6b9b9"; + public static final String gitCommitId = "217705588"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index f42757051..28b8d8891 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -518,10 +518,11 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc Pattern pattern = regex.pattern; String inputStr = string.toString(); - // Select appropriate pattern based on string's UTF-8 flag: + // Select appropriate pattern based on string's UTF-8 flag and content: // - /a flag or inline (?a): always use ASCII-only pattern // - BYTE_STRING: use ASCII-only pattern (Perl's "bytes" semantics) - // - UTF-8 string: use Unicode pattern (Perl's Unicode semantics for \w, \d, \s) + // - UTF-8 string with Unicode chars (> 255): use Unicode pattern + // - UTF-8 string with only Latin-1 chars: use ASCII pattern (avoids false matches) // This mimics Perl's behavior where \w, \d, \s semantics depend on UTF-8 flag if 
(regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { @@ -530,12 +531,11 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc } else if (hasInlineAsciiModifier(regex.patternString)) { // Inline (?a...) in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string)) { - // UTF-8 string - use Unicode matching for \w, \d, \s - // This ensures Latin-1 characters like è (U+00E8) are matched by \w + } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { + // UTF-8 string with true Unicode content (> 255) - use Unicode matching pattern = regex.patternUnicode; } - // else: BYTE_STRING - keep ASCII pattern (default) + // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) } CharSequence matchInput = new RegexTimeoutCharSequence(inputStr); @@ -857,7 +857,7 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar Pattern pattern = regex.pattern; - // Select appropriate pattern based on string's UTF-8 flag (same logic as matchRegex) + // Select appropriate pattern based on string's UTF-8 flag and content (same logic as matchRegex) if (regex.patternUnicode != null && regex.patternUnicode != regex.pattern) { if (regex.regexFlags != null && regex.regexFlags.isAscii()) { // /a flag - always ASCII @@ -865,11 +865,11 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar } else if (hasInlineAsciiModifier(regex.patternString)) { // Inline (?a...) 
in pattern - use ASCII to be safe pattern = regex.pattern; - } else if (Utf8.isUtf8(string)) { - // UTF-8 string - use Unicode matching for \w, \d, \s + } else if (Utf8.isUtf8(string) && RuntimePosLvalue.hasUnicodeChars(string, inputStr)) { + // UTF-8 string with true Unicode content (> 255) - use Unicode matching pattern = regex.patternUnicode; } - // else: BYTE_STRING - keep ASCII pattern (default) + // else: BYTE_STRING or Latin-1 only content - keep ASCII pattern (default) } CharSequence matchInput = new RegexTimeoutCharSequence(inputStr); From bbdf592b1a48084aa6b95616e5685db15823c932 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 25 Mar 2026 11:39:43 +0100 Subject: [PATCH 7/7] Fix constant.pm to support Unicode constant names Add /u modifier to regex patterns for validating constant names. Without this, UTF-8 constant names fail validation because \w does not match Unicode word characters by default. Fixes uni/gv.t and uni/stash.t tests. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/perl/lib/constant.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/perl/lib/constant.pm b/src/main/perl/lib/constant.pm index e4b8fd2d0..e3c1f6a7f 100644 --- a/src/main/perl/lib/constant.pm +++ b/src/main/perl/lib/constant.pm @@ -17,8 +17,8 @@ my %forced_into_main = map +($_, 1), my %forbidden = (%keywords, %forced_into_main); -my $normal_constant_name = qr/^_?[^\W_0-9]\w*\z/; -my $tolerable = qr/^[A-Za-z_]\w*\z/; +my $normal_constant_name = qr/^_?[^\W_0-9]\w*\z/u; +my $tolerable = qr/^[A-Za-z_]\w*\z/u; my $boolean = qr/^[01]?\z/; BEGIN {