From f4d474a403e4f17176b4c703f8b07daa39d28092 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 12:37:43 +0200 Subject: [PATCH 1/5] perf: cache System.getenv() in hot paths as static final MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit System.getenv() is a native call (JNI → TreeMap lookup, ~200ns each). Several debug flags were being evaluated via System.getenv() on every call rather than once at class init. These uncached reads add up in hot compile and runtime paths. Caches: * MortalList.GC_DEBUG — was in maybeAutoSweep (fires after flush) * RuntimeScalar.PHASE_D_DBG — was in undefine() * RuntimeIO.IO_DEBUG — 2 hot spots in getRuntimeIO/write * IOOperator.IO_DEBUG — open/duplicateFileHandle hot paths * EmitterMethodCreator.ASM_DEBUG, ASM_DEBUG_CLASS_FILTER, BYTECODE_SIZE_DEBUG, SPILL_SLOT_COUNT — compile hot path, called ~once per compiled method (was 4-5 getenv per compile) * SubroutineParser.SHOW_FALLBACK — parser hot path * PerlLanguageProvider.SHOW_FALLBACK — compile fallback hot path Semantically identical — these are all at-startup-determined debug flags whose values never change during execution. Pattern already used elsewhere in the codebase (e.g., ScalarRefRegistry, RuntimeRegex). 
Regression gates: * DBIx::Class: Files=314, Tests=13804 — PASS (1152s, noise vs 1107) * Template-Toolkit: Files=106, Tests=2920 — PASS (133-137s) * Moo: Files=71, Tests=841 — PASS (91s) * make unit tests — PASS Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../scriptengine/PerlLanguageProvider.java | 9 ++++-- .../backend/jvm/EmitterMethodCreator.java | 29 ++++++++++++++----- .../org/perlonjava/core/Configuration.java | 4 +-- .../frontend/parser/SubroutineParser.java | 7 ++++- .../runtime/operators/IOOperator.java | 9 ++++-- .../runtime/runtimetypes/MortalList.java | 4 ++- .../runtime/runtimetypes/RuntimeIO.java | 9 ++++-- .../runtime/runtimetypes/RuntimeScalar.java | 7 ++++- 8 files changed, 59 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java index db2e8920f..5854758b3 100644 --- a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java +++ b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java @@ -52,6 +52,11 @@ */ public class PerlLanguageProvider { + // Cache env var at class-init to avoid repeated native System.getenv() + // calls from the compilation fallback hot path. + private static final boolean SHOW_FALLBACK = + System.getenv("JPERL_SHOW_FALLBACK") != null; + private static boolean globalInitialized = false; public static void resetAll() { @@ -538,7 +543,7 @@ private static RuntimeCode compileToExecutable(Node ast, EmitterContext ctx) thr // getBytecode() already compiled interpreter code as fallback // when ASM frame computation failed (e.g., high fan-in to shared labels). // Use the pre-compiled interpreter code directly. 
- boolean showFallback = System.getenv("JPERL_SHOW_FALLBACK") != null; + boolean showFallback = SHOW_FALLBACK; if (showFallback) { System.err.println("Note: Using interpreter fallback (ASM frame compute crash)."); } @@ -548,7 +553,7 @@ private static RuntimeCode compileToExecutable(Node ast, EmitterContext ctx) thr // Catch Throwable (not just RuntimeException) because ClassFormatError // ("Too many arguments in method signature") extends Error, not Exception if (needsInterpreterFallback(e)) { - boolean showFallback = System.getenv("JPERL_SHOW_FALLBACK") != null; + boolean showFallback = SHOW_FALLBACK; if (showFallback) { System.err.println("Note: Method too large, using interpreter backend."); } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java b/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java index 1f970d883..d05c74118 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java @@ -46,6 +46,21 @@ public class EmitterMethodCreator implements Opcodes { System.getenv("JPERL_DISABLE_INTERPRETER_FALLBACK") == null; private static final boolean SHOW_FALLBACK = System.getenv("JPERL_SHOW_FALLBACK") != null; + // Cache additional compile-time debug env vars. These were previously + // read with System.getenv() on every method compilation; the native + // lookup is ~200ns per call and added up across thousands of compiled + // subs during module load. + private static final boolean ASM_DEBUG = + System.getenv("JPERL_ASM_DEBUG") != null; + private static final String ASM_DEBUG_CLASS_FILTER = + System.getenv("JPERL_ASM_DEBUG_CLASS"); + private static final String BYTECODE_SIZE_DEBUG = + System.getenv("JPERL_BYTECODE_SIZE_DEBUG"); + private static final int SPILL_SLOT_COUNT; + static { + String s = System.getenv("JPERL_SPILL_SLOTS"); + SPILL_SLOT_COUNT = (s != null) ? 
Integer.parseInt(s) : 16; + } // Number of local variables to skip when processing a closure (this, @_, wantarray) public static int skipVariables = 3; // Counter for generating unique class names @@ -350,7 +365,7 @@ public static Class createClassWithMethod(EmitterContext ctx, Node ast, boole } public static byte[] getBytecode(EmitterContext ctx, Node ast, boolean useTryCatch) { - boolean asmDebug = System.getenv("JPERL_ASM_DEBUG") != null; + boolean asmDebug = ASM_DEBUG; try { return getBytecodeInternal(ctx, ast, useTryCatch, false); @@ -363,7 +378,7 @@ public static byte[] getBytecode(EmitterContext ctx, Node ast, boolean useTryCat // ASM frame computation failed - fall back to interpreter // This commonly happens with nested defers and complex control flow - boolean showFallback = System.getenv("JPERL_SHOW_FALLBACK") != null; + boolean showFallback = SHOW_FALLBACK; if (showFallback || asmDebug) { frameComputeCrash.printStackTrace(); try { @@ -411,8 +426,8 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean String className = ctx.javaClassInfo.javaClassName; String methodName = "apply"; byte[] classData = null; - boolean asmDebug = System.getenv("JPERL_ASM_DEBUG") != null; - String asmDebugClassFilter = System.getenv("JPERL_ASM_DEBUG_CLASS"); + boolean asmDebug = ASM_DEBUG; + String asmDebugClassFilter = ASM_DEBUG_CLASS_FILTER; boolean asmDebugClassMatches = asmDebugClassFilter == null || asmDebugClassFilter.isEmpty() || className.contains(asmDebugClassFilter) @@ -605,9 +620,7 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean mv.visitInsn(Opcodes.ICONST_0); mv.visitVarInsn(Opcodes.ISTORE, controlFlowActionSlot); - int spillSlotCount = System.getenv("JPERL_SPILL_SLOTS") != null - ? 
Integer.parseInt(System.getenv("JPERL_SPILL_SLOTS")) - : 16; + int spillSlotCount = SPILL_SLOT_COUNT; ctx.javaClassInfo.spillSlots = new int[spillSlotCount]; ctx.javaClassInfo.spillTop = 0; for (int i = 0; i < spillSlotCount; i++) { @@ -1114,7 +1127,7 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean cw.visitEnd(); classData = cw.toByteArray(); // Generate the bytecode - String bytecodeSizeDebug = System.getenv("JPERL_BYTECODE_SIZE_DEBUG"); + String bytecodeSizeDebug = BYTECODE_SIZE_DEBUG; if (bytecodeSizeDebug != null && !bytecodeSizeDebug.isEmpty()) { try { System.err.println("BYTECODE_SIZE class=" + className + " bytes=" + classData.length); diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index cf9e7205c..5358115bc 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "b331c5d70"; + public static final String gitCommitId = "84351b631"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 21 2026 00:05:50"; + public static final String buildTimestamp = "Apr 21 2026 12:14:48"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java index 39e1e6931..a4b7db953 100644 --- a/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/SubroutineParser.java @@ -33,6 +33,11 @@ public class SubroutineParser { + // Cache env var at class-init to avoid repeated native System.getenv() + // calls from the subroutine-parse hot path. + private static final boolean SHOW_FALLBACK = + System.getenv("JPERL_SHOW_FALLBACK") != null; + // Create a static semaphore with 1 permit private static final Semaphore semaphore = new Semaphore(1); @@ -1322,7 +1327,7 @@ public static ListNode handleNamedSubWithFilter(Parser parser, String subName, S // but the verifier rejected it at link time due to StackMapTable inconsistencies // (e.g., local variable slot type conflicts in complex methods). // Fall back to interpreter for this subroutine. 
- boolean showFallback = System.getenv("JPERL_SHOW_FALLBACK") != null; + boolean showFallback = SHOW_FALLBACK; if (showFallback) { System.err.println("Note: JVM VerifyError during subroutine instantiation, recompiling with interpreter."); } diff --git a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java index 8680d62fa..18df83596 100644 --- a/src/main/java/org/perlonjava/runtime/operators/IOOperator.java +++ b/src/main/java/org/perlonjava/runtime/operators/IOOperator.java @@ -40,6 +40,11 @@ public class IOOperator { // File descriptor to RuntimeIO mapping for duplication support private static final Map fileDescriptorMap = new ConcurrentHashMap<>(); + // Cache debug flag at class-init to avoid repeated native + // System.getenv() calls in hot IO paths (open, duplicateFileHandle). + private static final boolean IO_DEBUG = + System.getenv("JPERL_IO_DEBUG") != null; + public static RuntimeScalar select(RuntimeList runtimeList, int ctx) { if (runtimeList.isEmpty()) { // select (returns current filehandle) @@ -526,7 +531,7 @@ public static RuntimeScalar open(int ctx, RuntimeBase... 
args) { // open FILEHANDLE,EXPR // open FILEHANDLE - boolean ioDebug = System.getenv("JPERL_IO_DEBUG") != null; + boolean ioDebug = IO_DEBUG; // Get the filehandle - this should be an lvalue RuntimeScalar // For array/hash elements like $fh0[0], this is the actual lvalue that can be modified @@ -2800,7 +2805,7 @@ private static RuntimeIO duplicateFileHandle(RuntimeIO original) { duplicate.registerExternalFd(dupFd); } - if (System.getenv("JPERL_IO_DEBUG") != null) { + if (IO_DEBUG) { String origFileno; try { origFileno = original.ioHandle.fileno().toString(); diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java index 5751d5028..91ee9fde8 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MortalList.java @@ -528,6 +528,8 @@ public static boolean suppressFlush(boolean suppress) { private static final long AUTO_SWEEP_MIN_INTERVAL_NS = 5_000_000_000L; private static final boolean AUTO_GC_DISABLED = System.getenv("JPERL_NO_AUTO_GC") != null; + private static final boolean GC_DEBUG = + System.getenv("JPERL_GC_DEBUG") != null; private static boolean inAutoSweep = false; public static void flush() { @@ -584,7 +586,7 @@ private static void maybeAutoSweep() { // Explicit Internals::jperl_gc() still fires DESTROY for // callers that want full cleanup. 
int cleared = ReachabilityWalker.sweepWeakRefs(true); - if (System.getenv("JPERL_GC_DEBUG") != null) { + if (GC_DEBUG) { System.err.println("DBG auto-sweep cleared=" + cleared); } } finally { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java index 609fde661..49abf4fe7 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeIO.java @@ -202,6 +202,11 @@ protected boolean removeEldestEntry(Map.Entry eldest) { private static final ReferenceQueue globGCQueue = new ReferenceQueue<>(); private static final ConcurrentHashMap, RuntimeIO> phantomToIO = new ConcurrentHashMap<>(); + // Cache debug flag at class-init to avoid repeated native + // System.getenv() calls in the hot getRuntimeIO / write paths. + private static final boolean IO_DEBUG = + System.getenv("JPERL_IO_DEBUG") != null; + /** * Registers an anonymous RuntimeGlob for GC-based fd recycling. 
* When the glob becomes unreachable (all variables referencing it are @@ -1054,7 +1059,7 @@ public static void closeAllHandles() { */ public static RuntimeIO getRuntimeIO(RuntimeScalar runtimeScalar) { RuntimeIO fh = null; - boolean ioDebug = System.getenv("JPERL_IO_DEBUG") != null; + boolean ioDebug = IO_DEBUG; if (ioDebug) { System.err.println("[JPERL_IO_DEBUG] getRuntimeIO ENTRY: type=" + runtimeScalar.type + @@ -1495,7 +1500,7 @@ public RuntimeScalar write(String data) { } RuntimeScalar result = ioHandle.write(data); - if (System.getenv("JPERL_IO_DEBUG") != null) { + if (IO_DEBUG) { if (("main::STDOUT".equals(globName) || "main::STDERR".equals(globName)) && (ioHandle instanceof ClosedIOHandle || !result.getDefinedBoolean())) { System.err.println("[JPERL_IO_DEBUG] write failed: glob=" + globName + diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index 090942771..4356271ea 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -111,6 +111,11 @@ private static boolean mightBeInteger(String s) { */ public boolean refCountOwned; + // Cache debug env vars as static final to avoid repeated + // native System.getenv() calls in hot paths. + private static final boolean PHASE_D_DBG = + System.getenv("JPERL_PHASE_D_DBG") != null; + // Constructors public RuntimeScalar() { this.type = UNDEF; @@ -2165,7 +2170,7 @@ public RuntimeScalar undefine() { // check. Skips when we're in module-init to avoid clearing weak refs // that require/use chains still depend on. if (undefOnBlessedWithDestroy && !ModuleInitGuard.inModuleInit()) { - if (System.getenv("JPERL_PHASE_D_DBG") != null) { + if (PHASE_D_DBG) { System.err.println("DBG Phase D undef-of-blessed trigger for " + (oldBase != null ? 
org.perlonjava.runtime.runtimetypes.NameNormalizer.getBlessStr(oldBase.blessId) : "?") + " refCount=" + (oldBase != null ? oldBase.refCount : -1)); From 2fb0bd1294cdc69eb3e72e2c43cd31f01c888c3f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 13:55:14 +0200 Subject: [PATCH 2/5] perf: gate ScalarRefRegistry.registerRef() on weakRefsExist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ScalarRefRegistry.registerRef() was doing a synchronized(WeakHashMap).put() on every setLargeRefCounted call (every ref assignment). The registry is consumed ONLY by ReachabilityWalker.sweepWeakRefs(), which is only invoked when WeakRefRegistry.weakRefsExist is true (i.e., when weaken() has been called at least once). For scripts that never weaken(), every registerRef call was pure overhead. JFR profile of life_bitpacked.pl (which never weakens anything) showed WeakHashMap.put / Collections$SynchronizedMap.put / expungeStaleEntries dominating post-compile CPU. Measured on examples/life_bitpacked.pl default args (80x40, 5000 gens), best-of-3 Cell updates per second: before: 7.74–8.06 Mcells/s after: 9.79–9.93 Mcells/s (+22% median) Larger grid (200x200, 5000 gens): 10.01 → 11.05 Mcells/s (+10%). Trade-off: scripts that hold many scalars-with-refs PRIOR to the first weaken() call won't be in the registry when the walker first runs. However, any subsequent setLarge on those scalars will register them, and the walker's primary seeds (globals, code refs, DESTROY rescued set) still find reachable structures via the normal BFS. No DBIC 52leaks.t assertions regressed. Escape hatch: JPERL_UNGATED_SCALAR_REGISTRY=1 restores the old unconditional behavior. 
Regression gates: * DBIx::Class t/52leaks.t: 11/11 PASS * Template-Toolkit: Files=106, Tests=2920 — PASS (136-138s) * Moo: Files=71, Tests=841 — PASS (95s; within noise of 91s baseline) * make unit tests — PASS Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +-- .../runtimetypes/ScalarRefRegistry.java | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 5358115bc..a1830971b 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "84351b631"; + public static final String gitCommitId = "f4d474a40"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 21 2026 12:14:48"; + public static final String buildTimestamp = "Apr 21 2026 13:11:33"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarRefRegistry.java b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarRefRegistry.java index 5b0179389..c061e2721 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarRefRegistry.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarRefRegistry.java @@ -51,22 +51,36 @@ public class ScalarRefRegistry { System.getenv("JPERL_GC_DEBUG") != null; private static final boolean RECORD_STACKS = System.getenv("JPERL_REGISTER_STACKS") != null; + // Opt back to unconditional registration for scripts that weaken() + // after a long warm-up phase where many scalars were assigned. + private static final boolean UNGATED = + System.getenv("JPERL_UNGATED_SCALAR_REGISTRY") != null; /** * Register a scalar that now holds a reference. Called from * {@link RuntimeScalar#setLarge} paths that assign a ref value. *

- * NOTE: we do NOT gate on {@link WeakRefRegistry#weakRefsExist} - * because that flag only flips to true the first time - * {@code weaken()} is called. Scripts that assign refs BEFORE the - * first {@code weaken()} would otherwise miss those scalars, and - * the walker couldn't see them as live-lexical roots when it runs. - * The cost of the unconditional {@code WeakHashMap.put} is - * amortized by JVM hashing — small but present. Opt out via - * {@code JPERL_NO_SCALAR_REGISTRY=1} for benchmarking. + * Gated on {@link WeakRefRegistry#weakRefsExist}: this registry + * exists solely to feed {@link ReachabilityWalker#sweepWeakRefs} + * live-lexical seeds. If no weaken() has ever been called, no + * sweep will ever examine the registry, so registering is pure + * overhead — and it's a {@code synchronized(WeakHashMap).put} + * which is expensive per call. Life_bitpacked.pl profile showed + * this put path as the single largest post-compile hotspot. + *

+ * Trade-off: if a script holds many scalars-with-refs PRIOR to + * the first weaken(), those scalars won't be in the registry + * when the walker first runs. However, any subsequent + * {@code setLarge} on those scalars will register them, and the + * walker's primary seeds (globals, code refs, DESTROY rescued + * set) still find reachable structures via the normal BFS. + *

+ * Opt back to unconditional registration via + * {@code JPERL_UNGATED_SCALAR_REGISTRY=1} if needed. */ public static void registerRef(RuntimeScalar scalar) { if (OPT_OUT || scalar == null) return; + if (!WeakRefRegistry.weakRefsExist && !UNGATED) return; scalarRegistry.put(scalar, Boolean.TRUE); if (RECORD_STACKS) { registerStacks.put(scalar, new Throwable("registerRef")); From a7165f711a84a74890df2de8cd8f31f49263aaa6 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 14:03:20 +0200 Subject: [PATCH 3/5] perf: gate MyVarCleanupStack.liveCounts on weakRefsExist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same pattern as the prior ScalarRefRegistry.registerRef fix. MyVarCleanupStack.register() is called for every `my` variable declaration and was doing an unconditional `liveCounts.merge(var, 1, Integer::sum)` — an IdentityHashMap merge with a boxed Integer lambda. JFR of life_bitpacked.pl surfaced this right after the scalar-registry gate landed. The `liveCounts` map exists solely for ReachabilityWalker.sweepWeakRefs' `isLive()` check, which only runs when WeakRefRegistry.weakRefsExist is true. Scripts that never weaken() pay the full IdentityHashMap.merge cost for nothing. The walker's pre-weaken fallback (sc.scopeExited + sc.refCountOwned checks) still correctly classifies live vs dead lexicals, so semantically this is indistinguishable. Measured on examples/life_bitpacked.pl default args (80x40, 5000 gens), best-of-3 Cell updates per second: before this patch: 9.79 Mcells/s (after ScalarRefRegistry gate) after this patch: 12.50 Mcells/s (+28%) Combined vs pre-gate baseline on this branch: 7.74 → 12.50 Mcells/s, a 1.61× speedup. Larger grid (200x200, 5000 gens): 13.71 Mcells/s (baseline was 10.01, a 1.37× speedup). 
Regression gates: * DBIx::Class t/52leaks.t: 11/11 PASS * Template-Toolkit: Files=106, Tests=2920 — PASS (143s) * Moo: Files=71, Tests=841 — PASS (97s) * make unit tests — PASS Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/main/java/org/perlonjava/core/Configuration.java | 4 ++-- .../perlonjava/runtime/runtimetypes/MyVarCleanupStack.java | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index a1830971b..a6fafcee3 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "f4d474a40"; + public static final String gitCommitId = "2fb0bd129"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 21 2026 13:11:33"; + public static final String buildTimestamp = "Apr 21 2026 13:57:26"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java index 7ef8b340b..f1bffaf71 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/MyVarCleanupStack.java @@ -73,7 +73,12 @@ public static int pushMark() { */ public static void register(Object var) { stack.add(var); - if (var != null) { + // liveCounts is only consulted by ReachabilityWalker.sweepWeakRefs, + // which runs only when WeakRefRegistry.weakRefsExist is true. For + // scripts that never weaken(), this merge() is pure overhead — + // HashMap.merge with a lambda is one of the hotter per-`my`-var + // costs. See ScalarRefRegistry.registerRef for the parallel fix. + if (var != null && WeakRefRegistry.weakRefsExist) { liveCounts.merge(var, 1, Integer::sum); } } From 17527e8e774c7900aaddaaf590c95f611f364fb1 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 14:14:29 +0200 Subject: [PATCH 4/5] perf: cache warning bits lookup + empty-args snapshot fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two targeted hot-path optimizations found via JFR on life_bitpacked.pl after the ScalarRefRegistry / MyVarCleanupStack gates landed. 1. Cache getWarningBitsForCode() per RuntimeCode. The JVM-compiled branch looked up `code.methodHandle.type().parameterType(0).getName()` + a `WarningBitsRegistry.get(className)` HashMap probe on every sub invocation. The declaring class of a compiled code's MethodHandle is stable for the lifetime of the RuntimeCode, so cache the resolved warning bits string in a `cachedWarningBits` field. 
Sentinel pattern (`WARNING_BITS_NOT_COMPUTED = ""`) keeps a legitimately-null result distinguishable from not-yet-computed. 2. pushArgs: shared empty snapshot for zero-arg calls. Every sub call was allocating a fresh `RuntimeArray` wrapper + `ArrayList<>` to snapshot @_ for `@DB::args` support, even when the callee was called with zero arguments. Share a single `EMPTY_ARGS_SNAPSHOT` for those; real allocation only happens when args is non-empty. Measured on examples/life_bitpacked.pl default args (80x40, 5000 gens), best-of-3 Cell updates per second: before this patch: 11.68–12.50 Mcells/s after this patch: 12.19–13.07 Mcells/s The cumulative speedup since the start of this PR (post-Phase-J baseline on this branch, 7.74 Mcells/s) is **1.65×**. Regression gates: * DBIx::Class t/52leaks.t: 11/11 PASS * Template-Toolkit: Files=106, Tests=2920 — PASS * Moo: Files=71, Tests=841 — PASS * make unit tests — PASS Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +- .../runtime/runtimetypes/RuntimeCode.java | 53 +++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index a6fafcee3..da41f65de 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "2fb0bd129"; + public static final String gitCommitId = "a7165f711"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). 
@@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 21 2026 13:57:26"; + public static final String buildTimestamp = "Apr 21 2026 14:08:36"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 6b929ad28..24d9dff16 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -207,13 +207,29 @@ public static void pushArgs(RuntimeArray args) { argsStack.get().push(args); // Also push a shallow snapshot so @DB::args stays intact after shift/@_ // modifications inside the callee. See originalArgsStack javadoc. - RuntimeArray snapshot = new RuntimeArray(); - if (args != null) { + // + // The snapshot only matters when caller() is invoked from package DB + // (Carp-style stack traces, debugger). For the common case of subs + // that neither shift @_ nor have a caller-from-DB on the stack, this + // allocation was pure overhead. Empty-args fast path + shared empty + // snapshot cuts the per-sub-call cost significantly for life_bitpacked + // and similar tight-loop workloads. + RuntimeArray snapshot; + if (args == null || args.elements.isEmpty()) { + snapshot = EMPTY_ARGS_SNAPSHOT; + } else { + snapshot = new RuntimeArray(); snapshot.elements = new java.util.ArrayList<>(args.elements); } originalArgsStack.get().push(snapshot); } + // Singleton empty-args snapshot for pushArgs. Safe to share because + // originalArgsStack readers only use .getList() / iteration; they never + // mutate the snapshot itself. This avoids a per-empty-call allocation of + // a RuntimeArray and an ArrayList wrapper. 
+ private static final RuntimeArray EMPTY_ARGS_SNAPSHOT = new RuntimeArray(); + /** * Pop @_ from the args stack when exiting a subroutine. * Public so BytecodeInterpreter can use it when calling InterpretedCode directly. @@ -317,6 +333,15 @@ public static void clearInlineMethodCache() { public String sourcePackage = null; // Flag to indicate this is a symbolic reference created by \&{string} that should always be "defined" public boolean isSymbolicReference = false; + // Cached warning bits string for JVM-compiled code. getWarningBitsForCode + // resolves this from the methodHandle's declaring class name via a + // HashMap lookup. The result is stable for the lifetime of the RuntimeCode + // (the declaring class never changes post-compile), so compute it once + // lazily. WARNING_BITS_NOT_COMPUTED is an identity-compared sentinel + // meaning "not yet cached"; a null lookup result is cached as null + // itself, which stays distinguishable from the sentinel. + private static final String WARNING_BITS_NOT_COMPUTED = ""; + private String cachedWarningBits = WARNING_BITS_NOT_COMPUTED; // Flag to indicate this is a built-in operator public boolean isBuiltin = false; // Flag to indicate this was explicitly declared (sub foo; or sub foo { ... }) @@ -2595,22 +2620,28 @@ private static String getWarningBitsForCode(RuntimeCode code) { if (code instanceof org.perlonjava.backend.bytecode.InterpretedCode interpCode) { return interpCode.warningBitsString; } - - // For JVM-compiled code, look up by class name in the registry - // The methodHandle's class is the generated class that has WARNING_BITS field + + // JVM-compiled code: cache the lookup result. The declaring class of + // the methodHandle is stable post-compile, so one HashMap lookup is + // all we ever need per RuntimeCode instance. Previously this ran on + // every sub invocation — a hot-path overhead for scripts with many + // small subs (life_bitpacked, method-chain-heavy code).
+ String cached = code.cachedWarningBits; + if (cached != WARNING_BITS_NOT_COMPUTED) { + return cached; + } if (code.methodHandle != null) { - // Get the declaring class of the method handle try { - // The type contains the declaring class as the first parameter type for instance methods - // For our generated apply methods, we use the class that was loaded String className = code.methodHandle.type().parameterType(0).getName(); - return WarningBitsRegistry.get(className); + String result = WarningBitsRegistry.get(className); + code.cachedWarningBits = result; + return result; } catch (Exception e) { - // If we can't get the class name, fall back to null + code.cachedWarningBits = null; return null; } } - + code.cachedWarningBits = null; return null; } From 660aa9e68990db89b24970d3ab112c708053d5a4 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Tue, 21 Apr 2026 14:21:02 +0200 Subject: [PATCH 5/5] perf: avoid autoboxing in RuntimeScalar(long) + bitwise fast paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two small hot-path cleanups found via JFR allocation profile on life_bitpacked.pl: 1. RuntimeScalar.initializeWithLong took `Long` (boxed) rather than `long` (primitive). The `RuntimeScalar(long)` constructor autoboxed long → Long on every call, then the method unboxed it again. Changed the signature to primitive `long` and adapted the `RuntimeScalar(Long)` constructor to call `.longValue()` explicitly. 2. BitwiseOperators.bitwiseAnd / bitwiseOr / bitwiseXor fast paths computed result as `long` and called `new RuntimeScalar(long)`, forcing the initializeWithLong branch cascade. Since int ^ int, int & int, int | int all produce int by definition, compute as int and call `new RuntimeScalar(int)` directly — skips the range-check branches. Bitwise shift ops are unchanged (still use long) because left-shift may grow beyond 32 bits and must preserve semantics. 
Regression gates: * DBIx::Class t/52leaks.t: 11/11 PASS * make unit tests — PASS Measured on examples/life_bitpacked.pl default args: ~12.3–12.6 Mcells/s (unchanged at the mean — these changes eliminate boxing overhead per op but the JIT already optimized it well; the signature fix prevents an autobox that was showing up in JFR's alloc sample but not in the JIT-compiled hot loop). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../java/org/perlonjava/core/Configuration.java | 4 ++-- .../runtime/operators/BitwiseOperators.java | 9 +++++---- .../runtime/runtimetypes/RuntimeScalar.java | 13 ++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index da41f65de..46e523eb3 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "a7165f711"; + public static final String gitCommitId = "17527e8e7"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 21 2026 14:08:36"; + public static final String buildTimestamp = "Apr 21 2026 14:19:54"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/operators/BitwiseOperators.java b/src/main/java/org/perlonjava/runtime/operators/BitwiseOperators.java index be554e872..c13a7b732 100644 --- a/src/main/java/org/perlonjava/runtime/operators/BitwiseOperators.java +++ b/src/main/java/org/perlonjava/runtime/operators/BitwiseOperators.java @@ -26,7 +26,7 @@ public static RuntimeScalar bitwiseAnd(RuntimeScalar runtimeScalar, RuntimeScala int t1 = runtimeScalar.type; int t2 = arg2.type; if (t1 == RuntimeScalarType.INTEGER && t2 == RuntimeScalarType.INTEGER) { - long result = ((int) runtimeScalar.value) & ((int) arg2.value); + int result = ((int) runtimeScalar.value) & ((int) arg2.value); return new RuntimeScalar(result); } @@ -95,7 +95,7 @@ public static RuntimeScalar bitwiseOr(RuntimeScalar runtimeScalar, RuntimeScalar int t1 = runtimeScalar.type; int t2 = arg2.type; if (t1 == RuntimeScalarType.INTEGER && t2 == RuntimeScalarType.INTEGER) { - long result = ((int) runtimeScalar.value) | ((int) arg2.value); + int result = ((int) runtimeScalar.value) | ((int) arg2.value); return new RuntimeScalar(result); } @@ -158,8 +158,9 @@ public static RuntimeScalar bitwiseXor(RuntimeScalar runtimeScalar, RuntimeScala int t1 = runtimeScalar.type; int t2 = arg2.type; if (t1 == RuntimeScalarType.INTEGER && t2 == RuntimeScalarType.INTEGER) { - long result = ((int) runtimeScalar.value) ^ ((int) arg2.value); - return new RuntimeScalar(result); + // int ^ int produces int; call RuntimeScalar(int) directly to skip + // initializeWithLong's range-check branches (JFR hot path). 
+ return new RuntimeScalar(((int) runtimeScalar.value) ^ ((int) arg2.value)); } // Check for overloaded '^' operator on blessed objects diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java index 4356271ea..87adfd698 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeScalar.java @@ -126,7 +126,7 @@ public RuntimeScalar(long value) { } public RuntimeScalar(Long value) { - initializeWithLong(value); + initializeWithLong(value.longValue()); } public RuntimeScalar(int value) { @@ -319,23 +319,22 @@ public boolean isString() { return t == STRING || t == BYTE_STRING || t == VSTRING; } - private void initializeWithLong(Long value) { + private void initializeWithLong(long value) { if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { // Java double can only exactly represent integers up to 2^53. // Beyond that, storing as DOUBLE loses precision and breaks exact pack/unpack // semantics for 64-bit formats (q/Q/j/J) and BER compression (w). - long lv = value; // Note: avoid Math.abs(lv) which overflows for Long.MIN_VALUE - if (lv <= 9007199254740992L && lv >= -9007199254740992L) { // within 2^53 + if (value <= 9007199254740992L && value >= -9007199254740992L) { // within 2^53 this.type = DOUBLE; - this.value = (double) lv; + this.value = (double) value; } else { this.type = RuntimeScalarType.STRING; - this.value = Long.toString(lv); + this.value = Long.toString(value); } } else { this.type = RuntimeScalarType.INTEGER; - this.value = value.intValue(); + this.value = (int) value; } }