diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 3d752fe18..d6f446fae 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1,5 +1,6 @@ package org.perlonjava.backend.bytecode; +import org.perlonjava.frontend.analysis.RegexUsageDetector; import org.perlonjava.frontend.analysis.Visitor; import org.perlonjava.backend.jvm.EmitterMethodCreator; import org.perlonjava.backend.jvm.EmitterContext; @@ -726,6 +727,19 @@ public void visit(BlockNode node) { && node.elements.get(0) instanceof OperatorNode localOp && localOp.operator.equals("local"); + // Perl 5 block-level regex state scoping: save $1, $&, etc. on entry, restore on exit. + // Skip if blockIsSubroutine: the subroutine-level save in BytecodeInterpreter.execute() + // (savedRegexState + finally) already handles this, so block-level would be redundant. + // If last/next/redo jumps past the RESTORE opcode, the interpreter's truncation logic + // in RESTORE_REGEX_STATE handles cleanup of orphaned stack entries. + int regexStateReg = -1; + if (!(node instanceof AbstractNode an && an.getBooleanAnnotation("blockIsSubroutine")) + && RegexUsageDetector.containsRegexOperation(node)) { + regexStateReg = allocateRegister(); + emit(Opcodes.SAVE_REGEX_STATE); + emitReg(regexStateReg); + } + // If the first statement is a scoped package (package Foo { }), // save the DynamicVariableManager level before the block body so PUSH_PACKAGE is restored. 
int scopedPackageLevelReg = -1; @@ -790,6 +804,11 @@ public void visit(BlockNode node) { emitReg(scopedPackageLevelReg); } + if (regexStateReg >= 0) { + emit(Opcodes.RESTORE_REGEX_STATE); + emitReg(regexStateReg); + } + // Set lastResultReg to the outer register (or -1 if VOID context) lastResultReg = outerResultReg; } diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 17d020b5b..856e06f57 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -75,6 +75,16 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c java.util.Stack labeledBlockStack = new java.util.Stack<>(); // Each entry is [labelStringPoolIdx, exitPc] + // Block-level regex state stack, used by SAVE_REGEX_STATE/RESTORE_REGEX_STATE opcodes. + // Each block containing regex ops pushes a snapshot; the matching restore pops it. + // Lazily initialized because most subroutines don't have nested regex-using blocks. + java.util.ArrayList regexStateStack = null; + + // Subroutine-level regex state: unconditionally saved on entry, restored in the + // finally block. This implements Perl 5 semantics where $1, $&, etc. are + // dynamically scoped per subroutine. The finally block guarantees restoration + // even when the sub exits via return, die, or exception. + RegexState savedRegexState = new RegexState(); try { outer: while (true) { @@ -104,7 +114,13 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c if (retVal == null) { return new RuntimeList(); } - return retVal.getList(); + RuntimeList retList = retVal.getList(); + // Materialize $1, $&, etc. into concrete scalars BEFORE returning. + // The finally block will call savedRegexState.restore(), which overwrites + // global regex state. 
Any lazy ScalarSpecialVariable references in the + // return list must be resolved while this sub's regex state is still active. + RuntimeCode.materializeSpecialVarsInResult(retList); + return retList; } case Opcodes.GOTO: { @@ -1589,6 +1605,35 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.SAVE_REGEX_STATE: { + // Block-level regex state save. Snapshot current regex state and + // store the stack level in register rd. The level is used by + // RESTORE_REGEX_STATE to find the correct snapshot and truncate + // any orphaned entries (e.g., if inner blocks were skipped by + // last/next/redo/die). + int rd = bytecode[pc++]; + if (regexStateStack == null) regexStateStack = new java.util.ArrayList<>(); + int level = regexStateStack.size(); + regexStateStack.add(new RegexState()); + registers[rd] = new RuntimeScalar(level); + break; + } + + case Opcodes.RESTORE_REGEX_STATE: { + // Block-level regex state restore. Restore snapshot at the saved + // level and discard all entries above it (handles cases where inner + // RESTORE opcodes were skipped by last/next/redo/die). + int rs = bytecode[pc++]; + int level = ((RuntimeScalar) registers[rs]).getInt(); + if (regexStateStack != null && level < regexStateStack.size()) { + regexStateStack.get(level).restore(); + while (regexStateStack.size() > level) { + regexStateStack.remove(regexStateStack.size() - 1); + } + } + break; + } + // ================================================================= // LIST OPERATIONS // ================================================================= @@ -2318,7 +2363,9 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } } // end outer while } finally { - // Always pop the interpreter state + // Restore the caller's regex state. Runs after any return/die/exception, + // ensuring the caller sees its own $1, $&, etc. regardless of how the sub exited. 
+ savedRegexState.restore(); InterpreterState.pop(); } } diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index a9352ffb3..a77d0a5ac 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -121,30 +121,23 @@ public InterpretedCode(int[] bytecode, Object[] constants, String[] stringPool, /** * Override RuntimeCode.apply() to dispatch to interpreter. * - * This is the ONLY method that differs from compiled RuntimeCode. + *

<p>This is the ONLY method that differs from compiled RuntimeCode. * The API signature is IDENTICAL, ensuring perfect compatibility. * + *

Regex state save/restore is handled inside {@code BytecodeInterpreter.execute()} + * (via {@code savedRegexState}/finally), not here. + * * @param args The arguments array (@_) * @param callContext The calling context (VOID/SCALAR/LIST) * @return RuntimeList containing the result (may be RuntimeControlFlowList) */ @Override public RuntimeList apply(RuntimeArray args, int callContext) { - // Dispatch to interpreter (not compiled bytecode) return BytecodeInterpreter.execute(this, args, callContext); } - /** - * Override RuntimeCode.apply() with subroutine name. - * - * @param subroutineName The subroutine name (for stack traces) - * @param args The arguments array (@_) - * @param callContext The calling context - * @return RuntimeList containing the result - */ @Override public RuntimeList apply(String subroutineName, RuntimeArray args, int callContext) { - // Dispatch to interpreter with subroutine name for stack traces return BytecodeInterpreter.execute(this, args, callContext, subroutineName); } diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 83b1f0062..02a22b473 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -1173,5 +1173,19 @@ public class Opcodes { * Format: POP_LABELED_BLOCK */ public static final short POP_LABELED_BLOCK = 355; + /** Save regex state (Perl 5 dynamic scoping of $1, $&, etc.) into register rd. + * The register receives an integer index into the interpreter's regexStateStack. + * Emitted at block entry for blocks containing regex operations. + * @see org.perlonjava.runtime.runtimetypes.RegexState + * Format: SAVE_REGEX_STATE rd */ + public static final short SAVE_REGEX_STATE = 356; + + /** Restore regex state from the level stored in register rs, undoing all + * regex state changes made within the block. 
Also truncates any orphaned + * stack entries (from inner blocks skipped by last/next/redo/die). + * Emitted at block exit. + * Format: RESTORE_REGEX_STATE rs */ + public static final short RESTORE_REGEX_STATE = 357; + private Opcodes() {} // Utility class - no instantiation } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitBlock.java b/src/main/java/org/perlonjava/backend/jvm/EmitBlock.java index 6761b2324..a1c02eb1c 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitBlock.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitBlock.java @@ -3,7 +3,9 @@ import org.objectweb.asm.Label; import org.objectweb.asm.MethodVisitor; import org.objectweb.asm.Opcodes; -import org.perlonjava.backend.jvm.astrefactor.LargeBlockRefactorer;import org.perlonjava.frontend.analysis.EmitterVisitor; +import org.perlonjava.backend.jvm.astrefactor.LargeBlockRefactorer; +import org.perlonjava.frontend.analysis.EmitterVisitor; +import org.perlonjava.frontend.analysis.RegexUsageDetector; import org.perlonjava.frontend.astnode.*; import org.perlonjava.runtime.runtimetypes.RuntimeContextType; @@ -143,6 +145,20 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) { // Setup 'local' environment if needed Local.localRecord localRecord = Local.localSetup(emitterVisitor.ctx, node, mv); + // Perl 5 block-level regex state scoping: save $1, $&, etc. on entry, restore on exit. + // Skip if blockIsSubroutine: EmitterMethodCreator already emits subroutine-level + // save/restore (regexStateSlot), so block-level would be redundant. 
+ int regexStateLocal = -1; + if (!node.getBooleanAnnotation("blockIsSubroutine") + && RegexUsageDetector.containsRegexOperation(node)) { + regexStateLocal = emitterVisitor.ctx.symbolTable.allocateLocalVariable(); + mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState"); + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESPECIAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "", "()V", false); + mv.visitVarInsn(Opcodes.ASTORE, regexStateLocal); + } + // Add redo label mv.visitLabel(redoLabel); @@ -249,6 +265,13 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) { Local.localTeardown(localRecord, mv); + // Restore block-level regex state (counterpart to the save above) + if (regexStateLocal >= 0) { + mv.visitVarInsn(Opcodes.ALOAD, regexStateLocal); + mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false); + } + emitterVisitor.ctx.symbolTable.exitScope(scopeIndex); emitterVisitor.ctx.logDebug("generateCodeBlock end"); } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitForeach.java b/src/main/java/org/perlonjava/backend/jvm/EmitForeach.java index 491a814aa..3a1f22675 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitForeach.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitForeach.java @@ -4,6 +4,7 @@ import org.objectweb.asm.MethodVisitor; import org.objectweb.asm.Opcodes; import org.perlonjava.frontend.analysis.EmitterVisitor; +import org.perlonjava.frontend.analysis.RegexUsageDetector; import org.perlonjava.frontend.astnode.*; import org.perlonjava.runtime.perlmodule.Warnings; import org.perlonjava.runtime.runtimetypes.RuntimeContextType; @@ -517,6 +518,18 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) { int bodyScopeIndex = emitterVisitor.ctx.symbolTable.enterScope(); Local.localRecord bodyLocalRecord = Local.localSetup(emitterVisitor.ctx, blockNode, mv); + // Perl 5 regex state 
scoping for foreach body. Each iteration saves/restores + // independently. No blockIsSubroutine check needed: foreach body is never a sub. + int regexStateLocal = -1; + if (RegexUsageDetector.containsRegexOperation(blockNode)) { + regexStateLocal = emitterVisitor.ctx.symbolTable.allocateLocalVariable(); + mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState"); + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESPECIAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "", "()V", false); + mv.visitVarInsn(Opcodes.ASTORE, regexStateLocal); + } + pushGotoLabelsForBlock(emitterVisitor, blockNode); java.util.List list = blockNode.elements; @@ -544,6 +557,13 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) { popGotoLabelsForBlock(emitterVisitor, blockNode); + // Restore block-level regex state at end of each iteration + if (regexStateLocal >= 0) { + mv.visitVarInsn(Opcodes.ALOAD, regexStateLocal); + mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false); + } + Local.localTeardown(bodyLocalRecord, mv); emitterVisitor.ctx.symbolTable.exitScope(bodyScopeIndex); } else { diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java b/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java index 6151612e7..6e3b85b36 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java @@ -650,6 +650,16 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean // Setup local variables and environment for the method Local.localRecord localRecord = Local.localSetup(ctx, ast, mv); + // Subroutine-level regex state scoping (Perl 5 semantics): unconditionally save + // the caller's $1, $&, etc. on entry. Restored at returnLabel before ARETURN. 
+ // This is separate from block-level scoping (EmitBlock/EmitForeach + RegexUsageDetector). + int regexStateSlot = ctx.symbolTable.allocateLocalVariable(); + mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState"); + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESPECIAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "", "()V", false); + mv.visitVarInsn(Opcodes.ASTORE, regexStateSlot); + // Store the computed RuntimeList return value in a dedicated local slot. // This keeps the operand stack empty at join labels (endCatch), avoiding // inconsistent stack map frames when multiple control-flow paths merge. @@ -1041,6 +1051,20 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean mv.visitVarInsn(Opcodes.ALOAD, returnListSlot); } + // Materialize $1, $&, etc. into concrete scalars BEFORE restoring regex state. + // The return list may contain lazy ScalarSpecialVariable references; if we + // restored first, they would resolve to the caller's (stale) values. + mv.visitInsn(Opcodes.DUP); + mv.visitMethodInsn(Opcodes.INVOKESTATIC, + "org/perlonjava/runtime/runtimetypes/RuntimeCode", + "materializeSpecialVarsInResult", + "(Lorg/perlonjava/runtime/runtimetypes/RuntimeList;)V", false); + + // Restore caller's regex state (counterpart to the save at method entry) + mv.visitVarInsn(Opcodes.ALOAD, regexStateSlot); + mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL, + "org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false); + // Teardown local variables and environment after the return value is materialized Local.localTeardown(localRecord, mv); @@ -1502,6 +1526,10 @@ public static Class loadBytecode(EmitterContext ctx, byte[] classData) { */ public static RuntimeCode createRuntimeCode( EmitterContext ctx, Node ast, boolean useTryCatch) { + // Ensure block-level regex save/restore is skipped for the outermost block of a sub/method. 
+ // For anonymous subs this is set by SubroutineNode constructor, but for named subs the block + // is passed directly here without going through SubroutineNode. + ast.setAnnotation("blockIsSubroutine", true); try { // Try compiler path Class generatedClass = createClassWithMethod(ctx, ast, useTryCatch); diff --git a/src/main/java/org/perlonjava/frontend/analysis/RegexUsageDetector.java b/src/main/java/org/perlonjava/frontend/analysis/RegexUsageDetector.java new file mode 100644 index 000000000..e2337f8e2 --- /dev/null +++ b/src/main/java/org/perlonjava/frontend/analysis/RegexUsageDetector.java @@ -0,0 +1,94 @@ +package org.perlonjava.frontend.analysis; + +import org.perlonjava.frontend.astnode.*; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.List; + +/** + * Detects whether an AST subtree contains regex operations (=~, !~, split, m//, s///). + * Used as an optimization gate: block-level regex state save/restore is only emitted + * for blocks that actually use regex, avoiding unnecessary snapshots of {@code RegexState}. + * + *

The walk stops at {@link SubroutineNode} boundaries because nested subroutines + * get their own subroutine-level regex state save/restore (in {@code EmitterMethodCreator} + * / {@code BytecodeInterpreter}), so their regex usage should not trigger block-level + * save/restore in the enclosing scope. + * + * @see org.perlonjava.runtime.runtimetypes.RegexState + */ +public class RegexUsageDetector { + + /** Unary operators that perform regex matching/substitution. */ + private static final java.util.Set REGEX_OPERATORS = + java.util.Set.of("matchRegex", "replaceRegex"); + /** Binary operators that perform regex matching (=~, !~) or use regex internally (split). */ + private static final java.util.Set REGEX_BINARY_OPERATORS = + java.util.Set.of("=~", "!~", "split"); + + /** + * Returns true if the AST rooted at {@code root} contains any regex operation, + * excluding nested subroutine bodies (which have their own regex state scope). + * Uses iterative DFS to avoid StackOverflow on deeply nested ASTs. 
+ */ + public static boolean containsRegexOperation(Node root) { + if (root == null) return false; + Deque stack = new ArrayDeque<>(); + stack.push(root); + while (!stack.isEmpty()) { + Node node = stack.pop(); + if (node == null) continue; + // Stop at subroutine boundaries: nested subs have their own regex state scope + if (node instanceof SubroutineNode) continue; + if (node instanceof OperatorNode op) { + if (REGEX_OPERATORS.contains(op.operator)) return true; + if (op.operand != null) stack.push(op.operand); + } else if (node instanceof BinaryOperatorNode bop) { + if (REGEX_BINARY_OPERATORS.contains(bop.operator)) return true; + if (bop.left != null) stack.push(bop.left); + if (bop.right != null) stack.push(bop.right); + } else if (node instanceof BlockNode bn) { + pushAll(stack, bn.elements); + } else if (node instanceof ListNode ln) { + pushAll(stack, ln.elements); + if (ln.handle != null) stack.push(ln.handle); + } else if (node instanceof IfNode ifn) { + if (ifn.condition != null) stack.push(ifn.condition); + if (ifn.thenBranch != null) stack.push(ifn.thenBranch); + if (ifn.elseBranch != null) stack.push(ifn.elseBranch); + } else if (node instanceof For1Node f1) { + if (f1.variable != null) stack.push(f1.variable); + if (f1.list != null) stack.push(f1.list); + if (f1.body != null) stack.push(f1.body); + if (f1.continueBlock != null) stack.push(f1.continueBlock); + } else if (node instanceof For3Node f3) { + if (f3.initialization != null) stack.push(f3.initialization); + if (f3.condition != null) stack.push(f3.condition); + if (f3.increment != null) stack.push(f3.increment); + if (f3.body != null) stack.push(f3.body); + if (f3.continueBlock != null) stack.push(f3.continueBlock); + } else if (node instanceof TernaryOperatorNode tern) { + if (tern.condition != null) stack.push(tern.condition); + if (tern.trueExpr != null) stack.push(tern.trueExpr); + if (tern.falseExpr != null) stack.push(tern.falseExpr); + } else if (node instanceof TryNode tryN) { + if 
(tryN.tryBlock != null) stack.push(tryN.tryBlock); + if (tryN.catchBlock != null) stack.push(tryN.catchBlock); + if (tryN.finallyBlock != null) stack.push(tryN.finallyBlock); + } else if (node instanceof HashLiteralNode hn) { + pushAll(stack, hn.elements); + } else if (node instanceof ArrayLiteralNode an) { + pushAll(stack, an.elements); + } + } + return false; + } + + private static void pushAll(Deque stack, List elements) { + for (int i = elements.size() - 1; i >= 0; i--) { + Node e = elements.get(i); + if (e != null) stack.push(e); + } + } +} diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 48cf2a194..9c6159ca3 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -56,6 +56,9 @@ protected boolean removeEldestEntry(Map.Entry eldest) { public static String lastSuccessfulMatchString = null; // ${^LAST_SUCCESSFUL_PATTERN} public static RuntimeRegex lastSuccessfulPattern = null; + // Capture groups from the last successful match that had captures. + // In Perl 5, $1/$2/etc persist across non-capturing matches. 
+ public static String[] lastCaptureGroups = null; // Indicates if \G assertion is used private final boolean useGAssertion = false; // Compiled regex pattern @@ -462,12 +465,17 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc // Always initialize $1, $2, @+, @-, $`, $&, $' for every successful match globalMatcher = matcher; globalMatchString = inputStr; - // Store match information to avoid IllegalStateException later + if (captureCount > 0) { + lastCaptureGroups = new String[captureCount]; + for (int i = 0; i < captureCount; i++) { + lastCaptureGroups[i] = matcher.group(i + 1); + } + } else { + lastCaptureGroups = null; + } lastMatchedString = matcher.group(0); lastMatchStart = matcher.start(); lastMatchEnd = matcher.end(); - // System.err.println("DEBUG: Set globalMatcher for match at position " + matcher.start() + "-" + matcher.end()); - // System.err.println("DEBUG: Stored match info - matched: '" + lastMatchedString + "', start: " + lastMatchStart + ", end: " + lastMatchEnd); if (regex.regexFlags.isGlobalMatch() && captureCount < 1 && ctx == RuntimeContextType.LIST) { // Global match and no captures, in list context return the matched string @@ -546,6 +554,9 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc lastMatchedString = null; lastMatchStart = -1; lastMatchEnd = -1; + if (matcher.groupCount() > 0) { + lastCaptureGroups = null; + } } if (found) { @@ -715,7 +726,14 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar // Initialize $1, $2, @+, @- only when we have a match globalMatcher = matcher; globalMatchString = inputStr; - // Store match information + if (matcher.groupCount() > 0) { + lastCaptureGroups = new String[matcher.groupCount()]; + for (int i = 0; i < matcher.groupCount(); i++) { + lastCaptureGroups[i] = matcher.group(i + 1); + } + } else { + lastCaptureGroups = null; + } lastMatchedString = matcher.group(0); lastMatchStart = matcher.start(); 
lastMatchEnd = matcher.end(); @@ -812,6 +830,7 @@ public static void initialize() { lastSuccessfulMatchStart = -1; lastSuccessfulMatchEnd = -1; lastSuccessfulMatchString = null; + lastCaptureGroups = null; // Reset regex cache matched flags reset(); @@ -844,50 +863,62 @@ public static String postMatchString() { } public static String captureString(int group) { - if (globalMatcher == null || group < 0 || group > globalMatcher.groupCount()) { + if (group <= 0) { + return lastMatchedString; + } + if (lastCaptureGroups == null || group > lastCaptureGroups.length) { return null; } - return globalMatcher.group(group); + return lastCaptureGroups[group - 1]; } public static String lastCaptureString() { - if (globalMatcher == null) { + if (lastCaptureGroups == null || lastCaptureGroups.length == 0) { return null; } - int lastGroup = globalMatcher.groupCount(); - return globalMatcher.group(lastGroup); + return lastCaptureGroups[lastCaptureGroups.length - 1]; } public static RuntimeScalar matcherStart(int group) { - if (globalMatcher == null) { - return scalarUndef; + if (group == 0) { + return lastMatchStart >= 0 ? getScalarInt(lastMatchStart) : scalarUndef; } - if (group < 0 || group > globalMatcher.groupCount()) { + if (globalMatcher == null) { return scalarUndef; } - int start = globalMatcher.start(group); - // If the group didn't participate in the match, start() returns -1 - // Perl returns undef in this case - if (start == -1) { + try { + if (group < 0 || group > globalMatcher.groupCount()) { + return scalarUndef; + } + int start = globalMatcher.start(group); + if (start == -1) { + return scalarUndef; + } + return getScalarInt(start); + } catch (IllegalStateException e) { return scalarUndef; } - return getScalarInt(start); } public static RuntimeScalar matcherEnd(int group) { - if (globalMatcher == null) { - return scalarUndef; + if (group == 0) { + return lastMatchEnd >= 0 ? 
getScalarInt(lastMatchEnd) : scalarUndef; } - if (group < 0 || group > globalMatcher.groupCount()) { + if (globalMatcher == null) { return scalarUndef; } - int end = globalMatcher.end(group); - // If the group didn't participate in the match, end() returns -1 - // Perl returns undef in this case - if (end == -1) { + try { + if (group < 0 || group > globalMatcher.groupCount()) { + return scalarUndef; + } + int end = globalMatcher.end(group); + if (end == -1) { + return scalarUndef; + } + return getScalarInt(end); + } catch (IllegalStateException e) { return scalarUndef; } - return getScalarInt(end); } public static int matcherSize() { diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RegexState.java b/src/main/java/org/perlonjava/runtime/runtimetypes/RegexState.java index 83b1c95b5..64262f659 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RegexState.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RegexState.java @@ -5,8 +5,31 @@ import java.util.regex.Matcher; /** - * RegexState holds a snapshot of all regex-related state. - * Used to save and restore regex state when entering/exiting eval blocks. + * Immutable snapshot of all regex-related global state (Perl's $1, $&, $`, $', etc.). + * + *

<p>In Perl 5, regex match variables are dynamically scoped: each subroutine and + * each block that contains regex operations saves the current state on entry and + * restores it on exit. This ensures that a caller's match variables are not + * clobbered by callees or inner blocks. + * + *

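<p>Two levels of scoping use this class: + * <ul> + * <li><b>Subroutine-level:</b> every subroutine saves the caller's state on entry and + * restores it on exit (JVM backend: {@code regexStateSlot} in + * {@code EmitterMethodCreator}; interpreter: {@code savedRegexState} and the + * {@code finally} block in {@code BytecodeInterpreter.execute()}).</li> + * <li><b>Block-level:</b> blocks that {@code RegexUsageDetector} reports as containing + * regex operations save/restore around the block body ({@code EmitBlock} and + * {@code EmitForeach}; {@code SAVE_REGEX_STATE}/{@code RESTORE_REGEX_STATE} opcodes).</li> + * </ul> + * + *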

Important ordering constraint: When a subroutine returns a value containing + * lazy {@link org.perlonjava.runtime.specialvariables.ScalarSpecialVariable} references (e.g., $1), + * those must be materialized via {@link RuntimeCode#materializeSpecialVarsInResult} + * BEFORE calling {@link #restore()}, otherwise the restored (caller's) state would be + * read instead of the subroutine's state. + * + * @see org.perlonjava.runtime.regex.RuntimeRegex for the global static fields being snapshotted */ public class RegexState { public final Matcher globalMatcher; @@ -19,9 +42,9 @@ public class RegexState { public final int lastSuccessfulMatchEnd; public final String lastSuccessfulMatchString; public final RuntimeRegex lastSuccessfulPattern; + public final String[] lastCaptureGroups; public RegexState() { - // Save all the static fields from RuntimeRegex this.globalMatcher = RuntimeRegex.globalMatcher; this.globalMatchString = RuntimeRegex.globalMatchString; this.lastMatchedString = RuntimeRegex.lastMatchedString; @@ -32,10 +55,10 @@ public RegexState() { this.lastSuccessfulMatchEnd = RuntimeRegex.lastSuccessfulMatchEnd; this.lastSuccessfulMatchString = RuntimeRegex.lastSuccessfulMatchString; this.lastSuccessfulPattern = RuntimeRegex.lastSuccessfulPattern; + this.lastCaptureGroups = RuntimeRegex.lastCaptureGroups; } public void restore() { - // Restore all the static fields to RuntimeRegex RuntimeRegex.globalMatcher = this.globalMatcher; RuntimeRegex.globalMatchString = this.globalMatchString; RuntimeRegex.lastMatchedString = this.lastMatchedString; @@ -46,5 +69,6 @@ public void restore() { RuntimeRegex.lastSuccessfulMatchEnd = this.lastSuccessfulMatchEnd; RuntimeRegex.lastSuccessfulMatchString = this.lastSuccessfulMatchString; RuntimeRegex.lastSuccessfulPattern = this.lastSuccessfulPattern; + RuntimeRegex.lastCaptureGroups = this.lastCaptureGroups; } } diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java 
b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java index 83623ab9e..77469eda3 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/RuntimeCode.java @@ -1708,8 +1708,12 @@ public boolean defined() { } /** - * Method to apply (execute) a subroutine reference. - * Invokes the method associated with the code object, passing the RuntimeArray and RuntimeContextType as arguments. + * Invokes the JVM-compiled method associated with this code object. + * + *

Regex state scoping ($1, $&, etc.) is NOT handled here. For JVM-compiled code + * it is emitted directly into the generated method by {@code EmitterMethodCreator} + * ({@code regexStateSlot} save/restore). For interpreted code, {@code InterpretedCode} + * overrides this method and delegates to {@code BytecodeInterpreter.execute()}. * * @param a the RuntimeArray containing the arguments for the subroutine * @param callContext the context in which the subroutine is called @@ -1717,20 +1721,20 @@ public boolean defined() { */ public RuntimeList apply(RuntimeArray a, int callContext) { if (constantValue != null) { - // Alternative way to create constants like: `$constant::{_CAN_PCS} = \$const` return new RuntimeList(constantValue); } try { - // Wait for the compilerThread to finish if it exists if (this.compilerSupplier != null) { - this.compilerSupplier.get(); // Wait for the task to finish + this.compilerSupplier.get(); } + RuntimeList result; if (isStatic) { - return (RuntimeList) this.methodHandle.invoke(a, callContext); + result = (RuntimeList) this.methodHandle.invoke(a, callContext); } else { - return (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); + result = (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); } + return result; } catch (NullPointerException e) { if (this.methodHandle == null) { @@ -1738,11 +1742,8 @@ public RuntimeList apply(RuntimeArray a, int callContext) { } else if (this.codeObject == null && !isStatic) { throw new PerlCompilerException("Subroutine exists but has null code object at "); } else { - // Original NPE from somewhere else throw new PerlCompilerException("Null pointer exception in subroutine call: " + e.getMessage() + " at "); } - - //throw new PerlCompilerException("Undefined subroutine called at "); } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (!(targetException instanceof RuntimeException)) { @@ -1756,21 +1757,20 @@ public 
RuntimeList apply(RuntimeArray a, int callContext) { public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) { if (constantValue != null) { - // Alternative way to create constants like: `$constant::{_CAN_PCS} = \$const` return new RuntimeList(constantValue); } try { - // Wait for the compilerThread to finish if it exists if (this.compilerSupplier != null) { - // System.out.println("Waiting for compiler thread to finish..."); - this.compilerSupplier.get(); // Wait for the task to finish + this.compilerSupplier.get(); } + RuntimeList result; if (isStatic) { - return (RuntimeList) this.methodHandle.invoke(a, callContext); + result = (RuntimeList) this.methodHandle.invoke(a, callContext); } else { - return (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); + result = (RuntimeList) this.methodHandle.invoke(this.codeObject, a, callContext); } + return result; } catch (NullPointerException e) { throw new PerlCompilerException("Undefined subroutine &" + subroutineName + " called at "); } catch (InvocationTargetException e) { @@ -1784,6 +1784,25 @@ public RuntimeList apply(String subroutineName, RuntimeArray a, int callContext) } } + /** + * Replace lazy {@link ScalarSpecialVariable} references ($1, $&, etc.) in a return list + * with concrete {@link RuntimeScalar} copies. Must be called BEFORE {@link RegexState#restore()} + * so that the values reflect the subroutine's regex state, not the caller's. + */ + public static void materializeSpecialVarsInResult(RuntimeList result) { + List elems = result.elements; + for (int i = 0; i < elems.size(); i++) { + RuntimeBase elem = elems.get(i); + if (elem instanceof ScalarSpecialVariable ssv) { + RuntimeScalar resolved = ssv.getValueAsScalar(); + RuntimeScalar concrete = new RuntimeScalar(); + concrete.type = resolved.type; + concrete.value = resolved.value; + elems.set(i, concrete); + } + } + } + /** * Returns a string representation of the CODE reference. 
* diff --git a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java index cfea89ab4..be64a1ae6 100644 --- a/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java +++ b/src/main/java/org/perlonjava/runtime/runtimetypes/ScalarSpecialVariable.java @@ -108,7 +108,7 @@ public RuntimeScalar addToScalar(RuntimeScalar var) { * * @return The RuntimeScalar value of the special variable, or null if not available. */ - private RuntimeScalar getValueAsScalar() { + RuntimeScalar getValueAsScalar() { try { RuntimeScalar result = switch (variableId) { case CAPTURE -> {