Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.perlonjava.backend.bytecode;

import org.perlonjava.frontend.analysis.RegexUsageDetector;
import org.perlonjava.frontend.analysis.Visitor;
import org.perlonjava.backend.jvm.EmitterMethodCreator;
import org.perlonjava.backend.jvm.EmitterContext;
Expand Down Expand Up @@ -726,6 +727,19 @@ public void visit(BlockNode node) {
&& node.elements.get(0) instanceof OperatorNode localOp
&& localOp.operator.equals("local");

// Perl 5 block-level regex state scoping: save $1, $&, etc. on entry, restore on exit.
// Skip if blockIsSubroutine: the subroutine-level save in BytecodeInterpreter.execute()
// (savedRegexState + finally) already handles this, so block-level would be redundant.
// If last/next/redo jumps past the RESTORE opcode, the interpreter's truncation logic
// in RESTORE_REGEX_STATE handles cleanup of orphaned stack entries.
int regexStateReg = -1;
if (!(node instanceof AbstractNode an && an.getBooleanAnnotation("blockIsSubroutine"))
&& RegexUsageDetector.containsRegexOperation(node)) {
regexStateReg = allocateRegister();
emit(Opcodes.SAVE_REGEX_STATE);
emitReg(regexStateReg);
}

// If the first statement is a scoped package (package Foo { }),
// save the DynamicVariableManager level before the block body so PUSH_PACKAGE is restored.
int scopedPackageLevelReg = -1;
Expand Down Expand Up @@ -790,6 +804,11 @@ public void visit(BlockNode node) {
emitReg(scopedPackageLevelReg);
}

if (regexStateReg >= 0) {
emit(Opcodes.RESTORE_REGEX_STATE);
emitReg(regexStateReg);
}

// Set lastResultReg to the outer register (or -1 if VOID context)
lastResultReg = outerResultReg;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c
java.util.Stack<int[]> labeledBlockStack = new java.util.Stack<>();
// Each entry is [labelStringPoolIdx, exitPc]

// Block-level regex state stack, used by SAVE_REGEX_STATE/RESTORE_REGEX_STATE opcodes.
// Each block containing regex ops pushes a snapshot; the matching restore pops it.
// Lazily initialized because most subroutines don't have nested regex-using blocks.
java.util.ArrayList<RegexState> regexStateStack = null;

// Subroutine-level regex state: unconditionally saved on entry, restored in the
// finally block. This implements Perl 5 semantics where $1, $&, etc. are
// dynamically scoped per subroutine. The finally block guarantees restoration
// even when the sub exits via return, die, or exception.
RegexState savedRegexState = new RegexState();
try {
outer:
while (true) {
Expand Down Expand Up @@ -104,7 +114,13 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c
if (retVal == null) {
return new RuntimeList();
}
return retVal.getList();
RuntimeList retList = retVal.getList();
// Materialize $1, $&, etc. into concrete scalars BEFORE returning.
// The finally block will call savedRegexState.restore(), which overwrites
// global regex state. Any lazy ScalarSpecialVariable references in the
// return list must be resolved while this sub's regex state is still active.
RuntimeCode.materializeSpecialVarsInResult(retList);
return retList;
}

case Opcodes.GOTO: {
Expand Down Expand Up @@ -1589,6 +1605,35 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c
break;
}

case Opcodes.SAVE_REGEX_STATE: {
    // Block-entry half of block-level regex scoping.
    // Pushes a fresh RegexState snapshot onto the per-call stack and records
    // the snapshot's index in the destination register. RESTORE_REGEX_STATE
    // later reads that index to restore this snapshot and to drop any
    // entries left above it by inner blocks whose restore was skipped
    // (last/next/redo/die).
    final int destReg = bytecode[pc++];
    if (regexStateStack == null) {
        // The stack is created lazily, on the first save in this call.
        regexStateStack = new java.util.ArrayList<>();
    }
    // Index the new snapshot will occupy == stack depth before the push.
    final int snapshotIndex = regexStateStack.size();
    regexStateStack.add(new RegexState());
    registers[destReg] = new RuntimeScalar(snapshotIndex);
    break;
}

case Opcodes.RESTORE_REGEX_STATE: {
    // Block-exit half of block-level regex scoping.
    // Reads the snapshot index written by the matching SAVE_REGEX_STATE,
    // restores that snapshot, then discards it together with every entry
    // above it (entries left behind when inner RESTORE opcodes were skipped
    // by last/next/redo/die).
    final int srcReg = bytecode[pc++];
    final int savedIndex = ((RuntimeScalar) registers[srcReg]).getInt();
    if (regexStateStack == null || savedIndex >= regexStateStack.size()) {
        // No stack yet, or the entry is already gone: nothing to do.
        break;
    }
    regexStateStack.get(savedIndex).restore();
    // Range removal: truncate the stack back to savedIndex entries.
    regexStateStack.subList(savedIndex, regexStateStack.size()).clear();
    break;
}

// =================================================================
// LIST OPERATIONS
// =================================================================
Expand Down Expand Up @@ -2318,7 +2363,9 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c
}
} // end outer while
} finally {
// Always pop the interpreter state
// Restore the caller's regex state. Runs after any return/die/exception,
// ensuring the caller sees its own $1, $&, etc. regardless of how the sub exited.
savedRegexState.restore();
InterpreterState.pop();
}
}
Expand Down
15 changes: 4 additions & 11 deletions src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java
Original file line number Diff line number Diff line change
Expand Up @@ -121,30 +121,23 @@ public InterpretedCode(int[] bytecode, Object[] constants, String[] stringPool,
/**
 * Runs this interpreted subroutine by delegating to the bytecode interpreter.
 *
 * <p>Overrides {@code RuntimeCode.apply()} with an identical signature, so
 * callers use interpreted code the same way they use compiled code.
 *
 * <p>Regex state save/restore is not done here; it is handled inside
 * {@code BytecodeInterpreter.execute()} (via {@code savedRegexState} and its
 * {@code finally} block).
 *
 * @param args The arguments array (@_)
 * @param callContext The calling context (VOID/SCALAR/LIST)
 * @return RuntimeList containing the result (may be RuntimeControlFlowList)
 */
@Override
public RuntimeList apply(RuntimeArray args, int callContext) {
    // Hand off to the interpreter rather than to compiled bytecode.
    final RuntimeList interpreted = BytecodeInterpreter.execute(this, args, callContext);
    return interpreted;
}

/**
 * Runs this interpreted subroutine by delegating to the bytecode interpreter,
 * passing along the subroutine name.
 *
 * <p>Overrides the named variant of {@code RuntimeCode.apply()}.
 *
 * @param subroutineName The subroutine name (for stack traces)
 * @param args The arguments array (@_)
 * @param callContext The calling context
 * @return RuntimeList containing the result
 */
@Override
public RuntimeList apply(String subroutineName, RuntimeArray args, int callContext) {
    // Same dispatch as the unnamed variant, with the name forwarded to the interpreter.
    final RuntimeList interpreted =
            BytecodeInterpreter.execute(this, args, callContext, subroutineName);
    return interpreted;
}

Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/perlonjava/backend/bytecode/Opcodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -1173,5 +1173,19 @@ public class Opcodes {
* Format: POP_LABELED_BLOCK */
public static final short POP_LABELED_BLOCK = 355;

/** Save regex state (Perl 5 dynamic scoping of $1, $&amp;, etc.) into register rd.
* The register receives an integer index into the interpreter's regexStateStack.
* Emitted at block entry for blocks containing regex operations.
* Format: SAVE_REGEX_STATE rd
* @see org.perlonjava.runtime.runtimetypes.RegexState */
public static final short SAVE_REGEX_STATE = 356;

/** Restore regex state from the level stored in register rs, undoing all
* regex state changes made within the block. Also truncates any orphaned
* stack entries (from inner blocks skipped by last/next/redo/die).
* Does nothing if the saved level is no longer present on the interpreter's
* regexStateStack (or the stack was never created).
* Emitted at block exit.
* Format: RESTORE_REGEX_STATE rs */
public static final short RESTORE_REGEX_STATE = 357;

private Opcodes() {} // Utility class - no instantiation
}
25 changes: 24 additions & 1 deletion src/main/java/org/perlonjava/backend/jvm/EmitBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import org.objectweb.asm.Opcodes;
import org.perlonjava.backend.jvm.astrefactor.LargeBlockRefactorer;import org.perlonjava.frontend.analysis.EmitterVisitor;
import org.perlonjava.backend.jvm.astrefactor.LargeBlockRefactorer;
import org.perlonjava.frontend.analysis.EmitterVisitor;
import org.perlonjava.frontend.analysis.RegexUsageDetector;
import org.perlonjava.frontend.astnode.*;
import org.perlonjava.runtime.runtimetypes.RuntimeContextType;

Expand Down Expand Up @@ -143,6 +145,20 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {
// Setup 'local' environment if needed
Local.localRecord localRecord = Local.localSetup(emitterVisitor.ctx, node, mv);

// Perl 5 block-level regex state scoping: save $1, $&, etc. on entry, restore on exit.
// Skip if blockIsSubroutine: EmitterMethodCreator already emits subroutine-level
// save/restore (regexStateSlot), so block-level would be redundant.
int regexStateLocal = -1;
if (!node.getBooleanAnnotation("blockIsSubroutine")
&& RegexUsageDetector.containsRegexOperation(node)) {
regexStateLocal = emitterVisitor.ctx.symbolTable.allocateLocalVariable();
mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState");
mv.visitInsn(Opcodes.DUP);
mv.visitMethodInsn(Opcodes.INVOKESPECIAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "<init>", "()V", false);
mv.visitVarInsn(Opcodes.ASTORE, regexStateLocal);
}

// Add redo label
mv.visitLabel(redoLabel);

Expand Down Expand Up @@ -249,6 +265,13 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {

Local.localTeardown(localRecord, mv);

// Restore block-level regex state (counterpart to the save above)
if (regexStateLocal >= 0) {
mv.visitVarInsn(Opcodes.ALOAD, regexStateLocal);
mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false);
}

emitterVisitor.ctx.symbolTable.exitScope(scopeIndex);
emitterVisitor.ctx.logDebug("generateCodeBlock end");
}
Expand Down
20 changes: 20 additions & 0 deletions src/main/java/org/perlonjava/backend/jvm/EmitForeach.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.objectweb.asm.MethodVisitor;
import org.objectweb.asm.Opcodes;
import org.perlonjava.frontend.analysis.EmitterVisitor;
import org.perlonjava.frontend.analysis.RegexUsageDetector;
import org.perlonjava.frontend.astnode.*;
import org.perlonjava.runtime.perlmodule.Warnings;
import org.perlonjava.runtime.runtimetypes.RuntimeContextType;
Expand Down Expand Up @@ -517,6 +518,18 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) {
int bodyScopeIndex = emitterVisitor.ctx.symbolTable.enterScope();
Local.localRecord bodyLocalRecord = Local.localSetup(emitterVisitor.ctx, blockNode, mv);

// Perl 5 regex state scoping for foreach body. Each iteration saves/restores
// independently. No blockIsSubroutine check needed: foreach body is never a sub.
int regexStateLocal = -1;
if (RegexUsageDetector.containsRegexOperation(blockNode)) {
regexStateLocal = emitterVisitor.ctx.symbolTable.allocateLocalVariable();
mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState");
mv.visitInsn(Opcodes.DUP);
mv.visitMethodInsn(Opcodes.INVOKESPECIAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "<init>", "()V", false);
mv.visitVarInsn(Opcodes.ASTORE, regexStateLocal);
}

pushGotoLabelsForBlock(emitterVisitor, blockNode);

java.util.List<Node> list = blockNode.elements;
Expand Down Expand Up @@ -544,6 +557,13 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) {

popGotoLabelsForBlock(emitterVisitor, blockNode);

// Restore block-level regex state at end of each iteration
if (regexStateLocal >= 0) {
mv.visitVarInsn(Opcodes.ALOAD, regexStateLocal);
mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false);
}

Local.localTeardown(bodyLocalRecord, mv);
emitterVisitor.ctx.symbolTable.exitScope(bodyScopeIndex);
} else {
Expand Down
28 changes: 28 additions & 0 deletions src/main/java/org/perlonjava/backend/jvm/EmitterMethodCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,16 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean
// Setup local variables and environment for the method
Local.localRecord localRecord = Local.localSetup(ctx, ast, mv);

// Subroutine-level regex state scoping (Perl 5 semantics): unconditionally save
// the caller's $1, $&, etc. on entry. Restored at returnLabel before ARETURN.
// This is separate from block-level scoping (EmitBlock/EmitForeach + RegexUsageDetector).
int regexStateSlot = ctx.symbolTable.allocateLocalVariable();
mv.visitTypeInsn(Opcodes.NEW, "org/perlonjava/runtime/runtimetypes/RegexState");
mv.visitInsn(Opcodes.DUP);
mv.visitMethodInsn(Opcodes.INVOKESPECIAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "<init>", "()V", false);
mv.visitVarInsn(Opcodes.ASTORE, regexStateSlot);

// Store the computed RuntimeList return value in a dedicated local slot.
// This keeps the operand stack empty at join labels (endCatch), avoiding
// inconsistent stack map frames when multiple control-flow paths merge.
Expand Down Expand Up @@ -1041,6 +1051,20 @@ private static byte[] getBytecodeInternal(EmitterContext ctx, Node ast, boolean
mv.visitVarInsn(Opcodes.ALOAD, returnListSlot);
}

// Materialize $1, $&, etc. into concrete scalars BEFORE restoring regex state.
// The return list may contain lazy ScalarSpecialVariable references; if we
// restored first, they would resolve to the caller's (stale) values.
mv.visitInsn(Opcodes.DUP);
mv.visitMethodInsn(Opcodes.INVOKESTATIC,
"org/perlonjava/runtime/runtimetypes/RuntimeCode",
"materializeSpecialVarsInResult",
"(Lorg/perlonjava/runtime/runtimetypes/RuntimeList;)V", false);

// Restore caller's regex state (counterpart to the save at method entry)
mv.visitVarInsn(Opcodes.ALOAD, regexStateSlot);
mv.visitMethodInsn(Opcodes.INVOKEVIRTUAL,
"org/perlonjava/runtime/runtimetypes/RegexState", "restore", "()V", false);

// Teardown local variables and environment after the return value is materialized
Local.localTeardown(localRecord, mv);

Expand Down Expand Up @@ -1502,6 +1526,10 @@ public static Class<?> loadBytecode(EmitterContext ctx, byte[] classData) {
*/
public static RuntimeCode createRuntimeCode(
EmitterContext ctx, Node ast, boolean useTryCatch) {
// Ensure block-level regex save/restore is skipped for the outermost block of a sub/method.
// For anonymous subs this is set by SubroutineNode constructor, but for named subs the block
// is passed directly here without going through SubroutineNode.
ast.setAnnotation("blockIsSubroutine", true);
try {
// Try compiler path
Class<?> generatedClass = createClassWithMethod(ctx, ast, useTryCatch);
Expand Down
Loading