diff --git a/dev/presentations/German_Perl_Raku_Workshop_2026/slides.md b/dev/presentations/German_Perl_Raku_Workshop_2026/slides.md index b0e996213..587b9331e 100644 --- a/dev/presentations/German_Perl_Raku_Workshop_2026/slides.md +++ b/dev/presentations/German_Perl_Raku_Workshop_2026/slides.md @@ -672,10 +672,11 @@ Regex cache (1000 patterns) for performance. Unsupported: recursive patterns, va **Core classes:** -1. **RuntimeScalar** - Context-aware string/number/reference -2. **RuntimeArray** - Auto-vivification, slicing, context -3. **RuntimeHash** - Lazy init, ordered keys -4. **RuntimeCode** - Code refs with closures +1. **RuntimeScalar** - Dynamically typed scalar: integer, double, string, reference, undef, or special types (regex, glob, tied, dualvar) +2. **RuntimeArray** - Dynamic list of `RuntimeScalar` elements; supports plain, autovivifying, tied, and read-only modes +3. **RuntimeHash** - Associative array; supports plain, autovivifying, and tied modes +4. **RuntimeCode** - Compiled subroutine or eval string; holds either a JVM `MethodHandle` or `InterpretedCode` for the Internal VM +5. **RuntimeGlob** - Typeglob (`*foo`); name holder that delegates slot access to the global symbol table maps **Key:** Perl semantics on JVM objects. All shared between JVM compiler and Internal VM. Context tracking, auto-vivification, truthiness, and string/number coercion are implemented consistently across both backends. 
@@ -693,12 +694,10 @@ say $c->(); # 1 say $c->(); # 2 ``` -**Implementation:** -- `VariableCaptureAnalyzer` identifies which lexical variables each sub closes over at compile time -- Captured variables are stored in a shared cell (a reference-counted box) -- The `CREATE_CLOSURE_VAR` opcode allocates these cells at closure creation time -- Both the outer scope and the inner sub hold a reference to the same cell — mutations are visible to both -- Works identically in both the JVM backend and the Internal VM +**Implementation (JVM backend):** +- Each anonymous sub is compiled into a new JVM class; all visible lexical variables are passed as constructor arguments +- Captured variables (`RuntimeScalar`, `RuntimeArray`, or `RuntimeHash` depending on sigil) are shared by Java reference — both the outer scope and the inner sub hold a reference to the same object, so mutations are visible to both +- The Internal VM uses a dedicated opcode for closure variable allocation, but shares the same runtime objects at runtime --- diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 49ce56d2a..098b97112 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -660,6 +660,17 @@ private RuntimeBase getVariableValueFromContext(String varName, EmitterContext c // VISITOR METHODS // ========================================================================= + /** + * Compiles a block node, creating a new lexical scope. + * + *

Special case: the parser wraps implicit-{@code $_} foreach loops as + * {@code BlockNode([local $_, For1Node(needsArrayOfAlias=true)])}. + * In that pattern the {@code local $_} child is skipped here because + * {@link #visit(For1Node)} emits {@code LOCAL_SCALAR_SAVE_LEVEL} itself, + * which atomically saves the pre-push dynamic level and calls {@code makeLocal}. + * This allows {@code POP_LOCAL_LEVEL} after the loop to restore {@code $_} + * correctly regardless of nesting depth. + */ @Override public void visit(BlockNode node) { // Blocks create a new lexical scope @@ -670,11 +681,23 @@ public void visit(BlockNode node) { outerResultReg = allocateRegister(); } + // Detect the BlockNode([local $_, For1Node(needsArrayOfAlias)]) pattern produced + // by the parser for implicit-$_ foreach loops. For1Node emits LOCAL_SCALAR_SAVE_LEVEL + // itself, so the 'local $_' child must be skipped here to avoid double-emission. + // Using a local variable (not a field) makes this safe against nesting and exceptions. 
+ boolean skipFirstChild = node.elements.size() == 2 + && node.elements.get(1) instanceof For1Node for1 + && for1.needsArrayOfAlias + && node.elements.get(0) instanceof OperatorNode localOp + && localOp.operator.equals("local"); + enterScope(); // Visit each statement in the block int numStatements = node.elements.size(); for (int i = 0; i < numStatements; i++) { + // Skip the 'local $_' child when For1Node handles it via LOCAL_SCALAR_SAVE_LEVEL + if (i == 0 && skipFirstChild) continue; Node stmt = node.elements.get(i); // Track line number for this statement (like codegen's setDebugInfoLineNumber) @@ -2175,8 +2198,7 @@ void compileVariableDeclaration(OperatorNode node, String op) { Boolean.TRUE.equals(node.annotations.get("isDeclaredReference")); // It's a global variable - emit SLOW_OP to call GlobalRuntimeScalar.makeLocal() - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + String globalVarName = NameNormalizer.normalizeVariableName(((IdentifierNode) sigilOp.operand).name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -2218,8 +2240,7 @@ void compileVariableDeclaration(OperatorNode node, String op) { } // Localize global variable - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + idNode.name; + String globalVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -2291,8 +2312,7 @@ void compileVariableDeclaration(OperatorNode node, String op) { } // Localize global variable - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + idNode.name; + String globalVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -2335,8 +2355,7 @@ void 
compileVariableDeclaration(OperatorNode node, String op) { } // Localize global variable - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + idNode.name; + String globalVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -2360,8 +2379,7 @@ void compileVariableDeclaration(OperatorNode node, String op) { } // Localize global variable - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + idNode.name; + String globalVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -2384,8 +2402,7 @@ void compileVariableDeclaration(OperatorNode node, String op) { } // Localize global variable - String packageName = getCurrentPackage(); - String globalVarName = packageName + "::" + idNode.name; + String globalVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); int nameIdx = addToStringPool(globalVarName); int rd = allocateRegister(); @@ -3276,17 +3293,55 @@ private void visitEvalBlock(SubroutineNode node) { lastResultReg = resultReg; } + /** + * Compiles a foreach-style loop ({@code for my $var (@list) { body }}). + * + *

<p>Uses a do-while bytecode layout to eliminate the back-edge + * {@code GOTO} that would otherwise execute on every iteration: + *
<pre>
+     *   GOTO loopCheck          // one-time entry jump
+     * body:
+     *   &lt;body&gt;
+     * loopCheck:               // next/continue target
+     *   FOREACH_NEXT_OR_EXIT -&gt; body   // jump back if has next; fall through if exhausted
+     * exit:
+     * </pre>
+ * + *

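<p>For global loop variables (e.g. implicit {@code $_}, {@code needsArrayOfAlias=true}): + *
<ul>
+     *   <li>{@code LOCAL_SCALAR_SAVE_LEVEL} is emitted before the loop: it saves the pre-push dynamic level and localizes the variable.</li>
+     *   <li>{@code FOREACH_GLOBAL_NEXT_OR_EXIT} runs once per iteration: hasNext check, next(), aliasing of the global variable to the element, and a jump back to the body.</li>
+     *   <li>{@code POP_LOCAL_LEVEL} is emitted after the loop to restore the variable to the saved level.</li>
+     * </ul>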
+ */ @Override public void visit(For1Node node) { // For1Node: foreach-style loop // for my $var (@list) { body } + // + // For global loop variables (needsArrayOfAlias=true, e.g. implicit $_): + // The parser wraps this as BlockNode([local $_, For1Node]). + // visit(BlockNode) detects this pattern and skips the 'local $_' child directly, + // so For1Node emits LOCAL_SCALAR_SAVE_LEVEL here (saves pre-push level atomically), + // uses FOREACH_GLOBAL_NEXT_OR_EXIT per iteration (hasNext+next+alias), + // and POP_LOCAL_LEVEL after the loop (restores $_ correctly for nested loops). + + // Determine if this is a global loop variable (e.g. $_). + String globalLoopVarName = null; + if (node.needsArrayOfAlias && node.variable instanceof OperatorNode varOp + && varOp.operator.equals("$") && varOp.operand instanceof IdentifierNode idNode) { + globalLoopVarName = NameNormalizer.normalizeVariableName(idNode.name, getCurrentPackage()); + } // Step 1: Evaluate list in list context node.list.accept(this); int listReg = lastResultReg; // Step 2: Create iterator from the list - // This works for RuntimeArray, RuntimeList, PerlRange, etc. 
int iterReg = allocateRegister(); emit(Opcodes.ITERATOR_CREATE); emitReg(iterReg); @@ -3294,83 +3349,103 @@ public void visit(For1Node node) { // Step 3: Allocate loop variable register BEFORE entering scope // This ensures both iterReg and varReg are protected from recycling - int varReg = -1; - if (node.variable != null && node.variable instanceof OperatorNode) { - OperatorNode varOp = (OperatorNode) node.variable; - if (varOp.operator.equals("my") && varOp.operand instanceof OperatorNode) { - OperatorNode sigilOp = (OperatorNode) varOp.operand; - if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { - String varName = "$" + ((IdentifierNode) sigilOp.operand).name; - // Don't add to scope yet - just allocate register - varReg = allocateRegister(); - } - } - } - - // If no variable declared, allocate a temporary register - if (varReg == -1) { - varReg = allocateRegister(); + int varReg = allocateRegister(); + + // Step 3b: For global loop variable: emit LOCAL_SCALAR_SAVE_LEVEL. + // This atomically saves getLocalLevel() into levelReg (pre-push), then calls makeLocal. + // POP_LOCAL_LEVEL(levelReg) after the loop correctly restores $_ for any nesting depth. 
+ int levelReg = -1; + if (globalLoopVarName != null) { + levelReg = allocateRegister(); + int nameIdx = addToStringPool(globalLoopVarName); + emit(Opcodes.LOCAL_SCALAR_SAVE_LEVEL); + emitReg(varReg); // rd: receives makeLocal result (the new localized container) + emitReg(levelReg); // levelReg: receives pre-push dynamic level + emit(nameIdx); } // Step 4: Enter new scope for loop variable - // Now baseRegisterForStatement will be set past both iterReg and varReg, - // protecting them from being recycled by recycleTemporaryRegisters() enterScope(); - // Step 5: If we have a named loop variable, add it to the scope now + // Step 5: If we have a named lexical loop variable, add it to the scope now if (node.variable != null && node.variable instanceof OperatorNode) { OperatorNode varOp = (OperatorNode) node.variable; if (varOp.operator.equals("my") && varOp.operand instanceof OperatorNode) { OperatorNode sigilOp = (OperatorNode) varOp.operand; if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) sigilOp.operand).name; - // Add to scope and track for variableRegistry variableScopes.peek().put(varName, varReg); allDeclaredVariables.put(varName, varReg); } } } - // Step 6: Push loop info onto stack for last/next/redo - int loopStartPc = bytecode.size(); - LoopInfo loopInfo = new LoopInfo(node.labelName, loopStartPc, true); // true = foreach is a true loop + // Step 6: Emit initial GOTO to the loop check (do-while structure). + // This avoids a back-edge GOTO on every iteration: the superinstruction at + // the bottom jumps backward to the body start if the iterator has more elements. 
+ // Layout: GOTO check | body | check: FOREACH_NEXT_OR_EXIT → body | exit + emit(Opcodes.GOTO); + int entryJumpPc = bytecode.size(); + emitInt(0); // placeholder: will be patched to loopCheckPc + + // Step 7: Body start (redo jumps here) + int bodyStartPc = bytecode.size(); + LoopInfo loopInfo = new LoopInfo(node.labelName, bodyStartPc, true); loopStack.push(loopInfo); - // Step 7: Loop start - combined check/next/exit (superinstruction) - // Emit FOREACH_NEXT_OR_EXIT superinstruction - // This combines: hasNext check, next() call, and conditional jump - // Format: FOREACH_NEXT_OR_EXIT varReg, iterReg, exitTarget (absolute address) - emit(Opcodes.FOREACH_NEXT_OR_EXIT); - emitReg(varReg); // destination register for element - emitReg(iterReg); // iterator register - int loopEndJumpPc = bytecode.size(); - emitInt(0); // placeholder for exit target (absolute, will be patched) - - // Step 8: Execute body (redo jumps here) + // Step 8: Execute body if (node.body != null) { node.body.accept(this); } - // Step 9: Continue point (next jumps here) - loopInfo.continuePc = bytecode.size(); - - // Step 10: Jump back to loop start - emit(Opcodes.GOTO); - emitInt(loopStartPc); + // Step 9: Loop check (next/continue jumps here) - the superinstruction + int loopCheckPc = bytecode.size(); + loopInfo.continuePc = loopCheckPc; + patchJump(entryJumpPc, loopCheckPc); // patch the entry GOTO + + // Step 10: Emit the loop superinstruction at the bottom (do-while check). + // If iterator has next: load element (and alias for global vars), jump back to body. + // If exhausted: fall through to exit. 
+ int loopEndJumpPc; + if (globalLoopVarName != null) { + // FOREACH_GLOBAL_NEXT_OR_EXIT: hasNext + next + aliasGlobalVariable + conditional jump + int nameIdx = addToStringPool(globalLoopVarName); + emit(Opcodes.FOREACH_GLOBAL_NEXT_OR_EXIT); + emitReg(varReg); + emitReg(iterReg); + emit(nameIdx); + loopEndJumpPc = bytecode.size(); + emitInt(bodyStartPc); // jump backward to body start if has next + } else { + // FOREACH_NEXT_OR_EXIT: hasNext + next + conditional jump (lexical or temp var) + emit(Opcodes.FOREACH_NEXT_OR_EXIT); + emitReg(varReg); + emitReg(iterReg); + loopEndJumpPc = bytecode.size(); + emitInt(bodyStartPc); // jump backward to body start if has next + } - // Step 11: Loop end - patch the forward jump (last jumps here) + // Step 11: Loop exit - fall-through after the superinstruction int loopEndPc = bytecode.size(); - patchJump(loopEndJumpPc, loopEndPc); + + // Step 11b: Restore global loop variable after loop exits. + // POP_LOCAL_LEVEL(levelReg) pops to the pre-makeLocal level, undoing both + // the makeLocal push and all aliasGlobalVariable replacements. Correct for + // any nesting depth because levelReg holds the exact pre-push level. 
+ if (levelReg >= 0) { + emit(Opcodes.POP_LOCAL_LEVEL); + emitReg(levelReg); + } // Step 12: Patch all last/next/redo jumps for (int pc : loopInfo.breakPcs) { patchJump(pc, loopEndPc); } for (int pc : loopInfo.nextPcs) { - patchJump(pc, loopInfo.continuePc); + patchJump(pc, loopCheckPc); } for (int pc : loopInfo.redoPcs) { - patchJump(pc, loopStartPc); + patchJump(pc, bodyStartPc); } // Step 13: Pop loop info and exit scope diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 71509d5d7..d169ef0a3 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -223,6 +223,55 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.LOCAL_SCALAR_SAVE_LEVEL: { + // Superinstruction: save dynamic level BEFORE makeLocal, then localize. + // Atomically: levelReg = getLocalLevel(), rd = makeLocal(name). + // The pre-push level in levelReg is used by POP_LOCAL_LEVEL after the loop. + int rd = bytecode[pc++]; + int levelReg = bytecode[pc++]; + int nameIdx = bytecode[pc++]; + String name = code.stringPool[nameIdx]; + + registers[levelReg] = new RuntimeScalar(DynamicVariableManager.getLocalLevel()); + registers[rd] = GlobalRuntimeScalar.makeLocal(name); + break; + } + + case Opcodes.POP_LOCAL_LEVEL: { + // Restore DynamicVariableManager to a previously saved local level. + // Matches JVM compiler's DynamicVariableManager.popToLocalLevel(savedLevel) call. + int rs = bytecode[pc++]; + int savedLevel = ((RuntimeScalar) registers[rs]).getInt(); + DynamicVariableManager.popToLocalLevel(savedLevel); + break; + } + + case Opcodes.FOREACH_GLOBAL_NEXT_OR_EXIT: { + // Superinstruction: foreach loop step for a global loop variable (e.g. $_). + // Combines: hasNext check, next() into varReg, aliasGlobalVariable, conditional jump. 
+ // Do-while layout: if hasNext jump to bodyTarget, else fall through to exit. + int rd = bytecode[pc++]; + int iterReg = bytecode[pc++]; + int nameIdx = bytecode[pc++]; + int bodyTarget = readInt(bytecode, pc); + pc += 2; + + String name = code.stringPool[nameIdx]; + RuntimeScalar iterScalar = (RuntimeScalar) registers[iterReg]; + @SuppressWarnings("unchecked") + java.util.Iterator iterator = + (java.util.Iterator) iterScalar.value; + + if (iterator.hasNext()) { + RuntimeScalar element = iterator.next(); + registers[rd] = element; + GlobalVariable.aliasGlobalVariable(name, element); + pc = bodyTarget; // ABSOLUTE jump back to body start + } + // else: fall through to exit + break; + } + case Opcodes.STORE_GLOBAL_ARRAY: { // Store global array: GlobalVariable.getGlobalArray(name).setFromList(list) int nameIdx = bytecode[pc++]; @@ -543,14 +592,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; case Opcodes.FOREACH_NEXT_OR_EXIT: { - // Superinstruction for foreach loops - // Combines: hasNext check, next() call, and conditional exit - // Format: FOREACH_NEXT_OR_EXIT rd, iterReg, exitTarget - // If hasNext: rd = iterator.next(), continue to next instruction - // Else: jump to exitTarget (absolute address) + // Superinstruction for foreach loops (do-while layout). + // Combines: hasNext check, next() call, and conditional jump to body. 
+ // Format: FOREACH_NEXT_OR_EXIT rd, iterReg, bodyTarget + // If hasNext: rd = iterator.next(), jump to bodyTarget (backward) + // Else: fall through to exit (iterator exhausted) int rd = bytecode[pc++]; int iterReg = bytecode[pc++]; - int exitTarget = readInt(bytecode, pc); // Absolute target address + int bodyTarget = readInt(bytecode, pc); // Absolute target address pc += 2; // Skip the int we just read RuntimeScalar iterScalar = (RuntimeScalar) registers[iterReg]; @@ -559,13 +608,11 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c (java.util.Iterator) iterScalar.value; if (iterator.hasNext()) { - // Get next element and continue to body + // Get next element and jump back to body registers[rd] = iterator.next(); - // Fall through to next instruction (body) - } else { - // Exit loop - jump to absolute target - pc = exitTarget; // ABSOLUTE jump, not relative! + pc = bodyTarget; // ABSOLUTE jump back to body start } + // else: fall through to exit break; } diff --git a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java index 31524036f..d14d144d2 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java +++ b/src/main/java/org/perlonjava/backend/bytecode/InterpretedCode.java @@ -1138,15 +1138,16 @@ public String disassemble() { rs = bytecode[pc++]; sb.append("ITERATOR_NEXT r").append(rd).append(" = r").append(rs).append(".next()\n"); break; - case Opcodes.FOREACH_NEXT_OR_EXIT: + case Opcodes.FOREACH_NEXT_OR_EXIT: { rd = bytecode[pc++]; int iterReg = bytecode[pc++]; - int exitTarget = readInt(bytecode, pc); // Absolute target address + int bodyTarget = readInt(bytecode, pc); // Absolute body address pc += 2; sb.append("FOREACH_NEXT_OR_EXIT r").append(rd) - .append(" = r").append(iterReg).append(".next() or goto ") - .append(exitTarget).append("\n"); + .append(" = r").append(iterReg).append(".next() and goto ") + 
.append(bodyTarget).append("\n"); break; + } case Opcodes.SUBTRACT_ASSIGN: rd = bytecode[pc++]; rs = bytecode[pc++]; @@ -1264,6 +1265,27 @@ public String disassemble() { nameIdx = bytecode[pc++]; sb.append("LOCAL_SCALAR r").append(rd).append(" = local $").append(stringPool[nameIdx]).append("\n"); break; + case Opcodes.LOCAL_SCALAR_SAVE_LEVEL: { + rd = bytecode[pc++]; + int levelReg = bytecode[pc++]; + nameIdx = bytecode[pc++]; + sb.append("LOCAL_SCALAR_SAVE_LEVEL r").append(rd).append(", level=r").append(levelReg) + .append(" = local $").append(stringPool[nameIdx]).append("\n"); + break; + } + case Opcodes.POP_LOCAL_LEVEL: + rs = bytecode[pc++]; + sb.append("POP_LOCAL_LEVEL DynamicVariableManager.popToLocalLevel(r").append(rs).append(")\n"); + break; + case Opcodes.FOREACH_GLOBAL_NEXT_OR_EXIT: { + rd = bytecode[pc++]; + int fgIterReg = bytecode[pc++]; + nameIdx = bytecode[pc++]; + int fgBody = readInt(bytecode, pc); pc += 2; + sb.append("FOREACH_GLOBAL_NEXT_OR_EXIT r").append(rd).append(" = r").append(fgIterReg) + .append(".next(), alias $").append(stringPool[nameIdx]).append(" and goto ").append(fgBody).append("\n"); + break; + } // DEPRECATED: SLOW_OP case removed - opcode 87 is no longer emitted // All operations now use direct opcodes (114-154) diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 1248456f1..c9784cce9 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -986,5 +986,23 @@ public class Opcodes { public static final short GMTIME = 300; public static final short CRYPT = 301; + /** Superinstruction: save dynamic level BEFORE makeLocal, then localize global scalar. + * Atomically: levelReg = getLocalLevel(), rd = makeLocal(stringPool[nameIdx]). + * The saved pre-push level is used by POP_LOCAL_LEVEL after the loop to fully restore $_. 
+ * Format: LOCAL_SCALAR_SAVE_LEVEL rd levelReg nameIdx */ + public static final short LOCAL_SCALAR_SAVE_LEVEL = 302; + + /** Restore DynamicVariableManager to a previously saved local level. + * Matches JVM compiler's DynamicVariableManager.popToLocalLevel(savedLevel) call. + * Format: POP_LOCAL_LEVEL rs */ + public static final short POP_LOCAL_LEVEL = 303; + + /** Superinstruction: foreach loop step for a global loop variable (e.g. $_). + * Combines: hasNext check, next() into varReg, aliasGlobalVariable(name, varReg), conditional jump (do-while layout). + * If iterator has next: varReg = next(), aliasGlobalVariable(name, varReg), jump to bodyTarget (absolute address). + * If iterator exhausted: fall through to the loop exit. + * Format: FOREACH_GLOBAL_NEXT_OR_EXIT varReg iterReg nameIdx bodyTarget */ + public static final short FOREACH_GLOBAL_NEXT_OR_EXIT = 304; + private Opcodes() {} // Utility class - no instantiation }