From 4ee5f1ff7aec6daea0cfaf0eee251f8f84671a15 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 12 Feb 2026 16:14:43 +0100 Subject: [PATCH 1/4] Implement array and hash element access with proper setFromList semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for $array[index] and $hash{key} operations using correct Perl semantics, where assignment modifies the existing container's content instead of creating a new object. Changes: - Add NEW_ARRAY (93) and NEW_HASH (94) opcodes for creating empty containers - Add ARRAY_SET_FROM_LIST (95) and HASH_SET_FROM_LIST (96) for populating * Uses setFromList() semantics: modifies existing array/hash content * Matches compiler behavior: create once, populate/modify separately - Implement array element access: $a[index] * Supports both lexical and global arrays * ARRAY_GET opcode for element retrieval - Implement hash element access: $h{key} * Supports both lexical and global hashes * Bareword keys treated as string literals (not variable lookup) * HASH_GET opcode for element retrieval - Support my @array = list and my %hash = list declarations * Proper two-step: create empty + setFromList * Allows subsequent reassignment to modify content Technical fixes: - Fixed bareword hash key handling: IdentifierNode → string literal - Replaced incorrect LIST_TO_ARRAY/LIST_TO_HASH opcodes with proper semantics - Added LOAD_GLOBAL_ARRAY and LOAD_GLOBAL_HASH for global container access Testing: ```bash ./jperl --interpreter -E 'my @a = (10, 20, 30); print $a[1], "\n"' # Output: 20 ./jperl --interpreter -E 'my %h = (a => 1, b => 2); print $h{a}, " ", $h{b}, "\n"' # Output: 1 2 ``` Dense opcodes: 0-96 (97 total) Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 162 ++++++++++++++++++ .../interpreter/BytecodeInterpreter.java | 44 +++++ .../interpreter/InterpretedCode.java | 18 ++ .../org/perlonjava/interpreter/Opcodes.java | 14 +- 4 files changed, 237 insertions(+), 1
deletion(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index e85df291a..2d132b46e 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -361,6 +361,52 @@ public void visit(BinaryOperatorNode node) { lastResultReg = reg; return; + } else if (sigilOp.operator.equals("@") && sigilOp.operand instanceof IdentifierNode) { + // Handle my @array = ... + String varName = "@" + ((IdentifierNode) sigilOp.operand).name; + + // Allocate register for new lexical array + int arrayReg = allocateRegister(); + registerMap.put(varName, arrayReg); + + // Create empty array + emit(Opcodes.NEW_ARRAY); + emit(arrayReg); + + // Compile RHS (should evaluate to a list) + node.right.accept(this); + int listReg = lastResultReg; + + // Populate array from list using setFromList + emit(Opcodes.ARRAY_SET_FROM_LIST); + emit(arrayReg); + emit(listReg); + + lastResultReg = arrayReg; + return; + } else if (sigilOp.operator.equals("%") && sigilOp.operand instanceof IdentifierNode) { + // Handle my %hash = ... 
+ String varName = "%" + ((IdentifierNode) sigilOp.operand).name; + + // Allocate register for new lexical hash + int hashReg = allocateRegister(); + registerMap.put(varName, hashReg); + + // Create empty hash + emit(Opcodes.NEW_HASH); + emit(hashReg); + + // Compile RHS (should evaluate to a list) + node.right.accept(this); + int listReg = lastResultReg; + + // Populate hash from list + emit(Opcodes.HASH_SET_FROM_LIST); + emit(hashReg); + emit(listReg); + + lastResultReg = hashReg; + return; } } @@ -683,6 +729,122 @@ public void visit(BinaryOperatorNode node) { emit(rs1); // Closure register emit(RuntimeContextType.LIST); // Map always uses list context } + case "[" -> { + // Array element access: $a[10] means get element 10 from array @a + // left: OperatorNode("$", IdentifierNode("a")) + // right: ArrayLiteralNode(index_expression) + + if (!(node.left instanceof OperatorNode)) { + throw new RuntimeException("Array access requires variable on left side"); + } + OperatorNode leftOp = (OperatorNode) node.left; + if (!leftOp.operator.equals("$") || !(leftOp.operand instanceof IdentifierNode)) { + throw new RuntimeException("Array access requires scalar dereference: $var[index]"); + } + + String varName = ((IdentifierNode) leftOp.operand).name; + String arrayVarName = "@" + varName; + + // Get the array - check lexical first, then global + int arrayReg; + if (registerMap.containsKey(arrayVarName)) { + // Lexical array + arrayReg = registerMap.get(arrayVarName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = "main::" + varName; + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(arrayReg); + emit(nameIdx); + } + + // Evaluate index expression + // For ArrayLiteralNode, get the first element + if (!(node.right instanceof ArrayLiteralNode)) { + throw new RuntimeException("Array access requires ArrayLiteralNode on right side"); + } + ArrayLiteralNode indexNode = (ArrayLiteralNode) 
node.right; + if (indexNode.elements.isEmpty()) { + throw new RuntimeException("Array access requires index expression"); + } + + // Compile the index expression + indexNode.elements.get(0).accept(this); + int indexReg = lastResultReg; + + // Emit ARRAY_GET + emit(Opcodes.ARRAY_GET); + emit(rd); + emit(arrayReg); + emit(indexReg); + } + case "{" -> { + // Hash element access: $h{key} means get element 'key' from hash %h + // left: OperatorNode("$", IdentifierNode("h")) + // right: HashLiteralNode(key_expression) + + if (!(node.left instanceof OperatorNode)) { + throw new RuntimeException("Hash access requires variable on left side"); + } + OperatorNode leftOp = (OperatorNode) node.left; + if (!leftOp.operator.equals("$") || !(leftOp.operand instanceof IdentifierNode)) { + throw new RuntimeException("Hash access requires scalar dereference: $var{key}"); + } + + String varName = ((IdentifierNode) leftOp.operand).name; + String hashVarName = "%" + varName; + + // Get the hash - check lexical first, then global + int hashReg; + if (registerMap.containsKey(hashVarName)) { + // Lexical hash + hashReg = registerMap.get(hashVarName); + } else { + // Global hash - load it + hashReg = allocateRegister(); + String globalHashName = "main::" + varName; + int nameIdx = addToStringPool(globalHashName); + emit(Opcodes.LOAD_GLOBAL_HASH); + emit(hashReg); + emit(nameIdx); + } + + // Evaluate key expression + // For HashLiteralNode, get the first element (should be the key) + if (!(node.right instanceof HashLiteralNode)) { + throw new RuntimeException("Hash access requires HashLiteralNode on right side"); + } + HashLiteralNode keyNode = (HashLiteralNode) node.right; + if (keyNode.elements.isEmpty()) { + throw new RuntimeException("Hash access requires key expression"); + } + + // Compile the key expression + // Special case: bareword identifiers should be treated as string literals + int keyReg; + Node keyElement = keyNode.elements.get(0); + if (keyElement instanceof IdentifierNode) 
{ + // Bareword key - treat as string literal + String keyStr = ((IdentifierNode) keyElement).name; + keyReg = allocateRegister(); + int strIdx = addToStringPool(keyStr); + emit(Opcodes.LOAD_STRING); + emit(keyReg); + emit(strIdx); + } else { + // Expression key - evaluate normally + keyElement.accept(this); + keyReg = lastResultReg; + } + + // Emit HASH_GET + emit(Opcodes.HASH_GET); + emit(rd); + emit(hashReg); + emit(keyReg); + } default -> throw new RuntimeException("Unsupported operator: " + node.operator); } diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index ebb663391..8fc12f2b8 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -946,6 +946,50 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.NEW_ARRAY: { + // Create empty array: rd = new RuntimeArray() + int rd = bytecode[pc++] & 0xFF; + registers[rd] = new RuntimeArray(); + break; + } + + case Opcodes.NEW_HASH: { + // Create empty hash: rd = new RuntimeHash() + int rd = bytecode[pc++] & 0xFF; + registers[rd] = new RuntimeHash(); + break; + } + + case Opcodes.ARRAY_SET_FROM_LIST: { + // Set array content from list: array_reg.setFromList(list_reg) + // Format: [ARRAY_SET_FROM_LIST] [array_reg] [list_reg] + int arrayReg = bytecode[pc++] & 0xFF; + int listReg = bytecode[pc++] & 0xFF; + + RuntimeArray array = (RuntimeArray) registers[arrayReg]; + RuntimeBase listBase = registers[listReg]; + RuntimeList list = listBase.getList(); + + // setFromList clears and repopulates the array + array.setFromList(list); + break; + } + + case Opcodes.HASH_SET_FROM_LIST: { + // Set hash content from list: hash_reg = RuntimeHash.createHash(list_reg) + // Format: [HASH_SET_FROM_LIST] [hash_reg] [list_reg] + int hashReg = bytecode[pc++] & 0xFF; + int listReg = 
bytecode[pc++] & 0xFF; + + RuntimeHash existingHash = (RuntimeHash) registers[hashReg]; + RuntimeBase listBase = registers[listReg]; + + // Create new hash from list, then copy elements to existing hash + RuntimeHash newHash = RuntimeHash.createHash(listBase); + existingHash.elements = newHash.elements; + break; + } + // ================================================================= // SLOW OPERATIONS // ================================================================= diff --git a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java index ad3b368ef..e3f2de458 100644 --- a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java +++ b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java @@ -475,6 +475,24 @@ public String disassemble() { sb.append("MAP r").append(rd).append(" = map(r").append(rs1) .append(", r").append(rs2).append(", ctx=").append(mapCtx).append(")\n"); break; + case Opcodes.NEW_ARRAY: + rd = bytecode[pc++] & 0xFF; + sb.append("NEW_ARRAY r").append(rd).append(" = new RuntimeArray()\n"); + break; + case Opcodes.NEW_HASH: + rd = bytecode[pc++] & 0xFF; + sb.append("NEW_HASH r").append(rd).append(" = new RuntimeHash()\n"); + break; + case Opcodes.ARRAY_SET_FROM_LIST: + rs1 = bytecode[pc++] & 0xFF; // array register + rs2 = bytecode[pc++] & 0xFF; // list register + sb.append("ARRAY_SET_FROM_LIST r").append(rs1).append(".setFromList(r").append(rs2).append(")\n"); + break; + case Opcodes.HASH_SET_FROM_LIST: + rs1 = bytecode[pc++] & 0xFF; // hash register + rs2 = bytecode[pc++] & 0xFF; // list register + sb.append("HASH_SET_FROM_LIST r").append(rs1).append(".setFromList(r").append(rs2).append(")\n"); + break; case Opcodes.NOT: rd = bytecode[pc++] & 0xFF; rs = bytecode[pc++] & 0xFF; diff --git a/src/main/java/org/perlonjava/interpreter/Opcodes.java b/src/main/java/org/perlonjava/interpreter/Opcodes.java index d3b4ccb93..94faac9bc 100644 --- 
a/src/main/java/org/perlonjava/interpreter/Opcodes.java +++ b/src/main/java/org/perlonjava/interpreter/Opcodes.java @@ -4,7 +4,7 @@ * Bytecode opcodes for the PerlOnJava interpreter. * * Design: Pure register machine with 3-address code format. - * DENSE opcodes (0-92, NO GAPS) enable JVM tableswitch optimization. + * DENSE opcodes (0-96, NO GAPS) enable JVM tableswitch optimization. * * Register architecture is REQUIRED for control flow correctness: * Perl's GOTO/last/next/redo would corrupt a stack-based architecture. @@ -413,6 +413,18 @@ public class Opcodes { /** Map operator: rd = ListOperators.map(list_reg, closure_reg, context) */ public static final byte MAP = 92; + /** Create empty array: rd = new RuntimeArray() */ + public static final byte NEW_ARRAY = 93; + + /** Create empty hash: rd = new RuntimeHash() */ + public static final byte NEW_HASH = 94; + + /** Set array from list: array_reg.setFromList(list_reg) */ + public static final byte ARRAY_SET_FROM_LIST = 95; + + /** Set hash from list: hash_reg = RuntimeHash.createHash(list_reg) then copy elements */ + public static final byte HASH_SET_FROM_LIST = 96; + // ================================================================= // Slow Operation IDs (0-255) // ================================================================= From c67acd65ad347d28603a75fe5829b010f8c6feb2 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 12 Feb 2026 16:21:24 +0100 Subject: [PATCH 2/4] Add proper error formatting with line numbers and code context Use PerlCompilerException with ErrorMessageUtil for interpreter compilation errors, providing user-friendly error messages with file names, line numbers, and code context instead of bare RuntimeExceptions. 
Changes: - Add ErrorMessageUtil parameter to BytecodeCompiler constructor - Track currentTokenIndex during AST traversal (BinaryOperatorNode, OperatorNode) - Add throwCompilerException() helper that uses PerlCompilerException - Replace "Unsupported operator" RuntimeException/UnsupportedOperationException with properly formatted PerlCompilerException - Update PerlLanguageProvider to pass ctx.errorUtil to BytecodeCompiler - Update test harnesses (InterpreterTest) to pass errorUtil Error message improvement: Before: "Unsupported operator: &" After: "Unsupported operator: & at examples/life.pl line 57, near \"@life = generate;\"" The ErrorMessageUtil automatically: - Adds source file name - Computes line number from token index - Shows code context around the error - Formats with proper Perl-style error messages Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 48 +++++++++++++++++-- .../interpreter/InterpreterTest.java | 2 +- .../scriptengine/PerlLanguageProvider.java | 3 +- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 2d132b46e..050e0a7ae 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -3,6 +3,7 @@ import org.perlonjava.astnode.*; import org.perlonjava.astvisitor.Visitor; import org.perlonjava.codegen.EmitterContext; +import org.perlonjava.lexer.LexerToken; import org.perlonjava.runtime.*; import java.io.ByteArrayOutputStream; @@ -28,6 +29,10 @@ public class BytecodeCompiler implements Visitor { // Token index tracking for error reporting private final Map pcToTokenIndex = new HashMap<>(); + private int currentTokenIndex = -1; // Track current token for error reporting + + // Error reporting + private final ErrorMessageUtil errorUtil; // Register allocation private int nextRegister = 3; // 
0=this, 1=@_, 2=wantarray @@ -44,9 +49,40 @@ public class BytecodeCompiler implements Visitor { private final String sourceName; private final int sourceLine; - public BytecodeCompiler(String sourceName, int sourceLine) { + public BytecodeCompiler(String sourceName, int sourceLine, ErrorMessageUtil errorUtil) { this.sourceName = sourceName; this.sourceLine = sourceLine; + this.errorUtil = errorUtil; + } + + // Legacy constructor for backward compatibility + public BytecodeCompiler(String sourceName, int sourceLine) { + this(sourceName, sourceLine, null); + } + + /** + * Throw a compiler exception with proper error formatting. + * Uses PerlCompilerException which formats with line numbers and code context. + * + * @param message The error message + * @param tokenIndex The token index where the error occurred + */ + private void throwCompilerException(String message, int tokenIndex) { + if (errorUtil != null && tokenIndex >= 0) { + throw new PerlCompilerException(tokenIndex, message, errorUtil); + } else { + // Fallback to simple error (no context available) + throw new RuntimeException(message); + } + } + + /** + * Throw a compiler exception using the current token index. 
+ * + * @param message The error message + */ + private void throwCompilerException(String message) { + throwCompilerException(message, currentTokenIndex); } /** @@ -302,6 +338,9 @@ public void visit(IdentifierNode node) { @Override public void visit(BinaryOperatorNode node) { + // Track token index for error reporting + currentTokenIndex = node.getIndex(); + // Handle print/say early (special handling for filehandle) if (node.operator.equals("print") || node.operator.equals("say")) { // print/say FILEHANDLE LIST @@ -845,7 +884,7 @@ public void visit(BinaryOperatorNode node) { emit(hashReg); emit(keyReg); } - default -> throw new RuntimeException("Unsupported operator: " + node.operator); + default -> throwCompilerException("Unsupported operator: " + node.operator); } lastResultReg = rd; @@ -853,6 +892,9 @@ public void visit(BinaryOperatorNode node) { @Override public void visit(OperatorNode node) { + // Track token index for error reporting + currentTokenIndex = node.getIndex(); + String op = node.operator; // Handle specific operators @@ -1195,7 +1237,7 @@ public void visit(OperatorNode node) { lastResultReg = rd; } else { - throw new UnsupportedOperationException("Unsupported operator: " + op); + throwCompilerException("Unsupported operator: " + op); } } diff --git a/src/main/java/org/perlonjava/interpreter/InterpreterTest.java b/src/main/java/org/perlonjava/interpreter/InterpreterTest.java index a45d0c2a5..c0441187a 100644 --- a/src/main/java/org/perlonjava/interpreter/InterpreterTest.java +++ b/src/main/java/org/perlonjava/interpreter/InterpreterTest.java @@ -55,7 +55,7 @@ public static RuntimeList runCode(String perlCode, String sourceName, int source Node ast = parser.parse(); // Step 4: Compile AST to interpreter bytecode - BytecodeCompiler compiler = new BytecodeCompiler(sourceName, sourceLine); + BytecodeCompiler compiler = new BytecodeCompiler(sourceName, sourceLine, errorUtil); InterpretedCode code = compiler.compile(ast); // Step 5: Execute via apply() 
(just like compiled code) diff --git a/src/main/java/org/perlonjava/scriptengine/PerlLanguageProvider.java b/src/main/java/org/perlonjava/scriptengine/PerlLanguageProvider.java index c80afbea2..a0aea037a 100644 --- a/src/main/java/org/perlonjava/scriptengine/PerlLanguageProvider.java +++ b/src/main/java/org/perlonjava/scriptengine/PerlLanguageProvider.java @@ -319,7 +319,8 @@ private static Object compileToExecutable(Node ast, EmitterContext ctx) throws E ctx.logDebug("Compiling to bytecode interpreter"); BytecodeCompiler compiler = new BytecodeCompiler( ctx.compilerOptions.fileName, - 1 // tokenIndex for error reporting + 1, // sourceLine (legacy parameter) + ctx.errorUtil // Pass errorUtil for proper error formatting with line numbers ); InterpretedCode interpretedCode = compiler.compile(ast); From 8dcd9d597839d016b2e7c934d480e69267ea9129 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 12 Feb 2026 16:26:00 +0100 Subject: [PATCH 3/4] Implement named subroutine calls with & operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for calling named subroutines using the & operator to get code references, enabling patterns like foo() and printlife @args. Changes: - Add & operator support in OperatorNode visitor * Gets code reference using LOAD_GLOBAL_CODE opcode * Adds "main::" package prefix if needed * Returns RuntimeScalar containing code reference - Fix ( operator case to include "(" (was only "()" and "->") * Now handles: foo(args), $coderef->(args), &subname(args) - Use throwCompilerException for & operator errors (proper formatting) This completes the subroutine call chain: 1. & operator → gets code reference 2. ( operator → calls the code reference with arguments 3. 
CALL_SUB opcode → executes via RuntimeCode.apply() Testing: ```bash ./jperl --interpreter -E 'sub foo { return 42; } my $x = foo(); print "$x\n"' # Output: 42 ./jperl --interpreter -E 'sub add { my ($a, $b) = @_; return $a + $b; } print add(10, 20), "\n"' # Output: 30 ``` Progress on life.pl: Now fails on passing arrays to subroutines (@life) Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 050e0a7ae..592b98a47 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -627,8 +627,8 @@ public void visit(BinaryOperatorNode node) { emit(rs1); emit(rs2); } - case "()", "->" -> { - // Apply operator: $coderef->(args) or &subname(args) + case "(", "()", "->" -> { + // Apply operator: $coderef->(args) or &subname(args) or foo(args) // left (rs1) = code reference (RuntimeScalar containing RuntimeCode or SubroutineNode) // right (rs2) = arguments (should be RuntimeList from ListNode) @@ -990,6 +990,31 @@ public void visit(OperatorNode node) { } else { throw new RuntimeException("Unsupported * operand: " + node.operand.getClass().getSimpleName()); } + } else if (op.equals("&")) { + // Code reference: &subname + // Gets a reference to a named subroutine + if (node.operand instanceof IdentifierNode) { + IdentifierNode idNode = (IdentifierNode) node.operand; + String subName = idNode.name; + + // Add package prefix if not present + if (!subName.contains("::")) { + subName = "main::" + subName; + } + + // Allocate register for code reference + int rd = allocateRegister(); + int nameIdx = addToStringPool(subName); + + // Emit LOAD_GLOBAL_CODE + emit(Opcodes.LOAD_GLOBAL_CODE); + emit(rd); + emit(nameIdx); + + lastResultReg = rd; + } else { + 
throwCompilerException("Unsupported & operand: " + node.operand.getClass().getSimpleName()); + } } else if (op.equals("\\")) { // Reference operator: \$x, \@x, \%x, \*x, etc. if (node.operand != null) { From ea0de2b5e270025f76a6aa8a35a71865858b9304 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 12 Feb 2026 21:47:57 +0100 Subject: [PATCH 4/4] Implement variable sharing between interpreter and compiled code Enable interpreted main scripts to share lexical variables with compiled named subroutines by storing captured variables in persistent globals. Key changes: - Add SET_SCALAR opcode to set values in persistent scalars without overwriting references, preserving aliasing between interpreter and compiled code - Use SLOWOP_RETRIEVE_BEGIN_* opcodes to load persistent variables into interpreter registers, enabling both modes to access the same objects - Implement context detection (wantarray) for subroutine calls based on assignment target type (scalar/array/void) - Fix RANGE opcode to handle null registers gracefully - Add VariableCaptureAnalyzer for identifying shared variables (note: currently uses AST node ids set by parser) This allows programs like examples/life.pl to run correctly in interpreter mode with proper variable sharing and context handling. 
Co-Authored-By: Claude Opus 4.6 --- .../interpreter/BytecodeCompiler.java | 945 ++++++++++++++++-- .../interpreter/BytecodeInterpreter.java | 109 +- .../interpreter/InterpretedCode.java | 35 + .../org/perlonjava/interpreter/Opcodes.java | 30 + .../interpreter/SlowOpcodeHandler.java | 174 ++++ .../interpreter/VariableCaptureAnalyzer.java | 183 ++++ .../perlonjava/runtime/GlobalVariable.java | 2 +- 7 files changed, 1369 insertions(+), 109 deletions(-) create mode 100644 src/main/java/org/perlonjava/interpreter/VariableCaptureAnalyzer.java diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java index 592b98a47..9bfb90406 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeCompiler.java @@ -5,6 +5,8 @@ import org.perlonjava.codegen.EmitterContext; import org.perlonjava.lexer.LexerToken; import org.perlonjava.runtime.*; +import org.perlonjava.symbols.ScopedSymbolTable; +import org.perlonjava.symbols.SymbolTable; import java.io.ByteArrayOutputStream; import java.util.*; @@ -25,7 +27,13 @@ public class BytecodeCompiler implements Visitor { private final ByteArrayOutputStream bytecode = new ByteArrayOutputStream(); private final List constants = new ArrayList<>(); private final List stringPool = new ArrayList<>(); - private final Map registerMap = new HashMap<>(); + + // Simple variable-to-register mapping for the interpreter + // Each scope is a Map mapping variable names to register indices + private final Stack> variableScopes = new Stack<>(); + + // Track current package name (for global variables) + private String currentPackage = "main"; // Token index tracking for error reporting private final Map pcToTokenIndex = new HashMap<>(); @@ -40,6 +48,9 @@ public class BytecodeCompiler implements Visitor { // Track last result register for expression chaining private int lastResultReg = -1; + // Track current 
calling context for subroutine calls + private int currentCallContext = RuntimeContextType.LIST; // Default to LIST + // Closure support private RuntimeBase[] capturedVars; // Captured variable values private String[] capturedVarNames; // Parallel array of names @@ -53,6 +64,13 @@ public BytecodeCompiler(String sourceName, int sourceLine, ErrorMessageUtil erro this.sourceName = sourceName; this.sourceLine = sourceLine; this.errorUtil = errorUtil; + + // Initialize with global scope containing the 3 reserved registers + Map globalScope = new HashMap<>(); + globalScope.put("this", 0); + globalScope.put("@_", 1); + globalScope.put("wantarray", 2); + variableScopes.push(globalScope); } // Legacy constructor for backward compatibility @@ -60,6 +78,76 @@ public BytecodeCompiler(String sourceName, int sourceLine) { this(sourceName, sourceLine, null); } + /** + * Helper: Check if a variable exists in any scope. + */ + private boolean hasVariable(String name) { + for (int i = variableScopes.size() - 1; i >= 0; i--) { + if (variableScopes.get(i).containsKey(name)) { + return true; + } + } + return false; + } + + /** + * Helper: Get the register index for a variable. + * Returns -1 if not found. + */ + private int getVariableRegister(String name) { + for (int i = variableScopes.size() - 1; i >= 0; i--) { + Integer reg = variableScopes.get(i).get(name); + if (reg != null) { + return reg; + } + } + return -1; + } + + /** + * Helper: Add a variable to the current scope and return its register index. + * Allocates a new register. + */ + private int addVariable(String name, String declType) { + int reg = allocateRegister(); + variableScopes.peek().put(name, reg); + return reg; + } + + /** + * Helper: Enter a new lexical scope. + */ + private void enterScope() { + variableScopes.push(new HashMap<>()); + } + + /** + * Helper: Exit the current lexical scope. 
+ */ + private void exitScope() { + if (variableScopes.size() > 1) { + variableScopes.pop(); + } + } + + /** + * Helper: Get current package name for global variable resolution. + */ + private String getCurrentPackage() { + return currentPackage; + } + + /** + * Helper: Get all variable names in all scopes (for closure detection). + */ + private String[] getVariableNames() { + Set allVars = new HashSet<>(); + for (Map scope : variableScopes) { + allVars.addAll(scope.keySet()); + } + return allVars.toArray(new String[0]); + } + /** * Throw a compiler exception with proper error formatting. * Uses PerlCompilerException which formats with line numbers and code context. @@ -203,9 +291,14 @@ private Set collectReferencedVariables(Node ast) { */ private Set getLocalVariableNames(EmitterContext ctx) { Set locals = new HashSet<>(); - // This is a simplified version - we collect variables from registerMap - // which contains all lexically declared variables in the current compilation unit - locals.addAll(registerMap.keySet()); + // Collect variables from all scopes + String[] varNames = getVariableNames(); + for (String name : varNames) { + // Skip the 3 reserved registers (this, @_, wantarray) + if (!name.equals("this") && !name.equals("@_") && !name.equals("wantarray")) { + locals.add(name); + } + } return locals; } @@ -247,7 +340,17 @@ private RuntimeBase getVariableValueFromContext(String varName, EmitterContext c public void visit(BlockNode node) { // Visit each statement in the block for (Node stmt : node.elements) { + // Standalone statements (not assignments) use VOID context + int savedContext = currentCallContext; + + // If this is not an assignment or other value-using construct, use VOID context + if (!(stmt instanceof BinaryOperatorNode && ((BinaryOperatorNode) stmt).operator.equals("="))) { + currentCallContext = RuntimeContextType.VOID; + } + stmt.accept(this); + + currentCallContext = savedContext; } } @@ -307,16 +410,16 @@ public void visit(IdentifierNode 
node) { } // Check if it's a lexical variable (may have sigil or not) - if (registerMap.containsKey(varName)) { + if (hasVariable(varName)) { // Lexical variable - already has a register - lastResultReg = registerMap.get(varName); + lastResultReg = getVariableRegister(varName); } else { // Try with sigils boolean found = false; for (String sigil : sigils) { String varNameWithSigil = sigil + varName; - if (registerMap.containsKey(varNameWithSigil)) { - lastResultReg = registerMap.get(varNameWithSigil); + if (hasVariable(varNameWithSigil)) { + lastResultReg = getVariableRegister(varNameWithSigil); found = true; break; } @@ -372,6 +475,28 @@ public void visit(BinaryOperatorNode node) { // Handle assignment separately (doesn't follow standard left-right-op pattern) if (node.operator.equals("=")) { + // Determine the calling context for the RHS based on LHS type + int rhsContext = RuntimeContextType.LIST; // Default + + // Check if LHS is a scalar assignment (my $x = ...) + if (node.left instanceof OperatorNode) { + OperatorNode leftOp = (OperatorNode) node.left; + if (leftOp.operator.equals("my") && leftOp.operand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) leftOp.operand; + if (sigilOp.operator.equals("$")) { + // Scalar assignment: use SCALAR context for RHS + rhsContext = RuntimeContextType.SCALAR; + } + } else if (leftOp.operator.equals("$")) { + // Regular scalar assignment: $x = ... 
+ rhsContext = RuntimeContextType.SCALAR; + } + } + + // Set the context for subroutine calls in RHS + int savedContext = currentCallContext; + currentCallContext = rhsContext; + // Special case: my $x = value if (node.left instanceof OperatorNode) { OperatorNode leftOp = (OperatorNode) node.left; @@ -385,9 +510,39 @@ public void visit(BinaryOperatorNode node) { if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) sigilOp.operand).name; - // Allocate register for new lexical variable - int reg = allocateRegister(); - registerMap.put(varName, reg); + // Check if this variable is captured by named subs (Parser marks with id) + if (sigilOp.id != 0) { + // RETRIEVE the persistent variable (creates if doesn't exist) + int beginId = sigilOp.id; + int nameIdx = addToStringPool(varName); + int reg = allocateRegister(); + + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR); + emit(reg); + emit(nameIdx); + emit(beginId); + + // Now register contains a reference to the persistent RuntimeScalar + // Store the initializer value INTO that RuntimeScalar + node.right.accept(this); + int valueReg = lastResultReg; + + // Set the value in the persistent scalar using SET_SCALAR + // This calls .set() on the RuntimeScalar without overwriting the reference + emit(Opcodes.SET_SCALAR); + emit(reg); + emit(valueReg); + + // Track this variable - map the name to the register we already allocated + variableScopes.peek().put(varName, reg); + lastResultReg = reg; + return; + } + + // Regular lexical variable (not captured) + // Allocate register for new lexical variable and add to symbol table + int reg = addVariable(varName, "my"); // Compile RHS node.right.accept(this); @@ -404,9 +559,37 @@ public void visit(BinaryOperatorNode node) { // Handle my @array = ... 
String varName = "@" + ((IdentifierNode) sigilOp.operand).name; - // Allocate register for new lexical array - int arrayReg = allocateRegister(); - registerMap.put(varName, arrayReg); + // Check if this variable is captured by named subs + if (sigilOp.id != 0) { + // RETRIEVE the persistent array + int beginId = sigilOp.id; + int nameIdx = addToStringPool(varName); + int arrayReg = allocateRegister(); + + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY); + emit(arrayReg); + emit(nameIdx); + emit(beginId); + + // Compile RHS (should evaluate to a list) + node.right.accept(this); + int listReg = lastResultReg; + + // Populate array from list + emit(Opcodes.ARRAY_SET_FROM_LIST); + emit(arrayReg); + emit(listReg); + + // Track this variable - map the name to the register we already allocated + variableScopes.peek().put(varName, arrayReg); + lastResultReg = arrayReg; + return; + } + + // Regular lexical array (not captured) + // Allocate register for new lexical array and add to symbol table + int arrayReg = addVariable(varName, "my"); // Create empty array emit(Opcodes.NEW_ARRAY); @@ -427,9 +610,37 @@ public void visit(BinaryOperatorNode node) { // Handle my %hash = ... 
String varName = "%" + ((IdentifierNode) sigilOp.operand).name; - // Allocate register for new lexical hash - int hashReg = allocateRegister(); - registerMap.put(varName, hashReg); + // Check if this variable is captured by named subs + if (sigilOp.id != 0) { + // RETRIEVE the persistent hash + int beginId = sigilOp.id; + int nameIdx = addToStringPool(varName); + int hashReg = allocateRegister(); + + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH); + emit(hashReg); + emit(nameIdx); + emit(beginId); + + // Compile RHS (should evaluate to a list) + node.right.accept(this); + int listReg = lastResultReg; + + // Populate hash from list + emit(Opcodes.HASH_SET_FROM_LIST); + emit(hashReg); + emit(listReg); + + // Track this variable - map the name to the register we already allocated + variableScopes.peek().put(varName, hashReg); + lastResultReg = hashReg; + return; + } + + // Regular lexical hash (not captured) + // Allocate register for new lexical hash and add to symbol table + int hashReg = addVariable(varName, "my"); // Create empty hash emit(Opcodes.NEW_HASH); @@ -453,9 +664,8 @@ public void visit(BinaryOperatorNode node) { if (myOperand instanceof IdentifierNode) { String varName = ((IdentifierNode) myOperand).name; - // Allocate register for new lexical variable - int reg = allocateRegister(); - registerMap.put(varName, reg); + // Allocate register for new lexical variable and add to symbol table + int reg = addVariable(varName, "my"); // Compile RHS node.right.accept(this); @@ -470,6 +680,50 @@ public void visit(BinaryOperatorNode node) { return; } } + + // Special case: local $x = value + if (leftOp.operator.equals("local")) { + // Extract variable from "local" operand + Node localOperand = leftOp.operand; + + // Handle local $x (where $x is OperatorNode("$", IdentifierNode("x"))) + if (localOperand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) localOperand; + if (sigilOp.operator.equals("$") && 
sigilOp.operand instanceof IdentifierNode) { + String varName = "$" + ((IdentifierNode) sigilOp.operand).name; + + // Check if it's a lexical variable (should not be localized) + if (hasVariable(varName)) { + throwCompilerException("Can't localize lexical variable " + varName); + return; + } + + // It's a global variable - emit SLOW_OP to call GlobalRuntimeScalar.makeLocal() + String packageName = getCurrentPackage(); + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + int localReg = allocateRegister(); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_LOCAL_SCALAR); + emit(localReg); + emit(nameIdx); + + // Compile RHS + node.right.accept(this); + int valueReg = lastResultReg; + + // Assign value to the localized variable + // The localized variable is a RuntimeScalar, so we use set() on it + emit(Opcodes.STORE_GLOBAL_SCALAR); + emit(nameIdx); + emit(valueReg); + + lastResultReg = localReg; + return; + } + } + } } // Regular assignment: $x = value @@ -489,8 +743,8 @@ public void visit(BinaryOperatorNode node) { String rightLeftVarName = "$" + ((IdentifierNode) rightLeftOp.operand).name; // Pattern match: $x = $x + $y (emit ADD_ASSIGN) - if (leftVarName.equals(rightLeftVarName) && registerMap.containsKey(leftVarName)) { - int targetReg = registerMap.get(leftVarName); + if (leftVarName.equals(rightLeftVarName) && hasVariable(leftVarName)) { + int targetReg = getVariableRegister(leftVarName); // Compile RHS operand ($y) rightBin.right.accept(this); @@ -519,12 +773,13 @@ public void visit(BinaryOperatorNode node) { if (leftOp.operator.equals("$") && leftOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) leftOp.operand).name; - if (registerMap.containsKey(varName)) { + if (hasVariable(varName)) { // Lexical variable - copy to its register - int targetReg = registerMap.get(varName); + int targetReg = getVariableRegister(varName); 
emit(Opcodes.MOVE); emit(targetReg); emit(valueReg); + lastResultReg = targetReg; } else { // Global variable @@ -534,15 +789,63 @@ public void visit(BinaryOperatorNode node) { emit(valueReg); lastResultReg = valueReg; } + } else if (leftOp.operator.equals("@") && leftOp.operand instanceof IdentifierNode) { + // Array assignment: @array = ... + String varName = "@" + ((IdentifierNode) leftOp.operand).name; + + int arrayReg; + if (hasVariable(varName)) { + // Lexical array + arrayReg = getVariableRegister(varName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = "main::" + ((IdentifierNode) leftOp.operand).name; + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(arrayReg); + emit(nameIdx); + } + + // Populate array from list using setFromList + emit(Opcodes.ARRAY_SET_FROM_LIST); + emit(arrayReg); + emit(valueReg); + + lastResultReg = arrayReg; + } else if (leftOp.operator.equals("%") && leftOp.operand instanceof IdentifierNode) { + // Hash assignment: %hash = ... 
+ String varName = "%" + ((IdentifierNode) leftOp.operand).name; + + int hashReg; + if (hasVariable(varName)) { + // Lexical hash + hashReg = getVariableRegister(varName); + } else { + // Global hash - load it + hashReg = allocateRegister(); + String globalHashName = "main::" + ((IdentifierNode) leftOp.operand).name; + int nameIdx = addToStringPool(globalHashName); + emit(Opcodes.LOAD_GLOBAL_HASH); + emit(hashReg); + emit(nameIdx); + } + + // Populate hash from list using setFromList + emit(Opcodes.HASH_SET_FROM_LIST); + emit(hashReg); + emit(valueReg); + + lastResultReg = hashReg; } else { - throw new RuntimeException("Assignment to non-scalar not yet supported"); + throw new RuntimeException("Assignment to unsupported operator: " + leftOp.operator); } } else if (node.left instanceof IdentifierNode) { String varName = ((IdentifierNode) node.left).name; - if (registerMap.containsKey(varName)) { + if (hasVariable(varName)) { // Lexical variable - copy to its register - int targetReg = registerMap.get(varName); + int targetReg = getVariableRegister(varName); emit(Opcodes.MOVE); emit(targetReg); emit(valueReg); @@ -558,6 +861,9 @@ public void visit(BinaryOperatorNode node) { } else { throw new RuntimeException("Assignment to non-identifier not yet supported: " + node.left.getClass().getSimpleName()); } + + // Restore the calling context + currentCallContext = savedContext; return; } @@ -597,6 +903,12 @@ public void visit(BinaryOperatorNode node) { emit(rs1); emit(rs2); } + case "x" -> { + emit(Opcodes.REPEAT); + emit(rd); + emit(rs1); + emit(rs2); + } case "<=>" -> { emit(Opcodes.COMPARE_NUM); emit(rd); @@ -643,7 +955,7 @@ public void visit(BinaryOperatorNode node) { emit(rd); // Result register emit(rs1); // Code reference register emit(rs2); // Arguments register (RuntimeList to be converted to RuntimeArray) - emit(RuntimeContextType.SCALAR); // Context (TODO: detect from usage) + emit(currentCallContext); // Use current calling context // Note: CALL_SUB may return 
RuntimeControlFlowList // The interpreter will handle control flow propagation @@ -770,43 +1082,60 @@ public void visit(BinaryOperatorNode node) { } case "[" -> { // Array element access: $a[10] means get element 10 from array @a - // left: OperatorNode("$", IdentifierNode("a")) + // Also handles multidimensional: $a[0][1] means $a[0]->[1] + // left: OperatorNode("$", IdentifierNode("a")) OR BinaryOperatorNode (for chained access) // right: ArrayLiteralNode(index_expression) - if (!(node.left instanceof OperatorNode)) { - throw new RuntimeException("Array access requires variable on left side"); - } - OperatorNode leftOp = (OperatorNode) node.left; - if (!leftOp.operator.equals("$") || !(leftOp.operand instanceof IdentifierNode)) { - throw new RuntimeException("Array access requires scalar dereference: $var[index]"); - } + int arrayReg = -1; // Will be initialized in if/else branches - String varName = ((IdentifierNode) leftOp.operand).name; - String arrayVarName = "@" + varName; + if (node.left instanceof OperatorNode) { + // Simple case: $var[index] + OperatorNode leftOp = (OperatorNode) node.left; + if (!leftOp.operator.equals("$") || !(leftOp.operand instanceof IdentifierNode)) { + throwCompilerException("Array access requires scalar dereference: $var[index]"); + } - // Get the array - check lexical first, then global - int arrayReg; - if (registerMap.containsKey(arrayVarName)) { - // Lexical array - arrayReg = registerMap.get(arrayVarName); - } else { - // Global array - load it + String varName = ((IdentifierNode) leftOp.operand).name; + String arrayVarName = "@" + varName; + + // Get the array - check lexical first, then global + if (hasVariable(arrayVarName)) { + // Lexical array + arrayReg = getVariableRegister(arrayVarName); + } else { + // Global array - load it + arrayReg = allocateRegister(); + String globalArrayName = "main::" + varName; + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(arrayReg); + 
emit(nameIdx); + } + } else if (node.left instanceof BinaryOperatorNode) { + // Multidimensional case: $a[0][1] is really $a[0]->[1] + // Compile left side (which returns a scalar containing an array reference) + node.left.accept(this); + int scalarReg = lastResultReg; + + // Dereference the array reference to get the actual array + // Use SLOW_OP with SLOWOP_DEREF_ARRAY arrayReg = allocateRegister(); - String globalArrayName = "main::" + varName; - int nameIdx = addToStringPool(globalArrayName); - emit(Opcodes.LOAD_GLOBAL_ARRAY); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_DEREF_ARRAY); emit(arrayReg); - emit(nameIdx); + emit(scalarReg); + } else { + throwCompilerException("Array access requires variable or expression on left side"); } // Evaluate index expression // For ArrayLiteralNode, get the first element if (!(node.right instanceof ArrayLiteralNode)) { - throw new RuntimeException("Array access requires ArrayLiteralNode on right side"); + throwCompilerException("Array access requires ArrayLiteralNode on right side"); } ArrayLiteralNode indexNode = (ArrayLiteralNode) node.right; if (indexNode.elements.isEmpty()) { - throw new RuntimeException("Array access requires index expression"); + throwCompilerException("Array access requires index expression"); } // Compile the index expression @@ -837,9 +1166,9 @@ public void visit(BinaryOperatorNode node) { // Get the hash - check lexical first, then global int hashReg; - if (registerMap.containsKey(hashVarName)) { + if (hasVariable(hashVarName)) { // Lexical hash - hashReg = registerMap.get(hashVarName); + hashReg = getVariableRegister(hashVarName); } else { // Global hash - load it hashReg = allocateRegister(); @@ -899,32 +1228,172 @@ public void visit(OperatorNode node) { // Handle specific operators if (op.equals("my")) { - // my $x - variable declaration - // The operand will be OperatorNode("$", IdentifierNode("x")) + // my $x / my @x / my %x - variable declaration + // The operand 
will be OperatorNode("$"/"@"/"%", IdentifierNode("x")) if (node.operand instanceof OperatorNode) { OperatorNode sigilOp = (OperatorNode) node.operand; - if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { - String varName = "$" + ((IdentifierNode) sigilOp.operand).name; - int reg = allocateRegister(); - registerMap.put(varName, reg); + String sigil = sigilOp.operator; + + if (sigilOp.operand instanceof IdentifierNode) { + String varName = sigil + ((IdentifierNode) sigilOp.operand).name; + + // Check if this variable is captured by closures (sigilOp.id != 0) + if (sigilOp.id != 0) { + // Variable is captured by compiled named subs + // Store as persistent variable so both interpreted and compiled code can access it + // Don't use a local register; instead load/store through persistent globals + + // For now, retrieve the persistent variable and store in register + // This handles BEGIN-initialized variables + int reg = allocateRegister(); + int nameIdx = addToStringPool(varName); + + switch (sigil) { + case "$" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR); + emit(reg); + emit(nameIdx); + emit(sigilOp.id); + // Track this as a captured variable - map to the register we allocated + variableScopes.peek().put(varName, reg); + } + case "@" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY); + emit(reg); + emit(nameIdx); + emit(sigilOp.id); + variableScopes.peek().put(varName, reg); + } + case "%" -> { + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH); + emit(reg); + emit(nameIdx); + emit(sigilOp.id); + variableScopes.peek().put(varName, reg); + } + default -> throwCompilerException("Unsupported variable type: " + sigil); + } - // Load undef initially - emit(Opcodes.LOAD_UNDEF); - emit(reg); + lastResultReg = reg; + return; + } + + // Regular lexical variable (not captured) + int reg = 
addVariable(varName, "my"); + + // Normal initialization: load undef/empty array/empty hash + switch (sigil) { + case "$" -> { + emit(Opcodes.LOAD_UNDEF); + emit(reg); + } + case "@" -> { + emit(Opcodes.NEW_ARRAY); + emit(reg); + } + case "%" -> { + emit(Opcodes.NEW_HASH); + emit(reg); + } + default -> throwCompilerException("Unsupported variable type: " + sigil); + } lastResultReg = reg; return; } } throw new RuntimeException("Unsupported my operand: " + node.operand.getClass().getSimpleName()); + } else if (op.equals("our")) { + // our $x / our @x / our %x - package variable declaration + // The operand will be OperatorNode("$"/"@"/"%", IdentifierNode("x")) + if (node.operand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) node.operand; + String sigil = sigilOp.operator; + + if (sigilOp.operand instanceof IdentifierNode) { + String varName = sigil + ((IdentifierNode) sigilOp.operand).name; + + // Check if already declared in current scope + if (hasVariable(varName)) { + // Already declared, just return the existing register + lastResultReg = getVariableRegister(varName); + return; + } + + // Allocate register and add to symbol table + int reg = addVariable(varName, "our"); + + // Load from global variable + // Get current package from symbol table + String packageName = getCurrentPackage(); + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + switch (sigil) { + case "$" -> { + emit(Opcodes.LOAD_GLOBAL_SCALAR); + emit(reg); + emit(nameIdx); + } + case "@" -> { + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(reg); + emit(nameIdx); + } + case "%" -> { + emit(Opcodes.LOAD_GLOBAL_HASH); + emit(reg); + emit(nameIdx); + } + default -> throwCompilerException("Unsupported variable type: " + sigil); + } + + lastResultReg = reg; + return; + } + } + throw new RuntimeException("Unsupported our operand: " + node.operand.getClass().getSimpleName()); + } else if 
(op.equals("local")) { + // local $x - temporarily localize a global variable // The operand will be OperatorNode("$", IdentifierNode("x")) + if (node.operand instanceof OperatorNode) { + OperatorNode sigilOp = (OperatorNode) node.operand; + + if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { + String varName = "$" + ((IdentifierNode) sigilOp.operand).name; + + // Check if it's a lexical variable (should not be localized) + if (hasVariable(varName)) { + throwCompilerException("Can't localize lexical variable " + varName); + return; + } + + // It's a global variable - emit SLOW_OP to call GlobalRuntimeScalar.makeLocal() + String packageName = getCurrentPackage(); + String globalVarName = packageName + "::" + ((IdentifierNode) sigilOp.operand).name; + int nameIdx = addToStringPool(globalVarName); + + int rd = allocateRegister(); + emitWithToken(Opcodes.SLOW_OP, node.getIndex()); + emit(Opcodes.SLOWOP_LOCAL_SCALAR); + emit(rd); + emit(nameIdx); + + lastResultReg = rd; + return; + } + } + throw new RuntimeException("Unsupported local operand: " + node.operand.getClass().getSimpleName()); } else if (op.equals("$")) { // Scalar variable dereference: $x if (node.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) node.operand).name; - if (registerMap.containsKey(varName)) { + if (hasVariable(varName)) { // Lexical variable - use existing register - lastResultReg = registerMap.get(varName); + lastResultReg = getVariableRegister(varName); } else { // Global variable - load it // Add package prefix if not present (match compiler behavior) @@ -957,14 +1426,113 @@ public void visit(OperatorNode node) { return; } - // For now, only support @_ - other arrays require global variable support - throw new RuntimeException("Array variables other than @_ not yet supported: " + varName); + // Check if it's a lexical array + if (hasVariable(varName)) { + // Lexical array - use existing register + lastResultReg = 
getVariableRegister(varName); + return; + } + + // Global array - load it + int rd = allocateRegister(); + String globalArrayName = "main::" + ((IdentifierNode) node.operand).name; + int nameIdx = addToStringPool(globalArrayName); + + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(rd); + emit(nameIdx); + + lastResultReg = rd; } else { - throw new RuntimeException("Unsupported @ operand: " + node.operand.getClass().getSimpleName()); + throwCompilerException("Unsupported @ operand: " + node.operand.getClass().getSimpleName()); + } + } else if (op.equals("scalar")) { + // Scalar context: scalar(@array) or scalar(%hash) or scalar(expr) + // Forces scalar context on the operand + if (node.operand != null) { + // Special case: if operand is a ListNode with single array/hash variable, + // compile it directly in scalar context instead of list context + if (node.operand instanceof ListNode) { + ListNode listNode = (ListNode) node.operand; + if (listNode.elements.size() == 1) { + Node elem = listNode.elements.get(0); + if (elem instanceof OperatorNode) { + OperatorNode opNode = (OperatorNode) elem; + if (opNode.operator.equals("@")) { + // scalar(@array) - get array size + if (opNode.operand instanceof IdentifierNode) { + String varName = "@" + ((IdentifierNode) opNode.operand).name; + + int arrayReg; + if (varName.equals("@_")) { + arrayReg = 1; + } else if (hasVariable(varName)) { + arrayReg = getVariableRegister(varName); + } else { + // Global array + arrayReg = allocateRegister(); + String globalArrayName = "main::" + ((IdentifierNode) opNode.operand).name; + int nameIdx = addToStringPool(globalArrayName); + emit(Opcodes.LOAD_GLOBAL_ARRAY); + emit(arrayReg); + emit(nameIdx); + } + + // Emit ARRAY_SIZE + int rd = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emit(rd); + emit(arrayReg); + + lastResultReg = rd; + return; + } + } else if (opNode.operator.equals("%")) { + // scalar(%hash) - get hash size (not implemented yet) + throwCompilerException("scalar(%hash) not yet 
implemented"); + } + } + } + } + + // General case: compile operand and let ARRAY_SIZE handle type conversion + node.operand.accept(this); + int operandReg = lastResultReg; + + int rd = allocateRegister(); + emit(Opcodes.ARRAY_SIZE); + emit(rd); + emit(operandReg); + + lastResultReg = rd; + } else { + throwCompilerException("scalar operator requires an operand"); } } else if (op.equals("%")) { // Hash variable dereference: %x - throw new RuntimeException("Hash variables not yet supported"); + if (node.operand instanceof IdentifierNode) { + String varName = "%" + ((IdentifierNode) node.operand).name; + + // Check if it's a lexical hash + if (hasVariable(varName)) { + // Lexical hash - use existing register + lastResultReg = getVariableRegister(varName); + return; + } + + // Global hash - load it + int rd = allocateRegister(); + String globalHashName = "main::" + ((IdentifierNode) node.operand).name; + int nameIdx = addToStringPool(globalHashName); + + emit(Opcodes.LOAD_GLOBAL_HASH); + emit(rd); + emit(nameIdx); + + lastResultReg = rd; + } else { + throwCompilerException("Unsupported % operand: " + node.operand.getClass().getSimpleName()); + } } else if (op.equals("*")) { // Glob variable dereference: *x if (node.operand instanceof IdentifierNode) { @@ -1079,8 +1647,8 @@ public void visit(OperatorNode node) { if (node.operand instanceof IdentifierNode) { String varName = ((IdentifierNode) node.operand).name; - if (registerMap.containsKey(varName)) { - int varReg = registerMap.get(varName); + if (hasVariable(varName)) { + int varReg = getVariableRegister(varName); // Use optimized autoincrement/decrement opcodes if (isPostfix) { @@ -1110,8 +1678,8 @@ public void visit(OperatorNode node) { if (innerOp.operator.equals("$") && innerOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) innerOp.operand).name; - if (registerMap.containsKey(varName)) { - int varReg = registerMap.get(varName); + if (hasVariable(varName)) { + int varReg = 
getVariableRegister(varName); // Use optimized autoincrement/decrement opcodes if (isPostfix) { @@ -1182,6 +1750,35 @@ public void visit(OperatorNode node) { emit(oneReg); } + lastResultReg = rd; + } else if (op.equals("sleep")) { + // sleep $seconds + // Calls Time.sleep(seconds) + int rd = allocateRegister(); + + if (node.operand != null) { + // sleep($seconds) - evaluate operand + node.operand.accept(this); + int secondsReg = lastResultReg; + + // Emit SLOW_OP with SLOWOP_SLEEP + emit(Opcodes.SLOW_OP); + emit(Opcodes.SLOWOP_SLEEP); + emit(rd); + emit(secondsReg); + } else { + // sleep with no argument - defaults to infinity (but we'll use a large number) + int maxReg = allocateRegister(); + emit(Opcodes.LOAD_INT); + emit(maxReg); + emitInt(Integer.MAX_VALUE); + + emit(Opcodes.SLOW_OP); + emit(Opcodes.SLOWOP_SLEEP); + emit(rd); + emit(maxReg); + } + lastResultReg = rd; } else if (op.equals("die")) { // die $message; @@ -1261,6 +1858,14 @@ public void visit(OperatorNode node) { } lastResultReg = rd; + } else if (op.equals("undef")) { + // undef operator - returns undefined value + // Can be used standalone: undef + // Or with an operand to undef a variable: undef $x (not implemented yet) + int undefReg = allocateRegister(); + emit(Opcodes.LOAD_UNDEF); + emit(undefReg); + lastResultReg = undefReg; } else { throwCompilerException("Unsupported operator: " + op); } @@ -1476,11 +2081,85 @@ public void visit(SubroutineNode node) { // Anonymous subroutine: sub { ... } visitAnonymousSubroutine(node); } else { - // Named subroutine - not yet supported - throw new UnsupportedOperationException("Named subroutines not yet implemented in interpreter: " + node.name); + // Named subroutine: sub foo { ... } + // NOTE: Named subs are compiled by the parser, not here. + // They use the JVM compiler which has full closure support. + // This is fine because compiled and interpreted code share the same RuntimeCode API. 
+ visitNamedSubroutine(node); } } + /** + * Visit a named subroutine: sub foo { ... } + * + * NOTE: In practice, named subroutines are compiled by the parser using the JVM compiler, + * not the interpreter. This method exists for completeness but may not be called for + * typical named sub definitions. The parser creates compiled RuntimeCode objects that + * interoperate seamlessly with interpreted code via the shared RuntimeCode.apply() API. + */ + private void visitNamedSubroutine(SubroutineNode node) { + // Step 1: Collect outer variables used by this subroutine + Set usedVars = new HashSet<>(); + VariableCollectorVisitor collector = new VariableCollectorVisitor(usedVars); + node.block.accept(collector); + + // Step 2: Filter to only include lexical variables that exist in current scope + List closureVarNames = new ArrayList<>(); + List closureVarIndices = new ArrayList<>(); + + for (String varName : usedVars) { + int varIndex = getVariableRegister(varName); + if (varIndex != -1 && varIndex >= 3) { + closureVarNames.add(varName); + closureVarIndices.add(varIndex); + } + } + + // Step 3: Create a new BytecodeCompiler for the subroutine body + BytecodeCompiler subCompiler = new BytecodeCompiler(this.sourceName, node.getIndex(), this.errorUtil); + + // Step 4: Pre-populate sub-compiler's variable scope with captured variables + for (String varName : closureVarNames) { + subCompiler.addVariable(varName, "my"); + } + + // Step 5: Compile the subroutine body + InterpretedCode subCode = subCompiler.compile(node.block); + + // Step 6: Emit bytecode to create closure with captured variables at RUNTIME + int codeReg = allocateRegister(); + + if (closureVarIndices.isEmpty()) { + RuntimeScalar codeScalar = new RuntimeScalar((RuntimeCode) subCode); + int constIdx = addToConstantPool(codeScalar); + emit(Opcodes.LOAD_CONST); + emit(codeReg); + emit(constIdx); + } else { + int templateIdx = addToConstantPool(subCode); + emit(Opcodes.CREATE_CLOSURE); + emit(codeReg); + 
emit(templateIdx); + emit(closureVarIndices.size()); + for (int regIdx : closureVarIndices) { + emit(regIdx); + } + } + + // Step 7: Store in global namespace + String fullName = node.name; + if (!fullName.contains("::")) { + fullName = "main::" + fullName; + } + + int nameIdx = addToStringPool(fullName); + emit(Opcodes.STORE_GLOBAL_CODE); + emit(nameIdx); + emit(codeReg); + + lastResultReg = -1; + } + /** * Visit an anonymous subroutine: sub { ... } * @@ -1489,26 +2168,64 @@ public void visit(SubroutineNode node) { * * The result is an InterpretedCode wrapped in RuntimeScalar, stored in lastResultReg. */ + /** + * Visit an anonymous subroutine: sub { ... } + * + * Compiles the subroutine body to bytecode with closure support. + * Anonymous subs capture lexical variables from the enclosing scope. + */ private void visitAnonymousSubroutine(SubroutineNode node) { - // Create a new BytecodeCompiler for the subroutine body - BytecodeCompiler subCompiler = new BytecodeCompiler(this.sourceName, node.getIndex()); + // Step 1: Collect outer variables used by this subroutine + Set usedVars = new HashSet<>(); + VariableCollectorVisitor collector = new VariableCollectorVisitor(usedVars); + node.block.accept(collector); + + // Step 2: Filter to only include lexical variables that exist in current scope + List closureVarNames = new ArrayList<>(); + List closureVarIndices = new ArrayList<>(); + + for (String varName : usedVars) { + int varIndex = getVariableRegister(varName); + if (varIndex != -1 && varIndex >= 3) { + closureVarNames.add(varName); + closureVarIndices.add(varIndex); + } + } - // Compile the subroutine body to InterpretedCode - InterpretedCode subCode = subCompiler.compile(node.block); + // Step 3: Create a new BytecodeCompiler for the subroutine body + BytecodeCompiler subCompiler = new BytecodeCompiler(this.sourceName, node.getIndex(), this.errorUtil); + + // Step 4: Pre-populate sub-compiler's variable scope with captured variables + for (String varName : 
closureVarNames) { + subCompiler.addVariable(varName, "my"); + } - // Wrap InterpretedCode in RuntimeScalar - // Explicitly cast to RuntimeCode to ensure RuntimeScalar(RuntimeCode) constructor is called - RuntimeScalar codeScalar = new RuntimeScalar((RuntimeCode) subCode); + // Step 5: Compile the subroutine body + InterpretedCode subCode = subCompiler.compile(node.block); - // Store the wrapped code in constants pool and load it into a register - int constIdx = addToConstantPool(codeScalar); - int rd = allocateRegister(); + // Step 6: Create closure or simple code ref + int codeReg = allocateRegister(); - emit(Opcodes.LOAD_CONST); - emit(rd); - emit(constIdx); + if (closureVarIndices.isEmpty()) { + // No closures - just wrap the InterpretedCode + RuntimeScalar codeScalar = new RuntimeScalar((RuntimeCode) subCode); + int constIdx = addToConstantPool(codeScalar); + emit(Opcodes.LOAD_CONST); + emit(codeReg); + emit(constIdx); + } else { + // Has closures - emit CREATE_CLOSURE + int templateIdx = addToConstantPool(subCode); + emit(Opcodes.CREATE_CLOSURE); + emit(codeReg); + emit(templateIdx); + emit(closureVarIndices.size()); + for (int regIdx : closureVarIndices) { + emit(regIdx); + } + } - lastResultReg = rd; + lastResultReg = codeReg; } /** @@ -1594,11 +2311,23 @@ public void visit(For1Node node) { int listReg = lastResultReg; // Step 2: Convert to RuntimeArray if needed - // TODO: Handle list-to-array conversion - int arrayReg = allocateRegister(); - emit(Opcodes.CREATE_ARRAY); // Placeholder - need to convert list to array - emit(arrayReg); - emit(listReg); + // Check if listReg contains an array or needs conversion + int arrayReg; + + // If the list is an array variable (like @x), the register already contains the array + // Otherwise, we need to create a temporary array from the list + if (node.list instanceof OperatorNode && ((OperatorNode) node.list).operator.equals("@")) { + // Direct array variable - register contains RuntimeArray + arrayReg = listReg; + } 
else { + // Need to convert list to array + arrayReg = allocateRegister(); + emit(Opcodes.NEW_ARRAY); + emit(arrayReg); + emit(Opcodes.ARRAY_SET_FROM_LIST); + emit(arrayReg); + emit(listReg); + } // Step 3: Allocate iterator index register int indexReg = allocateRegister(); @@ -1612,20 +2341,29 @@ public void visit(For1Node node) { emit(sizeReg); emit(arrayReg); - // Step 5: Allocate loop variable register - int varReg = allocateRegister(); + // Step 5: Enter new scope for loop variable + enterScope(); + + // Step 6: Declare loop variable in the new scope + // CRITICAL: We must let addVariable allocate the register so it's synchronized + int varReg = -1; if (node.variable != null && node.variable instanceof OperatorNode) { OperatorNode varOp = (OperatorNode) node.variable; if (varOp.operator.equals("my") && varOp.operand instanceof OperatorNode) { OperatorNode sigilOp = (OperatorNode) varOp.operand; if (sigilOp.operator.equals("$") && sigilOp.operand instanceof IdentifierNode) { String varName = "$" + ((IdentifierNode) sigilOp.operand).name; - registerMap.put(varName, varReg); + varReg = addVariable(varName, "my"); } } } - // Step 6: Loop start - check if index < size + // If no variable declared, allocate a temporary register + if (varReg == -1) { + varReg = allocateRegister(); + } + + // Step 7: Loop start - check if index < size int loopStartPc = bytecode.size(); // Compare index with size @@ -1641,31 +2379,34 @@ public void visit(For1Node node) { int loopEndJumpPc = bytecode.size(); emitInt(0); // Placeholder for jump target - // Step 7: Get array element and assign to loop variable + // Step 8: Get array element and assign to loop variable emit(Opcodes.ARRAY_GET); emit(varReg); emit(arrayReg); emit(indexReg); - // Step 8: Execute body + // Step 9: Execute body if (node.body != null) { node.body.accept(this); } - // Step 9: Increment index + // Step 10: Increment index emit(Opcodes.ADD_SCALAR_INT); emit(indexReg); emit(indexReg); emitInt(1); - // Step 10: Jump 
back to loop start + // Step 11: Jump back to loop start emit(Opcodes.GOTO); emitInt(loopStartPc); - // Step 11: Loop end - patch the forward jump + // Step 12: Loop end - patch the forward jump int loopEndPc = bytecode.size(); patchJump(loopEndJumpPc, loopEndPc); + // Step 13: Exit scope + exitScope(); + lastResultReg = -1; // For loop returns empty } diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 8fc12f2b8..04a2d3430 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -221,6 +221,60 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.STORE_GLOBAL_CODE: { + // Store global code: GlobalVariable.globalCodeRefs.put(name, codeRef) + int nameIdx = bytecode[pc++] & 0xFF; + int codeReg = bytecode[pc++] & 0xFF; + String name = code.stringPool[nameIdx]; + RuntimeScalar codeRef = (RuntimeScalar) registers[codeReg]; + // Store the code reference in the global namespace + GlobalVariable.globalCodeRefs.put(name, codeRef); + break; + } + + case Opcodes.CREATE_CLOSURE: { + // Create closure with captured variables + // Format: CREATE_CLOSURE rd template_idx num_captures reg1 reg2 ... 
+ int rd = bytecode[pc++] & 0xFF; + int templateIdx = bytecode[pc++] & 0xFF; + int numCaptures = bytecode[pc++] & 0xFF; + + // Get the template InterpretedCode from constants + InterpretedCode template = (InterpretedCode) code.constants[templateIdx]; + + // Capture the current register values + RuntimeBase[] capturedVars = new RuntimeBase[numCaptures]; + for (int i = 0; i < numCaptures; i++) { + int captureReg = bytecode[pc++] & 0xFF; + capturedVars[i] = registers[captureReg]; + } + + // Create a new InterpretedCode with the captured variables + InterpretedCode closureCode = new InterpretedCode( + template.bytecode, + template.constants, + template.stringPool, + template.maxRegisters, + capturedVars, // The captured variables! + template.sourceName, + template.sourceLine, + template.pcToTokenIndex + ); + + // Wrap in RuntimeScalar + registers[rd] = new RuntimeScalar((RuntimeCode) closureCode); + break; + } + + case Opcodes.SET_SCALAR: { + // Set scalar value: registers[rd].set(registers[rs]) + // Used to set the value in a persistent scalar without overwriting the reference + int rd = bytecode[pc++] & 0xFF; + int rs = bytecode[pc++] & 0xFF; + ((RuntimeScalar) registers[rd]).set((RuntimeScalar) registers[rs]); + break; + } + // ================================================================= // ARITHMETIC OPERATORS // ================================================================= @@ -313,6 +367,21 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c break; } + case Opcodes.REPEAT: { + // String/list repetition: rd = rs1 x rs2 + int rd = bytecode[pc++] & 0xFF; + int rs1 = bytecode[pc++] & 0xFF; + int rs2 = bytecode[pc++] & 0xFF; + // Call Operator.repeat(base, count, context) + // Context: 1 = scalar context (for string repetition) + registers[rd] = Operator.repeat( + registers[rs1], + (RuntimeScalar) registers[rs2], + 1 // scalar context + ); + break; + } + case Opcodes.LENGTH: { // String length: rd = length(rs) int rd = 
bytecode[pc++] & 0xFF; @@ -408,6 +477,14 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int rd = bytecode[pc++] & 0xFF; int arrayReg = bytecode[pc++] & 0xFF; int indexReg = bytecode[pc++] & 0xFF; + + // Check type + if (!(registers[arrayReg] instanceof RuntimeArray)) { + throw new RuntimeException("ARRAY_GET: register " + arrayReg + " contains " + + (registers[arrayReg] == null ? "null" : registers[arrayReg].getClass().getName()) + + " instead of RuntimeArray"); + } + RuntimeArray arr = (RuntimeArray) registers[arrayReg]; RuntimeScalar idx = (RuntimeScalar) registers[indexReg]; // Uses RuntimeArray API directly @@ -438,11 +515,24 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c } case Opcodes.ARRAY_SIZE: { - // Array size: rd = scalar(@array) + // Array size: rd = scalar(@array) or scalar(list) int rd = bytecode[pc++] & 0xFF; - int arrayReg = bytecode[pc++] & 0xFF; - RuntimeArray arr = (RuntimeArray) registers[arrayReg]; - registers[rd] = new RuntimeScalar(arr.size()); + int operandReg = bytecode[pc++] & 0xFF; + RuntimeBase operand = registers[operandReg]; + + int size; + if (operand instanceof RuntimeArray) { + size = ((RuntimeArray) operand).size(); + } else if (operand instanceof RuntimeList) { + size = ((RuntimeList) operand).size(); + } else if (operand instanceof RuntimeScalar) { + // Scalar in array context - treat as 1-element list + size = 1; + } else { + throw new RuntimeException("ARRAY_SIZE: register " + operandReg + " contains unexpected type: " + + (operand == null ? 
"null" : operand.getClass().getName())); + } + registers[rd] = new RuntimeScalar(size); break; } @@ -899,8 +989,15 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c int startReg = bytecode[pc++] & 0xFF; int endReg = bytecode[pc++] & 0xFF; - RuntimeScalar start = (RuntimeScalar) registers[startReg]; - RuntimeScalar end = (RuntimeScalar) registers[endReg]; + RuntimeBase startBase = registers[startReg]; + RuntimeBase endBase = registers[endReg]; + + // Handle null registers by creating undef scalars + RuntimeScalar start = (startBase instanceof RuntimeScalar) ? (RuntimeScalar) startBase : + (startBase == null) ? new RuntimeScalar() : startBase.scalar(); + RuntimeScalar end = (endBase instanceof RuntimeScalar) ? (RuntimeScalar) endBase : + (endBase == null) ? new RuntimeScalar() : endBase.scalar(); + PerlRange range = PerlRange.createRange(start, end); registers[rd] = range; break; diff --git a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java index e3f2de458..476d88134 100644 --- a/src/main/java/org/perlonjava/interpreter/InterpretedCode.java +++ b/src/main/java/org/perlonjava/interpreter/InterpretedCode.java @@ -493,6 +493,23 @@ public String disassemble() { rs2 = bytecode[pc++] & 0xFF; // list register sb.append("HASH_SET_FROM_LIST r").append(rs1).append(".setFromList(r").append(rs2).append(")\n"); break; + case Opcodes.STORE_GLOBAL_CODE: + int codeNameIdx = bytecode[pc++] & 0xFF; + rs = bytecode[pc++] & 0xFF; + sb.append("STORE_GLOBAL_CODE '").append(stringPool[codeNameIdx]).append("' = r").append(rs).append("\n"); + break; + case Opcodes.CREATE_CLOSURE: + rd = bytecode[pc++] & 0xFF; + int templateIdx = bytecode[pc++] & 0xFF; + int numCaptures = bytecode[pc++] & 0xFF; + sb.append("CREATE_CLOSURE r").append(rd).append(" = closure(template[").append(templateIdx).append("], captures=["); + for (int i = 0; i < numCaptures; i++) { + if (i > 0) sb.append(", "); + 
int captureReg = bytecode[pc++] & 0xFF; + sb.append("r").append(captureReg); + } + sb.append("])\n"); + break; case Opcodes.NOT: rd = bytecode[pc++] & 0xFF; rs = bytecode[pc++] & 0xFF; @@ -524,6 +541,24 @@ public String disassemble() { String globName = stringPool[globNameIdx]; sb.append(" r").append(rd).append(" = *").append(globName); break; + case Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR: + case Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY: + case Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH: + // Format: [rd] [name_idx] [begin_id] + rd = bytecode[pc++] & 0xFF; + int varNameIdx = bytecode[pc++] & 0xFF; + int beginId = bytecode[pc++] & 0xFF; + String varName = stringPool[varNameIdx]; + sb.append(" r").append(rd).append(" = ").append(varName) + .append(" (BEGIN_").append(beginId).append(")"); + break; + case Opcodes.SLOWOP_LOCAL_SCALAR: + // Format: [rd] [name_idx] + rd = bytecode[pc++] & 0xFF; + int localNameIdx = bytecode[pc++] & 0xFF; + String localVarName = stringPool[localNameIdx]; + sb.append(" r").append(rd).append(" = local ").append(localVarName); + break; default: sb.append(" (operands not decoded)"); break; diff --git a/src/main/java/org/perlonjava/interpreter/Opcodes.java b/src/main/java/org/perlonjava/interpreter/Opcodes.java index 94faac9bc..8ea3a415b 100644 --- a/src/main/java/org/perlonjava/interpreter/Opcodes.java +++ b/src/main/java/org/perlonjava/interpreter/Opcodes.java @@ -425,6 +425,18 @@ public class Opcodes { /** Set hash from list: hash_reg = RuntimeHash.createHash(list_reg) then copy elements */ public static final byte HASH_SET_FROM_LIST = 96; + /** Store global code: GlobalVariable.getGlobalCodeRef().put(stringPool[nameIdx], codeRef) */ + public static final byte STORE_GLOBAL_CODE = 97; + + /** Create closure with captured variables: rd = createClosure(template, registers[rs1], registers[rs2], ...) + * Format: CREATE_CLOSURE rd template_const_idx num_captures reg1 reg2 ... 
*/ + public static final byte CREATE_CLOSURE = 98; + + /** Set scalar value: ((RuntimeScalar)registers[rd]).set((RuntimeScalar)registers[rs]) + * Format: SET_SCALAR rd rs + * Used to set the value in a persistent scalar without overwriting the reference */ + public static final byte SET_SCALAR = 99; + // ================================================================= // Slow Operation IDs (0-255) // ================================================================= @@ -497,6 +509,24 @@ public class Opcodes { /** Slow op ID: rd = getGlobalIO(name) - load glob/filehandle from global variables */ public static final int SLOWOP_LOAD_GLOB = 21; + /** Slow op ID: rd = Time.sleep(seconds) - sleep for specified seconds */ + public static final int SLOWOP_SLEEP = 22; + + /** Slow op ID: rd = deref_array(scalar_ref) - dereference array reference for multidimensional access */ + public static final int SLOWOP_DEREF_ARRAY = 23; + + /** Slow op ID: rd = PersistentVariable.retrieveBeginScalar(var_name, begin_id) - retrieve BEGIN scalar */ + public static final int SLOWOP_RETRIEVE_BEGIN_SCALAR = 24; + + /** Slow op ID: rd = PersistentVariable.retrieveBeginArray(var_name, begin_id) - retrieve BEGIN array */ + public static final int SLOWOP_RETRIEVE_BEGIN_ARRAY = 25; + + /** Slow op ID: rd = PersistentVariable.retrieveBeginHash(var_name, begin_id) - retrieve BEGIN hash */ + public static final int SLOWOP_RETRIEVE_BEGIN_HASH = 26; + + /** Slow op ID: rd = GlobalRuntimeScalar.makeLocal(var_name) - temporarily localize global variable */ + public static final int SLOWOP_LOCAL_SCALAR = 27; + // ================================================================= // OPCODES 93-255: RESERVED FOR FUTURE FAST OPERATIONS // ================================================================= diff --git a/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java b/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java index 1ded8f5f9..7da7d3e9e 100644 --- 
a/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java +++ b/src/main/java/org/perlonjava/interpreter/SlowOpcodeHandler.java @@ -145,6 +145,24 @@ public static int execute( case Opcodes.SLOWOP_LOAD_GLOB: return executeLoadGlob(bytecode, pc, registers, code); + case Opcodes.SLOWOP_SLEEP: + return executeSleep(bytecode, pc, registers); + + case Opcodes.SLOWOP_DEREF_ARRAY: + return executeDerefArray(bytecode, pc, registers); + + case Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR: + return executeRetrieveBeginScalar(bytecode, pc, registers, code); + + case Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY: + return executeRetrieveBeginArray(bytecode, pc, registers, code); + + case Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH: + return executeRetrieveBeginHash(bytecode, pc, registers, code); + + case Opcodes.SLOWOP_LOCAL_SCALAR: + return executeLocalScalar(bytecode, pc, registers, code); + default: throw new RuntimeException( "Unknown slow operation ID: " + slowOpId + @@ -181,6 +199,12 @@ public static String getSlowOpName(int slowOpId) { case Opcodes.SLOWOP_EVAL_STRING -> "eval"; case Opcodes.SLOWOP_SELECT -> "select"; case Opcodes.SLOWOP_LOAD_GLOB -> "load_glob"; + case Opcodes.SLOWOP_SLEEP -> "sleep"; + case Opcodes.SLOWOP_DEREF_ARRAY -> "deref_array"; + case Opcodes.SLOWOP_RETRIEVE_BEGIN_SCALAR -> "retrieve_begin_scalar"; + case Opcodes.SLOWOP_RETRIEVE_BEGIN_ARRAY -> "retrieve_begin_array"; + case Opcodes.SLOWOP_RETRIEVE_BEGIN_HASH -> "retrieve_begin_hash"; + case Opcodes.SLOWOP_LOCAL_SCALAR -> "local_scalar"; default -> "slowop_" + slowOpId; }; } @@ -560,6 +584,156 @@ private static int executeLoadGlob( return pc; } + /** + * Sleep for specified seconds. 
+ * Format: [rd] [rs_seconds] + * + * @param bytecode The bytecode array + * @param pc The program counter + * @param registers The register file + * @return The new program counter + */ + private static int executeSleep( + byte[] bytecode, + int pc, + RuntimeBase[] registers) { + + int rd = bytecode[pc++] & 0xFF; + int secondsReg = bytecode[pc++] & 0xFF; + + // Convert to scalar (handles both RuntimeScalar and RuntimeList) + RuntimeBase secondsBase = registers[secondsReg]; + RuntimeScalar seconds = secondsBase.scalar(); + + // Call Time.sleep() + RuntimeScalar result = org.perlonjava.operators.Time.sleep(seconds); + + registers[rd] = result; + return pc; + } + + /** + * Dereference array reference for multidimensional array access. + * Handles: $array[0][1] which is really $array[0]->[1] + * + * @param bytecode The bytecode array + * @param pc Program counter (points after slowOpId) + * @param registers Register array + * @return Updated program counter + */ + private static int executeDerefArray( + byte[] bytecode, + int pc, + RuntimeBase[] registers) { + + int rd = bytecode[pc++] & 0xFF; + int scalarReg = bytecode[pc++] & 0xFF; + + RuntimeBase scalarBase = registers[scalarReg]; + + // If it's already an array, use it directly + if (scalarBase instanceof RuntimeArray) { + registers[rd] = scalarBase; + return pc; + } + + // Otherwise, dereference as array reference + RuntimeScalar scalar = scalarBase.scalar(); + + // Get the dereferenced array using Perl's array dereference semantics + RuntimeArray array = scalar.arrayDeref(); + + registers[rd] = array; + return pc; + } + + /** + * SLOWOP_RETRIEVE_BEGIN_SCALAR: Retrieve persistent scalar from BEGIN block + * Format: [SLOWOP_RETRIEVE_BEGIN_SCALAR] [rd] [nameIdx] [begin_id] + * Effect: rd = PersistentVariable.retrieveBeginScalar(stringPool[nameIdx], begin_id) + */ + private static int executeRetrieveBeginScalar( + byte[] bytecode, + int pc, + RuntimeBase[] registers, + InterpretedCode code) { + + int rd = 
bytecode[pc++] & 0xFF; + int nameIdx = bytecode[pc++] & 0xFF; + int beginId = bytecode[pc++] & 0xFF; + + String varName = code.stringPool[nameIdx]; + RuntimeScalar result = PersistentVariable.retrieveBeginScalar(varName, beginId); + + registers[rd] = result; + return pc; + } + + /** + * SLOWOP_RETRIEVE_BEGIN_ARRAY: Retrieve persistent array from BEGIN block + * Format: [SLOWOP_RETRIEVE_BEGIN_ARRAY] [rd] [nameIdx] [begin_id] + * Effect: rd = PersistentVariable.retrieveBeginArray(stringPool[nameIdx], begin_id) + */ + private static int executeRetrieveBeginArray( + byte[] bytecode, + int pc, + RuntimeBase[] registers, + InterpretedCode code) { + + int rd = bytecode[pc++] & 0xFF; + int nameIdx = bytecode[pc++] & 0xFF; + int beginId = bytecode[pc++] & 0xFF; + + String varName = code.stringPool[nameIdx]; + RuntimeArray result = PersistentVariable.retrieveBeginArray(varName, beginId); + + registers[rd] = result; + return pc; + } + + /** + * SLOWOP_RETRIEVE_BEGIN_HASH: Retrieve persistent hash from BEGIN block + * Format: [SLOWOP_RETRIEVE_BEGIN_HASH] [rd] [nameIdx] [begin_id] + * Effect: rd = PersistentVariable.retrieveBeginHash(stringPool[nameIdx], begin_id) + */ + private static int executeRetrieveBeginHash( + byte[] bytecode, + int pc, + RuntimeBase[] registers, + InterpretedCode code) { + + int rd = bytecode[pc++] & 0xFF; + int nameIdx = bytecode[pc++] & 0xFF; + int beginId = bytecode[pc++] & 0xFF; + + String varName = code.stringPool[nameIdx]; + RuntimeHash result = PersistentVariable.retrieveBeginHash(varName, beginId); + + registers[rd] = result; + return pc; + } + + /** + * SLOWOP_LOCAL_SCALAR: Temporarily localize a global scalar variable + * Format: [SLOWOP_LOCAL_SCALAR] [rd] [nameIdx] + * Effect: rd = GlobalRuntimeScalar.makeLocal(stringPool[nameIdx]) + */ + private static int executeLocalScalar( + byte[] bytecode, + int pc, + RuntimeBase[] registers, + InterpretedCode code) { + + int rd = bytecode[pc++] & 0xFF; + int nameIdx = bytecode[pc++] & 0xFF; + + String 
varName = code.stringPool[nameIdx]; + RuntimeScalar result = org.perlonjava.runtime.GlobalRuntimeScalar.makeLocal(varName); + + registers[rd] = result; + return pc; + } + private SlowOpcodeHandler() { // Utility class - no instantiation } diff --git a/src/main/java/org/perlonjava/interpreter/VariableCaptureAnalyzer.java b/src/main/java/org/perlonjava/interpreter/VariableCaptureAnalyzer.java new file mode 100644 index 000000000..0a9ce9be0 --- /dev/null +++ b/src/main/java/org/perlonjava/interpreter/VariableCaptureAnalyzer.java @@ -0,0 +1,183 @@ +package org.perlonjava.interpreter; + +import org.perlonjava.astnode.*; +import org.perlonjava.astvisitor.Visitor; + +import java.util.*; + +/** + * Analyzes which lexical variables in the main script are captured by named subroutines. + * + *

<p>In interpreter mode, when named subroutines are compiled, they need access to + * lexical variables from the outer scope. This analyzer identifies which variables + * need to be stored in persistent global storage (using the BEGIN mechanism) so + * both the interpreter and compiled code can access them.</p>

+ * + *

<h2>Example</h2>

+ * <pre>
+ * my $width = 20;
+ * sub neighbors {
+ *     # Uses $width - needs persistent storage
+ *     return $width * 2;
+ * }
+ * </pre>
+ * + *

<h2>Algorithm</h2>

+ * <ol>
+ *   <li>Scan main script AST for named subroutine definitions</li>
+ *   <li>For each named sub, collect all variable references</li>
+ *   <li>Filter to only include lexical variables from outer scope</li>
+ *   <li>Return set of captured variable names</li>
+ * </ol>
+ * + * @see BytecodeCompiler + * @see PersistentVariable + */ +public class VariableCaptureAnalyzer { + + /** + * Analyzes which variables in the main script are captured by named subroutines. + * + * @param mainScript The AST of the main script (typically a BlockNode) + * @param outerScopeVars Set of variable names declared in the outer (main) scope + * @return Set of variable names that need persistent storage + */ + public static Set analyze(Node mainScript, Set outerScopeVars) { + Set capturedVars = new HashSet<>(); + + // Find all named subroutine definitions + List namedSubs = findNamedSubroutines(mainScript); + + // For each named sub, find which outer variables it references + for (SubroutineNode sub : namedSubs) { + Set referencedVars = findVariableReferences(sub.block); + + // Only include variables that are declared in outer scope + for (String var : referencedVars) { + if (outerScopeVars.contains(var)) { + capturedVars.add(var); + } + } + } + + return capturedVars; + } + + /** + * Recursively finds all named subroutine definitions in the AST. 
+ */ + private static List findNamedSubroutines(Node node) { + List subs = new ArrayList<>(); + + if (node instanceof SubroutineNode) { + SubroutineNode sub = (SubroutineNode) node; + // Only include named subroutines (not anonymous closures) + if (sub.name != null && !sub.name.isEmpty()) { + subs.add(sub); + } + } + + // Recursively search children + if (node instanceof BlockNode) { + for (Node child : ((BlockNode) node).elements) { + subs.addAll(findNamedSubroutines(child)); + } + } else if (node instanceof OperatorNode) { + OperatorNode op = (OperatorNode) node; + if (op.operand != null) { + subs.addAll(findNamedSubroutines(op.operand)); + } + } else if (node instanceof For1Node) { + For1Node forNode = (For1Node) node; + if (forNode.body != null) { + subs.addAll(findNamedSubroutines(forNode.body)); + } + } else if (node instanceof For3Node) { + For3Node forNode = (For3Node) node; + if (forNode.body != null) { + subs.addAll(findNamedSubroutines(forNode.body)); + } + } else if (node instanceof BinaryOperatorNode) { + BinaryOperatorNode bin = (BinaryOperatorNode) node; + if (bin.left != null) subs.addAll(findNamedSubroutines(bin.left)); + if (bin.right != null) subs.addAll(findNamedSubroutines(bin.right)); + } else if (node instanceof TernaryOperatorNode) { + TernaryOperatorNode tern = (TernaryOperatorNode) node; + if (tern.condition != null) subs.addAll(findNamedSubroutines(tern.condition)); + if (tern.trueExpr != null) subs.addAll(findNamedSubroutines(tern.trueExpr)); + if (tern.falseExpr != null) subs.addAll(findNamedSubroutines(tern.falseExpr)); + } + + return subs; + } + + /** + * Recursively finds all variable references in a node and its children. + * Returns variable names with their sigil (e.g., "$width", "@array", "%hash"). 
+ */ + private static Set findVariableReferences(Node node) { + Set vars = new HashSet<>(); + + if (node == null) { + return vars; + } + + // Check if this node is a variable reference + if (node instanceof IdentifierNode) { + IdentifierNode id = (IdentifierNode) node; + String name = id.name; + // Only include lexical variables (not package variables with ::) + if (!name.contains("::")) { + vars.add(name); + } + } + + // Recursively search children + if (node instanceof BlockNode) { + for (Node child : ((BlockNode) node).elements) { + vars.addAll(findVariableReferences(child)); + } + } else if (node instanceof OperatorNode) { + OperatorNode op = (OperatorNode) node; + if (op.operand != null) { + vars.addAll(findVariableReferences(op.operand)); + } + } else if (node instanceof SubroutineNode) { + // Don't recurse into nested subroutines - they have their own scope + // We only care about variables in the immediate subroutine + SubroutineNode sub = (SubroutineNode) node; + if (sub.block != null) { + vars.addAll(findVariableReferences(sub.block)); + } + } else if (node instanceof For1Node) { + For1Node forNode = (For1Node) node; + if (forNode.variable != null) vars.addAll(findVariableReferences(forNode.variable)); + if (forNode.list != null) vars.addAll(findVariableReferences(forNode.list)); + if (forNode.body != null) vars.addAll(findVariableReferences(forNode.body)); + } else if (node instanceof For3Node) { + For3Node forNode = (For3Node) node; + if (forNode.initialization != null) vars.addAll(findVariableReferences(forNode.initialization)); + if (forNode.condition != null) vars.addAll(findVariableReferences(forNode.condition)); + if (forNode.increment != null) vars.addAll(findVariableReferences(forNode.increment)); + if (forNode.body != null) vars.addAll(findVariableReferences(forNode.body)); + } else if (node instanceof BinaryOperatorNode) { + BinaryOperatorNode bin = (BinaryOperatorNode) node; + if (bin.left != null) 
vars.addAll(findVariableReferences(bin.left)); + if (bin.right != null) vars.addAll(findVariableReferences(bin.right)); + } else if (node instanceof TernaryOperatorNode) { + TernaryOperatorNode tern = (TernaryOperatorNode) node; + if (tern.condition != null) vars.addAll(findVariableReferences(tern.condition)); + if (tern.trueExpr != null) vars.addAll(findVariableReferences(tern.trueExpr)); + if (tern.falseExpr != null) vars.addAll(findVariableReferences(tern.falseExpr)); + } else if (node instanceof ListNode) { + ListNode list = (ListNode) node; + for (Node element : list.elements) { + if (element != null) { + vars.addAll(findVariableReferences(element)); + } + } + } + + return vars; + } +} diff --git a/src/main/java/org/perlonjava/runtime/GlobalVariable.java b/src/main/java/org/perlonjava/runtime/GlobalVariable.java index 80b26c136..1a879e529 100644 --- a/src/main/java/org/perlonjava/runtime/GlobalVariable.java +++ b/src/main/java/org/perlonjava/runtime/GlobalVariable.java @@ -32,7 +32,7 @@ public class GlobalVariable { // Maps fully-qualified names (package::subname) to indicate they should be called // as user-defined subroutines instead of built-in operators public static final Map isSubs = new HashMap<>(); - static final Map globalCodeRefs = new HashMap<>(); + public static final Map globalCodeRefs = new HashMap<>(); static final Map globalIORefs = new HashMap<>(); static final Map globalFormatRefs = new HashMap<>();