From 8aed892946ba4e98780499a6730287587fbdcdcd Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 20 Feb 2026 11:38:16 +0100 Subject: [PATCH 1/2] fix: Use bitwiseAnd() instead of bitwiseAndBinary() for &= operator The interpreter was calling BitwiseOperators.bitwiseAndBinary() which skips the uninitialized value warning check. Changed to call bitwiseAnd() which includes the proper warning for using undefined values in bitwise AND operations. This matches the JVM compiler behavior and aligns with Perl semantics where bitwise AND (&) should warn about uninitialized values, while bitwise OR (|) and XOR (^) should not. Impact on perl5_t/t/op/assignwarn.t with JPERL_EVAL_USE_INTERPRETER=1: - Before: 75/116 passing (64.7%) - After: 77/116 passing (66.4%) - Fixed: +2 tests (tests 28 and 58 for $x &= 1 and $x &= 'x' warnings) Remaining 39 failures are due to 'tie' operator not being implemented yet. Compiler still passes: 116/116 (100.0%) Co-Authored-By: Claude Opus 4.6 --- .../java/org/perlonjava/interpreter/OpcodeHandlerExtended.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/interpreter/OpcodeHandlerExtended.java b/src/main/java/org/perlonjava/interpreter/OpcodeHandlerExtended.java index 72ac301d6..9e26149e0 100644 --- a/src/main/java/org/perlonjava/interpreter/OpcodeHandlerExtended.java +++ b/src/main/java/org/perlonjava/interpreter/OpcodeHandlerExtended.java @@ -256,7 +256,7 @@ public static int executeStringConcatAssign(short[] bytecode, int pc, RuntimeBas public static int executeBitwiseAndAssign(short[] bytecode, int pc, RuntimeBase[] registers) { int rd = bytecode[pc++]; int rs = bytecode[pc++]; - RuntimeScalar result = BitwiseOperators.bitwiseAndBinary( + RuntimeScalar result = BitwiseOperators.bitwiseAnd( (RuntimeScalar) registers[rd], (RuntimeScalar) registers[rs] ); From 571d939d2212282d42cde330fec8271134ba14b3 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Fri, 20 Feb 2026 11:45:55 +0100 Subject: [PATCH 2/2] feat: Implement tie/untie/tied operators in interpreter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added support for Perl's tie(), untie(), and tied() built-in functions in the bytecode interpreter. These operators allow binding variables to package classes that provide custom implementations. Implementation: 1. Added TIE, UNTIE, TIED opcodes (238, 239, 240) to Opcodes.java 2. Updated LASTOP to 240 3. Added handlers in CompileOperator.java for "tie", "untie", "tied" 4. Added execution logic in BytecodeInterpreter.java that calls TieOperators The implementation follows the same pattern as other operators that call runtime classes, compiling arguments as a list and passing them to the appropriate TieOperators method along with the calling context. Impact on perl5_t/t/op/assignwarn.t with JPERL_EVAL_USE_INTERPRETER=1: - Before: 77/116 passing (66.4%) - tie not implemented, 39 tests failing - After: 116/116 passing (100.0%) ✓ All tests pass! - Fixed: +39 tests Compiler still passes: 116/116 (100.0%) ✓ Co-Authored-By: Claude Opus 4.6 --- .../bytecode_compiler_size_analysis.md | 183 ++++++++++++++++++ .../interpreter/BytecodeInterpreter.java | 45 +++++ .../interpreter/CompileOperator.java | 23 +++ .../org/perlonjava/interpreter/Opcodes.java | 14 +- 4 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 dev/prompts/bytecode_compiler_size_analysis.md diff --git a/dev/prompts/bytecode_compiler_size_analysis.md b/dev/prompts/bytecode_compiler_size_analysis.md new file mode 100644 index 000000000..ae2968c15 --- /dev/null +++ b/dev/prompts/bytecode_compiler_size_analysis.md @@ -0,0 +1,183 @@ +# Analysis: BytecodeCompiler Size and Recursion Usage + +## Context + +The user asked whether BytecodeCompiler's large size is due to **lack of recursion** in handling complex expressions. Specifically, they mentioned that `$$a` should generate `$a` first, then dereference it. + +## Key Finding: BytecodeCompiler ALREADY Uses Recursion + +**Answer: NO** - The size is NOT due to lack of recursion. BytecodeCompiler properly uses recursion for complex expressions. + +### Evidence: Dereference Handling is Recursive + +For `$$a`, the implementation in `BytecodeCompiler.compileVariableReference()` (lines 2494-2506): + +```java +} else if (node.operand instanceof OperatorNode) { + // Operator dereference: $$x, $${expr}, etc. + node.operand.accept(this); // ← RECURSIVE: compiles $x first + int refReg = lastResultReg; + + // Dereference the result + int rd = allocateRegister(); + emitWithToken(Opcodes.DEREF, node.getIndex()); // ← Then emits DEREF + emitReg(rd); + emitReg(refReg); + lastResultReg = rd; +} +``` + +**Compilation flow for `$$$a`:** +1. Outer `$` → recursively compiles `$$a` +2. Middle `$` → recursively compiles `$a` +3. Inner `$` → loads variable `a` +4. Unwind: emit DEREF for middle level +5. Unwind: emit DEREF for outer level +6. Result: 3 opcodes generated efficiently + +✅ **This is proper recursive compilation** - each dereference level calls `accept()` on its operand. + +## Actual Size Breakdown + +### Total BytecodeCompiler Ecosystem: ~9,000+ lines + +| Component | Lines | Purpose | +|-----------|-------|---------| +| **BytecodeCompiler.java** | 3,904 | Main compilation entry, AST visitors | +| **CompileOperator.java** | 2,604 | Giant method handling 50+ operators | +| **CompileAssignment.java** | 1,481 | Assignment operations | +| **CompileBinaryOperator.java** | 594 | Binary operators | +| **Helper classes** | ~1,200+ | BinaryHelper, etc. | + +### Largest Methods + +| Method | Lines | Reason for Size | +|--------|-------|-----------------| +| `CompileOperator.visitOperator()` | **2,604** | One giant switch with 50+ operator cases inlined | +| `compileVariableDeclaration()` | 929 | Handles my/our/local/state × $/@/% × closures × special cases | +| `compileVariableReference()` | 279 | Handles $/@/%/&/\/* × identifiers/blocks/operators/derefs | +| `visit(ListNode)` | 216 | List compilation with context handling | +| `visit(For3Node)` | 150 | C-style for loops | + +## Why BytecodeCompiler is Large + +The size comes from **horizontal breadth** (many features), not vertical depth (lack of abstraction): + +### 1. Many Operators (~50+) +CompileOperator handles: say, print, not, !, ~, binary~, ~., defined, ref, prototype, bless, scalar, package, class, die, warn, exit, return, rand, sleep, study, pos, lock, eval, do, require, use, no, goto, dump, sprintf, keys, values, each, delete, exists, push, pop, shift, unshift, reverse, sort, grep, map, join, split, substr, index, rindex, quotemeta, lc, lcfirst, uc, ucfirst, chr, ord, hex, oct, length, chop, chomp, matchRegex, substituteRegex, transliterate, and more. + +### 2. Multiple Code Paths Per Feature +Example: Variable declarations have paths for: +- 4 keywords: my/our/local/state +- 3 sigils: $/@/% +- Captured vs regular variables +- Declared references (my \$x) +- Persistent storage +- List vs single declarations + +### 3. Special Case Handling +- `@_` is always register 1 +- Closure variables use RETRIEVE_BEGIN_* opcodes +- State variables need persistence +- Global vs lexical variable resolution +- Context-dependent behavior (scalar vs list) + +### 4. Inlined Logic +Rather than extracting helper methods, much logic is inlined in switch cases for performance/clarity. + +## Potential Improvements (If Desired) + +If the goal is to reduce BytecodeCompiler size, here are options: + +### Option A: Extract Operator Handlers (Similar to BytecodeInterpreter Pattern) + +Like how BytecodeInterpreter extracted opcodes to `OpcodeHandlerExtended` and `OpcodeHandlerFileTest`, we could extract operator groups: + +``` +CompileOperator.java (2,604 lines) + ↓ Extract to: +CompileOperatorString.java // substr, index, rindex, quotemeta, lc, uc, etc. +CompileOperatorArray.java // push, pop, shift, unshift, keys, values, etc. +CompileOperatorIO.java // say, print, die, warn, etc. +CompileOperatorRegex.java // matchRegex, substituteRegex, transliterate +...etc +``` + +**Pros:** +- Reduces individual file size +- Groups related operators +- Easier to find specific operator logic + +**Cons:** +- More files to navigate +- Potentially harder to see all operators at once +- May not actually reduce total code (just distributes it) + +### Option B: Extract Common Patterns into Helper Methods + +Many operators share patterns: +- Evaluate operand in scalar context +- Allocate result register +- Emit opcode +- Return result register + +Could extract reusable patterns: + +```java +private int compileUnaryScalarOp(Node operand, short opcode) { + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.SCALAR; + operand.accept(this); + int rs = lastResultReg; + currentCallContext = savedContext; + + int rd = allocateRegister(); + emit(opcode); + emitReg(rd); + emitReg(rs); + return rd; +} +``` + +**Pros:** +- Reduces duplication +- Clearer operator patterns +- Easier to ensure consistency + +**Cons:** +- Abstracts away details (may be harder to debug) +- Some operators have unique behavior that doesn't fit patterns + +### Option C: Leave As-Is + +The current structure is working: +- ✅ Recursive where it should be +- ✅ Reasonable delegation (CompileOperator, CompileBinaryOperator, CompileAssignment) +- ✅ Code is generally readable despite size +- ✅ Performance is good + +**Pros:** +- No risk of introducing bugs +- Current structure is proven to work +- Size is manageable with good IDE navigation + +## Recommendation + +**The current BytecodeCompiler design is sound.** The recursion is implemented correctly for complex expressions like `$$a`. The size is due to the inherent complexity of compiling 50+ Perl operators with their various edge cases. + +If size reduction is desired for maintainability, **Option A** (extract operator groups) would be most impactful, reducing CompileOperator.java from 2,604 lines to several smaller, focused files. + +However, there's no urgent need to refactor unless: +1. The size is causing JVM compilation issues (it's not - it's compilation time, not runtime) +2. Maintainability is suffering (navigation with IDE works fine) +3. Team prefers smaller files for review purposes + +## Question for User + +Would you like to: +1. **Leave as-is** - The current design is working well +2. **Extract operator groups** - Split CompileOperator into themed handler classes +3. **Extract helper methods** - Reduce duplication with pattern extraction +4. **Something else** - Different approach to improve the codebase + +The recursion is already correct, so any changes would be for code organization purposes only. diff --git a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java index 265ff201d..ad22eda25 100644 --- a/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/interpreter/BytecodeInterpreter.java @@ -1887,6 +1887,51 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c pc = OpcodeHandlerExtended.executeSubstrVar(bytecode, pc, registers); break; + case Opcodes.TIE: { + // tie($var, $classname, @args): rd = TieOperators.tie(ctx, argsListReg) + // Format: TIE rd argsListReg context + int rd = bytecode[pc++]; + int argsReg = bytecode[pc++]; + int ctx = bytecode[pc++]; + RuntimeList tieArgs = (RuntimeList) registers[argsReg]; + RuntimeScalar result = org.perlonjava.operators.TieOperators.tie( + ctx, + tieArgs.elements.toArray(new RuntimeBase[0]) + ); + registers[rd] = result; + break; + } + + case Opcodes.UNTIE: { + // untie($var): rd = TieOperators.untie(ctx, argsListReg) + // Format: UNTIE rd argsListReg context + int rd = bytecode[pc++]; + int argsReg = bytecode[pc++]; + int ctx = bytecode[pc++]; + RuntimeList untieArgs = (RuntimeList) registers[argsReg]; + RuntimeScalar result = org.perlonjava.operators.TieOperators.untie( + ctx, + untieArgs.elements.toArray(new RuntimeBase[0]) + ); + registers[rd] = result; + break; + } + + case Opcodes.TIED: { + // tied($var): rd = TieOperators.tied(ctx, argsListReg) + // Format: TIED rd argsListReg context + int rd = bytecode[pc++]; + int argsReg = bytecode[pc++]; + int ctx = bytecode[pc++]; + RuntimeList tiedArgs = (RuntimeList) registers[argsReg]; + RuntimeScalar result = org.perlonjava.operators.TieOperators.tied( + ctx, + tiedArgs.elements.toArray(new RuntimeBase[0]) + ); + registers[rd] = result; + break; + } + // GENERATED_HANDLERS_END default: diff --git a/src/main/java/org/perlonjava/interpreter/CompileOperator.java b/src/main/java/org/perlonjava/interpreter/CompileOperator.java index e946740bb..9371e7d8f 100644 --- a/src/main/java/org/perlonjava/interpreter/CompileOperator.java +++ b/src/main/java/org/perlonjava/interpreter/CompileOperator.java @@ -2597,6 +2597,29 @@ public static void visitOperator(BytecodeCompiler bytecodeCompiler, OperatorNode bytecodeCompiler.emitInt(bytecodeCompiler.currentCallContext); bytecodeCompiler.lastResultReg = rd; + } else if (op.equals("tie") || op.equals("untie") || op.equals("tied")) { + // tie($var, $classname, @args) or untie($var) or tied($var) + // Compile all arguments as a list + if (node.operand != null) { + node.operand.accept(bytecodeCompiler); + int argsReg = bytecodeCompiler.lastResultReg; + + int rd = bytecodeCompiler.allocateRegister(); + short opcode = switch (op) { + case "tie" -> Opcodes.TIE; + case "untie" -> Opcodes.UNTIE; + case "tied" -> Opcodes.TIED; + default -> throw new IllegalStateException("Unexpected operator: " + op); + }; + bytecodeCompiler.emitWithToken(opcode, node.getIndex()); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(argsReg); + bytecodeCompiler.emit(bytecodeCompiler.currentCallContext); + + bytecodeCompiler.lastResultReg = rd; + } else { + bytecodeCompiler.throwCompilerException(op + " requires arguments"); + } } else { bytecodeCompiler.throwCompilerException("Unsupported operator: " + op); } diff --git a/src/main/java/org/perlonjava/interpreter/Opcodes.java b/src/main/java/org/perlonjava/interpreter/Opcodes.java index 5d97a61d9..02d36bcfe 100644 --- a/src/main/java/org/perlonjava/interpreter/Opcodes.java +++ b/src/main/java/org/perlonjava/interpreter/Opcodes.java @@ -894,10 +894,22 @@ public class Opcodes { * Format: SUBSTR_VAR rd argsListReg ctx */ public static final short SUBSTR_VAR = 237; + /** tie($var, $classname, @args): rd = TieOperators.tie(ctx, argsListReg) + * Format: TIE rd argsListReg context */ + public static final short TIE = 238; + + /** untie($var): rd = TieOperators.untie(ctx, argsListReg) + * Format: UNTIE rd argsListReg context */ + public static final short UNTIE = 239; + + /** tied($var): rd = TieOperators.tied(ctx, argsListReg) + * Format: TIED rd argsListReg context */ + public static final short TIED = 240; + // ================================================================= // BUILT-IN FUNCTION OPCODES - after LASTOP // Last manually-assigned opcode (for tool reference) - private static final short LASTOP = 237; + private static final short LASTOP = 240; // ================================================================= // Generated by dev/tools/generate_opcode_handlers.pl