From 28a8fdc5db18a2deacec891240f9d25a9149c2f3 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 12:03:57 +0100 Subject: [PATCH 01/10] Add HASH_DEREF_FETCH and ARRAY_DEREF_FETCH superoperators These superoperators combine multiple bytecode instructions into single operations for better interpreter performance: - HASH_DEREF_FETCH (opcode 381): Combines DEREF_HASH + LOAD_STRING + HASH_GET for patterns like $hashref->{key} with bareword or string literal keys - ARRAY_DEREF_FETCH (opcode 382): Combines DEREF_ARRAY + LOAD_INT + ARRAY_GET for patterns like $arrayref->[n] with integer literal indices Based on bytecode analysis of ExifTool tests, HASH_DEREF_FETCH alone eliminates ~2,498 instruction sequences (~7.3% of hash operations). Design document: dev/design/superoperators.md Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/superoperators.md | 407 ++++++++++++++++++ .../backend/bytecode/BytecodeCompiler.java | 99 ++++- .../backend/bytecode/BytecodeInterpreter.java | 50 +++ .../bytecode/CompileBinaryOperator.java | 85 +++- .../backend/bytecode/Disassemble.java | 27 ++ .../perlonjava/backend/bytecode/Opcodes.java | 22 + 6 files changed, 653 insertions(+), 37 deletions(-) create mode 100644 dev/design/superoperators.md diff --git a/dev/design/superoperators.md b/dev/design/superoperators.md new file mode 100644 index 000000000..7d62a58eb --- /dev/null +++ b/dev/design/superoperators.md @@ -0,0 +1,407 @@ +# Superoperator Design for PerlOnJava Bytecode Interpreter + +## Overview + +This document analyzes common bytecode instruction sequences in PerlOnJava's interpreter +and proposes "superoperators" - combined opcodes that replace frequent multi-instruction +patterns with single, optimized operations. 
+ +## Analysis Methodology + +Bytecode was collected from running ExifTool tests with `--interpreter --disassemble`: +```bash +cd Image-ExifTool-13.44 +java -jar ../target/perlonjava-3.0.0.jar --interpreter --disassemble -Ilib t/ExifTool.t +``` + +## Top Operations by Frequency (ExifTool.t) + +| Operation | Count | % of Total | +|-----------|-------|------------| +| LOAD_BYTE_STRING | 16,876 | 24.4% | +| LOAD_INT | 6,705 | 9.7% | +| RETURN | 6,492 | 9.4% | +| CREATE_LIST | 5,124 | 7.4% | +| ALIAS | 4,446 | 6.4% | +| LOAD_STRING | 4,042 | 5.8% | +| DEREF_HASH | 3,626 | 5.2% | +| GOTO_IF_FALSE | 3,425 | 5.0% | +| HASH_GET | 3,252 | 4.7% | +| LOAD_UNDEF | 2,420 | 3.5% | +| MATCH_REGEX | 2,125 | 3.1% | +| LOAD_GLOBAL_SCALAR | 2,117 | 3.1% | + +## Common Two-Instruction Sequences + +| Sequence | Count | +|----------|-------| +| LOAD_BYTE_STRING -> LOAD_BYTE_STRING | 8,587 | +| DEREF_HASH -> LOAD_STRING | 3,014 | +| LOAD_STRING -> HASH_GET | 2,558 | +| LOAD_BYTE_STRING -> QUOTE_REGEX | 1,782 | +| MATCH_REGEX -> ALIAS | 1,660 | +| CREATE_LIST -> CALL_SUB | 1,166 | + +## Common Three-Instruction Sequences + +| Sequence | Count | +|----------|-------| +| DEREF_HASH -> LOAD_STRING -> HASH_GET | 2,498 | +| LOAD_STRING -> HASH_GET -> MATCH_REGEX | 1,196 | +| MATCH_REGEX -> ALIAS -> RESTORE_REGEX_STATE | 1,188 | +| CREATE_LIST -> CALL_SUB -> RETURN | 681 | + +--- + +## Proposed Superoperators + +### 1. 
HASH_DEREF_FETCH (Priority: P1 - HIGHEST) + +**Pattern replaced:** +``` +DEREF_HASH r26 = %{r22} +LOAD_STRING r27 = "key" +HASH_GET r28 = r26{r27} +``` + +**New opcode:** +``` +HASH_DEREF_FETCH r28 = %{r22}{"key"} +``` + +**Occurrences:** 2,498 (ExifTool.t) + +**Semantics:** +- Input: hashref register, string constant index +- Output: fetched value +- Equivalent to: `$$hashref{key}` or `$hash->{key}` + +**Bytecode format:** +``` +HASH_DEREF_FETCH dest_reg, hashref_reg, string_constant_index +``` + +**Implementation notes:** +- Combines hash dereference + key load + hash get into single dispatch +- Eliminates 2 intermediate register allocations +- String key is stored in constants table (already interned) + +--- + +### 2. ARRAY_DEREF_FETCH (Priority: P1) + +**Pattern replaced:** +``` +DEREF_ARRAY r10 = @{r8} +LOAD_INT r11 = 0 +ARRAY_GET r12 = r10[r11] +``` + +**New opcode:** +``` +ARRAY_DEREF_FETCH r12 = @{r8}[0] +``` + +**Occurrences:** ~700 (estimated from DEREF_ARRAY + LOAD_INT + ARRAY_GET sequences) + +**Semantics:** +- Input: arrayref register, integer index (constant or register) +- Output: fetched element +- Equivalent to: `$$arrayref[n]` or `$array->[n]` + +**Bytecode format:** +``` +ARRAY_DEREF_FETCH dest_reg, arrayref_reg, index_constant +ARRAY_DEREF_FETCH_REG dest_reg, arrayref_reg, index_reg # for variable indices +``` + +--- + +### 3. MATCH_ALIAS_RESTORE (Priority: P2) + +**Pattern replaced:** +``` +MATCH_REGEX r10 = r8 =~ r9 +ALIAS r11 = r10 +RESTORE_REGEX_STATE +``` + +**New opcode:** +``` +MATCH_ALIAS_RESTORE r11 = r8 =~ r9 +``` + +**Occurrences:** 1,188 + +**Semantics:** +- Perform regex match +- Alias result to destination +- Restore regex state +- All in one dispatch + +--- + +### 4. HASH_FETCH_MATCH (Priority: P2) + +**Pattern replaced:** +``` +LOAD_STRING r27 = "key" +HASH_GET r28 = r26{r27} +MATCH_REGEX r29 = r28 =~ r30 +``` + +**New opcode:** +``` +HASH_FETCH_MATCH r29 = r26{"key"} =~ r30 +``` + +**Occurrences:** 1,196 + +--- + +### 5. 
EQ_STR_BRANCH / EQ_NUM_BRANCH (Priority: P3) + +**Pattern replaced:** +``` +EQ_STR r10 = r8 eq r9 +GOTO_IF_FALSE r10 -> target +``` + +**New opcode:** +``` +EQ_STR_BRANCH_FALSE r8, r9 -> target +``` + +**Occurrences:** 433 (string) + 396 (numeric) = 829 + +--- + +### 6. CALL_RETURN (Priority: P3) + +**Pattern replaced:** +``` +CREATE_LIST r7 = [] +CALL_SUB r3 = r6->(r7, ctx=2) +RETURN r3 +``` + +**New opcode:** +``` +CALL_RETURN r3 = r6->([]) +``` + +**Occurrences:** 681 + +--- + +## Implementation Priority Matrix + +| Superoperator | Ops Saved | Total Savings | Complexity | Priority | +|---------------|-----------|---------------|------------|----------| +| HASH_DEREF_FETCH | 2 | ~5,000 | Low | **P1** | +| ARRAY_DEREF_FETCH | 2 | ~1,400 | Low | **P1** | +| MATCH_ALIAS_RESTORE | 2 | ~2,400 | Medium | **P2** | +| HASH_FETCH_MATCH | 2 | ~2,400 | Medium | **P2** | +| EQ_STR_BRANCH | 1 | ~830 | Low | **P3** | +| CALL_RETURN | 2 | ~1,360 | Low | **P3** | + +--- + +## Files to Modify + +### Opcodes.java +Add new opcode constants: +```java +public static final int HASH_DEREF_FETCH = 0x80; +public static final int ARRAY_DEREF_FETCH = 0x81; +public static final int MATCH_ALIAS_RESTORE = 0x82; +// etc. 
+``` + +### BytecodeCompiler.java +Detect patterns during compilation and emit superoperators: +- In `visit(HashAccessNode)` - detect deref + fetch pattern +- In `visit(ArrayAccessNode)` - detect deref + fetch pattern + +### BytecodeInterpreter.java +Add handler cases for new opcodes: +```java +case Opcodes.HASH_DEREF_FETCH: { + int destReg = code[ip++]; + int hashrefReg = code[ip++]; + int keyConstIdx = code[ip++]; + RuntimeHash hash = registers[hashrefReg].hashDeref(); + String key = (String) constants[keyConstIdx]; + registers[destReg] = hash.get(key); + break; +} +``` + +--- + +## Performance Expectations + +Implementing HASH_DEREF_FETCH and ARRAY_DEREF_FETCH alone would: +- Eliminate ~6,400 instruction dispatches in ExifTool tests +- Reduce interpreter loop iterations by ~10% +- Improve cache locality (fewer register accesses) + +The benefits compound in tight loops where hash/array access is repeated. + +--- + +## Testing Strategy + +1. Run existing test suite to ensure no regressions +2. Compare bytecode output before/after (count instructions) +3. Benchmark ExifTool test execution time +4. Verify correctness with edge cases: + - Undefined hash/array refs + - Autovivification + - Tied hashes/arrays + - Magical variables + +--- + +## Profile Comparison: ExifTool vs life_bitpacked.pl + +To validate superoperator priorities, we analyzed `examples/life_bitpacked.pl` which has a very different workload profile (bitwise operations vs hash access). 
+ +### life_bitpacked.pl - Top Operations + +| Operation | Count | Notes | +|-----------|-------|-------| +| LOAD_GLOBAL_SCALAR | 429 | Package variable access | +| SET_PACKAGE | 379 | Mostly Getopt::Long | +| LOAD_INT | 342 | Heavy numeric computation | +| LOAD_BYTE_STRING | 326 | String constants | +| CREATE_LIST | 229 | | +| GOTO_IF_FALSE | 143 | Conditionals | +| BITWISE_AND_BINARY | 56 | Bitpacking operations | +| BITWISE_OR_BINARY | 32 | Bitpacking operations | + +### life_bitpacked.pl - Top Two-Instruction Sequences + +| Sequence | Count | +|----------|-------| +| SET_PACKAGE 'Getopt::Long' -> LOAD_GLOBAL_SCALAR | 326 | +| LOAD_BYTE_STRING -> LOAD_BYTE_STRING | 108 | +| LOAD_BYTE_STRING -> LOAD_INT | 81 | +| CREATE_LIST -> CALL_SUB | 63 | +| LOAD_BYTE_STRING -> EQ_STR | 38 | +| CREATE_LIST -> JOIN | 38 | +| EQ_STR -> GOTO_IF_FALSE | 33 | +| LOAD_INT -> BITWISE_AND_BINARY | 30 | + +### life_bitpacked.pl - Top Three-Instruction Sequences + +| Sequence | Count | +|----------|-------| +| SET_PACKAGE 'Getopt::Long' -> LOAD_GLOBAL_SCALAR -> SET_PACKAGE 'Getopt::Long' | 310 | +| CREATE_LIST -> CALL_SUB -> RETURN | 38 | +| LOAD_BYTE_STRING -> EQ_STR -> GOTO_IF_FALSE | 29 | +| LOAD_INT -> BITWISE_AND_BINARY -> LOAD_INT | 19 | +| RIGHT_SHIFT -> LOAD_INT -> BITWISE_AND_BINARY | 18 | +| LOAD_INT -> LEFT_SHIFT -> BITWISE_OR_BINARY | 17 | +| BITWISE_AND_BINARY -> LOAD_INT -> LEFT_SHIFT | 17 | + +### Cross-Workload Analysis + +**Common patterns (good superoperator candidates):** +1. `CREATE_LIST -> CALL_SUB -> RETURN` - appears in both workloads +2. `EQ_STR -> GOTO_IF_FALSE` - conditional string comparison +3. `LOAD_INT -> BITWISE_AND_BINARY` - bit extraction + +**ExifTool-specific patterns (hash-heavy):** +1. `DEREF_HASH -> LOAD_STRING -> HASH_GET` - hash dereference (2,498 occurrences) +2. `MATCH_REGEX -> ALIAS -> RESTORE_REGEX_STATE` - regex matching (1,188 occurrences) + +**life_bitpacked.pl-specific patterns (bitwise-heavy):** +1. 
`LOAD_INT -> BITWISE_AND_BINARY -> LOAD_INT` - bit masking +2. `RIGHT_SHIFT -> LOAD_INT -> BITWISE_AND_BINARY` - bit extraction +3. `LOAD_INT -> LEFT_SHIFT -> BITWISE_OR_BINARY` - bit packing + +### Revised Priority Recommendations + +Based on cross-workload analysis: + +| Priority | Superoperator | Rationale | +|----------|---------------|-----------| +| **P1** | HASH_DEREF_FETCH | High impact for hash-heavy code (ExifTool, most real apps) | +| **P1** | EQ_STR_BRANCH | Common in both workloads | +| **P2** | CALL_RETURN | Common in both workloads | +| **P2** | MATCH_ALIAS_RESTORE | High impact for regex-heavy code | +| **P3** | BIT_EXTRACT (new) | `(value >> shift) & mask` for numeric code | +| **P3** | BIT_INSERT (new) | `value \| (bits << shift)` for numeric code | + +--- + +## Future Work + +- **Peephole optimizer**: Post-compilation pass to detect and replace patterns +- **Profile-guided optimization**: Collect runtime frequency data to prioritize hot patterns +- **JIT hints**: Mark hot superoperator sequences for potential JVM compilation + +--- + +## Appendix: Raw Analysis Commands + +```bash +# Count single operations +grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ + sed 's/ r[0-9].*$//' | sort | uniq -c | sort -rn + +# Count two-instruction sequences +grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ + sed 's/ r[0-9].*$//' | \ + awk 'NR>1{print prev" -> "$0} {prev=$0}' | sort | uniq -c | sort -rn + +# Count three-instruction sequences +grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ + sed 's/ r[0-9].*$//' | \ + awk 'NR>2{print prev2" -> "prev" -> "$0} {prev2=prev; prev=$0}' | \ + sort | uniq -c | sort -rn +``` + +--- + +## Progress Tracking + +### Current Status: Phase 3 Complete - P1 Superoperators Implemented + +### Completed Phases +- [x] Phase 1: ExifTool bytecode analysis (2026-03-12) + - Generated bytecode from Image-ExifTool-13.44/t/ExifTool.t + - Identified HASH_DEREF_FETCH as highest-impact superoperator + - 
Documented 6 proposed superoperators with implementation details + +- [x] Phase 2: Cross-workload validation (2026-03-12) + - Analyzed examples/life_bitpacked.pl (numeric/bitwise workload) + - Validated that EQ_STR_BRANCH and CALL_RETURN are universal + - Identified workload-specific patterns (hash vs bitwise) + - Updated priority recommendations based on cross-workload data + +- [x] Phase 3: P1 Superoperator Implementation (2026-03-12) + - Implemented HASH_DEREF_FETCH (opcode 381) + - Combines: DEREF_HASH + LOAD_STRING + HASH_GET + - Format: HASH_DEREF_FETCH rd hashref_reg key_string_idx + - Optimizes: $hashref->{key} with bareword or string literal key + - Implemented ARRAY_DEREF_FETCH (opcode 382) + - Combines: DEREF_ARRAY + LOAD_INT + ARRAY_GET + - Format: ARRAY_DEREF_FETCH rd arrayref_reg index_immediate + - Optimizes: $arrayref->[n] with integer literal index + - Files modified: + - Opcodes.java: Added opcode constants 381, 382 + - CompileBinaryOperator.java: Added pattern detection for -> operator + - BytecodeCompiler.java: Added pattern detection for general access + - BytecodeInterpreter.java: Added execution handlers + - Disassemble.java: Added disassembly support + +### Next Steps +1. Implement EQ_STR_BRANCH superoperator (P1) +2. Benchmark performance improvement +3. 
Consider implementing MATCH_ALIAS_RESTORE (P2) + +### Resolved Questions +- Superoperators are emitted at compile time (pattern detection during bytecode generation) +- Autovivification and tied variables: handled by hashDeref()/arrayDeref() calls in handler diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 2a2339245..52b837944 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1765,7 +1765,32 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { // Handle single element access if (indexNode.elements.size() == 1) { Node indexExpr = indexNode.elements.get(0); - // Compile index in SCALAR context to ensure RuntimeScalar + + // Check if we can use the ARRAY_DEREF_FETCH superoperator + // Conditions: index is a small integer literal and strict refs is enabled + if (indexExpr instanceof NumberNode numNode && isStrictRefsEnabled()) { + String value = numNode.value.replace("_", ""); + try { + // Check if it's a small integer (fits in 16-bit) + if (ScalarUtils.isInteger(value)) { + int intValue = Integer.parseInt(value); + // SUPEROPERATOR: Integer literal index with strict refs - use ARRAY_DEREF_FETCH + // This combines DEREF_ARRAY + LOAD_INT + ARRAY_GET into one opcode + int rd = allocateOutputRegister(); + emit(Opcodes.ARRAY_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emitInt(intValue); + + lastResultReg = rd; + return; + } + } catch (NumberFormatException e) { + // Fall through to general case + } + } + + // General case: Compile index in SCALAR context to ensure RuntimeScalar compileNode(indexExpr, -1, RuntimeContextType.SCALAR); int indexReg = lastResultReg; @@ -1825,29 +1850,25 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { if (keyNode.elements.size() == 1) { Node keyExpr = keyNode.elements.get(0); - // Compile the key - int 
keyReg; - if (keyExpr instanceof IdentifierNode) { - // Bareword key - autoquote it - String keyString = ((IdentifierNode) keyExpr).name; - keyReg = allocateRegister(); - int keyIdx = addToStringPool(keyString); - emit(Opcodes.LOAD_STRING); - emitReg(keyReg); - emit(keyIdx); - } else { - // Expression key - compile it in SCALAR context to ensure RuntimeScalar - compileNode(keyExpr, -1, RuntimeContextType.SCALAR); - keyReg = lastResultReg; - } - // Check if this is a glob slot access: *X{key} // In this case, node.left is an OperatorNode with operator "*" boolean isGlobSlotAccess = (node.left instanceof OperatorNode) && ((OperatorNode) node.left).operator.equals("*"); if (isGlobSlotAccess) { - // For glob slot access, call hashDerefGetNonStrict directly + // For glob slot access, compile the key and call hashDerefGetNonStrict directly + int keyReg; + if (keyExpr instanceof IdentifierNode) { + String keyString = ((IdentifierNode) keyExpr).name; + keyReg = allocateRegister(); + int keyIdx = addToStringPool(keyString); + emit(Opcodes.LOAD_STRING); + emitReg(keyReg); + emit(keyIdx); + } else { + compileNode(keyExpr, -1, RuntimeContextType.SCALAR); + keyReg = lastResultReg; + } // This uses RuntimeGlob's override which accesses the slot without dereferencing int rd = allocateOutputRegister(); emit(Opcodes.GLOB_SLOT_GET); @@ -1855,8 +1876,50 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { emitReg(baseReg); emitReg(keyReg); + lastResultReg = rd; + } else if (keyExpr instanceof IdentifierNode && isStrictRefsEnabled()) { + // SUPEROPERATOR: Bareword key with strict refs - use HASH_DEREF_FETCH + // This combines DEREF_HASH + LOAD_STRING + HASH_GET into one opcode + String keyString = ((IdentifierNode) keyExpr).name; + int keyIdx = addToStringPool(keyString); + + int rd = allocateOutputRegister(); + emit(Opcodes.HASH_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emit(keyIdx); + + lastResultReg = rd; + } else if (keyExpr instanceof StringNode && 
isStrictRefsEnabled()) { + // SUPEROPERATOR: String literal key with strict refs - use HASH_DEREF_FETCH + // This combines DEREF_HASH + LOAD_STRING + HASH_GET into one opcode + String keyString = ((StringNode) keyExpr).value; + int keyIdx = addToStringPool(keyString); + + int rd = allocateOutputRegister(); + emit(Opcodes.HASH_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emit(keyIdx); + lastResultReg = rd; } else { + // General case: compile the key and use separate opcodes + int keyReg; + if (keyExpr instanceof IdentifierNode) { + // Bareword key - autoquote it (non-strict refs case) + String keyString = ((IdentifierNode) keyExpr).name; + keyReg = allocateRegister(); + int keyIdx = addToStringPool(keyString); + emit(Opcodes.LOAD_STRING); + emitReg(keyReg); + emit(keyIdx); + } else { + // Expression key - compile it in SCALAR context to ensure RuntimeScalar + compileNode(keyExpr, -1, RuntimeContextType.SCALAR); + keyReg = lastResultReg; + } + // Normal hash access: dereference first, then get element // The base might be either: // 1. 
A RuntimeHash (from %hash which was a hash variable) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 262ed176d..00b8f54ed 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ -1619,6 +1619,56 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c DebugHooks.debug(file, line, code, registers, siteIndex); } + // ================================================================= + // SUPEROPERATORS - Combined instruction sequences for performance + // ================================================================= + + case Opcodes.HASH_DEREF_FETCH -> { + // Combined: DEREF_HASH + LOAD_STRING + HASH_GET + // Format: HASH_DEREF_FETCH rd hashref_reg key_string_idx + // Equivalent to: $hashref->{key} + int rd = bytecode[pc++]; + int hashrefReg = bytecode[pc++]; + int keyIdx = bytecode[pc++]; + + RuntimeBase hashrefBase = registers[hashrefReg]; + + // Dereference to get the hash + RuntimeHash hash; + if (hashrefBase instanceof RuntimeHash) { + hash = (RuntimeHash) hashrefBase; + } else { + hash = hashrefBase.scalar().hashDeref(); + } + + // Get the element using string key from pool + String key = code.stringPool[keyIdx]; + registers[rd] = hash.get(key); + } + + case Opcodes.ARRAY_DEREF_FETCH -> { + // Combined: DEREF_ARRAY + LOAD_INT + ARRAY_GET + // Format: ARRAY_DEREF_FETCH rd arrayref_reg index_immediate + // Equivalent to: $arrayref->[n] + int rd = bytecode[pc++]; + int arrayrefReg = bytecode[pc++]; + int index = readInt(bytecode, pc); + pc += 1; + + RuntimeBase arrayrefBase = registers[arrayrefReg]; + + // Dereference to get the array + RuntimeArray array; + if (arrayrefBase instanceof RuntimeArray) { + array = (RuntimeArray) arrayrefBase; + } else { + array = arrayrefBase.scalar().arrayDeref(); + } + + // Get the 
element at index + registers[rd] = array.get(index); + } + default -> { int opcodeInt = opcode; throw new RuntimeException( diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java index c00a43ed6..9fd3a4c96 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java @@ -134,29 +134,51 @@ static void visitBinaryOperator(BytecodeCompiler bytecodeCompiler, BinaryOperato bytecodeCompiler.compileNode(node.left, -1, RuntimeContextType.SCALAR); int scalarRefReg = bytecodeCompiler.lastResultReg; - int hashReg = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emitWithToken(Opcodes.DEREF_HASH, node.getIndex()); - bytecodeCompiler.emitReg(hashReg); - bytecodeCompiler.emitReg(scalarRefReg); - if (keyNode.elements.isEmpty()) { bytecodeCompiler.throwCompilerException("Hash dereference requires key"); } - int keyReg; Node keyElement = keyNode.elements.get(0); - if (keyElement instanceof IdentifierNode) { - String keyString = ((IdentifierNode) keyElement).name; - keyReg = bytecodeCompiler.allocateRegister(); + + // SUPEROPERATOR: Check if we can use HASH_DEREF_FETCH + // Conditions: single bareword or string literal key + if (keyElement instanceof IdentifierNode idNode) { + // Bareword key - use superoperator + String keyString = idNode.name; int keyIdx = bytecodeCompiler.addToStringPool(keyString); - bytecodeCompiler.emit(Opcodes.LOAD_STRING); - bytecodeCompiler.emitReg(keyReg); + + int rd = bytecodeCompiler.allocateOutputRegister(); + bytecodeCompiler.emit(Opcodes.HASH_DEREF_FETCH); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(scalarRefReg); bytecodeCompiler.emit(keyIdx); - } else { - bytecodeCompiler.compileNode(keyElement, -1, RuntimeContextType.SCALAR); - keyReg = bytecodeCompiler.lastResultReg; + + bytecodeCompiler.lastResultReg = rd; + return; + 
} else if (keyElement instanceof StringNode strNode) { + // String literal key - use superoperator + String keyString = strNode.value; + int keyIdx = bytecodeCompiler.addToStringPool(keyString); + + int rd = bytecodeCompiler.allocateOutputRegister(); + bytecodeCompiler.emit(Opcodes.HASH_DEREF_FETCH); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(scalarRefReg); + bytecodeCompiler.emit(keyIdx); + + bytecodeCompiler.lastResultReg = rd; + return; } + // General case: expression key - use separate opcodes + int hashReg = bytecodeCompiler.allocateRegister(); + bytecodeCompiler.emitWithToken(Opcodes.DEREF_HASH, node.getIndex()); + bytecodeCompiler.emitReg(hashReg); + bytecodeCompiler.emitReg(scalarRefReg); + + bytecodeCompiler.compileNode(keyElement, -1, RuntimeContextType.SCALAR); + int keyReg = bytecodeCompiler.lastResultReg; + // Access hash element int rd = bytecodeCompiler.allocateOutputRegister(); bytecodeCompiler.emit(Opcodes.HASH_GET); @@ -174,16 +196,41 @@ static void visitBinaryOperator(BytecodeCompiler bytecodeCompiler, BinaryOperato bytecodeCompiler.compileNode(node.left, -1, RuntimeContextType.SCALAR); int scalarRefReg = bytecodeCompiler.lastResultReg; + if (indexNode.elements.isEmpty()) { + bytecodeCompiler.throwCompilerException("Array dereference requires index"); + } + + Node indexElement = indexNode.elements.get(0); + + // SUPEROPERATOR: Check if we can use ARRAY_DEREF_FETCH + // Conditions: integer literal index + if (indexElement instanceof NumberNode numNode) { + String value = numNode.value.replace("_", ""); + try { + if (org.perlonjava.runtime.runtimetypes.ScalarUtils.isInteger(value)) { + int intValue = Integer.parseInt(value); + // Use superoperator for integer literal index + int rd = bytecodeCompiler.allocateOutputRegister(); + bytecodeCompiler.emit(Opcodes.ARRAY_DEREF_FETCH); + bytecodeCompiler.emitReg(rd); + bytecodeCompiler.emitReg(scalarRefReg); + bytecodeCompiler.emitInt(intValue); + + bytecodeCompiler.lastResultReg = rd; + 
return; + } + } catch (NumberFormatException e) { + // Fall through to general case + } + } + + // General case: use separate opcodes int arrayReg = bytecodeCompiler.allocateRegister(); bytecodeCompiler.emitWithToken(Opcodes.DEREF_ARRAY, node.getIndex()); bytecodeCompiler.emitReg(arrayReg); bytecodeCompiler.emitReg(scalarRefReg); - if (indexNode.elements.isEmpty()) { - bytecodeCompiler.throwCompilerException("Array dereference requires index"); - } - - bytecodeCompiler.compileNode(indexNode.elements.get(0), -1, RuntimeContextType.SCALAR); + bytecodeCompiler.compileNode(indexElement, -1, RuntimeContextType.SCALAR); int indexReg = bytecodeCompiler.lastResultReg; // Access array element diff --git a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java index 98c8a69c3..726f28a07 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java @@ -2098,6 +2098,33 @@ public static String disassemble(InterpretedCode interpretedCode) { break; } + // ================================================================= + // SUPEROPERATORS + // ================================================================= + + case Opcodes.HASH_DEREF_FETCH: { + rd = interpretedCode.bytecode[pc++]; + int hashrefReg = interpretedCode.bytecode[pc++]; + int keyIdx = interpretedCode.bytecode[pc++]; + sb.append("HASH_DEREF_FETCH r").append(rd) + .append(" = r").append(hashrefReg).append("->{\""); + if (interpretedCode.stringPool != null && keyIdx < interpretedCode.stringPool.length) { + sb.append(interpretedCode.stringPool[keyIdx]); + } + sb.append("\"}\n"); + break; + } + + case Opcodes.ARRAY_DEREF_FETCH: { + rd = interpretedCode.bytecode[pc++]; + int arrayrefReg = interpretedCode.bytecode[pc++]; + int index = InterpretedCode.readInt(interpretedCode.bytecode, pc); + pc += 1; + sb.append("ARRAY_DEREF_FETCH r").append(rd) + .append(" = 
r").append(arrayrefReg).append("->[").append(index).append("]\n"); + break; + } + default: sb.append("UNKNOWN(").append(opcode).append(")\n"); break; diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index 6ab8b1e9f..a3652884a 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -1880,6 +1880,28 @@ public class Opcodes { */ public static final short KILL = 380; + // ================================================================= + // SUPEROPERATORS (381+) - Combined instruction sequences + // ================================================================= + + /** + * Hash dereference + string key + fetch in one operation. + * Pattern replaced: DEREF_HASH + LOAD_STRING + HASH_GET + * Format: HASH_DEREF_FETCH rd hashref_reg key_string_index + * Effect: rd = hashref_reg->hashDeref().get(stringPool[key_string_index]) + * Equivalent to: $hashref->{key} + */ + public static final short HASH_DEREF_FETCH = 381; + + /** + * Array dereference + integer index + fetch in one operation. + * Pattern replaced: DEREF_ARRAY + LOAD_INT + ARRAY_GET + * Format: ARRAY_DEREF_FETCH rd arrayref_reg index_immediate + * Effect: rd = arrayref_reg->arrayDeref().get(index_immediate) + * Equivalent to: $arrayref->[n] + */ + public static final short ARRAY_DEREF_FETCH = 382; + private Opcodes() { } // Utility class - no instantiation } From 5022684055fe4ac2539cd1961b64ed4a80949352 Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Thu, 12 Mar 2026 12:09:39 +0100 Subject: [PATCH 02/10] Refactor superoperator emission into reusable helper methods - Add emitHashDerefGet() and emitArrayDerefGet() helpers in BytecodeCompiler - Refactor handleGeneralHashAccess() and handleGeneralArrayAccess() to use helpers - Refactor CompileBinaryOperator -> operator handling to use helpers - Enables superoperators for both $h->{a}{b} (implicit arrows) and $h->{a}->{b} (explicit arrows) - Reduces code duplication across 3 call sites Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 278 ++++++++++-------- .../bytecode/CompileBinaryOperator.java | 96 +----- .../org/perlonjava/core/Configuration.java | 2 +- 3 files changed, 157 insertions(+), 219 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 52b837944..e791692aa 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1741,6 +1741,152 @@ void handleCompoundAssignment(BinaryOperatorNode node) { lastResultReg = targetReg; } + // ========================================================================= + // SUPEROPERATOR HELPERS + // These methods centralize the logic for emitting superoperators vs + // regular instruction sequences. Called from both BytecodeCompiler and + // CompileBinaryOperator to handle all code paths consistently. + // ========================================================================= + + /** + * Emit hash dereference + element access, using HASH_DEREF_FETCH superoperator when possible. 
+ * Handles: $hashref->{key} or expr{key} patterns + * + * @param baseReg Register containing the hashref + * @param keyExpr The key expression node + * @param tokenIndex Token index for error reporting + * @return The result register containing the hash element + */ + int emitHashDerefGet(int baseReg, Node keyExpr, int tokenIndex) { + // Try to use superoperator for constant string keys + String constantKey = getConstantStringKey(keyExpr); + if (constantKey != null) { + // SUPEROPERATOR: Constant key - use HASH_DEREF_FETCH + int keyIdx = addToStringPool(constantKey); + int rd = allocateOutputRegister(); + emit(Opcodes.HASH_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emit(keyIdx); + return rd; + } + + // General case: compile key, dereference, then get + int keyReg; + if (keyExpr instanceof IdentifierNode) { + // Bareword key - autoquote it + String keyString = ((IdentifierNode) keyExpr).name; + keyReg = allocateRegister(); + int keyIdx = addToStringPool(keyString); + emit(Opcodes.LOAD_STRING); + emitReg(keyReg); + emit(keyIdx); + } else { + compileNode(keyExpr, -1, RuntimeContextType.SCALAR); + keyReg = lastResultReg; + } + + int hashReg = allocateRegister(); + if (isStrictRefsEnabled()) { + emitWithToken(Opcodes.DEREF_HASH, tokenIndex); + emitReg(hashReg); + emitReg(baseReg); + } else { + int pkgIdx = addToStringPool(getCurrentPackage()); + emitWithToken(Opcodes.DEREF_HASH_NONSTRICT, tokenIndex); + emitReg(hashReg); + emitReg(baseReg); + emit(pkgIdx); + } + + int rd = allocateOutputRegister(); + emit(Opcodes.HASH_GET); + emitReg(rd); + emitReg(hashReg); + emitReg(keyReg); + return rd; + } + + /** + * Emit array dereference + element access, using ARRAY_DEREF_FETCH superoperator when possible. 
+ * Handles: $arrayref->[n] or expr[n] patterns + * + * @param baseReg Register containing the arrayref + * @param indexExpr The index expression node + * @param tokenIndex Token index for error reporting + * @return The result register containing the array element + */ + int emitArrayDerefGet(int baseReg, Node indexExpr, int tokenIndex) { + // Try to use superoperator for integer literal indices + Integer constantIndex = getConstantIntegerIndex(indexExpr); + if (constantIndex != null) { + // SUPEROPERATOR: Integer literal - use ARRAY_DEREF_FETCH + int rd = allocateOutputRegister(); + emit(Opcodes.ARRAY_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emitInt(constantIndex); + return rd; + } + + // General case: compile index, dereference, then get + compileNode(indexExpr, -1, RuntimeContextType.SCALAR); + int indexReg = lastResultReg; + + int arrayReg = allocateRegister(); + if (isStrictRefsEnabled()) { + emitWithToken(Opcodes.DEREF_ARRAY, tokenIndex); + emitReg(arrayReg); + emitReg(baseReg); + } else { + int pkgIdx = addToStringPool(getCurrentPackage()); + emitWithToken(Opcodes.DEREF_ARRAY_NONSTRICT, tokenIndex); + emitReg(arrayReg); + emitReg(baseReg); + emit(pkgIdx); + } + + int rd = allocateOutputRegister(); + emit(Opcodes.ARRAY_GET); + emitReg(rd); + emitReg(arrayReg); + emitReg(indexReg); + return rd; + } + + /** + * Extract a constant string key from a key expression node. + * Returns the string value for IdentifierNode (bareword) or StringNode, + * null otherwise. + */ + private String getConstantStringKey(Node keyExpr) { + if (keyExpr instanceof IdentifierNode idNode) { + return idNode.name; + } else if (keyExpr instanceof StringNode strNode) { + return strNode.value; + } + return null; + } + + /** + * Extract a constant integer index from an index expression node. + * Returns the integer value for NumberNode with valid integer, + * null otherwise. 
+ */ + private Integer getConstantIntegerIndex(Node indexExpr) { + if (indexExpr instanceof NumberNode numNode) { + String value = numNode.value.replace("_", ""); + try { + if (ScalarUtils.isInteger(value)) { + return Integer.parseInt(value); + } + } catch (NumberFormatException e) { + // Not a valid integer + } + } + return null; + } + /** * Handle general array access: expr[index] * Example: $matrix[1][0] where $matrix[1] returns an arrayref @@ -1762,66 +1908,9 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { return; } - // Handle single element access + // Handle single element access using helper if (indexNode.elements.size() == 1) { - Node indexExpr = indexNode.elements.get(0); - - // Check if we can use the ARRAY_DEREF_FETCH superoperator - // Conditions: index is a small integer literal and strict refs is enabled - if (indexExpr instanceof NumberNode numNode && isStrictRefsEnabled()) { - String value = numNode.value.replace("_", ""); - try { - // Check if it's a small integer (fits in 16-bit) - if (ScalarUtils.isInteger(value)) { - int intValue = Integer.parseInt(value); - // SUPEROPERATOR: Integer literal index with strict refs - use ARRAY_DEREF_FETCH - // This combines DEREF_ARRAY + LOAD_INT + ARRAY_GET into one opcode - int rd = allocateOutputRegister(); - emit(Opcodes.ARRAY_DEREF_FETCH); - emitReg(rd); - emitReg(baseReg); - emitInt(intValue); - - lastResultReg = rd; - return; - } - } catch (NumberFormatException e) { - // Fall through to general case - } - } - - // General case: Compile index in SCALAR context to ensure RuntimeScalar - compileNode(indexExpr, -1, RuntimeContextType.SCALAR); - int indexReg = lastResultReg; - - // The base might be either: - // 1. A RuntimeArray (from $array which was an array variable) - // 2. A RuntimeScalar containing an arrayref (from $matrix[1]) - // We need to handle both cases. The ARRAY_GET opcode should handle - // dereferencing if needed, or we can use a deref+get sequence. 
- - // For now, let's assume it's a scalar with arrayref and dereference it first - int arrayReg = allocateRegister(); - if (isStrictRefsEnabled()) { - emit(Opcodes.DEREF_ARRAY); - emitReg(arrayReg); - emitReg(baseReg); - } else { - int pkgIdx = addToStringPool(getCurrentPackage()); - emit(Opcodes.DEREF_ARRAY_NONSTRICT); - emitReg(arrayReg); - emitReg(baseReg); - emit(pkgIdx); - } - - // Now get the element - int rd = allocateOutputRegister(); - emit(Opcodes.ARRAY_GET); - emitReg(rd); - emitReg(arrayReg); - emitReg(indexReg); - - lastResultReg = rd; + lastResultReg = emitArrayDerefGet(baseReg, indexNode.elements.get(0), node.getIndex()); } else { throwCompilerException("Multi-element array access not yet implemented"); } @@ -1876,77 +1965,10 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { emitReg(baseReg); emitReg(keyReg); - lastResultReg = rd; - } else if (keyExpr instanceof IdentifierNode && isStrictRefsEnabled()) { - // SUPEROPERATOR: Bareword key with strict refs - use HASH_DEREF_FETCH - // This combines DEREF_HASH + LOAD_STRING + HASH_GET into one opcode - String keyString = ((IdentifierNode) keyExpr).name; - int keyIdx = addToStringPool(keyString); - - int rd = allocateOutputRegister(); - emit(Opcodes.HASH_DEREF_FETCH); - emitReg(rd); - emitReg(baseReg); - emit(keyIdx); - - lastResultReg = rd; - } else if (keyExpr instanceof StringNode && isStrictRefsEnabled()) { - // SUPEROPERATOR: String literal key with strict refs - use HASH_DEREF_FETCH - // This combines DEREF_HASH + LOAD_STRING + HASH_GET into one opcode - String keyString = ((StringNode) keyExpr).value; - int keyIdx = addToStringPool(keyString); - - int rd = allocateOutputRegister(); - emit(Opcodes.HASH_DEREF_FETCH); - emitReg(rd); - emitReg(baseReg); - emit(keyIdx); - lastResultReg = rd; } else { - // General case: compile the key and use separate opcodes - int keyReg; - if (keyExpr instanceof IdentifierNode) { - // Bareword key - autoquote it (non-strict refs case) - String keyString = 
((IdentifierNode) keyExpr).name; - keyReg = allocateRegister(); - int keyIdx = addToStringPool(keyString); - emit(Opcodes.LOAD_STRING); - emitReg(keyReg); - emit(keyIdx); - } else { - // Expression key - compile it in SCALAR context to ensure RuntimeScalar - compileNode(keyExpr, -1, RuntimeContextType.SCALAR); - keyReg = lastResultReg; - } - - // Normal hash access: dereference first, then get element - // The base might be either: - // 1. A RuntimeHash (from %hash which was a hash variable) - // 2. A RuntimeScalar containing a hashref (from $hash{outer}) - // We need to handle both cases. Dereference if needed. - - int hashReg = allocateRegister(); - if (isStrictRefsEnabled()) { - emit(Opcodes.DEREF_HASH); - emitReg(hashReg); - emitReg(baseReg); - } else { - int pkgIdx = addToStringPool(getCurrentPackage()); - emit(Opcodes.DEREF_HASH_NONSTRICT); - emitReg(hashReg); - emitReg(baseReg); - emit(pkgIdx); - } - - // Now get the element - int rd = allocateOutputRegister(); - emit(Opcodes.HASH_GET); - emitReg(rd); - emitReg(hashReg); - emitReg(keyReg); - - lastResultReg = rd; + // Use helper for normal hash access (handles superoperator + fallback) + lastResultReg = emitHashDerefGet(baseReg, keyExpr, node.getIndex()); } } else { throwCompilerException("Multi-element hash access not yet implemented"); diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java index 9fd3a4c96..2a54b9c0d 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java @@ -138,55 +138,9 @@ static void visitBinaryOperator(BytecodeCompiler bytecodeCompiler, BinaryOperato bytecodeCompiler.throwCompilerException("Hash dereference requires key"); } - Node keyElement = keyNode.elements.get(0); - - // SUPEROPERATOR: Check if we can use HASH_DEREF_FETCH - // Conditions: single bareword or string 
literal key - if (keyElement instanceof IdentifierNode idNode) { - // Bareword key - use superoperator - String keyString = idNode.name; - int keyIdx = bytecodeCompiler.addToStringPool(keyString); - - int rd = bytecodeCompiler.allocateOutputRegister(); - bytecodeCompiler.emit(Opcodes.HASH_DEREF_FETCH); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(scalarRefReg); - bytecodeCompiler.emit(keyIdx); - - bytecodeCompiler.lastResultReg = rd; - return; - } else if (keyElement instanceof StringNode strNode) { - // String literal key - use superoperator - String keyString = strNode.value; - int keyIdx = bytecodeCompiler.addToStringPool(keyString); - - int rd = bytecodeCompiler.allocateOutputRegister(); - bytecodeCompiler.emit(Opcodes.HASH_DEREF_FETCH); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(scalarRefReg); - bytecodeCompiler.emit(keyIdx); - - bytecodeCompiler.lastResultReg = rd; - return; - } - - // General case: expression key - use separate opcodes - int hashReg = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emitWithToken(Opcodes.DEREF_HASH, node.getIndex()); - bytecodeCompiler.emitReg(hashReg); - bytecodeCompiler.emitReg(scalarRefReg); - - bytecodeCompiler.compileNode(keyElement, -1, RuntimeContextType.SCALAR); - int keyReg = bytecodeCompiler.lastResultReg; - - // Access hash element - int rd = bytecodeCompiler.allocateOutputRegister(); - bytecodeCompiler.emit(Opcodes.HASH_GET); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(hashReg); - bytecodeCompiler.emitReg(keyReg); - - bytecodeCompiler.lastResultReg = rd; + // Use helper for hash deref get (handles superoperator + fallback) + bytecodeCompiler.lastResultReg = bytecodeCompiler.emitHashDerefGet( + scalarRefReg, keyNode.elements.get(0), node.getIndex()); return; } else if (node.right instanceof ArrayLiteralNode indexNode) { // Arrayref dereference: $ref->[index] @@ -200,47 +154,9 @@ static void visitBinaryOperator(BytecodeCompiler bytecodeCompiler, BinaryOperato 
bytecodeCompiler.throwCompilerException("Array dereference requires index"); } - Node indexElement = indexNode.elements.get(0); - - // SUPEROPERATOR: Check if we can use ARRAY_DEREF_FETCH - // Conditions: integer literal index - if (indexElement instanceof NumberNode numNode) { - String value = numNode.value.replace("_", ""); - try { - if (org.perlonjava.runtime.runtimetypes.ScalarUtils.isInteger(value)) { - int intValue = Integer.parseInt(value); - // Use superoperator for integer literal index - int rd = bytecodeCompiler.allocateOutputRegister(); - bytecodeCompiler.emit(Opcodes.ARRAY_DEREF_FETCH); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(scalarRefReg); - bytecodeCompiler.emitInt(intValue); - - bytecodeCompiler.lastResultReg = rd; - return; - } - } catch (NumberFormatException e) { - // Fall through to general case - } - } - - // General case: use separate opcodes - int arrayReg = bytecodeCompiler.allocateRegister(); - bytecodeCompiler.emitWithToken(Opcodes.DEREF_ARRAY, node.getIndex()); - bytecodeCompiler.emitReg(arrayReg); - bytecodeCompiler.emitReg(scalarRefReg); - - bytecodeCompiler.compileNode(indexElement, -1, RuntimeContextType.SCALAR); - int indexReg = bytecodeCompiler.lastResultReg; - - // Access array element - int rd = bytecodeCompiler.allocateOutputRegister(); - bytecodeCompiler.emit(Opcodes.ARRAY_GET); - bytecodeCompiler.emitReg(rd); - bytecodeCompiler.emitReg(arrayReg); - bytecodeCompiler.emitReg(indexReg); - - bytecodeCompiler.lastResultReg = rd; + // Use helper for array deref get (handles superoperator + fallback) + bytecodeCompiler.lastResultReg = bytecodeCompiler.emitArrayDerefGet( + scalarRefReg, indexNode.elements.get(0), node.getIndex()); return; } // Code reference call: $code->() or $code->(@args) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index a78c3ca3e..cee75fa0d 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ 
b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "3d194fd2a"; + public static final String gitCommitId = "28a8fdc5d"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). From 2b44236923052a80641e28b8f9a53de90443a086 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 12:09:59 +0100 Subject: [PATCH 03/10] docs: Update superoperators design doc with refactoring notes Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/superoperators.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev/design/superoperators.md b/dev/design/superoperators.md index 7d62a58eb..abb061bcc 100644 --- a/dev/design/superoperators.md +++ b/dev/design/superoperators.md @@ -405,3 +405,10 @@ grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ ### Resolved Questions - Superoperators are emitted at compile time (pattern detection during bytecode generation) - Autovivification and tied variables: handled by hashDeref()/arrayDeref() calls in handler + +### Phase 3.1: Code Refactoring (2026-03-12) +- Added `emitHashDerefGet()` and `emitArrayDerefGet()` helpers in BytecodeCompiler.java +- Refactored `handleGeneralHashAccess()` and `handleGeneralArrayAccess()` to use helpers +- Refactored CompileBinaryOperator.java `->` operator handling to use helpers +- **Result**: Superoperators now work for both `$h->{a}{b}` (implicit arrows) and `$h->{a}->{b}` (explicit arrows) +- Code duplication reduced across 3 call sites From 328fc6dda4b7fd0f87ee1c918b488297c5ae3c92 Mon Sep 17 00:00:00 2001 From: "Flavio S.
Glock" Date: Thu, 12 Mar 2026 14:06:32 +0100 Subject: [PATCH 04/10] Fix superoperator regression: don't use in handleGeneralArrayAccess/HashAccess The superoperators (ARRAY_DEREF_FETCH, HASH_DEREF_FETCH) expect a scalar containing a reference, but handleGeneralArrayAccess and handleGeneralHashAccess can receive a RuntimeList (e.g., from `(caller)[0]`). This caused `(caller)[0]` and similar expressions to fail with: Can't use string ("...") as an ARRAY ref while "strict refs" in use Fix: Keep superoperators only in the -> operator handler (CompileBinaryOperator) where the left side is always a scalar reference. For handleGeneralArrayAccess and handleGeneralHashAccess, use the original DEREF_ARRAY/HASH + ARRAY/HASH_GET instruction sequence which correctly handles all input types. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 99 +++++++++++++++---- .../org/perlonjava/core/Configuration.java | 2 +- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index e791692aa..139e7cc6a 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1890,6 +1890,9 @@ private Integer getConstantIntegerIndex(Node indexExpr) { /** * Handle general array access: expr[index] * Example: $matrix[1][0] where $matrix[1] returns an arrayref + * NOTE: Does NOT use superoperators because the base might be a RuntimeList + * (e.g., (caller)[0]), not a scalar arrayref. Superoperators are only safe + * in the -> operator handler where the left side is always a scalar. 
*/ void handleGeneralArrayAccess(BinaryOperatorNode node) { // Compile the left side (the expression that should yield an array or arrayref) @@ -1908,9 +1911,40 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { return; } - // Handle single element access using helper + // Handle single element access if (indexNode.elements.size() == 1) { - lastResultReg = emitArrayDerefGet(baseReg, indexNode.elements.get(0), node.getIndex()); + Node indexExpr = indexNode.elements.get(0); + // Compile index in SCALAR context to ensure RuntimeScalar + compileNode(indexExpr, -1, RuntimeContextType.SCALAR); + int indexReg = lastResultReg; + + // The base might be either: + // 1. A RuntimeArray (from $array which was an array variable) + // 2. A RuntimeScalar containing an arrayref (from $matrix[1]) + // 3. A RuntimeList (from (caller) or similar) + // DEREF_ARRAY handles all these cases correctly. + + int arrayReg = allocateRegister(); + if (isStrictRefsEnabled()) { + emit(Opcodes.DEREF_ARRAY); + emitReg(arrayReg); + emitReg(baseReg); + } else { + int pkgIdx = addToStringPool(getCurrentPackage()); + emit(Opcodes.DEREF_ARRAY_NONSTRICT); + emitReg(arrayReg); + emitReg(baseReg); + emit(pkgIdx); + } + + // Now get the element + int rd = allocateOutputRegister(); + emit(Opcodes.ARRAY_GET); + emitReg(rd); + emitReg(arrayReg); + emitReg(indexReg); + + lastResultReg = rd; } else { throwCompilerException("Multi-element array access not yet implemented"); } @@ -1919,6 +1953,8 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { /** * Handle general hash access: expr{key} * Example: $hash{outer}{inner} where $hash{outer} returns a hashref + * NOTE: Does NOT use superoperators because the base might not be a scalar hashref. + * Superoperators are only safe in the -> operator handler. 
*/ void handleGeneralHashAccess(BinaryOperatorNode node) { // Compile the left side (the expression that should yield a hash or hashref) @@ -1939,25 +1975,29 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { if (keyNode.elements.size() == 1) { Node keyExpr = keyNode.elements.get(0); + // Compile the key + int keyReg; + if (keyExpr instanceof IdentifierNode) { + // Bareword key - autoquote it + String keyString = ((IdentifierNode) keyExpr).name; + keyReg = allocateRegister(); + int keyIdx = addToStringPool(keyString); + emit(Opcodes.LOAD_STRING); + emitReg(keyReg); + emit(keyIdx); + } else { + // Expression key - compile it in SCALAR context to ensure RuntimeScalar + compileNode(keyExpr, -1, RuntimeContextType.SCALAR); + keyReg = lastResultReg; + } + // Check if this is a glob slot access: *X{key} // In this case, node.left is an OperatorNode with operator "*" boolean isGlobSlotAccess = (node.left instanceof OperatorNode) && ((OperatorNode) node.left).operator.equals("*"); if (isGlobSlotAccess) { - // For glob slot access, compile the key and call hashDerefGetNonStrict directly - int keyReg; - if (keyExpr instanceof IdentifierNode) { - String keyString = ((IdentifierNode) keyExpr).name; - keyReg = allocateRegister(); - int keyIdx = addToStringPool(keyString); - emit(Opcodes.LOAD_STRING); - emitReg(keyReg); - emit(keyIdx); - } else { - compileNode(keyExpr, -1, RuntimeContextType.SCALAR); - keyReg = lastResultReg; - } + // For glob slot access, call hashDerefGetNonStrict directly // This uses RuntimeGlob's override which accesses the slot without dereferencing int rd = allocateOutputRegister(); emit(Opcodes.GLOB_SLOT_GET); @@ -1967,8 +2007,33 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { lastResultReg = rd; } else { - // Use helper for normal hash access (handles superoperator + fallback) - lastResultReg = emitHashDerefGet(baseReg, keyExpr, node.getIndex()); + // Normal hash access: dereference first, then get element + // The base might be 
either: + // 1. A RuntimeHash (from %hash which was a hash variable) + // 2. A RuntimeScalar containing a hashref (from $hash{outer}) + // We need to handle both cases. Dereference if needed. + + int hashReg = allocateRegister(); + if (isStrictRefsEnabled()) { + emit(Opcodes.DEREF_HASH); + emitReg(hashReg); + emitReg(baseReg); + } else { + int pkgIdx = addToStringPool(getCurrentPackage()); + emit(Opcodes.DEREF_HASH_NONSTRICT); + emitReg(hashReg); + emitReg(baseReg); + emit(pkgIdx); + } + + // Now get the element + int rd = allocateOutputRegister(); + emit(Opcodes.HASH_GET); + emitReg(rd); + emitReg(hashReg); + emitReg(keyReg); + + lastResultReg = rd; } } else { throwCompilerException("Multi-element hash access not yet implemented"); diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index cee75fa0d..92f20f8e3 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "28a8fdc5d"; + public static final String gitCommitId = "2b4423692"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). From 49f40023e1178be182381fedf86dba287a36198f Mon Sep 17 00:00:00 2001 From: "Flavio S. 
Glock" Date: Thu, 12 Mar 2026 14:07:33 +0100 Subject: [PATCH 05/10] docs: Update superoperators design doc with RuntimeList bug fix Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/superoperators.md | 13 +++++++++++++ .../java/org/perlonjava/core/Configuration.java | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/dev/design/superoperators.md b/dev/design/superoperators.md index abb061bcc..63d8c71a8 100644 --- a/dev/design/superoperators.md +++ b/dev/design/superoperators.md @@ -412,3 +412,16 @@ grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ - Refactored CompileBinaryOperator.java `->` operator handling to use helpers - **Result**: Superoperators now work for both `$h->{a}{b}` (implicit arrows) and `$h->{a}->{b}` (explicit arrows) - Code duplication reduced across 3 call sites + +### Phase 3.2: Bug Fix - RuntimeList Handling (2026-03-12) +- **Bug**: Superoperators in `handleGeneralArrayAccess()` and `handleGeneralHashAccess()` + caused `(caller)[0]` and similar expressions to fail with: + `Can't use string ("...") as an ARRAY ref while "strict refs" in use` +- **Root cause**: Superoperators (`ARRAY_DEREF_FETCH`, `HASH_DEREF_FETCH`) expect a scalar + containing a reference, but these handlers can receive a RuntimeList (e.g., from `(caller)`) +- **Fix**: Reverted `handleGeneralArrayAccess()` and `handleGeneralHashAccess()` to use + the original DEREF_ARRAY/HASH + ARRAY/HASH_GET instruction sequence, which correctly + handles all input types (RuntimeArray, RuntimeList, RuntimeScalar with reference) +- **Superoperators remain in**: CompileBinaryOperator.java `->` operator handler, where + the left side is always compiled in SCALAR context and thus guaranteed to be a scalar reference +- This fix resolves the Getopt::Long / life_bitpacked.pl regression diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 92f20f8e3..b334b5f59
100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "2b4423692"; + public static final String gitCommitId = "328fc6dda"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). From 90d577d0bef3f8570316e15c7f44fe465bbd36c4 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 14:32:15 +0100 Subject: [PATCH 06/10] Add NONSTRICT variants of deref superoperators - Add HASH_DEREF_FETCH_NONSTRICT (383) and ARRAY_DEREF_FETCH_NONSTRICT (384) - Update BytecodeInterpreter with handlers for new opcodes - Update BytecodeCompiler emitHashDerefGet/emitArrayDerefGet to use NONSTRICT variants when strict refs is not enabled - Add disassembler support for NONSTRICT superoperators - Fix (expr)[index] handling in CompileBinaryOperator (ListNode transform) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 40 ++++++++++++---- .../backend/bytecode/BytecodeInterpreter.java | 48 +++++++++++++++++++ .../bytecode/CompileBinaryOperator.java | 11 +++++ .../backend/bytecode/Disassemble.java | 33 +++++++++++++ .../perlonjava/backend/bytecode/Opcodes.java | 18 +++++++ 5 files changed, 140 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 139e7cc6a..e57a25184 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1761,13 +1761,23 @@ int emitHashDerefGet(int baseReg, Node keyExpr, int tokenIndex) { // Try to use superoperator for constant string keys String constantKey = 
getConstantStringKey(keyExpr); if (constantKey != null) { - // SUPEROPERATOR: Constant key - use HASH_DEREF_FETCH int keyIdx = addToStringPool(constantKey); int rd = allocateOutputRegister(); - emit(Opcodes.HASH_DEREF_FETCH); - emitReg(rd); - emitReg(baseReg); - emit(keyIdx); + if (isStrictRefsEnabled()) { + // SUPEROPERATOR: Constant key + strict refs - use HASH_DEREF_FETCH + emit(Opcodes.HASH_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emit(keyIdx); + } else { + // SUPEROPERATOR: Constant key + non-strict - use HASH_DEREF_FETCH_NONSTRICT + int pkgIdx = addToStringPool(getCurrentPackage()); + emit(Opcodes.HASH_DEREF_FETCH_NONSTRICT); + emitReg(rd); + emitReg(baseReg); + emit(keyIdx); + emit(pkgIdx); + } return rd; } @@ -1820,12 +1830,22 @@ int emitArrayDerefGet(int baseReg, Node indexExpr, int tokenIndex) { // Try to use superoperator for integer literal indices Integer constantIndex = getConstantIntegerIndex(indexExpr); if (constantIndex != null) { - // SUPEROPERATOR: Integer literal - use ARRAY_DEREF_FETCH int rd = allocateOutputRegister(); - emit(Opcodes.ARRAY_DEREF_FETCH); - emitReg(rd); - emitReg(baseReg); - emitInt(constantIndex); + if (isStrictRefsEnabled()) { + // SUPEROPERATOR: Integer literal + strict refs - use ARRAY_DEREF_FETCH + emit(Opcodes.ARRAY_DEREF_FETCH); + emitReg(rd); + emitReg(baseReg); + emitInt(constantIndex); + } else { + // SUPEROPERATOR: Integer literal + non-strict - use ARRAY_DEREF_FETCH_NONSTRICT + int pkgIdx = addToStringPool(getCurrentPackage()); + emit(Opcodes.ARRAY_DEREF_FETCH_NONSTRICT); + emitReg(rd); + emitReg(baseReg); + emitInt(constantIndex); + emit(pkgIdx); + } return rd; } diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java index 00b8f54ed..50065166b 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeInterpreter.java @@ 
-1669,6 +1669,54 @@ public static RuntimeList execute(InterpretedCode code, RuntimeArray args, int c registers[rd] = array.get(index); } + case Opcodes.HASH_DEREF_FETCH_NONSTRICT -> { + // Combined: DEREF_HASH_NONSTRICT + LOAD_STRING + HASH_GET + // Format: HASH_DEREF_FETCH_NONSTRICT rd hashref_reg key_string_idx pkg_string_idx + // Equivalent to: $hashref->{key} without strict refs + int rd = bytecode[pc++]; + int hashrefReg = bytecode[pc++]; + int keyIdx = bytecode[pc++]; + int pkgIdx = bytecode[pc++]; + + RuntimeBase hashrefBase = registers[hashrefReg]; + + // Dereference to get the hash (non-strict allows symbolic refs) + RuntimeHash hash; + if (hashrefBase instanceof RuntimeHash) { + hash = (RuntimeHash) hashrefBase; + } else { + hash = hashrefBase.scalar().hashDerefNonStrict(code.stringPool[pkgIdx]); + } + + // Get the element using string key from pool + String key = code.stringPool[keyIdx]; + registers[rd] = hash.get(key); + } + + case Opcodes.ARRAY_DEREF_FETCH_NONSTRICT -> { + // Combined: DEREF_ARRAY_NONSTRICT + LOAD_INT + ARRAY_GET + // Format: ARRAY_DEREF_FETCH_NONSTRICT rd arrayref_reg index_immediate pkg_string_idx + // Equivalent to: $arrayref->[n] without strict refs + int rd = bytecode[pc++]; + int arrayrefReg = bytecode[pc++]; + int index = readInt(bytecode, pc); + pc += 1; + int pkgIdx = bytecode[pc++]; + + RuntimeBase arrayrefBase = registers[arrayrefReg]; + + // Dereference to get the array (non-strict allows symbolic refs) + RuntimeArray array; + if (arrayrefBase instanceof RuntimeArray) { + array = (RuntimeArray) arrayrefBase; + } else { + array = arrayrefBase.scalar().arrayDerefNonStrict(code.stringPool[pkgIdx]); + } + + // Get the element at index + registers[rd] = array.get(index); + } + default -> { int opcodeInt = opcode; throw new RuntimeException( diff --git a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java index 2a54b9c0d..95fe2221d 
100644 --- a/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java +++ b/src/main/java/org/perlonjava/backend/bytecode/CompileBinaryOperator.java @@ -280,6 +280,17 @@ else if (node.right instanceof BinaryOperatorNode rightCall) { } } + // Handle ListNode case: (expr)[index] like (caller)[0] + // Transform to [expr]->[index] like JVM does + if (node.left instanceof ListNode listNode) { + // Create: ArrayLiteralNode containing the list elements + // Then: BinaryOperatorNode("->", arrayLiteral, node.right) + ArrayLiteralNode arrayLiteral = new ArrayLiteralNode(listNode.elements, listNode.getIndex()); + BinaryOperatorNode arrowNode = new BinaryOperatorNode("->", arrayLiteral, node.right, node.getIndex()); + arrowNode.accept(bytecodeCompiler); + return; + } + // Handle general case: expr[index] // This covers cases like $matrix[1][0] where $matrix[1] is an expression bytecodeCompiler.handleGeneralArrayAccess(node); diff --git a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java index 726f28a07..d90173235 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Disassemble.java @@ -2125,6 +2125,39 @@ public static String disassemble(InterpretedCode interpretedCode) { break; } + case Opcodes.HASH_DEREF_FETCH_NONSTRICT: { + rd = interpretedCode.bytecode[pc++]; + int hashrefReg = interpretedCode.bytecode[pc++]; + int keyIdx = interpretedCode.bytecode[pc++]; + int pkgIdxH = interpretedCode.bytecode[pc++]; + sb.append("HASH_DEREF_FETCH_NONSTRICT r").append(rd) + .append(" = r").append(hashrefReg).append("->{\""); + if (interpretedCode.stringPool != null && keyIdx < interpretedCode.stringPool.length) { + sb.append(interpretedCode.stringPool[keyIdx]); + } + sb.append("\"} pkg="); + if (interpretedCode.stringPool != null && pkgIdxH < interpretedCode.stringPool.length) { + 
sb.append(interpretedCode.stringPool[pkgIdxH]); + } + sb.append("\n"); + break; + } + + case Opcodes.ARRAY_DEREF_FETCH_NONSTRICT: { + rd = interpretedCode.bytecode[pc++]; + int arrayrefReg = interpretedCode.bytecode[pc++]; + int index = InterpretedCode.readInt(interpretedCode.bytecode, pc); + pc += 1; + int pkgIdxA = interpretedCode.bytecode[pc++]; + sb.append("ARRAY_DEREF_FETCH_NONSTRICT r").append(rd) + .append(" = r").append(arrayrefReg).append("->[").append(index).append("] pkg="); + if (interpretedCode.stringPool != null && pkgIdxA < interpretedCode.stringPool.length) { + sb.append(interpretedCode.stringPool[pkgIdxA]); + } + sb.append("\n"); + break; + } + default: sb.append("UNKNOWN(").append(opcode).append(")\n"); break; diff --git a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java index a3652884a..944e50080 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java +++ b/src/main/java/org/perlonjava/backend/bytecode/Opcodes.java @@ -1902,6 +1902,24 @@ public class Opcodes { */ public static final short ARRAY_DEREF_FETCH = 382; + /** + * Hash dereference + string key + fetch (non-strict refs version). + * Pattern replaced: DEREF_HASH_NONSTRICT + LOAD_STRING + HASH_GET + * Format: HASH_DEREF_FETCH_NONSTRICT rd hashref_reg key_string_index pkg_string_idx + * Effect: rd = hashref_reg->hashDerefNonStrict(pkg).get(stringPool[key_string_index]) + * Equivalent to: $hashref->{key} without strict refs + */ + public static final short HASH_DEREF_FETCH_NONSTRICT = 383; + + /** + * Array dereference + integer index + fetch (non-strict refs version). 
+ * Pattern replaced: DEREF_ARRAY_NONSTRICT + LOAD_INT + ARRAY_GET + * Format: ARRAY_DEREF_FETCH_NONSTRICT rd arrayref_reg index_immediate pkg_string_idx + * Effect: rd = arrayref_reg->arrayDerefNonStrict(pkg).get(index_immediate) + * Equivalent to: $arrayref->[n] without strict refs + */ + public static final short ARRAY_DEREF_FETCH_NONSTRICT = 384; + private Opcodes() { } // Utility class - no instantiation } From 3c68d67e1b7be8868e99d85d8b08d1fe81504333 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 14:32:36 +0100 Subject: [PATCH 07/10] docs: Update superoperators design doc with NONSTRICT variants Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/superoperators.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev/design/superoperators.md b/dev/design/superoperators.md index 63d8c71a8..78bd8ecb7 100644 --- a/dev/design/superoperators.md +++ b/dev/design/superoperators.md @@ -425,3 +425,14 @@ grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ - **Superoperators remain in**: CompileBinaryOperator.java `->` operator handler, where the left side is always compiled in SCALAR context and thus guaranteed to be a scalar reference - This fix resolves the Getopt::Long / life_bitpacked.pl regression + +### Phase 3.3: NONSTRICT Variants (2026-03-12) +- Added `HASH_DEREF_FETCH_NONSTRICT` (opcode 383) and `ARRAY_DEREF_FETCH_NONSTRICT` (opcode 384) +- These handle symbolic references when `no strict 'refs'` is in effect +- Format includes package name for symbolic ref resolution: + - `HASH_DEREF_FETCH_NONSTRICT rd hashref_reg key_string_idx pkg_string_idx` + - `ARRAY_DEREF_FETCH_NONSTRICT rd arrayref_reg index_immediate pkg_string_idx` +- Updated `emitHashDerefGet()` and `emitArrayDerefGet()` to choose between strict/nonstrict variants +- Added disassembler support for NONSTRICT variants +- Fixed `(expr)[index]` compilation: Added ListNode-to-ArrayLiteralNode transformation in +
CompileBinaryOperator.java (matching JVM backend in Dereference.java) From d74a3f0fc184ab5006b236c968c240150cd6b0ea Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 14:36:43 +0100 Subject: [PATCH 08/10] Re-enable superoperators in handleGeneralArrayAccess and handleGeneralHashAccess Now that ListNode cases like (caller)[0] are transformed to array literal + arrow deref before reaching these handlers, it is safe to use superoperators. - handleGeneralArrayAccess: uses emitArrayDerefGet() for chained array access - handleGeneralHashAccess: uses emitHashDerefGet() for chained hash access - Removed redundant code that was duplicating the helper logic - Changed handleGeneralArrayAccess to use SCALAR context (not LIST) Example improvement for $v[1]{a}{b}{c}->[2]: - Before: 50 shorts (DEREF_HASH + LOAD_STRING + HASH_GET sequences) - After: 32 shorts (HASH_DEREF_FETCH_NONSTRICT superoperators) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 107 ++++-------------- 1 file changed, 25 insertions(+), 82 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index e57a25184..ab8e696dc 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -1910,15 +1910,14 @@ private Integer getConstantIntegerIndex(Node indexExpr) { /** * Handle general array access: expr[index] * Example: $matrix[1][0] where $matrix[1] returns an arrayref - * NOTE: Does NOT use superoperators because the base might be a RuntimeList - * (e.g., (caller)[0]), not a scalar arrayref. Superoperators are only safe - * in the -> operator handler where the left side is always a scalar. + * Uses superoperators when the index is an integer literal. 
+ * NOTE: RuntimeList cases like (caller)[0] are handled by ListNode transformation + * in CompileBinaryOperator.java before reaching this method. */ void handleGeneralArrayAccess(BinaryOperatorNode node) { // Compile the left side (the expression that should yield an array or arrayref) - // Force LIST context so comma expressions like (0,0,1,1) create a list, - // not just return the last value (which happens in scalar context) - compileNode(node.left, -1, RuntimeContextType.LIST); + // Use SCALAR context since we expect an arrayref (ListNode cases are transformed earlier) + compileNode(node.left, -1, RuntimeContextType.SCALAR); int baseReg = lastResultReg; // Compile the index expression (right side) @@ -1934,37 +1933,8 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { // Handle single element access if (indexNode.elements.size() == 1) { Node indexExpr = indexNode.elements.get(0); - // Compile index in SCALAR context to ensure RuntimeScalar - compileNode(indexExpr, -1, RuntimeContextType.SCALAR); - int indexReg = lastResultReg; - - // The base might be either: - // 1. A RuntimeArray (from $array which was an array variable) - // 2. A RuntimeScalar containing an arrayref (from $matrix[1]) - // 3. A RuntimeList (from (caller) or similar) - // DEREF_ARRAY handles all these cases correctly. 
- - int arrayReg = allocateRegister(); - if (isStrictRefsEnabled()) { - emit(Opcodes.DEREF_ARRAY); - emitReg(arrayReg); - emitReg(baseReg); - } else { - int pkgIdx = addToStringPool(getCurrentPackage()); - emit(Opcodes.DEREF_ARRAY_NONSTRICT); - emitReg(arrayReg); - emitReg(baseReg); - emit(pkgIdx); - } - - // Now get the element - int rd = allocateOutputRegister(); - emit(Opcodes.ARRAY_GET); - emitReg(rd); - emitReg(arrayReg); - emitReg(indexReg); - - lastResultReg = rd; + // Use superoperator helper - handles both constant and variable indices + lastResultReg = emitArrayDerefGet(baseReg, indexExpr, node.getIndex()); } else { throwCompilerException("Multi-element array access not yet implemented"); } @@ -1973,8 +1943,7 @@ void handleGeneralArrayAccess(BinaryOperatorNode node) { /** * Handle general hash access: expr{key} * Example: $hash{outer}{inner} where $hash{outer} returns a hashref - * NOTE: Does NOT use superoperators because the base might not be a scalar hashref. - * Superoperators are only safe in the -> operator handler. + * Uses superoperators when the key is a constant string. 
*/ void handleGeneralHashAccess(BinaryOperatorNode node) { // Compile the left side (the expression that should yield a hash or hashref) @@ -1995,28 +1964,26 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { if (keyNode.elements.size() == 1) { Node keyExpr = keyNode.elements.get(0); - // Compile the key - int keyReg; - if (keyExpr instanceof IdentifierNode) { - // Bareword key - autoquote it - String keyString = ((IdentifierNode) keyExpr).name; - keyReg = allocateRegister(); - int keyIdx = addToStringPool(keyString); - emit(Opcodes.LOAD_STRING); - emitReg(keyReg); - emit(keyIdx); - } else { - // Expression key - compile it in SCALAR context to ensure RuntimeScalar - compileNode(keyExpr, -1, RuntimeContextType.SCALAR); - keyReg = lastResultReg; - } - // Check if this is a glob slot access: *X{key} // In this case, node.left is an OperatorNode with operator "*" boolean isGlobSlotAccess = (node.left instanceof OperatorNode) && ((OperatorNode) node.left).operator.equals("*"); if (isGlobSlotAccess) { + // Compile the key for glob slot access + int keyReg; + if (keyExpr instanceof IdentifierNode) { + String keyString = ((IdentifierNode) keyExpr).name; + keyReg = allocateRegister(); + int keyIdx = addToStringPool(keyString); + emit(Opcodes.LOAD_STRING); + emitReg(keyReg); + emit(keyIdx); + } else { + compileNode(keyExpr, -1, RuntimeContextType.SCALAR); + keyReg = lastResultReg; + } + // For glob slot access, call hashDerefGetNonStrict directly // This uses RuntimeGlob's override which accesses the slot without dereferencing int rd = allocateOutputRegister(); @@ -2027,33 +1994,9 @@ void handleGeneralHashAccess(BinaryOperatorNode node) { lastResultReg = rd; } else { - // Normal hash access: dereference first, then get element - // The base might be either: - // 1. A RuntimeHash (from %hash which was a hash variable) - // 2. A RuntimeScalar containing a hashref (from $hash{outer}) - // We need to handle both cases. Dereference if needed. 
- - int hashReg = allocateRegister(); - if (isStrictRefsEnabled()) { - emit(Opcodes.DEREF_HASH); - emitReg(hashReg); - emitReg(baseReg); - } else { - int pkgIdx = addToStringPool(getCurrentPackage()); - emit(Opcodes.DEREF_HASH_NONSTRICT); - emitReg(hashReg); - emitReg(baseReg); - emit(pkgIdx); - } - - // Now get the element - int rd = allocateOutputRegister(); - emit(Opcodes.HASH_GET); - emitReg(rd); - emitReg(hashReg); - emitReg(keyReg); - - lastResultReg = rd; + // Normal hash access: use superoperator helper + // The helper handles both RuntimeHash and RuntimeScalar with hashref + lastResultReg = emitHashDerefGet(baseReg, keyExpr, node.getIndex()); } } else { throwCompilerException("Multi-element hash access not yet implemented"); From 8e72e38b6a6e610138718e69a6cfd8c78f9026a7 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 14:37:01 +0100 Subject: [PATCH 09/10] docs: Update superoperators design doc with Phase 3.4 Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- dev/design/superoperators.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dev/design/superoperators.md b/dev/design/superoperators.md index 78bd8ecb7..9c7749e07 100644 --- a/dev/design/superoperators.md +++ b/dev/design/superoperators.md @@ -436,3 +436,12 @@ grep -E '^\s+[0-9]+:' bytecode.txt | sed 's/^[^:]*: //' | \ - Added disassembler support for NONSTRICT variants - Fixed `(expr)[index]` compilation: Added ListNode-to-ArrayLiteralNode transformation in CompileBinaryOperator.java (matching JVM backend in Dereference.java) + +### Phase 3.4: Re-enable Superoperators in General Handlers (2026-03-12) +- Re-enabled superoperators in `handleGeneralArrayAccess()` and `handleGeneralHashAccess()` +- Now that ListNode cases (like `(caller)[0]`) are transformed before reaching these handlers, + it's safe to use superoperators +- Both handlers now use `emitArrayDerefGet()` / `emitHashDerefGet()` helpers +- Changed `handleGeneralArrayAccess` to 
compile left side in SCALAR context (not LIST) +- **Result**: Chained access like `$v[1]{a}{b}{c}->[2]` now uses superoperators throughout +- **Bytecode reduction**: Example went from 50 shorts to 32 shorts (36% reduction) From 208673f6aaba1e9430fdde584c758f5cfe8eaa72 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Thu, 12 Mar 2026 15:00:33 +0100 Subject: [PATCH 10/10] Fix ArrayLiteralNode to evaluate elements in LIST context Matches JVM backend behavior (EmitLiteral.java line 55-56): 'Perl semantics: array literal elements are always evaluated in LIST context' This fixes regressions in op/bop.t tests 36-38 where (keys %h)[0] was incorrectly returning the count instead of the first key, because keys was evaluated in SCALAR context. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin --- .../backend/bytecode/BytecodeCompiler.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index ab8e696dc..94d54b36e 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -4072,10 +4072,15 @@ private int[] toShortArray() { public void visit(ArrayLiteralNode node) { // Array literal: [expr1, expr2, ...] // In Perl, [..] creates an ARRAY REFERENCE (RuntimeScalar containing RuntimeArray) + // Perl semantics: array literal elements are always evaluated in LIST context // Implementation: - // 1. Create a list with all elements + // 1. Create a list with all elements (in LIST context) // 2. 
Convert list to array reference using CREATE_ARRAY (which now returns reference) + // Save current context and use LIST context for elements + int savedContext = currentCallContext; + currentCallContext = RuntimeContextType.LIST; + // Fast path: empty array if (node.elements.isEmpty()) { // Create empty RuntimeList @@ -4091,16 +4096,20 @@ public void visit(ArrayLiteralNode node) { emitReg(listReg); lastResultReg = refReg; + currentCallContext = savedContext; return; } - // General case: evaluate all elements + // General case: evaluate all elements in LIST context int[] elementRegs = new int[node.elements.size()]; for (int i = 0; i < node.elements.size(); i++) { - node.elements.get(i).accept(this); + compileNode(node.elements.get(i), -1, RuntimeContextType.LIST); elementRegs[i] = lastResultReg; } + // Restore context + currentCallContext = savedContext; + // Create RuntimeList with all elements int listReg = allocateRegister(); emit(Opcodes.CREATE_LIST);