diff --git a/src/main/java/org/perlonjava/astrefactor/LargeNodeRefactorer.java b/src/main/java/org/perlonjava/astrefactor/LargeNodeRefactorer.java
index 608c05847..6a12a6099 100644
--- a/src/main/java/org/perlonjava/astrefactor/LargeNodeRefactorer.java
+++ b/src/main/java/org/perlonjava/astrefactor/LargeNodeRefactorer.java
@@ -150,7 +150,8 @@ private static List createNestedListClosures(List chunks, int tokenI
      * @return true if the list exceeds size thresholds and should be refactored
      */
     private static boolean shouldRefactor(List elements) {
-        // Use sampling to estimate bytecode size - avoid O(n) traversal
+        // Estimate bytecode size by visiting all elements (no sampling)
+        // Sampling was causing inaccurate estimates for mixed element types
         int n = elements.size();
         if (n == 0) {
             return false;
@@ -160,14 +161,18 @@ private static boolean shouldRefactor(List elements) {
             return size > LARGE_BYTECODE_SIZE;
         }
 
-        int sampleSize = Math.min(10, n);
-        long totalSampleSize = 0;
-        for (int i = 0; i < sampleSize; i++) {
-            int index = (int) (((long) i * (n - 1)) / (sampleSize - 1));
-            totalSampleSize += BytecodeSizeEstimator.estimateSnippetSize(elements.get(index));
+        // Sum the per-element estimates, stopping as soon as the threshold is crossed.
+        // Assumes estimateSnippetSize never returns a negative value, so the running
+        // total can only grow and an early exit gives the same answer -- TODO confirm.
+        long totalSize = 0;
+        for (Node element : elements) {
+            totalSize += BytecodeSizeEstimator.estimateSnippetSize(element);
+            if (totalSize > LARGE_BYTECODE_SIZE) {
+                return true;
+            }
         }
-        long estimatedTotalSize = (totalSampleSize * n) / sampleSize;
-        return estimatedTotalSize > LARGE_BYTECODE_SIZE;
+
+        return false;
     }
 
     /**
diff --git a/src/main/java/org/perlonjava/astvisitor/BytecodeSizeEstimator.java b/src/main/java/org/perlonjava/astvisitor/BytecodeSizeEstimator.java
index 0814f83fc..a264fe1dd 100644
--- a/src/main/java/org/perlonjava/astvisitor/BytecodeSizeEstimator.java
+++ b/src/main/java/org/perlonjava/astvisitor/BytecodeSizeEstimator.java
@@ -160,9 +160,11 @@ public void visit(NumberNode node) {
 
     @Override
     public void visit(StringNode node) {
-        // String literals: LDC + object creation = LDC_INSTRUCTION + OBJECT_CREATION (10 bytes)
-        int stringSize = LDC_INSTRUCTION + OBJECT_CREATION;
-        estimatedSize += stringSize;
+        // String literals: Based on actual disassembly showing:
+        // LDC (2-3 bytes for constant pool index)
+        // INVOKESTATIC (3 bytes) - getScalarByteString or similar
+        // Total: 5-6 bytes per string
+        estimatedSize += LDC_INSTRUCTION + INVOKE_STATIC; // 3 + 3 = 6 bytes
     }
 
     @Override
@@ -216,12 +218,25 @@ public void visit(BlockNode node) {
 
     @Override
     public void visit(ListNode node) {
-        // Mirror EmitLiteral.emitList() patterns
-        estimatedSize += OBJECT_CREATION; // Create RuntimeList
+        // Mirror EmitLiteral.emitList() patterns in LIST context
+        // Based on actual disassembly: each element requires DUP + element evaluation + add
+
+        estimatedSize += OBJECT_CREATION; // Create RuntimeList (NEW + DUP + INVOKESPECIAL = 7 bytes)
 
         for (Node element : node.elements) {
+            // Per-element list overhead (DUP + add call)
+            estimatedSize += DUP_INSTRUCTION; // Duplicate RuntimeList reference (1 byte)
+            estimatedSize += METHOD_CALL_OVERHEAD; // RuntimeList.add() call (4 bytes)
+
+            // Let the element estimate itself via visitor pattern
             element.accept(this);
-            estimatedSize += METHOD_CALL_OVERHEAD; // Add to list
+        }
+
+        // Constant pool overhead for large lists
+        // When constant pool grows beyond 256 entries, LDC becomes LDC_W (3 bytes instead of 2)
+        if (node.elements.size() > 200) {
+            // Large constant pool: LDC_W costs 3 bytes instead of 2
+            estimatedSize += node.elements.size(); // +1 byte per element for LDC_W
         }
     }
 