From 43719d170ff2153f1fa4c4667a2a80ee97efa34b Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 2 Apr 2026 18:13:17 +0200 Subject: [PATCH 1/4] Fix \K regex assertion and restore attribute validation Implement \K (keep left) assertion in s/// and m// operators. Previously \K was silently stripped from patterns, causing incorrect substitution behavior. Now \K correctly preserves text before the assertion point during replacement. Implementation: - RegexPreprocessorHelper: Convert \K to named capture group that marks the keep position - RuntimeRegex: In substitution, replace only from \K position to match end instead of full match. In match, adjust match vars accordingly - Propagate hasBackslashK flag through getReplacementRegex copies - Adjust capture group numbering to skip the internal perlK group With \K working, restore variable attribute validation: - OperatorParser: Re-add compile-time error for our variables without MODIFY_*_ATTRIBUTES handler - Attributes.java: Re-add runtime error for my/state variables without handler Test improvements (+27 tests): - attrs.t: 148->156 (+8), uni/attrs.t: 30->33 (+3) - decl-refs.t: 330->346 (+16) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../frontend/parser/OperatorParser.java | 11 +- .../runtime/perlmodule/Attributes.java | 19 ++- .../runtime/regex/RegexPreprocessor.java | 10 ++ .../regex/RegexPreprocessorHelper.java | 9 +- .../runtime/regex/RuntimeRegex.java | 159 ++++++++++++++---- 6 files changed, 164 insertions(+), 46 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index c946e278e..caa3a9fb9 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "f98f6f772"; + public static final String gitCommitId = "a40e7c5fa"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java index fd34a5f62..0013d4868 100644 --- a/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/OperatorParser.java @@ -1310,11 +1310,12 @@ private static void callModifyVariableAttributes(Parser parser, String packageNa emitReservedWordWarning(svtype, nonBuiltinAttrs, parser); } else { // No MODIFY_*_ATTRIBUTES handler found at compile time. - // Don't throw — the handler may be set dynamically (e.g., via glob - // in enclosing eval). The \K regex bug (pre-existing) also corrupts - // handler names in decl-refs.t tests, making handlers invisible. - // Runtime dispatch in Attributes.java will silently return if no - // handler exists. See dev/design/attributes.md "Known Issue: \K". + // For 'our': error immediately (global vars are compile-time). + // For 'my'/'state': defer to runtime dispatch — the handler may + // not be visible yet (e.g., set via glob in enclosing eval). + if (operator.equals("our")) { + SubroutineParser.throwInvalidAttributeError(svtype, nonBuiltinAttrs, parser); + } } } } diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/Attributes.java b/src/main/java/org/perlonjava/runtime/perlmodule/Attributes.java index 5c33976aa..a25b262f1 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/Attributes.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/Attributes.java @@ -512,13 +512,20 @@ public static void runtimeDispatchModifyVariableAttributes( CallerStack.pop(); CallerStack.pop(); } + } else { + // No handler found at runtime — throw error. + // For 'our' variables, this is caught at compile time. + // For 'my'/'state', the compile-time check is deferred to here + // so that dynamically-set handlers (via glob in eval) are found. + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < nonBuiltinAttrs.size(); i++) { + if (i > 0) sb.append(" : "); + sb.append(nonBuiltinAttrs.get(i)); + } + throw new PerlCompilerException( + "Invalid " + svtype + " attribute" + + (nonBuiltinAttrs.size() > 1 ? "s" : "") + ": " + sb); } - // If no handler found at runtime, silently return. - // For 'our' variables, the compile-time check already threw. - // For 'my'/'state', ideally we'd throw here, but the \K regex bug - // (pre-existing) corrupts handler names in decl-refs.t, causing - // false "Invalid attribute" errors. Deferring until \K is fixed. - // See dev/design/attributes.md "Known Issue: \K Regex Bug". } /** diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java index 677dafbe1..424b6974b 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessor.java @@ -56,6 +56,7 @@ public class RegexPreprocessor { static boolean deferredUnicodePropertyEncountered; static boolean inlinePFlagEncountered; static boolean branchResetEncountered; + static boolean backslashKEncountered; static void markDeferredUnicodePropertyEncountered() { deferredUnicodePropertyEncountered = true; @@ -73,6 +74,14 @@ static boolean hadBranchReset() { return branchResetEncountered; } + static void markBackslashK() { + backslashKEncountered = true; + } + + static boolean hadBackslashK() { + return backslashKEncountered; + } + /** * Preprocesses a given regex string to make it compatible with Java's regex engine. * This involves handling various constructs and escape sequences that Java does not @@ -88,6 +97,7 @@ static String preProcessRegex(String s, RegexFlags regexFlags) { deferredUnicodePropertyEncountered = false; inlinePFlagEncountered = false; branchResetEncountered = false; + backslashKEncountered = false; // First, escape invalid quantifier braces (Perl compatibility) // DISABLED: Causes test regressions - needs more work diff --git a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java index bfa999900..a9efc6f4f 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java +++ b/src/main/java/org/perlonjava/runtime/regex/RegexPreprocessorHelper.java @@ -224,11 +224,12 @@ static int handleEscapeSequences(String s, StringBuilder sb, int c, int offset, return offset; } else if (nextChar == 'K') { // \K - keep assertion (reset start of match) - // Convert to positive lookbehind for everything before this point - // This is a simplified implementation + // Insert a zero-width named capture group to mark the \K position. + // During substitution, text before this position is preserved ("kept"). sb.setLength(sb.length() - 1); // Remove the backslash - // Mark position but don't add anything - this needs special handling - // For now, just ignore it to avoid compilation errors + RegexPreprocessor.markBackslashK(); + RegexPreprocessor.captureGroupCount++; + sb.append("(?)"); return offset; } else if ((nextChar == 'b' || nextChar == 'B') && offset + 1 < length && s.charAt(offset + 1) == '{') { // Handle \b{...} and \B{...} boundary assertions diff --git a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java index 0457adb8d..e672581ae 100644 --- a/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java +++ b/src/main/java/org/perlonjava/runtime/regex/RuntimeRegex.java @@ -60,6 +60,8 @@ protected boolean removeEldestEntry(Map.Entry eldest) { // ${^LAST_SUCCESSFUL_PATTERN} public static RuntimeRegex lastSuccessfulPattern = null; public static boolean lastMatchUsedPFlag = false; + // Tracks if the last match used \K, so matcherStart/matcherEnd/matcherSize adjust group offsets + public static boolean lastMatchUsedBackslashK = false; // Capture groups from the last successful match that had captures. // In Perl 5, $1/$2/etc persist across non-capturing matches. public static String[] lastCaptureGroups = null; @@ -85,6 +87,7 @@ protected boolean removeEldestEntry(Map.Entry eldest) { private boolean hasCodeBlockCaptures = false; // True if regex has (?{...}) code blocks private boolean deferredUserDefinedUnicodeProperties = false; private boolean hasBranchReset = false; // True if pattern uses (?|...) branch reset + private boolean hasBackslashK = false; // True if pattern uses \K (keep assertion) public RuntimeRegex() { this.regexFlags = null; @@ -150,6 +153,7 @@ public static RuntimeRegex compile(String patternString, String modifiers) { regex.deferredUserDefinedUnicodeProperties = RegexPreprocessor.hadDeferredUnicodePropertyEncountered(); regex.hasPreservesMatch = regex.regexFlags.preservesMatch() || RegexPreprocessor.hadInlinePFlag(); regex.hasBranchReset = RegexPreprocessor.hadBranchReset(); + regex.hasBackslashK = RegexPreprocessor.hadBackslashK(); regex.patternString = patternString; regex.javaPatternString = javaPattern; @@ -403,6 +407,9 @@ public static RuntimeScalar getReplacementRegex(RuntimeScalar patternString, Run regex.hasPreservesMatch = resolvedRegex.hasPreservesMatch; regex.useGAssertion = resolvedRegex.useGAssertion; regex.patternFlags = resolvedRegex.patternFlags; + regex.hasBranchReset = resolvedRegex.hasBranchReset; + regex.hasBackslashK = resolvedRegex.hasBackslashK; + regex.hasCodeBlockCaptures = resolvedRegex.hasCodeBlockCaptures; // Only recompile if we have new modifiers that actually change the flags if (!modifierStr.isEmpty()) { @@ -426,6 +433,9 @@ public static RuntimeScalar getReplacementRegex(RuntimeScalar patternString, Run regex.hasPreservesMatch = recompiledRegex.hasPreservesMatch; regex.useGAssertion = recompiledRegex.useGAssertion; regex.patternFlags = recompiledRegex.patternFlags; + regex.hasBranchReset = recompiledRegex.hasBranchReset; + regex.hasBackslashK = recompiledRegex.hasBackslashK; + regex.hasCodeBlockCaptures = recompiledRegex.hasCodeBlockCaptures; } else { // Just update the flags without recompiling regex.regexFlags = newFlags; @@ -625,26 +635,53 @@ private static RuntimeBase matchRegexDirect(RuntimeScalar quotedRegex, RuntimeSc // Always initialize $1, $2, @+, @-, $`, $&, $' for every successful match globalMatcher = matcher; globalMatchString = inputStr; + lastMatchUsedBackslashK = regex.hasBackslashK; if (captureCount > 0) { - lastCaptureGroups = new String[captureCount]; - for (int i = 0; i < captureCount; i++) { - lastCaptureGroups[i] = matcher.group(i + 1); + if (regex.hasBackslashK) { + // Skip the internal perlK capture group + int perlKGroup = getPerlKGroup(matcher); + int userGroupCount = captureCount - 1; + if (userGroupCount > 0) { + lastCaptureGroups = new String[userGroupCount]; + int destIdx = 0; + for (int i = 1; i <= captureCount; i++) { + if (i == perlKGroup) continue; + lastCaptureGroups[destIdx++] = matcher.group(i); + } + } else { + lastCaptureGroups = null; + } + } else { + lastCaptureGroups = new String[captureCount]; + for (int i = 0; i < captureCount; i++) { + lastCaptureGroups[i] = matcher.group(i + 1); + } } } else { lastCaptureGroups = null; } - lastMatchedString = matcher.group(0); - lastMatchStart = matcher.start(); + + // For \K, adjust match start/string so $& is only the post-\K portion + if (regex.hasBackslashK) { + int keepEnd = matcher.end("perlK"); + lastMatchedString = inputStr.substring(keepEnd, matcher.end()); + lastMatchStart = keepEnd; + } else { + lastMatchedString = matcher.group(0); + lastMatchStart = matcher.start(); + } lastMatchEnd = matcher.end(); if (regex.regexFlags.isGlobalMatch() && captureCount < 1 && ctx == RuntimeContextType.LIST) { // Global match and no captures, in list context return the matched string - String matchedStr = matcher.group(0); + String matchedStr = regex.hasBackslashK ? lastMatchedString : matcher.group(0); matchedGroups.add(new RuntimeScalar(matchedStr)); } else { // save captures in return list if needed if (ctx == RuntimeContextType.LIST) { + int perlKGroup = regex.hasBackslashK ? getPerlKGroup(matcher) : -1; for (int i = 1; i <= captureCount; i++) { + if (i == perlKGroup) continue; // skip internal \K marker group String matchedStr = matcher.group(i); if (regex.hasBranchReset) { // For branch reset patterns (?|...), skip null groups @@ -922,6 +959,9 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar // Don't reset globalMatcher here - only reset it if we actually find a match // This preserves capture variables from previous matches when substitution doesn't match + // Track position for manual replacement when \K is used + int lastAppendEnd = 0; + // Perform the substitution try { while (matcher.find()) { @@ -930,16 +970,41 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar // Initialize $1, $2, @+, @- only when we have a match globalMatcher = matcher; globalMatchString = inputStr; + lastMatchUsedBackslashK = regex.hasBackslashK; if (matcher.groupCount() > 0) { - lastCaptureGroups = new String[matcher.groupCount()]; - for (int i = 0; i < matcher.groupCount(); i++) { - lastCaptureGroups[i] = matcher.group(i + 1); + if (regex.hasBackslashK) { + // Skip the internal perlK capture group when populating $1, $2, etc. + int perlKGroup = getPerlKGroup(matcher); + int userGroupCount = matcher.groupCount() - 1; + if (userGroupCount > 0) { + lastCaptureGroups = new String[userGroupCount]; + int destIdx = 0; + for (int i = 1; i <= matcher.groupCount(); i++) { + if (i == perlKGroup) continue; + lastCaptureGroups[destIdx++] = matcher.group(i); + } + } else { + lastCaptureGroups = null; + } + } else { + lastCaptureGroups = new String[matcher.groupCount()]; + for (int i = 0; i < matcher.groupCount(); i++) { + lastCaptureGroups[i] = matcher.group(i + 1); + } } } else { lastCaptureGroups = null; } - lastMatchedString = matcher.group(0); - lastMatchStart = matcher.start(); + + // For \K, adjust match start so $& is only the post-\K portion + if (regex.hasBackslashK) { + int keepEnd = matcher.end("perlK"); + lastMatchStart = keepEnd; + lastMatchedString = inputStr.substring(keepEnd, matcher.end()); + } else { + lastMatchStart = matcher.start(); + lastMatchedString = matcher.group(0); + } lastMatchEnd = matcher.end(); String replacementStr; @@ -955,19 +1020,16 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar } if (replacementStr != null) { - // In Java regex replacement strings: - // - // - $1, $2, etc. refer to capture groups from the pattern - // - $0 refers to the entire match - // - \ is used for escaping - // - // When you pass $x as the replacement string, Java interprets it as trying to reference capture group "x", which doesn't exist (capture groups are numbered, not named with letters in basic Java regex). - - // replacementStr = replacementStr.replaceAll("\\\\", "\\\\\\\\"); - - // Append the text before the match and the replacement to the result buffer - // matcher.appendReplacement(resultBuffer, replacementStr); - matcher.appendReplacement(resultBuffer, Matcher.quoteReplacement(replacementStr)); + if (regex.hasBackslashK) { + // \K: preserve text before \K position, only replace after it + int keepEnd = matcher.end("perlK"); + resultBuffer.append(inputStr, lastAppendEnd, keepEnd); + resultBuffer.append(replacementStr); + lastAppendEnd = matcher.end(); + } else { + // Normal replacement: replace the entire match + matcher.appendReplacement(resultBuffer, Matcher.quoteReplacement(replacementStr)); + } } // If not a global match, break after the first replacement @@ -980,7 +1042,11 @@ public static RuntimeBase replaceRegex(RuntimeScalar quotedRegex, RuntimeScalar found = 0; } // Append the remaining text after the last match to the result buffer - matcher.appendTail(resultBuffer); + if (regex.hasBackslashK) { + resultBuffer.append(inputStr, lastAppendEnd, inputStr.length()); + } else { + matcher.appendTail(resultBuffer); + } if (found > 0) { String finalResult = resultBuffer.toString(); @@ -1099,10 +1165,12 @@ public static RuntimeScalar matcherStart(int group) { return scalarUndef; } try { - if (group < 0 || group > globalMatcher.groupCount()) { + // Adjust group number to skip the internal perlK group + int javaGroup = adjustGroupForBackslashK(group); + if (javaGroup < 0 || javaGroup > globalMatcher.groupCount()) { return scalarUndef; } - int start = globalMatcher.start(group); + int start = globalMatcher.start(javaGroup); if (start == -1) { return scalarUndef; } @@ -1120,10 +1188,12 @@ public static RuntimeScalar matcherEnd(int group) { return scalarUndef; } try { - if (group < 0 || group > globalMatcher.groupCount()) { + // Adjust group number to skip the internal perlK group + int javaGroup = adjustGroupForBackslashK(group); + if (javaGroup < 0 || javaGroup > globalMatcher.groupCount()) { return scalarUndef; } - int end = globalMatcher.end(group); + int end = globalMatcher.end(javaGroup); if (end == -1) { return scalarUndef; } @@ -1137,8 +1207,27 @@ public static int matcherSize() { if (globalMatcher == null) { return 0; } + int size = globalMatcher.groupCount(); + // Subtract the internal perlK group if \K was used + if (lastMatchUsedBackslashK) { + size--; + } // +1 because groupCount is zero-based, and we include the entire match - return globalMatcher.groupCount() + 1; + return size + 1; + } + + /** + * Adjust a Perl capture group number to a Java matcher group number, + * skipping the internal perlK named group when \K is active. + */ + private static int adjustGroupForBackslashK(int perlGroup) { + if (!lastMatchUsedBackslashK || globalMatcher == null) { + return perlGroup; + } + int perlKGroup = getPerlKGroup(globalMatcher); + if (perlKGroup < 0) return perlGroup; + // Perl groups before perlK: same number. At or after: add 1. + return perlGroup >= perlKGroup ? perlGroup + 1 : perlGroup; } /** @@ -1483,4 +1572,14 @@ public RuntimeScalar getLastCodeBlockResult() { return null; } + + /** + * Get the group number of the internal perlK named capture group. + * This group is inserted by the preprocessor at the \K position. + */ + private static int getPerlKGroup(Matcher matcher) { + Map namedGroups = matcher.pattern().namedGroups(); + Integer group = namedGroups.get("perlK"); + return group != null ? group : -1; + } } From 9ac685c8e7b68c035c12fe32a0becb74af639a44 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 2 Apr 2026 18:15:30 +0200 Subject: [PATCH 2/4] Add \K regex assertion to changelog and feature matrix Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- docs/about/changelog.md | 1 + docs/reference/feature-matrix.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/about/changelog.md b/docs/about/changelog.md index 1c944ead0..740dac3be 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -21,6 +21,7 @@ Release history of PerlOnJava. See [Roadmap](roadmap.md) for future plans. open FH, "-|" or exec @cmd; - Bugfix: parser now handles `@{${...}}` nested dereference in push/unshift. - Bugfix: regex octal escapes `\10`-`\377` now work correctly. +- Bugfix: `\K` (keep left) assertion now works in `m//` and `s///`. - Bugfix: operator override in Time::Hires now works. - Bugfix: internal temp variables are now pre-initialized. - Optimization: faster list assignment. diff --git a/docs/reference/feature-matrix.md b/docs/reference/feature-matrix.md index 3d19cc84b..53d3ce773 100644 --- a/docs/reference/feature-matrix.md +++ b/docs/reference/feature-matrix.md @@ -372,6 +372,7 @@ my @copy = @{$z}; # ERROR - ✅ **Unicode Properties**: Add regex properties supported by Perl but missing in Java regex. - ✅ **Possessive Quantifiers**: Quantifiers like `*+`, `++`, `?+`, or `{n,m}+`, which disable backtracking, are not supported. - ✅ **Atomic Grouping**: Use of `(?>...)` for atomic groups is supported. +- ✅ **`\K` assertion**: Keep left — in `s///`, text before `\K` is preserved; match variables reflect only the portion after `\K`. - ✅ **Preprocessor**: `\Q`, `\L`, `\U`, `\l`, `\u`, `\E` are preprocessed in regex. - ✅ **Overloading**: `qr` overloading is implemented. See also [overload pragma](#pragmas). From a83b50717cc98146c85340a3bca6cc419a248c32 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 2 Apr 2026 18:30:06 +0200 Subject: [PATCH 3/4] Fix variable attribute dispatch for list declarations When my ($x,$y) : attr is parsed, the 'attributes' annotation is placed on the parent OperatorNode but was not propagated to individual variable nodes during list expansion. This caused runtime attribute dispatch to silently skip list variable declarations. Fix in both backends: - JVM emitter (EmitVariable.java): propagate parent attribute annotations to each child my/state node in list expansion - Interpreter (BytecodeCompiler.java): call emitVarAttrsIfNeeded for each variable in list declarations Test improvements (+4): - attrs.t: 156->158 (only TODO test 155 remains) - uni/attrs.t: 33->35/35 (fully passing) Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../backend/bytecode/BytecodeCompiler.java | 4 +++ .../perlonjava/backend/jvm/EmitVariable.java | 26 +++++++++++++++++++ .../org/perlonjava/core/Configuration.java | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java index 31948abfd..466aa8084 100644 --- a/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java +++ b/src/main/java/org/perlonjava/backend/bytecode/BytecodeCompiler.java @@ -2722,6 +2722,10 @@ void compileVariableDeclaration(OperatorNode node, String op) { throwCompilerException("Unsupported variable type in list declaration: " + sigil); } + // Runtime attribute dispatch for list variable declarations. + // Attributes are stored on the parent my/state node, propagate to each element. + emitVarAttrsIfNeeded(node, reg, sigil); + varRegs.add(reg); wrapWithRef.add(isDeclaredReference); } diff --git a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java index 8416821b4..3f73f97d2 100644 --- a/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java +++ b/src/main/java/org/perlonjava/backend/jvm/EmitVariable.java @@ -1180,6 +1180,13 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { if (node.operand instanceof ListNode listNode) { // my ($a, $b) our ($a, $b) // process each item of the list; then returns the list if (CompilerOptions.DEBUG_ENABLED) ctx.logDebug("handleMyOperator: ListNode operand, contextType=" + ctx.contextType + ", annotations=" + node.annotations); + + // Propagate attribute annotations from the parent declaration to each child. + // When my ($x,$y) : attr is parsed, the "attributes" annotation is on the + // parent OperatorNode but must be carried to each individual variable node + // so that runtime attribute dispatch fires for each variable. + boolean hasParentAttrs = node.annotations != null && node.annotations.containsKey("attributes"); + for (Node element : listNode.elements) { if (element instanceof OperatorNode && "undef".equals(((OperatorNode) element).operator)) { continue; // skip "undef" @@ -1205,6 +1212,11 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { if (scalarVarNode.annotations != null && Boolean.TRUE.equals(scalarVarNode.annotations.get("isDeclaredReference"))) { myNode.setAnnotation("isDeclaredReference", true); } + if (hasParentAttrs) { + myNode.annotations = myNode.annotations != null ? new java.util.HashMap<>(myNode.annotations) : new java.util.HashMap<>(); + myNode.annotations.put("attributes", node.annotations.get("attributes")); + myNode.annotations.put("attributePackage", node.annotations.get("attributePackage")); + } myNode.accept(emitterVisitor.with(RuntimeContextType.VOID)); } else if (operatorNode.operand instanceof ListNode nestedList) { // Handle my(\($d, $e)) - nested list with backslash @@ -1220,16 +1232,30 @@ static void handleMyOperator(EmitterVisitor emitterVisitor, OperatorNode node) { // Create a my node for each variable OperatorNode myNode = new OperatorNode(operator, scalarVarNode, listNode.tokenIndex); myNode.setAnnotation("isDeclaredReference", true); + if (hasParentAttrs) { + myNode.annotations.put("attributes", node.annotations.get("attributes")); + myNode.annotations.put("attributePackage", node.annotations.get("attributePackage")); + } myNode.accept(emitterVisitor.with(RuntimeContextType.VOID)); } } } else { // Unknown structure, fall through to default handling OperatorNode myNode = new OperatorNode(operator, element, listNode.tokenIndex); + if (hasParentAttrs) { + myNode.annotations = new java.util.HashMap<>(); + myNode.annotations.put("attributes", node.annotations.get("attributes")); + myNode.annotations.put("attributePackage", node.annotations.get("attributePackage")); + } myNode.accept(emitterVisitor.with(RuntimeContextType.VOID)); } } else { OperatorNode myNode = new OperatorNode(operator, element, listNode.tokenIndex); + if (hasParentAttrs) { + myNode.annotations = new java.util.HashMap<>(); + myNode.annotations.put("attributes", node.annotations.get("attributes")); + myNode.annotations.put("attributePackage", node.annotations.get("attributePackage")); + } myNode.accept(emitterVisitor.with(RuntimeContextType.VOID)); } } diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index caa3a9fb9..403e96762 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "a40e7c5fa"; + public static final String gitCommitId = "9ac685c8e"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). From 94618a95126843e8caf0b995f517a1ec8d2c3985 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 2 Apr 2026 18:32:16 +0200 Subject: [PATCH 4/4] Update attributes.md design doc with Phase 8 progress Attribute test suite is now at 97.6% pass rate (248/254): - attrs.t: 158/159 (only TODO test remains) - uni/attrs.t: 35/35 (fully passing) - attrproto.t: 51/52 - attrhand.t: 4/4 Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/design/attributes.md | 232 ++++++++------------------------------- 1 file changed, 45 insertions(+), 187 deletions(-) diff --git a/dev/design/attributes.md b/dev/design/attributes.md index 4aaed2b9b..74a6ccb26 100644 --- a/dev/design/attributes.md +++ b/dev/design/attributes.md @@ -168,7 +168,7 @@ Variable attribute dispatch happens in the **emitter/compiler** — when a `my`/ ## Progress Tracking -### Current Status: Phase 7 partially complete (isDeclared + Attribute::Handlers + error format) +### Current Status: Phase 8 — \K regex fix + variable attribute list dispatch complete ### Completed Phases @@ -201,215 +201,73 @@ Variable attribute dispatch happens in the **emitter/compiler** — when a `my`/ - Fixed variable attribute error format with BEGIN failed suffix - Files: `RuntimeCode.java`, `RuntimeGlob.java`, `SubroutineParser.java` -### Current Test Results (2026-04-02) - -| File | Before | After | Delta | -|------|--------|-------|-------| -| attrs.t | 49/130 → 111/158* | 152/158 | +41 | -| attrproto.t | 3/52 | 51/52 | +48 | -| attrhand.t | 0/0 | 4/4 | +4 | -| uni/attrs.t | 10/34 | 29/34 | +19 | -| **Total** | **62/216** | **236/248** | **+112** | +- [x] Phase 8: \K regex fix + variable attribute list dispatch (2026-04-02) + - Fixed `\K` (keep left) regex assertion in `m//` and `s///` + - `\K` was silently stripped from patterns; now inserts `(?)` named capture + - Substitution preserves text before `\K` position, adjusts match variables + - Capture group numbering offsets internal perlK group for user captures + - Restored compile-time/runtime attribute validation (was blocked by \K bug) + - Fixed list variable attribute dispatch: `my ($x,$y) : attr` now properly + propagates attribute annotations from parent node to each child in both + JVM emitter (EmitVariable.java) and interpreter (BytecodeCompiler.java) + - Files: `RegexPreprocessorHelper.java`, `RegexPreprocessor.java`, `RuntimeRegex.java`, + `OperatorParser.java`, `Attributes.java`, `EmitVariable.java`, `BytecodeCompiler.java` + +### Current Test Results (2026-04-02, PR #423) + +| File | Original | After PR #420 | After PR #423 | Total Delta | +|------|----------|---------------|---------------|-------------| +| attrs.t | 49/130 | 152/158* | 158/159** | +109 | +| attrproto.t | 3/52 | 51/52 | 51/52 | +48 | +| attrhand.t | 0/0 | 4/4 | 4/4 | +4 | +| uni/attrs.t | 10/34 | 29/34 | 35/35 | +25 | +| decl-refs.t | — | 346/408 | 346/408 | — | +| **Total** | **62/216** | **236/248** | **248/254** | **+186** | \* attrs.t grew from 130 to 158 tests because the test no longer crashes partway through. +\*\* Only failure is TODO test 155 (RT #3605: ternary/attribute parsing ambiguity). -### Remaining Failures Analysis +### Remaining Failures Analysis (updated 2026-04-02) -#### attrproto.t: 4 remaining (48-51) +#### attrproto.t: 1 remaining (48) -**Root cause: `my sub` parser missing attribute loop after prototype** +**Root cause: `my sub` prototype not stored on RuntimeCode in interpreter backend** | Test | Issue | |------|-------| -| 48 | `my sub lexsub1(bar) : prototype(baz) {}` — `:prototype(baz)` not parsed | -| 49 | Illegal proto warning not emitted for `(bar)` on lexical sub | -| 50 | Illegal proto warning not emitted for `(baz)` on lexical sub | -| 51 | "Prototype overridden" warning not emitted | - -**Fix:** In `StatementResolver.java`, after parsing `(prototype)` for `my sub`, add: -1. Call `emitIllegalProtoWarning()` for the parenthesized prototype -2. A second `while (peek(parser).text.equals(":"))` attribute-parsing loop - -**Effort:** Small — straightforward parser fix. - -#### attrs.t: 24 remaining - -**Group A: `attributes::get` not returning built-in attrs (8 tests: 35-42)** - -| Test | Expected | Got | Issue | -|------|----------|-----|-------| -| 35 | `"method Z"` | `"method"` | `FETCH_CODE_ATTRIBUTES` result not merged with built-in attrs | -| 36 | `"lvalue"` | `""` | `_fetch_attrs` not returning `lvalue` for predeclared subs | -| 37 | `"lvalue method"` | `""` | Same — multiple built-in attrs not returned | -| 38 | `"lvalue"` | `""` | `lvalue` on predeclared then defined sub not fetched | -| 39 | `"method"` | `""` | `method` on already-defined sub not fetched | -| 40 | `"method Z"` | `"Z"` | `method` from built-in + `Z` from FETCH not combined | -| 41-42 | `2`, `4` | `1`, `2` | Variable `tie` via `MODIFY_SCALAR_ATTRIBUTES` — `my $x : A0` dispatch missing | +| 48 | `eval 'my sub lexsub1(bar) : prototype(baz) {}; prototype \&lexsub1'` returns empty — `\&lexsub` produces REF instead of CODE in interpreter | -**Root cause:** `_fetch_attrs` in `Attributes.java` doesn't return `lvalue`/`method` from `RuntimeCode.attributes`. Tests 41-42 need variable attribute dispatch from the parser/emitter. +**Root cause:** In the interpreter (eval STRING), `\&lexsub` returns a REF reference (to the scalar holding the code) instead of a CODE reference. This is an interpreter parity issue with how lexical subs are stored and referenced — not an attribute-specific bug. Tests 49-52 now pass (warnings are correct). -**Fix:** -- Fix `_fetch_attrs` to filter and return built-in CODE attrs (`lvalue`, `method`, `const`) -- For 41-42: implement variable attribute dispatch in emitter (Phase 2) - -**Group B: Variable attribute dispatch missing (4 tests: 27-28, 41-42)** +#### attrs.t: 1 remaining (155 — TODO) | Test | Issue | |------|-------| -| 27 | `my A $x : plugh` — `MODIFY_SCALAR_ATTRIBUTES` not called, no "may clash" warning | -| 28 | Same for multiple attrs | -| 41-42 | `my $x : A0` in loop — tie via MODIFY_SCALAR_ATTRIBUTES not happening | - -**Fix:** Implement variable attribute dispatch. When the parser sees `my $x : Foo`, generate `attributes::->import(__PACKAGE__, \$x, "Foo")`. This requires emitter changes. - -**Group C: Error detection issues (5 tests: 20, 44-45, 87, uni/23)** - -| Test | Expected | Got | Issue | -|------|----------|-----|-------| -| 20 | Error with quoted attr names | Error without quotes | Error message formatting: attrs need double-quoting | -| 44 | `Can't declare scalar dereference in "our"` | `Invalid SCALAR attribute: foo` | Parser doesn't detect `our ${""} : foo` as dereference | -| 45 | `Can't declare scalar dereference in "my"` | `Invalid SCALAR attribute: bar` | Same for `my $$foo : bar` | -| 87 | `Global symbol "$nosuchvar" requires` | `Invalid CODE attribute: foo` | Strict error should be emitted instead of attr error | -| 154 | (TODO test) No separator error | Gets separator error | `$a ? my $var : my $othervar` — `:` parsed as attr separator | - -**Fix:** Multiple parser improvements needed. Tests 44-45 need dereference detection. Test 87 needs strict checking before attribute validation. Test 154 is a known TODO. - -**Group D: `:const` attribute (2 tests: 140, 145)** - -| Test | Expected | Got | Issue | -|------|----------|-----|-------| -| 140 | `Useless use of attribute "const"` warning | No warning | `const` not handled in `_modify_attrs` | -| 145 | `32487` (const closure value) | `undef` | `:Const` -> `const` via MODIFY_CODE_ATTRIBUTES not applied | - -**Fix:** Implement `:const` in `Attributes.java._modify_attrs()` — call the anon sub immediately and capture return value. - -**Group E: `MODIFY_CODE_ATTRIBUTES` returning custom error (2 tests: 32, uni/16)** - -| Test | Expected | Got | -|------|----------|-----| -| 32 | `X at ` (die in handler) | `Invalid CODE attribute: foo` | - -**Root cause:** When `MODIFY_CODE_ATTRIBUTES` dies, the die message should propagate. Currently the error is being replaced by the default "Invalid CODE attribute" message. - -**Group F: Closure prototype handling (3 tests: 124-126)** - -| Test | Expected | Got | -|------|----------|-----| -| 124 | `Closure prototype called` error | Empty `$@` | -| 125 | `Closure prototype called` error | `Not a CODE reference` | -| 126 | `undef` | `"referencing closure prototype"` | - -**Root cause:** Closure prototypes (stubs with captured lexicals) should die with "Closure prototype called" when invoked. This is a runtime feature, not an attribute-specific issue. - -**Group G: Error message suffix (3 tests: 155-157)** - -| Test | Expected | Got | -|------|----------|-----| -| 155 | `...at - line 1.\nBEGIN failed--compilation aborted at - line 1.` | `...at - line 1, near ""` | -| 156 | Same pattern for arrays | Same | -| 157 | Same pattern for hashes | Same | - -**Root cause:** `fresh_perl_is` tests run `./jperl` as a subprocess. The error message format is `"at - line 1."` + `"BEGIN failed--compilation aborted"` suffix. PerlOnJava produces `"at - line 1, near \"\""` instead. +| 155 | (TODO test) `$a ? my $var : my $othervar` — `:` parsed as attr separator instead of ternary | -**Fix:** Two issues: (1) error location format, (2) missing "BEGIN failed" propagation. +**Status:** This is a known Perl 5 edge case (RT #3605). The TODO marker means it's expected to fail. -#### uni/attrs.t: 11 remaining +#### uni/attrs.t: 0 remaining — FULLY PASSING (35/35) -These mirror attrs.t failures with Unicode identifiers: -- Tests 8, 11-12, 16-18, 20-21, 23, 30-31 — same root causes as attrs.t groups A-F above +### Next Steps -### Next Steps (Priority Order) +Most attribute tests now pass. The remaining work is lower-priority: -#### Phase 2: `attributes::get` built-in attrs (HIGH — 8 tests) +#### Interpreter parity: `\&lexsub` in eval STRING (1 test: attrproto.t 48) -Fix `_fetch_attrs` in `Attributes.java` to return built-in CODE attributes (`lvalue`, `method`, `const`) from `RuntimeCode.attributes`. This is a small Java change. +In the interpreter, `\&lexsub` creates a REF (to the scalar holding the code) instead of a CODE reference. This affects `prototype \&lexsub` and is a general interpreter parity issue, not attribute-specific. -- **Files:** `Attributes.java` -- **Tests fixed:** attrs.t 35-40, uni/attrs.t equivalent -- **Effort:** Small - -#### Phase 3: Variable attribute dispatch (MEDIUM — 6+ tests) - -When the parser encounters `my $x : Foo` or `our @arr : Bar`, generate calls to `attributes::->import(__PACKAGE__, \$var, @attrs)`. This requires: - -1. In the JVM emitter (`EmitVariable.java` or `EmitOperator.java`): when a variable declaration has `"attributes"` annotation, emit `attributes::->import(PKG, \$var, @attrs)` -2. In the bytecode interpreter: same -3. Timing: compile-time for `our`, run-time for `my`/`state` - -- **Files:** `EmitVariable.java`, `CompileAssignment.java` (interpreter) -- **Tests fixed:** attrs.t 27-28, 41-42; uni/attrs.t 11-12, 17-18 -- **Effort:** Medium - -#### Phase 4: `my sub` attribute parsing (SMALL — 4 tests) - -Add attribute parsing after prototype in `StatementResolver.java` `my sub` path: -1. Call `emitIllegalProtoWarning()` for `(proto)` syntax -2. Add second `:` attribute loop after prototype - -- **Files:** `StatementResolver.java` -- **Tests fixed:** attrproto.t 48-51 -- **Effort:** Small - -#### Phase 5: `:const` attribute (SMALL — 2 tests) - -Implement `:const` in `Attributes.java._modify_attrs()`: -- When `const` is applied to an already-defined sub, emit "Useless use" warning -- When `const` is applied during sub definition, invoke the sub immediately and replace with constant - -- **Files:** `Attributes.java`, possibly `SubroutineParser.java` -- **Tests fixed:** attrs.t 140, 145 -- **Effort:** Small-Medium - -#### Phase 6: Error message improvements (MEDIUM — 7 tests) - -1. Quote attribute names in error messages with `"attr"` format (test 20) -2. Detect `our ${""}` and `my $$foo` as dereferences before attribute processing (tests 44-45) -3. Ensure `MODIFY_CODE_ATTRIBUTES` die propagates correctly (tests 32, uni/16) -4. Fix "BEGIN failed--compilation aborted" error suffix (tests 155-157) -5. Ensure strict errors take priority over attribute errors (test 87) - -- **Files:** `Attributes.java`, `SubroutineParser.java`, parser error handling -- **Tests fixed:** attrs.t 20, 32, 44-45, 87, 155-157; uni/attrs.t 8, 16, 20-21, 23 -- **Effort:** Medium - -#### Phase 7: Closure prototype feature (LOW — 4 tests) - -PerlOnJava does not have the "closure prototype" concept that Perl 5 has. In Perl 5, when a named sub is compiled that closes over lexical variables, the initial CV (before cloning) is a "closure prototype" — it has the captured variable slots but they are not yet bound to specific pad instances. This prototype is accessible via `MODIFY_CODE_ATTRIBUTES` (`$_[1]` before the sub is fully instantiated). Calling a closure prototype should die with "Closure prototype called". - -**What needs to be implemented:** -1. Detect when a RuntimeCode is a closure prototype (has captured variable slots but the closure hasn't been instantiated/cloned yet) -2. In `RuntimeCode.apply()`, check for the prototype state and die with "Closure prototype called" instead of executing the body -3. The prototype should still be referenceable (test 126: `\&{$proto}` should return a ref to it) - -**Test details:** -- Test 124: `eval { $proto->() }` — should die with `/^Closure prototype called/` -- Test 125: `eval { () = &$proto }` — should die with `/^Closure prototype called/` -- Test 126: `\&{$proto}` — should return a reference (referencing closure prototype) - -- **Files:** `RuntimeCode.java`, possibly `EmitSubroutine.java` -- **Tests fixed:** attrs.t 124-126; uni/attrs.t 30-31 -- **Effort:** Medium — requires implementing a new concept in the runtime - -#### Phase 8: Attribute::Handlers (LOW — 4 tests) - -The infrastructure (`attributes.pm`, CHECK blocks, MODIFY_CODE_ATTRIBUTES) is now in place. The remaining blockers are likely edge cases in glob manipulation, ref-identity comparison, or `undef &sub` syntax within `Attribute::Handlers.pm` internals. - -- **Files:** Possibly runtime fixes -- **Tests fixed:** attrhand.t 1-4 -- **Effort:** Unknown — needs investigation +- **Files:** Interpreter's variable reference handling +- **Effort:** Medium — requires changes to how lexical subs are dereferenced in the interpreter ### Estimated Final Results -| Phase | Tests Fixed | Cumulative | -|-------|-----------|------------| -| Current | — | 205/244 (84%) | -| Phase 2 | +8 | 213/244 (87%) | -| Phase 3 | +6 | 219/244 (90%) | -| Phase 4 | +4 | 223/244 (91%) | -| Phase 5 | +2 | 225/244 (92%) | -| Phase 6 | +12 | 237/244 (97%) | -| Phase 7 | +4 | 241/244 (99%) | -| Phase 8 | +4 | 244/244 (100%) | +| Status | Tests Passing | Pass Rate | +|--------|-------------|-----------| +| Current (PR #423) | 248/254 | 97.6% | +| If interpreter \&lexsub fixed | 249/254 | 98.0% | + +The only remaining attrs.t failure is TODO test 155 (expected failure). ### Open Questions