From 7b9de2645976c3b42dbf2c0446898aecfd71f942 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 24 Apr 2026 13:05:15 +0200 Subject: [PATCH] fix(parser): process escape sequences inside \Q...\E quotemeta regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, StringDoubleQuoted used an `inQuotemeta` flag that disabled normal escape-sequence handling once \Q was encountered. As a result, sequences like \t, \n, \\, \x41, \041, \cA, \$ and \@ were not decoded inside \Q...\E — they were passed through as literal backslash + char before quotemeta() ran, producing extra backslashes. Real Perl first applies string escapes, then quotemeta. For example: "\Q\\\E" -> real: len=2 (\\) jperl (old): len=4 "\Q\t\E" -> real: len=2 (\<TAB>) jperl (old): len=3 (\\t) "\Q\x41\E" -> real: len=1 (A) jperl (old): len=5 This also fixes a related bug where nested \Q\Q...\E\E was incorrectly treated as idempotent instead of applying quotemeta twice. Fix: remove the `inQuotemeta` flag entirely. \Q simply pushes a "Q" case modifier onto the existing stack; \E pops and wraps the accumulated content in quotemeta(). Escape processing and variable interpolation continue to work normally inside the region, exactly matching Perl. Impact: - Text::Reform 1.20: t/reform.t test 37 now passes (was failing on `form("<<<<<\Q<[^|>]\\\E",123)`); module now installs cleanly via jcpan, unblocking Text::Autoformat and WWW::Wikipedia. - WWW::Wikipedia: with Text::Reform installed, the 11 tests that were failing on `Can't locate Text/Reform.pm` now load the module. The remaining failures are live network calls over http:// (Wikipedia redirects to https://), unrelated to this change. Verified all 11 escape edge cases now match real Perl byte-for-byte. All unit tests pass. 
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 6 +-- .../frontend/parser/StringDoubleQuoted.java | 41 ++++--------------- 2 files changed, 10 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 2522435f3..74bebc7a5 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,14 +33,14 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "3611e23bd"; + public static final String gitCommitId = "4623fa856"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitDate = "2026-04-23"; + public static final String gitCommitDate = "2026-04-24"; /** * Build timestamp in Perl 5 "Compiled at" format (e.g., "Apr 7 2026 11:20:00"). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 24 2026 12:28:51"; + public static final String buildTimestamp = "Apr 24 2026 13:01:40"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java index ba9812f6e..1446b489b 100644 --- a/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java +++ b/src/main/java/org/perlonjava/frontend/parser/StringDoubleQuoted.java @@ -56,14 +56,6 @@ public class StringDoubleQuoted extends StringSegmentParser { */ private final Stack caseModifiers = new Stack<>(); - /** - * Flag indicating whether we're inside a \Q...\E quotemeta region. - * - *

When true, all special characters (including $ and @) are treated as literals, - * and escape sequences are not processed (except \E to end the region). - */ - private boolean inQuotemeta = false; - /** * Private constructor for StringDoubleQuoted parser. * @@ -357,37 +349,20 @@ private Node createJoinNode(List nodes) { /** * Parses escape sequences based on context. * - *

This method delegates to different escape handling based on the - * parseEscapes flag and quotemeta mode: + *

Delegates to different escape handling based on the parseEscapes flag: *

<ul>
- *   <li>inQuotemeta=true: Only \E is special, everything else is literal</li>
  *   <li>parseEscapes=true: Process escapes like \n to actual newline</li>
  *   <li>parseEscapes=false: Preserve escapes for regex engine</li>
  * </ul>
+ * + *

Note: \Q...\E quotemeta regions are handled via the case-modifier stack + * (pushing a "Q" modifier in the \Q handler and applying it in \E), so no + * special in-string state is needed. Inside \Q, escape sequences and variable + * interpolation continue to work normally; the accumulated content is wrapped + * in quotemeta() at the point where \E is encountered. */ @Override protected void parseEscapeSequence() { - if (inQuotemeta) { - // In quotemeta mode, everything is literal except \E - var token = tokens.get(parser.tokenIndex); - if (token.text.startsWith("E")) { - // End quotemeta mode - TokenUtils.consumeChar(parser); - flushCurrentSegment(); - if (!caseModifiers.isEmpty() && caseModifiers.peek().type.equals("Q")) { - applyCaseModifier(caseModifiers.pop()); - } - inQuotemeta = false; - } else if (token.text.startsWith("Q")) { - // In quotemeta mode, \Q is idempotent and should be ignored. - TokenUtils.consumeChar(parser); - } else { - // Everything else is literal, including the backslash - currentSegment.append("\\"); - } - return; - } - if (parseEscapes) { parseDoubleQuotedEscapes(); } else { @@ -423,7 +398,6 @@ private void parseDoubleQuotedEscapesRegex() { // Quotemeta modifier case "Q" -> { flushCurrentSegment(); - inQuotemeta = true; caseModifiers.push(new CaseModifier("Q", false)); } @@ -525,7 +499,6 @@ private void parseDoubleQuotedEscapes() { // Quotemeta modifier case "Q" -> { flushCurrentSegment(); - inQuotemeta = true; caseModifiers.push(new CaseModifier("Q", false)); }