From 768e97f8049234c103d26818a929f3bd097b5f80 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Wed, 29 Apr 2026 10:52:22 +0200 Subject: [PATCH] fix(parser): do not consume keywords across whitespace in qualified names In real perl, qualified identifiers like %Foo:: cannot contain whitespace around the :: separator. So "%Foo:: and 2" tokenizes as the stash hash %Foo:: followed by the low-precedence operator `and`, while "%Foo::and" (no space) is the hash named "and" in package Foo. PerlOnJava's IdentifierParser was skipping whitespace immediately after :: (and after the legacy ' separator), which caused it to greedily pull the next keyword into the qualified name. As a result, "%Foo:: and 2" was parsed as "%Foo::and 2" and rejected with a syntax error. This broke the bundled Dumpvalue.pm, whose line 110 reads `and %overload:: and defined &{'overload::StrVal'};`, which in turn broke any code path that lazily required Dumpvalue. The user-visible symptom was that CPAN.pm's error reporter (require'd from CPAN/Shell.pm) failed to compile, so e.g. `jcpan -t JSON::Literal` died with a confusing parse error instead of the actual CPAN error. Fix: stop calling Whitespace.skipWhitespace after consuming :: or ' in parseComplexIdentifierInner. The next token must be flush against the separator to be treated as a continuation of the qualified name. Adds a regression test (src/test/resources/unit/stash_var_keyword_op.t) covering the keyword operators (and / or / xor / not / cmp), the high-precedence forms (&&, ||) which were already correct, and the exact Dumpvalue.pm pattern that triggered the original report.
Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +- .../frontend/parser/IdentifierParser.java | 20 +++-- .../resources/unit/stash_var_keyword_op.t | 77 +++++++++++++++++++ 3 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/unit/stash_var_keyword_op.t diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 0644eb8b5..d5ec19601 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "5f0ebe111"; + public static final String gitCommitId = "3fb27ed18"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. 
*/ - public static final String buildTimestamp = "Apr 29 2026 10:39:59"; + public static final String buildTimestamp = "Apr 29 2026 10:51:13"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/frontend/parser/IdentifierParser.java b/src/main/java/org/perlonjava/frontend/parser/IdentifierParser.java index ded9bd4d3..f36d8aeed 100644 --- a/src/main/java/org/perlonjava/frontend/parser/IdentifierParser.java +++ b/src/main/java/org/perlonjava/frontend/parser/IdentifierParser.java @@ -383,10 +383,9 @@ public static String parseComplexIdentifierInner(Parser parser, boolean insideBr variableName.append("::"); parser.tokenIndex++; - // Skip whitespace after ' - parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); - - // Update token references + // Update token references. Do NOT skip whitespace here: + // in real perl, qualified names cannot contain whitespace + // around the separator. "$Foo' bar" is not "$Foo::bar". token = parser.tokens.get(parser.tokenIndex); nextToken = parser.tokens.get(parser.tokenIndex + 1); @@ -404,8 +403,16 @@ public static String parseComplexIdentifierInner(Parser parser, boolean insideBr variableName.append(token.text); parser.tokenIndex++; - // Skip whitespace after :: - parser.tokenIndex = Whitespace.skipWhitespace(parser, parser.tokenIndex, parser.tokens); + // Do NOT skip whitespace after ::. In real perl, "$Foo:: bar" + // is parsed as the stash glob "$Foo::" followed by the bareword + // "bar"; whitespace breaks the qualified name. Specifically, + // "%Foo:: and 2" must tokenize as the stash hash %Foo:: followed + // by the low-precedence operator `and`, not as %Foo::and. + // Previously we skipped whitespace here and accidentally pulled + // the next keyword (and / or / not / xor / cmp / eq / ...) into + // the identifier, which broke e.g. 
the bundled Dumpvalue.pm + // (`and %overload:: and defined ...`) and any code path that + // required loading it (notably CPAN.pm's error reporter). // Check what follows :: token = parser.tokens.get(parser.tokenIndex); @@ -413,6 +420,7 @@ public static String parseComplexIdentifierInner(Parser parser, boolean insideBr // After ::, only identifiers or another :: are allowed (or ' as package separator) // Note: Keywords CAN be valid identifier parts after :: (e.g., $Foo::and, &UNIVERSAL::isa) + // — but only when they are flush against ::, with no intervening whitespace. if (token.type != LexerTokenType.IDENTIFIER && !token.text.equals("::") && !token.text.equals("'")) { // Nothing valid follows ::, so return what we have return variableName.toString(); diff --git a/src/test/resources/unit/stash_var_keyword_op.t b/src/test/resources/unit/stash_var_keyword_op.t new file mode 100644 index 000000000..e9e3c84b0 --- /dev/null +++ b/src/test/resources/unit/stash_var_keyword_op.t @@ -0,0 +1,77 @@ +use strict; +use warnings; +use Test::More tests => 12; + +# Regression: parsing of a package-stash variable like %Foo:: must NOT +# consume a following whitespace-separated keyword (and / or / not / xor / +# cmp / eq / ne / lt / gt / le / ge / x) as part of the identifier. +# +# Real perl tokenizes "%Foo:: and 2" as the stash hash %Foo:: followed by +# the low-precedence operator "and"; only "%Foo::and" (no space) is the +# hash named "and" in package Foo. PerlOnJava previously skipped whitespace +# after :: and accidentally produced %Foo::and, causing a syntax error and +# (because the bundled Dumpvalue.pm uses `and %overload:: and ...`) breaking +# any code path through CPAN.pm's error reporter. +# See https://github.com/fglock/PerlOnJava/issues for the cpan -t JSON::Literal +# repro that surfaced this. + +package Foo; +our $touched = 0; + +package main; + +# Make sure the stash exists before we read it. 
+$Foo::dummy = 1; + +# 1: bare stash hash followed by `and` operator +my $r1 = (%Foo:: and 1); +is($r1, 1, '%Foo:: and 1 (whitespace before "and" must keep "and" as operator)'); + +# 2: bare stash hash followed by `or` operator +my $r2 = (%Foo:: or 'fallback'); +ok($r2, '%Foo:: or ... (whitespace before "or" must keep "or" as operator)'); + +# 3: bare stash hash followed by `not` is just illegal-as-statement in perl, +# but `! %Foo:: and 1` and `not %Foo:: and 1` parse fine. +my $r3 = (not %Foo::) ? 0 : 1; +is($r3, 1, 'not %Foo:: (whitespace after :: before nothing parses)'); + +# 4..7: same with the other low-precedence keyword operators +my $r4 = (%Foo:: xor 0); +ok($r4, '%Foo:: xor 0'); + +my $r5 = (1 and %Foo:: and 2); +is($r5, 2, '1 and %Foo:: and 2'); + +my $r6 = (1 and %Foo::); +ok($r6, '1 and %Foo:: (trailing stash with no following operator)'); + +my $r7 = (%Foo:: && 1); +is($r7, 1, '%Foo:: && 1 (high-precedence form, regression check)'); + +# 8: comparison operators must also stay as operators +my @keys = sort keys %Foo::; +ok(@keys, 'keys %Foo:: returns something'); +ok((scalar(@keys) cmp 0) >= 0, 'scalar(keys %Foo::) cmp 0'); + +# 9: %Foo::and (no space) should still be the hash named "and" in Foo +%Foo::and = (a => 1); +is(scalar(keys %Foo::and), 1, '%Foo::and (no space) is still hash named "and" in Foo'); + +# 10: $Foo::or (no space) is the scalar named "or" in Foo +$Foo::or = 'value'; +is($Foo::or, 'value', '$Foo::or (no space) is still scalar named "or" in Foo'); + +# 11: control case — Dumpvalue's exact pattern from line 110 must compile +eval q{ + my $self = { bareStringify => 1 }; + my $val = "x"; + no strict 'refs'; + $val = &{'overload::StrVal'}($val) + if $self->{bareStringify} and ref \$val + and %overload:: and defined &{'overload::StrVal'}; + 1; +} or do { + fail("Dumpvalue.pm pattern compiles: $@"); +}; +pass("Dumpvalue.pm pattern (and %overload:: and defined ...) compiles cleanly");