Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
443 changes: 443 additions & 0 deletions dev/modules/devel_declare.md

Large diffs are not rendered by default.

460 changes: 460 additions & 0 deletions dev/modules/html_element.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ public void visit(For3Node node) {
if (node.body != null) {
node.body.accept(this);
}
// continueBlock holds variables referenced by `while {} continue { ... }`.
// Forgetting this caused the selective-capture optimisation in
// SubroutineParser to drop those lexicals from the closure, which
// tripped HTML/Element.pm's look_down at runtime with a
// "Global symbol $nillio requires explicit package name" error.
if (node.continueBlock != null) {
node.continueBlock.accept(this);
}
}

@Override
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/perlonjava/core/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public final class Configuration {
* Automatically populated by Gradle/Maven during build.
* DO NOT EDIT MANUALLY - this value is replaced at build time.
*/
public static final String gitCommitId = "016a235c7";
public static final String gitCommitId = "2bfd27045";

/**
* Git commit date of the build (ISO format: YYYY-MM-DD).
Expand All @@ -48,7 +48,7 @@ public final class Configuration {
* Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at"
* DO NOT EDIT MANUALLY - this value is replaced at build time.
*/
public static final String buildTimestamp = "Apr 25 2026 10:11:42";
public static final String buildTimestamp = "Apr 25 2026 19:38:25";

// Prevent instantiation
private Configuration() {
Expand Down
66 changes: 66 additions & 0 deletions src/main/java/org/perlonjava/frontend/parser/PrototypeArgs.java
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,27 @@ private static int handleTypeGlobArgument(Parser parser, ListNode args, boolean

Node typeglobRef = FileHandle.parseBarewordHandle(parser, idNode.name);
args.elements.add(typeglobRef == null ? expr : typeglobRef);
} else if (expr instanceof StringNode strNode
&& isBuiltinOperator(parser)
&& isValidFilehandleName(strNode.value)) {
// Constant-string filehandle name in a Perl built-in that takes
// a `*` (glob/filehandle) argument: open("FH", $path),
// close "FH", binmode "FH", fileno "FH", eof "FH", and friends.
// Real Perl looks the string up as a typeglob name (the legacy
// "indirect filehandle" idiom). PerlOnJava used to pass the
// literal through as a plain scalar, which produced
// "Modification of a read-only value attempted" in open's case
// and silent no-ops elsewhere.
//
// This is gated to **built-in** operators because the `*`
// prototype is generic: for user-defined `sub foo (*) { }`,
// real Perl passes a literal string through as a SCALAR
// (only barewords and globs get typeglob conversion). See
// comp/proto.t's `star "FOO"` / `star2 "FOO", "BAR"` cases.
String name = strNode.value;
GlobalVariable.getGlobalIO(FileHandle.normalizeBarewordHandle(parser, name));
Node typeglobRef = FileHandle.parseBarewordHandle(parser, name);
args.elements.add(typeglobRef == null ? expr : typeglobRef);
} else {
// Bare scalars
Node scalarArg = ParserNodeUtils.toScalarContext(expr);
Expand All @@ -578,6 +599,51 @@ private static int handleTypeGlobArgument(Parser parser, ListNode args, boolean
return 1;
}

/**
* True if the operator currently being parsed is a Perl built-in
* (registered in {@link ParserTables#CORE_PROTOTYPES}). Used to
* decide whether a literal-string argument in a `*` (glob) slot
* should be looked up as a typeglob (built-in semantics) or passed
* through as a plain scalar (user-defined sub semantics).
*/
private static boolean isBuiltinOperator(Parser parser) {
String name = parser.ctx.symbolTable.getCurrentSubroutine();
return name != null && ParserTables.CORE_PROTOTYPES.containsKey(name);
}

/**
* True if the given string is a syntactically valid Perl filehandle/glob name:
* one or more identifier components (`[A-Za-z_][A-Za-z0-9_]*`) separated by
* `::`. Used to recognise e.g. `open("FH", ...)` or `open("Pkg::FH", ...)`
* and route the literal string to the same path as a bareword.
*/
private static boolean isValidFilehandleName(String s) {
if (s == null || s.isEmpty()) return false;
int n = s.length();
int i = 0;
while (i < n) {
char c = s.charAt(i);
if (!(Character.isLetter(c) || c == '_')) return false;
i++;
while (i < n) {
char d = s.charAt(i);
if (Character.isLetterOrDigit(d) || d == '_') {
i++;
} else {
break;
}
}
if (i >= n) return true;
// Expect "::" between identifier components
if (i + 1 < n && s.charAt(i) == ':' && s.charAt(i + 1) == ':') {
i += 2;
} else {
return false;
}
}
return false;
}

private static void handleListOrHashArgument(Parser parser, ListNode args, boolean needComma) {
if (needComma && !isComma(TokenUtils.peek(parser))) {
return;
Expand Down
111 changes: 108 additions & 3 deletions src/main/java/org/perlonjava/runtime/perlmodule/HTMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,46 @@ private static RuntimeArray buildEventDataFromArgspec(String argspec, String eve
RuntimeArray.push(result, new RuntimeScalar(""));
}
} else {
RuntimeArray.push(result, new RuntimeScalar(""));
// Fall back to tokens[0] for non-PI events
RuntimeArray tokensArr = buildTokensArray(eventName, eventArgs);
if (tokensArr.size() > 0) {
RuntimeArray.push(result, tokensArr.get(0));
} else {
RuntimeArray.push(result, new RuntimeScalar(""));
}
}
break;

case "tokens":
// Array reference of all tokens for this event.
// start => [tagname, attr1, val1, attr2, val2, ...]
// end => [tagname]
// text/dtext => [text]
// comment => [comment_body]
// declaration => [declaration_body]
// process => [pi_body]
RuntimeArray.push(result,
buildTokensArray(eventName, eventArgs).createReference());
break;

case "tokenpos":
// Array reference of [start, end] byte-offset pairs
// matching `tokens`. We don't track byte offsets yet, so
// return a same-length arrayref of [0, 0] pairs. This is
// good enough for callers that just iterate; downstream
// modules treating tokenpos as authoritative will need
// proper offset tracking (currently a TODO at the
// `offset`/`offset_end` cases).
{
RuntimeArray pos = new RuntimeArray();
RuntimeArray tokensArr = buildTokensArray(eventName, eventArgs);
for (int i = 0; i < tokensArr.size(); i++) {
RuntimeArray pair = new RuntimeArray();
RuntimeArray.push(pair, new RuntimeScalar(0));
RuntimeArray.push(pair, new RuntimeScalar(0));
RuntimeArray.push(pos, pair.createReference());
}
RuntimeArray.push(result, pos.createReference());
}
break;

Expand All @@ -693,15 +732,81 @@ private static RuntimeArray buildEventDataFromArgspec(String argspec, String eve
break;

default:
// Unknown argspec token - pass empty string
RuntimeArray.push(result, new RuntimeScalar(""));
// tokenN where N is a non-negative integer => tokens[N]
if (token.length() > 5 && token.startsWith("token")
&& token.substring(5).chars().allMatch(Character::isDigit)) {
int idx;
try {
idx = Integer.parseInt(token.substring(5));
} catch (NumberFormatException e) {
idx = -1;
}
RuntimeArray tokensArr = buildTokensArray(eventName, eventArgs);
if (idx >= 0 && idx < tokensArr.size()) {
RuntimeArray.push(result, tokensArr.get(idx));
} else {
RuntimeArray.push(result, new RuntimeScalar(""));
}
} else {
// Unknown argspec token - pass empty string
RuntimeArray.push(result, new RuntimeScalar(""));
}
break;
}
}

return result;
}

/**
* Build the `tokens` array for a given event, per HTML::Parser semantics.
* See `case "tokens":` above for the per-event shape.
*
* @param eventName the event name (start, end, text, comment, ...)
* @param eventArgs the internal event-arg tuple as passed to fireEvent
* @return a flat RuntimeArray of token scalars (NOT yet a reference)
*/
private static RuntimeArray buildTokensArray(String eventName, RuntimeScalar[] eventArgs) {
RuntimeArray tokens = new RuntimeArray();
if (eventArgs == null || eventArgs.length == 0) {
return tokens;
}
switch (eventName) {
case "start":
// eventArgs = [tagname, attr_hashref, attrseq_arrayref, original_text]
RuntimeArray.push(tokens, eventArgs[0]);
if (eventArgs.length > 2) {
RuntimeScalar attrHashRef = eventArgs[1];
RuntimeScalar attrSeqRef = eventArgs[2];
RuntimeHash attrHash = attrHashRef.hashDeref();
RuntimeArray attrSeq = attrSeqRef.arrayDeref();
int n = attrSeq.size();
for (int i = 0; i < n; i++) {
RuntimeScalar key = attrSeq.get(i);
String keyStr = key.toString();
RuntimeArray.push(tokens, key);
RuntimeArray.push(tokens, attrHash.get(keyStr));
}
}
break;
case "end":
case "text":
case "dtext":
case "comment":
case "declaration":
case "process":
case "default":
RuntimeArray.push(tokens, eventArgs[0]);
break;
default:
// Unknown event: best-effort, push the first arg.
RuntimeArray.push(tokens, eventArgs[0]);
break;
}
return tokens;
}


/**
* Basic HTML parser - fires text, start, end events.
* This is a simplified version; Phase 2 will port the full hparser.c logic.
Expand Down
44 changes: 44 additions & 0 deletions src/test/resources/unit/closure.t
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,48 @@ use feature 'say';
is($inner->(), 130, "nested closure sees both outer updates");
}

# Closure capture inside `while {} continue { ... }` block of a sub
# Regression test: VariableCollectorVisitor.visit(For3Node) used to skip
# continueBlock, so the selective-capture optimisation in SubroutineParser
# would drop variables only referenced from the continue block. The lazy
# compiler then failed at first call with
# Global symbol "$nillio" requires explicit package name
# This was discovered via HTML/Element.pm look_down() in HTML-Tree 5.07.
{
my $captured = [42];
my $foo = sub {
my @pile = (1);
my @out;
my $this;
while (defined($this = shift @pile)) {
push @out, $this;
}
continue {
push @out, @{$captured};
}
return @out;
};
is_deeply([$foo->()], [1, 42], "continue block captures outer my variable");
}

# Same shape with named sub (forces lazy-compile path)
{
my $sentinel = [99];
my @drained;
sub _drain_it {
my @pile = (1, 2);
my @out;
my $this;
while (defined($this = shift @pile)) {
push @out, $this;
}
continue {
push @out, @{$sentinel};
}
return @out;
}
is_deeply([_drain_it()], [1, 99, 2, 99],
"named sub: continue block captures outer my variable");
}

done_testing();
79 changes: 79 additions & 0 deletions src/test/resources/unit/open_string_filehandle.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
use strict;
use warnings;
use Test::More;
use File::Temp qw(tempfile);

# Regression test: 2-arg / 3-arg open with a constant-string first
# argument used to die with "Modification of a read-only value
# attempted" because PerlOnJava treated the literal as a plain
# (read-only) scalar instead of looking it up as a typeglob name.
#
# Real Perl autovivifies *main::FH from the string. Discovered in
# HTML-Tree 5.07 t/oldparse.t which uses
# open( "INFILE", "$TestInput" ) or die "$!";

my ($fh_w, $path) = tempfile(UNLINK => 1);
print {$fh_w} "line one\nline two\n";
close $fh_w;

# 2-arg form: open("FH", $path)
{
open("LITFH1", $path) or die "open LITFH1: $!";
my $line = <LITFH1>;
close LITFH1;
is($line, "line one\n", "2-arg open with literal-string filehandle works");
}

# 3-arg form: open("FH", "<", $path)
{
open("LITFH2", "<", $path) or die "open LITFH2: $!";
my @lines = <LITFH2>;
close LITFH2;
is(scalar(@lines), 2, "3-arg open with literal-string filehandle reads file");
is($lines[1], "line two\n", " ... and produces the right second line");
}

# Package-qualified literal name
{
open("Foo::Bar::FH", "<", $path) or die "open Foo::Bar::FH: $!";
my $line = <Foo::Bar::FH>;
close Foo::Bar::FH;
is($line, "line one\n", "package-qualified literal-string filehandle works");
}

# Lvalue scalar form must still work (regression check that the new
# StringNode case doesn't shadow the my $fh path)
{
open(my $fh, "<", $path) or die "open my \$fh: $!";
my $line = <$fh>;
close $fh;
is($line, "line one\n", "lvalue scalar form still works");
}

# Other built-ins with `*` prototype must also accept a literal-string
# filehandle name (close, fileno, binmode, eof, ...). Real Perl looks
# the string up as a typeglob.
{
open(MYFH, "<", $path) or die $!;
is(fileno("MYFH"), fileno(MYFH),
'fileno with string FH name matches bareword');
ok(close("MYFH"), 'close with string FH name returns truthy');
# And the bareword should now actually be closed.
ok(!fileno(MYFH), 'bareword FH is closed after close("MYFH")');
}

# User-defined sub with `*` prototype must NOT typeglob-convert a string
# literal — only Perl built-ins (those registered in CORE_PROTOTYPES) do
# the indirect-handle lookup. Regression for proto.t failures
# (star "FOO" / star2 "FOO", "BAR") seen when this fix was first landed.
{
my @got;
sub _proto_star (*&) { push @got, $_[0]; $_[1]->() }
_proto_star "ABC", sub { 1 };
is($got[0], "ABC",
'literal string passed to user sub with `*` prototype stays SCALAR');
is(ref(\$got[0]), 'SCALAR',
' ... and is not silently promoted to a glob');
}

done_testing();
Loading