From ad22ba56a2756ab9b51b050ea00d3f27a1e1258f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 27 Mar 2026 17:23:09 +0100 Subject: [PATCH 1/5] fix: detect __DATA__/__END__ and no Module; terminators in source filters When applying source filters, detect when the filtered output ends with a terminator pattern (__DATA__, __END__, or "no Module;") and stop filtering at that point. The remaining source after the terminator is appended unchanged. This is important for Filter::Simple which uses these terminators to mark where the filter should stop processing. Without this fix: - Content after __DATA__/__END__ would be lost or filtered incorrectly - Code after "no Module;" would be filtered when it should pass through Fixes: - Filter::Simple t/data.t - DATA section now preserved correctly - Filter::Simple t/filter.t - "no Module;" terminator now handled Note: t/filter_only.t still fails due to a separate POD parsing bug (content between =end and =cut not treated as POD). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../runtime/perlmodule/FilterUtilCall.java | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 5e9af45fa..6ecff8344 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "8bb1eff41"; + public static final String gitCommitId = "6de2022af"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java b/src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java index c7ffde8df..4db2f5fb5 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java @@ -293,6 +293,29 @@ public static String applyFilters(String sourceCode) { if (debug) { System.err.println("[FILTER] Got chunk: " + chunk); } + + // Check if the chunk ends with __DATA__, __END__, or "no Module;" terminator + // If so, stop filtering and append remaining source unchanged + // This is important for Filter::Simple which stops at these terminators + // Pattern matches: + // - __DATA__ or __END__ at end of line + // - "no ModuleName;" at start of line (with optional comment) + if (chunk.matches("(?sm).*^__(?:DATA|END)__\\s*$") || + chunk.matches("(?sm).*^\\s*no\\s+[\\w:]+\\s*;.*$")) { + // Append remaining source unchanged + if (debug) { + System.err.println("[FILTER] Hit terminator, currentLine=" + context.currentLine + + ", totalLines=" + context.sourceLines.length); + } + while (context.currentLine < context.sourceLines.length) { + filteredCode.append(context.sourceLines[context.currentLine]); + context.currentLine++; + } + continueFiltering = false; + if (debug) { + System.err.println("[FILTER] Hit __DATA__/__END__ terminator, appending remaining source unchanged"); + } + } } // Check status - convert to scalar if it's a list @@ -318,7 +341,8 @@ public static String applyFilters(String sourceCode) { } if (debug) { - System.err.println("[FILTER] Final filtered code: " + filteredCode.toString().substring(0, Math.min(200, filteredCode.length()))); + System.err.println("[FILTER] Final filtered code length: " + filteredCode.length()); + System.err.println("[FILTER] Final filtered code: " + filteredCode.toString()); } return filteredCode.toString(); From 9153c9ba8c94f1d593e2281a30a2d2d01cf9d3bc Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 27 Mar 2026 17:28:13 +0100 Subject: [PATCH 2/5] fix: correct POD parsing - only =cut terminates POD, not =end Per perlpod documentation, =end formatname only ends a =begin block, but does not exit the POD section. Content between =end and =cut should still be treated as POD documentation, not code. This fixes Filter::Simple filter_only.t which uses =begin/=end blocks. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 2 +- .../frontend/parser/Whitespace.java | 6 +- src/test/resources/unit/pod.t | 77 +++++++++++++++++++ 3 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/unit/pod.t diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 6ecff8344..69d64f6e8 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "6de2022af"; + public static final String gitCommitId = "0dee8b213"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java index 12b8bbb9d..e8959f77f 100644 --- a/src/main/java/org/perlonjava/frontend/parser/Whitespace.java +++ b/src/main/java/org/perlonjava/frontend/parser/Whitespace.java @@ -51,12 +51,12 @@ public static int skipWhitespace(Parser parser, int tokenIndex, List if (tokenIndex + 1 < tokens.size() && tokens.get(tokenIndex + 1).type == LexerTokenType.IDENTIFIER) { boolean inPod = true; - // Skip through pod section until 'cut' or 'end' is found + // Skip through pod section until '=cut' is found + // Note: '=end formatname' only ends a =begin block, not the entire POD section while (tokenIndex < tokens.size() && inPod) { String podEqual = tokens.get(tokenIndex).text; String podToken = tokens.get(tokenIndex + 1).text; - if (podEqual.equals("=") - && (podToken.equals("cut") || podToken.equals("end"))) { + if (podEqual.equals("=") && podToken.equals("cut")) { inPod = false; // End of pod } diff --git a/src/test/resources/unit/pod.t b/src/test/resources/unit/pod.t new file mode 100644 index 000000000..268ca03a7 --- /dev/null +++ b/src/test/resources/unit/pod.t @@ -0,0 +1,77 @@ +# Test POD (Plain Old Documentation) parsing + +use strict; +use warnings; + +print "1..6\n"; + +my $test = 1; + +=pod + +Basic POD block + +=cut + +print "ok $test - basic POD block\n"; +$test++; + +=head1 NAME + +Test documentation + +=cut + +print "ok $test - =head1 POD block\n"; +$test++; + +=begin scrumbly + +This is inside a scrumbly format block. + +=end scrumbly + +This text is between =end scrumbly and =cut. +Per perlpod, this should be treated as POD, not code. +foo bar baz + +=cut + +print "ok $test - =begin/=end block with trailing content\n"; +$test++; + +=begin comment + +A comment block + +=end comment + +More POD content after =end but before =cut + +=cut + +print "ok $test - =begin comment block\n"; +$test++; + +=pod + +=end + +standalone =end stays in POD + +=cut + +print "ok $test - standalone =end inside POD\n"; +$test++; + +=begin cut + +this format is named 'cut' + +=end cut + +still in POD after =end cut + +=cut + +print "ok $test - =begin cut / =end cut format\n"; From 021c320dce05388ba7e8eb3ac79d43db3095641f Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 27 Mar 2026 17:52:07 +0100 Subject: [PATCH 3/5] fix: correct FILTER_ONLY @transforms scope for PerlOnJava execution model In native Perl, source filters process remaining source incrementally during parsing, so each filter completes before the next filter module is loaded. In PerlOnJava, we tokenize upfront then apply filters, so multiple filter modules may be loaded before filters run. This caused @transforms (a package variable in Filter::Simple) to accumulate transforms from different modules. When the second filter ran, its $multitransform closure would include the first module transforms, causing incorrect behavior. Fix: Make @transforms lexical in FILTER_ONLY so each call has its own transform list. Mark Filter::Simple as protected in config.yaml. Also adds Filter::Simple to bundled modules (with tests). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/import-perl5/config.yaml | 11 +++++++++++ src/main/java/org/perlonjava/core/Configuration.java | 2 +- src/main/perl/lib/Filter/Simple.pm | 7 +++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/dev/import-perl5/config.yaml b/dev/import-perl5/config.yaml index 1bae669ae..3eb963bf3 100644 --- a/dev/import-perl5/config.yaml +++ b/dev/import-perl5/config.yaml @@ -640,6 +640,17 @@ imports: - source: perl5/cpan/Term-ANSIColor/lib/Term/ANSIColor.pm target: src/main/perl/lib/Term/ANSIColor.pm + # Filter::Simple - Simplified source filtering (used by Log::Log4perl :resurrect, etc.) + # Protected: has PerlOnJava-specific fix for @transforms scope in FILTER_ONLY + - source: perl5/dist/Filter-Simple/lib/Filter/Simple.pm + target: src/main/perl/lib/Filter/Simple.pm + protected: true + + # Tests for Filter::Simple + - source: perl5/dist/Filter-Simple/t + target: perl5_t/Filter-Simple + type: directory + # Class::Struct - Declare struct-like datatypes as Perl classes # Required by File::stat.pm - source: perl5/lib/Class/Struct.pm diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 69d64f6e8..06592f092 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "0dee8b213"; + public static final String gitCommitId = "9153c9ba8"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). diff --git a/src/main/perl/lib/Filter/Simple.pm b/src/main/perl/lib/Filter/Simple.pm index 924c2aecb..fafc5c07b 100644 --- a/src/main/perl/lib/Filter/Simple.pm +++ b/src/main/perl/lib/Filter/Simple.pm @@ -144,6 +144,13 @@ sub FILTER (&;$) { sub FILTER_ONLY { my $caller = caller; + # PerlOnJava fix: @transforms must be lexical, not package-scoped. + # In native Perl, filters process source incrementally during parsing, + # so each filter completes before the next filter module is loaded. + # In PerlOnJava, we tokenize upfront then apply filters, so multiple + # filter modules may be loaded before any filter runs. Using a package + # variable causes transforms from different modules to accumulate. + my @transforms; while (@_ > 1) { my ($what, $how) = splice(@_, 0, 2); fail "Unknown selector: $what" From a6326543dabfcf21e04164f6b3f9cb66f98e2535 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 27 Mar 2026 17:54:05 +0100 Subject: [PATCH 4/5] docs: document FILTER_ONLY @transforms issue and future Java-side fix Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/design/source_filters.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/dev/design/source_filters.md b/dev/design/source_filters.md index bc94f8129..7d69b8b56 100644 --- a/dev/design/source_filters.md +++ b/dev/design/source_filters.md @@ -475,6 +475,37 @@ print "X"; # Should print Y ### Remaining Work - [ ] **Phase 4**: Add method filter support (currently returns original source for method filters) - [ ] Add debug environment variable documentation (JPERL_FILTER_DEBUG=1) +- [ ] **Phase 5**: Fix FILTER_ONLY @transforms issue in Java instead of patching Filter::Simple (see below) + +### Known Issues + +#### FILTER_ONLY @transforms Scope Issue (2026-03-27) + +**Problem**: When multiple filter modules using `FILTER_ONLY` are loaded in sequence, the second filter's `$multitransform` closure incorrectly includes transforms from the first module. + +**Root Cause**: In Filter::Simple, `@transforms` is a package variable. In native Perl, this works because filters process source incrementally - each filter completes before the next filter module is loaded. In PerlOnJava, we tokenize upfront then apply filters, so multiple filter modules may be loaded before any filter runs, causing `@transforms` to accumulate transforms from different modules. + +**Current Fix**: Patched `Filter::Simple.pm` to make `@transforms` lexical in `FILTER_ONLY`: +```perl +sub FILTER_ONLY { + my $caller = caller; + my @transforms; # Made lexical instead of package-scoped + ... +} +``` + +**TODO - Proper Java-side Fix**: The ideal solution would be to fix this in PerlOnJava's module loading code: +1. Before loading a module that may use `FILTER_ONLY`, save `@Filter::Simple::transforms` +2. Clear `@Filter::Simple::transforms` +3. After module loading completes, restore the saved value + +This would allow using unmodified upstream Filter::Simple. The challenge is detecting which modules will use `FILTER_ONLY` before loading them. Possible approaches: +- Clear `@Filter::Simple::transforms` before every `require` (may have side effects) +- Track filter module loading depth and isolate transforms per level +- Hook into Filter::Simple's FILTER_ONLY to auto-reset before each call + +**Files affected by current fix**: +- `src/main/perl/lib/Filter/Simple.pm` (marked as `protected: true` in config.yaml) ### Files Modified - `src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java` From 23ed6059499646abf9797943e9dbb489d86e3d21 Mon Sep 17 00:00:00 2001 From: "Flavio S. Glock" Date: Fri, 27 Mar 2026 17:56:31 +0100 Subject: [PATCH 5/5] docs: add Filter::Simple to changelog and feature matrix Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- docs/about/changelog.md | 3 ++- docs/reference/feature-matrix.md | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/about/changelog.md b/docs/about/changelog.md index c4ec38a09..ed30a84cd 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -31,6 +31,8 @@ Release history of PerlOnJava. See [Roadmap](roadmap.md) for future plans. - The interpreter mode excels at dynamic eval STRING operations (46x faster than compilation for unique strings, matching Perl 5 performance). For general code, it runs only 15% slower than Perl 5. It is also useful for implementing debugging, handling "Method too large" errors, and enabling Android and GraalVM compatibility. - Planned release date: 2026-04-10. +- Add modules: `Filter::Simple` with `FILTER` and `FILTER_ONLY` support. + - Work in Progress - PerlIO - `get_layers` @@ -45,7 +47,6 @@ Release history of PerlOnJava. See [Roadmap](roadmap.md) for future plans. - `ungetc` - Auto-bless filehandle into IO::Handle subclass - IO::Seekable - - Filter::Simple - Math::BigInt - Text::ParseWords - Text::Tabs diff --git a/docs/reference/feature-matrix.md b/docs/reference/feature-matrix.md index d0dc1a213..2a0090893 100644 --- a/docs/reference/feature-matrix.md +++ b/docs/reference/feature-matrix.md @@ -692,6 +692,7 @@ The `:encoding()` layer supports all encodings provided by Java's `Charset.forNa - ✅ **ExtUtils::MakeMaker** module: PerlOnJava version installs pure Perl modules directly. - ✅ **Fcntl** module - ✅ **FileHandle** module +- ✅ **Filter::Simple** module: `FILTER` and `FILTER_ONLY` for source code filtering. - ✅ **File::Basename** use the same version as Perl. - ✅ **File::Find** use the same version as Perl. - ✅ **File::Spec::Functions** module.