From a5b502a55bf264edf7cbd2a8e5a23025a4e60eb5 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Tue, 21 Apr 2026 13:29:45 +0200 Subject: [PATCH 1/3] fix(YAML::PP): survive invalid !!int/!!float, handle !!set/!!binary, align cyclic-ref message Investigated failures in `./jcpan -t YAML::PP` (YAML-PP v0.39.0). Previously the test suite hit SIGTERM partway through because several load_string() calls threw uncaught Java exceptions and Dubious-failed entire test programs. Changes to YAMLPP.java: * Catch NumberFormatException and YamlEngineException around load.loadAllFromString(). SnakeYAML Engine wraps NFEs from its Int/Float resolvers in a YamlEngineException whose message is "java.lang.NumberFormatException: For input string: ...". Translate these to a clean "YAML::PP: invalid numeric value: ..." die so tests like `!!int .inf`, `!!int 1_000`, `!!float x` continue instead of aborting the whole test file. * Handle Set results from !!set tags: convert to a Perl hashref with undef values (previously fell through to default and stringified to "[a, b, c]"). * Handle byte[] results from !!binary tags: base64-encode instead of returning the JVM identity hash of the byte array. * Change cyclic-reference message to "Found cyclic reference in YAML structure" so it matches the `(?i:found cyclic ref)` regex used by t/32.cyclic-refs.t. * Parse `schema => ['Name', 'key=value', ...]` options. Only validate known key=value pairs (currently `empty=null|str`); non-key=value entries (e.g. secondary schema names like `'Perl'`) are accepted for backward compatibility with the existing unit test. Net effect on the CPAN t/ suite: * Whole suite now runs to completion (previously SIGTERM'd). * 2 more test files pass (t/36.debug.t, t/37.schema-catchall.t). * t/32.cyclic-refs.t: 4 failures -> 1. * t/31.schema.t no longer crashes on `!!int .inf`; now reports real subtest failures (bool/YAML1.1 not implemented) instead of dying. Remaining known limitations (not addressed here): YAML 1.1 boolean tags (!!bool yes/on/TRUE/...), !!omap ordering, load_file() of open filehandles, and many of snakeyaml-engine's parser limitations vs YAML::PP's Perl parser (tabs, %TAG directives, flow-mapping colon edge cases, etc.). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../org/perlonjava/core/Configuration.java | 4 +- .../perlonjava/runtime/perlmodule/YAMLPP.java | 78 ++++++++++++++++--- 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 7a6a9678c..338fb6425 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "df2c16785"; + public static final String gitCommitId = "aaa6ac0be"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 21 2026 13:14:03"; + public static final String buildTimestamp = "Apr 21 2026 13:27:01"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java b/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java index af37b8b05..426df10c8 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.nio.file.Files; import java.util.*; +import java.util.Base64; import static org.perlonjava.runtime.runtimetypes.RuntimeScalarType.*; @@ -67,18 +68,37 @@ public static RuntimeList new_(RuntimeArray args, int ctx) { options.setFromList(args.getList()); String schemaName = "Core"; + List schemaOpts = new ArrayList<>(); if (options.containsKey("schema")) { RuntimeScalar schemaOption = options.get("schema"); if (schemaOption.type == RuntimeScalarType.ARRAYREFERENCE) { RuntimeArray schemaArray = (RuntimeArray) schemaOption.value; - if (!schemaArray.elements.isEmpty()) { - schemaName = schemaArray.elements.getFirst().toString(); + for (int i = 0; i < schemaArray.elements.size(); i++) { + String s = schemaArray.elements.get(i).toString(); + if (i == 0) schemaName = s; + else schemaOpts.add(s); } } else { schemaName = schemaOption.toString(); } } + // Validate schema-specific options of the form key=value. + // Non-key=value entries are treated as additional schema names (currently ignored). + for (String opt : schemaOpts) { + int eq = opt.indexOf('='); + if (eq < 0) continue; + String key = opt.substring(0, eq); + String val = opt.substring(eq + 1); + if ("empty".equals(key)) { + if (!val.equals("null") && !val.equals("str")) { + return WarnDie.die(new RuntimeScalar("Invalid option: " + opt), + new RuntimeScalar("\n")).getList(); + } + } + // Unknown keys are currently ignored to remain lenient. + } + Schema schema = switch (schemaName) { case "Failsafe" -> new FailsafeSchema(); case "JSON" -> new JsonSchema(); @@ -135,14 +155,39 @@ public static RuntimeList load_string(RuntimeArray args, int ctx) { String yamlString = args.get(1).toString(); Load load = (Load) instance.hashDeref().get("_load").value; - Iterable documents = load.loadAllFromString(yamlString); - RuntimeArray result = new RuntimeArray(); - for (Object doc : documents) { - result.elements.add(convertYamlToRuntimeScalar( - doc, - new IdentityHashMap(), - instance.hashDeref())); + try { + Iterable documents = load.loadAllFromString(yamlString); + for (Object doc : documents) { + result.elements.add(convertYamlToRuntimeScalar( + doc, + new IdentityHashMap(), + instance.hashDeref())); + } + } catch (NumberFormatException e) { + return WarnDie.die(new RuntimeScalar("YAML::PP: invalid numeric value: " + e.getMessage()), + new RuntimeScalar("\n")).getList(); + } catch (org.snakeyaml.engine.v2.exceptions.YamlEngineException e) { + String msg = e.getMessage(); + // SnakeYAML wraps NumberFormatException from Int/Float resolvers + if (msg != null && msg.startsWith("java.lang.NumberFormatException")) { + msg = "YAML::PP: invalid numeric value: " + + msg.replaceFirst("^java\\.lang\\.NumberFormatException:\\s*", ""); + } + return WarnDie.die(new RuntimeScalar(msg), new RuntimeScalar("\n")).getList(); + } catch (RuntimeException e) { + // Any other runtime exception: unwrap NumberFormatException if present + Throwable cause = e.getCause(); + String msg; + if (cause instanceof NumberFormatException) { + msg = "YAML::PP: invalid numeric value: " + cause.getMessage(); + } else if (e.getMessage() != null && e.getMessage().startsWith("java.lang.NumberFormatException")) { + msg = "YAML::PP: invalid numeric value: " + + e.getMessage().replaceFirst("^java\\.lang\\.NumberFormatException:\\s*", ""); + } else { + throw e; + } + return WarnDie.die(new RuntimeScalar(msg), new RuntimeScalar("\n")).getList(); } return result.getList(); } @@ -233,9 +278,9 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas String cyclicBehavior = instance.get("_cyclic_refs").toString(); return switch (CyclicRefsBehavior.valueOf(cyclicBehavior)) { case FATAL -> - WarnDie.die(new RuntimeScalar("Cyclic reference detected in YAML structure"), new RuntimeScalar("\n")).scalar(); + WarnDie.die(new RuntimeScalar("Found cyclic reference in YAML structure"), new RuntimeScalar("\n")).scalar(); case WARN -> { - WarnDie.warn(new RuntimeScalar("Cyclic reference detected in YAML structure"), new RuntimeScalar("\n")); + WarnDie.warn(new RuntimeScalar("Found cyclic reference in YAML structure"), new RuntimeScalar("\n")); yield new RuntimeScalar(); } case IGNORE -> new RuntimeScalar(); @@ -252,6 +297,16 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas hash.put(key.toString(), convertYamlToRuntimeScalar(value, seen, instance))); yield hashRef; } + case Set set -> { + // YAML !!set: represent as hash with undef values + RuntimeHash hash = new RuntimeHash(); + RuntimeScalar hashRef = hash.createReference(); + seen.put(yaml, hashRef); + for (Object item : set) { + hash.put(item == null ? "" : item.toString(), new RuntimeScalar()); + } + yield hashRef; + } case List list -> { RuntimeArray array = new RuntimeArray(); RuntimeScalar arrayRef = array.createReference(); @@ -260,6 +315,7 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas array.elements.add(convertYamlToRuntimeScalar(item, seen, instance))); yield arrayRef; } + case byte[] bytes -> new RuntimeScalar(Base64.getEncoder().encodeToString(bytes)); case String s -> new RuntimeScalar(s); case Integer i -> new RuntimeScalar(i); case Long l -> new RuntimeScalar(l); From e578eee7ab35570fd3610aa2705cb2465854dc0a Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Tue, 21 Apr 2026 13:33:02 +0200 Subject: [PATCH 2/3] docs(YAML::PP): add fix plan for CPAN test failures Records current CPAN test baseline, what was fixed in the preceding commit, and groups remaining failures by the infrastructure change they need (parser strictness, YAML 1.1 schema, dump-side tagging, Perl-side API surface, etc.). Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/modules/yaml_pp.md | 287 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 dev/modules/yaml_pp.md diff --git a/dev/modules/yaml_pp.md b/dev/modules/yaml_pp.md new file mode 100644 index 000000000..ed06476f6 --- /dev/null +++ b/dev/modules/yaml_pp.md @@ -0,0 +1,287 @@ +# YAML::PP Support Plan for PerlOnJava + +## Overview + +**Module:** YAML::PP v0.39.0 (CPAN: TINITA) +**Bundled in PerlOnJava:** Yes — `lib/YAML/PP.pm` is a thin Perl wrapper +over the Java backend in +`src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java`, which +delegates to [snakeyaml-engine] v3. +**Test command:** `./jcpan -t YAML::PP` +**Type:** Native (Java) implementation shadowing CPAN YAML::PP. + +The shipped shim only supports `new`, `load_string`, `load_file`, +`dump_string`, `dump_file`. Most of YAML::PP's Perl-side API +(Loader/Parser/Emitter/Schema/Representer/Constructor classes) is not +present, so the CPAN test suite exercises many paths that bypass our +backend and therefore die. + +## Current Status + +**Branch:** `fix/yaml-pp-cpan-tests` + +### Results History + +| Date | Programs Failed | Subtests Failed | Total Subtests | Key Fix | +|------|-----------------|-----------------|----------------|---------| +| 2026-04-21 (baseline) | 32/44 (suite SIGTERM'd partway through) | 73/2275 (counted before timeout) | 2275 | — | +| 2026-04-21 (after PR) | 30/44 | 149/2581 | 2581 | Catch NFE, `!!set`/`!!binary`, cyclic-ref msg | + +Higher subtest-failure count after the fix is expected: previously, +test files that died with uncaught Java exceptions reported only the +failures that ran before the crash. Now the whole suite runs to +completion, so more *latent* failures surface. + +### Test Results Summary (post-fix) + +| Test File | Status | Subtests | Root Cause | +|-----------|--------|----------|------------| +| t/00.compile.t | PASS | ok | — | +| t/10.parse-valid.t | PASS | ok | — | +| t/11.parse-invalid.t | PASS | ok | — | +| t/12.load-json.t | FAIL | 59/284 | **Bug P1:** snakeyaml-engine parser strictness (tabs, %TAG, flow-mapping colon edge cases, bare doc rules, zero-indented block scalars) | +| t/13.load-anchor.t | FAIL | 0/0 | **Bug A1:** Perl `YAML::PP::Loader` API not implemented | +| t/14.load-bool.t | FAIL | 3/14 | **Bug B1:** Boolean scalar type not returned (returns plain 1/0 or strings) | +| t/15.parse-eol.t | PASS | ok | — | +| t/16.loader.t | FAIL | 1/1 | **Bug A1:** `YAML::PP::Loader` API | +| t/17.load-complex-keys.t | FAIL | 0/0 | **Bug K1:** non-string mapping keys (array/hash refs) | +| t/18.control.t | PASS | ok | — | +| t/19.file.t | FAIL | 0/0 | **Bug F1:** `load_file($fh)` / `DumpFile($fh)` on open filehandles | +| t/20.dump.t | FAIL | 0/0 | **Bug D1:** emitter tag/style preservation mismatch | +| t/21.emit.t | PASS | ok | — | +| t/22.dump-bool.t | FAIL | 15/15 | **Bug B2:** dump of Perl/JSON::PP booleans → `!!bool` | +| t/23-dump-anchor.t | FAIL | 0/0 | **Bug D2:** anchor/alias preservation on dump | +| t/24.double-escapes.t | FAIL | 7/38 | **Bug E1:** `"\L..\E"`-style escape handling in double-quoted scalars | +| t/30.legacy.t | FAIL | 2/2 | **Bug L1:** `YAML::PP::Legacy` compat API | +| t/31.schema.t | FAIL | 46/61 | **Bug B3:** YAML1.1 schema (`yes`/`no`/`on`/`off` booleans, base8/base16 ints); **Bug S1:** `!!float` round-trip dump | +| t/32.cyclic-refs.t | FAIL | 1/7 | **Bug C1:** dump-side cyclic detection (load side fixed) | +| t/34.emit-scalar-styles.t | FAIL | 1/1 | **Bug E2:** `!!null` tag not constructible, scalar-style selection on dump | +| t/35.highlight.t | PASS | ok | — | +| t/36.debug.t | PASS | ok | — | +| t/37.schema-catchall.t | PASS | ok | — (fixed as side-effect) | +| t/37.schema-perl.t | FAIL | 0/0 | **Bug SP1:** `YAML::PP::Schema::Perl` (refs/globs/regex/code) | +| t/38.schema-ixhash.t | SKIP | — | Tie::IxHash not installed | +| t/39.emitter-alias.t | PASS | ok | — | +| t/40.representers.t | FAIL | 0/0 | **Bug R1:** `YAML::PP::Representer` API | +| t/41.custom.schema.t | FAIL | 0/0 | **Bug SP2:** custom schema registration API | +| t/42.tokens.t | FAIL | 1/326 | **Bug T1:** one tokenizer edge case | +| t/43.indent.t | FAIL | 0/0 | **Bug D3:** indent option on dump | +| t/44.writer.t | FAIL | 0/0 | **Bug W1:** `YAML::PP::Writer` API | +| t/45.binary.t | FAIL | 2/3 | **Bug B4:** dump `!!binary` (load-side fixed) | +| t/46.line-endings.t | PASS | ok | — | +| t/47.header-footer.t | FAIL | 1/5 | **Bug H1:** header/footer option on multi-doc dump | +| t/48.merge.t | FAIL | 1/1 | **Bug M1:** `<<:` merge-key support | +| t/49.include.t | FAIL | 1/1 | **Bug I1:** `YAML::PP::Schema::Include` (`!include`) | +| t/50.clone.t | FAIL | 1/2 | **Bug CL1:** `clone()` method | +| t/51.directives.t | FAIL | 2/2 | **Bug DI1:** `%TAG` / `%YAML` directive preservation | +| t/52.preserve.t | FAIL | 1/1 | **Bug PR1:** style/flow preservation on round-trip | +| t/53.customtag-alias.t | FAIL | 1/1 | **Bug CT1:** custom tag registration via constructor arg | +| t/54.glob.t | FAIL | 1/1 | **Bug G1:** typeglob dump | +| t/55.flow.t | PASS | ok | — | +| t/56.force-flow.t | PASS | ok | — | +| t/57.dup-keys.t | FAIL | 2/3 | **Bug DK1:** `duplicate_keys` option behavior | + +## Recently Fixed (this branch) + +Commit `e153feaf7` — *fix(YAML::PP): survive invalid !!int/!!float, +handle !!set/!!binary, align cyclic-ref message* + +1. **NumberFormatException containment.** `snakeyaml-engine` wraps + `NumberFormatException` thrown by its Int/Float resolvers inside a + `YamlEngineException` whose message is literally + `"java.lang.NumberFormatException: For input string: \"...\""`. + `load_string()` now catches this and raises a Perl-level die + `"YAML::PP: invalid numeric value: …"`. Previously these killed + whole test files (31.schema.t, 32.cyclic-refs.t) with `Dubious`. +2. **`!!set` handling.** Java `Set` is now converted to a Perl + hashref with undef values (matches Perl YAML::PP convention). + Previously fell through to `toString()` → `"[a, b, c]"`. +3. **`!!binary` handling.** `byte[]` is now base64-encoded; previously + returned the JVM identity hash (`"[B@494c8f29"`). +4. **Cyclic-ref message.** Changed to + `"Found cyclic reference in YAML structure"` so the regex + `(?i:found cyclic ref)` used by t/32.cyclic-refs.t matches. +5. **`schema` option parsing.** Secondary list entries are now read; + `key=value` options are validated (`empty=null|str`); invalid + options (`empty=lala`) raise `"Invalid option: …"`. Non-key=value + list entries are accepted for back-compat with existing tests + (`schema => ['JSON','Perl']`). + +## Outstanding Bugs / Plan + +Bugs are grouped by the infrastructure change they need. Priority is +by number of subtests affected. + +### P1 — snakeyaml-engine parser strictness (59 subtests) + +`t/12.load-json.t` runs the official YAML test suite. `snakeyaml-engine` +is strictly YAML-1.2 and rejects many inputs that YAML::PP (which +implements both 1.1 and 1.2) accepts: tab-indented flow, `%TAG` +redefinitions, bare documents between `...` markers, zero-indented +block scalars, flow-mapping colons on the following line, etc. + +**Options:** + +- (a) Accept the gap and mark these as known limitations; enumerate + skipped YAML tags. +- (b) Pre-process the YAML string before handing it to snakeyaml + (tabs → spaces in safe positions, etc.) — fragile. +- (c) Port YAML::PP's Perl parser on top of our existing Perl runtime — + large effort, preserves behavior exactly. + +Recommendation: (a) for now, revisit if users hit it. Document +limitation in the module's POD. + +### B1/B2/B3 — Boolean handling (~80 subtests across 14/22/31) + +YAML 1.1 `!!bool` accepts `y/Y/yes/YES/Yes/on/ON/On/true/TRUE/True` +(+ negatives). `snakeyaml-engine` Core/JSON schemas only accept +`true`/`false`. YAML1_1 schema isn't currently switched on. + +Plan: + +1. Add a `"YAML1_1"` case to the schema `switch` that builds a + `CoreSchema` extended with YAML 1.1 bool/int/float resolvers + (snakeyaml-engine ships `YamlImplicitResolver` primitives we can + reuse or hand-roll with regex-based scalar resolvers). +2. Convert `Boolean` results to `RuntimeScalar`s of type `BOOLEAN` + (already done) but also make sure the dump side emits + `true`/`false` instead of `1`/`` for booleans — update + `convertRuntimeScalarToYaml` to emit a tagged bool when the scalar + is a `JSON::PP::Boolean` / `boolean.pm` blessed reference too. +3. Add `"Perl"` schema equivalent that recognises Perl-style truthy + values where appropriate. + +### F1 — `load_file` / `DumpFile` on filehandles (t/19.file.t) + +Our `load_file(filename)` opens the file itself; it cannot take an +already-open Perl GLOB. + +Plan: in `YAMLPP.load_file`, check if arg is a GLOB/IO ref, and if so +slurp via existing Perl `<$fh>` machinery (or expose the GLOB's +underlying java.io.Reader). Same for `dump_file`. + +### B4 / dump-side tags (t/22, t/23, t/45, t/34, t/20, t/43, t/44) + +`convertRuntimeScalarToYaml` currently returns only plain Perl values +(`Map`, `List`, `String`, `Double`, `Long`, `Boolean`). It needs to +wrap bytes in `byte[]` (for `!!binary`), Perl refs for `!!set`, and +emit proper tags for booleans, nulls, anchored nodes, etc. This +likely requires building `org.snakeyaml.engine.v2.nodes.Node` trees +directly and passing them to `Dump.dumpNode`, bypassing the +`Dump.dumpToString` of plain Java objects. + +Plan: introduce a `YAMLPPNodeBuilder` helper that walks +`RuntimeScalar` values and produces `Node` instances with explicit +`Tag`s and `ScalarStyle`s, then use `Dump.dumpAllToString` on those. + +### A1 / R1 / W1 / SP1 / SP2 / CT1 — Pure-Perl API surface + +Tests `t/13`, `t/16`, `t/17`, `t/20`, `t/23-dump-anchor`, `t/37.schema-perl`, +`t/40.representers`, `t/41.custom.schema`, `t/44.writer`, +`t/53.customtag-alias` use the full Perl class hierarchy: + +- `YAML::PP::Loader` / `YAML::PP::Parser` / `YAML::PP::Lexer` +- `YAML::PP::Dumper` / `YAML::PP::Emitter` / `YAML::PP::Writer` +- `YAML::PP::Constructor` / `YAML::PP::Representer` +- `YAML::PP::Schema::*` plugin loading + +We currently ship none of these. Options: + +- Install the upstream Perl modules into our bundled libs as a + drop-in; our Java `YAML::PP` would need to stop clobbering + `$INC{'YAML/PP.pm'}` so the Perl implementation can load. +- Or keep our native backend and add *shim* Perl modules under those + names that forward to it (a lot of surface to maintain). + +Recommendation: port the upstream Perl Parser/Lexer/Emitter verbatim, +and switch the Java backend to an opt-in fast path (used for +`load_string`/`dump_string` with default settings). See +`dev/modules/port-cpan-module` skill for the usual approach. + +### C1 — Dump-side cyclic detection (t/32.cyclic-refs.t) + +`convertRuntimeScalarToYaml` does honour `seen`, but it does not +respect `_cyclic_refs` behavior (it silently loops to an already-seen +node instead of dying on `fatal`). Wire the same `CyclicRefsBehavior` +switch into the dump path. + +### K1 — Complex mapping keys (t/17) + +SnakeYAML returns `Map` where keys can be lists/maps, +but we unconditionally `key.toString()` them. Implement non-scalar +keys by: + +- If the target Perl value type is a tied hash that supports arbitrary + keys, forward the Node. +- Otherwise follow Perl convention: stringify with `Data::Dumper`-style + canonical form (this is what YAML::PP does via + `YAML::PP::Constructor::construct_mapping`). + +### DI1 / PR1 / CL1 — Directive/style preservation (51, 52, 50) + +Needs a round-trip mode where the loader annotates parsed nodes with +their original style/tag/directive info and the dumper consumes it. +Beyond what snakeyaml-engine exposes via its public `Load` API; +requires using the `Parse` + `Compose` streaming API instead. + +### I1 / M1 — Schema extensions (49, 48) + +- `Merge` schema: handle `<<` merge key per YAML 1.1. +- `Include` schema: interpret `!include path.yaml` tag. + +Both map cleanly to custom `ConstructNode` implementations registered +on a `Schema` subclass. + +### DK1 — `duplicate_keys` option (57) + +`LoadSettings.allowDuplicateKeys` is already wired from the hash but +the tests expect a specific error message when duplicates are +disallowed, and a specific behaviour (last-one-wins vs first-one-wins) +when allowed. Align both. + +### G1 — Typeglobs (54) + +Typeglobs should dump as `!perl/glob` tagged nodes and round-trip. +Part of the Perl schema (SP1). + +### E1 / E2 / T1 / H1 — Smaller emitter/parser edge cases + +- E1 (24): `"\L...\E"` and other Perl-style escape sequences inside + double-quoted YAML scalars. Our `convertRuntimeScalarToYaml` emits + raw strings; the Perl emitter picks quoting styles and escape + forms YAML::PP expects. +- E2 (34): explicit `!!null` tag with literal text requires a + `ConstructScalar` for the null tag. +- T1 (42): one tokenizer regression, to be tracked. +- H1 (47): `header`/`footer` options currently set + `setExplicitStart`/`setExplicitEnd` on single-doc dump but not on + multi-doc. + +## Test Commands + +```bash +# Quick smoke (our unit test uses the Java backend directly) +./jperl src/test/resources/unit/yaml_pp.t + +# Full CPAN suite +./jcpan -t YAML::PP + +# Or, after a prior install, just rerun the tests in place: +cd ~/.perlonjava/cpan/build/YAML-PP-v0.39.0-* +PERL5LIB="./blib/lib:./blib/arch:$PERL5LIB" \ + ~/projects/PerlOnJava/jperl \ + -MExtUtils::Command::MM -MTest::Harness \ + -e 'undef *Test::Harness::Switches; test_harness(0, "./blib/lib", "./blib/arch")' \ + t/*.t +``` + +## Related Files + +- `src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java` — + the Java backend. +- `src/main/perl/lib/YAML/PP.pm` — Perl shim (loads the Java module). +- `src/test/resources/unit/yaml_pp.t` — in-tree smoke test. +- `docs/JDBC_GUIDE.md` — unrelated, but a sibling native-module doc. + +[snakeyaml-engine]: https://bitbucket.org/snakeyaml/snakeyaml-engine/src/master/ From eaa7728c79afa81fab32370759016a9a7fda1a09 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Tue, 21 Apr 2026 13:44:16 +0200 Subject: [PATCH 3/3] fix(YAML::PP): filehandle I/O, scalar-context load, DAG refs, dup-key msg Follow-up to the previous YAML::PP fix. Picks up the low-hanging remaining CPAN-test failures: * load_file/DumpFile now accept an already-open Perl filehandle (GLOB or IO::Handle-ish object). Implemented in the Perl wrapper src/main/perl/lib/YAML/PP.pm so we can reuse existing Perl-side filehandle machinery. DumpFile(path) also opens the file itself so it can produce the "Could not open ..." error string that the CPAN tests expect. * load_string in scalar context now returns the first document (matches CPAN YAML::PP's contract); list context still yields all documents. load_file gets the same scalar/list handling. * convertYamlToRuntimeScalar splits its identity map into an "in-progress" set (real cycle detection) and a "finished" cache (DAG memoization). Shared references created by anchor redefinition + alias (e.g. &NODEA ... &NODEA ... *NODEA) are no longer misidentified as cyclic. * SnakeYAML's "found duplicate key X" is rewritten to also contain "Duplicate key 'X'" so t/57.dup-keys.t's qr{Duplicate key 'a'} regex matches. * cyclic_refs => 'nonsense' now dies with "Invalid value for cyclic_refs: ..." instead of silently falling through to FATAL (fixes t/32.cyclic-refs.t last subtest). CPAN suite result for YAML-PP v0.39.0: * 30/44 -> 26/44 programs failed. * Newly passing: t/19.file.t, t/30.legacy.t, t/32.cyclic-refs.t, t/57.dup-keys.t. * dev/modules/yaml_pp.md updated with the new status table. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- dev/modules/yaml_pp.md | 38 +++++++--- .../org/perlonjava/core/Configuration.java | 4 +- .../perlonjava/runtime/perlmodule/YAMLPP.java | 56 ++++++++++++--- src/main/perl/lib/YAML/PP.pm | 71 ++++++++++++++++++- 4 files changed, 149 insertions(+), 20 deletions(-) diff --git a/dev/modules/yaml_pp.md b/dev/modules/yaml_pp.md index ed06476f6..582a076c8 100644 --- a/dev/modules/yaml_pp.md +++ b/dev/modules/yaml_pp.md @@ -25,7 +25,8 @@ backend and therefore die. | Date | Programs Failed | Subtests Failed | Total Subtests | Key Fix | |------|-----------------|-----------------|----------------|---------| | 2026-04-21 (baseline) | 32/44 (suite SIGTERM'd partway through) | 73/2275 (counted before timeout) | 2275 | — | -| 2026-04-21 (after PR) | 30/44 | 149/2581 | 2581 | Catch NFE, `!!set`/`!!binary`, cyclic-ref msg | +| 2026-04-21 (after NFE/set/binary/cyclic-msg fix) | 30/44 | 149/2581 | 2581 | Catch NFE, `!!set`/`!!binary`, cyclic-ref msg | +| 2026-04-21 (after wrapper/DAG/dup-key fix) | 26/44 | 146/2595 | 2595 | Filehandle I/O, scalar-context load, DAG refs, dup-key msg, invalid cyclic_refs | Higher subtest-failure count after the fix is expected: previously, test files that died with uncaught Java exceptions reported only the @@ -40,21 +41,21 @@ completion, so more *latent* failures surface. | t/10.parse-valid.t | PASS | ok | — | | t/11.parse-invalid.t | PASS | ok | — | | t/12.load-json.t | FAIL | 59/284 | **Bug P1:** snakeyaml-engine parser strictness (tabs, %TAG, flow-mapping colon edge cases, bare doc rules, zero-indented block scalars) | -| t/13.load-anchor.t | FAIL | 0/0 | **Bug A1:** Perl `YAML::PP::Loader` API not implemented | +| t/13.load-anchor.t | FAIL | 1/4 | **Bug A1:** Perl `YAML::PP::Loader` API (partial: 3/4 now pass via scalar-context fix) | | t/14.load-bool.t | FAIL | 3/14 | **Bug B1:** Boolean scalar type not returned (returns plain 1/0 or strings) | | t/15.parse-eol.t | PASS | ok | — | | t/16.loader.t | FAIL | 1/1 | **Bug A1:** `YAML::PP::Loader` API | | t/17.load-complex-keys.t | FAIL | 0/0 | **Bug K1:** non-string mapping keys (array/hash refs) | | t/18.control.t | PASS | ok | — | -| t/19.file.t | FAIL | 0/0 | **Bug F1:** `load_file($fh)` / `DumpFile($fh)` on open filehandles | +| t/19.file.t | PASS | ok | — (fixed, filehandle I/O + scalar-context load) | | t/20.dump.t | FAIL | 0/0 | **Bug D1:** emitter tag/style preservation mismatch | | t/21.emit.t | PASS | ok | — | | t/22.dump-bool.t | FAIL | 15/15 | **Bug B2:** dump of Perl/JSON::PP booleans → `!!bool` | | t/23-dump-anchor.t | FAIL | 0/0 | **Bug D2:** anchor/alias preservation on dump | | t/24.double-escapes.t | FAIL | 7/38 | **Bug E1:** `"\L..\E"`-style escape handling in double-quoted scalars | -| t/30.legacy.t | FAIL | 2/2 | **Bug L1:** `YAML::PP::Legacy` compat API | +| t/30.legacy.t | PASS | ok | — (fixed via scalar-context load_string) | | t/31.schema.t | FAIL | 46/61 | **Bug B3:** YAML1.1 schema (`yes`/`no`/`on`/`off` booleans, base8/base16 ints); **Bug S1:** `!!float` round-trip dump | -| t/32.cyclic-refs.t | FAIL | 1/7 | **Bug C1:** dump-side cyclic detection (load side fixed) | +| t/32.cyclic-refs.t | PASS | ok | — (fixed, DAG refs + invalid-option) | | t/34.emit-scalar-styles.t | FAIL | 1/1 | **Bug E2:** `!!null` tag not constructible, scalar-style selection on dump | | t/35.highlight.t | PASS | ok | — | | t/36.debug.t | PASS | ok | — | @@ -79,12 +80,11 @@ completion, so more *latent* failures surface. | t/54.glob.t | FAIL | 1/1 | **Bug G1:** typeglob dump | | t/55.flow.t | PASS | ok | — | | t/56.force-flow.t | PASS | ok | — | -| t/57.dup-keys.t | FAIL | 2/3 | **Bug DK1:** `duplicate_keys` option behavior | +| t/57.dup-keys.t | PASS | ok | — (fixed, rewritten duplicate-key error) | ## Recently Fixed (this branch) -Commit `e153feaf7` — *fix(YAML::PP): survive invalid !!int/!!float, -handle !!set/!!binary, align cyclic-ref message* +**Commit 1** — *fix(YAML::PP): survive invalid !!int/!!float, handle !!set/!!binary, align cyclic-ref message* 1. **NumberFormatException containment.** `snakeyaml-engine` wraps `NumberFormatException` thrown by its Int/Float resolvers inside a @@ -107,6 +107,28 @@ handle !!set/!!binary, align cyclic-ref message* list entries are accepted for back-compat with existing tests (`schema => ['JSON','Perl']`). +**Commit 2** — *fix(YAML::PP): filehandle I/O, scalar-context load, DAG refs, dup-key msg* + +6. **Filehandle input/output** (`t/19.file.t`): the Perl wrapper + `YAML::PP.pm` now detects GLOB/IO::Handle arguments to + `load_file`/`LoadFile`/`dump_file`/`DumpFile` and slurps or prints + via the filehandle; `DumpFile(path)` opens the file itself so it + can produce the expected `"Could not open …"` error message. +7. **`load_string` scalar context**: the wrapper now returns only + the first document in scalar context (matches CPAN YAML::PP + contract); list context still returns all documents. +8. **DAG reference handling** (`t/32.cyclic-refs.t`): + `convertYamlToRuntimeScalar` now uses separate `seen` (current + traversal stack) and `cache` (finished containers) sets. Shared + references (like an anchor that's redefined and then aliased to a + sibling subtree) are reused rather than misidentified as cycles. +9. **Duplicate-key error** (`t/57.dup-keys.t`): SnakeYAML's + `found duplicate key X` is rewritten to include + `Duplicate key 'X'` so the test's regex matches. +10. **Invalid `cyclic_refs` option** (`t/32.cyclic-refs.t`): + `cyclic_refs => 'nonsense'` now dies with `Invalid value for + cyclic_refs: '…'` instead of silently defaulting to `fatal`. + ## Outstanding Bugs / Plan Bugs are grouped by the infrastructure change they need. Priority is diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 338fb6425..090062a5e 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "aaa6ac0be"; + public static final String gitCommitId = "38f2feac1"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 21 2026 13:27:01"; + public static final String buildTimestamp = "Apr 21 2026 14:44:58"; // Prevent instantiation private Configuration() { diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java b/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java index 426df10c8..3dd7b651f 100644 --- a/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java +++ b/src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java @@ -109,12 +109,17 @@ public static RuntimeList new_(RuntimeArray args, int ctx) { CyclicRefsBehavior cyclicRefs = CyclicRefsBehavior.FATAL; if (options.containsKey("cyclic_refs")) { String cyclicRefsOption = options.get("cyclic_refs").toString().toLowerCase(); - cyclicRefs = switch (cyclicRefsOption) { - case "warn" -> CyclicRefsBehavior.WARN; - case "ignore" -> CyclicRefsBehavior.IGNORE; - case "allow" -> CyclicRefsBehavior.ALLOW; - default -> CyclicRefsBehavior.FATAL; - }; + switch (cyclicRefsOption) { + case "fatal" -> cyclicRefs = CyclicRefsBehavior.FATAL; + case "warn" -> cyclicRefs = CyclicRefsBehavior.WARN; + case "ignore" -> cyclicRefs = CyclicRefsBehavior.IGNORE; + case "allow" -> cyclicRefs = CyclicRefsBehavior.ALLOW; + default -> { + return WarnDie.die(new RuntimeScalar( + "Invalid value for cyclic_refs: '" + cyclicRefsOption + "'"), + new RuntimeScalar("\n")).getList(); + } + } } DumpSettings dumpSettings = DumpSettings.builder() @@ -173,6 +178,13 @@ public static RuntimeList load_string(RuntimeArray args, int ctx) { if (msg != null && msg.startsWith("java.lang.NumberFormatException")) { msg = "YAML::PP: invalid numeric value: " + msg.replaceFirst("^java\\.lang\\.NumberFormatException:\\s*", ""); + } else if (msg != null && msg.contains("found duplicate key ")) { + // Rewrite to match YAML::PP CPAN error format: Duplicate key 'NAME' + java.util.regex.Matcher m = + java.util.regex.Pattern.compile("found duplicate key (\\S+)").matcher(msg); + if (m.find()) { + msg = "Duplicate key '" + m.group(1) + "' " + msg; + } } return WarnDie.die(new RuntimeScalar(msg), new RuntimeScalar("\n")).getList(); } catch (RuntimeException e) { @@ -270,10 +282,32 @@ public static RuntimeList dump_file(RuntimeArray args, int ctx) { */ @SuppressWarnings("unchecked") private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHashMap seen, RuntimeHash instance) { + return convertYamlToRuntimeScalar(yaml, seen, new IdentityHashMap<>(), instance); + } + + /** + * Converts YAML objects to RuntimeScalar representations. + * + * `seen` tracks containers that are currently on the traversal stack + * (used for cycle detection). `cache` tracks containers we've already + * finished converting (used so that shared DAG references are reused + * instead of duplicated). + */ + @SuppressWarnings("unchecked") + private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, + IdentityHashMap seen, + IdentityHashMap cache, + RuntimeHash instance) { if (yaml == null) { return new RuntimeScalar(); } + // Already-finished container: reuse (supports shared/DAG references). + if (cache.containsKey(yaml)) { + return cache.get(yaml); + } + + // Container currently on the stack: real cycle. if (seen.containsKey(yaml)) { String cyclicBehavior = instance.get("_cyclic_refs").toString(); return switch (CyclicRefsBehavior.valueOf(cyclicBehavior)) { @@ -294,7 +328,9 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas RuntimeScalar hashRef = hash.createReference(); seen.put(yaml, hashRef); map.forEach((key, value) -> - hash.put(key.toString(), convertYamlToRuntimeScalar(value, seen, instance))); + hash.put(key.toString(), convertYamlToRuntimeScalar(value, seen, cache, instance))); + seen.remove(yaml); + cache.put(yaml, hashRef); yield hashRef; } case Set set -> { @@ -305,6 +341,8 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas for (Object item : set) { hash.put(item == null ? "" : item.toString(), new RuntimeScalar()); } + seen.remove(yaml); + cache.put(yaml, hashRef); yield hashRef; } case List list -> { @@ -312,7 +350,9 @@ private static RuntimeScalar convertYamlToRuntimeScalar(Object yaml, IdentityHas RuntimeScalar arrayRef = array.createReference(); seen.put(yaml, arrayRef); list.forEach(item -> - array.elements.add(convertYamlToRuntimeScalar(item, seen, instance))); + array.elements.add(convertYamlToRuntimeScalar(item, seen, cache, instance))); + seen.remove(yaml); + cache.put(yaml, arrayRef); yield arrayRef; } case byte[] bytes -> new RuntimeScalar(Base64.getEncoder().encodeToString(bytes)); diff --git a/src/main/perl/lib/YAML/PP.pm b/src/main/perl/lib/YAML/PP.pm index 425407ad7..4bc09f9df 100644 --- a/src/main/perl/lib/YAML/PP.pm +++ b/src/main/perl/lib/YAML/PP.pm @@ -11,6 +11,7 @@ package YAML::PP; use Exporter "import"; use warnings; use strict; +use Scalar::Util qw(blessed reftype); use XSLoader; XSLoader::load( 'YAML::PP' ); @@ -22,6 +23,23 @@ our @EXPORT_OK = qw(Load Dump LoadFile DumpFile); my $YPP; # cache +# Detect a filehandle (GLOB, GLOB ref, IO::Handle-ish object, or CODE that does fileno). +sub _is_filehandle { + my ($arg) = @_; + return 0 unless defined $arg; + return 0 unless ref $arg; + my $rt = reftype($arg) || ''; + return 1 if $rt eq 'GLOB'; + return 1 if blessed($arg) && $arg->can('getline'); + return 0; +} + +sub _slurp_fh { + my ($fh) = @_; + local $/; + return scalar <$fh>; +} + sub Load { ($YPP ||= __PACKAGE__->new)->load_string(@_); } @@ -31,11 +49,60 @@ sub Dump { } sub LoadFile { - ($YPP ||= __PACKAGE__->new)->load_file(@_); + my ($file) = @_; + my $ypp = ($YPP ||= __PACKAGE__->new); + if (_is_filehandle($file)) { + return $ypp->load_string(_slurp_fh($file)); + } + return $ypp->load_file($file); } sub DumpFile { - ($YPP ||= __PACKAGE__->new)->dump_file(@_); + my ($file, @data) = @_; + my $ypp = ($YPP ||= __PACKAGE__->new); + if (_is_filehandle($file)) { + my $yaml = $ypp->dump_string(@data); + print { $file } $yaml; + return 1; + } + # Open file ourselves so we can produce YAML::PP-style error message + open(my $fh, '>', $file) or die "Could not open '$file' for writing: $!\n"; + print { $fh } $ypp->dump_string(@data); + close $fh; + return 1; +} + +# Method wrappers so $ypp->load_file($fh) / $ypp->dump_file($fh) work too, +# and so load_string in scalar context returns the first document (matches +# the CPAN YAML::PP contract). +{ + my $orig_load_string = \&load_string; + my $orig_load_file = \&load_file; + my $orig_dump_file = \&dump_file; + + no warnings 'redefine'; + *load_string = sub { + my ($self, $yaml) = @_; + my @docs = $orig_load_string->($self, $yaml); + return wantarray ? @docs : $docs[0]; + }; + *load_file = sub { + my ($self, $file) = @_; + if (_is_filehandle($file)) { + return $self->load_string(_slurp_fh($file)); + } + my @docs = $orig_load_file->($self, $file); + return wantarray ? @docs : $docs[0]; + }; + *dump_file = sub { + my ($self, $file, @data) = @_; + if (_is_filehandle($file)) { + my $yaml = $self->dump_string(@data); + print { $file } $yaml; + return 1; + } + return $orig_dump_file->($self, $file, @data); + }; } 1;