diff --git a/dev/modules/yaml_any_fixes.md b/dev/modules/yaml_any_fixes.md new file mode 100644 index 000000000..fa57cb917 --- /dev/null +++ b/dev/modules/yaml_any_fixes.md @@ -0,0 +1,677 @@ +# `jcpan -t YAML::Any` — Investigation and Fix Plan + +## Summary + +`jcpan -t YAML::Any` (i.e. `make test` for the `YAML-1.31` distribution) fails +on PerlOnJava in two distinct ways: + +1. **Source-filter leak** — a `Filter::Util::Call` filter installed by + `use Spiffy -Base;` inside `Test::Base.pm` is consumed by a *nested* + `require Exporter::Heavy` instead of being applied to the rest of + `Test::Base.pm`. Result: `Test::Base.pm` fails to compile with + `syntax error … near "=> [qw"` at line 53, and ~14 of the 30 test + files (`basic-tests.t`, `bugs-emailed.t`, `bugs-rt.t`, `dump-*.t`, + `load-*.t`, …) fail with `Compilation failed in require`. + +2. **`t/2-scalars.t`: 5/12 sub-tests fail** because PerlOnJava's + "bundled `YAML.pm`" has output semantics that diverge from + YAML.pm 1.31 (the version under test). + +Of the two, (1) is by far the bigger problem: it's a generic +PerlOnJava bug that breaks **every** module that uses `Spiffy -Base`, +`Switch`, `Filter::Simple`, or any other source filter that installs +during a `use` whose `import()` transitively `require`s another file. + +--- + +## Bug 1: source-filter state leaks across nested compilations + +### Symptom + +``` +$ ./jperl -e 'package Foo; use Spiffy -Base; field _filters => [qw(a b)];' +"my" variable $self masks earlier declaration in same scope at + jar:PERL5LIB/Exporter/Heavy.pm line 237, near ", $wanted" +syntax error at -e line 1, near "=> [qw" +``` + +The first warning ("`my $self` masks earlier declaration … `Exporter/Heavy.pm` +line 237") is the smoking gun: Spiffy's filter — which injects +`my $self = shift;` after every `sub …{` — was applied to +`Exporter::Heavy.pm`, not to the user's source. + +### Root cause + +`Filter::Util::Call`'s Java implementation in +`org.perlonjava.runtime.perlmodule.FilterUtilCall` keeps two pieces of +state in a `ThreadLocal`: + +| field | purpose | +|---|---| +| `filterContext.filterStack` | stack of currently-installed source filters | +| `filterInstalledDuringUse` | one-shot flag set by `real_import()` and consumed by `wasFilterInstalled()` so the parser knows to re-tokenize after a `use` | + +The parser uses these in +`StatementParser.applySourceFilterToRemainingTokens()`: after every +`use Foo …;`, it calls `wasFilterInstalled()`, and if true rejoins the +remaining tokens, runs them through the filter chain, re-tokenizes, +and finally calls `clearFilters()`. + +Both pieces of state are **process-global per thread** — they are +**not** scoped to the file currently being compiled. So the following +sequence misbehaves: + +``` +parse Test::Base.pm + parse "use Spiffy -Base;" + Spiffy::import: + Filter::Util::Call::filter_add(...) # push filter; flag := true + Exporter::export(...) + require Exporter::Heavy.pm # nested compilation + parse "use warnings;" (etc.) + wasFilterInstalled() == true # ← consumed here! + applyFilters(remaining-of-Heavy.pm) # ← Heavy.pm gets rewritten + clearFilters() # ← stack emptied + back in Test::Base.pm: + wasFilterInstalled() == false # parent never sees it + → rest of Test::Base.pm parsed unfiltered + → "field _filters => [qw(...)]" → syntax error +``` + +### Minimal repro / proof + +```perl +# fails +package Foo; +use Spiffy -Base; +field _filters => [qw(a b)]; + +# works (Exporter::Heavy already loaded → no nested require) +package Foo; +use Exporter::Heavy; +use Spiffy -Base; +field _filters => [qw(a b)]; +``` + +The only difference is whether `Exporter::Heavy` is loaded *during* +`Spiffy::import` or *before* it. + +### Does this match real Perl semantics? + +**Yes — and the current behaviour does not.** + +Real Perl associates source filters with the *compilation unit* +(`PL_compiling` / `PL_rsfp_filters`). Each `require`/`do FILE` opens +a new compilation state; the new file gets its own (initially empty) +filter chain. When the nested compilation finishes, the original +state is restored, including any filters the outer file installed. +That's why on real Perl, Spiffy's filter is applied to the +`*.pm` file in which `use Spiffy -Base;` literally appears, regardless +of what other files Spiffy's import happens to load. + +Spiffy itself relies on this: line 82 of `Spiffy.pm` reads + +```perl +spiffy_filter() + if ($args->{-selfless} or $args->{-Base}) and + not $filtered_files->{(caller($stack_frame))[1]}++; +``` + +i.e. it asks "have I already filtered *this caller's file*?". The +filter is intended to be scoped to that file — which is exactly the +semantics PerlOnJava is missing. + +### Proposed fix + +Treat the filter state as part of the per-compilation-unit context and +save/restore it at every entry to `executePerlCode`. + +#### Step 1 — add save/restore primitives to `FilterUtilCall` + +```java +public static final class FilterState { + final RuntimeList stack; + final boolean installedFlag; + FilterState(RuntimeList s, boolean f) { stack = s; installedFlag = f; } +} + +/** Save current filter state and reset to empty. Returns token to pass back to restore(). */ +public static FilterState saveAndReset() { + FilterContext ctx = filterContext.get(); + FilterState saved = new FilterState(ctx.filterStack, filterInstalledDuringUse.get()); + ctx.filterStack = new RuntimeList(); + ctx.sourceLines = null; + ctx.currentLine = 0; + filterInstalledDuringUse.set(false); + return saved; +} + +public static void restore(FilterState saved) { + FilterContext ctx = filterContext.get(); + ctx.filterStack = saved.stack; + filterInstalledDuringUse.set(saved.installedFlag); +} +``` + +#### Step 2 — wrap `executePerlCode` + +In `org.perlonjava.app.scriptengine.PerlLanguageProvider.executePerlCode`, +at the very top: + +```java +FilterUtilCall.FilterState savedFilterState = FilterUtilCall.saveAndReset(); +try { + // ... existing body ... +} finally { + FilterUtilCall.restore(savedFilterState); +} +``` + +`executePerlCode` is the single funnel for every nested compilation +(`require`, `do FILE`, eval-string, the recursive call inside +`preprocessWithBeginFilters`), so this one wrap covers all cases. + +#### Step 3 — keep `clearFilters()` in `applySourceFilterToRemainingTokens` + +That call still makes sense: once the parent file's filter has been +applied to the remaining tokens, the filter has done its job for that +compilation unit. After save/restore is in place, clearing only +affects the current (parent) frame, never the caller's frame. + +### Does the fix behave exactly like system Perl? + +For the cases that matter for `jcpan -t`, **yes**: + +- **Filter scoped to outer file.** Spiffy's filter is applied to the + rest of `Test::Base.pm` only, never to `Exporter::Heavy.pm`. ✓ +- **Nested file gets a clean chain.** A `require`d file may install + its own filters (e.g. its own `BEGIN { … filter_add … }`), and + those apply only to its own remaining source. ✓ +- **Outer file's filter survives the nested load.** When control + returns from `require Exporter::Heavy`, the parent's filter is + still active, so the next `wasFilterInstalled()` check fires + correctly. ✓ +- **`BEGIN { … filter_add … }`-style filters** (handled by + `preprocessWithBeginFilters`) keep working: that path also goes + through `executePerlCode` for the synthetic compile, so its filter + installation is scoped to that synthetic compile and doesn't leak. + ✓ + +Known small differences from real Perl that the fix does **not** +change (and that don't matter for YAML::Any): + +- PerlOnJava applies the filter once to the entire remaining token + buffer (rejoin → filter → re-tokenize), whereas real Perl drives + the filter line-by-line through the lexer. Filters that depend on + fine-grained interaction with the lexer (very rare; Spiffy doesn't) + still behave differently. This is a pre-existing limitation + documented in `FilterUtilCall.java`. +- `__DATA__` / `__END__` handling is approximated by a regex in + `applyFilters`; real Perl just stops feeding the filter at the + end-of-source marker. Adequate for Spiffy/Switch/Filter::Simple. + +The save/restore fix is independent of those limitations and matches +real Perl on the property "a source filter installed by `use Foo;` +applies to the file containing that `use`, and only to that file". + +### Test plan + +1. Add a regression test under `src/test/resources/unit/` + that mirrors the minimal repro — `package Foo; use Spiffy -Base; field _filters => [qw(a b)]; ...` — and asserts it compiles + and runs. +2. `make test-bundled-modules` (covers anything that already exercises + `Filter::Util::Call`, e.g. `Switch`). +3. Re-run `jcpan -t YAML::Any`. Expectation: every `t/*.t` that + currently dies with `syntax error at .../Test/Base.pm line 53, + near "=> [qw"` now reaches its real test body. Pre-existing YAML + semantic failures (Bug 2, below) remain. +4. Spot-check that no previously-passing module regresses: + - `make` (full unit-test run). + - `jcpan -t Switch` if available — Switch is the canonical + `Filter::Util::Call` user. + +### Risks / open questions + +- **Outer-`use` Spiffy filter when the file has *no* further code.** + Trivial — `applySourceFilterToRemainingTokens` is a no-op on an + empty buffer. +- **`eval STRING` inside a filtered file.** After the fix, the eval + starts with an empty filter chain (just like real Perl, where the + string-eval gets its own compilation unit and inherits no source + filter from the surrounding file). This is the correct behaviour + but it's a small change for any caller that currently happens to + inherit a leaked filter. None known. +- **Threading.** `filterContext` and `filterInstalledDuringUse` are + already `ThreadLocal`, so the save/restore is per-thread. Fine. + +--- + +## Bug 2: `YAML.pm` semantics differ from upstream YAML 1.31 + +### What's actually bundled + +There is **no** Perl `YAML.pm` shipped that mirrors the real +`YAML-1.31` distribution. Instead PerlOnJava ships: + +| File | Role | +|---|---| +| `src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java` | Java implementation of `YAML::PP` built on top of `org.snakeyaml.engine.v2`. Registers `new`, `load_string`, `load_file`, `dump_string`, `dump_file`. | +| `src/main/perl/lib/YAML.pm` | 12-line **Perl wrapper** that re-exports `Load`/`Dump`/`LoadFile`/`DumpFile` from `YAML::PP` and sets `$YAML::VERSION = '1.31'`. | + +So "bundled YAML" is **YAML::PP (Java) wrapped by a tiny Perl +shim that calls itself `YAML 1.31`**. Functionally adequate for +*using* YAML, but the shim impersonates `YAML.pm`'s version number +without matching its serialization quirks. + +### Test failures in `t/2-scalars.t` + +``` +got: '--- null\n' expected: '--- ~\n' +got: '1' expected: 'true' +got: '' expected: 'false' +error: 'while scanning a quoted scalar … found unexpected end of stream' + expected: 'Can't parse single' / 'Can't parse double' +``` + +These all stem from snakeyaml-engine emitting **JSON / YAML 1.2** +syntax (`null`, `true`, `false`) and producing libyaml-style error +messages, whereas YAML.pm 1.0/1.1 emits `~` for undef, `true`/`false` +as plain strings, and dies with the classic `Can't parse single +quoted string` / `Can't parse double quoted string` messages. + +### Concrete behavioural deltas + +Probed on the current build: + +| input | YAML.pm 1.31 expects | bundled `YAML.pm` (= `YAML::PP`) returns | +|---|---|---| +| `Dump(undef)` | `--- ~\n` | `--- null\n` | +| `Load("--- true\n")` | string `"true"` | boolean true → stringifies to `"1"` | +| `Load("--- false\n")` | string `"false"` | boolean false → stringifies to `""` | +| malformed `'…\n…` | error matches `Can't parse single` | `while scanning a quoted scalar … found unexpected end of stream` | +| malformed `"…\n…` | error matches `Can't parse double` | same as above | + +Five mismatches, all in `t/2-scalars.t` — exactly the 5 failures we +observed. Everything else (`Dump(42)`, `Load("--- 42\n")`, `Load("--- ~\n")`, +the round-trip on a hash, the giant-string round-trip) already works. + +### Recommended fix + +Surgical, contained in the **YAML.pm shim only** — the Java +`YAML::PP` keeps emitting faithful YAML 1.2 output for direct +`use YAML::PP;` consumers, and we add a thin "yaml-pm compat" +post/pre-processing layer on top. + +#### Change `src/main/perl/lib/YAML.pm` + +Replace the current 12-line wrapper with something like: + +```perl +package YAML; +use strict; +use warnings; +use YAML::PP; +use Scalar::Util qw(blessed reftype); +use Exporter 'import'; + +our @EXPORT = qw(Load Dump); +our @EXPORT_OK = qw(LoadFile DumpFile freeze thaw); +our $VERSION = '1.31'; + +my $YPP = YAML::PP->new; # Core schema, indent 2 — matches YAML.pm defaults + +# ----- Dump: post-process so undef serialises as `~` -------------------- +sub _undef_to_tilde { + # YAML.pm 1.x emits `~` for undef; YAML::PP emits `null`. + # Only replace `null` when it appears as a YAML scalar token + # (after `: `, `- `, `--- `), never inside quoted strings. + my $s = shift; + $s =~ s/^(\s*-\s+)null$/$1~/mg; # sequence item + $s =~ s/^(\s*[^"\s][^"]*?:\s+)null$/$1~/mg; # mapping value + $s =~ s/^(---\s+)null$/$1~/mg; # top-level scalar + $s; +} + +sub Dump { _undef_to_tilde($YPP->dump_string(@_)) } +sub DumpFile { my $f = shift; _spew($f, Dump(@_)) } + +# ----- Load: stringify booleans, rewrite parser errors ------------------ +sub _stringify_bools { + my $node = shift; + if (blessed($node) && $node->isa('JSON::PP::Boolean')) { + return $node ? 'true' : 'false'; + } + my $rt = ref $node ? reftype($node) : ''; + if ($rt eq 'HASH') { _stringify_bools(\$_) for values %$node } + elsif ($rt eq 'ARRAY') { _stringify_bools(\$_) for @$node } + elsif ($rt eq 'SCALAR' || $rt eq 'REF') { ... } + return $node; +} + +sub Load { + my ($yaml) = @_; + my @docs = eval { $YPP->load_string($yaml) }; + if (my $e = $@) { + # Translate snakeyaml-engine messages into YAML.pm-style errors. + if ($e =~ /scanning a quoted scalar/ && $e =~ /unexpected end/) { + # Inspect the quoting style of the offending line. + my $kind = ($yaml =~ /:\s*'/) ? 'single' + : ($yaml =~ /:\s*"/) ? 'double' + : 'quoted'; + die "Can't parse $kind quoted string\n"; + } + die $e; + } + @docs = map { _bool_to_string($_) } @docs; + return wantarray ? @docs : $docs[0]; +} +sub LoadFile { Load(_slurp(shift)) } + +*freeze = \&Dump; +*thaw = \&Load; + +1; +``` + +(The above is illustrative — the actual patch needs the full +`_bool_to_string` walker and proper `_slurp` / `_spew` helpers; see +the existing 12-line file for the entry-point signature contract.) + +#### What changes for direct `YAML::PP` users + +Nothing. `YAML::PP->load_string`/`dump_string` remain +JSON-flavoured (YAML 1.2): `null`, `true`/`false` as +`JSON::PP::Boolean`, snakeyaml error messages. Only the `YAML::*` +entry points get the compat layer. + +#### Why post-process instead of changing `YAMLPP.java`? + +- `YAML::PP` on real Perl already emits `null` and JSON-style + booleans — it's the YAML 1.2 schema, not a PerlOnJava bug. Modules + that explicitly use `YAML::PP` expect that. +- Adding a "YAML.pm compat" mode to the Java side would either: + (a) require a new schema in `YAMLPP.java`, ~50 lines of + resolver/representer plumbing, or + (b) add a per-instance flag that the load/dump paths inspect — a + pile of conditional code in hot paths. + Doing it in the 12-line Perl shim keeps the fix in **one** file + and 100 % out of the JVM hot path. +- The compat layer is the kind of thing real `YAML::Any` does too + when it picks a backend. + +### Does this fix match real YAML.pm 1.31 semantics? + +For `t/2-scalars.t` and the typical `use YAML;` consumer, **yes**: + +- `Dump(undef)` → `--- ~\n` ✓ +- `Load("--- ~\n")`, `Load("---\n")`, `Load("--- ''\n")` already work ✓ +- `Load("--- true\n")`, `Load("--- false\n")` → strings `"true"`/`"false"` ✓ +- malformed quoted strings → die with `Can't parse single`/`Can't parse double` ✓ +- numbers, strings, refs, hashes, arrays — unchanged from current + (already passing) behaviour ✓ + +Caveats not addressed by this fix (and not exercised by the test +suite): + +- YAML.pm 1.x emits some scalar-styling decisions (line wrapping + thresholds, when to single-quote vs plain) that YAML::PP differs + on. Out of scope; nothing in `t/2-scalars.t` tests it. +- YAML.pm 1.x has its own anchor/alias numbering scheme (`&1`, `*1`) + whereas YAML::PP uses `&a001`. Already different, no regression. +- Other YAML modules that depend on YAML.pm's error message text + (e.g. `YAML::Tiny`-derived test suites) may need similar + translations; address case-by-case. + +--- + +## Path to making **all** YAML-1.31 tests pass + +The fixes above unblock `t/2-scalars.t` and the ~14 test files that +currently die with `Compilation failed in require` because of the +Spiffy filter leak. Getting **every** one of the 54 `t/*.t` files +green is a different conversation: the test suite was written against +one specific Perl implementation (`YAML.pm` 1.x) and probes its +*exact* output, error text, anchor numbering, blessing protocol, +B::Deparse code dumping, etc. PerlOnJava ships a YAML 1.2 engine +under that name, so a small wrapper can never satisfy all of +those tests. + +Realistically, three approaches scale to "all tests pass": + +### Approach A — install the *real* `YAML.pm` from CPAN (recommended) + +`YAML-1.31` is **pure Perl** (only deps: `YAML::Mo` — bundled with +the dist — `Scalar::Util`, `B`). No XS. Its tests are written to +exercise its own implementation. The clean path is: + +1. Land the **Spiffy filter-leak fix** (Bug 1) so `Test::Base.pm` + compiles. Without this, no YAML test even starts. +2. **Stop bundling `YAML.pm`** as an unconditional shim. Two + sub-options: + - **A1.** Remove `src/main/perl/lib/YAML.pm` from the JAR + entirely. Users who want `Load`/`Dump` either `use YAML::PP;` + (Java-backed, works) or run `cpan YAML` (real Perl, works). + - **A2.** Keep the shim as a *fallback*: change the + `MakeMaker.pm` SKIP logic at line 357 to allow `YAML.pm`, + `YAML/Loader*.pm`, `YAML/Dumper*.pm`, `YAML/Mo.pm`, etc. to be + overwritten by the CPAN version. A1 is cleaner; A2 keeps the + "out of the box `use YAML;` works" property. +3. Run `jcpan -t YAML`. Tests now run against real `YAML.pm` 1.31. +4. Triage the residual failures. Real `YAML.pm` exercises a fair + amount of Perl machinery that can hit PerlOnJava limitations: + - `B::Deparse` (for `Dump` of code refs — `dump-code.t`, + `load-code.t`) + - Glob serialisation (`dump-blessed-glob.t`) + - Tied hashes / iterating restored objects + (`pugs-objects.t`, `marshall.t`) + - `${\ \my $x}` style refs of refs, `weaken`-tracked cycles + (`references.t`, `bugs-rt.t`) + - Regex stringification for `qr//` (`regexp.t`) + - Numification edge cases (`numify.t`, `dump-stringy-numbers.t`) + - `local $/` slurp + UTF-8 BOM handling + (`dump-file-utf8.t`, `io-handle.t`) + - Symbol-table magic in `YAML::Mo` (`*{$M.'Object::new'} = sub{...}`, + `${$P.':E'}`, etc.) +5. Each residual failure becomes one of: + - a real PerlOnJava bug (fix it — likely small per failure), + - an unsupportable feature (e.g. `DESTROY`-based teardown in a + test — skip with `SKIP` block, document in this file), + - a YAML.pm bug already known upstream (skip). + +Cost: medium. Most fixes are one-liners or small parser/runtime +patches. Reward: future YAML.pm-derived tests "just work". + +### Approach B — make `YAML::PP` faithful to YAML.pm 1.x in the shim + +Extend the YAML.pm shim (the strategy in this document so far) to +cover **everything** the test suite checks, not just the 5 mismatches +in `t/2-scalars.t`. In practice that means re-implementing +YAML.pm's output format on top of `YAML::PP`'s parser/emitter: + +- A custom Dumper that emits `~`, plain `true`/`false`, + YAML.pm-style anchors `&1`/`*1`, YAML.pm's quoting heuristics + (when to single-quote, when to fold), YAML.pm's `Indent`/`UseHeader`/ + `UseAliases`/`UseBlock`/`UseFold`/`SortKeys` semantics. +- A custom Loader that respects `$YAML::LoadCode`, + `$YAML::LoadBlessed`, the "perl/code", "perl/glob", "perl/regexp", + "perl/hash:Class" tag families used by `Marshall`/`Bless`. +- Error-message translation table for every snakeyaml exception that + the test suite pattern-matches (`load-fails.t`, `errors.t`). +- B::Deparse-based code-ref serialisation for `dump-code.t`. +- `YAML::Node`/`YAML::Tag` shims so tests that introspect node info + (`node-info.t`, `preserve.t`) see the right object structure. + +Cost: high. Probably 1000–2000 lines of compatibility Perl plus +non-trivial Java work in `YAMLPP.java` (custom representer/resolver +chains, listener for raw-token style preservation). Reward: a +single self-contained shim with no CPAN bootstrap dependency. +Downside: every future YAML.pm release re-creates the diff. + +### Approach C — port real `YAML.pm` 1.31 into the bundled JAR + +Replace the 12-line shim with the actual `YAML.pm` / `YAML/Loader.pm` +/ `YAML/Dumper.pm` / `YAML/Mo.pm` / `YAML/Marshall.pm` / +`YAML/Node.pm` / `YAML/Types.pm` / `YAML/Error.pm` / `YAML/Tag.pm` +files, vendored under `src/main/perl/lib/`. Do NOT load `YAML::PP` +from `YAML`. + +Same end state as A but the user doesn't need to run `cpan YAML` to +get YAML.pm semantics. Mechanics are identical to A from then on — +the same residual triage list, the same per-failure PerlOnJava bug +fixes. + +Cost: low to vendor + same triage work as A. Reward: matches +upstream out of the box. Risk: one more pure-Perl module that we +re-sync on each upstream release. + +### Recommendation + +Approach **A2** (or **C**, which is materially the same thing +delivered as a vendor copy). Concrete sequence: + +1. **Land the source-filter save/restore fix** in + `PerlLanguageProvider.executePerlCode`. This alone unblocks the + bulk of the suite. +2. **Whitelist** the YAML distribution's `.pm` files in + `MakeMaker.pm`'s SKIP logic so `cpan YAML` can install over the + shim. Or vendor real `YAML.pm` (Approach C). +3. **Run** `jcpan -t YAML` and capture the new failure set in this + document. Each remaining failure becomes its own bullet here + plus, where applicable, a small ticket / PR. +4. **Stop using the wrapper for tests.** When PerlOnJava sees a + distribution whose own name is `YAML`, it should use the + user-installed real `YAML.pm`, not the bundled shim — the test + harness should test what's about to be released, not what + PerlOnJava ships. + +Approach B is reserved for the case where bootstrapping CPAN-`YAML` +is itself a hard problem. Today it isn't: `cpan YAML` works on +PerlOnJava once Bug 1 is fixed (its only real dependency is its own +`YAML::Mo`, vendored inside the dist). + +### What "all tests pass" probably looks like in practice + +After steps 1–3 above, an honest expectation is: + +| outcome | rough % of files | what to do | +|---|---|---| +| pass cleanly | ~60–75% | nothing | +| pass after a small PerlOnJava fix | ~15–25% | file as targeted bugs (regex/B::Deparse/etc.) | +| skip because of `fork`/`threads`/DESTROY | ~5% | `SKIP` block, document | +| genuine YAML.pm-1.x quirks too costly to match | <5% | document as known mismatch | + +There is no single PR that lands "all YAML tests pass". But there +is a well-defined sequence — Spiffy fix → switch to real YAML.pm → +triage residue — that converges on it. + +--- + +## Suggested order of work + +1. **Fix Bug 1** (source-filter save/restore in `executePerlCode`). + This is the high-leverage change: it unblocks YAML::Any, all of + `Switch`, all of `Filter::Simple`, and any future `Spiffy`-based + module — for a ~30-line change in one file. +2. Re-run `jcpan -t YAML::Any` and record the new pass/fail set. +3. **Pick an option for Bug 2** based on what's still failing after + step 2 and how much real YAML.pm-1.31 PerlOnJava can tolerate. +4. Add a unit-test regression test under `src/test/resources/unit/` + for the Spiffy/source-filter scenario so this exact failure mode + can't silently come back. + +## References + +- `src/main/java/org/perlonjava/runtime/perlmodule/FilterUtilCall.java` +- `src/main/java/org/perlonjava/frontend/parser/StatementParser.java` + (`applySourceFilterToRemainingTokens`, ~line 1290) +- `src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java` + (`executePerlCode`, ~line 71) +- `src/main/java/org/perlonjava/runtime/perlmodule/YAMLPP.java` +- `src/main/perl/lib/YAML.pm` +- `~/.cpan/build/Net-IPv4Addr-0.10-1` — unrelated, but the previous + `jcpan -t Net::IPv4Addr` investigation lives in the session log. + +## Progress Tracking + +### Current Status: Bug 2 fixed; Bug 1 still pending + +### Completed Phases + +- [x] **Bug 2 — vendor real YAML.pm 1.31** (2026-04-28) + - Replaced `src/main/perl/lib/YAML.pm` (the 12-line YAML::PP shim) + with the actual pure-Perl `YAML.pm` 1.31 distribution. + - Added `src/main/perl/lib/YAML/{Any,Dumper,Error,Loader,Marshall, + Mo,Node,Tag,Types}.pm` plus `YAML/Loader/Base.pm` and + `YAML/Dumper/Base.pm`. + - Kept `src/main/java/.../YAMLPP.java` and `src/main/perl/lib/YAML/PP.pm` + untouched — `use YAML::PP;` (Java-backed) is a separately + documented bundled module with its own unit test (`yaml_pp.t`). + - `make` is green. + - Verified directly against the YAML-1.31 distribution's tests + (Test::Base-using tests still blocked on Bug 1): + + | test | before | after | + |-----------------------|---------|---------| + | `t/000-compile-modules.t` | n/a (couldn't run) | 12/12 | + | `t/2-scalars.t` | 7/12 | 12/12 | + | `t/dump-synopsis.t` | n/a | 1/1 | + | `t/issue-149.t` | n/a | 1/1 | + | `t/issue-69.t` | n/a | 2/2 | + | `t/numify.t` | n/a | 6/6 | + | `t/roundtrip.t` | n/a | 1/1 | + | `t/rt-90593.t` | n/a | 2/2 | + | `t/preserve.t` | n/a | 0/1 ← real PerlOnJava issue, see below | + + 35 of the remaining 54 files still die at compile time with + `syntax error at .../Test/Base.pm line 53, near "=> [qw"` — + that's exactly the Spiffy filter-leak (Bug 1). + +### Next Steps + +1. **Land Bug 1 (Spiffy filter leak).** Implement + `FilterUtilCall.saveAndReset()` / `restore()` and wire them into + `PerlLanguageProvider.executePerlCode`. Add a regression test + under `src/test/resources/unit/` mirroring the Spiffy minimal + repro (`package Foo; use Spiffy -Base; field _filters => [qw(a b)];`). + This single change should unblock all 35 currently-blocked + `t/*.t` files. +2. After Bug 1 lands, re-run the full YAML test sweep and triage + the residual failures. Expected long-tail issues: + - `t/preserve.t` — `Preserve` option: hash key ordering. PerlOnJava + hashes are not insertion-ordered by default, so the round-trip + produces sorted output instead of the original key order. + Either fix in `RuntimeHash` (preserve insertion order) or skip. + - `t/dump-code.t` / `t/load-code.t` — B::Deparse round-trips for + code refs. + - `t/dump-blessed-glob.t` — glob serialisation. + - `t/regexp.t` — `qr//` stringification differences. + - `t/dump-file-utf8.t` / `t/io-handle.t` — UTF-8 / BOM handling. +3. Document any tests that hit unsupportable Perl features + (`fork`, `threads`, deterministic `DESTROY`) with `SKIP` blocks + plus a note in this file. + +### Open Questions + +- `t/2-scalars.t` test 10 (a 600 KB string round-trip through real + YAML.pm) takes ~99 s on PerlOnJava — almost all of it inside + YAML::Loader's regex-driven parser. Acceptable for `make test` + but worth profiling later. +- Is there any module that *intentionally* relies on the current + filter leak? None known on CPAN, but worth a sanity sweep on the + bundled-modules suite once the fix is in. + +### Why YAMLPP.java was kept (not removed) + +`YAML::PP` is a separately bundled module: + +- documented in `docs/reference/bundled-modules.md` and + `docs/reference/feature-matrix.md`, +- has a dedicated unit test (`src/test/resources/unit/yaml_pp.t`), +- exposes a different API surface (`YAML::PP->new(schema => …)`, + `cyclic_refs`, `boolean`, …) that real CPAN consumers expect. + +Direct callers of `use YAML::PP;` would regress if we deleted it. +`Storable.java` already uses `org.snakeyaml.engine` directly and +doesn't go through `YAMLPP.java`, so it's unaffected either way — +but `YAML::PP` itself remains a valid bundled module on its own +merits. diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java index 1484eb6f8..c1d6e27a0 100644 --- a/src/main/java/org/perlonjava/core/Configuration.java +++ b/src/main/java/org/perlonjava/core/Configuration.java @@ -33,7 +33,7 @@ public final class Configuration { * Automatically populated by Gradle/Maven during build. * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String gitCommitId = "25b6fa935"; + public static final String gitCommitId = "785f794f7"; /** * Git commit date of the build (ISO format: YYYY-MM-DD). @@ -48,7 +48,7 @@ public final class Configuration { * Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at" * DO NOT EDIT MANUALLY - this value is replaced at build time. */ - public static final String buildTimestamp = "Apr 28 2026 18:03:29"; + public static final String buildTimestamp = "Apr 28 2026 19:44:47"; // Prevent instantiation private Configuration() { diff --git a/src/main/perl/lib/YAML.pm b/src/main/perl/lib/YAML.pm index a334886f1..a1baa0457 100644 --- a/src/main/perl/lib/YAML.pm +++ b/src/main/perl/lib/YAML.pm @@ -1,16 +1,117 @@ package YAML; +our $VERSION = '1.31'; -use strict; -use warnings; -use YAML::PP qw(Load Dump LoadFile DumpFile); -use Exporter 'import'; +use YAML::Mo; -our @EXPORT = qw(Load Dump); -our @EXPORT_OK = qw(LoadFile DumpFile freeze thaw); -our $VERSION = '1.31'; # Match CPAN YAML version; we wrap YAML::PP +use Exporter; +push @YAML::ISA, 'Exporter'; +our @EXPORT = qw{ Dump Load }; +our @EXPORT_OK = qw{ freeze thaw DumpFile LoadFile Bless Blessed }; +our ( + $UseCode, $DumpCode, $LoadCode, + $SpecVersion, + $UseHeader, $UseVersion, $UseBlock, $UseFold, $UseAliases, + $Indent, $SortKeys, $Preserve, + $AnchorPrefix, $CompressSeries, $InlineSeries, $Purity, + $Stringify, $Numify, $LoadBlessed, $QuoteNumericStrings, + $DumperClass, $LoaderClass +); -# Storable-compatible aliases used by POE::Filter::Reference -*freeze = \&Dump; -*thaw = \&Load; +use YAML::Node; # XXX This is a temp fix for Module::Build +use Scalar::Util qw/ openhandle /; + +# XXX This VALUE nonsense needs to go. +use constant VALUE => "\x07YAML\x07VALUE\x07"; + +# YAML Object Properties +has dumper_class => default => sub {'YAML::Dumper'}; +has loader_class => default => sub {'YAML::Loader'}; +has dumper_object => default => sub {$_[0]->init_action_object("dumper")}; +has loader_object => default => sub {$_[0]->init_action_object("loader")}; + +sub Dump { + my $yaml = YAML->new; + $yaml->dumper_class($YAML::DumperClass) + if $YAML::DumperClass; + return $yaml->dumper_object->dump(@_); +} + +sub Load { + my $yaml = YAML->new; + $yaml->loader_class($YAML::LoaderClass) + if $YAML::LoaderClass; + return $yaml->loader_object->load(@_); +} + +{ + no warnings 'once'; + # freeze/thaw is the API for Storable string serialization. Some + # modules make use of serializing packages on if they use freeze/thaw. + *freeze = \ &Dump; + *thaw = \ &Load; +} + +sub DumpFile { + my $OUT; + my $filename = shift; + if (openhandle $filename) { + $OUT = $filename; + } + else { + my $mode = '>'; + if ($filename =~ /^\s*(>{1,2})\s*(.*)$/) { + ($mode, $filename) = ($1, $2); + } + open $OUT, $mode, $filename + or YAML::Mo::Object->die('YAML_DUMP_ERR_FILE_OUTPUT', $filename, "$!"); + } + binmode $OUT, ':utf8'; # if $Config{useperlio} eq 'define'; + local $/ = "\n"; # reset special to "sane" + print $OUT Dump(@_); + unless (ref $filename eq 'GLOB') { + close $OUT + or do { + my $errsav = $!; + YAML::Mo::Object->die('YAML_DUMP_ERR_FILE_OUTPUT_CLOSE', $filename, $errsav); + } + } +} + +sub LoadFile { + my $IN; + my $filename = shift; + if (openhandle $filename) { + $IN = $filename; + } + else { + open $IN, '<', $filename + or YAML::Mo::Object->die('YAML_LOAD_ERR_FILE_INPUT', $filename, "$!"); + } + binmode $IN, ':utf8'; # if $Config{useperlio} eq 'define'; + return Load(do { local $/; <$IN> }); +} + +sub init_action_object { + my $self = shift; + my $object_class = (shift) . '_class'; + my $module_name = $self->$object_class; + eval "require $module_name"; + $self->die("Error in require $module_name - $@") + if $@ and "$@" !~ /Can't locate/; + my $object = $self->$object_class->new; + $object->set_global_options; + return $object; +} + +my $global = {}; +sub Bless { + require YAML::Dumper::Base; + YAML::Dumper::Base::bless($global, @_) +} +sub Blessed { + require YAML::Dumper::Base; + YAML::Dumper::Base::blessed($global, @_) +} +sub global_object { $global } 1; diff --git a/src/main/perl/lib/YAML/Any.pm b/src/main/perl/lib/YAML/Any.pm new file mode 100644 index 000000000..f82578629 --- /dev/null +++ b/src/main/perl/lib/YAML/Any.pm @@ -0,0 +1,123 @@ +use strict; use warnings; +package YAML::Any; +our $VERSION = '1.31'; + +use Exporter (); + +@YAML::Any::ISA = 'Exporter'; +@YAML::Any::EXPORT = qw(Dump Load); +@YAML::Any::EXPORT_OK = qw(DumpFile LoadFile); + +my @dump_options = qw( + UseCode + DumpCode + SpecVersion + Indent + UseHeader + UseVersion + SortKeys + AnchorPrefix + UseBlock + UseFold + CompressSeries + InlineSeries + UseAliases + Purity + Stringify +); + +my @load_options = qw( + UseCode + LoadCode + Preserve +); + +my @implementations = qw( + YAML::XS + YAML::Syck + YAML::Old + YAML + YAML::Tiny +); + +sub import { + __PACKAGE__->implementation; + goto &Exporter::import; +} + +sub Dump { + no strict 'refs'; + no warnings 'once'; + my $implementation = __PACKAGE__->implementation; + for my $option (@dump_options) { + my $var = "$implementation\::$option"; + my $value = $$var; + local $$var; + $$var = defined $value ? $value : ${"YAML::$option"}; + } + return &{"$implementation\::Dump"}(@_); +} + +sub DumpFile { + no strict 'refs'; + no warnings 'once'; + my $implementation = __PACKAGE__->implementation; + for my $option (@dump_options) { + my $var = "$implementation\::$option"; + my $value = $$var; + local $$var; + $$var = defined $value ? $value : ${"YAML::$option"}; + } + return &{"$implementation\::DumpFile"}(@_); +} + +sub Load { + no strict 'refs'; + no warnings 'once'; + my $implementation = __PACKAGE__->implementation; + for my $option (@load_options) { + my $var = "$implementation\::$option"; + my $value = $$var; + local $$var; + $$var = defined $value ? $value : ${"YAML::$option"}; + } + return &{"$implementation\::Load"}(@_); +} + +sub LoadFile { + no strict 'refs'; + no warnings 'once'; + my $implementation = __PACKAGE__->implementation; + for my $option (@load_options) { + my $var = "$implementation\::$option"; + my $value = $$var; + local $$var; + $$var = defined $value ? $value : ${"YAML::$option"}; + } + return &{"$implementation\::LoadFile"}(@_); +} + +sub order { + return @YAML::Any::_TEST_ORDER + if @YAML::Any::_TEST_ORDER; + return @implementations; +} + +sub implementation { + my @order = __PACKAGE__->order; + for my $module (@order) { + my $path = $module; + $path =~ s/::/\//g; + $path .= '.pm'; + return $module if exists $INC{$path}; + eval "require $module; 1" and return $module; + } + croak("YAML::Any couldn't find any of these YAML implementations: @order"); +} + +sub croak { + require Carp; + Carp::croak(@_); +} + +1; diff --git a/src/main/perl/lib/YAML/Dumper.pm b/src/main/perl/lib/YAML/Dumper.pm new file mode 100644 index 000000000..a6c3adbdc --- /dev/null +++ b/src/main/perl/lib/YAML/Dumper.pm @@ -0,0 +1,578 @@ +package YAML::Dumper; + +use YAML::Mo; +extends 'YAML::Dumper::Base'; + +use YAML::Dumper::Base; +use YAML::Node; +use YAML::Types; +use Scalar::Util qw(); +use B (); +use Carp (); + +# Context constants +use constant KEY => 3; +use constant BLESSED => 4; +use constant FROMARRAY => 5; +use constant VALUE => "\x07YAML\x07VALUE\x07"; + +# Common YAML character sets +my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]'; +my $LIT_CHAR = '|'; + +#============================================================================== +# OO version of Dump. YAML->new->dump($foo); +sub dump { + my $self = shift; + $self->stream(''); + $self->document(0); + for my $document (@_) { + $self->{document}++; + $self->transferred({}); + $self->id_refcnt({}); + $self->id_anchor({}); + $self->anchor(1); + $self->level(0); + $self->offset->[0] = 0 - $self->indent_width; + $self->_prewalk($document); + $self->_emit_header($document); + $self->_emit_node($document); + } + return $self->stream; +} + +# Every YAML document in the stream must begin with a YAML header, unless +# there is only a single document and the user requests "no header". +sub _emit_header { + my $self = shift; + my ($node) = @_; + if (not $self->use_header and + $self->document == 1 + ) { + $self->die('YAML_DUMP_ERR_NO_HEADER') + unless ref($node) =~ /^(HASH|ARRAY)$/; + $self->die('YAML_DUMP_ERR_NO_HEADER') + if ref($node) eq 'HASH' and keys(%$node) == 0; + $self->die('YAML_DUMP_ERR_NO_HEADER') + if ref($node) eq 'ARRAY' and @$node == 0; + # XXX Also croak if aliased, blessed, or ynode + $self->headless(1); + return; + } + $self->{stream} .= '---'; +# XXX Consider switching to 1.1 style + if ($self->use_version) { +# $self->{stream} .= " #YAML:1.0"; + } +} + +# Walk the tree to be dumped and keep track of its reference counts. +# This function is where the Dumper does all its work. All type +# transfers happen here. +sub _prewalk { + my $self = shift; + my $stringify = $self->stringify; + my ($class, $type, $node_id) = $self->node_info(\$_[0], $stringify); + + # Handle typeglobs + if ($type eq 'GLOB') { + $self->transferred->{$node_id} = + YAML::Type::glob->yaml_dump($_[0]); + $self->_prewalk($self->transferred->{$node_id}); + return; + } + + # Handle regexps + if (ref($_[0]) eq 'Regexp') { + return; + } + + # Handle Purity for scalars. + # XXX can't find a use case yet. Might be YAGNI. + if (not ref $_[0]) { + $self->{id_refcnt}{$node_id}++ if $self->purity; + return; + } + + # Make a copy of original + my $value = $_[0]; + ($class, $type, $node_id) = $self->node_info($value, $stringify); + + # Must be a stringified object. + return if (ref($value) and not $type); + + # Look for things already transferred. + if ($self->transferred->{$node_id}) { + (undef, undef, $node_id) = (ref $self->transferred->{$node_id}) + ? $self->node_info($self->transferred->{$node_id}, $stringify) + : $self->node_info(\ $self->transferred->{$node_id}, $stringify); + $self->{id_refcnt}{$node_id}++; + return; + } + + # Handle code refs + if ($type eq 'CODE') { + $self->transferred->{$node_id} = 'placeholder'; + YAML::Type::code->yaml_dump( + $self->dump_code, + $_[0], + $self->transferred->{$node_id} + ); + ($class, $type, $node_id) = + $self->node_info(\ $self->transferred->{$node_id}, $stringify); + $self->{id_refcnt}{$node_id}++; + return; + } + + # Handle blessed things + if (defined $class) { + if ($value->can('yaml_dump')) { + $value = $value->yaml_dump; + } + elsif ($type eq 'SCALAR') { + $self->transferred->{$node_id} = 'placeholder'; + YAML::Type::blessed->yaml_dump + ($_[0], $self->transferred->{$node_id}); + ($class, $type, $node_id) = + $self->node_info(\ $self->transferred->{$node_id}, $stringify); + $self->{id_refcnt}{$node_id}++; + return; + } + else { + $value = YAML::Type::blessed->yaml_dump($value); + } + $self->transferred->{$node_id} = $value; + (undef, $type, $node_id) = $self->node_info($value, $stringify); + } + + # Handle YAML Blessed things + require YAML; + if (defined YAML->global_object()->{blessed_map}{$node_id}) { + $value = YAML->global_object()->{blessed_map}{$node_id}; + $self->transferred->{$node_id} = $value; + ($class, $type, $node_id) = $self->node_info($value, $stringify); + $self->_prewalk($value); + return; + } + + # Handle hard refs + if ($type eq 'REF' or $type eq 'SCALAR') { + $value = YAML::Type::ref->yaml_dump($value); + $self->transferred->{$node_id} = $value; + (undef, $type, $node_id) = $self->node_info($value, $stringify); + } + + # Handle ref-to-glob's + elsif ($type eq 'GLOB') { + my $ref_ynode = $self->transferred->{$node_id} = + YAML::Type::ref->yaml_dump($value); + + my $glob_ynode = $ref_ynode->{&VALUE} = + YAML::Type::glob->yaml_dump($$value); + + (undef, undef, $node_id) = $self->node_info($glob_ynode, $stringify); + $self->transferred->{$node_id} = $glob_ynode; + $self->_prewalk($glob_ynode); + return; + } + + # Increment ref count for node + return if ++($self->{id_refcnt}{$node_id}) > 1; + + # Keep on walking + if ($type eq 'HASH') { + $self->_prewalk($value->{$_}) + for keys %{$value}; + return; + } + elsif ($type eq 'ARRAY') { + $self->_prewalk($_) + for @{$value}; + return; + } + + # Unknown type. Need to know about it. + $self->warn(<<"..."); +YAML::Dumper can't handle dumping this type of data. +Please report this to the author. + +id: $node_id +type: $type +class: $class +value: $value + +... + + return; +} + +# Every data element and sub data element is a node. +# Everything emitted goes through this function. +sub _emit_node { + my $self = shift; + my ($type, $node_id); + my $ref = ref($_[0]); + if ($ref) { + if ($ref eq 'Regexp') { + $self->_emit(' !!perl/regexp'); + $self->_emit_str("$_[0]"); + return; + } + (undef, $type, $node_id) = $self->node_info($_[0], $self->stringify); + } + else { + $type = $ref || 'SCALAR'; + (undef, undef, $node_id) = $self->node_info(\$_[0], $self->stringify); + } + + my ($ynode, $tag) = ('') x 2; + my ($value, $context) = (@_, 0); + + if (defined $self->transferred->{$node_id}) { + $value = $self->transferred->{$node_id}; + $ynode = ynode($value); + if (ref $value) { + $tag = defined $ynode ? $ynode->tag->short : ''; + (undef, $type, $node_id) = + $self->node_info($value, $self->stringify); + } + else { + $ynode = ynode($self->transferred->{$node_id}); + $tag = defined $ynode ? $ynode->tag->short : ''; + $type = 'SCALAR'; + (undef, undef, $node_id) = + $self->node_info( + \ $self->transferred->{$node_id}, + $self->stringify + ); + } + } + elsif ($ynode = ynode($value)) { + $tag = $ynode->tag->short; + } + + if ($self->use_aliases) { + $self->{id_refcnt}{$node_id} ||= 0; + if ($self->{id_refcnt}{$node_id} > 1) { + if (defined $self->{id_anchor}{$node_id}) { + $self->{stream} .= ' *' . $self->{id_anchor}{$node_id} . "\n"; + return; + } + my $anchor = $self->anchor_prefix . $self->{anchor}++; + $self->{stream} .= ' &' . $anchor; + $self->{id_anchor}{$node_id} = $anchor; + } + } + + return $self->_emit_str("$value") # Stringified object + if ref($value) and not $type; + return $self->_emit_scalar($value, $tag) + if $type eq 'SCALAR' and $tag; + return $self->_emit_str($value) + if $type eq 'SCALAR'; + return $self->_emit_mapping($value, $tag, $node_id, $context) + if $type eq 'HASH'; + return $self->_emit_sequence($value, $tag) + if $type eq 'ARRAY'; + $self->warn('YAML_DUMP_WARN_BAD_NODE_TYPE', $type); + return $self->_emit_str("$value"); +} + +# A YAML mapping is akin to a Perl hash. +sub _emit_mapping { + my $self = shift; + my ($value, $tag, $node_id, $context) = @_; + $self->{stream} .= " !$tag" if $tag; + + # Sometimes 'keys' fails. Like on a bad tie implementation. + my $empty_hash = not(eval {keys %$value}); + $self->warn('YAML_EMIT_WARN_KEYS', $@) if $@; + return ($self->{stream} .= " {}\n") if $empty_hash; + + # If CompressSeries is on (default) and legal is this context, then + # use it and make the indent level be 2 for this node. + if ($context == FROMARRAY and + $self->compress_series and + not (defined $self->{id_anchor}{$node_id} or $tag or $empty_hash) + ) { + $self->{stream} .= ' '; + $self->offset->[$self->level+1] = $self->offset->[$self->level] + 2; + } + else { + $context = 0; + $self->{stream} .= "\n" + unless $self->headless && not($self->headless(0)); + $self->offset->[$self->level+1] = + $self->offset->[$self->level] + $self->indent_width; + } + + $self->{level}++; + my @keys; + if ($self->sort_keys == 1) { + if (ynode($value)) { + @keys = keys %$value; + } + else { + @keys = sort keys %$value; + } + } + elsif ($self->sort_keys == 2) { + @keys = sort keys %$value; + } + # XXX This is hackish but sometimes handy. Not sure whether to leave it in. + elsif (ref($self->sort_keys) eq 'ARRAY') { + my $i = 1; + my %order = map { ($_, $i++) } @{$self->sort_keys}; + @keys = sort { + (defined $order{$a} and defined $order{$b}) + ? ($order{$a} <=> $order{$b}) + : ($a cmp $b); + } keys %$value; + } + else { + @keys = keys %$value; + } + # Force the YAML::VALUE ('=') key to sort last. + if (exists $value->{&VALUE}) { + for (my $i = 0; $i < @keys; $i++) { + if ($keys[$i] eq &VALUE) { + splice(@keys, $i, 1); + push @keys, &VALUE; + last; + } + } + } + + for my $key (@keys) { + $self->_emit_key($key, $context); + $context = 0; + $self->{stream} .= ':'; + $self->_emit_node($value->{$key}); + } + $self->{level}--; +} + +# A YAML series is akin to a Perl array. +sub _emit_sequence { + my $self = shift; + my ($value, $tag) = @_; + $self->{stream} .= " !$tag" if $tag; + + return ($self->{stream} .= " []\n") if @$value == 0; + + $self->{stream} .= "\n" + unless $self->headless && not($self->headless(0)); + + # XXX Really crufty feature. Better implemented by ynodes. + if ($self->inline_series and + @$value <= $self->inline_series and + not (scalar grep {ref or /\n/} @$value) + ) { + $self->{stream} =~ s/\n\Z/ /; + $self->{stream} .= '['; + for (my $i = 0; $i < @$value; $i++) { + $self->_emit_str($value->[$i], KEY); + last if $i == $#{$value}; + $self->{stream} .= ', '; + } + $self->{stream} .= "]\n"; + return; + } + + $self->offset->[$self->level + 1] = + $self->offset->[$self->level] + $self->indent_width; + $self->{level}++; + for my $val (@$value) { + $self->{stream} .= ' ' x $self->offset->[$self->level]; + $self->{stream} .= '-'; + $self->_emit_node($val, FROMARRAY); + } + $self->{level}--; +} + +# Emit a mapping key +sub _emit_key { + my $self = shift; + my ($value, $context) = @_; + $self->{stream} .= ' ' x $self->offset->[$self->level] + unless $context == FROMARRAY; + $self->_emit_str($value, KEY); +} + +# Emit a blessed SCALAR +sub _emit_scalar { + my $self = shift; + my ($value, $tag) = @_; + $self->{stream} .= " !$tag"; + $self->_emit_str($value, BLESSED); +} + +sub _emit { + my $self = shift; + $self->{stream} .= join '', @_; +} + +# Emit a string value. YAML has many scalar styles. This routine attempts to +# guess the best style for the text. +sub _emit_str { + my $self = shift; + my $type = $_[1] || 0; + + # Use heuristics to find the best scalar emission style. + $self->offset->[$self->level + 1] = + $self->offset->[$self->level] + $self->indent_width; + $self->{level}++; + + my $sf = $type == KEY ? '' : ' '; + my $sb = $type == KEY ? '? ' : ' '; + my $ef = $type == KEY ? '' : "\n"; + my $eb = "\n"; + + while (1) { + $self->_emit($sf), + $self->_emit_plain($_[0]), + $self->_emit($ef), last + if not defined $_[0]; + $self->_emit($sf, '=', $ef), last + if $_[0] eq VALUE; + $self->_emit($sf), + $self->_emit_double($_[0]), + $self->_emit($ef), last + if $_[0] =~ /$ESCAPE_CHAR/; + if ($_[0] =~ /\n/) { + $self->_emit($sb), + $self->_emit_block($LIT_CHAR, $_[0]), + $self->_emit($eb), last + if $self->use_block; + Carp::cluck "[YAML] \$UseFold is no longer supported" + if $self->use_fold; + $self->_emit($sf), + $self->_emit_double($_[0]), + $self->_emit($ef), last + if length $_[0] <= 30; + $self->_emit($sf), + $self->_emit_double($_[0]), + $self->_emit($ef), last + if $_[0] !~ /\n\s*\S/; + $self->_emit($sb), + $self->_emit_block($LIT_CHAR, $_[0]), + $self->_emit($eb), last; + } + $self->_emit($sf), + $self->_emit_number($_[0]), + $self->_emit($ef), last + if $self->is_literal_number($_[0]); + $self->_emit($sf), + $self->_emit_plain($_[0]), + $self->_emit($ef), last + if $self->is_valid_plain($_[0]); + $self->_emit($sf), + $self->_emit_double($_[0]), + $self->_emit($ef), last + if $_[0] =~ /'/; + $self->_emit($sf), + $self->_emit_single($_[0]), + $self->_emit($ef); + last; + } + + $self->{level}--; + + return; +} + +sub is_literal_number { + my $self = shift; + # Stolen from JSON::Tiny + return B::svref_2object(\$_[0])->FLAGS & (B::SVp_IOK | B::SVp_NOK) + && 0 + $_[0] eq $_[0]; +} + +sub _emit_number { + my $self = shift; + return $self->_emit_plain($_[0]); +} + +# Check whether or not a scalar should be emitted as an plain scalar. +sub is_valid_plain { + my $self = shift; + return 0 unless length $_[0]; + return 0 if $self->quote_numeric_strings and Scalar::Util::looks_like_number($_[0]); + # refer to YAML::Loader::parse_inline_simple() + return 0 if $_[0] =~ /^[\s\{\[\~\`\'\"\!\@\#\>\|\%\&\?\*\^]/; + return 0 if $_[0] =~ /[\{\[\]\},]/; + return 0 if $_[0] =~ /[:\-\?]\s/; + return 0 if $_[0] =~ /\s#/; + return 0 if $_[0] =~ /\:(\s|$)/; + return 0 if $_[0] =~ /[\s\|\>]$/; + return 0 if $_[0] eq '-'; + return 0 if $_[0] eq '='; + return 1; +} + +sub _emit_block { + my $self = shift; + my ($indicator, $value) = @_; + $self->{stream} .= $indicator; + $value =~ /(\n*)\Z/; + my $chomp = length $1 ? (length $1 > 1) ? '+' : '' : '-'; + $value = '~' if not defined $value; + $self->{stream} .= $chomp; + $self->{stream} .= $self->indent_width if $value =~ /^\s/; + $self->{stream} .= $self->indent($value); +} + +# Plain means that the scalar is unquoted. +sub _emit_plain { + my $self = shift; + $self->{stream} .= defined $_[0] ? $_[0] : '~'; +} + +# Double quoting is for single lined escaped strings. +sub _emit_double { + my $self = shift; + (my $escaped = $self->escape($_[0])) =~ s/"/\\"/g; + $self->{stream} .= qq{"$escaped"}; +} + +# Single quoting is for single lined unescaped strings. +sub _emit_single { + my $self = shift; + my $item = shift; + $item =~ s{'}{''}g; + $self->{stream} .= "'$item'"; +} + +#============================================================================== +# Utility subroutines. +#============================================================================== + +# Indent a scalar to the current indentation level. +sub indent { + my $self = shift; + my ($text) = @_; + return $text unless length $text; + $text =~ s/\n\Z//; + my $indent = ' ' x $self->offset->[$self->level]; + $text =~ s/^/$indent/gm; + $text = "\n$text"; + return $text; +} + +# Escapes for unprintable characters +my @escapes = qw(\0 \x01 \x02 \x03 \x04 \x05 \x06 \a + \x08 \t \n \v \f \r \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 + \x18 \x19 \x1a \e \x1c \x1d \x1e \x1f + ); + +# Escape the unprintable characters +sub escape { + my $self = shift; + my ($text) = @_; + $text =~ s/\\/\\\\/g; + $text =~ s/([\x00-\x1f])/$escapes[ord($1)]/ge; + return $text; +} + +1; diff --git a/src/main/perl/lib/YAML/Dumper/Base.pm b/src/main/perl/lib/YAML/Dumper/Base.pm new file mode 100644 index 000000000..23db7b1e6 --- /dev/null +++ b/src/main/perl/lib/YAML/Dumper/Base.pm @@ -0,0 +1,111 @@ +package YAML::Dumper::Base; + +use YAML::Mo; + +use YAML::Node; + +# YAML Dumping options +has spec_version => default => sub {'1.0'}; +has indent_width => default => sub {2}; +has use_header => default => sub {1}; +has use_version => default => sub {0}; +has sort_keys => default => sub {1}; +has anchor_prefix => default => sub {''}; +has dump_code => default => sub {0}; +has use_block => default => sub {0}; +has use_fold => default => sub {0}; +has compress_series => default => sub {1}; +has inline_series => default => sub {0}; +has use_aliases => default => sub {1}; +has purity => default => sub {0}; +has stringify => default => sub {0}; +has quote_numeric_strings => default => sub {0}; + +# Properties +has stream => default => sub {''}; +has document => default => sub {0}; +has transferred => default => sub {{}}; +has id_refcnt => default => sub {{}}; +has id_anchor => default => sub {{}}; +has anchor => default => sub {1}; +has level => default => sub {0}; +has offset => default => sub {[]}; +has headless => default => sub {0}; +has blessed_map => default => sub {{}}; + +# Global Options are an idea taken from Data::Dumper. Really they are just +# sugar on top of real OO properties. They make the simple Dump/Load API +# easy to configure. +sub set_global_options { + my $self = shift; + $self->spec_version($YAML::SpecVersion) + if defined $YAML::SpecVersion; + $self->indent_width($YAML::Indent) + if defined $YAML::Indent; + $self->use_header($YAML::UseHeader) + if defined $YAML::UseHeader; + $self->use_version($YAML::UseVersion) + if defined $YAML::UseVersion; + $self->sort_keys($YAML::SortKeys) + if defined $YAML::SortKeys; + $self->anchor_prefix($YAML::AnchorPrefix) + if defined $YAML::AnchorPrefix; + $self->dump_code($YAML::DumpCode || $YAML::UseCode) + if defined $YAML::DumpCode or defined $YAML::UseCode; + $self->use_block($YAML::UseBlock) + if defined $YAML::UseBlock; + $self->use_fold($YAML::UseFold) + if defined $YAML::UseFold; + $self->compress_series($YAML::CompressSeries) + if defined $YAML::CompressSeries; + $self->inline_series($YAML::InlineSeries) + if defined $YAML::InlineSeries; + $self->use_aliases($YAML::UseAliases) + if defined $YAML::UseAliases; + $self->purity($YAML::Purity) + if defined $YAML::Purity; + $self->stringify($YAML::Stringify) + if defined $YAML::Stringify; + $self->quote_numeric_strings($YAML::QuoteNumericStrings) + if defined $YAML::QuoteNumericStrings; +} + +sub dump { + my $self = shift; + $self->die('dump() not implemented in this class.'); +} + +sub blessed { + my $self = shift; + my ($ref) = @_; + $ref = \$_[0] unless ref $ref; + my (undef, undef, $node_id) = YAML::Mo::Object->node_info($ref); + $self->{blessed_map}->{$node_id}; +} + +sub bless { + my $self = shift; + my ($ref, $blessing) = @_; + my $ynode; + $ref = \$_[0] unless ref $ref; + my (undef, undef, $node_id) = YAML::Mo::Object->node_info($ref); + if (not defined $blessing) { + $ynode = YAML::Node->new($ref); + } + elsif (ref $blessing) { + $self->die() unless ynode($blessing); + $ynode = $blessing; + } + else { + no strict 'refs'; + my $transfer = $blessing . "::yaml_dump"; + $self->die() unless defined &{$transfer}; + $ynode = &{$transfer}($ref); + $self->die() unless ynode($ynode); + } + $self->{blessed_map}->{$node_id} = $ynode; + my $object = ynode($ynode) or $self->die(); + return $object; +} + +1; diff --git a/src/main/perl/lib/YAML/Error.pm b/src/main/perl/lib/YAML/Error.pm new file mode 100644 index 000000000..1a258fc87 --- /dev/null +++ b/src/main/perl/lib/YAML/Error.pm @@ -0,0 +1,191 @@ +package YAML::Error; + +use YAML::Mo; + +has 'code'; +has 'type' => default => sub {'Error'}; +has 'line'; +has 'document'; +has 'arguments' => default => sub {[]}; + +my ($error_messages, %line_adjust); + +sub format_message { + my $self = shift; + my $output = 'YAML ' . $self->type . ': '; + my $code = $self->code; + if ($error_messages->{$code}) { + $code = sprintf($error_messages->{$code}, @{$self->arguments}); + } + $output .= $code . "\n"; + + $output .= ' Code: ' . $self->code . "\n" + if defined $self->code; + $output .= ' Line: ' . $self->line . "\n" + if defined $self->line; + $output .= ' Document: ' . $self->document . "\n" + if defined $self->document; + return $output; +} + +sub error_messages { + $error_messages; +} + +%$error_messages = map {s/^\s+//;s/\\n/\n/;$_} split "\n", <<'...'; +YAML_PARSE_ERR_BAD_CHARS + Invalid characters in stream. This parser only supports printable ASCII +YAML_PARSE_ERR_BAD_MAJOR_VERSION + Can't parse a %s document with a 1.0 parser +YAML_PARSE_WARN_BAD_MINOR_VERSION + Parsing a %s document with a 1.0 parser +YAML_PARSE_WARN_MULTIPLE_DIRECTIVES + '%s directive used more than once' +YAML_PARSE_ERR_TEXT_AFTER_INDICATOR + No text allowed after indicator +YAML_PARSE_ERR_NO_ANCHOR + No anchor for alias '*%s' +YAML_PARSE_ERR_NO_SEPARATOR + Expected separator '---' +YAML_PARSE_ERR_SINGLE_LINE + Couldn't parse single line value +YAML_PARSE_ERR_BAD_ANCHOR + Invalid anchor +YAML_DUMP_ERR_INVALID_INDENT + Invalid Indent width specified: '%s' +YAML_LOAD_USAGE + usage: YAML::Load($yaml_stream_scalar) +YAML_PARSE_ERR_BAD_NODE + Can't parse node +YAML_PARSE_ERR_BAD_EXPLICIT + Unsupported explicit transfer: '%s' +YAML_DUMP_USAGE_DUMPCODE + Invalid value for DumpCode: '%s' +YAML_LOAD_ERR_FILE_INPUT + Couldn't open %s for input:\n%s +YAML_DUMP_ERR_FILE_CONCATENATE + Can't concatenate to YAML file %s +YAML_DUMP_ERR_FILE_OUTPUT + Couldn't open %s for output:\n%s +YAML_DUMP_ERR_FILE_OUTPUT_CLOSE + Error closing %s:\n%s +YAML_DUMP_ERR_NO_HEADER + With UseHeader=0, the node must be a plain hash or array +YAML_DUMP_WARN_BAD_NODE_TYPE + Can't perform serialization for node type: '%s' +YAML_EMIT_WARN_KEYS + Encountered a problem with 'keys':\n%s +YAML_DUMP_WARN_DEPARSE_FAILED + Deparse failed for CODE reference +YAML_DUMP_WARN_CODE_DUMMY + Emitting dummy subroutine for CODE reference +YAML_PARSE_ERR_MANY_EXPLICIT + More than one explicit transfer +YAML_PARSE_ERR_MANY_IMPLICIT + More than one implicit request +YAML_PARSE_ERR_MANY_ANCHOR + More than one anchor +YAML_PARSE_ERR_ANCHOR_ALIAS + Can't define both an anchor and an alias +YAML_PARSE_ERR_BAD_ALIAS + Invalid alias +YAML_PARSE_ERR_MANY_ALIAS + More than one alias +YAML_LOAD_ERR_NO_CONVERT + Can't convert implicit '%s' node to explicit '%s' node +YAML_LOAD_ERR_NO_DEFAULT_VALUE + No default value for '%s' explicit transfer +YAML_LOAD_ERR_NON_EMPTY_STRING + Only the empty string can be converted to a '%s' +YAML_LOAD_ERR_BAD_MAP_TO_SEQ + Can't transfer map as sequence. Non numeric key '%s' encountered. +YAML_DUMP_ERR_BAD_GLOB + '%s' is an invalid value for Perl glob +YAML_DUMP_ERR_BAD_REGEXP + '%s' is an invalid value for Perl Regexp +YAML_LOAD_ERR_BAD_MAP_ELEMENT + Invalid element in map +YAML_LOAD_WARN_DUPLICATE_KEY + Duplicate map key '%s' found. Ignoring. +YAML_LOAD_ERR_BAD_SEQ_ELEMENT + Invalid element in sequence +YAML_PARSE_ERR_INLINE_MAP + Can't parse inline map +YAML_PARSE_ERR_INLINE_SEQUENCE + Can't parse inline sequence +YAML_PARSE_ERR_BAD_DOUBLE + Can't parse double quoted string +YAML_PARSE_ERR_BAD_SINGLE + Can't parse single quoted string +YAML_PARSE_ERR_BAD_INLINE_IMPLICIT + Can't parse inline implicit value '%s' +YAML_PARSE_ERR_BAD_IMPLICIT + Unrecognized implicit value '%s' +YAML_PARSE_ERR_INDENTATION + Error. Invalid indentation level +YAML_PARSE_ERR_INCONSISTENT_INDENTATION + Inconsistent indentation level +YAML_LOAD_WARN_UNRESOLVED_ALIAS + Can't resolve alias *%s +YAML_LOAD_WARN_NO_REGEXP_IN_REGEXP + No 'REGEXP' element for Perl regexp +YAML_LOAD_WARN_BAD_REGEXP_ELEM + Unknown element '%s' in Perl regexp +YAML_LOAD_WARN_GLOB_NAME + No 'NAME' element for Perl glob +YAML_LOAD_WARN_PARSE_CODE + Couldn't parse Perl code scalar: %s +YAML_LOAD_WARN_CODE_DEPARSE + Won't parse Perl code unless $YAML::LoadCode is set +YAML_EMIT_ERR_BAD_LEVEL + Internal Error: Bad level detected +YAML_PARSE_WARN_AMBIGUOUS_TAB + Amibiguous tab converted to spaces +YAML_LOAD_WARN_BAD_GLOB_ELEM + Unknown element '%s' in Perl glob +YAML_PARSE_ERR_ZERO_INDENT + Can't use zero as an indentation width +YAML_LOAD_WARN_GLOB_IO + Can't load an IO filehandle. Yet!!! +... + +%line_adjust = map {($_, 1)} + qw(YAML_PARSE_ERR_BAD_MAJOR_VERSION + YAML_PARSE_WARN_BAD_MINOR_VERSION + YAML_PARSE_ERR_TEXT_AFTER_INDICATOR + YAML_PARSE_ERR_NO_ANCHOR + YAML_PARSE_ERR_MANY_EXPLICIT + YAML_PARSE_ERR_MANY_IMPLICIT + YAML_PARSE_ERR_MANY_ANCHOR + YAML_PARSE_ERR_ANCHOR_ALIAS + YAML_PARSE_ERR_BAD_ALIAS + YAML_PARSE_ERR_MANY_ALIAS + YAML_LOAD_ERR_NO_CONVERT + YAML_LOAD_ERR_NO_DEFAULT_VALUE + YAML_LOAD_ERR_NON_EMPTY_STRING + YAML_LOAD_ERR_BAD_MAP_TO_SEQ + YAML_LOAD_ERR_BAD_STR_TO_INT + YAML_LOAD_ERR_BAD_STR_TO_DATE + YAML_LOAD_ERR_BAD_STR_TO_TIME + YAML_LOAD_WARN_DUPLICATE_KEY + YAML_PARSE_ERR_INLINE_MAP + YAML_PARSE_ERR_INLINE_SEQUENCE + YAML_PARSE_ERR_BAD_DOUBLE + YAML_PARSE_ERR_BAD_SINGLE + YAML_PARSE_ERR_BAD_INLINE_IMPLICIT + YAML_PARSE_ERR_BAD_IMPLICIT + YAML_LOAD_WARN_NO_REGEXP_IN_REGEXP + YAML_LOAD_WARN_BAD_REGEXP_ELEM + YAML_LOAD_WARN_REGEXP_CREATE + YAML_LOAD_WARN_GLOB_NAME + YAML_LOAD_WARN_PARSE_CODE + YAML_LOAD_WARN_CODE_DEPARSE + YAML_LOAD_WARN_BAD_GLOB_ELEM + YAML_PARSE_ERR_ZERO_INDENT + ); + +package YAML::Warning; + +our @ISA = 'YAML::Error'; + +1; diff --git a/src/main/perl/lib/YAML/Loader.pm b/src/main/perl/lib/YAML/Loader.pm new file mode 100644 index 000000000..b3c9122f2 --- /dev/null +++ b/src/main/perl/lib/YAML/Loader.pm @@ -0,0 +1,839 @@ +package YAML::Loader; + +use YAML::Mo; +extends 'YAML::Loader::Base'; + +use YAML::Loader::Base; +use YAML::Types; +use YAML::Node; + +# Context constants +use constant LEAF => 1; +use constant COLLECTION => 2; +use constant VALUE => "\x07YAML\x07VALUE\x07"; +use constant COMMENT => "\x07YAML\x07COMMENT\x07"; + +# Common YAML character sets +my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]'; +my $FOLD_CHAR = '>'; +my $LIT_CHAR = '|'; +my $LIT_CHAR_RX = "\\$LIT_CHAR"; + +sub load { + my $self = shift; + $self->stream($_[0] || ''); + return $self->_parse(); +} + +# Top level function for parsing. Parse each document in order and +# handle processing for YAML headers. +sub _parse { + my $self = shift; + my (%directives, $preface); + $self->{stream} =~ s|\015\012|\012|g; + $self->{stream} =~ s|\015|\012|g; + $self->line(0); + $self->die('YAML_PARSE_ERR_BAD_CHARS') + if $self->stream =~ /$ESCAPE_CHAR/; + $self->{stream} =~ s/(.)\n\Z/$1/s; + $self->lines([split /\x0a/, $self->stream, -1]); + $self->line(1); + # Throw away any comments or blanks before the header (or start of + # content for headerless streams) + $self->_parse_throwaway_comments(); + $self->document(0); + $self->documents([]); + $self->zero_indent([]); + # Add an "assumed" header if there is no header and the stream is + # not empty (after initial throwaways). + if (not $self->eos) { + if ($self->lines->[0] !~ /^---(\s|$)/) { + unshift @{$self->lines}, '---'; + $self->{line}--; + } + } + + # Main Loop. Parse out all the top level nodes and return them. + while (not $self->eos) { + $self->anchor2node({}); + $self->{document}++; + $self->done(0); + $self->level(0); + $self->offset->[0] = -1; + + if ($self->lines->[0] =~ /^---\s*(.*)$/) { + my @words = split /\s/, $1; + %directives = (); + while (@words) { + if ($words[0] =~ /^#(\w+):(\S.*)$/) { + my ($key, $value) = ($1, $2); + shift(@words); + if (defined $directives{$key}) { + $self->warn('YAML_PARSE_WARN_MULTIPLE_DIRECTIVES', + $key, $self->document); + next; + } + $directives{$key} = $value; + } + elsif ($words[0] eq '') { + shift @words; + } + else { + last; + } + } + $self->preface(join ' ', @words); + } + else { + $self->die('YAML_PARSE_ERR_NO_SEPARATOR'); + } + + if (not $self->done) { + $self->_parse_next_line(COLLECTION); + } + if ($self->done) { + $self->{indent} = -1; + $self->content(''); + } + + $directives{YAML} ||= '1.0'; + $directives{TAB} ||= 'NONE'; + ($self->{major_version}, $self->{minor_version}) = + split /\./, $directives{YAML}, 2; + $self->die('YAML_PARSE_ERR_BAD_MAJOR_VERSION', $directives{YAML}) + if $self->major_version ne '1'; + $self->warn('YAML_PARSE_WARN_BAD_MINOR_VERSION', $directives{YAML}) + if $self->minor_version ne '0'; + $self->die('Unrecognized TAB policy') + unless $directives{TAB} =~ /^(NONE|\d+)(:HARD)?$/; + + push @{$self->documents}, $self->_parse_node(); + } + return wantarray ? @{$self->documents} : $self->documents->[-1]; +} + +# This function is the dispatcher for parsing each node. Every node +# recurses back through here. (Inlines are an exception as they have +# their own sub-parser.) +sub _parse_node { + my $self = shift; + my $preface = $self->preface; + $self->preface(''); + my ($node, $type, $indicator, $chomp, $parsed_inline) = ('') x 5; + my ($anchor, $alias, $explicit, $implicit) = ('') x 4; + ($anchor, $alias, $explicit, $implicit, $preface) = + $self->_parse_qualifiers($preface); + if ($anchor) { + $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node'; + } + $self->inline(''); + while (length $preface) { + if ($preface =~ s/^($FOLD_CHAR|$LIT_CHAR_RX)//) { + $indicator = $1; + if ($preface =~ s/^([+-])[0-9]*//) { + $chomp = $1; + } + elsif ($preface =~ s/^[0-9]+([+-]?)//) { + $chomp = $1; + } + if ($preface =~ s/^(?:\s+#.*$|\s*)$//) { + } + else { + $self->die('YAML_PARSE_ERR_TEXT_AFTER_INDICATOR'); + } + } + else { + $self->inline($preface); + $preface = ''; + } + } + if ($alias) { + $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias) + unless defined $self->anchor2node->{$alias}; + if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') { + $node = $self->anchor2node->{$alias}; + } + else { + $node = do {my $sv = "*$alias"}; + push @{$self->anchor2node->{$alias}}, [\$node, $self->line]; + } + } + elsif (length $self->inline) { + $node = $self->_parse_inline(1, $implicit, $explicit); + $parsed_inline = 1; + if (length $self->inline) { + $self->die('YAML_PARSE_ERR_SINGLE_LINE'); + } + } + elsif ($indicator eq $LIT_CHAR) { + $self->{level}++; + $node = $self->_parse_block($chomp); + $node = $self->_parse_implicit($node) if $implicit; + $self->{level}--; + } + elsif ($indicator eq $FOLD_CHAR) { + $self->{level}++; + $node = $self->_parse_unfold($chomp); + $node = $self->_parse_implicit($node) if $implicit; + $self->{level}--; + } + else { + $self->{level}++; + $self->offset->[$self->level] ||= 0; + if ($self->indent == $self->offset->[$self->level]) { + if ($self->content =~ /^-( |$)/) { + $node = $self->_parse_seq($anchor); + } + elsif ($self->content =~ /(^\?|\:( |$))/) { + $node = $self->_parse_mapping($anchor); + } + elsif ($preface =~ /^\s*$/) { + $node = $self->_parse_implicit(''); + } + else { + $self->die('YAML_PARSE_ERR_BAD_NODE'); + } + } + else { + $node = undef; + } + $self->{level}--; + } + $#{$self->offset} = $self->level; + + if ($explicit) { + $node = $self->_parse_explicit($node, $explicit) if !$parsed_inline; + } + if ($anchor) { + if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') { + # XXX Can't remember what this code actually does + for my $ref (@{$self->anchor2node->{$anchor}}) { + ${$ref->[0]} = $node; + $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS', + $anchor, $ref->[1]); + } + } + $self->anchor2node->{$anchor} = $node; + } + return $node; +} + +# Preprocess the qualifiers that may be attached to any node. +sub _parse_qualifiers { + my $self = shift; + my ($preface) = @_; + my ($anchor, $alias, $explicit, $implicit, $token) = ('') x 5; + $self->inline(''); + while ($preface =~ /^[&*!]/) { + if ($preface =~ s/^\!(\S+)\s*//) { + $self->die('YAML_PARSE_ERR_MANY_EXPLICIT') if $explicit; + $explicit = $1; + } + elsif ($preface =~ s/^\!\s*//) { + $self->die('YAML_PARSE_ERR_MANY_IMPLICIT') if $implicit; + $implicit = 1; + } + elsif ($preface =~ s/^\&([^ ,:]*)\s*//) { + $token = $1; + $self->die('YAML_PARSE_ERR_BAD_ANCHOR') + unless $token =~ /^[a-zA-Z0-9_.\/-]+$/; + $self->die('YAML_PARSE_ERR_MANY_ANCHOR') if $anchor; + $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $alias; + $anchor = $token; + } + elsif ($preface =~ s/^\*([^ ,:]*)\s*//) { + $token = $1; + $self->die('YAML_PARSE_ERR_BAD_ALIAS') + unless $token =~ /^[a-zA-Z0-9_.\/-]+$/; + $self->die('YAML_PARSE_ERR_MANY_ALIAS') if $alias; + $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $anchor; + $alias = $token; + } + } + return ($anchor, $alias, $explicit, $implicit, $preface); +} + +# Morph a node to it's explicit type +sub _parse_explicit { + my $self = shift; + my ($node, $explicit) = @_; + my ($type, $class); + if ($explicit =~ /^\!?perl\/(hash|array|ref|scalar)(?:\:(\w(\w|\:\:)*)?)?$/) { + ($type, $class) = (($1 || ''), ($2 || '')); + + # FIXME # die unless uc($type) eq ref($node) ? + + if ( $type eq "ref" ) { + $self->die('YAML_LOAD_ERR_NO_DEFAULT_VALUE', 'XXX', $explicit) + unless exists $node->{VALUE()} and scalar(keys %$node) == 1; + + my $value = $node->{VALUE()}; + $node = \$value; + } + + if ( $type eq "scalar" and length($class) and !ref($node) ) { + my $value = $node; + $node = \$value; + } + + if ( length($class) and $YAML::LoadBlessed ) { + CORE::bless($node, $class); + } + + return $node; + } + if ($explicit =~ m{^!?perl/(glob|regexp|code)(?:\:(\w(\w|\:\:)*)?)?$}) { + ($type, $class) = (($1 || ''), ($2 || '')); + my $type_class = "YAML::Type::$type"; + no strict 'refs'; + if ($type_class->can('yaml_load')) { + return $type_class->yaml_load($node, $class, $self); + } + else { + $self->die('YAML_LOAD_ERR_NO_CONVERT', 'XXX', $explicit); + } + } + # This !perl/@Foo and !perl/$Foo are deprecated but still parsed + elsif ($YAML::TagClass->{$explicit} || + $explicit =~ m{^perl/(\@|\$)?([a-zA-Z](\w|::)+)$} + ) { + $class = $YAML::TagClass->{$explicit} || $2; + if ($class->can('yaml_load')) { + require YAML::Node; + return $class->yaml_load(YAML::Node->new($node, $explicit)); + } + elsif ($YAML::LoadBlessed) { + if (ref $node) { + return CORE::bless $node, $class; + } + else { + return CORE::bless \$node, $class; + } + } + else { + return $node; + } + } + elsif (ref $node) { + require YAML::Node; + return YAML::Node->new($node, $explicit); + } + else { + # XXX This is likely wrong. Failing test: + # --- !unknown 'scalar value' + return $node; + } +} + +# Parse a YAML mapping into a Perl hash +sub _parse_mapping { + my $self = shift; + my ($anchor) = @_; + my $mapping = $self->preserve ? YAML::Node->new({}) : {}; + $self->anchor2node->{$anchor} = $mapping; + my $key; + while (not $self->done and $self->indent == $self->offset->[$self->level]) { + # If structured key: + if ($self->{content} =~ s/^\?\s*//) { + $self->preface($self->content); + $self->_parse_next_line(COLLECTION); + $key = $self->_parse_node(); + $key = "$key"; + } + # If "default" key (equals sign) + elsif ($self->{content} =~ s/^\=\s*(?=:)//) { + $key = VALUE; + } + # If "comment" key (slash slash) + elsif ($self->{content} =~ s/^\=\s*(?=:)//) { + $key = COMMENT; + } + # Regular scalar key: + else { + $self->inline($self->content); + $key = $self->_parse_inline(); + $key = "$key"; + $self->content($self->inline); + $self->inline(''); + } + + unless ($self->{content} =~ s/^:(?:\s+#.*$|\s*)//) { + $self->die('YAML_LOAD_ERR_BAD_MAP_ELEMENT'); + } + $self->preface($self->content); + my $level = $self->level; + + # we can get a zero indented sequence, possibly + my $zero_indent = $self->zero_indent; + $zero_indent->[ $level ] = 0; + $self->_parse_next_line(COLLECTION); + my $value = $self->_parse_node(); + $#$zero_indent = $level; + + if (exists $mapping->{$key}) { + $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY', $key); + } + else { + $mapping->{$key} = $value; + } + } + return $mapping; +} + +# Parse a YAML sequence into a Perl array +sub _parse_seq { + my $self = shift; + my ($anchor) = @_; + my $seq = []; + $self->anchor2node->{$anchor} = $seq; + while (not $self->done and $self->indent == $self->offset->[$self->level]) { + if ($self->content =~ /^-(?: (.*))?$/) { + $self->preface(defined($1) ? $1 : ''); + } + else { + if ($self->zero_indent->[ $self->level ]) { + last; + } + $self->die('YAML_LOAD_ERR_BAD_SEQ_ELEMENT'); + } + + # Check whether the preface looks like a YAML mapping ("key: value"). + # This is complicated because it has to account for the possibility + # that a key is a quoted string, which itself may contain escaped + # quotes. + my $preface = $self->preface; + if ($preface =~ m/^ (\s*) ( - (?: \ .* | $ ) ) /x) { + $self->indent($self->offset->[$self->level] + 2 + length($1)); + $self->content($2); + $self->level($self->level + 1); + $self->offset->[$self->level] = $self->indent; + $self->preface(''); + push @$seq, $self->_parse_seq(''); + $self->{level}--; + $#{$self->offset} = $self->level; + } + elsif ( + $preface =~ /^ (\s*) ((') (?:''|[^'])*? ' \s* \: (?:\ |$).*) $/x or + $preface =~ /^ (\s*) ((") (?:\\\\|[^"])*? " \s* \: (?:\ |$).*) $/x or + $preface =~ /^ (\s*) (\?.*$)/x or + $preface =~ /^ (\s*) ([^'"\s:#&!\[\]\{\},*|>].*\:(\ .*|$))/x + ) { + $self->indent($self->offset->[$self->level] + 2 + length($1)); + $self->content($2); + $self->level($self->level + 1); + $self->offset->[$self->level] = $self->indent; + $self->preface(''); + push @$seq, $self->_parse_mapping(''); + $self->{level}--; + $#{$self->offset} = $self->level; + } + else { + $self->_parse_next_line(COLLECTION); + push @$seq, $self->_parse_node(); + } + } + return $seq; +} + +# Parse an inline value. Since YAML supports inline collections, this is +# the top level of a sub parsing. +sub _parse_inline { + my $self = shift; + my ($top, $top_implicit, $top_explicit) = (@_, '', '', ''); + $self->{inline} =~ s/^\s*(.*)\s*$/$1/; # OUCH - mugwump + my ($node, $anchor, $alias, $explicit, $implicit) = ('') x 5; + ($anchor, $alias, $explicit, $implicit, $self->{inline}) = + $self->_parse_qualifiers($self->inline); + if ($anchor) { + $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node'; + } + $implicit ||= $top_implicit; + $explicit ||= $top_explicit; + ($top_implicit, $top_explicit) = ('', ''); + if ($alias) { + $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias) + unless defined $self->anchor2node->{$alias}; + if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') { + $node = $self->anchor2node->{$alias}; + } + else { + $node = do {my $sv = "*$alias"}; + push @{$self->anchor2node->{$alias}}, [\$node, $self->line]; + } + } + elsif ($self->inline =~ /^\{/) { + $node = $self->_parse_inline_mapping($anchor); + } + elsif ($self->inline =~ /^\[/) { + $node = $self->_parse_inline_seq($anchor); + } + elsif ($self->inline =~ /^"/) { + $node = $self->_parse_inline_double_quoted(); + $node = $self->_unescape($node); + $node = $self->_parse_implicit($node) if $implicit; + } + elsif ($self->inline =~ /^'/) { + $node = $self->_parse_inline_single_quoted(); + $node = $self->_parse_implicit($node) if $implicit; + } + else { + if ($top) { + $node = $self->inline; + $self->inline(''); + } + else { + $node = $self->_parse_inline_simple(); + } + $node = $self->_parse_implicit($node) unless $explicit; + + if ($self->numify and defined $node and not ref $node and length $node + and $node =~ m/\A-?(?:0|[1-9][0-9]*)?(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?\z/) { + $node += 0; + } + } + if ($explicit) { + $node = $self->_parse_explicit($node, $explicit); + } + if ($anchor) { + if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') { + for my $ref (@{$self->anchor2node->{$anchor}}) { + ${$ref->[0]} = $node; + $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS', + $anchor, $ref->[1]); + } + } + $self->anchor2node->{$anchor} = $node; + } + return $node; +} + +# Parse the inline YAML mapping into a Perl hash +sub _parse_inline_mapping { + my $self = shift; + my ($anchor) = @_; + my $node = {}; + $self->anchor2node->{$anchor} = $node; + + $self->die('YAML_PARSE_ERR_INLINE_MAP') + unless $self->{inline} =~ s/^\{\s*//; + while (not $self->{inline} =~ s/^\s*\}(\s+#.*$|\s*)//) { + my $key = $self->_parse_inline(); + $self->die('YAML_PARSE_ERR_INLINE_MAP') + unless $self->{inline} =~ s/^\: \s*//; + my $value = $self->_parse_inline(); + if (exists $node->{$key}) { + $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY', $key); + } + else { + $node->{$key} = $value; + } + next if $self->inline =~ /^\s*\}/; + $self->die('YAML_PARSE_ERR_INLINE_MAP') + unless $self->{inline} =~ s/^\,\s*//; + } + return $node; +} + +# Parse the inline YAML sequence into a Perl array +sub _parse_inline_seq { + my $self = shift; + my ($anchor) = @_; + my $node = []; + $self->anchor2node->{$anchor} = $node; + + $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE') + unless $self->{inline} =~ s/^\[\s*//; + while (not $self->{inline} =~ s/^\s*\](\s+#.*$|\s*)//) { + my $value = $self->_parse_inline(); + push @$node, $value; + next if $self->inline =~ /^\s*\]/; + $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE') + unless $self->{inline} =~ s/^\,\s*//; + } + return $node; +} + +# Parse the inline double quoted string. +sub _parse_inline_double_quoted { + my $self = shift; + my $inline = $self->inline; + if ($inline =~ s/^"//) { + my $node = ''; + + while ($inline =~ s/^(\\.|[^"\\]+)//) { + my $capture = $1; + $capture =~ s/^\\"/"/; + $node .= $capture; + last unless length $inline; + } + if ($inline =~ s/^"(?:\s+#.*|\s*)//) { + $self->inline($inline); + return $node; + } + } + $self->die('YAML_PARSE_ERR_BAD_DOUBLE'); +} + + +# Parse the inline single quoted string. +sub _parse_inline_single_quoted { + my $self = shift; + my $inline = $self->inline; + if ($inline =~ s/^'//) { + my $node = ''; + while ($inline =~ s/^(''|[^']+)//) { + my $capture = $1; + $capture =~ s/^''/'/; + $node .= $capture; + last unless length $inline; + } + if ($inline =~ s/^'(?:\s+#.*|\s*)//) { + $self->inline($inline); + return $node; + } + } + $self->die('YAML_PARSE_ERR_BAD_SINGLE'); +} + +# Parse the inline unquoted string and do implicit typing. +sub _parse_inline_simple { + my $self = shift; + my $value; + if ($self->inline =~ /^(|[^!@#%^&*].*?)(?=[\[\]\{\},]|, |: |- |:\s*$|$)/) { + $value = $1; + substr($self->{inline}, 0, length($1)) = ''; + } + else { + $self->die('YAML_PARSE_ERR_BAD_INLINE_IMPLICIT', $value); + } + return $value; +} + +sub _parse_implicit { + my $self = shift; + my ($value) = @_; + # remove trailing comments and whitespace + $value =~ s/^#.*$//; + $value =~ s/\s+#.*$//; + $value =~ s/\s*$//; + return $value if $value eq ''; + return undef if $value =~ /^~$/; + return $value + unless $value =~ /^[\@\`]/ or + $value =~ /^[\-\?]\s/; + $self->die('YAML_PARSE_ERR_BAD_IMPLICIT', $value); +} + +# Unfold a YAML multiline scalar into a single string. +sub _parse_unfold { + my $self = shift; + my ($chomp) = @_; + my $node = ''; + my $space = 0; + while (not $self->done and $self->indent == $self->offset->[$self->level]) { + $node .= $self->content. "\n"; + $self->_parse_next_line(LEAF); + } + $node =~ s/^(\S.*)\n(?=\S)/$1 /gm; + $node =~ s/^(\S.*)\n(\n+\S)/$1$2/gm; + $node =~ s/\n*\Z// unless $chomp eq '+'; + $node .= "\n" unless $chomp; + return $node; +} + +# Parse a YAML block style scalar. This is like a Perl here-document. +sub _parse_block { + my $self = shift; + my ($chomp) = @_; + my $node = ''; + while (not $self->done and $self->indent == $self->offset->[$self->level]) { + $node .= $self->content . "\n"; + $self->_parse_next_line(LEAF); + } + return $node if '+' eq $chomp; + $node =~ s/\n*\Z/\n/; + $node =~ s/\n\Z// if $chomp eq '-'; + return $node; +} + +# Handle Perl style '#' comments. Comments must be at the same indentation +# level as the collection line following them. +sub _parse_throwaway_comments { + my $self = shift; + while (@{$self->lines} and + $self->lines->[0] =~ m{^\s*(\#|$)} + ) { + shift @{$self->lines}; + $self->{line}++; + } + $self->eos($self->{done} = not @{$self->lines}); +} + +# This is the routine that controls what line is being parsed. It gets called +# once for each line in the YAML stream. +# +# This routine must: +# 1) Skip past the current line +# 2) Determine the indentation offset for a new level +# 3) Find the next _content_ line +# A) Skip over any throwaways (Comments/blanks) +# B) Set $self->indent, $self->content, $self->line +# 4) Expand tabs appropriately +sub _parse_next_line { + my $self = shift; + my ($type) = @_; + my $level = $self->level; + my $offset = $self->offset->[$level]; + $self->die('YAML_EMIT_ERR_BAD_LEVEL') unless defined $offset; + shift @{$self->lines}; + $self->eos($self->{done} = not @{$self->lines}); + if ($self->eos) { + $self->offset->[$level + 1] = $offset + 1; + return; + } + $self->{line}++; + + # Determine the offset for a new leaf node + # TODO + if ($self->preface =~ + qr/(?:^|\s)(?:$FOLD_CHAR|$LIT_CHAR_RX)(?:[+-]([0-9]*)|([0-9]*)[+-]?)(?:\s+#.*|\s*)$/ + ) { + my $explicit_indent = defined $1 ? $1 : defined $2 ? $2 : ''; + $self->die('YAML_PARSE_ERR_ZERO_INDENT') + if length($explicit_indent) and $explicit_indent == 0; + $type = LEAF; + if (length($explicit_indent)) { + $self->offset->[$level + 1] = $offset + $explicit_indent; + } + else { + # First get rid of any comments. + while (@{$self->lines} && ($self->lines->[0] =~ /^\s*#/)) { + $self->lines->[0] =~ /^( *)/; + last unless length($1) <= $offset; + shift @{$self->lines}; + $self->{line}++; + } + $self->eos($self->{done} = not @{$self->lines}); + return if $self->eos; + if ($self->lines->[0] =~ /^( *)\S/ and length($1) > $offset) { + $self->offset->[$level+1] = length($1); + } + else { + $self->offset->[$level+1] = $offset + 1; + } + } + $offset = $self->offset->[++$level]; + } + # Determine the offset for a new collection level + elsif ($type == COLLECTION and + $self->preface =~ /^(\s*(\!\S*|\&\S+))*\s*$/) { + $self->_parse_throwaway_comments(); + my $zero_indent = $self->zero_indent; + if ($self->eos) { + $self->offset->[$level+1] = $offset + 1; + return; + } + elsif ( + defined $zero_indent->[ $level ] + and not $zero_indent->[ $level ] + and $self->lines->[0] =~ /^( {$offset,})-(?: |$)/ + ) { + my $new_offset = length($1); + $self->offset->[$level+1] = $new_offset; + if ($new_offset == $offset) { + $zero_indent->[ $level+1 ] = 1; + } + } + else { + $self->lines->[0] =~ /^( *)\S/ or + $self->die('YAML_PARSE_ERR_NONSPACE_INDENTATION'); + if (length($1) > $offset) { + $self->offset->[$level+1] = length($1); + } + else { + $self->offset->[$level+1] = $offset + 1; + } + } + $offset = $self->offset->[++$level]; + } + + if ($type == LEAF) { + if (@{$self->lines} and + $self->lines->[0] =~ m{^( *)(\#)} and + length($1) < $offset + ) { + if ( length($1) < $offset) { + shift @{$self->lines}; + $self->{line}++; + # every comment after that is also thrown away regardless + # of identation + while (@{$self->lines} and + $self->lines->[0] =~ m{^( *)(\#)} + ) { + shift @{$self->lines}; + $self->{line}++; + } + } + } + $self->eos($self->{done} = not @{$self->lines}); + } + else { + $self->_parse_throwaway_comments(); + } + return if $self->eos; + + if ($self->lines->[0] =~ /^---(\s|$)/) { + $self->done(1); + return; + } + if ($type == LEAF and + $self->lines->[0] =~ /^ {$offset}(.*)$/ + ) { + $self->indent($offset); + $self->content($1); + } + elsif ($self->lines->[0] =~ /^\s*$/) { + $self->indent($offset); + $self->content(''); + } + else { + $self->lines->[0] =~ /^( *)(\S.*)$/; + while ($self->offset->[$level] > length($1)) { + $level--; + } + $self->die('YAML_PARSE_ERR_INCONSISTENT_INDENTATION') + if $self->offset->[$level] != length($1); + $self->indent(length($1)); + $self->content($2); + } + $self->die('YAML_PARSE_ERR_INDENTATION') + if $self->indent - $offset > 1; +} + +#============================================================================== +# Utility subroutines. +#============================================================================== + +# Printable characters for escapes +my %unescapes = ( + 0 => "\x00", + a => "\x07", + t => "\x09", + n => "\x0a", + 'v' => "\x0b", # Potential v-string error on 5.6.2 if not quoted + f => "\x0c", + r => "\x0d", + e => "\x1b", + '\\' => '\\', + ); + +# Transform all the backslash style escape characters to their literal meaning +sub _unescape { + my $self = shift; + my ($node) = @_; + $node =~ s/\\([never\\fart0]|x([0-9a-fA-F]{2}))/ + (length($1)>1)?pack("H2",$2):$unescapes{$1}/gex; + return $node; +} + +1; diff --git a/src/main/perl/lib/YAML/Loader/Base.pm b/src/main/perl/lib/YAML/Loader/Base.pm new file mode 100644 index 000000000..aa1b8e987 --- /dev/null +++ b/src/main/perl/lib/YAML/Loader/Base.pm @@ -0,0 +1,38 @@ +package YAML::Loader::Base; + +use YAML::Mo; + +has load_code => default => sub {0}; +has preserve => default => sub {0}; +has stream => default => sub {''}; +has document => default => sub {0}; +has line => default => sub {0}; +has documents => default => sub {[]}; +has lines => default => sub {[]}; +has eos => default => sub {0}; +has done => default => sub {0}; +has anchor2node => default => sub {{}}; +has level => default => sub {0}; +has offset => default => sub {[]}; +has preface => default => sub {''}; +has content => default => sub {''}; +has indent => default => sub {0}; +has major_version => default => sub {0}; +has minor_version => default => sub {0}; +has inline => default => sub {''}; +has numify => default => sub {0}; +has zero_indent => default => sub {[]}; + +sub set_global_options { + my $self = shift; + $self->load_code($YAML::LoadCode || $YAML::UseCode) + if defined $YAML::LoadCode or defined $YAML::UseCode; + $self->preserve($YAML::Preserve) if defined $YAML::Preserve; + $self->numify($YAML::Numify) if defined $YAML::Numify; +} + +sub load { + die 'load() not implemented in this class.'; +} + +1; diff --git a/src/main/perl/lib/YAML/Marshall.pm b/src/main/perl/lib/YAML/Marshall.pm new file mode 100644 index 000000000..14d378bed --- /dev/null +++ b/src/main/perl/lib/YAML/Marshall.pm @@ -0,0 +1,47 @@ +use strict; use warnings; +package YAML::Marshall; + +use YAML::Node (); + +sub import { + my $class = shift; + no strict 'refs'; + my $package = caller; + unless (grep { $_ eq $class} @{$package . '::ISA'}) { + push @{$package . '::ISA'}, $class; + } + + my $tag = shift; + if ( $tag ) { + no warnings 'once'; + $YAML::TagClass->{$tag} = $package; + ${$package . "::YamlTag"} = $tag; + } +} + +sub yaml_dump { + my $self = shift; + no strict 'refs'; + my $tag = ${ref($self) . "::YamlTag"} || 'perl/' . ref($self); + $self->yaml_node($self, $tag); +} + +sub yaml_load { + my ($class, $node) = @_; + if (my $ynode = $class->yaml_ynode($node)) { + $node = $ynode->{NODE}; + } + bless $node, $class; +} + +sub yaml_node { + shift; + YAML::Node->new(@_); +} + +sub yaml_ynode { + shift; + YAML::Node::ynode(@_); +} + +1; diff --git a/src/main/perl/lib/YAML/Mo.pm b/src/main/perl/lib/YAML/Mo.pm new file mode 100644 index 000000000..ebc354333 --- /dev/null +++ b/src/main/perl/lib/YAML/Mo.pm @@ -0,0 +1,80 @@ +package YAML::Mo; +# use Mo qw[builder default import]; +# The following line of code was produced from the previous line by +# Mo::Inline version 0.4 +no warnings;my$M=__PACKAGE__.'::';*{$M.Object::new}=sub{my$c=shift;my$s=bless{@_},$c;my%n=%{$c.'::'.':E'};map{$s->{$_}=$n{$_}->()if!exists$s->{$_}}keys%n;$s};*{$M.import}=sub{import warnings;$^H|=1538;my($P,%e,%o)=caller.'::';shift;eval"no Mo::$_",&{$M.$_.::e}($P,\%e,\%o,\@_)for@_;return if$e{M};%e=(extends,sub{eval"no $_[0]()";@{$P.ISA}=$_[0]},has,sub{my$n=shift;my$m=sub{$#_?$_[0]{$n}=$_[1]:$_[0]{$n}};@_=(default,@_)if!($#_%2);$m=$o{$_}->($m,$n,@_)for sort keys%o;*{$P.$n}=$m},%e,);*{$P.$_}=$e{$_}for keys%e;@{$P.ISA}=$M.Object};*{$M.'builder::e'}=sub{my($P,$e,$o)=@_;$o->{builder}=sub{my($m,$n,%a)=@_;my$b=$a{builder}or return$m;my$i=exists$a{lazy}?$a{lazy}:!${$P.':N'};$i or ${$P.':E'}{$n}=\&{$P.$b}and return$m;sub{$#_?$m->(@_):!exists$_[0]{$n}?$_[0]{$n}=$_[0]->$b:$m->(@_)}}};*{$M.'default::e'}=sub{my($P,$e,$o)=@_;$o->{default}=sub{my($m,$n,%a)=@_;exists$a{default}or return$m;my($d,$r)=$a{default};my$g='HASH'eq($r=ref$d)?sub{+{%$d}}:'ARRAY'eq$r?sub{[@$d]}:'CODE'eq$r?$d:sub{$d};my$i=exists$a{lazy}?$a{lazy}:!${$P.':N'};$i or ${$P.':E'}{$n}=$g and return$m;sub{$#_?$m->(@_):!exists$_[0]{$n}?$_[0]{$n}=$g->(@_):$m->(@_)}}};my$i=\&import;*{$M.import}=sub{(@_==2 and not$_[1])?pop@_:@_==1?push@_,grep!/import/,@f:();goto&$i};@f=qw[builder default import];use strict;use warnings; + +our $DumperModule = 'Data::Dumper'; + +my ($_new_error, $_info, $_scalar_info); + +no strict 'refs'; +*{$M.'Object::die'} = sub { + my $self = shift; + my $error = $self->$_new_error(@_); + $error->type('Error'); + Carp::croak($error->format_message); +}; + +*{$M.'Object::warn'} = sub { + my $self = shift; + return unless $^W; + my $error = $self->$_new_error(@_); + $error->type('Warning'); + Carp::cluck($error->format_message); +}; + +# This code needs to be refactored to be simpler and more precise, and no, +# Scalar::Util doesn't DWIM. +# +# Can't handle: +# * blessed regexp +*{$M.'Object::node_info'} = sub { + my $self = shift; + my $stringify = $_[1] || 0; + my ($class, $type, $id) = + ref($_[0]) + ? $stringify + ? &$_info("$_[0]") + : do { + require overload; + my @info = &$_info(overload::StrVal($_[0])); + if (ref($_[0]) eq 'Regexp') { + @info[0, 1] = (undef, 'REGEXP'); + } + @info; + } + : &$_scalar_info($_[0]); + ($class, $type, $id) = &$_scalar_info("$_[0]") + unless $id; + return wantarray ? ($class, $type, $id) : $id; +}; + +#------------------------------------------------------------------------------- +$_info = sub { + return (($_[0]) =~ qr{^(?:(.*)\=)?([^=]*)\(([^\(]*)\)$}o); +}; + +$_scalar_info = sub { + my $id = 'undef'; + if (defined $_[0]) { + \$_[0] =~ /\((\w+)\)$/o or CORE::die(); + $id = "$1-S"; + } + return (undef, undef, $id); +}; + +$_new_error = sub { + require Carp; + my $self = shift; + require YAML::Error; + + my $code = shift || 'unknown error'; + my $error = YAML::Error->new(code => $code); + $error->line($self->line) if $self->can('line'); + $error->document($self->document) if $self->can('document'); + $error->arguments([@_]); + return $error; +}; + +1; diff --git a/src/main/perl/lib/YAML/Node.pm b/src/main/perl/lib/YAML/Node.pm new file mode 100644 index 000000000..81c272715 --- /dev/null +++ b/src/main/perl/lib/YAML/Node.pm @@ -0,0 +1,218 @@ +use strict; use warnings; +package YAML::Node; + +use YAML::Tag; +require YAML::Mo; + +use Exporter; +our @ISA = qw(Exporter YAML::Mo::Object); +our @EXPORT = qw(ynode); + +sub ynode { + my $self; + if (ref($_[0]) eq 'HASH') { + $self = tied(%{$_[0]}); + } + elsif (ref($_[0]) eq 'ARRAY') { + $self = tied(@{$_[0]}); + } + elsif (ref(\$_[0]) eq 'GLOB') { + $self = tied(*{$_[0]}); + } + else { + $self = tied($_[0]); + } + return (ref($self) =~ /^yaml_/) ? $self : undef; +} + +sub new { + my ($class, $node, $tag) = @_; + my $self; + $self->{NODE} = $node; + my (undef, $type) = YAML::Mo::Object->node_info($node); + $self->{KIND} = (not defined $type) ? 'scalar' : + ($type eq 'ARRAY') ? 'sequence' : + ($type eq 'HASH') ? 'mapping' : + $class->die("Can't create YAML::Node from '$type'"); + tag($self, ($tag || '')); + if ($self->{KIND} eq 'scalar') { + yaml_scalar->new($self, $_[1]); + return \ $_[1]; + } + my $package = "yaml_" . $self->{KIND}; + $package->new($self) +} + +sub node { $_->{NODE} } +sub kind { $_->{KIND} } +sub tag { + my ($self, $value) = @_; + if (defined $value) { + $self->{TAG} = YAML::Tag->new($value); + return $self; + } + else { + return $self->{TAG}; + } +} +sub keys { + my ($self, $value) = @_; + if (defined $value) { + $self->{KEYS} = $value; + return $self; + } + else { + return $self->{KEYS}; + } +} + +#============================================================================== +package yaml_scalar; + +@yaml_scalar::ISA = qw(YAML::Node); + +sub new { + my ($class, $self) = @_; + tie $_[2], $class, $self; +} + +sub TIESCALAR { + my ($class, $self) = @_; + bless $self, $class; + $self +} + +sub FETCH { + my ($self) = @_; + $self->{NODE} +} + +sub STORE { + my ($self, $value) = @_; + $self->{NODE} = $value +} + +#============================================================================== +package yaml_sequence; + +@yaml_sequence::ISA = qw(YAML::Node); + +sub new { + my ($class, $self) = @_; + my $new; + tie @$new, $class, $self; + $new +} + +sub TIEARRAY { + my ($class, $self) = @_; + bless $self, $class +} + +sub FETCHSIZE { + my ($self) = @_; + scalar @{$self->{NODE}}; +} + +sub FETCH { + my ($self, $index) = @_; + $self->{NODE}[$index] +} + +sub STORE { + my ($self, $index, $value) = @_; + $self->{NODE}[$index] = $value +} + +sub undone { + die "Not implemented yet"; # XXX +} + +*STORESIZE = *POP = *PUSH = *SHIFT = *UNSHIFT = *SPLICE = *DELETE = *EXISTS = +*STORESIZE = *POP = *PUSH = *SHIFT = *UNSHIFT = *SPLICE = *DELETE = *EXISTS = +*undone; # XXX Must implement before release + +#============================================================================== +package yaml_mapping; + +@yaml_mapping::ISA = qw(YAML::Node); + +sub new { + my ($class, $self) = @_; + @{$self->{KEYS}} = sort keys %{$self->{NODE}}; + my $new; + tie %$new, $class, $self; + $new +} + +sub TIEHASH { + my ($class, $self) = @_; + bless $self, $class +} + +sub FETCH { + my ($self, $key) = @_; + if (exists $self->{NODE}{$key}) { + return (grep {$_ eq $key} @{$self->{KEYS}}) + ? $self->{NODE}{$key} : undef; + } + return $self->{HASH}{$key}; +} + +sub STORE { + my ($self, $key, $value) = @_; + if (exists $self->{NODE}{$key}) { + $self->{NODE}{$key} = $value; + } + elsif (exists $self->{HASH}{$key}) { + $self->{HASH}{$key} = $value; + } + else { + if (not grep {$_ eq $key} @{$self->{KEYS}}) { + push(@{$self->{KEYS}}, $key); + } + $self->{HASH}{$key} = $value; + } + $value +} + +sub DELETE { + my ($self, $key) = @_; + my $return; + if (exists $self->{NODE}{$key}) { + $return = $self->{NODE}{$key}; + } + elsif (exists $self->{HASH}{$key}) { + $return = delete $self->{NODE}{$key}; + } + for (my $i = 0; $i < @{$self->{KEYS}}; $i++) { + if ($self->{KEYS}[$i] eq $key) { + splice(@{$self->{KEYS}}, $i, 1); + } + } + return $return; +} + +sub CLEAR { + my ($self) = @_; + @{$self->{KEYS}} = (); + %{$self->{HASH}} = (); +} + +sub FIRSTKEY { + my ($self) = @_; + $self->{ITER} = 0; + $self->{KEYS}[0] +} + +sub NEXTKEY { + my ($self) = @_; + $self->{KEYS}[++$self->{ITER}] +} + +sub EXISTS { + my ($self, $key) = @_; + exists $self->{NODE}{$key} +} + +1; diff --git a/src/main/perl/lib/YAML/Tag.pm b/src/main/perl/lib/YAML/Tag.pm new file mode 100644 index 000000000..57aef461c --- /dev/null +++ b/src/main/perl/lib/YAML/Tag.pm @@ -0,0 +1,19 @@ +use strict; use warnings; +package YAML::Tag; + +use overload '""' => sub { ${$_[0]} }; + +sub new { + my ($class, $self) = @_; + bless \$self, $class +} + +sub short { + ${$_[0]} +} + +sub canonical { + ${$_[0]} +} + +1; diff --git a/src/main/perl/lib/YAML/Types.pm b/src/main/perl/lib/YAML/Types.pm new file mode 100644 index 000000000..b70e6ce64 --- /dev/null +++ b/src/main/perl/lib/YAML/Types.pm @@ -0,0 +1,240 @@ +package YAML::Types; + +use YAML::Mo; +use YAML::Node; + +# XXX These classes and their APIs could still use some refactoring, +# but at least they work for now. +#------------------------------------------------------------------------------- +package YAML::Type::blessed; + +use YAML::Mo; # XXX + +sub yaml_dump { + my $self = shift; + my ($value) = @_; + my ($class, $type) = YAML::Mo::Object->node_info($value); + no strict 'refs'; + my $kind = lc($type) . ':'; + my $tag = ${$class . '::ClassTag'} || + "!perl/$kind$class"; + if ($type eq 'REF') { + YAML::Node->new( + {(&YAML::VALUE, ${$_[0]})}, $tag + ); + } + elsif ($type eq 'SCALAR') { + $_[1] = $$value; + YAML::Node->new($_[1], $tag); + } + elsif ($type eq 'GLOB') { + # blessed glob support is minimal, and will not round-trip + # initial aim: to not cause an error + return YAML::Type::glob->yaml_dump($value, $tag); + } else { + YAML::Node->new($value, $tag); + } +} + +#------------------------------------------------------------------------------- +package YAML::Type::undef; + +sub yaml_dump { + my $self = shift; +} + +sub yaml_load { + my $self = shift; +} + +#------------------------------------------------------------------------------- +package YAML::Type::glob; + +sub yaml_dump { + my $self = shift; + # $_[0] remains as the glob + my $tag = pop @_ if 2==@_; + + $tag = '!perl/glob:' unless defined $tag; + my $ynode = YAML::Node->new({}, $tag); + for my $type (qw(PACKAGE NAME SCALAR ARRAY HASH CODE IO)) { + my $value = *{$_[0]}{$type}; + $value = $$value if $type eq 'SCALAR'; + if (defined $value) { + if ($type eq 'IO') { + my @stats = qw(device inode mode links uid gid rdev size + atime mtime ctime blksize blocks); + undef $value; + $value->{stat} = YAML::Node->new({}); + if ($value->{fileno} = fileno(*{$_[0]})) { + local $^W; + map {$value->{stat}{shift @stats} = $_} stat(*{$_[0]}); + $value->{tell} = tell(*{$_[0]}); + } + } + $ynode->{$type} = $value; + } + } + return $ynode; +} + +sub yaml_load { + my $self = shift; + my ($node, $class, $loader) = @_; + my ($name, $package); + if (defined $node->{NAME}) { + $name = $node->{NAME}; + delete $node->{NAME}; + } + else { + $loader->warn('YAML_LOAD_WARN_GLOB_NAME'); + return undef; + } + if (defined $node->{PACKAGE}) { + $package = $node->{PACKAGE}; + delete $node->{PACKAGE}; + } + else { + $package = 'main'; + } + no strict 'refs'; + if (exists $node->{SCALAR}) { + if ($YAML::LoadBlessed and $loader->load_code) { + *{"${package}::$name"} = \$node->{SCALAR}; + } + delete $node->{SCALAR}; + } + for my $elem (qw(ARRAY HASH CODE IO)) { + if (exists $node->{$elem}) { + if ($elem eq 'IO') { + $loader->warn('YAML_LOAD_WARN_GLOB_IO'); + delete $node->{IO}; + next; + } + if ($YAML::LoadBlessed and $loader->load_code) { + *{"${package}::$name"} = $node->{$elem}; + } + delete $node->{$elem}; + } + } + for my $elem (sort keys %$node) { + $loader->warn('YAML_LOAD_WARN_BAD_GLOB_ELEM', $elem); + } + return *{"${package}::$name"}; +} + +#------------------------------------------------------------------------------- +package YAML::Type::code; + +my $dummy_warned = 0; +my $default = '{ "DUMMY" }'; + +sub yaml_dump { + my $self = shift; + my $code; + my ($dumpflag, $value) = @_; + my ($class, $type) = YAML::Mo::Object->node_info($value); + my $tag = "!perl/code"; + $tag .= ":$class" if defined $class; + if (not $dumpflag) { + $code = $default; + } + else { + bless $value, "CODE" if $class; + eval { require B::Deparse }; + return if $@; + my $deparse = B::Deparse->new(); + eval { + local $^W = 0; + $code = $deparse->coderef2text($value); + }; + if ($@) { + warn YAML::YAML_DUMP_WARN_DEPARSE_FAILED() if $^W; + $code = $default; + } + bless $value, $class if $class; + chomp $code; + $code .= "\n"; + } + $_[2] = $code; + YAML::Node->new($_[2], $tag); +} + +sub yaml_load { + my $self = shift; + my ($node, $class, $loader) = @_; + if ($loader->load_code) { + my $code = eval "package main; sub $node"; + if ($@) { + $loader->warn('YAML_LOAD_WARN_PARSE_CODE', $@); + return sub {}; + } + else { + CORE::bless $code, $class if ($class and $YAML::LoadBlessed); + return $code; + } + } + else { + return CORE::bless sub {}, $class if ($class and $YAML::LoadBlessed); + return sub {}; + } +} + +#------------------------------------------------------------------------------- +package YAML::Type::ref; + +sub yaml_dump { + my $self = shift; + YAML::Node->new({(&YAML::VALUE, ${$_[0]})}, '!perl/ref') +} + +sub yaml_load { + my $self = shift; + my ($node, $class, $loader) = @_; + $loader->die('YAML_LOAD_ERR_NO_DEFAULT_VALUE', 'ptr') + unless exists $node->{&YAML::VALUE}; + return \$node->{&YAML::VALUE}; +} + +#------------------------------------------------------------------------------- +package YAML::Type::regexp; + +# XXX Be sure to handle blessed regexps (if possible) +sub yaml_dump { + die "YAML::Type::regexp::yaml_dump not currently implemented"; +} + +use constant _QR_TYPES => { + '' => sub { qr{$_[0]} }, + x => sub { qr{$_[0]}x }, + i => sub { qr{$_[0]}i }, + s => sub { qr{$_[0]}s }, + m => sub { qr{$_[0]}m }, + ix => sub { qr{$_[0]}ix }, + sx => sub { qr{$_[0]}sx }, + mx => sub { qr{$_[0]}mx }, + si => sub { qr{$_[0]}si }, + mi => sub { qr{$_[0]}mi }, + ms => sub { qr{$_[0]}sm }, + six => sub { qr{$_[0]}six }, + mix => sub { qr{$_[0]}mix }, + msx => sub { qr{$_[0]}msx }, + msi => sub { qr{$_[0]}msi }, + msix => sub { qr{$_[0]}msix }, +}; + +sub yaml_load { + my $self = shift; + my ($node, $class) = @_; + return qr{$node} unless $node =~ /^\(\?([\^\-uxism]*):(.*)\)\z/s; + my ($flags, $re) = ($1, $2); + $flags =~ s/-.*//; + $flags =~ s/^\^//; + $flags =~ tr/u//d; + my $sub = _QR_TYPES->{$flags} || sub { qr{$_[0]} }; + my $qr = &$sub($re); + bless $qr, $class if (length $class and $YAML::LoadBlessed); + return $qr; +} + +1; diff --git a/src/test/resources/module/YAML/t/2-scalars.t b/src/test/resources/module/YAML/t/2-scalars.t new file mode 100644 index 000000000..2525abbc1 --- /dev/null +++ b/src/test/resources/module/YAML/t/2-scalars.t @@ -0,0 +1,64 @@ +# This test modified from YAML::Syck suite +use strict; +use Test::More; + +use Config; +require YAML; +YAML->import; + +is(Dump(42), "--- 42\n"); +is(Load("--- 42\n"), 42); + +is(Dump(undef), "--- ~\n"); +is(Load("--- ~\n"), undef); +is(Load("---\n"), undef); +is(Load("--- ''\n"), ''); + +is(Load("--- true\n"), "true"); +is(Load("--- false\n"), "false"); + +# $YAML::Syck::ImplicitTyping = $YAML::Syck::ImplicitTyping = 1; +# +# is(Load("--- true\n"), 1); +# is(Load("--- false\n"), ''); + +my $Data = { + Test => ' + Test Drive D:\\', +}; + +is_deeply(Load(Dump($Data)), $Data); + +if ($^V ge v5.9.0) { + # see https://github.com/ingydotnet/yaml-pm/issues/186 + unless ($Config{config_args} =~ / \-fsanitize \= (?: address | undefined ) \b /x) { + # Large data tests. See also https://bugzilla.redhat.com/show_bug.cgi?id=192400. + $Data = ' äø<> " \' " \'' x 40_000; + is(Load(Dump($Data)), $Data); + } +} + +{ + my $yaml1 = <<'EOM'; +a: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +b: 2 +EOM + my $yaml2 = <<'EOM'; +a: "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +b: 2 +EOM + my $error; + eval { + my @data = Load($yaml1); + }; + $error = $@; + cmp_ok($error, '=~', "Can't parse single", "Single quoted without end"); + + eval { + my @data = Load($yaml2); + }; + $error = $@; + cmp_ok($error, '=~', "Can't parse double", "Double quoted without end"); +} + +done_testing; diff --git a/src/test/resources/module/YAML/t/dump-synopsis.t b/src/test/resources/module/YAML/t/dump-synopsis.t new file mode 100644 index 000000000..d65c49ae2 --- /dev/null +++ b/src/test/resources/module/YAML/t/dump-synopsis.t @@ -0,0 +1,21 @@ +use strict; +use warnings; + +use Test::More tests => 1; + +my $success = 0; +my $err; +{ + local $@; + eval { + require YAML::Dumper; + my $hash = {}; + my $dumper = YAML::Dumper->new(); + my $string = $dumper->dump($hash); + $success = 1; + }; + $err = $@; +} +is( $success, 1, "Basic YAML::Dumper usage worked as expected" ) + or diag( explain($err) ); + diff --git a/src/test/resources/module/YAML/t/issue-149.t b/src/test/resources/module/YAML/t/issue-149.t new file mode 100644 index 000000000..016d8f1cd --- /dev/null +++ b/src/test/resources/module/YAML/t/issue-149.t @@ -0,0 +1,10 @@ +use Test::More; +use YAML; + +YAML::Load("a: b"); +YAML::Load("a:\n b: c"); +YAML::Load("a: b\nc: d"); + +pass "YAML w/o final newlines loads"; + +done_testing; diff --git a/src/test/resources/module/YAML/t/issue-69.t b/src/test/resources/module/YAML/t/issue-69.t new file mode 100644 index 000000000..79db080a4 --- /dev/null +++ b/src/test/resources/module/YAML/t/issue-69.t @@ -0,0 +1,10 @@ +use Test::More tests => 2; +use YAML; + +my $seq = eval { YAML::Load("foo: [bar] "); 1 }; +my $map = eval { YAML::Load("foo: {bar: 42} "); 1 }; + +ok($seq, "YAML inline sequence with trailing space loads"); +ok($map, "YAML inline mapping with trailing space loads"); + +done_testing; diff --git a/src/test/resources/module/YAML/t/numify.t b/src/test/resources/module/YAML/t/numify.t new file mode 100644 index 000000000..3fb13b65d --- /dev/null +++ b/src/test/resources/module/YAML/t/numify.t @@ -0,0 +1,30 @@ +use Test::More tests => 6; +use YAML (); +use B; + +my $yaml = <<'EOM'; +int: 23 +float: 3.14 +exp: 1e-5 +EOM + +my $data1 = do { + local $YAML::Numify = 1; + YAML::Load($yaml); +}; +my $data2 = YAML::Load($yaml); + +my $int1 = B::svref_2object(\$data1->{int})->FLAGS & (B::SVp_IOK | B::SVp_NOK); +my $int2 = B::svref_2object(\$data2->{int})->FLAGS & (B::SVp_IOK | B::SVp_NOK); +my $float1 = B::svref_2object(\$data1->{float})->FLAGS & (B::SVp_IOK | B::SVp_NOK); +my $float2 = B::svref_2object(\$data2->{float})->FLAGS & (B::SVp_IOK | B::SVp_NOK); +my $exp1 = B::svref_2object(\$data1->{exp})->FLAGS & (B::SVp_IOK | B::SVp_NOK); +my $exp2 = B::svref_2object(\$data2->{exp})->FLAGS & (B::SVp_IOK | B::SVp_NOK); + +ok($int1, "int with \$YAML::Numify"); +ok(! $int2, "int without \$YAML::Numify"); +ok($float1, "float with \$YAML::Numify"); +ok(! $float2, "float without \$YAML::Numify"); +ok($exp1, "exp with \$YAML::Numify"); +ok(! $exp2, "exp without \$YAML::Numify"); +done_testing; diff --git a/src/test/resources/module/YAML/t/roundtrip.t b/src/test/resources/module/YAML/t/roundtrip.t new file mode 100644 index 000000000..d557ff17d --- /dev/null +++ b/src/test/resources/module/YAML/t/roundtrip.t @@ -0,0 +1,14 @@ +use strict; +use warnings; + +use YAML; +use Test::More tests => 1; +use Test::Deep; + +my %in = ( '=' => 'value' ); +my $yaml = Dump \%in; +my $roundtrip = Load $yaml; +cmp_deeply($roundtrip, \%in, "Roundtrip with '=' hash key"); + + +done_testing; diff --git a/src/test/resources/module/YAML/t/rt-90593.t b/src/test/resources/module/YAML/t/rt-90593.t new file mode 100644 index 000000000..b030678ea --- /dev/null +++ b/src/test/resources/module/YAML/t/rt-90593.t @@ -0,0 +1,21 @@ +# https://rt.cpan.org/Public/Bug/Display.html?id=90593 +use Test::More; + +if ($] < 5.010000) { + plan skip_all => "Skip old perls"; +} +else { + plan tests => 2; +} + +use YAML; +use constant LENGTH => 1000000; + +$SIG{__WARN__} = sub { die @_ }; + +my $yaml = 'x: "' . ('x' x LENGTH) . '"' . "\n"; + +my $hash = Load $yaml; + +is ref($hash), 'HASH', 'Loaded a hash'; +is length($hash->{x}), LENGTH, 'Long scalar loaded';