diff --git a/src/Document/Emitter/Emitter.php b/src/Document/Emitter/Emitter.php index 7bfe6f5..76853db 100644 --- a/src/Document/Emitter/Emitter.php +++ b/src/Document/Emitter/Emitter.php @@ -200,6 +200,17 @@ private function emitScalar(ScalarNode $node): void } return; } + // A Plain-styled synthesized scalar carrying a real newline + // would otherwise fall through to double-quoted and emit `\n` + // escapes, silently changing the byte sequence a reader gets + // back. Upgrade to a literal block so newlines round-trip. + if ($this->shouldUpgradePlainToLiteralBlock($node)) { + if ($properties !== '') { + $this->buffer .= rtrim($properties) . $this->lineEnding; + } + $this->emitPlainAsLiteralBlock($node, 0, atLineStart: true); + return; + } $this->buffer .= $properties . $this->scalarOrRaw($node); $this->buffer .= $this->lineEnding; } @@ -325,6 +336,20 @@ private function emitMapEntry(MapEntry $entry, int $indent): void } return; } + // Plain-styled synthesized value carrying real newlines: + // upgrade to literal block so a reader gets the same bytes + // back instead of an escaped `\n` sequence. See + // shouldUpgradePlainToLiteralBlock for the gating. + if ($this->shouldUpgradePlainToLiteralBlock($value)) { + if ($properties !== '') { + $this->buffer .= ' ' . rtrim($properties); + } + $this->emitPlainAsLiteralBlock($value, $indent, atLineStart: false); + if (!str_ends_with($this->buffer, $this->lineEnding)) { + $this->buffer .= $this->lineEnding; + } + return; + } $this->buffer .= ' ' . $properties . $this->scalarOrRaw($value); } elseif ($value instanceof AliasNode) { // Alias as entry value. @@ -629,6 +654,143 @@ private function isBlockStyleScalar(ScalarNode $node): bool || $style === ScalarStyle::FoldedBlock; } + /** + * True when a Plain-styled scalar carrying real newlines should + * be upgraded to a synthesized literal block on emit. + * + * Motivation: the Plain -> SingleQuoted -> DoubleQuoted upgrade + * ladder in emitString ends at DoubleQuoted for any string that + * contains "\n", which then escapes each newline as the two + * characters `\n`. That is a silent round-trip data change for + * callers that hand the AST a real multi-line string (for + * example horde/HordeYmlFile writing changelog entries). + * Emitting a literal block instead keeps newlines as newlines. + * + * Gating: + * - Style must be Plain. Pinned SingleQuoted / DoubleQuoted are + * honored (the existing style upgrade for pinned-single with + * a newline still goes to DoubleQuoted). + * - Value must be a string containing at least one "\n". + * - No rawSource pinned (loader preserved a specific form). + * - No tag pinned (a tag disambiguates the type and belongs on + * the caller-chosen style). + * - Content must be literal-block-safe: only "\n" and "\t" as + * low control bytes, no lines starting with whitespace (would + * require an explicit indent indicator we do not synthesize + * yet), no lines consisting only of whitespace before a chomp + * boundary. Otherwise fall through to double-quoted. + */ + private function shouldUpgradePlainToLiteralBlock(ScalarNode $node): bool + { + if ($node->getStyle() !== ScalarStyle::Plain) { + return false; + } + if ($node->getRawSource() !== null) { + return false; + } + if ($node->getTag() !== null) { + return false; + } + $value = $node->getValue(); + if (!is_string($value) || !str_contains($value, "\n")) { + return false; + } + // Only "\n" and "\t" are permitted as low control bytes. "\r", + // form feed, NUL, etc. force double-quoted with hex escapes. + for ($i = 0, $n = strlen($value); $i < $n; $i++) { + $byte = $value[$i]; + $ord = ord($byte); + if ($ord < 0x20 && $byte !== "\n" && $byte !== "\t") { + return false; + } + } + // A line starting with space or tab is ambiguous without an + // explicit indent indicator (`|2` etc.). We do not synthesize + // that yet, so fall back to double-quoted for those. + // + // A line that is only spaces or tabs is also risky at chomp + // boundaries. Refuse those too. + $lines = explode("\n", $value); + // Strip a single trailing empty line (from a final "\n"); it + // just marks Clip chomp and is fine. + if ($lines !== [] && end($lines) === '') { + array_pop($lines); + } + foreach ($lines as $line) { + if ($line === '') { + continue; // blank line inside content is fine. + } + $first = $line[0]; + if ($first === ' ' || $first === "\t") { + return false; + } + if (trim($line, " \t") === '') { + return false; + } + } + return true; + } + + /** + * Emit a Plain-styled multi-line string as a literal block. The + * caller has already written the map-entry key + `:` (or the + * root position is empty). The indicator is chosen from the + * value's trailing bytes: + * + * ends with exactly one "\n" -> `|` (Clip) + * does not end with "\n" -> `|-` (Strip) + * ends with more than one trailing "\n" -> `|+` (Keep) + * + * Content lines are written at parentIndent + 2. Blank interior + * lines are emitted as bare newlines (no trailing spaces). + * + * $atLineStart is true when the caller sits at column 0 (root + * scalar path); false when the caller has just written "key:" or + * similar and needs a single leading space before the indicator. + */ + private function emitPlainAsLiteralBlock( + ScalarNode $node, + int $parentIndent, + bool $atLineStart, + ): void { + $value = (string) $node->getValue(); + + // Pick chomp from the trailing newline count. + $trailingNewlines = 0; + for ($i = strlen($value) - 1; $i >= 0 && $value[$i] === "\n"; $i--) { + $trailingNewlines++; + } + if ($trailingNewlines === 0) { + $indicator = '|-'; + } elseif ($trailingNewlines === 1) { + $indicator = '|'; + } else { + $indicator = '|+'; + } + + if (!$atLineStart) { + $this->buffer .= ' '; + } + $this->buffer .= $indicator . $this->lineEnding; + + $contentIndent = $parentIndent + 2; + $lines = explode("\n", $value); + // Drop trailing empties: chomp handles them. `|-` has zero + // trailing "\n" so nothing to drop. `|` has one, giving one + // empty tail element. `|+` has more than one; those we do not + // re-emit because chomp restores them on read. + while ($lines !== [] && end($lines) === '') { + array_pop($lines); + } + foreach ($lines as $line) { + if ($line === '') { + $this->buffer .= $this->lineEnding; + continue; + } + $this->buffer .= str_repeat(' ', $contentIndent) . $line . $this->lineEnding; + } + } + /** * Emit a synthesized block scalar (no rawSource). Used in * emitMapEntry when a user has constructed a ScalarNode with diff --git a/test/unit/Document/Emitter/EmitterPlainMultilineLiteralBlockTest.php b/test/unit/Document/Emitter/EmitterPlainMultilineLiteralBlockTest.php new file mode 100644 index 0000000..15c84d8 --- /dev/null +++ b/test/unit/Document/Emitter/EmitterPlainMultilineLiteralBlockTest.php @@ -0,0 +1,169 @@ + SingleQuoted -> DoubleQuoted ladder and producing an + * escaped `\n` sequence. Regression against the bug where a + * changelog.yml release entry written by horde-components ended up + * containing literal `\n` characters in the notes field. + */ +#[CoversClass(Emitter::class)] +final class EmitterPlainMultilineLiteralBlockTest extends TestCase +{ + private function emitRoot(ScalarNode $node): string + { + $stream = new YamlStream(); + $doc = new YamlDocument(); + $doc->setParentStream($stream); + $doc->setRootInternal($node); + $stream->appendInternalDocument($doc); + return (new Emitter())->emit($stream); + } + + private function emitMap(string $key, ScalarNode $value): string + { + $map = new MapNode(); + $map->appendChildInternal(new MapEntry(new ScalarNode($key), $value)); + $stream = new YamlStream(); + $doc = new YamlDocument(); + $doc->setParentStream($stream); + $doc->setRootInternal($map); + $stream->appendInternalDocument($doc); + return (new Emitter())->emit($stream); + } + + public function testPlainMultilineMapValueEmitsAsLiteralBlockNotEscaped(): void + { + // This is the changelog.yml case: a Plain-styled scalar with + // real newlines given as a map value. Before the fix, the + // emitter wrote `notes: "line1\nline2\n"`. After the fix it + // emits a literal block and no `\n` escapes appear. + $value = "line1\nline2\n"; + $output = $this->emitMap('notes', new ScalarNode($value)); + + $expected = "notes: |\n line1\n line2\n"; + $this->assertSame($expected, $output); + $this->assertStringNotContainsString('\\n', $output); + } + + public function testPlainMultilineRootScalarEmitsAsLiteralBlock(): void + { + $output = $this->emitRoot(new ScalarNode("line1\nline2\n")); + + $expected = "|\n line1\n line2\n"; + $this->assertSame($expected, $output); + } + + public function testTrailingNewlineChoosesClipIndicator(): void + { + $output = $this->emitMap('n', new ScalarNode("a\nb\n")); + $this->assertStringContainsString(": |\n", $output); + $this->assertStringNotContainsString(': |-', $output); + $this->assertStringNotContainsString(': |+', $output); + } + + public function testNoTrailingNewlineChoosesStripIndicator(): void + { + $output = $this->emitMap('n', new ScalarNode("a\nb")); + $this->assertStringContainsString(": |-\n", $output); + } + + public function testMultipleTrailingNewlinesChoosesKeepIndicator(): void + { + $output = $this->emitMap('n', new ScalarNode("a\nb\n\n\n")); + $this->assertStringContainsString(": |+\n", $output); + } + + public function testPinnedSingleQuotedWithNewlineStillUpgradesToDoubleQuoted(): void + { + // Regression: pinned styles bypass the literal-block upgrade. + // The existing "single can't carry newlines, fall through to + // double" contract in EmitterStyleUpgradeTest still holds. + $node = new ScalarNode("line1\nline2", ScalarStyle::SingleQuoted); + $output = $this->emitRoot($node); + $this->assertSame("\"line1\\nline2\"\n", $output); + } + + public function testPinnedDoubleQuotedWithNewlineStillEscapes(): void + { + $node = new ScalarNode("line1\nline2", ScalarStyle::DoubleQuoted); + $output = $this->emitRoot($node); + $this->assertSame("\"line1\\nline2\"\n", $output); + } + + public function testTaggedPlainMultilineFallsBackToDoubleQuoted(): void + { + // Explicit tag means the caller cares about typing; keep the + // ladder behavior in that case. + $node = new ScalarNode("a\nb"); + $node->setTag('!custom'); + $output = $this->emitRoot($node); + $this->assertStringContainsString('\\n', $output); + $this->assertStringNotContainsString("\n a\n", $output); + } + + public function testLineStartingWithSpaceFallsBackToDoubleQuoted(): void + { + // Content whose first character (or any line's first + // character) is a space would require an explicit indent + // indicator (`|2` etc.) we do not synthesize. Fall through + // to double-quoted for those. + $output = $this->emitMap('n', new ScalarNode(" leading\nnormal")); + $this->assertStringContainsString('\\n', $output); + } + + public function testCarriageReturnForcesDoubleQuoted(): void + { + // Literal blocks cannot carry a bare "\r"; force double-quoted + // with hex escapes. + $output = $this->emitMap('n', new ScalarNode("a\r\nb")); + $this->assertStringContainsString('\\r', $output); + $this->assertStringNotContainsString(": |\n", $output); + } + + public function testSingleLinePlainStillEmitsPlain(): void + { + // No newline in value: the upgrade must not trigger. This + // guards against accidentally routing every plain string + // through the literal-block path. + $output = $this->emitMap('n', new ScalarNode('just one line')); + $this->assertSame("n: just one line\n", $output); + } + + public function testRoundTripPreservesNewlines(): void + { + // End-to-end: emit a synthesized multi-line notes entry, feed + // the output back to Yaml::load, and confirm the string comes + // back with real newlines. This is the changelog contract. + $value = "fix(security): plug hole\nfeat(imp): new toggle\n"; + $output = $this->emitMap('notes', new ScalarNode($value)); + + $parsed = Yaml::load($output); + $this->assertIsArray($parsed); + $this->assertArrayHasKey('notes', $parsed); + $this->assertSame($value, $parsed['notes']); + } +}