Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions src/Document/Emitter/Emitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,17 @@ private function emitScalar(ScalarNode $node): void
}
return;
}
// A Plain-styled synthesized scalar carrying a real newline
// would otherwise fall through to double-quoted and emit `\n`
// escapes, silently changing the byte sequence a reader gets
// back. Upgrade to a literal block so newlines round-trip.
if ($this->shouldUpgradePlainToLiteralBlock($node)) {
if ($properties !== '') {
$this->buffer .= rtrim($properties) . $this->lineEnding;
}
$this->emitPlainAsLiteralBlock($node, 0, atLineStart: true);
return;
}
$this->buffer .= $properties . $this->scalarOrRaw($node);
$this->buffer .= $this->lineEnding;
}
Expand Down Expand Up @@ -325,6 +336,20 @@ private function emitMapEntry(MapEntry $entry, int $indent): void
}
return;
}
// Plain-styled synthesized value carrying real newlines:
// upgrade to literal block so a reader gets the same bytes
// back instead of an escaped `\n` sequence. See
// shouldUpgradePlainToLiteralBlock for the gating.
if ($this->shouldUpgradePlainToLiteralBlock($value)) {
if ($properties !== '') {
$this->buffer .= ' ' . rtrim($properties);
}
$this->emitPlainAsLiteralBlock($value, $indent, atLineStart: false);
if (!str_ends_with($this->buffer, $this->lineEnding)) {
$this->buffer .= $this->lineEnding;
}
return;
}
$this->buffer .= ' ' . $properties . $this->scalarOrRaw($value);
} elseif ($value instanceof AliasNode) {
// Alias as entry value.
Expand Down Expand Up @@ -629,6 +654,143 @@ private function isBlockStyleScalar(ScalarNode $node): bool
|| $style === ScalarStyle::FoldedBlock;
}

/**
* True when a Plain-styled scalar carrying real newlines should
* be upgraded to a synthesized literal block on emit.
*
* Motivation: the Plain -> SingleQuoted -> DoubleQuoted upgrade
* ladder in emitString ends at DoubleQuoted for any string that
* contains "\n", which then escapes each newline as the two
* characters `\n`. That is a silent round-trip data change for
* callers that hand the AST a real multi-line string (for
* example horde/HordeYmlFile writing changelog entries).
* Emitting a literal block instead keeps newlines as newlines.
*
* Gating:
* - Style must be Plain. Pinned SingleQuoted / DoubleQuoted are
* honored (the existing style upgrade for pinned-single with
* a newline still goes to DoubleQuoted).
* - Value must be a string containing at least one "\n".
* - No rawSource pinned (loader preserved a specific form).
* - No tag pinned (a tag disambiguates the type and belongs on
* the caller-chosen style).
* - Content must be literal-block-safe: only "\n" and "\t" as
* low control bytes, no lines starting with whitespace (would
* require an explicit indent indicator we do not synthesize
* yet), no lines consisting only of whitespace before a chomp
* boundary. Otherwise fall through to double-quoted.
*/
private function shouldUpgradePlainToLiteralBlock(ScalarNode $node): bool
{
if ($node->getStyle() !== ScalarStyle::Plain) {
return false;
}
if ($node->getRawSource() !== null) {
return false;
}
if ($node->getTag() !== null) {
return false;
}
$value = $node->getValue();
if (!is_string($value) || !str_contains($value, "\n")) {
return false;
}
// Only "\n" and "\t" are permitted as low control bytes. "\r",
// form feed, NUL, etc. force double-quoted with hex escapes.
for ($i = 0, $n = strlen($value); $i < $n; $i++) {
$byte = $value[$i];
$ord = ord($byte);
if ($ord < 0x20 && $byte !== "\n" && $byte !== "\t") {
return false;
}
}
// A line starting with space or tab is ambiguous without an
// explicit indent indicator (`|2` etc.). We do not synthesize
// that yet, so fall back to double-quoted for those.
//
// A line that is only spaces or tabs is also risky at chomp
// boundaries. Refuse those too.
$lines = explode("\n", $value);
// Strip a single trailing empty line (from a final "\n"); it
// just marks Clip chomp and is fine.
if ($lines !== [] && end($lines) === '') {
array_pop($lines);
}
foreach ($lines as $line) {
if ($line === '') {
continue; // blank line inside content is fine.
}
$first = $line[0];
if ($first === ' ' || $first === "\t") {
return false;
}
if (trim($line, " \t") === '') {
return false;
}
}
return true;
}

/**
* Emit a Plain-styled multi-line string as a literal block. The
* caller has already written the map-entry key + `:` (or the
* root position is empty). The indicator is chosen from the
* value's trailing bytes:
*
* ends with exactly one "\n" -> `|` (Clip)
* does not end with "\n" -> `|-` (Strip)
* ends with more than one trailing "\n" -> `|+` (Keep)
*
* Content lines are written at parentIndent + 2. Blank interior
* lines are emitted as bare newlines (no trailing spaces).
*
* $atLineStart is true when the caller sits at column 0 (root
* scalar path); false when the caller has just written "key:" or
* similar and needs a single leading space before the indicator.
*/
private function emitPlainAsLiteralBlock(
ScalarNode $node,
int $parentIndent,
bool $atLineStart,
): void {
$value = (string) $node->getValue();

// Pick chomp from the trailing newline count.
$trailingNewlines = 0;
for ($i = strlen($value) - 1; $i >= 0 && $value[$i] === "\n"; $i--) {
$trailingNewlines++;
}
if ($trailingNewlines === 0) {
$indicator = '|-';
} elseif ($trailingNewlines === 1) {
$indicator = '|';
} else {
$indicator = '|+';
}

if (!$atLineStart) {
$this->buffer .= ' ';
}
$this->buffer .= $indicator . $this->lineEnding;

$contentIndent = $parentIndent + 2;
$lines = explode("\n", $value);
// Drop trailing empties: chomp handles them. `|-` has zero
// trailing "\n" so nothing to drop. `|` has one, giving one
// empty tail element. `|+` has more than one; those we do not
// re-emit because chomp restores them on read.
while ($lines !== [] && end($lines) === '') {
array_pop($lines);
}
foreach ($lines as $line) {
if ($line === '') {
$this->buffer .= $this->lineEnding;
continue;
}
$this->buffer .= str_repeat(' ', $contentIndent) . $line . $this->lineEnding;
}
}

/**
* Emit a synthesized block scalar (no rawSource). Used in
* emitMapEntry when a user has constructed a ScalarNode with
Expand Down
169 changes: 169 additions & 0 deletions test/unit/Document/Emitter/EmitterPlainMultilineLiteralBlockTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
<?php

declare(strict_types=1);

/**
* Copyright 2008-2026 The Horde Project (http://www.horde.org/)
*
* See the enclosed file LICENSE for license information (BSD). If you
* did not receive this file, see http://www.horde.org/licenses/bsd.
*/

namespace Horde\Yaml\Test\Unit\Document\Emitter;

use Horde\Yaml\Document\Emitter\Emitter;
use Horde\Yaml\Document\Node\MapEntry;
use Horde\Yaml\Document\Node\MapNode;
use Horde\Yaml\Document\Node\ScalarNode;
use Horde\Yaml\Document\Node\ScalarStyle;
use Horde\Yaml\Document\YamlDocument;
use Horde\Yaml\Document\YamlStream;
use Horde\Yaml\Yaml;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;

/**
* Verifies that a Plain-styled synthesized scalar carrying real
* newlines emits as a YAML literal block instead of falling through
* the Plain -> SingleQuoted -> DoubleQuoted ladder and producing an
* escaped `\n` sequence. Regression against the bug where a
* changelog.yml release entry written by horde-components ended up
* containing literal `\n` characters in the notes field.
*/
#[CoversClass(Emitter::class)]
final class EmitterPlainMultilineLiteralBlockTest extends TestCase
{
private function emitRoot(ScalarNode $node): string
{
$stream = new YamlStream();
$doc = new YamlDocument();
$doc->setParentStream($stream);
$doc->setRootInternal($node);
$stream->appendInternalDocument($doc);
return (new Emitter())->emit($stream);
}

private function emitMap(string $key, ScalarNode $value): string
{
$map = new MapNode();
$map->appendChildInternal(new MapEntry(new ScalarNode($key), $value));
$stream = new YamlStream();
$doc = new YamlDocument();
$doc->setParentStream($stream);
$doc->setRootInternal($map);
$stream->appendInternalDocument($doc);
return (new Emitter())->emit($stream);
}

public function testPlainMultilineMapValueEmitsAsLiteralBlockNotEscaped(): void
{
// This is the changelog.yml case: a Plain-styled scalar with
// real newlines given as a map value. Before the fix, the
// emitter wrote `notes: "line1\nline2\n"`. After the fix it
// emits a literal block and no `\n` escapes appear.
$value = "line1\nline2\n";
$output = $this->emitMap('notes', new ScalarNode($value));

$expected = "notes: |\n line1\n line2\n";
$this->assertSame($expected, $output);
$this->assertStringNotContainsString('\\n', $output);
}

public function testPlainMultilineRootScalarEmitsAsLiteralBlock(): void
{
$output = $this->emitRoot(new ScalarNode("line1\nline2\n"));

$expected = "|\n line1\n line2\n";
$this->assertSame($expected, $output);
}

public function testTrailingNewlineChoosesClipIndicator(): void
{
$output = $this->emitMap('n', new ScalarNode("a\nb\n"));
$this->assertStringContainsString(": |\n", $output);
$this->assertStringNotContainsString(': |-', $output);
$this->assertStringNotContainsString(': |+', $output);
}

public function testNoTrailingNewlineChoosesStripIndicator(): void
{
$output = $this->emitMap('n', new ScalarNode("a\nb"));
$this->assertStringContainsString(": |-\n", $output);
}

public function testMultipleTrailingNewlinesChoosesKeepIndicator(): void
{
$output = $this->emitMap('n', new ScalarNode("a\nb\n\n\n"));
$this->assertStringContainsString(": |+\n", $output);
}

public function testPinnedSingleQuotedWithNewlineStillUpgradesToDoubleQuoted(): void
{
// Regression: pinned styles bypass the literal-block upgrade.
// The existing "single can't carry newlines, fall through to
// double" contract in EmitterStyleUpgradeTest still holds.
$node = new ScalarNode("line1\nline2", ScalarStyle::SingleQuoted);
$output = $this->emitRoot($node);
$this->assertSame("\"line1\\nline2\"\n", $output);
}

public function testPinnedDoubleQuotedWithNewlineStillEscapes(): void
{
$node = new ScalarNode("line1\nline2", ScalarStyle::DoubleQuoted);
$output = $this->emitRoot($node);
$this->assertSame("\"line1\\nline2\"\n", $output);
}

public function testTaggedPlainMultilineFallsBackToDoubleQuoted(): void
{
// Explicit tag means the caller cares about typing; keep the
// ladder behavior in that case.
$node = new ScalarNode("a\nb");
$node->setTag('!custom');
$output = $this->emitRoot($node);
$this->assertStringContainsString('\\n', $output);
$this->assertStringNotContainsString("\n a\n", $output);
}

public function testLineStartingWithSpaceFallsBackToDoubleQuoted(): void
{
// Content whose first character (or any line's first
// character) is a space would require an explicit indent
// indicator (`|2` etc.) we do not synthesize. Fall through
// to double-quoted for those.
$output = $this->emitMap('n', new ScalarNode(" leading\nnormal"));
$this->assertStringContainsString('\\n', $output);
}

public function testCarriageReturnForcesDoubleQuoted(): void
{
// Literal blocks cannot carry a bare "\r"; force double-quoted
// with hex escapes.
$output = $this->emitMap('n', new ScalarNode("a\r\nb"));
$this->assertStringContainsString('\\r', $output);
$this->assertStringNotContainsString(": |\n", $output);
}

public function testSingleLinePlainStillEmitsPlain(): void
{
// No newline in value: the upgrade must not trigger. This
// guards against accidentally routing every plain string
// through the literal-block path.
$output = $this->emitMap('n', new ScalarNode('just one line'));
$this->assertSame("n: just one line\n", $output);
}

public function testRoundTripPreservesNewlines(): void
{
// End-to-end: emit a synthesized multi-line notes entry, feed
// the output back to Yaml::load, and confirm the string comes
// back with real newlines. This is the changelog contract.
$value = "fix(security): plug hole\nfeat(imp): new toggle\n";
$output = $this->emitMap('notes', new ScalarNode($value));

$parsed = Yaml::load($output);
$this->assertIsArray($parsed);
$this->assertArrayHasKey('notes', $parsed);
$this->assertSame($value, $parsed['notes']);
}
}
Loading