Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 106 additions & 22 deletions src/Converter/HtmlToDjot.php
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,9 @@ protected function processNode(DOMNode $node): string

return match ($tagName) {
'section' => $this->processSection($node),
'html', 'body', 'article', 'main', 'header', 'footer', 'nav', 'aside',
'address', 'dialog', 'fieldset', 'form', 'hgroup', 'menu', 'search' => $this->processBlock($node),
'html', 'body' => $this->processBlock($node),
'article', 'main', 'header', 'footer', 'nav', 'aside',
'address', 'dialog', 'fieldset', 'form', 'hgroup', 'menu', 'search' => $this->processGenericBlockContainer($node),
'details' => $this->processDetails($node),
'div' => $this->processDiv($node),
'p' => $this->processParagraph($node),
Expand Down Expand Up @@ -541,8 +542,25 @@ protected function processDetails(DOMElement $node): string
return $this->processCollapsibleAdmonition($node);
}

// Otherwise treat as regular block
return $this->processBlock($node);
return $this->processGenericBlockContainer($node);
}

/**
 * Render a generic HTML block container as a fenced div tagged with the element name.
 *
 * Elements other than `details` that produce no block attributes are not
 * wrapped: the result of processBlock() is returned unchanged. Otherwise the
 * trimmed block content is enclosed between `::: <tag>` and `:::`, with the
 * formatted attribute string placed directly in front of the opening fence.
 *
 * @param DOMElement $node Container element to convert.
 * @return string Converted output for the element.
 */
protected function processGenericBlockContainer(DOMElement $node): string
{
    $tag = strtolower($node->tagName);
    $attributePrefix = $this->formatBlockAttributes($node);
    $body = $this->processBlock($node);

    // Nothing worth preserving on the wrapper: emit the plain block.
    if ($attributePrefix === '' && $tag !== 'details') {
        return $body;
    }

    $body = trim($body);

    $fenceLines = [$attributePrefix . '::: ' . $tag];
    if ($body !== '') {
        $fenceLines[] = $body;
    }
    $fenceLines[] = ':::';

    return implode("\n", $fenceLines) . "\n\n";
}

/**
Expand Down Expand Up @@ -1208,8 +1226,9 @@ protected function processList(DOMElement $node): string

$prefix = $isOrdered ? $counter . $marker . ' ' : $marker . ' ' . $checkbox;

// Process list item content, separating text from nested lists
$textContent = '';
// Process list item content, separating nested lists from other content
$contentParts = [];
$inlineBuffer = '';
$nestedContent = '';

foreach ($child->childNodes as $liChild) {
Expand All @@ -1221,33 +1240,56 @@ protected function processList(DOMElement $node): string
} elseif ($childTag === 'input' && $liChild->getAttribute('type') === 'checkbox') {
// Skip checkbox inputs (handled via $checkbox prefix)
continue;
} elseif (in_array($childTag, $this->blockElements, true)) {
$this->flushListItemInlineBuffer($contentParts, $inlineBuffer);
$content = trim($this->processNode($liChild));
if ($content !== '') {
$contentParts[] = $content;
}
} else {
$textContent .= $this->processNode($liChild);
$inlineBuffer .= $this->processNode($liChild);
}
} else {
$textContent .= $this->processNode($liChild);
$inlineBuffer .= $this->processNode($liChild);
}
}

$textContent = trim($textContent);

// Handle multi-line text content
$lines = explode("\n", $textContent);
$firstLine = array_shift($lines);
$output .= $indent . $prefix . $firstLine . "\n";
$this->flushListItemInlineBuffer($contentParts, $inlineBuffer);

// Add list item attributes on next line (indented)
$liAttrs = $this->getElementAttributes($child);
if ($liAttrs !== '') {
$output .= $indent . str_repeat(' ', strlen($prefix)) . '{' . $liAttrs . "}\n";
}
$continuation = $indent . str_repeat(' ', strlen($prefix));

if ($lines) {
$continuation = str_repeat(' ', strlen($prefix));
foreach ($lines as $line) {
if (trim($line) !== '') {
$output .= $indent . $continuation . $line . "\n";
if ($contentParts === []) {
$output .= $indent . $prefix . "\n";
if ($liAttrs !== '') {
$output .= $continuation . '{' . $liAttrs . "}\n";
}
} else {
$firstPart = array_shift($contentParts);
$firstPartLines = preg_split('/\R/', $firstPart) ?: [''];
$firstLine = array_shift($firstPartLines);

if ($this->isListItemBlockPart($firstPart)) {
$output .= $indent . $prefix . "\n\n";
if ($liAttrs !== '') {
$output .= $continuation . '{' . $liAttrs . "}\n";
}
$output .= $this->indentListItemPart($firstPart, $continuation) . "\n";
} else {
$output .= $indent . $prefix . $firstLine . "\n";
if ($liAttrs !== '') {
$output .= $continuation . '{' . $liAttrs . "}\n";
}
foreach ($firstPartLines as $line) {
if (trim($line) !== '') {
$output .= $continuation . $line . "\n";
}
}
}

foreach ($contentParts as $part) {
$output .= "\n" . $this->indentListItemPart($part, $continuation) . "\n";
}
}

Expand All @@ -1271,6 +1313,41 @@ protected function processListItem(DOMElement $node): string
return $this->processChildren($node);
}

/**
 * Move pending inline content into the list of collected parts.
 *
 * The buffer is trimmed; when anything remains it is appended to
 * $contentParts as a new part. The buffer is always reset to an empty
 * string afterwards, whether or not a part was appended.
 *
 * @param list<string> $contentParts Parts collected so far (modified in place).
 * @param string $inlineBuffer Accumulated inline output (reset in place).
 */
protected function flushListItemInlineBuffer(array &$contentParts, string &$inlineBuffer): void
{
    $pending = trim($inlineBuffer);
    $inlineBuffer = '';

    if ($pending === '') {
        return;
    }

    $contentParts[] = $pending;
}

/**
 * Decide whether a rendered list item part has to be emitted as block content.
 *
 * A part counts as block content when it spans several lines or when it
 * opens with a block marker: a code fence, a div fence, a table row, a
 * heading or a block quote.
 *
 * Heading (`#`) and block quote (`>`) markers are only recognised when they
 * are followed by a space, a tab or the end of the string. Inline text such
 * as `#hashtag` or `>text` is therefore not mistaken for a block.
 *
 * @param string $content Rendered part to inspect.
 * @return bool True when the part must be treated as a block.
 */
protected function isListItemBlockPart(string $content): bool
{
    if (str_contains($content, "\n")) {
        return true;
    }

    // Fences and table rows are identified by their opening marker alone.
    foreach (['```', ':::', '|'] as $marker) {
        if (str_starts_with($content, $marker)) {
            return true;
        }
    }

    // One or more '#', or a single '>', followed by whitespace or end of string.
    return preg_match('/^(?:#+|>)(?:[ \t]|$)/', $content) === 1;
}

/**
 * Prefix every non-empty line of a list item part with the given indent.
 *
 * Empty lines are kept empty so no trailing whitespace is introduced.
 *
 * @param string $content Rendered part, possibly spanning several lines.
 * @param string $indent Prefix placed in front of each non-empty line.
 * @return string The part with its lines joined by "\n".
 */
protected function indentListItemPart(string $content, string $indent): string
{
    // Split on explicit CRLF / CR / LF only. `\R` without the `u` modifier
    // also matches the single byte 0x85, which is the trailing byte of
    // UTF-8 characters such as "Å", and would cut those characters in half.
    $lines = preg_split('/\r\n|\r|\n/', $content);
    if ($lines === false) {
        // Keep the content as a single line rather than dropping it.
        $lines = [$content];
    }

    $indented = array_map(
        static fn (string $line): string => $line === '' ? '' : $indent . $line,
        $lines,
    );

    return implode("\n", $indented);
}

/**
* Check if a list item contains a checkbox input
*/
Expand Down Expand Up @@ -2088,6 +2165,13 @@ protected function cleanup(string $djot): string
continue;
}

// Preserve indented continuation lines inside list items
if ($inList && preg_match('/^\s{2,}\S/', $line)) {
$result[] = $line;

continue;
}

// Preserve indentation for definition content (indented lines after `: term`)
if ($inDefinitionList && preg_match('/^ /', $line)) {
$result[] = $line;
Expand Down
76 changes: 76 additions & 0 deletions tests/TestCase/Converter/HtmlToDjotTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,69 @@ public function testNestedListWithBlankLine(): void
$this->assertMatchesRegularExpression('/- Item 2\n\n\s+- Nested 1/', $result);
}

/**
 * A list item holding two paragraphs converts to two separated paragraphs
 * and renders back to two <p> elements inside the same <li>.
 */
public function testListItemWithMultipleParagraphsKeepsParagraphBreaks(): void
{
    $djot = $this->converter->convert('<ul><li><p>One</p><p>Two</p></li></ul>');
    $this->assertSame("- One\n\n Two\n", $djot);

    // Round trip through the Djot converter.
    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString("<li>\n<p>One</p>\n<p>Two</p>\n</li>", $roundTripped);
}

/**
 * A block quote following a paragraph inside a list item stays nested in
 * that item, both in the Djot output and after rendering back to HTML.
 */
public function testListItemWithBlockquoteKeepsNestedBlockquote(): void
{
    $djot = $this->converter->convert('<ul><li><p>One</p><blockquote><p>Quote</p></blockquote></li></ul>');
    $this->assertSame("- One\n\n > Quote\n", $djot);

    // Round trip through the Djot converter.
    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString("<li>\n<p>One</p>\n<blockquote>", $roundTripped);
    $this->assertStringContainsString('<p>Quote</p>', $roundTripped);
}

/**
 * A list item whose only child is a code block yields an empty marker line
 * followed by the indented code fence, and renders back to <pre><code>.
 */
public function testListItemWithOnlyCodeBlockKeepsIndentedCodeFence(): void
{
    $djot = $this->converter->convert('<ul><li><pre><code>code</code></pre></li></ul>');
    $this->assertSame("- \n\n ```\n code\n ```\n", $djot);

    // Round trip through the Djot converter.
    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString("<li>\n<pre><code>code", $roundTripped);
}

/**
 * An empty list item carrying an id keeps its attribute block on the
 * indented line below the list marker.
 */
public function testEmptyListItemWithAttributesKeepsIndentedAttributeBlock(): void
{
    $djot = $this->converter->convert('<ul><li id="empty"></li></ul>');

    $this->assertSame("- \n {#empty}\n", $djot);
}

/**
 * A <details> element inside a list item is emitted as an indented
 * `::: details` container holding the summary and the body.
 */
public function testListItemWithDetailsKeepsIndentedTaggedContainer(): void
{
    $djot = $this->converter->convert(
        '<ul><li><details><summary>Title</summary><p>Body</p></details></li></ul>',
    );

    $this->assertSame("- \n\n ::: details\n Title\n\n Body\n :::\n", $djot);
}

/**
 * A heading as the only child of a list item is emitted as an indented
 * heading block below an empty marker line.
 */
public function testListItemWithHeadingKeepsIndentedHeadingBlock(): void
{
    $djot = $this->converter->convert('<ul><li><h2>Head</h2></li></ul>');

    $this->assertSame("- \n\n ## Head\n", $djot);
}

/**
 * An <article> without attributes is not wrapped in a fenced div: only its
 * content is emitted.
 */
public function testHtml5BlockContainerWithoutAttributesFallsBackToPlainBlock(): void
{
    $djot = $this->converter->convert('<article><p>X</p></article>');

    $this->assertSame("X\n", $djot);
}

public function testDeeplyNestedList(): void
{
$html = '<ul><li>Level 1<ul><li>Level 2<ul><li>Level 3</li></ul></li></ul></li></ul>';
Expand Down Expand Up @@ -1013,6 +1076,7 @@ public function testDetailsElement(): void
$html = '<details><summary>Click to expand</summary><p>Hidden content here</p></details>';
$result = $this->converter->convert($html);

$this->assertStringContainsString("::: details\n", $result);
$this->assertStringContainsString('Click to expand', $result);
$this->assertStringContainsString('Hidden content here', $result);
}
Expand Down Expand Up @@ -1084,10 +1148,22 @@ public function testHtml5BlockElementsWithAttributes(): void
$html = '<details class="faq" id="q1"><summary>Question?</summary><p>Answer.</p></details>';
$result = $this->converter->convert($html);

$this->assertStringContainsString('{#q1 .faq}', $result);
$this->assertStringContainsString("::: details\n", $result);
$this->assertStringContainsString('Question?', $result);
$this->assertStringContainsString('Answer.', $result);
}

/**
 * An <article> carrying attributes is emitted as an `::: article` fenced
 * div preceded by its attribute block, with the content preserved.
 */
public function testHtml5BlockContainerWithAttributesUsesTaggedFencedDiv(): void
{
    $djot = $this->converter->convert('<article id="a1" data-kind="post"><p>X</p></article>');

    $expectedFragments = [
        '{#a1 data-kind=post}',
        "::: article\n",
        "X\n",
    ];
    foreach ($expectedFragments as $fragment) {
        $this->assertStringContainsString($fragment, $djot);
    }
}

// ==================== Round-trip Table Separators ====================

public function testTableSeparatorWidthsRoundTrip(): void
Expand Down
Loading