diff --git a/src/Converter/HtmlToDjot.php b/src/Converter/HtmlToDjot.php index e6fe1c2..51fb759 100644 --- a/src/Converter/HtmlToDjot.php +++ b/src/Converter/HtmlToDjot.php @@ -219,8 +219,9 @@ protected function processNode(DOMNode $node): string return match ($tagName) { 'section' => $this->processSection($node), - 'html', 'body', 'article', 'main', 'header', 'footer', 'nav', 'aside', - 'address', 'dialog', 'fieldset', 'form', 'hgroup', 'menu', 'search' => $this->processBlock($node), + 'html', 'body' => $this->processBlock($node), + 'article', 'main', 'header', 'footer', 'nav', 'aside', + 'address', 'dialog', 'fieldset', 'form', 'hgroup', 'menu', 'search' => $this->processGenericBlockContainer($node), 'details' => $this->processDetails($node), 'div' => $this->processDiv($node), 'p' => $this->processParagraph($node), @@ -541,8 +542,25 @@ protected function processDetails(DOMElement $node): string return $this->processCollapsibleAdmonition($node); } - // Otherwise treat as regular block - return $this->processBlock($node); + return $this->processGenericBlockContainer($node); + } + + protected function processGenericBlockContainer(DOMElement $node): string + { + $tagName = strtolower($node->tagName); + $attrs = $this->formatBlockAttributes($node); + + if ($tagName !== 'details' && $attrs === '') { + return $this->processBlock($node); + } + + $content = trim($this->processBlock($node)); + $output = $attrs . '::: ' . $tagName . "\n"; + if ($content !== '') { + $output .= $content . "\n"; + } + + return $output . ":::\n\n"; } /** @@ -1208,8 +1226,9 @@ protected function processList(DOMElement $node): string $prefix = $isOrdered ? $counter . $marker . ' ' : $marker . ' ' . $checkbox; - // Process list item content, separating text from nested lists - $textContent = ''; + // Process list item content, separating nested lists from other content + $contentParts = []; + $inlineBuffer = ''; $nestedContent = ''; foreach ($child->childNodes as $liChild) { @@ -1221,33 +1240,56 @@ protected function processList(DOMElement $node): string } elseif ($childTag === 'input' && $liChild->getAttribute('type') === 'checkbox') { // Skip checkbox inputs (handled via $checkbox prefix) continue; + } elseif (in_array($childTag, $this->blockElements, true)) { + $this->flushListItemInlineBuffer($contentParts, $inlineBuffer); + $content = trim($this->processNode($liChild)); + if ($content !== '') { + $contentParts[] = $content; + } } else { - $textContent .= $this->processNode($liChild); + $inlineBuffer .= $this->processNode($liChild); } } else { - $textContent .= $this->processNode($liChild); + $inlineBuffer .= $this->processNode($liChild); } } - $textContent = trim($textContent); - - // Handle multi-line text content - $lines = explode("\n", $textContent); - $firstLine = array_shift($lines); - $output .= $indent . $prefix . $firstLine . "\n"; + $this->flushListItemInlineBuffer($contentParts, $inlineBuffer); // Add list item attributes on next line (indented) $liAttrs = $this->getElementAttributes($child); - if ($liAttrs !== '') { - $output .= $indent . str_repeat(' ', strlen($prefix)) . '{' . $liAttrs . "}\n"; - } + $continuation = $indent . str_repeat(' ', strlen($prefix)); - if ($lines) { - $continuation = str_repeat(' ', strlen($prefix)); - foreach ($lines as $line) { - if (trim($line) !== '') { - $output .= $indent . $continuation . $line . "\n"; + if ($contentParts === []) { + $output .= $indent . $prefix . "\n"; + if ($liAttrs !== '') { + $output .= $continuation . '{' . $liAttrs . "}\n"; + } + } else { + $firstPart = array_shift($contentParts); + $firstPartLines = preg_split('/\R/', $firstPart) ?: ['']; + $firstLine = array_shift($firstPartLines); + + if ($this->isListItemBlockPart($firstPart)) { + $output .= $indent . $prefix . "\n\n"; + if ($liAttrs !== '') { + $output .= $continuation . '{' . $liAttrs . "}\n"; + } + $output .= $this->indentListItemPart($firstPart, $continuation) . "\n"; + } else { + $output .= $indent . $prefix . $firstLine . "\n"; + if ($liAttrs !== '') { + $output .= $continuation . '{' . $liAttrs . "}\n"; } + foreach ($firstPartLines as $line) { + if (trim($line) !== '') { + $output .= $continuation . $line . "\n"; + } + } + } + + foreach ($contentParts as $part) { + $output .= "\n" . $this->indentListItemPart($part, $continuation) . "\n"; } } @@ -1271,6 +1313,41 @@ protected function processListItem(DOMElement $node): string return $this->processChildren($node); } + /** + * @param list $contentParts + * @param string $inlineBuffer + */ + protected function flushListItemInlineBuffer(array &$contentParts, string &$inlineBuffer): void + { + $inlineContent = trim($inlineBuffer); + if ($inlineContent !== '') { + $contentParts[] = $inlineContent; + } + $inlineBuffer = ''; + } + + protected function isListItemBlockPart(string $content): bool + { + return str_contains($content, "\n") + || str_starts_with($content, '>') + || str_starts_with($content, '```') + || str_starts_with($content, ':::') + || str_starts_with($content, '|') + || str_starts_with($content, '#'); + } + + protected function indentListItemPart(string $content, string $indent): string + { + $lines = preg_split('/\R/', $content) ?: []; + $output = []; + + foreach ($lines as $line) { + $output[] = $line === '' ? '' : $indent . $line; + } + + return implode("\n", $output); + } + /** * Check if a list item contains a checkbox input */ @@ -2088,6 +2165,13 @@ protected function cleanup(string $djot): string continue; } + // Preserve indented continuation lines inside list items + if ($inList && preg_match('/^\s{2,}\S/', $line)) { + $result[] = $line; + + continue; + } + // Preserve indentation for definition content (indented lines after `: term`) if ($inDefinitionList && preg_match('/^ /', $line)) { $result[] = $line; diff --git a/tests/TestCase/Converter/HtmlToDjotTest.php b/tests/TestCase/Converter/HtmlToDjotTest.php index b23c6f4..3e6b19a 100644 --- a/tests/TestCase/Converter/HtmlToDjotTest.php +++ b/tests/TestCase/Converter/HtmlToDjotTest.php @@ -582,6 +582,69 @@ public function testNestedListWithBlankLine(): void $this->assertMatchesRegularExpression('/- Item 2\n\n\s+- Nested 1/', $result); } + public function testListItemWithMultipleParagraphsKeepsParagraphBreaks(): void + { + $html = ''; + $result = $this->converter->convert($html); + + $this->assertSame("- One\n\n Two\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString("
  • \n

    One

    \n

    Two

    \n
  • ", $htmlBack); + } + + public function testListItemWithBlockquoteKeepsNestedBlockquote(): void + { + $html = ''; + $result = $this->converter->convert($html); + + $this->assertSame("- One\n\n > Quote\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString("
  • \n

    One

    \n
    ", $htmlBack); + $this->assertStringContainsString('

    Quote

    ', $htmlBack); + } + + public function testListItemWithOnlyCodeBlockKeepsIndentedCodeFence(): void + { + $html = '
    • code
    '; + $result = $this->converter->convert($html); + + $this->assertSame("- \n\n ```\n code\n ```\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString("
  • \n
    code", $htmlBack);
    +    }
    +
    +    public function testEmptyListItemWithAttributesKeepsIndentedAttributeBlock(): void
    +    {
    +        $html = '
    '; + $result = $this->converter->convert($html); + + $this->assertSame("- \n {#empty}\n", $result); + } + + public function testListItemWithDetailsKeepsIndentedTaggedContainer(): void + { + $html = '
    • Title

      Body

    '; + $result = $this->converter->convert($html); + + $this->assertSame("- \n\n ::: details\n Title\n\n Body\n :::\n", $result); + } + + public function testListItemWithHeadingKeepsIndentedHeadingBlock(): void + { + $html = '
    • Head

    '; + $result = $this->converter->convert($html); + + $this->assertSame("- \n\n ## Head\n", $result); + } + + public function testHtml5BlockContainerWithoutAttributesFallsBackToPlainBlock(): void + { + $html = '

    X

    '; + $result = $this->converter->convert($html); + + $this->assertSame("X\n", $result); + } + public function testDeeplyNestedList(): void { $html = '
    • Level 1
      • Level 2
        • Level 3
    '; @@ -1013,6 +1076,7 @@ public function testDetailsElement(): void $html = '
    Click to expand

    Hidden content here

    '; $result = $this->converter->convert($html); + $this->assertStringContainsString("::: details\n", $result); $this->assertStringContainsString('Click to expand', $result); $this->assertStringContainsString('Hidden content here', $result); } @@ -1084,10 +1148,22 @@ public function testHtml5BlockElementsWithAttributes(): void $html = '
    Question?

    Answer.

    '; $result = $this->converter->convert($html); + $this->assertStringContainsString('{#q1 .faq}', $result); + $this->assertStringContainsString("::: details\n", $result); $this->assertStringContainsString('Question?', $result); $this->assertStringContainsString('Answer.', $result); } + public function testHtml5BlockContainerWithAttributesUsesTaggedFencedDiv(): void + { + $html = '

    X

    '; + $result = $this->converter->convert($html); + + $this->assertStringContainsString('{#a1 data-kind=post}', $result); + $this->assertStringContainsString("::: article\n", $result); + $this->assertStringContainsString("X\n", $result); + } + // ==================== Round-trip Table Separators ==================== public function testTableSeparatorWidthsRoundTrip(): void