diff --git a/src/Converter/HtmlToDjot.php b/src/Converter/HtmlToDjot.php index 51fb759..1d881af 100644 --- a/src/Converter/HtmlToDjot.php +++ b/src/Converter/HtmlToDjot.php @@ -889,6 +889,10 @@ protected function extractRoundTripSource(DOMElement $node, string $tagName): ?s protected function processLink(DOMElement $node): string { + if ($this->linkRequiresRawHtmlFallback($node)) { + return $this->processRawHtmlInlineElement($node); + } + if ($node->hasAttribute('data-djot-heading-ref')) { $target = $node->getAttribute('data-djot-heading-ref'); $displayText = $node->getAttribute('data-djot-heading-ref-display'); @@ -934,6 +938,8 @@ protected function processLink(DOMElement $node): string $text = $href; } + $text = $this->escapeLinkOrImageLabel($text); + // Check for @mention (round-trip support for MentionsExtension) if ($node->hasAttribute('data-username')) { $username = $node->getAttribute('data-username'); @@ -993,9 +999,15 @@ protected function processLink(DOMElement $node): string protected function processImage(DOMElement $node): string { $src = $node->getAttribute('src'); - $alt = $node->getAttribute('alt'); + $rawAlt = $node->getAttribute('alt'); $title = $node->getAttribute('title'); + if ($this->requiresRawImageFallback($rawAlt)) { + return $this->processRawHtmlInlineElement($node); + } + + $alt = $this->escapeLinkOrImageLabel($rawAlt); + // Check for reference image (round-trip support) if ($node->hasAttribute('data-djot-ref')) { $refLabel = $node->getAttribute('data-djot-ref'); @@ -1413,7 +1425,7 @@ protected function processTable(DOMElement $node): string $tag = strtolower($cell->tagName); if ($tag === 'th' || $tag === 'td') { // Get cell content with cell attributes - $cellContent = trim($this->processChildren($cell)); + $cellContent = $this->serializeTableCellContent($cell); $cellAttrs = $this->getElementAttributes($cell); if ($cellAttrs !== '') { // Cell attributes go after opening pipe: |{.class} content | @@ -1524,6 +1536,24 @@ protected function getDirectTableRows(DOMElement $table): array return $rows; } + protected function serializeTableCellContent(DOMElement $cell): string + { + $hasBlockChildren = false; + + foreach ($cell->childNodes as $child) { + if ($child instanceof DOMElement && in_array(strtolower($child->tagName), $this->blockElements, true)) { + $hasBlockChildren = true; + + break; + } + } + + $content = $hasBlockChildren ? $this->processBlock($cell) : $this->processChildren($cell); + $content = trim($content); + + return preg_replace('/\s+/', ' ', $content) ?? $content; + } + protected function findFirstDirectChildByTagName(DOMElement $node, string $tagName): ?DOMElement { $tagName = strtolower($tagName); @@ -1697,6 +1727,33 @@ protected function processRawInline(DOMElement $node): string return $backticks . $content . $backticks . '{=' . $format . '}'; } + protected function processRawHtmlInlineElement(DOMElement $node): string + { + $html = $node->ownerDocument?->saveHTML($node); + if (!is_string($html)) { + $html = ''; + } + + $backticks = StringUtil::findSafeCodeFence($html, 1); + + return $backticks . $html . $backticks . '{=html}'; + } + + protected function linkRequiresRawHtmlFallback(DOMElement $node): bool + { + foreach ($node->childNodes as $child) { + if ( + $child instanceof DOMElement + && strtolower($child->tagName) === 'img' + && $this->requiresRawImageFallback($child->getAttribute('alt')) + ) { + return true; + } + } + + return false; + } + /** * Process semantic HTML elements to Djot span syntax * @@ -2094,7 +2151,7 @@ protected function processFootnoteContent(DOMElement $li): string } // Process the remaining content - $content = trim($this->processChildren($clone)); + $content = trim($this->processBlock($clone)); return $content; } @@ -2107,15 +2164,26 @@ protected function formatFootnoteDefinition(string|int $label, string $content): $formatted = '[^' . $label . ']: ' . $firstLine; foreach ($lines as $line) { - $formatted .= "\n"; - if ($line !== '') { - $formatted .= ' ' . $line; - } + $formatted .= "\n " . $line; } return $formatted; } + protected function escapeLinkOrImageLabel(string $text): string + { + return str_replace( + ['\\', '[', ']'], + ['\\\\', '\[', '\]'], + $text, + ); + } + + protected function requiresRawImageFallback(string $alt): bool + { + return strpbrk($alt, '[]\\') !== false; + } + protected function cleanup(string $djot): string { // Remove leading whitespace from lines (except in code blocks and indented content) @@ -2123,6 +2191,7 @@ protected function cleanup(string $djot): string $inCodeBlock = false; $inDefinitionList = false; $inList = false; + $inFootnote = false; $result = []; foreach ($lines as $line) { @@ -2140,10 +2209,20 @@ protected function cleanup(string $djot): string continue; } + if (preg_match('/^\[\^[^\]]+\]:\s*/', $line) === 1) { + $result[] = $line; + $inDefinitionList = false; + $inList = false; + $inFootnote = true; + + continue; + } + // Track definition lists (`: term` starts one) if (str_starts_with($line, ': ')) { $inDefinitionList = true; $inList = false; + $inFootnote = false; $result[] = $line; continue; @@ -2154,6 +2233,7 @@ protected function cleanup(string $djot): string $result[] = $line; $inDefinitionList = false; $inList = true; + $inFootnote = false; continue; } @@ -2186,9 +2266,15 @@ protected function cleanup(string $djot): string continue; } + if ($inFootnote && preg_match('/^\s{2,}\S/', $line)) { + $result[] = $line; + + continue; + } + // Blank line (or whitespace-only line) ends definition list context but not list context if (trim($line) === '') { - $result[] = ''; // Normalize to empty string + $result[] = $inFootnote ? ' ' : ''; // Normalize to empty string unless footnote continuation needs indentation continue; } @@ -2197,6 +2283,7 @@ protected function cleanup(string $djot): string $result[] = ltrim($line); $inDefinitionList = false; $inList = false; + $inFootnote = false; } $djot = implode("\n", $result); diff --git a/tests/TestCase/Converter/HtmlToDjotTest.php b/tests/TestCase/Converter/HtmlToDjotTest.php index 3e6b19a..b86a97f 100644 --- a/tests/TestCase/Converter/HtmlToDjotTest.php +++ b/tests/TestCase/Converter/HtmlToDjotTest.php @@ -130,6 +130,24 @@ public function testLinkWithQuotedTitleEscapesDjotTitle(): void $this->assertSame("[Example](https://example.com \"a \\\"quote\\\" here\")\n", $result); } + public function testLinkEscapesClosingBracketInLabel(): void + { + $result = $this->converter->convert('a ] b'); + + $this->assertSame("[a \\] b](https://example.com)\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('a ] b', $htmlBack); + } + + public function testLinkEscapesBackslashInLabel(): void + { + $result = $this->converter->convert('a \\ b'); + + $this->assertSame("[a \\\\ b](https://example.com)\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('a \ b', $htmlBack); + } + // ==================== Images ==================== public function testImage(): void @@ -150,6 +168,33 @@ public function testImageWithQuotedTitleEscapesDjotTitle(): void $this->assertSame("![Alt](image.jpg \"a \\\"quote\\\" here\")\n", $result); } + public function testImageWithBracketInAltFallsBackToRawHtml(): void + { + $result = $this->converter->convert('a [ b'); + + $this->assertSame("`\"a`{=html}\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('a [ b', $htmlBack); + } + + public function testImageWithBackslashInAltFallsBackToRawHtml(): void + { + $result = $this->converter->convert('a \\ b'); + + $this->assertSame("`\"a`{=html}\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('a \\ b', $htmlBack); + } + + public function testLinkWrappingProblematicImageFallsBackToRawHtml(): void + { + $result = $this->converter->convert('a [ b'); + + $this->assertSame("`\"a`{=html}\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('a [ b', $htmlBack); + } + // ==================== Code ==================== public function testInlineCode(): void @@ -450,6 +495,18 @@ public function testEndnotesSectionDoesNotTreatNestedListItemsAsFootnotes(): voi $this->assertStringNotContainsString("\n1. nested", $result); } + public function testEndnotesSectionKeepsMultilineFootnoteInsideDefinition(): void + { + $html = '
  1. One

    Two

    ↩︎

'; + $result = $this->converter->convert($html); + + $this->assertSame("[^1]: One\n \n Two\n", $result); + + $htmlBack = (new DjotConverter())->convert("ref[^1]\n\n" . $result); + $this->assertStringContainsString('

One

', $htmlBack); + $this->assertStringContainsString('

TwoassertStringContainsString('^ Monthly Sales Data', $result); } + public function testTableCellWithMultipleParagraphsFallsBackToSingleLineCellText(): void + { + $html = '

One

Two

'; + $result = $this->converter->convert($html); + + $this->assertSame("| One Two |\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('One Two', $htmlBack); + } + + public function testTableCellWithNestedListFallsBackToSingleLineCellText(): void + { + $html = '
  • Item
'; + $result = $this->converter->convert($html); + + $this->assertSame("| - Item |\n", $result); + $htmlBack = (new DjotConverter())->convert($result); + $this->assertStringContainsString('- Item', $htmlBack); + } + public function testTableWithMultilineCaptionKeepsAllCaptionTextInsideCaption(): void { $html = '

cap one

cap two

x
';