From a2f4f8c245f02d03d297f2f5ade1fbda3d6dbf84 Mon Sep 17 00:00:00 2001 From: mscherer Date: Mon, 16 Mar 2026 18:13:48 +0100 Subject: [PATCH 1/4] Add line comment syntax (`%%` to end of line) Implements line comments that start with `%%` and extend to end of line: - Inline: `Text before %% this is ignored` - Full line: `%% This entire line is a comment` Implementation: - InlineParser: strips `%%` to end of line before parsing inline content - BlockParser: handles full-line comments (lines starting with `%%`) The existing comment syntaxes (`{% ... %}` and `%%%` fenced) are preserved. Refs jgm/djot#67, jgm/djot#384 --- src/Parser/BlockParser.php | 32 ++++++++- src/Parser/InlineParser.php | 27 ++++++++ tests/TestCase/LineCommentTest.php | 103 +++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 tests/TestCase/LineCommentTest.php diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index 8230a6a..3e8c889 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -667,10 +667,12 @@ protected function parseBlocks(Node $parent, array $lines, int $indent): void } // Try to match block elements in order of precedence + // Line comment (%%) must come before fenced comment (%%%) // Fenced comment must come before thematic break (%%% vs ---) // Comment and raw block must come before code block since ``` =format is a special case // Caption must come before paragraph to catch `^ caption text` - $consumed = $this->tryParseFencedComment($parent, $lines, $i) + $consumed = $this->tryParseLineComment($lines, $i) + ?? $this->tryParseFencedComment($parent, $lines, $i) ?? $this->tryParseComment($parent, $lines, $i) ?? $this->tryParseRawBlock($parent, $lines, $i) ?? 
$this->tryParseCodeBlock($parent, $lines, $i) @@ -889,6 +891,34 @@ protected function tryParseCodeBlock(Node $parent, array $lines, int $start): ?i return $i - $start; } + /** + * Try to parse a line comment (%% to end of line) + * + * A line starting with %% (after optional whitespace) is a full-line comment. + * It is completely ignored without creating any nodes. + * + * @param array $lines + * @param int $start + */ + protected function tryParseLineComment(array $lines, int $start): ?int + { + $line = $lines[$start]; + $trimmed = ltrim($line); + + // Check if line starts with %% + if (!str_starts_with($trimmed, '%%')) { + return null; + } + + // Make sure it's not %%% (fenced comment opener) + if (str_starts_with($trimmed, '%%%')) { + return null; + } + + // Line comment - consume the line without creating any node + return 1; + } + /** * Try to parse a comment block {% ... %} * diff --git a/src/Parser/InlineParser.php b/src/Parser/InlineParser.php index c0dfc7c..b1ccf02 100644 --- a/src/Parser/InlineParser.php +++ b/src/Parser/InlineParser.php @@ -180,6 +180,8 @@ public function parse(Node $parent, string $text, int $sourceLine = 0): void { $this->delimiterStack = []; $this->currentLine = $sourceLine; + // Strip line comments (%% to end of line) before parsing + $text = $this->removeLineComments($text); $this->parseInlines($parent, $text); } @@ -1721,6 +1723,31 @@ protected function removeAttributeComments(string $attrStr): string return $result ?? $attrStr; } + /** + * Remove line comments from text: %% to end of line + * + * Line comments start with %% and extend to the end of the line. + * They are stripped before inline parsing. 
+ */ + protected function removeLineComments(string $text): string + { + // Process line by line to strip %% to end of line + $lines = explode("\n", $text); + $result = []; + + foreach ($lines as $line) { + // Find %% that's not inside verbatim/code spans + // Simple approach: just strip %% to end of line + $commentPos = strpos($line, '%%'); + if ($commentPos !== false) { + $line = rtrim(substr($line, 0, $commentPos)); + } + $result[] = $line; + } + + return implode("\n", $result); + } + /** * Apply attributes from a string to a node */ diff --git a/tests/TestCase/LineCommentTest.php b/tests/TestCase/LineCommentTest.php new file mode 100644 index 0000000..b2ee289 --- /dev/null +++ b/tests/TestCase/LineCommentTest.php @@ -0,0 +1,103 @@ +converter = new DjotConverter(); + } + + public function testInlineLineComment(): void + { + $djot = 'This is visible %% but this is a comment'; + $expected = "

This is visible

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testFullLineComment(): void + { + $djot = "%% This entire line is a comment\nThis is visible"; + $expected = "

This is visible

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testMultipleLineComments(): void + { + $djot = "%% These lines are\n%% commented out\nThis line is not"; + $expected = "

This line is not

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentWithinParagraph(): void + { + $djot = "First line %% comment\nSecond line"; + $expected = "

First line\nSecond line

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentDoesNotAffectFencedComment(): void + { + // %%% is a fenced comment, not a line comment + $djot = "%%%\nThis is inside fenced comment\n%%%\n\nParagraph"; + $expected = "

Paragraph

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentPreservesTextBefore(): void + { + $djot = 'Text before %% comment after'; + $expected = "

Text before

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentWithEmphasis(): void + { + $djot = '_emphasis_ %% and comment'; + $expected = "

emphasis

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInBlockQuote(): void + { + $djot = '> Quote text %% with comment'; + $expected = "
\n

Quote text

\n
\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInList(): void + { + $djot = "- Item one %% comment\n- Item two"; + $expected = "\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInHeading(): void + { + $djot = '# Heading %% comment'; + // The section ID is derived from the visible heading text + $result = $this->converter->convert($djot); + $this->assertStringContainsString('

Heading

', $result); + $this->assertStringNotContainsString('comment', $result); + } +} From 372c98b731b8685bb4f4c62afb829ca2e2ea44b2 Mon Sep 17 00:00:00 2001 From: mscherer Date: Mon, 23 Mar 2026 08:50:33 +0100 Subject: [PATCH 2/4] Fix line comments stripping %% inside protected contexts The %% line comment syntax was incorrectly stripping content from inside: - Code spans (`code %% here`) - Quoted attribute values ([text]{title="%% value"}) - Link URLs ([link](url%%test)) - Math spans ($x %% y$) This change rewrites removeLineComments() to be context-aware, properly skipping over: - Backtick spans (code) - Quoted strings (single and double) - Parenthesized content (links) - Curly brace blocks (attributes) - Math delimiters ($ and $$) - Escaped characters Added 9 tests to cover these edge cases. --- src/Parser/InlineParser.php | 219 +++++++++++++++++++++++++++-- tests/TestCase/LineCommentTest.php | 93 ++++++++++++ 2 files changed, 303 insertions(+), 9 deletions(-) diff --git a/src/Parser/InlineParser.php b/src/Parser/InlineParser.php index 47c02e9..3534271 100644 --- a/src/Parser/InlineParser.php +++ b/src/Parser/InlineParser.php @@ -1712,27 +1712,228 @@ protected function removeAttributeComments(string $attrStr): string * Remove line comments from text: %% to end of line * * Line comments start with %% and extend to the end of the line. - * They are stripped before inline parsing. 
+ * They are stripped before inline parsing, but only when not inside: + * - Code spans (backticks) + * - Quoted strings (in attributes or link titles) + * - Link/image destinations */ protected function removeLineComments(string $text): string { - // Process line by line to strip %% to end of line $lines = explode("\n", $text); $result = []; foreach ($lines as $line) { - // Find %% that's not inside verbatim/code spans - // Simple approach: just strip %% to end of line - $commentPos = strpos($line, '%%'); - if ($commentPos !== false) { - $line = rtrim(substr($line, 0, $commentPos)); - } - $result[] = $line; + $result[] = $this->stripLineComment($line); } return implode("\n", $result); } + /** + * Strip %% comment from a single line, respecting context + */ + protected function stripLineComment(string $line): string + { + $length = strlen($line); + $pos = 0; + + while ($pos < $length - 1) { + $char = $line[$pos]; + $nextChar = $line[$pos + 1]; + + // Check for escape sequence + if ($char === '\\' && $pos + 1 < $length) { + $pos += 2; // Skip escaped character + continue; + } + + // Check for %% (line comment) - but not %%% + if ($char === '%' && $nextChar === '%') { + // Make sure it's not %%% (fenced comment marker) + if ($pos + 2 < $length && $line[$pos + 2] === '%') { + $pos++; + continue; + } + // Found line comment - strip rest of line + return rtrim(substr($line, 0, $pos)); + } + + // Skip backtick spans (code) + if ($char === '`') { + $pos = $this->skipBacktickSpan($line, $pos, $length); + continue; + } + + // Skip dollar signs (math) - $...$ or $$...$$ + if ($char === '$') { + $pos = $this->skipMathSpan($line, $pos, $length); + continue; + } + + // Skip parenthesized content (link destinations/titles) + if ($char === '(') { + $pos = $this->skipParenthesized($line, $pos, $length); + continue; + } + + // Skip curly braces (attributes) - handle quoted values inside + if ($char === '{') { + $pos = $this->skipAttributeBlock($line, $pos, $length); + continue; + } 
+ + $pos++; + } + + return $line; + } + + /** + * Skip a backtick span (code), returning position after closing backticks + */ + protected function skipBacktickSpan(string $line, int $start, int $length): int + { + // Count opening backticks + $backtickCount = 0; + $pos = $start; + while ($pos < $length && $line[$pos] === '`') { + $backtickCount++; + $pos++; + } + + // Find matching closing backticks + while ($pos <= $length - $backtickCount) { + if (substr($line, $pos, $backtickCount) === str_repeat('`', $backtickCount)) { + // Check it's exactly this many backticks (not more) + $afterBackticks = $pos + $backtickCount; + if ($afterBackticks >= $length || $line[$afterBackticks] !== '`') { + return $afterBackticks; + } + } + $pos++; + } + + // No closing found, return end + return $length; + } + + /** + * Skip a math span ($...$ or $$...$$), returning position after closing + */ + protected function skipMathSpan(string $line, int $start, int $length): int + { + $pos = $start; + $isDisplay = ($pos + 1 < $length && $line[$pos + 1] === '$'); + $delimiter = $isDisplay ? 
'$$' : '$'; + $delimLen = strlen($delimiter); + + $pos += $delimLen; // Skip opening delimiter + + // Find closing delimiter + while ($pos <= $length - $delimLen) { + if ($line[$pos] === '\\' && $pos + 1 < $length) { + $pos += 2; // Skip escaped character + continue; + } + if (substr($line, $pos, $delimLen) === $delimiter) { + return $pos + $delimLen; + } + $pos++; + } + + return $length; + } + + /** + * Skip parenthesized content (link destination/title), returning position after ) + */ + protected function skipParenthesized(string $line, int $start, int $length): int + { + $pos = $start + 1; // Skip opening ( + $depth = 1; + + while ($pos < $length && $depth > 0) { + $char = $line[$pos]; + + if ($char === '\\' && $pos + 1 < $length) { + $pos += 2; // Skip escaped character + continue; + } + + // Handle quoted strings inside parentheses (link titles) + if ($char === '"' || $char === "'") { + $pos = $this->skipQuotedString($line, $pos, $length, $char); + continue; + } + + if ($char === '(') { + $depth++; + } elseif ($char === ')') { + $depth--; + } + $pos++; + } + + return $pos; + } + + /** + * Skip an attribute block {...}, returning position after } + */ + protected function skipAttributeBlock(string $line, int $start, int $length): int + { + $pos = $start + 1; // Skip opening { + $depth = 1; + + while ($pos < $length && $depth > 0) { + $char = $line[$pos]; + + if ($char === '\\' && $pos + 1 < $length) { + $pos += 2; // Skip escaped character + continue; + } + + // Handle quoted attribute values + if ($char === '"' || $char === "'") { + $pos = $this->skipQuotedString($line, $pos, $length, $char); + continue; + } + + if ($char === '{') { + $depth++; + } elseif ($char === '}') { + $depth--; + } + $pos++; + } + + return $pos; + } + + /** + * Skip a quoted string, returning position after closing quote + */ + protected function skipQuotedString(string $line, int $start, int $length, string $quote): int + { + $pos = $start + 1; // Skip opening quote + + while ($pos < 
$length) { + $char = $line[$pos]; + + if ($char === '\\' && $pos + 1 < $length) { + $pos += 2; // Skip escaped character + continue; + } + + if ($char === $quote) { + return $pos + 1; + } + $pos++; + } + + return $length; + } + /** * Apply attributes from a string to a node */ diff --git a/tests/TestCase/LineCommentTest.php b/tests/TestCase/LineCommentTest.php index b2ee289..a7c8077 100644 --- a/tests/TestCase/LineCommentTest.php +++ b/tests/TestCase/LineCommentTest.php @@ -100,4 +100,97 @@ public function testLineCommentInHeading(): void $this->assertStringContainsString('

Heading

', $result); $this->assertStringNotContainsString('comment', $result); } + + /** + * %% inside code spans should NOT be treated as a comment + */ + public function testPercentInCodeSpanPreserved(): void + { + $djot = '`code %% not a comment`'; + $expected = "

code %% not a comment

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testPercentInCodeSpanMidText(): void + { + $djot = 'Before `a %% b` after'; + $expected = "

Before a %% b after

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside quoted attribute values should NOT be treated as a comment + */ + public function testPercentInQuotedAttributePreserved(): void + { + $djot = '[text]{title="%% not a comment"}'; + $expected = "

text

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testPercentInSingleQuotedAttributePreserved(): void + { + $djot = "[text]{title='%% test'}"; + $expected = "

text

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside link URLs should NOT be treated as a comment + */ + public function testPercentInLinkUrlPreserved(): void + { + $djot = '[link](url%%test)'; + $expected = "

link

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * Escaped %% should NOT be treated as a comment + */ + public function testEscapedPercentNotComment(): void + { + $djot = 'Text \\%\\% not a comment'; + $expected = "

Text %% not a comment

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside inline math should NOT be treated as a comment + */ + public function testPercentInMathPreserved(): void + { + $djot = '$x %% y$'; + $result = $this->converter->convert($djot); + // Math content should be preserved (whether parsed as math or not) + $this->assertStringContainsString('%%', $result); + } + + /** + * Line comment should work with both {% %} and %% + */ + public function testMixedCommentSyntax(): void + { + $djot = 'Text {% inline comment %} more %% line comment'; + $expected = "

Text more

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% should strip first occurrence to end of line + */ + public function testMultiplePercentOnLine(): void + { + $djot = 'a %% b %% c'; + $expected = "

a

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } } From d0767bdf39fcffa9dfb3d23bee62fe781e5249b6 Mon Sep 17 00:00:00 2001 From: mscherer Date: Mon, 23 Mar 2026 08:51:44 +0100 Subject: [PATCH 3/4] Fix code style: add blank lines before jump statements --- src/Parser/InlineParser.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Parser/InlineParser.php b/src/Parser/InlineParser.php index 3534271..5ee7c55 100644 --- a/src/Parser/InlineParser.php +++ b/src/Parser/InlineParser.php @@ -1744,6 +1744,7 @@ protected function stripLineComment(string $line): string // Check for escape sequence if ($char === '\\' && $pos + 1 < $length) { $pos += 2; // Skip escaped character + continue; } @@ -1752,8 +1753,10 @@ protected function stripLineComment(string $line): string // Make sure it's not %%% (fenced comment marker) if ($pos + 2 < $length && $line[$pos + 2] === '%') { $pos++; + continue; } + // Found line comment - strip rest of line return rtrim(substr($line, 0, $pos)); } @@ -1761,24 +1764,28 @@ protected function stripLineComment(string $line): string // Skip backtick spans (code) if ($char === '`') { $pos = $this->skipBacktickSpan($line, $pos, $length); + continue; } // Skip dollar signs (math) - $...$ or $$...$$ if ($char === '$') { $pos = $this->skipMathSpan($line, $pos, $length); + continue; } // Skip parenthesized content (link destinations/titles) if ($char === '(') { $pos = $this->skipParenthesized($line, $pos, $length); + continue; } // Skip curly braces (attributes) - handle quoted values inside if ($char === '{') { $pos = $this->skipAttributeBlock($line, $pos, $length); + continue; } @@ -1833,6 +1840,7 @@ protected function skipMathSpan(string $line, int $start, int $length): int while ($pos <= $length - $delimLen) { if ($line[$pos] === '\\' && $pos + 1 < $length) { $pos += 2; // Skip escaped character + continue; } if (substr($line, $pos, $delimLen) === $delimiter) { @@ -1857,12 +1865,14 @@ protected function 
skipParenthesized(string $line, int $start, int $length): int if ($char === '\\' && $pos + 1 < $length) { $pos += 2; // Skip escaped character + continue; } // Handle quoted strings inside parentheses (link titles) if ($char === '"' || $char === "'") { $pos = $this->skipQuotedString($line, $pos, $length, $char); + continue; } @@ -1890,12 +1900,14 @@ protected function skipAttributeBlock(string $line, int $start, int $length): in if ($char === '\\' && $pos + 1 < $length) { $pos += 2; // Skip escaped character + continue; } // Handle quoted attribute values if ($char === '"' || $char === "'") { $pos = $this->skipQuotedString($line, $pos, $length, $char); + continue; } @@ -1922,6 +1934,7 @@ protected function skipQuotedString(string $line, int $start, int $length, strin if ($char === '\\' && $pos + 1 < $length) { $pos += 2; // Skip escaped character + continue; } From 4eaadac5ffbfada367b80e9c8eb5c2a6854c3dc1 Mon Sep 17 00:00:00 2001 From: mscherer Date: Mon, 23 Mar 2026 09:14:31 +0100 Subject: [PATCH 4/4] Fix inline {% %} comments not preserving content after closing The block parser was incorrectly treating `{% comment %} text` as a block-level comment, consuming the entire line and losing "text". This fix updates `isCommentOpener()` to only treat a line as a block comment when: - The comment spans multiple lines (no closing %} on same line), OR - The comment is alone on the line (nothing after %}) Single-line comments with content after them are now handled as inline comments by the attribute parser, which correctly strips the comment while preserving surrounding text. Fixes: - `{% comment %} text` now renders as `

<p>text</p>

` - `{% one %} text {% two %}` now renders as `

<p>text</p>

` Added 5 tests covering inline comment edge cases. --- src/Parser/Block/FencedBlockParser.php | 28 +++++++++++- tests/TestCase/DjotConverterTest.php | 60 ++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/Parser/Block/FencedBlockParser.php b/src/Parser/Block/FencedBlockParser.php index 284d8ca..fd2f3fa 100644 --- a/src/Parser/Block/FencedBlockParser.php +++ b/src/Parser/Block/FencedBlockParser.php @@ -140,13 +140,37 @@ public function parseRawBlockOpener(string $line): ?array /** * Check if a line opens a comment block. * + * Block-level comments are only recognized when: + * - The comment spans multiple lines (no closing %} on same line), OR + * - The comment is alone on the line (nothing meaningful after %}) + * + * Single-line comments with content after them like `{% comment %} text` + * should be handled as inline comments by the inline parser. + * * @param string $line The line to check * - * @return bool True if this line opens a comment + * @return bool True if this line opens a block comment */ public function isCommentOpener(string $line): bool { - return str_contains($line, '{%') && str_starts_with(trim($line), '{%'); + $trimmed = trim($line); + if (!str_starts_with($trimmed, '{%')) { + return false; + } + + // Check if there's a closing %} on the same line + $closePos = strpos($trimmed, '%}'); + if ($closePos === false) { + // No closing on this line - it's a multi-line block comment + return true; + } + + // There's a closing %} - check if there's content after it + $afterClose = trim(substr($trimmed, $closePos + 2)); + + // If nothing after the closing, treat as block comment + // If there's content after, let inline parser handle it + return $afterClose === ''; } /** diff --git a/tests/TestCase/DjotConverterTest.php b/tests/TestCase/DjotConverterTest.php index 33d8035..7b41ead 100644 --- a/tests/TestCase/DjotConverterTest.php +++ b/tests/TestCase/DjotConverterTest.php @@ -2133,6 +2133,66 @@ public function 
testCommentBetweenParagraphs(): void $this->assertStringNotContainsString('hidden', $result); } + /** + * Inline comment at start of line should preserve text after it + */ + public function testInlineCommentAtStartPreservesTextAfter(): void + { + $djot = '{% comment %} text after'; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('text after', $result); + $this->assertStringNotContainsString('comment', $result); + } + + /** + * Multiple inline comments on same line + */ + public function testMultipleInlineComments(): void + { + $djot = '{% one %} text {% two %}'; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('text', $result); + $this->assertStringNotContainsString('one', $result); + $this->assertStringNotContainsString('two', $result); + } + + /** + * Inline comment in middle of text + */ + public function testInlineCommentInMiddle(): void + { + $djot = 'before {% comment %} after'; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('before', $result); + $this->assertStringContainsString('after', $result); + $this->assertStringNotContainsString('comment', $result); + } + + /** + * Inline comment should not strip {% %} inside code spans + */ + public function testInlineCommentNotInCodeSpan(): void + { + $djot = '`{% not a comment %}`'; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('{% not a comment %}', $result); + } + + /** + * Inline comment should not strip {% %} inside quoted attributes + */ + public function testInlineCommentNotInQuotedAttribute(): void + { + $djot = '[text]{title="{% not %}"}'; + $result = $this->converter->convert($djot); + + $this->assertStringContainsString('{% not %}', $result); + } + // Edge cases: Raw content public function testRawBlockNonHtml(): void