diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php
index 57286e0..c7ccc2f 100644
--- a/src/Parser/BlockParser.php
+++ b/src/Parser/BlockParser.php
@@ -667,10 +667,12 @@ protected function parseBlocks(Node $parent, array $lines, int $indent): void
             }
 
             // Try to match block elements in order of precedence
+            // Line comment (%%) must come before fenced comment (%%%)
             // Fenced comment must come before thematic break (%%% vs ---)
             // Comment and raw block must come before code block since ``` =format is a special case
             // Caption must come before paragraph to catch `^ caption text`
-            $consumed = $this->tryParseFencedComment($parent, $lines, $i)
+            $consumed = $this->tryParseLineComment($lines, $i)
+                ?? $this->tryParseFencedComment($parent, $lines, $i)
                 ?? $this->tryParseComment($parent, $lines, $i)
                 ?? $this->tryParseRawBlock($parent, $lines, $i)
                 ?? $this->tryParseCodeBlock($parent, $lines, $i)
@@ -889,6 +891,29 @@ protected function tryParseCodeBlock(Node $parent, array $lines, int $start): ?i
         return $i - $start;
     }
 
+    /**
+     * Try to parse a line comment (%% to end of line)
+     *
+     * Leading whitespace is ignored. A line that then begins with %% - and
+     * not with %%% - is a full-line comment: it is consumed and no node is
+     * created for it. A %%% prefix is rejected here so that the fenced
+     * comment parser can handle it.
+     *
+     * @param array<int, string> $lines Lines being parsed
+     * @param int $start Index of the line to examine
+     * @return int|null 1 if the line was consumed as a comment, null otherwise
+     */
+    protected function tryParseLineComment(array $lines, int $start): ?int
+    {
+        $content = ltrim($lines[$start]);
+
+        // Exactly two leading percent signs; a third means fenced comment
+        $isLineComment = str_starts_with($content, '%%')
+            && !str_starts_with($content, '%%%');
+
+        return $isLineComment ? 1 : null;
+    }
+
     /**
      * Try to parse a comment block {% ...
%}
      *
diff --git a/src/Parser/InlineParser.php b/src/Parser/InlineParser.php
index bc3bfb9..5ee7c55 100644
--- a/src/Parser/InlineParser.php
+++ b/src/Parser/InlineParser.php
@@ -180,6 +180,8 @@ public function parse(Node $parent, string $text, int $sourceLine = 0): void
     {
         $this->delimiterStack = [];
         $this->currentLine = $sourceLine;
+        // Strip line comments (%% to end of line) before parsing
+        $text = $this->removeLineComments($text);
         $this->parseInlines($parent, $text);
     }
 
@@ -1706,6 +1708,292 @@ protected function removeAttributeComments(string $attrStr): string
         return $result ?? $attrStr;
     }
 
+    /**
+     * Remove line comments from text: %% to end of line
+     *
+     * Line comments start with %% and extend to the end of the line.
+     * They are stripped before inline parsing, but only when not inside:
+     * - Code spans (backticks)
+     * - Quoted strings (in attributes or link titles)
+     * - Link/image destinations
+     *
+     * The text is split on "\n" and each line is scanned on its own, so no
+     * scanning state (such as an open code span) is carried across lines.
+     *
+     * @param string $text Inline text, possibly spanning several lines
+     * @return string The text with line comments removed
+     */
+    protected function removeLineComments(string $text): string
+    {
+        return implode("\n", array_map(
+            fn (string $line): string => $this->stripLineComment($line),
+            explode("\n", $text),
+        ));
+    }
+
+    /**
+     * Strip %% comment from a single line, respecting context
+     *
+     * The line is scanned left to right. A backslash skips the character after
+     * it, and backtick spans, $ spans, parenthesized content and {...} blocks
+     * are jumped over, so a %% inside any of them is kept. The first run of
+     * exactly two percent signs found elsewhere starts the comment: it and the
+     * rest of the line are dropped, and whitespace at the end of the kept part
+     * is trimmed.
+     *
+     * A run of three or more percent signs is skipped as a whole. Stepping over
+     * it one character at a time would land on its last two percent signs and
+     * cut the line there ("a %%% b" would become "a %").
+     *
+     * @param string $line One line of inline text
+     * @return string The line without its comment, unchanged if it has none
+     */
+    protected function stripLineComment(string $line): string
+    {
+        $length = strlen($line);
+        $pos = 0;
+
+        // A comment marker is two characters long, so the last one cannot start it
+        while ($pos < $length - 1) {
+            $char = $line[$pos];
+
+            // Escape sequence: skip the backslash and the escaped character
+            if ($char === '\\') {
+                $pos += 2;
+
+                continue;
+            }
+
+            if ($char === '%') {
+                $run = strspn($line, '%', $pos);
+                if ($run === 2) {
+                    // Found line comment - strip rest of line
+                    return rtrim(substr($line, 0, $pos));
+                }
+
+                // A single % or a run of %%% and longer is not a line comment
+                $pos += $run;
+
+                continue;
+            }
+
+            // Skip backtick spans (code)
+            if ($char === '`') {
+                $pos = $this->skipBacktickSpan($line, $pos, $length);
+
+                continue;
+            }
+
+            // Skip dollar signs (math) - $...$ or $$...$$
+            if ($char === '$') {
+                $pos = $this->skipMathSpan($line, $pos, $length);
+
+                continue;
+            }
+
+            // Skip parenthesized content (link destinations/titles)
+            if ($char === '(') {
+                $pos = $this->skipParenthesized($line, $pos, $length);
+
+                continue;
+            }
+
+            // Skip curly braces (attributes) - handle quoted values inside
+            if ($char === '{') {
+                $pos = $this->skipAttributeBlock($line, $pos, $length);
+
+                continue;
+            }
+
+            $pos++;
+        }
+
+        return $line;
+    }
+
+    /**
+     * Skip a backtick span (code), returning position after closing backticks
+     *
+     * The closing run must have exactly as many backticks as the opening run;
+     * shorter and longer runs in between are treated as content of the span.
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the first opening backtick
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @return int Position after the closing run, or $length if there is none
+     */
+    protected function skipBacktickSpan(string $line, int $start, int $length): int
+    {
+        $openLen = strspn($line, '`', $start);
+        $pos = $start + $openLen;
+
+        while ($pos < $length) {
+            // Jump to the next backtick, if any
+            $pos += strcspn($line, '`', $pos);
+            if ($pos >= $length) {
+                break;
+            }
+
+            $run = strspn($line, '`', $pos);
+            if ($run === $openLen) {
+                return $pos + $run;
+            }
+
+            // A run of any other length belongs to the span content
+            $pos += $run;
+        }
+
+        // No closing found, return end
+        return $length;
+    }
+
+    /**
+     * Skip a math span ($...$ or $$...$$), returning position after closing
+     *
+     * A backslash inside the span skips the character after it, so an escaped
+     * dollar sign does not close the span.
+     *
+     * NOTE(review): if the closing delimiter is missing, $length is returned,
+     * so a lone "$" hides a later %% on the same line - confirm this is wanted.
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the opening dollar sign
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @return int Position after the closing delimiter, or $length if there is none
+     */
+    protected function skipMathSpan(string $line, int $start, int $length): int
+    {
+        // Two dollar signs in a row open a display span
+        $isDisplay = $start + 1 < $length && $line[$start + 1] === '$';
+        $delimiter = $isDisplay ? '$$' : '$';
+        $delimLen = strlen($delimiter);
+
+        // Start right after the opening delimiter
+        $pos = $start + $delimLen;
+
+        while ($pos <= $length - $delimLen) {
+            if ($line[$pos] === '\\' && $pos + 1 < $length) {
+                $pos += 2; // Skip escaped character
+
+                continue;
+            }
+            if (substr($line, $pos, $delimLen) === $delimiter) {
+                return $pos + $delimLen;
+            }
+            $pos++;
+        }
+
+        return $length;
+    }
+
+    /**
+     * Skip parenthesized content (link destination/title), returning position after )
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the opening (
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @return int Position after the matching ), or $length if there is none
+     */
+    protected function skipParenthesized(string $line, int $start, int $length): int
+    {
+        return $this->skipBalanced($line, $start, $length, '(', ')');
+    }
+
+    /**
+     * Skip an attribute block {...}, returning position after }
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the opening {
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @return int Position after the matching }, or $length if there is none
+     */
+    protected function skipAttributeBlock(string $line, int $start, int $length): int
+    {
+        return $this->skipBalanced($line, $start, $length, '{', '}');
+    }
+
+    /**
+     * Skip a region delimited by a nestable open/close pair
+     *
+     * Nested pairs are counted, a backslash skips the character after it and
+     * quoted strings are jumped over, so delimiters inside quotes do not count.
+     *
+     * NOTE(review): any ' or " starts a quoted string here, so an apostrophe
+     * without a partner skips to the end of the line - confirm this is wanted.
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the opening delimiter
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @param string $open Opening delimiter character
+     * @param string $close Closing delimiter character
+     * @return int Position after the matching closer, or $length if there is none
+     */
+    private function skipBalanced(string $line, int $start, int $length, string $open, string $close): int
+    {
+        $pos = $start + 1; // Skip opening delimiter
+        $depth = 1;
+
+        while ($pos < $length && $depth > 0) {
+            $char = $line[$pos];
+
+            if ($char === '\\' && $pos + 1 < $length) {
+                $pos += 2; // Skip escaped character
+
+                continue;
+            }
+
+            // Quoted strings (link titles, attribute values) may contain delimiters
+            if ($char === '"' || $char === "'") {
+                $pos = $this->skipQuotedString($line, $pos, $length, $char);
+
+                continue;
+            }
+
+            if ($char === $open) {
+                $depth++;
+            } elseif ($char === $close) {
+                $depth--;
+            }
+            $pos++;
+        }
+
+        return $pos;
+    }
+
+    /**
+     * Skip a quoted string, returning position after closing quote
+     *
+     * A backslash skips the character after it, so an escaped quote does not
+     * end the string.
+     *
+     * @param string $line Line being scanned
+     * @param int $start Position of the opening quote
+     * @param int $length Length of $line in bytes, i.e. strlen($line)
+     * @param string $quote Quote character that closes the string
+     * @return int Position after the closing quote, or $length if there is none
+     */
+    protected function skipQuotedString(string $line, int $start, int $length, string $quote): int
+    {
+        $pos = $start + 1; // Skip opening quote
+
+        while ($pos < $length) {
+            $char = $line[$pos];
+
+            if ($char === '\\' && $pos + 1 < $length) {
+                $pos += 2; // Skip escaped character
+
+                continue;
+            }
+
+            if ($char === $quote) {
+                return $pos + 1;
+            }
+            $pos++;
+        }
+
+        return $length;
+    }
+
     /**
      * Apply attributes from a string to a node
      */
diff --git a/tests/TestCase/LineCommentTest.php b/tests/TestCase/LineCommentTest.php
new file mode 100644
index 0000000..a7c8077
--- /dev/null
+++ b/tests/TestCase/LineCommentTest.php
@@ -0,0 +1,196 @@
+converter = new DjotConverter();
+    }
+
+    public function testInlineLineComment(): void
+    {
+        $djot = 'This is visible %% but this is a comment';
+        $expected = "

This is visible

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testFullLineComment(): void + { + $djot = "%% This entire line is a comment\nThis is visible"; + $expected = "

This is visible

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testMultipleLineComments(): void + { + $djot = "%% These lines are\n%% commented out\nThis line is not"; + $expected = "

This line is not

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentWithinParagraph(): void + { + $djot = "First line %% comment\nSecond line"; + $expected = "

First line\nSecond line

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentDoesNotAffectFencedComment(): void + { + // %%% is a fenced comment, not a line comment + $djot = "%%%\nThis is inside fenced comment\n%%%\n\nParagraph"; + $expected = "

Paragraph

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentPreservesTextBefore(): void + { + $djot = 'Text before %% comment after'; + $expected = "

Text before

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentWithEmphasis(): void + { + $djot = '_emphasis_ %% and comment'; + $expected = "

emphasis

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInBlockQuote(): void + { + $djot = '> Quote text %% with comment'; + $expected = "
\n

Quote text

\n
\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInList(): void + { + $djot = "- Item one %% comment\n- Item two"; + $expected = "\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testLineCommentInHeading(): void + { + $djot = '# Heading %% comment'; + // The section ID is derived from the visible heading text + $result = $this->converter->convert($djot); + $this->assertStringContainsString('

Heading

', $result); + $this->assertStringNotContainsString('comment', $result); + } + + /** + * %% inside code spans should NOT be treated as a comment + */ + public function testPercentInCodeSpanPreserved(): void + { + $djot = '`code %% not a comment`'; + $expected = "

code %% not a comment

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testPercentInCodeSpanMidText(): void + { + $djot = 'Before `a %% b` after'; + $expected = "

Before a %% b after

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside quoted attribute values should NOT be treated as a comment + */ + public function testPercentInQuotedAttributePreserved(): void + { + $djot = '[text]{title="%% not a comment"}'; + $expected = "

text

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + public function testPercentInSingleQuotedAttributePreserved(): void + { + $djot = "[text]{title='%% test'}"; + $expected = "

text

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside link URLs should NOT be treated as a comment + */ + public function testPercentInLinkUrlPreserved(): void + { + $djot = '[link](url%%test)'; + $expected = "

link

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * Escaped %% should NOT be treated as a comment + */ + public function testEscapedPercentNotComment(): void + { + $djot = 'Text \\%\\% not a comment'; + $expected = "

Text %% not a comment

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% inside inline math should NOT be treated as a comment + */ + public function testPercentInMathPreserved(): void + { + $djot = '$x %% y$'; + $result = $this->converter->convert($djot); + // Math content should be preserved (whether parsed as math or not) + $this->assertStringContainsString('%%', $result); + } + + /** + * Line comment should work with both {% %} and %% + */ + public function testMixedCommentSyntax(): void + { + $djot = 'Text {% inline comment %} more %% line comment'; + $expected = "

Text more

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } + + /** + * %% should strip first occurrence to end of line + */ + public function testMultiplePercentOnLine(): void + { + $djot = 'a %% b %% c'; + $expected = "

a

\n"; + + $this->assertSame($expected, $this->converter->convert($djot)); + } +}