From f028c976c2bcdc18cf2e8098350c540c3a8dd701 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:12:24 +0100 Subject: [PATCH 01/10] Add Document transformer tests for content parser and site fallback --- .../tests/transformer/class-test-document.php | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php index 3915ac3..3f49e89 100644 --- a/tests/phpunit/tests/transformer/class-test-document.php +++ b/tests/phpunit/tests/transformer/class-test-document.php @@ -9,11 +9,33 @@ namespace Atmosphere\Tests\Transformer; -require_once __DIR__ . '/class-stub-parser.php'; - use WP_UnitTestCase; +use Atmosphere\Content_Parser\Content_Parser; use Atmosphere\Transformer\Document; +/** + * Stub content parser for testing. + */ +class Stub_Parser implements Content_Parser { + + /** + * {@inheritDoc} + */ + public function get_type(): string { + return 'test.stub.parser'; + } + + /** + * {@inheritDoc} + */ + public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + return array( + '$type' => 'test.stub.parser', + 'text' => $content, + ); + } +} + /** * Document transformer tests. 
*/ From c5576d14f6714dd80db73447fa35e02abb382c74 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:15:46 +0100 Subject: [PATCH 02/10] Move stub parser to its own file to satisfy PHPCS one-class-per-file rule --- .../tests/transformer/class-test-document.php | 26 ++----------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php index 3f49e89..3915ac3 100644 --- a/tests/phpunit/tests/transformer/class-test-document.php +++ b/tests/phpunit/tests/transformer/class-test-document.php @@ -9,33 +9,11 @@ namespace Atmosphere\Tests\Transformer; +require_once __DIR__ . '/class-stub-parser.php'; + use WP_UnitTestCase; -use Atmosphere\Content_Parser\Content_Parser; use Atmosphere\Transformer\Document; -/** - * Stub content parser for testing. - */ -class Stub_Parser implements Content_Parser { - - /** - * {@inheritDoc} - */ - public function get_type(): string { - return 'test.stub.parser'; - } - - /** - * {@inheritDoc} - */ - public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable - return array( - '$type' => 'test.stub.parser', - 'text' => $content, - ); - } -} - /** * Document transformer tests. */ From 1e4e5249dc55b1293389aee44d1b118b4f008732 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 17:59:20 +0100 Subject: [PATCH 03/10] Add Markpub content parser for standard.site documents Ship a Markpub parser (at.markpub.markdown) as the default content parser, registered via the atmosphere_content_parser filter. - Add Markpub class that walks Gutenberg blocks via parse_blocks() and converts each to GFM markdown. - Register Markpub as the default parser in Atmosphere::init(). - Add tests for all supported block types and filter integration. 
--- .github/changelog/markpub-parser | 4 + includes/class-atmosphere.php | 4 + includes/content-parser/class-markpub.php | 375 ++++++++++++++++++ .../content-parser/class-test-markpub.php | 234 +++++++++++ 4 files changed, 617 insertions(+) create mode 100644 .github/changelog/markpub-parser create mode 100644 includes/content-parser/class-markpub.php create mode 100644 tests/phpunit/tests/content-parser/class-test-markpub.php diff --git a/.github/changelog/markpub-parser b/.github/changelog/markpub-parser new file mode 100644 index 0000000..5beee3d --- /dev/null +++ b/.github/changelog/markpub-parser @@ -0,0 +1,4 @@ +Significance: minor +Type: added + +Add rich content support for standard.site documents using the Markpub format. diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php index 8ab16b3..20bb68e 100644 --- a/includes/class-atmosphere.php +++ b/includes/class-atmosphere.php @@ -9,6 +9,7 @@ \defined( 'ABSPATH' ) || exit; +use Atmosphere\Content_Parser\Markpub; use Atmosphere\OAuth\Client; use Atmosphere\Transformer\Document; use Atmosphere\Transformer\Publication; @@ -44,6 +45,9 @@ public function init(): void { // Plugin integrations. Load::init(); + // Default content parser (Markpub). + \add_filter( 'atmosphere_content_parser', static fn() => new Markpub() ); + // JSON preview for AT Protocol records. \add_action( 'template_redirect', array( $this, 'preview' ) ); diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php new file mode 100644 index 0000000..0b31bc6 --- /dev/null +++ b/includes/content-parser/class-markpub.php @@ -0,0 +1,375 @@ + 'at.markpub.markdown', + 'text' => array( + '$type' => 'at.markpub.text', + 'markdown' => $markdown, + ), + 'flavor' => 'gfm', + 'extensions' => array( 'strikethrough', 'table' ), + ); + } + + /** + * Convert a single WordPress block to markdown. + * + * @param array $block Parsed block from parse_blocks(). 
+ * @return string|null Markdown string or null to skip. + */ + private static function transform_block( array $block ): ?string { + if ( empty( $block['blockName'] ) ) { + // Classic (non-block) content or whitespace. + $html = \trim( $block['innerHTML'] ?? '' ); + if ( empty( $html ) ) { + return null; + } + + return self::inline_html_to_markdown( $html ); + } + + return match ( $block['blockName'] ) { + 'core/paragraph' => self::paragraph( $block ), + 'core/heading' => self::heading( $block ), + 'core/image' => self::image( $block ), + 'core/list' => self::listing( $block ), + 'core/quote' => self::quote( $block ), + 'core/code' => self::code( $block ), + 'core/preformatted' => self::preformatted( $block ), + 'core/separator' => '---', + 'core/spacer' => null, + 'core/group', + 'core/columns', + 'core/column' => self::container( $block ), + default => self::fallback( $block ), + }; + } + + /** + * Paragraph block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function paragraph( array $block ): ?string { + $html = \trim( $block['innerHTML'] ?? '' ); + if ( empty( $html ) ) { + return null; + } + + return self::inline_html_to_markdown( $html ); + } + + /** + * Heading block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function heading( array $block ): ?string { + $level = $block['attrs']['level'] ?? 2; + $text = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + + if ( empty( \trim( $text ) ) ) { + return null; + } + + return \str_repeat( '#', (int) $level ) . ' ' . \trim( $text ); + } + + /** + * Image block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function image( array $block ): ?string { + $html = $block['innerHTML'] ?? ''; + $src = ''; + $alt = ''; + + $processor = new \WP_HTML_Tag_Processor( $html ); + if ( $processor->next_tag( 'IMG' ) ) { + $src = $processor->get_attribute( 'src' ) ?? 
''; + $alt = $processor->get_attribute( 'alt' ) ?? ''; + } + + if ( empty( $src ) ) { + return null; + } + + $md = '![' . $alt . '](' . $src . ')'; + + // Check for a caption in figcaption. + $caption_proc = new \WP_HTML_Tag_Processor( $html ); + if ( $caption_proc->next_tag( 'FIGCAPTION' ) ) { + $caption = \wp_strip_all_tags( + \preg_replace( '#.*]*>#si', '', $html ) + ); + $caption = \trim( \preg_replace( '#.*#si', '', $caption ) ); + + if ( ! empty( $caption ) ) { + $md .= "\n" . $caption; + } + } + + return $md; + } + + /** + * List block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function listing( array $block ): ?string { + $ordered = ! empty( $block['attrs']['ordered'] ); + $items = array(); + + if ( ! empty( $block['innerBlocks'] ) ) { + foreach ( $block['innerBlocks'] as $i => $inner ) { + $text = self::inline_html_to_markdown( $inner['innerHTML'] ?? '' ); + $text = \trim( $text ); + + if ( empty( $text ) ) { + continue; + } + + $prefix = $ordered ? ( $i + 1 ) . '. ' : '- '; + $items[] = $prefix . $text; + } + } + + return empty( $items ) ? null : \implode( "\n", $items ); + } + + /** + * Quote block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function quote( array $block ): ?string { + $lines = array(); + + if ( ! empty( $block['innerBlocks'] ) ) { + foreach ( $block['innerBlocks'] as $inner ) { + $md = self::transform_block( $inner ); + if ( null !== $md ) { + $lines[] = $md; + } + } + } + + if ( empty( $lines ) ) { + $text = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + if ( empty( \trim( $text ) ) ) { + return null; + } + $lines = array( \trim( $text ) ); + } + + $quoted = \implode( "\n", $lines ); + + // Prefix each line with >. + return \implode( + "\n", + \array_map( + static fn( $line ) => '> ' . $line, + \explode( "\n", $quoted ) + ) + ); + } + + /** + * Code block. + * + * @param array $block Parsed block. 
+ * @return string|null + */ + private static function code( array $block ): ?string { + $text = \wp_strip_all_tags( $block['innerHTML'] ?? '' ); + $text = \html_entity_decode( $text, ENT_QUOTES, 'UTF-8' ); + $text = \trim( $text ); + + if ( empty( $text ) ) { + return null; + } + + $lang = $block['attrs']['language'] ?? ''; + + return '```' . $lang . "\n" . $text . "\n```"; + } + + /** + * Preformatted block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function preformatted( array $block ): ?string { + return self::code( $block ); + } + + /** + * Container block — flatten inner blocks. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function container( array $block ): ?string { + if ( empty( $block['innerBlocks'] ) ) { + return null; + } + + $parts = array(); + + foreach ( $block['innerBlocks'] as $inner ) { + $md = self::transform_block( $inner ); + if ( null !== $md ) { + $parts[] = $md; + } + } + + return empty( $parts ) ? null : \implode( "\n\n", $parts ); + } + + /** + * Fallback for unknown block types. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function fallback( array $block ): ?string { + if ( ! empty( $block['innerBlocks'] ) ) { + return self::container( $block ); + } + + $html = \trim( $block['innerHTML'] ?? '' ); + if ( empty( $html ) ) { + return null; + } + + return self::inline_html_to_markdown( $html ); + } + + /** + * Convert inline HTML formatting to markdown. + * + * Handles links, bold, italic, strikethrough, inline code, + * images, and line breaks. Strips block-level wrappers and + * remaining tags. + * + * @param string $html HTML string. + * @return string Markdown string. + */ + private static function inline_html_to_markdown( string $html ): string { + $html = \trim( $html ); + + if ( empty( $html ) ) { + return ''; + } + + $md = $html; + + // Inline images. 
+ $md = \preg_replace_callback( + '#]+>#si', + static function ( $m ) { + $processor = new \WP_HTML_Tag_Processor( $m[0] ); + if ( $processor->next_tag( 'IMG' ) ) { + $src = $processor->get_attribute( 'src' ) ?? ''; + $alt = $processor->get_attribute( 'alt' ) ?? ''; + return '![' . $alt . '](' . $src . ')'; + } + return ''; + }, + $md + ); + + // Links. + $md = \preg_replace_callback( + '#]+href=["\']([^"\']*)["\'][^>]*>(.*?)#si', + static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . $m[1] . ')', + $md + ); + + // Bold. + $md = \preg_replace( '#<(?:strong|b)>(.*?)#si', '**$1**', $md ); + + // Italic. + $md = \preg_replace( '#<(?:em|i)>(.*?)#si', '*$1*', $md ); + + // Strikethrough. + $md = \preg_replace( '#<(?:s|del|strike)>(.*?)#si', '~~$1~~', $md ); + + // Inline code. + $md = \preg_replace( '#(.*?)#si', '`$1`', $md ); + + // Line breaks. + $md = \preg_replace( '##si', " \n", $md ); + + // Strip block-level wrappers and remaining tags. + $md = \wp_strip_all_tags( $md ); + + // Decode HTML entities. + $md = \html_entity_decode( $md, ENT_QUOTES, 'UTF-8' ); + + return \trim( $md ); + } +} diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php new file mode 100644 index 0000000..99409ec --- /dev/null +++ b/tests/phpunit/tests/content-parser/class-test-markpub.php @@ -0,0 +1,234 @@ +parser = new Markpub(); + } + + /** + * Test get_type returns the markpub NSID. + */ + public function test_get_type() { + $this->assertSame( 'at.markpub.markdown', $this->parser->get_type() ); + } + + /** + * Test parse returns correct top-level structure. + */ + public function test_parse_returns_correct_structure() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( + '

Hello world

', + $post + ); + + $this->assertArrayHasKey( '$type', $result ); + $this->assertSame( 'at.markpub.markdown', $result['$type'] ); + $this->assertArrayHasKey( 'text', $result ); + $this->assertSame( 'at.markpub.text', $result['text']['$type'] ); + $this->assertArrayHasKey( 'markdown', $result['text'] ); + $this->assertSame( 'gfm', $result['flavor'] ); + $this->assertContains( 'strikethrough', $result['extensions'] ); + } + + /** + * Test paragraph conversion. + */ + public function test_converts_paragraphs() { + $post = self::factory()->post->create_and_get(); + $content = "\n

First paragraph

\n\n\n" + . "\n

Second paragraph

\n"; + + $result = $this->parser->parse( $content, $post ); + $markdown = $result['text']['markdown']; + + $this->assertStringContainsString( 'First paragraph', $markdown ); + $this->assertStringContainsString( 'Second paragraph', $markdown ); + $this->assertStringNotContainsString( '

', $markdown ); + } + + /** + * Test heading conversion. + */ + public function test_converts_headings() { + $post = self::factory()->post->create_and_get(); + $content = '

My Heading

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '## My Heading', $result['text']['markdown'] ); + } + + /** + * Test heading level 3. + */ + public function test_converts_heading_level_3() { + $post = self::factory()->post->create_and_get(); + $content = '

Sub Heading

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '### Sub Heading', $result['text']['markdown'] ); + } + + /** + * Test link conversion in a paragraph. + */ + public function test_converts_links() { + $post = self::factory()->post->create_and_get(); + $content = '

Visit Example today.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '[Example](https://example.com)', $result['text']['markdown'] ); + } + + /** + * Test bold conversion. + */ + public function test_converts_bold() { + $post = self::factory()->post->create_and_get(); + $content = '

This is bold text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '**bold**', $result['text']['markdown'] ); + } + + /** + * Test italic conversion. + */ + public function test_converts_italic() { + $post = self::factory()->post->create_and_get(); + $content = '

This is italic text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '*italic*', $result['text']['markdown'] ); + } + + /** + * Test image block conversion. + */ + public function test_converts_images() { + $post = self::factory()->post->create_and_get(); + $content = '
A photo
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '![A photo](https://example.com/photo.jpg)', $result['text']['markdown'] ); + } + + /** + * Test code block conversion. + */ + public function test_converts_code_blocks() { + $post = self::factory()->post->create_and_get(); + $content = '
echo "hello";
'; + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '```', $md ); + $this->assertStringContainsString( 'echo "hello";', $md ); + } + + /** + * Test inline code conversion. + */ + public function test_converts_inline_code() { + $post = self::factory()->post->create_and_get(); + $content = '

Use the parse() method.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '`parse()`', $result['text']['markdown'] ); + } + + /** + * Test separator block becomes horizontal rule. + */ + public function test_converts_separator() { + $post = self::factory()->post->create_and_get(); + $content = "

Before

\n\n" + . "
\n\n" + . '

After

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '---', $result['text']['markdown'] ); + } + + /** + * Test empty content produces empty markdown. + */ + public function test_empty_content() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( '', $post ); + + $this->assertSame( '', $result['text']['markdown'] ); + } + + /** + * Test the atmosphere_html_to_markdown filter. + */ + public function test_html_to_markdown_filter() { + \add_filter( + 'atmosphere_html_to_markdown', + static fn() => 'custom markdown', + 10, + 2 + ); + + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( + '

Hello

', + $post + ); + + $this->assertSame( 'custom markdown', $result['text']['markdown'] ); + + \remove_all_filters( 'atmosphere_html_to_markdown' ); + } + + /** + * Test strikethrough conversion. + */ + public function test_converts_strikethrough() { + $post = self::factory()->post->create_and_get(); + $content = '

This is deleted text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '~~deleted~~', $result['text']['markdown'] ); + } + + /** + * Test classic (non-block) content is handled as fallback. + */ + public function test_classic_content_fallback() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( '

Classic editor content with bold.

', $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '**bold**', $md ); + $this->assertStringContainsString( 'Classic editor content', $md ); + } +} From 6c6065dcd6a10da95d7e2a7b0b5c497867d3accd Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:01:51 +0100 Subject: [PATCH 04/10] Declare accepted args on content parser filter registration --- includes/class-atmosphere.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php index 20bb68e..b86d273 100644 --- a/includes/class-atmosphere.php +++ b/includes/class-atmosphere.php @@ -46,7 +46,7 @@ public function init(): void { Load::init(); // Default content parser (Markpub). - \add_filter( 'atmosphere_content_parser', static fn() => new Markpub() ); + \add_filter( 'atmosphere_content_parser', static fn( $parser, $post ) => new Markpub(), 10, 2 ); // JSON preview for AT Protocol records. \add_action( 'template_redirect', array( $this, 'preview' ) ); From 693f37466e9d2485cf3c79f226587b76b447e55a Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:02:33 +0100 Subject: [PATCH 05/10] Fix ordered list counter and markdown link parentheses - Use a separate counter for ordered lists so numbering stays sequential when empty items are skipped. - Percent-encode parentheses in link URLs to prevent breaking markdown link syntax. --- includes/content-parser/class-markpub.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php index 0b31bc6..7e7ae24 100644 --- a/includes/content-parser/class-markpub.php +++ b/includes/content-parser/class-markpub.php @@ -180,9 +180,10 @@ private static function image( array $block ): ?string { private static function listing( array $block ): ?string { $ordered = ! 
empty( $block['attrs']['ordered'] ); $items = array(); + $counter = 0; if ( ! empty( $block['innerBlocks'] ) ) { - foreach ( $block['innerBlocks'] as $i => $inner ) { + foreach ( $block['innerBlocks'] as $inner ) { $text = self::inline_html_to_markdown( $inner['innerHTML'] ?? '' ); $text = \trim( $text ); @@ -190,7 +191,8 @@ private static function listing( array $block ): ?string { continue; } - $prefix = $ordered ? ( $i + 1 ) . '. ' : '- '; + ++$counter; + $prefix = $ordered ? $counter . '. ' : '- '; $items[] = $prefix . $text; } } @@ -342,10 +344,10 @@ static function ( $m ) { $md ); - // Links. + // Links — percent-encode parentheses to avoid breaking markdown syntax. $md = \preg_replace_callback( '#]+href=["\']([^"\']*)["\'][^>]*>(.*?)#si', - static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . $m[1] . ')', + static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $m[1] ) . ')', $md ); From 9f74de8a46c44d078911aabadc275a0ad7cd18f6 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:09:58 +0100 Subject: [PATCH 06/10] Fix PHPCS warnings for unused closure parameters --- includes/class-atmosphere.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php index b86d273..20bb68e 100644 --- a/includes/class-atmosphere.php +++ b/includes/class-atmosphere.php @@ -46,7 +46,7 @@ public function init(): void { Load::init(); // Default content parser (Markpub). - \add_filter( 'atmosphere_content_parser', static fn( $parser, $post ) => new Markpub(), 10, 2 ); + \add_filter( 'atmosphere_content_parser', static fn() => new Markpub() ); // JSON preview for AT Protocol records. 
\add_action( 'template_redirect', array( $this, 'preview' ) ); From 920dbeab3b037a57e08e852cc1eb0365fa9f2526 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Mon, 23 Mar 2026 18:27:16 +0100 Subject: [PATCH 07/10] Fix Document test to account for default Markpub parser registration --- tests/phpunit/tests/transformer/class-test-document.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php index 3915ac3..3a84260 100644 --- a/tests/phpunit/tests/transformer/class-test-document.php +++ b/tests/phpunit/tests/transformer/class-test-document.php @@ -20,9 +20,12 @@ class Test_Document extends WP_UnitTestCase { /** - * Test that content field is absent when no parser is registered. + * Test that content field is absent when parser filter returns null. */ public function test_content_absent_without_parser() { + \remove_all_filters( 'atmosphere_content_parser' ); + \add_filter( 'atmosphere_content_parser', '__return_null' ); + $post = self::factory()->post->create_and_get( array( 'post_content' => 'Some content here.' ) ); @@ -31,6 +34,8 @@ public function test_content_absent_without_parser() { $record = $transformer->transform(); $this->assertArrayNotHasKey( 'content', $record ); + + \remove_all_filters( 'atmosphere_content_parser' ); } /** From 89910432ce71f8b377e2093e83b23066051db272 Mon Sep 17 00:00:00 2001 From: Matthias Pfefferle Date: Wed, 22 Apr 2026 08:25:08 +0200 Subject: [PATCH 08/10] Address Markpub parser review feedback - Widen Content_Parser::parse return type to ?array; Markpub returns null when the trimmed markdown is empty so Document omits the content field instead of shipping an empty record. Document skips the atmosphere_document_content filter on null to keep the filter contract strictly array-typed. 
- Drop 'table' from advertised extensions; core/table currently falls through to fallback and is stripped by wp_strip_all_tags, so the advertised capability didn't match emitted output. - Reorder the figcaption extraction so the closing-tag strip runs before wp_strip_all_tags. Otherwise sibling content after </figcaption> (e.g. a trailing <p> inside the same <figure>
) would bleed into the caption text. - Wrap preg_replace / preg_replace_callback calls in safe_replace helpers that fall back to the original input on PCRE failure and emit a wp_trigger_error. Without this, a pathological input could erase the post body silently when preg_replace returns null and that null cascades through subsequent string ops. - Fix the phpcs:ignore rule on unused $post parameters to reference the sniff that actually fires (Generic.CodeAnalysis.UnusedFunction Parameter alongside VariableAnalysis), and document why $post is declared but unused. --- includes/content-parser/class-markpub.php | 93 ++++++++++++++++--- .../interface-content-parser.php | 9 +- includes/transformer/class-document.php | 4 + .../content-parser/class-test-markpub.php | 42 ++++++++- .../tests/transformer/class-stub-parser.php | 13 ++- .../tests/transformer/class-test-document.php | 34 +++++++ 6 files changed, 174 insertions(+), 21 deletions(-) diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php index 7e7ae24..3ec0019 100644 --- a/includes/content-parser/class-markpub.php +++ b/includes/content-parser/class-markpub.php @@ -30,10 +30,13 @@ public function get_type(): string { /** * {@inheritDoc} * + * $post is required by the Content_Parser contract so parsers can + * access post metadata; Markpub only needs $content. + * * @param string $content Raw post content. * @param \WP_Post $post The WordPress post object. 
*/ - public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + public function parse( string $content, \WP_Post $post ): ?array { // phpcs:ignore Generic.CodeAnalysis.UnusedFunctionParameter.Found, VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable $blocks = \parse_blocks( $content ); $parts = array(); @@ -55,6 +58,10 @@ public function parse( string $content, \WP_Post $post ): array { // phpcs:ignor */ $markdown = \apply_filters( 'atmosphere_html_to_markdown', $markdown, $content ); + if ( '' === \trim( $markdown ) ) { + return null; + } + return array( '$type' => 'at.markpub.markdown', 'text' => array( @@ -62,7 +69,7 @@ public function parse( string $content, \WP_Post $post ): array { // phpcs:ignor 'markdown' => $markdown, ), 'flavor' => 'gfm', - 'extensions' => array( 'strikethrough', 'table' ), + 'extensions' => array( 'strikethrough' ), ); } @@ -158,10 +165,13 @@ private static function image( array $block ): ?string { // Check for a caption in figcaption. $caption_proc = new \WP_HTML_Tag_Processor( $html ); if ( $caption_proc->next_tag( 'FIGCAPTION' ) ) { - $caption = \wp_strip_all_tags( - \preg_replace( '#.*]*>#si', '', $html ) - ); - $caption = \trim( \preg_replace( '#.*#si', '', $caption ) ); + // Strip both ends of the figcaption tag BEFORE stripping + // remaining tags, so sibling content after + // (e.g. a trailing

inside the same

) doesn't + // bleed into the caption text. + $caption = self::safe_replace( '#.*]*>#si', '', $html ); + $caption = self::safe_replace( '#.*#si', '', $caption ); + $caption = \trim( \wp_strip_all_tags( $caption ) ); if ( ! empty( $caption ) ) { $md .= "\n" . $caption; @@ -330,7 +340,7 @@ private static function inline_html_to_markdown( string $html ): string { $md = $html; // Inline images. - $md = \preg_replace_callback( + $md = self::safe_replace_callback( '#]+>#si', static function ( $m ) { $processor = new \WP_HTML_Tag_Processor( $m[0] ); @@ -345,26 +355,26 @@ static function ( $m ) { ); // Links — percent-encode parentheses to avoid breaking markdown syntax. - $md = \preg_replace_callback( + $md = self::safe_replace_callback( '#]+href=["\']([^"\']*)["\'][^>]*>(.*?)#si', static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $m[1] ) . ')', $md ); // Bold. - $md = \preg_replace( '#<(?:strong|b)>(.*?)#si', '**$1**', $md ); + $md = self::safe_replace( '#<(?:strong|b)>(.*?)#si', '**$1**', $md ); // Italic. - $md = \preg_replace( '#<(?:em|i)>(.*?)#si', '*$1*', $md ); + $md = self::safe_replace( '#<(?:em|i)>(.*?)#si', '*$1*', $md ); // Strikethrough. - $md = \preg_replace( '#<(?:s|del|strike)>(.*?)#si', '~~$1~~', $md ); + $md = self::safe_replace( '#<(?:s|del|strike)>(.*?)#si', '~~$1~~', $md ); // Inline code. - $md = \preg_replace( '#(.*?)#si', '`$1`', $md ); + $md = self::safe_replace( '#(.*?)#si', '`$1`', $md ); // Line breaks. - $md = \preg_replace( '##si', " \n", $md ); + $md = self::safe_replace( '##si', " \n", $md ); // Strip block-level wrappers and remaining tags. $md = \wp_strip_all_tags( $md ); @@ -374,4 +384,61 @@ static function ( $m ) { return \trim( $md ); } + + /** + * Wraps preg_replace with a fallback that preserves the input on PCRE failure. + * + * The underlying preg_replace returns null on engine failure + * (e.g. backtrack or recursion limit hit on pathological input). 
+ * Without a guard, null cascades through subsequent string + * operations and can erase the whole buffer with no signal. + * + * @param string $pattern Pattern. + * @param string $replacement Replacement. + * @param string $subject Input. + * @return string Replaced string, or the original on PCRE failure. + */ + private static function safe_replace( string $pattern, string $replacement, string $subject ): string { + $result = \preg_replace( $pattern, $replacement, $subject ); + + if ( null === $result ) { + self::warn_pcre_failure( $pattern ); + return $subject; + } + + return $result; + } + + /** + * Wraps preg_replace_callback with the same failure guard as safe_replace(). + * + * @param string $pattern Pattern. + * @param callable $callback Callback. + * @param string $subject Input. + * @return string Replaced string, or the original on PCRE failure. + */ + private static function safe_replace_callback( string $pattern, callable $callback, string $subject ): string { + $result = \preg_replace_callback( $pattern, $callback, $subject ); + + if ( null === $result ) { + self::warn_pcre_failure( $pattern ); + return $subject; + } + + return $result; + } + + /** + * Emit a warning about a PCRE failure without hard-failing. + * + * @param string $pattern The pattern that failed. + */ + private static function warn_pcre_failure( string $pattern ): void { + if ( \function_exists( 'wp_trigger_error' ) ) { + \wp_trigger_error( + __METHOD__, + \sprintf( 'PCRE failure on pattern %s; preserving input.', $pattern ) + ); + } + } } diff --git a/includes/content-parser/interface-content-parser.php b/includes/content-parser/interface-content-parser.php index 4f11827..c9264b0 100644 --- a/includes/content-parser/interface-content-parser.php +++ b/includes/content-parser/interface-content-parser.php @@ -21,7 +21,10 @@ interface Content_Parser { * Parse WordPress post content into an AT Protocol content object. 
* * The returned array must include a '$type' key identifying the - * lexicon type (e.g. 'at.markpub.markdown'). + * lexicon type (e.g. 'at.markpub.markdown'). Return null to signal + * that the parser produced no usable output — Document will then + * omit the content field — which is preferable to shipping an + * empty-text record. * * Receives raw post content so parsers can choose their own * strategy: parse_blocks() for block-aware parsing, or @@ -29,9 +32,9 @@ interface Content_Parser { * * @param string $content Raw post content (post_content). * @param \WP_Post $post The WordPress post object. - * @return array AT Protocol content object. + * @return array|null AT Protocol content object, or null to omit. */ - public function parse( string $content, \WP_Post $post ): array; + public function parse( string $content, \WP_Post $post ): ?array; /** * The lexicon NSID this parser produces. diff --git a/includes/transformer/class-document.php b/includes/transformer/class-document.php index 5510b88..5d6b084 100644 --- a/includes/transformer/class-document.php +++ b/includes/transformer/class-document.php @@ -177,6 +177,10 @@ private function get_content(): ?array { $content = $parser->parse( $this->object->post_content, $this->object ); + if ( null === $content ) { + return null; + } + /** * Filters the parsed content object before adding to the document record. * diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php index 99409ec..2fa50e5 100644 --- a/tests/phpunit/tests/content-parser/class-test-markpub.php +++ b/tests/phpunit/tests/content-parser/class-test-markpub.php @@ -178,13 +178,12 @@ public function test_converts_separator() { } /** - * Test empty content produces empty markdown. + * Test empty content returns null so Document can omit content. 
*/ public function test_empty_content() { - $post = self::factory()->post->create_and_get(); - $result = $this->parser->parse( '', $post ); + $post = self::factory()->post->create_and_get(); - $this->assertSame( '', $result['text']['markdown'] ); + $this->assertNull( $this->parser->parse( '', $post ) ); } /** @@ -231,4 +230,39 @@ public function test_classic_content_fallback() { $this->assertStringContainsString( '**bold**', $md ); $this->assertStringContainsString( 'Classic editor content', $md ); } + + /** + * Test that sibling content after </figcaption> inside the same + * <figure> does not bleed into the extracted caption text. + */ + public function test_image_caption_does_not_include_sibling_content() { + $post = self::factory()->post->create_and_get(); + $content = "\n" + . '
' + . 'A photo' + . '
Real caption
' + . '

Should not appear in caption

' + . '
' + . "\n"; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( 'Real caption', $md ); + $this->assertStringNotContainsString( 'Should not appear in caption', $md ); + } + + /** + * Test that a post made up entirely of blocks that produce no + * markdown (e.g. core/spacer) returns null so Document can omit + * the content field. + */ + public function test_parse_returns_null_when_markdown_is_empty() { + $post = self::factory()->post->create_and_get(); + $content = "\n" + . '' . "\n" + . ''; + + $this->assertNull( $this->parser->parse( $content, $post ) ); + } } diff --git a/tests/phpunit/tests/transformer/class-stub-parser.php b/tests/phpunit/tests/transformer/class-stub-parser.php index b869820..0231dd8 100644 --- a/tests/phpunit/tests/transformer/class-stub-parser.php +++ b/tests/phpunit/tests/transformer/class-stub-parser.php @@ -14,6 +14,13 @@ */ class Stub_Parser implements Content_Parser { + /** + * Whether parse() should return null. + * + * @var bool + */ + public bool $return_null = false; + /** * {@inheritDoc} */ @@ -27,7 +34,11 @@ public function get_type(): string { * @param string $content Raw post content. * @param \WP_Post $post The WordPress post object. 
*/ - public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + public function parse( string $content, \WP_Post $post ): ?array { // phpcs:ignore Generic.CodeAnalysis.UnusedFunctionParameter.Found, VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + if ( $this->return_null ) { + return null; + } + return array( '$type' => 'test.stub.parser', 'text' => $content, diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php index 3a84260..01c24a1 100644 --- a/tests/phpunit/tests/transformer/class-test-document.php +++ b/tests/phpunit/tests/transformer/class-test-document.php @@ -100,6 +100,40 @@ public function test_content_ignored_with_invalid_parser() { \remove_all_filters( 'atmosphere_content_parser' ); } + /** + * Test that when the parser returns null for non-empty content, + * the content field is omitted and the atmosphere_document_content + * filter is not invoked. + */ + public function test_content_absent_when_parser_returns_null() { + $parser = new Stub_Parser(); + $parser->return_null = true; + + \add_filter( 'atmosphere_content_parser', static fn() => $parser ); + + $filter_called = false; + \add_filter( + 'atmosphere_document_content', + static function ( $content ) use ( &$filter_called ) { + $filter_called = true; + return $content; + } + ); + + $post = self::factory()->post->create_and_get( + array( 'post_content' => 'Some content.' ) + ); + + $transformer = new Document( $post ); + $record = $transformer->transform(); + + $this->assertArrayNotHasKey( 'content', $record ); + $this->assertFalse( $filter_called ); + + \remove_all_filters( 'atmosphere_content_parser' ); + \remove_all_filters( 'atmosphere_document_content' ); + } + /** * Test that content field is absent for empty post content. 
*/ From d90575d87f0872724dc49a7722c29faa2e351b7c Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Wed, 22 Apr 2026 09:53:05 -0500 Subject: [PATCH 09/10] Skip empty-output blocks in Markpub handlers paragraph(), fallback(), and the classic-content branch checked empty( $html ) on raw innerHTML. For content like

the wrapper is non-empty, so these handlers fell through to inline_html_to_markdown() which trims to "". The empty string survived into the parts array and produced a spurious "\n\n" separator before the next block in mixed-content posts. Match the pattern heading() already uses: convert first, return null when the result is empty. --- includes/content-parser/class-markpub.php | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php index 3ec0019..a8e2005 100644 --- a/includes/content-parser/class-markpub.php +++ b/includes/content-parser/class-markpub.php @@ -82,12 +82,9 @@ public function parse( string $content, \WP_Post $post ): ?array { // phpcs:igno private static function transform_block( array $block ): ?string { if ( empty( $block['blockName'] ) ) { // Classic (non-block) content or whitespace. - $html = \trim( $block['innerHTML'] ?? '' ); - if ( empty( $html ) ) { - return null; - } + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); - return self::inline_html_to_markdown( $html ); + return '' === $md ? null : $md; } return match ( $block['blockName'] ) { @@ -114,12 +111,9 @@ private static function transform_block( array $block ): ?string { * @return string|null */ private static function paragraph( array $block ): ?string { - $html = \trim( $block['innerHTML'] ?? '' ); - if ( empty( $html ) ) { - return null; - } + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); - return self::inline_html_to_markdown( $html ); + return '' === $md ? null : $md; } /** @@ -312,12 +306,9 @@ private static function fallback( array $block ): ?string { return self::container( $block ); } - $html = \trim( $block['innerHTML'] ?? '' ); - if ( empty( $html ) ) { - return null; - } + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); - return self::inline_html_to_markdown( $html ); + return '' === $md ? 
null : $md; } /** From 5610f98226484ea9424e98935efa7d272abc63ff Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Thu, 23 Apr 2026 03:03:01 -0500 Subject: [PATCH 10/10] Expand Markpub parser test coverage (#31) --- .../content-parser/class-test-markpub.php | 345 +++++++++++++++++- 1 file changed, 334 insertions(+), 11 deletions(-) diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php index 2fa50e5..bf59ea3 100644 --- a/tests/phpunit/tests/content-parser/class-test-markpub.php +++ b/tests/phpunit/tests/content-parser/class-test-markpub.php @@ -82,7 +82,7 @@ public function test_converts_headings() { $content = '

My Heading

'; $result = $this->parser->parse( $content, $post ); - $this->assertStringContainsString( '## My Heading', $result['text']['markdown'] ); + $this->assertSame( '## My Heading', $result['text']['markdown'] ); } /** @@ -93,7 +93,7 @@ public function test_converts_heading_level_3() { $content = '

Sub Heading

'; $result = $this->parser->parse( $content, $post ); - $this->assertStringContainsString( '### Sub Heading', $result['text']['markdown'] ); + $this->assertSame( '### Sub Heading', $result['text']['markdown'] ); } /** @@ -147,10 +147,8 @@ public function test_converts_code_blocks() { $post = self::factory()->post->create_and_get(); $content = '
echo "hello";
'; $result = $this->parser->parse( $content, $post ); - $md = $result['text']['markdown']; - $this->assertStringContainsString( '```', $md ); - $this->assertStringContainsString( 'echo "hello";', $md ); + $this->assertSame( "```\necho \"hello\";\n```", $result['text']['markdown'] ); } /** @@ -174,7 +172,7 @@ public function test_converts_separator() { . '

After

'; $result = $this->parser->parse( $content, $post ); - $this->assertStringContainsString( '---', $result['text']['markdown'] ); + $this->assertSame( "Before\n\n---\n\nAfter", $result['text']['markdown'] ); } /** @@ -188,22 +186,33 @@ public function test_empty_content() { /** * Test the atmosphere_html_to_markdown filter. + * + * Verifies the filter callback receives ($markdown, $content) so + * callers can inspect the raw source alongside the conversion. */ public function test_html_to_markdown_filter() { + $received = array(); + \add_filter( 'atmosphere_html_to_markdown', - static fn() => 'custom markdown', + static function ( $markdown, $content ) use ( &$received ) { + $received = array( + 'markdown' => $markdown, + 'content' => $content, + ); + return 'custom markdown'; + }, 10, 2 ); $post = self::factory()->post->create_and_get(); - $result = $this->parser->parse( - '

Hello

', - $post - ); + $source = '

Hello

'; + $result = $this->parser->parse( $source, $post ); $this->assertSame( 'custom markdown', $result['text']['markdown'] ); + $this->assertSame( 'Hello', $received['markdown'] ); + $this->assertSame( $source, $received['content'] ); \remove_all_filters( 'atmosphere_html_to_markdown' ); } @@ -265,4 +274,318 @@ public function test_parse_returns_null_when_markdown_is_empty() { $this->assertNull( $this->parser->parse( $content, $post ) ); } + + /** + * Test ordered list produces numbered markdown. + */ + public function test_listing_ordered() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  1. First
  2. ' + . '
  3. Second
  4. ' + . '
  5. Third
  6. ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "1. First\n2. Second\n3. Third", $result['text']['markdown'] ); + } + + /** + * Test unordered list produces dashed markdown. + */ + public function test_listing_unordered() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  • First
  • ' + . '
  • Second
  • ' + . '
  • Third
  • ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "- First\n- Second\n- Third", $result['text']['markdown'] ); + } + + /** + * Test ordered list skips empty items without gapping the counter. + */ + public function test_listing_skips_empty_items_without_gap() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  1. First
  2. ' + . '
  3. ' + . '
  4. Third
  5. ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "1. First\n2. Third", $result['text']['markdown'] ); + } + + /** + * Test list items preserve inline formatting. + */ + public function test_listing_preserves_inline_formatting() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  • some bold
  • ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '- some **bold**', $result['text']['markdown'] ); + } + + /** + * Test quote block wraps an inner paragraph in a "> " prefix. + */ + public function test_quote_with_inner_paragraph() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Paragraph text

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '> Paragraph text', $result['text']['markdown'] ); + } + + /** + * Test quote block prefixes every inner line. + */ + public function test_quote_prefixes_every_line() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

First

' + . '

Second

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "> First\n> Second", $result['text']['markdown'] ); + } + + /** + * Test quote falls back to innerHTML when no innerBlocks are present. + */ + public function test_quote_innerhtml_fallback() { + $post = self::factory()->post->create_and_get(); + $content = '
Direct quote text
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '> Direct quote text', $result['text']['markdown'] ); + } + + /** + * Test core/group containers flatten inner block markdown. + */ + public function test_container_group() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside group

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside group', $result['text']['markdown'] ); + } + + /** + * Test core/columns containers flatten inner block markdown. + */ + public function test_container_columns() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside columns

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside columns', $result['text']['markdown'] ); + } + + /** + * Test core/column containers flatten inner block markdown. + */ + public function test_container_column() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside column

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside column', $result['text']['markdown'] ); + } + + /** + * Test fallback delegates to container() when innerBlocks exist. + */ + public function test_fallback_delegates_to_container_with_inner_blocks() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside unknown

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside unknown', $result['text']['markdown'] ); + } + + /** + * Test image() skips blocks without an <img> tag so surrounding + * content renders with no empty separator. + * + * Uses a mixed fixture so a regression returning "" instead of null + * would produce a leading blank line and fail this exact-match + * assertion (the whole-post empty guard in parse() would otherwise + * mask the handler bug). + */ + public function test_image_without_img_tag_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '
Just a caption
' + . "
\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test heading defaults to level 2 when attrs.level is missing. + */ + public function test_heading_defaults_to_level_2() { + $post = self::factory()->post->create_and_get(); + $content = '

Default level

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '## Default level', $result['text']['markdown'] ); + } + + /** + * Test whitespace-only heading block is skipped cleanly. + * + * Mixed with a non-empty sibling so a regression returning "" from + * heading() would produce a leading blank line and fail the exact + * assertion (the whole-post empty guard would otherwise hide it). + */ + public function test_heading_whitespace_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = "

\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test whitespace-only paragraph block is skipped cleanly. + * + * Mixed with a non-empty sibling so a regression returning "" from + * paragraph() would produce a leading blank line and fail the exact + * assertion (the whole-post empty guard would otherwise hide it). + */ + public function test_paragraph_whitespace_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = "

\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test code block emits the configured language in the fence. + */ + public function test_code_emits_language_fence() { + $post = self::factory()->post->create_and_get(); + $content = '
echo 1;
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertStringStartsWith( "```php\n", $result['text']['markdown'] ); + } + + /** + * Test code block decodes HTML entities inside the fence. + */ + public function test_code_decodes_html_entities() { + $post = self::factory()->post->create_and_get(); + $content = '
<div>
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "```\n
\n```", $result['text']['markdown'] ); + } + + /** + * Test link URLs have parentheses percent-encoded to protect markdown syntax. + */ + public function test_link_url_parens_percent_encoded() { + $post = self::factory()->post->create_and_get(); + $content = '

See Foo.

'; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '%28bar%29', $md ); + $this->assertStringNotContainsString( '(bar)', $md ); + } + + /** + * Test <br> converts to a markdown hard break (two spaces + newline). + */ + public function test_br_converts_to_hard_break() { + $post = self::factory()->post->create_and_get(); + $content = '

line1
line2

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( "line1 \nline2", $result['text']['markdown'] ); + } + + /** + * Test HTML entities are decoded in inline paragraph text. + */ + public function test_inline_html_entities_decoded() { + $post = self::factory()->post->create_and_get(); + $content = '

AT&T’s

'; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( 'AT&T', $md ); + $this->assertStringContainsString( "\xE2\x80\x99", $md ); + } + + /** + * Test inline <img> inside a paragraph converts via inline_html_to_markdown. + */ + public function test_inline_image_inside_paragraph() { + $post = self::factory()->post->create_and_get(); + $content = '

Look x here

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Look ![x](x.jpg) here', $result['text']['markdown'] ); + } + + /** + * Test nested inline formatting (bold wrapping italic). + */ + public function test_nested_inline_formatting() { + $post = self::factory()->post->create_and_get(); + $content = '

bold italic

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '**bold *italic***', $result['text']['markdown'] ); + } }