diff --git a/.github/changelog/markpub-parser b/.github/changelog/markpub-parser new file mode 100644 index 0000000..5beee3d --- /dev/null +++ b/.github/changelog/markpub-parser @@ -0,0 +1,4 @@ +Significance: minor +Type: added + +Add rich content support for standard.site documents using the Markpub format. diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php index adef278..4a1a0fb 100644 --- a/includes/class-atmosphere.php +++ b/includes/class-atmosphere.php @@ -9,6 +9,7 @@ \defined( 'ABSPATH' ) || exit; +use Atmosphere\Content_Parser\Markpub; use Atmosphere\OAuth\Client; use Atmosphere\Transformer\Document; use Atmosphere\Transformer\Publication; @@ -48,6 +49,9 @@ public function init(): void { // Plugin integrations. Load::init(); + // Default content parser (Markpub). + \add_filter( 'atmosphere_content_parser', static fn() => new Markpub() ); + // JSON preview for AT Protocol records. \add_action( 'template_redirect', array( $this, 'preview' ) ); diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php new file mode 100644 index 0000000..a8e2005 --- /dev/null +++ b/includes/content-parser/class-markpub.php @@ -0,0 +1,435 @@ + 'at.markpub.markdown', + 'text' => array( + '$type' => 'at.markpub.text', + 'markdown' => $markdown, + ), + 'flavor' => 'gfm', + 'extensions' => array( 'strikethrough' ), + ); + } + + /** + * Convert a single WordPress block to markdown. + * + * @param array $block Parsed block from parse_blocks(). + * @return string|null Markdown string or null to skip. + */ + private static function transform_block( array $block ): ?string { + if ( empty( $block['blockName'] ) ) { + // Classic (non-block) content or whitespace. + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + + return '' === $md ? null : $md; + } + + return match ( $block['blockName'] ) { + 'core/paragraph' => self::paragraph( $block ), + 'core/heading' => self::heading( $block ), + 'core/image' => self::image( $block ), + 'core/list' => self::listing( $block ), + 'core/quote' => self::quote( $block ), + 'core/code' => self::code( $block ), + 'core/preformatted' => self::preformatted( $block ), + 'core/separator' => '---', + 'core/spacer' => null, + 'core/group', + 'core/columns', + 'core/column' => self::container( $block ), + default => self::fallback( $block ), + }; + } + + /** + * Paragraph block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function paragraph( array $block ): ?string { + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + + return '' === $md ? null : $md; + } + + /** + * Heading block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function heading( array $block ): ?string { + $level = $block['attrs']['level'] ?? 2; + $text = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + + if ( empty( \trim( $text ) ) ) { + return null; + } + + return \str_repeat( '#', (int) $level ) . ' ' . \trim( $text ); + } + + /** + * Image block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function image( array $block ): ?string { + $html = $block['innerHTML'] ?? ''; + $src = ''; + $alt = ''; + + $processor = new \WP_HTML_Tag_Processor( $html ); + if ( $processor->next_tag( 'IMG' ) ) { + $src = $processor->get_attribute( 'src' ) ?? ''; + $alt = $processor->get_attribute( 'alt' ) ?? ''; + } + + if ( empty( $src ) ) { + return null; + } + + $md = '![' . $alt . '](' . $src . ')'; + + // Check for a caption in figcaption. + $caption_proc = new \WP_HTML_Tag_Processor( $html ); + if ( $caption_proc->next_tag( 'FIGCAPTION' ) ) { + // Strip both ends of the figcaption tag BEFORE stripping + // remaining tags, so sibling content after + // (e.g. a trailing

inside the same

) doesn't + // bleed into the caption text. + $caption = self::safe_replace( '#.*]*>#si', '', $html ); + $caption = self::safe_replace( '#.*#si', '', $caption ); + $caption = \trim( \wp_strip_all_tags( $caption ) ); + + if ( ! empty( $caption ) ) { + $md .= "\n" . $caption; + } + } + + return $md; + } + + /** + * List block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function listing( array $block ): ?string { + $ordered = ! empty( $block['attrs']['ordered'] ); + $items = array(); + $counter = 0; + + if ( ! empty( $block['innerBlocks'] ) ) { + foreach ( $block['innerBlocks'] as $inner ) { + $text = self::inline_html_to_markdown( $inner['innerHTML'] ?? '' ); + $text = \trim( $text ); + + if ( empty( $text ) ) { + continue; + } + + ++$counter; + $prefix = $ordered ? $counter . '. ' : '- '; + $items[] = $prefix . $text; + } + } + + return empty( $items ) ? null : \implode( "\n", $items ); + } + + /** + * Quote block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function quote( array $block ): ?string { + $lines = array(); + + if ( ! empty( $block['innerBlocks'] ) ) { + foreach ( $block['innerBlocks'] as $inner ) { + $md = self::transform_block( $inner ); + if ( null !== $md ) { + $lines[] = $md; + } + } + } + + if ( empty( $lines ) ) { + $text = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + if ( empty( \trim( $text ) ) ) { + return null; + } + $lines = array( \trim( $text ) ); + } + + $quoted = \implode( "\n", $lines ); + + // Prefix each line with >. + return \implode( + "\n", + \array_map( + static fn( $line ) => '> ' . $line, + \explode( "\n", $quoted ) + ) + ); + } + + /** + * Code block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function code( array $block ): ?string { + $text = \wp_strip_all_tags( $block['innerHTML'] ?? '' ); + $text = \html_entity_decode( $text, ENT_QUOTES, 'UTF-8' ); + $text = \trim( $text ); + + if ( empty( $text ) ) { + return null; + } + + $lang = $block['attrs']['language'] ?? ''; + + return '```' . $lang . "\n" . $text . "\n```"; + } + + /** + * Preformatted block. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function preformatted( array $block ): ?string { + return self::code( $block ); + } + + /** + * Container block — flatten inner blocks. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function container( array $block ): ?string { + if ( empty( $block['innerBlocks'] ) ) { + return null; + } + + $parts = array(); + + foreach ( $block['innerBlocks'] as $inner ) { + $md = self::transform_block( $inner ); + if ( null !== $md ) { + $parts[] = $md; + } + } + + return empty( $parts ) ? null : \implode( "\n\n", $parts ); + } + + /** + * Fallback for unknown block types. + * + * @param array $block Parsed block. + * @return string|null + */ + private static function fallback( array $block ): ?string { + if ( ! empty( $block['innerBlocks'] ) ) { + return self::container( $block ); + } + + $md = self::inline_html_to_markdown( $block['innerHTML'] ?? '' ); + + return '' === $md ? null : $md; + } + + /** + * Convert inline HTML formatting to markdown. + * + * Handles links, bold, italic, strikethrough, inline code, + * images, and line breaks. Strips block-level wrappers and + * remaining tags. + * + * @param string $html HTML string. + * @return string Markdown string. + */ + private static function inline_html_to_markdown( string $html ): string { + $html = \trim( $html ); + + if ( empty( $html ) ) { + return ''; + } + + $md = $html; + + // Inline images. + $md = self::safe_replace_callback( + '#]+>#si', + static function ( $m ) { + $processor = new \WP_HTML_Tag_Processor( $m[0] ); + if ( $processor->next_tag( 'IMG' ) ) { + $src = $processor->get_attribute( 'src' ) ?? ''; + $alt = $processor->get_attribute( 'alt' ) ?? ''; + return '![' . $alt . '](' . $src . ')'; + } + return ''; + }, + $md + ); + + // Links — percent-encode parentheses to avoid breaking markdown syntax. + $md = self::safe_replace_callback( + '#]+href=["\']([^"\']*)["\'][^>]*>(.*?)#si', + static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $m[1] ) . ')', + $md + ); + + // Bold. + $md = self::safe_replace( '#<(?:strong|b)>(.*?)#si', '**$1**', $md ); + + // Italic. + $md = self::safe_replace( '#<(?:em|i)>(.*?)#si', '*$1*', $md ); + + // Strikethrough. + $md = self::safe_replace( '#<(?:s|del|strike)>(.*?)#si', '~~$1~~', $md ); + + // Inline code. + $md = self::safe_replace( '#(.*?)#si', '`$1`', $md ); + + // Line breaks. + $md = self::safe_replace( '##si', " \n", $md ); + + // Strip block-level wrappers and remaining tags. + $md = \wp_strip_all_tags( $md ); + + // Decode HTML entities. + $md = \html_entity_decode( $md, ENT_QUOTES, 'UTF-8' ); + + return \trim( $md ); + } + + /** + * Wraps preg_replace with a fallback that preserves the input on PCRE failure. + * + * The underlying preg_replace returns null on engine failure + * (e.g. backtrack or recursion limit hit on pathological input). + * Without a guard, null cascades through subsequent string + * operations and can erase the whole buffer with no signal. + * + * @param string $pattern Pattern. + * @param string $replacement Replacement. + * @param string $subject Input. + * @return string Replaced string, or the original on PCRE failure. + */ + private static function safe_replace( string $pattern, string $replacement, string $subject ): string { + $result = \preg_replace( $pattern, $replacement, $subject ); + + if ( null === $result ) { + self::warn_pcre_failure( $pattern ); + return $subject; + } + + return $result; + } + + /** + * Wraps preg_replace_callback with the same failure guard as safe_replace(). + * + * @param string $pattern Pattern. + * @param callable $callback Callback. + * @param string $subject Input. + * @return string Replaced string, or the original on PCRE failure. + */ + private static function safe_replace_callback( string $pattern, callable $callback, string $subject ): string { + $result = \preg_replace_callback( $pattern, $callback, $subject ); + + if ( null === $result ) { + self::warn_pcre_failure( $pattern ); + return $subject; + } + + return $result; + } + + /** + * Emit a warning about a PCRE failure without hard-failing. + * + * @param string $pattern The pattern that failed. + */ + private static function warn_pcre_failure( string $pattern ): void { + if ( \function_exists( 'wp_trigger_error' ) ) { + \wp_trigger_error( + __METHOD__, + \sprintf( 'PCRE failure on pattern %s; preserving input.', $pattern ) + ); + } + } +} diff --git a/includes/content-parser/interface-content-parser.php b/includes/content-parser/interface-content-parser.php index 4f11827..c9264b0 100644 --- a/includes/content-parser/interface-content-parser.php +++ b/includes/content-parser/interface-content-parser.php @@ -21,7 +21,10 @@ interface Content_Parser { * Parse WordPress post content into an AT Protocol content object. * * The returned array must include a '$type' key identifying the - * lexicon type (e.g. 'at.markpub.markdown'). + * lexicon type (e.g. 'at.markpub.markdown'). Return null to signal + * that the parser produced no usable output — Document will then + * omit the content field — which is preferable to shipping an + * empty-text record. * * Receives raw post content so parsers can choose their own * strategy: parse_blocks() for block-aware parsing, or @@ -29,9 +32,9 @@ interface Content_Parser { * * @param string $content Raw post content (post_content). * @param \WP_Post $post The WordPress post object. - * @return array AT Protocol content object. + * @return array|null AT Protocol content object, or null to omit. */ - public function parse( string $content, \WP_Post $post ): array; + public function parse( string $content, \WP_Post $post ): ?array; /** * The lexicon NSID this parser produces. diff --git a/includes/transformer/class-document.php b/includes/transformer/class-document.php index 98a8a72..3745616 100644 --- a/includes/transformer/class-document.php +++ b/includes/transformer/class-document.php @@ -177,6 +177,10 @@ private function get_content(): ?array { $content = $parser->parse( $this->object->post_content, $this->object ); + if ( null === $content ) { + return null; + } + /** * Filters the parsed content object before adding to the document record. * diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php new file mode 100644 index 0000000..bf59ea3 --- /dev/null +++ b/tests/phpunit/tests/content-parser/class-test-markpub.php @@ -0,0 +1,591 @@ +parser = new Markpub(); + } + + /** + * Test get_type returns the markpub NSID. + */ + public function test_get_type() { + $this->assertSame( 'at.markpub.markdown', $this->parser->get_type() ); + } + + /** + * Test parse returns correct top-level structure. + */ + public function test_parse_returns_correct_structure() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( + '

Hello world

', + $post + ); + + $this->assertArrayHasKey( '$type', $result ); + $this->assertSame( 'at.markpub.markdown', $result['$type'] ); + $this->assertArrayHasKey( 'text', $result ); + $this->assertSame( 'at.markpub.text', $result['text']['$type'] ); + $this->assertArrayHasKey( 'markdown', $result['text'] ); + $this->assertSame( 'gfm', $result['flavor'] ); + $this->assertContains( 'strikethrough', $result['extensions'] ); + } + + /** + * Test paragraph conversion. + */ + public function test_converts_paragraphs() { + $post = self::factory()->post->create_and_get(); + $content = "\n

First paragraph

\n\n\n" + . "\n

Second paragraph

\n"; + + $result = $this->parser->parse( $content, $post ); + $markdown = $result['text']['markdown']; + + $this->assertStringContainsString( 'First paragraph', $markdown ); + $this->assertStringContainsString( 'Second paragraph', $markdown ); + $this->assertStringNotContainsString( '

', $markdown ); + } + + /** + * Test heading conversion. + */ + public function test_converts_headings() { + $post = self::factory()->post->create_and_get(); + $content = '

My Heading

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '## My Heading', $result['text']['markdown'] ); + } + + /** + * Test heading level 3. + */ + public function test_converts_heading_level_3() { + $post = self::factory()->post->create_and_get(); + $content = '

Sub Heading

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '### Sub Heading', $result['text']['markdown'] ); + } + + /** + * Test link conversion in a paragraph. + */ + public function test_converts_links() { + $post = self::factory()->post->create_and_get(); + $content = '

Visit Example today.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '[Example](https://example.com)', $result['text']['markdown'] ); + } + + /** + * Test bold conversion. + */ + public function test_converts_bold() { + $post = self::factory()->post->create_and_get(); + $content = '

This is bold text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '**bold**', $result['text']['markdown'] ); + } + + /** + * Test italic conversion. + */ + public function test_converts_italic() { + $post = self::factory()->post->create_and_get(); + $content = '

This is italic text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '*italic*', $result['text']['markdown'] ); + } + + /** + * Test image block conversion. + */ + public function test_converts_images() { + $post = self::factory()->post->create_and_get(); + $content = '
A photo
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '![A photo](https://example.com/photo.jpg)', $result['text']['markdown'] ); + } + + /** + * Test code block conversion. + */ + public function test_converts_code_blocks() { + $post = self::factory()->post->create_and_get(); + $content = '
echo "hello";
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "```\necho \"hello\";\n```", $result['text']['markdown'] ); + } + + /** + * Test inline code conversion. + */ + public function test_converts_inline_code() { + $post = self::factory()->post->create_and_get(); + $content = '

Use the parse() method.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '`parse()`', $result['text']['markdown'] ); + } + + /** + * Test separator block becomes horizontal rule. + */ + public function test_converts_separator() { + $post = self::factory()->post->create_and_get(); + $content = "

Before

\n\n" + . "
\n\n" + . '

After

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "Before\n\n---\n\nAfter", $result['text']['markdown'] ); + } + + /** + * Test empty content returns null so Document can omit content. + */ + public function test_empty_content() { + $post = self::factory()->post->create_and_get(); + + $this->assertNull( $this->parser->parse( '', $post ) ); + } + + /** + * Test the atmosphere_html_to_markdown filter. + * + * Verifies the filter callback receives ($markdown, $content) so + * callers can inspect the raw source alongside the conversion. + */ + public function test_html_to_markdown_filter() { + $received = array(); + + \add_filter( + 'atmosphere_html_to_markdown', + static function ( $markdown, $content ) use ( &$received ) { + $received = array( + 'markdown' => $markdown, + 'content' => $content, + ); + return 'custom markdown'; + }, + 10, + 2 + ); + + $post = self::factory()->post->create_and_get(); + $source = '

Hello

'; + $result = $this->parser->parse( $source, $post ); + + $this->assertSame( 'custom markdown', $result['text']['markdown'] ); + $this->assertSame( 'Hello', $received['markdown'] ); + $this->assertSame( $source, $received['content'] ); + + \remove_all_filters( 'atmosphere_html_to_markdown' ); + } + + /** + * Test strikethrough conversion. + */ + public function test_converts_strikethrough() { + $post = self::factory()->post->create_and_get(); + $content = '

This is deleted text.

'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '~~deleted~~', $result['text']['markdown'] ); + } + + /** + * Test classic (non-block) content is handled as fallback. + */ + public function test_classic_content_fallback() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( '

Classic editor content with bold.

', $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '**bold**', $md ); + $this->assertStringContainsString( 'Classic editor content', $md ); + } + + /** + * Test that sibling content after inside the same + *
does not bleed into the extracted caption text. + */ + public function test_image_caption_does_not_include_sibling_content() { + $post = self::factory()->post->create_and_get(); + $content = "\n" + . '
' + . 'A photo' + . '
Real caption
' + . '

Should not appear in caption

' + . '
' + . "\n"; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( 'Real caption', $md ); + $this->assertStringNotContainsString( 'Should not appear in caption', $md ); + } + + /** + * Test that a post made up entirely of blocks that produce no + * markdown (e.g. core/spacer) returns null so Document can omit + * the content field. + */ + public function test_parse_returns_null_when_markdown_is_empty() { + $post = self::factory()->post->create_and_get(); + $content = "\n" + . '' . "\n" + . ''; + + $this->assertNull( $this->parser->parse( $content, $post ) ); + } + + /** + * Test ordered list produces numbered markdown. + */ + public function test_listing_ordered() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  1. First
  2. ' + . '
  3. Second
  4. ' + . '
  5. Third
  6. ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "1. First\n2. Second\n3. Third", $result['text']['markdown'] ); + } + + /** + * Test unordered list produces dashed markdown. + */ + public function test_listing_unordered() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  • First
  • ' + . '
  • Second
  • ' + . '
  • Third
  • ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "- First\n- Second\n- Third", $result['text']['markdown'] ); + } + + /** + * Test ordered list skips empty items without gapping the counter. + */ + public function test_listing_skips_empty_items_without_gap() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  1. First
  2. ' + . '
  3. ' + . '
  4. Third
  5. ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "1. First\n2. Third", $result['text']['markdown'] ); + } + + /** + * Test list items preserve inline formatting. + */ + public function test_listing_preserves_inline_formatting() { + $post = self::factory()->post->create_and_get(); + $content = "\n
    " + . '
  • some bold
  • ' + . "
\n"; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '- some **bold**', $result['text']['markdown'] ); + } + + /** + * Test quote block wraps an inner paragraph in a "> " prefix. + */ + public function test_quote_with_inner_paragraph() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Paragraph text

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '> Paragraph text', $result['text']['markdown'] ); + } + + /** + * Test quote block prefixes every inner line. + */ + public function test_quote_prefixes_every_line() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

First

' + . '

Second

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "> First\n> Second", $result['text']['markdown'] ); + } + + /** + * Test quote falls back to innerHTML when no innerBlocks are present. + */ + public function test_quote_innerhtml_fallback() { + $post = self::factory()->post->create_and_get(); + $content = '
Direct quote text
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '> Direct quote text', $result['text']['markdown'] ); + } + + /** + * Test core/group containers flatten inner block markdown. + */ + public function test_container_group() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside group

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside group', $result['text']['markdown'] ); + } + + /** + * Test core/columns containers flatten inner block markdown. + */ + public function test_container_columns() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside columns

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside columns', $result['text']['markdown'] ); + } + + /** + * Test core/column containers flatten inner block markdown. + */ + public function test_container_column() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside column

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside column', $result['text']['markdown'] ); + } + + /** + * Test fallback delegates to container() when innerBlocks exist. + */ + public function test_fallback_delegates_to_container_with_inner_blocks() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '

Inside unknown

' + . '
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Inside unknown', $result['text']['markdown'] ); + } + + /** + * Test image() skips blocks without an tag so surrounding + * content renders with no empty separator. + * + * Uses a mixed fixture so a regression returning "" instead of null + * would produce a leading blank line and fail this exact-match + * assertion (the whole-post empty guard in parse() would otherwise + * mask the handler bug). + */ + public function test_image_without_img_tag_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = '
' + . '
Just a caption
' + . "
\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test heading defaults to level 2 when attrs.level is missing. + */ + public function test_heading_defaults_to_level_2() { + $post = self::factory()->post->create_and_get(); + $content = '

Default level

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '## Default level', $result['text']['markdown'] ); + } + + /** + * Test whitespace-only heading block is skipped cleanly. + * + * Mixed with a non-empty sibling so a regression returning "" from + * heading() would produce a leading blank line and fail the exact + * assertion (the whole-post empty guard would otherwise hide it). + */ + public function test_heading_whitespace_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = "

\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test whitespace-only paragraph block is skipped cleanly. + * + * Mixed with a non-empty sibling so a regression returning "" from + * paragraph() would produce a leading blank line and fail the exact + * assertion (the whole-post empty guard would otherwise hide it). + */ + public function test_paragraph_whitespace_is_skipped_cleanly() { + $post = self::factory()->post->create_and_get(); + $content = "

\n\n" + . '

After

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'After', $result['text']['markdown'] ); + } + + /** + * Test code block emits the configured language in the fence. + */ + public function test_code_emits_language_fence() { + $post = self::factory()->post->create_and_get(); + $content = '
echo 1;
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertStringStartsWith( "```php\n", $result['text']['markdown'] ); + } + + /** + * Test code block decodes HTML entities inside the fence. + */ + public function test_code_decodes_html_entities() { + $post = self::factory()->post->create_and_get(); + $content = '
<div>
'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( "```\n
\n```", $result['text']['markdown'] ); + } + + /** + * Test link URLs have parentheses percent-encoded to protect markdown syntax. + */ + public function test_link_url_parens_percent_encoded() { + $post = self::factory()->post->create_and_get(); + $content = '

See Foo.

'; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '%28bar%29', $md ); + $this->assertStringNotContainsString( '(bar)', $md ); + } + + /** + * Test
converts to a markdown hard break (two spaces + newline). + */ + public function test_br_converts_to_hard_break() { + $post = self::factory()->post->create_and_get(); + $content = '

line1
line2

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( "line1 \nline2", $result['text']['markdown'] ); + } + + /** + * Test HTML entities are decoded in inline paragraph text. + */ + public function test_inline_html_entities_decoded() { + $post = self::factory()->post->create_and_get(); + $content = '

AT&T’s

'; + + $result = $this->parser->parse( $content, $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( 'AT&T', $md ); + $this->assertStringContainsString( "\xE2\x80\x99", $md ); + } + + /** + * Test inline inside a paragraph converts via inline_html_to_markdown. + */ + public function test_inline_image_inside_paragraph() { + $post = self::factory()->post->create_and_get(); + $content = '

Look x here

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( 'Look ![x](x.jpg) here', $result['text']['markdown'] ); + } + + /** + * Test nested inline formatting (bold wrapping italic). + */ + public function test_nested_inline_formatting() { + $post = self::factory()->post->create_and_get(); + $content = '

bold italic

'; + + $result = $this->parser->parse( $content, $post ); + + $this->assertSame( '**bold *italic***', $result['text']['markdown'] ); + } +} diff --git a/tests/phpunit/tests/transformer/class-stub-parser.php b/tests/phpunit/tests/transformer/class-stub-parser.php index b869820..0231dd8 100644 --- a/tests/phpunit/tests/transformer/class-stub-parser.php +++ b/tests/phpunit/tests/transformer/class-stub-parser.php @@ -14,6 +14,13 @@ */ class Stub_Parser implements Content_Parser { + /** + * Whether parse() should return null. + * + * @var bool + */ + public bool $return_null = false; + /** * {@inheritDoc} */ @@ -27,7 +34,11 @@ public function get_type(): string { * @param string $content Raw post content. * @param \WP_Post $post The WordPress post object. */ - public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + public function parse( string $content, \WP_Post $post ): ?array { // phpcs:ignore Generic.CodeAnalysis.UnusedFunctionParameter.Found, VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + if ( $this->return_null ) { + return null; + } + return array( '$type' => 'test.stub.parser', 'text' => $content, diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php index 3915ac3..01c24a1 100644 --- a/tests/phpunit/tests/transformer/class-test-document.php +++ b/tests/phpunit/tests/transformer/class-test-document.php @@ -20,9 +20,12 @@ class Test_Document extends WP_UnitTestCase { /** - * Test that content field is absent when no parser is registered. + * Test that content field is absent when parser filter returns null. */ public function test_content_absent_without_parser() { + \remove_all_filters( 'atmosphere_content_parser' ); + \add_filter( 'atmosphere_content_parser', '__return_null' ); + $post = self::factory()->post->create_and_get( array( 'post_content' => 'Some content here.' ) ); @@ -31,6 +34,8 @@ public function test_content_absent_without_parser() { $record = $transformer->transform(); $this->assertArrayNotHasKey( 'content', $record ); + + \remove_all_filters( 'atmosphere_content_parser' ); } /** @@ -95,6 +100,40 @@ public function test_content_ignored_with_invalid_parser() { \remove_all_filters( 'atmosphere_content_parser' ); } + /** + * Test that when the parser returns null for non-empty content, + * the content field is omitted and the atmosphere_document_content + * filter is not invoked. + */ + public function test_content_absent_when_parser_returns_null() { + $parser = new Stub_Parser(); + $parser->return_null = true; + + \add_filter( 'atmosphere_content_parser', static fn() => $parser ); + + $filter_called = false; + \add_filter( + 'atmosphere_document_content', + static function ( $content ) use ( &$filter_called ) { + $filter_called = true; + return $content; + } + ); + + $post = self::factory()->post->create_and_get( + array( 'post_content' => 'Some content.' ) + ); + + $transformer = new Document( $post ); + $record = $transformer->transform(); + + $this->assertArrayNotHasKey( 'content', $record ); + $this->assertFalse( $filter_called ); + + \remove_all_filters( 'atmosphere_content_parser' ); + \remove_all_filters( 'atmosphere_document_content' ); + } + /** * Test that content field is absent for empty post content. */