diff --git a/.github/changelog/markpub-parser b/.github/changelog/markpub-parser
new file mode 100644
index 0000000..5beee3d
--- /dev/null
+++ b/.github/changelog/markpub-parser
@@ -0,0 +1,4 @@
+Significance: minor
+Type: added
+
+Add rich content support for standard.site documents using the Markpub format.
diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php
index adef278..4a1a0fb 100644
--- a/includes/class-atmosphere.php
+++ b/includes/class-atmosphere.php
@@ -9,6 +9,7 @@
\defined( 'ABSPATH' ) || exit;
+use Atmosphere\Content_Parser\Markpub;
use Atmosphere\OAuth\Client;
use Atmosphere\Transformer\Document;
use Atmosphere\Transformer\Publication;
@@ -48,6 +49,9 @@ public function init(): void {
// Plugin integrations.
Load::init();
+ // Default content parser (Markpub).
+ \add_filter( 'atmosphere_content_parser', static fn() => new Markpub() );
+
// JSON preview for AT Protocol records.
\add_action( 'template_redirect', array( $this, 'preview' ) );
diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php
new file mode 100644
index 0000000..a8e2005
--- /dev/null
+++ b/includes/content-parser/class-markpub.php
@@ -0,0 +1,435 @@
+ 'at.markpub.markdown',
+ 'text' => array(
+ '$type' => 'at.markpub.text',
+ 'markdown' => $markdown,
+ ),
+ 'flavor' => 'gfm',
+ 'extensions' => array( 'strikethrough' ),
+ );
+ }
+
+	/**
+	 * Turn one parsed block into its markdown representation.
+	 *
+	 * Unnamed blocks (classic content or the whitespace between blocks)
+	 * go straight through the inline converter; named blocks are routed
+	 * to the handler registered for their type, with unknown types
+	 * handed to fallback().
+	 *
+	 * @param array $block Parsed block from parse_blocks().
+	 * @return string|null Markdown string or null to skip.
+	 */
+	private static function transform_block( array $block ): ?string {
+		if ( ! empty( $block['blockName'] ) ) {
+			$name = $block['blockName'];
+
+			return match ( $name ) {
+				'core/spacer'       => null,
+				'core/separator'    => '---',
+				'core/paragraph'    => self::paragraph( $block ),
+				'core/heading'      => self::heading( $block ),
+				'core/image'        => self::image( $block ),
+				'core/list'         => self::listing( $block ),
+				'core/quote'        => self::quote( $block ),
+				'core/code'         => self::code( $block ),
+				'core/preformatted' => self::preformatted( $block ),
+				'core/group',
+				'core/columns',
+				'core/column'       => self::container( $block ),
+				default             => self::fallback( $block ),
+			};
+		}
+
+		// No block name: classic (non-block) content or whitespace.
+		$markdown = self::inline_html_to_markdown( $block['innerHTML'] ?? '' );
+
+		if ( '' === $markdown ) {
+			return null;
+		}
+
+		return $markdown;
+	}
+
+	/**
+	 * Convert a paragraph block to markdown.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Converted text, or null when nothing remains after conversion.
+	 */
+	private static function paragraph( array $block ): ?string {
+		$markdown = self::inline_html_to_markdown( $block['innerHTML'] ?? '' );
+
+		if ( '' === $markdown ) {
+			return null;
+		}
+
+		return $markdown;
+	}
+
+	/**
+	 * Convert a heading block to an ATX heading ("## Title").
+	 *
+	 * The level is read from attrs.level and defaults to 2 when the
+	 * attribute is missing or not numeric. It is clamped to 1-6: block
+	 * attributes come from stored post content, a negative repeat count
+	 * makes str_repeat() throw, 0 would emit no marker at all, and more
+	 * than six "#" characters is not a heading in markdown.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Heading line, or null when the heading has no text.
+	 */
+	private static function heading( array $block ): ?string {
+		$text = \trim( self::inline_html_to_markdown( $block['innerHTML'] ?? '' ) );
+
+		// Compare against '' instead of using empty(): a heading whose
+		// text is "0" is real content and must not be skipped.
+		if ( '' === $text ) {
+			return null;
+		}
+
+		$level = $block['attrs']['level'] ?? 2;
+		$level = \is_numeric( $level ) ? (int) $level : 2;
+		$level = \max( 1, \min( 6, $level ) );
+
+		return \str_repeat( '#', $level ) . ' ' . $text;
+	}
+
+	/**
+	 * Convert an image block to markdown image syntax.
+	 *
+	 * Emits "![alt](src)" for the first <img> in the block, followed on
+	 * the next line by the plain-text figcaption when one is present.
+	 * Blocks without a usable src are skipped.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Image markdown, or null when the block has no image source.
+	 */
+	private static function image( array $block ): ?string {
+		$html = $block['innerHTML'] ?? '';
+		$src  = '';
+		$alt  = '';
+
+		$processor = new \WP_HTML_Tag_Processor( $html );
+		if ( $processor->next_tag( 'IMG' ) ) {
+			// get_attribute() returns null for a missing attribute and
+			// true for a value-less one; only string values are usable.
+			$src_attr = $processor->get_attribute( 'src' );
+			$alt_attr = $processor->get_attribute( 'alt' );
+			$src      = \is_string( $src_attr ) ? $src_attr : '';
+			$alt      = \is_string( $alt_attr ) ? $alt_attr : '';
+		}
+
+		if ( '' === $src ) {
+			return null;
+		}
+
+		// Percent-encode parentheses so the URL cannot terminate the
+		// markdown link target early (same treatment as inline links).
+		$md = '![' . $alt . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $src ) . ')';
+
+		// Check for a caption in figcaption.
+		$caption_proc = new \WP_HTML_Tag_Processor( $html );
+		if ( $caption_proc->next_tag( 'FIGCAPTION' ) ) {
+			// Cut away everything up to the opening <figcaption> and
+			// everything from </figcaption> onwards BEFORE stripping
+			// the remaining tags, so sibling content after
+			// </figcaption> (e.g. a trailing <p> inside the same
+			// <figure>) doesn't bleed into the caption text.
+			$caption = self::safe_replace( '#.*<figcaption[^>]*>#si', '', $html );
+			$caption = self::safe_replace( '#</figcaption>.*#si', '', $caption );
+			$caption = \trim( \wp_strip_all_tags( $caption ) );
+
+			if ( '' !== $caption ) {
+				$md .= "\n" . $caption;
+			}
+		}
+
+		return $md;
+	}
+
+	/**
+	 * Convert a list block to markdown list items.
+	 *
+	 * Each inner block becomes one line, prefixed with "- " or, when
+	 * attrs.ordered is set, with a running "N. " counter. Items with no
+	 * text are skipped and do not consume a number.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Newline-joined items, or null when the list has no items with text.
+	 */
+	private static function listing( array $block ): ?string {
+		$ordered = ! empty( $block['attrs']['ordered'] );
+		$items   = array();
+		$counter = 0;
+
+		if ( ! empty( $block['innerBlocks'] ) ) {
+			foreach ( $block['innerBlocks'] as $inner ) {
+				$text = \trim( self::inline_html_to_markdown( $inner['innerHTML'] ?? '' ) );
+
+				// Strict comparison: empty() would also drop an item
+				// whose entire text is "0".
+				if ( '' === $text ) {
+					continue;
+				}
+
+				++$counter;
+				$prefix  = $ordered ? $counter . '. ' : '- ';
+				$items[] = $prefix . $text;
+			}
+		}
+
+		return empty( $items ) ? null : \implode( "\n", $items );
+	}
+
+	/**
+	 * Convert a quote block to a markdown blockquote.
+	 *
+	 * Inner blocks are converted individually; when none of them yields
+	 * markdown, the block's own innerHTML is used instead. Every line of
+	 * the result is prefixed with "> ".
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Blockquote markdown, or null when the quote has no text.
+	 */
+	private static function quote( array $block ): ?string {
+		$lines = array();
+
+		if ( ! empty( $block['innerBlocks'] ) ) {
+			foreach ( $block['innerBlocks'] as $inner ) {
+				$md = self::transform_block( $inner );
+				if ( null !== $md ) {
+					$lines[] = $md;
+				}
+			}
+		}
+
+		if ( empty( $lines ) ) {
+			$text = \trim( self::inline_html_to_markdown( $block['innerHTML'] ?? '' ) );
+
+			// Strict comparison: empty() would also discard a quote
+			// whose entire text is "0".
+			if ( '' === $text ) {
+				return null;
+			}
+
+			$lines = array( $text );
+		}
+
+		$quoted = \implode( "\n", $lines );
+
+		// Inner markdown may itself span several lines, so re-split
+		// before prefixing each line with >.
+		return \implode(
+			"\n",
+			\array_map(
+				static fn( $line ) => '> ' . $line,
+				\explode( "\n", $quoted )
+			)
+		);
+	}
+
+	/**
+	 * Convert a code block to a fenced markdown code block.
+	 *
+	 * Tags are stripped and entities decoded so the fence holds the
+	 * literal source text. The fence is made one backtick longer than
+	 * the longest backtick run inside the code (minimum three), so code
+	 * that itself contains a backtick fence cannot close the block early.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Fenced code block, or null when the block has no text.
+	 */
+	private static function code( array $block ): ?string {
+		$text = \wp_strip_all_tags( $block['innerHTML'] ?? '' );
+		$text = \html_entity_decode( $text, ENT_QUOTES, 'UTF-8' );
+		$text = \trim( $text );
+
+		// Strict comparison: empty() would also drop a snippet that is just "0".
+		if ( '' === $text ) {
+			return null;
+		}
+
+		// Keep only the first token of the language attribute: an info
+		// string containing a backtick or a line break would corrupt
+		// the opening fence.
+		$lang = $block['attrs']['language'] ?? '';
+		$lang = \is_string( $lang ) ? \trim( $lang ) : '';
+		$lang = \substr( $lang, 0, \strcspn( $lang, " \t\r\n`" ) );
+
+		$longest = 0;
+		if ( \preg_match_all( '/`+/', $text, $runs ) ) {
+			$longest = \max( \array_map( 'strlen', $runs[0] ) );
+		}
+
+		$fence = \str_repeat( '`', \max( 3, $longest + 1 ) );
+
+		return $fence . $lang . "\n" . $text . "\n" . $fence;
+	}
+
+	/**
+	 * Convert a preformatted block.
+	 *
+	 * Rendered exactly like a code block: the work is delegated to code().
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Whatever code() returns for this block.
+	 */
+	private static function preformatted( array $block ): ?string {
+		$fenced = self::code( $block );
+
+		return $fenced;
+	}
+
+	/**
+	 * Flatten a container block into the markdown of its children.
+	 *
+	 * Each inner block is converted on its own; children that produce
+	 * nothing are dropped and the rest are joined with a blank line.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Joined child markdown, or null when no child produced output.
+	 */
+	private static function container( array $block ): ?string {
+		if ( empty( $block['innerBlocks'] ) ) {
+			return null;
+		}
+
+		$converted = \array_map(
+			static fn( array $child ): ?string => self::transform_block( $child ),
+			$block['innerBlocks']
+		);
+
+		$kept = \array_filter(
+			$converted,
+			static fn( ?string $piece ): bool => null !== $piece
+		);
+
+		if ( empty( $kept ) ) {
+			return null;
+		}
+
+		return \implode( "\n\n", $kept );
+	}
+
+	/**
+	 * Handle block types that have no dedicated converter.
+	 *
+	 * A block with inner blocks is treated as a container; otherwise
+	 * its own innerHTML goes through the inline converter.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Markdown, or null when the block yields nothing.
+	 */
+	private static function fallback( array $block ): ?string {
+		if ( empty( $block['innerBlocks'] ) ) {
+			$markdown = self::inline_html_to_markdown( $block['innerHTML'] ?? '' );
+
+			return '' !== $markdown ? $markdown : null;
+		}
+
+		return self::container( $block );
+	}
+
+	/**
+	 * Convert inline HTML formatting to markdown.
+	 *
+	 * Handles, in this order: images, links, bold, italic,
+	 * strikethrough, inline code and line breaks. Whatever markup is
+	 * left afterwards (block-level wrappers, unknown tags) is stripped
+	 * and HTML entities are decoded. The order matters: tags must be
+	 * rewritten before the final strip, and entities are decoded last so
+	 * that escaped markup such as "&lt;b&gt;" is never mistaken for a tag.
+	 *
+	 * Every pattern is anchored on the tag it targets; opening tags may
+	 * carry attributes (e.g. a class attribute on a strong element).
+	 *
+	 * @param string $html HTML string.
+	 * @return string Markdown string, trimmed; '' when nothing remains.
+	 */
+	private static function inline_html_to_markdown( string $html ): string {
+		$md = \trim( $html );
+
+		// Strict comparison: empty() would also discard the literal text "0".
+		if ( '' === $md ) {
+			return '';
+		}
+
+		// Inline images.
+		$md = self::safe_replace_callback(
+			'#<img\s[^>]*>#si',
+			static function ( $m ) {
+				$processor = new \WP_HTML_Tag_Processor( $m[0] );
+				if ( ! $processor->next_tag( 'IMG' ) ) {
+					return '';
+				}
+
+				// get_attribute() returns null for a missing attribute
+				// and true for a value-less one; only strings are usable.
+				$src = $processor->get_attribute( 'src' );
+				$alt = $processor->get_attribute( 'alt' );
+				$src = \is_string( $src ) ? $src : '';
+				$alt = \is_string( $alt ) ? $alt : '';
+
+				if ( '' === $src ) {
+					return '';
+				}
+
+				return '![' . $alt . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $src ) . ')';
+			},
+			$md
+		);
+
+		// Links — percent-encode parentheses to avoid breaking markdown syntax.
+		$md = self::safe_replace_callback(
+			'#<a\s[^>]*?href=["\']([^"\']*)["\'][^>]*>(.*?)</a>#si',
+			static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . \str_replace( array( '(', ')' ), array( '%28', '%29' ), $m[1] ) . ')',
+			$md
+		);
+
+		// Bold.
+		$md = self::safe_replace( '#<(?:strong|b)(?:\s[^>]*)?>(.*?)</(?:strong|b)>#si', '**$1**', $md );
+
+		// Italic.
+		$md = self::safe_replace( '#<(?:em|i)(?:\s[^>]*)?>(.*?)</(?:em|i)>#si', '*$1*', $md );
+
+		// Strikethrough.
+		$md = self::safe_replace( '#<(?:s|del|strike)(?:\s[^>]*)?>(.*?)</(?:s|del|strike)>#si', '~~$1~~', $md );
+
+		// Inline code.
+		$md = self::safe_replace( '#<code(?:\s[^>]*)?>(.*?)</code>#si', '`$1`', $md );
+
+		// Line breaks become markdown hard breaks (two spaces + newline).
+		$md = self::safe_replace( '#<br\s*/?>#si', "  \n", $md );
+
+		// Strip block-level wrappers and remaining tags.
+		$md = \wp_strip_all_tags( $md );
+
+		// Decode HTML entities.
+		$md = \html_entity_decode( $md, ENT_QUOTES, 'UTF-8' );
+
+		return \trim( $md );
+	}
+
+	/**
+	 * preg_replace() that never loses the input.
+	 *
+	 * preg_replace() returns null when the PCRE engine gives up (for
+	 * example on a backtrack or recursion limit). Passing that null on
+	 * to later string operations would silently wipe the buffer, so on
+	 * failure a warning is raised and the untouched subject is returned.
+	 *
+	 * @param string $pattern     Pattern.
+	 * @param string $replacement Replacement.
+	 * @param string $subject     Input.
+	 * @return string Replaced string, or the original on PCRE failure.
+	 */
+	private static function safe_replace( string $pattern, string $replacement, string $subject ): string {
+		$replaced = \preg_replace( $pattern, $replacement, $subject );
+
+		if ( null !== $replaced ) {
+			return $replaced;
+		}
+
+		self::warn_pcre_failure( $pattern );
+
+		return $subject;
+	}
+
+	/**
+	 * preg_replace_callback() with the same null guard as safe_replace().
+	 *
+	 * On PCRE failure a warning is raised and the subject is returned
+	 * unchanged.
+	 *
+	 * @param string   $pattern  Pattern.
+	 * @param callable $callback Callback.
+	 * @param string   $subject  Input.
+	 * @return string Replaced string, or the original on PCRE failure.
+	 */
+	private static function safe_replace_callback( string $pattern, callable $callback, string $subject ): string {
+		$replaced = \preg_replace_callback( $pattern, $callback, $subject );
+
+		if ( null !== $replaced ) {
+			return $replaced;
+		}
+
+		self::warn_pcre_failure( $pattern );
+
+		return $subject;
+	}
+
+	/**
+	 * Report a PCRE failure without interrupting the conversion.
+	 *
+	 * Does nothing when wp_trigger_error() is not defined.
+	 *
+	 * @param string $pattern The pattern that failed.
+	 */
+	private static function warn_pcre_failure( string $pattern ): void {
+		if ( ! \function_exists( 'wp_trigger_error' ) ) {
+			return;
+		}
+
+		$message = \sprintf( 'PCRE failure on pattern %s; preserving input.', $pattern );
+
+		\wp_trigger_error( __METHOD__, $message );
+	}
+}
diff --git a/includes/content-parser/interface-content-parser.php b/includes/content-parser/interface-content-parser.php
index 4f11827..c9264b0 100644
--- a/includes/content-parser/interface-content-parser.php
+++ b/includes/content-parser/interface-content-parser.php
@@ -21,7 +21,10 @@ interface Content_Parser {
* Parse WordPress post content into an AT Protocol content object.
*
* The returned array must include a '$type' key identifying the
- * lexicon type (e.g. 'at.markpub.markdown').
+ * lexicon type (e.g. 'at.markpub.markdown'). Return null to signal
+ * that the parser produced no usable output — Document will then
+ * omit the content field — which is preferable to shipping an
+ * empty-text record.
*
* Receives raw post content so parsers can choose their own
* strategy: parse_blocks() for block-aware parsing, or
@@ -29,9 +32,9 @@ interface Content_Parser {
*
* @param string $content Raw post content (post_content).
* @param \WP_Post $post The WordPress post object.
- * @return array AT Protocol content object.
+ * @return array|null AT Protocol content object, or null to omit.
*/
- public function parse( string $content, \WP_Post $post ): array;
+ public function parse( string $content, \WP_Post $post ): ?array;
/**
* The lexicon NSID this parser produces.
diff --git a/includes/transformer/class-document.php b/includes/transformer/class-document.php
index 98a8a72..3745616 100644
--- a/includes/transformer/class-document.php
+++ b/includes/transformer/class-document.php
@@ -177,6 +177,10 @@ private function get_content(): ?array {
$content = $parser->parse( $this->object->post_content, $this->object );
+ if ( null === $content ) {
+ return null;
+ }
+
/**
* Filters the parsed content object before adding to the document record.
*
diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php
new file mode 100644
index 0000000..bf59ea3
--- /dev/null
+++ b/tests/phpunit/tests/content-parser/class-test-markpub.php
@@ -0,0 +1,591 @@
+parser = new Markpub();
+ }
+
+ /**
+ * Test get_type returns the markpub NSID.
+ */
+ public function test_get_type() {
+ $this->assertSame( 'at.markpub.markdown', $this->parser->get_type() );
+ }
+
+ /**
+ * Test parse returns correct top-level structure.
+ */
+ public function test_parse_returns_correct_structure() {
+ $post = self::factory()->post->create_and_get();
+ $result = $this->parser->parse(
+ 'Hello world
',
+ $post
+ );
+
+ $this->assertArrayHasKey( '$type', $result );
+ $this->assertSame( 'at.markpub.markdown', $result['$type'] );
+ $this->assertArrayHasKey( 'text', $result );
+ $this->assertSame( 'at.markpub.text', $result['text']['$type'] );
+ $this->assertArrayHasKey( 'markdown', $result['text'] );
+ $this->assertSame( 'gfm', $result['flavor'] );
+ $this->assertContains( 'strikethrough', $result['extensions'] );
+ }
+
+ /**
+ * Test paragraph conversion.
+ */
+ public function test_converts_paragraphs() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\nFirst paragraph
\n\n\n"
+ . "\nSecond paragraph
\n";
+
+ $result = $this->parser->parse( $content, $post );
+ $markdown = $result['text']['markdown'];
+
+ $this->assertStringContainsString( 'First paragraph', $markdown );
+ $this->assertStringContainsString( 'Second paragraph', $markdown );
+ $this->assertStringNotContainsString( '', $markdown );
+ }
+
+ /**
+ * Test heading conversion.
+ */
+ public function test_converts_headings() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
My Heading ';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '## My Heading', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test heading level 3.
+ */
+ public function test_converts_heading_level_3() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Sub Heading ';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '### Sub Heading', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test link conversion in a paragraph.
+ */
+ public function test_converts_links() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Visit Example today.
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '[Example](https://example.com)', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test bold conversion.
+ */
+ public function test_converts_bold() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'This is bold text.
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '**bold**', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test italic conversion.
+ */
+ public function test_converts_italic() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'This is italic text.
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '*italic*', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test image block conversion.
+ */
+ public function test_converts_images() {
+ $post = self::factory()->post->create_and_get();
+ $content = ' ';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test code block conversion.
+ */
+ public function test_converts_code_blocks() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'echo "hello"; ';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "```\necho \"hello\";\n```", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test inline code conversion.
+ */
+ public function test_converts_inline_code() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Use the parse() method.
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '`parse()`', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test separator block becomes horizontal rule.
+ */
+ public function test_converts_separator() {
+ $post = self::factory()->post->create_and_get();
+ $content = "Before
\n\n"
+ . " \n\n"
+ . 'After
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "Before\n\n---\n\nAfter", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test empty content returns null so Document can omit content.
+ */
+ public function test_empty_content() {
+ $post = self::factory()->post->create_and_get();
+
+ $this->assertNull( $this->parser->parse( '', $post ) );
+ }
+
+ /**
+ * Test the atmosphere_html_to_markdown filter.
+ *
+ * Verifies the filter callback receives ($markdown, $content) so
+ * callers can inspect the raw source alongside the conversion.
+ */
+ public function test_html_to_markdown_filter() {
+ $received = array();
+
+ \add_filter(
+ 'atmosphere_html_to_markdown',
+ static function ( $markdown, $content ) use ( &$received ) {
+ $received = array(
+ 'markdown' => $markdown,
+ 'content' => $content,
+ );
+ return 'custom markdown';
+ },
+ 10,
+ 2
+ );
+
+ $post = self::factory()->post->create_and_get();
+ $source = 'Hello
';
+ $result = $this->parser->parse( $source, $post );
+
+ $this->assertSame( 'custom markdown', $result['text']['markdown'] );
+ $this->assertSame( 'Hello', $received['markdown'] );
+ $this->assertSame( $source, $received['content'] );
+
+ \remove_all_filters( 'atmosphere_html_to_markdown' );
+ }
+
+ /**
+ * Test strikethrough conversion.
+ */
+ public function test_converts_strikethrough() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'This is deleted text.
';
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( '~~deleted~~', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test classic (non-block) content is handled as fallback.
+ */
+ public function test_classic_content_fallback() {
+ $post = self::factory()->post->create_and_get();
+ $result = $this->parser->parse( 'Classic editor content with bold .
', $post );
+ $md = $result['text']['markdown'];
+
+ $this->assertStringContainsString( '**bold**', $md );
+ $this->assertStringContainsString( 'Classic editor content', $md );
+ }
+
+ /**
+ * Test that sibling content after </figcaption> inside the same <figure>
+ * does not bleed into the extracted caption text.
+ */
+ public function test_image_caption_does_not_include_sibling_content() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n"
+ . ''
+ . ' '
+ . 'Real caption '
+ . 'Should not appear in caption
'
+ . ' '
+ . "\n";
+
+ $result = $this->parser->parse( $content, $post );
+ $md = $result['text']['markdown'];
+
+ $this->assertStringContainsString( 'Real caption', $md );
+ $this->assertStringNotContainsString( 'Should not appear in caption', $md );
+ }
+
+ /**
+ * Test that a post made up entirely of blocks that produce no
+ * markdown (e.g. core/spacer) returns null so Document can omit
+ * the content field.
+ */
+ public function test_parse_returns_null_when_markdown_is_empty() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n"
+ . '
' . "\n"
+ . '';
+
+ $this->assertNull( $this->parser->parse( $content, $post ) );
+ }
+
+ /**
+ * Test ordered list produces numbered markdown.
+ */
+ public function test_listing_ordered() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n"
+ . 'First '
+ . 'Second '
+ . 'Third '
+ . " \n";
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "1. First\n2. Second\n3. Third", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test unordered list produces dashed markdown.
+ */
+ public function test_listing_unordered() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n"
+ . 'First '
+ . 'Second '
+ . 'Third '
+ . " \n";
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "- First\n- Second\n- Third", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test ordered list skips empty items without gapping the counter.
+ */
+ public function test_listing_skips_empty_items_without_gap() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n"
+ . 'First '
+ . ' '
+ . 'Third '
+ . " \n";
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "1. First\n2. Third", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test list items preserve inline formatting.
+ */
+ public function test_listing_preserves_inline_formatting() {
+ $post = self::factory()->post->create_and_get();
+ $content = "\n\n";
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '- some **bold**', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test quote block wraps an inner paragraph in a "> " prefix.
+ */
+ public function test_quote_with_inner_paragraph() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . 'Paragraph text
'
+ . ' ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '> Paragraph text', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test quote block prefixes every inner line.
+ */
+ public function test_quote_prefixes_every_line() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . 'First
'
+ . 'Second
'
+ . ' ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "> First\n> Second", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test quote falls back to innerHTML when no innerBlocks are present.
+ */
+ public function test_quote_innerhtml_fallback() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Direct quote text ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '> Direct quote text', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test core/group containers flatten inner block markdown.
+ */
+ public function test_container_group() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . '
Inside group
'
+ . '
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'Inside group', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test core/columns containers flatten inner block markdown.
+ */
+ public function test_container_columns() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . '
Inside columns
'
+ . '
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'Inside columns', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test core/column containers flatten inner block markdown.
+ */
+ public function test_container_column() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . '
Inside column
'
+ . '
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'Inside column', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test fallback delegates to container() when innerBlocks exist.
+ */
+ public function test_fallback_delegates_to_container_with_inner_blocks() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . '
Inside unknown
'
+ . '
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'Inside unknown', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test image() skips blocks without an <img> tag so surrounding
+ * content renders with no empty separator.
+ *
+ * Uses a mixed fixture so a regression returning "" instead of null
+ * would produce a leading blank line and fail this exact-match
+ * assertion (the whole-post empty guard in parse() would otherwise
+ * mask the handler bug).
+ */
+ public function test_image_without_img_tag_is_skipped_cleanly() {
+ $post = self::factory()->post->create_and_get();
+ $content = ''
+ . 'Just a caption '
+ . " \n\n"
+ . 'After
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'After', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test heading defaults to level 2 when attrs.level is missing.
+ */
+ public function test_heading_defaults_to_level_2() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Default level ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '## Default level', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test whitespace-only heading block is skipped cleanly.
+ *
+ * Mixed with a non-empty sibling so a regression returning "" from
+ * heading() would produce a leading blank line and fail the exact
+ * assertion (the whole-post empty guard would otherwise hide it).
+ */
+ public function test_heading_whitespace_is_skipped_cleanly() {
+ $post = self::factory()->post->create_and_get();
+ $content = " \n\n"
+ . 'After
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'After', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test whitespace-only paragraph block is skipped cleanly.
+ *
+ * Mixed with a non-empty sibling so a regression returning "" from
+ * paragraph() would produce a leading blank line and fail the exact
+ * assertion (the whole-post empty guard would otherwise hide it).
+ */
+ public function test_paragraph_whitespace_is_skipped_cleanly() {
+ $post = self::factory()->post->create_and_get();
+ $content = "
\n\n"
+ . 'After
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'After', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test code block emits the configured language in the fence.
+ */
+ public function test_code_emits_language_fence() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'echo 1; ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringStartsWith( "```php\n", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test code block decodes HTML entities inside the fence.
+ */
+ public function test_code_decodes_html_entities() {
+ $post = self::factory()->post->create_and_get();
+ $content = '<div> ';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( "```\n\n```", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test link URLs have parentheses percent-encoded to protect markdown syntax.
+ */
+ public function test_link_url_parens_percent_encoded() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
See Foo .
';
+
+ $result = $this->parser->parse( $content, $post );
+ $md = $result['text']['markdown'];
+
+ $this->assertStringContainsString( '%28bar%29', $md );
+ $this->assertStringNotContainsString( '(bar)', $md );
+ }
+
+ /**
+ * Test <br> converts to a markdown hard break (two spaces + newline).
+ */
+ public function test_br_converts_to_hard_break() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
line1 line2
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertStringContainsString( "line1 \nline2", $result['text']['markdown'] );
+ }
+
+ /**
+ * Test HTML entities are decoded in inline paragraph text.
+ */
+ public function test_inline_html_entities_decoded() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
AT&T’s
';
+
+ $result = $this->parser->parse( $content, $post );
+ $md = $result['text']['markdown'];
+
+ $this->assertStringContainsString( 'AT&T', $md );
+ $this->assertStringContainsString( "\xE2\x80\x99", $md );
+ }
+
+ /**
+ * Test inline <img> inside a paragraph converts via inline_html_to_markdown.
+ */
+ public function test_inline_image_inside_paragraph() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
Look here
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( 'Look  here', $result['text']['markdown'] );
+ }
+
+ /**
+ * Test nested inline formatting (bold wrapping italic).
+ */
+ public function test_nested_inline_formatting() {
+ $post = self::factory()->post->create_and_get();
+ $content = '
bold italic
';
+
+ $result = $this->parser->parse( $content, $post );
+
+ $this->assertSame( '**bold *italic***', $result['text']['markdown'] );
+ }
+}
diff --git a/tests/phpunit/tests/transformer/class-stub-parser.php b/tests/phpunit/tests/transformer/class-stub-parser.php
index b869820..0231dd8 100644
--- a/tests/phpunit/tests/transformer/class-stub-parser.php
+++ b/tests/phpunit/tests/transformer/class-stub-parser.php
@@ -14,6 +14,13 @@
*/
class Stub_Parser implements Content_Parser {
+ /**
+ * Whether parse() should return null.
+ *
+ * @var bool
+ */
+ public bool $return_null = false;
+
/**
* {@inheritDoc}
*/
@@ -27,7 +34,11 @@ public function get_type(): string {
* @param string $content Raw post content.
* @param \WP_Post $post The WordPress post object.
*/
- public function parse( string $content, \WP_Post $post ): array { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
+ public function parse( string $content, \WP_Post $post ): ?array { // phpcs:ignore Generic.CodeAnalysis.UnusedFunctionParameter.Found, VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
+ if ( $this->return_null ) {
+ return null;
+ }
+
return array(
'$type' => 'test.stub.parser',
'text' => $content,
diff --git a/tests/phpunit/tests/transformer/class-test-document.php b/tests/phpunit/tests/transformer/class-test-document.php
index 3915ac3..01c24a1 100644
--- a/tests/phpunit/tests/transformer/class-test-document.php
+++ b/tests/phpunit/tests/transformer/class-test-document.php
@@ -20,9 +20,12 @@
class Test_Document extends WP_UnitTestCase {
/**
- * Test that content field is absent when no parser is registered.
+ * Test that content field is absent when parser filter returns null.
*/
public function test_content_absent_without_parser() {
+ \remove_all_filters( 'atmosphere_content_parser' );
+ \add_filter( 'atmosphere_content_parser', '__return_null' );
+
$post = self::factory()->post->create_and_get(
array( 'post_content' => 'Some content here.' )
);
@@ -31,6 +34,8 @@ public function test_content_absent_without_parser() {
$record = $transformer->transform();
$this->assertArrayNotHasKey( 'content', $record );
+
+ \remove_all_filters( 'atmosphere_content_parser' );
}
/**
@@ -95,6 +100,40 @@ public function test_content_ignored_with_invalid_parser() {
\remove_all_filters( 'atmosphere_content_parser' );
}
+ /**
+ * Test that when the parser returns null for non-empty content,
+ * the content field is omitted and the atmosphere_document_content
+ * filter is not invoked.
+ */
+ public function test_content_absent_when_parser_returns_null() {
+ $parser = new Stub_Parser();
+ $parser->return_null = true;
+
+ \add_filter( 'atmosphere_content_parser', static fn() => $parser );
+
+ $filter_called = false;
+ \add_filter(
+ 'atmosphere_document_content',
+ static function ( $content ) use ( &$filter_called ) {
+ $filter_called = true;
+ return $content;
+ }
+ );
+
+ $post = self::factory()->post->create_and_get(
+ array( 'post_content' => 'Some content.' )
+ );
+
+ $transformer = new Document( $post );
+ $record = $transformer->transform();
+
+ $this->assertArrayNotHasKey( 'content', $record );
+ $this->assertFalse( $filter_called );
+
+ \remove_all_filters( 'atmosphere_content_parser' );
+ \remove_all_filters( 'atmosphere_document_content' );
+ }
+
/**
* Test that content field is absent for empty post content.
*/