diff --git a/.github/changelog/content-parser b/.github/changelog/content-parser
new file mode 100644
index 0000000..5beee3d
--- /dev/null
+++ b/.github/changelog/content-parser
@@ -0,0 +1,4 @@
+Significance: minor
+Type: added
+
+Add rich content support for standard.site documents using the Markpub format.
diff --git a/composer.json b/composer.json
index d0940c5..887285b 100644
--- a/composer.json
+++ b/composer.json
@@ -27,8 +27,7 @@
},
"autoload": {
"classmap": [
- "includes/",
- "integrations/"
+ "includes/"
]
},
"scripts": {
diff --git a/includes/class-atmosphere.php b/includes/class-atmosphere.php
index 8110424..b0099ed 100644
--- a/includes/class-atmosphere.php
+++ b/includes/class-atmosphere.php
@@ -13,7 +13,6 @@
use Atmosphere\Transformer\Document;
use Atmosphere\Transformer\Publication;
use Atmosphere\Transformer\TID;
-use Atmosphere\Integrations\Load;
use Atmosphere\WP_Admin\Admin;
/**
@@ -41,8 +40,8 @@ public function init(): void {
\add_action( 'init', array( $this, 'register_wellknown_rewrite' ) );
\add_action( 'template_redirect', array( $this, 'serve_wellknown_publication' ) );
- // Plugin integrations.
- Load::init();
+ // JSON preview for AT Protocol records.
+ \add_action( 'template_redirect', array( $this, 'preview' ) );
// Post lifecycle hooks.
\add_action( 'transition_post_status', array( $this, 'on_status_change' ), 10, 3 );
@@ -145,6 +144,45 @@ public function serve_wellknown_publication(): void {
exit;
}
+	/**
+	 * Serve a JSON preview of the AT Protocol record for a post.
+	 *
+	 * Append ?atproto to a singular post URL to see the document
+	 * record JSON. The query value itself is not read by this method;
+	 * a callback on the atmosphere_content_parser filter may inspect
+	 * ?atproto={parser} to swap in a specific content parser.
+	 *
+	 * The preview is only served to users who can edit the queried
+	 * post. Everyone else falls through to the normal template.
+	 */
+	public function preview(): void {
+		// phpcs:ignore WordPress.Security.NonceVerification.Recommended
+		if ( ! isset( $_GET['atproto'] ) || ! \is_singular() ) {
+			return;
+		}
+
+		$post = \get_queried_object();
+
+		if ( ! $post instanceof \WP_Post ) {
+			return;
+		}
+
+		// Check the meta capability for this specific post rather than the
+		// generic 'edit_posts' primitive, so users cannot dump the full record
+		// of a post they are not allowed to edit (e.g. password-protected ones).
+		if ( ! \current_user_can( 'edit_post', $post->ID ) ) {
+			return;
+		}
+
+		if ( ! \in_array( $post->post_type, Backfill::syncable_post_types(), true ) ) {
+			\status_header( 404 );
+			exit;
+		}
+
+		$transformer = new Document( $post );
+		$record      = $transformer->transform();
+
+		\status_header( 200 );
+		\header( 'Content-Type: application/json; charset=utf-8' );
+		echo \wp_json_encode( $record, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
+		exit;
+	}
+
/**
* Handle post status transitions.
*
diff --git a/includes/content-parser/class-markpub.php b/includes/content-parser/class-markpub.php
new file mode 100644
index 0000000..0b31bc6
--- /dev/null
+++ b/includes/content-parser/class-markpub.php
@@ -0,0 +1,375 @@
+ 'at.markpub.markdown',
+ 'text' => array(
+ '$type' => 'at.markpub.text',
+ 'markdown' => $markdown,
+ ),
+ 'flavor' => 'gfm',
+ 'extensions' => array( 'strikethrough', 'table' ),
+ );
+ }
+
+	/**
+	 * Convert a single WordPress block to markdown.
+	 *
+	 * Blocks without a name (classic content or inter-block whitespace) are
+	 * converted from their raw HTML; named blocks are dispatched to the
+	 * matching handler, with unknown block types going to the fallback.
+	 *
+	 * @param array $block Parsed block from parse_blocks().
+	 * @return string|null Markdown string or null to skip.
+	 */
+	private static function transform_block( array $block ): ?string {
+		if ( empty( $block['blockName'] ) ) {
+			// Classic (non-block) content or whitespace.
+			$html = \trim( $block['innerHTML'] ?? '' );
+
+			// Strict comparison: empty() would also discard content that is just "0".
+			if ( '' === $html ) {
+				return null;
+			}
+
+			return self::inline_html_to_markdown( $html );
+		}
+
+		return match ( $block['blockName'] ) {
+			'core/paragraph'    => self::paragraph( $block ),
+			'core/heading'      => self::heading( $block ),
+			'core/image'        => self::image( $block ),
+			'core/list'         => self::listing( $block ),
+			'core/quote'        => self::quote( $block ),
+			'core/code'         => self::code( $block ),
+			'core/preformatted' => self::preformatted( $block ),
+			'core/separator'    => '---',
+			'core/spacer'       => null,
+			'core/group',
+			'core/columns',
+			'core/column'       => self::container( $block ),
+			default             => self::fallback( $block ),
+		};
+	}
+
+	/**
+	 * Paragraph block.
+	 *
+	 * Converts the block's inner HTML to inline markdown.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Markdown text, or null when the block has no HTML.
+	 */
+	private static function paragraph( array $block ): ?string {
+		$html = \trim( $block['innerHTML'] ?? '' );
+
+		// Strict comparison: empty() would also discard content that is just "0".
+		if ( '' === $html ) {
+			return null;
+		}
+
+		return self::inline_html_to_markdown( $html );
+	}
+
+	/**
+	 * Heading block.
+	 *
+	 * Emits an ATX-style heading ("#" markers followed by the text). The
+	 * level comes from the block's "level" attribute and defaults to 2.
+	 *
+	 * @param array $block Parsed block.
+	 * @return string|null Markdown heading, or null when the heading has no text.
+	 */
+	private static function heading( array $block ): ?string {
+		// Markdown defines heading levels 1-6 only. Clamp so a malformed attribute
+		// cannot yield an invalid marker or a negative str_repeat() count, which
+		// throws a ValueError on PHP 8.
+		$level = \max( 1, \min( 6, (int) ( $block['attrs']['level'] ?? 2 ) ) );
+		$text  = \trim( self::inline_html_to_markdown( $block['innerHTML'] ?? '' ) );
+
+		// Strict comparison: empty() would also discard a heading whose text is "0".
+		if ( '' === $text ) {
+			return null;
+		}
+
+		return \str_repeat( '#', $level ) . ' ' . $text;
+	}
+
+ /**
+ * Image block.
+ *
+ * @param array $block Parsed block.
+ * @return string|null
+ */
+ private static function image( array $block ): ?string {
+ $html = $block['innerHTML'] ?? '';
+ $src = '';
+ $alt = '';
+
+ $processor = new \WP_HTML_Tag_Processor( $html );
+ if ( $processor->next_tag( 'IMG' ) ) {
+ $src = $processor->get_attribute( 'src' ) ?? '';
+ $alt = $processor->get_attribute( 'alt' ) ?? '';
+ }
+
+ if ( empty( $src ) ) {
+ return null;
+ }
+
+ $md = '';
+
+ // Check for a caption in figcaption.
+ $caption_proc = new \WP_HTML_Tag_Processor( $html );
+ if ( $caption_proc->next_tag( 'FIGCAPTION' ) ) {
+ $caption = \wp_strip_all_tags(
+ \preg_replace( '#.*]+>#si',
+ static function ( $m ) {
+ $processor = new \WP_HTML_Tag_Processor( $m[0] );
+ if ( $processor->next_tag( 'IMG' ) ) {
+ $src = $processor->get_attribute( 'src' ) ?? '';
+ $alt = $processor->get_attribute( 'alt' ) ?? '';
+ return '';
+ }
+ return '';
+ },
+ $md
+ );
+
+ // Links.
+ $md = \preg_replace_callback(
+ '#]+href=["\']([^"\']*)["\'][^>]*>(.*?)#si',
+ static fn( $m ) => '[' . \wp_strip_all_tags( $m[2] ) . '](' . $m[1] . ')',
+ $md
+ );
+
+ // Bold.
+ $md = \preg_replace( '#<(?:strong|b)>(.*?)(?:strong|b)>#si', '**$1**', $md );
+
+ // Italic.
+ $md = \preg_replace( '#<(?:em|i)>(.*?)(?:em|i)>#si', '*$1*', $md );
+
+ // Strikethrough.
+ $md = \preg_replace( '#<(?:s|del|strike)>(.*?)(?:s|del|strike)>#si', '~~$1~~', $md );
+
+ // Inline code.
+ $md = \preg_replace( '#
(.*?)#si', '`$1`', $md );
+
+ // Line breaks.
+ $md = \preg_replace( '#
#si', " \n", $md );
+
+ // Strip block-level wrappers and remaining tags.
+ $md = \wp_strip_all_tags( $md );
+
+ // Decode HTML entities.
+ $md = \html_entity_decode( $md, ENT_QUOTES, 'UTF-8' );
+
+ return \trim( $md );
+ }
+}
diff --git a/includes/content-parser/interface-content-parser.php b/includes/content-parser/interface-content-parser.php
new file mode 100644
index 0000000..4f11827
--- /dev/null
+++ b/includes/content-parser/interface-content-parser.php
@@ -0,0 +1,42 @@
+ $this->to_iso8601( $this->object->post_date_gmt ),
);
- // Publication reference.
+ // Publication reference (required by spec).
$pub_tid = \get_option( 'atmosphere_publication_tid' );
if ( $pub_tid ) {
$record['site'] = build_at_uri( get_did(), 'site.standard.publication', $pub_tid );
+ } else {
+ // Fall back to site URL for standalone documents.
+ $record['site'] = \untrailingslashit( \get_home_url() );
}
// Relative path.
@@ -89,6 +94,12 @@ public function transform(): array {
$record['textContent'] = $text_content;
}
+ // Parsed rich content (open union).
+ $content = $this->get_content();
+ if ( ! empty( $content ) ) {
+ $record['content'] = $content;
+ }
+
// Tags.
$tags = $this->collect_tags( $this->object );
if ( ! empty( $tags ) ) {
@@ -140,6 +151,43 @@ public function get_rkey(): string {
return $rkey;
}
+	/**
+	 * Get parsed content for the document's content union field.
+	 *
+	 * Runs the post content through the parser chosen by the
+	 * atmosphere_content_parser filter, then lets the
+	 * atmosphere_document_content filter adjust the result.
+	 *
+	 * @return array|null Parsed content object, or null when the post has no
+	 *                    content, no valid parser is available, or a filter
+	 *                    returned something other than an array.
+	 */
+	private function get_content(): ?array {
+		// Strict comparison: empty() would also treat a post body of "0" as missing.
+		if ( '' === \trim( $this->object->post_content ) ) {
+			return null;
+		}
+
+		/**
+		 * Filters the content parser used for site.standard.document records.
+		 *
+		 * Return a Content_Parser instance to override the default parser.
+		 * Return null to disable the content field entirely.
+		 *
+		 * @param Content_Parser|null $parser The content parser. Default: Markpub.
+		 * @param \WP_Post            $post   The WordPress post.
+		 */
+		$parser = \apply_filters( 'atmosphere_content_parser', new Markpub(), $this->object );
+
+		if ( ! $parser instanceof Content_Parser ) {
+			return null;
+		}
+
+		$content = $parser->parse( $this->object->post_content, $this->object );
+
+		/**
+		 * Filters the parsed content object before adding to the document record.
+		 *
+		 * @param array          $content The parsed content object.
+		 * @param \WP_Post       $post    The WordPress post.
+		 * @param Content_Parser $parser  The parser that produced the content.
+		 */
+		$content = \apply_filters( 'atmosphere_document_content', $content, $this->object, $parser );
+
+		// Filter callbacks are untyped; returning a non-array (e.g. false) from a
+		// ?array method would raise a TypeError, so treat it as "no content".
+		return \is_array( $content ) ? $content : null;
+	}
+
/**
* Render post content to plain text.
*
diff --git a/tests/phpunit/tests/content-parser/class-test-markpub.php b/tests/phpunit/tests/content-parser/class-test-markpub.php
new file mode 100644
index 0000000..99409ec
--- /dev/null
+++ b/tests/phpunit/tests/content-parser/class-test-markpub.php
@@ -0,0 +1,234 @@
+parser = new Markpub();
+ }
+
+	/**
+	 * The parser must report the Markpub markdown NSID as its type.
+	 */
+	public function test_get_type() {
+		$expected_nsid = 'at.markpub.markdown';
+		$reported_nsid = $this->parser->get_type();
+
+		$this->assertSame( $expected_nsid, $reported_nsid );
+	}
+
+ /**
+ * Test parse returns correct top-level structure.
+ */
+ public function test_parse_returns_correct_structure() {
+ $post = self::factory()->post->create_and_get();
+ $result = $this->parser->parse(
+ '
Hello world
', + $post + ); + + $this->assertArrayHasKey( '$type', $result ); + $this->assertSame( 'at.markpub.markdown', $result['$type'] ); + $this->assertArrayHasKey( 'text', $result ); + $this->assertSame( 'at.markpub.text', $result['text']['$type'] ); + $this->assertArrayHasKey( 'markdown', $result['text'] ); + $this->assertSame( 'gfm', $result['flavor'] ); + $this->assertContains( 'strikethrough', $result['extensions'] ); + } + + /** + * Test paragraph conversion. + */ + public function test_converts_paragraphs() { + $post = self::factory()->post->create_and_get(); + $content = "\nFirst paragraph
\n\n\n" + . "\nSecond paragraph
\n"; + + $result = $this->parser->parse( $content, $post ); + $markdown = $result['text']['markdown']; + + $this->assertStringContainsString( 'First paragraph', $markdown ); + $this->assertStringContainsString( 'Second paragraph', $markdown ); + $this->assertStringNotContainsString( '', $markdown ); + } + + /** + * Test heading conversion. + */ + public function test_converts_headings() { + $post = self::factory()->post->create_and_get(); + $content = '
Visit Example today.
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '[Example](https://example.com)', $result['text']['markdown'] ); + } + + /** + * Test bold conversion. + */ + public function test_converts_bold() { + $post = self::factory()->post->create_and_get(); + $content = 'This is bold text.
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '**bold**', $result['text']['markdown'] ); + } + + /** + * Test italic conversion. + */ + public function test_converts_italic() { + $post = self::factory()->post->create_and_get(); + $content = 'This is italic text.
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '*italic*', $result['text']['markdown'] ); + } + + /** + * Test image block conversion. + */ + public function test_converts_images() { + $post = self::factory()->post->create_and_get(); + $content = '
echo "hello";';
+ $result = $this->parser->parse( $content, $post );
+ $md = $result['text']['markdown'];
+
+ $this->assertStringContainsString( '```', $md );
+ $this->assertStringContainsString( 'echo "hello";', $md );
+ }
+
+ /**
+ * Test inline code conversion.
+ */
+ public function test_converts_inline_code() {
+ $post = self::factory()->post->create_and_get();
+ $content = 'Use the parse() method.
Before
\n\n" + . "After
'; + $result = $this->parser->parse( $content, $post ); + + $this->assertStringContainsString( '---', $result['text']['markdown'] ); + } + + /** + * Test empty content produces empty markdown. + */ + public function test_empty_content() { + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( '', $post ); + + $this->assertSame( '', $result['text']['markdown'] ); + } + + /** + * Test the atmosphere_html_to_markdown filter. + */ + public function test_html_to_markdown_filter() { + \add_filter( + 'atmosphere_html_to_markdown', + static fn() => 'custom markdown', + 10, + 2 + ); + + $post = self::factory()->post->create_and_get(); + $result = $this->parser->parse( + 'Hello
', + $post + ); + + $this->assertSame( 'custom markdown', $result['text']['markdown'] ); + + \remove_all_filters( 'atmosphere_html_to_markdown' ); + } + + /** + * Test strikethrough conversion. + */ + public function test_converts_strikethrough() { + $post = self::factory()->post->create_and_get(); + $content = 'This is deleted text.
Classic editor content with bold.
', $post ); + $md = $result['text']['markdown']; + + $this->assertStringContainsString( '**bold**', $md ); + $this->assertStringContainsString( 'Classic editor content', $md ); + } +}