From 10579bba81a39a40226e22b5520a351842c27789 Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Thu, 30 Apr 2026 15:46:25 -0500 Subject: [PATCH 1/4] Default teaser-thread to 3 posts and embed link card on terminal CTA The teaser-thread strategy now produces a hook + body chunk + CTA reply by default, falling back to the previous hook + CTA pair when the body is too short for a chunk. The terminal CTA reply now carries an app.bsky.embed.external link card (title + excerpt + thumbnail), so direct visitors to a single post still have a clear path back to the WordPress article. Both changes target real-world UX gaps surfaced by FOSSE: 2-post threads get bundled/hidden on bsky.app's profile views, and a CTA without a card provides no rich affordance for the link. The atmosphere_teaser_thread_posts filter keeps its existing contract (2..5 string entries) and stays the universal escape hatch. The embed attaches to whichever entry is terminal so 2-entry filter overrides and the short-post fallback both still ship a CTA-with-card. Fixes Automattic/fosse#64 --- .github/changelog/teaser-thread-3post-card | 4 + includes/transformer/class-post.php | 126 ++++++--- includes/wp-admin/class-admin.php | 2 +- tests/phpunit/tests/class-test-publisher.php | 26 +- .../tests/transformer/class-test-post.php | 244 ++++++++++++++++-- 5 files changed, 349 insertions(+), 53 deletions(-) create mode 100644 .github/changelog/teaser-thread-3post-card diff --git a/.github/changelog/teaser-thread-3post-card b/.github/changelog/teaser-thread-3post-card new file mode 100644 index 0000000..2105f0c --- /dev/null +++ b/.github/changelog/teaser-thread-3post-card @@ -0,0 +1,4 @@ +Significance: minor +Type: changed + +Long-form teaser threads now use a 3-post default (hook, body chunk, "continue reading" reply with a link card), so the thread reliably surfaces on bsky.app profiles and the terminal post offers a clear path back to the WordPress article. 
diff --git a/includes/transformer/class-post.php b/includes/transformer/class-post.php index 0502e81..4f335d9 100644 --- a/includes/transformer/class-post.php +++ b/includes/transformer/class-post.php @@ -386,9 +386,13 @@ public function is_short_form_post(): bool { * permalink + app.bsky.embed.external card. * - `'truncate-link'`: 1 record, body text + inline permalink, * no embed card. - * - `'teaser-thread'`: 2+ records forming a reply chain - * (hook + CTA by default; filterable to 3 posts via - * `atmosphere_teaser_thread_posts`). + * - `'teaser-thread'`: a reply chain of hook + body chunk + CTA + * (3 records by default; falls back to `[ hook, cta ]` when the + * post body is too short for a body chunk). The terminal CTA + * entry carries an `app.bsky.embed.external` link card so the + * reader has a clear path back to the WordPress post regardless + * of which entry surfaces. Filterable via + * `atmosphere_teaser_thread_posts`. * - unknown values: treated as `'link-card'`. * * Empty-body guard: for `'teaser-thread'` and `'truncate-link'`, @@ -457,7 +461,16 @@ public function build_long_form_records(): array { } $records = array(); - foreach ( $this->build_teaser_thread() as $i => $text ) { + $texts = $this->build_teaser_thread(); + $last = \count( $texts ) - 1; + // Attach an `app.bsky.embed.external` link card to the + // terminal CTA entry. Without it, even when the thread + // surfaces, the only link affordance is the URL in the + // CTA's text — a card gives the reader title, excerpt, + // and thumbnail. The embed attaches to "last entry," + // not "index 2," so a 2-entry fallback or filter + // override still ships a CTA-with-card. + foreach ( $texts as $i => $text ) { $records[] = $this->record_for_thread_entry( (string) $text, 0 === $i, @@ -465,7 +478,8 @@ public function build_long_form_records(): array { 'strategy' => 'teaser-thread', 'thread_index' => $i, 'is_thread_reply' => 0 !== $i, - ) + ), + $i === $last ? 
$this->build_embed() : null ); } return $records; @@ -630,7 +644,13 @@ private function build_truncate_link_text(): string { } /** - * Compose the default 2-post teaser thread: hook + CTA-with-link. + * Compose the default teaser thread: hook + body chunk + CTA-with-link. + * + * 2-post self-reply threads bundle/hide on bsky.app's profile views + * (`getAuthorFeed?filter=posts_no_replies` drops the root, + * `posts_with_replies` shows the reply but not the root). A 3-post + * thread surfaces normally on the Posts tab, so the default shape is + * 3 entries: a hook, a body chunk continuing the prose, and the CTA. * * Hook precedence: * 1. If the post has a `post_excerpt`, use it (plain-text @@ -639,47 +659,76 @@ private function build_truncate_link_text(): string { * at this length, so word-boundary fallback is enough. * 2. Otherwise, use the first ~280 chars of the body text, * cut at a **sentence boundary**. The hook is the final - * prose shown before the CTA post, so we never end + * prose shown before the body chunk, so we never end * mid-sentence. 280 leaves ~20 chars of headroom for future * variants that append trailing content. * - * CTA is an internationalised `Continue reading: `. - * - * Filterable via `atmosphere_teaser_thread_posts`. Downstream - * filters may return 3 entries to extend the thread; in that - * case the intermediate body-to-body cut (entry 1 → entry 2) - * may be at a word boundary, but the final body entry before - * the CTA (entry 2 → entry 3) must still cut at a sentence - * boundary. The return contract does not capture this — it's - * the filter author's responsibility. - * - * @return string[] Text of each post in order. At least 2 entries. + * Body chunk: + * - Excerpt-as-hook: the chunk starts from the start of the body — + * curated excerpts are not sliding windows over the body. + * - Body-as-hook: the chunk continues after the hook's cut point; + * hook and chunk are non-overlapping windows over the same + * plain-text body. 
+ * - Same ~280-char sentence-bounded budget as the hook. + * - Dropped (and the output reduces to `[ hook, cta ]`) when the + * post body is exhausted or fewer than ~10 chars of prose remain. + * + * CTA is an internationalised `Continue reading: `. The + * link-card embed attached at the call site (`build_long_form_records`) + * applies to whichever entry is terminal — so the 2-entry fallback + * still ships a CTA-with-card. + * + * Filterable via `atmosphere_teaser_thread_posts`; the filter is the + * final transformation point and may return any 2..5 string entries. + * + * @return string[] Text of each post in order. 2 or 3 entries by + * default; up to 5 when overridden by filter. */ private function build_teaser_thread(): array { $excerpt = sanitize_text( (string) $this->object->post_excerpt ); + $plain = $this->render_post_content_plain( $this->object ); if ( \mb_strlen( $excerpt ) >= 10 ) { - $hook = $this->truncate_to_budget( $excerpt, 300, false ); + $hook = $this->truncate_to_budget( $excerpt, 300, false ); + $chunk_source = $plain; } else { - $plain = $this->render_post_content_plain( $this->object ); - $hook = $this->truncate_to_budget( $plain, 280, true ); - } - - $cta = $this->teaser_thread_cta_text(); + $hook = $this->truncate_to_budget( $plain, 280, true ); + // Strip the hard-cap ellipsis (when present) before measuring + // how much of the plain body the hook consumed; the + // sentence/word-cut paths return clean prefixes so this is a + // no-op there. `mb_substr` keeps the strip char-aware — + // `rtrim($hook, '…')` would strip individual UTF-8 bytes from + // the multi-byte ellipsis sequence and can corrupt the trailing + // non-ASCII char before it. + $consumed = '…' === \mb_substr( $hook, -1 ) + ? 
\mb_substr( $hook, 0, \mb_strlen( $hook ) - 1 ) + : $hook; + $chunk_source = \mb_substr( $plain, \mb_strlen( $consumed ) ); + } + + $chunk_source = \ltrim( $chunk_source ); + $cta = $this->teaser_thread_cta_text(); + + $default = \mb_strlen( $chunk_source ) >= 10 + ? array( $hook, $this->truncate_to_budget( $chunk_source, 280, true ), $cta ) + : array( $hook, $cta ); /** * Filters the default teaser-thread post texts. * - * @param string[] $posts 2-entry array: [ hook, cta ]. + * @param string[] $posts Default array: 2 entries `[ hook, cta ]` + * when the body is too short for a body + * chunk, otherwise 3 entries + * `[ hook, body_chunk, cta ]`. * @param \WP_Post $post The post being composed. */ - $filtered = \apply_filters( 'atmosphere_teaser_thread_posts', array( $hook, $cta ), $this->object ); + $filtered = \apply_filters( 'atmosphere_teaser_thread_posts', $default, $this->object ); // Defensive: a filter that returns a non-iterable or non-string // entries would otherwise fatal on the caller's foreach. Fall - // back to the default pair on anything unexpected. + // back to the default on anything unexpected. if ( ! 
\is_array( $filtered ) || empty( $filtered ) ) { - return array( $hook, $cta ); + return $default; } $texts = array(); @@ -698,10 +747,10 @@ private function build_teaser_thread(): array { if ( \count( $texts ) < 2 ) { \_doing_it_wrong( 'atmosphere_teaser_thread_posts', - \esc_html__( 'The atmosphere_teaser_thread_posts filter must return at least 2 string entries; falling back to the default hook + CTA pair.', 'atmosphere' ), + \esc_html__( 'The atmosphere_teaser_thread_posts filter must return at least 2 string entries; falling back to the default hook + body chunk + CTA shape.', 'atmosphere' ), 'unreleased' ); - return array( $hook, $cta ); + return $default; } // Cap at 5 to contain PDS rate-limit blast radius on mid-thread @@ -739,12 +788,17 @@ private function has_composable_body(): bool { * is the indexed representation of the WP post for the Bluesky * algorithm. Non-root replies are conversational and omit tags. * - * @param string $text Pre-composed post text. - * @param bool $is_root Whether this record is the thread root. - * @param array $context Additional filter context. + * `$embed` is set only by the teaser-thread caller for the terminal + * CTA entry; `reply` and `embed` are independent fields in + * `app.bsky.feed.post`'s lexicon, so a record carrying both is fine. + * + * @param string $text Pre-composed post text. + * @param bool $is_root Whether this record is the thread root. + * @param array $context Additional filter context. + * @param array|null $embed Optional `app.bsky.embed.external` card. * @return array Bsky post record (no reply). 
*/ - private function record_for_thread_entry( string $text, bool $is_root = false, array $context = array() ): array { + private function record_for_thread_entry( string $text, bool $is_root = false, array $context = array(), ?array $embed = null ): array { $record = array( '$type' => 'app.bsky.feed.post', 'text' => $text, @@ -757,6 +811,10 @@ private function record_for_thread_entry( string $text, bool $is_root = false, a $record['facets'] = $facets; } + if ( null !== $embed ) { + $record['embed'] = $embed; + } + if ( $is_root ) { $tags = $this->collect_tags( $this->object ); if ( ! empty( $tags ) ) { diff --git a/includes/wp-admin/class-admin.php b/includes/wp-admin/class-admin.php index c02ae84..21225aa 100644 --- a/includes/wp-admin/class-admin.php +++ b/includes/wp-admin/class-admin.php @@ -350,7 +350,7 @@ private static function long_form_composition_choice( string $strategy ): array case 'teaser-thread': return array( 'label' => \__( 'Teaser thread', 'atmosphere' ), - 'help' => \__( 'A two-post Bluesky thread: a hook followed by a "continue reading" reply with the permalink.', 'atmosphere' ), + 'help' => \__( 'A short Bluesky thread: a hook, a body chunk, and a "continue reading" reply with a link card back to the WordPress post.', 'atmosphere' ), ); case 'link-card': default: diff --git a/tests/phpunit/tests/class-test-publisher.php b/tests/phpunit/tests/class-test-publisher.php index 604557b..56acbbd 100644 --- a/tests/phpunit/tests/class-test-publisher.php +++ b/tests/phpunit/tests/class-test-publisher.php @@ -374,6 +374,9 @@ public function test_publish_teaser_thread_writes_root_first_then_reply_sequenti array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the publisher protocol assertions below at 2 records. 
+ 'post_excerpt' => '', ) ); @@ -425,6 +428,9 @@ public function test_publish_teaser_thread_partial_meta_written_after_root() { array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the publisher protocol assertions below at 2 records. + 'post_excerpt' => '', ) ); @@ -456,6 +462,9 @@ public function test_publish_teaser_thread_final_meta_has_ordered_triples() { array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the publisher protocol assertions below at 2 records. + 'post_excerpt' => '', ) ); @@ -543,7 +552,10 @@ public function test_publish_thread_continues_when_doc_ref_update_fails() { array( 'post_title' => 'A Long-Form Post', 'post_excerpt' => 'A curated excerpt long enough to compose a hook from.', - 'post_content' => 'Body content that has plenty to teaser from for a thread.', + // Empty body so nothing is left for a body chunk (the excerpt is + // the hook) and the default shape stays at 2 entries — the + // protocol assertions below expect a single reply write. + 'post_content' => '', ) ); @@ -644,6 +656,9 @@ public function test_publish_teaser_thread_rollback_on_second_write_failure() { array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the publisher protocol assertions below at 2 records. + 'post_excerpt' => '', ) ); @@ -703,6 +718,9 @@ public function test_publish_teaser_thread_rollback_failing_surfaces_partial_sta array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the publisher protocol assertions below at 2 records. 
+ 'post_excerpt' => '', ) ); @@ -797,6 +815,9 @@ public function test_update_thread_in_place_when_record_counts_match() { array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // matching the stored 2-entry meta below for an in-place update. + 'post_excerpt' => '', ) ); @@ -866,6 +887,9 @@ public function test_update_thread_rewrites_on_strategy_change() { array( 'post_title' => 'A Long-Form Post', 'post_content' => 'Body content that is enough to compose a hook from.', + // Body absorbs entirely into the hook → 2-entry fallback shape, + // keeping the post-rewrite assertions below at 2 records. + 'post_excerpt' => '', ) ); diff --git a/tests/phpunit/tests/transformer/class-test-post.php b/tests/phpunit/tests/transformer/class-test-post.php index 3675a39..e6b27c5 100644 --- a/tests/phpunit/tests/transformer/class-test-post.php +++ b/tests/phpunit/tests/transformer/class-test-post.php @@ -350,7 +350,8 @@ static function ( $record ) { $records = ( new Post( $post ) )->build_long_form_records(); - $this->assertCount( 2, $records ); + // Default thread shape is hook + body chunk + CTA. + $this->assertCount( 3, $records ); foreach ( $records as $record ) { $this->assertStringEndsWith( ' __transformed__', $record['text'] ); } @@ -389,14 +390,18 @@ static function ( $record, $filtered_post, $context = array() ) use ( &$seen ) { ( new Post( $post ) )->build_long_form_records(); - $this->assertCount( 2, $seen ); - $this->assertNotEmpty( $seen[0]['createdAt'] ); - $this->assertNotEmpty( $seen[1]['createdAt'] ); - $this->assertSame( 'teaser-thread', $seen[0]['context']['strategy'] ?? '' ); + // Default thread shape is hook + body chunk + CTA. + $this->assertCount( 3, $seen ); + foreach ( $seen as $entry ) { + $this->assertNotEmpty( $entry['createdAt'] ); + $this->assertSame( 'teaser-thread', $entry['context']['strategy'] ?? 
'' ); + } $this->assertSame( 0, $seen[0]['context']['thread_index'] ?? null ); $this->assertFalse( $seen[0]['context']['is_thread_reply'] ?? true ); $this->assertSame( 1, $seen[1]['context']['thread_index'] ?? null ); $this->assertTrue( $seen[1]['context']['is_thread_reply'] ?? false ); + $this->assertSame( 2, $seen[2]['context']['thread_index'] ?? null ); + $this->assertTrue( $seen[2]['context']['is_thread_reply'] ?? false ); } /** @@ -499,12 +504,12 @@ public function test_build_long_form_records_teaser_thread_filter_entries_are_sa } /** - * Teaser-thread default: 2 entries, hook cut at sentence punctuation, - * CTA starts with `Continue reading: `. + * Teaser-thread default: 3 entries — hook (sentence-cut), body chunk + * continuing the prose, and CTA `Continue reading: `. * * @covers ::build_long_form_records */ - public function test_build_long_form_records_teaser_thread_default_two_entries() { + public function test_build_long_form_records_teaser_thread_default_three_entries() { $post = self::factory()->post->create_and_get( array( 'post_title' => 'A Long Post', @@ -519,7 +524,7 @@ public function test_build_long_form_records_teaser_thread_default_two_entries() $records = ( new Post( $post ) )->build_long_form_records(); - $this->assertCount( 2, $records ); + $this->assertCount( 3, $records ); // Hook. $hook = $records[0]['text']; @@ -528,12 +533,23 @@ public function test_build_long_form_records_teaser_thread_default_two_entries() $this->assertStringNotContainsString( \get_permalink( $post ), $hook ); $this->assertArrayNotHasKey( 'embed', $records[0] ); + // Body chunk: non-empty, sentence-bounded, distinct prose from the hook. + $chunk = $records[1]['text']; + $this->assertNotEmpty( \trim( $chunk ) ); + $this->assertLessThanOrEqual( 280, \mb_strlen( $chunk ) ); + $this->assertContains( + \substr( \rtrim( $chunk ), -1 ), + array( '.', '!', '?' ), + 'Body chunk should end at sentence punctuation when one is in budget.' 
+ ); + $this->assertArrayNotHasKey( 'embed', $records[1] ); + // CTA. - $cta = $records[1]['text']; + $cta = $records[2]['text']; $this->assertMatchesRegularExpression( '~^Continue reading: https?://~', $cta ); $has_cta_link_facet = false; - foreach ( $records[1]['facets'] ?? array() as $facet ) { + foreach ( $records[2]['facets'] ?? array() as $facet ) { foreach ( $facet['features'] as $feature ) { if ( 'app.bsky.richtext.facet#link' === ( $feature['$type'] ?? '' ) ) { $has_cta_link_facet = true; @@ -577,6 +593,10 @@ public function test_build_long_form_records_teaser_thread_hook_falls_back_to_wo /** * Post excerpt, when set, takes precedence over body-derived hooks. * + * The body chunk continues from the start of the body, not from where + * the excerpt would have ended in the body — the excerpt is curated + * copy, not a sliding window over the body. + * * @covers ::build_long_form_records */ public function test_build_long_form_records_teaser_thread_uses_excerpt_when_set() { @@ -592,7 +612,10 @@ public function test_build_long_form_records_teaser_thread_uses_excerpt_when_set $records = ( new Post( $post ) )->build_long_form_records(); + $this->assertCount( 3, $records ); $this->assertSame( 'Custom-curated hook copy.', $records[0]['text'] ); + $this->assertStringContainsString( 'Body sentence one.', $records[1]['text'] ); + $this->assertStringNotContainsString( 'Custom-curated', $records[1]['text'] ); } /** @@ -722,11 +745,13 @@ function ( $downgrade_post, $requested, $effective ) use ( &$events ) { } /** - * Downstream filters may extend the thread to 3 posts. + * Downstream filters can swap the default 3-entry shape for any 2..5 + * string array; the link-card embed still attaches to whatever entry + * is last. 
* * @covers ::build_long_form_records */ - public function test_build_long_form_records_teaser_thread_filter_extends_to_three() { + public function test_build_long_form_records_teaser_thread_filter_replaces_text_keeping_terminal_embed() { $post = self::factory()->post->create_and_get( array( 'post_title' => 'Titled', @@ -746,13 +771,18 @@ public function test_build_long_form_records_teaser_thread_filter_extends_to_thr $this->assertSame( 'Hook post', $records[0]['text'] ); $this->assertSame( 'Key takeaway', $records[1]['text'] ); $this->assertSame( 'Call to action link', $records[2]['text'] ); + + $this->assertArrayNotHasKey( 'embed', $records[0] ); + $this->assertArrayNotHasKey( 'embed', $records[1] ); + $this->assertArrayHasKey( 'embed', $records[2] ); + $this->assertSame( 'app.bsky.embed.external', $records[2]['embed']['$type'] ); } /** * Filter that returns fewer than 2 entries should trigger - * _doing_it_wrong and fall back to the default hook + CTA pair — - * a 1-entry return would silently route to publish_single() and - * drop the CTA. + * _doing_it_wrong and fall back to the default hook + body chunk + CTA + * shape — a 1-entry return would silently route to publish_single() + * and drop the CTA. * * @covers ::build_long_form_records */ @@ -771,9 +801,189 @@ public function test_build_long_form_records_teaser_thread_filter_under_two_fall $records = ( new Post( $post ) )->build_long_form_records(); - $this->assertCount( 2, $records ); + $this->assertCount( 3, $records ); $this->assertNotSame( 'Just one entry', $records[0]['text'] ); + $this->assertMatchesRegularExpression( '~^Continue reading: ~', $records[2]['text'] ); + } + + /** + * Body-path hook: body chunk continues from where the hook cut off + * — the hook and the chunk are non-overlapping windows over the same + * plain-text body. 
+ * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_body_chunk_continues_after_hook_cut() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + // 35 sentences × 10 chars = 350 chars; first 28 land in the hook. + 'post_content' => \str_repeat( 'Hi there. ', 35 ), + 'post_excerpt' => '', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 3, $records ); + + $hook = $records[0]['text']; + $chunk = $records[1]['text']; + + // The plain text is "Hi there. " repeated; sentence-cut at char 279 + // produces a 279-char hook ("Hi there. " * 27 + "Hi there.") and a + // chunk continuing with the remaining 7 sentences. + $this->assertSame( 279, \mb_strlen( $hook ) ); + $this->assertNotEmpty( \trim( $chunk ) ); + $this->assertNotSame( $hook, $chunk, 'Body chunk must not duplicate the hook text.' ); + + // Reconstructing hook + chunk in order should yield a prefix of the + // underlying plain body — proving non-overlap. + $reconstructed = \rtrim( $hook ) . ' ' . \ltrim( $chunk ); + $this->assertStringStartsWith( $reconstructed, \str_repeat( 'Hi there. ', 35 ) . ' ' ); + } + + /** + * Excerpt-path hook: body chunk comes from the start of the body, not + * from where the excerpt would have ended in the body. Curated + * excerpts are not sliding windows over the body. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_excerpt_hook_chunk_starts_from_body() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'First body sentence. Second body sentence. 
Third body sentence.', + 'post_excerpt' => 'A curated standalone teaser.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 3, $records ); + $this->assertSame( 'A curated standalone teaser.', $records[0]['text'] ); + + // Body chunk begins with the first body sentence — not a slice that + // skipped past the excerpt's char-count. + $this->assertStringStartsWith( 'First body sentence.', $records[1]['text'] ); + } + + /** + * Short post: when the hook absorbs the entire body, the body chunk is + * dropped rather than padded with empty text. The CTA stays terminal + * and still carries the link-card embed. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_short_body_falls_back_to_two_entries() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'A single short sentence.', + 'post_excerpt' => '', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 2, $records ); + $this->assertSame( 'A single short sentence.', $records[0]['text'] ); $this->assertMatchesRegularExpression( '~^Continue reading: ~', $records[1]['text'] ); + + // Embed attaches to the terminal entry: "last entry," not "index 2". + $this->assertArrayNotHasKey( 'embed', $records[0] ); + $this->assertArrayHasKey( 'embed', $records[1] ); + $this->assertSame( 'app.bsky.embed.external', $records[1]['embed']['$type'] ); + } + + /** + * The terminal CTA record carries an `app.bsky.embed.external` link + * card pointing at the WP permalink, with the post title as `title` + * and the excerpt as `description`. Locks in the embed default so a + * future refactor that drops it surfaces immediately. 
+ * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_cta_record_carries_link_card_embed() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Distinct Post Title', + 'post_content' => 'Body content with enough prose to compose a hook from.', + 'post_excerpt' => 'Distinct curated excerpt.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + $terminal = $records[ \count( $records ) - 1 ]; + + $this->assertArrayHasKey( 'embed', $terminal ); + $this->assertSame( 'app.bsky.embed.external', $terminal['embed']['$type'] ); + + $external = $terminal['embed']['external']; + $this->assertSame( \get_permalink( $post ), $external['uri'] ); + $this->assertSame( 'Distinct Post Title', $external['title'] ); + $this->assertSame( 'Distinct curated excerpt.', $external['description'] ); + } + + /** + * The hook (root) record has no `embed` field — the link card lives + * only on the terminal CTA reply, where it's a useful affordance. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_root_record_has_no_embed() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'Body content with enough prose to compose a hook from.', + 'post_excerpt' => 'Curated excerpt.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertArrayNotHasKey( 'embed', $records[0] ); + } + + /** + * Filter override that returns 2 entries reduces the thread to 2 + * records; the terminal entry still gets the link-card embed because + * the embed attaches to "last entry," not "index 2." 
+ * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_filter_two_entries_terminal_has_embed() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'Body content with enough prose to compose a hook from.', + 'post_excerpt' => 'Curated excerpt.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + \add_filter( + 'atmosphere_teaser_thread_posts', + fn() => array( 'Custom hook', 'Custom CTA' ) + ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 2, $records ); + $this->assertArrayNotHasKey( 'embed', $records[0] ); + $this->assertArrayHasKey( 'embed', $records[1] ); + $this->assertSame( 'app.bsky.embed.external', $records[1]['embed']['$type'] ); } /** From 5f136bbb6f96c8447bac8addfc738685a89ee51c Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Thu, 30 Apr 2026 16:43:34 -0500 Subject: [PATCH 2/4] Address PR review: Unicode whitespace + filter contract fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweep of multi-reviewer findings on the teaser-thread iteration: * sanitize_text now strips NBSP (U+00A0) and ideographic space (U+3000) too — the prior /\s+/ regex (no /u) left them intact, so an excerpt of `\u{00A0}` × 10 would survive the 10-char floor in has_composable_body() and a filter returning whitespace-only entries would publish empty records. * Body-chunk extraction in build_teaser_thread() ltrims with /^\s+/u so a multibyte whitespace boundary in the body cannot leak invisible leading whitespace into the chunk record. * atmosphere_teaser_thread_posts now triggers _doing_it_wrong on a non-array filter return (parity with the existing < 2 entries case), so filter authors don't silently fall back to the default. 
Filter docblock now spells out the post-filter pipeline (sanitize_text + truncate + 5-cap) so authors don't think their strings ship verbatim. * _doing_it_wrong message no longer claims a 3-entry fallback when the default may be 2-entry; settings page help text mentions the body chunk is optional. New regression tests: * sanitize_text Unicode-whitespace normalization. * Hard-cap multibyte ellipsis path: chunk continues from the next codepoint, not corrupted by a byte-level rtrim. * Body chunk word-cut fallback when the chunk source has no sentence punctuation. * Filter cap of 5 entries (silent array_slice). * Non-array filter return triggers _doing_it_wrong. * Whitespace-only filter entries fall back after sanitisation. --- includes/functions.php | 5 +- includes/transformer/class-post.php | 27 ++- includes/wp-admin/class-admin.php | 2 +- tests/phpunit/tests/class-test-functions.php | 13 ++ .../tests/transformer/class-test-post.php | 177 ++++++++++++++++++ 5 files changed, 217 insertions(+), 7 deletions(-) diff --git a/includes/functions.php b/includes/functions.php index 60524e7..75309c9 100644 --- a/includes/functions.php +++ b/includes/functions.php @@ -54,7 +54,10 @@ function build_at_uri( string $did, string $collection, string $rkey ): string { function sanitize_text( string $text ): string { $text = \wp_strip_all_tags( $text ); $text = \html_entity_decode( $text, ENT_QUOTES, 'UTF-8' ); - $text = \preg_replace( '/\s+/', ' ', $text ); + // `/u` matches Unicode whitespace too — without it NBSP (U+00A0), + // ideographic space (U+3000), and similar survive both this collapse + // and the trim() below, masquerading as real prose downstream. 
+ $text = \preg_replace( '/\s+/u', ' ', $text ); return \trim( $text ); } diff --git a/includes/transformer/class-post.php b/includes/transformer/class-post.php index 4f335d9..ac5eede 100644 --- a/includes/transformer/class-post.php +++ b/includes/transformer/class-post.php @@ -706,7 +706,11 @@ private function build_teaser_thread(): array { $chunk_source = \mb_substr( $plain, \mb_strlen( $consumed ) ); } - $chunk_source = \ltrim( $chunk_source ); + // Unicode-aware leading-whitespace strip: `\ltrim` only handles + // ASCII whitespace, so NBSP (U+00A0) and ideographic space + // (U+3000) at the start of `$chunk_source` would otherwise leak + // into the body chunk as leading invisible whitespace. + $chunk_source = \preg_replace( '/^\s+/u', '', $chunk_source ); $cta = $this->teaser_thread_cta_text(); $default = \mb_strlen( $chunk_source ) >= 10 @@ -716,6 +720,14 @@ private function build_teaser_thread(): array { /** * Filters the default teaser-thread post texts. * + * Filtered entries are not shipped verbatim: each string passes + * through `sanitize_text()` and is clamped to 300 chars by + * `truncate_to_budget()`, and the array is silently capped at 5 + * entries (PDS rate-limit blast-radius guard for mid-thread + * failures). Returning a non-array, an empty array, or fewer + * than 2 valid string entries triggers `_doing_it_wrong` and + * falls back to the default array. + * * @param string[] $posts Default array: 2 entries `[ hook, cta ]` * when the body is too short for a body * chunk, otherwise 3 entries @@ -724,10 +736,15 @@ private function build_teaser_thread(): array { */ $filtered = \apply_filters( 'atmosphere_teaser_thread_posts', $default, $this->object ); - // Defensive: a filter that returns a non-iterable or non-string - // entries would otherwise fatal on the caller's foreach. Fall - // back to the default on anything unexpected. + // Defensive: a non-iterable or empty filter return would fatal on + // the caller's foreach. 
Surface the misuse so the filter author + // notices, then fall back to the default array. if ( ! \is_array( $filtered ) || empty( $filtered ) ) { + \_doing_it_wrong( + 'atmosphere_teaser_thread_posts', + \esc_html__( 'The atmosphere_teaser_thread_posts filter must return a non-empty array of strings; falling back to the default teaser-thread shape.', 'atmosphere' ), + 'unreleased' + ); return $default; } @@ -747,7 +764,7 @@ private function build_teaser_thread(): array { if ( \count( $texts ) < 2 ) { \_doing_it_wrong( 'atmosphere_teaser_thread_posts', - \esc_html__( 'The atmosphere_teaser_thread_posts filter must return at least 2 string entries; falling back to the default hook + body chunk + CTA shape.', 'atmosphere' ), + \esc_html__( 'The atmosphere_teaser_thread_posts filter must return at least 2 string entries; falling back to the default teaser-thread shape.', 'atmosphere' ), 'unreleased' ); return $default; diff --git a/includes/wp-admin/class-admin.php b/includes/wp-admin/class-admin.php index 21225aa..1f920b5 100644 --- a/includes/wp-admin/class-admin.php +++ b/includes/wp-admin/class-admin.php @@ -350,7 +350,7 @@ private static function long_form_composition_choice( string $strategy ): array case 'teaser-thread': return array( 'label' => \__( 'Teaser thread', 'atmosphere' ), - 'help' => \__( 'A short Bluesky thread: a hook, a body chunk, and a "continue reading" reply with a link card back to the WordPress post.', 'atmosphere' ), + 'help' => \__( 'A short Bluesky thread: a hook, an optional body chunk for longer posts, and a "continue reading" reply with a link card back to the WordPress post.', 'atmosphere' ), ); case 'link-card': default: diff --git a/tests/phpunit/tests/class-test-functions.php b/tests/phpunit/tests/class-test-functions.php index 8aa4299..506d33d 100644 --- a/tests/phpunit/tests/class-test-functions.php +++ b/tests/phpunit/tests/class-test-functions.php @@ -57,6 +57,19 @@ public function test_sanitize_text() { $this->assertSame( 'a & 
b', sanitize_text( 'a & b' ) ); } + /** + * Unicode whitespace (NBSP, ideographic space) collapses and trims + * just like ASCII whitespace. Without the `/u` regex flag a NBSP-only + * string would survive both the collapse and the trim and leak + * downstream as fake "prose." + */ + public function test_sanitize_text_normalises_unicode_whitespace() { + $this->assertSame( 'A B', sanitize_text( "A\xC2\xA0\xC2\xA0B" ) ); + $this->assertSame( 'A B', sanitize_text( "A\xE3\x80\x80B" ) ); + $this->assertSame( '', sanitize_text( "\xC2\xA0\xC2\xA0" ) ); + $this->assertSame( '', sanitize_text( "\xE3\x80\x80\xE3\x80\x80" ) ); + } + /** * Test truncate_text respects limit. */ diff --git a/tests/phpunit/tests/transformer/class-test-post.php b/tests/phpunit/tests/transformer/class-test-post.php index e6b27c5..f2c7efb 100644 --- a/tests/phpunit/tests/transformer/class-test-post.php +++ b/tests/phpunit/tests/transformer/class-test-post.php @@ -986,6 +986,183 @@ public function test_build_long_form_records_teaser_thread_filter_two_entries_te $this->assertSame( 'app.bsky.embed.external', $records[1]['embed']['$type'] ); } + /** + * Hard-cap multibyte path: a body of unbroken multibyte runs (no + * spaces, no sentence punctuation) forces `truncate_to_budget` into + * the hard-cap branch where the hook ends in `…`. The body chunk + * must continue from the next plain-text codepoint, not corrupt the + * trailing multibyte char of the hook (which `rtrim($hook, '…')` + * would do — this test pins the `mb_substr` safety the PR added). + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_hook_hard_cap_multibyte_chunk_offset() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + // 100 × `日本語` = 300 codepoints, no whitespace or sentence + // punctuation, forcing the hook into the hard-cap path. 
+ 'post_content' => \str_repeat( '日本語', 100 ), + 'post_excerpt' => '', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 3, $records ); + + $hook = $records[0]['text']; + $chunk = $records[1]['text']; + + $this->assertSame( '…', \mb_substr( $hook, -1 ), 'Hard-cap hook should end with the ellipsis marker.' ); + $this->assertSame( 280, \mb_strlen( $hook ) ); + + // First codepoint of the chunk should be the next codepoint of + // the original prose — no UTF-8 corruption from a byte-level + // rtrim, no overlap with the hook's last consumed codepoint. + $consumed = \mb_substr( $hook, 0, 279 ); + $this->assertSame( + \mb_substr( \str_repeat( '日本語', 100 ), \mb_strlen( $consumed ), 1 ), + \mb_substr( $chunk, 0, 1 ) + ); + } + + /** + * Body chunk falls back to a word boundary when its source has no + * sentence punctuation in the first 280 chars. Pins the chunk's + * truncation contract — the same sentence-preferred / + * word-fallback / hard-cap order as the hook. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_body_chunk_word_cut_fallback() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + // One sentence so the hook lands at the period, then a + // long stream of 8-char words separated by spaces but + // with no further punctuation — forces the chunk into + // the word-boundary fallback branch. + 'post_content' => 'First sentence. ' . 
\str_repeat( 'abcdefgh ijklmnop ', 36 ), + 'post_excerpt' => '', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 3, $records ); + + $chunk = $records[1]['text']; + + $this->assertLessThanOrEqual( 280, \mb_strlen( $chunk ) ); + $this->assertDoesNotMatchRegularExpression( + '~\s\S{1,7}$~', + $chunk, + 'Word-cut chunk should end at a complete word, not mid-word.' + ); + // No sentence punctuation in the chunk source means the chunk + // itself should not contain `.`/`!`/`?` either. + $this->assertDoesNotMatchRegularExpression( '~[.!?]~', $chunk ); + } + + /** + * Filter return is silently capped at 5 entries to bound the + * compensating-delete blast radius on a mid-thread publish failure. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_filter_caps_at_five_entries() { + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'Body content with enough prose to compose a hook from.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + \add_filter( + 'atmosphere_teaser_thread_posts', + fn() => array( 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven' ) + ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertCount( 5, $records ); + $this->assertSame( 'Five', $records[4]['text'] ); + $this->assertArrayHasKey( 'embed', $records[4], 'Embed still attaches to the last entry after the cap.' ); + } + + /** + * Filter that returns a non-array value triggers `_doing_it_wrong` + * and falls back to the default — same treatment as the < 2 valid + * entries case, so filter authors get visibility into both misuse + * shapes. 
+ * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_filter_non_array_falls_back() { + $this->setExpectedIncorrectUsage( 'atmosphere_teaser_thread_posts' ); + + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'Body content with enough prose to compose a hook from.', + 'post_excerpt' => 'Curated excerpt.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + \add_filter( 'atmosphere_teaser_thread_posts', fn() => null ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + $this->assertGreaterThanOrEqual( 2, \count( $records ) ); + $this->assertMatchesRegularExpression( + '~^Continue reading: ~', + $records[ \count( $records ) - 1 ]['text'] + ); + } + + /** + * Filter that returns only whitespace-equivalent entries (NBSP, + * ideographic space) is treated as < 2 valid entries after + * sanitisation. Locks in the Unicode-whitespace behavior of + * `sanitize_text` — without `/u` on its whitespace regex these + * would survive trim and ship as fake records. + * + * @covers ::build_long_form_records + */ + public function test_build_long_form_records_teaser_thread_filter_whitespace_only_entries_fall_back() { + $this->setExpectedIncorrectUsage( 'atmosphere_teaser_thread_posts' ); + + $post = self::factory()->post->create_and_get( + array( + 'post_title' => 'Titled', + 'post_content' => 'Body content with enough prose to compose a hook from.', + 'post_excerpt' => 'Curated excerpt.', + ) + ); + + \add_filter( 'atmosphere_long_form_composition', fn() => 'teaser-thread' ); + \add_filter( + 'atmosphere_teaser_thread_posts', + fn() => array( "\xC2\xA0\xC2\xA0", "\xE3\x80\x80\xE3\x80\x80" ) + ); + + $records = ( new Post( $post ) )->build_long_form_records(); + + // Default (excerpt + body) should resurface; the CTA stays terminal. 
+ $this->assertGreaterThanOrEqual( 2, \count( $records ) ); + $this->assertMatchesRegularExpression( + '~^Continue reading: ~', + $records[ \count( $records ) - 1 ]['text'] + ); + } + /** * Every record in a thread carries the same `langs` array. * From 15307573e5ebcce4ac361a42f806037f0ca9e7ca Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Thu, 30 Apr 2026 16:51:00 -0500 Subject: [PATCH 3/4] Guard against null preg_replace return on malformed UTF-8 PCRE in `/u` mode validates the input as UTF-8 and returns null on malformed sequences (e.g. orphaned continuation bytes). On PHP 8.1+ passing that null on to trim() and mb_strlen() is deprecated (and a TypeError under strict_types), so on invalid input sanitize_text() and build_teaser_thread()'s leading-whitespace strip would emit deprecations or fatal instead of degrading gracefully. Both call sites now check is_string on the preg_replace result and fall back to the pre-replacement value, mirroring how truncate_to_budget already handles its own /u regex. New test pins the sanitize_text behavior on a malformed sequence. --- includes/functions.php | 5 ++++- includes/transformer/class-post.php | 7 +++++-- tests/phpunit/tests/class-test-functions.php | 12 ++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/includes/functions.php b/includes/functions.php index 75309c9..da4c630 100644 --- a/includes/functions.php +++ b/includes/functions.php @@ -57,7 +57,10 @@ function sanitize_text( string $text ): string { // `/u` matches Unicode whitespace too — without it NBSP (U+00A0), // ideographic space (U+3000), and similar survive both this collapse // and the trim() below, masquerading as real prose downstream. - $text = \preg_replace( '/\s+/u', ' ', $text ); + // PCRE in `/u` mode returns null on invalid UTF-8; fall back to the + // pre-replacement text so trim() never receives null (PHP 8.1+). + $collapsed = \preg_replace( '/\s+/u', ' ', $text ); + $text = \is_string( $collapsed ) ? 
$collapsed : $text; return \trim( $text ); } diff --git a/includes/transformer/class-post.php b/includes/transformer/class-post.php index ac5eede..92b1493 100644 --- a/includes/transformer/class-post.php +++ b/includes/transformer/class-post.php @@ -709,8 +709,11 @@ private function build_teaser_thread(): array { // Unicode-aware leading-whitespace strip: `\ltrim` only handles // ASCII whitespace, so NBSP (U+00A0) and ideographic space // (U+3000) at the start of `$chunk_source` would otherwise leak - // into the body chunk as leading invisible whitespace. - $chunk_source = \preg_replace( '/^\s+/u', '', $chunk_source ); + // into the body chunk as leading invisible whitespace. PCRE in + // `/u` mode returns null on invalid UTF-8; fall back to the + // pre-strip slice so the `mb_strlen` check below stays string-safe. + $stripped = \preg_replace( '/^\s+/u', '', $chunk_source ); + $chunk_source = \is_string( $stripped ) ? $stripped : $chunk_source; $cta = $this->teaser_thread_cta_text(); $default = \mb_strlen( $chunk_source ) >= 10 diff --git a/tests/phpunit/tests/class-test-functions.php b/tests/phpunit/tests/class-test-functions.php index 506d33d..938501d 100644 --- a/tests/phpunit/tests/class-test-functions.php +++ b/tests/phpunit/tests/class-test-functions.php @@ -70,6 +70,18 @@ public function test_sanitize_text_normalises_unicode_whitespace() { $this->assertSame( '', sanitize_text( "\xE3\x80\x80\xE3\x80\x80" ) ); } + /** + * `/u`-mode preg_replace returns null on malformed UTF-8; the + * function must not TypeError when that happens. Locks in the + * defensive `is_string` fallback. + */ + public function test_sanitize_text_handles_invalid_utf8_without_fataling() { + // 0xC3 0x28 is a malformed UTF-8 sequence (continuation byte missing). + $result = sanitize_text( "ok \xC3\x28 still here" ); + $this->assertIsString( $result ); + $this->assertNotSame( '', $result ); + } + /** * Test truncate_text respects limit. 
*/ From 0430d00aa57982b0829c519f47203eb750637b30 Mon Sep 17 00:00:00 2001 From: Brandon Kraft Date: Thu, 30 Apr 2026 16:57:06 -0500 Subject: [PATCH 4/4] test: reword empty-body fixture comment to match the actual fallback path --- tests/phpunit/tests/class-test-publisher.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/class-test-publisher.php b/tests/phpunit/tests/class-test-publisher.php index 56acbbd..d259cb8 100644 --- a/tests/phpunit/tests/class-test-publisher.php +++ b/tests/phpunit/tests/class-test-publisher.php @@ -552,9 +552,9 @@ public function test_publish_thread_continues_when_doc_ref_update_fails() { array( 'post_title' => 'A Long-Form Post', 'post_excerpt' => 'A curated excerpt long enough to compose a hook from.', - // Empty body so the hook absorbs everything and the default - // shape stays at 2 entries — the protocol assertions below - // expect a single reply write. + // Empty body: hook comes from the excerpt and there is no + // body chunk, so the default shape is [hook, cta] and the + // protocol assertions below expect a single reply write. 'post_content' => '', ) );