From 15863a8e3670b3105bf5451dd119505b4c83e4cb Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Thu, 26 Jan 2023 15:48:28 -0700
Subject: [PATCH 01/36] Introduce HTML Tag Processor
This commit pulls in the HTML Tag Processor from the Gutenberg repository.
The Tag Processor attempts to be an HTML5-spec-compliant parser that
provides the ability in PHP to find specific HTML tags and then add,
remove, or update attributes on that tag. It provides a safe and reliable
way to modify the attribute on HTML tags.
```php
// Add missing `rel` attribute to links.
$p = new WP_HTML_Tag_Processor( $block_content );
if ( $p->next_tag( 'A' ) && empty( $p->get_attribute( 'rel' ) ) ) {
$p->set_attribute( 'rel', 'noopener nofollow' );
}
return $p->get_updated_html();
```
Introduced originally in WordPress/Gutenberg#42485 and developed within
the Gutenberg repository, this HTML parsing system was built in order
to address a persistent need (properly modifying HTML tag attributes)
and was motivated after a sequence of block editor defects which stemmed
from mismatches between actual HTML code and expectations for HTML
input running through existing naive string-search-based solutions.
The Tag Processor is intended to operate fast enough to avoid being an
obstacle on page render while using as little memory overhead as possible.
It is practically a zero-memory-overhead system, and only allocates memory
as changes to the input HTML document are enqueued, releasing that memory
when flushing those changes to the document, moving on to find the next
tag, or flushing its entire output via `get_updated_html()`.
Rigor has been taken to ensure that the Tag Processor will not be confused
by unexpected or non-normative HTML input, including issues arising from
quoting, from different syntax rules within ``, `',
+ );
+
+ $examples['Complex script with many parsing states'] = array(
+ '-->-->',
+ );
+ return $examples;
+ }
+
+ /**
+ * @ticket 56299
+ *
+ * @covers next_tag
+ *
+ * @dataProvider data_rcdata_state
+ */
+ public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) {
+ $p = new WP_HTML_Tag_Processor( $rcdata_then_div );
+ $p->next_tag();
+ $this->assertSame( strtoupper( $rcdata_tag ), $p->get_tag(), "The first found tag was not '$rcdata_tag'" );
+ $p->next_tag();
+ $this->assertSame( 'DIV', $p->get_tag(), "The second found tag was not 'div'" );
+ }
+
+ /**
+ * Data provider for test_next_tag_ignores_the_contents_of_a_rcdata_tag().
+ *
+ * @return array {
+ * @type array {
+ * @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags.
+ * @type string $rcdata_tag The RCDATA tag.
+ * }
+ * }
+ */
+ public function data_rcdata_state() {
+ $examples = array();
+ $examples['Simple textarea'] = array(
+ '',
+ 'TEXTAREA',
+ );
+
+ $examples['Simple title'] = array(
+ 'Back to notifications',
+ 'TITLE',
+ );
+
+ $examples['Comment opener inside a textarea tag should be ignored'] = array(
+ '
test',
+ 'test',
+ );
+
+ $examples['Double-quotes escaped in double-quote attribute value'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['Unquoted attribute value'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['Unquoted attribute value with tag-like value'] = array(
+ ' >test',
+ ' >test',
+ );
+
+ $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array(
+ '>test',
+ '>test',
+ );
+
+ $examples['1'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['2'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['4'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['5'] = array(
+ 'code>test',
+ 'code>test',
+ );
+
+ $examples['6'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['7'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['8'] = array(
+ 'id="test">test',
+ 'id="test">test',
+ );
+
+ $examples['9'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['10'] = array(
+ '>test',
+ '>test',
+ );
+
+ $examples['11'] = array(
+ 'The applicative operator <* works well in Haskell; is what?test',
+ 'The applicative operator <* works well in Haskell; is what?test',
+ );
+
+ $examples['12'] = array(
+ '<3 is a heart but is a tag.test',
+ '<3 is a heart but is a tag.test',
+ );
+
+ $examples['13'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['14'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['15'] = array(
+ ' a HTML Tag]]>test',
+ ' a HTML Tag]]>test',
+ );
+
+ $examples['16'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['17'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['18'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['19'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['20'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['21'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['22'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['23'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['24'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['25'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['Multiple unclosed tags treated as a single tag'] = array(
+ <<
+ test
+HTML
+ ,
+ <<
+ test
+HTML
+ ,
+ );
+
+ $examples['27'] = array(
+ 'test',
+ 'test',
+ );
+
+ $examples['28'] = array(
+ 'test',
+ 'test',
+ );
+
+ return $examples;
+ }
+}
From 40e1cb3e794de8f000ff63c07887fe2b85071d91 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Mon, 30 Jan 2023 13:29:05 -0700
Subject: [PATCH 02/36] Move class_exists calls to wp-html
---
.../class-wp-html-attribute-token.php | 4 ----
src/wp-includes/class-wp-html-span.php | 4 ----
.../class-wp-html-tag-processor.php | 5 ----
.../class-wp-html-text-replacement.php | 4 ----
src/wp-includes/wp-html.php | 24 ++++++++++++-------
5 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php
index 21147e30bfe1f..7b3d571872358 100644
--- a/src/wp-includes/class-wp-html-attribute-token.php
+++ b/src/wp-includes/class-wp-html-attribute-token.php
@@ -7,8 +7,6 @@
* @since 6.2.0
*/
-if ( ! class_exists( 'WP_HTML_Attribute_Token' ) ) :
-
/**
* Data structure for the attribute token that allows to drastically improve performance.
*
@@ -89,5 +87,3 @@ public function __construct( $name, $value_start, $value_length, $start, $end, $
$this->is_true = $is_true;
}
}
-
-endif;
diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php
index 376e391dc1c44..39e603662b17b 100644
--- a/src/wp-includes/class-wp-html-span.php
+++ b/src/wp-includes/class-wp-html-span.php
@@ -7,8 +7,6 @@
* @since 6.2.0
*/
-if ( ! class_exists( 'WP_HTML_Span' ) ) :
-
/**
* Represents a textual span inside an HTML document.
*
@@ -52,5 +50,3 @@ public function __construct( $start, $end ) {
$this->end = $end;
}
}
-
-endif;
diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php
index 24e67a3adc83f..0c35e939cccae 100644
--- a/src/wp-includes/class-wp-html-tag-processor.php
+++ b/src/wp-includes/class-wp-html-tag-processor.php
@@ -26,8 +26,6 @@
* @since 6.2.0
*/
-if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) :
-
/**
* Processes an input HTML document by applying a specified set
* of patches to that input. Tokenizes HTML but does not fully
@@ -2042,6 +2040,3 @@ private function matches() {
return true;
}
}
-
-endif;
-
diff --git a/src/wp-includes/class-wp-html-text-replacement.php b/src/wp-includes/class-wp-html-text-replacement.php
index 4461df473aadd..e3ada169d76ef 100644
--- a/src/wp-includes/class-wp-html-text-replacement.php
+++ b/src/wp-includes/class-wp-html-text-replacement.php
@@ -7,8 +7,6 @@
* @since 6.2.0
*/
-if ( ! class_exists( 'WP_HTML_Text_Replacement' ) ) :
-
/**
* Data structure used to replace existing content from start to end that allows to drastically improve performance.
*
@@ -59,5 +57,3 @@ public function __construct( $start, $end, $text ) {
$this->text = $text;
}
}
-
-endif;
diff --git a/src/wp-includes/wp-html.php b/src/wp-includes/wp-html.php
index 1806643104794..a0d238cef7823 100644
--- a/src/wp-includes/wp-html.php
+++ b/src/wp-includes/wp-html.php
@@ -15,14 +15,20 @@
* terms of speed as well as memory use.
*/
-/** WP_HTML_Attribute_Token class */
-require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php';
+if ( ! class_exists( 'WP_HTML_Attribute_Token' ) ) {
+ /** WP_HTML_Attribute_Token class */
+ require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php';
+}
-/** WP_HTML_Span class */
-require_once ABSPATH . WPINC . '/class-wp-html-span.php';
+if ( ! class_exists( 'WP_HTML_Span' ) ) {
+ /** WP_HTML_Span class */
+ require_once ABSPATH . WPINC . '/class-wp-html-span.php';
+}
-/** WP_HTML_Text_Replacement class */
-require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php';
+if ( ! class_exists( 'WP_HTML_Text_Replacement' ) ) {
+ /** WP_HTML_Text_Replacement class */
+ require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php';
+}
/*
* The WP_HTML_Tag_Processor is intended for linearly scanning through
@@ -30,5 +36,7 @@
* and adding, removing, or modifying attributes on those tags.
*/
-/** WP_HTML_Tag_Processor class */
-require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php';
+if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) {
+ /** WP_HTML_Tag_Processor class */
+ require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php';
+}
From 8b507e5417a2a64c6ac155444e44de2566d5d643 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Mon, 30 Jan 2023 13:32:46 -0700
Subject: [PATCH 03/36] Mark helper classes `final`
---
src/wp-includes/class-wp-html-attribute-token.php | 2 +-
src/wp-includes/class-wp-html-span.php | 2 +-
src/wp-includes/class-wp-html-text-replacement.php | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php
index 7b3d571872358..05224f8549ef1 100644
--- a/src/wp-includes/class-wp-html-attribute-token.php
+++ b/src/wp-includes/class-wp-html-attribute-token.php
@@ -17,7 +17,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-class WP_HTML_Attribute_Token {
+final class WP_HTML_Attribute_Token {
/**
* Attribute name.
*
diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php
index 39e603662b17b..2f902f3831f03 100644
--- a/src/wp-includes/class-wp-html-span.php
+++ b/src/wp-includes/class-wp-html-span.php
@@ -20,7 +20,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-class WP_HTML_Span {
+final class WP_HTML_Span {
/**
* Byte offset into document where span begins.
*
diff --git a/src/wp-includes/class-wp-html-text-replacement.php b/src/wp-includes/class-wp-html-text-replacement.php
index e3ada169d76ef..a8341ad33acfe 100644
--- a/src/wp-includes/class-wp-html-text-replacement.php
+++ b/src/wp-includes/class-wp-html-text-replacement.php
@@ -17,7 +17,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-class WP_HTML_Text_Replacement {
+final class WP_HTML_Text_Replacement {
/**
* Byte offset into document where replacement span begins.
*
From 561acff5157b9db342244f066d7f5017a7e167a2 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Mon, 30 Jan 2023 14:08:30 -0700
Subject: [PATCH 04/36] Updates from review feedback, mostly docs
---
.../class-wp-html-tag-processor.php | 45 ++-
.../html/wpHtmlTagProcessorBookmarks.php | 38 +-
.../tests/html/wpHtmlTagProcessorTest.php | 376 ++++++++++--------
3 files changed, 255 insertions(+), 204 deletions(-)
diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php
index 0c35e939cccae..888271df150c6 100644
--- a/src/wp-includes/class-wp-html-tag-processor.php
+++ b/src/wp-includes/class-wp-html-tag-processor.php
@@ -239,7 +239,7 @@ class WP_HTML_Tag_Processor {
* Whether to visit tag closers, e.g. , when walking an input document.
*
* @since 6.2.0
- * @var boolean
+ * @var bool
*/
private $stop_on_tag_closers;
@@ -279,7 +279,7 @@ class WP_HTML_Tag_Processor {
* ```
*
* @since 6.2.0
- * @var ?int
+ * @var int|null
*/
private $tag_name_starts_at;
@@ -294,7 +294,7 @@ class WP_HTML_Tag_Processor {
* ```
*
* @since 6.2.0
- * @var ?int
+ * @var int|null
*/
private $tag_name_length;
@@ -310,14 +310,14 @@ class WP_HTML_Tag_Processor {
* ```
*
* @since 6.2.0
- * @var ?int
+ * @var int|null
*/
private $tag_ends_at;
/**
* Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
.
*
- * @var boolean
+ * @var bool
*/
private $is_closing_tag;
@@ -445,8 +445,8 @@ public function __construct( $html ) {
*
* @since 6.2.0
*
- * @param array|string $query {
- * Which tag name to find, having which class, etc.
+ * @param array|string|null $query {
+ * Optional. Which tag name to find, having which class, etc. Default is to find any tag.
*
* @type string|null $tag_name Which tag to find, or `null` for "any tag."
* @type int|null $match_offset Find the Nth tag matching all search criteria.
@@ -570,21 +570,23 @@ public function next_tag( $query = null ) {
* }
* ```
*
- * Because bookmarks maintain their position they don't
- * expose any internal offsets for the HTML document
+ * Because bookmarks maintain their position, they don't
+ * expose any internal offsets for the HTML document,
* and can't be used with normal string functions.
*
* Because bookmarks allocate memory and require processing
- * for every applied update they are limited and require
+ * for every applied update, they are limited and require
* a name. They should not be created inside a loop.
*
- * Bookmarks are a powerful tool to enable complicated behavior;
- * consider double-checking that you need this tool if you are
+ * Bookmarks are a powerful tool to enable complicated behavior.
+ * Consider double-checking that you need this tool if you are
* reaching for it, as inappropriate use could lead to broken
* HTML structure or unwanted processing overhead.
*
+ * @since 6.2.0
+ *
* @param string $name Identifies this particular bookmark.
- * @return false|void
+ * @return bool|void
* @throws Exception Throws on invalid bookmark name if WP_DEBUG set.
*/
public function set_bookmark( $name ) {
@@ -742,7 +744,7 @@ private function skip_script_data() {
* escaped mode if we aren't already there.
*
* Inside the escaped modes it's ignored and
- * shouldn't ever pull us out of double-escaped
+ * should never pull us out of double-escaped
* and back into escaped.
*
* We'll continue parsing past it regardless of
@@ -878,9 +880,11 @@ private function parse_next_tag() {
return true;
}
- // If we didn't find a tag opener, and we can't be
- // transitioning into different markup states, then
- // we can abort because there aren't any more tags.
+ /*
+ * If we didn't find a tag opener, and we can't be
+ * transitioning into different markup states, then
+ * we can abort because there aren't any more tags.
+ */
if ( $at + 1 >= strlen( $html ) ) {
return false;
}
@@ -1111,11 +1115,12 @@ private function after_tag() {
* Converts class name updates into tag attributes updates
* (they are accumulated in different data formats for performance).
*
- * @return void
* @since 6.2.0
*
* @see $classname_updates
* @see $lexical_updates
+ *
+ * @return void
*/
private function class_name_updates_to_attributes_updates() {
if ( count( $this->classname_updates ) === 0 ) {
@@ -1155,7 +1160,7 @@ private function class_name_updates_to_attributes_updates() {
* Tracks the cursor position in the existing class
* attribute value where we're currently parsing.
*
- * @var integer
+ * @var int
*/
$at = 0;
@@ -1173,7 +1178,7 @@ private function class_name_updates_to_attributes_updates() {
*
* This flag is set upon the first change that requires a string update.
*
- * @var boolean
+ * @var bool
*/
$modified = false;
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php
index c92d0023d16c2..25a335453ccbc 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php
@@ -18,7 +18,7 @@ class WP_HTML_Tag_Processor_Bookmark_Test extends WP_UnitTestCase {
/**
* @ticket 56299
*
- * @covers set_bookmark
+ * @covers ::set_bookmark
*/
public function test_set_bookmark() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
@@ -32,7 +32,7 @@ public function test_set_bookmark() {
/**
* @ticket 56299
*
- * @covers release_bookmark
+ * @covers ::release_bookmark
*/
public function test_release_bookmark() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
@@ -45,8 +45,8 @@ public function test_release_bookmark() {
/**
* @ticket 56299
*
- * @covers seek
- * @covers set_bookmark
+ * @covers ::seek
+ * @covers ::set_bookmark
*/
public function test_seek() {
$p = new WP_HTML_Tag_Processor( '
';
From bc170866527e6e7afb174f4ac8512e0150b54064 Mon Sep 17 00:00:00 2001
From: hellofromtonya
Date: Mon, 30 Jan 2023 16:35:44 -0600
Subject: [PATCH 09/36] Renames test filenames to coding standard
---
...lTagProcessorBookmarks.php => wpHtmlTagProcessor-bookmark.php} | 0
.../html/{wpHtmlTagProcessorTest.php => wpHtmlTagProcessor.php} | 0
2 files changed, 0 insertions(+), 0 deletions(-)
rename tests/phpunit/tests/html/{wpHtmlTagProcessorBookmarks.php => wpHtmlTagProcessor-bookmark.php} (100%)
rename tests/phpunit/tests/html/{wpHtmlTagProcessorTest.php => wpHtmlTagProcessor.php} (100%)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
similarity index 100%
rename from tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php
rename to tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
similarity index 100%
rename from tests/phpunit/tests/html/wpHtmlTagProcessorTest.php
rename to tests/phpunit/tests/html/wpHtmlTagProcessor.php
From 334e4155a3c210dd4a13bf8cf9817fec67cc4b7f Mon Sep 17 00:00:00 2001
From: hellofromtonya
Date: Mon, 30 Jan 2023 17:08:10 -0600
Subject: [PATCH 10/36] Cleans HEADS from merge conflict from test file
---
.../phpunit/tests/html/wpHtmlTagProcessor.php | 53 -------------------
1 file changed, 53 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index 265ac927b1ced..730a4c017928b 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -1528,13 +1528,8 @@ public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html
/**
* @ticket 56299
*
-<<<<<<< HEAD
- * @covers ::set_attribute
- * @covers ::get_updated_html
-=======
* @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::get_updated_html
->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements)
*/
public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() {
$p = new WP_HTML_Tag_Processor(
@@ -1551,13 +1546,8 @@ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_ma
/**
* @ticket 56299
*
-<<<<<<< HEAD
- * @covers ::set_attribute
- * @covers ::get_updated_html
-=======
* @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::get_updated_html
->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements)
*/
public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
$html_input = '';
@@ -1570,13 +1560,8 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma
/**
* @ticket 56299
*
-<<<<<<< HEAD
- * @covers ::set_attribute
- * @covers ::get_updated_html
-=======
* @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::get_updated_html
->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements)
*/
public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() {
$p = new WP_HTML_Tag_Processor(
@@ -1593,13 +1578,8 @@ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit
/**
* @ticket 56299
*
-<<<<<<< HEAD
- * @covers ::get_tag
- * @covers ::next_tag
-=======
* @covers WP_HTML_Tag_Processor::get_tag
* @covers WP_HTML_Tag_Processor::next_tag
->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements)
*/
public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() {
$p = new WP_HTML_Tag_Processor( ' tags, so if we're not seeing the
- * start of one of these tokens we can proceed to the next
- * potential match in the text.
+ * At this point the only remaining state-changes occur with the
+ * tags; unless one of these appears next,
+ * proceed scanning to the next potential token in the text.
*/
if ( ! (
$at + 6 < $doc_length &&
@@ -855,8 +859,10 @@ private function skip_script_data() {
}
/*
- * We also have to make sure we terminate the script tag opener/closer
- * to avoid making partial matches on strings like `= $doc_length ) {
continue;
@@ -929,11 +935,13 @@ private function parse_next_tag() {
/*
* HTML tag names must start with [a-zA-Z] otherwise they are not tags.
- * For example, "<3" is rendered as text, not a tag opener. This means
- * if we have at least one letter following the "<" then we _do_ have
- * a tag opener and can process it as such. This is more common than
- * HTML comments, DOCTYPE tags, and other structure starting with "<"
- * so it's good to check first for the presence of the tag.
+ * For example, "<3" is rendered as text, not a tag opener. If at least
+ * one letter follows the "<" then _it is_ a tag, but if the following
+ * character is anything else it _is not a tag_.
+ *
+ * It's not uncommon to find non-tags starting with `<` in an HTML
+ * document, so it's good for performance to make this pre-check before
+ * continuing to attempt to parse a tag name.
*
* Reference:
* * https://html.spec.whatwg.org/multipage/parsing.html#data-state
@@ -949,9 +957,8 @@ private function parse_next_tag() {
}
/*
- * If we didn't find a tag opener, and we can't be
- * transitioning into different markup states, then
- * we can abort because there aren't any more tags.
+ * Abort if no tag is found before the end of
+ * the document. There is nothing left to parse.
*/
if ( $at + 1 >= strlen( $html ) ) {
return false;
@@ -963,7 +970,7 @@ private function parse_next_tag() {
*/
if ( '!' === $html[ $at + 1 ] ) {
/*
- *
+ *
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if (
@@ -981,7 +988,7 @@ private function parse_next_tag() {
}
/*
- *
+ *
* The CDATA is case-sensitive.
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
@@ -1005,7 +1012,7 @@ private function parse_next_tag() {
}
/*
- *
+ *
* These are ASCII-case-insensitive.
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
@@ -1030,14 +1037,14 @@ private function parse_next_tag() {
/*
* Anything else here is an incorrectly-opened comment and transitions
- * to the bogus comment state - we can skip to the nearest >.
+ * to the bogus comment state - skip to the nearest >.
*/
$at = strpos( $html, '>', $at + 1 );
continue;
}
/*
- * transitions to a bogus comment state – we can skip to the nearest >
+ * transitions to a bogus comment state – skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
@@ -1071,9 +1078,10 @@ private function parse_next_attribute() {
}
/*
- * Treat the equal sign ("=") as a part of the attribute name if it is the
- * first encountered byte:
- * https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+ * Treat the equal sign as a part of the attribute
+ * name if it is the first encountered byte.
+ *
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
*/
$name_length = '=' === $this->html[ $this->parsed_bytes ]
? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 )
@@ -1176,6 +1184,7 @@ private function skip_whitespace() {
* @since 6.2.0
*
* @return void
+ * @throws Exception
*/
private function after_tag() {
$this->class_name_updates_to_attributes_updates();
@@ -1224,9 +1233,9 @@ private function class_name_updates_to_attributes_updates() {
/**
* Updated "class" attribute value.
*
- * This is incrementally built as we scan through the existing class
- * attribute, omitting removed classes as we do so, and then appending
- * added classes at the end. Only when we're done processing will the
+ * This is incrementally built while scanning through the existing class
+ * attribute, skipping removed classes on the way, and then appending
+ * added classes at the end. Only when finished processing will the
* value contain the final new value.
* @var string $class
@@ -1234,24 +1243,25 @@ private function class_name_updates_to_attributes_updates() {
$class = '';
/**
- * Tracks the cursor position in the existing class
- * attribute value where we're currently parsing.
+ * Tracks the cursor position in the existing
+ * class attribute value while parsing.
*
* @var int $at
*/
$at = 0;
/**
- * Indicates if we have made any actual modifications to the existing
- * class attribute value, used to short-circuit string copying.
+ * Indicates if there's any need to modify the existing class attribute.
*
- * It's possible that we are intending to remove certain classes and add
- * others in such a way that we don't modify the existing value because
- * calls to `add_class()` and `remove_class()` occur independent of the
- * input values sent to the WP_HTML_Tag_Processor. That is, we might call
- * `remove_class()` for a class that isn't already present and we might
- * call `add_class()` for one that is, in which case we wouldn't need
- * to break apart the string and rebuild it.
+ * If a call to `add_class()` and `remove_class()` wouldn't impact
+ * the `class` attribute value then there's no need to rebuild it.
+ * For example, when adding a class that's already present or
+ * removing one that isn't.
+ *
+ * This flag enables a performance optimization when none of the enqueued
+ * class updates would impact the `class` attribute; namely, that the
+ * processor can continue without modifying the input document, as if
+ * none of the `add_class()` or `remove_class()` calls had been made.
*
* This flag is set upon the first change that requires a string update.
*
@@ -1270,7 +1280,7 @@ private function class_name_updates_to_attributes_updates() {
// Capture the class name – it's everything until the next whitespace.
$name_length = strcspn( $existing_class, " \t\f\r\n", $at );
if ( 0 === $name_length ) {
- // We're done, no more class names.
+ // If no more class names are found then that's the end.
break;
}
@@ -1283,7 +1293,7 @@ private function class_name_updates_to_attributes_updates() {
self::REMOVE_CLASS === $this->classname_updates[ $name ]
);
- // Once we've seen a class, we should never add it again.
+ // If a class has already been seen then skip it; it should not be added twice.
if ( ! $remove_class ) {
$this->classname_updates[ $name ] = self::SKIP_CLASS;
}
@@ -1296,16 +1306,20 @@ private function class_name_updates_to_attributes_updates() {
/*
* Otherwise, append it to the new "class" attribute value.
*
- * By preserving the existing whitespace instead of only adding a single
- * space (which is a valid transformation we can make) we'll introduce
- * fewer changes to the HTML content and hopefully make comparing
- * before/after easier for people trying to debug the modified output.
+ * There are options for handling whitespace between class names.
+ * Preserving the existing whitespace produces fewer changes
+ * to the HTML content and should clarify the before/after
+ * content when debugging the modified output.
+ *
+ * This approach contrasts normalizing the inter-class
+ * whitespace to a single space, which might appear cleaner
+ * in the output HTML but produce a noisier change.
*/
$class .= substr( $existing_class, $ws_at, $ws_length );
$class .= $name;
}
- // Add new classes by appending the ones we haven't already seen.
+ // Add new classes by appending those which haven't already been seen.
foreach ( $this->classname_updates as $name => $operation ) {
if ( self::ADD_CLASS === $operation ) {
$modified = true;
@@ -1340,14 +1354,14 @@ private function apply_attributes_updates() {
}
/*
- * Attribute updates can be enqueued in any order but as we
- * progress through the document to replace them we have to
- * make our replacements in the order in which they are found
- * in that document.
+ * Attribute updates can be enqueued in any order but updates
+ * to the document must occur in lexical order; that is, each
+ * replacement must be made before all others which follow it
+ * at later string indices in the input document.
*
- * Sorting the updates ensures we don't make our replacements
- * out of order, which could otherwise lead to mangled output,
- * partially-duplicate attributes, and overwritten attributes.
+ * Sorting avoids making out-of-order replacements which
+ * can lead to mangled output, partially-duplicated
+ * attributes, and overwritten attributes.
*/
usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
@@ -1357,15 +1371,16 @@ private function apply_attributes_updates() {
$this->updated_bytes = $diff->end;
}
+ /*
+ * Adjust bookmark locations to account for how the text
+ * replacements adjust offsets in the input document.
+ */
foreach ( $this->bookmarks as $bookmark ) {
/*
- * As we loop through $this->lexical_updates, we not only need to track
- * each bookmark's start and end offsets to $diff->start, but also
- * against all the accumulated changes which are being applied before the
- * bookmark. Each of these changes could impact that starting offset.
- *
- * To account for this we run a first pass through all changes for each
- * bookmark and accumulate that total delta before applying it at the end.
+ * Each lexical update which appears before the bookmark's endpoints
+ * might shift the offsets for those endpoints. Loop through each change
+ * and accumulate the total shift for each bookmark, then apply that
+ * shift after tallying the full delta.
*/
$head_delta = 0;
$tail_delta = 0;
@@ -1454,11 +1469,8 @@ private static function sort_start_ascending( $a, $b ) {
}
/*
- * We shouldn't ever get here because it would imply
- * that we have two identical updates, or that we're
- * trying to replace the same input text twice. Still
- * we'll handle this sort to preserve determinism,
- * which might come in handy when debugging.
+ * This code should be unreachable, because it implies the two replacements
+ * start at the same location and contain the same text.
*/
return $a->end - $b->end;
}
@@ -1501,7 +1513,7 @@ private function get_enqueued_attribute_value( $comparable_name ) {
* 'update' === $p->get_enqueued_attribute_value( 'data-test-id' );
* ```
*
- * Here we detect this based on the absence of the `=`, which _must_ exist in any
+ * Detect this difference based on the absence of the `=`, which _must_ exist in any
* attribute containing a value, e.g. ``.
* ¹ ²
* 1. Attribute with a string value.
@@ -1555,19 +1567,21 @@ public function get_attribute( $name ) {
$comparable = strtolower( $name );
/*
- * For every attribute other than `class` we can perform a quick check if there's an
- * enqueued lexical update whose value we should prefer over what's in the input HTML.
+ * For every attribute other than `class` it's possible to perform a quick check if
+ * there's an enqueued lexical update whose value takes priority over what's found in
+ * the input document.
*
- * The `class` attribute is special though because we expose the helpers `add_class`
- * and `remove_class` which form a builder for the `class` attribute, so we have to
- * additionally check if there are any enqueued class changes. If there are, we need
- * to first flush them out so can report the full string value of the attribute.
+ * The `class` attribute is special though because of the exposed helpers `add_class`
+ * and `remove_class`. These form a builder for the `class` attribute, so an additional
+ * check for enqueued class changes is required in addition to the check for any enqueued
+ * attribute values. If any exist, those enqueued class changes must first be flushed out
+ * into an attribute value update.
*/
if ( 'class' === $name ) {
$this->class_name_updates_to_attributes_updates();
}
- // If we have an update for this attribute, return the updated value.
+ // Return any enqueued attribute value updates if they exist.
$enqueued_value = $this->get_enqueued_attribute_value( $comparable );
if ( false !== $enqueued_value ) {
return $enqueued_value;
@@ -1712,26 +1726,23 @@ public function set_attribute( $name, $value ) {
}
/*
- * Verify that the attribute name is allowable. In WP_DEBUG
- * environments we want to crash quickly to alert developers
- * of typos and issues; but in production we don't want to
- * interrupt a normal page view, so we'll silently avoid
- * updating the attribute in those cases.
+ * WordPress rejects more characters than are strictly forbidden
+ * in HTML5. This is to prevent additional security risks deeper
+ * in the WordPress and plugin stack. Specifically the
+ * less-than (<) greater-than (>) and ampersand (&) aren't allowed.
*
- * Of note, we're disallowing more characters than are strictly
- * forbidden in HTML5. This is to prevent additional security
- * risks deeper in the WordPress and plugin stack. Specifically
- * we reject the less-than (<) greater-than (>) and ampersand (&).
- *
- * The use of a PCRE match allows us to look for specific Unicode
+ * The use of a PCRE match enables looking for specific Unicode
* code points without writing a UTF-8 decoder. Whereas scanning
* for one-byte characters is trivial (with `strcspn`), scanning
* for the longer byte sequences would be more complicated. Given
- * that this shouldn't be in the hot path for execution, we can
- * compromise the efficiency at this point without a noticeable
- * impact on the overall system.
+ * that this shouldn't be in the hot path for execution, it's a
+ * reasonable compromise in efficiency without introducing a
+ * noticeable impact on the overall system.
*
* @see https://html.spec.whatwg.org/#attributes-2
+ *
+ * @TODO as the only regex pattern maybe we should take it out? are
+ * Unicode patterns available broadly in Core?
*/
if ( preg_match(
'~[' .
@@ -1861,7 +1872,14 @@ public function remove_attribute( $name ) {
$this->classname_updates = array();
}
- // If we updated an attribute we didn't originally have, remove the enqueued update and move on.
+ /*
+ * If updating an attribute that didn't exist in the input
+ * document, then remove the enqueued update and move on.
+ *
+ * For example, this might occur when calling `remove_attribute()`
+ * after calling `set_attribute()` for the same attribute
+ * and when that attribute wasn't originally present.
+ */
if ( ! isset( $this->attributes[ $name ] ) ) {
if ( isset( $this->lexical_updates[ $name ] ) ) {
unset( $this->lexical_updates[ $name ] );
@@ -1936,6 +1954,7 @@ public function remove_class( $class_name ) {
* @see get_updated_html
*
* @return string The processed HTML.
+ * @throws Exception
*/
public function __toString() {
return $this->get_updated_html();
@@ -1975,7 +1994,12 @@ public function get_updated_html() {
// 3. Point this tag processor at the original tag opener and consume it
/*
- * When we get here we're at the end of the tag name, and we want to rewind to before it
+ * At this point the internal cursor points to the end of the tag name.
+ * Rewind before the tag name starts so that it's as if the cursor didn't
+ * move; a call to `next_tag()` will reparse the recently-updated attributes
+ * and additional calls to modify the attributes will apply at this same
+ * location.
+ *
* <p>Previous HTML<em>More HTML</em></p>
* ^ | back up by the length of the tag name plus the opening <
* \<-/ back up by strlen("em") + 1 ==> 3
@@ -2021,7 +2045,7 @@ private function parse_query( $query ) {
return;
}
- // If not using the string interface we have to pass an associative array.
+ // If not using the string interface, an associative array is required.
if ( ! is_array( $query ) ) {
return;
}
@@ -2056,7 +2080,7 @@ private function matches() {
return false;
}
- // Do we match a case-insensitive HTML tag name?
+ // Does the tag name match the requested tag name in a case-insensitive manner?
if ( null !== $this->sought_tag_name ) {
/*
* String (byte) length lookup is fast. If they aren't the
@@ -2067,12 +2091,16 @@ private function matches() {
}
/*
- * Otherwise we have to check for each character if they
- * are the same, and only `strtoupper()` if we have to.
- * Presuming that most people will supply lowercase tag
- * names and most HTML will contain lowercase tag names,
- * most of the time this runs we shouldn't expect to
- * actually run the case-folding comparison.
+ * Check each character to determine if they are the same.
+ * Defer calls to `strtoupper()` to avoid them when possible.
+ * Calling `strcasecmp()` here tested slower than comparing each
+ * character, so unless benchmarks show otherwise, it should
+ * not be used.
+ *
+ * It's expected that most of the time that this runs, a
+ * lower-case tag name will be supplied and the input will
+ * contain lower-case tag names, thus normally bypassing
+ * the case comparison code.
*/
for ( $i = 0; $i < $this->tag_name_length; $i++ ) {
$html_char = $this->html[ $this->tag_name_starts_at + $i ];
@@ -2090,19 +2118,22 @@ private function matches() {
return false;
}
- // Do we match a byte-for-byte (case-sensitive and encoding-form-sensitive) class name?
+ /*
+ * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name.
+ *
+ * This will overlook certain classes that exist in other lexical variations
+ * than was supplied to the search query; matching those would require more complicated searching.
+ */
if ( $needs_class_name ) {
$class_start = $this->attributes['class']->value_starts_at;
$class_end = $class_start + $this->attributes['class']->value_length;
$class_at = $class_start;
/*
- * We're going to have to jump through potential matches here because
- * it's possible that we have classes containing the class name we're
- * looking for. For instance, if we are looking for "even" we don't
- * want to be confused when we come to the class "not-even." This is
- * secured by ensuring that we find our sought-after class and that
- * it's surrounded on both sides by proper boundaries.
+ * Ensure that boundaries surround the class name to avoid matching on
+ * substrings of a longer name. For example, the sequence "not-odd"
+ * should not match for the class "odd" even though "odd" is found
+ * within the class attribute text.
*
* See https://html.spec.whatwg.org/#attributes-3
* See https://html.spec.whatwg.org/#space-separated-tokens
@@ -2113,9 +2144,7 @@ private function matches() {
$class_at < $class_end
) {
/*
- * Verify this class starts at a boundary. If it were at 0 we'd be at
- * the start of the string and that would be fine, otherwise we have
- * to start at a place where the preceding character is whitespace.
+ * Verify this class starts at a boundary.
*/
if ( $class_at > $class_start ) {
$character = $this->html[ $class_at - 1 ];
@@ -2127,9 +2156,7 @@ private function matches() {
}
/*
- * Similarly, verify this class ends at a boundary as well. Here we
- * can end at the very end of the string value, otherwise we have
- * to end at a place where the next character is whitespace.
+ * Verify this class ends at a boundary as well.
*/
if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) {
$character = $this->html[ $class_at + strlen( $this->sought_class_name ) ];
From 361710d91056320ca2fd2e232e02b8fa6fdde17a Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Tue, 31 Jan 2023 14:32:50 -0700
Subject: [PATCH 15/36] Rename library to "HTML-API" instead of "HTML"
---
.../{html => html-api}/class-wp-html-attribute-token.php | 2 +-
src/wp-includes/{html => html-api}/class-wp-html-span.php | 2 +-
.../{html => html-api}/class-wp-html-tag-processor.php | 2 +-
.../{html => html-api}/class-wp-html-text-replacement.php | 2 +-
src/wp-settings.php | 8 ++++----
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 4 ++--
tests/phpunit/tests/html/wpHtmlTagProcessor.php | 4 ++--
7 files changed, 12 insertions(+), 12 deletions(-)
rename src/wp-includes/{html => html-api}/class-wp-html-attribute-token.php (98%)
rename src/wp-includes/{html => html-api}/class-wp-html-span.php (97%)
rename src/wp-includes/{html => html-api}/class-wp-html-tag-processor.php (99%)
rename src/wp-includes/{html => html-api}/class-wp-html-text-replacement.php (98%)
diff --git a/src/wp-includes/html/class-wp-html-attribute-token.php b/src/wp-includes/html-api/class-wp-html-attribute-token.php
similarity index 98%
rename from src/wp-includes/html/class-wp-html-attribute-token.php
rename to src/wp-includes/html-api/class-wp-html-attribute-token.php
index 05224f8549ef1..f5c6bda0c1c51 100644
--- a/src/wp-includes/html/class-wp-html-attribute-token.php
+++ b/src/wp-includes/html-api/class-wp-html-attribute-token.php
@@ -3,7 +3,7 @@
* HTML Tag Processor: Attribute token structure class.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
* @since 6.2.0
*/
diff --git a/src/wp-includes/html/class-wp-html-span.php b/src/wp-includes/html-api/class-wp-html-span.php
similarity index 97%
rename from src/wp-includes/html/class-wp-html-span.php
rename to src/wp-includes/html-api/class-wp-html-span.php
index 2f902f3831f03..9fbc96b31870d 100644
--- a/src/wp-includes/html/class-wp-html-span.php
+++ b/src/wp-includes/html-api/class-wp-html-span.php
@@ -3,7 +3,7 @@
* HTML Span: Represents a textual span inside an HTML document.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
* @since 6.2.0
*/
diff --git a/src/wp-includes/html/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
similarity index 99%
rename from src/wp-includes/html/class-wp-html-tag-processor.php
rename to src/wp-includes/html-api/class-wp-html-tag-processor.php
index 9176dd7ce3794..6e12b0280906e 100644
--- a/src/wp-includes/html/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -22,7 +22,7 @@
* doesn't handle attribute character reference decoding rules.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
* @since 6.2.0
*/
diff --git a/src/wp-includes/html/class-wp-html-text-replacement.php b/src/wp-includes/html-api/class-wp-html-text-replacement.php
similarity index 98%
rename from src/wp-includes/html/class-wp-html-text-replacement.php
rename to src/wp-includes/html-api/class-wp-html-text-replacement.php
index a8341ad33acfe..024bd60d1c155 100644
--- a/src/wp-includes/html/class-wp-html-text-replacement.php
+++ b/src/wp-includes/html-api/class-wp-html-text-replacement.php
@@ -3,7 +3,7 @@
* HTML Tag Processor: Text replacement class.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
* @since 6.2.0
*/
diff --git a/src/wp-settings.php b/src/wp-settings.php
index 73bc1e1092fec..e867fc3009a9c 100644
--- a/src/wp-settings.php
+++ b/src/wp-settings.php
@@ -234,10 +234,10 @@
require ABSPATH . WPINC . '/class-wp-oembed-controller.php';
require ABSPATH . WPINC . '/media.php';
require ABSPATH . WPINC . '/http.php';
-require ABSPATH . WPINC . '/html/class-wp-html-attribute-token.php';
-require ABSPATH . WPINC . '/html/class-wp-html-span.php';
-require ABSPATH . WPINC . '/html/class-wp-html-text-replacement.php';
-require ABSPATH . WPINC . '/html/class-wp-html-tag-processor.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-attribute-token.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-span.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-text-replacement.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-tag-processor.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index 4e1ea93053f93..a15c180362589 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -3,11 +3,11 @@
* Unit tests covering WP_HTML_Tag_Processor bookmark functionality.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
*/
/**
- * @group html
+ * @group html-api
*
* @coversDefaultClass WP_HTML_Tag_Processor
*/
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index 730a4c017928b..e375925f51f25 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -3,11 +3,11 @@
* Unit tests covering WP_HTML_Tag_Processor functionality.
*
* @package WordPress
- * @subpackage HTML
+ * @subpackage HTML-API
*/
/**
- * @group html
+ * @group html-api
*
* @coversDefaultClass WP_HTML_Tag_Processor
*/
From 2d1411a8ea90f58a9e236114a1ad9d583c6ba7e1 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Tue, 31 Jan 2023 14:33:55 -0700
Subject: [PATCH 16/36] Un-finalize helper classes
---
src/wp-includes/html-api/class-wp-html-attribute-token.php | 2 +-
src/wp-includes/html-api/class-wp-html-span.php | 2 +-
src/wp-includes/html-api/class-wp-html-text-replacement.php | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-attribute-token.php b/src/wp-includes/html-api/class-wp-html-attribute-token.php
index f5c6bda0c1c51..2c52164a979f0 100644
--- a/src/wp-includes/html-api/class-wp-html-attribute-token.php
+++ b/src/wp-includes/html-api/class-wp-html-attribute-token.php
@@ -17,7 +17,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-final class WP_HTML_Attribute_Token {
+class WP_HTML_Attribute_Token {
/**
* Attribute name.
*
diff --git a/src/wp-includes/html-api/class-wp-html-span.php b/src/wp-includes/html-api/class-wp-html-span.php
index 9fbc96b31870d..d92778cd3a222 100644
--- a/src/wp-includes/html-api/class-wp-html-span.php
+++ b/src/wp-includes/html-api/class-wp-html-span.php
@@ -20,7 +20,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-final class WP_HTML_Span {
+class WP_HTML_Span {
/**
* Byte offset into document where span begins.
*
diff --git a/src/wp-includes/html-api/class-wp-html-text-replacement.php b/src/wp-includes/html-api/class-wp-html-text-replacement.php
index 024bd60d1c155..912b4a56a5eb4 100644
--- a/src/wp-includes/html-api/class-wp-html-text-replacement.php
+++ b/src/wp-includes/html-api/class-wp-html-text-replacement.php
@@ -17,7 +17,7 @@
*
* @see WP_HTML_Tag_Processor
*/
-final class WP_HTML_Text_Replacement {
+class WP_HTML_Text_Replacement {
/**
* Byte offset into document where replacement span begins.
*
From d8fdf41c11a58a652234c0acc49fdf444bd21295 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Tue, 31 Jan 2023 14:38:16 -0700
Subject: [PATCH 17/36] Replace throwing with trigger_error( E_USER_WARNING )
---
.../html-api/class-wp-html-tag-processor.php | 22 ++++++-------------
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 6e12b0280906e..e21214201dcef 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -655,7 +655,6 @@ public function next_tag( $query = null ) {
*
* @param string $name Identifies this particular bookmark.
* @return bool
- * @throws Exception Throws on invalid bookmark name if WP_DEBUG set.
*/
public function set_bookmark( $name ) {
if ( null === $this->tag_name_starts_at ) {
@@ -663,8 +662,8 @@ public function set_bookmark( $name ) {
}
if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) {
- if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
- throw new Exception( "Tried to jump to a non-existent HTML bookmark {$name}." );
+ if ( WP_DEBUG ) {
+ trigger_error( "Tried to jump to a non-existent HTML bookmark {$name}.", E_USER_WARNING );
}
return false;
}
@@ -1184,7 +1183,6 @@ private function skip_whitespace() {
* @since 6.2.0
*
* @return void
- * @throws Exception
*/
private function after_tag() {
$this->class_name_updates_to_attributes_updates();
@@ -1206,7 +1204,6 @@ private function after_tag() {
* @since 6.2.0
*
* @return void
- * @throws Exception
*/
private function class_name_updates_to_attributes_updates() {
if ( count( $this->classname_updates ) === 0 ) {
@@ -1421,19 +1418,18 @@ private function apply_attributes_updates() {
*
* @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
* @return bool
- * @throws Exception Throws on invalid bookmark name if WP_DEBUG set.
*/
public function seek( $bookmark_name ) {
if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
- if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
- throw new Exception( 'Invalid bookmark name' );
+ if ( WP_DEBUG ) {
+ trigger_error( 'Invalid bookmark name', E_USER_WARNING );
}
return false;
}
if ( ++$this->seek_count > self::MAX_SEEK_OPS ) {
- if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
- throw new Exception( 'Too many calls to seek() - this can lead to performance issues.' );
+ if ( WP_DEBUG ) {
+ trigger_error( 'Too many calls to seek() - this can lead to performance issues.', E_USER_WARNING );
}
return false;
}
@@ -1557,7 +1553,6 @@ private function get_enqueued_attribute_value( $comparable_name ) {
*
* @param string $name Name of attribute whose value is requested.
* @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
- * @throws Exception
*/
public function get_attribute( $name ) {
if ( null === $this->tag_name_starts_at ) {
@@ -1718,7 +1713,6 @@ public function is_tag_closer() {
* @param string $name The attribute name to target.
* @param string|bool $value The new attribute value.
* @return bool Whether an attribute value was set.
- * @throws Exception When WP_DEBUG is true and the attribute name is invalid.
*/
public function set_attribute( $name, $value ) {
if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) {
@@ -1761,7 +1755,7 @@ public function set_attribute( $name, $value ) {
$name
) ) {
if ( WP_DEBUG ) {
- throw new Exception( 'Invalid attribute name' );
+ trigger_error( 'Invalid attribute name', E_USER_WARNING );
}
return false;
@@ -1954,7 +1948,6 @@ public function remove_class( $class_name ) {
* @see get_updated_html
*
* @return string The processed HTML.
- * @throws Exception
*/
public function __toString() {
return $this->get_updated_html();
@@ -1966,7 +1959,6 @@ public function __toString() {
* @since 6.2.0
*
* @return string The processed HTML.
- * @throws Exception
*/
public function get_updated_html() {
// Short-circuit if there are no new updates to apply.
From 1465218def09e9eabd27893ef194ab5a2ef0cc59 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 07:39:09 -0700
Subject: [PATCH 18/36] Add test to check for bug when encountering unexpected
closer
---
.../phpunit/tests/html/wpHtmlTagProcessor.php | 33 +++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index e375925f51f25..c98abe3de13a3 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -1714,6 +1714,39 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() {
);
}
+ /**
+ * @ticket 56299
+ *
+ * @covers WP_HTML_Tag_Processor::next_tag
+ *
+ * @dataProvider data_skips_contents_of_script_and_rcdata_regions
+ *
+ * @param $input_html HTML with multiple divs, one of which carries the "target" attribute.
+ */
+ public function test_skips_contents_of_script_and_rcdata_regions($input_html ) {
+ $p = new WP_HTML_Tag_Processor( $input_html );
+ $p->next_tag( 'div' );
+
+ $this->assertTrue( $p->get_attribute( 'target' ) );
+ }
+
+ /**
+ * Data provider
+ *
+ * @return string[]
+ */
+ public function data_skips_contents_of_script_and_rcdata_regions() {
+ return array(
+ 'Balanced SCRIPT tags' => '
',
+ 'Unexpected SCRIPT closer after DIV' => 'console.log("
")
',
+ 'Unexpected SCRIPT closer before DIV' => 'console.log("")
',
+ 'Missing SCRIPT closer' => '
',
+ 'TITLE before DIV' => '
',
+ 'SCRIPT inside TITLE' => '
',
+ 'TITLE in TEXTAREA' => '
',
+ );
+ }
+
/**
* @ticket 56299
*
From 5c1a5d5529c1c8621cdec2fe78fa3c8598ced39c Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 10:01:37 -0700
Subject: [PATCH 19/36] Update tests: fix data provider and remove Exception
expectation
---
.../tests/html/wpHtmlTagProcessor-bookmark.php | 4 ----
tests/phpunit/tests/html/wpHtmlTagProcessor.php | 16 ++++++++--------
2 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index a15c180362589..0b63f4dafd154 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -339,8 +339,6 @@ public function test_limits_the_number_of_bookmarks() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
$p->next_tag( 'li' );
- $this->expectException( Exception::class );
-
for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
@@ -358,8 +356,6 @@ public function test_limits_the_number_of_seek_calls() {
$p->next_tag( 'li' );
$p->set_bookmark( 'bookmark' );
- $this->expectException( Exception::class );
-
for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index c98abe3de13a3..3bd965e637233 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -1733,17 +1733,17 @@ public function test_skips_contents_of_script_and_rcdata_regions($input_html ) {
/**
* Data provider
*
- * @return string[]
+ * @return array[]
*/
public function data_skips_contents_of_script_and_rcdata_regions() {
return array(
- 'Balanced SCRIPT tags' => '
',
- 'Unexpected SCRIPT closer after DIV' => 'console.log("
")
',
- 'Unexpected SCRIPT closer before DIV' => 'console.log("")
' ),
);
}
From c50ffeec49c9250450c938f661cf30cac6a859d9 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 10:04:39 -0700
Subject: [PATCH 20/36] Lint issue
---
tests/phpunit/tests/html/wpHtmlTagProcessor.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index 3bd965e637233..7f8ff0895f042 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -1723,7 +1723,7 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() {
*
* @param $input_html HTML with multiple divs, one of which carries the "target" attribute.
*/
- public function test_skips_contents_of_script_and_rcdata_regions($input_html ) {
+ public function test_skips_contents_of_script_and_rcdata_regions( $input_html ) {
$p = new WP_HTML_Tag_Processor( $input_html );
$p->next_tag( 'div' );
From 9a5ccf042e0e396a26a76cdd3ff2eb1dede70294 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 10:28:52 -0700
Subject: [PATCH 21/36] Fix broken tests
---
src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++--
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 5 ++++-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index e21214201dcef..ac004b524ca5d 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -554,7 +554,7 @@ public function next_tag( $query = null ) {
* @TODO: Add unit test case and fix (if necessary) for RCDATA tag closer coming before RCDATA tag opener.
*/
$t = $this->html[ $this->tag_name_starts_at ];
- if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) {
+ if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
$tag_name = $this->get_tag();
if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
@@ -663,7 +663,7 @@ public function set_bookmark( $name ) {
if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) {
if ( WP_DEBUG ) {
- trigger_error( "Tried to jump to a non-existent HTML bookmark {$name}.", E_USER_WARNING );
+ trigger_error( "Too many bookmarks: cannot create '{$name}'", E_USER_WARNING );
}
return false;
}
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index 0b63f4dafd154..12c5917446de8 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -339,10 +339,11 @@ public function test_limits_the_number_of_bookmarks() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
$p->next_tag( 'li' );
- for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) {
+ for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_BOOKMARKS; $i++ ) {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
+ $this->expectWarningMessageMatches( '/Too many bookmarks/' );
$this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." );
}
@@ -359,6 +360,8 @@ public function test_limits_the_number_of_seek_calls() {
for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
+
+ $this->expectWarningMessageMatches( 'Too many calls to seek()' );
$this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." );
}
}
From 13dd7d72329ed2014ffd9f0c4bb95dc295e953e9 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 10:42:13 -0700
Subject: [PATCH 22/36] Remove some TODOs, most were done already
---
.../html-api/class-wp-html-tag-processor.php | 26 +++++++++----------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index ac004b524ca5d..d06d2574ad76e 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -8,18 +8,18 @@
* Instead this scans linearly through a document and only parses
* the HTML tag openers.
*
- * @TODO: Unify language around "currently-opened tag."
- * @TODO: Organize unit test cases into normative tests, edge-case tests, regression tests.
- * @TODO: Clean up attribute token class after is_true addition
- * @TODO: Prune whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c"
- * @TODO: Skip over `/` in attributes area, split attribute names by `/`
- * @TODO: Decode HTML references/entities in class names when matching.
- * E.g. match having class `1<"2` needs to recognize `class="1<"2"`.
- * @TODO: Decode character references in `get_attribute()`
- * @TODO: Properly escape attribute value in `set_attribute()`
- * @TODO: Add slow mode to escape character entities in CSS class names?
- * (This requires a custom decoder since `html_entity_decode()`
- * doesn't handle attribute character reference decoding rules.
+ * ### Possible future direction for this module
+ *
+ * - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c".
+ * This would increase the size of the changes for some operations but leave more
+ * natural-looking output HTML.
+ * - Decode HTML character references within class names when matching. E.g. match having
+ * class `1<"2` needs to recognize `class="1&lt;&quot;2"`. Currently the Tag Processor
+ * will fail to find the right tag if the class name is encoded as such.
+ * - Properly decode HTML character references in `get_attribute()`. PHP's
+ * `html_entity_decode()` is wrong in a couple ways: it doesn't account for the
+ * no-ambiguous-ampersand rule, and it improperly handles the way semicolons may
+ * or may not terminate a character reference.
*
* @package WordPress
* @subpackage HTML-API
@@ -550,8 +550,6 @@ public function next_tag( $query = null ) {
* For non-DATA sections which might contain text that looks like HTML tags but
* isn't, scan with the appropriate alternative mode. Looking at the first letter
* of the tag name as a pre-check avoids a string allocation when it's not needed.
- *
- * @TODO: Add unit test case and fix (if necessary) for RCDATA tag closer coming before RCDATA tag opener.
*/
$t = $this->html[ $this->tag_name_starts_at ];
if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
From b31cca41cfc400709a3d36b05be036db0acdf25d Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 11:20:59 -0700
Subject: [PATCH 23/36] Expand design and limitations discussion
---
.../html-api/class-wp-html-tag-processor.php | 42 +++++++++++++++++--
1 file changed, 38 insertions(+), 4 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index d06d2574ad76e..0e4392c810608 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -206,12 +206,46 @@
* }
* ```
*
- * ## Design limitations
+ * ## Design and limitations
*
- * @TODO: Expand this section
+ * The Tag Processor is designed to linearly scan HTML documents and tokenize
+ * HTML tags and their attributes. It's designed to do this as efficiently as
+ * possible without compromising parsing integrity. Therefore it will be
+ * slower than some methods of modifying HTML, such as those incorporating
+ * over-simplified PCRE patterns, but will not introduce the defects and
+ * failures that those methods bring in, which lead to broken page renders
+ * and often to security vulnerabilities. On the other hand, it will be faster
+ * than full-blown HTML parsers such as DOMDocument and use considerably
+ * less memory. It requires a negligible memory overhead, enough to consider
+ * it a zero-overhead system.
*
- * - No nesting: cannot match open and close tag.
- * - Class names are not decoded if they contain character references.
+ * The performance characteristics are maintained by avoiding tree construction
+ * and semantic cleanups which are specified in HTML5. Because of this, for
+ * example, it's not possible for the Tag Processor to associate any given
+ * opening tag with its corresponding closing tag, or to return the inner markup
+ * inside an element. Systems may be built on top of the Tag Processor to do
+ * this, but the Tag Processor is and should be constrained so it can remain an
+ * efficient, low-level, and reliable HTML scanner.
+ *
+ * The Tag Processor's design incorporates a "garbage-in-garbage-out" philosophy.
+ * HTML5 specifies that certain invalid content be transformed into different forms
+ * for display, such as removing null bytes from an input document and replacing
+ * invalid characters with the Unicode replacement character U+FFFD �. Where errors
+ * or transformations exist within the HTML5 specification, the Tag Processor leaves
+ * those invalid inputs untouched, passing them through to the final browser to handle.
+ * While this implies that certain operations will be non-spec-compliant, such as
+ * reading the value of an attribute with invalid content, it also preserves a
+ * simplicity and efficiency for handling those error cases.
+ *
+ * Most operations within the Tag Processor are designed to minimize the difference
+ * between an input and output document for any given change. For example, the
+ * `add_class` and `remove_class` methods preserve whitespace and the class ordering
+ * within the `class` attribute; and when encountering tags with duplicated attributes,
+ * the Tag Processor will leave those invalid duplicate attributes where they are but
+ * update the proper attribute which the browser will read for parsing its value. An
+ * exception to this rule is that all attribute updates store their values as
+ * double-quoted strings, meaning that attributes on input with single-quoted or
+ * unquoted values will appear in the output with double-quotes.
*
* @since 6.2.0
*/
From 1a9bec05c8c3b3bfda5e1cb5adcb9effc863cf19 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 11:24:38 -0700
Subject: [PATCH 24/36] Loosen assertion on warning
---
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index 12c5917446de8..2be4b40363429 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -343,7 +343,7 @@ public function test_limits_the_number_of_bookmarks() {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
- $this->expectWarningMessageMatches( '/Too many bookmarks/' );
+ $this->expectWarning();
$this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." );
}
@@ -361,7 +361,7 @@ public function test_limits_the_number_of_seek_calls() {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
- $this->expectWarningMessageMatches( 'Too many calls to seek()' );
+ $this->expectWarning();
$this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." );
}
}
From 28e9bf359529c3412438a8135dbfe165aeaffe27 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 13:49:36 -0700
Subject: [PATCH 25/36] Rename some properties to clarify their purpose and
expand comments.
Also:
- Change visibility of some properties to `protected` to aid with
in-progress expansion of the HTML API.
- Refactor short-circuit checks in `get_updated_html()` for clarity.
---
.../html-api/class-wp-html-tag-processor.php | 203 +++++++++++-------
1 file changed, 122 insertions(+), 81 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 0e4392c810608..a56e7ecd9c2a5 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -319,29 +319,50 @@ class WP_HTML_Tag_Processor {
private $stop_on_tag_closers;
/**
- * The updated HTML document.
+ * Holds updated HTML as updates are applied.
+ *
+ * Updates and unmodified portions of the input document are
+ * appended to this value as they are applied. It will hold
+ * a copy of the updated document up until the point of the
+ * latest applied update. The fully-updated HTML document
+ * will comprise this value plus the part of the input document
+ * which follows that latest update.
+ *
+ * @see $bytes_already_copied
*
* @since 6.2.0
* @var string
*/
- private $updated_html = '';
+ private $output_buffer = '';
/**
- * How many bytes from the original HTML document were already read.
+ * How many bytes from the original HTML document have been read and parsed.
+ *
+ * This value points to the latest byte offset in the input document which
+ * has been already parsed. It is the internal cursor for the Tag Processor
+ * and updates while scanning through the HTML tokens.
*
* @since 6.2.0
* @var int
*/
- private $parsed_bytes = 0;
+ private $bytes_already_parsed = 0;
/**
- * How many bytes from the original HTML document were already treated
- * with the requested replacements.
+ * How many bytes from the input HTML document have already been
+ * copied into the output buffer.
+ *
+ * Lexical updates are enqueued and processed in batches. Prior
+ * to any given update in the input document, there might exist
+ * a span of HTML unaffected by any changes. This span ought to
+ * be copied verbatim into the output buffer before applying the
+ * following update. This value will point to the starting byte
+ * offset in the input document where that unaffected span of
+ * HTML starts.
*
* @since 6.2.0
* @var int
*/
- private $updated_bytes = 0;
+ private $bytes_already_copied = 0;
/**
* Byte offset in input document where current tag name starts.
@@ -458,7 +479,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var WP_HTML_Span[]
*/
- private $bookmarks = array();
+ protected $bookmarks = array();
const ADD_CLASS = true;
const REMOVE_CLASS = false;
@@ -507,7 +528,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var WP_HTML_Text_Replacement[]
*/
- private $lexical_updates = array();
+ protected $lexical_updates = array();
/**
* Tracks and limits `seek()` calls to prevent accidental infinite loops.
@@ -516,7 +537,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var int
*/
- private $seek_count = 0;
+ protected $seek_count = 0;
/**
* Constructor.
@@ -551,13 +572,13 @@ public function next_tag( $query = null ) {
$already_found = 0;
do {
- if ( $this->parsed_bytes >= strlen( $this->html ) ) {
+ if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
return false;
}
// Find the next tag if it exists.
if ( false === $this->parse_next_tag() ) {
- $this->parsed_bytes = strlen( $this->html );
+ $this->bytes_already_parsed = strlen( $this->html );
return false;
}
@@ -568,12 +589,12 @@ public function next_tag( $query = null ) {
}
// Ensure that the tag closes before the end of the document.
- $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes );
+ $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed );
if ( false === $tag_ends_at ) {
return false;
}
- $this->tag_ends_at = $tag_ends_at;
- $this->parsed_bytes = $tag_ends_at;
+ $this->tag_ends_at = $tag_ends_at;
+ $this->bytes_already_parsed = $tag_ends_at;
// Finally, check if the parsed tag and its attributes match the search query.
if ( $this->matches() ) {
@@ -590,13 +611,13 @@ public function next_tag( $query = null ) {
$tag_name = $this->get_tag();
if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
- $this->parsed_bytes = strlen( $this->html );
+ $this->bytes_already_parsed = strlen( $this->html );
return false;
} elseif (
( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) &&
! $this->skip_rcdata( $tag_name )
) {
- $this->parsed_bytes = strlen( $this->html );
+ $this->bytes_already_parsed = strlen( $this->html );
return false;
}
}
@@ -744,14 +765,14 @@ private function skip_rcdata( $tag_name ) {
$doc_length = strlen( $html );
$tag_length = strlen( $tag_name );
- $at = $this->parsed_bytes;
+ $at = $this->bytes_already_parsed;
while ( false !== $at && $at < $doc_length ) {
$at = strpos( $this->html, '', $at );
// If there is no possible tag closer then fail.
if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
- $this->parsed_bytes = $doc_length;
+ $this->bytes_already_parsed = $doc_length;
return false;
}
@@ -775,8 +796,8 @@ private function skip_rcdata( $tag_name ) {
}
}
- $at += $tag_length;
- $this->parsed_bytes = $at;
+ $at += $tag_length;
+ $this->bytes_already_parsed = $at;
/*
* Ensure that the tag name terminates to avoid matching on
@@ -792,13 +813,13 @@ private function skip_rcdata( $tag_name ) {
while ( $this->parse_next_attribute() ) {
continue;
}
- $at = $this->parsed_bytes;
+ $at = $this->bytes_already_parsed;
if ( $at >= strlen( $this->html ) ) {
return false;
}
if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) {
- ++$this->parsed_bytes;
+ ++$this->bytes_already_parsed;
return true;
}
}
@@ -815,7 +836,7 @@ private function skip_script_data() {
$state = 'unescaped';
$html = $this->html;
$doc_length = strlen( $html );
- $at = $this->parsed_bytes;
+ $at = $this->bytes_already_parsed;
while ( false !== $at && $at < $doc_length ) {
$at += strcspn( $html, '-<', $at );
@@ -916,8 +937,8 @@ private function skip_script_data() {
}
if ( $is_closing ) {
- $this->parsed_bytes = $at;
- if ( $this->parsed_bytes >= $doc_length ) {
+ $this->bytes_already_parsed = $at;
+ if ( $this->bytes_already_parsed >= $doc_length ) {
return false;
}
@@ -925,8 +946,8 @@ private function skip_script_data() {
continue;
}
- if ( '>' === $html[ $this->parsed_bytes ] ) {
- ++$this->parsed_bytes;
+ if ( '>' === $html[ $this->bytes_already_parsed ] ) {
+ ++$this->bytes_already_parsed;
return true;
}
}
@@ -949,7 +970,7 @@ private function parse_next_tag() {
$html = $this->html;
$doc_length = strlen( $html );
- $at = $this->parsed_bytes;
+ $at = $this->bytes_already_parsed;
while ( false !== $at && $at < $doc_length ) {
$at = strpos( $html, '<', $at );
@@ -981,9 +1002,9 @@ private function parse_next_tag() {
$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
if ( $tag_name_prefix_length > 0 ) {
++$at;
- $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
- $this->tag_name_starts_at = $at;
- $this->parsed_bytes = $at + $this->tag_name_length;
+ $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
+ $this->tag_name_starts_at = $at;
+ $this->bytes_already_parsed = $at + $this->tag_name_length;
return true;
}
@@ -1103,8 +1124,8 @@ private function parse_next_tag() {
*/
private function parse_next_attribute() {
// Skip whitespace and slashes.
- $this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes );
- if ( $this->parsed_bytes >= strlen( $this->html ) ) {
+ $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
+ if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
return false;
}
@@ -1114,53 +1135,53 @@ private function parse_next_attribute() {
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
*/
- $name_length = '=' === $this->html[ $this->parsed_bytes ]
- ? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 )
- : strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes );
+ $name_length = '=' === $this->html[ $this->bytes_already_parsed ]
+ ? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 )
+ : strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed );
// No attribute, just tag closer.
- if ( 0 === $name_length || $this->parsed_bytes + $name_length >= strlen( $this->html ) ) {
+ if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= strlen( $this->html ) ) {
return false;
}
- $attribute_start = $this->parsed_bytes;
- $attribute_name = substr( $this->html, $attribute_start, $name_length );
- $this->parsed_bytes += $name_length;
- if ( $this->parsed_bytes >= strlen( $this->html ) ) {
+ $attribute_start = $this->bytes_already_parsed;
+ $attribute_name = substr( $this->html, $attribute_start, $name_length );
+ $this->bytes_already_parsed += $name_length;
+ if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
return false;
}
$this->skip_whitespace();
- if ( $this->parsed_bytes >= strlen( $this->html ) ) {
+ if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
return false;
}
- $has_value = '=' === $this->html[ $this->parsed_bytes ];
+ $has_value = '=' === $this->html[ $this->bytes_already_parsed ];
if ( $has_value ) {
- ++$this->parsed_bytes;
+ ++$this->bytes_already_parsed;
$this->skip_whitespace();
- if ( $this->parsed_bytes >= strlen( $this->html ) ) {
+ if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
return false;
}
- switch ( $this->html[ $this->parsed_bytes ] ) {
+ switch ( $this->html[ $this->bytes_already_parsed ] ) {
case "'":
case '"':
- $quote = $this->html[ $this->parsed_bytes ];
- $value_start = $this->parsed_bytes + 1;
- $value_length = strcspn( $this->html, $quote, $value_start );
- $attribute_end = $value_start + $value_length + 1;
- $this->parsed_bytes = $attribute_end;
+ $quote = $this->html[ $this->bytes_already_parsed ];
+ $value_start = $this->bytes_already_parsed + 1;
+ $value_length = strcspn( $this->html, $quote, $value_start );
+ $attribute_end = $value_start + $value_length + 1;
+ $this->bytes_already_parsed = $attribute_end;
break;
default:
- $value_start = $this->parsed_bytes;
- $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start );
- $attribute_end = $value_start + $value_length;
- $this->parsed_bytes = $attribute_end;
+ $value_start = $this->bytes_already_parsed;
+ $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start );
+ $attribute_end = $value_start + $value_length;
+ $this->bytes_already_parsed = $attribute_end;
}
} else {
- $value_start = $this->parsed_bytes;
+ $value_start = $this->bytes_already_parsed;
$value_length = 0;
$attribute_end = $attribute_start + $name_length;
}
@@ -1206,7 +1227,7 @@ private function parse_next_attribute() {
* @return void
*/
private function skip_whitespace() {
- $this->parsed_bytes += strspn( $this->html, " \t\f\r\n", $this->parsed_bytes );
+ $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n", $this->bytes_already_parsed );
}
/**
@@ -1395,9 +1416,9 @@ private function apply_attributes_updates() {
usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
foreach ( $this->lexical_updates as $diff ) {
- $this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes );
- $this->updated_html .= $diff->text;
- $this->updated_bytes = $diff->end;
+ $this->output_buffer .= substr( $this->html, $this->bytes_already_copied, $diff->start - $this->bytes_already_copied );
+ $this->output_buffer .= $diff->text;
+ $this->bytes_already_copied = $diff->end;
}
/*
@@ -1470,9 +1491,9 @@ public function seek( $bookmark_name ) {
$this->get_updated_html();
// Point this tag processor before the sought tag opener and consume it.
- $this->parsed_bytes = $this->bookmarks[ $bookmark_name ]->start;
- $this->updated_bytes = $this->parsed_bytes;
- $this->updated_html = substr( $this->html, 0, $this->updated_bytes );
+ $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
+ $this->bytes_already_copied = $this->bytes_already_parsed;
+ $this->output_buffer = substr( $this->html, 0, $this->bytes_already_copied );
return $this->next_tag();
}
@@ -1993,42 +2014,62 @@ public function __toString() {
* @return string The processed HTML.
*/
public function get_updated_html() {
- // Short-circuit if there are no new updates to apply.
- if ( ! count( $this->classname_updates ) && ! count( $this->lexical_updates ) ) {
- return $this->updated_html . substr( $this->html, $this->updated_bytes );
+ $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates );
+
+ /*
+ * When there is nothing more to update and nothing has already been
+ * updated, return the original document and avoid a string copy.
+ */
+ if ( $requires_no_updating && $this->bytes_already_copied === 0 ) {
+ return $this->html;
+ }
+
+ /*
+ * If there are no updates left to apply, but some have already
+ * been applied, then finish by copying the rest of the input
+ * to the end of the updated document and return.
+ */
+ if ( $requires_no_updating && $this->bytes_already_copied > 0 ) {
+ return $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
}
- // Otherwise: apply the updates, rewind before the current tag, and parse it again.
- $delta_between_updated_html_end_and_current_tag_end = substr(
+ // Apply the updates, rewind to before the current tag, and reparse the attributes.
+ $content_up_to_opened_tag_name = $this->output_buffer . substr(
$this->html,
- $this->updated_bytes,
- $this->tag_name_starts_at + $this->tag_name_length - $this->updated_bytes
+ $this->bytes_already_copied,
+ $this->tag_name_starts_at + $this->tag_name_length - $this->bytes_already_copied
);
- $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end;
- // 1. Apply the attributes updates to the original HTML
+ /*
+ * 1. Apply the edits by flushing them to the output_buffer and updating the copied byte count.
+ *
+ * Note: `apply_attributes_updates()` modifies `$this->output_buffer`.
+ */
$this->class_name_updates_to_attributes_updates();
$this->apply_attributes_updates();
- // 2. Replace the original HTML with the updated HTML
- $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes );
- $this->updated_html = $updated_html_up_to_current_tag_name_end;
- $this->updated_bytes = strlen( $this->updated_html );
-
- // 3. Point this tag processor at the original tag opener and consume it
+ /*
+ * 2. Replace the original HTML with the now-updated HTML it's possible to seek to a previous
+ * location and have a consistent view of the updated document.
+ */
+ $this->html = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
+ $this->output_buffer = $content_up_to_opened_tag_name;
+ $this->bytes_already_copied = strlen( $this->output_buffer );
/*
+ * 3. Point this tag processor at the original tag opener and consume it
+ *
* At this point the internal cursor points to the end of the tag name.
* Rewind before the tag name starts so that it's as if the cursor didn't
* move; a call to `next_tag()` will reparse the recently-updated attributes
* and additional calls to modify the attributes will apply at this same
- * lcoation.
+ * location.
*
* <p>Previous HTML<em>More HTML</em></p>
*                 ^  | back up by the length of the tag name plus the opening <
*                 \<-/ back up by strlen("em") + 1 ==> 3
*/
- $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 1;
+ $this->bytes_already_parsed = strlen( $content_up_to_opened_tag_name ) - $this->tag_name_length - 1;
$this->next_tag();
return $this->html;
From 1e2ef09b0de2a4cee0cd6a70c6edea577cbd7dc3 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 14:07:01 -0700
Subject: [PATCH 26/36] Linter: yoda condition
---
src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index a56e7ecd9c2a5..3efe83d3d7de3 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2020,7 +2020,7 @@ public function get_updated_html() {
* When there is nothing more to update and nothing has already been
* updated, return the original document and avoid a string copy.
*/
- if ( $requires_no_updating && $this->bytes_already_copied === 0 ) {
+ if ( $requires_no_updating && 0 === $this->bytes_already_copied ) {
return $this->html;
}
From 243dc7ccd8267cb5a84440000d876726da1fad79 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 14:12:14 -0700
Subject: [PATCH 27/36] Typos in comments
---
src/wp-includes/html-api/class-wp-html-tag-processor.php | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 3efe83d3d7de3..f29fbd80c9a01 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2041,7 +2041,7 @@ public function get_updated_html() {
);
/*
- * 1. Apply the edits by flushing them to the output_buffer and updating the copied byte count.
+ * 1. Apply the edits by flushing them to the output buffer and updating the copied byte count.
*
* Note: `apply_attributes_updates()` modifies `$this->output_buffer`.
*/
@@ -2049,8 +2049,8 @@ public function get_updated_html() {
$this->apply_attributes_updates();
/*
- * 2. Replace the original HTML with the now-updated HTML it's possible to seek to a previous
- * location and have a consistent view of the updated document.
+ * 2. Replace the original HTML with the now-updated HTML so that it's possible to
+ * seek to a previous location and have a consistent view of the updated document.
*/
$this->html = $this->output_buffer . substr( $this->html, $this->bytes_already_copied );
$this->output_buffer = $content_up_to_opened_tag_name;
From 81529882ee2498ef8ae1f3e9e839989f4df73ee0 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 15:44:44 -0700
Subject: [PATCH 28/36] Rework @covers attributes
---
.../html/wpHtmlTagProcessor-bookmark.php | 44 +++--
.../phpunit/tests/html/wpHtmlTagProcessor.php | 160 ++++--------------
2 files changed, 52 insertions(+), 152 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index 2be4b40363429..d78ca26e0a4d8 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -16,7 +16,7 @@ class Tests_HTML_wpHtmlTagProcessor_Bookmark extends WP_UnitTestCase {
/**
* @ticket 56299
*
- * @covers ::set_bookmark
+ * @covers WP_HTML_Tag_Processor::set_bookmark
*/
public function test_set_bookmark() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
@@ -30,7 +30,7 @@ public function test_set_bookmark() {
/**
* @ticket 56299
*
- * @covers ::release_bookmark
+ * @covers WP_HTML_Tag_Processor::release_bookmark
*/
public function test_release_bookmark() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
@@ -43,8 +43,7 @@ public function test_release_bookmark() {
/**
* @ticket 56299
*
- * @covers ::seek
- * @covers ::set_bookmark
+ * @covers WP_HTML_Tag_Processor::seek
*/
public function test_seek() {
$p = new WP_HTML_Tag_Processor( '
',
$p->get_updated_html()
);
@@ -284,8 +280,7 @@ public function test_updates_bookmark_for_additions_before_both_sides() {
/**
* @ticket 56299
*
- * @covers ::seek
- * @covers ::set_bookmark
+ * @covers WP_HTML_Tag_Processor::seek
*/
public function test_updates_bookmark_for_deletions_after_both_sides() {
$p = new WP_HTML_Tag_Processor( '
First
Second
' );
@@ -297,7 +292,7 @@ public function test_updates_bookmark_for_deletions_after_both_sides() {
$p->seek( 'first' );
$p->set_attribute( 'untouched', true );
- $this->assertEquals(
+ $this->assertSame(
/** @TODO: we shouldn't have to assert the extra space after removing the attribute. */
'
First
Second
',
$p->get_updated_html()
@@ -307,8 +302,7 @@ public function test_updates_bookmark_for_deletions_after_both_sides() {
/**
* @ticket 56299
*
- * @covers ::seek
- * @covers ::set_bookmark
+ * @covers WP_HTML_Tag_Processor::seek
*/
public function test_updates_bookmark_for_deletions_before_both_sides() {
$p = new WP_HTML_Tag_Processor( '
First
Second
' );
@@ -323,7 +317,7 @@ public function test_updates_bookmark_for_deletions_before_both_sides() {
$p->seek( 'second' );
$p->set_attribute( 'safe', true );
- $this->assertEquals(
+ $this->assertSame(
/** @TODO: we shouldn't have to assert the extra space after removing the attribute. */
'
First
Second
',
$p->get_updated_html()
@@ -333,7 +327,7 @@ public function test_updates_bookmark_for_deletions_before_both_sides() {
/**
* @ticket 56299
*
- * @covers ::set_bookmark
+ * @covers WP_HTML_Tag_Processor::set_bookmark
*/
public function test_limits_the_number_of_bookmarks() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
@@ -350,7 +344,7 @@ public function test_limits_the_number_of_bookmarks() {
/**
* @ticket 56299
*
- * @covers ::seek
+ * @covers WP_HTML_Tag_Processor::seek
*/
public function test_limits_the_number_of_seek_calls() {
$p = new WP_HTML_Tag_Processor( '
One
Two
Three
' );
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
index 7f8ff0895f042..60a935196ee70 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php
@@ -30,7 +30,6 @@ public function test_get_tag_returns_null_before_finding_tags() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_tag
*/
public function test_get_tag_returns_null_when_not_in_open_tag() {
@@ -43,7 +42,6 @@ public function test_get_tag_returns_null_when_not_in_open_tag() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_tag
*/
public function test_get_tag_returns_open_tag_name() {
@@ -67,7 +65,6 @@ public function test_get_attribute_returns_null_before_finding_tags() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_null_when_not_in_open_tag() {
@@ -80,7 +77,6 @@ public function test_get_attribute_returns_null_when_not_in_open_tag() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_null_when_in_closing_tag() {
@@ -94,7 +90,6 @@ public function test_get_attribute_returns_null_when_in_closing_tag() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_null_when_attribute_missing() {
@@ -107,7 +102,6 @@ public function test_get_attribute_returns_null_when_attribute_missing() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_attribute_value() {
@@ -120,7 +114,6 @@ public function test_get_attribute_returns_attribute_value() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_true_for_boolean_attribute() {
@@ -133,13 +126,12 @@ public function test_get_attribute_returns_true_for_boolean_attribute() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_returns_string_for_truthy_attributes() {
$p = new WP_HTML_Tag_Processor( '
Test
' );
- $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' );
+ $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' );
$this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' );
$this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' );
$this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' );
@@ -148,7 +140,7 @@ public function test_get_attribute_returns_string_for_truthy_attributes() {
/**
* @ticket 56299
*
- * @covers ::get_attribute
+ * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_get_attribute_decodes_html_character_references() {
$p = new WP_HTML_Tag_Processor( '' );
@@ -160,13 +152,12 @@ public function test_get_attribute_decodes_html_character_references() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_attributes_parser_treats_slash_as_attribute_separator() {
$p = new WP_HTML_Tag_Processor( '
Test
' );
- $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' );
+ $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' );
$this->assertTrue( $p->get_attribute( 'a' ), 'Accessing an existing attribute did not return true' );
$this->assertTrue( $p->get_attribute( 'b' ), 'Accessing an existing attribute did not return true' );
$this->assertTrue( $p->get_attribute( 'c' ), 'Accessing an existing attribute did not return true' );
@@ -177,7 +168,6 @@ public function test_attributes_parser_treats_slash_as_attribute_separator() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*
* @dataProvider data_attribute_name_case_variants
@@ -194,7 +184,6 @@ public function test_get_attribute_is_case_insensitive_for_attributes_with_value
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::get_attribute
*
* @dataProvider data_attribute_name_case_variants
@@ -225,7 +214,6 @@ public function data_attribute_name_case_variants() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::remove_attribute
*/
public function test_remove_attribute_is_case_insensitive() {
@@ -239,7 +227,6 @@ public function test_remove_attribute_is_case_insensitive() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::set_attribute
*/
public function test_set_attribute_is_case_insensitive() {
@@ -314,8 +301,6 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
* @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix
*/
public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
@@ -397,7 +382,7 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p
/**
* @ticket 56299
*
- * @covers ::get_updated_html
+ * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_get_updated_html_without_updating_any_attributes_returns_the_original_html() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -463,8 +448,7 @@ public function test_next_tag_should_stop_on_closers_only_when_requested() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
- * @covers WP_HTML_Tag_Processor::get_updated_html
+ * @covers WP_HTML_Tag_Processor::set_attribute
*/
public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -484,12 +468,10 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar
/**
* @ticket 56299
*
- * @covers ::is_tag_closer
- * @covers ::set_attribute
- * @covers ::remove_attribute
- * @covers ::add_class
- * @covers ::remove_class
- * @covers ::get_updated_html
+ * @covers WP_HTML_Tag_Processor::set_attribute
+ * @covers WP_HTML_Tag_Processor::remove_attribute
+ * @covers WP_HTML_Tag_Processor::add_class
+ * @covers WP_HTML_Tag_Processor::remove_class
*/
public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
$p = new WP_HTML_Tag_Processor( '' );
@@ -522,21 +504,21 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
}
/**
- * Passing a double quote inside of an attribute values could lead to an XSS attack as follows:
+ * Passing a double quote inside of an attribute value could lead to an XSS attack as follows:
*
- *
+ * ```php
* $p = new WP_HTML_Tag_Processor( '' );
* $p->next_tag();
* $p->set_attribute('class', '" onclick="alert');
* echo $p;
* //
- *
+ * ```
*
* To prevent it, `set_attribute` calls `esc_attr()` on its given values.
*
- *
+ * ```php
*
- *
+ * ```
*
* @ticket 56299
*
@@ -591,8 +573,6 @@ public function data_set_attribute_prevents_xss() {
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -614,11 +594,9 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
* @covers WP_HTML_Tag_Processor::get_attribute
*/
- public function test_get_attribute_returns_updated_values_before_they_are_updated() {
+ public function test_get_attribute_returns_updated_values_before_they_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->set_attribute( 'test-attribute', 'test-value' );
@@ -638,11 +616,9 @@ public function test_get_attribute_returns_updated_values_before_they_are_update
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() {
+ public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->set_attribute( 'test-ATTribute', 'test-value' );
@@ -662,11 +638,9 @@ public function test_get_attribute_returns_updated_values_before_they_are_update
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
* @covers WP_HTML_Tag_Processor::get_attribute
*/
- public function test_get_attribute_reflects_added_class_names_before_they_are_updated() {
+ public function test_get_attribute_reflects_added_class_names_before_they_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->add_class( 'my-class' );
@@ -686,11 +660,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
* @covers WP_HTML_Tag_Processor::get_attribute
*/
- public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() {
+ public function test_get_attribute_reflects_added_class_names_before_they_are_applied_and_retains_classes_from_previous_add_class_calls() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->add_class( 'my-class' );
@@ -718,11 +690,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::remove_attribute
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() {
+ public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->remove_attribute( 'id' );
@@ -741,12 +711,9 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_updat
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::remove_attribute
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() {
+ public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->set_attribute( 'test-attribute', 'test-value' );
@@ -766,12 +733,9 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::remove_attribute
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() {
+ public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
$p->next_tag();
$p->set_attribute( 'id', 'test-value' );
@@ -791,11 +755,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
* @covers WP_HTML_Tag_Processor::get_attribute
*/
- public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() {
+ public function test_get_attribute_reflects_removed_class_names_before_they_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
$p->next_tag();
$p->remove_class( 'with-border' );
@@ -815,12 +777,9 @@ public function test_get_attribute_reflects_removed_class_names_before_they_are_
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::remove_class
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() {
+ public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_those_updates_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
$p->next_tag();
$p->add_class( 'foo-class' );
@@ -841,12 +800,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_a_class_na
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::remove_class
* @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() {
+ public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_those_updates_are_applied() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
$p->next_tag();
$p->add_class( 'with-border' );
@@ -871,9 +827,8 @@ public function test_get_attribute_reflects_duplicating_and_then_removing_an_exi
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_update_first_when_duplicated_attribute() {
+ public function test_update_first_attribute_when_duplicated_attributes_exist() {
$p = new WP_HTML_Tag_Processor( '
Text
' );
$p->next_tag();
$p->set_attribute( 'id', 'updated-id' );
@@ -885,7 +840,6 @@ public function test_update_first_when_duplicated_attribute() {
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -897,8 +851,8 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v
/**
* @ticket 56299
*
+ * @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -917,12 +871,13 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the
* and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it.
* Let's revisit if and when this becomes a problem.
*
- * This test is in place to confirm this behavior, while incorrect, is well-defined.
+ * This test is in place to confirm this behavior, which, while incorrect, is well-defined.
+ * A later fix introduced to the Tag Processor should update this test to reflect the
+ * wanted and correct behavior.
*
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_remove_first_when_duplicated_attribute() {
$p = new WP_HTML_Tag_Processor( '
Text
' );
@@ -936,7 +891,6 @@ public function test_remove_first_when_duplicated_attribute() {
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -950,7 +904,6 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -964,8 +917,6 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_add_class_creates_a_class_attribute_when_there_is_none() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -988,8 +939,6 @@ public function test_add_class_creates_a_class_attribute_when_there_is_none() {
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -1013,8 +962,6 @@ public function test_calling_add_class_twice_creates_a_class_attribute_with_both
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -1036,8 +983,6 @@ public function test_remove_class_does_not_change_the_markup_when_there_is_no_cl
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1061,8 +1006,6 @@ public function test_add_class_appends_class_names_to_the_existing_class_attribu
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1085,8 +1028,6 @@ public function test_remove_class_removes_a_single_class_from_the_class_attribut
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1109,8 +1050,6 @@ public function test_calling_remove_class_with_all_listed_class_names_removes_th
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_add_class_does_not_add_duplicate_class_names() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1133,8 +1072,6 @@ public function test_add_class_does_not_add_duplicate_class_names() {
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1157,8 +1094,6 @@ public function test_add_class_preserves_class_name_order_when_a_duplicate_class
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() {
$p = new WP_HTML_Tag_Processor(
@@ -1183,8 +1118,6 @@ public function test_add_class_when_there_is_a_class_attribute_with_excessive_wh
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() {
$p = new WP_HTML_Tag_Processor(
@@ -1209,8 +1142,6 @@ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_at
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::remove_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() {
$p = new WP_HTML_Tag_Processor(
@@ -1242,8 +1173,6 @@ public function test_removing_all_classes_removes_the_existing_class_attribute_f
*
* @covers WP_HTML_Tag_Processor::add_class
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
- * @covers WP_HTML_Tag_Processor::get_attribute
*/
public function test_set_attribute_takes_priority_over_add_class() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1291,8 +1220,6 @@ public function test_set_attribute_takes_priority_over_add_class() {
*
* @covers WP_HTML_Tag_Processor::add_class
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_set_attribute_takes_priority_over_add_class_even_before_updating() {
$p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES );
@@ -1329,10 +1256,7 @@ public function test_set_attribute_takes_priority_over_add_class_even_before_upd
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_add_class_overrides_boolean_class_attribute() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -1354,10 +1278,7 @@ public function test_add_class_overrides_boolean_class_attribute() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_add_class_overrides_boolean_class_attribute_even_before_updating() {
$p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE );
@@ -1479,9 +1400,7 @@ public function test_advanced_use_case() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::remove_attribute
- * @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
+ * @covers WP_HTML_Tag_Processor::next_tag
*/
public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() {
$p = new WP_HTML_Tag_Processor(
@@ -1511,9 +1430,8 @@ public function test_correctly_parses_html_attributes_wrapped_in_single_quotatio
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
- public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() {
+ public function test_set_attribute_with_value_equal_to_true_adds_a_boolean_html_attribute_with_implicit_value() {
$p = new WP_HTML_Tag_Processor(
''
);
@@ -1529,7 +1447,6 @@ public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() {
$p = new WP_HTML_Tag_Processor(
@@ -1547,7 +1464,6 @@ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_ma
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
$html_input = '';
@@ -1561,7 +1477,6 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma
* @ticket 56299
*
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() {
$p = new WP_HTML_Tag_Processor(
@@ -1578,7 +1493,6 @@ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::get_tag
* @covers WP_HTML_Tag_Processor::next_tag
*/
public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() {
@@ -1752,7 +1666,6 @@ public function data_skips_contents_of_script_and_rcdata_regions() {
*
* @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_can_query_and_update_wrongly_nested_tags() {
$p = new WP_HTML_Tag_Processor(
@@ -1773,7 +1686,6 @@ public function test_can_query_and_update_wrongly_nested_tags() {
*
* @covers WP_HTML_Tag_Processor::next_tag
* @covers WP_HTML_Tag_Processor::remove_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_removing_specific_attributes_in_malformed_html() {
$p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED );
@@ -1788,9 +1700,7 @@ public function test_removing_specific_attributes_in_malformed_html() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_Tag
* @covers WP_HTML_Tag_Processor::set_attribute
- * @covers WP_HTML_Tag_Processor::get_updated_html
*/
public function test_updating_specific_attributes_in_malformed_html() {
$p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED );
@@ -1807,10 +1717,8 @@ public function test_updating_specific_attributes_in_malformed_html() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
- * @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
+ * @covers WP_HTML_Tag_Processor::set_attribute
*
* @dataProvider data_updating_attributes
*
@@ -1872,10 +1780,8 @@ public function data_updating_attributes() {
/**
* @ticket 56299
*
- * @covers WP_HTML_Tag_Processor::next_tag
- * @covers WP_HTML_Tag_Processor::set_attribute
* @covers WP_HTML_Tag_Processor::add_class
- * @covers WP_HTML_Tag_Processor::get_updated_html
+ * @covers WP_HTML_Tag_Processor::set_attribute
*
* @dataProvider data_updating_attributes_in_malformed_html
*
From a5f2d9670f1a7d0f424629fb7af0638c9b59d5ef Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 15:44:56 -0700
Subject: [PATCH 29/36] Was doing it wrong w.r.t. doing_it_wrong
---
.../html-api/class-wp-html-tag-processor.php | 37 +++++++++++++------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index f29fbd80c9a01..b4d966819a049 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -715,9 +715,11 @@ public function set_bookmark( $name ) {
}
if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) {
- if ( WP_DEBUG ) {
- trigger_error( "Too many bookmarks: cannot create '{$name}'", E_USER_WARNING );
- }
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Too many bookmarks: cannot create any more.' ),
+ '6.2.0'
+ );
return false;
}
@@ -1474,16 +1476,20 @@ private function apply_attributes_updates() {
*/
public function seek( $bookmark_name ) {
if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
- if ( WP_DEBUG ) {
- trigger_error( 'Invalid bookmark name', E_USER_WARNING );
- }
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Unknown bookmark name.' ),
+ '6.2.0'
+ );
return false;
}
if ( ++$this->seek_count > self::MAX_SEEK_OPS ) {
- if ( WP_DEBUG ) {
- trigger_error( 'Too many calls to seek() - this can lead to performance issues.', E_USER_WARNING );
- }
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Too many calls to seek() - this can lead to performance issues.' ),
+ '6.2.0'
+ );
return false;
}
@@ -1807,9 +1813,11 @@ public function set_attribute( $name, $value ) {
']~Ssu',
$name
) ) {
- if ( WP_DEBUG ) {
- trigger_error( 'Invalid attribute name', E_USER_WARNING );
- }
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Invalid attribute name.' ),
+ '6.2.0'
+ );
return false;
}
@@ -2112,6 +2120,11 @@ private function parse_query( $query ) {
// If not using the string interface, an associative array is required.
if ( ! is_array( $query ) ) {
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'The query argument must be an array or a tag name.' ),
+ '6.2.0'
+ );
return;
}
From 5b1d47e7b03641581ba45ee20971778ea287e112 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 16:45:15 -0700
Subject: [PATCH 30/36] Add additional type check to avoid throwing
_doing_it_wrong error where none was thrown before.
---
src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index b4d966819a049..59fa288a44f75 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2118,6 +2118,11 @@ private function parse_query( $query ) {
return;
}
+ // An empty query parameter applies no restrictions on the search.
+ if ( null === $query ) {
+ return;
+ }
+
// If not using the string interface, an associative array is required.
if ( ! is_array( $query ) ) {
_doing_it_wrong(
From 3f9b274437f1d298addff628aa531646e2668e09 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 17:12:30 -0700
Subject: [PATCH 31/36] Expect a notice instead of a warning in bookmark tests
---
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index d78ca26e0a4d8..bd483bfee94ea 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -337,7 +337,7 @@ public function test_limits_the_number_of_bookmarks() {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
- $this->expectWarning();
+ $this->expectNotice();
$this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." );
}
@@ -355,7 +355,7 @@ public function test_limits_the_number_of_seek_calls() {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
- $this->expectWarning();
+ $this->expectNotice();
$this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." );
}
}
From 1b8c75c12cd4fa52d610d5ed638929c7912ba65c Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 17:23:27 -0700
Subject: [PATCH 32/36] Remove checks that _doing_it_wrong throws a notice
---
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 2 --
1 file changed, 2 deletions(-)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index bd483bfee94ea..5623dd76685d2 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -337,7 +337,6 @@ public function test_limits_the_number_of_bookmarks() {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
- $this->expectNotice();
$this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." );
}
@@ -355,7 +354,6 @@ public function test_limits_the_number_of_seek_calls() {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
- $this->expectNotice();
$this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." );
}
}
From aad531083a2eb33a051b1c8782a6c75a6d51c8b3 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Wed, 1 Feb 2023 18:14:04 -0700
Subject: [PATCH 33/36] Set expected incorrect usage in tests.
---
tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
index 5623dd76685d2..bb40038bdf35f 100644
--- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
+++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php
@@ -337,6 +337,7 @@ public function test_limits_the_number_of_bookmarks() {
$this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" );
}
+ $this->setExpectedIncorrectUsage( 'WP_HTML_Tag_Processor::set_bookmark' );
$this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." );
}
@@ -354,6 +355,7 @@ public function test_limits_the_number_of_seek_calls() {
$this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' );
}
+ $this->setExpectedIncorrectUsage( 'WP_HTML_Tag_Processor::seek' );
$this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." );
}
}
From 4a438505ec984836dbc45950b77ed1536b95d611 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Thu, 2 Feb 2023 10:23:01 -0700
Subject: [PATCH 34/36] Docblock updates
---
.../html-api/class-wp-html-tag-processor.php | 29 ++++++++++---------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 59fa288a44f75..8247aa87908dd 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -27,9 +27,7 @@
*/
/**
- * Processes an input HTML document by applying a specified set
- * of patches to that input. Tokenizes HTML but does not fully
- * parse the input document.
+ * Modifies attributes in an HTML document for tags matching a query.
*
* ## Usage
*
@@ -707,7 +705,7 @@ public function next_tag( $query = null ) {
* @since 6.2.0
*
* @param string $name Identifies this particular bookmark.
- * @return bool
+ * @return bool Whether the bookmark was successfully created.
*/
public function set_bookmark( $name ) {
if ( null === $this->tag_name_starts_at ) {
@@ -739,7 +737,7 @@ public function set_bookmark( $name ) {
* performance overhead it requires.
*
* @param string $name Name of the bookmark to remove.
- * @return bool
+ * @return bool Whether the bookmark already existed before removal.
*/
public function release_bookmark( $name ) {
if ( ! array_key_exists( $name, $this->bookmarks ) ) {
@@ -753,8 +751,7 @@ public function release_bookmark( $name ) {
/**
- * Skips the contents of the title and textarea tags until an appropriate
- * tag closer is found.
+ * Skips contents of title and textarea tags.
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
* @since 6.2.0
@@ -830,9 +827,11 @@ private function skip_rcdata( $tag_name ) {
}
/**
- * Skips the contents of