From 15863a8e3670b3105bf5451dd119505b4c83e4cb Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 26 Jan 2023 15:48:28 -0700 Subject: [PATCH 01/36] Introduce HTML Tag Processor This commit pulls in the HTML Tag Processor from the Gutenberg repository. The Tag Processor attempts to be an HTML5-spec-compliant parser that provides the ability in PHP to find specific HTML tags and then add, remove, or update attributes on that tag. It provides a safe and reliable way to modify the attributes on HTML tags. ```php // Add missing `rel` attribute to links. $p = new WP_HTML_Tag_Processor( $block_content ); if ( $p->next_tag( 'A' ) && empty( $p->get_attribute( 'rel' ) ) ) { $p->set_attribute( 'rel', 'noopener nofollow' ); } return $p->get_updated_html(); ``` Introduced originally in WordPress/Gutenberg#42485 and developed within the Gutenberg repository, this HTML parsing system was built in order to address a persistent need (properly modifying HTML tag attributes) and was motivated after a sequence of block editor defects which stemmed from mismatches between actual HTML code and expectations for HTML input running through existing naive string-search-based solutions. The Tag Processor is intended to operate fast enough to avoid being an obstacle on page render while using as little memory overhead as possible. It is practically a zero-memory-overhead system, and only allocates memory as changes to the input HTML document are enqueued, releasing that memory when flushing those changes to the document, moving on to find the next tag, or flushing its entire output via `get_updated_html()`.
Rigor has been taken to ensure that the Tag Processor will not be confused by unexpected or non-normative HTML input, including issues arising from quoting, from different syntax rules within `<title>`, `<textarea>`, and `<script>` tags, from the appearance of rare but legitimate comment and XML-like regions, and from a variety of syntax abnormalities such as unbalanced tags, incomplete syntax, and overlapping tags. The Tag Processor is constrained to parsing an HTML document as a stream of tokens. It will not build an HTML tree or generate a DOM representation of a document. It is designed to start at the beginning of an HTML document and linearly scan through it, potentially modifying that document as it scans. It has no access to the markup inside or around tags and it has no ability to determine which tag openers and tag closers belong to each other, or determine the nesting depth of a given tag. It includes a primitive bookmarking system to remember tags it has previously visited. These bookmarks refer to specific tags, not to string offsets, and continue to point to the same place in the document as edits are applied. By asking the Tag Processor to seek to a given bookmark it's possible to back up and continue processing again content that has already been traversed. Attribute values are sanitized with `esc_attr()` and rendered as double-quoted attributes. On read they are unescaped and unquoted. Authors wishing to rely on the Tag Processor therefore are free to pass around data as normal strings. Convenience methods for adding and removing CSS class names exist in order to remove the need to process the `class` attribute.
```php // Update heading block class names $p = new WP_HTML_Tag_Processor( $html ); while ( $p->next_tag() ) { switch ( $p->get_tag() ) { case 'H1': case 'H2': case 'H3': case 'H4': case 'H5': case 'H6': $p->remove_class( 'wp-heading' ); $p->add_class( 'wp-block-heading' ); break; } } return $p->get_updated_html(); ``` The Tag Processor is intended to be a reliable low-level library for traversing HTML documents and higher-level APIs are to be built upon it. Immediately, and in Core Gutenberg blocks it is meant to replace HTML modification that currently relies on RegExp patterns and simpler string replacements. See the following for examples of such replacement: https://github.com/WordPress/gutenberg/pull/44600/commits/13157844ac9aa4307a7a1e3abc54d0f7b0c333cd https://github.com/WordPress/gutenberg/pull/45469/files#diff-dcd9e1f9b87ca63efe9f1e834b4d3048778d3eca41aa39c636f8b16a5bb452d2L46 https://github.com/WordPress/gutenberg/pull/46625 Co-Authored-By: Adam Zielinski <adam@adamziel.com> Co-Authored-By: Bernie Reiter <ockham@raz.or.at> Co-Authored-By: Grzegorz Ziolkowski <grzegorz@gziolo.pl> --- .../class-wp-html-attribute-token.php | 93 + src/wp-includes/class-wp-html-span.php | 56 + .../class-wp-html-tag-processor.php | 2047 +++++++++++++++++ .../class-wp-html-text-replacement.php | 63 + src/wp-includes/wp-html.php | 34 + src/wp-settings.php | 1 + .../html/wpHtmlTagProcessorBookmarks.php | 370 +++ .../tests/html/wpHtmlTagProcessorTest.php | 1908 +++++++++++++++ 8 files changed, 4572 insertions(+) create mode 100644 src/wp-includes/class-wp-html-attribute-token.php create mode 100644 src/wp-includes/class-wp-html-span.php create mode 100644 src/wp-includes/class-wp-html-tag-processor.php create mode 100644 src/wp-includes/class-wp-html-text-replacement.php create mode 100644 src/wp-includes/wp-html.php create mode 100644 tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php create mode 100644 tests/phpunit/tests/html/wpHtmlTagProcessorTest.php diff --git 
a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php new file mode 100644 index 0000000000000..21147e30bfe1f --- /dev/null +++ b/src/wp-includes/class-wp-html-attribute-token.php @@ -0,0 +1,93 @@ +<?php +/** + * HTML Tag Processor: Attribute token structure class. + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +if ( ! class_exists( 'WP_HTML_Attribute_Token' ) ) : + +/** + * Data structure for the attribute token that allows to drastically improve performance. + * + * This class is for internal usage of the WP_HTML_Tag_Processor class. + * + * @access private + * @since 6.2.0 + * + * @see WP_HTML_Tag_Processor + */ +class WP_HTML_Attribute_Token { + /** + * Attribute name. + * + * @since 6.2.0 + * @var string + */ + public $name; + + /** + * Byte offset in the input HTML where the attribute value starts. + * + * @since 6.2.0 + * @var int + */ + public $value_starts_at; + + /** + * How many bytes the value occupies in the input HTML. + * + * @since 6.2.0 + * @var int + */ + public $value_length; + + /** + * The string offset where the attribute name starts. + * + * @since 6.2.0 + * @var int + */ + public $start; + + /** + * The string offset after the attribute value or its name. + * + * @since 6.2.0 + * @var int + */ + public $end; + + /** + * Whether the attribute is a boolean attribute with value `true`. + * + * @since 6.2.0 + * @var bool + */ + public $is_true; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param string $name Attribute name. + * @param int $value_start Byte offset in the input HTML where the attribute value starts. + * @param int $value_length Number of bytes attribute value spans. + * @param int $start The string offset where the attribute name starts. + * @param int $end The string offset after the attribute value or its name. + * @param bool $is_true Whether the attribute is a boolean attribute with true value. 
+ */ + public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) { + $this->name = $name; + $this->value_starts_at = $value_start; + $this->value_length = $value_length; + $this->start = $start; + $this->end = $end; + $this->is_true = $is_true; + } +} + +endif; diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php new file mode 100644 index 0000000000000..376e391dc1c44 --- /dev/null +++ b/src/wp-includes/class-wp-html-span.php @@ -0,0 +1,56 @@ +<?php +/** + * HTML Span: Represents a textual span inside an HTML document. + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +if ( ! class_exists( 'WP_HTML_Span' ) ) : + +/** + * Represents a textual span inside an HTML document. + * + * This is a two-tuple in disguise, used to avoid the memory + * overhead involved in using an array for the same purpose. + * + * This class is for internal usage of the WP_HTML_Tag_Processor class. + * + * @access private + * @since 6.2.0 + * + * @see WP_HTML_Tag_Processor + */ +class WP_HTML_Span { + /** + * Byte offset into document where span begins. + * + * @since 6.2.0 + * @var int + */ + public $start; + + /** + * Byte offset into document where span ends. + * + * @since 6.2.0 + * @var int + */ + public $end; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param int $start Byte offset into document where span begins. + * @param int $end Byte offset into document where span ends. 
+ */ + public function __construct( $start, $end ) { + $this->start = $start; + $this->end = $end; + } +} + +endif; diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php new file mode 100644 index 0000000000000..24e67a3adc83f --- /dev/null +++ b/src/wp-includes/class-wp-html-tag-processor.php @@ -0,0 +1,2047 @@ +<?php +/** + * Scans through an HTML document to find specific tags, then + * transforms those tags by adding, removing, or updating the + * values of the HTML attributes within that tag (opener). + * + * Does not fully parse HTML or _recurse_ into the HTML structure. + * Instead this scans linearly through a document and only parses + * the HTML tag openers. + * + * @TODO: Unify language around "currently-opened tag." + * @TODO: Organize unit test cases into normative tests, edge-case tests, regression tests. + * @TODO: Clean up attribute token class after is_true addition + * @TODO: Prune whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c" + * @TODO: Skip over `/` in attributes area, split attribute names by `/` + * @TODO: Decode HTML references/entities in class names when matching. + * E.g. match having class `1<"2` needs to recognize `class="1&lt;&quot;2"`. + * @TODO: Decode character references in `get_attribute()` + * @TODO: Properly escape attribute value in `set_attribute()` + * @TODO: Add slow mode to escape character entities in CSS class names? + * (This requires a custom decoder since `html_entity_decode()` + * doesn't handle attribute character reference decoding rules.) + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) : + +/** + * Processes an input HTML document by applying a specified set + * of patches to that input. Tokenizes HTML but does not fully + * parse the input document. + * + * ## Usage + * + * Use of this class requires three steps: + * + * 1. 
Create a new class instance with your input HTML document. + * 2. Find the tag(s) you are looking for. + * 3. Request changes to the attributes in those tag(s). + * + * Example: + * ```php + * $tags = new WP_HTML_Tag_Processor( $html ); + * if ( $tags->next_tag( [ 'tag_name' => 'option' ] ) ) { + * $tags->set_attribute( 'selected', true ); + * } + * ``` + * + * ### Finding tags + * + * The `next_tag()` function moves the internal cursor through + * your input HTML document until it finds a tag meeting any of + * the supplied restrictions in the optional query argument. If + * no argument is provided then it will find the next HTML tag, + * regardless of what kind it is. + * + * If you want to _find whatever the next tag is_: + * ```php + * $tags->next_tag(); + * ``` + * + * | Goal | Query | + * |-----------------------------------------------------------|----------------------------------------------------------------------------| + * | Find any tag. | `$tags->next_tag();` | + * | Find next image tag. | `$tags->next_tag( [ 'tag_name' => 'img' ] );` | + * | Find next tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'class_name' => 'fullwidth' ] );` | + * | Find next image tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'tag_name' => 'img', 'class_name' => 'fullwidth' ] );` | + * + * If a tag was found meeting your criteria then `next_tag()` + * will return `true` and you can proceed to modify it. If it + * returns `false`, however, it failed to find the tag and + * moved the cursor to the end of the file. + * + * Once the cursor reaches the end of the file the processor + * is done and if you want to reach an earlier tag you will + * need to recreate the processor and start over. The internal + * cursor can only proceed forward, never backing up. + * + * #### Custom queries + * + * Sometimes it's necessary to further inspect an HTML tag than + * the query syntax here permits. 
In these cases one may further + * inspect the search results using the read-only functions + * provided by the processor or external state or variables. + * + * Example: + * ```php + * // Paint up to the first five DIV or SPAN tags marked with the "jazzy" style. + * $remaining_count = 5; + * while ( $remaining_count > 0 && $tags->next_tag() ) { + * if ( + * ( 'DIV' === $tags->get_tag() || 'SPAN' === $tags->get_tag() ) && + * 'jazzy' === $tags->get_attribute( 'data-style' ) + * ) { + * $tags->add_class( 'theme-style-everest-jazz' ); + * $remaining_count--; + * } + * } + * ``` + * + * `get_attribute()` will return `null` if the attribute wasn't present + * on the tag when it was called. It may return `""` (the empty string) + * in cases where the attribute was present but its value was empty. + * For boolean attributes, those whose name is present but no value is + * given, it will return `true` (the only way to set `false` for an + * attribute is to remove it). + * + * ### Modifying HTML attributes for a found tag + * + * Once you've found the start of an opening tag you can modify + * any number of the attributes on that tag. You can set a new + * value for an attribute, remove the entire attribute, or do + * nothing and move on to the next opening tag. + * + * Example: + * ```php + * if ( $tags->next_tag( [ 'class_name' => 'wp-group-block' ] ) ) { + * $tags->set_attribute( 'title', 'This groups the contained content.' ); + * $tags->remove_attribute( 'data-test-id' ); + * } + * ``` + * + * If `set_attribute()` is called for an existing attribute it will + * overwrite the existing value. Similarly, calling `remove_attribute()` + * for a non-existing attribute has no effect on the document. Both + * of these methods are safe to call without knowing if a given attribute + * exists beforehand. + * + * ### Modifying CSS classes for a found tag + * + * The tag processor treats the `class` attribute as a special case. 
+ * Because it's a common operation to add or remove CSS classes you + * can do so using this interface. + * + * As with attribute values, adding or removing CSS classes is a safe + * operation that doesn't require checking if the attribute or class + * exists before making changes. If removing the only class then the + * entire `class` attribute will be removed. + * + * Example: + * ```php + * // from `<span>Yippee!</span>` + * // to `<span class="is-active">Yippee!</span>` + * $tags->add_class( 'is-active' ); + * + * // from `<span class="excited">Yippee!</span>` + * // to `<span class="excited is-active">Yippee!</span>` + * $tags->add_class( 'is-active' ); + * + * // from `<span class="is-active heavy-accent">Yippee!</span>` + * // to `<span class="is-active heavy-accent">Yippee!</span>` + * $tags->add_class( 'is-active' ); + * + * // from `<input type="text" class="is-active rugby not-disabled" length="24">` + * // to `<input type="text" class="is-active not-disabled" length="24">` + * $tags->remove_class( 'rugby' ); + * + * // from `<input type="text" class="rugby" length="24">` + * // to `<input type="text" length="24">` + * $tags->remove_class( 'rugby' ); + * + * // from `<input type="text" length="24">` + * // to `<input type="text" length="24">` + * $tags->remove_class( 'rugby' ); + * ``` + * + * ## Design limitations + * + * @TODO: Expand this section + * + * - No nesting: cannot match open and close tag. + * - Class names are not decoded if they contain character references. + * + * @since 6.2.0 + */ +class WP_HTML_Tag_Processor { + /** + * The maximum number of bookmarks allowed to exist at + * any given time. + * + * @see set_bookmark() + * @since 6.2.0 + * @var int + */ + const MAX_BOOKMARKS = 10; + + /** + * Maximum number of times seek() can be called. + * Prevents accidental infinite loops. + * + * @see seek() + * @since 6.2.0 + * @var int + */ + const MAX_SEEK_OPS = 1000; + + /** + * The HTML document to parse. 
+ * + * @since 6.2.0 + * @var string + */ + private $html; + + /** + * The last query passed to next_tag(). + * + * @since 6.2.0 + * @var array|null + */ + private $last_query; + + /** + * The tag name this processor currently scans for. + * + * @since 6.2.0 + * @var string|null + */ + private $sought_tag_name; + + /** + * The CSS class name this processor currently scans for. + * + * @since 6.2.0 + * @var string|null + */ + private $sought_class_name; + + /** + * The match offset this processor currently scans for. + * + * @since 6.2.0 + * @var int|null + */ + private $sought_match_offset; + + /** + * Whether to visit tag closers, e.g. </div>, when walking an input document. + * + * @since 6.2.0 + * @var bool + */ + private $stop_on_tag_closers; + + /** + * The updated HTML document. + * + * @since 6.2.0 + * @var string + */ + private $updated_html = ''; + + /** + * How many bytes from the original HTML document were already read. + * + * @since 6.2.0 + * @var int + */ + private $parsed_bytes = 0; + + /** + * How many bytes from the original HTML document were already treated + * with the requested replacements. + * + * @since 6.2.0 + * @var int + */ + private $updated_bytes = 0; + + /** + * Byte offset in input document where current tag name starts. + * + * Example: + * ``` + * <div id="test">... + * 01234 + * - tag name starts at 1 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_starts_at; + + /** + * Byte length of current tag name. + * + * Example: + * ``` + * <div id="test">... + * 01234 + * --- tag name length is 3 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_length; + + /** + * Byte offset in input document where current tag token ends. + * + * Example: + * ``` + * <div id="test">... + * 0 1 | + * 01234567890123456 + * --- tag ends at 14 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_ends_at; + + /** + * Whether the current tag is an opening tag, e.g. <div>, or a closing tag, e.g. </div>. 
+ * + * @since 6.2.0 + * @var bool + */ + private $is_closing_tag; + + /** + * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. + * + * Example: + * <code> + * // supposing the parser is working through this content + * // and stops after recognizing the `id` attribute + * // <div id="test-4" class=outline title="data:text/plain;base64=asdk3nk1j3fo8"> + * // ^ parsing will continue from this point + * // (the constructor arguments below are abbreviated for illustration) + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Token( 'id', null, 6, 17 ) + * ]; + * + * // when picking up parsing again, or when asking to find the + * // `class` attribute we will continue and add to this array + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Token( 'id', null, 6, 17 ), + * 'class' => new WP_HTML_Attribute_Token( 'class', 'outline', 18, 32 ) + * ]; + * + * // Note that only the `class` attribute value is stored in the index. + * // That's because it is the only value used by this class at the moment. + * </code> + * + * @since 6.2.0 + * @var WP_HTML_Attribute_Token[] + */ + private $attributes = array(); + + /** + * Which class names to add or remove from a tag. + * + * These are tracked separately from attribute updates because they are + * semantically distinct, whereas this interface exists for the common + * case of adding and removing class names while other attributes are + * generally modified as with DOM `setAttribute` calls. + * + * When modifying an HTML document these will eventually be collapsed + * into a single lexical update to replace the `class` attribute. + * + * Example: + * <code> + * // Add the `wp-block-group` class, remove the `wp-group` class. 
+ * $classname_updates = [ + * // Indexed by a comparable class name + * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, + * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS + * ]; + * </code> + * + * @since 6.2.0 + * @var bool[] + */ + private $classname_updates = array(); + + /** + * Tracks a semantic location in the original HTML which + * shifts with updates as they are applied to the document. + * + * @since 6.2.0 + * @var WP_HTML_Span[] + */ + private $bookmarks = array(); + + /** + * Flags stored as values in `$classname_updates` to mark a class name + * for addition or removal. + * + * NOTE(review): how SKIP_CLASS is consumed is not visible in this part of the file; confirm. + * + * @since 6.2.0 + */ + const ADD_CLASS = true; + const REMOVE_CLASS = false; + const SKIP_CLASS = null; + + /** + * Lexical replacements to apply to input HTML document. + * + * HTML modifications collapse into lexical replacements in order to + * provide an efficient mechanism to update documents lazily and in + * order to support a variety of semantic modifications without + * building a complicated parsing machinery. That is, it's up to + * the calling class to generate the lexical modification from the + * semantic change requested. + * + * Example: + * <code> + * // Replace an attribute stored with a new value, indices + * // sourced from the lazily-parsed HTML recognizer. + * $start = $attributes['src']->start; + * $end = $attributes['src']->end; + * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() ); + * + * // Correspondingly, something like this + * // will appear in the replacements array. + * $replacements = [ + * new WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' ) + * ]; + * </code> + * + * @since 6.2.0 + * @var WP_HTML_Text_Replacement[] + */ + private $lexical_updates = array(); + + /** + * Tracks how many times we've performed a `seek()` + * so that we can prevent accidental infinite loops. 
+ * + * @see seek() + * @since 6.2.0 + * @var int + */ + private $seek_count = 0; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param string $html HTML to process. 
+ */ + public function __construct( $html ) { + $this->html = $html; + } + + /** + * Finds the next tag matching the $query. + * + * @since 6.2.0 + * + * @param array|string|null $query { + * Which tag name to find, having which class, etc. + * + * @type string|null $tag_name Which tag to find, or `null` for "any tag." + * @type int|null $match_offset Find the Nth tag matching all search criteria. + * 0 for "first" tag, 2 for "third," etc. + * Defaults to first tag. + * @type string|null $class_name Tag must contain this whole class name to match. + * } + * @return bool Whether a tag was matched. + */ + public function next_tag( $query = null ) { + $this->parse_query( $query ); + $already_found = 0; + + do { + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + /* + * Unfortunately we can't try to search for only the tag name we want because that might + * lead us to skip over other tags and lose track of our place. So we need to search for + * _every_ tag and then check after we find one if it's the one we are looking for. + */ + if ( false === $this->parse_next_tag() ) { + $this->parsed_bytes = strlen( $this->html ); + + return false; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + + $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes ); + if ( false === $tag_ends_at ) { + return false; + } + $this->tag_ends_at = $tag_ends_at; + $this->parsed_bytes = $tag_ends_at; + + if ( $this->matches() ) { + ++$already_found; + } + + // Avoid copying the tag name string when possible. + $t = $this->html[ $this->tag_name_starts_at ]; + if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) { + $tag_name = $this->get_tag(); + + if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } elseif ( + ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && + ! 
$this->skip_rcdata( $tag_name ) + ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } + } + } while ( $already_found < $this->sought_match_offset ); + + return true; + } + + + /** + * Sets a bookmark in the HTML document. + * + * Bookmarks represent specific places or tokens in the HTML + * document, such as a tag opener or closer. When applying + * edits to a document, such as setting an attribute, the + * text offsets of that token may shift; the bookmark is + * kept updated with those shifts and remains stable unless + * the entire span of text in which the token sits is removed. + * + * Release bookmarks when they are no longer needed. + * + * Example: + * ``` + * <main><h2>Surprising fact you may not know!</h2></main> + * ^ ^ + * \-|-- this `H2` opener bookmark tracks the token + * + * <main class="clickbait"><h2>Surprising fact you may no… + * ^ ^ + * \-|-- it shifts with edits + * ``` + * + * Bookmarks provide the ability to seek to a previously-scanned + * place in the HTML document. This avoids the need to re-scan + * the entire thing. + * + * Example: + * ``` + * <ul><li>One</li><li>Two</li><li>Three</li></ul> + * ^^^^ + * want to note this last item + * + * $p = new WP_HTML_Tag_Processor( $html ); + * $in_list = false; + * while ( $p->next_tag( [ 'tag_closers' => $in_list ? 'visit' : 'skip' ] ) ) { + * if ( 'UL' === $p->get_tag() ) { + * if ( $p->is_tag_closer() ) { + * $in_list = false; + * $p->set_bookmark( 'resume' ); + * if ( $p->seek( 'last-li' ) ) { + * $p->add_class( 'last-li' ); + * } + * $p->seek( 'resume' ); + * $p->release_bookmark( 'last-li' ); + * $p->release_bookmark( 'resume' ); + * } else { + * $in_list = true; + * } + * } + * + * if ( 'LI' === $p->get_tag() ) { + * $p->set_bookmark( 'last-li' ); + * } + * } + * ``` + * + * Because bookmarks maintain their position they don't + * expose any internal offsets for the HTML document + * and can't be used with normal string functions. 
+ * + * Because bookmarks allocate memory and require processing + * for every applied update they are limited and require + * a name. They should not be created inside a loop. + * + * Bookmarks are a powerful tool to enable complicated behavior; + * consider double-checking that you need this tool if you are + * reaching for it, as inappropriate use could lead to broken + * HTML structure or unwanted processing overhead. + * + * @param string $name Identifies this particular bookmark. + * @return bool Whether the bookmark was set; false when no tag is currently + * matched or when the bookmark limit has been reached. + * @throws Exception Throws when creating the bookmark would exceed MAX_BOOKMARKS, if WP_DEBUG set. + */ + public function set_bookmark( $name ) { + if ( null === $this->tag_name_starts_at ) { + return false; + } + + // Re-setting an existing bookmark is always allowed; only new names count against the limit. + if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( "Tried to set HTML bookmark {$name} but the maximum number of bookmarks has been reached." ); + } + return false; + } + + $this->bookmarks[ $name ] = new WP_HTML_Span( + $this->tag_name_starts_at - 1, + $this->tag_ends_at + ); + + return true; + } + + + /** + * Removes a bookmark if you no longer need to use it. + * + * Releasing a bookmark frees up the small performance + * overhead they require, mainly in the form of compute + * costs when modifying the document. + * + * @param string $name Name of the bookmark to remove. + * @return bool Whether a bookmark with that name existed and was removed. + */ + public function release_bookmark( $name ) { + if ( ! 
array_key_exists( $name, $this->bookmarks ) ) { + return false; + } + + unset( $this->bookmarks[ $name ] ); + + return true; + } + + + /** + * Skips the contents of the title and textarea tags until an appropriate + * tag closer is found. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state + * @param string $tag_name The upper-case tag name which will close the RCDATA region. 
+ * @since 6.2.0 + * @return bool Whether an appropriate tag closer was found. + */ + private function skip_rcdata( $tag_name ) { + $html = $this->html; + $doc_length = strlen( $html ); + $tag_length = strlen( $tag_name ); + + $at = $this->parsed_bytes; + + while ( false !== $at && $at < $doc_length ) { + $at = strpos( $this->html, '</', $at ); + + /* + * If we have no possible tag closer then fail. A closer needs room + * for "</", the tag name, and at least one terminating byte; checking + * all of that here keeps every string offset read below in bounds. + */ + if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) { + $this->parsed_bytes = $doc_length; + return false; + } + + $at += 2; + + /* + * We have to find a case-insensitive match to the tag name. + * Note also that since tag names are limited to US-ASCII + * characters we can ignore any kind of Unicode normalizing + * forms when comparing. If we get a non-ASCII character it + * will never be a match. + */ + for ( $i = 0; $i < $tag_length; $i++ ) { + $tag_char = $tag_name[ $i ]; + $html_char = $html[ $at + $i ]; + + if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { + $at += $i; + continue 2; + } + } + + $at += $tag_length; + $this->parsed_bytes = $at; + + /* + * Ensure we terminate the tag name, otherwise we might, + * for example, accidentally match the sequence + * "</textarearug>" for "</textarea>". + */ + $c = $html[ $at ]; + if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { + continue; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + $at = $this->parsed_bytes; + if ( $at >= strlen( $this->html ) ) { + return false; + } + + if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { + ++$this->parsed_bytes; + return true; + } + } + + return false; + } + + /** + * Skips the contents of <script> tags. 
+ * + * @since 6.2.0 + * @return bool Whether the closing script tag was found. + */ + private function skip_script_data() { + $state = 'unescaped'; + $html = $this->html; + $doc_length = strlen( $html ); + $at = $this->parsed_bytes; + + while ( false !== $at && $at < $doc_length ) { + $at += strcspn( $html, '-<', $at ); + + /* + * Regardless of the state we're in, a "-->" + * will break out of it and bring us back + * into the normal unescaped script mode. + */ + if ( + $at + 2 < $doc_length && + '-' === $html[ $at ] && + '-' === $html[ $at + 1 ] && + '>' === $html[ $at + 2 ] + ) { + $at += 3; + $state = 'unescaped'; + continue; + } + + /* + * A script closer cannot fit in the final byte of the document, so + * stop here. Merely continuing would loop forever when the document + * ends in "-" or "<" because `$at` would never advance. + */ + if ( $at + 1 >= $doc_length ) { + return false; + } + + // Everything past here has to start with "<". + if ( '<' !== $html[ $at++ ] ) { + continue; + } + + /* + * On the other hand, "<!--" only enters the + * escaped mode if we aren't already there. + * + * Inside the escaped modes it's ignored and + * shouldn't ever pull us out of double-escaped + * and back into escaped. + * + * We'll continue parsing past it regardless of + * our state though to avoid backtracking once + * we recognize the snippet. + */ + if ( + $at + 2 < $doc_length && + '!' === $html[ $at ] && + '-' === $html[ $at + 1 ] && + '-' === $html[ $at + 2 ] + ) { + $at += 3; + $state = 'unescaped' === $state ? 'escaped' : $state; + continue; + } + + if ( '/' === $html[ $at ] ) { + $is_closing = true; + ++$at; + } else { + $is_closing = false; + } + + /* + * At this point we're only examining state-changes based off of + * the <script> or </script> tags, so if we're not seeing the + * start of one of these tokens we can proceed to the next + * potential match in the text. + */ + if ( ! 
( + $at + 6 < $doc_length && + ( 's' === $html[ $at ] || 'S' === $html[ $at ] ) && + ( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) && + ( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) && + ( 'i' === $html[ $at + 3 ] || 'I' === $html[ $at + 3 ] ) && + ( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) && + ( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] ) + ) ) { + /* + * `$at` already sits past the "<" (and "/") so the scan is + * guaranteed to advance; re-scan from this byte so that an + * immediately-following "<" or "-" is not stepped over. + */ + continue; + } + + /* + * We also have to make sure we terminate the script tag opener/closer + * to avoid making partial matches on strings like `<script123`. + * The length check above guarantees this offset is in bounds. + */ + $at += 6; + $c = $html[ $at ]; + if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { + // Re-scan from this byte: it may itself be the "<" or "-" of the next token. + continue; + } + + if ( 'escaped' === $state && ! $is_closing ) { + $state = 'double-escaped'; + continue; + } + + if ( 'double-escaped' === $state && $is_closing ) { + $state = 'escaped'; + continue; + } + + if ( $is_closing ) { + $this->parsed_bytes = $at; + if ( $this->parsed_bytes >= $doc_length ) { + return false; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + + // Attribute parsing may have consumed the rest of the document. + if ( $this->parsed_bytes < $doc_length && '>' === $html[ $this->parsed_bytes ] ) { + ++$this->parsed_bytes; + return true; + } + } + + ++$at; + } + + return false; + } + + /** + * Parses the next tag. + * + * @since 6.2.0 + * @return bool Whether a tag was found. 
+ */ + private function parse_next_tag() { + $this->after_tag(); + + $html = $this->html; + $doc_length = strlen( $html ); + $at = $this->parsed_bytes; + + while ( false !== $at && $at < $doc_length ) { + $at = strpos( $html, '<', $at ); + if ( false === $at ) { + return false; + } + + // Guard the look-ahead: the "<" may be the last byte of the document. + if ( $at + 1 < $doc_length && '/' === $html[ $at + 1 ] ) { + $this->is_closing_tag = true; + $at++; + } else { + $this->is_closing_tag = false; + } + + /* + * HTML tag names must start with [a-zA-Z] otherwise they are not tags. + * For example, "<3" is rendered as text, not a tag opener. 
This means + * if we have at least one letter following the "<" then we _do_ have + * a tag opener and can process it as such. This is more common than + * HTML comments, DOCTYPE tags, and other structure starting with "<" + * so it's good to check first for the presence of the tag. + * + * Reference: + * * https://html.spec.whatwg.org/multipage/parsing.html#data-state + * * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + */ + $tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 ); + if ( $tag_name_prefix_length > 0 ) { + ++$at; + $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); + $this->tag_name_starts_at = $at; + $this->parsed_bytes = $at + $this->tag_name_length; + return true; + } + + // If we didn't find a tag opener, and we can't be + // transitioning into different markup states, then + // we can abort because there aren't any more tags. + if ( $at + 1 >= strlen( $html ) ) { + return false; + } + + // <! transitions to markup declaration open state + // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state + if ( '!' === $html[ $at + 1 ] ) { + // <!-- opens a comment – we can skip to the nearest --> + // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + if ( + strlen( $html ) > $at + 3 && + '-' === $html[ $at + 2 ] && + '-' === $html[ $at + 3 ] + ) { + $closer_at = strpos( $html, '-->', $at + 4 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 3; + continue; + } + + // <![CDATA[ transitions to CDATA section state – we can skip to the nearest ]]> + // The CDATA is case-sensitive. 
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + if ( + strlen( $html ) > $at + 8 && + '[' === $html[ $at + 2 ] && + 'C' === $html[ $at + 3 ] && + 'D' === $html[ $at + 4 ] && + 'A' === $html[ $at + 5 ] && + 'T' === $html[ $at + 6 ] && + 'A' === $html[ $at + 7 ] && + '[' === $html[ $at + 8 ] + ) { + $closer_at = strpos( $html, ']]>', $at + 9 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 3; + continue; + } + + /* + * <!DOCTYPE transitions to DOCTYPE state – we can skip to the nearest > + * These are ASCII-case-insensitive. + * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + */ + if ( + strlen( $html ) > $at + 8 && + 'D' === strtoupper( $html[ $at + 2 ] ) && + 'O' === strtoupper( $html[ $at + 3 ] ) && + 'C' === strtoupper( $html[ $at + 4 ] ) && + 'T' === strtoupper( $html[ $at + 5 ] ) && + 'Y' === strtoupper( $html[ $at + 6 ] ) && + 'P' === strtoupper( $html[ $at + 7 ] ) && + 'E' === strtoupper( $html[ $at + 8 ] ) + ) { + $closer_at = strpos( $html, '>', $at + 9 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 1; + continue; + } + + /* + * Anything else here is an incorrectly-opened comment and transitions + * to the bogus comment state - we can skip to the nearest >. + * When no > exists `$at` becomes false and the loop condition ends the scan. + */ + $at = strpos( $html, '>', $at + 1 ); + continue; + } + + /* + * <? transitions to a bogus comment state – we can skip to the nearest > + * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + */ + if ( '?' === $html[ $at + 1 ] ) { + $closer_at = strpos( $html, '>', $at + 2 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 1; + continue; + } + + ++$at; + } + + return false; + } + + /** + * Parses the next attribute. + * + * @since 6.2.0 + */ + private function parse_next_attribute() { + // Skip whitespace and slashes.
+ $this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes ); + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + /* + * Treat the equal sign ("=") as a part of the attribute name if it is the + * first encountered byte: + * https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state + */ + $name_length = '=' === $this->html[ $this->parsed_bytes ] + ? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 ) + : strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes ); + + // No attribute, just tag closer. + if ( 0 === $name_length || $this->parsed_bytes + $name_length >= strlen( $this->html ) ) { + return false; + } + + $attribute_start = $this->parsed_bytes; + $attribute_name = substr( $this->html, $attribute_start, $name_length ); + $this->parsed_bytes += $name_length; + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + $this->skip_whitespace(); + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + $has_value = '=' === $this->html[ $this->parsed_bytes ]; + if ( $has_value ) { + ++$this->parsed_bytes; + $this->skip_whitespace(); + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + switch ( $this->html[ $this->parsed_bytes ] ) { + case "'": + case '"': + $quote = $this->html[ $this->parsed_bytes ]; + $value_start = $this->parsed_bytes + 1; + $value_length = strcspn( $this->html, $quote, $value_start ); + $attribute_end = $value_start + $value_length + 1; + $this->parsed_bytes = $attribute_end; + break; + + default: + $value_start = $this->parsed_bytes; + $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start ); + $attribute_end = $value_start + $value_length; + $this->parsed_bytes = $attribute_end; + } + } else { + $value_start = $this->parsed_bytes; + $value_length = 0; + $attribute_end = $attribute_start + $name_length; + } + + if ( $attribute_end >= strlen( $this->html ) ) { + return false; 
+ } + + if ( $this->is_closing_tag ) { + return true; + } + + /* + * > There must never be two or more attributes on + * > the same start tag whose names are an ASCII + * > case-insensitive match for each other. + * - HTML 5 spec + * + * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive + */ + $comparable_name = strtolower( $attribute_name ); + + // If an attribute is listed many times, only use the first declaration and ignore the rest. + if ( ! array_key_exists( $comparable_name, $this->attributes ) ) { + $this->attributes[ $comparable_name ] = new WP_HTML_Attribute_Token( + $attribute_name, + $value_start, + $value_length, + $attribute_start, + $attribute_end, + ! $has_value + ); + } + + return $this->attributes[ $comparable_name ]; + } + + /** + * Move the pointer past any immediate successive whitespace. + * + * @since 6.2.0 + * + * @return void + */ + private function skip_whitespace() { + $this->parsed_bytes += strspn( $this->html, " \t\f\r\n", $this->parsed_bytes ); + } + + /** + * Applies attribute updates and cleans up once a tag is fully parsed. + * + * @since 6.2.0 + * + * @return void + */ + private function after_tag() { + $this->class_name_updates_to_attributes_updates(); + $this->apply_attributes_updates(); + $this->tag_name_starts_at = null; + $this->tag_name_length = null; + $this->tag_ends_at = null; + $this->is_closing_tag = null; + $this->attributes = array(); + } + + /** + * Converts class name updates into tag attributes updates + * (they are accumulated in different data formats for performance). 
+	 *
+	 * Does nothing when no class name updates are enqueued. Otherwise the
+	 * enqueued additions and removals are merged with the current `class`
+	 * value and, if that changes the value, turned into a single
+	 * `set_attribute()` or `remove_attribute()` call. The list of enqueued
+	 * class name updates is emptied in either case.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @see $classname_updates
+	 * @see $lexical_updates
+	 *
+	 * @return void
+	 */
+	private function class_name_updates_to_attributes_updates() {
+		if ( count( $this->classname_updates ) === 0 ) {
+			return;
+		}
+
+		// Prefer an already-enqueued `class` update; a removed (null) or boolean (true) one counts as empty.
+		$existing_class = $this->get_enqueued_attribute_value( 'class' );
+		if ( null === $existing_class || true === $existing_class ) {
+			$existing_class = '';
+		}
+
+		// Nothing enqueued (false): fall back to the raw value found in the input HTML.
+		if ( false === $existing_class && isset( $this->attributes['class'] ) ) {
+			$existing_class = substr(
+				$this->html,
+				$this->attributes['class']->value_starts_at,
+				$this->attributes['class']->value_length
+			);
+		}
+
+		// Neither an enqueued update nor an existing attribute: start from an empty value.
+		if ( false === $existing_class ) {
+			$existing_class = '';
+		}
+
+		/**
+		 * Updated "class" attribute value.
+		 *
+		 * This is incrementally built as we scan through the existing class
+		 * attribute, omitting removed classes as we do so, and then appending
+		 * added classes at the end. Only when we're done processing will the
+		 * value contain the final new value.
+		 *
+		 * @var string
+		 */
+		$class = '';
+
+		/**
+		 * Tracks the cursor position in the existing class
+		 * attribute value where we're currently parsing.
+		 *
+		 * @var integer
+		 */
+		$at = 0;
+
+		/**
+		 * Indicates if we have made any actual modifications to the existing
+		 * class attribute value, used to short-circuit string copying.
+		 *
+		 * It's possible that we are intending to remove certain classes and
+		 * add others in such a way that we don't modify the existing value
+		 * because calls to `add_class()` and `remove_class()` occur
+		 * independent of the input values sent to the WP_HTML_Tag_Processor. That is, we
+		 * might call `remove_class()` for a class that isn't already present
+		 * and we might call `add_class()` for one that is, in which case we
+		 * wouldn't need to break apart the string and rebuild it.
+		 *
+		 * This flag is set upon the first change that requires a string update.
+		 *
+		 * @var boolean
+		 */
+		$modified = false;
+
+		// Remove unwanted classes by only copying the ones that are kept.
+		$existing_class_length = strlen( $existing_class );
+		while ( $at < $existing_class_length ) {
+			// Skip to the first non-whitespace character.
+			$ws_at     = $at;
+			$ws_length = strspn( $existing_class, " \t\f\r\n", $ws_at );
+			$at       += $ws_length;
+
+			// Capture the class name – it's everything until the next whitespace.
+			$name_length = strcspn( $existing_class, " \t\f\r\n", $at );
+			if ( 0 === $name_length ) {
+				// We're done, no more class names.
+				break;
+			}
+
+			$name = substr( $existing_class, $at, $name_length );
+			$at  += $name_length;
+
+			// If this class is marked for removal, start processing the next one.
+			$remove_class = (
+				isset( $this->classname_updates[ $name ] ) &&
+				self::REMOVE_CLASS === $this->classname_updates[ $name ]
+			);
+
+			// Once we've seen a class, we should never add it again.
+			if ( ! $remove_class ) {
+				$this->classname_updates[ $name ] = self::SKIP_CLASS;
+			}
+
+			if ( $remove_class ) {
+				$modified = true;
+				continue;
+			}
+
+			/*
+			 * Otherwise, append it to the new "class" attribute value.
+			 *
+			 * By preserving the existing whitespace instead of only adding a single
+			 * space (which is a valid transformation we can make) we'll introduce
+			 * fewer changes to the HTML content and hopefully make comparing
+			 * before/after easier for people trying to debug the modified output.
+			 */
+			$class .= substr( $existing_class, $ws_at, $ws_length );
+			$class .= $name;
+		}
+
+		// Add new classes by appending the ones we haven't already seen.
+		foreach ( $this->classname_updates as $name => $operation ) {
+			if ( self::ADD_CLASS === $operation ) {
+				$modified = true;
+
+				$class .= strlen( $class ) > 0 ? ' ' : '';
+				$class .= $name;
+			}
+		}
+
+		// The enqueued class updates are consumed whether or not they changed anything.
+		$this->classname_updates = array();
+		if ( ! $modified ) {
+			return;
+		}
+
+		// An empty result means no classes remain, so the attribute is dropped entirely.
+		if ( strlen( $class ) > 0 ) {
+			$this->set_attribute( 'class', $class );
+		} else {
+			$this->remove_attribute( 'class' );
+		}
+	}
+
+	/**
+	 * Applies updates to attributes.
+	 *
+	 * Writes every enqueued text replacement into `$this->updated_html`,
+	 * shifts the stored bookmarks so they keep pointing at the same tags,
+	 * and finally empties the list of enqueued replacements.
+	 *
+	 * @since 6.2.0
+	 */
+	private function apply_attributes_updates() {
+		if ( 0 === count( $this->lexical_updates ) ) {
+			return;
+		}
+
+		/*
+		 * Replacements may have been enqueued in any order, yet they are
+		 * written out while walking forward through the document. Sorting
+		 * by start offset keeps the output from being mangled by updates
+		 * applied out of order (partially-duplicated or overwritten attributes).
+		 */
+		usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );
+
+		foreach ( $this->lexical_updates as $replacement ) {
+			// Copy the untouched input leading up to this replacement, then the replacement itself.
+			$untouched_span       = substr( $this->html, $this->updated_bytes, $replacement->start - $this->updated_bytes );
+			$this->updated_html  .= $untouched_span . $replacement->text;
+			$this->updated_bytes  = $replacement->end;
+		}
+
+		foreach ( $this->bookmarks as $bookmark ) {
+			/*
+			 * The bookmark's original offsets are compared against every
+			 * replacement, so they must stay untouched during the inner
+			 * loop. The shifts are accumulated and applied afterwards.
+			 */
+			$start_shift = 0;
+			$end_shift   = 0;
+
+			foreach ( $this->lexical_updates as $replacement ) {
+				$shifts_start = $bookmark->start >= $replacement->start;
+				$shifts_end   = $bookmark->end >= $replacement->start;
+
+				// Replacements are sorted, so none of the remaining ones can affect this bookmark.
+				if ( ! ( $shifts_start || $shifts_end ) ) {
+					break;
+				}
+
+				$length_change = strlen( $replacement->text ) - ( $replacement->end - $replacement->start );
+
+				$start_shift += $shifts_start ? $length_change : 0;
+				$end_shift   += $shifts_end ? $length_change : 0;
+			}
+
+			$bookmark->start += $start_shift;
+			$bookmark->end   += $end_shift;
+		}
+
+		$this->lexical_updates = array();
+	}
+
+	/**
+	 * Move the current pointer in the Tag Processor to a given bookmark's location.
+	 *
+	 * In order to prevent accidental infinite loops, there's a
+	 * maximum limit on the number of times seek() can be called.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
+	 * @return bool Result of `next_tag()` after the jump, or `false` when the bookmark
+	 *              is unknown or the seek limit has been exceeded.
+	 * @throws Exception Throws on invalid bookmark name, or on too many seeks, if WP_DEBUG set.
+	 */
+	public function seek( $bookmark_name ) {
+		$is_debugging = defined( 'WP_DEBUG' ) && WP_DEBUG;
+
+		if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
+			if ( $is_debugging ) {
+				throw new Exception( 'Invalid bookmark name' );
+			}
+			return false;
+		}
+
+		$this->seek_count++;
+		if ( $this->seek_count > self::MAX_SEEK_OPS ) {
+			if ( $is_debugging ) {
+				throw new Exception( 'Too many calls to seek() - this can lead to performance issues.' );
+			}
+			return false;
+		}
+
+		// Flush out any pending updates to the document.
+		$this->get_updated_html();
+
+		// Rewind to just before the bookmarked tag opener, then consume it again.
+		$bookmark_start      = $this->bookmarks[ $bookmark_name ]->start;
+		$this->parsed_bytes  = $bookmark_start;
+		$this->updated_bytes = $bookmark_start;
+		$this->updated_html  = substr( $this->html, 0, $bookmark_start );
+
+		return $this->next_tag();
+	}
+
+	/**
+	 * Orders two WP_HTML_Text_Replacement objects for sorting.
+	 *
+	 * Compares by start offset first, then by replacement text, then by end offset.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param WP_HTML_Text_Replacement $a First attribute update.
+	 * @param WP_HTML_Text_Replacement $b Second attribute update.
+	 * @return integer Negative, zero, or positive as `$a` sorts before, with, or after `$b`.
+	 */
+	private static function sort_start_ascending( $a, $b ) {
+		$start_order = $a->start - $b->start;
+		if ( 0 !== $start_order ) {
+			return $start_order;
+		}
+
+		if ( isset( $a->text, $b->text ) ) {
+			$text_order = strcmp( $a->text, $b->text );
+			if ( 0 !== $text_order ) {
+				return $text_order;
+			}
+		}
+
+		/*
+		 * Reaching this point implies two identical updates, or two
+		 * replacements of the same input text, which shouldn't happen.
+		 * The final comparison only exists to keep the sort deterministic,
+		 * which might come in handy when debugging.
+		 */
+		return $a->end - $b->end;
+	}
+
+	/**
+	 * Return the enqueued value for a given attribute, if one exists.
+	 *
+	 * Enqueued updates can take different data types:
+	 *  - If an update is enqueued and is boolean, the return will be `true`
+	 *  - If an update is otherwise enqueued, the return will be the string value of that update.
+	 *  - If an attribute is enqueued to be removed, the return will be `null` to indicate that.
+	 *  - If no updates are enqueued, the return will be `false` to differentiate from "removed."
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $comparable_name The attribute name in its comparable form.
+	 * @return string|boolean|null Value of enqueued update if present, otherwise false.
+	 */
+	private function get_enqueued_attribute_value( $comparable_name ) {
+		if ( ! isset( $this->lexical_updates[ $comparable_name ] ) ) {
+			return false;
+		}
+
+		$enqueued_text = $this->lexical_updates[ $comparable_name ]->text;
+
+		// Removed attributes erase the entire span.
+		if ( '' === $enqueued_text ) {
+			return null;
+		}
+
+		/*
+		 * Boolean attribute updates are just the attribute name without a corresponding value.
+		 *
+		 * This value might differ from the given comparable name in that there could be leading
+		 * or trailing whitespace, and that the casing follows the name given in `set_attribute`.
+		 *
+		 * Example:
+		 * ```
+		 *     $p->set_attribute( 'data-TEST-id', 'update' );
+		 *     'update' === $p->get_enqueued_attribute_value( 'data-test-id' );
+		 * ```
+		 *
+		 * Here we detect this based on the absence of the `=`, which _must_ exist in any
+		 * attribute containing a value, e.g. `<input type="text" enabled />`.
+		 *                                            ¹           ²
+		 *                                       1. Attribute with a string value.
+		 *                                       2. Boolean attribute whose value is `true`.
+		 */
+		$equals_at = strpos( $enqueued_text, '=' );
+		if ( false === $equals_at ) {
+			return true;
+		}
+
+		/*
+		 * Finally, a normal update's value will appear after the `=` and
+		 * be double-quoted, as performed incidentally by `set_attribute`.
+		 *
+		 * e.g. `type="text"`
+		 *           ¹²    ³
+		 *        1. Equals is here.
+		 *        2. Double-quoting starts one after the equals sign.
+		 *        3. Double-quoting ends at the last character in the update.
+		 */
+		$enqueued_value = substr( $enqueued_text, $equals_at + 2, -1 );
+		return html_entity_decode( $enqueued_value );
+	}
+
+	/**
+	 * Returns the value of the parsed attribute in the currently-opened tag.
+	 *
+	 * An enqueued update for the attribute takes precedence over the value
+	 * found in the input HTML. The attribute name is matched case-insensitively.
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<div enabled class="test" data-test-id="14">Test</div>' );
+	 *     $p->next_tag( [ 'class_name' => 'test' ] ) === true;
+	 *     $p->get_attribute( 'data-test-id' ) === '14';
+	 *     $p->get_attribute( 'enabled' ) === true;
+	 *     $p->get_attribute( 'aria-label' ) === null;
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_attribute( 'class' ) === null;
+	 * </code>
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $name Name of attribute whose value is requested.
+	 * @return string|true|null Value of attribute or `null` if not available.
+	 *                          Boolean attributes return `true`.
+	 */
+	public function get_attribute( $name ) {
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		$comparable = strtolower( $name );
+
+		/*
+		 * For every attribute other than `class` we can perform a quick check if there's an
+		 * enqueued lexical update whose value we should prefer over what's in the input HTML.
+		 *
+		 * The `class` attribute is special though because we expose the helpers `add_class`
+		 * and `remove_class` which form a builder for the `class` attribute, so we have to
+		 * additionally check if there are any enqueued class changes. If there are, we need
+		 * to first flush them out so can report the full string value of the attribute.
+		 *
+		 * The comparable (lowercased) name is tested so that `CLASS`, `Class`, etc.
+		 * trigger the flush too, matching the case-insensitive lookups below.
+		 */
+		if ( 'class' === $comparable ) {
+			$this->class_name_updates_to_attributes_updates();
+		}
+
+		// If we have an update for this attribute, return the updated value.
+		$enqueued_value = $this->get_enqueued_attribute_value( $comparable );
+		if ( false !== $enqueued_value ) {
+			return $enqueued_value;
+		}
+
+		if ( ! isset( $this->attributes[ $comparable ] ) ) {
+			return null;
+		}
+
+		$attribute = $this->attributes[ $comparable ];
+
+		/*
+		 * This flag distinguishes an attribute with no value
+		 * from an attribute with an empty string value. For
+		 * unquoted attributes this could look very similar.
+		 * It refers to whether an `=` follows the name.
+		 *
+		 * e.g. <div boolean-attribute empty-attribute=></div>
+		 *           ¹                 ²
+		 *        1. Attribute `boolean-attribute` is `true`.
+		 *        2. Attribute `empty-attribute` is `""`.
+		 */
+		if ( true === $attribute->is_true ) {
+			return true;
+		}
+
+		$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );
+
+		return html_entity_decode( $raw_value );
+	}
+
+	/**
+	 * Returns the lowercase names of all attributes matching a given prefix in the currently-opened tag.
+	 *
+	 * Note that matching is case-insensitive. This is in accordance with the spec:
+	 *
+	 * > There must never be two or more attributes on
+	 * > the same start tag whose names are an ASCII
+	 * > case-insensitive match for each other.
+	 *     - HTML 5 spec
+	 *
+	 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<div data-ENABLED class="test" DATA-test-id="14">Test</div>' );
+	 *     $p->next_tag( [ 'class_name' => 'test' ] ) === true;
+	 *     $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-enabled', 'data-test-id' );
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_attribute_names_with_prefix( 'data-' ) === null;
+	 * </code>
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $prefix Prefix of requested attribute names.
+	 * @return array|null List of attribute names, or `null` if not at a tag.
+	 */
+	public function get_attribute_names_with_prefix( $prefix ) {
+		if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		// Parsed attribute names are stored lowercased, so compare against a lowercased prefix.
+		$comparable = strtolower( $prefix );
+
+		$matches = array();
+		foreach ( array_keys( $this->attributes ) as $attr_name ) {
+			if ( str_starts_with( $attr_name, $comparable ) ) {
+				$matches[] = $attr_name;
+			}
+		}
+		return $matches;
+	}
+
+	/**
+	 * Returns the uppercase name of the currently-opened tag.
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<DIV CLASS="test">Test</DIV>' );
+	 *     $p->next_tag( [] ) === true;
+	 *     $p->get_tag() === 'DIV';
+	 *
+	 *     $p->next_tag( [] ) === false;
+	 *     $p->get_tag() === null;
+	 * </code>
+	 *
+	 * @since 6.2.0
+	 *
+	 * @return string|null Uppercased name of current tag in input HTML, or `null` if none currently open.
+	 */
+	public function get_tag() {
+		if ( null === $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		$tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+
+		return strtoupper( $tag_name );
+	}
+
+	/**
+	 * Indicates if the current tag token is a tag closer.
+	 *
+	 * Example:
+	 * <code>
+	 *     $p = new WP_HTML_Tag_Processor( '<div></div>' );
+	 *     $p->next_tag( [ 'tag_name' => 'div', 'tag_closers' => 'visit' ] );
+	 *     $p->is_tag_closer() === false;
+	 *
+	 *     $p->next_tag( [ 'tag_name' => 'div', 'tag_closers' => 'visit' ] );
+	 *     $p->is_tag_closer() === true;
+	 * </code>
+	 *
+	 * @since 6.2.0
+	 *
+	 * @return bool
+	 */
+	public function is_tag_closer() {
+		return $this->is_closing_tag;
+	}
+
+	/**
+	 * Updates or creates a new attribute on the currently matched tag with the value passed.
+	 *
+	 * For boolean attributes special handling is provided:
+	 *  - When `true` is passed as the value, then only the attribute name is added to the tag.
+	 *  - When `false` is passed, the attribute gets removed if it existed before.
+	 *
+	 * For string attributes, the value is escaped using the `esc_attr` function.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string         $name  The attribute name to target.
+	 * @param string|boolean $value The new attribute value.
+	 * @return false|void `false` when there is no matched tag opener to update.
+	 * @throws Exception When WP_DEBUG is true and the attribute name is invalid.
+	 */
+	public function set_attribute( $name, $value ) {
+		if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) {
+			return false;
+		}
+
+		/*
+		 * Verify that the attribute name is allowable. In WP_DEBUG
+		 * environments we want to crash quickly to alert developers
+		 * of typos and issues; but in production we don't want to
+		 * interrupt a normal page view, so we'll silently avoid
+		 * updating the attribute in those cases.
+		 *
+		 * Of note, we're disallowing more characters than are strictly
+		 * forbidden in HTML5. This is to prevent additional security
+		 * risks deeper in the WordPress and plugin stack. Specifically
+		 * we reject the less-than (<) greater-than (>) and ampersand (&).
+		 *
+		 * The use of a PCRE match allows us to look for specific Unicode
+		 * code points without writing a UTF-8 decoder. Whereas scanning
+		 * for one-byte characters is trivial (with `strcspn`), scanning
+		 * for the longer byte sequences would be more complicated, and
+		 * this shouldn't be in the hot path for execution so we can
+		 * compromise on the efficiency at this point.
+		 *
+		 * @see https://html.spec.whatwg.org/#attributes-2
+		 */
+		if ( preg_match(
+			'~[' .
+				// Syntax-like characters.
+				'"\'>&</ =' .
+				// Control characters.
+				'\x{00}-\x{1F}' .
+				// HTML noncharacters.
+				'\x{FDD0}-\x{FDEF}' .
+				'\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}' .
+				'\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}' .
+				'\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}' .
+				'\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}' .
+				'\x{10FFFE}\x{10FFFF}' .
+			']~Ssu',
+			$name
+		) ) {
+			// WP_DEBUG may be undefined outside of a full WordPress bootstrap.
+			if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
+				throw new Exception( 'Invalid attribute name' );
+			}
+
+			return;
+		}
+
+		/*
+		 * > The values "true" and "false" are not allowed on boolean attributes.
+		 * > To represent a false value, the attribute has to be omitted altogether.
+		 *     - HTML5 spec, https://html.spec.whatwg.org/#boolean-attributes
+		 */
+		if ( false === $value ) {
+			$this->remove_attribute( $name );
+			return;
+		}
+
+		if ( true === $value ) {
+			$updated_attribute = $name;
+		} else {
+			$escaped_new_value = esc_attr( $value );
+			$updated_attribute = "{$name}=\"{$escaped_new_value}\"";
+		}
+
+		/*
+		 * > There must never be two or more attributes on
+		 * > the same start tag whose names are an ASCII
+		 * > case-insensitive match for each other.
+		 *     - HTML 5 spec
+		 *
+		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+		 */
+		$comparable_name = strtolower( $name );
+
+		if ( isset( $this->attributes[ $comparable_name ] ) ) {
+			/*
+			 * Update an existing attribute.
+			 *
+			 * Example – set attribute id to "new" in <div id="initial_id" />:
+			 *    <div id="initial_id"/>
+			 *         ^-------------^
+			 *         start         end
+			 *    replacement: `id="new"`
+			 *
+			 *    Result: <div id="new"/>
+			 *
+			 * The update is stored under the comparable (lowercased) name so
+			 * that repeated updates replace each other regardless of casing and
+			 * so that lookups and removals, which use that same key, find it.
+			 */
+			$existing_attribute                        = $this->attributes[ $comparable_name ];
+			$this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+				$existing_attribute->start,
+				$existing_attribute->end,
+				$updated_attribute
+			);
+		} else {
+			/*
+			 * Create a new attribute at the tag's name end.
+			 *
+			 * Example – add attribute id="new" to <div />:
+			 *    <div/>
+			 *        ^
+			 *        start and end
+			 *    replacement: ` id="new"`
+			 *
+			 *    Result: <div id="new"/>
+			 */
+			$this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement(
+				$this->tag_name_starts_at + $this->tag_name_length,
+				$this->tag_name_starts_at + $this->tag_name_length,
+				' ' . $updated_attribute
+			);
+		}
+
+		/*
+		 * Any calls to update the `class` attribute directly should wipe out any
+		 * enqueued class changes from `add_class` and `remove_class`.
+		 */
+		if ( 'class' === $comparable_name && ! empty( $this->classname_updates ) ) {
+			$this->classname_updates = array();
+		}
+	}
+
+	/**
+	 * Removes an attribute of the currently matched tag.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $name The attribute name to remove.
+	 * @return false|void `false` when on a tag closer or when the attribute
+	 *                    does not exist in the input HTML.
+	 */
+	public function remove_attribute( $name ) {
+		if ( $this->is_closing_tag ) {
+			return false;
+		}
+
+		/*
+		 * > There must never be two or more attributes on
+		 * > the same start tag whose names are an ASCII
+		 * > case-insensitive match for each other.
+		 *     - HTML 5 spec
+		 *
+		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+		 */
+		$name = strtolower( $name );
+
+		/*
+		 * Any calls to update the `class` attribute directly should wipe out any
+		 * enqueued class changes from `add_class` and `remove_class`.
+		 */
+		if ( 'class' === $name && count( $this->classname_updates ) !== 0 ) {
+			$this->classname_updates = array();
+		}
+
+		// If we updated an attribute we didn't originally have, remove the enqueued update and move on.
+		if ( ! isset( $this->attributes[ $name ] ) ) {
+			if ( isset( $this->lexical_updates[ $name ] ) ) {
+				unset( $this->lexical_updates[ $name ] );
+			}
+			return false;
+		}
+
+		/*
+		 * Removes an existing tag attribute.
+		 *
+		 * Example – remove the attribute id from <div id="initial_id"/>:
+		 *    <div id="initial_id"/>
+		 *         ^-------------^
+		 *         start         end
+		 *    replacement: ``
+		 *
+		 *    Result: <div />
+		 */
+		$this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+			$this->attributes[ $name ]->start,
+			$this->attributes[ $name ]->end,
+			''
+		);
+	}
+
+	/**
+	 * Adds a new class name to the currently matched tag.
+	 *
+	 * @since 6.2.0
+	 *
+	 * @param string $class_name The class name to add.
+ */ + public function add_class( $class_name ) { + if ( $this->is_closing_tag ) { + return false; + } + + if ( null !== $this->tag_name_starts_at ) { + $this->classname_updates[ $class_name ] = self::ADD_CLASS; + } + } + + /** + * Removes a class name from the currently matched tag. + * + * @since 6.2.0 + * + * @param string $class_name The class name to remove. + */ + public function remove_class( $class_name ) { + if ( $this->is_closing_tag ) { + return false; + } + + if ( null !== $this->tag_name_starts_at ) { + $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; + } + } + + /** + * Returns the string representation of the HTML Tag Processor. + * + * @since 6.2.0 + * @see get_updated_html + * + * @return string The processed HTML. + */ + public function __toString() { + return $this->get_updated_html(); + } + + /** + * Returns the string representation of the HTML Tag Processor. + * + * @since 6.2.0 + * + * @return string The processed HTML. + */ + public function get_updated_html() { + // Short-circuit if there are no new updates to apply. + if ( ! count( $this->classname_updates ) && ! count( $this->lexical_updates ) ) { + return $this->updated_html . substr( $this->html, $this->updated_bytes ); + } + + // Otherwise: apply the updates, rewind before the current tag, and parse it again. + $delta_between_updated_html_end_and_current_tag_end = substr( + $this->html, + $this->updated_bytes, + $this->tag_name_starts_at + $this->tag_name_length - $this->updated_bytes + ); + $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; + + // 1. Apply the attributes updates to the original HTML + $this->class_name_updates_to_attributes_updates(); + $this->apply_attributes_updates(); + + // 2. Replace the original HTML with the updated HTML + $this->html = $this->updated_html . 
substr( $this->html, $this->updated_bytes ); + $this->updated_html = $updated_html_up_to_current_tag_name_end; + $this->updated_bytes = strlen( $this->updated_html ); + + // 3. Point this tag processor at the original tag opener and consume it + + /* + * When we get here we're at the end of the tag name, and we want to rewind to before it + * <p>Previous HTML<em>More HTML</em></p> + * ^ | back up by the length of the tag name plus the opening < + * \<-/ back up by strlen("em") + 1 ==> 3 + */ + $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 1; + $this->next_tag(); + + return $this->html; + } + + /** + * Prepares tag search criteria from input interface. + * + * @since 6.2.0 + * + * @param array|string $query { + * Which tag name to find, having which class. + * + * @type string|null $tag_name Which tag to find, or `null` for "any tag." + * @type string|null $class_name Tag must contain this class name to match. + * @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g. </div>. + * } + */ + private function parse_query( $query ) { + if ( null !== $query && $query === $this->last_query ) { + return; + } + + $this->last_query = $query; + $this->sought_tag_name = null; + $this->sought_class_name = null; + $this->sought_match_offset = 1; + $this->stop_on_tag_closers = false; + + // A single string value means "find the tag of this name". + if ( is_string( $query ) ) { + $this->sought_tag_name = $query; + return; + } + + // If not using the string interface we have to pass an associative array. + if ( ! 
is_array( $query ) ) { + return; + } + + if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) { + $this->sought_tag_name = $query['tag_name']; + } + + if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) { + $this->sought_class_name = $query['class_name']; + } + + if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) { + $this->sought_match_offset = $query['match_offset']; + } + + if ( isset( $query['tag_closers'] ) ) { + $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; + } + } + + + /** + * Checks whether a given tag and its attributes match the search criteria. + * + * @since 6.2.0 + * + * @return bool Whether the current tag satisfies the parsed query. + */ + private function matches() { + if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { + return false; + } + + // Do we match a case-insensitive HTML tag name? + if ( null !== $this->sought_tag_name ) { + /* + * String (byte) length lookup is fast. If they aren't the + * same length then they can't be the same string values. + */ + if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) { + return false; + } + + /* + * Otherwise we have to check for each character if they + * are the same, and only `strtoupper()` if we have to. + * Presuming that most people will supply lowercase tag + * names and most HTML will contain lowercase tag names, + * most of the time this runs we shouldn't expect to + * actually run the case-folding comparison. Both sides get folded so that a lowercase query still finds <DIV>. + */ + for ( $i = 0; $i < $this->tag_name_length; $i++ ) { + $html_char = $this->html[ $this->tag_name_starts_at + $i ]; + $tag_char = $this->sought_tag_name[ $i ]; + + if ( $html_char !== $tag_char && strtoupper( $html_char ) !== strtoupper( $tag_char ) ) { + return false; + } + } + } + + $needs_class_name = null !== $this->sought_class_name; + + if ( $needs_class_name && ! 
isset( $this->attributes['class'] ) ) { + return false; + } + + // Do we match a byte-for-byte (case-sensitive and encoding-form-sensitive) class name? + if ( $needs_class_name ) { + $class_start = $this->attributes['class']->value_starts_at; + $class_end = $class_start + $this->attributes['class']->value_length; + $class_at = $class_start; + + /* + * We're going to have to jump through potential matches here because + * it's possible that we have classes containing the class name we're + * looking for. For instance, if we are looking for "even" we don't + * want to be confused when we come to the class "not-even." This is + * secured by ensuring that we find our sought-after class and that + * it's surrounded on both sides by proper boundaries. An empty class name never matches, and a match must lie wholly inside the value. + * + * See https://html.spec.whatwg.org/#attributes-3 + * See https://html.spec.whatwg.org/#space-separated-tokens + */ + while ( + // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition + '' !== $this->sought_class_name && false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) && + $class_at + strlen( $this->sought_class_name ) <= $class_end + ) { + /* + * Verify this class starts at a boundary. If it were at 0 we'd be at + * the start of the string and that would be fine, otherwise we have + * to start at a place where the preceding character is whitespace. + */ + if ( $class_at > $class_start ) { + $character = $this->html[ $class_at - 1 ]; + + if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { + $class_at += strlen( $this->sought_class_name ); + continue; + } + } + + /* + * Similarly, verify this class ends at a boundary as well. Here we + * can end at the very end of the string value, otherwise we have + * to end at a place where the next character is whitespace. 
+ */ + if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) { + $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ]; + + if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { + $class_at += strlen( $this->sought_class_name ); + continue; + } + } + + return true; + } + + return false; + } + + return true; + } +} + +endif; + diff --git a/src/wp-includes/class-wp-html-text-replacement.php b/src/wp-includes/class-wp-html-text-replacement.php new file mode 100644 index 0000000000000..4461df473aadd --- /dev/null +++ b/src/wp-includes/class-wp-html-text-replacement.php @@ -0,0 +1,63 @@ +<?php +/** + * HTML Tag Processor: Text replacement class. + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +if ( ! class_exists( 'WP_HTML_Text_Replacement' ) ) : + +/** + * Data structure used to replace existing content from start to end that allows to drastically improve performance. + * + * This class is for internal usage of the WP_HTML_Tag_Processor class. + * + * @access private + * @since 6.2.0 + * + * @see WP_HTML_Tag_Processor + */ +class WP_HTML_Text_Replacement { + /** + * Byte offset into document where replacement span begins. + * + * @since 6.2.0 + * @var int + */ + public $start; + + /** + * Byte offset into document where replacement span ends. + * + * @since 6.2.0 + * @var int + */ + public $end; + + /** + * Span of text to insert in document to replace existing content from start to end. + * + * @since 6.2.0 + * @var string + */ + public $text; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param int $start Byte offset into document where replacement span begins. + * @param int $end Byte offset into document where replacement span ends. + * @param string $text Span of text to insert in document to replace existing content from start to end. 
+ */ + public function __construct( $start, $end, $text ) { + $this->start = $start; + $this->end = $end; + $this->text = $text; + } +} + +endif; diff --git a/src/wp-includes/wp-html.php b/src/wp-includes/wp-html.php new file mode 100644 index 0000000000000..1806643104794 --- /dev/null +++ b/src/wp-includes/wp-html.php @@ -0,0 +1,34 @@ +<?php +/** + * HTML parsing and modification API + * + * @since 6.2 + * + * @package WordPress + * @subpackage HTML + */ + +/* + * These helper classes are used by the Tag Processor for tracking + * content as it parses HTML documents. Using these helper classes + * instead of PHP arrays has a dramatic impact on performance, in + * terms of speed as well as memory use. + */ + +/** WP_HTML_Attribute_Token class */ +require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php'; + +/** WP_HTML_Span class */ +require_once ABSPATH . WPINC . '/class-wp-html-span.php'; + +/** WP_HTML_Text_Replacement class */ +require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php'; + +/* + * The WP_HTML_Tag_Processor is intended for linearly scanning through + * an HTML document, searching for HTML tags matching a given query, + * and adding, removing, or modifying attributes on those tags. + */ + +/** WP_HTML_Tag_Processor class */ +require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php'; diff --git a/src/wp-settings.php b/src/wp-settings.php index 3ed93b2f36629..5385a3356a8c8 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -234,6 +234,7 @@ require ABSPATH . WPINC . '/class-wp-oembed-controller.php'; require ABSPATH . WPINC . '/media.php'; require ABSPATH . WPINC . '/http.php'; +require ABSPATH . WPINC . '/wp-html.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; require ABSPATH . WPINC . 
'/class-wp-http-curl.php'; diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php new file mode 100644 index 0000000000000..c92d0023d16c2 --- /dev/null +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php @@ -0,0 +1,370 @@ +<?php +/** + * Unit tests covering WP_HTML_Tag_Processor bookmark functionality. + * + * @package WordPress + * @subpackage HTML + */ + +require_once ABSPATH . WPINC . '/wp-html.php'; + +/** + * @group html + * + * @coversDefaultClass WP_HTML_Tag_Processor + */ +class WP_HTML_Tag_Processor_Bookmark_Test extends WP_UnitTestCase { + + /** + * @ticket 56299 + * + * @covers set_bookmark + */ + public function test_set_bookmark() { + $p = new WP_HTML_Tag_Processor( '<ul><li>One</li><li>Two</li><li>Three</li></ul>' ); + $p->next_tag( 'li' ); + $this->assertTrue( $p->set_bookmark( 'first li' ), 'Could not allocate a "first li" bookmark.' ); + $p->next_tag( 'li' ); + $this->assertTrue( $p->set_bookmark( 'second li' ), 'Could not allocate a "second li" bookmark.' ); + $this->assertTrue( $p->set_bookmark( 'first li' ), 'Could not move the "first li" bookmark.' ); + } + + /** + * @ticket 56299 + * + * @covers release_bookmark + */ + public function test_release_bookmark() { + $p = new WP_HTML_Tag_Processor( '<ul><li>One</li><li>Two</li><li>Three</li></ul>' ); + $p->next_tag( 'li' ); + $this->assertFalse( $p->release_bookmark( 'first li' ), 'Released a non-existing bookmark.' ); + $p->set_bookmark( 'first li' ); + $this->assertTrue( $p->release_bookmark( 'first li' ), 'Could not release a bookmark.' 
); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_seek() { + $p = new WP_HTML_Tag_Processor( '<ul><li>One</li><li>Two</li><li>Three</li></ul>' ); + $p->next_tag( 'li' ); + $p->set_bookmark( 'first li' ); + + $p->next_tag( 'li' ); + $p->set_attribute( 'foo-2', 'bar-2' ); + + $p->seek( 'first li' ); + $p->set_attribute( 'foo-1', 'bar-1' ); + + $this->assertEquals( + '<ul><li foo-1="bar-1">One</li><li foo-2="bar-2">Two</li><li>Three</li></ul>', + $p->get_updated_html() + ); + } + + /** + * WP_HTML_Tag_Processor used to test for the diffs affecting + * the adjusted bookmark position while simultaneously adjusting + * the bookmark in question. As a result, updating the bookmarks + * of a next tag while removing two subsequent attributes in + * a previous tag unfolded like this: + * + * 1. Check if the first removed attribute is before the bookmark: + * + * <button twenty_one_characters 7_chars></button><button></button> + * ^-------------------^ ^ + * diff applied here the bookmark is here + * + * (Yes it is) + * + * 2. Move the bookmark to the left by the attribute length: + * + * <button twenty_one_characters 7_chars></button><button></button> + * ^ + * the bookmark is here + * + * 3. Check if the second removed attribute is before the bookmark: + * + * <button twenty_one_characters 7_chars></button><button></button> + * ^ ^-----^ + * bookmark diff + * + * This time, it isn't! + * + * The fix in the WP_HTML_Tag_Processor involves doing all the checks + * before moving the bookmark. This test is here to guard us from + * the erroneous behavior accidentally returning one day. 
+ * + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + * @covers apply_attributes_updates + */ + public function test_removing_long_attributes_doesnt_break_seek() { + $input = <<<HTML + <button twenty_one_characters 7_chars></button><button></button> +HTML; + $p = new WP_HTML_Tag_Processor( $input ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'first' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'second' ); + + $this->assertTrue( + $p->seek( 'first' ), + 'Seek() to the first button has failed' + ); + $p->remove_attribute( 'twenty_one_characters' ); + $p->remove_attribute( '7_chars' ); + + $this->assertTrue( + $p->seek( 'second' ), + 'Seek() to the second button has failed' + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_bookmarks_complex_use_case() { + $input = <<<HTML +<div selected class="merge-message" checked> + <div class="select-menu d-inline-block"> + <div checked class="BtnGroup MixedCaseHTML position-relative" /> + <div checked class="BtnGroup MixedCaseHTML position-relative"> + <button type="button" class="merge-box-button btn-group-merge rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Merge pull request + </button> + + <button type="button" class="merge-box-button btn-group-squash rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Squash and merge + </button> + + <button type="button" class="merge-box-button btn-group-rebase rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Rebase and merge + </button> + + <button aria-label="Select merge method" disabled="disabled" type="button" data-view-component="true" class="select-menu-button btn BtnGroup-item"></button> + </div> + </div> 
+</div> +HTML; + $expected_output = <<<HTML +<div selected class="merge-message" checked> + <div class="select-menu d-inline-block"> + <div class="BtnGroup MixedCaseHTML position-relative" /> + <div checked class="BtnGroup MixedCaseHTML position-relative"> + <button type="submit" class="merge-box-button btn-group-merge rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Merge pull request + </button> + + <button class="hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Squash and merge + </button> + + <button id="rebase-and-merge" disabled=""> + Rebase and merge + </button> + + <button id="last-button" ></button> + </div> + </div> +</div> +HTML; + $p = new WP_HTML_Tag_Processor( $input ); + $p->next_tag( 'div' ); + $p->next_tag( 'div' ); + $p->next_tag( 'div' ); + $p->set_bookmark( 'first div' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'first button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'second button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'third button' ); + $p->next_tag( 'button' ); + $p->set_bookmark( 'fourth button' ); + + $p->seek( 'first button' ); + $p->set_attribute( 'type', 'submit' ); + + $this->assertTrue( + $p->seek( 'third button' ), + 'Seek() to the third button failed' + ); + $p->remove_attribute( 'class' ); + $p->remove_attribute( 'type' ); + $p->remove_attribute( 'aria-expanded' ); + $p->set_attribute( 'id', 'rebase-and-merge' ); + $p->remove_attribute( 'data-details-container' ); + + $this->assertTrue( + $p->seek( 'first div' ), + 'Seek() to the first div failed' + ); + $p->set_attribute( 'checked', false ); + + $this->assertTrue( + $p->seek( 'fourth button' ), + 'Seek() to the fourth button failed' + ); + $p->set_attribute( 'id', 'last-button' ); + $p->remove_attribute( 'class' ); + $p->remove_attribute( 'type' ); + $p->remove_attribute( 'checked' ); + $p->remove_attribute( 
'aria-label' ); + $p->remove_attribute( 'disabled' ); + $p->remove_attribute( 'data-view-component' ); + + $this->assertTrue( + $p->seek( 'second button' ), + 'Seek() to the second button failed' + ); + $p->remove_attribute( 'type' ); + $p->set_attribute( 'class', 'hx_create-pr-button' ); + + $this->assertEquals( + $expected_output, + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_additions_after_both_sides() { + $p = new WP_HTML_Tag_Processor( '<div>First</div><div>Second</div>' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->add_class( 'second' ); + + $p->seek( 'first' ); + $p->add_class( 'first' ); + + $this->assertEquals( + '<div class="first">First</div><div class="second">Second</div>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_additions_before_both_sides() { + $p = new WP_HTML_Tag_Processor( '<div>First</div><div>Second</div>' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->set_bookmark( 'second' ); + + $p->seek( 'first' ); + $p->add_class( 'first' ); + + $p->seek( 'second' ); + $p->add_class( 'second' ); + + $this->assertEquals( + '<div class="first">First</div><div class="second">Second</div>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_deletions_after_both_sides() { + $p = new WP_HTML_Tag_Processor( '<div>First</div><div disabled>Second</div>' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->remove_attribute( 'disabled' ); + + $p->seek( 'first' ); + $p->set_attribute( 'untouched', true ); + + $this->assertEquals( + /** @TODO: we shouldn't have to assert the extra space after removing the attribute. 
*/ + '<div untouched>First</div><div >Second</div>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers seek + * @covers set_bookmark + */ + public function test_updates_bookmark_for_deletions_before_both_sides() { + $p = new WP_HTML_Tag_Processor( '<div disabled>First</div><div>Second</div>' ); + $p->next_tag(); + $p->set_bookmark( 'first' ); + $p->next_tag(); + $p->set_bookmark( 'second' ); + + $p->seek( 'first' ); + $p->remove_attribute( 'disabled' ); + + $p->seek( 'second' ); + $p->set_attribute( 'safe', true ); + + $this->assertEquals( + /** @TODO: we shouldn't have to assert the extra space after removing the attribute. */ + '<div >First</div><div safe>Second</div>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_bookmark + */ + public function test_limits_the_number_of_bookmarks() { + $p = new WP_HTML_Tag_Processor( '<ul><li>One</li><li>Two</li><li>Three</li></ul>' ); + $p->next_tag( 'li' ); + + $this->expectException( Exception::class ); + + for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) { + $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); + } + + $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); + } + + /** + * @ticket 56299 + * + * @covers seek + */ + public function test_limits_the_number_of_seek_calls() { + $p = new WP_HTML_Tag_Processor( '<ul><li>One</li><li>Two</li><li>Three</li></ul>' ); + $p->next_tag( 'li' ); + $p->set_bookmark( 'bookmark' ); + + $this->expectException( Exception::class ); + + for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) { + $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); + } + $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." 
); + } +} diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php new file mode 100644 index 0000000000000..92f87099362ec --- /dev/null +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php @@ -0,0 +1,1908 @@ +<?php +/** + * Unit tests covering WP_HTML_Tag_Processor functionality. + * + * @package WordPress + * @subpackage HTML + */ + +require_once ABSPATH . WPINC . '/wp-html.php'; + +/** + * @group html + * + * @coversDefaultClass WP_HTML_Tag_Processor + */ +class WP_HTML_Tag_Processor_Test extends WP_UnitTestCase { + const HTML_SIMPLE = '<div id="first"><span id="second">Text</span></div>'; + const HTML_WITH_CLASSES = '<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>'; + const HTML_MALFORMED = '<div><span class="d-md-none" Notifications</span><span class="d-none d-md-inline">Back to notifications</span></div>'; + + /** + * @ticket 56299 + * + * @covers get_tag + */ + public function test_get_tag_returns_null_before_finding_tags() { + $p = new WP_HTML_Tag_Processor( '<div>Test</div>' ); + $this->assertNull( $p->get_tag() ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_tag + */ + public function test_get_tag_returns_null_when_not_in_open_tag() { + $p = new WP_HTML_Tag_Processor( '<div>Test</div>' ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); + $this->assertNull( $p->get_tag(), 'Accessing a non-existing tag did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_tag + */ + public function test_get_tag_returns_open_tag_name() { + $p = new WP_HTML_Tag_Processor( '<div>Test</div>' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'DIV', $p->get_tag(), 'Accessing an existing tag name did not return "div"' ); + } + + /** + * @ticket 56299 + * + * @covers 
get_attribute + */ + public function test_get_attribute_returns_null_before_finding_tags() { + $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' ); + $this->assertNull( $p->get_attribute( 'class' ) ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_null_when_not_in_open_tag() { + $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); + $this->assertNull( $p->get_attribute( 'class' ), 'Accessing an attribute of a non-existing tag did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_null_when_in_closing_tag() { + $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing closing tag did not return true' ); + $this->assertNull( $p->get_attribute( 'class' ), 'Accessing an attribute of a closing tag did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_null_when_attribute_missing() { + $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + $this->assertNull( $p->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_attribute_value() { + $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + 
$this->assertSame( 'test', $p->get_attribute( 'class' ), 'Accessing a class="test" attribute value did not return "test"' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_true_for_boolean_attribute() { + $p = new WP_HTML_Tag_Processor( '<div enabled class="test">Test</div>' ); + $this->assertTrue( $p->next_tag( array( 'class_name' => 'test' ) ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_get_attribute_returns_string_for_truthy_attributes() { + $p = new WP_HTML_Tag_Processor( '<div enabled=enabled checked=1 hidden="true" class="test">Test</div>' ); + $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing an enabled=enabled attribute value did not return "enabled"' ); + $this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' ); + $this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' ); + } + + /** + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::get_attribute + */ + public function test_get_attribute_decodes_html_character_references() { + $p = new WP_HTML_Tag_Processor( '<div id="the &quot;grande&quot; is &lt; 32oz&dagger;"></div>' ); + $p->next_tag(); + $this->assertSame( 'the "grande" is < 32oz†', $p->get_attribute( 'id' ), 'HTML Attribute value was returned without decoding character references' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_attributes_parser_treats_slash_as_attribute_separator() { + $p = new WP_HTML_Tag_Processor( '<div 
a/b/c/d/e="test">Test</div>' ); + $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $p->get_attribute( 'a' ), 'Accessing an existing attribute did not return true' ); + $this->assertTrue( $p->get_attribute( 'b' ), 'Accessing an existing attribute did not return true' ); + $this->assertTrue( $p->get_attribute( 'c' ), 'Accessing an existing attribute did not return true' ); + $this->assertTrue( $p->get_attribute( 'd' ), 'Accessing an existing attribute did not return true' ); + $this->assertSame( 'test', $p->get_attribute( 'e' ), 'Accessing an existing e="test" did not return "test"' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_attribute + */ + public function test_attributes_parser_is_case_insensitive() { + $p = new WP_HTML_Tag_Processor( '<div DATA-enabled="true" data-VISIBLE>Test</div>' ); + $p->next_tag(); + $p->get_attribute( 'data-enabled' ); + $this->assertEquals( 'true', $p->get_attribute( 'DATA-enabled' ), 'A case-insensitive get_attribute call did not return "true".' ); + $this->assertEquals( 'true', $p->get_attribute( 'data-enabled' ), 'A case-insensitive get_attribute call did not return "true".' ); + $this->assertEquals( 'true', $p->get_attribute( 'DATA-ENABLED' ), 'A case-insensitive get_attribute call did not return "true".' ); + $this->assertEquals( true, $p->get_attribute( 'data-VISIBLE' ), 'A case-insensitive get_attribute call did not return true.' ); + $this->assertEquals( true, $p->get_attribute( 'DATA-visible' ), 'A case-insensitive get_attribute call did not return true.' ); + $this->assertEquals( true, $p->get_attribute( 'dAtA-ViSiBlE' ), 'A case-insensitive get_attribute call did not return true.' 
); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers remove_attribute + */ + public function test_remove_attribute_is_case_insensitive() { + $p = new WP_HTML_Tag_Processor( '<div DATA-enabled="true">Test</div>' ); + $p->next_tag(); + $p->remove_attribute( 'data-enabled' ); + $this->assertEquals( '<div >Test</div>', $p->get_updated_html(), 'A case-insensitive remove_attribute call did not remove the attribute.' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + */ + public function test_set_attribute_is_case_insensitive() { + $p = new WP_HTML_Tag_Processor( '<div DATA-enabled="true">Test</div>' ); + $p->next_tag(); + $p->set_attribute( 'data-enabled', 'abc' ); + $this->assertEquals( '<div data-enabled="abc">Test</div>', $p->get_updated_html(), 'A case-insensitive set_attribute call did not update the existing attribute.' ); + } + + /** + * @ticket 56299 + * + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { + $p = new WP_HTML_Tag_Processor( '<div data-foo="bar">Test</div>' ); + $this->assertNull( $p->get_attribute_names_with_prefix( 'data-' ) ); + } + + /** + * @ticket 56299 + * + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { + $p = new WP_HTML_Tag_Processor( '<div data-foo="bar">Test</div>' ); + $p->next_tag( 'p' ); + $this->assertNull( $p->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { + $p = new WP_HTML_Tag_Processor( '<div data-foo="bar">Test</div>' ); + $p->next_tag( 'div' ); + $p->next_tag( array( 'tag_closers' => 'visit' ) ); + $this->assertNull( $p->get_attribute_names_with_prefix( 'data-' 
), 'Accessing attributes of a closing tag did not return null' ); + } + + /** + * @ticket 56299 + * + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() { + $p = new WP_HTML_Tag_Processor( '<div>Test</div>' ); + $p->next_tag( 'div' ); + $this->assertSame( array(), $p->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' ); + } + + /** + * @ticket 56299 + * + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_lowercase() { + $p = new WP_HTML_Tag_Processor( '<div DATA-enabled class="test" data-test-ID="14">Test</div>' ); + $p->next_tag(); + $this->assertSame( + array( 'data-enabled', 'data-test-id' ), + $p->get_attribute_names_with_prefix( 'data-' ) + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute_names_with_prefix + */ + public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { + $p = new WP_HTML_Tag_Processor( '<div data-foo="bar">Test</div>' ); + $p->next_tag(); + $p->set_attribute( 'data-test-id', '14' ); + $this->assertSame( + '<div data-test-id="14" data-foo="bar">Test</div>', + $p->get_updated_html(), + "Updated HTML doesn't include attribute added via set_attribute" + ); + $this->assertSame( + array( 'data-test-id', 'data-foo' ), + $p->get_attribute_names_with_prefix( 'data-' ), + "Accessing attribute names doesn't find attribute added via set_attribute" + ); + } + + /** + * @ticket 56299 + * + * @covers __toString + */ + public function test_tostring_returns_updated_html() { + $p = new WP_HTML_Tag_Processor( '<hr id="remove" /><div enabled class="test">Test</div><span id="span-id"></span>' ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + + $p->next_tag(); + $p->set_attribute( 'id', 'div-id-1' ); + 
$p->add_class( 'new_class_1' ); + + $this->assertEquals( + $p->get_updated_html(), + (string) $p + ); + } + + /** + * @ticket 56299 + * + * @covers get_updated_html + */ + public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() { + $p = new WP_HTML_Tag_Processor( '<hr id="remove" /><div enabled class="test">Test</div><span id="span-id"></span>' ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + + $p->next_tag(); + $p->set_attribute( 'id', 'div-id-1' ); + $p->add_class( 'new_class_1' ); + $this->assertSame( + '<hr /><div id="div-id-1" enabled class="test new_class_1">Test</div><span id="span-id"></span>', + $p->get_updated_html(), + 'Calling get_updated_html after updating the attributes of the second tag returned different HTML than expected' + ); + + $p->set_attribute( 'id', 'div-id-2' ); + $p->add_class( 'new_class_2' ); + $this->assertSame( + '<hr /><div id="div-id-2" enabled class="test new_class_1 new_class_2">Test</div><span id="span-id"></span>', + $p->get_updated_html(), + 'Calling get_updated_html after updating the attributes of the second tag for the second time returned different HTML than expected' + ); + + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertSame( + '<hr /><div id="div-id-2" enabled class="test new_class_1 new_class_2">Test</div><span ></span>', + $p->get_updated_html(), + 'Calling get_updated_html after removing the id attribute of the third tag returned different HTML than expected' + ); + } + + /** + * @ticket 56299 + * + * @covers get_updated_html + */ + public function test_get_updated_html_without_updating_any_attributes_returns_the_original_html() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + */ + public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() { + $p = new WP_HTML_Tag_Processor( 
self::HTML_SIMPLE ); + $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + */ + public function test_next_tag_should_return_false_for_a_non_existing_tag() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); + } + + /** + * @covers next_tag + * @covers is_tag_closer + */ + public function test_next_tag_should_stop_on_closers_only_when_requested() { + $p = new WP_HTML_Tag_Processor( '<div><img /></div>' ); + $this->assertTrue( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Did not find desired tag opener' ); + $this->assertFalse( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Visited an unwanted tag, a tag closer' ); + + $p = new WP_HTML_Tag_Processor( '<div><img /></div>' ); + $p->next_tag( + array( + 'tag_name' => 'div', + 'tag_closers' => 'visit', + ) + ); + $this->assertFalse( $p->is_tag_closer(), 'Indicated a tag opener is a tag closer' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'div', + 'tag_closers' => 'visit', + ) + ), + 'Did not stop at desired tag closer' + ); + $this->assertTrue( $p->is_tag_closer(), 'Indicated a tag closer is a tag opener' ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers get_updated_html + */ + public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); + $this->assertFalse( $p->next_tag( 'div' ), 'Querying a non-existing tag did not return false' ); + $p->set_attribute( 'id', 'primary' ); + $this->assertSame( + self::HTML_SIMPLE, + $p->get_updated_html(), + 'Calling get_updated_html after updating a non-existing tag returned an HTML that was different from the original HTML' + ); + } + + public function 
test_attribute_ops_on_tag_closer_do_not_change_the_markup() { + $p = new WP_HTML_Tag_Processor( '<div id=3></div invalid-id=4>' ); + /* Stop on the opener first so the next matching stop is known to be the closer. */ + $p->next_tag( + array( + 'tag_name' => 'div', + 'tag_closers' => 'visit', + ) + ); + $this->assertFalse( $p->is_tag_closer(), 'Skipped tag opener' ); + $p->next_tag( + array( + 'tag_name' => 'div', + 'tag_closers' => 'visit', + ) + ); + $this->assertTrue( $p->is_tag_closer(), 'Skipped tag closer' ); + /* Every mutator is expected to refuse to act while stopped on a tag closer. */ + $this->assertFalse( $p->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" ); + $this->assertFalse( $p->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" ); + $this->assertFalse( $p->add_class( 'sneaky' ), "Allowed adding a class on a tag closer when it shouldn't have" ); + $this->assertFalse( $p->remove_class( 'not-appearing-in-this-test' ), "Allowed removing a class on a tag closer when it shouldn't have" ); + $this->assertSame( + '<div id=3></div invalid-id=4>', + $p->get_updated_html(), + 'Calling get_updated_html after attempting attribute updates on a tag closer returned HTML that was different from the original HTML' + ); + } + + /** + * Passing a double quote inside an attribute value could lead to an XSS attack as follows: + * + * <code> + * $p = new WP_HTML_Tag_Processor( '<div class="header"></div>' ); + * $p->next_tag(); + * $p->set_attribute('class', '" onclick="alert'); + * echo $p; + * // <div class="" onclick="alert"></div> + * </code> + * + * To prevent it, `set_attribute` calls `esc_attr()` on its given values. 
+ * + * <code> + * <div class="&quot; onclick=&quot;alert"></div> + * </code> + * + * @ticket 56299 + * + * @dataProvider data_set_attribute_escapable_values + * @covers set_attribute + * + * @param string $attribute_value A value that might need escaping. + */ + public function test_set_attribute_prevents_xss( $attribute_value ) { + $p = new WP_HTML_Tag_Processor( '<div></div>' ); + $p->next_tag(); + $p->set_attribute( 'test', $attribute_value ); + + /* + * Testing the escaping is hard using tools that properly parse + * HTML because they might interpret the escaped values. It's hard + * with tools that don't understand HTML because they might get + * confused by improperly-escaped values. + * + * For this test, since we control the input HTML we're going to + * do what looks like the opposite of what we want to be doing with + * this library but are only doing so because we have full control + * over the content and because we want to look at the raw values. + */ + $match = null; + $this->assertSame( + 1, + preg_match( '~^<div test=(.*)></div>$~', $p->get_updated_html(), $match ), + 'Updated HTML did not have the expected <div test=...></div> shape' + ); + list( , $actual_value ) = $match; + + /* Expected value goes first so PHPUnit labels the failure diff correctly. */ + $this->assertSame( + '"' . esc_attr( $attribute_value ) . '"', + $actual_value, + 'Attribute value was not escaped and double-quoted as expected' + ); + } + + /** + * Data provider with HTML attribute values that might need escaping. 
+ * + * Covers raw special characters as well as inputs that already contain + * character references, so both fresh escaping and already-encoded input + * are exercised. + * + * @return array[] One single-element array per attribute value. + */ + public function data_set_attribute_escapable_values() { + return array( + array( '"' ), + array( '&quot;' ), + array( '&' ), + array( '&amp;' ), + array( '&euro;' ), + array( "'" ), + array( '<>' ), + array( '&quot";' ), + array( '" onclick="alert(\'1\');"><span onclick=""></span><script>alert("1")</script>' ), + ); + } + + /** + * Checks that set_attribute() adds an attribute the tag did not have and + * that get_attribute() returns it after the HTML has been regenerated. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( + '<div test-attribute="test-value" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called after get_updated_html()) did not return attribute added via set_attribute()' + ); + } + + /** + * Checks that get_attribute() returns an enqueued value before + * get_updated_html() has been called. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_returns_updated_values_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' + ); + $this->assertSame( + '<div test-attribute="test-value" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + } + + /** + * Checks that an enqueued attribute can be read back with a name whose + * letter casing differs from the one used to set it, while the markup + * keeps the casing that was passed to set_attribute(). + * + * @covers set_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + 
$p->set_attribute( 'test-ATTribute', 'test-value' ); + $this->assertSame( + 'test-value', + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) did not return attribute added via set_attribute()' + ); + $this->assertSame( + '<div test-ATTribute="test-value" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include attribute added via set_attribute()' + ); + } + + /** + * Checks that get_attribute( 'class' ) reflects a class enqueued with + * add_class() before the HTML has been regenerated. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'my-class' ); + $this->assertSame( + 'my-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' + ); + $this->assertSame( + '<div class="my-class" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include class name added via add_class()' + ); + } + + /** + * Checks that successive add_class() calls accumulate in the value that + * get_attribute( 'class' ) reports before the HTML has been regenerated. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'my-class' ); + $this->assertSame( + 'my-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' + ); + $p->add_class( 'my-other-class' ); + $this->assertSame( + 'my-class my-other-class', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) did not return class names added via subsequent add_class() calls' + ); + $this->assertSame( + '<div class="my-class my-other-class" id="first"><span 
id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include class names added via subsequent add_class() calls' + ); + } + + /** + * Checks that get_attribute() returns null for an attribute that has been + * enqueued for removal. + * + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertNull( + $p->get_attribute( 'id' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was removed by remove_attribute()' + ); + $this->assertSame( + '<div ><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML includes attribute that was removed by remove_attribute()' + ); + } + + /** + * Checks that adding a new attribute and then removing it leaves the + * markup identical to the input. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'test-attribute', 'test-value' ); + $p->remove_attribute( 'test-attribute' ); + $this->assertNull( + $p->get_attribute( 'test-attribute' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' + ); + $this->assertSame( + self::HTML_SIMPLE, + $p->get_updated_html(), + 'Updated HTML includes attribute that was added via set_attribute() and then removed by remove_attribute()' + ); + } + + /** + * Checks that remove_attribute() wins over an earlier set_attribute() call + * made for an attribute that already exists on the tag. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 
'id', 'test-value' ); + $p->remove_attribute( 'id' ); + $this->assertNull( + $p->get_attribute( 'id' ), + 'get_attribute() (called before get_updated_html()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' + ); + $this->assertSame( + '<div ><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()' + ); + } + + /** + * Checks that get_attribute( 'class' ) reflects a class enqueued for + * removal with remove_class() before the HTML has been regenerated. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'with-border' ); + $this->assertSame( + 'main', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned the wrong attribute after calling remove_attribute()' + ); + $this->assertSame( + '<div class="main" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML includes wrong attribute after calling remove_attribute()' + ); + } + + /** + * Checks that add_class() followed by remove_class() for the same new name + * leaves the class attribute, and the markup, as they were. + * + * @ticket 56299 + * + * @covers add_class + * @covers remove_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->remove_class( 'foo-class' ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned class name that was added via add_class() and then removed by remove_class()' + ); + $this->assertSame( + self::HTML_WITH_CLASSES, + $p->get_updated_html(), + 'Updated HTML includes class that was added via add_class() and then removed by 
remove_class()' + ); + } + + /** + * Checks that remove_class() still removes a class that already exists on + * the tag after add_class() was called with that same name. + * + * @ticket 56299 + * + * @covers add_class + * @covers remove_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'with-border' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + 'main', + $p->get_attribute( 'class' ), + 'get_attribute() (called before get_updated_html()) returned class name that was duplicated via add_class() and then removed by remove_class()' + ); + $this->assertSame( + '<div class="main" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML includes class that was duplicated via add_class() and then removed by remove_class()' + ); + } + + /** + * According to the HTML spec, only the first instance of an attribute counts. + * The other ones are ignored. 
+ * + * This test confirms that set_attribute() rewrites the first instance and + * leaves the later duplicate untouched. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_update_first_when_duplicated_attribute() { + $p = new WP_HTML_Tag_Processor( '<div id="update-me" id="ignored-id"><span id="second">Text</span></div>' ); + $p->next_tag(); + $p->set_attribute( 'id', 'updated-id' ); + $this->assertSame( '<div id="updated-id" id="ignored-id"><span id="second">Text</span></div>', $p->get_updated_html() ); + } + + /** + * Checks that set_attribute() replaces the value of an attribute that is + * already present on the tag. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'id', 'new-id' ); + $this->assertSame( '<div id="new-id"><span id="second">Text</span></div>', $p->get_updated_html() ); + } + + /** + * Checks that updates made inside a next_tag() loop are applied to every + * tag the loop visits. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + while ( $p->next_tag() ) { + $p->set_attribute( 'data-foo', 'bar' ); + } + + $this->assertSame( '<div data-foo="bar" id="first"><span data-foo="bar" id="second">Text</span></div>', $p->get_updated_html() ); + } + + /** + * Removing an attribute that's listed many times, e.g. `<div id="a" id="b" />` should remove + * all its instances and output just `<div />`. + * + * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case + * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it. + * Let's revisit if and when this becomes a problem. + * + * This test is in place to confirm this behavior, while incorrect, is well-defined. 
+ * + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_first_when_duplicated_attribute() { + $p = new WP_HTML_Tag_Processor( '<div id="update-me" id="ignored-id"><span id="second">Text</span></div>' ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertSame( '<div id="ignored-id"><span id="second">Text</span></div>', $p->get_updated_html() ); + } + + /** + * Checks that remove_attribute() strips an existing attribute from the tag. + * + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'id' ); + $this->assertSame( '<div ><span id="second">Text</span></div>', $p->get_updated_html() ); + } + + /** + * Checks that removing an attribute the tag does not have leaves the + * markup unchanged. + * + * @ticket 56299 + * + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_attribute( 'no-such-attribute' ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); + } + + /** + * Checks that add_class() creates the class attribute when the tag has none. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_creates_a_class_attribute_when_there_is_none() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $this->assertSame( + '<div class="foo-class" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include class name added via add_class()' + ); + $this->assertSame( + 'foo-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return class name added via add_class()" + ); + } + + /** + * Checks that two add_class() calls on a tag without a class attribute + * produce a single class attribute listing both names. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function 
test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->add_class( 'bar-class' ); + $this->assertSame( + '<div class="foo-class bar-class" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not include class names added via subsequent add_class() calls' + ); + $this->assertSame( + 'foo-class bar-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return class names added via subsequent add_class() calls" + ); + } + + /** + * Checks that remove_class() is a no-op when the tag has no class + * attribute, and that get_attribute( 'class' ) stays null. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->remove_class( 'foo-class' ); + $this->assertSame( + self::HTML_SIMPLE, + $p->get_updated_html(), + 'Updated HTML includes class name that was removed by remove_class()' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null for class name that was removed by remove_class()" + ); + } + + /** + * Checks that add_class() appends to an existing class attribute and keeps + * the names that were already there. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $p->add_class( 'bar-class' ); + $this->assertSame( + '<div class="main with-border foo-class bar-class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect class names added to existing class attribute via subsequent add_class() calls' + ); + $this->assertSame( + 'main with-border 
foo-class bar-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class names added to existing class attribute via subsequent add_class() calls" + ); + } + + /** + * Checks that remove_class() removes only the named class and keeps the + * rest of the class attribute. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'main' ); + $this->assertSame( + '<div class=" with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect class name removed from existing class attribute via remove_class()' + ); + $this->assertSame( + ' with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class name removed from existing class attribute via remove_class()" + ); + } + + /** + * Checks that removing every listed class name removes the class attribute + * itself, so get_attribute( 'class' ) returns null. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->remove_class( 'main' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + '<div id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect class attribute removed via subesequent remove_class() calls' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null for class attribute removed via subesequent remove_class() calls" + ); + } + + /** + * Checks that add_class() does not add a class name the tag already has. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_does_not_add_duplicate_class_names() { + $p = new WP_HTML_Tag_Processor( 
self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'with-border' ); + $this->assertSame( + '<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect deduplicated class name added via add_class()' + ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect deduplicated class name added via add_class()" + ); + } + + /** + * Checks that re-adding an existing class name does not change the order + * of the class list. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'main' ); + $this->assertSame( + '<div class="main with-border" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect class name order after adding duplicated class name via add_class()' + ); + $this->assertSame( + 'main with-border', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect class name order after adding duplicated class name added via add_class()" + ); + } + + /** + * Checks add_class() against a class attribute whose value contains + * surplus whitespace around the class names. + * + * @ticket 56299 + * + * @covers add_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { + $p = new WP_HTML_Tag_Processor( + '<div class=" main with-border " id="first"><span class="not-main bold with-border" id="second">Text</span></div>' + ); + $p->next_tag(); + $p->add_class( 'foo-class' ); + $this->assertSame( + '<div class=" main with-border foo-class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect existing excessive whitespace after adding class name via add_class()' + ); + 
$this->assertSame( + ' main with-border foo-class', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect existing excessive whitespace after adding class name via add_class()" + ); + } + + /** + * Checks remove_class() against a class attribute whose value contains + * surplus whitespace around the class names. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { + $p = new WP_HTML_Tag_Processor( + '<div class=" main with-border " id="first"><span class="not-main bold with-border" id="second">Text</span></div>' + ); + $p->next_tag(); + $p->remove_class( 'with-border' ); + $this->assertSame( + '<div class=" main" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect existing excessive whitespace after removing class name via remove_class()' + ); + $this->assertSame( + ' main', + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) does not reflect existing excessive whitespace after removing class name via removing_class()" + ); + } + + /** + * Checks that removing every class name removes the class attribute even + * when its value contained surplus whitespace. + * + * @ticket 56299 + * + * @covers remove_class + * @covers get_updated_html + * @covers get_attribute + */ + public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { + $p = new WP_HTML_Tag_Processor( + '<div class=" main with-border " id="first"><span class="not-main bold with-border" id="second">Text</span></div>' + ); + $p->next_tag(); + $p->remove_class( 'main' ); + $p->remove_class( 'with-border' ); + $this->assertSame( + '<div id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 'Updated HTML does not reflect removed class attribute after removing all class names via remove_class()' + ); + $this->assertNull( + $p->get_attribute( 'class' ), + "get_attribute( 'class' ) did not return null after removing all class 
names via remove_class()" + ); + } + + /** + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the + * final class name should be "$value $different_value". In other words, the `add_class` call + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. + * + * The first half of this test covers `add_class` followed by `set_attribute`; the second half + * uses a fresh processor to cover the reverse order. In both halves the updated HTML is read + * before `get_attribute` is called. + * + * @ticket 56299 + * + * @covers add_class + * @covers set_attribute + * @covers get_updated_html + * @covers get_attribute + */ + public function test_set_attribute_takes_priority_over_add_class() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'add_class' ); + $p->set_attribute( 'class', 'set_attribute' ); + $this->assertSame( + '<div class="set_attribute" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + $this->assertSame( + 'set_attribute', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->set_attribute( 'class', 'set_attribute' ); + $p->add_class( 'add_class' ); + $this->assertSame( + '<div class="set_attribute add_class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + $this->assertSame( + 'set_attribute add_class', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class 
name" + ); + } + + /** + * When add_class( $different_value ) is called _after_ set_attribute( 'class', $value ), the + * final class name should be "$value $different_value". In other words, the `add_class` call + * should append its class to the one(s) set by `set_attribute`. When `add_class( $different_value )` + * is called _before_ `set_attribute( 'class', $value )`, however, the final class name should be + * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued + * via the class builder methods. + * + * This is still true if we read enqueued updates before calling `get_updated_html()`. + * + * Each half of this test therefore calls `get_attribute` first and only then `get_updated_html`; + * the second half uses a fresh processor with the two calls made in the reverse order. + * + * @ticket 56299 + * + * @covers add_class + * @covers set_attribute + * @covers get_attribute + * @covers get_updated_html + */ + public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->add_class( 'add_class' ); + $p->set_attribute( 'class', 'set_attribute' ); + $this->assertSame( + 'set_attribute', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + $this->assertSame( + '<div class="set_attribute" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + "Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + + $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); + $p->next_tag(); + $p->set_attribute( 'class', 'set_attribute' ); + $p->add_class( 'add_class' ); + $this->assertSame( + 'set_attribute add_class', + $p->get_attribute( 'class' ), + "Calling get_attribute after updating first tag's attributes did not return the expected class name" + ); + $this->assertSame( + '<div class="set_attribute add_class" id="first"><span class="not-main bold with-border" id="second">Text</span></div>', + $p->get_updated_html(), + 
"Calling get_updated_html after updating first tag's attributes did not return the expected HTML" + ); + } + + /** + * Checks that add_class() replaces a class attribute that was previously + * set to boolean true, reading the updated HTML before get_attribute(). + * + * @ticket 56299 + * + * @covers set_attribute + * @covers add_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_add_class_overrides_boolean_class_attribute() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'class', true ); + $p->add_class( 'add_class' ); + $this->assertSame( + '<div class="add_class" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + $this->assertSame( + 'add_class', + $p->get_attribute( 'class' ), + "get_attribute (called after get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + } + + /** + * Checks that get_attribute( 'class' ) already reports the added class + * before get_updated_html() is called when the class attribute had been + * set to boolean true. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers add_class + * @covers get_attribute + * @covers get_updated_html + */ + public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { + $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $p->next_tag(); + $p->set_attribute( 'class', true ); + $p->add_class( 'add_class' ); + $this->assertSame( + 'add_class', + $p->get_attribute( 'class' ), + "get_attribute (called before get_updated_html()) doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + $this->assertSame( + '<div class="add_class" id="first"><span id="second">Text</span></div>', + $p->get_updated_html(), + "Updated HTML doesn't reflect class added via add_class that was originally set as boolean attribute" + ); + } + + /** + * Exercises set_attribute(), add_class(), remove_class() and + * remove_attribute() together across several tags of a larger document, + * then compares the final HTML with a hand-written expectation. + * + * @ticket 56299 + * + * @covers set_attribute + * @covers remove_attribute + * @covers add_class + * @covers remove_class + * @covers get_updated_html + */ + public function test_advanced_use_case() { + $input = <<<HTML +<div selected class="merge-message" 
checked> + <div class="select-menu d-inline-block"> + <div checked class="BtnGroup MixedCaseHTML position-relative" /> + <div checked class="BtnGroup MixedCaseHTML position-relative"> + <button type="button" class="merge-box-button btn-group-merge rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Merge pull request + </button> + + <button type="button" class="merge-box-button btn-group-squash rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Squash and merge + </button> + + <button type="button" class="merge-box-button btn-group-rebase rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Rebase and merge + </button> + + <button aria-label="Select merge method" disabled="disabled" type="button" data-view-component="true" class="select-menu-button btn BtnGroup-item"></button> + </div> + </div> +</div> +HTML; + + /* + * set_attribute() escapes its value with esc_attr(), so the double quotes + * inside the data-details value must appear as &quot; in the expected output. + */ + $expected_output = <<<HTML +<div data-details="{ &quot;key&quot;: &quot;value&quot; }" selected class="merge-message is-processed" checked> + <div class="select-menu d-inline-block"> + <div checked class=" MixedCaseHTML position-relative button-group Another-Mixed-Case" /> + <div checked class=" MixedCaseHTML position-relative button-group Another-Mixed-Case"> + <button type="button" class="merge-box-button btn-group-merge rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Merge pull request + </button> + + <button type="button" class="merge-box-button btn-group-squash rounded-left-2 btn BtnGroup-item js-details-target hx_create-pr-button" aria-expanded="false" data-details-container=".js-merge-pr" disabled=""> + Squash and merge + </button> + + <button type="button" aria-expanded="false" 
data-details-container=".js-merge-pr" disabled=""> + Rebase and merge + </button> + + <button aria-label="Select merge method" disabled="disabled" type="button" data-view-component="true" class="select-menu-button btn BtnGroup-item"></button> + </div> + </div> +</div> +HTML; + + $p = new WP_HTML_Tag_Processor( $input ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); + $p->set_attribute( 'data-details', '{ "key": "value" }' ); + $p->add_class( 'is-processed' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'div', + 'class_name' => 'BtnGroup', + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_class( 'BtnGroup' ); + $p->add_class( 'button-group' ); + $p->add_class( 'Another-Mixed-Case' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'div', + 'class_name' => 'BtnGroup', + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_class( 'BtnGroup' ); + $p->add_class( 'button-group' ); + $p->add_class( 'Another-Mixed-Case' ); + $this->assertTrue( + $p->next_tag( + array( + 'tag_name' => 'button', + 'class_name' => 'btn', + 'match_offset' => 3, + ) + ), + 'Querying an existing tag did not return true' + ); + $p->remove_attribute( 'class' ); + $this->assertFalse( $p->next_tag( 'non-existent' ), 'Querying a non-existing tag did not return false' ); + $p->set_attribute( 'class', 'test' ); + $this->assertSame( $expected_output, $p->get_updated_html(), 'Calling get_updated_html after updating the attributes did not return the expected HTML' ); + } + + /** + * Checks that attributes quoted with single quotation marks are matched, + * removed and rewritten correctly; the rewritten attribute is emitted with + * double quotes. + * + * @ticket 56299 + * + * @covers remove_attribute + * @covers set_attribute + * @covers get_updated_html + */ + public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() { + $p = new WP_HTML_Tag_Processor( + '<div id=\'first\'><span id=\'second\'>Text</span></div>' + ); + $p->next_tag( + array( + 'tag_name' => 'div', + 'id' => 'first', + ) + ); + $p->remove_attribute( 'id' 
); + $p->next_tag( + array( + 'tag_name' => 'span', + 'id' => 'second', + ) + ); + $p->set_attribute( 'id', 'single-quote' ); + $this->assertSame( + '<div ><span id="single-quote">Text</span></div>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() { + $p = new WP_HTML_Tag_Processor( + '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', true ); + $this->assertSame( + '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { + $p = new WP_HTML_Tag_Processor( + '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', false ); + $this->assertSame( + '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { + $html_input = '<form action="/action_page.php"><input type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>'; + $p = new WP_HTML_Tag_Processor( $html_input ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', false ); + $this->assertSame( $html_input, 
$p->get_updated_html() ); + } + + /** + * @ticket 56299 + * + * @covers set_attribute + * @covers get_updated_html + */ + public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { + $p = new WP_HTML_Tag_Processor( + '<form action="/action_page.php"><input checked type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>' + ); + $p->next_tag( 'input' ); + $p->set_attribute( 'checked', 'checked' ); + $this->assertSame( + '<form action="/action_page.php"><input checked="checked" type="checkbox" name="vehicle" value="Bike"><label for="vehicle">I have a bike</label></form>', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers get_tag + * @covers next_tag + */ + public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { + $p = new WP_HTML_Tag_Processor( '<script>' ); + $p->next_tag(); + $this->assertSame( 'SCRIPT', $p->get_tag() ); + $p->next_tag(); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * + * @dataProvider data_script_state + */ + public function test_next_tag_ignores_the_contents_of_a_script_tag( $script_then_div ) { + $p = new WP_HTML_Tag_Processor( $script_then_div ); + $p->next_tag(); + $this->assertSame( 'SCRIPT', $p->get_tag(), 'The first found tag was not "script"' ); + $p->next_tag(); + $this->assertSame( 'DIV', $p->get_tag(), 'The second found tag was not "div"' ); + } + + /** + * Data provider for test_ignores_contents_of_a_script_tag(). + * + * @return array { + * @type array { + * @type string $script_then_div The HTML snippet containing script and div tags. 
+ * } + * } + */ + public function data_script_state() { + $examples = array(); + + $examples['Simple script tag'] = array( + '<script><span class="d-none d-md-inline">Back to notifications</span></script><div></div>', + ); + + $examples['Simple uppercase script tag'] = array( + '<script><span class="d-none d-md-inline">Back to notifications</span></SCRIPT><div></div>', + ); + + $examples['Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)'] = array( + '<script class="d-md-none"><!--</script><div></div>-->', + ); + + $examples['Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)'] = array( + '<script class="d-md-none"><!--<script><span1></script><span2></span2></script><div></div>-->', + ); + + $examples['Double escaped script with a tricky opener'] = array( + '<script class="d-md-none"><!--<script attr="</script>"></script>"><div></div>', + ); + + $examples['Double escaped script with a tricky closer'] = array( + '<script class="d-md-none"><!--<script><span></script attr="</script>"><div></div>', + ); + + $examples['Double escaped, then escaped, then double escaped'] = array( + '<script class="d-md-none"><!--<script></script><script></script><span></span></script><div></div>', + ); + + $examples['Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)'] = array( + '<script class="d-md-none"><!--<script>--><span></script><div></div>-->', + ); + + $examples['Script closer with another script tag in closer attributes'] = array( + '<script><span class="d-none d-md-inline">Back to notifications</title</span></script <script><div></div>', + ); + + $examples['Script closer with attributes'] = array( + '<script class="d-md-none"><span class="d-none d-md-inline">Back to notifications</span></script id="test"><div></div>', + ); + + $examples['Script opener with title closer inside'] = array( + '<script 
class="d-md-none">
', + ); + + $examples['Complex script with many parsing states'] = array( + '-->
-->', + ); + return $examples; + } + + /** + * @ticket 56299 + * + * @covers next_tag + * + * @dataProvider data_rcdata_state + */ + public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) { + $p = new WP_HTML_Tag_Processor( $rcdata_then_div ); + $p->next_tag(); + $this->assertSame( strtoupper( $rcdata_tag ), $p->get_tag(), "The first found tag was not '$rcdata_tag'" ); + $p->next_tag(); + $this->assertSame( 'DIV', $p->get_tag(), "The second found tag was not 'div'" ); + } + + /** + * Data provider for test_ignores_contents_of_a_rcdata_tag(). + * + * @return array { + * @type array { + * @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags. + * @type string $rcdata_tag The RCDATA tag. + * } + * } + */ + public function data_rcdata_state() { + $examples = array(); + $examples['Simple textarea'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Simple title'] = array( + '<span class="d-none d-md-inline">Back to notifications</title</span>
', + 'TITLE', + ); + + $examples['Comment opener inside a textarea tag should be ignored'] = array( + '
-->', + 'TEXTAREA', + ); + + $examples['Textarea closer with another textarea tag in closer attributes'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Textarea closer with attributes'] = array( + '
', + 'TEXTAREA', + ); + + $examples['Textarea opener with title closer inside'] = array( + '
', + 'TEXTAREA', + ); + return $examples; + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + * @covers get_updated_html + */ + public function test_can_query_and_update_wrongly_nested_tags() { + $p = new WP_HTML_Tag_Processor( + '123

456789

' + ); + $p->next_tag( 'span' ); + $p->set_attribute( 'class', 'span-class' ); + $p->next_tag( 'p' ); + $p->set_attribute( 'class', 'p-class' ); + $this->assertSame( + '123

456789

', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers remove_attribute + * @covers get_updated_html + */ + public function test_removing_attributes_works_even_in_malformed_html() { + $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); + $p->next_tag( 'span' ); + $p->remove_attribute( 'Notifications<' ); + $this->assertSame( + '
Back to notifications
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_Tag + * @covers set_attribute + * @covers get_updated_html + */ + public function test_updating_attributes_works_even_in_malformed_html_1() { + $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); + $p->next_tag( 'span' ); + $p->set_attribute( 'id', 'first' ); + $p->next_tag( 'span' ); + $p->set_attribute( 'id', 'second' ); + $this->assertSame( + '
Back to notifications
', + $p->get_updated_html() + ); + } + + /** + * @ticket 56299 + * + * @covers next_tag + * @covers set_attribute + * @covers add_class + * @covers get_updated_html + * + * @dataProvider data_malformed_tag + */ + public function test_updating_attributes_works_even_in_malformed_html_2( $html_input, $html_expected ) { + $p = new WP_HTML_Tag_Processor( $html_input ); + $p->next_tag(); + $p->set_attribute( 'foo', 'bar' ); + $p->add_class( 'firstTag' ); + $p->next_tag(); + $p->add_class( 'secondTag' ); + $this->assertSame( + $html_expected, + $p->get_updated_html() + ); + } + + /** + * Data provider for test_updates_when_malformed_tag(). + * + * @return array { + * @type array { + * @type string $html_input The input HTML snippet. + * @type string $html_expected The expected HTML snippet after processing. + * } + * } + */ + public function data_malformed_tag() { + $null_byte = chr( 0 ); + $examples = array(); + $examples['Invalid entity inside attribute value'] = array( + 'test', + 'test', + ); + + $examples['HTML tag opening inside attribute value'] = array( + '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', + ); + + $examples['HTML tag brackets in attribute values and data markup'] = array( + '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', + ); + + $examples['Single and double quotes in attribute value'] = array( + '

test', + '

test', + ); + + $examples['Unquoted attribute values'] = array( + '


test', + '
test', + ); + + $examples['Double-quotes escaped in double-quote attribute value'] = array( + '
test', + '
test', + ); + + $examples['Unquoted attribute value'] = array( + '
test', + '
test', + ); + + $examples['Unquoted attribute value with tag-like value'] = array( + '
>test', + '
>test', + ); + + $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array( + '
>test', + '
>test', + ); + + $examples['1'] = array( + '
test', + '
test', + ); + + $examples['2'] = array( + '
test', + '
test', + ); + + $examples['4'] = array( + '
test', + '
test', + ); + + $examples['5'] = array( + '
code>test', + '
code>test', + ); + + $examples['6'] = array( + '
test', + '
test', + ); + + $examples['7'] = array( + '
test', + '
test', + ); + + $examples['8'] = array( + '
id="test">test', + '
id="test">test', + ); + + $examples['9'] = array( + '
test', + '
test', + ); + + $examples['10'] = array( + 'test', + 'test', + ); + + $examples['11'] = array( + 'The applicative operator <* works well in Haskell; is what?test', + 'The applicative operator <* works well in Haskell; is what?test', + ); + + $examples['12'] = array( + '<3 is a heart but is a tag.test', + '<3 is a heart but is a tag.test', + ); + + $examples['13'] = array( + 'test', + 'test', + ); + + $examples['14'] = array( + 'test', + 'test', + ); + + $examples['15'] = array( + ' a HTML Tag]]>test', + ' a HTML Tag]]>test', + ); + + $examples['16'] = array( + '
test', + '
test', + ); + + $examples['17'] = array( + '
test', + '
test', + ); + + $examples['18'] = array( + '
test', + '
test', + ); + + $examples['19'] = array( + '
test', + '
test', + ); + + $examples['20'] = array( + '
test', + '
test', + ); + + $examples['21'] = array( + '
test', + '
test', + ); + + $examples['22'] = array( + '
test', + '
test', + ); + + $examples['23'] = array( + '
test', + '
test', + ); + + $examples['24'] = array( + '
test', + '
test', + ); + + $examples['25'] = array( + '
test', + '
test', + ); + + $examples['Multiple unclosed tags treated as a single tag'] = array( + << + test +HTML + , + << + test +HTML + , + ); + + $examples['27'] = array( + '
test', + '
test', + ); + + $examples['28'] = array( + '
test', + '
test', + ); + + return $examples; + } +} From 40e1cb3e794de8f000ff63c07887fe2b85071d91 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 30 Jan 2023 13:29:05 -0700 Subject: [PATCH 02/36] Move class_exists calls to wp-html --- .../class-wp-html-attribute-token.php | 4 ---- src/wp-includes/class-wp-html-span.php | 4 ---- .../class-wp-html-tag-processor.php | 5 ---- .../class-wp-html-text-replacement.php | 4 ---- src/wp-includes/wp-html.php | 24 ++++++++++++------- 5 files changed, 16 insertions(+), 25 deletions(-) diff --git a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php index 21147e30bfe1f..7b3d571872358 100644 --- a/src/wp-includes/class-wp-html-attribute-token.php +++ b/src/wp-includes/class-wp-html-attribute-token.php @@ -7,8 +7,6 @@ * @since 6.2.0 */ -if ( ! class_exists( 'WP_HTML_Attribute_Token' ) ) : - /** * Data structure for the attribute token that allows to drastically improve performance. * @@ -89,5 +87,3 @@ public function __construct( $name, $value_start, $value_length, $start, $end, $ $this->is_true = $is_true; } } - -endif; diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php index 376e391dc1c44..39e603662b17b 100644 --- a/src/wp-includes/class-wp-html-span.php +++ b/src/wp-includes/class-wp-html-span.php @@ -7,8 +7,6 @@ * @since 6.2.0 */ -if ( ! class_exists( 'WP_HTML_Span' ) ) : - /** * Represents a textual span inside an HTML document. * @@ -52,5 +50,3 @@ public function __construct( $start, $end ) { $this->end = $end; } } - -endif; diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php index 24e67a3adc83f..0c35e939cccae 100644 --- a/src/wp-includes/class-wp-html-tag-processor.php +++ b/src/wp-includes/class-wp-html-tag-processor.php @@ -26,8 +26,6 @@ * @since 6.2.0 */ -if ( ! 
class_exists( 'WP_HTML_Tag_Processor' ) ) : - /** * Processes an input HTML document by applying a specified set * of patches to that input. Tokenizes HTML but does not fully @@ -2042,6 +2040,3 @@ private function matches() { return true; } } - -endif; - diff --git a/src/wp-includes/class-wp-html-text-replacement.php b/src/wp-includes/class-wp-html-text-replacement.php index 4461df473aadd..e3ada169d76ef 100644 --- a/src/wp-includes/class-wp-html-text-replacement.php +++ b/src/wp-includes/class-wp-html-text-replacement.php @@ -7,8 +7,6 @@ * @since 6.2.0 */ -if ( ! class_exists( 'WP_HTML_Text_Replacement' ) ) : - /** * Data structure used to replace existing content from start to end that allows to drastically improve performance. * @@ -59,5 +57,3 @@ public function __construct( $start, $end, $text ) { $this->text = $text; } } - -endif; diff --git a/src/wp-includes/wp-html.php b/src/wp-includes/wp-html.php index 1806643104794..a0d238cef7823 100644 --- a/src/wp-includes/wp-html.php +++ b/src/wp-includes/wp-html.php @@ -15,14 +15,20 @@ * terms of speed as well as memory use. */ -/** WP_HTML_Attribute_Token class */ -require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php'; +if ( ! class_exists( 'WP_HTML_Attribute_Token' ) ) { + /** WP_HTML_Attribute_Token class */ + require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php'; +} -/** WP_HTML_Span class */ -require_once ABSPATH . WPINC . '/class-wp-html-span.php'; +if ( ! class_exists( 'WP_HTML_Span' ) ) { + /** WP_HTML_Span class */ + require_once ABSPATH . WPINC . '/class-wp-html-span.php'; +} -/** WP_HTML_Text_Replacement class */ -require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php'; +if ( ! class_exists( 'WP_HTML_Text_Replacement' ) ) { + /** WP_HTML_Text_Replacement class */ + require_once ABSPATH . WPINC . 
'/class-wp-html-text-replacement.php'; +} /* * The WP_HTML_Tag_Processor is intended for linearly scanning through @@ -30,5 +36,7 @@ * and adding, removing, or modifying attributes on those tags. */ -/** WP_HTML_Tag_Processor class */ -require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php'; +if ( ! class_exists( 'WP_HTML_Tag_Processor' ) ) { + /** WP_HTML_Tag_Processor class */ + require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php'; +} From 8b507e5417a2a64c6ac155444e44de2566d5d643 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 30 Jan 2023 13:32:46 -0700 Subject: [PATCH 03/36] Mark helper classes `final` --- src/wp-includes/class-wp-html-attribute-token.php | 2 +- src/wp-includes/class-wp-html-span.php | 2 +- src/wp-includes/class-wp-html-text-replacement.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/class-wp-html-attribute-token.php b/src/wp-includes/class-wp-html-attribute-token.php index 7b3d571872358..05224f8549ef1 100644 --- a/src/wp-includes/class-wp-html-attribute-token.php +++ b/src/wp-includes/class-wp-html-attribute-token.php @@ -17,7 +17,7 @@ * * @see WP_HTML_Tag_Processor */ -class WP_HTML_Attribute_Token { +final class WP_HTML_Attribute_Token { /** * Attribute name. * diff --git a/src/wp-includes/class-wp-html-span.php b/src/wp-includes/class-wp-html-span.php index 39e603662b17b..2f902f3831f03 100644 --- a/src/wp-includes/class-wp-html-span.php +++ b/src/wp-includes/class-wp-html-span.php @@ -20,7 +20,7 @@ * * @see WP_HTML_Tag_Processor */ -class WP_HTML_Span { +final class WP_HTML_Span { /** * Byte offset into document where span begins. 
* diff --git a/src/wp-includes/class-wp-html-text-replacement.php b/src/wp-includes/class-wp-html-text-replacement.php index e3ada169d76ef..a8341ad33acfe 100644 --- a/src/wp-includes/class-wp-html-text-replacement.php +++ b/src/wp-includes/class-wp-html-text-replacement.php @@ -17,7 +17,7 @@ * * @see WP_HTML_Tag_Processor */ -class WP_HTML_Text_Replacement { +final class WP_HTML_Text_Replacement { /** * Byte offset into document where replacement span begins. * From 561acff5157b9db342244f066d7f5017a7e167a2 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 30 Jan 2023 14:08:30 -0700 Subject: [PATCH 04/36] Updates from review feedback, mostly docs --- .../class-wp-html-tag-processor.php | 45 ++- .../html/wpHtmlTagProcessorBookmarks.php | 38 +- .../tests/html/wpHtmlTagProcessorTest.php | 376 ++++++++++-------- 3 files changed, 255 insertions(+), 204 deletions(-) diff --git a/src/wp-includes/class-wp-html-tag-processor.php b/src/wp-includes/class-wp-html-tag-processor.php index 0c35e939cccae..888271df150c6 100644 --- a/src/wp-includes/class-wp-html-tag-processor.php +++ b/src/wp-includes/class-wp-html-tag-processor.php @@ -239,7 +239,7 @@ class WP_HTML_Tag_Processor { * Whether to visit tag closers, e.g. , when walking an input document. * * @since 6.2.0 - * @var boolean + * @var bool */ private $stop_on_tag_closers; @@ -279,7 +279,7 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_name_starts_at; @@ -294,7 +294,7 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_name_length; @@ -310,14 +310,14 @@ class WP_HTML_Tag_Processor { * ``` * * @since 6.2.0 - * @var ?int + * @var int|null */ private $tag_ends_at; /** * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. * - * @var boolean + * @var bool */ private $is_closing_tag; @@ -445,8 +445,8 @@ public function __construct( $html ) { * * @since 6.2.0 * - * @param array|string $query { - * Which tag name to find, having which class, etc. + * @param array|string|null $query { + * Optional. Which tag name to find, having which class, etc. Default is to find any tag. * * @type string|null $tag_name Which tag to find, or `null` for "any tag." * @type int|null $match_offset Find the Nth tag matching all search criteria. @@ -570,21 +570,23 @@ public function next_tag( $query = null ) { * } * ``` * - * Because bookmarks maintain their position they don't - * expose any internal offsets for the HTML document + * Because bookmarks maintain their position, they don't + * expose any internal offsets for the HTML document, * and can't be used with normal string functions. * * Because bookmarks allocate memory and require processing - * for every applied update they are limited and require + * for every applied update, they are limited and require * a name. They should not be created inside a loop. * - * Bookmarks are a powerful tool to enable complicated behavior; - * consider double-checking that you need this tool if you are + * Bookmarks are a powerful tool to enable complicated behavior. + * Consider double-checking that you need this tool if you are * reaching for it, as inappropriate use could lead to broken * HTML structure or unwanted processing overhead. * + * @since 6.2.0 + * * @param string $name Identifies this particular bookmark. - * @return false|void + * @return bool|void * @throws Exception Throws on invalid bookmark name if WP_DEBUG set. */ public function set_bookmark( $name ) { @@ -742,7 +744,7 @@ private function skip_script_data() { * escaped mode if we aren't already there. * * Inside the escaped modes it's ignored and - * shouldn't ever pull us out of double-escaped + * should never pull us out of double-escaped * and back into escaped. 
* * We'll continue parsing past it regardless of @@ -878,9 +880,11 @@ private function parse_next_tag() { return true; } - // If we didn't find a tag opener, and we can't be - // transitioning into different markup states, then - // we can abort because there aren't any more tags. + /* + * If we didn't find a tag opener, and we can't be + * transitioning into different markup states, then + * we can abort because there aren't any more tags. + */ if ( $at + 1 >= strlen( $html ) ) { return false; } @@ -1111,11 +1115,12 @@ private function after_tag() { * Converts class name updates into tag attributes updates * (they are accumulated in different data formats for performance). * - * @return void * @since 6.2.0 * * @see $classname_updates * @see $lexical_updates + * + * @return void */ private function class_name_updates_to_attributes_updates() { if ( count( $this->classname_updates ) === 0 ) { @@ -1155,7 +1160,7 @@ private function class_name_updates_to_attributes_updates() { * Tracks the cursor position in the existing class * attribute value where we're currently parsing. * - * @var integer + * @var int */ $at = 0; @@ -1173,7 +1178,7 @@ private function class_name_updates_to_attributes_updates() { * * This flag is set upon the first change that requires a string update. * - * @var boolean + * @var bool */ $modified = false; diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php index c92d0023d16c2..25a335453ccbc 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php @@ -18,7 +18,7 @@ class WP_HTML_Tag_Processor_Bookmark_Test extends WP_UnitTestCase { /** * @ticket 56299 * - * @covers set_bookmark + * @covers ::set_bookmark */ public function test_set_bookmark() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -32,7 +32,7 @@ public function test_set_bookmark() { /** * @ticket 56299 * - * @covers release_bookmark + * @covers ::release_bookmark */ public function test_release_bookmark() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -45,8 +45,8 @@ public function test_release_bookmark() { /** * @ticket 56299 * - * @covers seek - * @covers set_bookmark + * @covers ::seek + * @covers ::set_bookmark */ public function test_seek() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -100,9 +100,9 @@ public function test_seek() { * * @ticket 56299 * - * @covers seek - * @covers set_bookmark - * @covers apply_attributes_updates + * @covers ::seek + * @covers ::set_bookmark + * @covers ::apply_attributes_updates */ public function test_removing_long_attributes_doesnt_break_seek() { $input = <<First
Second
' ); @@ -261,8 +261,8 @@ public function test_updates_bookmark_for_additions_after_both_sides() { /** * @ticket 56299 * - * @covers seek - * @covers set_bookmark + * @covers ::seek + * @covers ::set_bookmark */ public function test_updates_bookmark_for_additions_before_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -286,8 +286,8 @@ public function test_updates_bookmark_for_additions_before_both_sides() { /** * @ticket 56299 * - * @covers seek - * @covers set_bookmark + * @covers ::seek + * @covers ::set_bookmark */ public function test_updates_bookmark_for_deletions_after_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -309,8 +309,8 @@ public function test_updates_bookmark_for_deletions_after_both_sides() { /** * @ticket 56299 * - * @covers seek - * @covers set_bookmark + * @covers ::seek + * @covers ::set_bookmark */ public function test_updates_bookmark_for_deletions_before_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -335,7 +335,7 @@ public function test_updates_bookmark_for_deletions_before_both_sides() { /** * @ticket 56299 * - * @covers set_bookmark + * @covers ::set_bookmark */ public function test_limits_the_number_of_bookmarks() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -353,7 +353,7 @@ public function test_limits_the_number_of_bookmarks() { /** * @ticket 56299 * - * @covers seek + * @covers ::seek */ public function test_limits_the_number_of_seek_calls() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php index 92f87099362ec..e5a19e82dbde8 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php @@ -140,7 +140,7 @@ public function test_get_attribute_returns_string_for_truthy_attributes() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::get_attribute + * @covers ::get_attribute */ public function test_get_attribute_decodes_html_character_references() { $p = new WP_HTML_Tag_Processor( '
' ); @@ -151,8 +151,8 @@ public function test_get_attribute_decodes_html_character_references() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers ::next_tag + * @covers ::get_attribute */ public function test_attributes_parser_treats_slash_as_attribute_separator() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -167,26 +167,58 @@ public function test_attributes_parser_treats_slash_as_attribute_separator() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers ::next_tag + * @covers ::get_attribute + * + * @dataProvider data_attribute_name_case_variants + * + * @param string $attribute_name Name of data-enabled attribute with case variations. */ - public function test_attributes_parser_is_case_insensitive() { - $p = new WP_HTML_Tag_Processor( '
Test
' ); + public function test_get_attribute_is_case_insensitive_for_attributes_with_values( $attribute_name ) { + $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); - $p->get_attribute( 'data-enabled' ); - $this->assertEquals( 'true', $p->get_attribute( 'DATA-enabled' ), 'A case-insensitive get_attribute call did not return "true".' ); - $this->assertEquals( 'true', $p->get_attribute( 'data-enabled' ), 'A case-insensitive get_attribute call did not return "true".' ); - $this->assertEquals( 'true', $p->get_attribute( 'DATA-ENABLED' ), 'A case-insensitive get_attribute call did not return "true".' ); - $this->assertEquals( true, $p->get_attribute( 'data-VISIBLE' ), 'A case-insensitive get_attribute call did not return true.' ); - $this->assertEquals( true, $p->get_attribute( 'DATA-visible' ), 'A case-insensitive get_attribute call did not return true.' ); - $this->assertEquals( true, $p->get_attribute( 'dAtA-ViSiBlE' ), 'A case-insensitive get_attribute call did not return true.' ); + $this->assertSame( 'true', $p->get_attribute( $attribute_name ) ); } /** * @ticket 56299 * - * @covers next_tag - * @covers remove_attribute + * @covers ::next_tag + * @covers ::get_attribute + * + * @dataProvider data_attribute_name_case_variants + * + * @param string $attribute_name Name of data-enabled attribute with case variations. + */ + public function test_attributes_parser_is_case_insensitive_for_attributes_without_values( $attribute_name ) { + $p = new WP_HTML_Tag_Processor( '
Test
' ); + $p->next_tag(); + $this->assertTrue( $p->get_attribute( $attribute_name ) ); + } + + /** + * Data provider for attribute names in various casings. + * + * @return array { + * @type array { + * @type string $attribute_name Name of data-enabled attribute with case variations. + * } + * } + */ + public function data_attribute_name_case_variants() { + return array( + array( 'DATA-enabled' ), + array( 'data-enabled' ), + array( 'DATA-ENABLED' ), + array( 'DatA-EnABled' ), + ); + } + + /** + * @ticket 56299 + * + * @covers ::next_tag + * @covers ::remove_attribute */ public function test_remove_attribute_is_case_insensitive() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -198,8 +230,8 @@ public function test_remove_attribute_is_case_insensitive() { /** * @ticket 56299 * - * @covers next_tag - * @covers set_attribute + * @covers ::next_tag + * @covers ::set_attribute */ public function test_set_attribute_is_case_insensitive() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -211,7 +243,7 @@ public function test_set_attribute_is_case_insensitive() { /** * @ticket 56299 * - * @covers get_attribute_names_with_prefix + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -221,7 +253,7 @@ public function test_get_attribute_names_with_prefix_returns_null_before_finding /** * @ticket 56299 * - * @covers get_attribute_names_with_prefix + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -232,7 +264,7 @@ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_op /** * @ticket 56299 * - * @covers get_attribute_names_with_prefix + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -244,7 +276,7 @@ public function test_get_attribute_names_with_prefix_returns_null_when_in_closin /** * @ticket 56299 * - * @covers get_attribute_names_with_prefix + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -255,7 +287,7 @@ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no /** * @ticket 56299 * - * @covers get_attribute_names_with_prefix + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_lowercase() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -269,9 +301,9 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ /** * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute_names_with_prefix + * @covers ::set_attribute + * @covers ::get_updated_html + * @covers ::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -292,9 +324,9 @@ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_ /** * @ticket 56299 * - * @covers __toString + * @covers ::__toString */ - public function tostring_returns_updated_html() { + public function test_to_string_returns_updated_html() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); $p->remove_attribute( 'id' ); @@ -312,7 +344,7 @@ public function tostring_returns_updated_html() { /** * @ticket 56299 * - * @covers get_updated_html + * @covers ::get_updated_html */ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -348,7 +380,7 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p /** * @ticket 56299 * - * @covers get_updated_html + * @covers ::get_updated_html */ public function test_get_updated_html_without_updating_any_attributes_returns_the_original_html() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -358,7 +390,7 @@ public function test_get_updated_html_without_updating_any_attributes_returns_th /** * @ticket 56299 * - * @covers next_tag + * @covers ::next_tag */ public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -368,7 +400,7 @@ public function test_next_tag_with_no_arguments_should_find_the_next_existing_ta /** * @ticket 56299 * - * @covers next_tag + * @covers ::next_tag */ public function test_next_tag_should_return_false_for_a_non_existing_tag() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -376,8 +408,10 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() { } /** - * @covers next_tag - * @covers is_tag_closer + * @ticket 56299 + * + * @covers ::next_tag + * @covers ::is_tag_closer */ public function test_next_tag_should_stop_on_closers_only_when_requested() { $p = new WP_HTML_Tag_Processor( '
' ); @@ -407,8 +441,8 @@ public function test_next_tag_should_stop_on_closers_only_when_requested() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_updated_html + * @covers ::next_tag + * @covers ::get_updated_html */ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -422,6 +456,16 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar ); } + /** + * @ticket 56299 + * + * @covers ::is_tag_closer + * @covers ::set_attribute + * @covers ::remove_attribute + * @covers ::add_class + * @covers ::remove_class + * @covers ::get_updated_html + */ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( '
' ); $p->next_tag( @@ -469,7 +513,9 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { * @ticket 56299 * * @dataProvider data_set_attribute_escapable_values - * @covers set_attribute + * @covers ::set_attribute + * + * @param string $attribute_value Value with potential XSS exploit. */ public function test_set_attribute_prevents_xss( $attribute_value ) { $p = new WP_HTML_Tag_Processor( '
' ); @@ -514,9 +560,9 @@ public function data_set_attribute_escapable_values() { /** * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute + * @covers ::set_attribute + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -537,9 +583,9 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr /** * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute + * @covers ::set_attribute + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -576,9 +622,9 @@ public function test_get_attribute_returns_updated_values_before_they_are_update /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -599,9 +645,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -628,9 +674,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 56299 * - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html + * @covers 
::remove_attribute + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -650,10 +696,10 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_updat /** * @ticket 56299 * - * @covers set_attribute - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::remove_attribute + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -674,10 +720,10 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut /** * @ticket 56299 * - * @covers set_attribute - * @covers remove_attribute - * @covers get_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::remove_attribute + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -698,9 +744,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -721,10 +767,10 @@ public function test_get_attribute_reflects_removed_class_names_before_they_are_ /** * @ticket 56299 * - * @covers add_class - * @covers remove_class - * @covers get_attribute - * @covers get_updated_html + * @covers ::add_class + * @covers ::remove_class + * @covers 
::get_attribute + * @covers ::get_updated_html */ public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -746,10 +792,10 @@ public function test_get_attribute_reflects_setting_and_then_removing_a_class_na /** * @ticket 56299 * - * @covers add_class - * @covers remove_class - * @covers get_attribute - * @covers get_updated_html + * @covers ::add_class + * @covers ::remove_class + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -774,8 +820,8 @@ public function test_get_attribute_reflects_duplicating_and_then_removing_an_exi * * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::get_updated_html */ public function test_update_first_when_duplicated_attribute() { $p = new WP_HTML_Tag_Processor( '
Text
' ); @@ -787,8 +833,8 @@ public function test_update_first_when_duplicated_attribute() { /** * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::get_updated_html */ public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -800,8 +846,8 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v /** * @ticket 56299 * - * @covers set_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::get_updated_html */ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -824,8 +870,8 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the * * @ticket 56299 * - * @covers remove_attribute - * @covers get_updated_html + * @covers ::remove_attribute + * @covers ::get_updated_html */ public function test_remove_first_when_duplicated_attribute() { $p = new WP_HTML_Tag_Processor( '
Text
' ); @@ -837,8 +883,8 @@ public function test_remove_first_when_duplicated_attribute() { /** * @ticket 56299 * - * @covers remove_attribute - * @covers get_updated_html + * @covers ::remove_attribute + * @covers ::get_updated_html */ public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -850,8 +896,8 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it /** * @ticket 56299 * - * @covers remove_attribute - * @covers get_updated_html + * @covers ::remove_attribute + * @covers ::get_updated_html */ public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -863,9 +909,9 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_add_class_creates_a_class_attribute_when_there_is_none() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -886,9 +932,9 @@ public function test_add_class_creates_a_class_attribute_when_there_is_none() { /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -910,9 +956,9 @@ public function test_calling_add_class_twice_creates_a_class_attribute_with_both /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function 
test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -932,9 +978,9 @@ public function test_remove_class_does_not_change_the_markup_when_there_is_no_cl /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -956,9 +1002,9 @@ public function test_add_class_appends_class_names_to_the_existing_class_attribu /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -979,9 +1025,9 @@ public function test_remove_class_removes_a_single_class_from_the_class_attribut /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1002,9 +1048,9 @@ public function test_calling_remove_class_with_all_listed_class_names_removes_th /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_add_class_does_not_add_duplicate_class_names() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1025,9 +1071,9 @@ public function 
test_add_class_does_not_add_duplicate_class_names() { /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1048,9 +1094,9 @@ public function test_add_class_preserves_class_name_order_when_a_duplicate_class /** * @ticket 56299 * - * @covers add_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1073,9 +1119,9 @@ public function test_add_class_when_there_is_a_class_attribute_with_excessive_wh /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1098,9 +1144,9 @@ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_at /** * @ticket 56299 * - * @covers remove_class - * @covers get_updated_html - * @covers get_attribute + * @covers ::remove_class + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { $p = new WP_HTML_Tag_Processor( @@ -1130,10 +1176,10 @@ public function test_removing_all_classes_removes_the_existing_class_attribute_f * * @ticket 56299 * - * @covers add_class - * @covers set_attribute - * @covers get_updated_html - * @covers get_attribute + * @covers ::add_class + * @covers 
::set_attribute + * @covers ::get_updated_html + * @covers ::get_attribute */ public function test_set_attribute_takes_priority_over_add_class() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1179,10 +1225,10 @@ public function test_set_attribute_takes_priority_over_add_class() { * * @ticket 56299 * - * @covers add_class - * @covers set_attribute - * @covers get_attribute - * @covers get_updated_html + * @covers ::add_class + * @covers ::set_attribute + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1219,10 +1265,10 @@ public function test_set_attribute_takes_priority_over_add_class_even_before_upd /** * @ticket 56299 * - * @covers set_attribute - * @covers add_class - * @covers get_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::add_class + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1244,10 +1290,10 @@ public function test_add_class_overrides_boolean_class_attribute() { /** * @ticket 56299 * - * @covers set_attribute - * @covers add_class - * @covers get_attribute - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::add_class + * @covers ::get_attribute + * @covers ::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1269,11 +1315,11 @@ public function test_add_class_overrides_boolean_class_attribute_even_before_upd /** * @ticket 56299 * - * @covers set_attribute - * @covers remove_attribute - * @covers add_class - * @covers remove_class - * @covers get_updated_html + * @covers ::set_attribute + * @covers ::remove_attribute + * @covers ::add_class + * @covers ::remove_class + * 
@covers ::get_updated_html */ public function test_advanced_use_case() { $input = <<' ); @@ -1481,7 +1527,7 @@ public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { /** * @ticket 56299 * - * @covers next_tag + * @covers ::next_tag * * @dataProvider data_script_state */ @@ -1558,7 +1604,7 @@ public function data_script_state() { /** * @ticket 56299 * - * @covers next_tag + * @covers ::next_tag * * @dataProvider data_rcdata_state */ @@ -1617,9 +1663,9 @@ public function data_rcdata_state() { /** * @ticket 56299 * - * @covers next_tag - * @covers set_attribute - * @covers get_updated_html + * @covers ::next_tag + * @covers ::set_attribute + * @covers ::get_updated_html */ public function test_can_query_and_update_wrongly_nested_tags() { $p = new WP_HTML_Tag_Processor( @@ -1638,9 +1684,9 @@ public function test_can_query_and_update_wrongly_nested_tags() { /** * @ticket 56299 * - * @covers next_tag - * @covers remove_attribute - * @covers get_updated_html + * @covers ::next_tag + * @covers ::remove_attribute + * @covers ::get_updated_html */ public function test_removing_attributes_works_even_in_malformed_html() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); @@ -1655,9 +1701,9 @@ public function test_removing_attributes_works_even_in_malformed_html() { /** * @ticket 56299 * - * @covers next_Tag - * @covers set_attribute - * @covers get_updated_html + * @covers ::next_Tag + * @covers ::set_attribute + * @covers ::get_updated_html */ public function test_updating_attributes_works_even_in_malformed_html_1() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); @@ -1674,10 +1720,10 @@ public function test_updating_attributes_works_even_in_malformed_html_1() { /** * @ticket 56299 * - * @covers next_tag - * @covers set_attribute - * @covers add_class - * @covers get_updated_html + * @covers ::next_tag + * @covers ::set_attribute + * @covers ::add_class + * @covers ::get_updated_html * * @dataProvider data_malformed_tag */ From 
57550e7440998d3046421572812b4d705c19257b Mon Sep 17 00:00:00 2001 From: hellofromtonya Date: Mon, 30 Jan 2023 15:39:48 -0600 Subject: [PATCH 05/36] WP_HTML_Tag_Processor_Test: test improvements * Rename data providers to match test per coding standard. * Restructure data provider datasets into a single array form for consistency. * Add `WP_HTML_Tag_Processor::` to @covers methods per coding standard. * Add empty line between setup and assertion groupings. * Move well-formed HTML into separate test of updating attributes. * Replace assertEquals() with assertSame(). --- .../tests/html/wpHtmlTagProcessorTest.php | 1048 +++++++++-------- 1 file changed, 585 insertions(+), 463 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php index e5a19e82dbde8..29930029f724d 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php @@ -21,21 +21,23 @@ class WP_HTML_Tag_Processor_Test extends WP_UnitTestCase { /** * @ticket 56299 * - * @covers get_tag + * @covers WP_HTML_Tag_Processor::get_tag */ public function test_get_tag_returns_null_before_finding_tags() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertNull( $p->get_tag() ); } /** * @ticket 56299 * - * @covers next_tag - * @covers get_tag + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_tag */ public function test_get_tag_returns_null_when_not_in_open_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); $this->assertNull( $p->get_tag(), 'Accessing a non-existing tag did not return null' ); } @@ -43,11 +45,12 @@ public function test_get_tag_returns_null_when_not_in_open_tag() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_tag + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_tag */ public function test_get_tag_returns_open_tag_name() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); $this->assertSame( 'DIV', $p->get_tag(), 'Accessing an existing tag name did not return "div"' ); } @@ -55,21 +58,23 @@ public function test_get_tag_returns_open_tag_name() { /** * @ticket 56299 * - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_before_finding_tags() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertNull( $p->get_attribute( 'class' ) ); } /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_not_in_open_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); $this->assertNull( $p->get_attribute( 'class' ), 'Accessing an attribute of a non-existing tag did not return null' ); } @@ -77,11 +82,12 @@ public function test_get_attribute_returns_null_when_not_in_open_tag() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_in_closing_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); $this->assertTrue( $p->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing closing tag did not return true' ); $this->assertNull( $p->get_attribute( 'class' ), 'Accessing an attribute of a closing tag did not return null' ); @@ -90,11 +96,12 @@ public function test_get_attribute_returns_null_when_in_closing_tag() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_attribute_missing() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); $this->assertNull( $p->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); } @@ -102,11 +109,12 @@ public function test_get_attribute_returns_null_when_attribute_missing() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_attribute_value() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( 'div' ), 'Querying an existing tag did not return true' ); $this->assertSame( 'test', $p->get_attribute( 'class' ), 'Accessing a class="test" attribute value did not return "test"' ); } @@ -114,11 +122,12 @@ public function test_get_attribute_returns_attribute_value() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_true_for_boolean_attribute() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( array( 'class_name' => 'test' ) ), 'Querying an existing tag did not return true' ); $this->assertTrue( $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' ); } @@ -126,11 +135,12 @@ public function test_get_attribute_returns_true_for_boolean_attribute() { /** * @ticket 56299 * - * @covers next_tag - * @covers get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_string_for_truthy_attributes() { $p = new WP_HTML_Tag_Processor( '' ); + $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); $this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' ); $this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' ); @@ -145,17 +155,19 @@ public function test_get_attribute_returns_string_for_truthy_attributes() { public function test_get_attribute_decodes_html_character_references() { $p = new WP_HTML_Tag_Processor( '
' ); $p->next_tag(); + $this->assertSame( 'the "grande" is < 32oz†', $p->get_attribute( 'id' ), 'HTML Attribute value was returned without decoding character references' ); } /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_attributes_parser_treats_slash_as_attribute_separator() { $p = new WP_HTML_Tag_Processor( '
Test
' ); + $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); $this->assertTrue( $p->get_attribute( 'a' ), 'Accessing an existing attribute did not return true' ); $this->assertTrue( $p->get_attribute( 'b' ), 'Accessing an existing attribute did not return true' ); @@ -167,8 +179,8 @@ public function test_attributes_parser_treats_slash_as_attribute_separator() { /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute * * @dataProvider data_attribute_name_case_variants * @@ -177,14 +189,15 @@ public function test_attributes_parser_treats_slash_as_attribute_separator() { public function test_get_attribute_is_case_insensitive_for_attributes_with_values( $attribute_name ) { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); - $this->assertSame( 'true', $p->get_attribute( $attribute_name ) ); + + $this->assertSame( 'true', $p->get_attribute( $attribute_name ) ); } /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_attribute * * @dataProvider data_attribute_name_case_variants * @@ -193,17 +206,14 @@ public function test_get_attribute_is_case_insensitive_for_attributes_with_value public function test_attributes_parser_is_case_insensitive_for_attributes_without_values( $attribute_name ) { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); - $this->assertTrue( $p->get_attribute( $attribute_name ) ); + + $this->assertTrue( $p->get_attribute( $attribute_name ) ); } /** - * Data provider for attribute names in various casings. + * Data provider. * - * @return array { - * @type array { - * @type string $attribute_name Name of data-enabled attribute with case variations. - * } - * } + * @return array[]. */ public function data_attribute_name_case_variants() { return array( @@ -217,33 +227,35 @@ public function data_attribute_name_case_variants() { /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::remove_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::remove_attribute */ public function test_remove_attribute_is_case_insensitive() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); $p->remove_attribute( 'data-enabled' ); - $this->assertEquals( '
Test
', $p->get_updated_html(), 'A case-insensitive remove_attribute call did not remove the attribute.' ); + + $this->assertSame( '
Test
', $p->get_updated_html(), 'A case-insensitive remove_attribute call did not remove the attribute.' ); } /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::set_attribute + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::set_attribute */ public function test_set_attribute_is_case_insensitive() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); $p->set_attribute( 'data-enabled', 'abc' ); - $this->assertEquals( '
Test
', $p->get_updated_html(), 'A case-insensitive set_attribute call did not update the existing attribute.' ); + + $this->assertSame( '
Test
', $p->get_updated_html(), 'A case-insensitive set_attribute call did not update the existing attribute.' ); } /** * @ticket 56299 * - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -253,7 +265,7 @@ public function test_get_attribute_names_with_prefix_returns_null_before_finding /** * @ticket 56299 * - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -264,34 +276,37 @@ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_op /** * @ticket 56299 * - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag( 'div' ); $p->next_tag( array( 'tag_closers' => 'visit' ) ); + $this->assertNull( $p->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' ); } /** * @ticket 56299 * - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag( 'div' ); + $this->assertSame( array(), $p->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' ); } /** * @ticket 56299 * - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_lowercase() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); + $this->assertSame( array( 'data-enabled', 'data-test-id' ), $p->get_attribute_names_with_prefix( 'data-' ) @@ -301,14 +316,15 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html - * @covers ::get_attribute_names_with_prefix + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { $p = new WP_HTML_Tag_Processor( '
Test
' ); $p->next_tag(); $p->set_attribute( 'data-test-id', '14' ); + $this->assertSame( '
Test
', $p->get_updated_html(), @@ -324,7 +340,7 @@ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_ /** * @ticket 56299 * - * @covers ::__toString + * @covers WP_HTML_Tag_Processor::__toString */ public function test_to_string_returns_updated_html() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -335,7 +351,7 @@ public function test_to_string_returns_updated_html() { $p->set_attribute( 'id', 'div-id-1' ); $p->add_class( 'new_class_1' ); - $this->assertEquals( + $this->assertSame( $p->get_updated_html(), (string) $p ); @@ -344,7 +360,7 @@ public function test_to_string_returns_updated_html() { /** * @ticket 56299 * - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() { $p = new WP_HTML_Tag_Processor( '
Test
' ); @@ -354,6 +370,7 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p $p->next_tag(); $p->set_attribute( 'id', 'div-id-1' ); $p->add_class( 'new_class_1' ); + $this->assertSame( '
Test
', $p->get_updated_html(), @@ -362,6 +379,7 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p $p->set_attribute( 'id', 'div-id-2' ); $p->add_class( 'new_class_2' ); + $this->assertSame( '
Test
', $p->get_updated_html(), @@ -370,6 +388,7 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p $p->next_tag(); $p->remove_attribute( 'id' ); + $this->assertSame( '
Test
', $p->get_updated_html(), @@ -384,37 +403,41 @@ public function test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p */ public function test_get_updated_html_without_updating_any_attributes_returns_the_original_html() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); } /** * @ticket 56299 * - * @covers ::next_tag + * @covers WP_HTML_Tag_Processor::next_tag */ public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' ); } /** * @ticket 56299 * - * @covers ::next_tag + * @covers WP_HTML_Tag_Processor::next_tag */ public function test_next_tag_should_return_false_for_a_non_existing_tag() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); } /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::is_tag_closer + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::is_tag_closer */ public function test_next_tag_should_stop_on_closers_only_when_requested() { $p = new WP_HTML_Tag_Processor( '
' ); + $this->assertTrue( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Did not find desired tag opener' ); $this->assertFalse( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Visited an unwanted tag, a tag closer' ); @@ -425,6 +448,7 @@ public function test_next_tag_should_stop_on_closers_only_when_requested() { 'tag_closers' => 'visit', ) ); + $this->assertFalse( $p->is_tag_closer(), 'Indicated a tag opener is a tag closer' ); $this->assertTrue( $p->next_tag( @@ -441,14 +465,17 @@ public function test_next_tag_should_stop_on_closers_only_when_requested() { /** * @ticket 56299 * - * @covers ::next_tag - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); + $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); $this->assertFalse( $p->next_tag( 'div' ), 'Querying a non-existing tag did not return false' ); + $p->set_attribute( 'id', 'primary' ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html(), @@ -474,13 +501,16 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { 'tag_closers' => 'visit', ) ); + $this->assertFalse( $p->is_tag_closer(), 'Skipped tag opener' ); + $p->next_tag( array( 'tag_name' => 'div', 'tag_closers' => 'visit', ) ); + $this->assertTrue( $p->is_tag_closer(), 'Skipped tag closer' ); $this->assertFalse( $p->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" ); $this->assertFalse( $p->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" ); @@ -512,10 +542,10 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { * * @ticket 56299 * - * @dataProvider data_set_attribute_escapable_values - * @covers ::set_attribute + * @dataProvider 
data_set_attribute_prevents_xss + * @covers WP_HTML_Tag_Processor::set_attribute * - * @param string $attribute_value Value with potential XSS exploit. + * @param string $attribute_value A value with potential XSS exploit. */ public function test_set_attribute_prevents_xss( $attribute_value ) { $p = new WP_HTML_Tag_Processor( '
' ); @@ -528,22 +558,24 @@ public function test_set_attribute_prevents_xss( $attribute_value ) { * with tools that don't understand HTML because they might get * confused by improperly-escaped values. * - * For this test, since we control the input HTML we're going to - * do what looks like the opposite of what we want to be doing with - * this library but are only doing so because we have full control - * over the content and because we want to look at the raw values. + * Since the input HTML is known, the test will do what looks like + * the opposite of what is expected to be done with this library. + * But by doing so, the test (a) has full control over the + * content and (b) looks at the raw values. */ $match = null; preg_match( '~^
$~', $p->get_updated_html(), $match ); list( , $actual_value ) = $match; - $this->assertEquals( $actual_value, '"' . esc_attr( $attribute_value ) . '"' ); + $this->assertSame( '"' . esc_attr( $attribute_value ) . '"', $actual_value ); } /** - * Data provider with HTML attribute values that might need escaping. + * Data provider. + * + * @return string[][] */ - public function data_set_attribute_escapable_values() { + public function data_set_attribute_prevents_xss() { return array( array( '"' ), array( '"' ), @@ -560,14 +592,15 @@ public function data_set_attribute_escapable_values() { /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -583,14 +616,15 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-attribute', 'test-value' ); + $this->assertSame( 'test-value', $p->get_attribute( 'test-attribute' ), @@ -603,10 +637,18 @@ public function test_get_attribute_returns_updated_values_before_they_are_update ); } + /** + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html + */ public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-ATTribute', 'test-value' ); + $this->assertSame( 'test-value', $p->get_attribute( 'test-attribute' ), @@ -622,14 +664,15 @@ public function test_get_attribute_returns_updated_values_before_they_are_update /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'my-class' ); + $this->assertSame( 'my-class', $p->get_attribute( 'class' ), @@ -645,20 +688,23 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 
56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'my-class' ); + $this->assertSame( 'my-class', $p->get_attribute( 'class' ), 'get_attribute() (called before get_updated_html()) did not return class name added via add_class()' ); + $p->add_class( 'my-other-class' ); + $this->assertSame( 'my-class my-other-class', $p->get_attribute( 'class' ), @@ -674,14 +720,15 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 56299 * - * @covers ::remove_attribute - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->remove_attribute( 'id' ); + $this->assertNull( $p->get_attribute( 'id' ), 'get_attribute() (called before get_updated_html()) returned attribute that was removed by remove_attribute()' @@ -696,16 +743,17 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_updat /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::remove_attribute - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function 
test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-attribute', 'test-value' ); $p->remove_attribute( 'test-attribute' ); + $this->assertNull( $p->get_attribute( 'test-attribute' ), 'get_attribute() (called before get_updated_html()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' @@ -720,16 +768,17 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::remove_attribute - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'id', 'test-value' ); $p->remove_attribute( 'id' ); + $this->assertNull( $p->get_attribute( 'id' ), 'get_attribute() (called before get_updated_html()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' @@ -744,14 +793,15 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->remove_class( 'with-border' ); + $this->assertSame( 'main', $p->get_attribute( 'class' ), @@ -767,16 
+817,17 @@ public function test_get_attribute_reflects_removed_class_names_before_they_are_ /** * @ticket 56299 * - * @covers ::add_class - * @covers ::remove_class - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'foo-class' ); $p->remove_class( 'foo-class' ); + $this->assertSame( 'main with-border', $p->get_attribute( 'class' ), @@ -792,16 +843,17 @@ public function test_get_attribute_reflects_setting_and_then_removing_a_class_na /** * @ticket 56299 * - * @covers ::add_class - * @covers ::remove_class - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'with-border' ); $p->remove_class( 'with-border' ); + $this->assertSame( 'main', $p->get_attribute( 'class' ), @@ -820,21 +872,22 @@ public function test_get_attribute_reflects_duplicating_and_then_removing_an_exi * * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_update_first_when_duplicated_attribute() { $p = new WP_HTML_Tag_Processor( '
Text
' ); $p->next_tag(); $p->set_attribute( 'id', 'updated-id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); } /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -846,8 +899,8 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -870,53 +923,57 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the * * @ticket 56299 * - * @covers ::remove_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_first_when_duplicated_attribute() { $p = new WP_HTML_Tag_Processor( '
Text
' ); $p->next_tag(); $p->remove_attribute( 'id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); } /** * @ticket 56299 * - * @covers ::remove_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->remove_attribute( 'id' ); + $this->assertSame( '
Text
', $p->get_updated_html() ); } /** * @ticket 56299 * - * @covers ::remove_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->remove_attribute( 'no-such-attribute' ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html() ); } /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_creates_a_class_attribute_when_there_is_none() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'foo-class' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -932,15 +989,16 @@ public function test_add_class_creates_a_class_attribute_when_there_is_none() { /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'foo-class' ); $p->add_class( 'bar-class' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -956,14 +1014,15 @@ public function test_calling_add_class_twice_creates_a_class_attribute_with_both /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->remove_class( 'foo-class' ); + $this->assertSame( self::HTML_SIMPLE, $p->get_updated_html(), @@ -978,15 +1037,16 @@ public function test_remove_class_does_not_change_the_markup_when_there_is_no_cl /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'foo-class' ); $p->add_class( 'bar-class' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1002,14 +1062,15 @@ public function test_add_class_appends_class_names_to_the_existing_class_attribu /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->remove_class( 'main' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1025,15 +1086,16 @@ public function test_remove_class_removes_a_single_class_from_the_class_attribut /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->remove_class( 'main' ); $p->remove_class( 'with-border' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1048,14 +1110,15 @@ public function test_calling_remove_class_with_all_listed_class_names_removes_th /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_does_not_add_duplicate_class_names() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'with-border' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1071,14 +1134,15 @@ public function test_add_class_does_not_add_duplicate_class_names() { /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'main' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1094,9 +1158,9 @@ public function test_add_class_preserves_class_name_order_when_a_duplicate_class /** * @ticket 56299 * - * @covers ::add_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1104,6 +1168,7 @@ public function test_add_class_when_there_is_a_class_attribute_with_excessive_wh ); $p->next_tag(); $p->add_class( 'foo-class' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1119,9 +1184,9 @@ public function test_add_class_when_there_is_a_class_attribute_with_excessive_wh /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1129,6 +1194,7 @@ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_at ); $p->next_tag(); $p->remove_class( 'with-border' ); + $this->assertSame( '
Text
', $p->get_updated_html(), @@ -1144,9 +1210,9 @@ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_at /** * @ticket 56299 * - * @covers ::remove_class - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { $p = new WP_HTML_Tag_Processor( @@ -1176,10 +1242,10 @@ public function test_removing_all_classes_removes_the_existing_class_attribute_f * * @ticket 56299 * - * @covers ::add_class - * @covers ::set_attribute - * @covers ::get_updated_html - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_set_attribute_takes_priority_over_add_class() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1221,14 +1287,14 @@ public function test_set_attribute_takes_priority_over_add_class() { * "$value" instead, as any direct updates to the `class` attribute supersede any changes enqueued * via the class builder methods. * - * This is still true if we read enqueued updates before calling `get_updated_html()`. + * This is still true when reading enqueued updates before calling `get_updated_html()`. 
* * @ticket 56299 * - * @covers ::add_class - * @covers ::set_attribute - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1265,10 +1331,10 @@ public function test_set_attribute_takes_priority_over_add_class_even_before_upd /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::add_class - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1290,10 +1356,10 @@ public function test_add_class_overrides_boolean_class_attribute() { /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::add_class - * @covers ::get_attribute - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1315,11 +1381,11 @@ public function test_add_class_overrides_boolean_class_attribute_even_before_upd /** * @ticket 56299 * - * @covers ::set_attribute - * @covers ::remove_attribute - * @covers ::add_class - * @covers ::remove_class - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers 
WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::remove_class + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_advanced_use_case() { $input = <<>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1482,8 +1553,13 @@ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_ma /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::set_attribute * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { $html_input = '
'; @@ -1496,8 +1572,13 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::set_attribute * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1514,8 +1595,13 @@ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::get_tag * @covers ::next_tag +======= + * @covers WP_HTML_Tag_Processor::get_tag + * @covers WP_HTML_Tag_Processor::next_tag +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { $p = new WP_HTML_Tag_Processor( '
', - ); + public function data_next_tag_ignores_script_tag_contents() { + return array( + 'Simple script tag' => array( + '
', + ), - $examples['Simple uppercase script tag'] = array( - '
', - ); + 'Simple uppercase script tag' => array( + '
', + ), - $examples['Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)'] = array( - '
-->', - ); + 'Script with a comment opener inside should end at the next script tag closer (dash dash escaped state)' => array( + '
-->', + ), - $examples['Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)'] = array( - '
-->', - ); + 'Script with a comment opener and a script tag opener inside should end two script tag closer later (double escaped state)' => array( + '
-->', + ), - $examples['Double escaped script with a tricky opener'] = array( - '">
', - ); + 'Double escaped script with a tricky opener' => array( + '">
', + ), - $examples['Double escaped script with a tricky closer'] = array( - '">
', - ); + 'Double escaped script with a tricky closer' => array( + '">
', + ), - $examples['Double escaped, then escaped, then double escaped'] = array( - '
', - ); + 'Double escaped, then escaped, then double escaped' => array( + '
', + ), - $examples['Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)'] = array( - '
-->', - ); + 'Script with a commented a script tag opener inside should at the next tag closer (dash dash escaped state)' => array( + '
-->', + ), - $examples['Script closer with another script tag in closer attributes'] = array( - '
', - ); + 'Script closer with another script tag in closer attributes' => array( + '
', + ), - $examples['Script closer with attributes'] = array( - '
', - ); + 'Script closer with attributes' => array( + '
', + ), - $examples['Script opener with title closer inside'] = array( - '
', - ); + 'Script opener with title closer inside' => array( + '
', + ), - $examples['Complex script with many parsing states'] = array( - '-->
-->', + 'Complex script with many parsing states' => array( + '-->
-->', + ), ); - return $examples; } /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::next_tag +======= + * @covers WP_HTML_Tag_Processor::next_tag +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) * - * @dataProvider data_rcdata_state + * @dataProvider data_next_tag_ignores_contents_of_rcdata_tag + * + * @param string $rcdata_then_div HTML with RCDATA before a DIV. + * @param string $rcdata_tag RCDATA tag. */ - public function test_next_tag_ignores_the_contents_of_a_rcdata_tag( $rcdata_then_div, $rcdata_tag ) { + public function test_next_tag_ignores_contents_of_rcdata_tag( $rcdata_then_div, $rcdata_tag ) { $p = new WP_HTML_Tag_Processor( $rcdata_then_div ); $p->next_tag(); - $this->assertSame( strtoupper( $rcdata_tag ), $p->get_tag(), "The first found tag was not '$rcdata_tag'" ); + $this->assertSame( $rcdata_tag, $p->get_tag(), "The first found tag was not '$rcdata_tag'" ); $p->next_tag(); $this->assertSame( 'DIV', $p->get_tag(), "The second found tag was not 'div'" ); } /** - * Data provider for test_ignores_contents_of_a_rcdata_tag(). + * Data provider. * - * @return array { - * @type array { - * @type string $rcdata_then_div The HTML snippet containing RCDATA and div tags. - * @type string $rcdata_tag The RCDATA tag. - * } - * } + * @return array[] */ - public function data_rcdata_state() { - $examples = array(); - $examples['Simple textarea'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Simple title'] = array( - '<span class="d-none d-md-inline">Back to notifications</title</span>
', - 'TITLE', - ); - - $examples['Comment opener inside a textarea tag should be ignored'] = array( - '
-->', - 'TEXTAREA', - ); - - $examples['Textarea closer with another textarea tag in closer attributes'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Textarea closer with attributes'] = array( - '
', - 'TEXTAREA', - ); - - $examples['Textarea opener with title closer inside'] = array( - '
', - 'TEXTAREA', + public function data_next_tag_ignores_contents_of_rcdata_tag() { + return array( + 'simple textarea' => array( + 'rcdata_then_div' => '
', + 'rcdata_tag' => 'TEXTAREA', + ), + 'simple title' => array( + 'rcdata_then_div' => '<span class="d-none d-md-inline">Back to notifications</title</span>
', + 'rcdata_tag' => 'TITLE', + ), + 'comment opener inside a textarea tag should be ignored' => array( + 'rcdata_then_div' => '
-->', + 'rcdata_tag' => 'TEXTAREA', + ), + 'textarea closer with another textarea tag in closer attributes' => array( + 'rcdata_then_div' => '
', + 'rcdata_tag' => 'TEXTAREA', + ), + 'textarea closer with attributes' => array( + 'rcdata_then_div' => '
', + 'rcdata_tag' => 'TEXTAREA', + ), + 'textarea opener with title closer inside' => array( + 'rcdata_then_div' => '
', + 'rcdata_tag' => 'TEXTAREA', + ), ); - return $examples; } /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::next_tag * @covers ::set_attribute * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_can_query_and_update_wrongly_nested_tags() { $p = new WP_HTML_Tag_Processor( @@ -1684,11 +1774,17 @@ public function test_can_query_and_update_wrongly_nested_tags() { /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::next_tag * @covers ::remove_attribute * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ - public function test_removing_attributes_works_even_in_malformed_html() { + public function test_removing_specific_attributes_in_malformed_html() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); $p->next_tag( 'span' ); $p->remove_attribute( 'Notifications<' ); @@ -1701,11 +1797,17 @@ public function test_removing_attributes_works_even_in_malformed_html() { /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::next_Tag * @covers ::set_attribute * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::next_Tag + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ - public function test_updating_attributes_works_even_in_malformed_html_1() { + public function test_updating_specific_attributes_in_malformed_html() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); $p->next_tag( 'span' ); $p->set_attribute( 'id', 'first' ); @@ -1720,235 +1822,255 @@ public function 
test_updating_attributes_works_even_in_malformed_html_1() { /** * @ticket 56299 * +<<<<<<< HEAD * @covers ::next_tag * @covers ::set_attribute * @covers ::add_class * @covers ::get_updated_html +======= + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html +>>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) + * + * @dataProvider data_updating_attributes * - * @dataProvider data_malformed_tag + * @param string $html HTML to process. + * @param string $expected Expected updated HTML. */ - public function test_updating_attributes_works_even_in_malformed_html_2( $html_input, $html_expected ) { - $p = new WP_HTML_Tag_Processor( $html_input ); + public function test_updating_attributes( $html, $expected ) { + $p = new WP_HTML_Tag_Processor( $html ); $p->next_tag(); $p->set_attribute( 'foo', 'bar' ); $p->add_class( 'firstTag' ); $p->next_tag(); $p->add_class( 'secondTag' ); + $this->assertSame( - $html_expected, + $expected, $p->get_updated_html() ); } /** - * Data provider for test_updates_when_malformed_tag(). + * Data provider. * - * @return array { - * @type array { - * @type string $html_input The input HTML snippet. - * @type string $html_expected The expected HTML snippet after processing. - * } - * } + * @return array[] */ - public function data_malformed_tag() { - $null_byte = chr( 0 ); - $examples = array(); - $examples['Invalid entity inside attribute value'] = array( - 'test', - 'test', - ); - - $examples['HTML tag opening inside attribute value'] = array( - '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', - ); - - $examples['HTML tag brackets in attribute values and data markup'] = array( - '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', - ); - - $examples['Single and double quotes in attribute value'] = array( - '

test', - '

test', - ); - - $examples['Unquoted attribute values'] = array( - '


test', - '
test', - ); - - $examples['Double-quotes escaped in double-quote attribute value'] = array( - '
test', - '
test', - ); - - $examples['Unquoted attribute value'] = array( - '
test', - '
test', - ); - - $examples['Unquoted attribute value with tag-like value'] = array( - '
>test', - '
>test', - ); - - $examples['Unquoted attribute value with tag-like value followed by tag-like data'] = array( - '
>test', - '
>test', - ); - - $examples['1'] = array( - '
test', - '
test', - ); - - $examples['2'] = array( - '
test', - '
test', - ); - - $examples['4'] = array( - '
test', - '
test', - ); - - $examples['5'] = array( - '
code>test', - '
code>test', - ); - - $examples['6'] = array( - '
test', - '
test', - ); - - $examples['7'] = array( - '
test', - '
test', - ); - - $examples['8'] = array( - '
id="test">test', - '
id="test">test', - ); - - $examples['9'] = array( - '
test', - '
test', - ); - - $examples['10'] = array( - 'test', - 'test', - ); - - $examples['11'] = array( - 'The applicative operator <* works well in Haskell; is what?test', - 'The applicative operator <* works well in Haskell; is what?test', - ); - - $examples['12'] = array( - '<3 is a heart but is a tag.test', - '<3 is a heart but is a tag.test', - ); - - $examples['13'] = array( - 'test', - 'test', - ); - - $examples['14'] = array( - 'test', - 'test', - ); - - $examples['15'] = array( - ' a HTML Tag]]>test', - ' a HTML Tag]]>test', - ); - - $examples['16'] = array( - '
test', - '
test', - ); - - $examples['17'] = array( - '
test', - '
test', - ); - - $examples['18'] = array( - '
test', - '
test', - ); - - $examples['19'] = array( - '
test', - '
test', - ); - - $examples['20'] = array( - '
test', - '
test', - ); - - $examples['21'] = array( - '
test', - '
test', - ); - - $examples['22'] = array( - '
test', - '
test', + public function data_updating_attributes() { + return array( + 'tags inside of a comment' => array( + 'input' => 'test', + 'expected' => 'test', + ), + 'does not parse <3' => array( + 'input' => '<3 is a heart but is a tag.test', + 'expected' => '<3 is a heart but is a tag.test', + ), + 'does not parse <*' => array( + 'input' => 'The applicative operator <* works well in Haskell; is what?test', + 'expected' => 'The applicative operator <* works well in Haskell; is what?test', + ), + ' in content' => array( + 'input' => 'test', + 'expected' => 'test', + ), + 'custom asdf attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'custom data-* attribute' => array( + 'input' => '

Some content for a test

', + 'expected' => '

Some content for a test

', + ), + 'tag inside of CDATA' => array( + 'input' => ' a HTML Tag]]>test', + 'expected' => ' a HTML Tag]]>test', + ), ); + } - $examples['23'] = array( - '
test', - '
test', - ); + /** + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::next_tag + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::get_updated_html + * + * @dataProvider data_updating_attributes_in_malformed_html + * + * @param string $html HTML to process. + * @param string $expected Expected updated HTML. + */ + public function test_updating_attributes_in_malformed_html( $html, $expected ) { + $p = new WP_HTML_Tag_Processor( $html ); + $p->next_tag(); + $p->set_attribute( 'foo', 'bar' ); + $p->add_class( 'firstTag' ); + $p->next_tag(); + $p->add_class( 'secondTag' ); - $examples['24'] = array( - '
test', - '
test', + $this->assertSame( + $expected, + $p->get_updated_html() ); + } - $examples['25'] = array( - '
test', - '
test', - ); + /** + * Data provider. + * + * @return array[] + */ + public function data_updating_attributes_in_malformed_html() { + $null_byte = chr( 0 ); - $examples['Multiple unclosed tags treated as a single tag'] = array( - << - test + return array( + 'Invalid entity inside attribute value' => array( + 'input' => 'test', + 'expected' => 'test', + ), + 'HTML tag opening inside attribute value' => array( + 'input' => '
This <is> a <strong is="true">thing.
test', + 'expected' => '
This <is> a <strong is="true">thing.
test', + ), + 'HTML tag brackets in attribute values and data markup' => array( + 'input' => '
This <is> a <strong is="true">thing.
test', + 'expected' => '
This <is> a <strong is="true">thing.
test', + ), + 'Single and double quotes in attribute value' => array( + 'input' => '

test', + 'expected' => '

test', + ), + 'Unquoted attribute values' => array( + 'input' => '


test', + 'expected' => '
test', + ), + 'Double-quotes escaped in double-quote attribute value' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Unquoted attribute value' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Unquoted attribute value with tag-like value' => array( + 'input' => '
>test', + 'expected' => '
>test', + ), + 'Unquoted attribute value with tag-like value followed by tag-like data' => array( + 'input' => '
>test', + 'expected' => '
>test', + ), + 'id=&quo;code' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'id/test=5' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '
as the id value' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'id=>code' => array( + 'input' => '
code>test', + 'expected' => '
code>test', + ), + 'id"quo="test"' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'id without double quotation marks around null byte' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Unexpected > before an attribute' => array( + 'input' => '
id="test">test', + 'expected' => '
id="test">test', + ), + 'Unexpected = before an attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Unexpected === before an attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Missing closing data-tag tag' => array( + 'input' => 'The applicative operator <* works well in Haskell; is what?test', + 'expected' => 'The applicative operator <* works well in Haskell; is what?test', + ), + 'Missing closing t3 tag' => array( + 'input' => '<3 is a heart but is a tag.test', + 'expected' => '<3 is a heart but is a tag.test', + ), + 'invalid comment opening tag' => array( + 'input' => 'test', + 'expected' => 'test', + ), + '=asdf as attribute name' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '== as attribute name with value' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '=5 as attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '= as attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '== as attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '=== as attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'unsupported disabled attribute' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'malformed custom attributes' => array( + 'input' => '
test', + 'expected' => '
test', + ), + 'Multiple unclosed tags treated as a single tag' => array( + 'input' => << + test HTML - , - << - test + , + 'expected' => << + test HTML - , - ); - - $examples['27'] = array( - '
test', - '
test', - ); - - $examples['28'] = array( - '
test', - '
test', + , + ), + '
' => array( + 'input' => '
test', + 'expected' => '
test', + ), + '
' => array( + 'input' => '
test', + 'expected' => '
test', + ), ); - - return $examples; } } From b708c6b618770c7e80981c93c0bd2ab9d73a7819 Mon Sep 17 00:00:00 2001 From: hellofromtonya Date: Mon, 30 Jan 2023 16:25:06 -0600 Subject: [PATCH 06/36] Load API files directly from wp-settings.php --- src/wp-includes/wp-html.php | 42 ------------------- src/wp-settings.php | 5 ++- .../html/wpHtmlTagProcessorBookmarks.php | 5 ++- .../tests/html/wpHtmlTagProcessorTest.php | 5 ++- 4 files changed, 12 insertions(+), 45 deletions(-) delete mode 100644 src/wp-includes/wp-html.php diff --git a/src/wp-includes/wp-html.php b/src/wp-includes/wp-html.php deleted file mode 100644 index a0d238cef7823..0000000000000 --- a/src/wp-includes/wp-html.php +++ /dev/null @@ -1,42 +0,0 @@ - Date: Mon, 30 Jan 2023 16:30:04 -0600 Subject: [PATCH 07/36] Tests: remove loading API files --- tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php | 5 ----- tests/phpunit/tests/html/wpHtmlTagProcessorTest.php | 5 ----- 2 files changed, 10 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php index bcc584fe19971..3ee75eb140b52 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php @@ -6,11 +6,6 @@ * @subpackage HTML */ -require_once ABSPATH . WPINC . '/class-wp-html-attribute-token.php'; -require_once ABSPATH . WPINC . '/class-wp-html-span.php'; -require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php'; -require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php'; - /** * @group html * diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php index 5a42081307fc5..4c0915feca9b4 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php @@ -6,11 +6,6 @@ * @subpackage HTML */ -require_once ABSPATH . WPINC . 
'/class-wp-html-attribute-token.php'; -require_once ABSPATH . WPINC . '/class-wp-html-span.php'; -require_once ABSPATH . WPINC . '/class-wp-html-text-replacement.php'; -require_once ABSPATH . WPINC . '/class-wp-html-tag-processor.php'; - /** * @group html * From 8bdfae4022fe2a34021f2466302a4e8e776ae043 Mon Sep 17 00:00:00 2001 From: hellofromtonya Date: Mon, 30 Jan 2023 16:31:59 -0600 Subject: [PATCH 08/36] Renames test classes to coding standard Tests_{APIorGroup}_className. --- tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php | 2 +- tests/phpunit/tests/html/wpHtmlTagProcessorTest.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php index 3ee75eb140b52..4e1ea93053f93 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php @@ -11,7 +11,7 @@ * * @coversDefaultClass WP_HTML_Tag_Processor */ -class WP_HTML_Tag_Processor_Bookmark_Test extends WP_UnitTestCase { +class Tests_HTML_wpHtmlTagProcessor_Bookmark extends WP_UnitTestCase { /** * @ticket 56299 diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php index 4c0915feca9b4..265ac927b1ced 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php @@ -11,7 +11,7 @@ * * @coversDefaultClass WP_HTML_Tag_Processor */ -class WP_HTML_Tag_Processor_Test extends WP_UnitTestCase { +class Tests_HTML_wpHtmlTagProcessor extends WP_UnitTestCase { const HTML_SIMPLE = '
Text
'; const HTML_WITH_CLASSES = '
Text
'; const HTML_MALFORMED = '
Back to notifications
'; From bc170866527e6e7afb174f4ac8512e0150b54064 Mon Sep 17 00:00:00 2001 From: hellofromtonya Date: Mon, 30 Jan 2023 16:35:44 -0600 Subject: [PATCH 09/36] Renames test filenames to coding standard --- ...lTagProcessorBookmarks.php => wpHtmlTagProcessor-bookmark.php} | 0 .../html/{wpHtmlTagProcessorTest.php => wpHtmlTagProcessor.php} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/phpunit/tests/html/{wpHtmlTagProcessorBookmarks.php => wpHtmlTagProcessor-bookmark.php} (100%) rename tests/phpunit/tests/html/{wpHtmlTagProcessorTest.php => wpHtmlTagProcessor.php} (100%) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php similarity index 100% rename from tests/phpunit/tests/html/wpHtmlTagProcessorBookmarks.php rename to tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessorTest.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php similarity index 100% rename from tests/phpunit/tests/html/wpHtmlTagProcessorTest.php rename to tests/phpunit/tests/html/wpHtmlTagProcessor.php From 334e4155a3c210dd4a13bf8cf9817fec67cc4b7f Mon Sep 17 00:00:00 2001 From: hellofromtonya Date: Mon, 30 Jan 2023 17:08:10 -0600 Subject: [PATCH 10/36] Cleans HEADS from merge conflict from test file --- .../phpunit/tests/html/wpHtmlTagProcessor.php | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php index 265ac927b1ced..730a4c017928b 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php @@ -1528,13 +1528,8 @@ public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html /** * @ticket 56299 * -<<<<<<< HEAD - * @covers ::set_attribute - * @covers ::get_updated_html -======= * @covers WP_HTML_Tag_Processor::set_attribute * @covers 
WP_HTML_Tag_Processor::get_updated_html ->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1551,13 +1546,8 @@ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_ma /** * @ticket 56299 * -<<<<<<< HEAD - * @covers ::set_attribute - * @covers ::get_updated_html -======= * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::get_updated_html ->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { $html_input = '
'; @@ -1570,13 +1560,8 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma /** * @ticket 56299 * -<<<<<<< HEAD - * @covers ::set_attribute - * @covers ::get_updated_html -======= * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::get_updated_html ->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1593,13 +1578,8 @@ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit /** * @ticket 56299 * -<<<<<<< HEAD - * @covers ::get_tag - * @covers ::next_tag -======= * @covers WP_HTML_Tag_Processor::get_tag * @covers WP_HTML_Tag_Processor::next_tag ->>>>>>> 43ac2567fa (WP_HTML_Tag_Processor_Test: test improvements) */ public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { $p = new WP_HTML_Tag_Processor( ' closer --- .../phpunit/tests/html/wpHtmlTagProcessor.php | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php index e375925f51f25..c98abe3de13a3 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php @@ -1714,6 +1714,39 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() { ); } + /** + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::next_tag + * + * @dataProvider data_skips_contents_of_script_and_rcdata_regions + * + * @param $input_html HTML with multiple divs, one of which carries the "target" attribute. 
+ */ + public function test_skips_contents_of_script_and_rcdata_regions($input_html ) { + $p = new WP_HTML_Tag_Processor( $input_html ); + $p->next_tag( 'div' ); + + $this->assertTrue( $p->get_attribute( 'target' ) ); + } + + /** + * Data provider + * + * @return string[] + */ + public function data_skips_contents_of_script_and_rcdata_regions() { + return array( + 'Balanced SCRIPT tags' => '
', + 'Unexpected SCRIPT closer after DIV' => 'console.log("
")
', + 'Unexpected SCRIPT closer before DIV' => 'console.log("")
', + 'Missing SCRIPT closer' => '
', + 'TITLE before DIV' => '<div>
', + 'SCRIPT inside TITLE' => '<script><div>
', + 'TITLE in TEXTAREA' => '
', + ); + } + /** * @ticket 56299 * From 5c1a5d5529c1c8621cdec2fe78fa3c8598ced39c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 10:01:37 -0700 Subject: [PATCH 19/36] Update tests: fix data provider and remove Exception expectation --- .../tests/html/wpHtmlTagProcessor-bookmark.php | 4 ---- tests/phpunit/tests/html/wpHtmlTagProcessor.php | 16 ++++++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index a15c180362589..0b63f4dafd154 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -339,8 +339,6 @@ public function test_limits_the_number_of_bookmarks() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); $p->next_tag( 'li' ); - $this->expectException( Exception::class ); - for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } @@ -358,8 +356,6 @@ public function test_limits_the_number_of_seek_calls() { $p->next_tag( 'li' ); $p->set_bookmark( 'bookmark' ); - $this->expectException( Exception::class ); - for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php index c98abe3de13a3..3bd965e637233 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php @@ -1733,17 +1733,17 @@ public function test_skips_contents_of_script_and_rcdata_regions($input_html ) { /** * Data provider * - * @return string[] + * @return array[] */ public function data_skips_contents_of_script_and_rcdata_regions() { return array( - 'Balanced SCRIPT tags' => '
', - 'Unexpected SCRIPT closer after DIV' => 'console.log("
")
', - 'Unexpected SCRIPT closer before DIV' => 'console.log("")
', - 'Missing SCRIPT closer' => '
', - 'TITLE before DIV' => '<div>
', - 'SCRIPT inside TITLE' => '<script><div>
', - 'TITLE in TEXTAREA' => '
', + 'Balanced SCRIPT tags' => array( '
' ), + 'Unexpected SCRIPT closer after DIV' => array( 'console.log("
")
' ), + 'Unexpected SCRIPT closer before DIV' => array( 'console.log("")
' ), + 'Missing SCRIPT closer' => array( '
' ), + 'TITLE before DIV' => array( '<div>
' ), + 'SCRIPT inside TITLE' => array( '<script><div>
' ), + 'TITLE in TEXTAREA' => array( '
' ), ); } From c50ffeec49c9250450c938f661cf30cac6a859d9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 10:04:39 -0700 Subject: [PATCH 20/36] Lint issue --- tests/phpunit/tests/html/wpHtmlTagProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php index 3bd965e637233..7f8ff0895f042 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php @@ -1723,7 +1723,7 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() { * * @param $input_html HTML with multiple divs, one of which carries the "target" attribute. */ - public function test_skips_contents_of_script_and_rcdata_regions($input_html ) { + public function test_skips_contents_of_script_and_rcdata_regions( $input_html ) { $p = new WP_HTML_Tag_Processor( $input_html ); $p->next_tag( 'div' ); From 9a5ccf042e0e396a26a76cdd3ff2eb1dede70294 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 10:28:52 -0700 Subject: [PATCH 21/36] Fix broken tests --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++-- tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index e21214201dcef..ac004b524ca5d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -554,7 +554,7 @@ public function next_tag( $query = null ) { * @TODO: Add unit test case and fix (if necessary) for RCDATA tag closer coming before RCDATA tag opener. */ $t = $this->html[ $this->tag_name_starts_at ]; - if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) { + if ( ! 
$this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) { $tag_name = $this->get_tag(); if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { @@ -663,7 +663,7 @@ public function set_bookmark( $name ) { if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { if ( WP_DEBUG ) { - trigger_error( "Tried to jump to a non-existent HTML bookmark {$name}.", E_USER_WARNING ); + trigger_error( "Too many bookmarks: cannot create '{$name}'", E_USER_WARNING ); } return false; } diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index 0b63f4dafd154..12c5917446de8 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -339,10 +339,11 @@ public function test_limits_the_number_of_bookmarks() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); $p->next_tag( 'li' ); - for ( $i = 0;$i < WP_HTML_Tag_Processor::MAX_BOOKMARKS;$i++ ) { + for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_BOOKMARKS; $i++ ) { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } + $this->expectWarningMessageMatches( '/Too many bookmarks/' ); $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); } @@ -359,6 +360,8 @@ public function test_limits_the_number_of_seek_calls() { for ( $i = 0; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS; $i++ ) { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } + + $this->expectWarningMessageMatches( 'Too many calls to seek()' ); $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." ); } } From 13dd7d72329ed2014ffd9f0c4bb95dc295e953e9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 10:42:13 -0700 Subject: [PATCH 22/36] Remove some TODOs, most were done already --- .../html-api/class-wp-html-tag-processor.php | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ac004b524ca5d..d06d2574ad76e 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -8,18 +8,18 @@ * Instead this scans linearly through a document and only parses * the HTML tag openers. * - * @TODO: Unify language around "currently-opened tag." - * @TODO: Organize unit test cases into normative tests, edge-case tests, regression tests. - * @TODO: Clean up attribute token class after is_true addition - * @TODO: Prune whitespace when removing classes/attributes: e.g. 
"a b c" -> "c" not " c" - * @TODO: Skip over `/` in attributes area, split attribute names by `/` - * @TODO: Decode HTML references/entities in class names when matching. - * E.g. match having class `1<"2` needs to recognize `class="1<"2"`. - * @TODO: Decode character references in `get_attribute()` - * @TODO: Properly escape attribute value in `set_attribute()` - * @TODO: Add slow mode to escape character entities in CSS class names? - * (This requires a custom decoder since `html_entity_decode()` - * doesn't handle attribute character reference decoding rules. + * ### Possible future direction for this module + * + * - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c". + * This would increase the size of the changes for some operations but leave more + * natural-looking output HTML. + * - Decode HTML character references within class names when matching. E.g. match having + * class `1<"2` needs to recognize `class="1<"2"`. Currently the Tag Processor + * will fail to find the right tag if the class name is encoded as such. + * - Properly decode HTML character references in `get_attribute()`. PHP's + * `html_entity_decode()` is wrong in a couple ways: it doesn't account for the + * no-ambiguous-ampersand rule, and it improperly handles the way semicolons may + * or may not terminate a character reference. * * @package WordPress * @subpackage HTML-API @@ -550,8 +550,6 @@ public function next_tag( $query = null ) { * For non-DATA sections which might contain text that looks like HTML tags but * isn't, scan with the appropriate alternative mode. Looking at the first letter * of the tag name as a pre-check avoids a string allocation when it's not needed. - * - * @TODO: Add unit test case and fix (if necessary) for RCDATA tag closer coming before RCDATA tag opener. */ $t = $this->html[ $this->tag_name_starts_at ]; if ( ! 
$this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) { From b31cca41cfc400709a3d36b05be036db0acdf25d Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 11:20:59 -0700 Subject: [PATCH 23/36] Expand design and limitations discussion --- .../html-api/class-wp-html-tag-processor.php | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index d06d2574ad76e..0e4392c810608 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -206,12 +206,46 @@ * } * ``` * - * ## Design limitations + * ## Design and limitations * - * @TODO: Expand this section + * The Tag Processor is designed to linearly scan HTML documents and tokenize + * HTML tags and their attributes. It's designed to do this as efficiently as + * possible without compromising parsing integrity. Therefore it will be + * slower than some methods of modifying HTML, such as those incorporating + * over-simplified PCRE patterns, but will not introduce the defects and + * failures that those methods bring in, which lead to broken page renders + * and often to security vulnerabilities. On the other hand, it will be faster + * than full-blown HTML parsers such as DOMDocument and use considerably + * less memory. It requires a negligible memory overhead, enough to consider + * it a zero-overhead system. * - * - No nesting: cannot match open and close tag. - * - Class names are not decoded if they contain character references. + * The performance characteristics are maintained by avoiding tree construction + * and semantic cleanups which are specified in HTML5. 
Because of this, for + * example, it's not possible for the Tag Processor to associate any given + * opening tag with its corresponding closing tag, or to return the inner markup + * inside an element. Systems may be built on top of the Tag Processor to do + * this, but the Tag Processor is and should be constrained so it can remain an + * efficient, low-level, and reliable HTML scanner. + * + * The Tag Processor's design incorporates a "garbage-in-garbage-out" philosophy. + * HTML5 specifies that certain invalid content be transformed into different forms + * for display, such as removing null bytes from an input document and replacing + * invalid characters with the Unicode replacement character U+FFFD �. Where errors + * or transformations exist within the HTML5 specification, the Tag Processor leaves + * those invalid inputs untouched, passing them through to the final browser to handle. + * While this implies that certain operations will be non-spec-compliant, such as + * reading the value of an attribute with invalid content, it also preserves a + * simplicity and efficiency for handling those error cases. + * + * Most operations within the Tag Processor are designed to minimize the difference + * between an input and output document for any given change. For example, the + * `add_class` and `remove_class` methods preserve whitespace and the class ordering + * within the `class` attribute; and when encountering tags with duplicated attributes, + * the Tag Processor will leave those invalid duplicate attributes where they are but + * update the proper attribute which the browser will read for parsing its value. An + * exception to this rule is that all attribute updates store their values as + * double-quoted strings, meaning that attributes on input with single-quoted or + * unquoted values will appear in the output with double-quotes. 
* * @since 6.2.0 */ From 1a9bec05c8c3b3bfda5e1cb5adcb9effc863cf19 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 11:24:38 -0700 Subject: [PATCH 24/36] Loosen assertion on warning --- tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index 12c5917446de8..2be4b40363429 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -343,7 +343,7 @@ public function test_limits_the_number_of_bookmarks() { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } - $this->expectWarningMessageMatches( '/Too many bookmarks/' ); + $this->expectWarning(); $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); } @@ -361,7 +361,7 @@ public function test_limits_the_number_of_seek_calls() { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } - $this->expectWarningMessageMatches( 'Too many calls to seek()' ); + $this->expectWarning(); $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." ); } } From 28e9bf359529c3412438a8135dbfe165aeaffe27 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 13:49:36 -0700 Subject: [PATCH 25/36] Rename some properties to clarify their purpose and expand comments. Also: - Change visibility of some properties to `protected` to aid with in-progress expansion of the HTML API. - Refactor short-circuit checks in `get_updated_html()` for clarity. 
--- .../html-api/class-wp-html-tag-processor.php | 203 +++++++++++------- 1 file changed, 122 insertions(+), 81 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 0e4392c810608..a56e7ecd9c2a5 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -319,29 +319,50 @@ class WP_HTML_Tag_Processor { private $stop_on_tag_closers; /** - * The updated HTML document. + * Holds updated HTML as updates are applied. + * + * Updates and unmodified portions of the input document are + * appended to this value as they are applied. It will hold + * a copy of the updated document up until the point of the + * latest applied update. The fully-updated HTML document + * will comprise this value plus the part of the input document + * which follows that latest update. + * + * @see $bytes_already_copied * * @since 6.2.0 * @var string */ - private $updated_html = ''; + private $output_buffer = ''; /** - * How many bytes from the original HTML document were already read. + * How many bytes from the original HTML document have been read and parsed. + * + * This value points to the latest byte offset in the input document which + * has been already parsed. It is the internal cursor for the Tag Processor + * and updates while scanning through the HTML tokens. * * @since 6.2.0 * @var int */ - private $parsed_bytes = 0; + private $bytes_already_parsed = 0; /** - * How many bytes from the original HTML document were already treated - * with the requested replacements. + * How many bytes from the input HTML document have already been + * copied into the output buffer. + * + * Lexical updates are enqueued and processed in batches. Prior + * to any given update in the input document, there might exist + * a span of HTML unaffected by any changes. 
This span ought to + * be copied verbatim into the output buffer before applying the + * following update. This value will point to the starting byte + * offset in the input document where that unaffected span of + * HTML starts. * * @since 6.2.0 * @var int */ - private $updated_bytes = 0; + private $bytes_already_copied = 0; /** * Byte offset in input document where current tag name starts. @@ -458,7 +479,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Span[] */ - private $bookmarks = array(); + protected $bookmarks = array(); const ADD_CLASS = true; const REMOVE_CLASS = false; @@ -507,7 +528,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Text_Replacement[] */ - private $lexical_updates = array(); + protected $lexical_updates = array(); /** * Tracks and limits `seek()` calls to prevent accidental infinite loops. @@ -516,7 +537,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var int */ - private $seek_count = 0; + protected $seek_count = 0; /** * Constructor. @@ -551,13 +572,13 @@ public function next_tag( $query = null ) { $already_found = 0; do { - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } // Find the next tag if it exists. if ( false === $this->parse_next_tag() ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } @@ -568,12 +589,12 @@ public function next_tag( $query = null ) { } // Ensure that the tag closes before the end of the document. - $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes ); + $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); if ( false === $tag_ends_at ) { return false; } - $this->tag_ends_at = $tag_ends_at; - $this->parsed_bytes = $tag_ends_at; + $this->tag_ends_at = $tag_ends_at; + $this->bytes_already_parsed = $tag_ends_at; // Finally, check if the parsed tag and its attributes match the search query. 
if ( $this->matches() ) { @@ -590,13 +611,13 @@ public function next_tag( $query = null ) { $tag_name = $this->get_tag(); if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } elseif ( ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && ! $this->skip_rcdata( $tag_name ) ) { - $this->parsed_bytes = strlen( $this->html ); + $this->bytes_already_parsed = strlen( $this->html ); return false; } } @@ -744,14 +765,14 @@ private function skip_rcdata( $tag_name ) { $doc_length = strlen( $html ); $tag_length = strlen( $tag_name ); - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at = strpos( $this->html, '= $doc_length ) { - $this->parsed_bytes = $doc_length; + $this->bytes_already_parsed = $doc_length; return false; } @@ -775,8 +796,8 @@ private function skip_rcdata( $tag_name ) { } } - $at += $tag_length; - $this->parsed_bytes = $at; + $at += $tag_length; + $this->bytes_already_parsed = $at; /* * Ensure that the tag name terminates to avoid matching on @@ -792,13 +813,13 @@ private function skip_rcdata( $tag_name ) { while ( $this->parse_next_attribute() ) { continue; } - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; if ( $at >= strlen( $this->html ) ) { return false; } if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { - ++$this->parsed_bytes; + ++$this->bytes_already_parsed; return true; } } @@ -815,7 +836,7 @@ private function skip_script_data() { $state = 'unescaped'; $html = $this->html; $doc_length = strlen( $html ); - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at += strcspn( $html, '-<', $at ); @@ -916,8 +937,8 @@ private function skip_script_data() { } if ( $is_closing ) { - $this->parsed_bytes = $at; - if ( $this->parsed_bytes >= $doc_length ) { + $this->bytes_already_parsed = $at; 
+ if ( $this->bytes_already_parsed >= $doc_length ) { return false; } @@ -925,8 +946,8 @@ private function skip_script_data() { continue; } - if ( '>' === $html[ $this->parsed_bytes ] ) { - ++$this->parsed_bytes; + if ( '>' === $html[ $this->bytes_already_parsed ] ) { + ++$this->bytes_already_parsed; return true; } } @@ -949,7 +970,7 @@ private function parse_next_tag() { $html = $this->html; $doc_length = strlen( $html ); - $at = $this->parsed_bytes; + $at = $this->bytes_already_parsed; while ( false !== $at && $at < $doc_length ) { $at = strpos( $html, '<', $at ); @@ -981,9 +1002,9 @@ private function parse_next_tag() { $tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 ); if ( $tag_name_prefix_length > 0 ) { ++$at; - $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); - $this->tag_name_starts_at = $at; - $this->parsed_bytes = $at + $this->tag_name_length; + $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); + $this->tag_name_starts_at = $at; + $this->bytes_already_parsed = $at + $this->tag_name_length; return true; } @@ -1103,8 +1124,8 @@ private function parse_next_tag() { */ private function parse_next_attribute() { // Skip whitespace and slashes. - $this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes ); - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed ); + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } @@ -1114,53 +1135,53 @@ private function parse_next_attribute() { * * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state */ - $name_length = '=' === $this->html[ $this->parsed_bytes ] - ? 
1 + strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes + 1 ) - : strcspn( $this->html, "=/> \t\f\r\n", $this->parsed_bytes ); + $name_length = '=' === $this->html[ $this->bytes_already_parsed ] + ? 1 + strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed + 1 ) + : strcspn( $this->html, "=/> \t\f\r\n", $this->bytes_already_parsed ); // No attribute, just tag closer. - if ( 0 === $name_length || $this->parsed_bytes + $name_length >= strlen( $this->html ) ) { + if ( 0 === $name_length || $this->bytes_already_parsed + $name_length >= strlen( $this->html ) ) { return false; } - $attribute_start = $this->parsed_bytes; - $attribute_name = substr( $this->html, $attribute_start, $name_length ); - $this->parsed_bytes += $name_length; - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + $attribute_start = $this->bytes_already_parsed; + $attribute_name = substr( $this->html, $attribute_start, $name_length ); + $this->bytes_already_parsed += $name_length; + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } $this->skip_whitespace(); - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } - $has_value = '=' === $this->html[ $this->parsed_bytes ]; + $has_value = '=' === $this->html[ $this->bytes_already_parsed ]; if ( $has_value ) { - ++$this->parsed_bytes; + ++$this->bytes_already_parsed; $this->skip_whitespace(); - if ( $this->parsed_bytes >= strlen( $this->html ) ) { + if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { return false; } - switch ( $this->html[ $this->parsed_bytes ] ) { + switch ( $this->html[ $this->bytes_already_parsed ] ) { case "'": case '"': - $quote = $this->html[ $this->parsed_bytes ]; - $value_start = $this->parsed_bytes + 1; - $value_length = strcspn( $this->html, $quote, $value_start ); - $attribute_end = $value_start + $value_length + 1; - $this->parsed_bytes = $attribute_end; + $quote = $this->html[ 
$this->bytes_already_parsed ]; + $value_start = $this->bytes_already_parsed + 1; + $value_length = strcspn( $this->html, $quote, $value_start ); + $attribute_end = $value_start + $value_length + 1; + $this->bytes_already_parsed = $attribute_end; break; default: - $value_start = $this->parsed_bytes; - $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start ); - $attribute_end = $value_start + $value_length; - $this->parsed_bytes = $attribute_end; + $value_start = $this->bytes_already_parsed; + $value_length = strcspn( $this->html, "> \t\f\r\n", $value_start ); + $attribute_end = $value_start + $value_length; + $this->bytes_already_parsed = $attribute_end; } } else { - $value_start = $this->parsed_bytes; + $value_start = $this->bytes_already_parsed; $value_length = 0; $attribute_end = $attribute_start + $name_length; } @@ -1206,7 +1227,7 @@ private function parse_next_attribute() { * @return void */ private function skip_whitespace() { - $this->parsed_bytes += strspn( $this->html, " \t\f\r\n", $this->parsed_bytes ); + $this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n", $this->bytes_already_parsed ); } /** @@ -1395,9 +1416,9 @@ private function apply_attributes_updates() { usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) ); foreach ( $this->lexical_updates as $diff ) { - $this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes ); - $this->updated_html .= $diff->text; - $this->updated_bytes = $diff->end; + $this->output_buffer .= substr( $this->html, $this->bytes_already_copied, $diff->start - $this->bytes_already_copied ); + $this->output_buffer .= $diff->text; + $this->bytes_already_copied = $diff->end; } /* @@ -1470,9 +1491,9 @@ public function seek( $bookmark_name ) { $this->get_updated_html(); // Point this tag processor before the sought tag opener and consume it. 
- $this->parsed_bytes = $this->bookmarks[ $bookmark_name ]->start; - $this->updated_bytes = $this->parsed_bytes; - $this->updated_html = substr( $this->html, 0, $this->updated_bytes ); + $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start; + $this->bytes_already_copied = $this->bytes_already_parsed; + $this->output_buffer = substr( $this->html, 0, $this->bytes_already_copied ); return $this->next_tag(); } @@ -1993,42 +2014,62 @@ public function __toString() { * @return string The processed HTML. */ public function get_updated_html() { - // Short-circuit if there are no new updates to apply. - if ( ! count( $this->classname_updates ) && ! count( $this->lexical_updates ) ) { - return $this->updated_html . substr( $this->html, $this->updated_bytes ); + $requires_no_updating = 0 === count( $this->classname_updates ) && 0 === count( $this->lexical_updates ); + + /* + * When there is nothing more to update and nothing has already been + * updated, return the original document and avoid a string copy. + */ + if ( $requires_no_updating && $this->bytes_already_copied === 0 ) { + return $this->html; + } + + /* + * If there are no updates left to apply, but some have already + * been applied, then finish by copying the rest of the input + * to the end of the updated document and return. + */ + if ( $requires_no_updating && $this->bytes_already_copied > 0 ) { + return $this->output_buffer . substr( $this->html, $this->bytes_already_copied ); } - // Otherwise: apply the updates, rewind before the current tag, and parse it again. - $delta_between_updated_html_end_and_current_tag_end = substr( + // Apply the updates, rewind to before the current tag, and reparse the attributes. + $content_up_to_opened_tag_name = $this->output_buffer . 
substr( $this->html, - $this->updated_bytes, - $this->tag_name_starts_at + $this->tag_name_length - $this->updated_bytes + $this->bytes_already_copied, + $this->tag_name_starts_at + $this->tag_name_length - $this->bytes_already_copied ); - $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; - // 1. Apply the attributes updates to the original HTML + /* + * 1. Apply the edits by flushing them to the output_buffer and updating the copied byte count. + * + * Note: `apply_attributes_updates()` modifies `$this->output_buffer`. + */ $this->class_name_updates_to_attributes_updates(); $this->apply_attributes_updates(); - // 2. Replace the original HTML with the updated HTML - $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes ); - $this->updated_html = $updated_html_up_to_current_tag_name_end; - $this->updated_bytes = strlen( $this->updated_html ); - - // 3. Point this tag processor at the original tag opener and consume it + /* + * 2. Replace the original HTML with the now-updated HTML it's possible to seek to a previous + * location and have a consistent view of the updated document. + */ + $this->html = $this->output_buffer . substr( $this->html, $this->bytes_already_copied ); + $this->output_buffer = $content_up_to_opened_tag_name; + $this->bytes_already_copied = strlen( $this->output_buffer ); /* + * 3. Point this tag processor at the original tag opener and consume it + * * At this point the internal cursor points to the end of the tag name. * Rewind before the tag name starts so that it's as if the cursor didn't * move; a call to `next_tag()` will reparse the recently-updated attributes * and additional calls to modify the attributes will apply at this same - * lcoation. + * location. * *

<p>Previous HTML<em>More HTML</em></p>

* ^ | back up by the length of the tag name plus the opening < * \<-/ back up by strlen("em") + 1 ==> 3 */ - $this->parsed_bytes = strlen( $updated_html_up_to_current_tag_name_end ) - $this->tag_name_length - 1; + $this->bytes_already_parsed = strlen( $content_up_to_opened_tag_name ) - $this->tag_name_length - 1; $this->next_tag(); return $this->html; From 1e2ef09b0de2a4cee0cd6a70c6edea577cbd7dc3 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 14:07:01 -0700 Subject: [PATCH 26/36] Linter: yoda condition --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a56e7ecd9c2a5..3efe83d3d7de3 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2020,7 +2020,7 @@ public function get_updated_html() { * When there is nothing more to update and nothing has already been * updated, return the original document and avoid a string copy. */ - if ( $requires_no_updating && $this->bytes_already_copied === 0 ) { + if ( $requires_no_updating && 0 === $this->bytes_already_copied ) { return $this->html; } From 243dc7ccd8267cb5a84440000d876726da1fad79 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 14:12:14 -0700 Subject: [PATCH 27/36] Typos in comments --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 3efe83d3d7de3..f29fbd80c9a01 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2041,7 +2041,7 @@ public function get_updated_html() { ); /* - * 1. 
Apply the edits by flushing them to the output_buffer and updating the copied byte count. + * 1. Apply the edits by flushing them to the output buffer and updating the copied byte count. * * Note: `apply_attributes_updates()` modifies `$this->output_buffer`. */ @@ -2049,8 +2049,8 @@ public function get_updated_html() { $this->apply_attributes_updates(); /* - * 2. Replace the original HTML with the now-updated HTML it's possible to seek to a previous - * location and have a consistent view of the updated document. + * 2. Replace the original HTML with the now-updated HTML so that it's possible to + * seek to a previous location and have a consistent view of the updated document. */ $this->html = $this->output_buffer . substr( $this->html, $this->bytes_already_copied ); $this->output_buffer = $content_up_to_opened_tag_name; From 81529882ee2498ef8ae1f3e9e839989f4df73ee0 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 15:44:44 -0700 Subject: [PATCH 28/36] Rework @covers attributes --- .../html/wpHtmlTagProcessor-bookmark.php | 44 +++-- .../phpunit/tests/html/wpHtmlTagProcessor.php | 160 ++++-------------- 2 files changed, 52 insertions(+), 152 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index 2be4b40363429..d78ca26e0a4d8 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -16,7 +16,7 @@ class Tests_HTML_wpHtmlTagProcessor_Bookmark extends WP_UnitTestCase { /** * @ticket 56299 * - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::set_bookmark */ public function test_set_bookmark() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -30,7 +30,7 @@ public function test_set_bookmark() { /** * @ticket 56299 * - * @covers ::release_bookmark + * @covers WP_HTML_Tag_Processor::release_bookmark */ public function test_release_bookmark() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -43,8 +43,7 @@ public function test_release_bookmark() { /** * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::seek */ public function test_seek() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -57,7 +56,7 @@ public function test_seek() { $p->seek( 'first li' ); $p->set_attribute( 'foo-1', 'bar-1' ); - $this->assertEquals( + $this->assertSame( '
  • One
  • Two
  • Three
', $p->get_updated_html() ); @@ -98,9 +97,8 @@ public function test_seek() { * * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark - * @covers ::apply_attributes_updates + * @covers WP_HTML_Tag_Processor::seek + * @covers WP_HTML_Tag_Processor::set_bookmark */ public function test_removing_long_attributes_doesnt_break_seek() { $input = <<remove_attribute( 'type' ); $p->set_attribute( 'class', 'hx_create-pr-button' ); - $this->assertEquals( + $this->assertSame( $expected_output, $p->get_updated_html() ); @@ -237,8 +235,7 @@ public function test_bookmarks_complex_use_case() { /** * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::seek */ public function test_updates_bookmark_for_additions_after_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -250,7 +247,7 @@ public function test_updates_bookmark_for_additions_after_both_sides() { $p->seek( 'first' ); $p->add_class( 'first' ); - $this->assertEquals( + $this->assertSame( '
First
Second
', $p->get_updated_html() ); @@ -259,8 +256,7 @@ public function test_updates_bookmark_for_additions_after_both_sides() { /** * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::seek */ public function test_updates_bookmark_for_additions_before_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -275,7 +271,7 @@ public function test_updates_bookmark_for_additions_before_both_sides() { $p->seek( 'second' ); $p->add_class( 'second' ); - $this->assertEquals( + $this->assertSame( '
First
Second
', $p->get_updated_html() ); @@ -284,8 +280,7 @@ public function test_updates_bookmark_for_additions_before_both_sides() { /** * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::seek */ public function test_updates_bookmark_for_deletions_after_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -297,7 +292,7 @@ public function test_updates_bookmark_for_deletions_after_both_sides() { $p->seek( 'first' ); $p->set_attribute( 'untouched', true ); - $this->assertEquals( + $this->assertSame( /** @TODO: we shouldn't have to assert the extra space after removing the attribute. */ '
First
Second
', $p->get_updated_html() @@ -307,8 +302,7 @@ public function test_updates_bookmark_for_deletions_after_both_sides() { /** * @ticket 56299 * - * @covers ::seek - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::seek */ public function test_updates_bookmark_for_deletions_before_both_sides() { $p = new WP_HTML_Tag_Processor( '
First
Second
' ); @@ -323,7 +317,7 @@ public function test_updates_bookmark_for_deletions_before_both_sides() { $p->seek( 'second' ); $p->set_attribute( 'safe', true ); - $this->assertEquals( + $this->assertSame( /** @TODO: we shouldn't have to assert the extra space after removing the attribute. */ '
First
Second
', $p->get_updated_html() @@ -333,7 +327,7 @@ public function test_updates_bookmark_for_deletions_before_both_sides() { /** * @ticket 56299 * - * @covers ::set_bookmark + * @covers WP_HTML_Tag_Processor::set_bookmark */ public function test_limits_the_number_of_bookmarks() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); @@ -350,7 +344,7 @@ public function test_limits_the_number_of_bookmarks() { /** * @ticket 56299 * - * @covers ::seek + * @covers WP_HTML_Tag_Processor::seek */ public function test_limits_the_number_of_seek_calls() { $p = new WP_HTML_Tag_Processor( '
  • One
  • Two
  • Three
' ); diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor.php b/tests/phpunit/tests/html/wpHtmlTagProcessor.php index 7f8ff0895f042..60a935196ee70 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor.php @@ -30,7 +30,6 @@ public function test_get_tag_returns_null_before_finding_tags() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_tag */ public function test_get_tag_returns_null_when_not_in_open_tag() { @@ -43,7 +42,6 @@ public function test_get_tag_returns_null_when_not_in_open_tag() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_tag */ public function test_get_tag_returns_open_tag_name() { @@ -67,7 +65,6 @@ public function test_get_attribute_returns_null_before_finding_tags() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_not_in_open_tag() { @@ -80,7 +77,6 @@ public function test_get_attribute_returns_null_when_not_in_open_tag() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_in_closing_tag() { @@ -94,7 +90,6 @@ public function test_get_attribute_returns_null_when_in_closing_tag() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_null_when_attribute_missing() { @@ -107,7 +102,6 @@ public function test_get_attribute_returns_null_when_attribute_missing() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_attribute_value() { @@ -120,7 +114,6 @@ public function test_get_attribute_returns_attribute_value() { /** * @ticket 56299 * - * 
@covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_true_for_boolean_attribute() { @@ -133,13 +126,12 @@ public function test_get_attribute_returns_true_for_boolean_attribute() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_returns_string_for_truthy_attributes() { $p = new WP_HTML_Tag_Processor( '' ); - $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' ); $this->assertSame( 'enabled', $p->get_attribute( 'enabled' ), 'Accessing a boolean "enabled" attribute value did not return true' ); $this->assertSame( '1', $p->get_attribute( 'checked' ), 'Accessing a checked=1 attribute value did not return "1"' ); $this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' ); @@ -148,7 +140,7 @@ public function test_get_attribute_returns_string_for_truthy_attributes() { /** * @ticket 56299 * - * @covers ::get_attribute + * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_get_attribute_decodes_html_character_references() { $p = new WP_HTML_Tag_Processor( '
' ); @@ -160,13 +152,12 @@ public function test_get_attribute_decodes_html_character_references() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_attributes_parser_treats_slash_as_attribute_separator() { $p = new WP_HTML_Tag_Processor( '
Test
' ); - $this->assertTrue( $p->next_tag( array() ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $p->next_tag(), 'Querying an existing tag did not return true' ); $this->assertTrue( $p->get_attribute( 'a' ), 'Accessing an existing attribute did not return true' ); $this->assertTrue( $p->get_attribute( 'b' ), 'Accessing an existing attribute did not return true' ); $this->assertTrue( $p->get_attribute( 'c' ), 'Accessing an existing attribute did not return true' ); @@ -177,7 +168,6 @@ public function test_attributes_parser_treats_slash_as_attribute_separator() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute * * @dataProvider data_attribute_name_case_variants @@ -194,7 +184,6 @@ public function test_get_attribute_is_case_insensitive_for_attributes_with_value /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::get_attribute * * @dataProvider data_attribute_name_case_variants @@ -225,7 +214,6 @@ public function data_attribute_name_case_variants() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::remove_attribute */ public function test_remove_attribute_is_case_insensitive() { @@ -239,7 +227,6 @@ public function test_remove_attribute_is_case_insensitive() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::set_attribute */ public function test_set_attribute_is_case_insensitive() { @@ -314,8 +301,6 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html * @covers WP_HTML_Tag_Processor::get_attribute_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { @@ -397,7 +382,7 @@ public function 
test_get_updated_html_applies_the_updates_so_far_and_keeps_the_p /** * @ticket 56299 * - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_get_updated_html_without_updating_any_attributes_returns_the_original_html() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -463,8 +448,7 @@ public function test_next_tag_should_stop_on_closers_only_when_requested() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag - * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute */ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -484,12 +468,10 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar /** * @ticket 56299 * - * @covers ::is_tag_closer - * @covers ::set_attribute - * @covers ::remove_attribute - * @covers ::add_class - * @covers ::remove_class - * @covers ::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute + * @covers WP_HTML_Tag_Processor::remove_attribute + * @covers WP_HTML_Tag_Processor::add_class + * @covers WP_HTML_Tag_Processor::remove_class */ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( '
' ); @@ -522,21 +504,21 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { } /** - * Passing a double quote inside of an attribute values could lead to an XSS attack as follows: + * Passing a double quote inside of an attribute value could lead to an XSS attack as follows: * - * + * ```php * $p = new WP_HTML_Tag_Processor( '
' ); * $p->next_tag(); * $p->set_attribute('class', '" onclick="alert'); * echo $p; * //
- *
+ * ``` * * To prevent it, `set_attribute` calls `esc_attr()` on its given values. * - * + * ```php *
- *
+ * ``` * * @ticket 56299 * @@ -591,8 +573,6 @@ public function data_set_attribute_prevents_xss() { * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -614,11 +594,9 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html * @covers WP_HTML_Tag_Processor::get_attribute */ - public function test_get_attribute_returns_updated_values_before_they_are_updated() { + public function test_get_attribute_returns_updated_values_before_they_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-attribute', 'test-value' ); @@ -638,11 +616,9 @@ public function test_get_attribute_returns_updated_values_before_they_are_update /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_returns_updated_values_before_they_are_updated_with_different_name_casing() { + public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'test-ATTribute', 'test-value' ); @@ -662,11 +638,9 @@ public function test_get_attribute_returns_updated_values_before_they_are_update /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html * @covers WP_HTML_Tag_Processor::get_attribute */ - public function test_get_attribute_reflects_added_class_names_before_they_are_updated() { + public 
function test_get_attribute_reflects_added_class_names_before_they_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'my-class' ); @@ -686,11 +660,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html * @covers WP_HTML_Tag_Processor::get_attribute */ - public function test_get_attribute_reflects_added_class_names_before_they_are_updated_and_retains_classes_from_previous_add_class_calls() { + public function test_get_attribute_reflects_added_class_names_before_they_are_applied_and_retains_classes_from_previous_add_class_calls() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->add_class( 'my-class' ); @@ -718,11 +690,9 @@ public function test_get_attribute_reflects_added_class_names_before_they_are_up /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::remove_attribute * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_reflects_removed_attribute_before_it_is_updated() { + public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->remove_attribute( 'id' ); @@ -741,12 +711,9 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_updat /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::remove_attribute * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_it_is_updated() { + public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); 
$p->next_tag(); $p->set_attribute( 'test-attribute', 'test-value' ); @@ -766,12 +733,9 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::remove_attribute * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_it_is_updated() { + public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); $p->next_tag(); $p->set_attribute( 'id', 'test-value' ); @@ -791,11 +755,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html * @covers WP_HTML_Tag_Processor::get_attribute */ - public function test_get_attribute_reflects_removed_class_names_before_they_are_updated() { + public function test_get_attribute_reflects_removed_class_names_before_they_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->remove_class( 'with-border' ); @@ -815,12 +777,9 @@ public function test_get_attribute_reflects_removed_class_names_before_they_are_ /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::remove_class * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_it_is_updated() { + public function test_get_attribute_reflects_setting_and_then_removing_a_class_name_before_those_updates_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'foo-class' ); @@ -841,12 
+800,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_a_class_na /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::remove_class * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_it_is_updated() { + public function test_get_attribute_reflects_duplicating_and_then_removing_an_existing_class_name_before_those_updates_are_applied() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); $p->next_tag(); $p->add_class( 'with-border' ); @@ -871,9 +827,8 @@ public function test_get_attribute_reflects_duplicating_and_then_removing_an_exi * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_update_first_when_duplicated_attribute() { + public function test_update_first_attribute_when_duplicated_attributes_exist() { $p = new WP_HTML_Tag_Processor( '
Text
' ); $p->next_tag(); $p->set_attribute( 'id', 'updated-id' ); @@ -885,7 +840,6 @@ public function test_update_first_when_duplicated_attribute() { * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -897,8 +851,8 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v /** * @ticket 56299 * + * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -917,12 +871,13 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it. * Let's revisit if and when this becomes a problem. * - * This test is in place to confirm this behavior, while incorrect, is well-defined. + * This test is in place to confirm this behavior, which, while incorrect, is well-defined. + * A later fix introduced to the Tag Processor should update this test to reflect the + * wanted and correct behavior. * * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_first_when_duplicated_attribute() { $p = new WP_HTML_Tag_Processor( '
Text
' ); @@ -936,7 +891,6 @@ public function test_remove_first_when_duplicated_attribute() { * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -950,7 +904,6 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -964,8 +917,6 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_creates_a_class_attribute_when_there_is_none() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -988,8 +939,6 @@ public function test_add_class_creates_a_class_attribute_when_there_is_none() { * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_calling_add_class_twice_creates_a_class_attribute_with_both_class_names_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1013,8 +962,6 @@ public function test_calling_add_class_twice_creates_a_class_attribute_with_both * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_does_not_change_the_markup_when_there_is_no_class_attribute() { $p = new WP_HTML_Tag_Processor( 
self::HTML_SIMPLE ); @@ -1036,8 +983,6 @@ public function test_remove_class_does_not_change_the_markup_when_there_is_no_cl * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_appends_class_names_to_the_existing_class_attribute_when_one_already_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1061,8 +1006,6 @@ public function test_add_class_appends_class_names_to_the_existing_class_attribu * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_removes_a_single_class_from_the_class_attribute_when_one_exists() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1085,8 +1028,6 @@ public function test_remove_class_removes_a_single_class_from_the_class_attribut * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_calling_remove_class_with_all_listed_class_names_removes_the_existing_class_attribute_from_the_markup() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1109,8 +1050,6 @@ public function test_calling_remove_class_with_all_listed_class_names_removes_th * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_does_not_add_duplicate_class_names() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1133,8 +1072,6 @@ public function test_add_class_does_not_add_duplicate_class_names() { * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute 
*/ public function test_add_class_preserves_class_name_order_when_a_duplicate_class_name_is_added() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1157,8 +1094,6 @@ public function test_add_class_preserves_class_name_order_when_a_duplicate_class * @ticket 56299 * * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_add_class_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1183,8 +1118,6 @@ public function test_add_class_when_there_is_a_class_attribute_with_excessive_wh * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_attribute_with_excessive_whitespaces() { $p = new WP_HTML_Tag_Processor( @@ -1209,8 +1142,6 @@ public function test_remove_class_preserves_whitespaces_when_there_is_a_class_at * @ticket 56299 * * @covers WP_HTML_Tag_Processor::remove_class - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_removing_all_classes_removes_the_existing_class_attribute_from_the_markup_even_when_excessive_whitespaces_are_present() { $p = new WP_HTML_Tag_Processor( @@ -1242,8 +1173,6 @@ public function test_removing_all_classes_removes_the_existing_class_attribute_f * * @covers WP_HTML_Tag_Processor::add_class * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html - * @covers WP_HTML_Tag_Processor::get_attribute */ public function test_set_attribute_takes_priority_over_add_class() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1291,8 +1220,6 @@ public function test_set_attribute_takes_priority_over_add_class() { * * @covers WP_HTML_Tag_Processor::add_class * @covers 
WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_set_attribute_takes_priority_over_add_class_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_WITH_CLASSES ); @@ -1329,10 +1256,7 @@ public function test_set_attribute_takes_priority_over_add_class_even_before_upd /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1354,10 +1278,7 @@ public function test_add_class_overrides_boolean_class_attribute() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_add_class_overrides_boolean_class_attribute_even_before_updating() { $p = new WP_HTML_Tag_Processor( self::HTML_SIMPLE ); @@ -1479,9 +1400,7 @@ public function test_advanced_use_case() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::remove_attribute - * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::next_tag */ public function test_correctly_parses_html_attributes_wrapped_in_single_quotation_marks() { $p = new WP_HTML_Tag_Processor( @@ -1511,9 +1430,8 @@ public function test_correctly_parses_html_attributes_wrapped_in_single_quotatio * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ - public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html_attribute_with_implicit_value() { + public function 
test_set_attribute_with_value_equal_to_true_adds_a_boolean_html_attribute_with_implicit_value() { $p = new WP_HTML_Tag_Processor( '
' ); @@ -1529,7 +1447,6 @@ public function test_set_attribute_with_value_equals_to_true_adds_a_boolean_html * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1547,7 +1464,6 @@ public function test_setting_a_boolean_attribute_to_false_removes_it_from_the_ma * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() { $html_input = '
'; @@ -1561,7 +1477,6 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma * @ticket 56299 * * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit_value_to_the_markup() { $p = new WP_HTML_Tag_Processor( @@ -1578,7 +1493,6 @@ public function test_setting_a_boolean_attribute_to_a_string_value_adds_explicit /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::get_tag * @covers WP_HTML_Tag_Processor::next_tag */ public function test_unclosed_script_tag_should_not_cause_an_infinite_loop() { @@ -1752,7 +1666,6 @@ public function data_skips_contents_of_script_and_rcdata_regions() { * * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_can_query_and_update_wrongly_nested_tags() { $p = new WP_HTML_Tag_Processor( @@ -1773,7 +1686,6 @@ public function test_can_query_and_update_wrongly_nested_tags() { * * @covers WP_HTML_Tag_Processor::next_tag * @covers WP_HTML_Tag_Processor::remove_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_removing_specific_attributes_in_malformed_html() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); @@ -1788,9 +1700,7 @@ public function test_removing_specific_attributes_in_malformed_html() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_Tag * @covers WP_HTML_Tag_Processor::set_attribute - * @covers WP_HTML_Tag_Processor::get_updated_html */ public function test_updating_specific_attributes_in_malformed_html() { $p = new WP_HTML_Tag_Processor( self::HTML_MALFORMED ); @@ -1807,10 +1717,8 @@ public function test_updating_specific_attributes_in_malformed_html() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag - * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::add_class - 
* @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute * * @dataProvider data_updating_attributes * @@ -1872,10 +1780,8 @@ public function data_updating_attributes() { /** * @ticket 56299 * - * @covers WP_HTML_Tag_Processor::next_tag - * @covers WP_HTML_Tag_Processor::set_attribute * @covers WP_HTML_Tag_Processor::add_class - * @covers WP_HTML_Tag_Processor::get_updated_html + * @covers WP_HTML_Tag_Processor::set_attribute * * @dataProvider data_updating_attributes_in_malformed_html * From a5f2d9670f1a7d0f424629fb7af0638c9b59d5ef Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 15:44:56 -0700 Subject: [PATCH 29/36] Was doing it wrong w.r.t. doing_it_wrong --- .../html-api/class-wp-html-tag-processor.php | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index f29fbd80c9a01..b4d966819a049 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -715,9 +715,11 @@ public function set_bookmark( $name ) { } if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { - if ( WP_DEBUG ) { - trigger_error( "Too many bookmarks: cannot create '{$name}'", E_USER_WARNING ); - } + _doing_it_wrong( + __METHOD__, + __( 'Too many bookmarks: cannot create any more.' ), + '6.2.0' + ); return false; } @@ -1474,16 +1476,20 @@ private function apply_attributes_updates() { */ public function seek( $bookmark_name ) { if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { - if ( WP_DEBUG ) { - trigger_error( 'Invalid bookmark name', E_USER_WARNING ); - } + _doing_it_wrong( + __METHOD__, + __( 'Unknown bookmark name.' 
), + '6.2.0' + ); return false; } if ( ++$this->seek_count > self::MAX_SEEK_OPS ) { - if ( WP_DEBUG ) { - trigger_error( 'Too many calls to seek() - this can lead to performance issues.', E_USER_WARNING ); - } + _doing_it_wrong( + __METHOD__, + __( 'Too many calls to seek() - this can lead to performance issues.' ), + '6.2.0' + ); return false; } @@ -1807,9 +1813,11 @@ public function set_attribute( $name, $value ) { ']~Ssu', $name ) ) { - if ( WP_DEBUG ) { - trigger_error( 'Invalid attribute name', E_USER_WARNING ); - } + _doing_it_wrong( + __METHOD__, + __( 'Invalid attribute name.' ), + '6.2.0' + ); return false; } @@ -2112,6 +2120,11 @@ private function parse_query( $query ) { // If not using the string interface, an associative array is required. if ( ! is_array( $query ) ) { + _doing_it_wrong( + __METHOD__, + __( 'The query argument must be an array or a tag name.' ), + '6.2.0' + ); return; } From 5b1d47e7b03641581ba45ee20971778ea287e112 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 16:45:15 -0700 Subject: [PATCH 30/36] Add additional type check to avoid throwing _doing_it_wrong error where none was thrown before. --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index b4d966819a049..59fa288a44f75 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2118,6 +2118,11 @@ private function parse_query( $query ) { return; } + // An empty query parameter applies no restrictions on the search. + if ( null === $query ) { + return; + } + // If not using the string interface, an associative array is required. if ( ! 
is_array( $query ) ) { _doing_it_wrong( From 3f9b274437f1d298addff628aa531646e2668e09 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 17:12:30 -0700 Subject: [PATCH 31/36] Lada la di --- tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index d78ca26e0a4d8..bd483bfee94ea 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -337,7 +337,7 @@ public function test_limits_the_number_of_bookmarks() { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } - $this->expectWarning(); + $this->expectNotice(); $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); } @@ -355,7 +355,7 @@ public function test_limits_the_number_of_seek_calls() { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } - $this->expectWarning(); + $this->expectNotice(); $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." 
); } } From 1b8c75c12cd4fa52d610d5ed638929c7912ba65c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 17:23:27 -0700 Subject: [PATCH 32/36] Remove checks that _doing_it_wrong throws a notice --- tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index bd483bfee94ea..5623dd76685d2 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -337,7 +337,6 @@ public function test_limits_the_number_of_bookmarks() { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } - $this->expectNotice(); $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); } @@ -355,7 +354,6 @@ public function test_limits_the_number_of_seek_calls() { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } - $this->expectNotice(); $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." ); } } From aad531083a2eb33a051b1c8782a6c75a6d51c8b3 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Feb 2023 18:14:04 -0700 Subject: [PATCH 33/36] Set expected incorrect usage in tests. 
--- tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php index 5623dd76685d2..bb40038bdf35f 100644 --- a/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html/wpHtmlTagProcessor-bookmark.php @@ -337,6 +337,7 @@ public function test_limits_the_number_of_bookmarks() { $this->assertTrue( $p->set_bookmark( "bookmark $i" ), "Could not allocate the bookmark #$i" ); } + $this->setExpectedIncorrectUsage( 'WP_HTML_Tag_Processor::set_bookmark' ); $this->assertFalse( $p->set_bookmark( 'final bookmark' ), "Allocated $i bookmarks, which is one above the limit." ); } @@ -354,6 +355,7 @@ public function test_limits_the_number_of_seek_calls() { $this->assertTrue( $p->seek( 'bookmark' ), 'Could not seek to the "bookmark"' ); } + $this->setExpectedIncorrectUsage( 'WP_HTML_Tag_Processor::seek' ); $this->assertFalse( $p->seek( 'bookmark' ), "$i-th seek() to the bookmark succeeded, even though it should exceed the allowed limit." ); } } From 4a438505ec984836dbc45950b77ed1536b95d611 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 2 Feb 2023 10:23:01 -0700 Subject: [PATCH 34/36] Docblock updates --- .../html-api/class-wp-html-tag-processor.php | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 59fa288a44f75..8247aa87908dd 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -27,9 +27,7 @@ */ /** - * Processes an input HTML document by applying a specified set - * of patches to that input. Tokenizes HTML but does not fully - * parse the input document. + * Modifies attributes in an HTML document for tags matching a query. 
* * ## Usage * @@ -707,7 +705,7 @@ public function next_tag( $query = null ) { * @since 6.2.0 * * @param string $name Identifies this particular bookmark. - * @return bool + * @return bool Whether the bookmark was successfully created. */ public function set_bookmark( $name ) { if ( null === $this->tag_name_starts_at ) { @@ -739,7 +737,7 @@ public function set_bookmark( $name ) { * performance overhead it requires. * * @param string $name Name of the bookmark to remove. - * @return bool + * @return bool Whether the bookmark already existed before removal. */ public function release_bookmark( $name ) { if ( ! array_key_exists( $name, $this->bookmarks ) ) { @@ -753,8 +751,7 @@ public function release_bookmark( $name ) { /** - * Skips the contents of the title and textarea tags until an appropriate - * tag closer is found. + * Skips contents of title and textarea tags. * * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state * @since 6.2.0 @@ -830,9 +827,11 @@ private function skip_rcdata( $tag_name ) { } /** - * Skips the contents of