From 15b4215285fc10f4851493abfffaa39a8c656057 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 14:45:39 -0700 Subject: [PATCH 01/11] Add todos --- plugins/optimization-detective/class-od-html-tag-processor.php | 2 ++ plugins/optimization-detective/optimization.php | 1 + 2 files changed, 3 insertions(+) diff --git a/plugins/optimization-detective/class-od-html-tag-processor.php b/plugins/optimization-detective/class-od-html-tag-processor.php index 2975e47a49..e830c760e4 100644 --- a/plugins/optimization-detective/class-od-html-tag-processor.php +++ b/plugins/optimization-detective/class-od-html-tag-processor.php @@ -342,6 +342,8 @@ public function next_token(): bool { /** * Gets the number of times the cursor has moved. * + * @todo Not needed once core short-circuits seek() when current cursor is the same as the sought-bookmark. + * * @since n.e.x.t * @see self::next_token() * @see self::seek() diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 47c6cc4526..9f43b9127a 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -219,6 +219,7 @@ function od_optimize_template_output_buffer( string $buffer ): string { $processor->set_bookmark( $current_tag_bookmark ); // TODO: Should we break if this returns false? foreach ( $visitors as $visitor ) { + // TODO: Remove get_cursor_move_count() logic once core automatically short-circuits seek(). $cursor_move_count = $processor->get_cursor_move_count(); $tracked_in_url_metrics = $visitor( $tag_visitor_context ) || $tracked_in_url_metrics; From 5bded1569437da0e1d676de82e5a868d4cc2f8c6 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 15:27:16 -0700 Subject: [PATCH 02/11] WIP --- .../class-od-tag-visitor-context.php | 12 ++++++------ plugins/optimization-detective/optimization.php | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/optimization-detective/class-od-tag-visitor-context.php b/plugins/optimization-detective/class-od-tag-visitor-context.php index b31f02b3af..55847e37f2 100644 --- a/plugins/optimization-detective/class-od-tag-visitor-context.php +++ b/plugins/optimization-detective/class-od-tag-visitor-context.php @@ -20,9 +20,9 @@ final class OD_Tag_Visitor_Context { /** - * HTML tag processor. + * HTML (tag) processor. * - * @var OD_HTML_Tag_Processor + * @var OD_HTML_Tag_Processor|OD_HTML_Processor * @readonly */ public $processor; @@ -46,11 +46,11 @@ final class OD_Tag_Visitor_Context { /** * Constructor. * - * @param OD_HTML_Tag_Processor $processor HTML tag processor. - * @param OD_URL_Metrics_Group_Collection $url_metrics_group_collection URL metrics group collection. - * @param OD_Link_Collection $link_collection Link collection. + * @param OD_HTML_Tag_Processor|OD_HTML_Processor $processor HTML tag processor. + * @param OD_URL_Metrics_Group_Collection $url_metrics_group_collection URL metrics group collection. + * @param OD_Link_Collection $link_collection Link collection. */ - public function __construct( OD_HTML_Tag_Processor $processor, OD_URL_Metrics_Group_Collection $url_metrics_group_collection, OD_Link_Collection $link_collection ) { + public function __construct( $processor, OD_URL_Metrics_Group_Collection $url_metrics_group_collection, OD_Link_Collection $link_collection ) { $this->processor = $processor; $this->url_metrics_group_collection = $url_metrics_group_collection; $this->link_collection = $link_collection; diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 9f43b9127a..82f6420ef0 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -178,6 +178,7 @@ function od_optimize_template_output_buffer( string $buffer ): string { // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. $processor = new OD_HTML_Tag_Processor( $buffer ); + //$processor = new OD_HTML_Processor::create_full_parser( $buffer ); if ( ! ( $processor->next_tag() && ! $processor->is_tag_closer() && From 384b44c655014709bb4467aa0985270a5223bd09 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Wed, 18 Sep 2024 18:49:12 -0700 Subject: [PATCH 03/11] Try using WP_HTML_Processor in Optimization Detective --- .../class-od-html-processor.php | 418 ++++++++++++++++++ .../optimization-detective/optimization.php | 10 +- 2 files changed, 426 insertions(+), 2 deletions(-) create mode 100644 plugins/optimization-detective/class-od-html-processor.php diff --git a/plugins/optimization-detective/class-od-html-processor.php b/plugins/optimization-detective/class-od-html-processor.php new file mode 100644 index 0000000000..85bf370baf --- /dev/null +++ b/plugins/optimization-detective/class-od-html-processor.php @@ -0,0 +1,418 @@ +open_stack_tags` and + * `$this->open_stack_indices` whenever calling `self::set_bookmark()`. + * Then whenever `self::seek()` is called, the bookmarked open stacks are + * populated back into `$this->open_stack_tags` and `$this->open_stack_indices`. + * + * @since 0.4.0 + * @var array + */ + private $bookmarked_open_stacks = array(); + + /** + * XPath for the current tag. + * + * This is used so that repeated calls to {@see self::get_xpath()} won't needlessly reconstruct the string. This + * gets cleared whenever {@see self::open_tags()} iterates to the next tag. + * + * @since n.e.x.t + * @var string|null + */ + private $current_xpath = null; + + /** + * Mapping of bookmark name to a list of HTML strings which will be inserted at the time get_updated_html() is called. + * + * @since n.e.x.t + * @var array + */ + private $buffered_text_replacements = array(); + + /** + * Count for the number of times that the cursor was moved. + * + * @since n.e.x.t + * @var int + * @see self::next_token() + * @see self::seek() + */ + private $cursor_move_count = 0; + + /** + * Creates an HTML processor in the full parsing mode. + * + * It's likely that a fragment parser is more appropriate, unless sending an + * entire HTML document from start to finish. Consider a fragment parser with + * a context node of ``. + * + * Since UTF-8 is the only currently-accepted charset, if working with a + * document that isn't UTF-8, it's important to convert the document before + * creating the processor: pass in the converted HTML. + * + * @param string $html Input HTML document to process. + * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used + * in the input byte stream. Currently must be UTF-8. + * @return static|null The created processor if successful, otherwise null. + */ + public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) { + return parent::create_full_parser( $html, $known_definite_encoding ); + } + + /** + * Finds the next open tag. + * + * @since n.e.x.t + * + * @return bool Whether a tag was matched. + */ + public function next_open_tag(): bool { + while ( $this->next_tag() ) { + if ( ! $this->is_tag_closer() ) { + return true; + } + } + return false; + } + + /** + * Finds the next token in the HTML document. + * + * @inheritDoc + * @since n.e.x.t + * + * @return bool Whether a token was parsed. + */ + public function next_token(): bool { + $this->current_xpath = null; // Clear cache. + ++$this->cursor_move_count; + if ( ! parent::next_token() ) { + return false; + } + + if ( $this->get_token_type() === '#tag' && $this->is_tag_closer() ) { + $tag_name = $this->get_tag(); + + // Set bookmarks for insertion of preload links and the detection script module. + if ( 'HEAD' === $tag_name ) { + $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); + } elseif ( 'BODY' === $tag_name ) { + $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); + } + } + return true; + } + + /** + * Gets the number of times the cursor has moved. + * + * @todo Not needed once core short-circuits seek() when current cursor is the same as the sought-bookmark. + * + * @since n.e.x.t + * @see self::next_token() + * @see self::seek() + * + * @return int Count of times the cursor has moved. + */ + public function get_cursor_move_count(): int { + return $this->cursor_move_count; + } + + /** + * Updates or creates a new attribute on the currently matched tag with the passed value. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $name The attribute name to target. + * @param string|bool $value The new attribute value. + * @return bool Whether an attribute value was set. + */ + public function set_attribute( $name, $value ): bool { // phpcs:ignore SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingNativeTypeHint + $existing_value = $this->get_attribute( $name ); + $result = parent::set_attribute( $name, $value ); + if ( $result ) { + if ( is_string( $existing_value ) ) { + $this->set_meta_attribute( "replaced-{$name}", $existing_value ); + } else { + $this->set_meta_attribute( "added-{$name}", true ); + } + } + return $result; + } + + /** + * Sets a meta attribute. + * + * All meta attributes are prefixed with data-od-. + * + * @since n.e.x.t + * + * @param string $name Meta attribute name. + * @param string|true $value Value. + * @return bool Whether an attribute was set. + */ + public function set_meta_attribute( string $name, $value ): bool { + return parent::set_attribute( "data-od-{$name}", $value ); + } + + /** + * Removes an attribute from the currently-matched tag. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $name The attribute name to remove. + */ + public function remove_attribute( $name ): bool { // phpcs:ignore SlevomatCodingStandard.TypeHints.ParameterTypeHint.MissingNativeTypeHint + $old_value = $this->get_attribute( $name ); + $result = parent::remove_attribute( $name ); + if ( $result ) { + $this->set_meta_attribute( "removed-{$name}", is_string( $old_value ) ? $old_value : true ); + } + return $result; + } + + /** + * Move the internal cursor in the Tag Processor to a given bookmark's location. + * + * @inheritDoc + * @since 0.4.0 + * + * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. + * @return bool Whether the internal cursor was successfully moved to the bookmark's location. + */ + public function seek( $bookmark_name ): bool { + $result = parent::seek( $bookmark_name ); + if ( $result ) { + $this->open_stack_tags = $this->bookmarked_open_stacks[ $bookmark_name ]['tags']; + $this->open_stack_indices = $this->bookmarked_open_stacks[ $bookmark_name ]['indices']; + } + return $result; + } + + /** + * Sets a bookmark in the HTML document. + * + * @inheritDoc + * @since 0.4.0 + * + * @param string $bookmark_name Identifies this particular bookmark. + * @return bool Whether the bookmark was successfully created. + */ + public function set_bookmark( $bookmark_name ): bool { + $result = parent::set_bookmark( $bookmark_name ); + if ( $result ) { + $this->bookmarked_open_stacks[ $bookmark_name ] = array( + 'tags' => $this->open_stack_tags, + 'indices' => $this->open_stack_indices, + ); + } + return $result; + } + + /** + * Removes a bookmark that is no longer needed. + * + * @inheritDoc + * @since n.e.x.t + * + * @param string $bookmark_name Name of the bookmark to remove. + * @return bool Whether the bookmark already existed before removal. + */ + public function release_bookmark( $bookmark_name ): bool { + if ( in_array( $bookmark_name, array( self::END_OF_HEAD_BOOKMARK, self::END_OF_BODY_BOOKMARK ), true ) ) { + $this->warn( + __METHOD__, + /* translators: %s is the bookmark name */ + sprintf( 'The %s bookmark is not allowed to be released.', 'optimization-detective' ) + ); + return false; + } + unset( $this->bookmarked_open_stacks[ $bookmark_name ] ); + return parent::release_bookmark( $bookmark_name ); + } + + /** + * Gets indexed breadcrumbs for the current open tag. + * + * A breadcrumb consists of a tag name and its sibling index. + * + * @since n.e.x.t + * + * @return Generator Breadcrumb. + */ + private function get_indexed_breadcrumbs(): Generator { + foreach ( $this->open_stack_tags as $i => $breadcrumb_tag_name ) { + yield array( $breadcrumb_tag_name, $this->open_stack_indices[ $i ] ); + } + } + + /** + * Gets XPath for the current open tag. + * + * It would be nicer if this were like `/html[1]/body[2]` but in XPath the position() here refers to the + * index of the preceding node set. So it has to rather be written `/*[1][self::html]/*[2][self::body]`. + * + * @since n.e.x.t + * + * @return string XPath. + */ + public function get_xpath(): string { + if ( null === $this->current_xpath ) { + $this->current_xpath = ''; + foreach ( $this->get_indexed_breadcrumbs() as list( $tag_name, $index ) ) { + $this->current_xpath .= sprintf( '/*[%d][self::%s]', $index + 1, $tag_name ); + } + } + return $this->current_xpath; + } + + /** + * Append HTML to the HEAD. + * + * The provided HTML must be valid! No validation is performed. + * + * @since n.e.x.t + * + * @param string $html HTML to inject. + */ + public function append_head_html( string $html ): void { + $this->buffered_text_replacements[ self::END_OF_HEAD_BOOKMARK ][] = $html; + } + + /** + * Append HTML to the BODY. + * + * The provided HTML must be valid! No validation is performed. + * + * @since n.e.x.t + * + * @param string $html HTML to inject. + */ + public function append_body_html( string $html ): void { + $this->buffered_text_replacements[ self::END_OF_BODY_BOOKMARK ][] = $html; + } + + /** + * Gets the final updated HTML. + * + * This should only be called after the closing HTML tag has been reached and just before + * calling {@see WP_HTML_Processor::get_updated_html()} to send the document back in the response. + * + * @since n.e.x.t + * + * @return string Final updated HTML. + */ + public function get_final_updated_html(): string { + foreach ( array_keys( $this->buffered_text_replacements ) as $bookmark ) { + $html_strings = $this->buffered_text_replacements[ $bookmark ]; + if ( count( $html_strings ) === 0 ) { + continue; + } + if ( ! $this->has_bookmark( $bookmark ) ) { + $this->warn( + __METHOD__, + sprintf( + /* translators: %s is the bookmark name */ + __( 'Unable to append markup to %s since the bookmark no longer exists.', 'optimization-detective' ), + $bookmark + ) + ); + } else { + $start = $this->bookmarks[ $bookmark ]->start; + + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $start, + 0, + implode( '', $html_strings ) + ); + + unset( $this->buffered_text_replacements[ $bookmark ] ); + } + } + + return parent::get_updated_html(); + } + + /** + * Warns of bad markup. + * + * @since n.e.x.t + * + * @param string $function_name Function name. + * @param string $message Warning message. + */ + private function warn( string $function_name, string $message ): void { + wp_trigger_error( + $function_name, + esc_html( $message ) + ); + } +} diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 82f6420ef0..3efc224fa9 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -177,8 +177,14 @@ function od_optimize_template_output_buffer( string $buffer ): string { } // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. - $processor = new OD_HTML_Tag_Processor( $buffer ); - //$processor = new OD_HTML_Processor::create_full_parser( $buffer ); + if ( version_compare( get_bloginfo( 'version' ), strtok( '6.7', '-' ), '>=' ) ) { + $processor = OD_HTML_Processor::create_full_parser( $buffer ); + if ( null === $processor ) { + return $buffer; + } + } else { + $processor = new OD_HTML_Tag_Processor( $buffer ); + } if ( ! ( $processor->next_tag() && ! $processor->is_tag_closer() && From 28271242b87df0f79569978b90f132555736eeb6 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 20 Sep 2024 15:16:29 -0700 Subject: [PATCH 04/11] Reuse get_current_depth() --- .../optimization-detective/class-od-html-tag-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/optimization-detective/class-od-html-tag-processor.php b/plugins/optimization-detective/class-od-html-tag-processor.php index e830c760e4..caebbce38a 100644 --- a/plugins/optimization-detective/class-od-html-tag-processor.php +++ b/plugins/optimization-detective/class-od-html-tag-processor.php @@ -284,11 +284,11 @@ public function next_token(): bool { $i = array_search( 'P', $this->open_stack_tags, true ); if ( false !== $i ) { array_splice( $this->open_stack_tags, (int) $i ); - array_splice( $this->open_stack_indices, count( $this->open_stack_tags ) ); + array_splice( $this->open_stack_indices, $this->get_current_depth() ); } } - $level = count( $this->open_stack_tags ); + $level = $this->get_current_depth(); $this->open_stack_tags[] = $tag_name; if ( ! isset( $this->open_stack_indices[ $level ] ) ) { From 67b3426abbcab37ab8ac488c0fdc46828cf3379c Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Mon, 23 Sep 2024 09:21:02 -0700 Subject: [PATCH 05/11] Fix including HTML Parser --- plugins/optimization-detective/load.php | 1 + plugins/optimization-detective/optimization.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/optimization-detective/load.php b/plugins/optimization-detective/load.php index 52902fb31f..49e977f76e 100644 --- a/plugins/optimization-detective/load.php +++ b/plugins/optimization-detective/load.php @@ -98,6 +98,7 @@ static function ( string $version ): void { // Core infrastructure classes. require_once __DIR__ . '/class-od-data-validation-exception.php'; require_once __DIR__ . '/class-od-html-tag-processor.php'; + require_once __DIR__ . '/class-od-html-processor.php'; require_once __DIR__ . '/class-od-url-metric.php'; require_once __DIR__ . '/class-od-strict-url-metric.php'; require_once __DIR__ . '/class-od-url-metrics-group.php'; diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 3efc224fa9..2d32fb3968 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -177,7 +177,7 @@ function od_optimize_template_output_buffer( string $buffer ): string { } // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. - if ( version_compare( get_bloginfo( 'version' ), strtok( '6.7', '-' ), '>=' ) ) { + if ( version_compare( strtok( get_bloginfo( 'version' ), '-' ), '6.7', '>=' ) ) { $processor = OD_HTML_Processor::create_full_parser( $buffer ); if ( null === $processor ) { return $buffer; From 430f75d6669248b81973f1ef770379c8d69108e6 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Mon, 23 Sep 2024 09:21:19 -0700 Subject: [PATCH 06/11] WIP (stashed) --- .../class-od-html-processor.php | 121 ++++++++---------- 1 file changed, 55 insertions(+), 66 deletions(-) diff --git a/plugins/optimization-detective/class-od-html-processor.php b/plugins/optimization-detective/class-od-html-processor.php index 85bf370baf..aef65dd4b8 100644 --- a/plugins/optimization-detective/class-od-html-processor.php +++ b/plugins/optimization-detective/class-od-html-processor.php @@ -46,35 +46,24 @@ final class OD_HTML_Processor extends WP_HTML_Processor { */ const END_OF_BODY_BOOKMARK = 'optimization_detective_end_of_body'; - - /** - * Open stack tags. - * - * @since 0.4.0 - * @var string[] - */ - private $open_stack_tags = array(); - /** * Open stack indices. * - * @since 0.4.0 + * @since n.e.x.t * @var int[] */ private $open_stack_indices = array(); /** - * Bookmarked open stacks. + * Bookmarked open stack indices. * - * This is populated with the contents of `$this->open_stack_tags` and - * `$this->open_stack_indices` whenever calling `self::set_bookmark()`. - * Then whenever `self::seek()` is called, the bookmarked open stacks are - * populated back into `$this->open_stack_tags` and `$this->open_stack_indices`. + * This is populated with the contents of `$this->open_stack_indices` whenever calling `self::set_bookmark()`. Then + * whenever `self::seek()` is called, the bookmarked open stacks are populated back into `$this->open_stack_indices`. * - * @since 0.4.0 - * @var array + * @since n.e.x.t + * @var array */ - private $bookmarked_open_stacks = array(); + private $bookmarked_open_stack_indices = array(); /** * XPath for the current tag. @@ -106,24 +95,12 @@ final class OD_HTML_Processor extends WP_HTML_Processor { private $cursor_move_count = 0; /** - * Creates an HTML processor in the full parsing mode. - * - * It's likely that a fragment parser is more appropriate, unless sending an - * entire HTML document from start to finish. Consider a fragment parser with - * a context node of ``. - * - * Since UTF-8 is the only currently-accepted charset, if working with a - * document that isn't UTF-8, it's important to convert the document before - * creating the processor: pass in the converted HTML. + * Previous depth. * - * @param string $html Input HTML document to process. - * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used - * in the input byte stream. Currently must be UTF-8. - * @return static|null The created processor if successful, otherwise null. + * @since n.e.x.t + * @var int */ - public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) { - return parent::create_full_parser( $html, $known_definite_encoding ); - } + private $previous_depth = -1; /** * Finds the next open tag. @@ -150,20 +127,47 @@ public function next_open_tag(): bool { * @return bool Whether a token was parsed. */ public function next_token(): bool { + $previous_depth = $this->previous_depth; + $current_depth = $this->get_current_depth(); + $this->previous_depth = $current_depth; + $this->current_xpath = null; // Clear cache. ++$this->cursor_move_count; if ( ! parent::next_token() ) { + $this->open_stack_indices = array(); return false; } - if ( $this->get_token_type() === '#tag' && $this->is_tag_closer() ) { - $tag_name = $this->get_tag(); + if ( $this->get_token_type() === '#tag' ) { + if ( $current_depth < $previous_depth ) { + array_splice( $this->open_stack_indices, $current_depth ); + } elseif ( ! isset( $this->open_stack_indices[ $current_depth ] ) ) { + $this->open_stack_indices[ $current_depth ] = 0; + } else { + ++$this->open_stack_indices[ $current_depth ]; + } - // Set bookmarks for insertion of preload links and the detection script module. - if ( 'HEAD' === $tag_name ) { - $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); - } elseif ( 'BODY' === $tag_name ) { - $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); +// if ( $current_depth === 0 ) { +// echo '=========>' . $this->get_tag() . PHP_EOL; +// } + +// if ( $current_depth > $previous_depth ) { +// $this->open_stack_tags[] = $this->get_tag(); +// } elseif ( $current_depth < $previous_depth ) { +// array_splice( $this->open_stack_indices, $current_depth ); +// } else { +// +// } + + if ( $current_depth < $previous_depth ) { + $tag_name = $this->get_tag(); + + // Set bookmarks for insertion of preload links and the detection script module. + if ( 'HEAD' === $tag_name ) { + $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); + } elseif ( 'BODY' === $tag_name ) { + $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); + } } } return true; @@ -251,8 +255,7 @@ public function remove_attribute( $name ): bool { // phpcs:ignore SlevomatCoding public function seek( $bookmark_name ): bool { $result = parent::seek( $bookmark_name ); if ( $result ) { - $this->open_stack_tags = $this->bookmarked_open_stacks[ $bookmark_name ]['tags']; - $this->open_stack_indices = $this->bookmarked_open_stacks[ $bookmark_name ]['indices']; + $this->open_stack_indices = $this->bookmarked_open_stack_indices[ $bookmark_name ]; } return $result; } @@ -269,10 +272,7 @@ public function seek( $bookmark_name ): bool { public function set_bookmark( $bookmark_name ): bool { $result = parent::set_bookmark( $bookmark_name ); if ( $result ) { - $this->bookmarked_open_stacks[ $bookmark_name ] = array( - 'tags' => $this->open_stack_tags, - 'indices' => $this->open_stack_indices, - ); + $this->bookmarked_open_stack_indices[ $bookmark_name ] = $this->open_stack_indices; } return $result; } @@ -295,25 +295,10 @@ public function release_bookmark( $bookmark_name ): bool { ); return false; } - unset( $this->bookmarked_open_stacks[ $bookmark_name ] ); + unset( $this->bookmarked_open_stack_indices[ $bookmark_name ] ); return parent::release_bookmark( $bookmark_name ); } - /** - * Gets indexed breadcrumbs for the current open tag. - * - * A breadcrumb consists of a tag name and its sibling index. - * - * @since n.e.x.t - * - * @return Generator Breadcrumb. - */ - private function get_indexed_breadcrumbs(): Generator { - foreach ( $this->open_stack_tags as $i => $breadcrumb_tag_name ) { - yield array( $breadcrumb_tag_name, $this->open_stack_indices[ $i ] ); - } - } - /** * Gets XPath for the current open tag. * @@ -327,7 +312,8 @@ private function get_indexed_breadcrumbs(): Generator { public function get_xpath(): string { if ( null === $this->current_xpath ) { $this->current_xpath = ''; - foreach ( $this->get_indexed_breadcrumbs() as list( $tag_name, $index ) ) { + foreach ( $this->get_breadcrumbs() ?? array() as $i => $tag_name ) { + $index = $this->open_stack_indices[ $i ] ?? 0; $this->current_xpath .= sprintf( '/*[%d][self::%s]', $index + 1, $tag_name ); } } @@ -376,7 +362,10 @@ public function get_final_updated_html(): string { if ( count( $html_strings ) === 0 ) { continue; } - if ( ! $this->has_bookmark( $bookmark ) ) { + + $actual_bookmark_name = "_{$bookmark}"; + + if ( ! isset( $this->bookmarks[ $actual_bookmark_name ] ) ) { $this->warn( __METHOD__, sprintf( @@ -386,7 +375,7 @@ public function get_final_updated_html(): string { ) ); } else { - $start = $this->bookmarks[ $bookmark ]->start; + $start = $this->bookmarks[ $actual_bookmark_name ]->start; $this->lexical_updates[] = new WP_HTML_Text_Replacement( $start, From ac91d57910319f59728fcffd70e7d7b55d52381a Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 18 Oct 2024 08:59:23 -0700 Subject: [PATCH 07/11] Pass context into helper methods instead of processor --- .../class-embed-optimizer-tag-visitor.php | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/plugins/embed-optimizer/class-embed-optimizer-tag-visitor.php b/plugins/embed-optimizer/class-embed-optimizer-tag-visitor.php index edccb15cf3..268a162eb2 100644 --- a/plugins/embed-optimizer/class-embed-optimizer-tag-visitor.php +++ b/plugins/embed-optimizer/class-embed-optimizer-tag-visitor.php @@ -31,14 +31,14 @@ final class Embed_Optimizer_Tag_Visitor { * * @since n.e.x.t * - * @param OD_HTML_Tag_Processor $processor Processor. + * @param OD_Tag_Visitor_Context $context Tag visitor context. * @return bool Whether at the tag. */ - private function is_embed_figure( OD_HTML_Tag_Processor $processor ): bool { + private function is_embed_figure( OD_Tag_Visitor_Context $context ): bool { return ( - 'FIGURE' === $processor->get_tag() + 'FIGURE' === $context->processor->get_tag() && - true === $processor->has_class( 'wp-block-embed' ) + true === $context->processor->has_class( 'wp-block-embed' ) ); } @@ -47,14 +47,14 @@ private function is_embed_figure( OD_HTML_Tag_Processor $processor ): bool { * * @since n.e.x.t * - * @param OD_HTML_Tag_Processor $processor Processor. + * @param OD_Tag_Visitor_Context $context Tag visitor context. * @return bool Whether the tag should be measured and stored in URL metrics */ - private function is_embed_wrapper( OD_HTML_Tag_Processor $processor ): bool { + private function is_embed_wrapper( OD_Tag_Visitor_Context $context ): bool { return ( - 'DIV' === $processor->get_tag() + 'DIV' === $context->processor->get_tag() && - true === $processor->has_class( 'wp-block-embed__wrapper' ) + true === $context->processor->has_class( 'wp-block-embed__wrapper' ) ); } @@ -73,12 +73,12 @@ public function __invoke( OD_Tag_Visitor_Context $context ): bool { * The only thing we need to do if it is a div.wp-block-embed__wrapper tag is return true so that the tag * will get measured and stored in the URL Metrics. */ - if ( $this->is_embed_wrapper( $processor ) ) { + if ( $this->is_embed_wrapper( $context ) ) { return true; } // Short-circuit if not a figure.wp-block-embed tag. - if ( ! $this->is_embed_figure( $processor ) ) { + if ( ! $this->is_embed_figure( $context ) ) { return false; } From defb2655281898d80ac7bce3dbb8c70b80325eab Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 18 Oct 2024 09:09:56 -0700 Subject: [PATCH 08/11] Address PHPStan errors --- plugins/optimization-detective/class-od-html-processor.php | 2 +- plugins/optimization-detective/optimization.php | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/optimization-detective/class-od-html-processor.php b/plugins/optimization-detective/class-od-html-processor.php index aef65dd4b8..b5da5fa393 100644 --- a/plugins/optimization-detective/class-od-html-processor.php +++ b/plugins/optimization-detective/class-od-html-processor.php @@ -128,7 +128,7 @@ public function next_open_tag(): bool { */ public function next_token(): bool { $previous_depth = $this->previous_depth; - $current_depth = $this->get_current_depth(); + $current_depth = $this->get_current_depth(); // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.) $this->previous_depth = $current_depth; $this->current_xpath = null; // Clear cache. diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index 2d32fb3968..b92e42f4c0 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -177,9 +177,9 @@ function od_optimize_template_output_buffer( string $buffer ): string { } // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. - if ( version_compare( strtok( get_bloginfo( 'version' ), '-' ), '6.7', '>=' ) ) { - $processor = OD_HTML_Processor::create_full_parser( $buffer ); - if ( null === $processor ) { + if ( version_compare( (string) strtok( get_bloginfo( 'version' ), '-' ), '6.7', '>=' ) ) { + $processor = OD_HTML_Processor::create_full_parser( $buffer ); // @phpstan-ignore staticMethod.notFound (Not yet part of szepeviktor/phpstan-wordpress.) + if ( ! ( $processor instanceof OD_HTML_Processor ) ) { return $buffer; } } else { From f825c2174ea5522de50381ace13016bac15b1f73 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 18 Oct 2024 12:46:29 -0700 Subject: [PATCH 09/11] Fix checking for non-HTML documents when using full HTML Processor --- plugins/optimization-detective/optimization.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/plugins/optimization-detective/optimization.php b/plugins/optimization-detective/optimization.php index b92e42f4c0..3803449393 100644 --- a/plugins/optimization-detective/optimization.php +++ b/plugins/optimization-detective/optimization.php @@ -178,6 +178,20 @@ function od_optimize_template_output_buffer( string $buffer ): string { // If the initial tag is not an open HTML tag, then abort since the buffer is not a complete HTML document. if ( version_compare( (string) strtok( get_bloginfo( 'version' ), '-' ), '6.7', '>=' ) ) { + /* + * When using the full HTML processor, we have to resort to using the tag processor first to determine if it is + * HTML since the HTML processor will wrap the non-HTML in HTML and BODY tags according to the HTML spec. + * TODO: De-duplicate this logic with the below 'HTML' root tag check. + */ + $tag_processor = new WP_HTML_Tag_Processor( $buffer ); + if ( ! ( + $tag_processor->next_tag() && + ! $tag_processor->is_tag_closer() && + 'HTML' === $tag_processor->get_tag() + ) ) { + return $buffer; + } + $processor = OD_HTML_Processor::create_full_parser( $buffer ); // @phpstan-ignore staticMethod.notFound (Not yet part of szepeviktor/phpstan-wordpress.) if ( ! ( $processor instanceof OD_HTML_Processor ) ) { return $buffer; From 62aeeb934d70442173d040329b9370ec1696e655 Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 18 Oct 2024 13:33:03 -0700 Subject: [PATCH 10/11] Remove needless sprintf --- plugins/optimization-detective/tests/test-cases/many-images.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/optimization-detective/tests/test-cases/many-images.php b/plugins/optimization-detective/tests/test-cases/many-images.php index a5526001cf..20cfa3f09e 100644 --- a/plugins/optimization-detective/tests/test-cases/many-images.php +++ b/plugins/optimization-detective/tests/test-cases/many-images.php @@ -24,7 +24,7 @@ static function () { $tags = array(); for ( $i = 1; $i < WP_HTML_Tag_Processor::MAX_SEEK_OPS + 1; $i++ ) { - $tags[] = sprintf( 'Foo' ); + $tags[] = 'Foo'; } return $tags; } From c61646b5e4f70cc317fdacfaa92ea8196d25ab6d Mon Sep 17 00:00:00 2001 From: Weston Ruter Date: Fri, 18 Oct 2024 16:05:23 -0700 Subject: [PATCH 11/11] Hack enough of HTML Processor to get it to work --- .../class-od-html-processor.php | 161 +++++++++++------- 1 file changed, 95 insertions(+), 66 deletions(-) diff --git a/plugins/optimization-detective/class-od-html-processor.php b/plugins/optimization-detective/class-od-html-processor.php index b5da5fa393..c2b55c13ba 100644 --- a/plugins/optimization-detective/class-od-html-processor.php +++ b/plugins/optimization-detective/class-od-html-processor.php @@ -50,7 +50,7 @@ final class OD_HTML_Processor extends WP_HTML_Processor { * Open stack indices. * * @since n.e.x.t - * @var int[] + * @var array */ private $open_stack_indices = array(); @@ -61,7 +61,7 @@ final class OD_HTML_Processor extends WP_HTML_Processor { * whenever `self::seek()` is called, the bookmarked open stacks are populated back into `$this->open_stack_indices`. * * @since n.e.x.t - * @var array + * @var array> */ private $bookmarked_open_stack_indices = array(); @@ -95,12 +95,97 @@ final class OD_HTML_Processor extends WP_HTML_Processor { private $cursor_move_count = 0; /** - * Previous depth. + * Constructor. * - * @since n.e.x.t - * @var int + * Do not use this method. Use the static creator methods instead. + * + * @access private + * + * @since n.e.x.t + * + * @see WP_HTML_Processor::create_fragment() + * + * @param string $html HTML to process. + * @param string|null $use_the_static_create_methods_instead This constructor should not be called manually. + * + * @throws ReflectionException When unable to access private properties. */ - private $previous_depth = -1; + public function __construct( string $html, ?string $use_the_static_create_methods_instead = null ) { + parent::__construct( $html, $use_the_static_create_methods_instead ); + + $html_processor_reflection = new ReflectionClass( WP_HTML_Processor::class ); + $state_property_reflection = $html_processor_reflection->getProperty( 'state' ); + $state_property_reflection->setAccessible( true ); + + /** + * State. + * + * @var WP_HTML_Processor_State $state + */ + $state = $state_property_reflection->getValue( $this ); + + $stack_of_open_elements_reflection = new ReflectionObject( $state->stack_of_open_elements ); + + $push_handler_reflection = $stack_of_open_elements_reflection->getProperty( 'push_handler' ); + $push_handler_reflection->setAccessible( true ); + $existing_push_handler = $push_handler_reflection->getValue( $state->stack_of_open_elements ); + + $pop_handler_reflection = $stack_of_open_elements_reflection->getProperty( 'pop_handler' ); + $pop_handler_reflection->setAccessible( true ); + $existing_pop_handler = $pop_handler_reflection->getValue( $state->stack_of_open_elements ); + + $state->stack_of_open_elements->set_push_handler( // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.) + function ( WP_HTML_Token $token ) use ( $existing_push_handler, $state ): void { + if ( $existing_push_handler instanceof Closure ) { + $existing_push_handler( $token ); + } + + if ( '#' !== $token->node_name[0] && 'html' !== $token->node_name ) { + $this->current_xpath = null; // Clear cache. + + $depth = $state->stack_of_open_elements->count(); + if ( ! isset( $this->open_stack_indices[ $depth ] ) ) { + $this->open_stack_indices[ $depth ] = array( + 'tag_name' => $token->node_name, + 'index' => 0, + ); + } else { + $this->open_stack_indices[ $depth ]['tag_name'] = $token->node_name; + ++$this->open_stack_indices[ $depth ]['index']; + } + } + } + ); + $state->stack_of_open_elements->set_pop_handler( // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.) + function ( WP_HTML_Token $token ) use ( $existing_pop_handler, $state ): void { + if ( $existing_pop_handler instanceof Closure ) { + $existing_pop_handler( $token ); + } + + if ( '#' !== $token->node_name[0] && 'html' !== $token->node_name ) { + $this->current_xpath = null; + + if ( count( $this->open_stack_indices ) > $state->stack_of_open_elements->count() + 1 ) { + array_pop( $this->open_stack_indices ); + } + } + + if ( 'HEAD' === $token->node_name ) { + $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); + } elseif ( 'BODY' === $token->node_name ) { + // TODO: This currently always fails because self::STATE_COMPLETE === $this->parser_state, so the below hack is required. + $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); + } + } + ); + + // TODO: This is a hack! It's only needed because of a failure to set a bookmark when the BODY tag is popped above. + $body_end_position = strripos( $html, '' ); + if ( false === $body_end_position ) { + $body_end_position = strlen( $html ); + } + $this->bookmarks[ '_' . self::END_OF_BODY_BOOKMARK ] = new WP_HTML_Span( $body_end_position, 0 ); + } /** * Finds the next open tag. @@ -118,61 +203,6 @@ public function next_open_tag(): bool { return false; } - /** - * Finds the next token in the HTML document. - * - * @inheritDoc - * @since n.e.x.t - * - * @return bool Whether a token was parsed. - */ - public function next_token(): bool { - $previous_depth = $this->previous_depth; - $current_depth = $this->get_current_depth(); // @phpstan-ignore method.notFound (Not yet part of szepeviktor/phpstan-wordpress.) - $this->previous_depth = $current_depth; - - $this->current_xpath = null; // Clear cache. - ++$this->cursor_move_count; - if ( ! parent::next_token() ) { - $this->open_stack_indices = array(); - return false; - } - - if ( $this->get_token_type() === '#tag' ) { - if ( $current_depth < $previous_depth ) { - array_splice( $this->open_stack_indices, $current_depth ); - } elseif ( ! isset( $this->open_stack_indices[ $current_depth ] ) ) { - $this->open_stack_indices[ $current_depth ] = 0; - } else { - ++$this->open_stack_indices[ $current_depth ]; - } - -// if ( $current_depth === 0 ) { -// echo '=========>' . $this->get_tag() . PHP_EOL; -// } - -// if ( $current_depth > $previous_depth ) { -// $this->open_stack_tags[] = $this->get_tag(); -// } elseif ( $current_depth < $previous_depth ) { -// array_splice( $this->open_stack_indices, $current_depth ); -// } else { -// -// } - - if ( $current_depth < $previous_depth ) { - $tag_name = $this->get_tag(); - - // Set bookmarks for insertion of preload links and the detection script module. - if ( 'HEAD' === $tag_name ) { - $this->set_bookmark( self::END_OF_HEAD_BOOKMARK ); - } elseif ( 'BODY' === $tag_name ) { - $this->set_bookmark( self::END_OF_BODY_BOOKMARK ); - } - } - } - return true; - } - /** * Gets the number of times the cursor has moved. * @@ -247,7 +277,7 @@ public function remove_attribute( $name ): bool { // phpcs:ignore SlevomatCoding * Move the internal cursor in the Tag Processor to a given bookmark's location. * * @inheritDoc - * @since 0.4.0 + * @since n.e.x.t * * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. * @return bool Whether the internal cursor was successfully moved to the bookmark's location. @@ -264,7 +294,7 @@ public function seek( $bookmark_name ): bool { * Sets a bookmark in the HTML document. * * @inheritDoc - * @since 0.4.0 + * @since n.e.x.t * * @param string $bookmark_name Identifies this particular bookmark. * @return bool Whether the bookmark was successfully created. @@ -312,9 +342,8 @@ public function release_bookmark( $bookmark_name ): bool { public function get_xpath(): string { if ( null === $this->current_xpath ) { $this->current_xpath = ''; - foreach ( $this->get_breadcrumbs() ?? array() as $i => $tag_name ) { - $index = $this->open_stack_indices[ $i ] ?? 0; - $this->current_xpath .= sprintf( '/*[%d][self::%s]', $index + 1, $tag_name ); + foreach ( $this->open_stack_indices as $level ) { + $this->current_xpath .= sprintf( '/*[%d][self::%s]', $level['index'] + 1, $level['tag_name'] ); } } return $this->current_xpath;