Skip to content

Commit fe6c27b

Browse files
committed
Tests: Add new assertEqualHTML assertion.
Add a new `assertEqualHTML` method to `WP_UnitTestCase_Base` for tests comparing HTML (potentially including block markup). Internally, the assertion builds a deterministic tree string representation of the markup (using the HTML API) and compares the results. The format of the tree is inspired by the HTML5lib-tests tree format. It is extended with a special representation of block delimiters and their attributes. This format also makes it easier to visually spot the differences between the two strings if the assertion fails. Finally, this changeset updates `Tests_Dependencies_Scripts` to remove its `assertEqualMarkup` and `parse_markup_fragment` methods, and to use the newly introduced `assertEqualHTML` instead. Props bernhard-reiter, jonsurrell, dmsnell, jorbin, gziolo. Fixes #63527. git-svn-id: https://develop.svn.wordpress.org/trunk@60295 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 20e4872 commit fe6c27b

File tree

4 files changed

+725
-198
lines changed

4 files changed

+725
-198
lines changed

tests/phpunit/includes/abstract-testcase.php

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<?php
22

3+
require_once __DIR__ . '/build-visual-html-tree.php';
34
require_once __DIR__ . '/factory.php';
45
require_once __DIR__ . '/trac.php';
56

@@ -13,7 +14,6 @@
1314
* All WordPress unit tests should inherit from this class.
1415
*/
1516
abstract class WP_UnitTestCase_Base extends PHPUnit_Adapter_TestCase {
16-
1717
protected static $forced_tickets = array();
1818
protected $expected_deprecated = array();
1919
protected $caught_deprecated = array();
@@ -1180,6 +1180,44 @@ public function assertQueryTrue( ...$prop ) {
11801180
}
11811181
}
11821182

1183+
/**
 * Asserts that two HTML strings (which may contain block markup) are semantically equivalent.
 *
 * Both strings are converted into a textual tree representation via
 * `build_visual_html_tree()` and the two trees are compared with `assertSame()`.
 * That representation normalizes tag names, attribute names and attribute order,
 * sorts and de-duplicates class names, normalizes whitespace in `style` attributes,
 * and applies the same treatment to block delimiter comments.
 *
 * If building a tree throws an `Exception` and PHP's `DOM\HTMLDocument` class is
 * available (PHP 8.4+), both inputs are re-serialized through that parser and the
 * trees are built again from the re-serialized markup. Otherwise the original
 * exception is rethrown.
 *
 * @since 6.9.0
 *
 * @param string      $expected         The expected HTML.
 * @param string      $actual           The actual HTML.
 * @param string|null $fragment_context Optional. The fragment context, for example "<td>" expected HTML
 *                                      must occur within "<table><tr>" fragment context. Default "<body>".
 *                                      Only "<body>" or `null` are supported at this time.
 *                                      Set to `null` to parse a full HTML document.
 * @param string|null $message          Optional. The assertion error message.
 */
public function assertEqualHTML( string $expected, string $actual, ?string $fragment_context = '<body>', $message = 'HTML markup was not equivalent.' ): void {
	try {
		$tree_expected = build_visual_html_tree( $expected, $fragment_context );
		$tree_actual   = build_visual_html_tree( $actual, $fragment_context );
	} catch ( Exception $parse_failure ) {
		// Without the built-in DOM\HTMLDocument parser (PHP 8.4+) there is nothing to retry with.
		if ( ! class_exists( 'DOM\HtmlDocument' ) ) {
			throw $parse_failure;
		}

		// Retry: let the built-in parser re-serialize each input, then build the trees from that.
		$reserialized_expected = DOM\HtmlDocument::createFromString( $expected, LIBXML_NOERROR )->saveHtml();
		$tree_expected         = build_visual_html_tree( $reserialized_expected, $fragment_context );

		$reserialized_actual = DOM\HtmlDocument::createFromString( $actual, LIBXML_NOERROR )->saveHtml();
		$tree_actual         = build_visual_html_tree( $reserialized_actual, $fragment_context );
	}

	$this->assertSame( $tree_expected, $tree_actual, $message );
}
1220+
11831221
/**
11841222
* Helper function to convert a single-level array containing text strings to a named data provider.
11851223
*
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
<?php
2+
3+
/* phpcs:disable WordPress.Security.EscapeOutput.ExceptionNotEscaped */
4+
5+
/**
 * Generates a representation of the semantic HTML tree structure.
 *
 * This is inspired by the representation used by the HTML5lib tests. It's been extended here for
 * blocks to render the semantic structure of blocks and their attributes.
 * The order of attributes and class names is normalized both for HTML tags and blocks,
 * as is the whitespace in HTML tags' style attribute.
 *
 * For example, consider the following block markup:
 *
 *     <!-- wp:separator {"className":"is-style-default has-custom-classname","style":{"spacing":{"margin":{"top":"50px","bottom":"50px"}}},"backgroundColor":"accent-1"} -->
 *     <hr class="wp-block-separator is-style-default has-custom-classname" style="margin-top: 50px; margin-bottom: 50px" />
 *     <!-- /wp:separator -->
 *
 * This will be represented as follows (nesting depth is conveyed by indentation;
 * the indentation width shown here is illustrative):
 *
 *     BLOCK["core/separator"]
 *       {
 *         "backgroundColor": "accent-1",
 *         "className": "has-custom-classname is-style-default",
 *         "style": {
 *           "spacing": {
 *             "margin": {
 *               "top": "50px",
 *               "bottom": "50px"
 *             }
 *           }
 *         }
 *       }
 *       <hr>
 *         class="has-custom-classname is-style-default wp-block-separator"
 *         style="margin-top:50px;margin-bottom:50px;"
 *
 * Notes on normalization:
 *
 * - Text nodes consisting only of whitespace are omitted; adjacent text tokens are merged.
 * - Only the top-level keys of block attributes are sorted; nested keys keep their order.
 * - Each `style` declaration is split on its first colon only, so values that themselves
 *   contain colons (such as URLs) are preserved in full.
 * - A block closer is not printed; it only ends the nesting of the currently open block
 *   when its name matches that block.
 *
 * @see https://github.com/WordPress/wordpress-develop/blob/trunk/tests/phpunit/data/html5lib-tests/tree-construction/README.md
 *
 * @since 6.9.0
 *
 * @throws WP_HTML_Unsupported_Exception|Error If the markup could not be parsed.
 *
 * @param string      $html             Given test HTML.
 * @param string|null $fragment_context Context element in which to parse HTML, such as BODY or SVG.
 *                                      A falsy value parses `$html` as a full document.
 * @return string Tree structure of parsed HTML, if supported.
 */
function build_visual_html_tree( string $html, ?string $fragment_context ): string {
	$processor = $fragment_context
		? WP_HTML_Processor::create_fragment( $html, $fragment_context )
		: WP_HTML_Processor::create_full_parser( $html );
	if ( null === $processor ) {
		throw new Error( 'Could not create a parser.' );
	}

	// One indentation unit; repeated once per nesting level.
	$tree_indent = ' ';

	$output       = '';
	$indent_level = 0;
	$was_text     = null;
	$text_node    = '';

	// Stack of names of the blocks that are currently open.
	$block_context = array();

	while ( $processor->next_token() ) {
		if ( null !== $processor->get_last_error() ) {
			break;
		}

		$token_name = $processor->get_token_name();
		$token_type = $processor->get_token_type();
		$is_closer  = $processor->is_tag_closer();

		// A non-text token ends the pending (merged) text node: flush it with its closing quote.
		if ( $was_text && '#text' !== $token_name ) {
			if ( '' !== $text_node ) {
				$output .= "{$text_node}\"\n";
			}
			$was_text  = false;
			$text_node = '';
		}

		switch ( $token_type ) {
			case '#doctype':
				$doctype = $processor->get_doctype_info();
				$output .= "<!DOCTYPE {$doctype->name}";
				if ( null !== $doctype->public_identifier || null !== $doctype->system_identifier ) {
					$output .= " \"{$doctype->public_identifier}\" \"{$doctype->system_identifier}\"";
				}
				$output .= ">\n";
				break;

			case '#tag':
				$namespace = $processor->get_namespace();
				$tag_name  = 'html' === $namespace
					? strtolower( $processor->get_tag() )
					: "{$namespace} {$processor->get_qualified_tag_name()}";

				if ( $is_closer ) {
					--$indent_level;

					// TEMPLATE openers add an extra level for their "content" line; undo it here.
					if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
						--$indent_level;
					}

					break;
				}

				$tag_indent = $indent_level;

				if ( $processor->expects_closer() ) {
					++$indent_level;
				}

				$output .= str_repeat( $tree_indent, $tag_indent ) . "<{$tag_name}>\n";

				$attribute_names = $processor->get_attribute_names_with_prefix( '' );
				if ( $attribute_names ) {
					$sorted_attributes = array();
					foreach ( $attribute_names as $attribute_name ) {
						$sorted_attributes[ $attribute_name ] = $processor->get_qualified_attribute_name( $attribute_name );
					}

					/*
					 * Sorts attributes to match html5lib sort order.
					 *
					 *  - First comes normal HTML attributes.
					 *  - Then come adjusted foreign attributes; these have spaces in their names.
					 *  - Finally come non-adjusted foreign attributes; these have a colon in their names.
					 *
					 * Example:
					 *
					 *       From: <math xlink:author definitionurl xlink:title xlink:show>
					 *     Sorted: 'definitionURL', 'xlink show', 'xlink title', 'xlink:author'
					 */
					uasort(
						$sorted_attributes,
						static function ( $a, $b ) {
							$a_has_ns = str_contains( $a, ':' );
							$b_has_ns = str_contains( $b, ':' );

							// Attributes with `:` should follow all other attributes.
							if ( $a_has_ns !== $b_has_ns ) {
								return $a_has_ns ? 1 : -1;
							}

							$a_has_sp = str_contains( $a, ' ' );
							$b_has_sp = str_contains( $b, ' ' );

							// Attributes with a namespace ' ' should come after those without.
							if ( $a_has_sp !== $b_has_sp ) {
								return $a_has_sp ? 1 : -1;
							}

							return $a <=> $b;
						}
					);

					foreach ( $sorted_attributes as $attribute_name => $display_name ) {
						$val = $processor->get_attribute( $attribute_name );
						/*
						 * Attributes with no value are `true` with the HTML API,
						 * we use the empty string value in the tree structure.
						 */
						if ( true === $val ) {
							$val = '';
						} elseif ( 'class' === $attribute_name ) {
							$class_names = iterator_to_array( $processor->class_list() );
							sort( $class_names, SORT_STRING );
							$val = implode( ' ', $class_names );
						} elseif ( 'style' === $attribute_name ) {
							$normalized_style = '';
							foreach ( explode( ';', $val ) as $style ) {
								// Strict comparison: `empty()` would also discard the string "0".
								if ( '' === trim( $style ) ) {
									continue;
								}

								/*
								 * Split on the first colon only, so that values which themselves
								 * contain colons (e.g. `url(https://example.com/a.png)`) stay intact.
								 * A declaration without any colon yields an empty value instead of
								 * triggering an undefined-offset warning.
								 */
								$style_parts = explode( ':', $style, 2 );

								$style_key = trim( $style_parts[0] );
								$style_val = trim( $style_parts[1] ?? '' );

								$normalized_style .= "{$style_key}:{$style_val};";
							}
							$val = $normalized_style;
						}
						$output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n";
					}
				}

				// Self-contained tags contain their inner contents as modifiable text.
				$modifiable_text = $processor->get_modifiable_text();
				if ( '' !== $modifiable_text ) {
					$output .= str_repeat( $tree_indent, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n";
				}

				if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
					$output .= str_repeat( $tree_indent, $indent_level ) . "content\n";
					++$indent_level;
				}

				break;

			case '#cdata-section':
			case '#text':
				$text_content = $processor->get_modifiable_text();
				// Whitespace-only text is not part of the tree.
				if ( '' === trim( $text_content, " \f\t\r\n" ) ) {
					break;
				}
				$was_text = true;
				// Start a new text node (indent + opening quote) unless one is already pending.
				if ( '' === $text_node ) {
					$text_node .= str_repeat( $tree_indent, $indent_level ) . '"';
				}
				$text_node .= $text_content;
				break;

			case '#funky-comment':
				// Comments must be "<" then "!-- " then the data then " -->".
				$output .= str_repeat( $tree_indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
				break;

			case '#comment':
				// Comments must be "<" then "!--" then the data then "-->".
				$comment = "<!--{$processor->get_full_comment_text()}-->";

				// Maybe the comment is a block delimiter.
				$parser           = new WP_Block_Parser();
				$parser->document = $comment;
				$parser->offset   = 0;
				// Only the first three entries of the token are needed here.
				list( $delimiter_type, $block_name, $block_attrs ) = $parser->next_token();

				switch ( $delimiter_type ) {
					case 'block-opener':
					case 'void-block':
						$output .= str_repeat( $tree_indent, $indent_level ) . "BLOCK[\"{$block_name}\"]\n";

						if ( 'block-opener' === $delimiter_type ) {
							$block_context[] = $block_name;
							++$indent_level;
						}

						// If there are no attributes, we're done here.
						if ( empty( $block_attrs ) ) {
							break;
						}

						// Normalize attribute order.
						ksort( $block_attrs, SORT_STRING );

						if ( isset( $block_attrs['className'] ) ) {
							// Normalize class name order (and de-duplicate), as we need to be tolerant of different orders.
							// (Style attributes don't need this treatment, as they are parsed into a nested array.)
							$block_class_processor = new WP_HTML_Tag_Processor( '<div>' );
							$block_class_processor->next_token();
							$block_class_processor->set_attribute( 'class', $block_attrs['className'] );
							$class_names = iterator_to_array( $block_class_processor->class_list() );
							sort( $class_names, SORT_STRING );
							$block_attrs['className'] = implode( ' ', $class_names );
						}

						$block_attrs = json_encode( $block_attrs, JSON_PRETTY_PRINT );
						// Fix indentation by "halving" it (2 spaces instead of 4).
						// Additionally, we need to indent each line by the current indentation level.
						$block_attrs = preg_replace( '/^( +)\1/m', str_repeat( $tree_indent, $indent_level ) . '$1', $block_attrs );
						// Finally, indent the first line, and the last line (with the closing curly brace).
						$output .= str_repeat( $tree_indent, $indent_level ) . substr( $block_attrs, 0, -1 ) . str_repeat( $tree_indent, $indent_level ) . "}\n";
						break;
					case 'block-closer':
						// Is this a closer for the currently open block?
						if ( ! empty( $block_context ) && end( $block_context ) === $block_name ) {
							// If it's a closer, we don't add it to the output.
							// Instead, we decrease indentation and remove the block from block context stack.
							--$indent_level;
							array_pop( $block_context );
						}
						break;
					default: // Not a block delimiter.
						$output .= str_repeat( $tree_indent, $indent_level ) . $comment . "\n";
						break;
				}
				break;
			default:
				// phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_var_export
				$serialized_token_type = var_export( $processor->get_token_type(), true );
				throw new Error( "Unhandled token type for tree construction: {$serialized_token_type}" );
		}
	}

	if ( null !== $processor->get_unsupported_exception() ) {
		throw $processor->get_unsupported_exception();
	}

	if ( null !== $processor->get_last_error() ) {
		throw new Error( "Parser error: {$processor->get_last_error()}" );
	}

	if ( $processor->paused_at_incomplete_token() ) {
		throw new Error( 'Paused at incomplete token.' );
	}

	// Flush a text node that was still pending when the document ended.
	if ( '' !== $text_node ) {
		$output .= "{$text_node}\"\n";
	}

	return $output;
}

0 commit comments

Comments
 (0)