diff --git a/CHANGELOG.md b/CHANGELOG.md index 067df39..073aa68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog -All notable changes to `Create PHP Package` will be documented in this file. +All notable changes to `WP Block Converter` will be documented in this file. -## 0.1.0 - 202X-XX-XX +## 1.0.0 - 2022-12-19 - Initial release diff --git a/README.md b/README.md index 3c00581..d566235 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,46 @@ composer require alleyinteractive/wp-block-converter ## Usage -Use this package like so: +Use this package like so to convert HTML into Gutenberg Blocks: ```php -$package = Alley\\Block_Converter\WP_Block_Converter\WP_Block_Converter(); -$package->perform_magic(); +use Alley\WP\Block_Converter\Block_Converter; + +$converter = new Block_Converter(); + +$blocks = $converter->convert( '

Some HTML

' ); +``` + +### Filtering the Blocks + +The blocks can be filtered on a block-by-block basis or for an entire HTML body. + +#### `wp_block_converter_block` + +Filter the generated block for a specific node. + +```php +use Alley\WP\Block_Converter\Block; + +add_filter( 'wp_block_converter_block', function ( Block $block, \DOMElement $node ): ?Block { + // Modify the block before it is serialized. + $block->content = '...'; + $block->block_name = '...'; + $block->attributes = [ ... ]; + + return $block; +}, 10, 2 ); +``` + +#### `wp_block_converter_document_html` + +Filter the generated blocks for an entire HTML body. + +```php +add_filter( 'wp_block_converter_document_html', function( string $blocks, \DOMNodeList $content ): string { + // ... + return $blocks; +}, 10, 2 ); ``` ## Changelog @@ -30,7 +65,7 @@ Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed re This project is actively maintained by [Alley Interactive](https://github.com/alleyinteractive). Like what you see? [Come work -with us](https://alley.co/careers/). +with us](https://alley.com/careers/). - [Sean Fisher](https://github.com/srtfisher) - [All Contributors](../../contributors) diff --git a/composer.json b/composer.json index 4f54a8b..82f3030 100644 --- a/composer.json +++ b/composer.json @@ -31,6 +31,11 @@ }, "sort-packages": true }, + "autoload": { + "files": [ + "src/helpers.php" + ] + }, "extra": { "wordpress-autoloader": { "autoload": { diff --git a/phpcs.xml b/phpcs.xml index a491aed..c35223b 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -38,6 +38,7 @@ + diff --git a/phpunit.xml b/phpunit.xml index 7d8c272..3965e9e 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -11,8 +11,5 @@ tests/feature - - tests/unit - diff --git a/src/class-block-converter.php b/src/class-block-converter.php new file mode 100644 index 0000000..83839ea --- /dev/null +++ b/src/class-block-converter.php @@ -0,0 +1,387 @@ +html ); + + // Bail early if is empty. 
+ if ( empty( $content->item( 0 )->childNodes ) ) { + return ''; + } + + $html = []; + + foreach ( $content->item( 0 )->childNodes as $node ) { + if ( '#text' === $node->nodeName ) { + continue; + } + + /** + * Hook to allow output customizations. + * + * @since 1.0.0 + * + * @param Block|null $block The generated block object. + * @param DOMNode $node The node being converted. + */ + $tag_block = apply_filters( 'wp_block_converter_block', $this->{$node->nodeName}( $node ), $node ); + + // Bail early if is empty. + if ( empty( $tag_block ) ) { + continue; + } + + // Merge the block into the HTML collection. + $html[] = $this->minify_block( (string) $tag_block ); + } + + $html = implode( "\n\n", $html ); + + // Remove empty blocks. + $html = $this->remove_empty_blocks( $html ); + + /** + * Content converted into blocks. + * + * @since 1.0.0 + * + * @param string $html HTML converted into Gutenberg blocks. + * @param DOMNodeList $content The original DOMNodeList. + */ + return trim( (string) apply_filters( 'wp_block_converter_document_html', $html, $content ) ); + } + + /** + * Magic function to call parsers for specific HTML tags. + * + * @param string $name The tag name. + * @param array $arguments The DOMNode. + * @return Block|null + */ + public function __call( $name, $arguments ): ?Block { + return match ( $name ) { + 'ul' => $this->ul( $arguments[0] ), + 'ol' => $this->ol( $arguments[0] ), + 'img' => $this->img( $arguments[0] ), + 'blockquote' => $this->blockquote( $arguments[0] ), + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' => $this->h( $arguments[0] ), + 'p', 'a', 'abbr', 'b', 'code', 'em', 'i', 'strong', 'sub', 'sup', 'span', 'u' => $this->p( $arguments[0] ), + 'br', 'cite', 'source' => null, + default => $this->html( $arguments[0] ), + }; + } + + /** + * Magic function to convert to a string. + */ + public function __toString(): string { + return $this->convert(); + } + + /** + * Create heading blocks. + * + * @param DOMNode $node The node. 
+ * @return Block|null + */ + protected function h( DOMNode $node ): ?Block { + $content = static::get_node_html( $node ); + + if ( empty( $content ) ) { + return null; + } + + return new Block( + block_name: 'heading', + attributes: [ + 'level' => absint( str_replace( 'h', '', $node->nodeName ) ), + ], + content: $content, + ); + } + + /** + * Create blockquote block. + * + * @param DOMNode $node The node. + * @return Block|null + */ + protected function blockquote( DOMNode $node ): ?Block { + $content = static::get_node_html( $node ); + + if ( empty( $content ) ) { + return null; + } + + return new Block( + block_name: 'quote', + attributes: [], + content: $content, + ); + } + + /** + * Create paragraph blocks. + * + * @param DOMNode $node The node. + * @return Block|null + */ + protected function p( DOMNode $node ): ?Block { + $content = static::get_node_html( $node ); + + if ( empty( $content ) ) { + return null; + } + + return new Block( + block_name: 'paragraph', + attributes: [], + content: $content, + ); + } + + /** + * Create ul blocks. + * + * @param DOMNode $node The node. + * @return Block + */ + protected function ul( DOMNode $node ): Block { + return new Block( + block_name: 'list', + content: static::get_node_html( $node ), + ); + } + + /** + * Create img blocks. + * + * @param DOMElement|DOMNode $element The node. + * @return Block|null + */ + protected function img( DOMElement|DOMNode $element ): ?Block { + if ( ! $element instanceof DOMElement ) { + return null; + } + + $image_src = $element->getAttribute( 'data-srcset' ); + $alt = $element->getAttribute( 'alt' ); + + if ( empty( $image_src ) && ! empty( $element->getAttribute( 'src' ) ) ) { + $image_src = $element->getAttribute( 'src' ); + } + + $image_src = $this->upload_image( $image_src, $alt ?? '' ); + + return new Block( + block_name: 'image', + content: sprintf( + '
%s
', + esc_url( $image_src ?? '' ), + esc_attr( $alt ?? '' ), + ), + ); + } + + /** + * Create ol blocks. + * + * @param DOMNode $node The node. + * @return block + */ + protected function ol( DOMNode $node ): Block { + return new Block( + block_name: 'list', + attributes: [ + 'ordered' => true, + ], + content: static::get_node_html( $node ), + ); + } + + /** + * Create HTML blocks. + * + * @param DOMNode $node The node. + * @return Block|null + */ + protected function html( DOMNode $node ): ?Block { + // Get the raw HTML. + $html = static::get_node_html( $node ); + + if ( empty( $html ) ) { + return null; + } + + return new Block( + block_name: 'html', + content: $html, + ); + } + + /** + * Get nodes from a specific tag. + * + * @param DOMNode $node The current DOMNode. + * @param string $tag The tag to search for. + * @return DOMNodeList The raw HTML. + */ + public static function get_nodes( DOMNode $node, $tag ) { + return static::get_node_tag_from_html( + static::get_node_html( $node ), + $tag + ); + } + + /** + * Get the raw HTML from a DOMNode node. + * + * @param DOMNode $node The current DOMNode. + * @return string The raw HTML. + */ + public static function get_node_html( DOMNode $node ): string { + return $node->ownerDocument->saveHTML( $node ); + } + + /** + * Get the HTML content. + * + * @param string $html The HTML content. + * @param string $tag The tag to search for. + * @return DOMNodeList The list of DOMNodes. + */ + public static function get_node_tag_from_html( $html, $tag = 'body' ) { + $dom = new \DOMDocument(); + + $errors = libxml_use_internal_errors( true ); + + $dom->loadHTML( '' . $html ); + + libxml_use_internal_errors( $errors ); + + return $dom->getElementsByTagName( $tag ); + } + + /** + * Removing whitespace between blocks + * + * @param string $block Gutenberg blocks. 
+ * @return string + */ + protected function minify_block( $block ) { + if ( preg_match( '/(\s){2,}/s', $block ) === 1 ) { + return preg_replace( '/(\s){2,}/s', '', $block ); + } + + return $block; + } + + /** + * Quick way to remove all URL arguments. + * + * @param string $url URL. + * @return string + */ + public function remove_image_args( $url ): string { + // Split url. + $url_parts = wp_parse_url( $url ); + + return $url_parts['scheme'] . '://' . $url_parts['host'] . $url_parts['path']; + } + + /** + * Upload image. + * + * @param string $src Image url. + * @param string $alt Image alt. + * @return string + */ + public function upload_image( string $src, string $alt ): string { + // Remove all image arguments. + $src = $this->remove_image_args( $src ); + + return create_or_get_attachment_from_url( $src, [ 'alt' => $alt ] ); + } + + /** + * Remove any empty blocks. + * + * @param string $html The current HTML. + * @return string $html The new HTML. + */ + public function remove_empty_blocks( string $html ): string { + $html = str_replace( + [ +// phpcs:disable +' +
+', +' +
 
+', +' +
 
+', +' +
  
+', +'


', +'




', +' +


+', +' +
+', +' +

+

+', +// phpcs:enable + ], + '', + $html + ); + + return $this->remove_empty_p_blocks( $html ); + } + + /** + * Remove any empty p blocks. + * + * @param string $html The current HTML. + * @return string $html The new HTML. + */ + public function remove_empty_p_blocks( string $html ): string { + return \preg_replace( '/(\<\!\-\- wp\:paragraph \-\-\>[\s\n\r]*?\[\s\n\r]*?\<\/p\>[\s\n\r]*?\<\!\-\- \/wp\:paragraph \-\-\>)/', '', $html ); + } +} diff --git a/src/class-block.php b/src/class-block.php new file mode 100644 index 0000000..d99c0cb --- /dev/null +++ b/src/class-block.php @@ -0,0 +1,37 @@ +block_name, $this->attributes, $this->content ); + } + + /** + * Convert the block to HTML. + */ + public function __toString() { + return $this->render(); + } +} diff --git a/src/class-wp-block-converter.php b/src/class-wp-block-converter.php deleted file mode 100644 index a39a04b..0000000 --- a/src/class-wp-block-converter.php +++ /dev/null @@ -1,15 +0,0 @@ - 'ids', + 'meta_key' => $meta_key, + 'meta_value' => $src, // phpcs:ignore WordPress.DB.SlowDBQuery.slow_db_query_meta_value + 'post_status' => 'any', + 'post_type' => 'attachment', + 'posts_per_page' => 1, + 'suppress_filters' => false, + ] + ); + + if ( ! empty( $attachment_ids ) ) { + return array_shift( $attachment_ids ); + } + + if ( ! function_exists( 'media_sideload_image' ) ) { + require_once ABSPATH . 'wp-admin/includes/file.php'; + require_once ABSPATH . 'wp-admin/includes/image.php'; + require_once ABSPATH . 'wp-admin/includes/media.php'; + } + + $attachment_id = media_sideload_image( $src, $args['parent_post_id'] ?? 0, $args['description'] ?? '', 'id' ); + + if ( is_wp_error( $attachment_id ) ) { + return $attachment_id; + } + + // Store the original URL for future reference. + update_post_meta( $attachment_id, $meta_key, $src ); + + $postarr = [ + 'post_content' => $args['description'] ?? null, + 'post_excerpt' => $args['caption'] ?? null, + 'post_title' => $args['title'] ?? 
null, + 'meta_input' => array_merge( + (array) ( $args['meta'] ?? [] ), + [ + '_wp_attachment_image_alt' => $args['alt'] ?? null, + ], + ), + ]; + + // Update the rest of the arguments if they were passed. + if ( ! empty( array_filter( $postarr ) ) ) { + $postarr['ID'] = $attachment_id; + + \wp_update_post( $postarr ); + } + + return $attachment_id; +} diff --git a/tests/feature/test-block-converter.php b/tests/feature/test-block-converter.php new file mode 100644 index 0000000..a0c7b14 --- /dev/null +++ b/tests/feature/test-block-converter.php @@ -0,0 +1,160 @@ +Content to migrate

Heading 01

'; + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertEquals( +'

Content to migrate

+ +

Heading 01

', + $block, + ); + } + + public function test_convert_heading_h1_to_block() { + $html = '

Another content

'; + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + '' . $html . '', + $block, + ); + } + + public function test_convert_heading_h2_to_block() { + $html = '

Another content

'; + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + '' . $html . '', + $block, + ); + } + + public function test_convert_ol_to_block() { + $html = '
  1. Random content
  2. Another random content
'; + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + '' . $html . '', + $block, + ); + } + + public function test_convert_ul_to_block() { + $html = ''; + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + "{$html}", + $block, + ); + } + + public function test_convert_paragraphs_to_block() { + $converter = new Block_Converter( '

bar

' ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + '

bar

', + $block, + ); + } + + public function test_convert_with_empty_paragraphs_to_block() { + $converter = new Block_Converter( '

bar

' ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + '

bar

', + $block, + ); + } + + public function test_convert_with_empty_paragraphs_of_arbitrary_length_to_block() { + $arbitraryNewLines = str_repeat( "\n\r", mt_rand( 1, 1000) ); + $arbitrarySpaces = str_repeat( " ", mt_rand( 1, 1000 ) ); + + $converter = new Block_Converter( '

bar

' . $arbitrarySpaces . $arbitraryNewLines . '

' ); + $block = $converter->convert(); + + $this->assertNotEmpty( $block ); + $this->assertSame( + $block, + '

bar

', + ); + } + + public function test_convert_with_filter_override_single_tag() { + $this->expectApplied( 'wp_block_converter_document_html' )->once(); + + $html = '

Content to migrate

Heading 01

'; + + add_filter( + 'wp_block_converter_block', + function ( Block $block ) { + remove_all_filters( 'wp_block_converter_block' ); + + $block->content = 'Override content'; + + return $block; + } + ); + + + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + + $this->assertSame( + 'Override content + +

Heading 01

', + $block, + ); + } + + public function test_convert_with_filter_override_entire_content() { + $this->expectApplied( 'wp_block_converter_block' )->twice(); + $this->expectApplied( 'wp_block_converter_document_html' )->once(); + + $html = '

Content to migrate

Heading 01

'; + + add_filter( 'wp_block_converter_document_html', fn () => 'Override' ); + + $converter = new Block_Converter( $html ); + $block = $converter->convert(); + + $this->assertSame( 'Override', $block ); + } +} diff --git a/tests/feature/test-example-test.php b/tests/feature/test-example-test.php deleted file mode 100644 index 0eace7a..0000000 --- a/tests/feature/test-example-test.php +++ /dev/null @@ -1,14 +0,0 @@ -assertTrue( true ); - $this->assertNotEmpty( home_url() ); - } -} diff --git a/tests/unit/test-example-unit-test.php b/tests/unit/test-example-unit-test.php deleted file mode 100644 index adf88b7..0000000 --- a/tests/unit/test-example-unit-test.php +++ /dev/null @@ -1,13 +0,0 @@ -assertTrue(true); - } -}