diff --git a/CHANGELOG.md b/CHANGELOG.md
index 067df39..073aa68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
# Changelog
-All notable changes to `Create PHP Package` will be documented in this file.
+All notable changes to `WP Block Converter` will be documented in this file.
-## 0.1.0 - 202X-XX-XX
+## 1.0.0 - 2022-12-19
- Initial release
diff --git a/README.md b/README.md
index 3c00581..d566235 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,46 @@ composer require alleyinteractive/wp-block-converter
## Usage
-Use this package like so:
+Use this package like so to convert HTML into Gutenberg Blocks:
```php
-$package = Alley\\Block_Converter\WP_Block_Converter\WP_Block_Converter();
-$package->perform_magic();
+use Alley\WP\Block_Converter\Block_Converter;
+
+$converter = new Block_Converter();
+
+$blocks = $converter->convert( '<p>Some HTML</p>' );
+```
+
+### Filtering the Blocks
+
+The blocks can be filtered on a block-by-block basis or for an entire HTML body.
+
+#### `wp_block_converter_block`
+
+Filter the generated block for a specific node.
+
+```php
+use Alley\WP\Block_Converter\Block;
+
+add_filter( 'wp_block_converter_block', function ( Block $block, \DOMElement $node ): ?Block {
+ // Modify the block before it is serialized.
+ $block->content = '...';
+ $block->blockName = '...';
+ $block->attributes = [ ... ];
+
+ return $block;
+}, 10, 2 );
+```
+
+#### `wp_block_converter_document_html`
+
+Filter the generated blocks for an entire HTML body.
+
+```php
+add_filter( 'wp_block_converter_document_html', function( string $blocks, \DOMNodeList $content ): string {
+ // ...
+ return $blocks;
+}, 10, 2 );
```
## Changelog
@@ -30,7 +65,7 @@ Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed re
This project is actively maintained by [Alley
Interactive](https://github.com/alleyinteractive). Like what you see? [Come work
-with us](https://alley.co/careers/).
+with us](https://alley.com/careers/).
- [Sean Fisher](https://github.com/srtfisher)
- [All Contributors](../../contributors)
diff --git a/composer.json b/composer.json
index 4f54a8b..82f3030 100644
--- a/composer.json
+++ b/composer.json
@@ -31,6 +31,11 @@
},
"sort-packages": true
},
+ "autoload": {
+ "files": [
+ "src/helpers.php"
+ ]
+ },
"extra": {
"wordpress-autoloader": {
"autoload": {
diff --git a/phpcs.xml b/phpcs.xml
index a491aed..c35223b 100644
--- a/phpcs.xml
+++ b/phpcs.xml
@@ -38,6 +38,7 @@
+
diff --git a/phpunit.xml b/phpunit.xml
index 7d8c272..3965e9e 100644
--- a/phpunit.xml
+++ b/phpunit.xml
@@ -11,8 +11,5 @@
tests/feature
-
- tests/unit
-
diff --git a/src/class-block-converter.php b/src/class-block-converter.php
new file mode 100644
index 0000000..83839ea
--- /dev/null
+++ b/src/class-block-converter.php
@@ -0,0 +1,387 @@
+html );
+
+ // Bail early if is empty.
+ if ( empty( $content->item( 0 )->childNodes ) ) {
+ return '';
+ }
+
+ $html = [];
+
+ foreach ( $content->item( 0 )->childNodes as $node ) {
+ if ( '#text' === $node->nodeName ) {
+ continue;
+ }
+
+ /**
+ * Hook to allow output customizations.
+ *
+ * @since 1.0.0
+ *
+ * @param Block|null $block The generated block object.
+ * @param DOMNode $node The node being converted.
+ */
+ $tag_block = apply_filters( 'wp_block_converter_block', $this->{$node->nodeName}( $node ), $node );
+
+ // Bail early if is empty.
+ if ( empty( $tag_block ) ) {
+ continue;
+ }
+
+ // Merge the block into the HTML collection.
+ $html[] = $this->minify_block( (string) $tag_block );
+ }
+
+ $html = implode( "\n\n", $html );
+
+ // Remove empty blocks.
+ $html = $this->remove_empty_blocks( $html );
+
+ /**
+ * Content converted into blocks.
+ *
+ * @since 1.0.0
+ *
+ * @param string $html HTML converted into Gutenberg blocks.
+ * @param DOMNodeList $content The original DOMNodeList.
+ */
+ return trim( (string) apply_filters( 'wp_block_converter_document_html', $html, $content ) );
+ }
+
+ /**
+  * Route a dynamic method call, named after an HTML tag, to the matching block builder.
+  *
+  * @param string $name      The tag name used as the method name.
+  * @param array  $arguments The call arguments; the first entry is the DOMNode to convert.
+  * @return Block|null The block built for the tag, or null for tags that produce no block.
+  */
+ public function __call( $name, $arguments ): ?Block {
+     // Resolve the tag to the name of the builder method; null means the tag yields no block.
+     $builder = match ( $name ) {
+         'br', 'cite', 'source' => null,
+         'ul', 'ol', 'img', 'blockquote' => $name,
+         'h1', 'h2', 'h3', 'h4', 'h5', 'h6' => 'h',
+         'p', 'a', 'abbr', 'b', 'code', 'em', 'i', 'strong', 'sub', 'sup', 'span', 'u' => 'p',
+         default => 'html',
+     };
+
+     if ( null === $builder ) {
+         return null;
+     }
+
+     return $this->{$builder}( $arguments[0] );
+ }
+
+ /**
+  * Cast the converter to a string by running the conversion.
+  *
+  * @return string The value returned by convert().
+  */
+ public function __toString(): string {
+     $converted = $this->convert();
+
+     return $converted;
+ }
+
+ /**
+  * Build a heading block from an h1-h6 node.
+  *
+  * @param DOMNode $node The heading node.
+  * @return Block|null The heading block, or null when the node serializes to nothing.
+  */
+ protected function h( DOMNode $node ): ?Block {
+     $markup = static::get_node_html( $node );
+
+     if ( empty( $markup ) ) {
+         return null;
+     }
+
+     // The heading level is whatever remains of the tag name once the "h" is removed.
+     $level = absint( str_replace( 'h', '', $node->nodeName ) );
+
+     return new Block(
+         block_name: 'heading',
+         attributes: [ 'level' => $level ],
+         content: $markup,
+     );
+ }
+
+ /**
+  * Build a quote block from a blockquote node.
+  *
+  * @param DOMNode $node The blockquote node.
+  * @return Block|null The quote block, or null when the node serializes to nothing.
+  */
+ protected function blockquote( DOMNode $node ): ?Block {
+     $quote_html = static::get_node_html( $node );
+
+     return empty( $quote_html )
+         ? null
+         : new Block(
+             block_name: 'quote',
+             attributes: [],
+             content: $quote_html,
+         );
+ }
+
+ /**
+  * Build a paragraph block from a node.
+  *
+  * @param DOMNode $node The node to wrap in a paragraph block.
+  * @return Block|null The paragraph block, or null when the node serializes to nothing.
+  */
+ protected function p( DOMNode $node ): ?Block {
+     $paragraph_html = static::get_node_html( $node );
+
+     if ( ! empty( $paragraph_html ) ) {
+         return new Block(
+             block_name: 'paragraph',
+             attributes: [],
+             content: $paragraph_html,
+         );
+     }
+
+     return null;
+ }
+
+ /**
+  * Build a list block from a ul node.
+  *
+  * @param DOMNode $node The ul node.
+  * @return Block The list block holding the node's serialized HTML.
+  */
+ protected function ul( DOMNode $node ): Block {
+     $list_html = static::get_node_html( $node );
+
+     return new Block(
+         block_name: 'list',
+         content: $list_html,
+     );
+ }
+
+ /**
+  * Build an image block from an img node.
+  *
+  * The image source is taken from the data-srcset attribute, falling back to
+  * src. The source is passed through upload_image() before it is rendered.
+  *
+  * @param DOMElement|DOMNode $element The img node.
+  * @return Block|null The image block, or null when the node is not an element
+  *                    or carries no image source.
+  */
+ protected function img( DOMElement|DOMNode $element ): ?Block {
+     if ( ! $element instanceof DOMElement ) {
+         return null;
+     }
+
+     // Prefer data-srcset and fall back to src when it is empty.
+     $image_src = $element->getAttribute( 'data-srcset' );
+
+     if ( empty( $image_src ) ) {
+         $image_src = $element->getAttribute( 'src' );
+     }
+
+     // Without a source there is nothing to upload or render; skip the node
+     // instead of handing an empty URL to upload_image().
+     if ( '' === $image_src ) {
+         return null;
+     }
+
+     // getAttribute() returns an empty string when the attribute is absent.
+     $alt       = $element->getAttribute( 'alt' );
+     $image_src = $this->upload_image( $image_src, $alt );
+
+     return new Block(
+         block_name: 'image',
+         content: sprintf(
+             '',
+             esc_url( $image_src ),
+             esc_attr( $alt ),
+         ),
+     );
+ }
+
+ /**
+  * Build an ordered list block from an ol node.
+  *
+  * @param DOMNode $node The ol node.
+  * @return Block The list block, flagged as ordered, holding the node's serialized HTML.
+  */
+ protected function ol( DOMNode $node ): Block {
+     $attributes = [
+         'ordered' => true,
+     ];
+
+     return new Block(
+         block_name: 'list',
+         attributes: $attributes,
+         content: static::get_node_html( $node ),
+     );
+ }
+
+ /**
+  * Build a raw HTML block from a node.
+  *
+  * @param DOMNode $node The node to wrap.
+  * @return Block|null The HTML block, or null when the node serializes to nothing.
+  */
+ protected function html( DOMNode $node ): ?Block {
+     $raw = static::get_node_html( $node );
+
+     // Nothing to wrap.
+     if ( empty( $raw ) ) {
+         return null;
+     }
+
+     $block = new Block(
+         block_name: 'html',
+         content: $raw,
+     );
+
+     return $block;
+ }
+
+ /**
+  * Find the elements with a given tag name inside a node.
+  *
+  * The node is serialized to HTML and re-parsed, so the returned nodes belong
+  * to a new document rather than to the node's own document.
+  *
+  * @param DOMNode $node The current DOMNode.
+  * @param string  $tag  The tag to search for.
+  * @return DOMNodeList The matching nodes.
+  */
+ public static function get_nodes( DOMNode $node, $tag ) {
+     $node_html = static::get_node_html( $node );
+
+     return static::get_node_tag_from_html( $node_html, $tag );
+ }
+
+ /**
+  * Get the raw HTML of a DOMNode, including the node's own tag.
+  *
+  * @param DOMNode $node The current DOMNode.
+  * @return string The serialized HTML, or an empty string when the node has
+  *                no document to serialize it or serialization fails.
+  */
+ public static function get_node_html( DOMNode $node ): string {
+     // ownerDocument is null on a document node, so use the node itself in that case.
+     $document = $node instanceof \DOMDocument ? $node : $node->ownerDocument;
+
+     // A node that was never attached to a document cannot be serialized.
+     if ( null === $document ) {
+         return '';
+     }
+
+     // Passing null serializes the whole document.
+     $html = $document->saveHTML( $node === $document ? null : $node );
+
+     // saveHTML() returns false on failure; keep the string contract.
+     return false === $html ? '' : $html;
+ }
+
+ /**
+  * Parse an HTML string and return the elements matching a tag name.
+  *
+  * libxml errors raised while parsing are collected internally instead of
+  * being emitted, and the previous error-handling setting is restored
+  * afterwards, even when parsing throws.
+  *
+  * @param string $html The HTML content.
+  * @param string $tag  The tag to search for. Defaults to 'body'.
+  * @return DOMNodeList The list of matching DOMNodes.
+  */
+ public static function get_node_tag_from_html( $html, $tag = 'body' ) {
+     $dom = new \DOMDocument();
+
+     // Buffer parser errors instead of emitting warnings, remembering the previous setting.
+     $errors = libxml_use_internal_errors( true );
+
+     try {
+         $dom->loadHTML( '' . $html );
+     } finally {
+         // Drop the errors buffered by this parse so they do not pile up across
+         // conversions. When the caller had already enabled internal errors,
+         // leave the buffer alone so the caller can still inspect it.
+         if ( ! $errors ) {
+             libxml_clear_errors();
+         }
+
+         libxml_use_internal_errors( $errors );
+     }
+
+     return $dom->getElementsByTagName( $tag );
+ }
+
+ /**
+  * Strip every run of two or more whitespace characters from a block.
+  *
+  * Single whitespace characters are left untouched.
+  *
+  * @param string $block Gutenberg blocks.
+  * @return string
+  */
+ protected function minify_block( $block ) {
+     $whitespace_run = '/(\s){2,}/s';
+
+     return 1 === preg_match( $whitespace_run, $block )
+         ? preg_replace( $whitespace_run, '', $block )
+         : $block;
+ }
+
+ /**
+  * Strip the query string and fragment from a URL.
+  *
+  * Only the components that are present are reassembled, so relative,
+  * protocol-relative and path-less URLs no longer trigger undefined-index
+  * warnings or come back malformed. A port, when present, is kept.
+  *
+  * @param string $url URL.
+  * @return string The URL without its query string and fragment, or the input
+  *                unchanged when it cannot be parsed.
+  */
+ public function remove_image_args( $url ): string {
+     $url_parts = wp_parse_url( $url );
+
+     // Seriously malformed URLs cannot be split; hand the input back untouched.
+     if ( ! is_array( $url_parts ) ) {
+         return (string) $url;
+     }
+
+     $clean = '';
+
+     if ( isset( $url_parts['host'] ) ) {
+         // Protocol-relative URLs have a host but no scheme.
+         $clean  = isset( $url_parts['scheme'] ) ? $url_parts['scheme'] . '://' : '//';
+         $clean .= $url_parts['host'];
+
+         if ( isset( $url_parts['port'] ) ) {
+             $clean .= ':' . $url_parts['port'];
+         }
+     } elseif ( isset( $url_parts['scheme'] ) ) {
+         // Host-less URLs (e.g. "data:") keep their scheme prefix.
+         $clean = $url_parts['scheme'] . ':';
+     }
+
+     return $clean . ( $url_parts['path'] ?? '' );
+ }
+
+ /**
+  * Create or look up an attachment for an image URL.
+  *
+  * The URL is first cleaned with remove_image_args(), then handed to
+  * create_or_get_attachment_from_url() together with the alt text.
+  *
+  * @param string $src Image url.
+  * @param string $alt Image alt.
+  * @return string The value returned by create_or_get_attachment_from_url().
+  */
+ public function upload_image( string $src, string $alt ): string {
+     $clean_src = $this->remove_image_args( $src );
+     $meta      = [ 'alt' => $alt ];
+
+     return create_or_get_attachment_from_url( $clean_src, $meta );
+ }
+
+ /**
+ * Remove any empty blocks.
+ *
+ * Deletes every occurrence of a fixed list of literal strings from the HTML
+ * (code sniffing is disabled around that list), then passes the result to
+ * remove_empty_p_blocks().
+ *
+ * @param string $html The current HTML.
+ * @return string $html The new HTML.
+ */
+ public function remove_empty_blocks( string $html ): string {
+ $html = str_replace(
+ [
+// phpcs:disable
+'
+
+',
+'
+
+',
+'
+
+',
+'
+
+',
+'
',
+'
',
+'
+
+',
+'
+
+',
+'
+
+
+',
+// phpcs:enable
+ ],
+ '',
+ $html
+ );
+
+ return $this->remove_empty_p_blocks( $html );
+ }
+
+ /**
+ * Remove any empty p blocks.
+ *
+ * @param string $html The current HTML.
+ * @return string $html The new HTML.
+ */
+ public function remove_empty_p_blocks( string $html ): string {
+ return \preg_replace( '/(\<\!\-\- wp\:paragraph \-\-\>[\s\n\r]*?\