diff --git a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php index a430306d20..60ebe10d3c 100644 --- a/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php +++ b/packages/playground/data-liberation/src/import/WP_Topological_Sorter.php @@ -9,9 +9,8 @@ */ class WP_Topological_Sorter { - public $posts = array(); - public $categories = array(); - public $category_index = array(); + public $posts = array(); + public $categories = array(); /** * Variable for keeping counts of orphaned posts/attachments, it'll also be assigned as temporarly post ID. @@ -50,9 +49,8 @@ public function map_category( $byte_offset, $data ) { } $this->categories[ $data['slug'] ] = array( - 'parent' => $data['parent'], - 'byte_offset' => $byte_offset, - 'visited' => false, + array_key_exists( 'parent', $data ) ? $data['parent'] : '', + $byte_offset, ); } @@ -85,8 +83,12 @@ public function map_post( $byte_offset, $data ) { /** * Get the byte offset of an element, and remove it from the list. + * + * @param int $id The ID of the post to get the byte offset. + * + * @return int|bool The byte offset of the post, or false if the post is not found. */ - public function get_byte_offset( $id ) { + public function get_post_byte_offset( $id ) { if ( ! $this->sorted ) { return false; } @@ -97,7 +99,7 @@ public function get_byte_offset( $id ) { // Remove the element from the array. unset( $this->posts[ $id ] ); - if ( 0 === count( $this->posts ) ) { + if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { // All posts have been processed. $this->reset(); } @@ -108,17 +110,44 @@ public function get_byte_offset( $id ) { return false; } + /** + * Get the byte offset of an element, and remove it from the list. + * + * @param string $slug The slug of the category to get the byte offset. 
+ * + * @return int|bool The byte offset of the category, or false if the category is not found. + */ + public function get_category_byte_offset( $slug ) { + if ( ! $this->sorted ) { + return false; + } + + if ( isset( $this->categories[ $slug ] ) ) { + $ret = $this->categories[ $slug ]; + + // Remove the element from the array. + unset( $this->categories[ $slug ] ); + + if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) { + // All categories and posts have been processed. + $this->reset(); + } + + return $ret; + } + + return false; + } + public function is_sorted() { return $this->sorted; } /** - * Sort posts topologically. + * Sort elements topologically. * - * Children posts should not be processed before their parent has been processed. - * This method sorts the posts in the order they should be processed. - * - * Sorted posts will be stored as attachments and posts/pages separately. + * Elements should not be processed before their parent has been processed. + * This method sorts the elements in the order they should be processed. */ public function sort_topologically( $free_space = true ) { foreach ( $this->categories as $slug => $category ) { @@ -126,6 +155,7 @@ public function sort_topologically( $free_space = true ) { } $this->sort_elements( $this->posts ); + $this->sort_elements( $this->categories ); // Free some space. if ( $free_space ) { @@ -136,6 +166,14 @@ public function sort_topologically( $free_space = true ) { // Save only the byte offset. $this->posts[ $id ] = $element[1]; } + + /** + * @TODO: all the elements that have not been moved can be flushed away. + */ + foreach ( $this->categories as $slug => $element ) { + // Save only the byte offset. 
+ $this->categories[ $slug ] = $element[1]; + } } $this->sorted = true; diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php index 28079e416c..840a1805ef 100644 --- a/packages/playground/data-liberation/tests/WPStreamImporterTests.php +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -15,24 +15,23 @@ protected function setUp(): void { } } - /** - * @before + /** + * @before * * TODO: Run each test in a fresh Playground instance instead of sharing the global * state like this. - */ - public function clean_up_uploads(): void - { - $files = glob( '/wordpress/wp-content/uploads/*' ); - foreach( $files as $file ) { - if( is_dir( $file ) ) { - array_map( 'unlink', glob( "$file/*.*" ) ); - rmdir( $file ); - } else { - unlink( $file ); - } - } - } + */ + public function clean_up_uploads(): void { + $files = glob( '/wordpress/wp-content/uploads/*' ); + foreach ( $files as $file ) { + if ( is_dir( $file ) ) { + array_map( 'unlink', glob( "$file/*.*" ) ); + rmdir( $file ); + } else { + unlink( $file ); + } + } + } public function test_import_simple_wxr() { $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); @@ -44,7 +43,7 @@ public function test_frontloading() { $wxr_path = __DIR__ . 
'/wxr/frontloading-1-attachment.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ); - while( $importer->next_step() ) { + while ( $importer->next_step() ) { // noop } $files = glob( '/wordpress/wp-content/uploads/*' ); @@ -57,17 +56,17 @@ public function test_resume_frontloading() { $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ); - $progress_url = null; + $progress_url = null; $progress_value = null; - for($i = 0; $i < 20; ++$i) { + for ( $i = 0; $i < 20; ++$i ) { $importer->next_step(); $progress = $importer->get_frontloading_progress(); - if( count( $progress ) === 0 ) { + if ( count( $progress ) === 0 ) { continue; } - $progress_url = array_keys( $progress )[0]; + $progress_url = array_keys( $progress )[0]; $progress_value = array_values( $progress )[0]; - if( null === $progress_value['received'] ) { + if ( null === $progress_value['received'] ) { continue; } break; @@ -78,22 +77,22 @@ public function test_resume_frontloading() { $this->assertEquals( 'https://wpthemetestdata.files.wordpress.com/2008/06/canola2.jpg', $progress_url ); $this->assertGreaterThan( 0, $progress_value['total'] ); - $cursor = $importer->get_reentrancy_cursor(); - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor ); + $cursor = $importer->get_reentrancy_cursor(); + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor ); // Rewind back to the entity we were on. $this->assertTrue( $importer->next_step() ); // Restart the download of the same entity – from scratch. 
- $progress_value = []; - for($i = 0; $i < 20; ++$i) { + $progress_value = array(); + for ( $i = 0; $i < 20; ++$i ) { $importer->next_step(); $progress = $importer->get_frontloading_progress(); - if( count( $progress ) === 0 ) { + if ( count( $progress ) === 0 ) { continue; } - $progress_url = array_keys( $progress )[0]; + $progress_url = array_keys( $progress )[0]; $progress_value = array_values( $progress )[0]; - if( null === $progress_value['received'] ) { + if ( null === $progress_value['received'] ) { continue; } break; @@ -105,17 +104,17 @@ public function test_resume_frontloading() { } /** - * + * Test resume entity import. */ public function test_resume_entity_import() { $wxr_path = __DIR__ . '/wxr/entities-options-and-posts.xml'; $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_IMPORT_ENTITIES ); - for($i = 0; $i < 11; ++$i) { + for ( $i = 0; $i < 11; ++$i ) { $this->assertTrue( $importer->next_step() ); - $cursor = $importer->get_reentrancy_cursor(); - $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor ); + $cursor = $importer->get_reentrancy_cursor(); + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor ); // Rewind back to the entity we were on. // Note this means we may attempt to insert it twice. It's // the importer's job to detect that and skip the duplicate @@ -125,6 +124,18 @@ public function test_resume_entity_import() { $this->assertFalse( $importer->next_step() ); } + public function test_sort_categories() { + $wxr_path = __DIR__ . 
'/wxr/mixed-categories.xml'; + $importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path ); + $this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_TOPOLOGICAL_SORT ); + + while ( $importer->next_step() ) { + if ( $importer->get_next_stage() === WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ) { + break; + } + } + } + private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) { do { while ( $importer->next_step() ) { diff --git a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php index 9e176d5be2..e454496823 100644 --- a/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php +++ b/packages/playground/data-liberation/tests/WPTopologicalSorterTests.php @@ -23,8 +23,8 @@ public function test_parent_after_child() { $sorter->sort_topologically(); $this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); $this->assertFalse( $sorter->is_sorted() ); } @@ -37,7 +37,7 @@ public function test_child_after_parent() { $sorter->sort_topologically(); $this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); } public function test_orphaned_post() { @@ -48,8 +48,8 @@ public function test_orphaned_post() { $sorter->sort_topologically(); $this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); } public 
function test_chain_parent_child_after() { @@ -80,9 +80,9 @@ public function test_get_byte_offsets_consume_array() { $this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts ); - $this->assertEquals( 10, $sorter->get_byte_offset( 1 ) ); - $this->assertEquals( 20, $sorter->get_byte_offset( 2 ) ); - $this->assertEquals( 30, $sorter->get_byte_offset( 3 ) ); + $this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) ); + $this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) ); + $this->assertEquals( 30, $sorter->get_post_byte_offset( 3 ) ); $this->assertCount( 0, $sorter->posts ); } diff --git a/packages/playground/data-liberation/tests/wxr/mixed-categories.xml b/packages/playground/data-liberation/tests/wxr/mixed-categories.xml new file mode 100644 index 0000000000..ae74a7530e --- /dev/null +++ b/packages/playground/data-liberation/tests/wxr/mixed-categories.xml @@ -0,0 +1,82 @@ + + + + + Mixed Categories + https://playground.wordpress.net/scope:funny-chic-valley + + Fri, 29 Nov 2024 12:36:23 +0000 + en-US + 1.2 + https://playground.wordpress.net/scope:funny-chic-valley + https://playground.wordpress.net/scope:funny-chic-valley + + + 1 + + + + + + + + + 5 + + + + + + 1 + + + + + + 3 + + + + + + 2 + + + + + + 5 + + + + + + + 1 + + + + + + + 3 + + + + + + + 2 + + + + + + +