Skip to content

Commit

Permalink
Add support for categories
Browse files Browse the repository at this point in the history
  • Loading branch information
zaerl committed Nov 29, 2024
1 parent ca5eab7 commit 495be8b
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
*/
class WP_Topological_Sorter {

public $posts = array();
public $categories = array();
public $category_index = array();
public $posts = array();
public $categories = array();

/**
* Variable for keeping count of orphaned posts/attachments; it is also assigned as a temporary post ID.
Expand Down Expand Up @@ -50,9 +49,8 @@ public function map_category( $byte_offset, $data ) {
}

$this->categories[ $data['slug'] ] = array(
'parent' => $data['parent'],
'byte_offset' => $byte_offset,
'visited' => false,
array_key_exists( 'parent', $data ) ? $data['parent'] : '',
$byte_offset,
);
}

Expand Down Expand Up @@ -85,8 +83,12 @@ public function map_post( $byte_offset, $data ) {

/**
* Get the byte offset of an element, and remove it from the list.
*
* @param int $id The ID of the post to get the byte offset.
*
* @return int|bool The byte offset of the post, or false if the post is not found.
*/
public function get_byte_offset( $id ) {
public function get_post_byte_offset( $id ) {
if ( ! $this->sorted ) {
return false;
}
Expand All @@ -97,7 +99,7 @@ public function get_byte_offset( $id ) {
// Remove the element from the array.
unset( $this->posts[ $id ] );

if ( 0 === count( $this->posts ) ) {
if ( 0 === count( $this->categories ) && 0 === count( $this->posts ) ) {
// All categories and posts have been processed.
$this->reset();
}
Expand All @@ -108,24 +110,52 @@ public function get_byte_offset( $id ) {
return false;
}

/**
 * Look up the entry stored for a category and drop it from the pending list.
 *
 * When the lookup empties the last remaining list (no categories and no
 * posts are left), the sorter is reset.
 *
 * @param string $slug The slug of the category to look up.
 *
 * @return int|bool The byte offset stored for the category, or false when the
 *                  sorter is not sorted yet or the slug is not present.
 */
public function get_category_byte_offset( $slug ) {
	// Nothing can be served before sorting, nor for an unknown slug.
	if ( ! $this->sorted || ! isset( $this->categories[ $slug ] ) ) {
		return false;
	}

	$byte_offset = $this->categories[ $slug ];

	// Each entry is handed out only once.
	unset( $this->categories[ $slug ] );

	$nothing_left = 0 === count( $this->categories ) && 0 === count( $this->posts );
	if ( $nothing_left ) {
		// Every category and every post has been consumed.
		$this->reset();
	}

	return $byte_offset;
}

/**
 * Report whether the sorter is currently in its sorted state.
 *
 * @return bool The current value of the `sorted` flag.
 */
public function is_sorted() {
return $this->sorted;
}

/**
* Sort posts topologically.
* Sort elements topologically.
*
* Children posts should not be processed before their parent has been processed.
* This method sorts the posts in the order they should be processed.
*
* Sorted posts will be stored as attachments and posts/pages separately.
* Elements should not be processed before their parent has been processed.
* This method sorts the elements in the order they should be processed.
*/
public function sort_topologically( $free_space = true ) {
foreach ( $this->categories as $slug => $category ) {
$this->topological_category_sort( $slug, $category );
}

$this->sort_elements( $this->posts );
$this->sort_elements( $this->categories );

// Free some space.
if ( $free_space ) {
Expand All @@ -136,6 +166,14 @@ public function sort_topologically( $free_space = true ) {
// Save only the byte offset.
$this->posts[ $id ] = $element[1];
}

/**
* @TODO: all the elements that have not been moved can be flushed away.
*/
foreach ( $this->categories as $slug => $element ) {
// Save only the byte offset.
$this->categories[ $slug ] = $element[1];
}
}

$this->sorted = true;
Expand Down
75 changes: 43 additions & 32 deletions packages/playground/data-liberation/tests/WPStreamImporterTests.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,23 @@ protected function setUp(): void {
}
}

/**
* @before
/**
* @before
*
* TODO: Run each test in a fresh Playground instance instead of sharing the global
* state like this.
*/
public function clean_up_uploads(): void
{
$files = glob( '/wordpress/wp-content/uploads/*' );
foreach( $files as $file ) {
if( is_dir( $file ) ) {
array_map( 'unlink', glob( "$file/*.*" ) );
rmdir( $file );
} else {
unlink( $file );
}
}
}
*/
public function clean_up_uploads(): void {
	// glob() returns false on error (and, on some systems, when nothing
	// matches); iterating over false would raise a warning/TypeError.
	$files = glob( '/wordpress/wp-content/uploads/*' );
	if ( false === $files ) {
		return;
	}

	foreach ( $files as $file ) {
		if ( ! is_dir( $file ) ) {
			unlink( $file );
			continue;
		}

		// Only direct children matching "*.*" are removed before the
		// directory itself is deleted.
		$children = glob( "$file/*.*" );
		if ( false !== $children ) {
			array_map( 'unlink', $children );
		}
		rmdir( $file );
	}
}

public function test_import_simple_wxr() {
$import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' );
Expand All @@ -44,7 +43,7 @@ public function test_frontloading() {
$wxr_path = __DIR__ . '/wxr/frontloading-1-attachment.xml';
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS );
while( $importer->next_step() ) {
while ( $importer->next_step() ) {
// noop
}
$files = glob( '/wordpress/wp-content/uploads/*' );
Expand All @@ -57,17 +56,17 @@ public function test_resume_frontloading() {
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS );

$progress_url = null;
$progress_url = null;
$progress_value = null;
for($i = 0; $i < 20; ++$i) {
for ( $i = 0; $i < 20; ++$i ) {
$importer->next_step();
$progress = $importer->get_frontloading_progress();
if( count( $progress ) === 0 ) {
if ( count( $progress ) === 0 ) {
continue;
}
$progress_url = array_keys( $progress )[0];
$progress_url = array_keys( $progress )[0];
$progress_value = array_values( $progress )[0];
if( null === $progress_value['received'] ) {
if ( null === $progress_value['received'] ) {
continue;
}
break;
Expand All @@ -78,22 +77,22 @@ public function test_resume_frontloading() {
$this->assertEquals( 'https://wpthemetestdata.files.wordpress.com/2008/06/canola2.jpg', $progress_url );
$this->assertGreaterThan( 0, $progress_value['total'] );

$cursor = $importer->get_reentrancy_cursor();
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor );
$cursor = $importer->get_reentrancy_cursor();
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor );
// Rewind back to the entity we were on.
$this->assertTrue( $importer->next_step() );

// Restart the download of the same entity – from scratch.
$progress_value = [];
for($i = 0; $i < 20; ++$i) {
$progress_value = array();
for ( $i = 0; $i < 20; ++$i ) {
$importer->next_step();
$progress = $importer->get_frontloading_progress();
if( count( $progress ) === 0 ) {
if ( count( $progress ) === 0 ) {
continue;
}
$progress_url = array_keys( $progress )[0];
$progress_url = array_keys( $progress )[0];
$progress_value = array_values( $progress )[0];
if( null === $progress_value['received'] ) {
if ( null === $progress_value['received'] ) {
continue;
}
break;
Expand All @@ -105,17 +104,17 @@ public function test_resume_frontloading() {
}

/**
*
* Test resume entity import.
*/
public function test_resume_entity_import() {
$wxr_path = __DIR__ . '/wxr/entities-options-and-posts.xml';
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_IMPORT_ENTITIES );

for($i = 0; $i < 11; ++$i) {
for ( $i = 0; $i < 11; ++$i ) {
$this->assertTrue( $importer->next_step() );
$cursor = $importer->get_reentrancy_cursor();
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, [], $cursor );
$cursor = $importer->get_reentrancy_cursor();
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path, array(), $cursor );
// Rewind back to the entity we were on.
// Note this means we may attempt to insert it twice. It's
// the importer's job to detect that and skip the duplicate
Expand All @@ -125,6 +124,18 @@ public function test_resume_entity_import() {
$this->assertFalse( $importer->next_step() );
}

/**
 * Step the importer through the mixed-categories fixture, starting after
 * skip_to_stage() has been asked for the topological sort stage, until the
 * importer reports asset frontloading as its next stage.
 *
 * NOTE(review): this test contains no assertions, so it only exercises the
 * code path; consider asserting the resulting category order.
 */
public function test_sort_categories() {
$wxr_path = __DIR__ . '/wxr/mixed-categories.xml';
$importer = WP_Stream_Importer::create_for_wxr_file( $wxr_path );
$this->skip_to_stage( $importer, WP_Stream_Importer::STAGE_TOPOLOGICAL_SORT );

while ( $importer->next_step() ) {
// Stop as soon as the next stage would be asset frontloading.
if ( $importer->get_next_stage() === WP_Stream_Importer::STAGE_FRONTLOAD_ASSETS ) {
break;
}
}
}

private function skip_to_stage( WP_Stream_Importer $importer, string $stage ) {
do {
while ( $importer->next_step() ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ public function test_parent_after_child() {
$sorter->sort_topologically();

$this->assertEquals( array( 2 => 20, 1 => 10 ), $sorter->posts );
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
$this->assertFalse( $sorter->is_sorted() );
}

Expand All @@ -37,7 +37,7 @@ public function test_child_after_parent() {
$sorter->sort_topologically();

$this->assertEquals( array( 1 => 10, 2 => 20, 3 => 30 ), $sorter->posts );
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
}

public function test_orphaned_post() {
Expand All @@ -48,8 +48,8 @@ public function test_orphaned_post() {
$sorter->sort_topologically();

$this->assertEquals( array( 1 => 10, 2 => 20 ), $sorter->posts );
$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
}

public function test_chain_parent_child_after() {
Expand Down Expand Up @@ -80,9 +80,9 @@ public function test_get_byte_offsets_consume_array() {

$this->assertEquals( array( 3 => 30, 2 => 20, 1 => 10 ), $sorter->posts );

$this->assertEquals( 10, $sorter->get_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_byte_offset( 2 ) );
$this->assertEquals( 30, $sorter->get_byte_offset( 3 ) );
$this->assertEquals( 10, $sorter->get_post_byte_offset( 1 ) );
$this->assertEquals( 20, $sorter->get_post_byte_offset( 2 ) );
$this->assertEquals( 30, $sorter->get_post_byte_offset( 3 ) );
$this->assertCount( 0, $sorter->posts );
}

Expand Down
82 changes: 82 additions & 0 deletions packages/playground/data-liberation/tests/wxr/mixed-categories.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/"
>

<channel>
<title>Mixed Categories</title>
<link>https://playground.wordpress.net/scope:funny-chic-valley</link>
<description></description>
<pubDate>Fri, 29 Nov 2024 12:36:23 +0000</pubDate>
<language>en-US</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:base_site_url>https://playground.wordpress.net/scope:funny-chic-valley</wp:base_site_url>
<wp:base_blog_url>https://playground.wordpress.net/scope:funny-chic-valley</wp:base_blog_url>

<wp:author>
<wp:author_id>1</wp:author_id>
<wp:author_login><![CDATA[admin]]></wp:author_login>
<wp:author_email><![CDATA[[email protected]]]></wp:author_email>
<wp:author_display_name><![CDATA[admin]]></wp:author_display_name>
<wp:author_first_name><![CDATA[]]></wp:author_first_name>
<wp:author_last_name><![CDATA[]]></wp:author_last_name>
</wp:author>

<wp:category>
<wp:term_id>5</wp:term_id>
<wp:category_nicename><![CDATA[parent-of-previous-category]]></wp:category_nicename>
<wp:category_parent><![CDATA[]]></wp:category_parent>
<wp:cat_name><![CDATA[Parent of previous category]]></wp:cat_name>
</wp:category>
<wp:category>
<wp:term_id>1</wp:term_id>
<wp:category_nicename><![CDATA[uncategorized]]></wp:category_nicename>
<wp:category_parent><![CDATA[]]></wp:category_parent>
<wp:cat_name><![CDATA[Test #1]]></wp:cat_name>
</wp:category>
<wp:category>
<wp:term_id>3</wp:term_id>
<wp:category_nicename><![CDATA[test-3]]></wp:category_nicename>
<wp:category_parent><![CDATA[uncategorized]]></wp:category_parent>
<wp:cat_name><![CDATA[Test #3]]></wp:cat_name>
</wp:category>
<wp:category>
<wp:term_id>2</wp:term_id>
<wp:category_nicename><![CDATA[test-child]]></wp:category_nicename>
<wp:category_parent><![CDATA[parent-of-previous-category]]></wp:category_parent>
<wp:cat_name><![CDATA[Test child]]></wp:cat_name>
</wp:category>
<wp:term>
<wp:term_id>5</wp:term_id>
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
<wp:term_slug><![CDATA[parent-of-previous-category]]></wp:term_slug>
<wp:term_parent><![CDATA[]]></wp:term_parent>
<wp:term_name><![CDATA[Parent of previous category]]></wp:term_name>
</wp:term>
<wp:term>
<wp:term_id>1</wp:term_id>
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
<wp:term_slug><![CDATA[uncategorized]]></wp:term_slug>
<wp:term_parent><![CDATA[]]></wp:term_parent>
<wp:term_name><![CDATA[Test #1]]></wp:term_name>
</wp:term>
<wp:term>
<wp:term_id>3</wp:term_id>
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
<wp:term_slug><![CDATA[test-3]]></wp:term_slug>
<wp:term_parent><![CDATA[uncategorized]]></wp:term_parent>
<wp:term_name><![CDATA[Test #3]]></wp:term_name>
</wp:term>
<wp:term>
<wp:term_id>2</wp:term_id>
<wp:term_taxonomy><![CDATA[category]]></wp:term_taxonomy>
<wp:term_slug><![CDATA[test-child]]></wp:term_slug>
<wp:term_parent><![CDATA[parent-of-previous-category]]></wp:term_parent>
<wp:term_name><![CDATA[Test child]]></wp:term_name>
</wp:term>
</channel>
</rss>

0 comments on commit 495be8b

Please sign in to comment.