-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathparallel_test.js
84 lines (69 loc) · 2.78 KB
/
parallel_test.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
const pathik = require('pathik');
const path = require('path');
const fs = require('fs');
/**
 * Logs one status line per crawled URL.
 *
 * A URL counts as successful only when its entry has both a truthy `html`
 * and a truthy `markdown` property.
 *
 * @param {Object<string, {html?: *, markdown?: *}>} results - Crawl results,
 *   iterated as an object keyed by URL (shape assumed from how this script
 *   consumes `pathik.crawl` output; confirm against the pathik API).
 */
function printResultsSummary(results) {
  for (const [url, info] of Object.entries(results)) {
    // A null/undefined entry is reported as a failure for that URL instead of
    // throwing a TypeError that would abort the whole run.
    const succeeded = Boolean(info && info.html && info.markdown);
    const status = succeeded ? '✅ Success' : '❌ Failed';
    console.log(` ${url}: ${status}`);
  }
}

/**
 * Runs one crawl phase: prints its banner, times `pathik.crawl`, and prints
 * the per-URL summary.
 *
 * On any error the message is logged and the process exits with status 1,
 * so later phases never run after a failed one.
 *
 * @param {string} label - Capitalized phase name used in log output
 *   (`'Parallel'` or `'Sequential'`).
 * @param {string[]} urls - URLs to crawl.
 * @param {string} outputDir - Directory passed to pathik as `outputDir`.
 * @param {boolean} parallel - Value passed to pathik as the `parallel` option.
 * @returns {Promise<number>} Elapsed wall-clock time of the crawl in seconds.
 */
async function runCrawlPhase(label, urls, outputDir, parallel) {
  console.log(`\n=== ${label.toUpperCase()} CRAWLING ===`);
  const startedAt = Date.now();
  try {
    const results = await pathik.crawl(urls, { outputDir, parallel });
    // Stop the clock before printing so logging is not counted as crawl time.
    const elapsedSeconds = (Date.now() - startedAt) / 1000;
    console.log(`${label} crawling completed in ${elapsedSeconds.toFixed(2)} seconds`);
    printResultsSummary(results);
    return elapsedSeconds;
  } catch (error) {
    console.error(`Error during ${label.toLowerCase()} crawling: ${error.message}`);
    // process.exit() terminates synchronously; nothing after this line runs.
    process.exit(1);
  }
}

/**
 * Prints the timing comparison between the two crawl phases.
 *
 * @param {number} parallelTime - Parallel crawl duration in seconds.
 * @param {number} sequentialTime - Sequential crawl duration in seconds.
 */
function printPerformanceComparison(parallelTime, sequentialTime) {
  console.log('\n=== PERFORMANCE COMPARISON ===');
  if (parallelTime < sequentialTime) {
    const speedup = sequentialTime / parallelTime;
    console.log(`Parallel crawling was ${speedup.toFixed(2)}x faster than sequential crawling`);
  } else {
    console.log(`Warning: Parallel crawling was not faster in this test`);
  }
  console.log(`Parallel: ${parallelTime.toFixed(2)}s vs Sequential: ${sequentialTime.toFixed(2)}s`);
}

/**
 * Crawls a fixed list of URLs twice with pathik, once with `parallel: true`
 * and once with `parallel: false`, then reports per-URL status and the
 * timing difference.
 *
 * Output is written under `./output_test_js/parallel` and
 * `./output_test_js/sequential`, resolved against the current working
 * directory. A crawl failure in either phase exits the process with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Create output directories
  const baseDir = path.resolve('./output_test_js');
  const parallelDir = path.join(baseDir, 'parallel');
  const sequentialDir = path.join(baseDir, 'sequential');
  fs.mkdirSync(parallelDir, { recursive: true });
  fs.mkdirSync(sequentialDir, { recursive: true });

  // Test URLs - mix of different sites
  const urls = [
    'https://example.com',
    'https://httpbin.org/html',
    'https://jsonplaceholder.typicode.com',
    'https://books.toscrape.com',
    'https://quotes.toscrape.com'
  ];
  console.log(`Testing with ${urls.length} URLs...`);

  // The parallel phase runs first; the sequential phase only starts if it succeeded.
  const parallelTime = await runCrawlPhase('Parallel', urls, parallelDir, true);
  const sequentialTime = await runCrawlPhase('Sequential', urls, sequentialDir, false);

  printPerformanceComparison(parallelTime, sequentialTime);
  console.log(`\nOutput files are located in: ${baseDir}`);
}
// Entry point. Errors that escape main() (for example a failure while creating
// the output directories) are logged and also mark the run as failed, so the
// script does not exit with status 0 after an error.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});