Open
Description
Bad news @wooorm - micromark/micromark#169, or something like it, is an issue here as well - I haven't yet grokked your edit maps, but either they don't solve the quadratic-complexity parsing problems or there's a separate performance bug in markdown-rs.
The constant factors are better, but the asymptotic complexity means that we're back to 60-second parse times on files that are just an order of magnitude bigger than the ones that caused 60-second parses in micromark-js.
For comparison, the "JS" lines on these graphs show micromark's performance using subtokenize 2.0.1, which picks up the fix in micromark/micromark#171.
Data collection sources
Run the JavaScript benchmark with `node main.mjs`
and the Rust benchmark with `cargo run --release`.
package.json
{
"dependencies": {
"mdast-util-from-markdown": "^2.0.0",
"micromark-extension-mdxjs": "^3.0.0"
}
}
main.mjs
import { fromMarkdown } from "mdast-util-from-markdown";
import { mdxjs } from "micromark-extension-mdxjs";
/* Synthetic test-data generation */

/* How many random UUID lines go inside each component's template literal */
const NUM_LINES_IN_CODE_SEG = 10000;

/**
 * Build an MDX document made of `size` self-closing <DummyComponent>
 * elements. Each element carries a `code` attribute holding a template
 * literal with NUM_LINES_IN_CODE_SEG lines of random UUIDs, and is
 * surrounded by blank lines.
 *
 * @param size number of components to emit
 * @returns the document as a single newline-joined string
 */
function generateTestData(size) {
  const lines = [];
  let segment = 0;
  while (segment < size) {
    // One UUID per line inside the template literal.
    const uuids = Array.from({ length: NUM_LINES_IN_CODE_SEG }, () =>
      crypto.randomUUID()
    );
    lines.push("", "<DummyComponent code={`", ...uuids, "`} />", "");
    segment += 1;
  }
  return lines.join("\n");
}
/**
 * Measure how long one parse of a generated document takes.
 *
 * The document is generated before the clock starts; the timed region
 * covers building the mdxjs extension and the fromMarkdown call.
 *
 * @param size number of components passed to generateTestData
 * @returns elapsed time in milliseconds
 */
function testPerformance(size) {
  const source = generateTestData(size);
  const startedAt = performance.now();
  fromMarkdown(source, { extensions: [mdxjs()] });
  const finishedAt = performance.now();
  return finishedAt - startedAt;
}
/*
 * Three passes over a geometrically growing size (x1.35, floored, starting
 * at 4 and stopping before 40). Each measurement prints "size, milliseconds".
 */
for (let pass = 1; pass <= 3; pass++) {
  let size = 4;
  while (size < 40) {
    const elapsedMs = testPerformance(size);
    console.log(`${size}, ${elapsedMs}`);
    size = Math.floor(size * 1.35);
  }
}
Cargo.toml
[package]
name = "tmp"
version = "0.1.0"
edition = "2021"
[dependencies]
markdown = "1.0.0-alpha.16"
[dependencies.uuid]
version = "1.8.0"
features = [
"v4", # Lets you generate random UUIDs
"fast-rng", # Use a faster (but still sufficiently random) RNG
]
src/main.rs
/// Number of random UUID lines placed inside each generated component's
/// template literal.
const NUM_LINES_IN_CODE_SEG: i32 = 10000;

/// Benchmarks `markdown::to_mdast` with MDX parse options on synthetic
/// documents of growing size.
///
/// Three passes are run. Within a pass the component count starts at 3 and is
/// multiplied by 1.35 (truncated) before every measurement, so the measured
/// sizes are 4, 5, 6, 8, 10, 13, 17, 22, 29 and 39. For each size one line
/// `size,milliseconds,position` is printed, where `position` is the `Debug`
/// form of the root node's position.
///
/// # Errors
///
/// Returns the error produced by `markdown::to_mdast` if parsing fails.
fn main() -> Result<(), String> {
    for _ in 0..3 {
        let mut reps: i32 = 3;
        loop {
            // Geometric growth; the cast truncates toward zero.
            reps = (f64::from(reps) * 1.35) as i32;
            if reps > 40 {
                break;
            }

            let mut result = String::new();
            // Emit exactly `reps` components so the printed size matches the
            // input (the range previously started at 1 and produced one
            // component fewer than reported).
            for _ in 0..reps {
                result.push_str("\n<DummyComponent code={`\n");
                for _ in 0..NUM_LINES_IN_CODE_SEG {
                    result.push_str(&uuid::Uuid::new_v4().to_string());
                    result.push('\n');
                }
                result.push_str("`}/>\n\n");
            }

            // Only the parse itself is timed; document generation is excluded.
            let start = std::time::Instant::now();
            let mdast = markdown::to_mdast(&result, &markdown::ParseOptions::mdx())?;
            let duration = start.elapsed().as_millis();

            println!("{},{},{:?}", reps, duration, mdast.position());
        }
    }
    Ok(())
}
Metadata
Metadata
Assignees
Labels
No labels