From 9343ffe13246fe524a280f85bbca78807e41ec3e Mon Sep 17 00:00:00 2001 From: Mohsen Azimi Date: Sun, 19 Jan 2025 10:32:01 +0700 Subject: [PATCH 1/7] feat: add performance test --- tests/test_perf.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_perf.rs diff --git a/tests/test_perf.rs b/tests/test_perf.rs new file mode 100644 index 0000000..fca904b --- /dev/null +++ b/tests/test_perf.rs @@ -0,0 +1,42 @@ +use std::fs; +use std::path::Path; +use std::time::Instant; +use yek::serialize_repo; + +#[test] +fn test_serialization_performance() { + // Create test data directory + let test_dir = "test_perf_data"; + fs::create_dir_all(test_dir).unwrap(); + + // Create test files of different sizes + let sizes = vec![1024, 1024 * 1024, 10 * 1024 * 1024]; // 1KB, 1MB, 10MB + + for size in sizes { + let filename = format!("{}/file_{}_bytes.txt", test_dir, size); + let data = vec![b'a'; size]; + fs::write(&filename, &data).unwrap(); + + // Measure serialization time + let start = Instant::now(); + serialize_repo( + size, // max_size + Some(Path::new(test_dir)), // base_path + false, // stream + false, // count_tokens + None, // config + Some(Path::new("perf_output")), // output_dir + None, // max_files + ) + .unwrap(); + let duration = start.elapsed(); + + println!("Serializing {}B took: {:?}", size, duration); + + // Cleanup + fs::remove_dir_all("perf_output").unwrap(); + } + + // Final cleanup + fs::remove_dir_all(test_dir).unwrap(); +} From d74b60a1b431911e97b6cc9ee2ea400905414b28 Mon Sep 17 00:00:00 2001 From: Mohsen Azimi Date: Sun, 19 Jan 2025 11:21:08 +0700 Subject: [PATCH 2/7] feat: parallel execution for better perf --- Cargo.lock | 282 +++++++++++++++++++++++++++++++- Cargo.toml | 7 + README.md | 72 +++------ benches/serialization.rs | 50 ++++++ src/lib.rs | 337 +++++++++++++++++++++------------------ src/parallel.rs | 337 +++++++++++++++++++++++++++++++++++++++ tests/test_perf.rs | 96 ++++++++--- 7 files changed, 950 insertions(+), 231 deletions(-) create mode 100644 benches/serialization.rs create mode 100644 src/parallel.rs diff --git a/Cargo.lock b/Cargo.lock index 11702f8..7184df0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.18" @@ -62,7 +68,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -72,7 +78,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -145,6 +151,12 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.9" @@ -174,6 +186,33 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 
+dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.26" @@ -217,7 +256,7 @@ dependencies = [ "libc", "once_cell", "unicode-width", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -235,6 +274,64 @@ dependencies = [ "libc", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -254,12 +351,27 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -301,6 +413,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -330,7 +448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -382,12 +500,34 @@ 
dependencies = [ "regex-syntax", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -450,12 +590,32 @@ dependencies = [ "web-time", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi 0.4.0", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" @@ -536,6 +696,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -557,6 +727,12 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "overload" version = "0.1.1" @@ -569,6 +745,34 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portable-atomic" version = "1.10.0" @@ -629,6 +833,26 @@ dependencies = [ 
"proc-macro2", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex" version = "1.11.1" @@ -668,7 +892,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -721,6 +945,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_json" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336a0c23cf42a38d9eaa7cd22c7040d04e1228a19a933890805ffd00a16437d2" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -868,7 +1104,7 @@ dependencies = [ "getrandom", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -920,6 +1156,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.8.19" @@ -1179,6 +1425,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "web-time" version = "1.1.0" @@ -1211,7 +1467,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1229,6 +1485,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -1320,8 +1585,11 @@ dependencies = [ "byte-unit", "chrono", "clap", + "criterion", + "crossbeam", "ignore", "indicatif", + "num_cpus", "predicates", "regex", "serde", diff --git a/Cargo.toml b/Cargo.toml index e734cd9..c991bff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,10 @@ edition = "2021" anyhow = "1.0" byte-unit = "4.0" clap = "4.4" +crossbeam = "0.8" ignore = "0.4" indicatif = "0.17" +num_cpus = "1.15" regex = "1.10" serde = { version = "1.0", features = ["derive"] } sha2 = "0.10" @@ -23,6 +25,11 @@ assert_cmd = "2.0" chrono = "0.4" predicates = "3.0" tempfile = "3.9" +criterion = "0.5" + +[[bench]] +name = "serialization" +harness = false [profile.release] opt-level = 3 diff --git a/README.md b/README.md index 4a998cb..9ccea3c 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ A fast Rust based tool to read text-based files in a repository or directory, ch - Supports processing multiple directories in a single command. - Configurable via a `yek.toml` file. 
+Yek ([يک](https://fa.wikipedia.org/wiki/۱)) means "One" in Farsi/Persian. + ## Installation ### Via Homebrew (recommended for macOS) @@ -58,31 +60,43 @@ export PATH=$(pwd)/target/release:$PATH ### Examples -Process current directory: +Process current directory and write to temp directory: ```bash yek ``` -Process specific directories: +Pipe output to clipboard (macOS): ```bash -yek src/ tests/ +yek src/ | pbcopy ``` -Process multiple repositories: +Cap the max size to 128K tokens and only process the `src` directory: ```bash -yek ~/code/project1 ~/code/project2 +yek --max-size 128000 --tokens src/ ``` -Pipe output to clipboard: +Cap the max size to 100KB and only process the `src` directory, writing to a specific directory: ```bash -yek src/ | pbcopy +yek --max-size 100KB --output-dir /tmp/yek src/ +``` + +Process multiple directories: + +```bash +yek src/ tests/ ``` -### Run +Process multiple repositories: + +```bash +yek ~/code/project1 ~/code/project2 +``` + +### Help ```bash yek --help @@ -103,44 +117,6 @@ Options: -V, --version Print version ``` -## Examples - -- Serialize entire repository into chunks of 10MB (default): - -```bash -yek -``` - -- Split repository into chunks of 128MB: - -```bash -yek --max-size 128MB -``` - -- Split into chunks by token count instead of bytes: - -```bash -yek --tokens --max-size 128000 -``` - -- Serialize only files under a specific path: - -```bash -yek src/app -``` - -- Process multiple directories: - -```bash -yek src/app src/lib -``` - -- Stream output to another command: - -```bash -yek | pbcopy -``` - ## Configuration File You can place a file called `yek.toml` at your project root or pass a custom path via `--config`. The configuration file allows you to: @@ -150,7 +126,9 @@ You can place a file called `yek.toml` at your project root or pass a custom pat 3. Add additional binary file extensions to ignore (extends the built-in list) 4. Configure Git-based priority boost -Example configuration: +### Example `yek.toml` + +This is optional, you can configure the `yek.toml` file at the root of your project. 
```toml # Add patterns to ignore (in addition to .gitignore) diff --git a/benches/serialization.rs b/benches/serialization.rs new file mode 100644 index 0000000..315db18 --- /dev/null +++ b/benches/serialization.rs @@ -0,0 +1,50 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use yek::serialize_repo; + +fn create_test_data(dir: &Path, size: usize) { + let filename = dir.join(format!("file_{}_bytes.txt", size)); + let data = vec![b'a'; size]; + fs::write(&filename, &data).unwrap(); +} + +fn bench_serialization(c: &mut Criterion) { + let mut group = c.benchmark_group("serialization"); + group.sample_size(10); // Number of samples to collect + + // Test different file sizes + let sizes = vec![ + 1024, // 1KB + 1024 * 1024, // 1MB + 10 * 1024 * 1024, // 10MB + ]; + + for size in sizes { + // Create a new temporary directory for each benchmark + let temp_dir = TempDir::new().unwrap(); + let output_dir = temp_dir.path().join("output"); + create_test_data(temp_dir.path(), size); + + group.bench_with_input(BenchmarkId::new("file_size", size), &size, |b, &size| { + b.iter(|| { + serialize_repo( + black_box(size), + Some(temp_dir.path()), + false, + false, + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).unwrap(); + }); + }); + } + group.finish(); +} + +criterion_group!(benches, bench_serialization); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index 0b6e5e5..20a8861 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,8 @@ use std::path::{Path, PathBuf}; use std::process::{Command as SysCommand, Stdio}; use tracing::{debug, info}; use walkdir::WalkDir; +mod parallel; +use parallel::process_files_parallel; /// Helper macro to write debug statements both to standard debug log and to debug file if set. #[macro_export] @@ -192,7 +194,11 @@ fn build_final_config(cfg: Option) -> FinalConfig { merged_ignore.push(reg); } } - // Merge or add new priority rules + // Clear default priority rules if user provides their own + if !user_cfg.priority_rules.is_empty() { + merged_priority.clear(); + } + // Add user priority rules for user_rule in user_cfg.priority_rules { if user_rule.patterns.is_empty() { continue; @@ -348,14 +354,15 @@ pub fn get_file_priority( _ignore_pats: &[Regex], prio_list: &[PriorityPattern], ) -> i32 { - for prio in prio_list { + // Loop from highest score → lowest + for prio in prio_list.iter().rev() { for pat in &prio.patterns { if pat.is_match(rel_str) { return prio.score; } } } - 40 // fallback + 0 // fallback if nothing matches - lower than any user-defined priority } /// Get the commit time of the most recent change to each file. 
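[Editorial aside: the hunk above reorders `get_file_priority` — rules are now scanned from the highest score down, and the fallback drops from 40 to 0. A minimal sketch of those semantics, assuming `prio_list` stays sorted by ascending score as the `.rev()` loop implies; the rule patterns and scores below are made up for illustration:]

```rust
use regex::Regex;

struct PriorityPattern {
    score: i32,
    patterns: Vec<Regex>,
}

/// Highest-scored rule wins; unmatched files fall back to 0.
fn priority(rel_path: &str, prio_list: &[PriorityPattern]) -> i32 {
    // prio_list is assumed sorted ascending by score, so iterate in reverse.
    for prio in prio_list.iter().rev() {
        if prio.patterns.iter().any(|p| p.is_match(rel_path)) {
            return prio.score;
        }
    }
    0
}

fn main() {
    // Hypothetical rules: *.rs scores 10, anything under src/ scores 100.
    let rules = vec![
        PriorityPattern { score: 10, patterns: vec![Regex::new(r"\.rs$").unwrap()] },
        PriorityPattern { score: 100, patterns: vec![Regex::new(r"^src/").unwrap()] },
    ];
    // "src/lib.rs" matches both rules; the reverse scan returns 100, not 10.
    assert_eq!(priority("src/lib.rs", &rules), 100);
    // Unmatched files rank below anything a user explicitly prioritized.
    assert_eq!(priority("assets/logo.png", &rules), 0);
}
```

[Returning 0 for unmatched files is what guarantees they sort below every user-defined rule.]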
@@ -531,165 +538,188 @@ pub fn serialize_repo( None }; - // Collect files with their priorities - let mut files: Vec = Vec::new(); - let mut total_size = 0; - let mut current_chunk = 0; - let mut current_chunk_files = Vec::new(); - - // Walk directory tree - for entry in WalkDir::new(base_path) - .follow_links(true) - .into_iter() - .filter_map(|e| e.ok()) - { - let path = entry.path(); - if !path.is_file() { - continue; - } - - // Get path relative to base - let rel_path = path.strip_prefix(base_path).unwrap_or(path); - let rel_str = rel_path.to_string_lossy(); - - // Normalize path separators to forward slashes for consistent pattern matching - #[cfg(windows)] - let rel_str = rel_str.replace('\\', "/"); - - // Skip if matched by gitignore - #[cfg(windows)] - let gitignore_path = rel_path - .to_str() - .map(|s| s.replace('\\', "/")) - .map(PathBuf::from) - .unwrap_or(rel_path.to_path_buf()); - #[cfg(not(windows))] - let gitignore_path = rel_path.to_path_buf(); - - if gitignore.matched(&gitignore_path, false).is_ignore() { - debug!("Skipping {} - matched by gitignore", rel_str); - continue; - } + if stream { + // For streaming, we still use the old single-threaded approach + let mut files: Vec = Vec::new(); + let mut total_size = 0; + let mut current_chunk = 0; + let mut current_chunk_files = Vec::new(); + + // Walk directory tree + for entry in WalkDir::new(base_path) + .follow_links(true) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if !path.is_file() { + continue; + } - // Skip if matched by our ignore patterns - let mut skip = false; - #[cfg(windows)] - let pattern_path = rel_str.replace('\\', "/"); - #[cfg(not(windows))] - let pattern_path = rel_str.to_string(); + // Get path relative to base + let rel_path = path.strip_prefix(base_path).unwrap_or(path); + let rel_str = rel_path.to_string_lossy(); + + // Normalize path separators to forward slashes for consistent pattern matching + #[cfg(windows)] + let rel_str = rel_str.replace('\\', "/"); + + // Skip if matched by gitignore + #[cfg(windows)] + let gitignore_path = rel_path + .to_str() + .map(|s| s.replace('\\', "/")) + .map(PathBuf::from) + .unwrap_or(rel_path.to_path_buf()); + #[cfg(not(windows))] + let gitignore_path = rel_path.to_path_buf(); + + if gitignore.matched(&gitignore_path, false).is_ignore() { + debug!("Skipping {} - matched by gitignore", rel_str); + continue; + } - for pat in &final_config.ignore_patterns { - if pat.is_match(&pattern_path) { - debug!("Skipping {} - matched ignore pattern", rel_str); - skip = true; - break; + // Skip if matched by our ignore patterns + let mut skip = false; + #[cfg(windows)] + let pattern_path = rel_str.replace('\\', "/"); + #[cfg(not(windows))] + let pattern_path = rel_str.to_string(); + + for pat in &final_config.ignore_patterns { + if pat.is_match(&pattern_path) { + debug!("Skipping {} - matched ignore pattern", rel_str); + skip = true; + break; + } + } + if skip { + continue; } - } - if skip { - continue; - } - // Calculate priority score - let mut priority = get_file_priority( - &pattern_path, - &final_config.ignore_patterns, - &final_config.priority_list, - ); + // Calculate priority score + let mut priority = get_file_priority( + &pattern_path, + &final_config.ignore_patterns, + &final_config.priority_list, + ); - // Apply rank-based boost if available - if let Some(ref boost_map) = recentness_boost { - if let Some(boost) = boost_map.get(&pattern_path) { - priority += *boost; + // Apply rank-based boost if available + if let Some(ref boost_map) = 
recentness_boost { + if let Some(boost) = boost_map.get(&pattern_path) { + priority += *boost; + } } - } - files.push(FileEntry { - path: path.to_path_buf(), - priority, - }); - } + files.push(FileEntry { + path: path.to_path_buf(), + priority, + }); + } - // Sort files by priority (ascending) so higher priority files come last - files.sort_by(|a, b| a.priority.cmp(&b.priority)); + // Sort files by priority (ascending) so higher priority files come last + files.sort_by(|a, b| a.priority.cmp(&b.priority)); - // Process files in sorted order - for file in files { - let path = file.path; - let rel_path = path.strip_prefix(base_path).unwrap_or(&path); - let rel_str = rel_path.to_string_lossy(); + // Process files in sorted order + for file in files { + let path = file.path; + let rel_path = path.strip_prefix(base_path).unwrap_or(&path); + let rel_str = rel_path.to_string_lossy(); - // Skip binary files - if let Some(ref cfg) = config { - if !is_text_file(&path, &cfg.binary_extensions) { + // Skip binary files + if let Some(ref cfg) = config { + if !is_text_file(&path, &cfg.binary_extensions) { + debug!("Skipping binary file: {}", rel_str); + continue; + } + } else if !is_text_file(&path, &[]) { debug!("Skipping binary file: {}", rel_str); continue; } - } else if !is_text_file(&path, &[]) { - debug!("Skipping binary file: {}", rel_str); - continue; - } - // Read file content - let content = match fs::read_to_string(&path) { - Ok(c) => c, - Err(e) => { - debug!("Failed to read {}: {}", rel_str, e); + // Read file content + let content = match fs::read_to_string(&path) { + Ok(c) => c, + Err(e) => { + debug!("Failed to read {}: {}", rel_str, e); + continue; + } + }; + + let size = count_size(&content, count_tokens); + if size == 0 { + debug!("Skipping empty file: {}", rel_str); continue; } - }; - let size = count_size(&content, count_tokens); - if size == 0 { - debug!("Skipping empty file: {}", rel_str); - continue; - } - - // If a single file is larger than max_size, split it into multiple chunks - if size > max_size { - debug_file!("File exceeds chunk size, splitting into multiple chunks"); - let mut remaining = content.as_str(); - let mut part = 0; - - while !remaining.is_empty() { - let mut chunk_size = if count_tokens { - // In token mode, count words until we hit max_size - let mut chars = 0; - for (tokens, word) in remaining.split_whitespace().enumerate() { - if tokens + 1 > max_size { - break; + // If a single file is larger than max_size, split it into multiple chunks + if size > max_size { + debug_file!("File exceeds chunk size, splitting into multiple chunks"); + let mut remaining = content.as_str(); + let mut part = 0; + + while !remaining.is_empty() { + let mut chunk_size = if count_tokens { + // In token mode, count words until we hit max_size + let mut chars = 0; + for (tokens, word) in remaining.split_whitespace().enumerate() { + if tokens + 1 > max_size { + break; + } + chars += word.len() + 1; // +1 for space } - chars += word.len() + 1; // +1 for space + chars + } else { + max_size + }; + + // Ensure we make progress even if no word boundary found + if chunk_size == 0 { + chunk_size = std::cmp::min(max_size, remaining.len()); } - chars - } else { - max_size - }; - - // Ensure we make progress even if no word boundary found - if chunk_size == 0 { - chunk_size = std::cmp::min(max_size, remaining.len()); - } - let (chunk, rest) = remaining.split_at(std::cmp::min(chunk_size, remaining.len())); - remaining = rest.trim_start(); + let (chunk, rest) = + 
remaining.split_at(std::cmp::min(chunk_size, remaining.len())); + remaining = rest.trim_start(); + + let chunk_files = + vec![(format!("{}:part{}", rel_str, part), chunk.to_string())]; + debug_file!("Written chunk {}", part); + write_chunk( + &chunk_files, + part, + output_dir.as_deref(), + stream, + count_tokens, + )?; + part += 1; + } + continue; + } - let chunk_files = vec![(format!("{}:part{}", rel_str, part), chunk.to_string())]; - debug_file!("Written chunk {}", part); + // Check if adding this file would exceed chunk size + if total_size + size > max_size && !current_chunk_files.is_empty() { + // Write current chunk write_chunk( - &chunk_files, - part, + ¤t_chunk_files, + current_chunk, output_dir.as_deref(), stream, count_tokens, )?; - part += 1; + debug_file!("Written chunk {}", current_chunk); + current_chunk += 1; + current_chunk_files.clear(); + total_size = 0; } - continue; + + // Add file to current chunk + current_chunk_files.push((rel_str.to_string(), content)); + total_size += size; } - // Check if adding this file would exceed chunk size - if total_size + size > max_size && !current_chunk_files.is_empty() { - // Write current chunk + // Write final chunk if any files remain + if !current_chunk_files.is_empty() { write_chunk( ¤t_chunk_files, current_chunk, @@ -698,32 +728,23 @@ pub fn serialize_repo( count_tokens, )?; debug_file!("Written chunk {}", current_chunk); - current_chunk += 1; - current_chunk_files.clear(); - total_size = 0; } - // Add file to current chunk - current_chunk_files.push((rel_str.to_string(), content)); - total_size += size; - } - - // Write final chunk if any files remain - if !current_chunk_files.is_empty() { - write_chunk( - ¤t_chunk_files, - current_chunk, - output_dir.as_deref(), - stream, - count_tokens, - )?; - debug_file!("Written chunk {}", current_chunk); - } - - if stream { Ok(None) + } else if let Some(out_dir) = output_dir { + // Use parallel processing for non-streaming mode + process_files_parallel( + base_path, + max_size, + &out_dir, + config.as_ref(), + &final_config.ignore_patterns, + &final_config.priority_list, + recentness_boost.as_ref(), + )?; + Ok(Some(out_dir)) } else { - Ok(output_dir) + Ok(None) } } @@ -802,7 +823,7 @@ fn compute_recentness_boost( return HashMap::new(); } - // Sort by ascending commit time + // Sort by ascending commit time => first is oldest let mut sorted: Vec<(&String, &u64)> = commit_times.iter().collect(); sorted.sort_by_key(|(_, t)| **t); @@ -819,8 +840,8 @@ fn compute_recentness_boost( let mut result = HashMap::new(); for (i, (path, _time)) in sorted.iter().enumerate() { - let rank = i as f64 / last_index; // 0.0..1.0 - let boost = (rank * max_boost as f64).round() as i32; + let rank = i as f64 / last_index; // 0.0..1.0 (older files get lower rank) + let boost = (rank * max_boost as f64).round() as i32; // Newer files get higher boost result.insert((*path).clone(), boost); } result diff --git a/src/parallel.rs b/src/parallel.rs new file mode 100644 index 0000000..e110de7 --- /dev/null +++ b/src/parallel.rs @@ -0,0 +1,337 @@ +use crate::is_text_file; +use crate::{get_file_priority, get_recent_commit_times, PriorityPattern, YekConfig}; +use anyhow::Result; +use crossbeam::channel::{bounded, Receiver, Sender}; +use ignore::{gitignore::GitignoreBuilder, WalkBuilder}; +use num_cpus; +use regex::Regex; +use std::collections::HashMap; +use std::fs; +use std::io::{BufReader, Read}; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::SystemTime; +use tracing::{debug, info}; + +/// Represents a 
chunk of text read from one file +#[derive(Debug)] +pub struct FileChunk { + pub priority: i32, + pub file_index: usize, + pub part_index: usize, + pub rel_path: String, + pub content: String, +} + +/// File entry with priority for sorting +#[derive(Debug, Clone)] +struct FileEntry { + path: PathBuf, + priority: i32, + file_index: usize, +} + +/// Reads a file and determines if it's likely binary by checking for null bytes +fn is_likely_binary(path: &Path) -> Result { + let f = fs::File::open(path)?; + let mut reader = BufReader::new(f); + let mut buf = [0; 4096]; + let n = reader.read(&mut buf)?; + Ok(buf[..n].contains(&0)) +} + +/// Reads and chunks a single file, sending chunks through the channel +fn read_and_send_chunks( + file_entry: FileEntry, + base_path: &Path, + tx: &Sender, + max_size: usize, +) -> Result<()> { + // Skip if binary + if is_likely_binary(&file_entry.path)? { + return Ok(()); + } + + // Read file content + let content = fs::read_to_string(&file_entry.path)?; + if content.is_empty() { + return Ok(()); + } + + // Get relative path for display + let rel_path = file_entry + .path + .strip_prefix(base_path) + .unwrap_or(&file_entry.path) + .to_string_lossy() + .into_owned(); + + // If smaller than max_size, send as single chunk + if content.len() <= max_size { + let chunk = FileChunk { + priority: file_entry.priority, + file_index: file_entry.file_index, + part_index: 0, + rel_path, + content, + }; + tx.send(chunk).ok(); + return Ok(()); + } + + // Otherwise split into chunks + let mut start = 0; + let mut part_index = 0; + let bytes = content.as_bytes(); + + while start < bytes.len() { + let end = (start + max_size).min(bytes.len()); + let slice = &bytes[start..end]; + let chunk_str = String::from_utf8_lossy(slice).into_owned(); + + let chunk = FileChunk { + priority: file_entry.priority, + file_index: file_entry.file_index, + part_index, + rel_path: rel_path.clone(), + content: chunk_str, + }; + + tx.send(chunk).ok(); + start = end; + part_index += 1; + } + + Ok(()) +} + +/// Main parallel processing function that coordinates workers and aggregator +pub fn process_files_parallel( + base_dir: &Path, + max_size: usize, + output_dir: &Path, + config: Option<&YekConfig>, + ignore_patterns: &[Regex], + priority_list: &[PriorityPattern], + recentness_boost: Option<&HashMap>, +) -> Result<()> { + // Create output directory + fs::create_dir_all(output_dir)?; + + // Collect and sort files by priority + let files = collect_files( + base_dir, + config, + ignore_patterns, + priority_list, + recentness_boost, + )?; + if files.is_empty() { + return Ok(()); + } + + // Create channels for worker→aggregator communication + let (tx, rx) = bounded(256); + + // Spawn aggregator thread + let output_dir = output_dir.to_path_buf(); + let aggregator_handle = thread::spawn(move || aggregator_loop(rx, output_dir)); + + // Spawn worker threads + let num_threads = num_cpus::get(); + let chunk_size = (files.len() + num_threads - 1) / num_threads; + let mut handles = Vec::new(); + + for chunk in files.chunks(chunk_size) { + let chunk_files = chunk.to_vec(); + let sender = tx.clone(); + let base_path = base_dir.to_path_buf(); + + let handle = thread::spawn(move || -> Result<()> { + for file_entry in chunk_files { + read_and_send_chunks(file_entry, &base_path, &sender, max_size)?; + } + Ok(()) + }); + handles.push(handle); + } + + // Drop original sender + drop(tx); + + // Wait for workers + for handle in handles { + handle.join().unwrap()?; + } + + // Wait for aggregator + 
aggregator_handle.join().unwrap()?; + + Ok(()) +} + +/// Collects files from directory respecting .gitignore and sorts by priority +fn collect_files( + base_dir: &Path, + config: Option<&YekConfig>, + ignore_patterns: &[Regex], + priority_list: &[PriorityPattern], + recentness_boost: Option<&HashMap>, +) -> Result> { + // Build gitignore matcher + let mut builder = GitignoreBuilder::new(base_dir); + let gitignore_path = base_dir.join(".gitignore"); + if gitignore_path.exists() { + builder.add(&gitignore_path); + } + let gitignore = builder + .build() + .unwrap_or_else(|_| GitignoreBuilder::new(base_dir).build().unwrap()); + + let mut builder = WalkBuilder::new(base_dir); + builder.follow_links(false).standard_filters(true); + + let mut results = Vec::new(); + let mut file_index = 0; + + for entry in builder.build() { + if let Ok(entry) = entry { + if entry.file_type().map_or(false, |ft| ft.is_file()) { + let path = entry.path().to_path_buf(); + let rel_path = path.strip_prefix(base_dir).unwrap_or(&path); + let rel_str = rel_path.to_string_lossy(); + + // Skip if matched by gitignore + #[cfg(windows)] + let gitignore_path = rel_path + .to_str() + .map(|s| s.replace('\\', "/")) + .map(PathBuf::from) + .unwrap_or(rel_path.to_path_buf()); + #[cfg(not(windows))] + let gitignore_path = rel_path.to_path_buf(); + + if gitignore.matched(&gitignore_path, false).is_ignore() { + debug!("Skipping {} - matched by gitignore", rel_str); + continue; + } + + // Skip if matched by our ignore patterns + let mut skip = false; + for pat in ignore_patterns { + if pat.is_match(&rel_str) { + debug!("Skipping {} - matched ignore pattern", rel_str); + skip = true; + break; + } + } + if skip { + continue; + } + + // Skip binary files + if let Some(ref cfg) = config { + if !is_text_file(&path, &cfg.binary_extensions) { + debug!("Skipping binary file: {}", rel_str); + continue; + } + } else if !is_text_file(&path, &[]) { + debug!("Skipping binary file: {}", rel_str); + continue; + } + + // Calculate priority score + let mut priority = get_file_priority(&rel_str, ignore_patterns, priority_list); + + // Apply git recentness boost + if let Some(boost_map) = recentness_boost { + if let Some(boost) = boost_map.get(&rel_str.to_string()) { + priority += *boost; + } + } + + results.push(FileEntry { + path, + priority, + file_index, + }); + file_index += 1; + } + } + } + + // Sort by priority (ascending) so higher priority files come last + results.sort_by(|a, b| { + // First sort by priority (ascending) + let p = a.priority.cmp(&b.priority); + if p != std::cmp::Ordering::Equal { + return p; + } + // If priorities are equal, sort by Git boost (ascending) + if let Some(boost_map) = recentness_boost { + let a_boost = boost_map + .get(&a.path.to_string_lossy().to_string()) + .unwrap_or(&0); + let b_boost = boost_map + .get(&b.path.to_string_lossy().to_string()) + .unwrap_or(&0); + return a_boost.cmp(b_boost); // Lower boost (older files) come first + } + std::cmp::Ordering::Equal + }); + Ok(results) +} + +/// Receives chunks from workers and writes them to files +fn aggregator_loop(rx: Receiver, output_dir: PathBuf) -> Result<()> { + // Create output directory + fs::create_dir_all(&output_dir)?; + + // Collect all chunks + let mut all_chunks = Vec::new(); + while let Ok(chunk) = rx.recv() { + all_chunks.push(chunk); + } + + // Sort chunks by priority (ascending), then file_index, then part_index + // so that higher priority files come last + all_chunks.sort_by(|a, b| { + let p = a.priority.cmp(&b.priority); + if p != 
std::cmp::Ordering::Equal { + return p; + } + // Then sort by file_index + let f = a.file_index.cmp(&b.file_index); + if f != std::cmp::Ordering::Equal { + return f; + } + // Finally sort by part_index + a.part_index.cmp(&b.part_index) + }); + + // Write sorted chunks + let mut current_chunk = String::new(); + let mut current_chunk_index = 0; + + for chunk in all_chunks { + let mut content = String::new(); + content.push_str(&format!(">>>> {}\n", chunk.rel_path)); + content.push_str(&chunk.content); + content.push_str("\n\n"); + + current_chunk.push_str(&content); + } + + // Write the final chunk + if !current_chunk.is_empty() { + let out_path = output_dir.join(format!("chunk-{}.txt", current_chunk_index)); + fs::write(&out_path, ¤t_chunk)?; + info!( + "Written chunk {} with {} lines.", + current_chunk_index, + current_chunk.lines().count() + ); + } + + Ok(()) +} diff --git a/tests/test_perf.rs b/tests/test_perf.rs index fca904b..4d5f040 100644 --- a/tests/test_perf.rs +++ b/tests/test_perf.rs @@ -1,10 +1,39 @@ use std::fs; use std::path::Path; -use std::time::Instant; +use std::time::{Duration, Instant}; use yek::serialize_repo; +struct PerfStats { + min: Duration, + max: Duration, + avg: Duration, + total_runs: usize, +} + +impl PerfStats { + fn new() -> Self { + PerfStats { + min: Duration::from_secs(u64::MAX), + max: Duration::from_secs(0), + avg: Duration::from_secs(0), + total_runs: 0, + } + } + + fn update(&mut self, duration: Duration) { + self.min = self.min.min(duration); + self.max = self.max.max(duration); + self.total_runs += 1; + // Compute running average + self.avg = (self.avg * (self.total_runs - 1) as u32 + duration) / self.total_runs as u32; + } +} + #[test] fn test_serialization_performance() { + const WARMUP_RUNS: usize = 2; + const BENCH_RUNS: usize = 5; + // Create test data directory let test_dir = "test_perf_data"; fs::create_dir_all(test_dir).unwrap(); @@ -12,29 +41,58 @@ fn test_serialization_performance() { // Create test files of different sizes let sizes = vec![1024, 1024 * 1024, 10 * 1024 * 1024]; // 1KB, 1MB, 10MB + println!("\nPerformance Benchmark Results:"); + println!("------------------------------"); + for size in sizes { let filename = format!("{}/file_{}_bytes.txt", test_dir, size); let data = vec![b'a'; size]; fs::write(&filename, &data).unwrap(); - // Measure serialization time - let start = Instant::now(); - serialize_repo( - size, // max_size - Some(Path::new(test_dir)), // base_path - false, // stream - false, // count_tokens - None, // config - Some(Path::new("perf_output")), // output_dir - None, // max_files - ) - .unwrap(); - let duration = start.elapsed(); - - println!("Serializing {}B took: {:?}", size, duration); - - // Cleanup - fs::remove_dir_all("perf_output").unwrap(); + // Warmup runs + println!("\nFile size: {}B", size); + println!("Warmup runs..."); + for _ in 0..WARMUP_RUNS { + serialize_repo( + size, + Some(Path::new(test_dir)), + false, + false, + None, + Some(Path::new("perf_output")), + None, + ) + .unwrap(); + fs::remove_dir_all("perf_output").unwrap(); + } + + // Benchmark runs + let mut stats = PerfStats::new(); + println!("Benchmark runs..."); + + for run in 1..=BENCH_RUNS { + let start = Instant::now(); + serialize_repo( + size, + Some(Path::new(test_dir)), + false, + false, + None, + Some(Path::new("perf_output")), + None, + ) + .unwrap(); + let duration = start.elapsed(); + stats.update(duration); + + println!(" Run {}: {:?}", run, duration); + fs::remove_dir_all("perf_output").unwrap(); + } + + println!("\nStats for 
{}B:", size); + println!(" Min: {:?}", stats.min); + println!(" Max: {:?}", stats.max); + println!(" Avg: {:?}", stats.avg); } // Final cleanup From a3951ca32506bc2916be3e539adae4f02ec66fd3 Mon Sep 17 00:00:00 2001 From: Mohsen Azimi Date: Sun, 19 Jan 2025 11:23:51 +0700 Subject: [PATCH 3/7] style: fix linting issues in parallel.rs --- src/parallel.rs | 124 ++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/src/parallel.rs b/src/parallel.rs index e110de7..ac4bbba 100644 --- a/src/parallel.rs +++ b/src/parallel.rs @@ -1,16 +1,15 @@ use crate::is_text_file; -use crate::{get_file_priority, get_recent_commit_times, PriorityPattern, YekConfig}; +use crate::{get_file_priority, PriorityPattern, YekConfig}; use anyhow::Result; use crossbeam::channel::{bounded, Receiver, Sender}; use ignore::{gitignore::GitignoreBuilder, WalkBuilder}; -use num_cpus; +use num_cpus::get; use regex::Regex; use std::collections::HashMap; use std::fs; use std::io::{BufReader, Read}; use std::path::{Path, PathBuf}; use std::thread; -use std::time::SystemTime; use tracing::{debug, info}; /// Represents a chunk of text read from one file @@ -138,8 +137,8 @@ pub fn process_files_parallel( let aggregator_handle = thread::spawn(move || aggregator_loop(rx, output_dir)); // Spawn worker threads - let num_threads = num_cpus::get(); - let chunk_size = (files.len() + num_threads - 1) / num_threads; + let num_threads = get(); + let chunk_size = files.len().div_ceil(num_threads); let mut handles = Vec::new(); for chunk in files.chunks(chunk_size) { @@ -194,69 +193,67 @@ fn collect_files( let mut results = Vec::new(); let mut file_index = 0; - for entry in builder.build() { - if let Ok(entry) = entry { - if entry.file_type().map_or(false, |ft| ft.is_file()) { - let path = entry.path().to_path_buf(); - let rel_path = path.strip_prefix(base_dir).unwrap_or(&path); - let rel_str = rel_path.to_string_lossy(); - - // Skip if matched by gitignore - #[cfg(windows)] - let gitignore_path = rel_path - .to_str() - .map(|s| s.replace('\\', "/")) - .map(PathBuf::from) - .unwrap_or(rel_path.to_path_buf()); - #[cfg(not(windows))] - let gitignore_path = rel_path.to_path_buf(); - - if gitignore.matched(&gitignore_path, false).is_ignore() { - debug!("Skipping {} - matched by gitignore", rel_str); - continue; - } + for entry in builder.build().flatten() { + if entry.file_type().is_some_and(|ft| ft.is_file()) { + let path = entry.path().to_path_buf(); + let rel_path = path.strip_prefix(base_dir).unwrap_or(&path); + let rel_str = rel_path.to_string_lossy(); + + // Skip if matched by gitignore + #[cfg(windows)] + let gitignore_path = rel_path + .to_str() + .map(|s| s.replace('\\', "/")) + .map(PathBuf::from) + .unwrap_or(rel_path.to_path_buf()); + #[cfg(not(windows))] + let gitignore_path = rel_path.to_path_buf(); + + if gitignore.matched(&gitignore_path, false).is_ignore() { + debug!("Skipping {} - matched by gitignore", rel_str); + continue; + } - // Skip if matched by our ignore patterns - let mut skip = false; - for pat in ignore_patterns { - if pat.is_match(&rel_str) { - debug!("Skipping {} - matched ignore pattern", rel_str); - skip = true; - break; - } - } - if skip { - continue; + // Skip if matched by our ignore patterns + let mut skip = false; + for pat in ignore_patterns { + if pat.is_match(&rel_str) { + debug!("Skipping {} - matched ignore pattern", rel_str); + skip = true; + break; } + } + if skip { + continue; + } - // Skip binary files - if let Some(ref cfg) = config { - if 
!is_text_file(&path, &cfg.binary_extensions) { - debug!("Skipping binary file: {}", rel_str); - continue; - } - } else if !is_text_file(&path, &[]) { + // Skip binary files + if let Some(cfg) = config { + if !is_text_file(&path, &cfg.binary_extensions) { debug!("Skipping binary file: {}", rel_str); continue; } + } else if !is_text_file(&path, &[]) { + debug!("Skipping binary file: {}", rel_str); + continue; + } - // Calculate priority score - let mut priority = get_file_priority(&rel_str, ignore_patterns, priority_list); + // Calculate priority score + let mut priority = get_file_priority(&rel_str, ignore_patterns, priority_list); - // Apply git recentness boost - if let Some(boost_map) = recentness_boost { - if let Some(boost) = boost_map.get(&rel_str.to_string()) { - priority += *boost; - } + // Apply git recentness boost + if let Some(boost_map) = recentness_boost { + if let Some(boost) = boost_map.get(&rel_str.to_string()) { + priority += *boost; } - - results.push(FileEntry { - path, - priority, - file_index, - }); - file_index += 1; } + + results.push(FileEntry { + path, + priority, + file_index, + }); + file_index += 1; } } @@ -284,45 +281,36 @@ fn collect_files( /// Receives chunks from workers and writes them to files fn aggregator_loop(rx: Receiver, output_dir: PathBuf) -> Result<()> { - // Create output directory fs::create_dir_all(&output_dir)?; - // Collect all chunks let mut all_chunks = Vec::new(); while let Ok(chunk) = rx.recv() { all_chunks.push(chunk); } - // Sort chunks by priority (ascending), then file_index, then part_index - // so that higher priority files come last all_chunks.sort_by(|a, b| { let p = a.priority.cmp(&b.priority); if p != std::cmp::Ordering::Equal { return p; } - // Then sort by file_index let f = a.file_index.cmp(&b.file_index); if f != std::cmp::Ordering::Equal { return f; } - // Finally sort by part_index a.part_index.cmp(&b.part_index) }); - // Write sorted chunks let mut current_chunk = String::new(); - let mut current_chunk_index = 0; + let current_chunk_index = 0; for chunk in all_chunks { let mut content = String::new(); content.push_str(&format!(">>>> {}\n", chunk.rel_path)); content.push_str(&chunk.content); content.push_str("\n\n"); - current_chunk.push_str(&content); } - // Write the final chunk if !current_chunk.is_empty() { let out_path = output_dir.join(format!("chunk-{}.txt", current_chunk_index)); fs::write(&out_path, ¤t_chunk)?; From 10d0102cdd10394891cecb868f181dd6c8f93514 Mon Sep 17 00:00:00 2001 From: Mohsen Azimi Date: Sun, 19 Jan 2025 11:34:52 +0700 Subject: [PATCH 4/7] feat: add benchmark regression test with 5% threshold --- .github/workflows/ci.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7eb9cd..8763755 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,6 +119,46 @@ jobs: path: ${{ matrix.asset_name }} if-no-files-found: error + benchmark: + name: Benchmark + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: true + + - name: Build benchmarks on target branch + run: | + git fetch origin ${{ github.base_ref }} + git checkout ${{ github.base_ref }} + cargo bench --bench serialization --no-run + + - name: Run benchmark on target branch + run: cargo bench --bench serialization -- 
--save-baseline main + + - name: Build benchmarks on PR branch + run: | + git checkout ${{ github.head_ref }} + cargo bench --bench serialization --no-run + + - name: Compare benchmarks + run: cargo bench --bench serialization -- --baseline main --significance-threshold 5 + + - name: Upload benchmark results + uses: actions/upload-artifact@v3 + with: + name: criterion-results + path: target/criterion/ + if-no-files-found: error + release: name: Release needs: [test, lint, build] From f0a970f7083893cd186e127578f7d3dd81493038 Mon Sep 17 00:00:00 2001 From: Mohsen Azimi Date: Sun, 19 Jan 2025 11:44:25 +0700 Subject: [PATCH 5/7] feat: add comprehensive benchmarks for serialization --- Cargo.lock | 67 ++++++++ Cargo.toml | 1 + benches/serialization.rs | 326 +++++++++++++++++++++++++++++++++++---- 3 files changed, 366 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7184df0..173bab5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -151,6 +151,12 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cast" version = "0.3.0" @@ -785,6 +791,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "predicates" version = "3.1.3" @@ -833,6 +848,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "rayon" version = "1.10.0" @@ -1591,6 +1636,7 @@ dependencies = [ "indicatif", "num_cpus", "predicates", + "rand", "regex", "serde", "sha2", @@ -1601,3 +1647,24 @@ dependencies = [ "tracing-subscriber", "walkdir", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index c991bff..b0d4c2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ chrono = "0.4" predicates = "3.0" tempfile = "3.9" criterion = "0.5" +rand = "0.8" [[bench]] name = "serialization" diff --git a/benches/serialization.rs 
b/benches/serialization.rs index 315db18..0b76c40 100644 --- a/benches/serialization.rs +++ b/benches/serialization.rs @@ -1,36 +1,151 @@ -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use std::fs; +use criterion::{ + black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput, +}; +use rand::{distributions::Alphanumeric, Rng}; +use std::fs::{self, File}; +use std::io::Write; use std::path::Path; use tempfile::TempDir; -use yek::serialize_repo; +use yek::{serialize_repo, YekConfig}; -fn create_test_data(dir: &Path, size: usize) { - let filename = dir.join(format!("file_{}_bytes.txt", size)); +/// Creates a text file of a specified size in bytes. +fn create_test_data_bytes(dir: &Path, size: usize, file_name: &str) { + let filename = dir.join(file_name); let data = vec![b'a'; size]; - fs::write(&filename, &data).unwrap(); + fs::write(&filename, &data).expect("Unable to write test data"); } -fn bench_serialization(c: &mut Criterion) { - let mut group = c.benchmark_group("serialization"); - group.sample_size(10); // Number of samples to collect +/// Creates a file with a specified approximate number of tokens. +fn create_test_data_tokens(dir: &Path, tokens: usize, file_name: &str) { + let filename = dir.join(file_name); + // Each "token" is a short random word followed by a space + let mut rng = rand::thread_rng(); + let mut file = File::create(&filename).expect("Unable to create file"); - // Test different file sizes - let sizes = vec![ - 1024, // 1KB - 1024 * 1024, // 1MB - 10 * 1024 * 1024, // 10MB - ]; + for _ in 0..tokens { + let word: String = (0..4).map(|_| rng.sample(Alphanumeric) as char).collect(); + write!(file, "{} ", word).expect("Unable to write token"); + } + file.flush().unwrap(); +} + +/// Creates multiple files of given sizes in a single directory. +fn create_multiple_files(dir: &Path, sizes: &[usize], prefix: &str) { + for (i, &size) in sizes.iter().enumerate() { + let file_name = format!("{}_{}.txt", prefix, i); + create_test_data_bytes(dir, size, &file_name); + } +} + +/// Creates multiple files with a given token count each. 
+fn create_multiple_token_files(dir: &Path, tokens: &[usize], prefix: &str) { + for (i, &token_count) in tokens.iter().enumerate() { + let file_name = format!("{}_{}.txt", prefix, i); + create_test_data_tokens(dir, token_count, &file_name); + } +} + +fn single_small_file_byte_mode(c: &mut Criterion) { + let mut group = c.benchmark_group("SingleFile_ByteMode"); + let temp_dir = TempDir::new().unwrap(); + + let size = 10 * 1024; // 10 KB + create_test_data_bytes(temp_dir.path(), size, "small_file.txt"); + + let output_dir = temp_dir.path().join("output"); - for size in sizes { - // Create a new temporary directory for each benchmark - let temp_dir = TempDir::new().unwrap(); - let output_dir = temp_dir.path().join("output"); - create_test_data(temp_dir.path(), size); + group.throughput(Throughput::Bytes(size as u64)); + group.bench_function("single_small_file", |b| { + b.iter(|| { + serialize_repo( + black_box(1024 * 1024), // 1MB chunk + Some(temp_dir.path()), + false, + false, + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }); + }); + group.finish(); +} + +fn single_large_file_byte_mode(c: &mut Criterion) { + let mut group = c.benchmark_group("SingleFile_ByteMode_Large"); + let temp_dir = TempDir::new().unwrap(); + + let size = 10 * 1024 * 1024; // 10 MB + create_test_data_bytes(temp_dir.path(), size, "large_file.txt"); + + let output_dir = temp_dir.path().join("output"); + + group.throughput(Throughput::Bytes(size as u64)); + group.bench_function("single_large_file", |b| { + b.iter(|| { + serialize_repo( + black_box(5 * 1024 * 1024), // 5MB chunk, forces splits + Some(temp_dir.path()), + false, + false, + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }); + }); + group.finish(); +} - group.bench_with_input(BenchmarkId::new("file_size", size), &size, |b, &size| { - b.iter(|| { +fn single_large_file_token_mode(c: &mut Criterion) { + let mut group = c.benchmark_group("SingleFile_TokenMode_Large"); + let temp_dir = TempDir::new().unwrap(); + + let token_count = 200_000; + create_test_data_tokens(temp_dir.path(), token_count, "large_tokens.txt"); + + let output_dir = temp_dir.path().join("output"); + + group.throughput(Throughput::Elements(token_count as u64)); + group.bench_function("single_large_token_file", |b| { + b.iter(|| { + serialize_repo( + black_box(50_000), // 50k tokens + Some(temp_dir.path()), + false, + true, // token-based + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }); + }); + group.finish(); +} + +fn multiple_small_files(c: &mut Criterion) { + let mut group = c.benchmark_group("MultipleFiles_Small"); + group.sample_size(10); // we can tune the sample size as needed + + group.bench_function("multiple_small_files", |b| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + // Create a set of small files + let sizes = vec![1024; 50]; // 50 files of 1KB each + create_multiple_files(temp_dir.path(), &sizes, "small"); + let output_dir = temp_dir.path().join("output"); + (temp_dir, output_dir) + }, + |(temp_dir, output_dir)| { serialize_repo( - black_box(size), + black_box(10 * 1024), // 10KB chunk Some(temp_dir.path()), false, false, @@ -39,12 +154,167 @@ fn bench_serialization(c: &mut Criterion) { None, ) .unwrap(); - fs::remove_dir_all(&output_dir).unwrap(); - }); - }); - } + fs::remove_dir_all(&output_dir).ok(); + }, + BatchSize::SmallInput, + ); + }); + group.finish(); +} + +fn multiple_medium_files(c: &mut Criterion) 
{ + let mut group = c.benchmark_group("MultipleFiles_Medium"); + + group.bench_function("multiple_medium_files", |b| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + // Create 20 files with sizes from 100KB to 500KB + let sizes = (100..=500) + .step_by(20) + .map(|kb| kb * 1024) + .collect::>(); + create_multiple_files(temp_dir.path(), &sizes, "medium"); + let output_dir = temp_dir.path().join("output"); + (temp_dir, output_dir) + }, + |(temp_dir, output_dir)| { + serialize_repo( + black_box(512 * 1024), // 512KB chunk + Some(temp_dir.path()), + false, + false, + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }, + BatchSize::SmallInput, + ); + }); + group.finish(); +} + +fn multiple_large_files(c: &mut Criterion) { + let mut group = c.benchmark_group("MultipleFiles_Large"); + + group.bench_function("multiple_large_files", |b| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + // Create 5 large files, each ~ 5 MB + let sizes = vec![5_242_880; 5]; // ~5 MB x 5 + create_multiple_files(temp_dir.path(), &sizes, "large"); + let output_dir = temp_dir.path().join("output"); + (temp_dir, output_dir) + }, + |(temp_dir, output_dir)| { + serialize_repo( + black_box(2 * 1024 * 1024), // 2 MB chunk to force splits + Some(temp_dir.path()), + false, + false, + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }, + BatchSize::SmallInput, + ); + }); + group.finish(); +} + +fn multiple_token_files(c: &mut Criterion) { + let mut group = c.benchmark_group("MultipleFiles_TokenMode"); + + group.bench_function("multiple_token_files", |b| { + b.iter_batched( + || { + let temp_dir = TempDir::new().unwrap(); + // Create 10 files with 10k tokens each + let tokens = vec![10_000; 10]; + create_multiple_token_files(temp_dir.path(), &tokens, "token"); + let output_dir = temp_dir.path().join("output"); + (temp_dir, output_dir) + }, + |(temp_dir, output_dir)| { + serialize_repo( + black_box(5_000), // 5k tokens chunk + Some(temp_dir.path()), + false, + true, // token-based + None, + Some(&output_dir), + None, + ) + .unwrap(); + fs::remove_dir_all(&output_dir).ok(); + }, + BatchSize::SmallInput, + ); + }); + group.finish(); +} + +/// Demonstrates using a custom config (e.g. extra ignores or priority rules). 
+fn custom_config_test(c: &mut Criterion) {
+    let mut group = c.benchmark_group("CustomConfig");
+    let mut config = YekConfig::default();
+    // Add a silly priority rule for *.foo
+    config.priority_rules.push(yek::PriorityRule {
+        score: 500,
+        patterns: vec!["\\.foo$".into()],
+    });
+
+    group.bench_function("custom_config_test", |b| {
+        b.iter_batched(
+            || {
+                let temp_dir = TempDir::new().unwrap();
+                // Create several files, some .foo, some .bar
+                let files = &[
+                    (50_000, "file1.foo"),
+                    (70_000, "file2.bar"),
+                    (90_000, "file3.foo"),
+                ];
+                for &(size, name) in files {
+                    create_test_data_bytes(temp_dir.path(), size, name);
+                }
+                let output_dir = temp_dir.path().join("output");
+                (temp_dir, output_dir)
+            },
+            |(temp_dir, output_dir)| {
+                serialize_repo(
+                    black_box(128_000),
+                    Some(temp_dir.path()),
+                    false,
+                    false,
+                    Some(config.clone()),
+                    Some(&output_dir),
+                    None,
+                )
+                .unwrap();
+                fs::remove_dir_all(&output_dir).ok();
+            },
+            BatchSize::SmallInput,
+        );
+    });
     group.finish();
 }
 
-criterion_group!(benches, bench_serialization);
+criterion_group!(
+    benches,
+    single_small_file_byte_mode,
+    single_large_file_byte_mode,
+    single_large_file_token_mode,
+    multiple_small_files,
+    multiple_medium_files,
+    multiple_large_files,
+    multiple_token_files,
+    custom_config_test,
+);
 criterion_main!(benches);

From 2cd09e66b22f9efab2fe8e99fdbe8e3473f4b1db Mon Sep 17 00:00:00 2001
From: Mohsen Azimi
Date: Sun, 19 Jan 2025 11:51:55 +0700
Subject: [PATCH 6/7] git: undo parallel execution in test branch

---
 src/lib.rs      | 337 +++++++++++++++++++++++-------------------
 src/parallel.rs | 325 ----------------------------------------
 2 files changed, 158 insertions(+), 504 deletions(-)
 delete mode 100644 src/parallel.rs

diff --git a/src/lib.rs b/src/lib.rs
index 20a8861..0b6e5e5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,8 +9,6 @@ use std::path::{Path, PathBuf};
 use std::process::{Command as SysCommand, Stdio};
 use tracing::{debug, info};
 use walkdir::WalkDir;
-mod parallel;
-use parallel::process_files_parallel;
 
 /// Helper macro to write debug statements both to standard debug log and to debug file if set.
 #[macro_export]
@@ -194,11 +192,7 @@ fn build_final_config(cfg: Option<YekConfig>) -> FinalConfig {
             merged_ignore.push(reg);
         }
     }
-    // Clear default priority rules if user provides their own
-    if !user_cfg.priority_rules.is_empty() {
-        merged_priority.clear();
-    }
-    // Add user priority rules
+    // Merge or add new priority rules
     for user_rule in user_cfg.priority_rules {
         if user_rule.patterns.is_empty() {
             continue;
@@ -354,15 +348,14 @@ pub fn get_file_priority(
     _ignore_pats: &[Regex],
     prio_list: &[PriorityPattern],
 ) -> i32 {
-    // Loop from highest score → lowest
-    for prio in prio_list.iter().rev() {
+    for prio in prio_list {
         for pat in &prio.patterns {
             if pat.is_match(rel_str) {
                 return prio.score;
             }
         }
     }
-    0 // fallback if nothing matches - lower than any user-defined priority
+    40 // fallback
 }
 
 /// Get the commit time of the most recent change to each file.
@@ -538,188 +531,165 @@ pub fn serialize_repo(
         None
     };
 
-    if stream {
-        // For streaming, we still use the old single-threaded approach
-        let mut files: Vec<FileEntry> = Vec::new();
-        let mut total_size = 0;
-        let mut current_chunk = 0;
-        let mut current_chunk_files = Vec::new();
-
-        // Walk directory tree
-        for entry in WalkDir::new(base_path)
-            .follow_links(true)
-            .into_iter()
-            .filter_map(|e| e.ok())
-        {
-            let path = entry.path();
-            if !path.is_file() {
-                continue;
-            }
+    // Collect files with their priorities
+    let mut files: Vec<FileEntry> = Vec::new();
+    let mut total_size = 0;
+    let mut current_chunk = 0;
+    let mut current_chunk_files = Vec::new();
+
+    // Walk directory tree
+    for entry in WalkDir::new(base_path)
+        .follow_links(true)
+        .into_iter()
+        .filter_map(|e| e.ok())
+    {
+        let path = entry.path();
+        if !path.is_file() {
+            continue;
+        }
 
-            // Get path relative to base
-            let rel_path = path.strip_prefix(base_path).unwrap_or(path);
-            let rel_str = rel_path.to_string_lossy();
-
-            // Normalize path separators to forward slashes for consistent pattern matching
-            #[cfg(windows)]
-            let rel_str = rel_str.replace('\\', "/");
-
-            // Skip if matched by gitignore
-            #[cfg(windows)]
-            let gitignore_path = rel_path
-                .to_str()
-                .map(|s| s.replace('\\', "/"))
-                .map(PathBuf::from)
-                .unwrap_or(rel_path.to_path_buf());
-            #[cfg(not(windows))]
-            let gitignore_path = rel_path.to_path_buf();
-
-            if gitignore.matched(&gitignore_path, false).is_ignore() {
-                debug!("Skipping {} - matched by gitignore", rel_str);
-                continue;
-            }
+        // Get path relative to base
+        let rel_path = path.strip_prefix(base_path).unwrap_or(path);
+        let rel_str = rel_path.to_string_lossy();
 
-            // Skip if matched by our ignore patterns
-            let mut skip = false;
-            #[cfg(windows)]
-            let pattern_path = rel_str.replace('\\', "/");
-            #[cfg(not(windows))]
-            let pattern_path = rel_str.to_string();
-
-            for pat in &final_config.ignore_patterns {
-                if pat.is_match(&pattern_path) {
-                    debug!("Skipping {} - matched ignore pattern", rel_str);
-                    skip = true;
-                    break;
-                }
-            }
-            if skip {
-                continue;
-            }
+        // Normalize path separators to forward slashes for consistent pattern matching
+        #[cfg(windows)]
+        let rel_str = rel_str.replace('\\', "/");
 
-            // Calculate priority score
-            let mut priority = get_file_priority(
-                &pattern_path,
-                &final_config.ignore_patterns,
-                &final_config.priority_list,
-            );
+        // Skip if matched by gitignore
+        #[cfg(windows)]
+        let gitignore_path = rel_path
+            .to_str()
+            .map(|s| s.replace('\\', "/"))
+            .map(PathBuf::from)
+            .unwrap_or(rel_path.to_path_buf());
+        #[cfg(not(windows))]
+        let gitignore_path = rel_path.to_path_buf();
 
-            // Apply rank-based boost if available
-            if let Some(ref boost_map) = recentness_boost {
-                if let Some(boost) = boost_map.get(&pattern_path) {
-                    priority += *boost;
-                }
+        if gitignore.matched(&gitignore_path, false).is_ignore() {
+            debug!("Skipping {} - matched by gitignore", rel_str);
+            continue;
+        }
+
+        // Skip if matched by our ignore patterns
+        let mut skip = false;
+        #[cfg(windows)]
+        let pattern_path = rel_str.replace('\\', "/");
+        #[cfg(not(windows))]
+        let pattern_path = rel_str.to_string();
+
+        for pat in &final_config.ignore_patterns {
+            if pat.is_match(&pattern_path) {
+                debug!("Skipping {} - matched ignore pattern", rel_str);
+                skip = true;
+                break;
             }
+        }
+        if skip {
+            continue;
+        }
 
-            files.push(FileEntry {
-                path: path.to_path_buf(),
-                priority,
-            });
+        // Calculate priority score
+        let mut priority = get_file_priority(
+            &pattern_path,
+            &final_config.ignore_patterns,
+            &final_config.priority_list,
+        );
+ + // Apply rank-based boost if available + if let Some(ref boost_map) = recentness_boost { + if let Some(boost) = boost_map.get(&pattern_path) { + priority += *boost; + } } - // Sort files by priority (ascending) so higher priority files come last - files.sort_by(|a, b| a.priority.cmp(&b.priority)); + files.push(FileEntry { + path: path.to_path_buf(), + priority, + }); + } - // Process files in sorted order - for file in files { - let path = file.path; - let rel_path = path.strip_prefix(base_path).unwrap_or(&path); - let rel_str = rel_path.to_string_lossy(); + // Sort files by priority (ascending) so higher priority files come last + files.sort_by(|a, b| a.priority.cmp(&b.priority)); - // Skip binary files - if let Some(ref cfg) = config { - if !is_text_file(&path, &cfg.binary_extensions) { - debug!("Skipping binary file: {}", rel_str); - continue; - } - } else if !is_text_file(&path, &[]) { + // Process files in sorted order + for file in files { + let path = file.path; + let rel_path = path.strip_prefix(base_path).unwrap_or(&path); + let rel_str = rel_path.to_string_lossy(); + + // Skip binary files + if let Some(ref cfg) = config { + if !is_text_file(&path, &cfg.binary_extensions) { debug!("Skipping binary file: {}", rel_str); continue; } + } else if !is_text_file(&path, &[]) { + debug!("Skipping binary file: {}", rel_str); + continue; + } - // Read file content - let content = match fs::read_to_string(&path) { - Ok(c) => c, - Err(e) => { - debug!("Failed to read {}: {}", rel_str, e); - continue; - } - }; - - let size = count_size(&content, count_tokens); - if size == 0 { - debug!("Skipping empty file: {}", rel_str); + // Read file content + let content = match fs::read_to_string(&path) { + Ok(c) => c, + Err(e) => { + debug!("Failed to read {}: {}", rel_str, e); continue; } + }; - // If a single file is larger than max_size, split it into multiple chunks - if size > max_size { - debug_file!("File exceeds chunk size, splitting into multiple chunks"); - let mut remaining = content.as_str(); - let mut part = 0; - - while !remaining.is_empty() { - let mut chunk_size = if count_tokens { - // In token mode, count words until we hit max_size - let mut chars = 0; - for (tokens, word) in remaining.split_whitespace().enumerate() { - if tokens + 1 > max_size { - break; - } - chars += word.len() + 1; // +1 for space + let size = count_size(&content, count_tokens); + if size == 0 { + debug!("Skipping empty file: {}", rel_str); + continue; + } + + // If a single file is larger than max_size, split it into multiple chunks + if size > max_size { + debug_file!("File exceeds chunk size, splitting into multiple chunks"); + let mut remaining = content.as_str(); + let mut part = 0; + + while !remaining.is_empty() { + let mut chunk_size = if count_tokens { + // In token mode, count words until we hit max_size + let mut chars = 0; + for (tokens, word) in remaining.split_whitespace().enumerate() { + if tokens + 1 > max_size { + break; } - chars - } else { - max_size - }; - - // Ensure we make progress even if no word boundary found - if chunk_size == 0 { - chunk_size = std::cmp::min(max_size, remaining.len()); + chars += word.len() + 1; // +1 for space } - - let (chunk, rest) = - remaining.split_at(std::cmp::min(chunk_size, remaining.len())); - remaining = rest.trim_start(); - - let chunk_files = - vec![(format!("{}:part{}", rel_str, part), chunk.to_string())]; - debug_file!("Written chunk {}", part); - write_chunk( - &chunk_files, - part, - output_dir.as_deref(), - stream, - count_tokens, - )?; - part += 1; 
+                chars
+            } else {
+                max_size
+            };
+
+            // Ensure we make progress even if no word boundary found
+            if chunk_size == 0 {
+                chunk_size = std::cmp::min(max_size, remaining.len());
             }
-                continue;
-            }
 
-            // Check if adding this file would exceed chunk size
-            if total_size + size > max_size && !current_chunk_files.is_empty() {
-                // Write current chunk
+            let (chunk, rest) = remaining.split_at(std::cmp::min(chunk_size, remaining.len()));
+            remaining = rest.trim_start();
+
+            let chunk_files = vec![(format!("{}:part{}", rel_str, part), chunk.to_string())];
+            debug_file!("Written chunk {}", part);
                 write_chunk(
-                    &current_chunk_files,
-                    current_chunk,
+                    &chunk_files,
+                    part,
                     output_dir.as_deref(),
                     stream,
                     count_tokens,
                 )?;
-                debug_file!("Written chunk {}", current_chunk);
-                current_chunk += 1;
-                current_chunk_files.clear();
-                total_size = 0;
+                part += 1;
             }
-
-            // Add file to current chunk
-            current_chunk_files.push((rel_str.to_string(), content));
-            total_size += size;
+            continue;
         }
 
-        // Write final chunk if any files remain
-        if !current_chunk_files.is_empty() {
+        // Check if adding this file would exceed chunk size
+        if total_size + size > max_size && !current_chunk_files.is_empty() {
+            // Write current chunk
             write_chunk(
                 &current_chunk_files,
                 current_chunk,
@@ -728,23 +698,32 @@ pub fn serialize_repo(
                 count_tokens,
             )?;
             debug_file!("Written chunk {}", current_chunk);
+            current_chunk += 1;
+            current_chunk_files.clear();
+            total_size = 0;
         }
-        Ok(None)
-    } else if let Some(out_dir) = output_dir {
-        // Use parallel processing for non-streaming mode
-        process_files_parallel(
-            base_path,
-            max_size,
-            &out_dir,
-            config.as_ref(),
-            &final_config.ignore_patterns,
-            &final_config.priority_list,
-            recentness_boost.as_ref(),
+        // Add file to current chunk
+        current_chunk_files.push((rel_str.to_string(), content));
+        total_size += size;
+    }
+
+    // Write final chunk if any files remain
+    if !current_chunk_files.is_empty() {
+        write_chunk(
+            &current_chunk_files,
+            current_chunk,
+            output_dir.as_deref(),
+            stream,
+            count_tokens,
         )?;
-        Ok(Some(out_dir))
-    } else {
+        debug_file!("Written chunk {}", current_chunk);
+    }
+
+    if stream {
         Ok(None)
+    } else {
+        Ok(output_dir)
     }
 }
 
@@ -823,7 +802,7 @@ fn compute_recentness_boost(
         return HashMap::new();
     }
 
-    // Sort by ascending commit time => first is oldest
+    // Sort by ascending commit time
     let mut sorted: Vec<(&String, &u64)> = commit_times.iter().collect();
     sorted.sort_by_key(|(_, t)| **t);
 
@@ -840,8 +819,8 @@ fn compute_recentness_boost(
     let mut result = HashMap::new();
     for (i, (path, _time)) in sorted.iter().enumerate() {
-        let rank = i as f64 / last_index; // 0.0..1.0 (older files get lower rank)
-        let boost = (rank * max_boost as f64).round() as i32; // Newer files get higher boost
+        let rank = i as f64 / last_index; // 0.0..1.0
+        let boost = (rank * max_boost as f64).round() as i32;
         result.insert((*path).clone(), boost);
     }
     result
diff --git a/src/parallel.rs b/src/parallel.rs
deleted file mode 100644
index ac4bbba..0000000
--- a/src/parallel.rs
+++ /dev/null
@@ -1,325 +0,0 @@
-use crate::is_text_file;
-use crate::{get_file_priority, PriorityPattern, YekConfig};
-use anyhow::Result;
-use crossbeam::channel::{bounded, Receiver, Sender};
-use ignore::{gitignore::GitignoreBuilder, WalkBuilder};
-use num_cpus::get;
-use regex::Regex;
-use std::collections::HashMap;
-use std::fs;
-use std::io::{BufReader, Read};
-use std::path::{Path, PathBuf};
-use std::thread;
-use tracing::{debug, info};
-
-/// Represents a chunk of text read from one file
-#[derive(Debug)]
-pub struct FileChunk {
-    pub priority: i32,
-    pub file_index: usize,
-    pub part_index: usize,
-    pub rel_path: String,
-    pub content: String,
-}
-
-/// File entry with priority for sorting
-#[derive(Debug, Clone)]
-struct FileEntry {
-    path: PathBuf,
-    priority: i32,
-    file_index: usize,
-}
-
-/// Reads a file and determines if it's likely binary by checking for null bytes
-fn is_likely_binary(path: &Path) -> Result<bool> {
-    let f = fs::File::open(path)?;
-    let mut reader = BufReader::new(f);
-    let mut buf = [0; 4096];
-    let n = reader.read(&mut buf)?;
-    Ok(buf[..n].contains(&0))
-}
-
-/// Reads and chunks a single file, sending chunks through the channel
-fn read_and_send_chunks(
-    file_entry: FileEntry,
-    base_path: &Path,
-    tx: &Sender<FileChunk>,
-    max_size: usize,
-) -> Result<()> {
-    // Skip if binary
-    if is_likely_binary(&file_entry.path)? {
-        return Ok(());
-    }
-
-    // Read file content
-    let content = fs::read_to_string(&file_entry.path)?;
-    if content.is_empty() {
-        return Ok(());
-    }
-
-    // Get relative path for display
-    let rel_path = file_entry
-        .path
-        .strip_prefix(base_path)
-        .unwrap_or(&file_entry.path)
-        .to_string_lossy()
-        .into_owned();
-
-    // If smaller than max_size, send as single chunk
-    if content.len() <= max_size {
-        let chunk = FileChunk {
-            priority: file_entry.priority,
-            file_index: file_entry.file_index,
-            part_index: 0,
-            rel_path,
-            content,
-        };
-        tx.send(chunk).ok();
-        return Ok(());
-    }
-
-    // Otherwise split into chunks
-    let mut start = 0;
-    let mut part_index = 0;
-    let bytes = content.as_bytes();
-
-    while start < bytes.len() {
-        let end = (start + max_size).min(bytes.len());
-        let slice = &bytes[start..end];
-        let chunk_str = String::from_utf8_lossy(slice).into_owned();
-
-        let chunk = FileChunk {
-            priority: file_entry.priority,
-            file_index: file_entry.file_index,
-            part_index,
-            rel_path: rel_path.clone(),
-            content: chunk_str,
-        };
-
-        tx.send(chunk).ok();
-        start = end;
-        part_index += 1;
-    }
-
-    Ok(())
-}
-
-/// Main parallel processing function that coordinates workers and aggregator
-pub fn process_files_parallel(
-    base_dir: &Path,
-    max_size: usize,
-    output_dir: &Path,
-    config: Option<&YekConfig>,
-    ignore_patterns: &[Regex],
-    priority_list: &[PriorityPattern],
-    recentness_boost: Option<&HashMap<String, i32>>,
-) -> Result<()> {
-    // Create output directory
-    fs::create_dir_all(output_dir)?;
-
-    // Collect and sort files by priority
-    let files = collect_files(
-        base_dir,
-        config,
-        ignore_patterns,
-        priority_list,
-        recentness_boost,
-    )?;
-    if files.is_empty() {
-        return Ok(());
-    }
-
-    // Create channels for worker→aggregator communication
-    let (tx, rx) = bounded(256);
-
-    // Spawn aggregator thread
-    let output_dir = output_dir.to_path_buf();
-    let aggregator_handle = thread::spawn(move || aggregator_loop(rx, output_dir));
-
-    // Spawn worker threads
-    let num_threads = get();
-    let chunk_size = files.len().div_ceil(num_threads);
-    let mut handles = Vec::new();
-
-    for chunk in files.chunks(chunk_size) {
-        let chunk_files = chunk.to_vec();
-        let sender = tx.clone();
-        let base_path = base_dir.to_path_buf();
-
-        let handle = thread::spawn(move || -> Result<()> {
-            for file_entry in chunk_files {
-                read_and_send_chunks(file_entry, &base_path, &sender, max_size)?;
-            }
-            Ok(())
-        });
-        handles.push(handle);
-    }
-
-    // Drop original sender
-    drop(tx);
-
-    // Wait for workers
-    for handle in handles {
-        handle.join().unwrap()?;
-    }
-
-    // Wait for aggregator
-    aggregator_handle.join().unwrap()?;
-
-    Ok(())
-}
-
-/// Collects files from directory respecting .gitignore and sorts by priority
-fn collect_files(
-    base_dir: &Path,
-    config: Option<&YekConfig>,
-    ignore_patterns: &[Regex],
-    priority_list: &[PriorityPattern],
-    recentness_boost: Option<&HashMap<String, i32>>,
-) -> Result<Vec<FileEntry>> {
-    // Build gitignore matcher
-    let mut builder = GitignoreBuilder::new(base_dir);
-    let gitignore_path = base_dir.join(".gitignore");
-    if gitignore_path.exists() {
-        builder.add(&gitignore_path);
-    }
-    let gitignore = builder
-        .build()
-        .unwrap_or_else(|_| GitignoreBuilder::new(base_dir).build().unwrap());
-
-    let mut builder = WalkBuilder::new(base_dir);
-    builder.follow_links(false).standard_filters(true);
-
-    let mut results = Vec::new();
-    let mut file_index = 0;
-
-    for entry in builder.build().flatten() {
-        if entry.file_type().is_some_and(|ft| ft.is_file()) {
-            let path = entry.path().to_path_buf();
-            let rel_path = path.strip_prefix(base_dir).unwrap_or(&path);
-            let rel_str = rel_path.to_string_lossy();
-
-            // Skip if matched by gitignore
-            #[cfg(windows)]
-            let gitignore_path = rel_path
-                .to_str()
-                .map(|s| s.replace('\\', "/"))
-                .map(PathBuf::from)
-                .unwrap_or(rel_path.to_path_buf());
-            #[cfg(not(windows))]
-            let gitignore_path = rel_path.to_path_buf();
-
-            if gitignore.matched(&gitignore_path, false).is_ignore() {
-                debug!("Skipping {} - matched by gitignore", rel_str);
-                continue;
-            }
-
-            // Skip if matched by our ignore patterns
-            let mut skip = false;
-            for pat in ignore_patterns {
-                if pat.is_match(&rel_str) {
-                    debug!("Skipping {} - matched ignore pattern", rel_str);
-                    skip = true;
-                    break;
-                }
-            }
-            if skip {
-                continue;
-            }
-
-            // Skip binary files
-            if let Some(cfg) = config {
-                if !is_text_file(&path, &cfg.binary_extensions) {
-                    debug!("Skipping binary file: {}", rel_str);
-                    continue;
-                }
-            } else if !is_text_file(&path, &[]) {
-                debug!("Skipping binary file: {}", rel_str);
-                continue;
-            }
-
-            // Calculate priority score
-            let mut priority = get_file_priority(&rel_str, ignore_patterns, priority_list);
-
-            // Apply git recentness boost
-            if let Some(boost_map) = recentness_boost {
-                if let Some(boost) = boost_map.get(&rel_str.to_string()) {
-                    priority += *boost;
-                }
-            }
-
-            results.push(FileEntry {
-                path,
-                priority,
-                file_index,
-            });
-            file_index += 1;
-        }
-    }
-
-    // Sort by priority (ascending) so higher priority files come last
-    results.sort_by(|a, b| {
-        // First sort by priority (ascending)
-        let p = a.priority.cmp(&b.priority);
-        if p != std::cmp::Ordering::Equal {
-            return p;
-        }
-        // If priorities are equal, sort by Git boost (ascending)
-        if let Some(boost_map) = recentness_boost {
-            let a_boost = boost_map
-                .get(&a.path.to_string_lossy().to_string())
-                .unwrap_or(&0);
-            let b_boost = boost_map
-                .get(&b.path.to_string_lossy().to_string())
-                .unwrap_or(&0);
-            return a_boost.cmp(b_boost); // Lower boost (older files) come first
-        }
-        std::cmp::Ordering::Equal
-    });
-    Ok(results)
-}
-
-/// Receives chunks from workers and writes them to files
-fn aggregator_loop(rx: Receiver<FileChunk>, output_dir: PathBuf) -> Result<()> {
-    fs::create_dir_all(&output_dir)?;
-
-    let mut all_chunks = Vec::new();
-    while let Ok(chunk) = rx.recv() {
-        all_chunks.push(chunk);
-    }
-
-    all_chunks.sort_by(|a, b| {
-        let p = a.priority.cmp(&b.priority);
-        if p != std::cmp::Ordering::Equal {
-            return p;
-        }
-        let f = a.file_index.cmp(&b.file_index);
-        if f != std::cmp::Ordering::Equal {
-            return f;
-        }
-        a.part_index.cmp(&b.part_index)
-    });
-
-    let mut current_chunk = String::new();
-    let current_chunk_index = 0;
-
-    for chunk in all_chunks {
-        let mut content = String::new();
-        content.push_str(&format!(">>>> {}\n", chunk.rel_path));
-        content.push_str(&chunk.content);
-        content.push_str("\n\n");
-        current_chunk.push_str(&content);
-    }
-
-    if !current_chunk.is_empty() {
-        let out_path = output_dir.join(format!("chunk-{}.txt", current_chunk_index));
-        fs::write(&out_path, &current_chunk)?;
-        info!(
-            "Written chunk {} with {} lines.",
-            current_chunk_index,
-            current_chunk.lines().count()
-        );
-    }
-
-    Ok(())
-}

From 22cdad81260b2ce11a84da4109df02baeae5e10e Mon Sep 17 00:00:00 2001
From: Mohsen Azimi
Date: Sun, 19 Jan 2025 11:59:50 +0700
Subject: [PATCH 7/7] style: use tempfile::tempdir() for performance tests

---
 tests/test_perf.rs | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/tests/test_perf.rs b/tests/test_perf.rs
index 4d5f040..14e4d25 100644
--- a/tests/test_perf.rs
+++ b/tests/test_perf.rs
@@ -1,6 +1,6 @@
 use std::fs;
-use std::path::Path;
 use std::time::{Duration, Instant};
+use tempfile::TempDir;
 use yek::serialize_repo;
 
 struct PerfStats {
@@ -34,9 +34,9 @@ fn test_serialization_performance() {
     const WARMUP_RUNS: usize = 2;
     const BENCH_RUNS: usize = 5;
 
-    // Create test data directory
-    let test_dir = "test_perf_data";
-    fs::create_dir_all(test_dir).unwrap();
+    // Create temporary test directory that will be automatically cleaned up
+    let test_dir = TempDir::new().unwrap();
+    let output_dir = TempDir::new().unwrap();
 
     // Create test files of different sizes
     let sizes = vec![1024, 1024 * 1024, 10 * 1024 * 1024]; // 1KB, 1MB, 10MB
@@ -45,7 +45,7 @@ fn test_serialization_performance() {
     println!("------------------------------");
 
     for size in sizes {
-        let filename = format!("{}/file_{}_bytes.txt", test_dir, size);
+        let filename = test_dir.path().join(format!("file_{}_bytes.txt", size));
         let data = vec![b'a'; size];
         fs::write(&filename, &data).unwrap();
 
@@ -55,15 +55,16 @@ fn test_serialization_performance() {
         for _ in 0..WARMUP_RUNS {
             serialize_repo(
                 size,
-                Some(Path::new(test_dir)),
+                Some(test_dir.path()),
                 false,
                 false,
                 None,
-                Some(Path::new("perf_output")),
+                Some(output_dir.path()),
                 None,
             )
             .unwrap();
-            fs::remove_dir_all("perf_output").unwrap();
+            fs::remove_dir_all(output_dir.path()).unwrap();
+            fs::create_dir_all(output_dir.path()).unwrap();
         }
 
         // Benchmark runs
@@ -74,11 +75,11 @@ fn test_serialization_performance() {
             let start = Instant::now();
             serialize_repo(
                 size,
-                Some(Path::new(test_dir)),
+                Some(test_dir.path()),
                 false,
                 false,
                 None,
-                Some(Path::new("perf_output")),
+                Some(output_dir.path()),
                 None,
             )
             .unwrap();
@@ -86,7 +87,8 @@ fn test_serialization_performance() {
             stats.update(duration);
             println!("  Run {}: {:?}", run, duration);
 
-            fs::remove_dir_all("perf_output").unwrap();
+            fs::remove_dir_all(output_dir.path()).unwrap();
+            fs::create_dir_all(output_dir.path()).unwrap();
         }
 
         println!("\nStats for {}B:", size);
@@ -95,6 +97,5 @@ fn test_serialization_performance() {
         println!("  Avg: {:?}", stats.avg);
     }
 
-    // Final cleanup
-    fs::remove_dir_all(test_dir).unwrap();
+    // TempDir will automatically clean up when dropped
 }
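Editor's note on running the perf work this series leaves behind: the entry points are the Criterion benches in benches/serialization.rs and the timing test in tests/test_perf.rs. A minimal sketch of how to drive them, assuming Cargo.toml registers the bench target under the file's default name ("serialization") with harness = false as Criterion requires, and that the test binary keeps the default name "test_perf":

    # timing test from PATCH 7/7, with its println! output visible
    cargo test --release --test test_perf -- --nocapture

    # all Criterion groups, or a single group filtered by name
    cargo bench --bench serialization
    cargo bench --bench serialization -- MultipleFiles_Small

cargo bench builds with optimizations by default; the plain test needs --release so its byte- and token-mode timings stay comparable to the bench numbers.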