fix: Add primality test before Pollard-Rho factorization

naoNao89 · naoNao89 · commit 69e10cf6e8ba · 2025-12-09T02:13:55.000+07:00
- Add Miller-Rabin check before factorization
- Add comprehensive factor benchmarks
diff --git a/src/uu/factor/benches/factor_bench.rs b/src/uu/factor/benches/factor_bench.rs
@@ -3,17 +3,123 @@
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
 
-// spell-checker:ignore funcs
+// spell-checker:ignore funcs semiprimes
 
 use divan::{Bencher, black_box};
 use uu_factor::uumain;
 use uucore::benchmark::run_util_function;
 
-/// Benchmark multiple u64 digits
+/// Benchmark factoring a range of small consecutive integers
+/// This tests the trial division fast path for small numbers
+#[divan::bench(args = [1000, 5000])]
+fn factor_small_range(bencher: Bencher, count: usize) {
+    let numbers: Vec<String> = (2..=(count as u64)).map(|n| n.to_string()).collect();
+    bencher.bench(|| {
+        for num_str in &numbers {
+            black_box(run_util_function(uumain, &[num_str]));
+        }
+    });
+}
+
+/// Benchmark factoring small primes
+#[divan::bench(args = [
+    ("prime_1009", "1009"),
+    ("prime_10007", "10007"),
+    ("prime_100003", "100003"),
+])]
+fn factor_small_primes(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
+/// Benchmark factoring large u64 primes
+/// These require primality testing but no factorization
+#[divan::bench(args = [
+    ("prime_near_u32_max", "4294967291"),
+    ("prime_near_i64_max", "9223372036854775783"),
+    ("prime_near_u64_max", "18446744073709551557"),
+])]
+fn factor_large_u64_primes(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
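+/// Benchmark factoring u64 inputs intended as semiprimes (product of two primes); these target the Pollard-Rho path
+/// NOTE(review): only F5 (4294967297 = 641 × 6700417) is a true semiprime; 3215031751 = 151 × 751 × 28351, and 281474976710597 = 2^48 - 59 appears to be prime - confirm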
+#[divan::bench(args = [
+    ("semiprime_32bit", "3215031751"),
+    ("semiprime_48bit", "281474976710597"),
+    ("fermat_number_f5", "4294967297"),
+])]
+fn factor_u64_semiprimes(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
+/// Benchmark factoring highly composite numbers
+/// These have many small factors and test trial division efficiency
+#[divan::bench(args = [
+    ("primorial_7", "510510"),
+    ("factorial_12", "479001600"),
+    ("highly_composite", "720720"),
+])]
+fn factor_highly_composite(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
+/// Benchmark the maximum u64 value
+/// 2^64 - 1 = 3 × 5 × 17 × 257 × 641 × 65537 × 6700417
+#[divan::bench]
+fn factor_u64_max(bencher: Bencher) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &["18446744073709551615"]));
+    });
+}
+
+/// Benchmark factoring Mersenne prime M61 (2^61 - 1)
+/// This is a prime number, so it tests primality checking for large u64
+#[divan::bench]
+fn factor_mersenne_61(bencher: Bencher) {
+    // 2^61 - 1 = 2305843009213693951 (prime)
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &["2305843009213693951"]));
+    });
+}
+
+/// Benchmark factoring a roughly 100-bit number (97 bits) with many small factors
+/// This tests the u128 factorization path with numbers that factor quickly
+#[divan::bench(args = [
+    ("100bit_smooth", "123456789012345678901234567890"),
+])]
+fn factor_100bit_numbers(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
+/// Benchmark factoring numbers at the 128-bit boundary
+/// Tests the prime 2^128 - 159, 2^128 - 1 (u128::MAX, labelled smooth), and 2^128 itself, which needs 129 bits and exceeds u128::MAX
+#[divan::bench(args = [
+    ("128bit_prime", "340282366920938463463374607431768211297"),
+    ("128bit_power_of_2", "340282366920938463463374607431768211456"),
+    ("128bit_smooth", "340282366920938463463374607431768211455"),
+])]
+fn factor_128bit_numbers(bencher: Bencher, (_name, num_str): (&str, &str)) {
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &[num_str]));
+    });
+}
+
+/// Benchmark processing multiple numbers in sequence
+/// This tests the overhead of repeated invocations
 #[divan::bench(args = [(2)])]
 fn factor_multiple_u64s(bencher: Bencher, start_num: u64) {
     bencher
-        // this is a range of 5000 different u128 integers
         .with_inputs(|| (start_num, start_num + 2500))
         .bench_values(|(start_u64, end_u64)| {
             for u64_digit in start_u64..=end_u64 {
diff --git a/src/uu/factor/src/algorithm_selection.rs b/src/uu/factor/src/algorithm_selection.rs
@@ -13,6 +13,7 @@ use num_bigint::BigUint;
 use num_traits::ToPrimitive;
 use std::collections::BTreeMap;
 
+use super::ecm::is_probable_prime;
 use super::fermat::{fermat_factor_biguint, fermat_factor_u64};
 use super::pollard_rho::pollard_rho_with_target;
 use super::trial_division::{extract_small_factors, quick_trial_divide};
@@ -135,6 +136,13 @@ fn factorize_biguint_fast(n: &BigUint) -> BTreeMap<BigUint, usize> {
         return factors;
     }
 
+    // Check if remaining is prime before attempting factorization
+    // Primality testing is much faster than trying Pollard-Rho on a prime
+    if is_probable_prime(&remaining) {
+        factors.insert(remaining, 1);
+        return factors;
+    }
+
     // Try Fermat's method for numbers up to ~90 bits (optimal for close factors)
     if remaining.bits() <= 90 {
         if let Some(fermat_factor) = fermat_factor_biguint(&remaining) {
@@ -163,6 +171,13 @@ fn factorize_biguint_pollard_rho(factors: &mut BTreeMap<BigUint, usize>, n: BigU
         return;
     }
 
+    // Check if n is prime before attempting factorization
+    // Primality testing is much faster than trying Pollard-Rho on a prime
+    if is_probable_prime(&n) {
+        *factors.entry(n).or_insert(0) += 1;
+        return;
+    }
+
     // Estimate factor size (assume roughly balanced factors)
     let target_bits = (n.bits() as u32) / 2;
 
diff --git a/src/uu/factor/src/ecm.rs b/src/uu/factor/src/ecm.rs
@@ -669,7 +669,7 @@ fn compute_prime_product(bound: u64) -> u64 {
 }
 
 /// Miller-Rabin primality test
-fn is_probable_prime(n: &BigUint) -> bool {
+pub fn is_probable_prime(n: &BigUint) -> bool {
     // Use Miller-Rabin primality test with 15 iterations (high confidence)
     // This properly distinguishes composites from primes even for large numbers
     use num_integer::Integer;

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -669,7 +669,7 @@ fn compute_prime_product(bound: u64) -> u64 {` |
| `669` | `669` | `}` |
| `670` | `670` | |
| `671` | `671` | `/// Miller-Rabin primality test` |
| `672` | | `-fn is_probable_prime(n: &BigUint) -> bool {` |
| | `672` | `+pub fn is_probable_prime(n: &BigUint) -> bool {` |
| `673` | `673` | `// Use Miller-Rabin primality test with 15 iterations (high confidence)` |
| `674` | `674` | `// This properly distinguishes composites from primes even for large numbers` |
| `675` | `675` | `use num_integer::Integer;` |