From 1ed5a0b7f6dcb4a4915103b0a19bd3c7fa174907 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 08:19:17 +0200
Subject: [PATCH 01/15] Move python implementation to polygon-packer-py

---
 .../polygon_packer.py                         | 388 +++++++++---------
 .../requirements.txt                          |   0
 2 files changed, 195 insertions(+), 193 deletions(-)
 rename polygon_packer.py => polygon-packer-py/polygon_packer.py (97%)
 rename requirements.txt => polygon-packer-py/requirements.txt (100%)

diff --git a/polygon_packer.py b/polygon-packer-py/polygon_packer.py
similarity index 97%
rename from polygon_packer.py
rename to polygon-packer-py/polygon_packer.py
index 3fe2854..db94138 100644
--- a/polygon_packer.py
+++ b/polygon-packer-py/polygon_packer.py
@@ -1,193 +1,195 @@
-import numpy as np
-from scipy.optimize import basinhopping, minimize
-import matplotlib.pyplot as ppt
-from numba import njit
-from joblib import Parallel, delayed
-import argparse
-
-arg_parser = argparse.ArgumentParser()
-arg_parser.add_argument("inner_polygons", type=int, help="Number of inner polygons")
-arg_parser.add_argument("inner_sides", type=int, help="Number of sides of the inner polygons")
-arg_parser.add_argument("container_sides", type=int, help="Number of sides of the container polygon")
-arg_parser.add_argument("--attempts", type=int, default=1000, help="Number of attempts to run")
-arg_parser.add_argument("--tolerance", type=float, default=1e-8, help="Overlap penalty tolerance. Probably best left at default")
-arg_parser.add_argument("--finalstep", type=float, default=0.0001, help="How small the last theoretical step in container size decrease will be (it gets smaller over time)")
-args = arg_parser.parse_args()
-
-N = args.inner_polygons
-nsi = args.inner_sides
-nsc = args.container_sides
-attempts = args.attempts
-penalty_tolerance = args.tolerance
-final_step_size = args.finalstep
-
-unit_polygon_angles = np.linspace(0, 2 * np.pi, nsi, endpoint=False)
-unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles), np.sin(unit_polygon_angles)))
-unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles + np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
-unit_container_angles = np.linspace(0, 2 * np.pi, nsc, endpoint=False)
-unit_container_vertices = np.column_stack((np.cos(unit_container_angles), np.sin(unit_container_angles)))
-unit_container_vectors = np.column_stack((np.cos(unit_container_angles + np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
-unit_container_apothem = np.cos(np.pi / nsc)
-
-@njit(cache=True)
-def transform_polygon(x, y, a, vertices):
-    n_vertices = vertices.shape[0]
-    transformed = np.empty_like(vertices)
-    for i in range(n_vertices):
-        vx = vertices[i, 0]
-        vy = vertices[i, 1]
-        transformed[i, 0] = x + (vx * np.cos(a) - vy * np.sin(a))
-        transformed[i, 1] = y + (vx * np.sin(a) + vy * np.cos(a))
-    return transformed
-
-@njit(cache=True)
-def rotate_vectors(a, vectors):
-    n_vectors = vectors.shape[0]
-    rotated = np.empty_like(vectors)
-    for i in range(n_vectors):
-        vecx = vectors[i, 0]
-        vecy = vectors[i, 1]
-        rotated[i, 0] = vecx * np.cos(a) - vecy * np.sin(a)
-        rotated[i, 1] = vecx * np.sin(a) + vecy * np.cos(a)
-    return rotated
-        
-@njit(cache=True)
-def poking_penalty(vertices, S):
-    penalty = 0.0
-    limit = unit_container_apothem * S
-    for v in range(vertices.shape[0]):
-        vx = vertices[v, 0]
-        vy = vertices[v, 1]
-        for i in range(nsc):
-            distance = vx * unit_container_vectors[i, 0] + vy * unit_container_vectors[i, 1]
-            if distance > limit:
-                diff = distance - limit
-                penalty += diff * diff
-    return penalty
-
-@njit(cache=True)
-def bh_function(values, S):
-    penalty = 0.0
-    polygon_array = np.zeros((N, nsi, 2))
-    vector_array = np.zeros((N, nsi, 2))
-    for i in range(N):
-        posx = values[i * 3]
-        posy = values[i * 3 + 1]
-        rot = values[i * 3 + 2]
-        polygon_array[i] = transform_polygon(posx, posy, rot, unit_polygon_vertices)
-        vector_array[i] = rotate_vectors(rot, unit_polygon_vectors)
-
-        penalty += poking_penalty(polygon_array[i], S)
-
-    for i in range(N):
-        for j in range(i + 1, N):
-            collision = True
-            min_overlap = 100000000000000000000.0
-            for vec in range(nsi * 2):
-                if vec < nsi:
-                    x_axis = vector_array[i][vec, 0]
-                    y_axis = vector_array[i][vec, 1]
-                else:
-                    x_axis = vector_array[j][vec - nsi, 0]
-                    y_axis = vector_array[j][vec - nsi, 1]
-
-                min_1 = 100000000000000000000.0
-                max_1 = -100000000000000000000.0
-                for vert in range(nsi):
-                    dotp = polygon_array[i][vert, 0] * x_axis + polygon_array[i][vert, 1] * y_axis
-                    if dotp < min_1: 
-                        min_1 = dotp
-                    if dotp > max_1: 
-                        max_1 = dotp
-
-                min_2 = 100000000000000000000.0
-                max_2 = -100000000000000000000.0
-                for vert in range(nsi):
-                    dotp = polygon_array[j][vert, 0] * x_axis + polygon_array[j][vert, 1] * y_axis
-                    if dotp < min_2: 
-                        min_2 = dotp
-                    if dotp > max_2: 
-                        max_2 = dotp
-
-                overlap = min(max_1, max_2) - max(min_1, min_2)
-                if overlap <= 0:
-                    collision = False
-                    break
-                if overlap < min_overlap:
-                    min_overlap = overlap
-
-            if collision:
-                penalty += min_overlap * min_overlap
-            
-    return penalty
-
-def repetition(seed):
-    print("Attempt", seed)
-
-    np.random.seed(seed)
-    dynamic_S = np.sqrt(N) * (2 + np.random.rand() * 2)
-    initial_S = dynamic_S
-
-    if np.random.rand() < 0.5:
-        x0 = np.random.uniform(-dynamic_S/2, dynamic_S/2, N * 3)
-    else:
-        grid_linspace = np.linspace(-dynamic_S/2 * 0.9, dynamic_S/2 * 0.9, int(np.ceil(np.sqrt(N))))
-        xx, yy = np.meshgrid(grid_linspace, grid_linspace)
-        grid_points = np.column_stack((xx.flatten(), yy.flatten()))[:N]
-        x0 = np.zeros(N * 3)
-        x0[0::3] = grid_points[:, 0]
-        x0[1::3] = grid_points[:, 1]
-        x0[2::3] = np.random.uniform(0, 2 * np.pi, N)
-    
-    last_valid_x = x0.copy()
-    last_valid_S = dynamic_S
-
-    while True:
-        minimized = minimize(bh_function, x0, args=(dynamic_S,), method="L-BFGS-B", tol=1e-8)
-        multiplier = 0.9999 - (dynamic_S - np.sqrt(N) * nsi / nsc) * 0.0099 / (initial_S - np.sqrt(N) * nsi / nsc)
-        multiplier = 1 - final_step_size - (dynamic_S - np.sqrt(N) * nsi / nsc) * (0.01 - final_step_size) / (initial_S - np.sqrt(N) * nsi / nsc)
-        if minimized.fun < penalty_tolerance:
-            last_valid_x = minimized.x.copy()
-            last_valid_S = dynamic_S.copy()
-            x0 = minimized.x * multiplier
-            dynamic_S *= multiplier
-        else:
-            bh_result = basinhopping(
-                lambda x, s: bh_function(x, s),
-                x0,
-                minimizer_kwargs={'method': 'L-BFGS-B', 'args': (dynamic_S,), 'tol': 1e-8},
-                niter=50,
-                T=0.1,
-                stepsize=0.1
-            )
-            if bh_result.fun < penalty_tolerance:
-                last_valid_x = bh_result.x.copy()
-                last_valid_S = dynamic_S
-                x0 = bh_result.x * multiplier
-                dynamic_S *= multiplier
-            else:
-                break
-    return last_valid_S, last_valid_x
-
-best_S = float("inf")
-best_values = None
-results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i) for i in range(attempts))
-    
-for s, values in results:
-    if s < best_S:
-        best_S = s
-        best_values = values
-
-print("Final side length:", best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi))
-
-final_positions = positions = best_values.reshape((N, 3))
-fig, ax = ppt.subplots()
-container_plot = np.vstack((unit_container_vertices * best_S, unit_container_vertices[0] * best_S))
-ax.plot(container_plot[:,0], container_plot[:,1], color="#000000", linewidth=0.5)
-for i in range(N):
-    polygon = transform_polygon(best_values[i * 3], best_values[i * 3 + 1], best_values[i * 3 + 2], unit_polygon_vertices)
-    polygon_plot = np.vstack((polygon, polygon[0]))
-    ax.fill(polygon_plot[:,0], polygon_plot[:,1], "#CCCCCC", edgecolor="black", linewidth=0.5)
-ax.set_aspect("equal")
-ppt.title(f"Side length: {best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi)}")
-ppt.savefig(f"{N}_{nsi}_in_{nsc}.png")
+import numpy as np
+from scipy.optimize import basinhopping, minimize
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as ppt
+from numba import njit
+from joblib import Parallel, delayed
+import argparse
+
+arg_parser = argparse.ArgumentParser()
+arg_parser.add_argument("inner_polygons", type=int, help="Number of inner polygons")
+arg_parser.add_argument("inner_sides", type=int, help="Number of sides of the inner polygons")
+arg_parser.add_argument("container_sides", type=int, help="Number of sides of the container polygon")
+arg_parser.add_argument("--attempts", type=int, default=1000, help="Number of attempts to run")
+arg_parser.add_argument("--tolerance", type=float, default=1e-8, help="Overlap penalty tolerance. Probably best left at default")
+arg_parser.add_argument("--finalstep", type=float, default=0.0001, help="How small the last theoretical step in container size decrease will be (it gets smaller over time)")
+args = arg_parser.parse_args()
+
+N = args.inner_polygons
+nsi = args.inner_sides
+nsc = args.container_sides
+attempts = args.attempts
+penalty_tolerance = args.tolerance
+final_step_size = args.finalstep
+
+unit_polygon_angles = np.linspace(0, 2 * np.pi, nsi, endpoint=False)
+unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles), np.sin(unit_polygon_angles)))
+unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles + np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
+unit_container_angles = np.linspace(0, 2 * np.pi, nsc, endpoint=False)
+unit_container_vertices = np.column_stack((np.cos(unit_container_angles), np.sin(unit_container_angles)))
+unit_container_vectors = np.column_stack((np.cos(unit_container_angles + np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
+unit_container_apothem = np.cos(np.pi / nsc)
+
+@njit(cache=True)
+def transform_polygon(x, y, a, vertices):
+    n_vertices = vertices.shape[0]
+    transformed = np.empty_like(vertices)
+    for i in range(n_vertices):
+        vx = vertices[i, 0]
+        vy = vertices[i, 1]
+        transformed[i, 0] = x + (vx * np.cos(a) - vy * np.sin(a))
+        transformed[i, 1] = y + (vx * np.sin(a) + vy * np.cos(a))
+    return transformed
+
+@njit(cache=True)
+def rotate_vectors(a, vectors):
+    n_vectors = vectors.shape[0]
+    rotated = np.empty_like(vectors)
+    for i in range(n_vectors):
+        vecx = vectors[i, 0]
+        vecy = vectors[i, 1]
+        rotated[i, 0] = vecx * np.cos(a) - vecy * np.sin(a)
+        rotated[i, 1] = vecx * np.sin(a) + vecy * np.cos(a)
+    return rotated
+        
+@njit(cache=True)
+def poking_penalty(vertices, S):
+    penalty = 0.0
+    limit = unit_container_apothem * S
+    for v in range(vertices.shape[0]):
+        vx = vertices[v, 0]
+        vy = vertices[v, 1]
+        for i in range(nsc):
+            distance = vx * unit_container_vectors[i, 0] + vy * unit_container_vectors[i, 1]
+            if distance > limit:
+                diff = distance - limit
+                penalty += diff * diff
+    return penalty
+
+@njit(cache=True)
+def bh_function(values, S):
+    penalty = 0.0
+    polygon_array = np.zeros((N, nsi, 2))
+    vector_array = np.zeros((N, nsi, 2))
+    for i in range(N):
+        posx = values[i * 3]
+        posy = values[i * 3 + 1]
+        rot = values[i * 3 + 2]
+        polygon_array[i] = transform_polygon(posx, posy, rot, unit_polygon_vertices)
+        vector_array[i] = rotate_vectors(rot, unit_polygon_vectors)
+
+        penalty += poking_penalty(polygon_array[i], S)
+
+    for i in range(N):
+        for j in range(i + 1, N):
+            collision = True
+            min_overlap = 100000000000000000000.0
+            for vec in range(nsi * 2):
+                if vec < nsi:
+                    x_axis = vector_array[i][vec, 0]
+                    y_axis = vector_array[i][vec, 1]
+                else:
+                    x_axis = vector_array[j][vec - nsi, 0]
+                    y_axis = vector_array[j][vec - nsi, 1]
+
+                min_1 = 100000000000000000000.0
+                max_1 = -100000000000000000000.0
+                for vert in range(nsi):
+                    dotp = polygon_array[i][vert, 0] * x_axis + polygon_array[i][vert, 1] * y_axis
+                    if dotp < min_1: 
+                        min_1 = dotp
+                    if dotp > max_1: 
+                        max_1 = dotp
+
+                min_2 = 100000000000000000000.0
+                max_2 = -100000000000000000000.0
+                for vert in range(nsi):
+                    dotp = polygon_array[j][vert, 0] * x_axis + polygon_array[j][vert, 1] * y_axis
+                    if dotp < min_2: 
+                        min_2 = dotp
+                    if dotp > max_2: 
+                        max_2 = dotp
+
+                overlap = min(max_1, max_2) - max(min_1, min_2)
+                if overlap <= 0:
+                    collision = False
+                    break
+                if overlap < min_overlap:
+                    min_overlap = overlap
+
+            if collision:
+                penalty += min_overlap * min_overlap
+            
+    return penalty
+
+def repetition(seed):
+    print("Attempt", seed)
+
+    np.random.seed(seed)
+    dynamic_S = np.sqrt(N) * (2 + np.random.rand() * 2)
+    initial_S = dynamic_S
+
+    if np.random.rand() < 0.5:
+        x0 = np.random.uniform(-dynamic_S/2, dynamic_S/2, N * 3)
+    else:
+        grid_linspace = np.linspace(-dynamic_S/2 * 0.9, dynamic_S/2 * 0.9, int(np.ceil(np.sqrt(N))))
+        xx, yy = np.meshgrid(grid_linspace, grid_linspace)
+        grid_points = np.column_stack((xx.flatten(), yy.flatten()))[:N]
+        x0 = np.zeros(N * 3)
+        x0[0::3] = grid_points[:, 0]
+        x0[1::3] = grid_points[:, 1]
+        x0[2::3] = np.random.uniform(0, 2 * np.pi, N)
+    
+    last_valid_x = x0.copy()
+    last_valid_S = dynamic_S
+
+    while True:
+        minimized = minimize(bh_function, x0, args=(dynamic_S,), method="L-BFGS-B", tol=1e-8)
+        multiplier = 0.9999 - (dynamic_S - np.sqrt(N) * nsi / nsc) * 0.0099 / (initial_S - np.sqrt(N) * nsi / nsc)
+        multiplier = 1 - final_step_size - (dynamic_S - np.sqrt(N) * nsi / nsc) * (0.01 - final_step_size) / (initial_S - np.sqrt(N) * nsi / nsc)
+        if minimized.fun < penalty_tolerance:
+            last_valid_x = minimized.x.copy()
+            last_valid_S = dynamic_S.copy()
+            x0 = minimized.x * multiplier
+            dynamic_S *= multiplier
+        else:
+            bh_result = basinhopping(
+                lambda x, s: bh_function(x, s),
+                x0,
+                minimizer_kwargs={'method': 'L-BFGS-B', 'args': (dynamic_S,), 'tol': 1e-8},
+                niter=50,
+                T=0.1,
+                stepsize=0.1
+            )
+            if bh_result.fun < penalty_tolerance:
+                last_valid_x = bh_result.x.copy()
+                last_valid_S = dynamic_S
+                x0 = bh_result.x * multiplier
+                dynamic_S *= multiplier
+            else:
+                break
+    return last_valid_S, last_valid_x
+
+best_S = float("inf")
+best_values = None
+results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i) for i in range(attempts))
+    
+for s, values in results:
+    if s < best_S:
+        best_S = s
+        best_values = values
+
+print("Final side length:", best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi))
+
+final_positions = positions = best_values.reshape((N, 3))
+fig, ax = ppt.subplots()
+container_plot = np.vstack((unit_container_vertices * best_S, unit_container_vertices[0] * best_S))
+ax.plot(container_plot[:,0], container_plot[:,1], color="#000000", linewidth=0.5)
+for i in range(N):
+    polygon = transform_polygon(best_values[i * 3], best_values[i * 3 + 1], best_values[i * 3 + 2], unit_polygon_vertices)
+    polygon_plot = np.vstack((polygon, polygon[0]))
+    ax.fill(polygon_plot[:,0], polygon_plot[:,1], "#CCCCCC", edgecolor="black", linewidth=0.5)
+ax.set_aspect("equal")
+ppt.title(f"Side length: {best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi)}")
+ppt.savefig(f"{N}_{nsi}_in_{nsc}.png")
diff --git a/requirements.txt b/polygon-packer-py/requirements.txt
similarity index 100%
rename from requirements.txt
rename to polygon-packer-py/requirements.txt

From 5eb3b96249f683ab7b42dc9ea2b871bb06dd0c6e Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 08:19:17 +0200
Subject: [PATCH 02/15] Create Rust crate: polygon-packer-rs

---
 polygon-packer-rs/Cargo.toml           |  36 ++
 polygon-packer-rs/src/main.rs          | 768 +++++++++++++++++++++++++
 polygon-packer-rs/src/rust_specific.rs |  21 +
 3 files changed, 825 insertions(+)
 create mode 100644 polygon-packer-rs/Cargo.toml
 create mode 100644 polygon-packer-rs/src/main.rs
 create mode 100644 polygon-packer-rs/src/rust_specific.rs

diff --git a/polygon-packer-rs/Cargo.toml b/polygon-packer-rs/Cargo.toml
new file mode 100644
index 0000000..bbc9385
--- /dev/null
+++ b/polygon-packer-rs/Cargo.toml
@@ -0,0 +1,36 @@
+[package]
+name = "polygon-packer-rs"
+version = "0.1.0"
+edition = "2024"
+authors = ["Voxell Paladynee <voxellpaladynee@proton.me>"]
+
+[dependencies]
+# numeric library
+ndarray = "0.17.2"
+# optimization library
+argmin = "0.11.0"
+argmin-math = { version = "0.5", features = ["vec"] }
+# plot library
+plotters = "0.3"
+# parallelization
+rayon = "1.7"
+# command line argument parsing
+clap = { version = "4.4", features = ["derive"] }
+# Voxell's homegrown RNG library, used for pure-rust PCG family of random number
+# generators. https://www.pcg-random.org/index.html
+voxell_rng = "0.6.0"
+
+[profile.release]
+# opt-level: 3 for full optimizations
+opt-level = 3
+# lto: allows library code to be more aggressively inlined
+# "thin" for a lighter mode, "fat" for max perf
+lto = "fat"
+# codegen-units: only 1 concurrent compiler so the optimizer
+# has maximum code context 
+codegen-units = 1
+# panic: this potentially reduces instruction-cache pressure, but if it crashes
+# you won't get fancy errors.
+# set to "unwind" for fancy errors
+# set to "abort" for a very slight boost
+panic = "abort"
\ No newline at end of file
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
new file mode 100644
index 0000000..b2db57e
--- /dev/null
+++ b/polygon-packer-rs/src/main.rs
@@ -0,0 +1,768 @@
+extern crate argmin;
+extern crate clap;
+extern crate ndarray;
+extern crate plotters;
+extern crate rayon;
+
+mod rust_specific;
+use rust_specific::AssocPI;
+use rust_specific::FloatType;
+// this is π in either f32 or f64
+const PI: FloatType = <FloatType as AssocPI>::PI;
+
+use argmin::core::{CostFunction, Error, Executor, Gradient, State};
+use argmin::solver::linesearch::BacktrackingLineSearch;
+use argmin::solver::linesearch::condition::ArmijoCondition;
+use argmin::solver::quasinewton::LBFGS;
+use clap::Parser;
+use ndarray::Array2;
+use plotters::prelude::*;
+use rayon::prelude::*;
+use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
+
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// Number of inner polygons
+    inner_polygons: usize,
+    /// Number of sides of the inner polygons
+    inner_sides: usize,
+    /// Number of sides of the container polygon
+    container_sides: usize,
+    /// Number of attempts to run
+    #[arg(long, default_value_t = 1000)]
+    attempts: usize,
+    /// Overlap penalty tolerance. Probably best left at default
+    #[arg(long, default_value_t = 1e-8)]
+    tolerance: FloatType,
+    /// How small the last theoretical step in container size decrease will be (it gets smaller over time)
+    #[arg(long, default_value_t = 0.0001)]
+    finalstep: FloatType,
+}
+
+fn main() {
+    let args = Args::parse();
+
+    let N = args.inner_polygons;
+    let nsi = args.inner_sides;
+    let nsc = args.container_sides;
+    let attempts = args.attempts;
+    let penalty_tolerance = args.tolerance;
+    let final_step_size = args.finalstep;
+
+    // Translated from Python:
+    // unit_polygon_angles = np.linspace(0, 2 * np.pi, nsi, endpoint=False)
+    let unit_polygon_angles_vec: Vec<FloatType> = (0..nsi)
+        .map(|i| 2.0 * PI * (i as FloatType) / (nsi as FloatType))
+        .collect();
+
+    // unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles), np.sin(unit_polygon_angles)))
+    let mut unit_polygon_vertices_buf: Vec<FloatType> = Vec::with_capacity(nsi * 2);
+    for &a in &unit_polygon_angles_vec {
+        unit_polygon_vertices_buf.push(a.cos());
+        unit_polygon_vertices_buf.push(a.sin());
+    }
+    let unit_polygon_vertices: Array2<FloatType> =
+        Array2::from_shape_vec((nsi, 2), unit_polygon_vertices_buf).unwrap();
+
+    // unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles + np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
+    let mut unit_polygon_vectors_buf: Vec<FloatType> = Vec::with_capacity(nsi * 2);
+    let offset_nsi = PI / (nsi as FloatType);
+    for &a in &unit_polygon_angles_vec {
+        let aa = a + offset_nsi;
+        unit_polygon_vectors_buf.push(aa.cos());
+        unit_polygon_vectors_buf.push(aa.sin());
+    }
+    let unit_polygon_vectors: Array2<FloatType> =
+        Array2::from_shape_vec((nsi, 2), unit_polygon_vectors_buf).unwrap();
+
+    // unit_container_angles = np.linspace(0, 2 * np.pi, nsc, endpoint=False)
+    let unit_container_angles: Vec<FloatType> = (0..nsc)
+        .map(|i| 2.0 * PI * (i as FloatType) / (nsc as FloatType))
+        .collect();
+
+    // unit_container_vertices = np.column_stack((np.cos(unit_container_angles), np.sin(unit_container_angles)))
+    let mut unit_container_vertices_buf: Vec<FloatType> = Vec::with_capacity(nsc * 2);
+    for &a in &unit_container_angles {
+        unit_container_vertices_buf.push(a.cos());
+        unit_container_vertices_buf.push(a.sin());
+    }
+    let unit_container_vertices: Array2<FloatType> =
+        Array2::from_shape_vec((nsc, 2), unit_container_vertices_buf).unwrap();
+
+    // unit_container_vectors = np.column_stack((np.cos(unit_container_angles + np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
+    let mut unit_container_vectors_buf: Vec<FloatType> = Vec::with_capacity(nsc * 2);
+    let offset_nsc = PI / (nsc as FloatType);
+    for &a in &unit_container_angles {
+        let aa = a + offset_nsc;
+        unit_container_vectors_buf.push(aa.cos());
+        unit_container_vectors_buf.push(aa.sin());
+    }
+    let unit_container_vectors: Array2<FloatType> =
+        Array2::from_shape_vec((nsc, 2), unit_container_vectors_buf).unwrap();
+
+    // unit_container_apothem = np.cos(np.pi / nsc)
+    let unit_container_apothem: FloatType = (PI / (nsc as FloatType)).cos();
+
+    let mut best_S: FloatType = FloatType::INFINITY;
+    let mut best_values: Option<Vec<FloatType>> = None;
+
+    // results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i) for i in range(attempts))
+    let results: Vec<(FloatType, _)> = (0..attempts)
+        .into_par_iter()
+        .map(|i| {
+            repetition(
+                i,
+                N,
+                nsi,
+                nsc,
+                penalty_tolerance,
+                final_step_size,
+                &unit_polygon_vertices,
+                &unit_polygon_vectors,
+                &unit_container_vectors,
+                unit_container_apothem,
+            )
+        })
+        .collect();
+
+    for (s, values) in results {
+        if s < best_S {
+            best_S = s;
+            best_values = Some(values);
+        }
+    }
+
+    println!(
+        "Final side length: {}",
+        // best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi),
+        best_S * (PI / (nsc as FloatType)).sin() / (PI / (nsi as FloatType)).sin(),
+    );
+
+    // ============== NOTE(@paladynee): ===============
+    // anything found after this line is translated from Python into Rust
+    // via an AI agent. the numerical results (excluding nondeterminism from
+    // rngs) have been checked against python over multiple passes.
+    //
+    // i am not well versed in mathematics so if anybody could fact check this
+    // implementation and remove this note, I'd be very glad.  
+
+    if let Some(vals) = best_values {
+        if vals.len() == N * 3 {
+            let positions = Array2::from_shape_vec((N, 3), vals.clone()).unwrap();
+            println!("Final positions (first 5 rows):");
+            for i in 0..std::cmp::min(5, N) {
+                println!("  {}: {:?}", i, positions.row(i));
+            }
+
+            // Plot result to PNG using Plotters (match Python output)
+            let file_name = format!("{}_{}_in_{}.png", N, nsi, nsc);
+            // collect polygon points and container points (as f64)
+            let mut all_points: Vec<(f64, f64)> = Vec::new();
+            let mut polys: Vec<Vec<(f64, f64)>> = Vec::with_capacity(N);
+            for i in 0..N {
+                let x = positions[[i, 0]];
+                let y = positions[[i, 1]];
+                let a = positions[[i, 2]];
+                let poly = transform_polygon(x, y, a, &unit_polygon_vertices);
+                let mut pts: Vec<(f64, f64)> = Vec::with_capacity(poly.len() / 2 + 1);
+                for k in 0..(poly.len() / 2) {
+                    let px = poly[k * 2] as f64;
+                    let py = poly[k * 2 + 1] as f64;
+                    pts.push((px, py));
+                    all_points.push((px, py));
+                }
+                if let Some(first) = pts.get(0).cloned() {
+                    pts.push(first);
+                    all_points.push(first);
+                }
+                polys.push(pts);
+            }
+
+            let mut container_pts: Vec<(f64, f64)> =
+                Vec::with_capacity(unit_container_vertices.shape()[0] + 1);
+            for i in 0..unit_container_vertices.shape()[0] {
+                let cx = unit_container_vertices[[i, 0]] * best_S;
+                let cy = unit_container_vertices[[i, 1]] * best_S;
+                container_pts.push((cx as f64, cy as f64));
+                all_points.push((cx as f64, cy as f64));
+            }
+            if let Some(first) = container_pts.get(0).cloned() {
+                container_pts.push(first);
+                all_points.push(first);
+            }
+
+            // determine bounds and padding
+            let (mut min_x, mut max_x, mut min_y, mut max_y) = (
+                std::f64::INFINITY,
+                std::f64::NEG_INFINITY,
+                std::f64::INFINITY,
+                std::f64::NEG_INFINITY,
+            );
+            for (x, y) in &all_points {
+                if *x < min_x {
+                    min_x = *x;
+                }
+                if *x > max_x {
+                    max_x = *x;
+                }
+                if *y < min_y {
+                    min_y = *y;
+                }
+                if *y > max_y {
+                    max_y = *y;
+                }
+            }
+            // choose grid steps matching the requested ticks
+            let x_step = 1.0_f64;
+            let y_step = 0.5_f64;
+            let x_min_tick = (min_x / x_step).floor() * x_step;
+            let x_max_tick = (max_x / x_step).ceil() * x_step;
+            let y_min_tick = (min_y / y_step).floor() * y_step;
+            let y_max_tick = (max_y / y_step).ceil() * y_step;
+
+            // Equalize spans so 1 unit in x equals 1 unit in y on the image
+            let dx = x_max_tick - x_min_tick;
+            let dy = y_max_tick - y_min_tick;
+            let span = dx.max(dy);
+            let x_center = (x_min_tick + x_max_tick) / 2.0;
+            let y_center = (y_min_tick + y_max_tick) / 2.0;
+            let x_min_eq = x_center - span / 2.0;
+            let x_max_eq = x_center + span / 2.0;
+            let y_min_eq = y_center - span / 2.0;
+            let y_max_eq = y_center + span / 2.0;
+
+            // Render PNG on a square canvas so aspect ratio is preserved
+            let backend = BitMapBackend::new(&file_name, (1024, 1024));
+            let root = backend.into_drawing_area();
+            root.fill(&WHITE).unwrap();
+            let mut chart = ChartBuilder::on(&root)
+                .margin(10)
+                .caption(
+                    format!(
+                        "Side length: {}",
+                        best_S * (PI / (nsc as FloatType)).sin() / (PI / (nsi as FloatType)).sin()
+                    ),
+                    ("sans-serif", 20).into_font(),
+                )
+                .build_cartesian_2d(x_min_eq..x_max_eq, y_min_eq..y_max_eq)
+                .unwrap();
+
+            // configure mesh to show ticks at our step sizes
+            let x_labels = ((span / x_step).round() as usize).saturating_add(1).max(2);
+            let y_labels = ((span / y_step).round() as usize).saturating_add(1).max(2);
+            chart
+                .configure_mesh()
+                .x_labels(x_labels)
+                .y_labels(y_labels)
+                .x_label_formatter(&|x| format!("{:.0}", x))
+                .y_label_formatter(&|y| format!("{:.1}", y))
+                .draw()
+                .ok();
+
+            // draw container outline
+            chart
+                .draw_series(std::iter::once(PathElement::new(
+                    container_pts.clone(),
+                    &BLACK,
+                )))
+                .ok();
+
+            // draw filled polygons and outlines
+            for poly in polys {
+                let fill_style = Into::<ShapeStyle>::into(&RGBColor(204, 204, 204)).filled();
+                chart
+                    .draw_series(std::iter::once(Polygon::new(poly.clone(), fill_style)))
+                    .ok();
+                chart
+                    .draw_series(std::iter::once(PathElement::new(poly, &BLACK)))
+                    .ok();
+            }
+
+            root.present().ok();
+            println!("Saved plot to {}", file_name);
+        }
+    } else {
+        println!("No valid packing found in attempts");
+    }
+}
+
+// def repetition(seed):
+fn transform_polygon(
+    x: FloatType,
+    y: FloatType,
+    a: FloatType,
+    vertices: &Array2<FloatType>,
+) -> Vec<FloatType> {
+    let n_vertices = vertices.shape()[0];
+    let mut transformed = vec![0.0 as FloatType; n_vertices * 2];
+    let ca = a.cos();
+    let sa = a.sin();
+    for i in 0..n_vertices {
+        let vx = vertices[[i, 0]];
+        let vy = vertices[[i, 1]];
+        transformed[i * 2] = x + (vx * ca - vy * sa);
+        transformed[i * 2 + 1] = y + (vx * sa + vy * ca);
+    }
+    transformed
+}
+
+fn rotate_vectors(a: FloatType, vectors: &Array2<FloatType>) -> Vec<FloatType> {
+    let n = vectors.shape()[0];
+    let mut rotated = vec![0.0 as FloatType; n * 2];
+    let ca = a.cos();
+    let sa = a.sin();
+    for i in 0..n {
+        let vx = vectors[[i, 0]];
+        let vy = vectors[[i, 1]];
+        rotated[i * 2] = vx * ca - vy * sa;
+        rotated[i * 2 + 1] = vx * sa + vy * ca;
+    }
+    rotated
+}
+
+fn poking_penalty(
+    vertices: &[FloatType],
+    S: FloatType,
+    unit_container_vectors: &Array2<FloatType>,
+    unit_container_apothem: FloatType,
+) -> FloatType {
+    let mut penalty: FloatType = 0.0;
+    let limit = unit_container_apothem * S;
+    let n_vertices = vertices.len() / 2;
+    let nsc = unit_container_vectors.shape()[0];
+    for v in 0..n_vertices {
+        let vx = vertices[v * 2];
+        let vy = vertices[v * 2 + 1];
+        for i in 0..nsc {
+            let dvx = unit_container_vectors[[i, 0]];
+            let dvy = unit_container_vectors[[i, 1]];
+            let distance = vx * dvx + vy * dvy;
+            if distance > limit {
+                let diff = distance - limit;
+                penalty += diff * diff;
+            }
+        }
+    }
+    penalty
+}
+
+fn bh_function(
+    values: &[FloatType],
+    S: FloatType,
+    N: usize,
+    nsi: usize,
+    nsc: usize,
+    unit_polygon_vertices: &Array2<FloatType>,
+    unit_polygon_vectors: &Array2<FloatType>,
+    unit_container_vectors: &Array2<FloatType>,
+    unit_container_apothem: FloatType,
+) -> FloatType {
+    let mut penalty: FloatType = 0.0;
+
+    let mut polygon_array: Vec<Vec<FloatType>> = Vec::with_capacity(N);
+    let mut vector_array: Vec<Vec<FloatType>> = Vec::with_capacity(N);
+
+    for i in 0..N {
+        let posx = values[i * 3];
+        let posy = values[i * 3 + 1];
+        let rot = values[i * 3 + 2];
+        let poly = transform_polygon(posx, posy, rot, unit_polygon_vertices);
+        let vecs = rotate_vectors(rot, unit_polygon_vectors);
+        penalty += poking_penalty(&poly, S, unit_container_vectors, unit_container_apothem);
+        polygon_array.push(poly);
+        vector_array.push(vecs);
+    }
+
+    for i in 0..N {
+        for j in (i + 1)..N {
+            let mut collision = true;
+            let huge: FloatType = 100000000000000000000.0 as FloatType;
+            let mut min_overlap: FloatType = huge;
+            for vec_idx in 0..(nsi * 2) {
+                let (x_axis, y_axis) = if vec_idx < nsi {
+                    (
+                        vector_array[i][vec_idx * 2],
+                        vector_array[i][vec_idx * 2 + 1],
+                    )
+                } else {
+                    (
+                        vector_array[j][(vec_idx - nsi) * 2],
+                        vector_array[j][(vec_idx - nsi) * 2 + 1],
+                    )
+                };
+
+                let mut min_1 = huge;
+                let mut max_1 = -huge;
+                for vert in 0..nsi {
+                    let dotp = polygon_array[i][vert * 2] * x_axis
+                        + polygon_array[i][vert * 2 + 1] * y_axis;
+                    if dotp < min_1 {
+                        min_1 = dotp;
+                    }
+                    if dotp > max_1 {
+                        max_1 = dotp;
+                    }
+                }
+
+                let mut min_2 = huge;
+                let mut max_2 = -huge;
+                for vert in 0..nsi {
+                    let dotp = polygon_array[j][vert * 2] * x_axis
+                        + polygon_array[j][vert * 2 + 1] * y_axis;
+                    if dotp < min_2 {
+                        min_2 = dotp;
+                    }
+                    if dotp > max_2 {
+                        max_2 = dotp;
+                    }
+                }
+
+                let overlap = (if max_1 < max_2 { max_1 } else { max_2 })
+                    - (if min_1 > min_2 { min_1 } else { min_2 });
+                if overlap <= (0.0 as FloatType) {
+                    collision = false;
+                    break;
+                }
+                if overlap < min_overlap {
+                    min_overlap = overlap;
+                }
+            }
+
+            if collision {
+                penalty += min_overlap * min_overlap;
+            }
+        }
+    }
+
+    penalty
+}
+
+struct BHProblem<'a> {
+    N: usize,
+    nsi: usize,
+    nsc: usize,
+    unit_polygon_vertices: &'a Array2<FloatType>,
+    unit_polygon_vectors: &'a Array2<FloatType>,
+    unit_container_vectors: &'a Array2<FloatType>,
+    unit_container_apothem: FloatType,
+    S: FloatType,
+}
+
+impl<'a> CostFunction for BHProblem<'a> {
+    type Param = Vec<FloatType>;
+    type Output = FloatType;
+
+    fn cost(&self, param: &Self::Param) -> Result<Self::Output, Error> {
+        Ok(bh_function(
+            param.as_slice(),
+            self.S,
+            self.N,
+            self.nsi,
+            self.nsc,
+            self.unit_polygon_vertices,
+            self.unit_polygon_vectors,
+            self.unit_container_vectors,
+            self.unit_container_apothem,
+        ))
+    }
+}
+
+impl<'a> Gradient for BHProblem<'a> {
+    type Param = Vec<FloatType>;
+    type Gradient = Vec<FloatType>;
+
+    fn gradient(&self, param: &Self::Param) -> Result<Self::Gradient, Error> {
+        let n = param.len();
+        let mut grad = vec![0.0 as FloatType; n];
+        let eps = FloatType::EPSILON.sqrt();
+        for i in 0..n {
+            let xi = param[i];
+            let h = eps * xi.abs().max(1.0 as FloatType);
+            let mut xp = param.clone();
+            xp[i] = xi + h;
+            let mut xm = param.clone();
+            xm[i] = xi - h;
+            let fp = bh_function(
+                xp.as_slice(),
+                self.S,
+                self.N,
+                self.nsi,
+                self.nsc,
+                self.unit_polygon_vertices,
+                self.unit_polygon_vectors,
+                self.unit_container_vectors,
+                self.unit_container_apothem,
+            );
+            let fm = bh_function(
+                xm.as_slice(),
+                self.S,
+                self.N,
+                self.nsi,
+                self.nsc,
+                self.unit_polygon_vertices,
+                self.unit_polygon_vectors,
+                self.unit_container_vectors,
+                self.unit_container_apothem,
+            );
+            grad[i] = (fp - fm) / (2.0 as FloatType * h);
+        }
+        Ok(grad)
+    }
+}
+
+fn initial_simplex(x0: &Vec<FloatType>) -> Vec<Vec<FloatType>> {
+    let n = x0.len();
+    let mut simplex: Vec<Vec<FloatType>> = Vec::with_capacity(n + 1);
+    simplex.push(x0.clone());
+    for i in 0..n {
+        let mut xi = x0.clone();
+        let delta = (0.05 as FloatType) * (1.0 as FloatType + x0[i].abs());
+        xi[i] += delta;
+        simplex.push(xi);
+    }
+    simplex
+}
+
+// def repetition(seed):
+fn repetition(
+    seed: usize,
+    N: usize,
+    nsi: usize,
+    nsc: usize,
+    penalty_tolerance: FloatType,
+    final_step_size: FloatType,
+    unit_polygon_vertices: &Array2<FloatType>,
+    unit_polygon_vectors: &Array2<FloatType>,
+    unit_container_vectors: &Array2<FloatType>,
+    unit_container_apothem: FloatType,
+) -> (FloatType, Vec<FloatType>) {
+    println!("Attempt {}", seed);
+
+    let mut rng = PcgInnerState64::mcg_seeded(seed as _);
+    let rand01 = |rng: &mut PcgInnerState64| {
+        let hi = rng.mcg_xsh_rs();
+        let lo = rng.mcg_xsh_rs();
+        let rand = ((hi as u64) << 32) | lo as u64;
+        rand as FloatType / (u64::MAX as FloatType)
+    };
+    let mut dynamic_S =
+        (N as FloatType).sqrt() * (2.0 as FloatType + rand01(&mut rng) * 2.0 as FloatType);
+    let initial_S = dynamic_S;
+
+    let mut x0 = vec![0.0 as FloatType; N * 3];
+
+    if rand01(&mut rng) < 0.5 {
+        for i in 0..(N * 3) {
+            x0[i] = (rand01(&mut rng) - 0.5 as FloatType) * dynamic_S;
+        }
+    } else {
+        let grid_count = ((N as FloatType).sqrt().ceil()) as usize;
+        let min = -dynamic_S / (2.0 as FloatType) * (0.9 as FloatType);
+        let max = dynamic_S / (2.0 as FloatType) * (0.9 as FloatType);
+        let mut grid_points: Vec<(FloatType, FloatType)> =
+            Vec::with_capacity(grid_count * grid_count);
+        for gy in 0..grid_count {
+            for gx in 0..grid_count {
+                let xi = if grid_count > 1 {
+                    min + (gx as FloatType) * ((max - min) / ((grid_count - 1) as FloatType))
+                } else {
+                    (min + max) / (2.0 as FloatType)
+                };
+                let yi = if grid_count > 1 {
+                    min + (gy as FloatType) * ((max - min) / ((grid_count - 1) as FloatType))
+                } else {
+                    (min + max) / (2.0 as FloatType)
+                };
+                grid_points.push((xi, yi));
+            }
+        }
+        for i in 0..N {
+            let (gx, gy) = grid_points[i % grid_points.len()];
+            x0[i * 3] = gx;
+            x0[i * 3 + 1] = gy;
+            x0[i * 3 + 2] = rand01(&mut rng) * (2.0 as FloatType) * PI;
+        }
+    }
+
+    let mut last_valid_x = x0.clone();
+    let mut last_valid_S = dynamic_S;
+
+    // Match Python's local-minimizer `tol=1e-8` used in `minimize(..., tol=1e-8)`
+    let solver_tol: FloatType = 1e-8 as FloatType;
+
+    loop {
+        // Local minimization using L-BFGS (match Python's L-BFGS-B)
+        let problem = BHProblem {
+            N,
+            nsi,
+            nsc,
+            unit_polygon_vertices,
+            unit_polygon_vectors,
+            unit_container_vectors,
+            unit_container_apothem,
+            S: dynamic_S,
+        };
+
+        let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
+        let linesearch = BacktrackingLineSearch::new(armijo);
+        let lbfgs = LBFGS::new(linesearch, 3)
+            .with_tolerance_grad(solver_tol)
+            .unwrap()
+            .with_tolerance_cost(solver_tol)
+            .unwrap();
+
+        let result = Executor::new(problem, lbfgs)
+            .configure(|state| state.param(x0.clone()))
+            .run();
+
+        let mut success = false;
+        if let Ok(res) = result {
+            let fun = res.state.get_best_cost();
+            if fun < penalty_tolerance {
+                if let Some(param) = res.state.get_best_param() {
+                    last_valid_x = param.clone();
+                    last_valid_S = dynamic_S;
+                    success = true;
+                    // compute multiplier (match Python exactly)
+                    let base = (N as FloatType).sqrt() * (nsi as FloatType) / (nsc as FloatType);
+                    let mut multiplier = 0.9999 as FloatType
+                        - (dynamic_S - base) * (0.0099 as FloatType) / (initial_S - base);
+                    multiplier = 1.0 as FloatType
+                        - final_step_size
+                        - (dynamic_S - base)
+                            * ((0.01 as FloatType - final_step_size) / (initial_S - base));
+                    x0 = param.iter().map(|v| v * multiplier).collect();
+                    dynamic_S *= multiplier;
+                }
+            } else {
+                // Basinhopping-like global search (many local starts)
+                let mut best_bh_fun = fun;
+                let mut best_bh_param = res.state.get_best_param().cloned().unwrap_or(x0.clone());
+                let mut current_fun = fun;
+                let mut current_param = res.state.get_best_param().cloned().unwrap_or(x0.clone());
+
+                for _ in 0..50 {
+                    let mut trial = current_param.clone();
+                    for k in 0..trial.len() {
+                        trial[k] += (rand01(&mut rng) - 0.5 as FloatType)
+                            * (2.0 as FloatType)
+                            * (0.1 as FloatType);
+                    }
+                    let problem = BHProblem {
+                        N,
+                        nsi,
+                        nsc,
+                        unit_polygon_vertices,
+                        unit_polygon_vectors,
+                        unit_container_vectors,
+                        unit_container_apothem,
+                        S: dynamic_S,
+                    };
+                    let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
+                    let linesearch = BacktrackingLineSearch::new(armijo);
+                    let lbfgs = LBFGS::new(linesearch, 3)
+                        .with_tolerance_grad(solver_tol)
+                        .unwrap()
+                        .with_tolerance_cost(solver_tol)
+                        .unwrap();
+                    if let Ok(res2) = Executor::new(problem, lbfgs)
+                        .configure(|state| state.param(trial.clone()))
+                        .run()
+                    {
+                        let f2 = res2.state.get_best_cost();
+                        if f2 < best_bh_fun {
+                            best_bh_fun = f2;
+                            best_bh_param = res2
+                                .state
+                                .get_best_param()
+                                .cloned()
+                                .unwrap_or(trial.clone());
+                        }
+                        // Metropolis acceptance
+                        if f2 < current_fun
+                            || ((current_fun - f2) / (0.1 as FloatType)).exp() > rand01(&mut rng)
+                        {
+                            current_fun = f2;
+                            current_param = res2
+                                .state
+                                .get_best_param()
+                                .cloned()
+                                .unwrap_or(trial.clone());
+                        }
+                    }
+                }
+
+                if best_bh_fun < penalty_tolerance {
+                    last_valid_x = best_bh_param.clone();
+                    last_valid_S = dynamic_S;
+                    let base = (N as FloatType).sqrt() * (nsi as FloatType) / (nsc as FloatType);
+                    let mut multiplier = 0.9999 as FloatType
+                        - (dynamic_S - base) * (0.0099 as FloatType) / (initial_S - base);
+                    multiplier = 1.0 as FloatType
+                        - final_step_size
+                        - (dynamic_S - base)
+                            * ((0.01 as FloatType - final_step_size) / (initial_S - base));
+                    x0 = last_valid_x.iter().map(|v| v * multiplier).collect();
+                    dynamic_S *= multiplier;
+                    success = true;
+                } else {
+                    break;
+                }
+            }
+        } else {
+            // If local solver failed for any reason, try a few random local starts
+            let mut found = false;
+            for _ in 0..10 {
+                let mut trial = x0.clone();
+                for k in 0..trial.len() {
+                    trial[k] +=
+                        (rand01(&mut rng) - 0.5 as FloatType) * dynamic_S * 0.1 as FloatType;
+                }
+                let problem = BHProblem {
+                    N,
+                    nsi,
+                    nsc,
+                    unit_polygon_vertices,
+                    unit_polygon_vectors,
+                    unit_container_vectors,
+                    unit_container_apothem,
+                    S: dynamic_S,
+                };
+                let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
+                let linesearch = BacktrackingLineSearch::new(armijo);
+                let lbfgs = LBFGS::new(linesearch, 3)
+                    .with_tolerance_grad(solver_tol)
+                    .unwrap()
+                    .with_tolerance_cost(solver_tol)
+                    .unwrap();
+                if let Ok(res2) = Executor::new(problem, lbfgs)
+                    .configure(|state| state.param(trial.clone()))
+                    .run()
+                {
+                    let f2 = res2.state.get_best_cost();
+                    if f2 < penalty_tolerance {
+                        last_valid_x = res2
+                            .state
+                            .get_best_param()
+                            .cloned()
+                            .unwrap_or(trial.clone());
+                        last_valid_S = dynamic_S;
+                        x0 = last_valid_x.clone();
+                        found = true;
+                        break;
+                    }
+                }
+            }
+            if !found {
+                break;
+            }
+        }
+
+        if !success {
+            break;
+        }
+    }
+
+    (last_valid_S, last_valid_x)
+}
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
new file mode 100644
index 0000000..3ef05bf
--- /dev/null
+++ b/polygon-packer-rs/src/rust_specific.rs
@@ -0,0 +1,21 @@
+//! this module handles rust-specific things like
+//! hot-swapping the float type to squeeze out more perf.
+
+/// change this type to `f32` be less precise but get faster,
+/// or `f64` to be more precise but get slower.
+pub type FloatType = f32;
+
+/// an interface that defines PI for both 32 bit and 64 bit floats
+pub trait AssocPI {
+    const PI: Self;
+}
+
+// associated PI constant for f32
+impl AssocPI for f32 {
+    const PI: f32 = std::f32::consts::PI;
+}
+
+// associated PI constant for f64
+impl AssocPI for f64 {
+    const PI: f64 = std::f64::consts::PI;
+}

From e44472aa569ad4597055660897479d5f82b52d46 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 08:19:17 +0200
Subject: [PATCH 03/15] Add Rust section to README.md

---
 README.md | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17fe194..e695e03 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 This program can quickly solve the 2D bin packing problem for any number of any polygons inside any other polygon! It was the tool used to find all the optimal packings under the name "Ignacio Vallejo" on [Erich's Packing Center](https://erich-friedman.github.io/packing/).
 <img width="640" height="480" alt="30 triangles in a hexagon" src="https://github.com/user-attachments/assets/48591a93-3ed9-4031-9c42-8b6eb579d91e" />
 
-### How to use
+### How to use with Python
 Download the python file, navigate to its location and run it like this:
 
 `python3 polygon_packer.py [n] [nsi] [nsc]`
@@ -14,3 +14,24 @@ Optional parameters:
 - `--attempts`: the total number of attempts to run. Increase to explore more possible packings. Defaults to 1000.
 - `--tolerance`: the tolerance for the penalty function. More penalty reduces the margin of overlap but limits exporation. Defaults to an empirical sweetspot of 0.00000001.
 - `--finalstep`: the container size is decreased by a smaller factor each time, to save compute at the beginning and achieve greater precision near the end. This sets the step size of the shrinkage which would correspond to the theoretical minimum container size (which, for most packings, will not actually be reached, so keep that in mind when setting this parameter). Defaults to 0.0001.
+
+### How to use with Rust
+
+- [Install The Rust Compiler.](https://rust-lang.org/learn/get-started/)
+
+- Clone this repository, change directory into `polygon-packer-rs`.
+
+- To compile with optimizations and run:
+
+```bash
+cargo run --release -- [n] [nsi] [nsc]
+```
+
+^ This may take a while.
+
+For further performance gains, set the environment variable `RUSTFLAGS` to
+`-C target-cpu=native`, and run the compiler again. Then, the compiler generates
+microarchitecture-specific machine code tailored for your CPU only.
+
+The part after `--release --` is the same arguments with the Python
+version.
\ No newline at end of file

From 2eff79aeb8667d9f42d48e7f412c6282e6f64f58 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 08:19:17 +0200
Subject: [PATCH 04/15] Add .gitignore

---
 .gitignore | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d02e6ac
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.venv
+polygon-packer-py/__pycache__
+polygon-packer-py/venv
+polygon-packer-rs/target
+polygon-packer-rs/Cargo.lock 
+**/*.png

From c1025cc62a327bb9e050a4626190485940148ec3 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 08:59:46 +0200
Subject: [PATCH 05/15] Optimization pass, speedup ~5x -> ~15x

---
 polygon-packer-rs/Cargo.toml           | 200 ++++++++++++++-
 polygon-packer-rs/src/main.rs          | 336 +++++++++++++------------
 polygon-packer-rs/src/rust_specific.rs |  42 ++--
 3 files changed, 397 insertions(+), 181 deletions(-)

diff --git a/polygon-packer-rs/Cargo.toml b/polygon-packer-rs/Cargo.toml
index bbc9385..ae5a840 100644
--- a/polygon-packer-rs/Cargo.toml
+++ b/polygon-packer-rs/Cargo.toml
@@ -19,6 +19,8 @@ clap = { version = "4.4", features = ["derive"] }
 # Voxell's homegrown RNG library, used for pure-rust PCG family of random number
 # generators. https://www.pcg-random.org/index.html
 voxell_rng = "0.6.0"
+# bump allocator for temporary arena allocations to reduce transient heap use
+bumpalo = {version = "3", features = ["collections"]}
 
 [profile.release]
 # opt-level: 3 for full optimizations
@@ -33,4 +35,200 @@ codegen-units = 1
 # you won't get fancy errors.
 # set to "unwind" for fancy errors
 # set to "abort" for a very slight boost
-panic = "abort"
\ No newline at end of file
+panic = "abort"
+
+# ============================================
+# Linter configuration. A linter is a program that pattern-matches on source
+# code to identify issues. I (@Paladynee) use a very strict linter
+# configuration. 
+
+[lints.clippy]
+pedantic = { level = "warn", priority = -1 }
+cargo = { level = "warn", priority = -1 }
+restriction = { level = "warn", priority = -1 }
+nursery = { level = "warn", priority = -1 }
+
+mod_module_files = "deny"
+self_named_module_files = "allow"
+
+allow_attributes = "allow"
+allow_attributes_without_reason = "allow"
+arithmetic_side_effects = "allow"
+as_conversions = "allow"
+as_pointer_underscore = "allow"
+as_underscore = "allow"
+assertions_on_result_states = "allow"
+big_endian_bytes = "allow"
+cognitive_complexity = "allow"
+arbitrary_source_item_ordering = "allow"
+dbg_macro = "allow"
+default_numeric_fallback = "allow"
+default_union_representation = "allow"
+deref_by_slicing = "allow"
+disallowed_script_idents = "allow"
+else_if_without_else = "allow"
+empty_drop = "allow"
+empty_enum_variants_with_brackets = "allow"
+empty_structs_with_brackets = "allow"
+exhaustive_enums = "allow"
+exhaustive_structs = "allow"
+expect_used = "allow"
+field_scoped_visibility_modifiers = "allow"
+float_arithmetic = "allow"
+fn_to_numeric_cast_any = "allow"
+host_endian_bytes = "allow"
+if_then_some_else_none = "allow"
+impl_trait_in_params = "allow"
+implicit_return = "allow"
+indexing_slicing = "allow"
+infinite_loop = "allow"
+inline_asm_x86_att_syntax = "allow"
+inline_asm_x86_intel_syntax = "allow"
+integer_division = "allow"
+integer_division_remainder_used = "allow"
+large_include_file = "allow"
+let_underscore_must_use = "allow"
+let_underscore_untyped = "allow"
+little_endian_bytes = "allow"
+lossy_float_literal = "allow"
+mem_forget = "allow"
+min_ident_chars = "allow"
+missing_assert_message = "allow"
+missing_asserts_for_indexing = "allow"
+missing_docs_in_private_items = "allow"
+missing_inline_in_public_items = "allow"
+missing_trait_methods = "allow"
+mixed_read_write_in_expression = "allow"
+module_name_repetitions = "allow"
+modulo_arithmetic = "allow"
+multiple_unsafe_ops_per_block = "allow"
+needless_raw_strings = "allow"
+non_ascii_literal = "allow"
+panic = "allow"
+panic_in_result_fn = "allow"
+partial_pub_fields = "allow"
+pointer_format = "allow"
+print_stderr = "allow"
+print_stdout = "allow"
+pub_use = "allow"
+pub_with_shorthand = "allow"
+pub_without_shorthand = "allow"
+question_mark_used = "allow"
+rc_buffer = "allow"
+redundant_type_annotations = "allow"
+ref_patterns = "allow"
+renamed_function_params = "allow"
+semicolon_inside_block = "allow"
+semicolon_outside_block = "allow"
+separated_literal_suffix = "allow"
+shadow_reuse = "allow"
+shadow_same = "allow"
+shadow_unrelated = "allow"
+single_call_fn = "allow"
+single_char_lifetime_names = "allow"
+std_instead_of_alloc = "allow"
+std_instead_of_core = "allow"
+str_to_string = "allow"
+string_add = "allow"
+string_slice = "allow"
+suspicious_xor_used_as_pow = "allow"
+tests_outside_test_module = "allow"
+todo = "allow"
+try_err = "allow"
+undocumented_unsafe_blocks = "allow"
+unimplemented = "allow"
+unnecessary_safety_comment = "allow"
+unnecessary_safety_doc = "allow"
+unnecessary_self_imports = "allow"
+unreachable = "allow"
+unseparated_literal_suffix = "allow"
+unused_result_ok = "allow"
+unused_trait_names = "allow"
+unwrap_in_result = "allow"
+unwrap_used = "allow"
+use_debug = "allow"
+wildcard_enum_match_arm = "allow"
+bool_to_int_with_if = "allow"
+borrow_as_ptr = "allow"
+cast_lossless = "allow"
+cast_possible_truncation = "allow"
+cast_precision_loss = "allow"
+cast_possible_wrap = "allow"
+cast_sign_loss = "allow"
+checked_conversions = "allow"
+default_trait_access = "allow"
+elidable_lifetime_names = "allow"
+empty_enums = "allow"
+enum_glob_use = "allow"
+explicit_deref_methods = "allow"
+explicit_into_iter_loop = "allow"
+explicit_iter_loop = "allow"
+if_not_else = "allow"
+ignore_without_reason = "allow"
+ignored_unit_patterns = "allow"
+index_refutable_slice = "allow"
+inline_always = "allow"
+into_iter_without_iter = "allow"
+items_after_statements = "allow"
+large_digit_groups = "allow"
+large_stack_arrays = "allow"
+large_types_passed_by_value = "allow"
+linkedlist = "allow"
+manual_assert = "allow"
+manual_is_power_of_two = "allow"
+many_single_char_names = "allow"
+match_bool = "allow"
+match_same_arms = "allow"
+match_wild_err_arm = "allow"
+match_wildcard_for_single_variants = "allow"
+missing_errors_doc = "allow"
+missing_fields_in_debug = "allow"
+missing_panics_doc = "allow"
+must_use_candidate = "allow"
+mut_mut = "allow"
+needless_bitwise_bool = "allow"
+needless_continue = "allow"
+needless_for_each = "allow"
+needless_pass_by_value = "allow"
+needless_raw_string_hashes = "allow"
+float_cmp = "allow"
+no_effect_underscore_binding = "allow"
+ptr_as_ptr = "allow"
+ptr_cast_constness = "allow"
+pub_underscore_fields = "allow"
+redundant_closure_for_method_calls = "allow"
+redundant_else = "allow"
+return_self_not_must_use = "allow"
+semicolon_if_nothing_returned = "allow"
+should_panic_without_expect = "allow"
+similar_names = "allow"
+single_match_else = "allow"
+string_add_assign = "allow"
+struct_excessive_bools = "allow"
+struct_field_names = "allow"
+too_many_lines = "allow"
+uninlined_format_args = "allow"
+unnecessary_box_returns = "allow"
+unnecessary_wraps = "allow"
+unnested_or_patterns = "allow"
+unreadable_literal = "allow"
+unused_self = "allow"
+used_underscore_binding = "allow"
+used_underscore_items = "allow"
+verbose_bit_mask = "allow"
+wildcard_imports = "allow"
+equatable_if_let = "allow"
+missing_const_for_fn = "allow"
+option_if_let_else = "allow"
+use_self = "allow"
+missing_safety_doc = "allow"
+manual_let_else = "allow"
+map_err_ignore = "allow"
+or_fun_call = "allow"
+blanket_clippy_restriction_lints = "allow"
+doc_markdown = "allow"
+decimal_literal_representation = "allow"
+
+[lints.rust]
+ambiguous_negative_literals = "warn"
+non_ascii_idents = "warn"
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index b2db57e..656bf80 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -18,6 +18,10 @@ use clap::Parser;
 use ndarray::Array2;
 use plotters::prelude::*;
 use rayon::prelude::*;
+use std::cell::RefCell;
+use std::rc::Rc;
+use bumpalo::Bump;
+use bumpalo::collections::Vec as BumpVec;
 use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
 
 #[derive(Parser, Debug)]
@@ -307,168 +311,173 @@ fn transform_polygon(
     transformed
 }
 
-fn rotate_vectors(a: FloatType, vectors: &Array2<FloatType>) -> Vec<FloatType> {
-    let n = vectors.shape()[0];
-    let mut rotated = vec![0.0 as FloatType; n * 2];
-    let ca = a.cos();
-    let sa = a.sin();
-    for i in 0..n {
-        let vx = vectors[[i, 0]];
-        let vy = vectors[[i, 1]];
-        rotated[i * 2] = vx * ca - vy * sa;
-        rotated[i * 2 + 1] = vx * sa + vy * ca;
-    }
-    rotated
-}
 
-fn poking_penalty(
-    vertices: &[FloatType],
-    S: FloatType,
-    unit_container_vectors: &Array2<FloatType>,
-    unit_container_apothem: FloatType,
-) -> FloatType {
-    let mut penalty: FloatType = 0.0;
-    let limit = unit_container_apothem * S;
-    let n_vertices = vertices.len() / 2;
-    let nsc = unit_container_vectors.shape()[0];
-    for v in 0..n_vertices {
-        let vx = vertices[v * 2];
-        let vy = vertices[v * 2 + 1];
-        for i in 0..nsc {
-            let dvx = unit_container_vectors[[i, 0]];
-            let dvy = unit_container_vectors[[i, 1]];
-            let distance = vx * dvx + vy * dvy;
-            if distance > limit {
-                let diff = distance - limit;
-                penalty += diff * diff;
-            }
-        }
-    }
-    penalty
-}
 
-fn bh_function(
-    values: &[FloatType],
-    S: FloatType,
+// BHProblem struct must be declared before its impl. It holds arena-backed
+// scratch buffers to avoid transient allocations during cost evaluation.
+struct BHProblem<'a, 'b> {
     N: usize,
     nsi: usize,
     nsc: usize,
-    unit_polygon_vertices: &Array2<FloatType>,
-    unit_polygon_vectors: &Array2<FloatType>,
-    unit_container_vectors: &Array2<FloatType>,
+    unit_polygon_vertices: &'a Array2<FloatType>,
+    unit_polygon_vectors: &'a Array2<FloatType>,
+    unit_container_vectors: &'a Array2<FloatType>,
     unit_container_apothem: FloatType,
-) -> FloatType {
-    let mut penalty: FloatType = 0.0;
-
-    let mut polygon_array: Vec<Vec<FloatType>> = Vec::with_capacity(N);
-    let mut vector_array: Vec<Vec<FloatType>> = Vec::with_capacity(N);
-
-    for i in 0..N {
-        let posx = values[i * 3];
-        let posy = values[i * 3 + 1];
-        let rot = values[i * 3 + 2];
-        let poly = transform_polygon(posx, posy, rot, unit_polygon_vertices);
-        let vecs = rotate_vectors(rot, unit_polygon_vectors);
-        penalty += poking_penalty(&poly, S, unit_container_vectors, unit_container_apothem);
-        polygon_array.push(poly);
-        vector_array.push(vecs);
-    }
+    S: FloatType,
+    arena: &'b Bump,
+    polys_x: Rc<RefCell<BumpVec<'b, FloatType>>>,
+    polys_y: Rc<RefCell<BumpVec<'b, FloatType>>>,
+    vecs_x: Rc<RefCell<BumpVec<'b, FloatType>>>,
+    vecs_y: Rc<RefCell<BumpVec<'b, FloatType>>>,
+}
 
-    for i in 0..N {
-        for j in (i + 1)..N {
-            let mut collision = true;
-            let huge: FloatType = 100000000000000000000.0 as FloatType;
-            let mut min_overlap: FloatType = huge;
-            for vec_idx in 0..(nsi * 2) {
-                let (x_axis, y_axis) = if vec_idx < nsi {
-                    (
-                        vector_array[i][vec_idx * 2],
-                        vector_array[i][vec_idx * 2 + 1],
-                    )
-                } else {
-                    (
-                        vector_array[j][(vec_idx - nsi) * 2],
-                        vector_array[j][(vec_idx - nsi) * 2 + 1],
-                    )
-                };
+// BH cost evaluation implemented as a method on the problem struct.
+// Use a bump arena and SoA scratch buffers (polys_x/polys_y, vecs_x/vecs_y)
+// to reduce transient heap allocations and improve data locality.
+impl<'a, 'b> BHProblem<'a, 'b> {
+    fn bh_function(&self, values: &[FloatType]) -> FloatType {
+        const HUGE: FloatType = 1e20 as FloatType;
+
+        let n_vertices = self.unit_polygon_vertices.shape()[0];
+        let nsi = self.nsi;
+        let nsc = self.nsc;
+
+        let limit = self.unit_container_apothem * self.S;
+
+        let mut penalty: FloatType = 0.0;
+
+        // Reusable, arena-allocated SoA buffers.
+        let mut polys_x = self.polys_x.borrow_mut();
+        let mut polys_y = self.polys_y.borrow_mut();
+        let mut vecs_x = self.vecs_x.borrow_mut();
+        let mut vecs_y = self.vecs_y.borrow_mut();
+
+        polys_x.clear();
+        polys_y.clear();
+        vecs_x.clear();
+        vecs_y.clear();
+
+        // Reserve expected capacity to avoid repeated grows
+        let total_points = (self.N as usize) * (n_vertices as usize);
+        polys_x.reserve(total_points);
+        polys_y.reserve(total_points);
+        vecs_x.reserve(total_points);
+        vecs_y.reserve(total_points);
+
+        // Fill transformed polygon coordinates and rotated edge vectors.
+        for i in 0..self.N {
+            let posx = values[i * 3];
+            let posy = values[i * 3 + 1];
+            let rot = values[i * 3 + 2];
+            let ca = rot.cos();
+            let sa = rot.sin();
+
+            for v in 0..n_vertices {
+                let vx = self.unit_polygon_vertices[[v, 0]];
+                let vy = self.unit_polygon_vertices[[v, 1]];
+                let px = posx + (vx * ca - vy * sa);
+                let py = posy + (vx * sa + vy * ca);
+                polys_x.push(px);
+                polys_y.push(py);
+
+                // poking penalty against container
+                for ci in 0..nsc {
+                    let dvx = self.unit_container_vectors[[ci, 0]];
+                    let dvy = self.unit_container_vectors[[ci, 1]];
+                    let distance = px * dvx + py * dvy;
+                    if distance > limit {
+                        let diff = distance - limit;
+                        penalty += diff * diff;
+                    }
+                }
+            }
+
+            for v in 0..n_vertices {
+                let vx = self.unit_polygon_vectors[[v, 0]];
+                let vy = self.unit_polygon_vectors[[v, 1]];
+                vecs_x.push(vx * ca - vy * sa);
+                vecs_y.push(vx * sa + vy * ca);
+            }
+        }
 
-                let mut min_1 = huge;
-                let mut max_1 = -huge;
-                for vert in 0..nsi {
-                    let dotp = polygon_array[i][vert * 2] * x_axis
-                        + polygon_array[i][vert * 2 + 1] * y_axis;
-                    if dotp < min_1 {
-                        min_1 = dotp;
+        // Pairwise separating axis test using SoA buffers
+        for i in 0..self.N {
+            for j in (i + 1)..self.N {
+                let mut collision = true;
+                let mut min_overlap: FloatType = HUGE;
+
+                for vec_idx in 0..(nsi * 2) {
+                    let (x_axis, y_axis) = if vec_idx < nsi {
+                        (
+                            vecs_x[i * nsi + vec_idx],
+                            vecs_y[i * nsi + vec_idx],
+                        )
+                    } else {
+                        (
+                            vecs_x[j * nsi + (vec_idx - nsi)],
+                            vecs_y[j * nsi + (vec_idx - nsi)],
+                        )
+                    };
+
+                    let mut min_1 = HUGE;
+                    let mut max_1 = -HUGE;
+                    for vert in 0..nsi {
+                        let dotp = polys_x[i * nsi + vert] * x_axis
+                            + polys_y[i * nsi + vert] * y_axis;
+                        if dotp < min_1 {
+                            min_1 = dotp;
+                        }
+                        if dotp > max_1 {
+                            max_1 = dotp;
+                        }
                     }
-                    if dotp > max_1 {
-                        max_1 = dotp;
+
+                    let mut min_2 = HUGE;
+                    let mut max_2 = -HUGE;
+                    for vert in 0..nsi {
+                        let dotp = polys_x[j * nsi + vert] * x_axis
+                            + polys_y[j * nsi + vert] * y_axis;
+                        if dotp < min_2 {
+                            min_2 = dotp;
+                        }
+                        if dotp > max_2 {
+                            max_2 = dotp;
+                        }
                     }
-                }
 
-                let mut min_2 = huge;
-                let mut max_2 = -huge;
-                for vert in 0..nsi {
-                    let dotp = polygon_array[j][vert * 2] * x_axis
-                        + polygon_array[j][vert * 2 + 1] * y_axis;
-                    if dotp < min_2 {
-                        min_2 = dotp;
+                    let overlap = (if max_1 < max_2 { max_1 } else { max_2 })
+                        - (if min_1 > min_2 { min_1 } else { min_2 });
+                    if overlap <= (0.0 as FloatType) {
+                        collision = false;
+                        break;
                     }
-                    if dotp > max_2 {
-                        max_2 = dotp;
+                    if overlap < min_overlap {
+                        min_overlap = overlap;
                     }
                 }
 
-                let overlap = (if max_1 < max_2 { max_1 } else { max_2 })
-                    - (if min_1 > min_2 { min_1 } else { min_2 });
-                if overlap <= (0.0 as FloatType) {
-                    collision = false;
-                    break;
-                }
-                if overlap < min_overlap {
-                    min_overlap = overlap;
+                if collision {
+                    penalty += min_overlap * min_overlap;
                 }
             }
-
-            if collision {
-                penalty += min_overlap * min_overlap;
-            }
         }
-    }
 
-    penalty
+        penalty
+    }
 }
 
-struct BHProblem<'a> {
-    N: usize,
-    nsi: usize,
-    nsc: usize,
-    unit_polygon_vertices: &'a Array2<FloatType>,
-    unit_polygon_vectors: &'a Array2<FloatType>,
-    unit_container_vectors: &'a Array2<FloatType>,
-    unit_container_apothem: FloatType,
-    S: FloatType,
-}
 
-impl<'a> CostFunction for BHProblem<'a> {
+
+impl<'a, 'b> CostFunction for BHProblem<'a, 'b> {
     type Param = Vec<FloatType>;
     type Output = FloatType;
 
     fn cost(&self, param: &Self::Param) -> Result<Self::Output, Error> {
-        Ok(bh_function(
-            param.as_slice(),
-            self.S,
-            self.N,
-            self.nsi,
-            self.nsc,
-            self.unit_polygon_vertices,
-            self.unit_polygon_vectors,
-            self.unit_container_vectors,
-            self.unit_container_apothem,
-        ))
+        Ok(self.bh_function(param.as_slice()))
     }
 }
 
-impl<'a> Gradient for BHProblem<'a> {
+impl<'a, 'b> Gradient for BHProblem<'a, 'b> {
     type Param = Vec<FloatType>;
     type Gradient = Vec<FloatType>;
 
@@ -476,36 +485,18 @@ impl<'a> Gradient for BHProblem<'a> {
         let n = param.len();
         let mut grad = vec![0.0 as FloatType; n];
         let eps = FloatType::EPSILON.sqrt();
+        let mut xp = param.clone();
+        let mut xm = param.clone();
         for i in 0..n {
             let xi = param[i];
             let h = eps * xi.abs().max(1.0 as FloatType);
-            let mut xp = param.clone();
             xp[i] = xi + h;
-            let mut xm = param.clone();
             xm[i] = xi - h;
-            let fp = bh_function(
-                xp.as_slice(),
-                self.S,
-                self.N,
-                self.nsi,
-                self.nsc,
-                self.unit_polygon_vertices,
-                self.unit_polygon_vectors,
-                self.unit_container_vectors,
-                self.unit_container_apothem,
-            );
-            let fm = bh_function(
-                xm.as_slice(),
-                self.S,
-                self.N,
-                self.nsi,
-                self.nsc,
-                self.unit_polygon_vertices,
-                self.unit_polygon_vectors,
-                self.unit_container_vectors,
-                self.unit_container_apothem,
-            );
+            let fp = self.bh_function(xp.as_slice());
+            let fm = self.bh_function(xm.as_slice());
             grad[i] = (fp - fm) / (2.0 as FloatType * h);
+            xp[i] = xi;
+            xm[i] = xi;
         }
         Ok(grad)
     }
@@ -591,6 +582,14 @@ fn repetition(
     // Match Python's local-minimizer `tol=1e-8` used in `minimize(..., tol=1e-8)`
     let solver_tol: FloatType = 1e-8 as FloatType;
 
+    // Single arena reused for all BHProblem evaluations in this repetition.
+    let arena = Bump::new();
+    // Reusable arena-backed scratch buffers (wrapped in RefCell for interior mutability)
+    let polys_x = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
+    let polys_y = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
+    let vecs_x = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
+    let vecs_y = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
+
     loop {
         // Local minimization using L-BFGS (match Python's L-BFGS-B)
         let problem = BHProblem {
@@ -602,6 +601,11 @@ fn repetition(
             unit_container_vectors,
             unit_container_apothem,
             S: dynamic_S,
+            arena: &arena,
+            polys_x: polys_x.clone(),
+            polys_y: polys_y.clone(),
+            vecs_x: vecs_x.clone(),
+            vecs_y: vecs_y.clone(),
         };
 
         let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -632,7 +636,9 @@ fn repetition(
                         - final_step_size
                         - (dynamic_S - base)
                             * ((0.01 as FloatType - final_step_size) / (initial_S - base));
-                    x0 = param.iter().map(|v| v * multiplier).collect();
+                    for i in 0..x0.len() {
+                        x0[i] = param[i] * multiplier;
+                    }
                     dynamic_S *= multiplier;
                 }
             } else {
@@ -658,6 +664,11 @@ fn repetition(
                         unit_container_vectors,
                         unit_container_apothem,
                         S: dynamic_S,
+                        arena: &arena,
+                        polys_x: polys_x.clone(),
+                        polys_y: polys_y.clone(),
+                        vecs_x: vecs_x.clone(),
+                        vecs_y: vecs_y.clone(),
                     };
                     let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                     let linesearch = BacktrackingLineSearch::new(armijo);
@@ -703,7 +714,9 @@ fn repetition(
                         - final_step_size
                         - (dynamic_S - base)
                             * ((0.01 as FloatType - final_step_size) / (initial_S - base));
-                    x0 = last_valid_x.iter().map(|v| v * multiplier).collect();
+                    for i in 0..x0.len() {
+                        x0[i] = last_valid_x[i] * multiplier;
+                    }
                     dynamic_S *= multiplier;
                     success = true;
                 } else {
@@ -728,6 +741,11 @@ fn repetition(
                     unit_container_vectors,
                     unit_container_apothem,
                     S: dynamic_S,
+                    arena: &arena,
+                    polys_x: polys_x.clone(),
+                    polys_y: polys_y.clone(),
+                    vecs_x: vecs_x.clone(),
+                    vecs_y: vecs_y.clone(),
                 };
                 let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                 let linesearch = BacktrackingLineSearch::new(armijo);
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
index 3ef05bf..aaea3c7 100644
--- a/polygon-packer-rs/src/rust_specific.rs
+++ b/polygon-packer-rs/src/rust_specific.rs
@@ -1,21 +1,21 @@
-//! this module handles rust-specific things like
-//! hot-swapping the float type to squeeze out more perf.
-
-/// change this type to `f32` be less precise but get faster,
-/// or `f64` to be more precise but get slower.
-pub type FloatType = f32;
-
-/// an interface that defines PI for both 32 bit and 64 bit floats
-pub trait AssocPI {
-    const PI: Self;
-}
-
-// associated PI constant for f32
-impl AssocPI for f32 {
-    const PI: f32 = std::f32::consts::PI;
-}
-
-// associated PI constant for f64
-impl AssocPI for f64 {
-    const PI: f64 = std::f64::consts::PI;
-}
+//! this module handles rust-specific things like
+//! hot-swapping the float type to squeeze out more perf.
+
+/// change this type to `f32` be less precise but get faster,
+/// or `f64` to be more precise but get slower.
+pub type FloatType = f64;
+
+/// an interface that defines PI for both 32 bit and 64 bit floats
+pub trait AssocPI {
+    const PI: Self;
+}
+
+// associated PI constant for f32
+impl AssocPI for f32 {
+    const PI: f32 = std::f32::consts::PI;
+}
+
+// associated PI constant for f64
+impl AssocPI for f64 {
+    const PI: f64 = std::f64::consts::PI;
+}

From dc9b4bf8ecc4b07c149b9215fff46b2e02a5f1de Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 09:06:01 +0200
Subject: [PATCH 06/15] Optimization pass: speedup = ~15x -> ~25.5x

---
 polygon-packer-rs/src/main.rs          | 119 ++++++++-----------------
 polygon-packer-rs/src/rust_specific.rs |   2 +-
 2 files changed, 40 insertions(+), 81 deletions(-)

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index 656bf80..bc455c3 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -18,10 +18,6 @@ use clap::Parser;
 use ndarray::Array2;
 use plotters::prelude::*;
 use rayon::prelude::*;
-use std::cell::RefCell;
-use std::rc::Rc;
-use bumpalo::Bump;
-use bumpalo::collections::Vec as BumpVec;
 use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
 
 #[derive(Parser, Debug)]
@@ -313,9 +309,10 @@ fn transform_polygon(
 
 
 
-// BHProblem struct must be declared before its impl. It holds arena-backed
-// scratch buffers to avoid transient allocations during cost evaluation.
-struct BHProblem<'a, 'b> {
+// BHProblem struct holds problem data used during cost evaluation.
+// To reduce memory pressure we compute transformed vertices and axes
+// on-the-fly instead of storing arena-backed scratch buffers.
+struct BHProblem<'a> {
     N: usize,
     nsi: usize,
     nsc: usize,
@@ -324,17 +321,12 @@ struct BHProblem<'a, 'b> {
     unit_container_vectors: &'a Array2<FloatType>,
     unit_container_apothem: FloatType,
     S: FloatType,
-    arena: &'b Bump,
-    polys_x: Rc<RefCell<BumpVec<'b, FloatType>>>,
-    polys_y: Rc<RefCell<BumpVec<'b, FloatType>>>,
-    vecs_x: Rc<RefCell<BumpVec<'b, FloatType>>>,
-    vecs_y: Rc<RefCell<BumpVec<'b, FloatType>>>,
 }
 
 // BH cost evaluation implemented as a method on the problem struct.
 // Use a bump arena and SoA scratch buffers (polys_x/polys_y, vecs_x/vecs_y)
 // to reduce transient heap allocations and improve data locality.
-impl<'a, 'b> BHProblem<'a, 'b> {
+impl<'a> BHProblem<'a> {
     fn bh_function(&self, values: &[FloatType]) -> FloatType {
         const HUGE: FloatType = 1e20 as FloatType;
 
@@ -346,25 +338,7 @@ impl<'a, 'b> BHProblem<'a, 'b> {
 
         let mut penalty: FloatType = 0.0;
 
-        // Reusable, arena-allocated SoA buffers.
-        let mut polys_x = self.polys_x.borrow_mut();
-        let mut polys_y = self.polys_y.borrow_mut();
-        let mut vecs_x = self.vecs_x.borrow_mut();
-        let mut vecs_y = self.vecs_y.borrow_mut();
-
-        polys_x.clear();
-        polys_y.clear();
-        vecs_x.clear();
-        vecs_y.clear();
-
-        // Reserve expected capacity to avoid repeated grows
-        let total_points = (self.N as usize) * (n_vertices as usize);
-        polys_x.reserve(total_points);
-        polys_y.reserve(total_points);
-        vecs_x.reserve(total_points);
-        vecs_y.reserve(total_points);
-
-        // Fill transformed polygon coordinates and rotated edge vectors.
+        // Compute transformed vertices on-the-fly and apply container penalty.
         for i in 0..self.N {
             let posx = values[i * 3];
             let posy = values[i * 3 + 1];
@@ -377,10 +351,7 @@ impl<'a, 'b> BHProblem<'a, 'b> {
                 let vy = self.unit_polygon_vertices[[v, 1]];
                 let px = posx + (vx * ca - vy * sa);
                 let py = posy + (vx * sa + vy * ca);
-                polys_x.push(px);
-                polys_y.push(py);
 
-                // poking penalty against container
                 for ci in 0..nsc {
                     let dvx = self.unit_container_vectors[[ci, 0]];
                     let dvy = self.unit_container_vectors[[ci, 1]];
@@ -391,39 +362,47 @@ impl<'a, 'b> BHProblem<'a, 'b> {
                     }
                 }
             }
-
-            for v in 0..n_vertices {
-                let vx = self.unit_polygon_vectors[[v, 0]];
-                let vy = self.unit_polygon_vectors[[v, 1]];
-                vecs_x.push(vx * ca - vy * sa);
-                vecs_y.push(vx * sa + vy * ca);
-            }
         }
 
-        // Pairwise separating axis test using SoA buffers
+        // Pairwise separating axis test: compute axes and projections on-the-fly
         for i in 0..self.N {
+            let posx_i = values[i * 3];
+            let posy_i = values[i * 3 + 1];
+            let rot_i = values[i * 3 + 2];
+            let ca_i = rot_i.cos();
+            let sa_i = rot_i.sin();
+
             for j in (i + 1)..self.N {
+                let posx_j = values[j * 3];
+                let posy_j = values[j * 3 + 1];
+                let rot_j = values[j * 3 + 2];
+                let ca_j = rot_j.cos();
+                let sa_j = rot_j.sin();
+
                 let mut collision = true;
                 let mut min_overlap: FloatType = HUGE;
 
                 for vec_idx in 0..(nsi * 2) {
                     let (x_axis, y_axis) = if vec_idx < nsi {
-                        (
-                            vecs_x[i * nsi + vec_idx],
-                            vecs_y[i * nsi + vec_idx],
-                        )
+                        let v = vec_idx;
+                        let ux = self.unit_polygon_vectors[[v, 0]];
+                        let uy = self.unit_polygon_vectors[[v, 1]];
+                        (ux * ca_i - uy * sa_i, ux * sa_i + uy * ca_i)
                     } else {
-                        (
-                            vecs_x[j * nsi + (vec_idx - nsi)],
-                            vecs_y[j * nsi + (vec_idx - nsi)],
-                        )
+                        let v = vec_idx - nsi;
+                        let ux = self.unit_polygon_vectors[[v, 0]];
+                        let uy = self.unit_polygon_vectors[[v, 1]];
+                        (ux * ca_j - uy * sa_j, ux * sa_j + uy * ca_j)
                     };
 
                     let mut min_1 = HUGE;
                     let mut max_1 = -HUGE;
                     for vert in 0..nsi {
-                        let dotp = polys_x[i * nsi + vert] * x_axis
-                            + polys_y[i * nsi + vert] * y_axis;
+                        let vx = self.unit_polygon_vertices[[vert, 0]];
+                        let vy = self.unit_polygon_vertices[[vert, 1]];
+                        let px = posx_i + (vx * ca_i - vy * sa_i);
+                        let py = posy_i + (vx * sa_i + vy * ca_i);
+                        let dotp = px * x_axis + py * y_axis;
                         if dotp < min_1 {
                             min_1 = dotp;
                         }
@@ -435,8 +414,11 @@ impl<'a, 'b> BHProblem<'a, 'b> {
                     let mut min_2 = HUGE;
                     let mut max_2 = -HUGE;
                     for vert in 0..nsi {
-                        let dotp = polys_x[j * nsi + vert] * x_axis
-                            + polys_y[j * nsi + vert] * y_axis;
+                        let vx = self.unit_polygon_vertices[[vert, 0]];
+                        let vy = self.unit_polygon_vertices[[vert, 1]];
+                        let px = posx_j + (vx * ca_j - vy * sa_j);
+                        let py = posy_j + (vx * sa_j + vy * ca_j);
+                        let dotp = px * x_axis + py * y_axis;
                         if dotp < min_2 {
                             min_2 = dotp;
                         }
@@ -468,7 +450,7 @@ impl<'a, 'b> BHProblem<'a, 'b> {
 
 
 
-impl<'a, 'b> CostFunction for BHProblem<'a, 'b> {
+impl<'a> CostFunction for BHProblem<'a> {
     type Param = Vec<FloatType>;
     type Output = FloatType;
 
@@ -477,7 +459,7 @@ impl<'a, 'b> CostFunction for BHProblem<'a, 'b> {
     }
 }
 
-impl<'a, 'b> Gradient for BHProblem<'a, 'b> {
+impl<'a> Gradient for BHProblem<'a> {
     type Param = Vec<FloatType>;
     type Gradient = Vec<FloatType>;
 
@@ -582,14 +564,6 @@ fn repetition(
     // Match Python's local-minimizer `tol=1e-8` used in `minimize(..., tol=1e-8)`
     let solver_tol: FloatType = 1e-8 as FloatType;
 
-    // Single arena reused for all BHProblem evaluations in this repetition.
-    let arena = Bump::new();
-    // Reusable arena-backed scratch buffers (wrapped in RefCell for interior mutability)
-    let polys_x = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
-    let polys_y = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
-    let vecs_x = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
-    let vecs_y = Rc::new(RefCell::new(BumpVec::new_in(&arena)));
-
     loop {
         // Local minimization using L-BFGS (match Python's L-BFGS-B)
         let problem = BHProblem {
@@ -601,11 +575,6 @@ fn repetition(
             unit_container_vectors,
             unit_container_apothem,
             S: dynamic_S,
-            arena: &arena,
-            polys_x: polys_x.clone(),
-            polys_y: polys_y.clone(),
-            vecs_x: vecs_x.clone(),
-            vecs_y: vecs_y.clone(),
         };
 
         let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -664,11 +633,6 @@ fn repetition(
                         unit_container_vectors,
                         unit_container_apothem,
                         S: dynamic_S,
-                        arena: &arena,
-                        polys_x: polys_x.clone(),
-                        polys_y: polys_y.clone(),
-                        vecs_x: vecs_x.clone(),
-                        vecs_y: vecs_y.clone(),
                     };
                     let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                     let linesearch = BacktrackingLineSearch::new(armijo);
@@ -741,11 +705,6 @@ fn repetition(
                     unit_container_vectors,
                     unit_container_apothem,
                     S: dynamic_S,
-                    arena: &arena,
-                    polys_x: polys_x.clone(),
-                    polys_y: polys_y.clone(),
-                    vecs_x: vecs_x.clone(),
-                    vecs_y: vecs_y.clone(),
                 };
                 let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                 let linesearch = BacktrackingLineSearch::new(armijo);
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
index aaea3c7..e06ca94 100644
--- a/polygon-packer-rs/src/rust_specific.rs
+++ b/polygon-packer-rs/src/rust_specific.rs
@@ -3,7 +3,7 @@
 
 /// change this type to `f32` be less precise but get faster,
 /// or `f64` to be more precise but get slower.
-pub type FloatType = f64;
+pub type FloatType = f32;
 
 /// an interface that defines PI for both 32 bit and 64 bit floats
 pub trait AssocPI {

From 5e7b1a40cef92c84b72f6f926db06353ea0f89d4 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 09:07:03 +0200
Subject: [PATCH 07/15] Add rustfmt.toml, apply formatting

---
 polygon-packer-rs/rustfmt.toml |  24 ++++++
 polygon-packer-rs/src/main.rs  | 139 ++++++++++++++++++++-------------
 2 files changed, 109 insertions(+), 54 deletions(-)
 create mode 100644 polygon-packer-rs/rustfmt.toml

diff --git a/polygon-packer-rs/rustfmt.toml b/polygon-packer-rs/rustfmt.toml
new file mode 100644
index 0000000..95ea60c
--- /dev/null
+++ b/polygon-packer-rs/rustfmt.toml
@@ -0,0 +1,24 @@
+max_width = 80
+imports_granularity = "Item"
+edition = "2024"
+style_edition = "2024"
+group_imports = "StdExternalCrate"
+use_field_init_shorthand = true
+wrap_comments = true
+tab_spaces = 4
+newline_style = "Unix"
+reorder_imports = true
+reorder_impl_items = false
+reorder_modules = true
+format_code_in_doc_comments = true
+hard_tabs = false
+comment_width = 80
+format_macro_matchers = true
+imports_layout = "Vertical"
+merge_derives = true
+overflow_delimited_expr = true
+condense_wildcard_suffixes = true
+remove_nested_parens = true
+fn_params_layout = "Compressed"
+match_arm_blocks = false
+struct_lit_single_line = false
\ No newline at end of file
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index bc455c3..c4fbb61 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -10,7 +10,11 @@ use rust_specific::FloatType;
 // this is π in either f32 or f64
 const PI: FloatType = <FloatType as AssocPI>::PI;
 
-use argmin::core::{CostFunction, Error, Executor, Gradient, State};
+use argmin::core::CostFunction;
+use argmin::core::Error;
+use argmin::core::Executor;
+use argmin::core::Gradient;
+use argmin::core::State;
 use argmin::solver::linesearch::BacktrackingLineSearch;
 use argmin::solver::linesearch::condition::ArmijoCondition;
 use argmin::solver::quasinewton::LBFGS;
@@ -35,7 +39,8 @@ struct Args {
     /// Overlap penalty tolerance. Probably best left at default
     #[arg(long, default_value_t = 1e-8)]
     tolerance: FloatType,
-    /// How small the last theoretical step in container size decrease will be (it gets smaller over time)
+    /// How small the last theoretical step in container size decrease will be
+    /// (it gets smaller over time)
     #[arg(long, default_value_t = 0.0001)]
     finalstep: FloatType,
 }
@@ -56,8 +61,10 @@ fn main() {
         .map(|i| 2.0 * PI * (i as FloatType) / (nsi as FloatType))
         .collect();
 
-    // unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles), np.sin(unit_polygon_angles)))
-    let mut unit_polygon_vertices_buf: Vec<FloatType> = Vec::with_capacity(nsi * 2);
+    // unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles),
+    // np.sin(unit_polygon_angles)))
+    let mut unit_polygon_vertices_buf: Vec<FloatType> =
+        Vec::with_capacity(nsi * 2);
     for &a in &unit_polygon_angles_vec {
         unit_polygon_vertices_buf.push(a.cos());
         unit_polygon_vertices_buf.push(a.sin());
@@ -65,8 +72,10 @@ fn main() {
     let unit_polygon_vertices: Array2<FloatType> =
         Array2::from_shape_vec((nsi, 2), unit_polygon_vertices_buf).unwrap();
 
-    // unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles + np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
-    let mut unit_polygon_vectors_buf: Vec<FloatType> = Vec::with_capacity(nsi * 2);
+    // unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles +
+    // np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
+    let mut unit_polygon_vectors_buf: Vec<FloatType> =
+        Vec::with_capacity(nsi * 2);
     let offset_nsi = PI / (nsi as FloatType);
     for &a in &unit_polygon_angles_vec {
         let aa = a + offset_nsi;
@@ -81,8 +90,10 @@ fn main() {
         .map(|i| 2.0 * PI * (i as FloatType) / (nsc as FloatType))
         .collect();
 
-    // unit_container_vertices = np.column_stack((np.cos(unit_container_angles), np.sin(unit_container_angles)))
-    let mut unit_container_vertices_buf: Vec<FloatType> = Vec::with_capacity(nsc * 2);
+    // unit_container_vertices = np.column_stack((np.cos(unit_container_angles),
+    // np.sin(unit_container_angles)))
+    let mut unit_container_vertices_buf: Vec<FloatType> =
+        Vec::with_capacity(nsc * 2);
     for &a in &unit_container_angles {
         unit_container_vertices_buf.push(a.cos());
         unit_container_vertices_buf.push(a.sin());
@@ -90,8 +101,10 @@ fn main() {
     let unit_container_vertices: Array2<FloatType> =
         Array2::from_shape_vec((nsc, 2), unit_container_vertices_buf).unwrap();
 
-    // unit_container_vectors = np.column_stack((np.cos(unit_container_angles + np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
-    let mut unit_container_vectors_buf: Vec<FloatType> = Vec::with_capacity(nsc * 2);
+    // unit_container_vectors = np.column_stack((np.cos(unit_container_angles +
+    // np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
+    let mut unit_container_vectors_buf: Vec<FloatType> =
+        Vec::with_capacity(nsc * 2);
     let offset_nsc = PI / (nsc as FloatType);
     for &a in &unit_container_angles {
         let aa = a + offset_nsc;
@@ -107,7 +120,8 @@ fn main() {
     let mut best_S: FloatType = FloatType::INFINITY;
     let mut best_values: Option<Vec<FloatType>> = None;
 
-    // results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i) for i in range(attempts))
+    // results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i)
+    // for i in range(attempts))
     let results: Vec<(FloatType, _)> = (0..attempts)
         .into_par_iter()
         .map(|i| {
@@ -136,7 +150,8 @@ fn main() {
     println!(
         "Final side length: {}",
         // best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi),
-        best_S * (PI / (nsc as FloatType)).sin() / (PI / (nsi as FloatType)).sin(),
+        best_S * (PI / (nsc as FloatType)).sin()
+            / (PI / (nsi as FloatType)).sin(),
     );
 
     // ============== NOTE(@paladynee): ===============
@@ -145,11 +160,12 @@ fn main() {
     // rngs) have been checked against python over multiple passes.
     //
     // i am not well versed in mathematics so if anybody could fact check this
-    // implementation and remove this note, I'd be very glad.  
+    // implementation and remove this note, I'd be very glad.
 
     if let Some(vals) = best_values {
         if vals.len() == N * 3 {
-            let positions = Array2::from_shape_vec((N, 3), vals.clone()).unwrap();
+            let positions =
+                Array2::from_shape_vec((N, 3), vals.clone()).unwrap();
             println!("Final positions (first 5 rows):");
             for i in 0..std::cmp::min(5, N) {
                 println!("  {}: {:?}", i, positions.row(i));
@@ -165,7 +181,8 @@ fn main() {
                 let y = positions[[i, 1]];
                 let a = positions[[i, 2]];
                 let poly = transform_polygon(x, y, a, &unit_polygon_vertices);
-                let mut pts: Vec<(f64, f64)> = Vec::with_capacity(poly.len() / 2 + 1);
+                let mut pts: Vec<(f64, f64)> =
+                    Vec::with_capacity(poly.len() / 2 + 1);
                 for k in 0..(poly.len() / 2) {
                     let px = poly[k * 2] as f64;
                     let py = poly[k * 2 + 1] as f64;
@@ -241,7 +258,8 @@ fn main() {
                 .caption(
                     format!(
                         "Side length: {}",
-                        best_S * (PI / (nsc as FloatType)).sin() / (PI / (nsi as FloatType)).sin()
+                        best_S * (PI / (nsc as FloatType)).sin()
+                            / (PI / (nsi as FloatType)).sin()
                     ),
                     ("sans-serif", 20).into_font(),
                 )
@@ -249,8 +267,10 @@ fn main() {
                 .unwrap();
 
             // configure mesh to show ticks at our step sizes
-            let x_labels = ((span / x_step).round() as usize).saturating_add(1).max(2);
-            let y_labels = ((span / y_step).round() as usize).saturating_add(1).max(2);
+            let x_labels =
+                ((span / x_step).round() as usize).saturating_add(1).max(2);
+            let y_labels =
+                ((span / y_step).round() as usize).saturating_add(1).max(2);
             chart
                 .configure_mesh()
                 .x_labels(x_labels)
@@ -270,12 +290,18 @@ fn main() {
 
             // draw filled polygons and outlines
             for poly in polys {
-                let fill_style = Into::<ShapeStyle>::into(&RGBColor(204, 204, 204)).filled();
+                let fill_style =
+                    Into::<ShapeStyle>::into(&RGBColor(204, 204, 204)).filled();
                 chart
-                    .draw_series(std::iter::once(Polygon::new(poly.clone(), fill_style)))
+                    .draw_series(std::iter::once(Polygon::new(
+                        poly.clone(),
+                        fill_style,
+                    )))
                     .ok();
                 chart
-                    .draw_series(std::iter::once(PathElement::new(poly, &BLACK)))
+                    .draw_series(std::iter::once(PathElement::new(
+                        poly, &BLACK,
+                    )))
                     .ok();
             }
 
@@ -289,10 +315,7 @@ fn main() {
 
 // def repetition(seed):
 fn transform_polygon(
-    x: FloatType,
-    y: FloatType,
-    a: FloatType,
-    vertices: &Array2<FloatType>,
+    x: FloatType, y: FloatType, a: FloatType, vertices: &Array2<FloatType>,
 ) -> Vec<FloatType> {
     let n_vertices = vertices.shape()[0];
     let mut transformed = vec![0.0 as FloatType; n_vertices * 2];
@@ -307,8 +330,6 @@ fn transform_polygon(
     transformed
 }
 
-
-
 // BHProblem struct holds problem data used during cost evaluation.
 // To reduce memory pressure we compute transformed vertices and axes
 // on-the-fly instead of storing arena-backed scratch buffers.
@@ -364,7 +385,8 @@ impl<'a> BHProblem<'a> {
             }
         }
 
-        // Pairwise separating axis test: compute axes and projections on-the-fly
+        // Pairwise separating axis test: compute axes and projections
+        // on-the-fly
         for i in 0..self.N {
             let posx_i = values[i * 3];
             let posy_i = values[i * 3 + 1];
@@ -448,8 +470,6 @@ impl<'a> BHProblem<'a> {
     }
 }
 
-
-
 impl<'a> CostFunction for BHProblem<'a> {
     type Param = Vec<FloatType>;
     type Output = FloatType;
@@ -499,12 +519,8 @@ fn initial_simplex(x0: &Vec<FloatType>) -> Vec<Vec<FloatType>> {
 
 // def repetition(seed):
 fn repetition(
-    seed: usize,
-    N: usize,
-    nsi: usize,
-    nsc: usize,
-    penalty_tolerance: FloatType,
-    final_step_size: FloatType,
+    seed: usize, N: usize, nsi: usize, nsc: usize,
+    penalty_tolerance: FloatType, final_step_size: FloatType,
     unit_polygon_vertices: &Array2<FloatType>,
     unit_polygon_vectors: &Array2<FloatType>,
     unit_container_vectors: &Array2<FloatType>,
@@ -519,8 +535,8 @@ fn repetition(
         let rand = ((hi as u64) << 32) | lo as u64;
         rand as FloatType / (u64::MAX as FloatType)
     };
-    let mut dynamic_S =
-        (N as FloatType).sqrt() * (2.0 as FloatType + rand01(&mut rng) * 2.0 as FloatType);
+    let mut dynamic_S = (N as FloatType).sqrt()
+        * (2.0 as FloatType + rand01(&mut rng) * 2.0 as FloatType);
     let initial_S = dynamic_S;
 
     let mut x0 = vec![0.0 as FloatType; N * 3];
@@ -538,12 +554,14 @@ fn repetition(
         for gy in 0..grid_count {
             for gx in 0..grid_count {
                 let xi = if grid_count > 1 {
-                    min + (gx as FloatType) * ((max - min) / ((grid_count - 1) as FloatType))
+                    min + (gx as FloatType)
+                        * ((max - min) / ((grid_count - 1) as FloatType))
                 } else {
                     (min + max) / (2.0 as FloatType)
                 };
                 let yi = if grid_count > 1 {
-                    min + (gy as FloatType) * ((max - min) / ((grid_count - 1) as FloatType))
+                    min + (gy as FloatType)
+                        * ((max - min) / ((grid_count - 1) as FloatType))
                 } else {
                     (min + max) / (2.0 as FloatType)
                 };
@@ -561,7 +579,8 @@ fn repetition(
     let mut last_valid_x = x0.clone();
     let mut last_valid_S = dynamic_S;
 
-    // Match Python's local-minimizer `tol=1e-8` used in `minimize(..., tol=1e-8)`
+    // Match Python's local-minimizer `tol=1e-8` used in `minimize(...,
+    // tol=1e-8)`
     let solver_tol: FloatType = 1e-8 as FloatType;
 
     loop {
@@ -598,13 +617,16 @@ fn repetition(
                     last_valid_S = dynamic_S;
                     success = true;
                     // compute multiplier (match Python exactly)
-                    let base = (N as FloatType).sqrt() * (nsi as FloatType) / (nsc as FloatType);
+                    let base = (N as FloatType).sqrt() * (nsi as FloatType)
+                        / (nsc as FloatType);
                     let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_S - base) * (0.0099 as FloatType) / (initial_S - base);
+                        - (dynamic_S - base) * (0.0099 as FloatType)
+                            / (initial_S - base);
                     multiplier = 1.0 as FloatType
                         - final_step_size
                         - (dynamic_S - base)
-                            * ((0.01 as FloatType - final_step_size) / (initial_S - base));
+                            * ((0.01 as FloatType - final_step_size)
+                                / (initial_S - base));
                     for i in 0..x0.len() {
                         x0[i] = param[i] * multiplier;
                     }
@@ -613,9 +635,11 @@ fn repetition(
             } else {
                 // Basinhopping-like global search (many local starts)
                 let mut best_bh_fun = fun;
-                let mut best_bh_param = res.state.get_best_param().cloned().unwrap_or(x0.clone());
+                let mut best_bh_param =
+                    res.state.get_best_param().cloned().unwrap_or(x0.clone());
                 let mut current_fun = fun;
-                let mut current_param = res.state.get_best_param().cloned().unwrap_or(x0.clone());
+                let mut current_param =
+                    res.state.get_best_param().cloned().unwrap_or(x0.clone());
 
                 for _ in 0..50 {
                     let mut trial = current_param.clone();
@@ -634,7 +658,8 @@ fn repetition(
                         unit_container_apothem,
                         S: dynamic_S,
                     };
-                    let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
+                    let armijo =
+                        ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                     let linesearch = BacktrackingLineSearch::new(armijo);
                     let lbfgs = LBFGS::new(linesearch, 3)
                         .with_tolerance_grad(solver_tol)
@@ -656,7 +681,8 @@ fn repetition(
                         }
                         // Metropolis acceptance
                         if f2 < current_fun
-                            || ((current_fun - f2) / (0.1 as FloatType)).exp() > rand01(&mut rng)
+                            || ((current_fun - f2) / (0.1 as FloatType)).exp()
+                                > rand01(&mut rng)
                         {
                             current_fun = f2;
                             current_param = res2
@@ -671,13 +697,16 @@ fn repetition(
                 if best_bh_fun < penalty_tolerance {
                     last_valid_x = best_bh_param.clone();
                     last_valid_S = dynamic_S;
-                    let base = (N as FloatType).sqrt() * (nsi as FloatType) / (nsc as FloatType);
+                    let base = (N as FloatType).sqrt() * (nsi as FloatType)
+                        / (nsc as FloatType);
                     let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_S - base) * (0.0099 as FloatType) / (initial_S - base);
+                        - (dynamic_S - base) * (0.0099 as FloatType)
+                            / (initial_S - base);
                     multiplier = 1.0 as FloatType
                         - final_step_size
                         - (dynamic_S - base)
-                            * ((0.01 as FloatType - final_step_size) / (initial_S - base));
+                            * ((0.01 as FloatType - final_step_size)
+                                / (initial_S - base));
                     for i in 0..x0.len() {
                         x0[i] = last_valid_x[i] * multiplier;
                     }
@@ -688,13 +717,15 @@ fn repetition(
                 }
             }
         } else {
-            // If local solver failed for any reason, try a few random local starts
+            // If local solver failed for any reason, try a few random local
+            // starts
             let mut found = false;
             for _ in 0..10 {
                 let mut trial = x0.clone();
                 for k in 0..trial.len() {
-                    trial[k] +=
-                        (rand01(&mut rng) - 0.5 as FloatType) * dynamic_S * 0.1 as FloatType;
+                    trial[k] += (rand01(&mut rng) - 0.5 as FloatType)
+                        * dynamic_S
+                        * 0.1 as FloatType;
                 }
                 let problem = BHProblem {
                     N,

From 920bba914317ab01fc871ccb64f37eb73f7b4b49 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 09:13:00 +0200
Subject: [PATCH 08/15] Applying lints, commit before `cargo fix`

---
 polygon-packer-rs/Cargo.toml           |  4 +-
 polygon-packer-rs/src/main.rs          | 88 +++++++++++++-------------
 polygon-packer-rs/src/rust_specific.rs |  9 ++-
 3 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/polygon-packer-rs/Cargo.toml b/polygon-packer-rs/Cargo.toml
index ae5a840..a58d9f4 100644
--- a/polygon-packer-rs/Cargo.toml
+++ b/polygon-packer-rs/Cargo.toml
@@ -44,13 +44,13 @@ panic = "abort"
 
 [lints.clippy]
 pedantic = { level = "warn", priority = -1 }
-cargo = { level = "warn", priority = -1 }
+cargo = { level = "allow", priority = -1 }
 restriction = { level = "warn", priority = -1 }
 nursery = { level = "warn", priority = -1 }
 
 mod_module_files = "deny"
 self_named_module_files = "allow"
-
+doc_paragraphs_missing_punctuation = "allow"
 allow_attributes = "allow"
 allow_attributes_without_reason = "allow"
 arithmetic_side_effects = "allow"
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index c4fbb61..f0c1a96 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -5,6 +5,9 @@ extern crate plotters;
 extern crate rayon;
 
 mod rust_specific;
+use std::cmp::min;
+use std::iter;
+
 use rust_specific::AssocPI;
 use rust_specific::FloatType;
 // this is π in either f32 or f64
@@ -48,7 +51,7 @@ struct Args {
 fn main() {
     let args = Args::parse();
 
-    let N = args.inner_polygons;
+    let n = args.inner_polygons;
     let nsi = args.inner_sides;
     let nsc = args.container_sides;
     let attempts = args.attempts;
@@ -117,7 +120,7 @@ fn main() {
     // unit_container_apothem = np.cos(np.pi / nsc)
     let unit_container_apothem: FloatType = (PI / (nsc as FloatType)).cos();
 
-    let mut best_S: FloatType = FloatType::INFINITY;
+    let mut best_s: FloatType = FloatType::INFINITY;
     let mut best_values: Option<Vec<FloatType>> = None;
 
     // results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i)
@@ -127,7 +130,7 @@ fn main() {
         .map(|i| {
             repetition(
                 i,
-                N,
+                n,
                 nsi,
                 nsc,
                 penalty_tolerance,
@@ -141,8 +144,8 @@ fn main() {
         .collect();
 
     for (s, values) in results {
-        if s < best_S {
-            best_S = s;
+        if s < best_s {
+            best_s = s;
             best_values = Some(values);
         }
     }
@@ -150,7 +153,7 @@ fn main() {
     println!(
         "Final side length: {}",
         // best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi),
-        best_S * (PI / (nsc as FloatType)).sin()
+        best_s * (PI / (nsc as FloatType)).sin()
             / (PI / (nsi as FloatType)).sin(),
     );
 
@@ -163,20 +166,19 @@ fn main() {
     // implementation and remove this note, I'd be very glad.
 
     if let Some(vals) = best_values {
-        if vals.len() == N * 3 {
-            let positions =
-                Array2::from_shape_vec((N, 3), vals.clone()).unwrap();
+        if vals.len() == n * 3 {
+            let positions = Array2::from_shape_vec((n, 3), vals).unwrap();
             println!("Final positions (first 5 rows):");
-            for i in 0..std::cmp::min(5, N) {
+            for i in 0..min(5, n) {
                 println!("  {}: {:?}", i, positions.row(i));
             }
 
             // Plot result to PNG using Plotters (match Python output)
-            let file_name = format!("{}_{}_in_{}.png", N, nsi, nsc);
+            let file_name = format!("{}_{}_in_{}.png", n, nsi, nsc);
             // collect polygon points and container points (as f64)
             let mut all_points: Vec<(f64, f64)> = Vec::new();
-            let mut polys: Vec<Vec<(f64, f64)>> = Vec::with_capacity(N);
-            for i in 0..N {
+            let mut polys: Vec<Vec<(f64, f64)>> = Vec::with_capacity(n);
+            for i in 0..n {
                 let x = positions[[i, 0]];
                 let y = positions[[i, 1]];
                 let a = positions[[i, 2]];
@@ -189,7 +191,7 @@ fn main() {
                     pts.push((px, py));
                     all_points.push((px, py));
                 }
-                if let Some(first) = pts.get(0).cloned() {
+                if let Some(first) = pts.first().copied() {
                     pts.push(first);
                     all_points.push(first);
                 }
@@ -199,22 +201,22 @@ fn main() {
             let mut container_pts: Vec<(f64, f64)> =
                 Vec::with_capacity(unit_container_vertices.shape()[0] + 1);
             for i in 0..unit_container_vertices.shape()[0] {
-                let cx = unit_container_vertices[[i, 0]] * best_S;
-                let cy = unit_container_vertices[[i, 1]] * best_S;
+                let cx = unit_container_vertices[[i, 0]] * best_s;
+                let cy = unit_container_vertices[[i, 1]] * best_s;
                 container_pts.push((cx as f64, cy as f64));
                 all_points.push((cx as f64, cy as f64));
             }
-            if let Some(first) = container_pts.get(0).cloned() {
+            if let Some(first) = container_pts.first().copied() {
                 container_pts.push(first);
                 all_points.push(first);
             }
 
             // determine bounds and padding
             let (mut min_x, mut max_x, mut min_y, mut max_y) = (
-                std::f64::INFINITY,
-                std::f64::NEG_INFINITY,
-                std::f64::INFINITY,
-                std::f64::NEG_INFINITY,
+                f64::INFINITY,
+                f64::NEG_INFINITY,
+                f64::INFINITY,
+                f64::NEG_INFINITY,
             );
             for (x, y) in &all_points {
                 if *x < min_x {
@@ -242,8 +244,8 @@ fn main() {
             let dx = x_max_tick - x_min_tick;
             let dy = y_max_tick - y_min_tick;
             let span = dx.max(dy);
-            let x_center = (x_min_tick + x_max_tick) / 2.0;
-            let y_center = (y_min_tick + y_max_tick) / 2.0;
+            let x_center = f64::midpoint(y_min_tick, y_max_tick);
+            let y_center = f64::midpoint(y_min_tick, y_max_tick);
             let x_min_eq = x_center - span / 2.0;
             let x_max_eq = x_center + span / 2.0;
             let y_min_eq = y_center - span / 2.0;
@@ -258,7 +260,7 @@ fn main() {
                 .caption(
                     format!(
                         "Side length: {}",
-                        best_S * (PI / (nsc as FloatType)).sin()
+                        best_s * (PI / (nsc as FloatType)).sin()
                             / (PI / (nsi as FloatType)).sin()
                     ),
                     ("sans-serif", 20).into_font(),
@@ -282,9 +284,9 @@ fn main() {
 
             // draw container outline
             chart
-                .draw_series(std::iter::once(PathElement::new(
+                .draw_series(iter::once(PathElement::new(
                     container_pts.clone(),
-                    &BLACK,
+                    BLACK,
                 )))
                 .ok();
 
@@ -293,15 +295,13 @@ fn main() {
                 let fill_style =
                     Into::<ShapeStyle>::into(&RGBColor(204, 204, 204)).filled();
                 chart
-                    .draw_series(std::iter::once(Polygon::new(
+                    .draw_series(iter::once(Polygon::new(
                         poly.clone(),
                         fill_style,
                     )))
                     .ok();
                 chart
-                    .draw_series(std::iter::once(PathElement::new(
-                        poly, &BLACK,
-                    )))
+                    .draw_series(iter::once(PathElement::new(poly, BLACK)))
                     .ok();
             }
 
@@ -324,8 +324,8 @@ fn transform_polygon(
     for i in 0..n_vertices {
         let vx = vertices[[i, 0]];
         let vy = vertices[[i, 1]];
-        transformed[i * 2] = x + (vx * ca - vy * sa);
-        transformed[i * 2 + 1] = y + (vx * sa + vy * ca);
+        transformed[i * 2] = x + vy.mul_add(-sa, vx * ca);
+        transformed[i * 2 + 1] = y + vy.mul_add(ca, vx * sa);
     }
     transformed
 }
@@ -334,14 +334,14 @@ fn transform_polygon(
 // To reduce memory pressure we compute transformed vertices and axes
 // on-the-fly instead of storing arena-backed scratch buffers.
 struct BHProblem<'a> {
-    N: usize,
+    n: usize,
     nsi: usize,
     nsc: usize,
     unit_polygon_vertices: &'a Array2<FloatType>,
     unit_polygon_vectors: &'a Array2<FloatType>,
     unit_container_vectors: &'a Array2<FloatType>,
     unit_container_apothem: FloatType,
-    S: FloatType,
+    s: FloatType,
 }
 
 // BH cost evaluation implemented as a method on the problem struct.
@@ -355,12 +355,12 @@ impl<'a> BHProblem<'a> {
         let nsi = self.nsi;
         let nsc = self.nsc;
 
-        let limit = self.unit_container_apothem * self.S;
+        let limit = self.unit_container_apothem * self.s;
 
         let mut penalty: FloatType = 0.0;
 
         // Compute transformed vertices on-the-fly and apply container penalty.
-        for i in 0..self.N {
+        for i in 0..self.n {
             let posx = values[i * 3];
             let posy = values[i * 3 + 1];
             let rot = values[i * 3 + 2];
@@ -387,14 +387,14 @@ impl<'a> BHProblem<'a> {
 
         // Pairwise separating axis test: compute axes and projections
         // on-the-fly
-        for i in 0..self.N {
+        for i in 0..self.n {
             let posx_i = values[i * 3];
             let posy_i = values[i * 3 + 1];
             let rot_i = values[i * 3 + 2];
             let ca_i = rot_i.cos();
             let sa_i = rot_i.sin();
 
-            for j in (i + 1)..self.N {
+            for j in (i + 1)..self.n {
                 let posx_j = values[j * 3];
                 let posy_j = values[j * 3 + 1];
                 let rot_j = values[j * 3 + 2];
@@ -586,14 +586,14 @@ fn repetition(
     loop {
         // Local minimization using L-BFGS (match Python's L-BFGS-B)
         let problem = BHProblem {
-            N,
+            n: N,
             nsi,
             nsc,
             unit_polygon_vertices,
             unit_polygon_vectors,
             unit_container_vectors,
             unit_container_apothem,
-            S: dynamic_S,
+            s: dynamic_S,
         };
 
         let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -649,14 +649,14 @@ fn repetition(
                             * (0.1 as FloatType);
                     }
                     let problem = BHProblem {
-                        N,
+                        n: N,
                         nsi,
                         nsc,
                         unit_polygon_vertices,
                         unit_polygon_vectors,
                         unit_container_vectors,
                         unit_container_apothem,
-                        S: dynamic_S,
+                        s: dynamic_S,
                     };
                     let armijo =
                         ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -728,14 +728,14 @@ fn repetition(
                         * 0.1 as FloatType;
                 }
                 let problem = BHProblem {
-                    N,
+                    n: N,
                     nsi,
                     nsc,
                     unit_polygon_vertices,
                     unit_polygon_vectors,
                     unit_container_vectors,
                     unit_container_apothem,
-                    S: dynamic_S,
+                    s: dynamic_S,
                 };
                 let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                 let linesearch = BacktrackingLineSearch::new(armijo);
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
index e06ca94..d6c7d10 100644
--- a/polygon-packer-rs/src/rust_specific.rs
+++ b/polygon-packer-rs/src/rust_specific.rs
@@ -1,21 +1,24 @@
 //! this module handles rust-specific things like
 //! hot-swapping the float type to squeeze out more perf.
 
+use std::f32;
+use std::f64;
+
 /// change this type to `f32` be less precise but get faster,
 /// or `f64` to be more precise but get slower.
 pub type FloatType = f32;
 
-/// an interface that defines PI for both 32 bit and 64 bit floats
+/// an interface that defines PI for both 32 bit and 64 bit floats.
 pub trait AssocPI {
     const PI: Self;
 }
 
 // associated PI constant for f32
 impl AssocPI for f32 {
-    const PI: f32 = std::f32::consts::PI;
+    const PI: f32 = f32::consts::PI;
 }
 
 // associated PI constant for f64
 impl AssocPI for f64 {
-    const PI: f64 = std::f64::consts::PI;
+    const PI: f64 = f64::consts::PI;
 }

From cc3e1fb41ca7857e30d43d2bb2ae9e8c6b18fb0c Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 09:22:37 +0200
Subject: [PATCH 09/15] Fully satisfy lints

---
 polygon-packer-rs/src/main.rs | 148 ++++++++++++++++++----------------
 1 file changed, 79 insertions(+), 69 deletions(-)

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index f0c1a96..f49b5b7 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -370,16 +370,16 @@ impl<'a> BHProblem<'a> {
             for v in 0..n_vertices {
                 let vx = self.unit_polygon_vertices[[v, 0]];
                 let vy = self.unit_polygon_vertices[[v, 1]];
-                let px = posx + (vx * ca - vy * sa);
-                let py = posy + (vx * sa + vy * ca);
+                let px = posx + vy.mul_add(-sa, vx * ca);
+                let py = posy + vy.mul_add(ca, vx * sa);
 
                 for ci in 0..nsc {
                     let dvx = self.unit_container_vectors[[ci, 0]];
                     let dvy = self.unit_container_vectors[[ci, 1]];
-                    let distance = px * dvx + py * dvy;
+                    let distance = py.mul_add(dvy, px * dvx);
                     if distance > limit {
                         let diff = distance - limit;
-                        penalty += diff * diff;
+                        penalty = diff.mul_add(diff, penalty);
                     }
                 }
             }
@@ -409,12 +409,18 @@ impl<'a> BHProblem<'a> {
                         let v = vec_idx;
                         let ux = self.unit_polygon_vectors[[v, 0]];
                         let uy = self.unit_polygon_vectors[[v, 1]];
-                        (ux * ca_i - uy * sa_i, ux * sa_i + uy * ca_i)
+                        (
+                            uy.mul_add(-sa_i, ux * ca_i),
+                            uy.mul_add(ca_i, ux * sa_i),
+                        )
                     } else {
                         let v = vec_idx - nsi;
                         let ux = self.unit_polygon_vectors[[v, 0]];
                         let uy = self.unit_polygon_vectors[[v, 1]];
-                        (ux * ca_j - uy * sa_j, ux * sa_j + uy * ca_j)
+                        (
+                            uy.mul_add(-sa_j, ux * ca_j),
+                            uy.mul_add(ca_j, ux * sa_j),
+                        )
                     };
 
                     let mut min_1 = HUGE;
@@ -422,9 +428,9 @@ impl<'a> BHProblem<'a> {
                     for vert in 0..nsi {
                         let vx = self.unit_polygon_vertices[[vert, 0]];
                         let vy = self.unit_polygon_vertices[[vert, 1]];
-                        let px = posx_i + (vx * ca_i - vy * sa_i);
-                        let py = posy_i + (vx * sa_i + vy * ca_i);
-                        let dotp = px * x_axis + py * y_axis;
+                        let px = posx_i + vy.mul_add(-sa_i, vx * ca_i);
+                        let py = posy_i + vy.mul_add(ca_i, vx * sa_i);
+                        let dotp = py.mul_add(y_axis, px * x_axis);
                         if dotp < min_1 {
                             min_1 = dotp;
                         }
@@ -438,9 +444,9 @@ impl<'a> BHProblem<'a> {
                     for vert in 0..nsi {
                         let vx = self.unit_polygon_vertices[[vert, 0]];
                         let vy = self.unit_polygon_vertices[[vert, 1]];
-                        let px = posx_j + (vx * ca_j - vy * sa_j);
-                        let py = posy_j + (vx * sa_j + vy * ca_j);
-                        let dotp = px * x_axis + py * y_axis;
+                        let px = posx_j + vy.mul_add(-sa_j, vx * ca_j);
+                        let py = posy_j + vy.mul_add(ca_j, vx * sa_j);
+                        let dotp = py.mul_add(y_axis, px * x_axis);
                         if dotp < min_2 {
                             min_2 = dotp;
                         }
@@ -461,7 +467,7 @@ impl<'a> BHProblem<'a> {
                 }
 
                 if collision {
-                    penalty += min_overlap * min_overlap;
+                    penalty = min_overlap.mul_add(min_overlap, penalty);
                 }
             }
         }
@@ -504,12 +510,12 @@ impl<'a> Gradient for BHProblem<'a> {
     }
 }
 
-fn initial_simplex(x0: &Vec<FloatType>) -> Vec<Vec<FloatType>> {
+fn initial_simplex(x0: &[FloatType]) -> Vec<Vec<FloatType>> {
     let n = x0.len();
     let mut simplex: Vec<Vec<FloatType>> = Vec::with_capacity(n + 1);
-    simplex.push(x0.clone());
+    simplex.push(x0.to_owned());
     for i in 0..n {
-        let mut xi = x0.clone();
+        let mut xi = x0.to_owned();
         let delta = (0.05 as FloatType) * (1.0 as FloatType + x0[i].abs());
         xi[i] += delta;
         simplex.push(xi);
@@ -518,8 +524,9 @@ fn initial_simplex(x0: &Vec<FloatType>) -> Vec<Vec<FloatType>> {
 }
 
 // def repetition(seed):
+#[allow(clippy::too_many_arguments)]
 fn repetition(
-    seed: usize, N: usize, nsi: usize, nsc: usize,
+    seed: usize, n: usize, nsi: usize, nsc: usize,
     penalty_tolerance: FloatType, final_step_size: FloatType,
     unit_polygon_vertices: &Array2<FloatType>,
     unit_polygon_vectors: &Array2<FloatType>,
@@ -535,40 +542,44 @@ fn repetition(
         let rand = ((hi as u64) << 32) | lo as u64;
         rand as FloatType / (u64::MAX as FloatType)
     };
-    let mut dynamic_S = (N as FloatType).sqrt()
+    let mut dynamic_s = (n as FloatType).sqrt()
         * (2.0 as FloatType + rand01(&mut rng) * 2.0 as FloatType);
-    let initial_S = dynamic_S;
+    let initial_s = dynamic_s;
 
-    let mut x0 = vec![0.0 as FloatType; N * 3];
+    let mut x0 = vec![0.0 as FloatType; n * 3];
 
     if rand01(&mut rng) < 0.5 {
-        for i in 0..(N * 3) {
-            x0[i] = (rand01(&mut rng) - 0.5 as FloatType) * dynamic_S;
+        for item in x0.iter_mut().take(n * 3) {
+            *item = (rand01(&mut rng) - 0.5 as FloatType) * dynamic_s;
         }
     } else {
-        let grid_count = ((N as FloatType).sqrt().ceil()) as usize;
-        let min = -dynamic_S / (2.0 as FloatType) * (0.9 as FloatType);
-        let max = dynamic_S / (2.0 as FloatType) * (0.9 as FloatType);
+        let grid_count = ((n as FloatType).sqrt().ceil()) as usize;
+        let min = -dynamic_s / (2.0 as FloatType) * (0.9 as FloatType);
+        let max = dynamic_s / (2.0 as FloatType) * (0.9 as FloatType);
         let mut grid_points: Vec<(FloatType, FloatType)> =
             Vec::with_capacity(grid_count * grid_count);
         for gy in 0..grid_count {
             for gx in 0..grid_count {
                 let xi = if grid_count > 1 {
-                    min + (gx as FloatType)
-                        * ((max - min) / ((grid_count - 1) as FloatType))
+                    (gx as FloatType).mul_add(
+                        (max - min) / ((grid_count - 1) as FloatType),
+                        min,
+                    )
                 } else {
                     (min + max) / (2.0 as FloatType)
                 };
                 let yi = if grid_count > 1 {
-                    min + (gy as FloatType)
-                        * ((max - min) / ((grid_count - 1) as FloatType))
+                    (gy as FloatType).mul_add(
+                        (max - min) / ((grid_count - 1) as FloatType),
+                        min,
+                    )
                 } else {
                     (min + max) / (2.0 as FloatType)
                 };
                 grid_points.push((xi, yi));
             }
         }
-        for i in 0..N {
+        for i in 0..n {
             let (gx, gy) = grid_points[i % grid_points.len()];
             x0[i * 3] = gx;
             x0[i * 3 + 1] = gy;
@@ -577,7 +588,7 @@ fn repetition(
     }
 
     let mut last_valid_x = x0.clone();
-    let mut last_valid_S = dynamic_S;
+    let mut last_valid_s = dynamic_s;
 
     // Match Python's local-minimizer `tol=1e-8` used in `minimize(...,
     // tol=1e-8)`
@@ -586,14 +597,14 @@ fn repetition(
     loop {
         // Local minimization using L-BFGS (match Python's L-BFGS-B)
         let problem = BHProblem {
-            n: N,
+            n,
             nsi,
             nsc,
             unit_polygon_vertices,
             unit_polygon_vectors,
             unit_container_vectors,
             unit_container_apothem,
-            s: dynamic_S,
+            s: dynamic_s,
         };
 
         let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -613,24 +624,24 @@ fn repetition(
             let fun = res.state.get_best_cost();
             if fun < penalty_tolerance {
                 if let Some(param) = res.state.get_best_param() {
-                    last_valid_x = param.clone();
-                    last_valid_S = dynamic_S;
+                    last_valid_x.clone_from(param);
+                    last_valid_s = dynamic_s;
                     success = true;
                     // compute multiplier (match Python exactly)
-                    let base = (N as FloatType).sqrt() * (nsi as FloatType)
+                    let base = (n as FloatType).sqrt() * (nsi as FloatType)
                         / (nsc as FloatType);
                     let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_S - base) * (0.0099 as FloatType)
-                            / (initial_S - base);
-                    multiplier = 1.0 as FloatType
-                        - final_step_size
-                        - (dynamic_S - base)
-                            * ((0.01 as FloatType - final_step_size)
-                                / (initial_S - base));
+                        - (dynamic_s - base) * (0.0099 as FloatType)
+                            / (initial_s - base);
+                    multiplier = (dynamic_s - base).mul_add(
+                        -((0.01 as FloatType - final_step_size)
+                            / (initial_s - base)),
+                        1.0 as FloatType - final_step_size,
+                    );
                     for i in 0..x0.len() {
                         x0[i] = param[i] * multiplier;
                     }
-                    dynamic_S *= multiplier;
+                    dynamic_s *= multiplier;
                 }
             } else {
                 // Basinhopping-like global search (many local starts)
@@ -643,20 +654,20 @@ fn repetition(
 
                 for _ in 0..50 {
                     let mut trial = current_param.clone();
-                    for k in 0..trial.len() {
-                        trial[k] += (rand01(&mut rng) - 0.5 as FloatType)
-                            * (2.0 as FloatType)
-                            * (0.1 as FloatType);
+                    for item in &mut trial {
+                        *item = ((rand01(&mut rng) - 0.5 as FloatType)
+                            * (2.0 as FloatType))
+                            .mul_add(0.1 as FloatType, *item);
                     }
                     let problem = BHProblem {
-                        n: N,
+                        n,
                         nsi,
                         nsc,
                         unit_polygon_vertices,
                         unit_polygon_vectors,
                         unit_container_vectors,
                         unit_container_apothem,
-                        s: dynamic_S,
+                        s: dynamic_s,
                     };
                     let armijo =
                         ArmijoCondition::<FloatType>::new(0.0001).unwrap();
@@ -695,22 +706,22 @@ fn repetition(
                 }
 
                 if best_bh_fun < penalty_tolerance {
-                    last_valid_x = best_bh_param.clone();
-                    last_valid_S = dynamic_S;
-                    let base = (N as FloatType).sqrt() * (nsi as FloatType)
+                    last_valid_x.clone_from(&best_bh_param);
+                    last_valid_s = dynamic_s;
+                    let base = (n as FloatType).sqrt() * (nsi as FloatType)
                         / (nsc as FloatType);
                     let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_S - base) * (0.0099 as FloatType)
-                            / (initial_S - base);
-                    multiplier = 1.0 as FloatType
-                        - final_step_size
-                        - (dynamic_S - base)
-                            * ((0.01 as FloatType - final_step_size)
-                                / (initial_S - base));
+                        - (dynamic_s - base) * (0.0099 as FloatType)
+                            / (initial_s - base);
+                    multiplier = (dynamic_s - base).mul_add(
+                        -((0.01 as FloatType - final_step_size)
+                            / (initial_s - base)),
+                        1.0 as FloatType - final_step_size,
+                    );
                     for i in 0..x0.len() {
                         x0[i] = last_valid_x[i] * multiplier;
                     }
-                    dynamic_S *= multiplier;
+                    dynamic_s *= multiplier;
                     success = true;
                 } else {
                     break;
@@ -722,20 +733,19 @@ fn repetition(
             let mut found = false;
             for _ in 0..10 {
                 let mut trial = x0.clone();
-                for k in 0..trial.len() {
-                    trial[k] += (rand01(&mut rng) - 0.5 as FloatType)
-                        * dynamic_S
-                        * 0.1 as FloatType;
+                for item in &mut trial {
+                    *item = ((rand01(&mut rng) - 0.5 as FloatType) * dynamic_s)
+                        .mul_add(0.1 as FloatType, *item);
                 }
                 let problem = BHProblem {
-                    n: N,
+                    n,
                     nsi,
                     nsc,
                     unit_polygon_vertices,
                     unit_polygon_vectors,
                     unit_container_vectors,
                     unit_container_apothem,
-                    s: dynamic_S,
+                    s: dynamic_s,
                 };
                 let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
                 let linesearch = BacktrackingLineSearch::new(armijo);
@@ -755,7 +765,7 @@ fn repetition(
                             .get_best_param()
                             .cloned()
                             .unwrap_or(trial.clone());
-                        last_valid_S = dynamic_S;
+                        last_valid_s = dynamic_s;
                         x0 = last_valid_x.clone();
                         found = true;
                         break;
@@ -772,5 +782,5 @@ fn repetition(
         }
     }
 
-    (last_valid_S, last_valid_x)
+    (last_valid_s, last_valid_x)
 }

From 3207af655790d1cac0d6f0dc5fcf3c531e6a5d99 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 10:47:38 +0200
Subject: [PATCH 10/15] Complete rewrite: remove argmin, switch to nlopt.
 speedup = ~15x

---
 polygon-packer-rs/Cargo.toml           |    6 +-
 polygon-packer-rs/src/main.rs          | 1050 ++++++++----------------
 polygon-packer-rs/src/rust_specific.rs |    2 +-
 3 files changed, 331 insertions(+), 727 deletions(-)

diff --git a/polygon-packer-rs/Cargo.toml b/polygon-packer-rs/Cargo.toml
index a58d9f4..fae2a0b 100644
--- a/polygon-packer-rs/Cargo.toml
+++ b/polygon-packer-rs/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "polygon-packer-rs"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2024"
 authors = ["Voxell Paladynee <voxellpaladynee@proton.me>"]
 
@@ -8,8 +8,7 @@ authors = ["Voxell Paladynee <voxellpaladynee@proton.me>"]
 # numeric library
 ndarray = "0.17.2"
 # optimization library
-argmin = "0.11.0"
-argmin-math = { version = "0.5", features = ["vec"] }
+nlopt = "0.6.0"
 # plot library
 plotters = "0.3"
 # parallelization
@@ -21,6 +20,7 @@ clap = { version = "4.4", features = ["derive"] }
 voxell_rng = "0.6.0"
 # bump allocator for temporary arena allocations to reduce transient heap use
 bumpalo = {version = "3", features = ["collections"]}
+nalgebra = "0.34.2"
 
 [profile.release]
 # opt-level: 3 for full optimizations
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index f49b5b7..af3192a 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -1,786 +1,390 @@
-extern crate argmin;
-extern crate clap;
-extern crate ndarray;
-extern crate plotters;
-extern crate rayon;
+use std::f64::consts::PI;
 
-mod rust_specific;
-use std::cmp::min;
-use std::iter;
-
-use rust_specific::AssocPI;
-use rust_specific::FloatType;
-// this is π in either f32 or f64
-const PI: FloatType = <FloatType as AssocPI>::PI;
-
-use argmin::core::CostFunction;
-use argmin::core::Error;
-use argmin::core::Executor;
-use argmin::core::Gradient;
-use argmin::core::State;
-use argmin::solver::linesearch::BacktrackingLineSearch;
-use argmin::solver::linesearch::condition::ArmijoCondition;
-use argmin::solver::quasinewton::LBFGS;
 use clap::Parser;
-use ndarray::Array2;
+use nalgebra::Rotation2;
+use nalgebra::Vector2;
+use nlopt::Algorithm;
+use nlopt::Nlopt;
+use nlopt::Target;
 use plotters::prelude::*;
 use rayon::prelude::*;
 use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
 
-#[derive(Parser, Debug)]
-#[command(author, version, about, long_about = None)]
-struct Args {
-    /// Number of inner polygons
-    inner_polygons: usize,
-    /// Number of sides of the inner polygons
-    inner_sides: usize,
-    /// Number of sides of the container polygon
-    container_sides: usize,
-    /// Number of attempts to run
-    #[arg(long, default_value_t = 1000)]
-    attempts: usize,
-    /// Overlap penalty tolerance. Probably best left at default
-    #[arg(long, default_value_t = 1e-8)]
-    tolerance: FloatType,
-    /// How small the last theoretical step in container size decrease will be
-    /// (it gets smaller over time)
-    #[arg(long, default_value_t = 0.0001)]
-    finalstep: FloatType,
-}
-
 fn main() {
     let args = Args::parse();
+    let geo = Geometry::new(args.inner_sides, args.container_sides);
 
-    let n = args.inner_polygons;
-    let nsi = args.inner_sides;
-    let nsc = args.container_sides;
-    let attempts = args.attempts;
-    let penalty_tolerance = args.tolerance;
-    let final_step_size = args.finalstep;
-
-    // Translated from Python:
-    // unit_polygon_angles = np.linspace(0, 2 * np.pi, nsi, endpoint=False)
-    let unit_polygon_angles_vec: Vec<FloatType> = (0..nsi)
-        .map(|i| 2.0 * PI * (i as FloatType) / (nsi as FloatType))
+    let results: Vec<(f64, Vec<f64>)> = (0..args.attempts)
+        .into_par_iter()
+        .map(|i| repetition(i, &args, &geo))
         .collect();
 
-    // unit_polygon_vertices = np.column_stack((np.cos(unit_polygon_angles),
-    // np.sin(unit_polygon_angles)))
-    let mut unit_polygon_vertices_buf: Vec<FloatType> =
-        Vec::with_capacity(nsi * 2);
-    for &a in &unit_polygon_angles_vec {
-        unit_polygon_vertices_buf.push(a.cos());
-        unit_polygon_vertices_buf.push(a.sin());
-    }
-    let unit_polygon_vertices: Array2<FloatType> =
-        Array2::from_shape_vec((nsi, 2), unit_polygon_vertices_buf).unwrap();
+    if let Some((best_s, best_v)) = results
+        .into_iter()
+        .min_by(|a, b| a.0.partial_cmp(&b.0).unwrap())
+    {
+        let side_len = best_s * (PI / args.container_sides as f64).sin()
+            / (PI / args.inner_sides as f64).sin();
+        println!("Final side length: {}", side_len);
 
-    // unit_polygon_vectors = np.column_stack((np.cos(unit_polygon_angles +
-    // np.pi / nsi), np.sin(unit_polygon_angles + np.pi / nsi)))
-    let mut unit_polygon_vectors_buf: Vec<FloatType> =
-        Vec::with_capacity(nsi * 2);
-    let offset_nsi = PI / (nsi as FloatType);
-    for &a in &unit_polygon_angles_vec {
-        let aa = a + offset_nsi;
-        unit_polygon_vectors_buf.push(aa.cos());
-        unit_polygon_vectors_buf.push(aa.sin());
+        verify_results(&best_v, best_s, &args, &geo);
+        render(&args, &geo, best_s, &best_v);
     }
-    let unit_polygon_vectors: Array2<FloatType> =
-        Array2::from_shape_vec((nsi, 2), unit_polygon_vectors_buf).unwrap();
+}
 
-    // unit_container_angles = np.linspace(0, 2 * np.pi, nsc, endpoint=False)
-    let unit_container_angles: Vec<FloatType> = (0..nsc)
-        .map(|i| 2.0 * PI * (i as FloatType) / (nsc as FloatType))
-        .collect();
+#[derive(Parser, Debug, Clone)]
+pub struct Args {
+    pub inner_polygons: usize,
+    pub inner_sides: usize,
+    pub container_sides: usize,
+    #[arg(long, default_value_t = 1000)]
+    pub attempts: usize,
+    #[arg(long, default_value_t = 1e-8)]
+    pub tolerance: f64,
+    #[arg(long, default_value_t = 0.0001)]
+    pub finalstep: f64,
+}
 
-    // unit_container_vertices = np.column_stack((np.cos(unit_container_angles),
-    // np.sin(unit_container_angles)))
-    let mut unit_container_vertices_buf: Vec<FloatType> =
-        Vec::with_capacity(nsc * 2);
-    for &a in &unit_container_angles {
-        unit_container_vertices_buf.push(a.cos());
-        unit_container_vertices_buf.push(a.sin());
-    }
-    let unit_container_vertices: Array2<FloatType> =
-        Array2::from_shape_vec((nsc, 2), unit_container_vertices_buf).unwrap();
+struct Geometry {
+    inner_verts: Vec<Vector2<f64>>,
+    inner_axes: Vec<Vector2<f64>>,
+    cont_axes: Vec<Vector2<f64>>,
+    apothem: f64,
+}
 
-    // unit_container_vectors = np.column_stack((np.cos(unit_container_angles +
-    // np.pi / nsc), np.sin(unit_container_angles + np.pi / nsc)))
-    let mut unit_container_vectors_buf: Vec<FloatType> =
-        Vec::with_capacity(nsc * 2);
-    let offset_nsc = PI / (nsc as FloatType);
-    for &a in &unit_container_angles {
-        let aa = a + offset_nsc;
-        unit_container_vectors_buf.push(aa.cos());
-        unit_container_vectors_buf.push(aa.sin());
+impl Geometry {
+    fn new(nsi: usize, nsc: usize) -> Self {
+        let i_angle = 2.0 * PI / nsi as f64;
+        let c_angle = 2.0 * PI / nsc as f64;
+        let inner_verts = (0..nsi)
+            .map(|i| {
+                Vector2::new(
+                    (i as f64 * i_angle).cos(),
+                    (i as f64 * i_angle).sin(),
+                )
+            })
+            .collect();
+        let inner_axes = (0..nsi)
+            .map(|i| {
+                let a = i as f64 * i_angle + PI / nsi as f64;
+                Vector2::new(a.cos(), a.sin())
+            })
+            .collect();
+        let cont_axes = (0..nsc)
+            .map(|i| {
+                let a = i as f64 * c_angle + PI / nsc as f64;
+                Vector2::new(a.cos(), a.sin())
+            })
+            .collect();
+        Self {
+            inner_verts,
+            inner_axes,
+            cont_axes,
+            apothem: (PI / nsc as f64).cos(),
+        }
     }
-    let unit_container_vectors: Array2<FloatType> =
-        Array2::from_shape_vec((nsc, 2), unit_container_vectors_buf).unwrap();
-
-    // unit_container_apothem = np.cos(np.pi / nsc)
-    let unit_container_apothem: FloatType = (PI / (nsc as FloatType)).cos();
+}
 
-    let mut best_s: FloatType = FloatType::INFINITY;
-    let mut best_values: Option<Vec<FloatType>> = None;
+fn penalty(v: &[f64], s: f64, args: &Args, geo: &Geometry) -> f64 {
+    let mut p = 0.0;
+    let limit = geo.apothem * s;
+
+    // 1. Container Penalty (Poking)
+    for i in 0..args.inner_polygons {
+        let (pos, rot) = (
+            Vector2::new(v[i * 3], v[i * 3 + 1]),
+            Rotation2::new(v[i * 3 + 2]),
+        );
+        for iv in &geo.inner_verts {
+            let pt = rot * iv + pos;
+            for ax in &geo.cont_axes {
+                let dist = pt.dot(ax);
+                if dist > limit {
+                    p += (dist - limit).powi(2);
+                }
+            }
+        }
+    }
 
-    // results = Parallel(n_jobs=-1, prefer="processes")(delayed(repetition)(i)
-    // for i in range(attempts))
-    let results: Vec<(FloatType, _)> = (0..attempts)
-        .into_par_iter()
+    // 2. Pairwise Collision Penalty (SAT)
+    let polys: Vec<_> = (0..args.inner_polygons)
         .map(|i| {
-            repetition(
-                i,
-                n,
-                nsi,
-                nsc,
-                penalty_tolerance,
-                final_step_size,
-                &unit_polygon_vertices,
-                &unit_polygon_vectors,
-                &unit_container_vectors,
-                unit_container_apothem,
+            let (pos, rot) = (
+                Vector2::new(v[i * 3], v[i * 3 + 1]),
+                Rotation2::new(v[i * 3 + 2]),
+            );
+            (
+                geo.inner_verts
+                    .iter()
+                    .map(|&iv| rot * iv + pos)
+                    .collect::<Vec<_>>(),
+                geo.inner_axes
+                    .iter()
+                    .map(|&ia| rot * ia)
+                    .collect::<Vec<_>>(),
             )
         })
         .collect();
 
-    for (s, values) in results {
-        if s < best_s {
-            best_s = s;
-            best_values = Some(values);
-        }
-    }
-
-    println!(
-        "Final side length: {}",
-        // best_S * np.sin(np.pi / nsc) / np.sin(np.pi / nsi),
-        best_s * (PI / (nsc as FloatType)).sin()
-            / (PI / (nsi as FloatType)).sin(),
-    );
-
-    // ============== NOTE(@paladynee): ===============
-    // anything found after this line is translated from Python into Rust
-    // via an AI agent. the numerical results (excluding nondeterminism from
-    // rngs) have been checked against python over multiple passes.
-    //
-    // i am not well versed in mathematics so if anybody could fact check this
-    // implementation and remove this note, I'd be very glad.
-
-    if let Some(vals) = best_values {
-        if vals.len() == n * 3 {
-            let positions = Array2::from_shape_vec((n, 3), vals).unwrap();
-            println!("Final positions (first 5 rows):");
-            for i in 0..min(5, n) {
-                println!("  {}: {:?}", i, positions.row(i));
-            }
-
-            // Plot result to PNG using Plotters (match Python output)
-            let file_name = format!("{}_{}_in_{}.png", n, nsi, nsc);
-            // collect polygon points and container points (as f64)
-            let mut all_points: Vec<(f64, f64)> = Vec::new();
-            let mut polys: Vec<Vec<(f64, f64)>> = Vec::with_capacity(n);
-            for i in 0..n {
-                let x = positions[[i, 0]];
-                let y = positions[[i, 1]];
-                let a = positions[[i, 2]];
-                let poly = transform_polygon(x, y, a, &unit_polygon_vertices);
-                let mut pts: Vec<(f64, f64)> =
-                    Vec::with_capacity(poly.len() / 2 + 1);
-                for k in 0..(poly.len() / 2) {
-                    let px = poly[k * 2] as f64;
-                    let py = poly[k * 2 + 1] as f64;
-                    pts.push((px, py));
-                    all_points.push((px, py));
-                }
-                if let Some(first) = pts.first().copied() {
-                    pts.push(first);
-                    all_points.push(first);
-                }
-                polys.push(pts);
-            }
-
-            let mut container_pts: Vec<(f64, f64)> =
-                Vec::with_capacity(unit_container_vertices.shape()[0] + 1);
-            for i in 0..unit_container_vertices.shape()[0] {
-                let cx = unit_container_vertices[[i, 0]] * best_s;
-                let cy = unit_container_vertices[[i, 1]] * best_s;
-                container_pts.push((cx as f64, cy as f64));
-                all_points.push((cx as f64, cy as f64));
-            }
-            if let Some(first) = container_pts.first().copied() {
-                container_pts.push(first);
-                all_points.push(first);
-            }
-
-            // determine bounds and padding
-            let (mut min_x, mut max_x, mut min_y, mut max_y) = (
-                f64::INFINITY,
-                f64::NEG_INFINITY,
-                f64::INFINITY,
-                f64::NEG_INFINITY,
-            );
-            for (x, y) in &all_points {
-                if *x < min_x {
-                    min_x = *x;
-                }
-                if *x > max_x {
-                    max_x = *x;
-                }
-                if *y < min_y {
-                    min_y = *y;
+    for i in 0..args.inner_polygons {
+        for j in i + 1..args.inner_polygons {
+            let mut min_overlap = 1e20;
+            let mut collision = true;
+            for ax in polys[i].1.iter().chain(polys[j].1.iter()) {
+                let (min1, max1) = project(&polys[i].0, ax);
+                let (min2, max2) = project(&polys[j].0, ax);
+                let overlap = max1.min(max2) - min1.max(min2);
+                if overlap <= 0.0 {
+                    collision = false;
+                    break;
                 }
-                if *y > max_y {
-                    max_y = *y;
+                if overlap < min_overlap {
+                    min_overlap = overlap;
                 }
             }
-            // choose grid steps matching the requested ticks
-            let x_step = 1.0_f64;
-            let y_step = 0.5_f64;
-            let x_min_tick = (min_x / x_step).floor() * x_step;
-            let x_max_tick = (max_x / x_step).ceil() * x_step;
-            let y_min_tick = (min_y / y_step).floor() * y_step;
-            let y_max_tick = (max_y / y_step).ceil() * y_step;
-
-            // Equalize spans so 1 unit in x equals 1 unit in y on the image
-            let dx = x_max_tick - x_min_tick;
-            let dy = y_max_tick - y_min_tick;
-            let span = dx.max(dy);
-            let x_center = f64::midpoint(y_min_tick, y_max_tick);
-            let y_center = f64::midpoint(y_min_tick, y_max_tick);
-            let x_min_eq = x_center - span / 2.0;
-            let x_max_eq = x_center + span / 2.0;
-            let y_min_eq = y_center - span / 2.0;
-            let y_max_eq = y_center + span / 2.0;
-
-            // Render PNG on a square canvas so aspect ratio is preserved
-            let backend = BitMapBackend::new(&file_name, (1024, 1024));
-            let root = backend.into_drawing_area();
-            root.fill(&WHITE).unwrap();
-            let mut chart = ChartBuilder::on(&root)
-                .margin(10)
-                .caption(
-                    format!(
-                        "Side length: {}",
-                        best_s * (PI / (nsc as FloatType)).sin()
-                            / (PI / (nsi as FloatType)).sin()
-                    ),
-                    ("sans-serif", 20).into_font(),
-                )
-                .build_cartesian_2d(x_min_eq..x_max_eq, y_min_eq..y_max_eq)
-                .unwrap();
-
-            // configure mesh to show ticks at our step sizes
-            let x_labels =
-                ((span / x_step).round() as usize).saturating_add(1).max(2);
-            let y_labels =
-                ((span / y_step).round() as usize).saturating_add(1).max(2);
-            chart
-                .configure_mesh()
-                .x_labels(x_labels)
-                .y_labels(y_labels)
-                .x_label_formatter(&|x| format!("{:.0}", x))
-                .y_label_formatter(&|y| format!("{:.1}", y))
-                .draw()
-                .ok();
-
-            // draw container outline
-            chart
-                .draw_series(iter::once(PathElement::new(
-                    container_pts.clone(),
-                    BLACK,
-                )))
-                .ok();
-
-            // draw filled polygons and outlines
-            for poly in polys {
-                let fill_style =
-                    Into::<ShapeStyle>::into(&RGBColor(204, 204, 204)).filled();
-                chart
-                    .draw_series(iter::once(Polygon::new(
-                        poly.clone(),
-                        fill_style,
-                    )))
-                    .ok();
-                chart
-                    .draw_series(iter::once(PathElement::new(poly, BLACK)))
-                    .ok();
+            if collision {
+                p += min_overlap * min_overlap;
             }
-
-            root.present().ok();
-            println!("Saved plot to {}", file_name);
         }
-    } else {
-        println!("No valid packing found in attempts");
-    }
-}
-
-// def repetition(seed):
-fn transform_polygon(
-    x: FloatType, y: FloatType, a: FloatType, vertices: &Array2<FloatType>,
-) -> Vec<FloatType> {
-    let n_vertices = vertices.shape()[0];
-    let mut transformed = vec![0.0 as FloatType; n_vertices * 2];
-    let ca = a.cos();
-    let sa = a.sin();
-    for i in 0..n_vertices {
-        let vx = vertices[[i, 0]];
-        let vy = vertices[[i, 1]];
-        transformed[i * 2] = x + vy.mul_add(-sa, vx * ca);
-        transformed[i * 2 + 1] = y + vy.mul_add(ca, vx * sa);
     }
-    transformed
+    p
 }
 
-// BHProblem struct holds problem data used during cost evaluation.
-// To reduce memory pressure we compute transformed vertices and axes
-// on-the-fly instead of storing arena-backed scratch buffers.
-struct BHProblem<'a> {
-    n: usize,
-    nsi: usize,
-    nsc: usize,
-    unit_polygon_vertices: &'a Array2<FloatType>,
-    unit_polygon_vectors: &'a Array2<FloatType>,
-    unit_container_vectors: &'a Array2<FloatType>,
-    unit_container_apothem: FloatType,
-    s: FloatType,
+fn project(verts: &[Vector2<f64>], ax: &Vector2<f64>) -> (f64, f64) {
+    verts
+        .iter()
+        .map(|v| v.dot(ax))
+        .fold((f64::MAX, f64::MIN), |(a, b), d| (a.min(d), b.max(d)))
 }
 
-// BH cost evaluation implemented as a method on the problem struct.
-// Use a bump arena and SoA scratch buffers (polys_x/polys_y, vecs_x/vecs_y)
-// to reduce transient heap allocations and improve data locality.
-impl<'a> BHProblem<'a> {
-    fn bh_function(&self, values: &[FloatType]) -> FloatType {
-        const HUGE: FloatType = 1e20 as FloatType;
-
-        let n_vertices = self.unit_polygon_vertices.shape()[0];
-        let nsi = self.nsi;
-        let nsc = self.nsc;
-
-        let limit = self.unit_container_apothem * self.s;
-
-        let mut penalty: FloatType = 0.0;
-
-        // Compute transformed vertices on-the-fly and apply container penalty.
-        for i in 0..self.n {
-            let posx = values[i * 3];
-            let posy = values[i * 3 + 1];
-            let rot = values[i * 3 + 2];
-            let ca = rot.cos();
-            let sa = rot.sin();
-
-            for v in 0..n_vertices {
-                let vx = self.unit_polygon_vertices[[v, 0]];
-                let vy = self.unit_polygon_vertices[[v, 1]];
-                let px = posx + vy.mul_add(-sa, vx * ca);
-                let py = posy + vy.mul_add(ca, vx * sa);
-
-                for ci in 0..nsc {
-                    let dvx = self.unit_container_vectors[[ci, 0]];
-                    let dvy = self.unit_container_vectors[[ci, 1]];
-                    let distance = py.mul_add(dvy, px * dvx);
-                    if distance > limit {
-                        let diff = distance - limit;
-                        penalty = diff.mul_add(diff, penalty);
-                    }
-                }
+/// Matches SciPy's numerical gradient calculation (Forward Difference)
+fn local_min(
+    x0: &[f64], s: f64, args: &Args, geo: &Geometry,
+) -> Option<Vec<f64>> {
+    let mut x = x0.to_vec();
+    let obj = |xx: &[f64], grad: Option<&mut [f64]>, _: &mut ()| -> f64 {
+        let f0 = penalty(xx, s, args, geo);
+        if let Some(g) = grad {
+            // SciPy uses a small epsilon relative to the scale of the value
+            let eps = f64::EPSILON.sqrt();
+            let mut x_tmp = xx.to_vec();
+            for i in 0..xx.len() {
+                let step = eps * (1.0 + xx[i].abs());
+                x_tmp[i] += step;
+                let f1 = penalty(&x_tmp, s, args, geo);
+                g[i] = (f1 - f0) / step;
+                x_tmp[i] = xx[i]; // Reset for next dimension
             }
         }
+        f0
+    };
 
-        // Pairwise separating axis test: compute axes and projections
-        // on-the-fly
-        for i in 0..self.n {
-            let posx_i = values[i * 3];
-            let posy_i = values[i * 3 + 1];
-            let rot_i = values[i * 3 + 2];
-            let ca_i = rot_i.cos();
-            let sa_i = rot_i.sin();
-
-            for j in (i + 1)..self.n {
-                let posx_j = values[j * 3];
-                let posy_j = values[j * 3 + 1];
-                let rot_j = values[j * 3 + 2];
-                let ca_j = rot_j.cos();
-                let sa_j = rot_j.sin();
-
-                let mut collision = true;
-                let mut min_overlap: FloatType = HUGE;
-
-                for vec_idx in 0..(nsi * 2) {
-                    let (x_axis, y_axis) = if vec_idx < nsi {
-                        let v = vec_idx;
-                        let ux = self.unit_polygon_vectors[[v, 0]];
-                        let uy = self.unit_polygon_vectors[[v, 1]];
-                        (
-                            uy.mul_add(-sa_i, ux * ca_i),
-                            uy.mul_add(ca_i, ux * sa_i),
-                        )
-                    } else {
-                        let v = vec_idx - nsi;
-                        let ux = self.unit_polygon_vectors[[v, 0]];
-                        let uy = self.unit_polygon_vectors[[v, 1]];
-                        (
-                            uy.mul_add(-sa_j, ux * ca_j),
-                            uy.mul_add(ca_j, ux * sa_j),
-                        )
-                    };
-
-                    let mut min_1 = HUGE;
-                    let mut max_1 = -HUGE;
-                    for vert in 0..nsi {
-                        let vx = self.unit_polygon_vertices[[vert, 0]];
-                        let vy = self.unit_polygon_vertices[[vert, 1]];
-                        let px = posx_i + vy.mul_add(-sa_i, vx * ca_i);
-                        let py = posy_i + vy.mul_add(ca_i, vx * sa_i);
-                        let dotp = py.mul_add(y_axis, px * x_axis);
-                        if dotp < min_1 {
-                            min_1 = dotp;
-                        }
-                        if dotp > max_1 {
-                            max_1 = dotp;
-                        }
-                    }
-
-                    let mut min_2 = HUGE;
-                    let mut max_2 = -HUGE;
-                    for vert in 0..nsi {
-                        let vx = self.unit_polygon_vertices[[vert, 0]];
-                        let vy = self.unit_polygon_vertices[[vert, 1]];
-                        let px = posx_j + vy.mul_add(-sa_j, vx * ca_j);
-                        let py = posy_j + vy.mul_add(ca_j, vx * sa_j);
-                        let dotp = py.mul_add(y_axis, px * x_axis);
-                        if dotp < min_2 {
-                            min_2 = dotp;
-                        }
-                        if dotp > max_2 {
-                            max_2 = dotp;
-                        }
-                    }
-
-                    let overlap = (if max_1 < max_2 { max_1 } else { max_2 })
-                        - (if min_1 > min_2 { min_1 } else { min_2 });
-                    if overlap <= (0.0 as FloatType) {
-                        collision = false;
-                        break;
-                    }
-                    if overlap < min_overlap {
-                        min_overlap = overlap;
-                    }
-                }
+    let mut opt =
+        Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ());
 
-                if collision {
-                    penalty = min_overlap.mul_add(min_overlap, penalty);
-                }
-            }
-        }
+    // Mapping SciPy's 'tol' to NLopt tolerances
+    opt.set_ftol_rel(args.tolerance).ok();
+    opt.set_xtol_rel(args.tolerance).ok();
+    opt.set_maxeval(5000).ok(); // Standard limit to prevent infinite loops
 
-        penalty
+    match opt.optimize(&mut x) {
+        Ok(_) => Some(x),
+        Err(_) => None,
     }
 }
 
-impl<'a> CostFunction for BHProblem<'a> {
-    type Param = Vec<FloatType>;
-    type Output = FloatType;
+fn verify_results(v: &[f64], s: f64, args: &Args, geo: &Geometry) {
+    println!("--- Verification Pass ---");
+    let limit = geo.apothem * s;
+    let mut overlap_count = 0;
+    let mut out_of_bounds = 0;
 
-    fn cost(&self, param: &Self::Param) -> Result<Self::Output, Error> {
-        Ok(self.bh_function(param.as_slice()))
-    }
-}
-
-impl<'a> Gradient for BHProblem<'a> {
-    type Param = Vec<FloatType>;
-    type Gradient = Vec<FloatType>;
+    let polys: Vec<_> = (0..args.inner_polygons)
+        .map(|i| {
+            let (pos, rot) = (
+                Vector2::new(v[i * 3], v[i * 3 + 1]),
+                Rotation2::new(v[i * 3 + 2]),
+            );
+            (
+                geo.inner_verts
+                    .iter()
+                    .map(|&iv| rot * iv + pos)
+                    .collect::<Vec<_>>(),
+                geo.inner_axes
+                    .iter()
+                    .map(|&ia| rot * ia)
+                    .collect::<Vec<_>>(),
+            )
+        })
+        .collect();
 
-    fn gradient(&self, param: &Self::Param) -> Result<Self::Gradient, Error> {
-        let n = param.len();
-        let mut grad = vec![0.0 as FloatType; n];
-        let eps = FloatType::EPSILON.sqrt();
-        let mut xp = param.clone();
-        let mut xm = param.clone();
-        for i in 0..n {
-            let xi = param[i];
-            let h = eps * xi.abs().max(1.0 as FloatType);
-            xp[i] = xi + h;
-            xm[i] = xi - h;
-            let fp = self.bh_function(xp.as_slice());
-            let fm = self.bh_function(xm.as_slice());
-            grad[i] = (fp - fm) / (2.0 as FloatType * h);
-            xp[i] = xi;
-            xm[i] = xi;
+    for (i, verts) in polys.iter().map(|x| &x.0).enumerate() {
+        if verts
+            .iter()
+            .any(|pt| geo.cont_axes.iter().any(|ax| pt.dot(ax) > limit + 1e-9))
+        {
+            out_of_bounds += 1;
+        }
+        for (j, ()) in polys.iter().map(|_x| ()).enumerate().skip(i + 1) {
+            let mut collision = true;
+            for ax in polys[i].1.iter().chain(polys[j].1.iter()) {
+                let (min1, max1) = project(&polys[i].0, ax);
+                let (min2, max2) = project(&polys[j].0, ax);
+                if max1.min(max2) - min1.max(min2) <= 1e-9 {
+                    collision = false;
+                    break;
+                }
+            }
+            if collision {
+                overlap_count += 1;
+            }
         }
-        Ok(grad)
-    }
-}
-
-fn initial_simplex(x0: &[FloatType]) -> Vec<Vec<FloatType>> {
-    let n = x0.len();
-    let mut simplex: Vec<Vec<FloatType>> = Vec::with_capacity(n + 1);
-    simplex.push(x0.to_owned());
-    for i in 0..n {
-        let mut xi = x0.to_owned();
-        let delta = (0.05 as FloatType) * (1.0 as FloatType + x0[i].abs());
-        xi[i] += delta;
-        simplex.push(xi);
     }
-    simplex
+    println!(
+        "Out of bounds: {}, Pairwise overlaps: {}",
+        out_of_bounds, overlap_count
+    );
 }
 
-// def repetition(seed):
-#[allow(clippy::too_many_arguments)]
-fn repetition(
-    seed: usize, n: usize, nsi: usize, nsc: usize,
-    penalty_tolerance: FloatType, final_step_size: FloatType,
-    unit_polygon_vertices: &Array2<FloatType>,
-    unit_polygon_vectors: &Array2<FloatType>,
-    unit_container_vectors: &Array2<FloatType>,
-    unit_container_apothem: FloatType,
-) -> (FloatType, Vec<FloatType>) {
-    println!("Attempt {}", seed);
-
-    let mut rng = PcgInnerState64::mcg_seeded(seed as _);
-    let rand01 = |rng: &mut PcgInnerState64| {
-        let hi = rng.mcg_xsh_rs();
-        let lo = rng.mcg_xsh_rs();
-        let rand = ((hi as u64) << 32) | lo as u64;
-        rand as FloatType / (u64::MAX as FloatType)
+fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
+    let mut rng = PcgInnerState64::mcg_seeded(seed as u64);
+    let mut rand = || {
+        (((rng.mcg_xsh_rs() as u64) << 32) | rng.mcg_xsh_rs() as u64) as f64
+            / u64::MAX as f64
     };
-    let mut dynamic_s = (n as FloatType).sqrt()
-        * (2.0 as FloatType + rand01(&mut rng) * 2.0 as FloatType);
-    let initial_s = dynamic_s;
-
-    let mut x0 = vec![0.0 as FloatType; n * 3];
+    let mut s = (args.inner_polygons as f64).sqrt() * (2.0 + rand() * 2.0);
+    let initial_s = s;
+    let mut x = vec![0.0; args.inner_polygons * 3];
 
-    if rand01(&mut rng) < 0.5 {
-        for item in x0.iter_mut().take(n * 3) {
-            *item = (rand01(&mut rng) - 0.5 as FloatType) * dynamic_s;
-        }
+    if rand() < 0.5 {
+        x.iter_mut().for_each(|v| *v = (rand() - 0.5) * s);
     } else {
-        let grid_count = ((n as FloatType).sqrt().ceil()) as usize;
-        let min = -dynamic_s / (2.0 as FloatType) * (0.9 as FloatType);
-        let max = dynamic_s / (2.0 as FloatType) * (0.9 as FloatType);
-        let mut grid_points: Vec<(FloatType, FloatType)> =
-            Vec::with_capacity(grid_count * grid_count);
-        for gy in 0..grid_count {
-            for gx in 0..grid_count {
-                let xi = if grid_count > 1 {
-                    (gx as FloatType).mul_add(
-                        (max - min) / ((grid_count - 1) as FloatType),
-                        min,
-                    )
-                } else {
-                    (min + max) / (2.0 as FloatType)
-                };
-                let yi = if grid_count > 1 {
-                    (gy as FloatType).mul_add(
-                        (max - min) / ((grid_count - 1) as FloatType),
-                        min,
-                    )
-                } else {
-                    (min + max) / (2.0 as FloatType)
-                };
-                grid_points.push((xi, yi));
-            }
-        }
-        for i in 0..n {
-            let (gx, gy) = grid_points[i % grid_points.len()];
-            x0[i * 3] = gx;
-            x0[i * 3 + 1] = gy;
-            x0[i * 3 + 2] = rand01(&mut rng) * (2.0 as FloatType) * PI;
+        let gc = (args.inner_polygons as f64).sqrt().ceil() as usize;
+        let span = s * 0.45;
+        for i in 0..args.inner_polygons {
+            x[i * 3] = if gc > 1 {
+                (i % gc) as f64 * (2.0 * span / (gc - 1) as f64) - span
+            } else {
+                0.0
+            };
+            x[i * 3 + 1] = if gc > 1 {
+                (i / gc) as f64 * (2.0 * span / (gc - 1) as f64) - span
+            } else {
+                0.0
+            };
+            x[i * 3 + 2] = rand() * 2.0 * PI;
         }
     }
 
-    let mut last_valid_x = x0.clone();
-    let mut last_valid_s = dynamic_s;
-
-    // Match Python's local-minimizer `tol=1e-8` used in `minimize(...,
-    // tol=1e-8)`
-    let solver_tol: FloatType = 1e-8 as FloatType;
-
+    let (mut last_x, mut last_s) = (x.clone(), s);
     loop {
-        // Local minimization using L-BFGS (match Python's L-BFGS-B)
-        let problem = BHProblem {
-            n,
-            nsi,
-            nsc,
-            unit_polygon_vertices,
-            unit_polygon_vectors,
-            unit_container_vectors,
-            unit_container_apothem,
-            s: dynamic_s,
-        };
-
-        let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
-        let linesearch = BacktrackingLineSearch::new(armijo);
-        let lbfgs = LBFGS::new(linesearch, 3)
-            .with_tolerance_grad(solver_tol)
-            .unwrap()
-            .with_tolerance_cost(solver_tol)
-            .unwrap();
-
-        let result = Executor::new(problem, lbfgs)
-            .configure(|state| state.param(x0.clone()))
-            .run();
-
-        let mut success = false;
-        if let Ok(res) = result {
-            let fun = res.state.get_best_cost();
-            if fun < penalty_tolerance {
-                if let Some(param) = res.state.get_best_param() {
-                    last_valid_x.clone_from(param);
-                    last_valid_s = dynamic_s;
-                    success = true;
-                    // compute multiplier (match Python exactly)
-                    let base = (n as FloatType).sqrt() * (nsi as FloatType)
-                        / (nsc as FloatType);
-                    let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_s - base) * (0.0099 as FloatType)
-                            / (initial_s - base);
-                    multiplier = (dynamic_s - base).mul_add(
-                        -((0.01 as FloatType - final_step_size)
-                            / (initial_s - base)),
-                        1.0 as FloatType - final_step_size,
-                    );
-                    for i in 0..x0.len() {
-                        x0[i] = param[i] * multiplier;
-                    }
-                    dynamic_s *= multiplier;
-                }
-            } else {
-                // Basinhopping-like global search (many local starts)
-                let mut best_bh_fun = fun;
-                let mut best_bh_param =
-                    res.state.get_best_param().cloned().unwrap_or(x0.clone());
-                let mut current_fun = fun;
-                let mut current_param =
-                    res.state.get_best_param().cloned().unwrap_or(x0.clone());
-
-                for _ in 0..50 {
-                    let mut trial = current_param.clone();
-                    for item in &mut trial {
-                        *item = ((rand01(&mut rng) - 0.5 as FloatType)
-                            * (2.0 as FloatType))
-                            .mul_add(0.1 as FloatType, *item);
-                    }
-                    let problem = BHProblem {
-                        n,
-                        nsi,
-                        nsc,
-                        unit_polygon_vertices,
-                        unit_polygon_vectors,
-                        unit_container_vectors,
-                        unit_container_apothem,
-                        s: dynamic_s,
-                    };
-                    let armijo =
-                        ArmijoCondition::<FloatType>::new(0.0001).unwrap();
-                    let linesearch = BacktrackingLineSearch::new(armijo);
-                    let lbfgs = LBFGS::new(linesearch, 3)
-                        .with_tolerance_grad(solver_tol)
-                        .unwrap()
-                        .with_tolerance_cost(solver_tol)
-                        .unwrap();
-                    if let Ok(res2) = Executor::new(problem, lbfgs)
-                        .configure(|state| state.param(trial.clone()))
-                        .run()
-                    {
-                        let f2 = res2.state.get_best_cost();
-                        if f2 < best_bh_fun {
-                            best_bh_fun = f2;
-                            best_bh_param = res2
-                                .state
-                                .get_best_param()
-                                .cloned()
-                                .unwrap_or(trial.clone());
-                        }
-                        // Metropolis acceptance
-                        if f2 < current_fun
-                            || ((current_fun - f2) / (0.1 as FloatType)).exp()
-                                > rand01(&mut rng)
-                        {
-                            current_fun = f2;
-                            current_param = res2
-                                .state
-                                .get_best_param()
-                                .cloned()
-                                .unwrap_or(trial.clone());
-                        }
-                    }
-                }
-
-                if best_bh_fun < penalty_tolerance {
-                    last_valid_x.clone_from(&best_bh_param);
-                    last_valid_s = dynamic_s;
-                    let base = (n as FloatType).sqrt() * (nsi as FloatType)
-                        / (nsc as FloatType);
-                    let mut multiplier = 0.9999 as FloatType
-                        - (dynamic_s - base) * (0.0099 as FloatType)
-                            / (initial_s - base);
-                    multiplier = (dynamic_s - base).mul_add(
-                        -((0.01 as FloatType - final_step_size)
-                            / (initial_s - base)),
-                        1.0 as FloatType - final_step_size,
-                    );
-                    for i in 0..x0.len() {
-                        x0[i] = last_valid_x[i] * multiplier;
-                    }
-                    dynamic_s *= multiplier;
-                    success = true;
-                } else {
-                    break;
-                }
-            }
-        } else {
-            // If local solver failed for any reason, try a few random local
-            // starts
-            let mut found = false;
-            for _ in 0..10 {
-                let mut trial = x0.clone();
-                for item in &mut trial {
-                    *item = ((rand01(&mut rng) - 0.5 as FloatType) * dynamic_s)
-                        .mul_add(0.1 as FloatType, *item);
-                }
-                let problem = BHProblem {
-                    n,
-                    nsi,
-                    nsc,
-                    unit_polygon_vertices,
-                    unit_polygon_vectors,
-                    unit_container_vectors,
-                    unit_container_apothem,
-                    s: dynamic_s,
-                };
-                let armijo = ArmijoCondition::<FloatType>::new(0.0001).unwrap();
-                let linesearch = BacktrackingLineSearch::new(armijo);
-                let lbfgs = LBFGS::new(linesearch, 3)
-                    .with_tolerance_grad(solver_tol)
-                    .unwrap()
-                    .with_tolerance_cost(solver_tol)
-                    .unwrap();
-                if let Ok(res2) = Executor::new(problem, lbfgs)
-                    .configure(|state| state.param(trial.clone()))
-                    .run()
-                {
-                    let f2 = res2.state.get_best_cost();
-                    if f2 < penalty_tolerance {
-                        last_valid_x = res2
-                            .state
-                            .get_best_param()
-                            .cloned()
-                            .unwrap_or(trial.clone());
-                        last_valid_s = dynamic_s;
-                        x0 = last_valid_x.clone();
-                        found = true;
-                        break;
-                    }
-                }
+        if let Some(refined) = local_min(&x, s, args, geo) {
+            let p = penalty(&refined, s, args, geo);
+            if p < args.tolerance {
+                last_x = refined.clone();
+                last_s = s;
+                let base = (args.inner_polygons as f64).sqrt()
+                    * args.inner_sides as f64
+                    / args.container_sides as f64;
+                let m = (s - base).mul_add(
+                    -((0.01 - args.finalstep) / (initial_s - base)),
+                    1.0 - args.finalstep,
+                );
+                x = refined.iter().map(|v| v * m).collect();
+                s *= m;
+                continue;
             }
-            if !found {
-                break;
+            if let Some(bh) = run_bh(&refined, p, s, args, geo, &mut rand) {
+                last_x = bh.clone();
+                last_s = s;
+                let base = (args.inner_polygons as f64).sqrt()
+                    * args.inner_sides as f64
+                    / args.container_sides as f64;
+                let m = (s - base).mul_add(
+                    -((0.01 - args.finalstep) / (initial_s - base)),
+                    1.0 - args.finalstep,
+                );
+                x = bh.iter().map(|v| v * m).collect();
+                s *= m;
+                continue;
             }
         }
+        break;
+    }
+    (last_s, last_x)
+}
 
-        if !success {
-            break;
+fn run_bh(
+    refined: &[f64], p: f64, s: f64, args: &Args, geo: &Geometry,
+    rand: &mut dyn FnMut() -> f64,
+) -> Option<Vec<f64>> {
+    let (mut best_p, mut best_x) = (p, refined.to_vec());
+    let (mut cur_p, mut cur_x) = (p, refined.to_vec());
+    for _ in 0..50 {
+        let trial: Vec<_> =
+            cur_x.iter().map(|&v| v + (rand() - 0.5) * 0.2).collect();
+        if let Some(opt_x) = local_min(&trial, s, args, geo) {
+            let opt_p = penalty(&opt_x, s, args, geo);
+            if opt_p < best_p {
+                best_p = opt_p;
+                best_x = opt_x.clone();
+            }
+            if opt_p < cur_p || ((cur_p - opt_p) / 0.1).exp() > rand() {
+                cur_p = opt_p;
+                cur_x = opt_x;
+            }
+            if best_p < args.tolerance {
+                return Some(best_x);
+            }
         }
     }
+    None
+}
+
+fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
+    let path = format!(
+        "{}_{}_in_{}.png",
+        args.inner_polygons, args.inner_sides, args.container_sides
+    );
+    let root = BitMapBackend::new(&path, (1024, 1024)).into_drawing_area();
+    root.fill(&WHITE).ok();
+    let mut chart = ChartBuilder::on(&root)
+        .build_cartesian_2d(-s..s, -s..s)
+        .unwrap();
 
-    (last_valid_s, last_valid_x)
+    let mut c_pts: Vec<_> = (0..args.container_sides)
+        .map(|i| {
+            let a = i as f64 * (2.0 * PI / args.container_sides as f64);
+            (a.cos() * s, a.sin() * s)
+        })
+        .collect();
+    c_pts.push(c_pts[0]); // Close outline
+    chart
+        .draw_series(std::iter::once(PathElement::new(c_pts, BLACK)))
+        .ok();
+
+    for i in 0..args.inner_polygons {
+        let (pos, rot) = (
+            Vector2::new(v[i * 3], v[i * 3 + 1]),
+            Rotation2::new(v[i * 3 + 2]),
+        );
+        let mut p_pts: Vec<_> = geo
+            .inner_verts
+            .iter()
+            .map(|&iv| {
+                let p = rot * iv + pos;
+                (p.x, p.y)
+            })
+            .collect();
+        chart
+            .draw_series(std::iter::once(Polygon::new(
+                p_pts.clone(),
+                RGBColor(204, 204, 204).filled(),
+            )))
+            .ok();
+        p_pts.push(p_pts[0]); // Close outline
+        chart
+            .draw_series(std::iter::once(PathElement::new(p_pts, BLACK)))
+            .ok();
+    }
 }
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
index d6c7d10..26e53d7 100644
--- a/polygon-packer-rs/src/rust_specific.rs
+++ b/polygon-packer-rs/src/rust_specific.rs
@@ -6,7 +6,7 @@ use std::f64;
 
 /// change this type to `f32` be less precise but get faster,
 /// or `f64` to be more precise but get slower.
-pub type FloatType = f32;
+pub type FloatType = f64;
 
 /// an interface that defines PI for both 32 bit and 64 bit floats.
 pub trait AssocPI {

From 317af6113420aea49a0492ac0a61ecb9db524e72 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 11:23:44 +0200
Subject: [PATCH 11/15] Slight bump allocator tweaks

---
 polygon-packer-rs/Cargo.toml  |  25 +-
 polygon-packer-rs/src/main.rs | 515 ++++++++++++++++++++++------------
 2 files changed, 348 insertions(+), 192 deletions(-)

diff --git a/polygon-packer-rs/Cargo.toml b/polygon-packer-rs/Cargo.toml
index fae2a0b..906465a 100644
--- a/polygon-packer-rs/Cargo.toml
+++ b/polygon-packer-rs/Cargo.toml
@@ -5,22 +5,25 @@ edition = "2024"
 authors = ["Voxell Paladynee <voxellpaladynee@proton.me>"]
 
 [dependencies]
+# bump allocator for temporary arena allocations to reduce transient heap use
+bumpalo = { version = "3", features = ["collections"] } 
+# command line argument parsing
+clap = { version = "4.4", features = ["derive"] } 
+# linear algebra library for geometric computations
+nalgebra = "0.34.2" 
 # numeric library
-ndarray = "0.17.2"
+ndarray = "0.17.2" 
 # optimization library
-nlopt = "0.6.0"
+nlopt = "0.6.0" 
 # plot library
-plotters = "0.3"
+plotters = "0.3" 
 # parallelization
-rayon = "1.7"
-# command line argument parsing
-clap = { version = "4.4", features = ["derive"] }
-# Voxell's homegrown RNG library, used for pure-rust PCG family of random number
+rayon = "1.7" 
+# Voxell's homegrown RNG library, used for pure-rust PCG family of random number 
 # generators. https://www.pcg-random.org/index.html
-voxell_rng = "0.6.0"
-# bump allocator for temporary arena allocations to reduce transient heap use
-bumpalo = {version = "3", features = ["collections"]}
-nalgebra = "0.34.2"
+voxell_rng = "0.6.0" 
+# Voxell's homegrown timer library
+voxell_timer = "1.2.2" 
 
 [profile.release]
 # opt-level: 3 for full optimizations
diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index af3192a..459f1f0 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -1,5 +1,7 @@
 use std::f64::consts::PI;
+use std::time::Duration;
 
+use bumpalo::Bump;
 use clap::Parser;
 use nalgebra::Rotation2;
 use nalgebra::Vector2;
@@ -9,15 +11,48 @@ use nlopt::Target;
 use plotters::prelude::*;
 use rayon::prelude::*;
 use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
+use voxell_timer::time_fn;
+
+// --- Constants ---
+const MAX_ITERATIONS: usize = 5000;
+const BH_STEPS: usize = 50;
+const BH_STEP_SIZE: f64 = 0.2;
+const BH_TEMP: f64 = 0.1;
+const VERIFY_EPS: f64 = 1e-9;
+const GRADIENT_EPS_REL: f64 = 1.0; // Scaled by f64::EPSILON.sqrt()
+
+#[derive(Parser, Debug, Clone)]
+pub struct Args {
+    pub inner_polygons: usize,
+    pub inner_sides: usize,
+    pub container_sides: usize,
+    #[arg(long, default_value_t = 1000)]
+    pub attempts: usize,
+    #[arg(long, default_value_t = 1e-8)]
+    pub tolerance: f64,
+    #[arg(long, default_value_t = 0.0001)]
+    pub finalstep: f64,
+}
 
 fn main() {
+    eprintln!(
+        "Polygon Packer - Packing regular polygons into a regular container"
+    );
     let args = Args::parse();
     let geo = Geometry::new(args.inner_sides, args.container_sides);
 
-    let results: Vec<(f64, Vec<f64>)> = (0..args.attempts)
-        .into_par_iter()
-        .map(|i| repetition(i, &args, &geo))
-        .collect();
+    println!(
+        "Starting packing with {} polygons ({} sides each) into a {}-sided container",
+        args.inner_polygons, args.inner_sides, args.container_sides
+    );
+    let (results, duration): (Vec<(f64, Vec<f64>)>, Duration) = time_fn(|| {
+        (0..args.attempts)
+            .into_par_iter()
+            .map(|i| repetition(i, &args, &geo))
+            .collect()
+    });
+
+    println!("Completed {} attempts in {:.2?}", args.attempts, duration);
 
     if let Some((best_s, best_v)) = results
         .into_iter()
@@ -32,23 +67,14 @@ fn main() {
     }
 }
 
-#[derive(Parser, Debug, Clone)]
-pub struct Args {
-    pub inner_polygons: usize,
-    pub inner_sides: usize,
-    pub container_sides: usize,
-    #[arg(long, default_value_t = 1000)]
-    pub attempts: usize,
-    #[arg(long, default_value_t = 1e-8)]
-    pub tolerance: f64,
-    #[arg(long, default_value_t = 0.0001)]
-    pub finalstep: f64,
-}
-
+/// SoA (Structure of Arrays) representation for geometry to aid cache locality
 struct Geometry {
-    inner_verts: Vec<Vector2<f64>>,
-    inner_axes: Vec<Vector2<f64>>,
-    cont_axes: Vec<Vector2<f64>>,
+    inner_verts_x: Vec<f64>,
+    inner_verts_y: Vec<f64>,
+    inner_axes_x: Vec<f64>,
+    inner_axes_y: Vec<f64>,
+    cont_axes_x: Vec<f64>,
+    cont_axes_y: Vec<f64>,
     apothem: f64,
 }
 
@@ -56,83 +82,132 @@ impl Geometry {
     fn new(nsi: usize, nsc: usize) -> Self {
         let i_angle = 2.0 * PI / nsi as f64;
         let c_angle = 2.0 * PI / nsc as f64;
-        let inner_verts = (0..nsi)
-            .map(|i| {
-                Vector2::new(
-                    (i as f64 * i_angle).cos(),
-                    (i as f64 * i_angle).sin(),
-                )
-            })
-            .collect();
-        let inner_axes = (0..nsi)
-            .map(|i| {
-                let a = i as f64 * i_angle + PI / nsi as f64;
-                Vector2::new(a.cos(), a.sin())
-            })
-            .collect();
-        let cont_axes = (0..nsc)
-            .map(|i| {
-                let a = i as f64 * c_angle + PI / nsc as f64;
-                Vector2::new(a.cos(), a.sin())
-            })
-            .collect();
+
+        let mut inner_verts_x = Vec::with_capacity(nsi);
+        let mut inner_verts_y = Vec::with_capacity(nsi);
+        for i in 0..nsi {
+            inner_verts_x.push((i as f64 * i_angle).cos());
+            inner_verts_y.push((i as f64 * i_angle).sin());
+        }
+
+        let mut inner_axes_x = Vec::with_capacity(nsi);
+        let mut inner_axes_y = Vec::with_capacity(nsi);
+        for i in 0..nsi {
+            let a = i as f64 * i_angle + PI / nsi as f64;
+            inner_axes_x.push(a.cos());
+            inner_axes_y.push(a.sin());
+        }
+
+        let mut cont_axes_x = Vec::with_capacity(nsc);
+        let mut cont_axes_y = Vec::with_capacity(nsc);
+        for i in 0..nsc {
+            let a = i as f64 * c_angle + PI / nsc as f64;
+            cont_axes_x.push(a.cos());
+            cont_axes_y.push(a.sin());
+        }
+
         Self {
-            inner_verts,
-            inner_axes,
-            cont_axes,
+            inner_verts_x,
+            inner_verts_y,
+            inner_axes_x,
+            inner_axes_y,
+            cont_axes_x,
+            cont_axes_y,
             apothem: (PI / nsc as f64).cos(),
         }
     }
 }
 
-fn penalty(v: &[f64], s: f64, args: &Args, geo: &Geometry) -> f64 {
+/// Scratchpad to hold temporary polygon data and reduce allocations.
+/// Using Bumpalo for fast, scoped arena allocation.
+struct ScratchPad<'a> {
+    // (x, y) vertices for each polygon, flattened: [poly0_vx0, poly0_vy0,
+    // poly0_vx1, ...]
+    poly_verts: &'a mut [f64],
+    // (x, y) axes for each polygon, flattened
+    poly_axes: &'a mut [f64],
+}
+
+impl<'a> ScratchPad<'a> {
+    fn new(bump: &'a Bump, n_polys: usize, n_sides: usize) -> Self {
+        let v_size = n_polys * n_sides * 2;
+        let a_size = n_polys * n_sides * 2;
+        Self {
+            poly_verts: bump.alloc_slice_fill_copy(v_size, 0.0),
+            poly_axes: bump.alloc_slice_fill_copy(a_size, 0.0),
+        }
+    }
+}
+
+fn penalty(
+    v: &[f64], s: f64, args: &Args, geo: &Geometry, pad: &mut ScratchPad,
+) -> f64 {
     let mut p = 0.0;
     let limit = geo.apothem * s;
+    let nsi = args.inner_sides;
 
-    // 1. Container Penalty (Poking)
+    // 1. Container Penalty & Fill Scratchpad (SoA-style transformation)
     for i in 0..args.inner_polygons {
-        let (pos, rot) = (
-            Vector2::new(v[i * 3], v[i * 3 + 1]),
-            Rotation2::new(v[i * 3 + 2]),
-        );
-        for iv in &geo.inner_verts {
-            let pt = rot * iv + pos;
-            for ax in &geo.cont_axes {
-                let dist = pt.dot(ax);
+        let (px, py, pr) = (v[i * 3], v[i * 3 + 1], v[i * 3 + 2]);
+        let (sin_r, cos_r) = pr.sin_cos();
+
+        for j in 0..nsi {
+            // Transform vertex
+            let vx = geo.inner_verts_x[j];
+            let vy = geo.inner_verts_y[j];
+            let tx = cos_r * vx - sin_r * vy + px;
+            let ty = sin_r * vx + cos_r * vy + py;
+
+            // Store in scratchpad for SAT phase
+            pad.poly_verts[(i * nsi + j) * 2] = tx;
+            pad.poly_verts[(i * nsi + j) * 2 + 1] = ty;
+
+            // Container check
+            for k in 0..geo.cont_axes_x.len() {
+                let dist = tx * geo.cont_axes_x[k] + ty * geo.cont_axes_y[k];
                 if dist > limit {
                     p += (dist - limit).powi(2);
                 }
             }
         }
+
+        // Transform axes for SAT
+        for j in 0..nsi {
+            let ax = geo.inner_axes_x[j];
+            let ay = geo.inner_axes_y[j];
+            pad.poly_axes[(i * nsi + j) * 2] = cos_r * ax - sin_r * ay;
+            pad.poly_axes[(i * nsi + j) * 2 + 1] = sin_r * ax + cos_r * ay;
+        }
     }
 
     // 2. Pairwise Collision Penalty (SAT)
-    let polys: Vec<_> = (0..args.inner_polygons)
-        .map(|i| {
-            let (pos, rot) = (
-                Vector2::new(v[i * 3], v[i * 3 + 1]),
-                Rotation2::new(v[i * 3 + 2]),
-            );
-            (
-                geo.inner_verts
-                    .iter()
-                    .map(|&iv| rot * iv + pos)
-                    .collect::<Vec<_>>(),
-                geo.inner_axes
-                    .iter()
-                    .map(|&ia| rot * ia)
-                    .collect::<Vec<_>>(),
-            )
-        })
-        .collect();
-
     for i in 0..args.inner_polygons {
+        let i_off = i * nsi * 2;
         for j in i + 1..args.inner_polygons {
+            let j_off = j * nsi * 2;
             let mut min_overlap = 1e20;
             let mut collision = true;
-            for ax in polys[i].1.iter().chain(polys[j].1.iter()) {
-                let (min1, max1) = project(&polys[i].0, ax);
-                let (min2, max2) = project(&polys[j].0, ax);
+
+            // Check axes of both polygons
+            for k in 0..(nsi * 2) {
+                let (ax, ay) = if k < nsi {
+                    (
+                        pad.poly_axes[i_off + k * 2],
+                        pad.poly_axes[i_off + k * 2 + 1],
+                    )
+                } else {
+                    let k2 = k - nsi;
+                    (
+                        pad.poly_axes[j_off + k2 * 2],
+                        pad.poly_axes[j_off + k2 * 2 + 1],
+                    )
+                };
+
+                let (min1, max1) =
+                    project_soa(pad.poly_verts, i_off, nsi, ax, ay);
+                let (min2, max2) =
+                    project_soa(pad.poly_verts, j_off, nsi, ax, ay);
+
                 let overlap = max1.min(max2) - min1.max(min2);
                 if overlap <= 0.0 {
                     collision = false;
@@ -142,6 +217,7 @@ fn penalty(v: &[f64], s: f64, args: &Args, geo: &Geometry) -> f64 {
                     min_overlap = overlap;
                 }
             }
+
             if collision {
                 p += min_overlap * min_overlap;
             }
@@ -150,136 +226,107 @@ fn penalty(v: &[f64], s: f64, args: &Args, geo: &Geometry) -> f64 {
     p
 }
 
-fn project(verts: &[Vector2<f64>], ax: &Vector2<f64>) -> (f64, f64) {
-    verts
-        .iter()
-        .map(|v| v.dot(ax))
-        .fold((f64::MAX, f64::MIN), |(a, b), d| (a.min(d), b.max(d)))
+#[inline(always)]
+fn project_soa(
+    verts: &[f64], offset: usize, n: usize, ax: f64, ay: f64,
+) -> (f64, f64) {
+    let mut min = f64::MAX;
+    let mut max = f64::MIN;
+    for i in 0..n {
+        let dot = verts[offset + i * 2] * ax + verts[offset + i * 2 + 1] * ay;
+        if dot < min {
+            min = dot;
+        }
+        if dot > max {
+            max = dot;
+        }
+    }
+    (min, max)
 }
 
-/// Matches SciPy's numerical gradient calculation (Forward Difference)
 fn local_min(
-    x0: &[f64], s: f64, args: &Args, geo: &Geometry,
+    x0: &[f64], s: f64, args: &Args, geo: &Geometry, bump: &mut Bump,
 ) -> Option<Vec<f64>> {
     let mut x = x0.to_vec();
-    let obj = |xx: &[f64], grad: Option<&mut [f64]>, _: &mut ()| -> f64 {
-        let f0 = penalty(xx, s, args, geo);
+    // Reuse caller-provided arena to avoid allocating an arena per call.
+    bump.reset();
+
+    struct OptCtx<'a> {
+        pad: ScratchPad<'a>,
+        x_tmp: Vec<f64>,
+    }
+
+    let ctx = OptCtx {
+        pad: ScratchPad::new(&*bump, args.inner_polygons, args.inner_sides),
+        x_tmp: x0.to_vec(),
+    };
+
+    let obj = |xx: &[f64], grad: Option<&mut [f64]>, ctx: &mut OptCtx| -> f64 {
+        let f0 = penalty(xx, s, args, geo, &mut ctx.pad);
         if let Some(g) = grad {
-            // SciPy uses a small epsilon relative to the scale of the value
             let eps = f64::EPSILON.sqrt();
-            let mut x_tmp = xx.to_vec();
+            ctx.x_tmp.copy_from_slice(xx);
             for i in 0..xx.len() {
-                let step = eps * (1.0 + xx[i].abs());
-                x_tmp[i] += step;
-                let f1 = penalty(&x_tmp, s, args, geo);
+                let step = eps * (GRADIENT_EPS_REL + xx[i].abs());
+                ctx.x_tmp[i] += step;
+                let f1 = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
                 g[i] = (f1 - f0) / step;
-                x_tmp[i] = xx[i]; // Reset for next dimension
+                ctx.x_tmp[i] = xx[i];
             }
         }
         f0
     };
 
-    let mut opt =
-        Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ());
+    let mut opt = Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ctx);
 
-    // Mapping SciPy's 'tol' to NLopt tolerances
     opt.set_ftol_rel(args.tolerance).ok();
     opt.set_xtol_rel(args.tolerance).ok();
-    opt.set_maxeval(5000).ok(); // Standard limit to prevent infinite loops
+    // Use the cast as requested
+    opt.set_maxeval(MAX_ITERATIONS as _).ok();
 
     match opt.optimize(&mut x) {
         Ok(_) => Some(x),
         Err(_) => None,
     }
 }
-
-fn verify_results(v: &[f64], s: f64, args: &Args, geo: &Geometry) {
-    println!("--- Verification Pass ---");
-    let limit = geo.apothem * s;
-    let mut overlap_count = 0;
-    let mut out_of_bounds = 0;
-
-    let polys: Vec<_> = (0..args.inner_polygons)
-        .map(|i| {
-            let (pos, rot) = (
-                Vector2::new(v[i * 3], v[i * 3 + 1]),
-                Rotation2::new(v[i * 3 + 2]),
-            );
-            (
-                geo.inner_verts
-                    .iter()
-                    .map(|&iv| rot * iv + pos)
-                    .collect::<Vec<_>>(),
-                geo.inner_axes
-                    .iter()
-                    .map(|&ia| rot * ia)
-                    .collect::<Vec<_>>(),
-            )
-        })
-        .collect();
-
-    for (i, verts) in polys.iter().map(|x| &x.0).enumerate() {
-        if verts
-            .iter()
-            .any(|pt| geo.cont_axes.iter().any(|ax| pt.dot(ax) > limit + 1e-9))
-        {
-            out_of_bounds += 1;
-        }
-        for (j, ()) in polys.iter().map(|_x| ()).enumerate().skip(i + 1) {
-            let mut collision = true;
-            for ax in polys[i].1.iter().chain(polys[j].1.iter()) {
-                let (min1, max1) = project(&polys[i].0, ax);
-                let (min2, max2) = project(&polys[j].0, ax);
-                if max1.min(max2) - min1.max(min2) <= 1e-9 {
-                    collision = false;
-                    break;
-                }
-            }
-            if collision {
-                overlap_count += 1;
-            }
-        }
-    }
-    println!(
-        "Out of bounds: {}, Pairwise overlaps: {}",
-        out_of_bounds, overlap_count
-    );
-}
-
 fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
     let mut rng = PcgInnerState64::mcg_seeded(seed as u64);
-    let mut rand = || {
-        (((rng.mcg_xsh_rs() as u64) << 32) | rng.mcg_xsh_rs() as u64) as f64
-            / u64::MAX as f64
-    };
+    eprintln!(
+        "Starting attempt {} with seed {}",
+        seed,
+        rng.oneseq_rxs_m_xs()
+    );
+    let mut rand = || rng.oneseq_rxs_m_xs() as f64 / u64::MAX as f64;
+
     let mut s = (args.inner_polygons as f64).sqrt() * (2.0 + rand() * 2.0);
     let initial_s = s;
     let mut x = vec![0.0; args.inner_polygons * 3];
 
+    // Initial positioning
     if rand() < 0.5 {
         x.iter_mut().for_each(|v| *v = (rand() - 0.5) * s);
     } else {
         let gc = (args.inner_polygons as f64).sqrt().ceil() as usize;
         let span = s * 0.45;
         for i in 0..args.inner_polygons {
-            x[i * 3] = if gc > 1 {
-                (i % gc) as f64 * (2.0 * span / (gc - 1) as f64) - span
-            } else {
-                0.0
-            };
-            x[i * 3 + 1] = if gc > 1 {
-                (i / gc) as f64 * (2.0 * span / (gc - 1) as f64) - span
+            let step = if gc > 1 {
+                (2.0 * span / (gc - 1) as f64)
             } else {
                 0.0
             };
+            x[i * 3] = (i % gc) as f64 * step - span;
+            x[i * 3 + 1] = (i / gc) as f64 * step - span;
             x[i * 3 + 2] = rand() * 2.0 * PI;
         }
     }
 
+    let mut bump = Bump::new();
     let (mut last_x, mut last_s) = (x.clone(), s);
+
     loop {
-        if let Some(refined) = local_min(&x, s, args, geo) {
-            let p = penalty(&refined, s, args, geo);
+        if let Some(refined) = local_min(&x, s, args, geo, &mut bump) {
+            let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+            let p = penalty(&refined, s, args, geo, &mut pad);
             if p < args.tolerance {
                 last_x = refined.clone();
                 last_s = s;
@@ -290,7 +337,13 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                     -((0.01 - args.finalstep) / (initial_s - base)),
                     1.0 - args.finalstep,
                 );
-                x = refined.iter().map(|v| v * m).collect();
+                // FIXED: Only scale positions (indices 0 and 1 mod 3), NOT
+                // rotations (index 2 mod 3)
+                for i in 0..args.inner_polygons {
+                    x[i * 3] = refined[i * 3] * m;
+                    x[i * 3 + 1] = refined[i * 3 + 1] * m;
+                    x[i * 3 + 2] = refined[i * 3 + 2]; // Rotation remains constant
+                }
                 s *= m;
                 continue;
             }
@@ -304,7 +357,15 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                     -((0.01 - args.finalstep) / (initial_s - base)),
                     1.0 - args.finalstep,
                 );
-                x = bh.iter().map(|v| v * m).collect();
+                // --- FIX APPLIED HERE ---
+                // Change this: x = bh.iter().map(|v| v * m).collect();
+                // To this:
+                for i in 0..args.inner_polygons {
+                    x[i * 3] = bh[i * 3] * m;
+                    x[i * 3 + 1] = bh[i * 3 + 1] * m;
+                    x[i * 3 + 2] = bh[i * 3 + 2]; // Keep rotation intact!
+                }
+                // ------------------------
                 s *= m;
                 continue;
             }
@@ -320,16 +381,21 @@ fn run_bh(
 ) -> Option<Vec<f64>> {
     let (mut best_p, mut best_x) = (p, refined.to_vec());
     let (mut cur_p, mut cur_x) = (p, refined.to_vec());
-    for _ in 0..50 {
-        let trial: Vec<_> =
-            cur_x.iter().map(|&v| v + (rand() - 0.5) * 0.2).collect();
-        if let Some(opt_x) = local_min(&trial, s, args, geo) {
-            let opt_p = penalty(&opt_x, s, args, geo);
+    let mut bump = Bump::new();
+
+    for _ in 0..BH_STEPS {
+        let trial: Vec<_> = cur_x
+            .iter()
+            .map(|&v| v + (rand() - 0.5) * BH_STEP_SIZE)
+            .collect();
+        if let Some(opt_x) = local_min(&trial, s, args, geo, &mut bump) {
+            let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+            let opt_p = penalty(&opt_x, s, args, geo, &mut pad);
             if opt_p < best_p {
                 best_p = opt_p;
                 best_x = opt_x.clone();
             }
-            if opt_p < cur_p || ((cur_p - opt_p) / 0.1).exp() > rand() {
+            if opt_p < cur_p || ((cur_p - opt_p) / BH_TEMP).exp() > rand() {
                 cur_p = opt_p;
                 cur_x = opt_x;
             }
@@ -341,6 +407,92 @@ fn run_bh(
     None
 }
 
+fn verify_results(v: &[f64], s: f64, args: &Args, geo: &Geometry) {
+    println!("--- Verification Pass ---");
+    let bump = Bump::new();
+    let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+    let limit = geo.apothem * s;
+    let mut overlap_count = 0;
+    let mut out_of_bounds = 0;
+    let nsi = args.inner_sides;
+
+    // 1. Fully populate the scratchpad FIRST
+    for i in 0..args.inner_polygons {
+        let (px, py, pr) = (v[i * 3], v[i * 3 + 1], v[i * 3 + 2]);
+        let (sin_r, cos_r) = pr.sin_cos();
+        for j in 0..nsi {
+            let tx = cos_r * geo.inner_verts_x[j]
+                - sin_r * geo.inner_verts_y[j]
+                + px;
+            let ty = sin_r * geo.inner_verts_x[j]
+                + cos_r * geo.inner_verts_y[j]
+                + py;
+            pad.poly_verts[(i * nsi + j) * 2] = tx;
+            pad.poly_verts[(i * nsi + j) * 2 + 1] = ty;
+        }
+        for j in 0..nsi {
+            let ax = geo.inner_axes_x[j];
+            let ay = geo.inner_axes_y[j];
+            pad.poly_axes[(i * nsi + j) * 2] = cos_r * ax - sin_r * ay;
+            pad.poly_axes[(i * nsi + j) * 2 + 1] = sin_r * ax + cos_r * ay;
+        }
+    }
+
+    // 2. Perform verification checks on the complete data
+    for i in 0..args.inner_polygons {
+        let mut is_out = false;
+        for j in 0..nsi {
+            let tx = pad.poly_verts[(i * nsi + j) * 2];
+            let ty = pad.poly_verts[(i * nsi + j) * 2 + 1];
+            if geo
+                .cont_axes_x
+                .iter()
+                .zip(&geo.cont_axes_y)
+                .any(|(&ax, &ay)| tx * ax + ty * ay > limit + VERIFY_EPS)
+            {
+                is_out = true;
+                break;
+            }
+        }
+        if is_out {
+            out_of_bounds += 1;
+        }
+
+        for j in i + 1..args.inner_polygons {
+            let mut collision = true;
+            for k in 0..(nsi * 2) {
+                let (ax, ay) = if k < nsi {
+                    (
+                        pad.poly_axes[(i * nsi + k) * 2],
+                        pad.poly_axes[(i * nsi + k) * 2 + 1],
+                    )
+                } else {
+                    let k2 = k - nsi;
+                    (
+                        pad.poly_axes[(j * nsi + k2) * 2],
+                        pad.poly_axes[(j * nsi + k2) * 2 + 1],
+                    )
+                };
+                let (min1, max1) =
+                    project_soa(pad.poly_verts, i * nsi * 2, nsi, ax, ay);
+                let (min2, max2) =
+                    project_soa(pad.poly_verts, j * nsi * 2, nsi, ax, ay);
+                if max1.min(max2) - min1.max(min2) <= VERIFY_EPS {
+                    collision = false;
+                    break;
+                }
+            }
+            if collision {
+                overlap_count += 1;
+            }
+        }
+    }
+    println!(
+        "Out of bounds: {}, Pairwise overlaps: {}",
+        out_of_bounds, overlap_count
+    );
+}
+
 fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
     let path = format!(
         "{}_{}_in_{}.png",
@@ -358,22 +510,23 @@ fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
             (a.cos() * s, a.sin() * s)
         })
         .collect();
-    c_pts.push(c_pts[0]); // Close outline
+    c_pts.push(c_pts[0]);
     chart
         .draw_series(std::iter::once(PathElement::new(c_pts, BLACK)))
         .ok();
 
     for i in 0..args.inner_polygons {
-        let (pos, rot) = (
-            Vector2::new(v[i * 3], v[i * 3 + 1]),
-            Rotation2::new(v[i * 3 + 2]),
-        );
-        let mut p_pts: Vec<_> = geo
-            .inner_verts
-            .iter()
-            .map(|&iv| {
-                let p = rot * iv + pos;
-                (p.x, p.y)
+        let (px, py, pr) = (v[i * 3], v[i * 3 + 1], v[i * 3 + 2]);
+        let (sin_r, cos_r) = pr.sin_cos();
+        let mut p_pts: Vec<_> = (0..args.inner_sides)
+            .map(|j| {
+                let tx = cos_r * geo.inner_verts_x[j]
+                    - sin_r * geo.inner_verts_y[j]
+                    + px;
+                let ty = sin_r * geo.inner_verts_x[j]
+                    + cos_r * geo.inner_verts_y[j]
+                    + py;
+                (tx, ty)
             })
             .collect();
         chart
@@ -382,7 +535,7 @@ fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
                 RGBColor(204, 204, 204).filled(),
             )))
             .ok();
-        p_pts.push(p_pts[0]); // Close outline
+        p_pts.push(p_pts[0]);
         chart
             .draw_series(std::iter::once(PathElement::new(p_pts, BLACK)))
             .ok();

From 7867f8d542b5f129d6d0557939db05ab68e47451 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 11:30:40 +0200
Subject: [PATCH 12/15] Remove unused import, formatting

---
 polygon-packer-rs/src/main.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index 459f1f0..c90da37 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -3,8 +3,6 @@ use std::time::Duration;
 
 use bumpalo::Bump;
 use clap::Parser;
-use nalgebra::Rotation2;
-use nalgebra::Vector2;
 use nlopt::Algorithm;
 use nlopt::Nlopt;
 use nlopt::Target;
@@ -277,7 +275,8 @@ fn local_min(
         f0
     };
 
-    let mut opt = Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ctx);
+    let mut opt =
+        Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ctx);
 
     opt.set_ftol_rel(args.tolerance).ok();
     opt.set_xtol_rel(args.tolerance).ok();
@@ -325,7 +324,8 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
 
     loop {
         if let Some(refined) = local_min(&x, s, args, geo, &mut bump) {
-            let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+            let mut pad =
+                ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
             let p = penalty(&refined, s, args, geo, &mut pad);
             if p < args.tolerance {
                 last_x = refined.clone();
@@ -389,7 +389,8 @@ fn run_bh(
             .map(|&v| v + (rand() - 0.5) * BH_STEP_SIZE)
             .collect();
         if let Some(opt_x) = local_min(&trial, s, args, geo, &mut bump) {
-            let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+            let mut pad =
+                ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
             let opt_p = penalty(&opt_x, s, args, geo, &mut pad);
             if opt_p < best_p {
                 best_p = opt_p;

From 564aa5d2d633c3fa18cff52b340e310ffc721a97 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 11:53:32 +0200
Subject: [PATCH 13/15] More linter passes, remove bad comments, add cli helps
 back, no functional change

---
 polygon-packer-rs/src/main.rs          | 59 +++++++++++---------------
 polygon-packer-rs/src/rust_specific.rs | 24 -----------
 2 files changed, 24 insertions(+), 59 deletions(-)
 delete mode 100644 polygon-packer-rs/src/rust_specific.rs

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index c90da37..b2af994 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -1,4 +1,8 @@
+// changing `a * b + c` to `a.mul_add(b, c)` literally leads to more
+// verification errors, so we don't do that.
+#![allow(clippy::suboptimal_flops)]
 use std::f64::consts::PI;
+use std::iter;
 use std::time::Duration;
 
 use bumpalo::Bump;
@@ -11,23 +15,29 @@ use rayon::prelude::*;
 use voxell_rng::rng::pcg_advanced::pcg_64::PcgInnerState64;
 use voxell_timer::time_fn;
 
-// --- Constants ---
 const MAX_ITERATIONS: usize = 5000;
 const BH_STEPS: usize = 50;
 const BH_STEP_SIZE: f64 = 0.2;
 const BH_TEMP: f64 = 0.1;
 const VERIFY_EPS: f64 = 1e-9;
-const GRADIENT_EPS_REL: f64 = 1.0; // Scaled by f64::EPSILON.sqrt()
+const GRADIENT_EPS_REL: f64 = 1.0;
 
 #[derive(Parser, Debug, Clone)]
 pub struct Args {
+    /// Number of inner polygons
     pub inner_polygons: usize,
+    /// Number of sides of the inner polygons
     pub inner_sides: usize,
+    /// Number of sides of the container polygon
     pub container_sides: usize,
+    /// Number of attempts to run
     #[arg(long, default_value_t = 1000)]
     pub attempts: usize,
+    /// Overlap penalty tolerance. Probably best left at default.
     #[arg(long, default_value_t = 1e-8)]
     pub tolerance: f64,
+    /// How small the last theoretical step in container size decrease will be
+    /// (it gets smaller over time)
     #[arg(long, default_value_t = 0.0001)]
     pub finalstep: f64,
 }
@@ -65,7 +75,6 @@ fn main() {
     }
 }
 
-/// SoA (Structure of Arrays) representation for geometry to aid cache locality
 struct Geometry {
     inner_verts_x: Vec<f64>,
     inner_verts_y: Vec<f64>,
@@ -116,11 +125,9 @@ impl Geometry {
     }
 }
 
-/// Scratchpad to hold temporary polygon data and reduce allocations.
-/// Using Bumpalo for fast, scoped arena allocation.
 struct ScratchPad<'a> {
-    // (x, y) vertices for each polygon, flattened: [poly0_vx0, poly0_vy0,
-    // poly0_vx1, ...]
+    // (x, y) vertices for each polygon,
+    // flattened: [poly0_vx0, poly0_vy0, poly0_vx1, ...]
     poly_verts: &'a mut [f64],
     // (x, y) axes for each polygon, flattened
     poly_axes: &'a mut [f64],
@@ -144,23 +151,19 @@ fn penalty(
     let limit = geo.apothem * s;
     let nsi = args.inner_sides;
 
-    // 1. Container Penalty & Fill Scratchpad (SoA-style transformation)
     for i in 0..args.inner_polygons {
         let (px, py, pr) = (v[i * 3], v[i * 3 + 1], v[i * 3 + 2]);
         let (sin_r, cos_r) = pr.sin_cos();
 
         for j in 0..nsi {
-            // Transform vertex
             let vx = geo.inner_verts_x[j];
             let vy = geo.inner_verts_y[j];
             let tx = cos_r * vx - sin_r * vy + px;
             let ty = sin_r * vx + cos_r * vy + py;
 
-            // Store in scratchpad for SAT phase
             pad.poly_verts[(i * nsi + j) * 2] = tx;
             pad.poly_verts[(i * nsi + j) * 2 + 1] = ty;
 
-            // Container check
             for k in 0..geo.cont_axes_x.len() {
                 let dist = tx * geo.cont_axes_x[k] + ty * geo.cont_axes_y[k];
                 if dist > limit {
@@ -169,7 +172,6 @@ fn penalty(
             }
         }
 
-        // Transform axes for SAT
         for j in 0..nsi {
             let ax = geo.inner_axes_x[j];
             let ay = geo.inner_axes_y[j];
@@ -178,7 +180,6 @@ fn penalty(
         }
     }
 
-    // 2. Pairwise Collision Penalty (SAT)
     for i in 0..args.inner_polygons {
         let i_off = i * nsi * 2;
         for j in i + 1..args.inner_polygons {
@@ -186,7 +187,6 @@ fn penalty(
             let mut min_overlap = 1e20;
             let mut collision = true;
 
-            // Check axes of both polygons
             for k in 0..(nsi * 2) {
                 let (ax, ay) = if k < nsi {
                     (
@@ -246,7 +246,6 @@ fn local_min(
     x0: &[f64], s: f64, args: &Args, geo: &Geometry, bump: &mut Bump,
 ) -> Option<Vec<f64>> {
     let mut x = x0.to_vec();
-    // Reuse caller-provided arena to avoid allocating an arena per call.
     bump.reset();
 
     struct OptCtx<'a> {
@@ -280,7 +279,6 @@ fn local_min(
 
     opt.set_ftol_rel(args.tolerance).ok();
     opt.set_xtol_rel(args.tolerance).ok();
-    // Use the cast as requested
     opt.set_maxeval(MAX_ITERATIONS as _).ok();
 
     match opt.optimize(&mut x) {
@@ -301,7 +299,6 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
     let initial_s = s;
     let mut x = vec![0.0; args.inner_polygons * 3];
 
-    // Initial positioning
     if rand() < 0.5 {
         x.iter_mut().for_each(|v| *v = (rand() - 0.5) * s);
     } else {
@@ -309,7 +306,7 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
         let span = s * 0.45;
         for i in 0..args.inner_polygons {
             let step = if gc > 1 {
-                (2.0 * span / (gc - 1) as f64)
+                2.0 * span / (gc - 1) as f64
             } else {
                 0.0
             };
@@ -328,7 +325,7 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                 ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
             let p = penalty(&refined, s, args, geo, &mut pad);
             if p < args.tolerance {
-                last_x = refined.clone();
+                last_x.clone_from(&refined);
                 last_s = s;
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
@@ -337,18 +334,16 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                     -((0.01 - args.finalstep) / (initial_s - base)),
                     1.0 - args.finalstep,
                 );
-                // FIXED: Only scale positions (indices 0 and 1 mod 3), NOT
-                // rotations (index 2 mod 3)
                 for i in 0..args.inner_polygons {
                     x[i * 3] = refined[i * 3] * m;
                     x[i * 3 + 1] = refined[i * 3 + 1] * m;
-                    x[i * 3 + 2] = refined[i * 3 + 2]; // Rotation remains constant
+                    x[i * 3 + 2] = refined[i * 3 + 2];
                 }
                 s *= m;
                 continue;
             }
             if let Some(bh) = run_bh(&refined, p, s, args, geo, &mut rand) {
-                last_x = bh.clone();
+                last_x.clone_from(&bh);
                 last_s = s;
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
@@ -357,15 +352,11 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                     -((0.01 - args.finalstep) / (initial_s - base)),
                     1.0 - args.finalstep,
                 );
-                // --- FIX APPLIED HERE ---
-                // Change this: x = bh.iter().map(|v| v * m).collect();
-                // To this:
                 for i in 0..args.inner_polygons {
                     x[i * 3] = bh[i * 3] * m;
                     x[i * 3 + 1] = bh[i * 3 + 1] * m;
-                    x[i * 3 + 2] = bh[i * 3 + 2]; // Keep rotation intact!
+                    x[i * 3 + 2] = bh[i * 3 + 2];
                 }
-                // ------------------------
                 s *= m;
                 continue;
             }
@@ -394,7 +385,7 @@ fn run_bh(
             let opt_p = penalty(&opt_x, s, args, geo, &mut pad);
             if opt_p < best_p {
                 best_p = opt_p;
-                best_x = opt_x.clone();
+                best_x.clone_from(&opt_x);
             }
             if opt_p < cur_p || ((cur_p - opt_p) / BH_TEMP).exp() > rand() {
                 cur_p = opt_p;
@@ -411,13 +402,12 @@ fn run_bh(
 fn verify_results(v: &[f64], s: f64, args: &Args, geo: &Geometry) {
     println!("--- Verification Pass ---");
     let bump = Bump::new();
-    let mut pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
+    let pad = ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
     let limit = geo.apothem * s;
     let mut overlap_count = 0;
     let mut out_of_bounds = 0;
     let nsi = args.inner_sides;
 
-    // 1. Fully populate the scratchpad FIRST
     for i in 0..args.inner_polygons {
         let (px, py, pr) = (v[i * 3], v[i * 3 + 1], v[i * 3 + 2]);
         let (sin_r, cos_r) = pr.sin_cos();
@@ -439,7 +429,6 @@ fn verify_results(v: &[f64], s: f64, args: &Args, geo: &Geometry) {
         }
     }
 
-    // 2. Perform verification checks on the complete data
     for i in 0..args.inner_polygons {
         let mut is_out = false;
         for j in 0..nsi {
@@ -513,7 +502,7 @@ fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
         .collect();
     c_pts.push(c_pts[0]);
     chart
-        .draw_series(std::iter::once(PathElement::new(c_pts, BLACK)))
+        .draw_series(iter::once(PathElement::new(c_pts, BLACK)))
         .ok();
 
     for i in 0..args.inner_polygons {
@@ -531,14 +520,14 @@ fn render(args: &Args, geo: &Geometry, s: f64, v: &[f64]) {
             })
             .collect();
         chart
-            .draw_series(std::iter::once(Polygon::new(
+            .draw_series(iter::once(Polygon::new(
                 p_pts.clone(),
                 RGBColor(204, 204, 204).filled(),
             )))
             .ok();
         p_pts.push(p_pts[0]);
         chart
-            .draw_series(std::iter::once(PathElement::new(p_pts, BLACK)))
+            .draw_series(iter::once(PathElement::new(p_pts, BLACK)))
             .ok();
     }
 }
diff --git a/polygon-packer-rs/src/rust_specific.rs b/polygon-packer-rs/src/rust_specific.rs
deleted file mode 100644
index 26e53d7..0000000
--- a/polygon-packer-rs/src/rust_specific.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-//! this module handles rust-specific things like
-//! hot-swapping the float type to squeeze out more perf.
-
-use std::f32;
-use std::f64;
-
-/// change this type to `f32` be less precise but get faster,
-/// or `f64` to be more precise but get slower.
-pub type FloatType = f64;
-
-/// an interface that defines PI for both 32 bit and 64 bit floats.
-pub trait AssocPI {
-    const PI: Self;
-}
-
-// associated PI constant for f32
-impl AssocPI for f32 {
-    const PI: f32 = f32::consts::PI;
-}
-
-// associated PI constant for f64
-impl AssocPI for f64 {
-    const PI: f64 = f64::consts::PI;
-}

From 948609a5add7462eb806647c69033ed35ae79692 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 12:44:28 +0200
Subject: [PATCH 14/15] C1 continuity, gradient changes

---
 polygon-packer-rs/src/main.rs | 62 ++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index b2af994..8984336 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -20,7 +20,10 @@ const BH_STEPS: usize = 50;
 const BH_STEP_SIZE: f64 = 0.2;
 const BH_TEMP: f64 = 0.1;
 const VERIFY_EPS: f64 = 1e-9;
-const GRADIENT_EPS_REL: f64 = 1.0;
+// const GRADIENT_EPS_REL: f64 = 1.0;
+const ABS_GRADIENT_STEP: f64 = 1e-8;
+// enable for C1-continuity issues
+const CENTRAL_DIFF: bool = true;
 
 #[derive(Parser, Debug, Clone)]
 pub struct Args {
@@ -261,13 +264,34 @@ fn local_min(
     let obj = |xx: &[f64], grad: Option<&mut [f64]>, ctx: &mut OptCtx| -> f64 {
         let f0 = penalty(xx, s, args, geo, &mut ctx.pad);
         if let Some(g) = grad {
-            let eps = f64::EPSILON.sqrt();
+            // let eps = f64::EPSILON.sqrt();
             ctx.x_tmp.copy_from_slice(xx);
             for i in 0..xx.len() {
-                let step = eps * (GRADIENT_EPS_REL + xx[i].abs());
-                ctx.x_tmp[i] += step;
-                let f1 = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
-                g[i] = (f1 - f0) / step;
+                // let step = eps * (GRADIENT_EPS_REL + xx[i].abs());
+                // ctx.x_tmp[i] += step;
+                // let f1 = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                // g[i] = (f1 - f0) / step;
+                // ctx.x_tmp[i] = xx[i];
+
+                // let h = ABS_GRADIENT_STEP;
+                // ctx.x_tmp[i] = xx[i] + h;
+                // let fp = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                // ctx.x_tmp[i] = xx[i] - h;
+                // let fm = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                // g[i] = (fp - fm) / (2.0 * h);
+                // ctx.x_tmp[i] = xx[i];
+
+                let h = ABS_GRADIENT_STEP;
+                ctx.x_tmp[i] = xx[i] + h;
+                if CENTRAL_DIFF {
+                    let fp = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                    ctx.x_tmp[i] = xx[i] - h;
+                    let fm = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                    g[i] = (fp - fm) / (2.0 * h);
+                } else {
+                    let f1 = penalty(&ctx.x_tmp, s, args, geo, &mut ctx.pad);
+                    g[i] = (f1 - f0) / h;
+                }
                 ctx.x_tmp[i] = xx[i];
             }
         }
@@ -287,7 +311,7 @@ fn local_min(
     }
 }
 fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
-    let mut rng = PcgInnerState64::mcg_seeded(seed as u64);
+    let mut rng = PcgInnerState64::oneseq_seeded(seed as u64);
     eprintln!(
         "Starting attempt {} with seed {}",
         seed,
@@ -330,10 +354,14 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
                     / args.container_sides as f64;
-                let m = (s - base).mul_add(
-                    -((0.01 - args.finalstep) / (initial_s - base)),
-                    1.0 - args.finalstep,
-                );
+                // let m = (s - base).mul_add(
+                //     -((0.01 - args.finalstep) / (initial_s - base)),
+                //     1.0 - args.finalstep,
+                // );
+                let diff_s = s - base;
+                let range_s = initial_s - base;
+                let step_ratio = (0.01 - args.finalstep) / range_s;
+                let m = (1.0 - args.finalstep) - (diff_s * step_ratio);
                 for i in 0..args.inner_polygons {
                     x[i * 3] = refined[i * 3] * m;
                     x[i * 3 + 1] = refined[i * 3 + 1] * m;
@@ -348,10 +376,14 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
                     / args.container_sides as f64;
-                let m = (s - base).mul_add(
-                    -((0.01 - args.finalstep) / (initial_s - base)),
-                    1.0 - args.finalstep,
-                );
+                // let m = (s - base).mul_add(
+                //     -((0.01 - args.finalstep) / (initial_s - base)),
+                //     1.0 - args.finalstep,
+                // );
+                let diff_s = s - base;
+                let range_s = initial_s - base;
+                let step_ratio = (0.01 - args.finalstep) / range_s;
+                let m = (1.0 - args.finalstep) - (diff_s * step_ratio);
                 for i in 0..args.inner_polygons {
                     x[i * 3] = bh[i * 3] * m;
                     x[i * 3 + 1] = bh[i * 3 + 1] * m;

From 17f451f0326da74a93ac0390c69fc8cc6919e1f9 Mon Sep 17 00:00:00 2001
From: Voxell Paladynee <paladyneshadow@gmail.com>
Date: Sun, 26 Apr 2026 13:37:42 +0200
Subject: [PATCH 15/15] More memory usage optimizations

---
 polygon-packer-rs/src/main.rs | 77 ++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 32 deletions(-)

diff --git a/polygon-packer-rs/src/main.rs b/polygon-packer-rs/src/main.rs
index 8984336..2930820 100644
--- a/polygon-packer-rs/src/main.rs
+++ b/polygon-packer-rs/src/main.rs
@@ -167,12 +167,20 @@ fn penalty(
             pad.poly_verts[(i * nsi + j) * 2] = tx;
             pad.poly_verts[(i * nsi + j) * 2 + 1] = ty;
 
-            for k in 0..geo.cont_axes_x.len() {
-                let dist = tx * geo.cont_axes_x[k] + ty * geo.cont_axes_y[k];
-                if dist > limit {
-                    p += (dist - limit).powi(2);
-                }
-            }
+            // for k in 0..geo.cont_axes_x.len() {
+            //     let dist = tx * geo.cont_axes_x[k] + ty * geo.cont_axes_y[k];
+            //     if dist > limit {
+            //         p += (dist - limit).powi(2);
+            //     }
+            // }
+            geo.cont_axes_x.iter().zip(&geo.cont_axes_y).for_each(
+                |(&ax, &ay)| {
+                    let dist = tx * ax + ty * ay;
+                    if dist > limit {
+                        p += (dist - limit).powi(2);
+                    }
+                },
+            );
         }
 
         for j in 0..nsi {
@@ -233,8 +241,12 @@ fn project_soa(
 ) -> (f64, f64) {
     let mut min = f64::MAX;
     let mut max = f64::MIN;
+    // for i in 0..n {
+    //     let dot = verts[offset + i * 2] * ax + verts[offset + i * 2 + 1] *
+    // ay;
+    let relevant_verts = &verts[offset..offset + n * 2];
     for i in 0..n {
-        let dot = verts[offset + i * 2] * ax + verts[offset + i * 2 + 1] * ay;
+        let dot = relevant_verts[i * 2] * ax + relevant_verts[i * 2 + 1] * ay;
         if dot < min {
             min = dot;
         }
@@ -246,9 +258,8 @@ fn project_soa(
 }
 
 fn local_min(
-    x0: &[f64], s: f64, args: &Args, geo: &Geometry, bump: &mut Bump,
+    x: &mut [f64], s: f64, args: &Args, geo: &Geometry, bump: &mut Bump,
 ) -> Option<Vec<f64>> {
-    let mut x = x0.to_vec();
     bump.reset();
 
     struct OptCtx<'a> {
@@ -258,7 +269,7 @@ fn local_min(
 
     let ctx = OptCtx {
         pad: ScratchPad::new(&*bump, args.inner_polygons, args.inner_sides),
-        x_tmp: x0.to_vec(),
+        x_tmp: x.to_vec(),
     };
 
     let obj = |xx: &[f64], grad: Option<&mut [f64]>, ctx: &mut OptCtx| -> f64 {
@@ -299,16 +310,13 @@ fn local_min(
     };
 
     let mut opt =
-        Nlopt::new(Algorithm::Lbfgs, x0.len(), obj, Target::Minimize, ctx);
+        Nlopt::new(Algorithm::Lbfgs, x.len(), obj, Target::Minimize, ctx);
 
     opt.set_ftol_rel(args.tolerance).ok();
     opt.set_xtol_rel(args.tolerance).ok();
     opt.set_maxeval(MAX_ITERATIONS as _).ok();
 
-    match opt.optimize(&mut x) {
-        Ok(_) => Some(x),
-        Err(_) => None,
-    }
+    opt.optimize(x).ok().map(|_| x.to_vec())
 }
 fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
     let mut rng = PcgInnerState64::oneseq_seeded(seed as u64);
@@ -341,16 +349,16 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
     }
 
     let mut bump = Bump::new();
-    let (mut last_x, mut last_s) = (x.clone(), s);
+    let (mut best_x, mut best_s) = (x.clone(), s);
 
     loop {
-        if let Some(refined) = local_min(&x, s, args, geo, &mut bump) {
+        if let Some(mut refined) = local_min(&mut x, s, args, geo, &mut bump) {
             let mut pad =
                 ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
             let p = penalty(&refined, s, args, geo, &mut pad);
             if p < args.tolerance {
-                last_x.clone_from(&refined);
-                last_s = s;
+                best_x.clone_from(&refined);
+                best_s = s;
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
                     / args.container_sides as f64;
@@ -362,17 +370,17 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
                 let range_s = initial_s - base;
                 let step_ratio = (0.01 - args.finalstep) / range_s;
                 let m = (1.0 - args.finalstep) - (diff_s * step_ratio);
-                for i in 0..args.inner_polygons {
-                    x[i * 3] = refined[i * 3] * m;
-                    x[i * 3 + 1] = refined[i * 3 + 1] * m;
-                    x[i * 3 + 2] = refined[i * 3 + 2];
-                }
+                refined.chunks_exact_mut(3).for_each(|c| {
+                    c[0] *= m;
+                    c[1] *= m;
+                });
+                x = refined;
                 s *= m;
                 continue;
             }
             if let Some(bh) = run_bh(&refined, p, s, args, geo, &mut rand) {
-                last_x.clone_from(&bh);
-                last_s = s;
+                best_x.clone_from(&bh);
+                best_s = s;
                 let base = (args.inner_polygons as f64).sqrt()
                     * args.inner_sides as f64
                     / args.container_sides as f64;
@@ -395,7 +403,7 @@ fn repetition(seed: usize, args: &Args, geo: &Geometry) -> (f64, Vec<f64>) {
         }
         break;
     }
-    (last_s, last_x)
+    (best_s, best_x)
 }
 
 fn run_bh(
@@ -404,14 +412,19 @@ fn run_bh(
 ) -> Option<Vec<f64>> {
     let (mut best_p, mut best_x) = (p, refined.to_vec());
     let (mut cur_p, mut cur_x) = (p, refined.to_vec());
+    let mut trial = cur_x.clone();
     let mut bump = Bump::new();
 
     for _ in 0..BH_STEPS {
-        let trial: Vec<_> = cur_x
-            .iter()
-            .map(|&v| v + (rand() - 0.5) * BH_STEP_SIZE)
-            .collect();
-        if let Some(opt_x) = local_min(&trial, s, args, geo, &mut bump) {
+        // let trial: Vec<_> = cur_x
+        //     .iter()
+        //     .map(|&v| v + (rand() - 0.5) * BH_STEP_SIZE)
+        //     .collect();
+        for (trial, cur_x) in trial.iter_mut().zip(&cur_x) {
+            *trial = *cur_x + (rand() - 0.5) * BH_STEP_SIZE;
+        }
+        // if let Some(opt_x) = local_min(&trial, s, args, geo, &mut bump) {
+        if let Some(opt_x) = local_min(&mut trial, s, args, geo, &mut bump) {
             let mut pad =
                 ScratchPad::new(&bump, args.inner_polygons, args.inner_sides);
             let opt_p = penalty(&opt_x, s, args, geo, &mut pad);