Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 35 additions & 119 deletions benchmarks/benchmark_arithmetic_and_derived.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Benchmark for arithmetic operations and derived properties in LVec.
This benchmark focuses on the computational speed of vector algebra and
the effectiveness of caching in LVec compared to other vector libraries.
"""

import numpy as np
import timeit
import matplotlib.pyplot as plt
import tracemalloc
import gc
import time
import os
from functools import partial
from lvec import LVec, Vector2D, Vector3D
import vector # Comparison library
from plotting_utils import plot_vector_types_comparison, set_publication_style

def measure_memory_usage(operation, n_repeats=5):
"""Measure memory usage for an operation."""
Expand Down Expand Up @@ -139,7 +132,7 @@ def benchmark_arithmetic(size, vector_type, n_repeats=10):
cross_time, cross_std = measure_single_timing(cross_op, n_repeats)
results["cross_product"] = {"time": cross_time, "std": cross_std}

elif vector_type == "Vector": # vector package for comparison
elif vector_type == "Scikit Vector": # Changed from "Vector" to "Scikit Vector"
px, py, pz, E = generate_test_data(size)
v1 = vector.arr({"px": px, "py": py, "pz": pz, "E": E})
v2 = vector.arr({"px": px, "py": py, "pz": pz, "E": E})
Expand Down Expand Up @@ -227,7 +220,7 @@ def benchmark_derived_properties(size, vector_type, n_repeats=10):
rho_time, rho_std = measure_single_timing(rho_op, n_repeats)
results["rho"] = {"time": rho_time, "std": rho_std}

elif vector_type == "Vector": # vector package for comparison
elif vector_type == "Scikit Vector": # Changed from "Vector" to "Scikit Vector"
px, py, pz, E = generate_test_data(size)
vec = vector.arr({"px": px, "py": py, "pz": pz, "E": E})

Expand Down Expand Up @@ -288,132 +281,56 @@ def uncached_access():

def plot_arithmetic_results(sizes, results, vector_types, operations, save_path="benchmark_arithmetic.pdf"):
    """Plot arithmetic operation benchmark results.

    All figure construction is delegated to ``plot_vector_types_comparison``
    (imported from ``plotting_utils``); this wrapper only fixes the title and
    forwards its arguments. The former inline matplotlib implementation is
    dropped: its hard-coded colour table had no entry for the
    ``"Scikit Vector"`` type name and would raise ``KeyError`` for it.

    Parameters
    ----------
    sizes : sequence of int
        Array sizes that were benchmarked.
    results : dict
        Benchmark output keyed by vector type name. Each value is a list with
        one entry per size, and each entry maps an operation name to a
        ``{"time": ..., "std": ...}`` dict.
    vector_types : sequence of str
        Vector type names to include.
    operations : sequence of str
        Operation names to plot.
    save_path : str, optional
        Forwarded unchanged to the helper as ``filename``.
    """
    # NOTE(review): how the helper resolves ``filename`` (relative to the
    # working directory or to a plots directory) is not visible from here;
    # callers pass a path that already starts with "benchmarks/plots/" --
    # confirm the helper does not prepend that directory again.
    plot_vector_types_comparison(
        sizes,
        results,
        vector_types,
        operations,
        title='Arithmetic Operations Performance',
        filename=save_path
    )

def plot_derived_results(sizes, results, vector_types, properties, save_path="benchmark_derived.pdf"):
    """Plot derived properties benchmark results.

    All figure construction is delegated to ``plot_vector_types_comparison``
    (imported from ``plotting_utils``); this wrapper only fixes the title and
    forwards its arguments. The former inline matplotlib implementation is
    dropped: its hard-coded colour table had no entry for the
    ``"Scikit Vector"`` type name and would raise ``KeyError`` for it.

    Parameters
    ----------
    sizes : sequence of int
        Array sizes that were benchmarked.
    results : dict
        Benchmark output keyed by vector type name. Each value is a list with
        one entry per size, and each entry maps a property name to a
        ``{"time": ..., "std": ...}`` dict.
    vector_types : sequence of str
        Vector type names to include.
    properties : sequence of str
        Derived property names to plot.
    save_path : str, optional
        Forwarded unchanged to the helper as ``filename``.
    """
    # NOTE(review): how the helper resolves ``filename`` is not visible from
    # here; callers pass a path that already starts with "benchmarks/plots/"
    # -- confirm the helper does not prepend that directory again.
    plot_vector_types_comparison(
        sizes,
        results,
        vector_types,
        properties,
        title='Derived Properties Performance',
        filename=save_path
    )

def plot_caching_results(sizes, cache_results, save_path="benchmark_caching.pdf"):
    """Plot caching effectiveness benchmark results.

    Draws cached and uncached access times against array size on log-log
    axes and writes the figure to disk.

    Parameters
    ----------
    sizes : sequence of int
        Array sizes that were benchmarked (x axis).
    cache_results : sequence of dict
        One entry per size. Each entry holds ``"cached"`` and ``"uncached"``
        sub-dicts whose ``"time"`` value is the measured time in seconds.
    save_path : str, optional
        Output file. A bare file name is placed under ``benchmarks/plots``;
        a path that already has a directory component is used as given.
    """
    set_publication_style()

    fig, ax = plt.subplots(figsize=(10, 6))

    # Convert to milliseconds. The "time" field must be extracted first:
    # each res["cached"] / res["uncached"] is a dict, and multiplying an
    # array of dicts by 1000 raises TypeError.
    cached_times = np.array([res["cached"]["time"] for res in cache_results]) * 1000
    uncached_times = np.array([res["uncached"]["time"] for res in cache_results]) * 1000

    ax.plot(sizes, cached_times, 'o-', label='With Caching', color='#109618', linewidth=2, markersize=6)
    ax.plot(sizes, uncached_times, 'o-', label='Without Caching', color='#FF9900', linewidth=2, markersize=6)

    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('Array Size', fontsize=12)
    ax.set_ylabel('Time (ms)', fontsize=12)
    ax.set_title('Caching Effectiveness in lvec', fontsize=14)
    ax.grid(True, which='both', linestyle='--', alpha=0.7)
    ax.grid(True, which='minor', linestyle=':', alpha=0.4)
    ax.tick_params(labelsize=10)
    ax.legend(fontsize=12)

    plt.tight_layout()

    # Only prepend the plots directory to bare file names; a caller-supplied
    # path such as "benchmarks/plots/x.pdf" would otherwise be duplicated
    # into "benchmarks/plots/benchmarks/plots/x.pdf".
    if os.path.dirname(save_path):
        out_path = save_path
    else:
        out_path = os.path.join('benchmarks/plots', save_path)
    plt.savefig(out_path, bbox_inches='tight')
    plt.close()

def run_benchmarks():
"""Run all benchmarks and plot results."""
sizes = [10, 100, 1000, 10000, 100000, 1000000]
vector_types = ["LVec", "Vector2D", "Vector3D", "Vector"]
vector_types = ["LVec", "Vector2D", "Vector3D", "Scikit Vector"]

# Arithmetic operations
arith_results = {vtype: [] for vtype in vector_types}
Expand Down Expand Up @@ -447,8 +364,9 @@ def run_benchmarks():
print(f" Speedup: {res['uncached']['time']/res['cached']['time']:.2f}x")

# Plot results
arith_ops = ["addition", "subtraction", "scalar_mul", "dot_product", "cross_product"]
derived_props = ["mass", "pt", "eta", "phi", "magnitude", "theta", "rho"]
# Remove operations that don't have Scikit Vector equivalents
arith_ops = ["addition", "subtraction", "scalar_mul"] # Removed dot_product and cross_product
derived_props = ["mass", "pt", "eta", "phi"] # Removed magnitude, theta, rho

plot_arithmetic_results(sizes, arith_results, vector_types, arith_ops, "benchmarks/plots/benchmark_arithmetic.pdf")
plot_derived_results(sizes, derived_results, vector_types, derived_props, "benchmarks/plots/benchmark_derived.pdf")
Expand All @@ -461,7 +379,5 @@ def run_benchmarks():

if __name__ == "__main__":
    # Create plots directory if it doesn't exist (os is imported at module top)
    os.makedirs("benchmarks/plots", exist_ok=True)

    # Run the full suite exactly once.
    run_benchmarks()
111 changes: 39 additions & 72 deletions benchmarks/benchmark_initialization.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import numpy as np
import timeit
import matplotlib.pyplot as plt
from lvec import LVec
import vector
import tracemalloc
import gc
import time
import os
from lvec import LVec
import vector
from plotting_utils import plot_combined_performance, set_publication_style, COLORS

def measure_memory_usage(operation, n_repeats=5):
"""Measure memory usage for an operation."""
Expand Down Expand Up @@ -39,7 +41,7 @@ def measure_initialization_time(init_function, n_repeats=5, number=10):

def benchmark_initialization_overhead(sizes, n_repeats=5):
"""
Benchmark initialization overhead between LVec and vector package.
Benchmark initialization overhead between lvec and vector package.

Parameters:
-----------
Expand All @@ -63,7 +65,7 @@ def benchmark_initialization_overhead(sizes, n_repeats=5):
print(f"\nBenchmarking initialization with {size:,} vectors:")
px, py, pz, E = generate_test_data(size)

# Benchmark LVec initialization
# Benchmark lvec initialization
def lvec_init():
return LVec(px, py, pz, E)

Expand All @@ -88,10 +90,10 @@ def vector_init():
vector_memory.append(vector_mem)

print(f" Results for {size:,} vectors:")
print(f" LVec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.2f} MB")
print(f" Vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.2f} MB")
print(f" Speed Ratio: {vector_mean/lvec_mean:.2f}x faster with LVec")
print(f" Memory Ratio: {vector_mem/lvec_mem:.2f}x more memory efficient with LVec")
print(f" lvec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.2f} MB")
print(f" vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.2f} MB")
print(f" Speed Ratio: {vector_mean/lvec_mean:.2f}x faster with lvec")
print(f" Memory Ratio: {vector_mem/lvec_mem:.2f}x more memory efficient with lvec")

return (np.array(lvec_times), np.array(lvec_errors),
np.array(vector_times), np.array(vector_errors),
Expand Down Expand Up @@ -124,9 +126,9 @@ def benchmark_cached_initialization():

px, py, pz, E = generate_test_data(size)

# Measure LVec repeated initialization
# Measure lvec repeated initialization
lvec_times = []
print(" Measuring LVec repeated initialization...")
print(" Measuring lvec repeated initialization...")
for i in range(repeats):
start = time.time()
vec = LVec(px, py, pz, E)
Expand All @@ -145,85 +147,50 @@ def benchmark_cached_initialization():
print(f" Iteration {i+1}: {vector_times[-1]:.3f} ms")

# Plot results
set_publication_style()
plt.figure(figsize=(10, 6))
plt.plot(range(1, repeats+1), lvec_times, 'o-', label='LVec', color='#3498db', linewidth=2)
plt.plot(range(1, repeats+1), vector_times, 'o-', label='vector', color='#9b59b6', linewidth=2)
plt.plot(range(1, repeats+1), lvec_times, 'o-', label='lvec', color=COLORS['lvec'], linewidth=2)
plt.plot(range(1, repeats+1), vector_times, 'o-', label='vector', color=COLORS['vector'], linewidth=2)
plt.xlabel('Initialization Iteration', fontsize=12)
plt.ylabel('Time (ms)', fontsize=12)
plt.title('Repeated Initialization Performance (Caching Effects)', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend(fontsize=12)
plt.savefig('benchmarks/plots/cached_initialization_benchmark.pdf', dpi=300, bbox_inches='tight')
plt.savefig(os.path.join('benchmarks/plots', 'cached_initialization_benchmark.pdf'), bbox_inches='tight')
plt.close()

return lvec_times, vector_times

def plot_results(sizes, lvec_data, vector_data, title="lvec vs vector Initialization Overhead"):
    """Plot benchmark results using standardized plotting utilities.

    Unpacks the per-library result tuples and delegates figure construction
    to ``plot_combined_performance`` (imported from ``plotting_utils``).
    The input arrays are not modified.

    Parameters:
    -----------
    sizes : sequence of int
        Numbers of vectors that were benchmarked.
    lvec_data : tuple
        ``(times, errors, memory)`` for lvec.
    vector_data : tuple
        ``(times, errors, memory)`` for the vector package.
    title : str, optional
        Figure title, forwarded to the helper.
    """
    # The error arrays are part of each tuple but are not forwarded.
    lvec_times, _lvec_errors, lvec_memory = lvec_data
    vector_times, _vector_errors, vector_memory = vector_data

    # NOTE(review): times are forwarded unscaled. The earlier in-place
    # ``*= 1000`` (which also mutated the caller's arrays) is gone together
    # with the inline figure code -- confirm plot_combined_performance does
    # its own unit conversion.
    plot_combined_performance(
        sizes,
        lvec_times,
        vector_times,
        lvec_memory,
        vector_memory,
        title=title,
        filename='initialization_benchmark.pdf'
    )

if __name__ == '__main__':
    print("=== lvec vs vector Initialization Overhead Benchmark ===")

    # Create plots directory if it doesn't exist
    os.makedirs("benchmarks/plots", exist_ok=True)

    # Run main benchmarks
    sizes = [10, 100, 1000, 10000, 100000, 1000000]
    results = benchmark_initialization_overhead(sizes)
    # results starts with (lvec_times, lvec_errors, vector_times, vector_errors);
    # presumably indices 4 and 5 are the lvec and vector memory arrays --
    # verify against the return statement of benchmark_initialization_overhead.
    plot_results(sizes,
                 (results[0], results[1], results[4]),
                 (results[2], results[3], results[5]))

    # Run additional benchmarks (each exactly once)
    batch_results = benchmark_batch_sizes()
    cached_results = benchmark_cached_initialization()

    print("\nBenchmark completed. Results saved to PDF files.")
Loading