Merge remote-tracking branch 'flatiron/master' into optimizing-nthreads

flatironinstitute · Feb 27, 2025 · 1141dda · 1141dda
2 parents b3e5a64 + 30de078
commit 1141dda
Show file tree

Hide file tree

Showing 5 changed files with 629 additions and 12 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -3,6 +3,8 @@ If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).
 
 Master, using release name V 2.4.0 (2/11/25)
 
+* Tweaked choice of upsampfact to use a density based heuristic for type 1 and 2
+  in the CPU library. This gives a significant speedup for some cases.
 * Removed FINUFFT_CUDA_ARCHITECTURES flag, as it was unnecessary duplication.
 * Enabled LTO for finufft, nvcc support is flaky at the moment.
 * Added GPU spread interp only test. Added CPU spread interp only test to cmake

diff --git a/devel/analyse_upsamp.py b/devel/analyse_upsamp.py
@@ -0,0 +1,65 @@
+import pandas as pd
+import numpy as np
+
+# Load the dataset
+file_path = "wisdom.csv"  # Change this to the actual file path
+df = pd.read_csv(file_path)
+
+# Convert specified columns to numeric, coercing errors to NaN
+columns_to_convert = ['Num_pts', 'Time_2.0', 'Time_1.25']
+for col in columns_to_convert:
+    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+# Drop rows with NaN values in these columns (optional, but recommended)
+df = df.dropna(subset=columns_to_convert)
+# scientific notation to Epsilon
+df['Epsilon'] = df['Epsilon'].apply(lambda x: f'{x:.1e}')
+
+# round Time_2.0 and Time_1.25 to 2 decimal places
+df['Time_2.0'] = df['Time_2.0'].apply(lambda x: round(x, 2))
+df['Time_1.25'] = df['Time_1.25'].apply(lambda x: round(x, 2))
+
+
+# Extracting grid dimensions and volume from 'Size' column
+def parse_grid_size(size):
+    try:
+        size = str(size).strip()
+        if 'x' in size:
+            dims = list(map(int, size.split('x')))
+            volume = np.prod(dims)
+            dim = len(dims)
+        else:
+            volume = int(size)
+            dim = 1
+        return volume, str(dim)
+    except ValueError:
+        return np.nan, np.nan
+
+
+df[['Volume', 'Dim']] = df['Size'].apply(lambda x: pd.Series(parse_grid_size(str(x))))
+
+# Compute density
+df['Density'] = df['Num_pts'] / df['Volume']
+# remove columns where Time_2.0 is > Time_1.25
+df = df[df['Time_2.0'] <= df['Time_1.25']]
+
+# drop columns where diff between Time_2.0 and Time_1.25 is less than 5%
+df['% Diff'] = abs(df['Time_2.0'] - df['Time_1.25']) / df['Time_1.25']
+df = df[df['% Diff'] >= .05]
+
+# keep rows where n_threads is 1
+df = df[df['n_threads'] == 16]
+# drop time columns
+df.drop(columns=['Time_2.0', 'Time_1.25', 'n_threads'], inplace=True)
+df.drop(columns=['% Diff', 'Size', 'Num_pts', 'Volume', 'Unnamed: 0'], inplace=True)
+# group by NUFFT_type, Data_type, Density, Dim, n_threads to make print more readable, concatenate values do not aggregate them
+# Group by specific columns and concatenate values instead of aggregating them
+grouped_df = df.groupby(['NUFFT_type', 'Data_type', 'Density', 'Dim'], as_index=False).agg(lambda x: ', '.join(x))
+# grouped_df['Epsilon'] = grouped_df['Epsilon'].apply(lambda x: "{:.1e}".format(float(x)))
+
+# sort by NUFFT type first, then Dim and then Density
+grouped_df = grouped_df.sort_values(by=['NUFFT_type', 'Data_type', 'Dim', 'Density'])
+grouped_df = grouped_df[['NUFFT_type', 'Data_type', 'Dim', 'Density', 'Epsilon']]
+
+# Display the final dataset
+print(grouped_df.to_string(index=False))
diff --git a/devel/wisdom.py b/devel/wisdom.py
@@ -0,0 +1,199 @@
+import numpy as np
+import finufft
+import timeit
+import statistics
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+import sys
+
+sys.stdout.reconfigure(line_buffering=True)  # Ensures auto-flushing
+
+
+# Global list for collecting detailed benchmark rows
+all_results = []
+
+def benchmark_function(func, *args, runs=5, **kwargs):
+    """Runs the function multiple times and returns average & std dev."""
+    runtimes = [
+        timeit.timeit(lambda: func(*args, **kwargs), number=1)
+        for _ in range(runs)
+    ]
+    avg_runtime = statistics.mean(runtimes)
+    stdev_runtime = statistics.stdev(runtimes) if runs > 1 else 0.0
+    return avg_runtime, stdev_runtime
+
+# cache functiojn results with decorator
+def generate_random_data(nufft_type, nufft_sizes, num_pts, dtype):
+    """Generates random NUFFT input data in the correct dtype."""
+    dim = len(nufft_sizes)
+    rng = np.random.Generator(np.random.SFC64(42))
+    # Set the correct complex dtype
+    complex_dtype = np.complex64 if dtype == np.float32 else np.complex128
+
+    # Generate data for nonuniform points and coefficients
+    x = [np.array(2 * np.pi * rng.random(num_pts) - np.pi, dtype=dtype) for _ in range(dim)]
+    c = np.array(rng.random(num_pts) + 1j * rng.random(num_pts), dtype=complex_dtype)
+
+    f = rng.standard_normal(nufft_sizes, dtype=dtype) + 1j * rng.standard_normal(nufft_sizes, dtype=dtype) if nufft_type == 2 else None
+    d = [np.array(2 * np.pi * rng.random(num_pts) - np.pi, dtype=dtype) for _ in range(dim)] if nufft_type == 3 else None
+    return x, c, f, d
+
+def run_nufft(nufft_type, nufft_sizes, epsilon, n_threads, upsampfac, x, c, f, d):
+    """Runs NUFFT with the correct dtype and parameters."""
+    opts = {'nthreads': n_threads, 'upsampfac': upsampfac, 'debug': 0}
+    dim = len(nufft_sizes)
+
+    if nufft_type == 1:
+        if dim == 1:
+            return finufft.nufft1d1(x[0], c, nufft_sizes[0], eps=epsilon, **opts)
+        elif dim == 2:
+            return finufft.nufft2d1(x[0], x[1], c, nufft_sizes, eps=epsilon, **opts)
+        elif dim == 3:
+            return finufft.nufft3d1(x[0], x[1], x[2], c, nufft_sizes, eps=epsilon, **opts)
+    elif nufft_type == 2:
+        if dim == 1:
+            return finufft.nufft1d2(x[0], f, eps=epsilon, **opts)
+        elif dim == 2:
+            return finufft.nufft2d2(x[0], x[1], f, eps=epsilon, **opts)
+        elif dim == 3:
+            return finufft.nufft3d2(x[0], x[1], x[2], f, eps=epsilon, **opts)
+    elif nufft_type == 3:
+        if dim == 1:
+            return finufft.nufft1d3(x[0], c, d[0], eps=epsilon, **opts)
+        elif dim == 2:
+            return finufft.nufft2d3(x[0], x[1], c, d[0], d[1], eps=epsilon, **opts)
+        elif dim == 3:
+            return finufft.nufft3d3(x[0], x[1], x[2], c, d[0], d[1], d[2], eps=epsilon, **opts)
+
+    else:
+        raise ValueError("Invalid NUFFT type. Use 1, 2, or 3.")
+
+def benchmark_nufft_collection(nufft_type, nufft_sizes, num_pts, epsilons, n_threads, upsampfacs, dtype, runs=5,
+                               description=""):
+    """Runs benchmarks while measuring performance across densities and records detailed results."""
+    results = {upsamp: [] for upsamp in upsampfacs}
+    x, c, f, d = generate_random_data(nufft_type, nufft_sizes, num_pts, dtype)
+
+    # Compute density
+    size_product = np.prod(nufft_sizes)
+    density = num_pts / size_product
+
+    title = (f"NUFFT Type {nufft_type}, {len(nufft_sizes)}D, {dtype.__name__} "
+             f"(Size: {'x'.join(map(str, nufft_sizes))}, Num Pts: {num_pts}, Density: {density:.2f}, Threads: {n_threads})")
+    print(f"\n=== DEBUG: {title} ===")
+    print(f"{'Epsilon':<10} | {'1.25s':<12} | {'2.0s':<12} | % Diff")
+
+    for epsilon in epsilons:
+        runtimes = {}
+        for upsamp in upsampfacs:
+            avg_runtime, _ = benchmark_function(
+                run_nufft, nufft_type, nufft_sizes, epsilon, n_threads, upsamp, x, c, f, d, runs=runs
+            )
+            runtimes[upsamp] = avg_runtime
+            results[upsamp].append(avg_runtime)
+
+        diff = ((runtimes[2.0] - runtimes[1.25]) / runtimes[1.25]) * 100
+        print(f"{epsilon:<10.1e} | {runtimes[1.25]:<12.6f} | {runtimes[2.0]:<12.6f} | {diff:+.1f}%")
+
+        # Append a row of results for this epsilon value
+        all_results.append({
+            'Epsilon': epsilon,
+            'Time_1.25': runtimes[1.25],
+            'Time_2.0': runtimes[2.0],
+            '% Diff': diff,
+            'NUFFT_type': nufft_type,
+            'Data_type': dtype.__name__,
+            'Size': 'x'.join(map(str, nufft_sizes)),
+            'Num_pts': num_pts,
+            'Density': density,
+            'n_threads': n_threads
+        })
+
+    plot_benchmark_results(results, epsilons, upsampfacs, title, density)
+    return results
+
+def plot_benchmark_results(results, epsilons, upsampfacs, title, density):
+    """Plots performance results while including density information."""
+    x_axis = np.arange(len(epsilons))
+    width = 0.35  # Bar width
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+    colors = ['tab:blue', 'tab:orange']
+
+    for i, upsamp in enumerate(upsampfacs):
+        ax.bar(x_axis + (i - 0.5) * width, results[upsamp], width, label=f"upsampfac = {upsamp}", color=colors[i])
+
+    ax.set_xlabel("Epsilon")
+    ax.set_ylabel("Average Runtime (s)")
+    ax.set_title(f"{title} - Density {density:.2f}")
+    ax.set_xticks(x_axis)
+    ax.set_xticklabels([f"{eps:.0e}" for eps in epsilons])
+    ax.set_yscale("log")
+    ax.legend(loc='upper left', bbox_to_anchor=(1.05, 1))
+    plt.tight_layout()
+
+    os.makedirs("plots", exist_ok=True)
+    filename = f"plots/{title.replace(' ', '_').replace(',', '').replace(':', '').replace('.', '_')}.png"
+    plt.savefig(filename)
+    # plt.show()
+
+# Define parameters for benchmarking
+upsampfacs = [2.0, 1.25]
+runs = 5
+
+for n_threads in reversed([16]):
+    # total_elements = 100**3 if n_threads == 1 else 216**3
+    total_elements = 100**3 if n_threads == 1 else 216**3
+    # Select test dimensions for 1D, 2D, and 3D
+    size_1d = (int(total_elements),)
+    size_2d = (int(np.sqrt(total_elements)), int(np.sqrt(total_elements)))
+    size_3d = (int(np.cbrt(total_elements)), int(np.cbrt(total_elements)), int(np.cbrt(total_elements)))
+
+    # Define num_pts range: starts with 1/16th of volume, ends with 1024x the volume
+    volume_1d = np.prod(size_1d)
+    volume_2d = np.prod(size_2d)
+    volume_3d = np.prod(size_3d)
+
+    num_pts_range = lambda volume: [volume // 16 * (2**i) for i in range(10)]
+
+
+    test_cases = []
+    for nufft_type in [2, 1]:
+            for size, desc in [(size_1d, "1D"), (size_2d, "2D"), (size_3d, "3D")]:
+                for num_pts in reversed(num_pts_range(np.prod(size))):
+                    test_cases.append({
+                        "nufft_type": nufft_type,
+                        "nufft_sizes": size,
+                        "num_pts": num_pts,
+                        "n_threads": n_threads,
+                        "description": f"NUFFT Type {nufft_type}, {desc}, Threads {n_threads}, Size {'x'.join(map(str, size))}, Num Pts {num_pts}"
+                    })
+
+# Run benchmarks and generate plots for each test case and for both float32 and float64.
+for case in test_cases:
+    for dtype in [np.float32, np.float64]:
+        epsilons = np.logspace(-1, -6, num=6) if dtype == np.float32 else np.logspace(-1, -9, num=9)
+
+        print(f'RUNNING TEST CASE : {case["description"]} with dtype : {dtype.__name__} epsilons : {epsilons}')
+        benchmark_nufft_collection(
+            case["nufft_type"],
+            case["nufft_sizes"],
+            case["num_pts"],
+            epsilons,
+            case["n_threads"],
+            upsampfacs,
+            dtype,
+            runs=runs,
+            description=case["description"]
+        )
+
+# After all benchmarks are done, build and print the final results table.
+df = pd.DataFrame(all_results)
+# Reorder columns as desired.
+df = df[['Epsilon', 'Time_1.25', 'Time_2.0', '% Diff', 'NUFFT_type', 'Data_type', 'Size', 'Num_pts', 'Density', 'n_threads']]
+print("\nFinal Benchmark Results:")
+print(df.to_string(index=False))
+df.to_csv('wisdom.csv')
+df.to_latex('wisdom.tex')
+df.to_markdown('wisdom.md')