From 8be16f5abfc5454921e5bf20104c7563e47b9afe Mon Sep 17 00:00:00 2001
From: Zach Corse <zcorse@nvidia.com>
Date: Thu, 6 Jun 2024 13:18:24 -0700
Subject: [PATCH] randn fix

---
 CHANGELOG.md            |  1 +
 warp/native/rand.h      |  6 +++++-
 warp/tests/test_rand.py | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5927f6edc..450d1c845 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@
 - Support headless rendering of `OpenGLRenderer` via `pyglet.options["headless"] = True`
 - `RegisteredGLBuffer` can fall back to CPU-bound copying if CUDA/OpenGL interop is not available
 - Fix to forward `wp.copy()` params to gradient and adjoint copy function calls.
+- Fix so that `wp.randn()` doesn't return inf
 - Fix slicing of arrays with gradients in kernels
 
 ## [1.1.1] - 2024-05-24
diff --git a/warp/native/rand.h b/warp/native/rand.h
index 3e364d548..e42b40a72 100644
--- a/warp/native/rand.h
+++ b/warp/native/rand.h
@@ -13,6 +13,10 @@
 #define M_PI_F 3.14159265358979323846f
 #endif
 
+#ifndef LOG_EPSILON
+#define LOG_EPSILON 5.96e-8f
+#endif
+
 namespace wp
 {
 
@@ -33,7 +37,7 @@ inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); retur
 inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; }
 
 // Box-Muller method
-inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); }
+inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state) + LOG_EPSILON)) * cos(2.f * M_PI_F * randf(state)); }
 
 inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
 inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
diff --git a/warp/tests/test_rand.py b/warp/tests/test_rand.py
index afb4f5f02..948ce1c3b 100644
--- a/warp/tests/test_rand.py
+++ b/warp/tests/test_rand.py
@@ -108,6 +108,46 @@ def test_rand(test, device):
     test.assertTrue(err < 1e-04)
 
 
+@wp.kernel
+def randn_kernel(
+    x: wp.array(dtype=float),
+):
+    tid = wp.tid()
+    r = wp.rand_init(tid)
+    x[tid] = wp.randn(r)
+
+
+def test_randn(test, device):
+    N = 100000000
+
+    samples = wp.zeros(N, dtype=float, device=device)
+
+    wp.launch(randn_kernel, inputs=[samples], dim=N, device=device)
+
+    randn_samples = samples.numpy()
+
+    test.assertFalse(np.any(np.isinf(randn_samples)))
+    test.assertFalse(np.any(np.isnan(randn_samples)))
+
+    randn_true = np.array(
+        [
+            -1.8213255,
+            0.27881497,
+            -1.1122388,
+            0.5936895,
+            0.04976363,
+            0.69087356,
+            0.2935363,
+            0.8405019,
+            -0.8436684,
+            0.53108305,
+        ]
+    )
+
+    err = np.max(np.abs(randn_samples[0:10] - randn_true))
+    test.assertTrue(err < 1e-04)
+
+
 @wp.kernel
 def sample_cdf_kernel(kernel_seed: int, cdf: wp.array(dtype=float), samples: wp.array(dtype=int)):
     tid = wp.tid()
@@ -273,6 +313,7 @@ class TestRand(unittest.TestCase):
 
 
 add_function_test(TestRand, "test_rand", test_rand, devices=devices)
+add_function_test(TestRand, "test_randn", test_randn, devices=devices)
 add_function_test(TestRand, "test_sample_cdf", test_sample_cdf, devices=devices)
 add_function_test(TestRand, "test_sampling_methods", test_sampling_methods, devices=devices)
 add_function_test(TestRand, "test_poisson", test_poisson, devices=devices)