From 8be16f5abfc5454921e5bf20104c7563e47b9afe Mon Sep 17 00:00:00 2001 From: Zach Corse Date: Thu, 6 Jun 2024 13:18:24 -0700 Subject: [PATCH] randn fix --- CHANGELOG.md | 1 + warp/native/rand.h | 6 +++++- warp/tests/test_rand.py | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5927f6edc..450d1c845 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - Support headless rendering of `OpenGLRenderer` via `pyglet.options["headless"] = True` - `RegisteredGLBuffer` can fall back to CPU-bound copying if CUDA/OpenGL interop is not available - Fix to forward `wp.copy()` params to gradient and adjoint copy function calls. +- Fix so that `wp.randn()` doesn't return inf - Fix slicing of arrays with gradients in kernels ## [1.1.1] - 2024-05-24 diff --git a/warp/native/rand.h b/warp/native/rand.h index 3e364d548..e42b40a72 100644 --- a/warp/native/rand.h +++ b/warp/native/rand.h @@ -13,6 +13,10 @@ #define M_PI_F 3.14159265358979323846f #endif +#ifndef LOG_EPSILON +#define LOG_EPSILON 5.96e-8f +#endif + namespace wp { @@ -33,7 +37,7 @@ inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); retur inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; } // Box-Muller method -inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); } +inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state) + LOG_EPSILON)) * cos(2.f * M_PI_F * randf(state)); } inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {} inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {} diff --git a/warp/tests/test_rand.py b/warp/tests/test_rand.py index afb4f5f02..948ce1c3b 100644 --- a/warp/tests/test_rand.py +++ b/warp/tests/test_rand.py @@ -108,6 +108,46 @@ def test_rand(test, device): test.assertTrue(err < 1e-04) +@wp.kernel +def randn_kernel( + x: wp.array(dtype=float), +): + tid = wp.tid() + r = wp.rand_init(tid) + x[tid] = wp.randn(r) + + +def test_randn(test, device): + N = 100000000 + + samples = wp.zeros(N, dtype=float, device=device) + + wp.launch(randn_kernel, inputs=[samples], dim=N, device=device) + + randn_samples = samples.numpy() + + test.assertFalse(np.any(np.isinf(randn_samples))) + test.assertFalse(np.any(np.isnan(randn_samples))) + + randn_true = np.array( + [ + -1.8213255, + 0.27881497, + -1.1122388, + 0.5936895, + 0.04976363, + 0.69087356, + 0.2935363, + 0.8405019, + -0.8436684, + 0.53108305, + ] + ) + + err = np.max(np.abs(randn_samples[0:10] - randn_true)) + test.assertTrue(err < 1e-04) + + @wp.kernel def sample_cdf_kernel(kernel_seed: int, cdf: wp.array(dtype=float), samples: wp.array(dtype=int)): tid = wp.tid() @@ -273,6 +313,7 @@ class TestRand(unittest.TestCase): add_function_test(TestRand, "test_rand", test_rand, devices=devices) +add_function_test(TestRand, "test_randn", test_randn, devices=devices) add_function_test(TestRand, "test_sample_cdf", test_sample_cdf, devices=devices) add_function_test(TestRand, "test_sampling_methods", test_sampling_methods, devices=devices) add_function_test(TestRand, "test_poisson", test_poisson, devices=devices)