Skip to content

Commit 59722a4

Browse files
committed
feat: improve fgn cuda impl
1 parent 1cd8dea commit 59722a4

9 files changed

Lines changed: 32 additions & 15 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ time = { version = "0.3.36", features = [
6969
tokio-test = "0.4.4"
7070
tracing = "0.1.40"
7171
tracing-test = "0.2.5"
72-
wide = "0.8.2"
72+
wide = "1.1.0"
7373
yahoo_finance_api = { version = "2.3.0", optional = true }
7474
arc-swap = "1.7.1"
7575

src/stochastic/cuda/fgn.cu

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,7 @@ __global__ void scale_and_copy_to_output(
3737
int n,
3838
int m,
3939
int offset,
40-
float hurst,
41-
float t)
40+
float scale)
4241
{
4342
int out_size = n - offset;
4443
int tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -47,7 +46,6 @@ __global__ void scale_and_copy_to_output(
4746
int traj_id = tid / out_size;
4847
int idx = tid % out_size;
4948
int data_idx = traj_id * (2 * n) + (idx + 1);
50-
float scale = powf((float)n, -hurst) * powf(t, hurst);
5149
d_output[tid] = d_data[data_idx].x * scale;
5250
}
5351

@@ -57,8 +55,7 @@ extern "C" EXPORT void fgn_kernel(
5755
int n,
5856
int m,
5957
int offset,
60-
float hurst,
61-
float t,
58+
float scale,
6259
unsigned long seed)
6360
{
6461
int traj_size = 2 * n;
@@ -83,8 +80,8 @@ extern "C" EXPORT void fgn_kernel(
8380
int totalThreads = m * out_size;
8481
int blockSize = 512;
8582
int gridSize = (totalThreads + blockSize - 1) / blockSize;
86-
scale_and_copy_to_output<<<gridSize, blockSize>>>(d_data, d_output, n, m, offset, hurst, t);
83+
scale_and_copy_to_output<<<gridSize, blockSize>>>(d_data, d_output, n, m, offset, scale);
8784
cudaDeviceSynchronize();
8885
}
8986
cudaFree(d_data);
90-
}
87+
}
-827 Bytes
Binary file not shown.
-1.84 KB
Binary file not shown.
527 KB
Binary file not shown.
-73 KB
Binary file not shown.
30 Bytes
Binary file not shown.
0 Bytes
Binary file not shown.

src/stochastic/noise/fgn.rs

Lines changed: 27 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -104,10 +104,11 @@ impl SamplingExt<f64> for FGN<f64> {
104104

105105
#[cfg(feature = "cuda")]
106106
fn sample_cuda(&self) -> Result<Either<Array1<f64>, Array2<f64>>> {
107+
// nvcc -O3 -use_fast_math -shared fgn.cu -o ./fgn_linux/libfgn.so -Xcompiler -fPIC -lcufft -lcurand
107108
// nvcc -shared -Xcompiler -fPIC fgn.cu -o libfgn.so -lcufft // ELF header error
108109
// nvcc -shared -o libfgn.so fgn.cu -Xcompiler -fPIC
109110
// nvcc -O3 -use_fast_math -o libfgn.so fgn.cu -Xcompiler -fPIC
110-
// nvcc -shared fgn.cu -o fgn.dll -lcufft
111+
// nvcc -shared fgn.cu -o ./fgn_windows/fgn.dll -lcufft
111112
use std::ffi::c_void;
112113

113114
use cudarc::driver::{CudaDevice, DevicePtr, DevicePtrMut, DeviceRepr};
@@ -133,8 +134,7 @@ impl SamplingExt<f64> for FGN<f64> {
133134
/* n: */ i32,
134135
/* m: */ i32,
135136
/* offset: */ i32,
136-
/* hurst: */ f32,
137-
/* t: */ f32,
137+
/* scale: */ f32,
138138
/* seed: */ u64,
139139
);
140140

@@ -152,6 +152,7 @@ impl SamplingExt<f64> for FGN<f64> {
152152
let offset = self.offset;
153153
let hurst = self.hurst;
154154
let t = self.t.unwrap_or(1.0);
155+
let scale = (n as f32).powf(-(hurst as f32)) * (t as f32).powf(hurst as f32);
155156
let mut rng = rand::thread_rng();
156157
let seed: u64 = rng.gen();
157158

@@ -173,8 +174,7 @@ impl SamplingExt<f64> for FGN<f64> {
173174
n as i32,
174175
m as i32,
175176
offset as i32,
176-
hurst as f32,
177-
t as f32,
177+
scale,
178178
seed,
179179
);
180180
}
@@ -359,21 +359,41 @@ mod tests {
359359
let fgn = fbm.sample_cuda().unwrap();
360360
let fgn = fgn.left().unwrap();
361361
plot_1d!(fgn, "Fractional Brownian Motion (H = 0.7)");
362+
use crate::plot_2d;
363+
364+
let fgn = FGN::<f64>::new(0.7, 500, Some(1.0), Some(1));
365+
let fgn = fgn.sample_cuda().unwrap();
366+
let fgn_left = fgn.left().unwrap();
367+
plot_1d!(fgn_left, "Fractional Brownian Motion (H = 0.7)");
362368
let mut path = Array1::<f64>::zeros(500);
363369
for i in 1..500 {
364-
path[i] += path[i - 1] + fgn[i];
370+
path[i] += path[i - 1] + fgn_left[i];
365371
}
366372
plot_1d!(path, "Fractional Brownian Motion (H = 0.7)");
367373

374+
let fgn = FGN::<f64>::new(0.7, 5000, Some(1.0), Some(10000));
368375
let start = std::time::Instant::now();
369376
let _ = fbm.sample_cuda();
377+
let res = fgn.sample_cuda().unwrap();
370378
let end = start.elapsed().as_millis();
371379
tracing::info!("10000 fgn generated on cuda in: {end}");
380+
// slice first 2 rows
381+
let paths = res.right().unwrap();
382+
let paths = paths.slice(s![..2, ..]);
383+
plot_2d!(paths.row(0), "Path 1", paths.row(1), "Path 2");
384+
let mut fbm1 = Array1::<f64>::zeros(5000);
385+
let mut fbm2 = Array1::<f64>::zeros(5000);
386+
for i in 1..5000 {
387+
fbm1[i] += fbm1[i - 1] + paths.row(0)[i];
388+
fbm2[i] += fbm2[i - 1] + paths.row(1)[i];
389+
}
390+
plot_2d!(fbm1, "FBM Path 1", fbm2, "FBM Path 2");
372391

373392
let start = std::time::Instant::now();
374393
let _ = fbm.sample_par();
394+
let _ = fgn.sample_par();
375395
let end = start.elapsed().as_millis();
376-
tracing::info!("10000 fgn generated on cuda in: {end}");
396+
tracing::info!("10000 fgn generated on cpu in: {end}");
377397
}
378398

379399
#[test]

0 commit comments

Comments
 (0)