Skip to content

Commit

Permalink
cherry-pick PR630 8433686
Browse files Browse the repository at this point in the history
  • Loading branch information
lu1and10 committed Feb 12, 2025
1 parent 1fea254 commit bd0ed1b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 12 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ List of features / changes made / release notes, in reverse chronological order.
If not stated, FINUFFT is assumed (cuFINUFFT <=1.3 is listed separately).


V 2.3.2 (2/12/25) support release

* Increase cufinufft 1d cmake test size and fix 1d spreader subprob sm kernel.

V 2.3.1 (11/25/24, minor support release)

* Support and docs for opts.gpu_spreadinterponly=1 for MRI "density compensation
Expand Down
19 changes: 12 additions & 7 deletions src/cuda/1d/spreadinterp1d.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ __global__ void spread_1d_subprob(
const int nupts = min(maxsubprobsize, bin_size[bidx] - binsubp_idx * maxsubprobsize);
const int xoffset = (bidx % nbinx) * bin_size_x;
const auto ns_2 = (ns + 1) / 2;
const int N = bin_size_x + 2 * ns_2;
const auto rounded_ns = ns_2 * 2;
const int N = bin_size_x + rounded_ns;

// dynamic stack allocation
#if ALLOCA_SUPPORTED
Expand All @@ -124,18 +125,22 @@ __global__ void spread_1d_subprob(
__syncthreads();

for (auto i = threadIdx.x; i < nupts; i += blockDim.x) {
const auto idx = ptstart + i;
const auto x_rescaled = fold_rescale(x[idxnupts[idx]], nf1);
const auto cnow = c[idxnupts[idx]];
const auto [xstart, xend] = interval(ns, x_rescaled);
const T x1 = T(xstart + xoffset) - x_rescaled;
const auto idx = ptstart + i;
const auto x_rescaled = fold_rescale(x[idxnupts[idx]], nf1);
const auto cnow = c[idxnupts[idx]];
auto [xstart, xend] = interval(ns, x_rescaled);

const T x1 = T(xstart) - x_rescaled;
xstart -= xoffset;
xend -= xoffset;

if constexpr (KEREVALMETH == 1)
eval_kernel_vec_horner(ker1, x1, ns, sigma);
else
eval_kernel_vec(ker1, x1, ns, es_c, es_beta);
for (int xx = xstart; xx <= xend; xx++) {
const auto ix = xx + ns_2;
if (ix >= (bin_size_x + ns_2) || ix < 0) break;
if (ix >= (bin_size_x + rounded_ns) || ix < 0) break;
const cuda_complex<T> result{cnow.x * ker1[xx - xstart],
cnow.y * ker1[xx - xstart]};
atomicAddComplexShared<T>(fwshared + ix, result);
Expand Down
10 changes: 5 additions & 5 deletions test/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@ foreach(srcfile ${test_src})
endif()
target_compile_features(${executable} PUBLIC cxx_std_17)
set_target_properties(
${executable} PROPERTIES LINKER_LANGUAGE CUDA CUDA_ARCHITECTURES
"${FINUFFT_CUDA_ARCHITECTURES}")
${executable} PROPERTIES LINKER_LANGUAGE CUDA
CUDA_ARCHITECTURES "${FINUFFT_CUDA_ARCHITECTURES}")
message(STATUS "Adding test ${executable}"
" with CUDA_ARCHITECTURES=${FINUFFT_CUDA_ARCHITECTURES}"
" and INCLUDE=${CUFINUFFT_INCLUDE_DIRS}")
endforeach()

function(add_tests PREC REQ_TOL CHECK_TOL UPSAMP)
add_test(NAME cufinufft1d1_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 1 1 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
COMMAND cufinufft1d_test 1 1 2e3 4e3 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d1_test_SM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 2 1 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
COMMAND cufinufft1d_test 2 1 2e3 4e3 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft1d2_test_GM_${PREC}_${UPSAMP}
COMMAND cufinufft1d_test 1 2 1e2 2e2 ${REQ_TOL} ${CHECK_TOL} ${PREC}
COMMAND cufinufft1d_test 1 2 2e3 4e3 ${REQ_TOL} ${CHECK_TOL} ${PREC}
${UPSAMP})

add_test(NAME cufinufft2d1_test_GM_${PREC}_${UPSAMP}
Expand Down

0 comments on commit bd0ed1b

Please sign in to comment.