Skip to content

Commit

Permalink
feat: allow generate_permutation_polynomials calls for 3 columns input
Browse files Browse the repository at this point in the history
  • Loading branch information
robik75 committed Sep 26, 2024
1 parent bd57a9f commit 0c2ed06
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/pn.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
cudaMemPool_t pool = cfg.mem_pool;
cudaStream_t stream = cfg.stream;
unsigned int columns_count = cfg.columns_count;
assert(columns_count == 4);
unsigned int log_rows_count = cfg.log_rows_count;
const unsigned cells_count = columns_count << log_rows_count;
const unsigned bits_count = log2_ceiling(columns_count) + log_rows_count;
Expand All @@ -31,7 +30,14 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
unsigned_ints sorted_values;

HANDLE_CUDA_ERROR(allocate(unsorted_keys, cells_count, pool, stream));
HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
// Dispatch to the transpose instantiation that matches the runtime column count.
// Every case ends with `break`: without it the 3-column path falls through and
// also runs transpose<4> on a buffer that was allocated for only
// columns_count << log_rows_count cells (presumably an out-of-bounds access --
// the kernel body is not visible here, confirm).
switch (columns_count) {
case 3:
  HANDLE_CUDA_ERROR(transpose<3>(unsorted_keys, cfg.indexes, log_rows_count, stream));
  break;
case 4:
  HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
  break;
default:
  // Unsupported column count: trap in debug builds, and report an error in
  // release builds (where assert is compiled out) rather than continuing with
  // keys that were never written.
  assert(columns_count == 3 || columns_count == 4);
  return cudaErrorInvalidValue;
}
HANDLE_CUDA_ERROR(allocate(unsorted_values, cells_count, pool, stream));
HANDLE_CUDA_ERROR(fill_transposed_range(unsorted_values, columns_count, log_rows_count, stream));
HANDLE_CUDA_ERROR(allocate(sorted_keys, cells_count, pool, stream));
Expand Down
1 change: 1 addition & 0 deletions src/pn_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ template <unsigned COL_COUNT> cudaError_t transpose(unsigned *dst, const unsigne
return cudaGetLastError();
}

// Explicit instantiations of transpose for the supported column counts (3 and 4).
// NOTE(review): presumably needed because the template body is defined only in
// this translation unit while the callers live in another file -- confirm.
template cudaError_t transpose<3>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
template cudaError_t transpose<4>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
#undef BLOCK_SIZE

Expand Down

0 comments on commit 0c2ed06

Please sign in to comment.