Skip to content

Commit

Permalink
feat: 3-columns permutation matrix (#17)
Browse files Browse the repository at this point in the history
# What ❔

This PR adds the possibility to generate a 3-column permutation
matrix.

## Why ❔

This feature is needed for the fflonk prover implementation.
  • Loading branch information
robik75 authored Nov 12, 2024
1 parent c66a9ba commit 971ab2a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/pn.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
cudaMemPool_t pool = cfg.mem_pool;
cudaStream_t stream = cfg.stream;
unsigned int columns_count = cfg.columns_count;
assert(columns_count == 4);
unsigned int log_rows_count = cfg.log_rows_count;
const unsigned cells_count = columns_count << log_rows_count;
const unsigned bits_count = log2_ceiling(columns_count) + log_rows_count;
Expand All @@ -31,7 +30,16 @@ cudaError_t generate_permutation_polynomials(const generate_permutation_polynomi
unsigned_ints sorted_values;

HANDLE_CUDA_ERROR(allocate(unsorted_keys, cells_count, pool, stream));
HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
switch (columns_count) {
case 3:
HANDLE_CUDA_ERROR(transpose<3>(unsorted_keys, cfg.indexes, log_rows_count, stream));
break;
case 4:
HANDLE_CUDA_ERROR(transpose<4>(unsorted_keys, cfg.indexes, log_rows_count, stream));
break;
default:
assert(columns_count == 3 || columns_count == 4);
}
HANDLE_CUDA_ERROR(allocate(unsorted_values, cells_count, pool, stream));
HANDLE_CUDA_ERROR(fill_transposed_range(unsorted_values, columns_count, log_rows_count, stream));
HANDLE_CUDA_ERROR(allocate(sorted_keys, cells_count, pool, stream));
Expand Down
1 change: 1 addition & 0 deletions src/pn_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ template <unsigned COL_COUNT> cudaError_t transpose(unsigned *dst, const unsigne
return cudaGetLastError();
}

template cudaError_t transpose<3>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
template cudaError_t transpose<4>(unsigned *dst, const unsigned *src, unsigned log_rows_count, cudaStream_t stream);
#undef BLOCK_SIZE

Expand Down

0 comments on commit 971ab2a

Please sign in to comment.