Skip to content

Commit 3209b9f

Browse files
committed
Fixed multicore for z_bind
1 parent c7508d3 commit 3209b9f

File tree

3 files changed

+96
-73
lines changed

3 files changed

+96
-73
lines changed

spartan_parallel/src/custom_dense_mlpoly.rs

Lines changed: 73 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
use std::cmp::min;
33

44
use crate::dense_mlpoly::DensePolynomial;
5+
use crate::math::Math;
56
use crate::scalar::SpartanExtensionField;
67
use rayon::prelude::*;
78

89
const MODE_P: usize = 1;
910
const MODE_Q: usize = 2;
1011
const MODE_W: usize = 3;
1112
const MODE_X: usize = 4;
12-
const NUM_MULTI_THREAD_CORES: usize = 32;
1313

1414
// Customized Dense ML Polynomials for Data-Parallelism
1515
// These dense ML polys aim for space efficiency by removing the 0s for invalid (p, q, w, x) quadruples
@@ -30,6 +30,40 @@ pub struct DensePolynomialPqx<S: SpartanExtensionField> {
3030
// The same applies to X
3131
}
3232

33+
fn fold_rq<S: SpartanExtensionField>(proofs: &mut [Vec<Vec<S>>], r_q: &[S], step: usize, mut q: usize, w: usize, x: usize) {
34+
for r in r_q {
35+
let r1 = S::field_one() - r.clone();
36+
let r2 = r.clone();
37+
38+
q = q.div_ceil(2);
39+
(0..q).for_each(|q| {
40+
(0..w).for_each(|w| {
41+
(0..x).for_each(|x| {
42+
proofs[q * step][w][x] = r1 * proofs[2 * q * step][w][x] + r2 * proofs[(2 * q + 1) * step][w][x];
43+
});
44+
});
45+
});
46+
}
47+
48+
/*
49+
if lvl > final_lvl {
50+
fold_rq(proofs, r_q, 2 * idx, step, lvl - 1, final_lvl, w, x);
51+
fold_rq(proofs, r_q, 2 * idx + step, step, lvl - 1, final_lvl, w, x);
52+
53+
let r1 = S::field_one() - r_q[lvl - 1];
54+
let r2 = r_q[lvl - 1];
55+
56+
(0..w).for_each(|w| {
57+
(0..x).for_each(|x| {
58+
proofs[idx][w][x] = r1 * proofs[idx * 2][w][x] + r2 * proofs[idx * 2 + step][w][x];
59+
});
60+
});
61+
} else {
62+
// base level. do nothing
63+
}
64+
*/
65+
}
66+
3367
impl<S: SpartanExtensionField> DensePolynomialPqx<S> {
3468
// Assume z_mat is of form (p, q_rev, x_rev), construct DensePoly
3569
pub fn new(
@@ -207,7 +241,7 @@ impl<S: SpartanExtensionField> DensePolynomialPqx<S> {
207241
}
208242

209243
// Bound the entire "q" section to r_q in reverse
210-
pub fn bound_poly_vars_rq(
244+
pub fn bound_poly_vars_rq_parallel(
211245
&mut self,
212246
r_q: &[S],
213247
) {
@@ -218,50 +252,47 @@ impl<S: SpartanExtensionField> DensePolynomialPqx<S> {
218252
.enumerate()
219253
.map(|(p, mut inst)| {
220254
let num_proofs = self.num_proofs[p];
221-
let dist_size = num_proofs / min(num_proofs, NUM_MULTI_THREAD_CORES); // distributed number of proofs on each thread
255+
let dist_size = num_proofs / min(num_proofs, rayon::current_num_threads().next_power_of_two()); // distributed number of proofs on each thread
222256
let num_threads = num_proofs / dist_size;
223257

224258
// To perform rigorous parallelism, both num_proofs and # threads must be powers of 2
225259
// # threads must fully divide num_proofs for even distribution
226-
assert!(num_proofs & (num_proofs - 1) == 0);
227-
assert!(num_threads & (num_threads - 1) == 0);
260+
assert_eq!(num_proofs, num_proofs.next_power_of_two());
261+
assert_eq!(num_threads, num_threads.next_power_of_two());
228262

229263
// Determine parallelism levels
230-
let levels = num_proofs.trailing_zeros() as usize; // total layers
231-
let sub_levels = dist_size.trailing_zeros() as usize; // parallelism layers
232-
let final_levels = num_threads.trailing_zeros() as usize; // single core final layers
264+
let levels = num_proofs.log_2(); // total layers
265+
let sub_levels = dist_size.log_2(); // parallel layers
266+
let final_levels = num_threads.log_2(); // single core final layers
233267
let left_over_q_len = r_q.len() - levels; // if r_q.len() > log2(num_proofs)
234268

235269
// single proof matrix dimension W x X
236270
let num_witness_secs = min(self.num_witness_secs, inst[0].len());
237271
let num_inputs = self.num_inputs[p];
238-
272+
273+
// Divide rq into sub, final, and left_over
274+
let sub_rq = &r_q[0..sub_levels];
275+
let final_rq = &r_q[sub_levels..levels];
276+
let left_over_rq = &r_q[(r_q.len() - left_over_q_len)..r_q.len()];
277+
239278
if sub_levels > 0 {
240-
let thread_split_inst = (0..num_threads)
241-
.map(|_| {
242-
inst.split_off(inst.len() - dist_size)
279+
inst = inst
280+
.par_chunks_mut(dist_size)
281+
.map(|chunk| {
282+
fold_rq(chunk, sub_rq, 1, dist_size, num_witness_secs, num_inputs);
283+
chunk.to_vec()
243284
})
244-
.rev()
245-
.collect::<Vec<Vec<Vec<Vec<S>>>>>();
246-
247-
inst = thread_split_inst
248-
.into_par_iter()
249-
.map(|mut chunk| {
250-
fold(&mut chunk, r_q, 0, 1, sub_levels, 0, num_witness_secs, num_inputs);
251-
chunk
252-
})
253-
.collect::<Vec<Vec<Vec<Vec<S>>>>>()
254-
.into_iter().flatten().collect()
285+
.flatten().collect()
255286
}
256287

257288
if final_levels > 0 {
258289
// aggregate the final result from sub-threads outputs using a single core
259-
fold(&mut inst, r_q, 0, dist_size, final_levels + sub_levels, sub_levels, num_witness_secs, num_inputs);
290+
fold_rq(&mut inst, final_rq, dist_size, num_threads, num_witness_secs, num_inputs);
260291
}
261292

262293
if left_over_q_len > 0 {
263294
// the series of random challenges exceeds the total number of variables
264-
let c = r_q[(r_q.len() - left_over_q_len)..r_q.len()].iter().fold(S::field_one(), |acc, n| acc * (S::field_one() - *n));
295+
let c = left_over_rq.into_iter().fold(S::field_one(), |acc, n| acc * (S::field_one() - *n));
265296
for w in 0..inst[0].len() {
266297
for x in 0..inst[0][0].len() {
267298
inst[0][w][x] *= c;
@@ -275,6 +306,23 @@ impl<S: SpartanExtensionField> DensePolynomialPqx<S> {
275306
self.max_num_proofs /= 2usize.pow(r_q.len() as u32);
276307
}
277308

309+
// Bound the entire "q" section to r_q in reverse
310+
// Must occur after r_q's are bounded
311+
pub fn bound_poly_vars_rq(&mut self,
  r_q: &[S],
) {
  // Binds the challenges one at a time: `bound_poly_q` is called once per element of
  // `r_q`, in slice order. No output is produced; the polynomial is updated in place
  // through `self`.
  for r in r_q {
    self.bound_poly_q(r);
  }
}
325+
278326
// Bound the entire "w" section to r_w in reverse
279327
pub fn bound_poly_vars_rw(&mut self,
280328
r_w: &[S],
@@ -327,22 +375,4 @@ impl<S: SpartanExtensionField> DensePolynomialPqx<S> {
327375
}
328376
DensePolynomial::new(Z_poly)
329377
}
330-
}
331-
332-
fn fold<S: SpartanExtensionField>(proofs: &mut Vec<Vec<Vec<S>>>, r_q: &[S], idx: usize, step: usize, lvl: usize, final_lvl: usize, w: usize, x: usize) {
333-
if lvl > final_lvl {
334-
fold(proofs, r_q, 2 * idx, step, lvl - 1, final_lvl, w, x);
335-
fold(proofs, r_q, 2 * idx + step, step, lvl - 1, final_lvl, w, x);
336-
337-
let r1 = S::field_one() - r_q[lvl - 1];
338-
let r2 = r_q[lvl - 1];
339-
340-
(0..w).for_each(|w| {
341-
(0..x).for_each(|x| {
342-
proofs[idx][w][x] = r1 * proofs[idx * 2][w][x] + r2 * proofs[idx * 2 + step][w][x];
343-
});
344-
});
345-
} else {
346-
// base level. do nothing
347-
}
348378
}

spartan_parallel/src/r1csproof.rs

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -184,34 +184,25 @@ impl<S: SpartanExtensionField + Send + Sync> R1CSProof<S> {
184184
// append input to variables to create a single vector z
185185
let timer_tmp = Timer::new("prove_z_mat_gen");
186186

187-
let z_mat = (0..num_instances)
188-
.into_par_iter()
189-
.map(|p| {
190-
(0..num_proofs[p])
191-
.into_par_iter()
192-
.map(|q| {
193-
(0..witness_secs.len())
194-
.map(|w| {
195-
let ws = witness_secs[w];
196-
let p_w = if ws.w_mat.len() == 1 { 0 } else { p };
197-
let q_w = if ws.w_mat[p_w].len() == 1 { 0 } else { q };
198-
199-
let r_w = if ws.num_inputs[p_w] < num_inputs[p] {
200-
let padding = std::iter::repeat(S::field_zero()).take(num_inputs[p] - ws.num_inputs[p_w]).collect::<Vec<S>>();
201-
let mut r = ws.w_mat[p_w][q_w].clone();
202-
r.extend(padding);
203-
r
204-
} else {
205-
ws.w_mat[p_w][q_w].iter().take(num_inputs[p]).cloned().collect::<Vec<S>>()
206-
};
207-
208-
r_w
209-
})
210-
.collect::<Vec<Vec<S>>>()
211-
})
212-
.collect::<Vec<Vec<Vec<S>>>>()
213-
})
214-
.collect::<Vec<Vec<Vec<Vec<S>>>>>();
187+
let z_mat = (0..num_instances).map(|p| {
188+
(0..num_proofs[p]).into_par_iter().map(|q| {
189+
(0..witness_secs.len()).map(|w| {
190+
let ws = witness_secs[w];
191+
let p_w = if ws.w_mat.len() == 1 { 0 } else { p };
192+
let q_w = if ws.w_mat[p_w].len() == 1 { 0 } else { q };
193+
194+
let r_w = if ws.num_inputs[p_w] < num_inputs[p] {
195+
let padding = std::iter::repeat(S::field_zero()).take(num_inputs[p] - ws.num_inputs[p_w]).collect::<Vec<S>>();
196+
let mut r = ws.w_mat[p_w][q_w].clone();
197+
r.extend(padding);
198+
r
199+
} else {
200+
ws.w_mat[p_w][q_w].iter().take(num_inputs[p]).cloned().collect::<Vec<S>>()
201+
};
202+
r_w
203+
}).collect::<Vec<Vec<S>>>()
204+
}).collect::<Vec<Vec<Vec<S>>>>()
205+
}).collect::<Vec<Vec<Vec<Vec<S>>>>>();
215206
timer_tmp.stop();
216207

217208
// derive the verifier's challenge \tau
@@ -346,13 +337,14 @@ impl<S: SpartanExtensionField + Send + Sync> R1CSProof<S> {
346337
);
347338
timer_tmp.stop();
348339
let timer_tmp = Timer::new("prove_z_bind");
349-
Z_poly.bound_poly_vars_rq(&rq_rev);
340+
Z_poly.bound_poly_vars_rq_parallel(&rq_rev);
350341
timer_tmp.stop();
351342

352343
// An Eq function to match p with rp
353344
let mut eq_p_rp_poly = DensePolynomial::new(EqPolynomial::new(rp).evals());
354345

355346
// Sumcheck 2: (rA + rB + rC) * Z * eq(p) = e
347+
let timer_tmp = Timer::new("prove_sum_check");
356348
let (sc_proof_phase2, ry_rev, _claims_phase2) = R1CSProof::prove_phase_two(
357349
num_rounds_y + num_rounds_w + num_rounds_p,
358350
num_rounds_y,
@@ -367,6 +359,7 @@ impl<S: SpartanExtensionField + Send + Sync> R1CSProof<S> {
367359
&mut Z_poly,
368360
transcript,
369361
);
362+
timer_tmp.stop();
370363
timer_sc_proof_phase2.stop();
371364

372365
// Separate ry into rp, rw, and ry
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
const u32 REPETITION = 1000
1+
const u32 REPETITION = 10000

0 commit comments

Comments
 (0)