#define POPCOUNT(x) (__builtin_popcountl((unsigned long)(x))) /* TODO make these portable */
#define CTZ(x) (__builtin_ctzl((unsigned long)(x)))
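/* The TODO above asks for portable replacements. A minimal sketch of what loop-based
 * fallbacks could look like on compilers without the GCC/Clang builtins (illustrative
 * only, not used by this file; constant-time variants would be preferable in practice):
 */
static size_t popcount_fallback(unsigned long x) {
    size_t count = 0;
    while (x != 0) {
        x &= x - 1; /* clear the lowest set bit */
        count++;
    }
    return count;
}
static size_t ctz_fallback(unsigned long x) {
    size_t count = 0;
    /* like __builtin_ctzl, undefined for x == 0 */
    while ((x & 1) == 0) {
        x >>= 1;
        count++;
    }
    return count;
}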
+ /* Number of scalars that should remain at the end of a recursive proof. The paper
+ * uses 2, by reducing the scalars as far as possible. We stop one recursive step
+ * early, trading two points (L, R) for two scalars, which reduces verification
+ * and prover cost.
+ *
+ * For the most part, all comments assume this value is set to 4.
+ */
#define IP_AB_SCALARS 4

+ /* Bulletproof inner products consist of the four scalars and `2[log2(n) - 1]` points
+ * `a_1`, `a_2`, `b_1`, `b_2`, `L_i` and `R_i`, where `i` ranges from 0 to `log2(n)-1`.
+ *
+ * The prover takes as input a point `P` and scalar `c`. It proves that there exist
+ * scalars `a_i`, `b_i` for `i` ranging from 0 to `n-1`, such that
+ * `P = sum_i [a_i G_i + b_i H_i]` and `<{a_i}, {b_i}> = c`,
+ * where `G_i` and `H_i` are standard NUMS generators.
+ *
+ * Verification of the proof comes down to a single multiexponentiation of the form
+ *
+ *   P + (c - a_1*b_1 - a_2*b_2)*x*G
+ *     - sum_{i=1}^n [s'_i*G_i + s_i*H_i]
+ *     + sum_{i=1}^log2(n) [x_i^-2 L_i + x_i^2 R_i]
+ *
+ * which will equal infinity if the inner product proof is correct. Here
+ * - `G` is the standard secp generator
+ * - `x` is a hash of `commit` and is used to rerandomize `c`. See Protocol 2 vs Protocol 1 in the paper.
+ * - `x_i = H(x_{i-1} || L_i || R_i)`, where `x_{-1}` is passed through the `commit` variable and
+ *   must be a commitment to `P` and `c`.
+ * - `s_i` and `s'_i` are computed as follows.
+ *
+ * Letting `i_j` be defined as 1 if `i & 2^j != 0`, and -1 otherwise,
+ * - For `i` from `1` to `n/2`, `s'_i = a_1 * prod_{j=1}^log2(n) x_j^i_j`
+ * - For `i` from `n/2 + 1` to `n`, `s'_i = a_2 * prod_{j=1}^log2(n) x_j^i_j`
+ * - For `i` from `1` to `n/2`, `s_i = b_1 * prod_{j=1}^log2(n) x_j^-i_j`
+ * - For `i` from `n/2 + 1` to `n`, `s_i = b_2 * prod_{j=1}^log2(n) x_j^-i_j`
+ *
+ * Observe that these can be computed iteratively by labelling the coefficients `s_i` for `i`
+ * from `0` to `2n-1` rather than 1-indexing and distinguishing between `s_i'`s and `s_i`s:
+ *
+ * Start with `s_0 = a_1 * prod_{j=1}^log2(n) x_j^-1`, then for later `s_i`s,
+ * - For `i` from `1` to `n/2 - 1`, multiply some earlier `s_j` by some `x_k^2`
+ * - For `i = n/2`, multiply some earlier `s_j` by `a_2/a_1`.
+ * - For `i` from `n/2 + 1` to `n - 1`, multiply some earlier `s_j` by some `x_k^2`
+ * - For `i = n`, multiply `s_{i-1}` by `b_1/a_2` to get `s_i`.
+ * - For `i` from `n + 1` to `3n/2 - 1`, multiply some earlier `s_j` by some `x_k^-2`
+ * - For `i = 3n/2`, multiply some earlier `s_j` by `b_2/b_1`.
+ * - For `i` from `3n/2 + 1` to `2n - 1`, multiply some earlier `s_j` by some `x_k^-2`
+ * where of course, the indices `j` and `k` must be chosen carefully.
+ *
+ * The bulk of `secp256k1_bulletproof_innerproduct_vfy_ecmult_callback` involves computing
+ * these indices, given `a_2/a_1`, `b_1/a_2`, `b_2/b_1`, and the `x_k^2`s as input. It
+ * computes `x_k^-2` as a side-effect of its other computation.
+ */
+
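/* To make the index bookkeeping above concrete, here is a small standalone sketch (illustrative
 * only, not part of this file) that dry-runs the schedule for a small power-of-two `n`, assuming
 * the four-scalar case described above. It only tracks *which* earlier coefficient and which
 * challenge square each step would use; it ignores the extra factors (randomizers, `-a`/`-b`,
 * `y^-i`) discussed further down.
 */
#include <stdio.h>

int main(void) {
    const size_t n = 8; /* must be a power of two, at least 4 */
    size_t i;
    for (i = 1; i < 2 * n; i++) {
        if (i % (n / 2) == 0) {
            /* i = n/2, n or 3n/2: transition index, multiply by a_2/a_1, b_1/a_2 or b_2/b_1 */
            printf("s_%lu: transition, uses a/b ratio #%lu\n", (unsigned long)i, (unsigned long)(i / (n / 2) - 1));
        } else {
            size_t j = i & (i - 1);                                /* predecessor: i with its lowest set bit cleared */
            size_t k = (size_t)__builtin_ctzl((unsigned long)i);   /* which challenge square to use */
            printf("s_%lu = s_%lu * x_%lu^%s2\n", (unsigned long)i, (unsigned long)j, (unsigned long)(k + 1), i < n ? "" : "-");
        }
    }
    return 0;
}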
typedef int (secp256k1_bulletproof_vfy_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, secp256k1_scalar *randomizer, size_t idx, void *data);

/* used by callers to wrap a proof with surrounding context */
@@ -67,111 +119,116 @@ size_t secp256k1_bulletproof_innerproduct_proof_length(size_t n) {
}
}

- /* Bulletproof rangeproof verification comes down to a single multiexponentiation of the form
- *
- * P + (c-a*b)*x*G - sum_{i=1}^n [a*s'_i*G_i + b*s_i*H_i] + sum_{i=1}^log2(n) [x_i^-2 L_i + x_i^2 R_i
- *
- * which will equal infinity if the rangeproof is correct. Here
- * - `G_i` and `H_i` are standard NUMS generators. `G` is the standard secp256k1 generator.
- * - `P` and `c` are inputs to the proof, which claims that there exist `a_i` and `b_i`, `i` ranging
- * from 0 to `n-1`, such that `P = sum_i [a_i G_i + b_i H_i]` and that `<{a_i}, {b_i}> = c`.
- * - `a`, `b`, `L_i` and `R_i` are auxillary components of the proof, where `i` ranges from 0 to `log2(n)-1`.
- * - `x_i = H(x_{i-1} || L_i || R_i)`, where `x_{-1}` is passed through the `commit` variable and
- * must be a commitment to `P` and `c`.
- * - `x` is a hash of `commit` and is used to rerandomize `c`. See Protocol 2 vs Protocol 1 in the paper.
- * - `s_i` and `s'_i` are computed as follows.
- *
- * For each `i` between 0 and `n-1` inclusive, let `b_{ij}` be -1 (1) if the `j`th bit of `i` is zero (one).
- * Here `j` ranges from 0 to `log2(n)-1`. Then for each such `i` we define
- * - `s_i = prod_j x_j^{b_{ij}}`
- * - `s'_i = 1/s_i`
- *
- * Alternately we can define `s_i` and `s'_i` recursively as follows:
- * - `s_0 = s'_{n - 1} = 1 / prod_j x_j`
- * - `s_i = s'_{n - 1 - i} = s_{i - 2^j} * x_j^2` where `j = i & (i - 1)` is `i` with its least significant 1 set to 0.
- *
- * Our ecmult_multi function takes `(c - a*b)*x` directly and multiplies this by `G`. For every other
+ /* Our ecmult_multi function takes `(c - a*b)*x` directly and multiplies this by `G`. For every other
* (scalar, point) pair it calls the following callback function, which takes an index and outputs a
* pair. The function therefore has three regimes:
*
- * For the first `2n` invocations, it alternately returns `(s'_{n - i}, G_{n - i})` and `(s_i, H_i)`,
- * where `i` is `floor(idx / 2)`. The reason for the funny indexing is that we use the above recursive
- * definition of `s_i` and `s'_i` which produces each element with only a single scalar multiplication,
- * but in this mixed order. (We start with an array of `x_j^2` for each `x_j`.)
- *
- * As a side-effect, whenever `n - i = 2^j` for some `j`, `s_i = x_j^{-1} * prod_{j' != j} x_{j'}`,
- * so `x_j^{-2} = s_i*s_0`. Therefore we compute an array of inverse squares during this computation,
- * using only one multiplication per. We will need it in the following step.
- *
- * For the next `2*log2(n)` invocations it alternately returns `(x_i^-2, L_i)` and `(x_i^2, R_i)`
- * where `i` is `idx - 2*n`.
+ * For the first `n` invocations, it returns `(s'_i, G_i)` for `i` from 1 to `n`.
+ * For the next `n` invocations, it returns `(s_i, H_i)` for `i` from 1 to `n`.
+ * For the next `2*log2(n)` invocations it returns `(x_i^-2, L_i)` and `(x_i^2, R_i)`,
+ * alternating between the two choices, for `i` from 1 to `log2(n)`.
*
* For the remaining invocations it passes through to another callback, `rangeproof_cb_data` which
* computes `P`. The reason for this is that in practice `P` is usually defined by another multiexp
* rather than being a known point, and it is more efficient to compute one exponentiation.
*
+ * Inline we refer to the first `2n` coefficients as `s_i` for `i` from 0 to `2n-1`, since that
+ * is the more convenient indexing. In particular we describe (a) how the indices `j` and `k`,
+ * from the big comment block above, are chosen; and (b) when/how each `x_k^-2` is computed.
*/
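/* As a reading aid (illustrative only, not part of the actual code): following the regime
 * boundaries described in the comment above, a single proof with vector length `n` and
 * `lgn = log2(n)` maps a multiexp index onto one of four regimes like so.
 */
typedef enum {
    VFY_REGIME_G,      /* (s'_i, G_i)                    */
    VFY_REGIME_H,      /* (s_i, H_i)                     */
    VFY_REGIME_LR,     /* (x_i^-2, L_i) or (x_i^2, R_i)  */
    VFY_REGIME_CALLER  /* passed through to compute `P`  */
} vfy_regime;

static vfy_regime vfy_regime_of(size_t idx, size_t n, size_t lgn) {
    if (idx < n) {
        return VFY_REGIME_G;
    } else if (idx < 2 * n) {
        return VFY_REGIME_H;
    } else if (idx < 2 * n + 2 * lgn) {
        return VFY_REGIME_LR;
    }
    return VFY_REGIME_CALLER;
}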
-
- /* For the G and H generators, we choose the ith generator with a scalar computed from the
- * L/R hashes as follows: prod_{j=1}^m x_j^{e_j}, where each exponent e_j is either -1 or 1.
- * The choice directly maps to the bits of i: for the G generators, a 0 bit means e_j is 1
- * and a 1 bit means e_j is -1. For the H generators it is the opposite. Finally, each of the
- * G scalars is further multiplied by -a, while each of the H scalars is further multiplied
- * by -b.
- *
- * These scalars are computed starting from I, the inverse of the product of every x_j, which
- * is then selectively multiplied by x_j^2 for whichever j's are needed. As it turns out, by
- * caching logarithmically many scalars, this can always be done by multiplying one of the
- * cached values by a single x_j, rather than starting from I and doing multiple multiplications.
- */
-
static int secp256k1_bulletproof_innerproduct_vfy_ecmult_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data) {
secp256k1_bulletproof_innerproduct_vfy_ecmult_context *ctx = (secp256k1_bulletproof_innerproduct_vfy_ecmult_context *) data;

- /* First 2N points use the standard Gi, Hi generators, and the scalars can be aggregated across proofs */
+ /* First 2N points use the standard Gi, Hi generators, and the scalars can be aggregated across proofs.
+ * Inside this if clause, `idx` corresponds to the index `i` in the big comment, and runs from 0 to `2n-1`.
+ * Also `ctx->vec_len` corresponds to `n`. */
if (idx < 2 * ctx->vec_len) {
+ /* Number of `a` scalars in the proof (same as number of `b` scalars in the proof). Will
+ * be 2 except for very small proofs that have fewer than 2 scalars as input. */
const size_t grouping = ctx->vec_len < IP_AB_SCALARS / 2 ? ctx->vec_len : IP_AB_SCALARS / 2;
const size_t lg_grouping = secp256k1_floor_lg(grouping);
size_t i;
- /* TODO zero this point when appropriate for non-2^n numbers of pairs */
+ VERIFY_CHECK(lg_grouping == 0 || lg_grouping == 1); /* TODO support higher IP_AB_SCALARS */
+
+ /* Determine whether we're multiplying by `G_i`s or `H_i`s. */
if (idx < ctx->vec_len) {
*pt = ctx->geng[idx];
} else {
*pt = ctx->genh[idx - ctx->vec_len];
}

secp256k1_scalar_clear(sc);
+ /* Loop over all the different inner product proofs we might be doing at once. Since they
+ * share generators `G_i` and `H_i`, we compute all of their scalars at once and add them.
+ * For each proof we start with the "seed value" `ctx->proof[i].xcache[0]` (see next comment
+ * for its meaning) from which every other scalar is derived. We expect the caller to have
+ * randomized this to ensure that this wanton addition cannot enable cancellation attacks.
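+ * (Concretely, if `S_i` denotes the multiexponentiation for proof `i` and `r_i` its randomizer,
+ * the aggregated check is that `sum_i r_i*S_i` is the point at infinity; for independently random
+ * `r_i` this can only hold, except with negligible probability, if every `S_i` is infinity itself.)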
+ */
for (i = 0; i < ctx->n_proofs; i++) {
+ /* To recall from the introductory comment: most `s_i` values are computed by taking an
+ * earlier `s_j` value and multiplying it by some `x_k^2`.
+ *
+ * We now explain the index `j`: it is the largest number less than `i` with one fewer 1-bits
+ * than `i`. Alternately, the most recently returned `s_j` where `j` has one fewer 1-bits than `i`.
+ *
+ * To ensure that `s_j` is available when we need it, on each iteration we define the
+ * variable `cache_idx` which simply counts the 1-bits in `i`; before returning `s_i`
+ * we store it in `ctx->proof[i].xcache[cache_idx]`. Then later, when we want "the most
+ * recently returned `s_j` with one fewer 1-bits than `i`", it'll be sitting right
+ * there in `ctx->proof[i].xcache[cache_idx - 1]`.
+ *
+ * Note that `ctx->proof[i].xcache[0]` will always equal `-a_1 * prod_{i=1}^{n-1} x_i^-2`,
+ * and we expect the caller to have set this.
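+ *
+ * For example, with `n = 8`: at `idx = 5 = 0b101` we have `cache_idx = 2`, and `xcache[1]`
+ * was last written at `idx = 4 = 0b100`, the most recent index with one fewer 1-bits; so
+ * `s_5` is obtained from `s_4`, multiplied by `xsq[0]` since `CTZ(5) = 0`.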
182
+ */
145
183
const size_t cache_idx = POPCOUNT (idx );
146
184
secp256k1_scalar term ;
147
185
VERIFY_CHECK (cache_idx < SECP256K1_BULLETPROOF_MAX_DEPTH );
148
- /* Compute the normal inner-product scalar.. . */
186
+ /* For the special case `cache_idx == 0` (which is true iff `idx == 0`) there is nothing to do . */
149
187
if (cache_idx > 0 ) {
188
+ /* Otherwise, check if this is one of the special indices where we transition from `a_1` to `a_2`,
189
+ * from `a_2` to `b_1`, or from `b_1` to `b_2`. (For small proofs there is only one transition,
190
+ * from `a` to `b`.) */
150
191
if (idx % (ctx -> vec_len / grouping ) == 0 ) {
151
192
const size_t abinv_idx = idx / (ctx -> vec_len / grouping ) - 1 ;
152
193
size_t prev_cache_idx ;
194
+ /* Check if it's the even specialer index where we're transitioning from `a`s to `b`s, from
195
+ * `G`s to `H`s, and from `x_k^2`s to `x_k^-2`s. In rangeproof and circuit applications,
196
+ * the caller secretly has a variable `y` such that `H_i` is really `y^-i H_i` for `i` ranging
197
+ * from 0 to `n-1`. Rather than forcing the caller to tweak every `H_i` herself, which would
198
+ * be very slow and prevent precomputation, we instead multiply our cached `x_k^-2` values
199
+ * by `y^(-2^k)` respectively, which will ultimately result in every `s_i` we return having
200
+ * been multiplied by `y^-i`.
201
+ *
202
+ * This is an underhanded trick but the result is that all `n` powers of `y^-i` show up
203
+ * in the right place, and we only need log-many scalar squarings and multiplications.
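+ *
+ * For example, with `n = 8` the scalar for `H_3` is built from the `H_0` scalar by multiplying
+ * in `xsqinvy[1]` and then `xsqinvy[0]`, so it picks up `y^-2 * y^-1 = y^-3`, exactly the
+ * `y^-i` factor it needs.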
+ */
if (idx == ctx->vec_len) {
- /* Transition from G to H, a's to b's */
secp256k1_scalar yinvn = ctx->proof[i].proof->yinv;
size_t j;
prev_cache_idx = POPCOUNT(idx - 1);
for (j = 0; j < (size_t) CTZ(idx) - lg_grouping; j++) {
secp256k1_scalar_mul(&ctx->proof[i].xsqinvy[j], &ctx->proof[i].xsqinv[j], &yinvn);
secp256k1_scalar_sqr(&yinvn, &yinvn);
}
- for (j = 0; j < lg_grouping; j++) {
- /* TODO this only does the right thing for lg_grouping = 0 or 1 */
+ if (lg_grouping == 1) {
secp256k1_scalar_mul(&ctx->proof[i].abinv[2], &ctx->proof[i].abinv[2], &yinvn);
secp256k1_scalar_sqr(&yinvn, &yinvn);
}
} else {
prev_cache_idx = cache_idx - 1;
}
+ /* Regardless of specialness, we multiply by `a_2/a_1` or whatever the appropriate multiplier
+ * is. We expect the caller to have given these to us in the `ctx->proof[i].abinv` array. */
secp256k1_scalar_mul(
&ctx->proof[i].xcache[cache_idx],
&ctx->proof[i].xcache[prev_cache_idx],
&ctx->proof[i].abinv[abinv_idx]
);
+ /* If it's *not* a special index, just multiply by the appropriate `x_k^2`, or `x_k^-2` in case
+ * we're in the `H_i` half of the multiexp. At this point we can explain the index `k`, which
+ * is computed in the variable `xsq_idx` (`xsqinv_idx` respectively). In light of our discussion
+ * of `j`, we see that this should be "the least significant bit that's 1 in `i` but not `i-1`."
+ * In other words, it is the number of trailing 0 bits in the index `i`. */
} else if (idx < ctx->vec_len) {
const size_t xsq_idx = CTZ(idx);
secp256k1_scalar_mul(&ctx->proof[i].xcache[cache_idx], &ctx->proof[i].xcache[cache_idx - 1], &ctx->proof[i].xsq[xsq_idx]);
@@ -182,14 +239,19 @@ static int secp256k1_bulletproof_innerproduct_vfy_ecmult_callback(secp256k1_scal
}
term = ctx->proof[i].xcache[cache_idx];

- /* When going through the G generators, compute the x-inverses as side effects */
- if (idx < ctx->vec_len / grouping && POPCOUNT(idx) == ctx->lg_vec_len - 1) { /* if the scalar has only one 0, i.e. only one inverse... */
+ /* One last trick: compute `x_k^-2` while computing the `G_i` scalars, so that they'll be
+ * available when we need them for the `H_i` scalars. We can do this for every `i` value
+ * that has exactly one 0-bit, i.e. which is a product of all `x_i`s and one `x_k^-1`. By
+ * multiplying that by the special value `prod_{i=1}^n x_i^-1` we obtain simply `x_k^-2`.
+ * We expect the caller to give us this special value in `ctx->proof[i].xsqinv_mask`. */
+ if (idx < ctx->vec_len / grouping && POPCOUNT(idx) == ctx->lg_vec_len - 1) {
const size_t xsqinv_idx = CTZ(~idx);
- /* ...multiply it by the total inverse, to get x_j^-2 */
secp256k1_scalar_mul(&ctx->proof[i].xsqinv[xsqinv_idx], &ctx->proof[i].xcache[cache_idx], &ctx->proof[i].xsqinv_mask);
}

- /* ...add whatever offset the rangeproof wants... */
+ /* Finally, if the caller, in its computation of `P`, wants to multiply `G_i` or `H_i` by some scalar,
+ * we add that to our sum as well. Again, we trust the randomization in `xcache[0]` to prevent any
+ * cancellation attacks here. */
if (ctx->proof[i].proof->rangeproof_cb != NULL) {
secp256k1_scalar rangeproof_offset;
if ((ctx->proof[i].proof->rangeproof_cb)(&rangeproof_offset, NULL, &ctx->randomizer[i], idx, ctx->proof[i].proof->rangeproof_cb_data) == 0) {