forked from danaj/Math-Prime-Util
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.h
400 lines (352 loc) · 13.3 KB
/
util.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
#ifndef MPU_UTIL_H
#define MPU_UTIL_H
#include "ptypes.h"
extern void sort_uv_array(UV* L, unsigned long len);
extern void sort_iv_array(IV* L, unsigned long len);
extern void sort_dedup_uv_array(UV* L, int data_is_signed, unsigned long *len);
extern unsigned long index_in_sorted_uv_array(UV v, UV* L, unsigned long len);
extern unsigned long index_in_sorted_iv_array(IV v, IV* L, unsigned long len);
#define is_in_sorted_uv_array(v,L,len) (index_in_sorted_uv_array(v,L,len) > 0)
#define is_in_sorted_iv_array(v,L,len) (index_in_sorted_iv_array(v,L,len) > 0)
extern int _XS_get_verbose(void);
extern void _XS_set_verbose(int v);
extern int _XS_get_callgmp(void);
extern void _XS_set_callgmp(int v);
/* Disable all manual seeding */
extern int _XS_get_secure(void);
extern void _XS_set_secure(void);
extern int is_prime(UV x);
extern UV next_prime(UV x);
extern UV prev_prime(UV x);
extern void print_primes(UV low, UV high, int fd);
extern uint32_t powerof(UV n);
extern int is_power(UV n, UV a);
extern UV rootint(UV n, UV k);
extern UV ipowsafe(UV n, UV k); /* returns UV_MAX if overflows */
extern UV lcmsafe(UV x, UV u); /* returns 0 if overflows */
extern UV valuation(UV n, UV k);
extern UV valuation_remainder(UV n, UV k, UV *r);
extern UV logint(UV n, UV b);
extern UV mpu_popcount_string(const char* ptr, uint32_t len);
extern unsigned char* range_issquarefree(UV lo, UV hi);
extern UV powersum(UV n, UV k);
extern signed char* range_moebius(UV low, UV high);
extern signed char* range_liouville(UV low, UV high);
extern int liouville(UV n);
extern IV mertens(UV n);
extern IV sumliouville(UV n);
extern int kronecker_uu(UV a, UV b);
extern int kronecker_su(IV a, UV b);
extern int kronecker_ss(IV a, IV b);
extern UV pn_primorial(UV n);
extern UV primorial(UV n);
extern UV factorial(UV n);
extern UV subfactorial(UV n);
extern UV fubini(UV n);
extern UV binomial(UV n, UV k);
extern UV falling_factorial(UV n, UV m);
extern UV rising_factorial(UV n, UV m);
extern IV falling_factorial_s(IV n, UV m);
extern IV rising_factorial_s(IV n, UV m);
extern IV gcdext(IV a, IV b, IV* u, IV* v, IV* s, IV* t); /* Ext Euclidean */
extern UV modinverse(UV a, UV p); /* Returns 1/a mod p */
extern UV divmod(UV a, UV b, UV n); /* Returns a/b mod n */
extern UV gcddivmod(UV a, UV b, UV n); /* divmod(a/gcd,b/gcd,n) */
extern UV pisano_period(UV n);
extern int chinese(UV *r, UV *lcm, UV* a, UV* n, UV num);/* Chinese Remainder */
/* Do the inverse for a negative modular power / root. a^-k => (1/a)^k mod n */
extern int prep_pow_inv(UV *a, UV *k, int kstatus, UV n);
/* Division and remainder. Returns remainder. */
extern IV tdivrem(IV *q, IV *r, IV D, IV d); /* signed div/rem trunc */
extern IV fdivrem(IV *q, IV *r, IV D, IV d); /* signed div/rem floor */
extern IV cdivrem(IV *q, IV *r, IV D, IV d); /* signed div/rem ceiling */
extern IV edivrem(IV *q, IV *r, IV D, IV d); /* signed div/rem Euclidian */
extern UV ivmod(IV a, UV n); /* Returns a mod n (trunc) */
extern UV carmichael_lambda(UV n);
extern int moebius(UV n);
extern UV exp_mangoldt(UV n);
extern UV znprimroot(UV n);
extern UV znorder(UV a, UV n);
/* nprime says to assume n = p or n = 2p. Skips power and primality tests. */
extern int is_primitive_root(UV a, UV n, int nprime);
extern UV factorialmod(UV n, UV m);
extern int binomialmod(UV *res, UV n, UV k, UV m);
extern int is_square_free(UV n);
extern int is_perfect_number(UV n);
extern int is_fundamental(UV n, int neg);
extern int is_semiprime(UV n);
extern int is_almost_prime(UV k, UV n);
extern int is_carmichael(UV n);
extern UV is_quasi_carmichael(UV n); /* Returns number of bases */
extern UV pillai_v(UV n); /* v: v! % n == n-1 && n % v != 1 */
extern UV qnr(UV n);
extern int is_qr(UV a, UV n); /* kronecker that works for composites */
extern int is_practical(UV n);
extern int is_delicate_prime(UV n, uint32_t b);
extern int is_smooth(UV n, UV k);
extern int is_rough(UV n, UV k);
extern int is_sum_of_two_squares(UV n);
extern int is_sum_of_three_squares(UV n);
extern int cornacchia(UV *x, UV *y, UV d, UV p);
extern int is_congruent_number(UV n);
extern UV debruijn_psi(UV x, UV y);
extern UV buchstab_phi(UV x, UV y);
extern UV stirling3(UV n, UV m);
extern IV stirling2(UV n, UV m);
extern IV stirling1(UV n, UV m);
extern IV hclassno(UV n);
extern IV ramanujan_tau(UV n);
extern char* pidigits(int digits);
extern int strnum_minmax(int min, const char* a, STRLEN alen, const char* b, STRLEN blen);
extern int strnum_cmp(const char* a, STRLEN alen, const char* b, STRLEN blen);
extern int from_digit_string(UV* n, const char* s, int base);
extern int from_digit_to_UV(UV* rn, UV* r, int len, int base);
extern int from_digit_to_str(char** rstr, UV* r, int len, int base);
extern int to_digit_array(int* bits, UV n, int base, int length);
extern int to_digit_string(char *s, UV n, int base, int length);
extern int to_string_128(char s[40], IV hi, UV lo);
extern int validate_zeckendorf(const char* str);
extern UV from_zeckendorf(const char* str);
extern char* to_zeckendorf(UV n);
extern int is_catalan_pseudoprime(UV n);
extern UV polygonal_root(UV n, UV k, int* overflow);
extern UV npartitions(UV n);
extern UV consecutive_integer_lcm(UV n);
extern UV frobenius_number(UV* A, uint32_t alen);
extern int num_to_perm(UV rank, int n, int *vec);
extern int perm_to_num(int n, int *vec, UV *rank);
extern void randperm(void* ctx, UV n, UV k, UV *S);
extern UV random_factored_integer(void* ctx, UV n, int *nf, UV *factors);
extern UV gcdz(UV x, UV y);
#if defined(FUNC_isqrt) || defined(FUNC_is_perfect_square)
#include <math.h>
static UV isqrt(UV n) {
UV root;
#if BITS_PER_WORD == 32
if (n >= UVCONST(4294836225)) return UVCONST(65535);
#else
if (n >= UVCONST(18446744065119617025)) return UVCONST(4294967295);
#endif
root = (UV) sqrt((double)n);
if (root*root > n) root--;
if ((root+1)*(root+1) <= n) root++;
return root;
}
#endif
#if defined(FUNC_icbrt) || defined(FUNC_is_perfect_cube)
static UV icbrt(UV n) {
UV b, root = 0;
#if BITS_PER_WORD == 32
int s = 30;
if (n >= UVCONST(4291015625)) return UVCONST(1625);
#else
int s = 63;
if (n >= UVCONST(18446724184312856125)) return UVCONST(2642245);
#endif
for ( ; s >= 0; s -= 3) {
root += root;
b = 3*root*(root+1)+1;
if ((n >> s) >= b) {
n -= b << s;
root++;
}
}
return root;
}
#endif
#if defined(FUNC_ipow)
static UV ipow(UV n, UV k) {
UV p = 1;
while (k) {
if (k & 1) p *= n;
k >>= 1;
if (k) n *= n;
}
return p;
}
#endif
#if defined(FUNC_gcd_ui) || defined(FUNC_lcm_ui)
/* If we have a very fast ctz, then use the fast FLINT version of gcd */
#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define gcd_ui(x,y) gcdz(x,y)
#else
static UV gcd_ui(UV x, UV y) {
UV t;
if (y < x) { t = x; x = y; y = t; }
while (y > 0) {
t = y; y = x % y; x = t; /* y1 <- x0 % y0 ; x1 <- y0 */
}
return x;
}
#endif
#endif
#ifdef FUNC_lcm_ui
static UV lcm_ui(UV x, UV y) {
/* Can overflow if lcm(x,y) > 2^64 (e.g. two primes each > 2^32) */
return x * (y / gcd_ui(x,y));
}
#endif
#ifdef FUNC_is_perfect_square
/* See: http://mersenneforum.org/showpost.php?p=110896 */
static int is_perfect_square(UV n)
{
/* Step 1, reduce to 18% of inputs */
uint32_t m = n & 127;
if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0;
/* Step 2, reduce to 7% of inputs (mod 99 reduces to 4% but slower) */
m = n %240; if ((m*0xfa445556) & (m*0x8021feb1) & 0x614aaa0f) return 0;
/* m = n % 99; if ((m*0x5411171d) & (m*0xe41dd1c7) & 0x80028a80) return 0; */
/* Step 3, do the square root instead of any more rejections */
m = isqrt(n);
return (UV)m*(UV)m == n;
}
#endif
#ifdef FUNC_is_perfect_cube
static int is_perfect_cube(UV n)
{
uint32_t m;
m = n % 117; if ((m*833230740) & (m*120676722) & 813764715) return 0;
m = n % 133; if ((m*76846229) & (m*305817297) & 306336544) return 0;
m = n % 43; if ((m*193635074) & (m*3653322805U) & 74401) return 0;
m = n % 37; if ((m*919307198) & (m*3908849845U) & 6665) return 0;
m = icbrt(n);
return (UV)m*m*m == n;
}
#endif
#ifdef FUNC_is_perfect_fifth
static int is_perfect_fifth(UV n)
{
UV m;
if ((n & 3) == 2) return 0;
m = n % 88; if ((m*85413603) & (m*76260301) & 26476550) return 0;
m = n % 31; if ((m*80682551) & (m*73523539) & 45414528) return 0;
m = n % 41; if ((m*92806493) & (m*130690042) & 35668129) return 0;
/* m = n % 25; if ((m*109794298) & (m*105535723) & 16097553) return 0; */
m = rootint(n, 5);
return m*m*m*m*m == n;
}
#endif
#ifdef FUNC_is_perfect_seventh
static int is_perfect_seventh(UV n)
{
UV m;
/* if ((n & 3) == 2) return 0; */
m = n & 511; if ((m*97259473) & (m*51311663) & 894) return 0;
m = n % 49; if ((m*109645301) & (m*76482737) & 593520192) return 0;
m = n % 71; if ((m*71818386) & (m*38821587) & 35299393) return 0;
/* m = n % 43; if ((m*101368253) & (m*814158665) & 142131408) return 0; */
/* m = n % 29; if ((m*81935611) & (m*84736134) & 37831965) return 0; */
/* m = n % 116; if ((m*348163737) & (m*1539055705) & 2735997248) return 0; */
m = rootint(n, 7);
return m*m*m*m*m*m*m == n;
}
#endif
#if defined(FUNC_clz) || defined(FUNC_ctz) || defined(FUNC_log2floor)
/* log2floor(n) gives the location of the first set bit (starting from left)
* ctz(n) gives the number of times n is divisible by 2
* clz(n) gives the number of zeros on the left */
#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#if BITS_PER_WORD == 64
#define ctz(n) ((n) ? __builtin_ctzll(n) : 64)
#define clz(n) ((n) ? __builtin_clzll(n) : 64)
#define log2floor(n) ((n) ? 63-__builtin_clzll(n) : 0)
#else
#define ctz(n) ((n) ? __builtin_ctzl(n) : 32)
#define clz(n) ((n) ? __builtin_clzl(n) : 32)
#define log2floor(n) ((n) ? 31-__builtin_clzl(n) : 0)
#endif
/* For MSC, we need to use _BitScanForward and _BitScanReverse. The way to
* get to them has changed, so we're going to only use them on new systems.
* The performance of these functions are not super critical.
* What is: popcnt, mulmod, and muladd.
*/
#elif defined (_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__) && !defined(_WIN32_WCE)
#include <intrin.h>
#ifdef FUNC_ctz
static int ctz(UV n) {
UV tz = 0;
#if BITS_PER_WORD == 64
if (_BitScanForward64(&tz, n)) return tz; else return 64;
#else
if (_BitScanForward(&tz, n)) return tz; else return 32;
#endif
}
#endif
#if defined(FUNC_clz) || defined(FUNC_log2floor)
static int log2floor(UV n) {
UV lz = 0;
#if BITS_PER_WORD == 64
if (_BitScanReverse64(&lz, n)) return lz; else return 0;
#else
if (_BitScanReverse(&lz, n)) return lz; else return 0;
#endif
}
#endif
#elif BITS_PER_WORD == 64
static const unsigned char _debruijn64[64] = {
63, 0,58, 1,59,47,53, 2, 60,39,48,27,54,33,42, 3, 61,51,37,40,49,18,28,20,
55,30,34,11,43,14,22, 4, 62,57,46,52,38,26,32,41, 50,36,17,19,29,10,13,21,
56,45,25,31,35,16, 9,12, 44,24,15, 8,23, 7, 6, 5 };
#ifdef FUNC_ctz
static unsigned int ctz(UV n) {
return n ? _debruijn64[((n & -n)*UVCONST(0x07EDD5E59A4E28C2)) >> 58] : 64;
}
#endif
#if defined(FUNC_clz) || defined(FUNC_log2floor)
static unsigned int log2floor(UV n) {
if (n == 0) return 0;
n |= n >> 1; n |= n >> 2; n |= n >> 4;
n |= n >> 8; n |= n >> 16; n |= n >> 32;
return _debruijn64[((n-(n>>1))*UVCONST(0x07EDD5E59A4E28C2)) >> 58];
}
#endif
#else
#ifdef FUNC_ctz
static const unsigned char _trail_debruijn32[32] = {
0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 };
static unsigned int ctz(UV n) {
return n ? _trail_debruijn32[((n & -n) * UVCONST(0x077CB531)) >> 27] : 32;
}
#endif
#if defined(FUNC_clz) || defined(FUNC_log2floor)
static const unsigned char _lead_debruijn32[32] = {
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
static unsigned int log2floor(UV n) {
if (n == 0) return 0;
n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16;
return _lead_debruijn32[(n * UVCONST(0x07C4ACDD)) >> 27];
}
#endif
#endif
#if defined(FUNC_clz) && !defined(clz)
#define clz(n) ( (n) ? BITS_PER_WORD-1-log2floor(n) : BITS_PER_WORD )
#endif
#endif /* End of log2floor, clz, and ctz */
#ifdef FUNC_popcnt
/* GCC 3.4 - 4.1 has broken 64-bit popcount.
* GCC 4.2+ can generate awful code when it doesn't have asm (GCC bug 36041).
* When the asm is present (e.g. compile with -march=native on a platform that
* has them, like Nahelem+), then it is almost as fast as manually written asm. */
#if BITS_PER_WORD == 64
#if defined(__POPCNT__) && defined(__GNUC__) && (__GNUC__> 4 || (__GNUC__== 4 && __GNUC_MINOR__> 1))
#define popcnt(b) __builtin_popcountll(b)
#else
static UV popcnt(UV b) {
b -= (b >> 1) & 0x5555555555555555;
b = (b & 0x3333333333333333) + ((b >> 2) & 0x3333333333333333);
b = (b + (b >> 4)) & 0x0f0f0f0f0f0f0f0f;
return (b * 0x0101010101010101) >> 56;
}
#endif
#else
static UV popcnt(UV b) {
b -= (b >> 1) & 0x55555555;
b = (b & 0x33333333) + ((b >> 2) & 0x33333333);
b = (b + (b >> 4)) & 0x0f0f0f0f;
return (b * 0x01010101) >> 24;
}
#endif
#endif
#endif