diff --git a/ChangeLog b/ChangeLog index f9c5933..a6b3cfe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2016-07-21 David Bauer + + * libdieharder/dab_filltree2.c: Updated to a g-test with correction + with exact computed target + * libdieharder/chisq.c: Correction applied + * include/dieharder/libdieharder.h: tie-in new generator + + ------------------------------------------------------------------------ r529 | rgbatduke | 2011-04-01 13:49:31 -0400 (Fri, 01 Apr 2011) | 117 lines diff --git a/dieharder/choose_rng.c b/dieharder/choose_rng.c index 72d885d..f5d4e49 100644 --- a/dieharder/choose_rng.c +++ b/dieharder/choose_rng.c @@ -191,6 +191,13 @@ int select_rng(int gennum,char *genname,unsigned int initial_seed) } rng = gsl_rng_alloc(dh_rng_types[gennum]); + /* + * Here we evaluate the speed of the generator if the rate flag is set. + */ + if(tflag & TRATE){ + time_rng(); + } + /* * OK, here's the deal on seeds. If strategy = 0, we set the seed * ONE TIME right HERE to either a randomly selected seed or whatever @@ -240,19 +247,11 @@ int select_rng(int gennum,char *genname,unsigned int initial_seed) } /* - * Set the seed. We do this here just so it is set for the timing - * test. It may or may not ever be reset. + * Set the seed. It may or may not ever be reset. */ gsl_rng_set(rng,seed); - /* - * Here we evaluate the speed of the generator if the rate flag is set. - */ - if(tflag & TRATE){ - time_rng(); - } - - /* +/* * Before we quit, we must count the number of significant bits in the * selected rng AND create a mask. Note that several routines in bits * WILL NOT WORK unless this is all set correctly, which actually @@ -377,6 +376,13 @@ int select_XOR() */ rng = gsl_rng_alloc(dh_rng_types[14]); + /* + * Here we evaluate the speed of the generator if the rate flag is set. + */ + if(tflag & TRATE){ + time_rng(); + } + /* * OK, here's the deal on seeds. If strategy = 0, we set the seed * ONE TIME right HERE to either a randomly selected seed or whatever @@ -426,19 +432,11 @@ int select_XOR() } /* - * Set the seed. We do this here just so it is set for the timing - * test. It may or may not ever be reset. + * Set the seed. It may or may not ever be reset. */ gsl_rng_set(rng,seed); - /* - * Here we evaluate the speed of the generator if the rate flag is set. - */ - if(tflag & TRATE){ - time_rng(); - } - - /* +/* * We don't really need this anymore, I don't think. But we'll leave it * for now. */ diff --git a/dieharder/parsecl.c b/dieharder/parsecl.c index 297054a..5809193 100644 --- a/dieharder/parsecl.c +++ b/dieharder/parsecl.c @@ -59,7 +59,8 @@ void parsecl(int argc, char **argv) all = YES; break; case 'B': - binary = 1; + output_format = 0; + fprintf(stderr, "Warning: \"-B\" option is deprecated. Use \"-O 0\" instead.\n"); break; case 'c': /* printf("Setting separator to %c\n",optarg[0]); */ diff --git a/dieharder/set_globals.c b/dieharder/set_globals.c index 175aa52..4d69b1d 100644 --- a/dieharder/set_globals.c +++ b/dieharder/set_globals.c @@ -12,7 +12,6 @@ void set_globals() * by a flag with the same first letter. In order: */ all = NO; /* Default is to NOT do all the tests */ - binary = NO; /* Do output a random stream in binary (with -o) */ dtest_num = -1; /* -1 means no test selected */ dtest_name[0] = (char)0; /* empty test name is also default */ filename[0] = (char)0; /* No input file */ diff --git a/include/dieharder/dab_birthdays1.h b/include/dieharder/dab_birthdays1.h new file mode 100644 index 0000000..ca33df3 --- /dev/null +++ b/include/dieharder/dab_birthdays1.h @@ -0,0 +1,60 @@ +/* + * dab_birthdays1 test header. + */ + +/* + * function prototype + */ +int dab_birthdays1(Test **test, int irun); + +static Dtest dab_birthdays1_dtest __attribute__((unused)) = { + "Diehard Birthdays Test", + "dab_birthdays1", + "\n\ +#==================================================================\n\ +# Diehard \"Birthdays\" test (modified).\n\ +# This is a version of the Diehard Birthdays test, modified from the\n\ +# original Dieharder implementation of it.\n\ +# \n\ +# This is a BIRTHDAY SPACINGS TEST\n\ +# Choose m birthdays in a year of n days. List the spacings between \n\ +# the birthdays. If j is the number of values that occur more than \n\ +# once in that list, then j is asympotically Poisson distributed with \n\ +# mean lambda = m^3/(4n). A Chi-Sq test is performed comparing the \n\ +# seen distribution of repeated spacings to the Poisson distribution. \n\ +# Simulations show that the approximation is better for larger n and \n\ +# smaller lambda. However, since for any given run j must be an \n\ +# integer, a small lambda value requires more runs to build up a good \n\ +# statistic. This test uses m=1700 as the default, but it may \n\ +# changed (via the -n (ntuple) option), up to a maximum value of \n\ +# 4096. The value of n is fixed by the choice of generator, with \n\ +# n=2^r, where r is the number of bits per word in the generator's \n\ +# output (a maximum of 32 for this version of Dieharder). This test \n\ +# prefers a larger t-count (-t option) and p-value samples set to 1 \n\ +# (-p 1, which is the default).\n\ +# \n\ +# Be careful when running this test against generators with reduced \n\ +# word sizes, as it may give false positives. When it doubt, check \n\ +# against an assumed good generator that is set to produce the same \n\ +# size output. As an example, for testing a generator with an output \n\ +# size of 20 bits, using \"-n 50 -t 8000\" produced a test that \n\ +# repeated passed an assumed good generator at \"-p 100\", but had \n\ +# trouble at \"-p 500\". Alternately, raising the t-count also shows \n\ +# that m of 50 isn't low enough to give a good approximation. For \n\ +# long tests of generators with an output size smaller than 30 bits, \n\ +# producing the target by simulation instead of relying on the \n\ +# Poisson approximation will probably be necessary.\n\ +# \n\ +#==================================================================\n", + 1, + 2000, + 1, + dab_birthdays1, + 0 +}; + +/* + * Global variables +uint dab_birthdays1_nms,dab_birthdays1_nbits; +uint *dab_birthdays1_rand_uint; + */ diff --git a/include/dieharder/dab_opso2.h b/include/dieharder/dab_opso2.h new file mode 100644 index 0000000..ef8083f --- /dev/null +++ b/include/dieharder/dab_opso2.h @@ -0,0 +1,58 @@ +/* + * dab_opso2 test header. + */ + +/* + * function prototype + */ +int dab_opso2(Test **test, int irun); + +static Dtest dab_opso2_dtest __attribute__((unused)) = { + "DAB OPSO2", + "dab_opso2", + "\ +#==================================================================\n\ +# DAB OPSO2 Test\n\ +# This test is misnamed. It is an evolution of the OPSO test from\n\ +# the original Diehard program. However, it does not use\n\ +# DAB OPSO2 Test\n\ +# This test is misnamed. It is an evolution of the OPSO test from\n\ +# the original Diehard program. However, it does not use\n\ +# overlapping samples. Additionally, it returns two p-values,\n\ +# the second of which follows the Pairs-Sparse-Occupancy part of\n\ +# the name. The first p-value effectively takes both letters from\n\ +# the same input word. However, that isn't any different from\n\ +# having 1-letter words, where each letter is twice as long.\n\ +# \n\ +# This verion uses 2^24 slots. The first p-value thus takes 24\n\ +# bits directly from each input word. The second p-value is based\n\ +# on two 12-bit letters from each of two words; each pair of input\n\ +# words will produce two output \"words\".\n\ +# \n\ +# This test will give a false positive for all generators with an\n\ +# output word of less than 24 bits.\n\ +# \n\ +# Note tsamples is set to 2^26 = 67108864, and cannot be varied.\n\ +#\n\ +# Diehard Overlapping Pairs Sparse Occupance (OPSO)\n\ +# The OPSO test considers 2-letter words from an alphabet of \n\ +# 1024 letters. Each letter is determined by a specified ten \n\ +# bits from a 32-bit integer in the sequence to be tested. OPSO \n\ +# generates 2^21 (overlapping) 2-letter words (from 2^21+1 \n\ +# \"keystrokes\") and counts the number of missing words---that \n\ +# is 2-letter words which do not appear in the entire sequence. \n\ +# That count should be very close to normally distributed with \n\ +# mean 141,909, sigma 290. Thus (missingwrds-141909)/290 should \n\ +# be a standard normal variable. The OPSO test takes 32 bits at \n\ +# a time from the test file and uses a designated set of ten \n\ +# consecutive bits. It then restarts the file for the next de- \n\ +# signated 10 bits, and so on. \n\ +# \n\ +#==================================================================\n", + 1, + 67108864, + 2, + dab_opso2, + 0 +}; + diff --git a/include/dieharder/libdieharder.h b/include/dieharder/libdieharder.h index eb55b9a..3227c23 100644 --- a/include/dieharder/libdieharder.h +++ b/include/dieharder/libdieharder.h @@ -116,6 +116,7 @@ double chisq_poisson(unsigned int *observed,double lambda,int kmax,unsigned int nsamp); double chisq_binomial(double *observed,double prob,unsigned int kmax,unsigned int nsamp); double chisq_pearson(double *observed,double *expected,int kmax); + double chisq_uint_uniform_gtest(uint *observed,long numItems,int kmax); double sample(void *testfunc()); double kstest(double *pvalue,int count); double kstest_kuiper(double *pvalue,int count); @@ -162,7 +163,6 @@ *======================================================================== */ extern unsigned int all; /* Flag to do all tests on selected generator */ - extern unsigned int binary; /* Flag to output rands in binary (with -o -f) */ extern unsigned int bits; /* bitstring size (in bits) */ extern unsigned int diehard; /* Diehard test number */ extern unsigned int generator; /* GSL generator id number to be tested */ diff --git a/include/dieharder/tests.h b/include/dieharder/tests.h index 3f77157..8c7246f 100644 --- a/include/dieharder/tests.h +++ b/include/dieharder/tests.h @@ -13,11 +13,13 @@ #include //#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -81,11 +83,13 @@ RGB_LAGGED_SUMS, RGB_LMN, //RGB_OPERM, + DAB_BIRTHDAYS1, DAB_BYTEDISTRIB, DAB_DCT, DAB_FILLTREE, DAB_FILLTREE2, DAB_MONOBIT2, + DAB_OPSO2, N_RGB_TESTS } Rgb_Tests; diff --git a/libdieharder/Makefile.am b/libdieharder/Makefile.am index 0873f9c..0f21804 100644 --- a/libdieharder/Makefile.am +++ b/libdieharder/Makefile.am @@ -32,11 +32,13 @@ libdieharder_la_SOURCES = \ bits.c \ chisq.c \ countx.c \ + dab_birthdays1.c \ dab_bytedistrib.c \ dab_dct.c \ dab_filltree.c \ dab_filltree2.c \ dab_monobit2.c \ + dab_opso2.c \ diehard_2dsphere.c \ diehard_3dsphere.c \ diehard_birthdays.c \ diff --git a/libdieharder/chisq.c b/libdieharder/chisq.c index 5a53bd9..ed8d523 100644 --- a/libdieharder/chisq.c +++ b/libdieharder/chisq.c @@ -198,6 +198,52 @@ double chisq_binomial(double *observed,double prob,unsigned int kmax,unsigned in } +/* + * Perform the g-test (related to Pearson's Chi Sq test), for a uniform + * distribution into a set of kmax bins. Performs an additional + * correction, which restricts it to handling uniform distributions only. + */ +double chisq_uint_uniform_gtest(uint *observed,long numItems,int kmax) +{ + uint i,j,k; + double delchisq,chisq,pvalue; + double expected = (double) numItems / kmax; + + chisq = 0.0; + for(k = 0;k < kmax;k++){ + if (observed[k] == 0) continue; + delchisq = 2.0 * ((double) observed[k] * log((double) observed[k] / expected)); + chisq += delchisq; + if(verbose){ + printf("%u: observed = %u, expected = %f, delchisq = %f, chisq = %f\n", + k,observed[k],expected,delchisq,chisq); + } + } + + if(verbose){ + printf("Evaluated chisq = %f for %u k values\n",chisq,kmax); + } + + /* Apply correction; from Wikipedia, citing Smith, P. J., Rae, D. S., Manderscheid, + * R. W. and Silbergeld, S. (1981). "Approximating the Moments and Distribution of + * the Likelihood Ratio Statistic for Multinomial Goodness of Fit" + */ + chisq /= 1.0 + ((double) (kmax + 1) / (6.0 * numItems)) + + ((double) (kmax * kmax) / (6.0 * numItems * numItems)); + + /* + * Now evaluate the corresponding pvalue. The only real question + * is what is the correct number of degrees of freedom. We have + * kmax bins, so it should be kmax-1. + */ + pvalue = gsl_sf_gamma_inc_Q((double)(kmax-1)/2.0,fabs(chisq)/2.0); + if(verbose){ + printf("pvalue = %f in chisq_pearson.\n",pvalue); + } + + return(pvalue); +} + /* * Contributed by David Bauer to do a Pearson chisq on a 2D * histogram. diff --git a/libdieharder/dab_birthdays1.c b/libdieharder/dab_birthdays1.c new file mode 100644 index 0000000..3eb8b2f --- /dev/null +++ b/libdieharder/dab_birthdays1.c @@ -0,0 +1,242 @@ +/* + * ======================================================================== + * $Id: diehard_birthdays.c 250 2006-10-10 05:02:26Z rgb $ + * + * See copyright in copyright.h and the accompanying file COPYING + * ======================================================================== + */ + +/* + * ======================================================================== + * This is a version of the Diehard Birthdays test, modified from the + * original Dieharder implementation of it. + * + * This is a BIRTHDAY SPACINGS TEST + * Choose m birthdays in a year of n days. List the spacings between + * the birthdays. If j is the number of values that occur more than + * once in that list, then j is asympotically Poisson distributed with + * mean lambda = m^3/(4n). A Chi-Sq test is performed comparing the + * seen distribution of repeated spacings to the Poisson distribution. + * Simulations show that the approximation is better for larger n and + * smaller lambda. However, since for any given run j must be an + * integer, a small lambda value requires more runs to build up a good + * statistic. This test uses m=1700 as the default, but it may + * changed (via the -n (ntuple) option), up to a maximum value of + * 4096. The value of n is fixed by the choice of generator, with + * n=2^r, where r is the number of bits per word in the generator's + * output (a maximum of 32 for this version of Dieharder). This test + * prefers a larger t-count (-t option) and p-value samples set to 1 + * (-p 1, which is the default). + * + * Be careful when running this test against generators with reduced + * word sizes, as it may give false positives. When it doubt, check + * against an assumed good generator that is set to produce the same + * size output. As an example, for testing a generator with an output + * size of 20 bits, using "-n 50 -t 8000" produced a test that + * repeated passed an assumed good generator at "-p 100", but had + * trouble at "-p 500". Alternately, raising the t-count also shows + * that m of 50 isn't low enough to give a good approximation. For + * long tests of generators with an output size smaller than 30 bits, + * roducing the target by simulation instead of relying on the + * Poisson approximation will probably be necessary. + * +*======================================================================== + */ + + +#include +#define NMS 4096 /* Maximum value */ +#define DEFAULT_NMS 1700 + +static double lambda; +static unsigned int *intervals; +static unsigned int nms,nbits,kmax; + +int dab_birthdays1(Test **test, int irun) +{ + + uint i,k,t,m,mnext; + uint *js; + uint rand_uint[NMS]; + + double binfreq; + + /* + * for display only. + */ + test[0]->ntuple = rmax_bits; + + nms = ntuple == 0 ? DEFAULT_NMS : ntuple; + nbits = rmax_bits; + + /* + * This is the one thing that matters. We're going to make the + * exact poisson distribution we expect below, and lambda has to + * be right. lambda = nms^3/4n where n = 2^nbits, which is: + * lambda = (2^9)^3/(2^2 * 2^24) = 2^27/2^26 = 2.0 + * for Marsaglia's defaults. Small changes in nms make big changes + * in lambda, but we can easily pick e.g. nbits = 30, nms = 2048 + * lambda = (2^11)^3/2^32 = 2.0 + * and get the same test, but with a lot more samples and perhaps a + * slightly smoother result. + */ + // lambda = .53; /* Target */ + // lambda = 4.1; /* TEMPORARY */ + // nms = (uint) pow(lambda * pow(2.0, (double) nbits + 2.0), 1.0 / 3.0); + + if (nms > NMS) nms = NMS; /* Can't be larger than allocated space. */ + lambda = (double) nms*nms*nms/pow(2.0,(double)nbits+2.0); + /* printf("\tdab_birthdays: nms=%d, lambda = %f\n", nms, lambda); */ + + /* + * Allocate memory for intervals + */ + intervals = (unsigned int *)malloc(nms*sizeof(unsigned int)); + + /* + * This should be more than twice as many slots as we really + * need for the Poissonian tail. We're going to sample tsamples + * times, and we only want to keep the histogram out to where + * it has a reasonable number of hits/degrees of freedom, just + * like we do with all the chisq's built on histograms. + */ + kmax = 1; + while((binfreq = test[0]->tsamples*gsl_ran_poisson_pdf(kmax,lambda)) > 5) { + /* if (irun == 0) printf("\tbin[%d] = %.1f\n", kmax, binfreq); */ + kmax++; + } + /* if (irun == 0) printf("\tbin[%d] = %.1f\n", kmax, binfreq); */ + /* Cruft: printf("binfreq[%u] = %f\n",kmax,binfreq); */ + kmax++; /* and one to grow on...*/ + /* if (irun == 0) printf("\tbin[%d] = %.1f\n", kmax, test[0]->tsamples*gsl_ran_poisson_pdf(kmax, lambda)); */ + + /* + * js[kmax] is the histogram we increment using the + * count of repeated intervals as an index. Clear it. + */ + js = (unsigned int *)malloc(kmax*sizeof(unsigned int)); + for(i=0;itsamples;t++) { + /* Fill the array with nms samples; each will be rmax_bits long */ + for(m = 0;m= %u: skipping increment of js[%u]\n",k,kmax,k); + } + } + + } + + + /* + * Let's sort the result (for fun) and print it out for each bit + * position. + */ + MYDEBUG(D_DIEHARD_BDAY){ + printf("#==================================================================\n"); + printf("# This is the repeated interval histogram:\n"); + for(k=0;kpvalues[irun] = chisq_poisson(js,lambda,kmax,test[0]->tsamples); + MYDEBUG(D_DIEHARD_BDAY){ + printf("# diehard_birthdays(): test[0]->pvalues[%u] = %10.5f\n",irun,test[0]->pvalues[irun]); + } + + nullfree(intervals); + nullfree(js); + + return(0); + +} + diff --git a/libdieharder/dab_filltree2.c b/libdieharder/dab_filltree2.c index 59cbd52..0a4e11b 100644 --- a/libdieharder/dab_filltree2.c +++ b/libdieharder/dab_filltree2.c @@ -27,69 +27,69 @@ typedef unsigned char uchar; -static double targetData1[32] __attribute__((unused)) = { // size=32, generated from 3e9 samples -0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00, -9.76265666667e-04, 3.90648133333e-03, 9.42791500000e-03, 1.77898240000e-02, -2.88606903333e-02, 4.21206876667e-02, 5.67006123333e-02, 7.13000270000e-02, -8.43831060000e-02, 9.43500813333e-02, 9.97647353333e-02, 9.97074473333e-02, -9.39825660000e-02, 8.32745740000e-02, 6.90631193333e-02, 5.33001223333e-02, -3.80133193333e-02, 2.48386790000e-02, 1.47058170000e-02, 7.79203100000e-03, -3.63280466667e-03, 1.45833733333e-03, 4.88868000000e-04, 1.31773666667e-04, -2.63546666667e-05, 3.51800000000e-06, 2.42333333333e-07, 0.00000000000e+00 +static double targetData1[32] = { // size=32, exact +0,0,0,0, +0.0009765625,0.00390625,0.009429931640625,0.0177898406982422, +0.0288577079772949,0.0421273708343506,0.0566971004009247,0.0712958071380854, +0.0843847030773759,0.0943488389020786,0.0997659675704199,0.0997059316159721, +0.0939841693480048,0.0832818323431184,0.0690661567389839,0.0533005473930537, +0.0380100025885399,0.0248326931670262,0.0147065833262707,0.00779190452613503, +0.00363217487506055,0.00145753559952102,0.000488833092230661,0.000131328293435103, +2.64480590945694E-05,3.54215077159411E-06,2.36143384772941E-07,0 }; -static double targetData2[64] __attribute__((unused)) = { // size=64, generated from 3e9 samples -0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00, -0.00000000000e+00, 3.03990000000e-05, 1.52768666667e-04, 4.47074666667e-04, -1.00459133333e-03, 1.91267566667e-03, 3.25090066667e-03, 5.08490633333e-03, -7.45162400000e-03, 1.03865720000e-02, 1.38770320000e-02, 1.78957393333e-02, -2.23788223333e-02, 2.72281453333e-02, 3.23125090000e-02, 3.74760433333e-02, -4.25407143333e-02, 4.72809176667e-02, 5.15021953333e-02, 5.49909926667e-02, -5.75592136667e-02, 5.90709556667e-02, 5.94040870000e-02, 5.85191160000e-02, -5.64374923333e-02, 5.32542393333e-02, 4.91296690000e-02, 4.42833420000e-02, -3.89462263333e-02, 3.33966756667e-02, 2.78853806667e-02, 2.26466276667e-02, -1.78641310000e-02, 1.36666050000e-02, 1.01212106667e-02, 7.24765200000e-03, -5.00267066667e-03, 3.32686800000e-03, 2.12376733333e-03, 1.29971400000e-03, -7.59987666667e-04, 4.23040000000e-04, 2.23585333333e-04, 1.11777000000e-04, -5.27836666667e-05, 2.33956666667e-05, 9.67033333333e-06, 3.63066666667e-06, -1.29500000000e-06, 4.04666666667e-07, 1.24666666667e-07, 2.80000000000e-08, -8.33333333333e-09, 1.33333333333e-09, 0.00000000000e+00, 0.00000000000e+00, -0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00, 0.00000000000e+00 +static double targetData2[64] = { // size=64, exact +0,0,0,0, +0,3.0517578125E-05,0.000152587890625,0.000447511672973633, +0.00100504606962204,0.00191331189125776,0.00325121614150703,0.00508295510371681, +0.0074537967325341,0.010386477114821,0.01387787701767,0.0178959820102482, +0.0223773962832272,0.0272258597146006,0.0323123111057767,0.037477047033474, +0.0425344401223118,0.0472804976516124,0.0515032630016068,0.054995703372204, +0.0575703200065506,0.0590743134364583,0.059403803645923,0.0585154187223134, +0.0564335947898278,0.0532522211136633,0.0491298234656811,0.0442782596874916, +0.0389458016037146,0.0333963492622822,0.0278871984136495,0.0226481070118066, +0.0178642831703181,0.0136653347694681,0.0101212706649298,0.00724550563200844, +0.00500372727772275,0.00332665901853009,0.00212436000041605,0.00129979403042639, +0.000759908933033483,0.000423232777916195,0.000223807074715186,0.000111949408083441, +5.27471340854235E-05,2.32990903233354E-05,9.59581500880408E-06,3.66187793596757E-06, +1.28535889122115E-06,4.11412264488149E-07,1.18830307817474E-07,3.05774193583426E-08, +6.89718891789191E-09,1.33534096493464E-09,2.15638141871131E-10,2.78699898584813E-11, +2.70138143225745E-12,1.74464217499961E-13,5.62787798386969E-15,0 }; -static double targetData[128] = { // size=128, generated from 6e9 samples -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,4.77166666667e-07,2.85516666667e-06, -9.85200000000e-06,2.55708333333e-05,5.54055000000e-05,1.06338000000e-04, -1.86151333333e-04,3.02864333333e-04,4.66803833333e-04,6.86296166667e-04, -9.71489833333e-04,1.33154316667e-03,1.77454416667e-03,2.31186450000e-03, -2.94724100000e-03,3.68750350000e-03,4.53733233333e-03,5.50155300000e-03, -6.58318550000e-03,7.77896366667e-03,9.08643266667e-03,1.05029766667e-02, -1.20238296667e-02,1.36316733333e-02,1.53215870000e-02,1.70715931667e-02, -1.88714935000e-02,2.06986750000e-02,2.25274171667e-02,2.43387205000e-02, -2.61018481667e-02,2.77880516667e-02,2.93792388333e-02,3.08369918333e-02, -3.21362530000e-02,3.32571148333e-02,3.41773491667e-02,3.48649186667e-02, -3.53142736667e-02,3.55033806667e-02,3.54345836667e-02,3.50962926667e-02, -3.44943555000e-02,3.36465076667e-02,3.25496588333e-02,3.12430565000e-02, -2.97450508333e-02,2.80761470000e-02,2.62786038333e-02,2.43732936667e-02, -2.24096238333e-02,2.04087746667e-02,1.84149658333e-02,1.64520138333e-02, -1.45553360000e-02,1.27460330000e-02,1.10448845000e-02,9.47002933333e-03, -8.03035333333e-03,6.73145016667e-03,5.58088483333e-03,4.56875066667e-03, -3.69710383333e-03,2.95267200000e-03,2.32891533333e-03,1.81311533333e-03, -1.39093383333e-03,1.05322383333e-03,7.86386833333e-04,5.78384666667e-04, -4.18726333333e-04,2.98206500000e-04,2.09200000000e-04,1.44533833333e-04, -9.79670000000e-05,6.52683333333e-05,4.27531666667e-05,2.73350000000e-05, -1.72115000000e-05,1.06390000000e-05,6.38166666667e-06,3.79950000000e-06, -2.21116666667e-06,1.25500000000e-06,6.95833333333e-07,3.83500000000e-07, -1.98000000000e-07,9.91666666667e-08,4.73333333333e-08,2.66666666667e-08, -1.13333333333e-08,5.50000000000e-09,2.66666666667e-09,1.00000000000e-09, -6.66666666667e-10,3.33333333333e-10,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, -0.00000000000e+00,0.00000000000e+00,0.00000000000e+00,0.00000000000e+00, +static double targetData[128] = { // size=128, exact +0,0,0,0, +0,0,4.76837158203125E-07,2.86102294921875E-06, +9.85432416200638E-06,2.55650229519233E-05,5.54892931177164E-05,0.000106380728425393, +0.000186047948883328,0.00030311244166753,0.000466749839810675,0.000686429646575251, +0.000971661742344283,0.00133175306183679,0.00177557452024558,0.00231133638265425, +0.00294636954701581,0.00368691038033341,0.00453788756811422,0.00550271070203945, +0.00658306187040692,0.00777869319248569,0.00908723493764983,0.0105040204946228, +0.0120219359149386,0.0136313029599996,0.0153198054448571,0.0170724691056809, +0.0188717051355292,0.0206974268550898,0.0225272476458152,0.0243367662285437, +0.0260999426084925,0.0277895645547568,0.0293778004147126,0.0308368295109554, +0.0321395365179289,0.0332602513106519,0.0341755111120263,0.0348648176685728, +0.0353113590065781,0.0355026634060874,0.0354311528899465,0.035094565005384, +0.0344962151263554,0.0336450769511817,0.0325556661902256,0.031247721351896, +0.0297456856036487,0.0280780043276814,0.0262762635190492,0.0243742038353947, +0.0224066531487575,0.0204084261828087,0.0184132426894747,0.0164527152702953, +0.0145554542687502,0.0127463303124921,0.0110459255065655,0.00947019265623616, +0.00803032911044744,0.00673285885868451,0.00557990440062551,0.00456961957621112, +0.00369674675854223,0.0029532570905934,0.00232903101043375,0.00181253806897198, +0.00139147961934567,0.00105336473071676,0.00078599787487321,0.000577865708395529, +0.00041841881087046,0.000298251837753283,0.000209191678462234,0.000144307562906168, +9.78595402557837E-05,6.520247003055E-05,4.26618834333199E-05,2.73961611501714E-05, +1.7256844789337E-05,1.06559535882152E-05,6.44626192690798E-06,3.81786954553569E-06, +2.2122334819058E-06,1.25320523087539E-06,6.9352747611892E-07,3.74634041706112E-07, +1.97372359804204E-07,1.01324484479249E-07,5.06387688071287E-08,2.4612770858879E-08, +1.16222051625842E-08,5.32575018016149E-09,2.36548132737708E-09,1.01707372778487E-09, +4.22755189488443E-10,1.69627368106376E-10,6.55983218678784E-11,2.44087465306515E-11, +8.72288816181913E-12,2.98796462656132E-12,9.78930546827611E-13,3.06028628289706E-13, +9.10492627063714E-14,2.57067414678584E-14,6.86573813128305E-15,1.72840983639153E-15, +4.08488586235289E-16,9.02205965579907E-17,1.85248682021962E-17,3.5148157083845E-18, +6.11888831372621E-19,9.69194996799213E-20,1.38262127072485E-20,1.75429503989084E-21, +1.94858024047957E-22,1.85585143679842E-23,1.47329706646149E-24,9.35788996513105E-26, +4.45783630198697E-27,1.41554277734465E-28,2.24689329737247E-30,0 }; static inline int insertBit(uint x, uchar *array, uint *i, uint *d); @@ -104,7 +104,7 @@ int dab_filltree2(Test **test, int irun) { uint x; uint start = 0; uint end = 0; - double *positionCounts; + uint *positionCounts; uint bitCount; test[0]->ntuple = 0; @@ -115,8 +115,8 @@ int dab_filltree2(Test **test, int irun) { memset(counts, 0, sizeof(double) * target); - positionCounts = (double *) malloc(sizeof(double) * size/2); - memset(positionCounts, 0, sizeof(double) * size/2); + positionCounts = (uint *) malloc(sizeof(uint) * size/2); + memset(positionCounts, 0, sizeof(uint) * size/2); /* Calculate expected counts. */ for (i = 0; i < target; i++) { @@ -162,7 +162,7 @@ int dab_filltree2(Test **test, int irun) { /* Second p-value is calculated against a uniform distribution. */ for (i = 0; i < size/2; i++) expected[i] = test[0]->tsamples/(size/2); - test[1]->pvalues[irun] = chisq_pearson(positionCounts, expected, size/2); + test[1]->pvalues[irun] = chisq_uint_uniform_gtest(positionCounts, test[0]->tsamples, size/2); nullfree(positionCounts); diff --git a/libdieharder/dab_opso2.c b/libdieharder/dab_opso2.c new file mode 100644 index 0000000..c37ad8f --- /dev/null +++ b/libdieharder/dab_opso2.c @@ -0,0 +1,134 @@ +/* + * ======================================================================== + * $Id: diehard_opso.c 231 2006-08-22 16:18:05Z rgb $ + * + * See copyright in copyright.h and the accompanying file COPYING + * ======================================================================== + */ + +/* + * ======================================================================== + * DAB OPSO2 Test + * This test is misnamed. It is an evolution of the OPSO test from + * the original Diehard program. However, it does not use + * overlapping samples. Additionally, it returns two p-values, + * the second of which follows the Pairs-Sparse-Occupancy part of + * the name. The first p-value effectively takes both letters from + * the same input word. However, that isn't any different from + * having 1-letter words, where each letter is twice as long. + * + * This verion uses 2^24 slots. The first p-value thus takes 24 + * bits directly from each input word. The second p-value is based + * on two 12-bit letters from each of two words; each pair of input + * words will produce two output "words". + * + * This test will give a false positive for all generators with an + * output word of less than 24 bits. + * + * OPSO means Overlapping-Pairs-Sparse-Occupancy :: + * The OPSO test considers 2-letter words from an alphabet of :: + * 1024 letters. Each letter is determined by a specified ten :: + * bits from a 32-bit integer in the sequence to be tested. OPSO :: + * generates 2^21 (overlapping) 2-letter words (from 2^21+1 :: + * "keystrokes") and counts the number of missing words---that :: + * is 2-letter words which do not appear in the entire sequence. :: + * That count should be very close to normally distributed with :: + * mean 141,909, sigma 290. Thus (missingwrds-141909)/290 should :: + * be a standard normal variable. The OPSO test takes 32 bits at :: + * a time from the test file and uses a designated set of ten :: + * consecutive bits. It then restarts the file for the next de- :: + * signated 10 bits, and so on. :: + * + *======================================================================== + */ + + +#include + +int dab_opso2(Test **test, int irun) { + uint i,j,k,t; + unsigned int j0 = 0, k0 = 0; + Xtest ptest1, ptest2; + unsigned int w1[524288], w2[524288]; /* 2^24 positions = 2^5 * 2^19; 2^19 = 524288 */ + unsigned int mask[32]; /* Masks to take the place of a bitset operation */ + + for (i = 0; i < 32; i++) mask[i] = 1<ntuple = 0; + test[1]->ntuple = 1; + + /* If the generator word size is too small, abort early. */ + if (rmax_bits < 24) { + test[0]->pvalues[irun] = 0.5; + test[1]->pvalues[irun] = 0.5; + if (irun == 0) { + printf("OPSO2: Requires rmax_bits to be >= 24\n"); + } + return 0; + } + + /* slots = 2^24, words = 2^26 */ + // New calculations from ossotarg.c + // y = 307285.393182468 + // sigma = 528.341514924662 + ptest1.y = 307285.393182468; + ptest1.sigma = 528.341514924662; + ptest2.y = ptest1.y; + ptest2.sigma = ptest1.sigma; + test[0]->tsamples = 1<<26; + + /* Zero the column */ + memset(w1, 0, sizeof(unsigned int) * 524288); + memset(w2, 0, sizeof(unsigned int) * 524288); + + /* The main loop looks more complicated, because it is + * two tests in one loop. w1 tests for patterns within + * each word. w2 tests for pattens in pairs of words. + * + * Yes, that means that w2 is the OPSO, while w1 is + * something else. + * + * Generally, w2 is the stronger test. However, + * generator 18 fails w1 much easier than w2. + */ + for(t=0;ttsamples;t++) { /* Start main loop */ + if(t%2 == 0) { // Get two inputs every other round + j0 = gsl_rng_get(rng); + k0 = gsl_rng_get(rng); + + w1[(j0 >> 5) % 524288] |= mask[j0 % 32]; + j = j0 & 0x0fff; + k = (j << 12) | (k0 & 0x0fff); + } else { + w1[(k0 >> 5) % 524288] |= mask[k0 % 32]; + j = (j0 >> 12) & 0x0fff; + k = (j << 12) | ((k0 >> 12) & 0x0fff); + } + + w2[k >> 5] |= mask[k % 32]; + } + + /* + * Now we count the holes, so to speak + */ + j0 = 0; + k0 = 0; + ptest1.x = 0; + ptest2.x = 0; + for (i = 0; i < 32; i++) { + for (j = 0; j < 524288; j++) { + if ((w1[j] & mask[i]) == 0) j0++; + if ((w2[j] & mask[i]) == 0) k0++; + } + } + + ptest1.x = j0; + ptest2.x = k0; + Xtest_eval(&ptest1); + Xtest_eval(&ptest2); + test[0]->pvalues[irun] = ptest1.pvalue; + test[1]->pvalues[irun] = ptest2.pvalue; + + return(0); +} + diff --git a/libdieharder/dieharder_test_types.c b/libdieharder/dieharder_test_types.c index 40d731e..c3a8c01 100644 --- a/libdieharder/dieharder_test_types.c +++ b/libdieharder/dieharder_test_types.c @@ -150,6 +150,13 @@ void dieharder_test_types() ADD_TEST(&dab_monobit2_dtest); dh_num_other_tests++; + ADD_TEST(&dab_birthdays1_dtest); + dh_num_other_tests++; + + ADD_TEST(&dab_opso2_dtest); + dh_num_other_tests++; + + /* * This is the total number of DOCUMENTED tests reported back to the * UIs. Note that dh_num_user_tests is counted up by add_ui_tests(),