Skip to content

Commit

Permalink
Merge pull request #18 from mbroz/sync-rgb
Browse files Browse the repository at this point in the history
Sync commits with dieharder-rgb repo
  • Loading branch information
eddelbuettel authored Jan 26, 2024
2 parents cb20f26 + a4c020b commit ea8ea11
Show file tree
Hide file tree
Showing 14 changed files with 645 additions and 86 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2016-07-21 David Bauer <[email protected]>

* libdieharder/dab_filltree2.c: Updated to a g-test with correction
with exact computed target
* libdieharder/chisq.c: Correction applied
* include/dieharder/libdieharder.h: tie-in new generator


------------------------------------------------------------------------
r529 | rgbatduke | 2011-04-01 13:49:31 -0400 (Fri, 01 Apr 2011) | 117 lines

Expand Down
38 changes: 18 additions & 20 deletions dieharder/choose_rng.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,13 @@ int select_rng(int gennum,char *genname,unsigned int initial_seed)
}
rng = gsl_rng_alloc(dh_rng_types[gennum]);

/*
* Here we evaluate the speed of the generator if the rate flag is set.
*/
if(tflag & TRATE){
time_rng();
}

/*
* OK, here's the deal on seeds. If strategy = 0, we set the seed
* ONE TIME right HERE to either a randomly selected seed or whatever
Expand Down Expand Up @@ -240,19 +247,11 @@ int select_rng(int gennum,char *genname,unsigned int initial_seed)
}

/*
* Set the seed. We do this here just so it is set for the timing
* test. It may or may not ever be reset.
* Set the seed. It may or may not ever be reset.
*/
gsl_rng_set(rng,seed);

/*
* Here we evaluate the speed of the generator if the rate flag is set.
*/
if(tflag & TRATE){
time_rng();
}

/*
/*
* Before we quit, we must count the number of significant bits in the
* selected rng AND create a mask. Note that several routines in bits
* WILL NOT WORK unless this is all set correctly, which actually
Expand Down Expand Up @@ -377,6 +376,13 @@ int select_XOR()
*/
rng = gsl_rng_alloc(dh_rng_types[14]);

/*
* Here we evaluate the speed of the generator if the rate flag is set.
*/
if(tflag & TRATE){
time_rng();
}

/*
* OK, here's the deal on seeds. If strategy = 0, we set the seed
* ONE TIME right HERE to either a randomly selected seed or whatever
Expand Down Expand Up @@ -426,19 +432,11 @@ int select_XOR()
}

/*
* Set the seed. We do this here just so it is set for the timing
* test. It may or may not ever be reset.
* Set the seed. It may or may not ever be reset.
*/
gsl_rng_set(rng,seed);

/*
* Here we evaluate the speed of the generator if the rate flag is set.
*/
if(tflag & TRATE){
time_rng();
}

/*
/*
* We don't really need this anymore, I don't think. But we'll leave it
* for now.
*/
Expand Down
3 changes: 2 additions & 1 deletion dieharder/parsecl.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ void parsecl(int argc, char **argv)
all = YES;
break;
case 'B':
binary = 1;
output_format = 0;
fprintf(stderr, "Warning: \"-B\" option is deprecated. Use \"-O 0\" instead.\n");
break;
case 'c':
/* printf("Setting separator to %c\n",optarg[0]); */
Expand Down
1 change: 0 additions & 1 deletion dieharder/set_globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ void set_globals()
* by a flag with the same first letter. In order:
*/
all = NO; /* Default is to NOT do all the tests */
binary = NO; /* Do output a random stream in binary (with -o) */
dtest_num = -1; /* -1 means no test selected */
dtest_name[0] = (char)0; /* empty test name is also default */
filename[0] = (char)0; /* No input file */
Expand Down
60 changes: 60 additions & 0 deletions include/dieharder/dab_birthdays1.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* dab_birthdays1 test header.
*/

/*
* function prototype
*/
int dab_birthdays1(Test **test, int irun);

/*
 * Descriptor for the dab_birthdays1 test: display name, short name, the
 * help text shown to the user, three numeric settings, the test entry
 * point and a trailing 0.
 *
 * NOTE(review): the three numbers look like the default p-value sample
 * count (1), the default t-count (2000) and the number of p-values the
 * test returns (1), which agrees with the help text below -- confirm
 * against the Dtest definition, which is not visible from this header.
 */
static Dtest dab_birthdays1_dtest __attribute__((unused)) = {
  "Diehard Birthdays Test",
  "dab_birthdays1",
  "\n\
#==================================================================\n\
# Diehard \"Birthdays\" test (modified).\n\
# This is a version of the Diehard Birthdays test, modified from the\n\
# original Dieharder implementation of it.\n\
# \n\
# This is a BIRTHDAY SPACINGS TEST\n\
# Choose m birthdays in a year of n days. List the spacings between \n\
# the birthdays. If j is the number of values that occur more than \n\
# once in that list, then j is asymptotically Poisson distributed with \n\
# mean lambda = m^3/(4n). A Chi-Sq test is performed comparing the \n\
# seen distribution of repeated spacings to the Poisson distribution. \n\
# Simulations show that the approximation is better for larger n and \n\
# smaller lambda. However, since for any given run j must be an \n\
# integer, a small lambda value requires more runs to build up a good \n\
# statistic. This test uses m=1700 as the default, but it may be \n\
# changed (via the -n (ntuple) option), up to a maximum value of \n\
# 4096. The value of n is fixed by the choice of generator, with \n\
# n=2^r, where r is the number of bits per word in the generator's \n\
# output (a maximum of 32 for this version of Dieharder). This test \n\
# prefers a larger t-count (-t option) and p-value samples set to 1 \n\
# (-p 1, which is the default).\n\
# \n\
# Be careful when running this test against generators with reduced \n\
# word sizes, as it may give false positives. When in doubt, check \n\
# against an assumed good generator that is set to produce the same \n\
# size output. As an example, for testing a generator with an output \n\
# size of 20 bits, using \"-n 50 -t 8000\" produced a test that \n\
# repeatedly passed an assumed good generator at \"-p 100\", but had \n\
# trouble at \"-p 500\". Alternately, raising the t-count also shows \n\
# that m of 50 isn't low enough to give a good approximation. For \n\
# long tests of generators with an output size smaller than 30 bits, \n\
# producing the target by simulation instead of relying on the \n\
# Poisson approximation will probably be necessary.\n\
# \n\
#==================================================================\n",
  1,
  2000,
  1,
  dab_birthdays1,
  0
};

/*
* Global variables
uint dab_birthdays1_nms,dab_birthdays1_nbits;
uint *dab_birthdays1_rand_uint;
*/
58 changes: 58 additions & 0 deletions include/dieharder/dab_opso2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* dab_opso2 test header.
*/

/*
* function prototype
*/
int dab_opso2(Test **test, int irun);

/*
 * Descriptor for the dab_opso2 test: display name, short name, the help
 * text shown to the user, three numeric settings, the test entry point
 * and a trailing 0.
 *
 * NOTE(review): the three numbers look like the default p-value sample
 * count (1), the fixed tsamples value (67108864 = 2^26, as stated in the
 * help text) and the number of p-values returned (2, also per the help
 * text) -- confirm against the Dtest definition, which is not visible
 * from this header.
 */
static Dtest dab_opso2_dtest __attribute__((unused)) = {
  "DAB OPSO2",
  "dab_opso2",
  "\
#==================================================================\n\
# DAB OPSO2 Test\n\
# This test is misnamed. It is an evolution of the OPSO test from\n\
# the original Diehard program. However, it does not use\n\
# overlapping samples. Additionally, it returns two p-values,\n\
# the second of which follows the Pairs-Sparse-Occupancy part of\n\
# the name. The first p-value effectively takes both letters from\n\
# the same input word. However, that isn't any different from\n\
# having 1-letter words, where each letter is twice as long.\n\
# \n\
# This version uses 2^24 slots. The first p-value thus takes 24\n\
# bits directly from each input word. The second p-value is based\n\
# on two 12-bit letters from each of two words; each pair of input\n\
# words will produce two output \"words\".\n\
# \n\
# This test will give a false positive for all generators with an\n\
# output word of less than 24 bits.\n\
# \n\
# Note tsamples is set to 2^26 = 67108864, and cannot be varied.\n\
#\n\
# Diehard Overlapping Pairs Sparse Occupance (OPSO)\n\
# The OPSO test considers 2-letter words from an alphabet of \n\
# 1024 letters. Each letter is determined by a specified ten \n\
# bits from a 32-bit integer in the sequence to be tested. OPSO \n\
# generates 2^21 (overlapping) 2-letter words (from 2^21+1 \n\
# \"keystrokes\") and counts the number of missing words---that \n\
# is 2-letter words which do not appear in the entire sequence. \n\
# That count should be very close to normally distributed with \n\
# mean 141,909, sigma 290. Thus (missingwrds-141909)/290 should \n\
# be a standard normal variable. The OPSO test takes 32 bits at \n\
# a time from the test file and uses a designated set of ten \n\
# consecutive bits. It then restarts the file for the next de- \n\
# signated 10 bits, and so on. \n\
# \n\
#==================================================================\n",
  1,
  67108864,
  2,
  dab_opso2,
  0
};

2 changes: 1 addition & 1 deletion include/dieharder/libdieharder.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
double chisq_poisson(unsigned int *observed,double lambda,int kmax,unsigned int nsamp);
double chisq_binomial(double *observed,double prob,unsigned int kmax,unsigned int nsamp);
double chisq_pearson(double *observed,double *expected,int kmax);
double chisq_uint_uniform_gtest(uint *observed,long numItems,int kmax);
double sample(void *testfunc());
double kstest(double *pvalue,int count);
double kstest_kuiper(double *pvalue,int count);
Expand Down Expand Up @@ -162,7 +163,6 @@
*========================================================================
*/
extern unsigned int all; /* Flag to do all tests on selected generator */
extern unsigned int binary; /* Flag to output rands in binary (with -o -f) */
extern unsigned int bits; /* bitstring size (in bits) */
extern unsigned int diehard; /* Diehard test number */
extern unsigned int generator; /* GSL generator id number to be tested */
Expand Down
4 changes: 4 additions & 0 deletions include/dieharder/tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
#include <dieharder/rgb_minimum_distance.h>
//#include <dieharder/rgb_operm.h>
#include <dieharder/rgb_permutations.h>
#include <dieharder/dab_birthdays1.h>
#include <dieharder/dab_bytedistrib.h>
#include <dieharder/dab_dct.h>
#include <dieharder/dab_filltree.h>
#include <dieharder/dab_filltree2.h>
#include <dieharder/dab_monobit2.h>
#include <dieharder/dab_opso2.h>
#include <dieharder/diehard_birthdays.h>
#include <dieharder/diehard_operm5.h>
#include <dieharder/diehard_rank_32x32.h>
Expand Down Expand Up @@ -81,11 +83,13 @@
RGB_LAGGED_SUMS,
RGB_LMN,
//RGB_OPERM,
DAB_BIRTHDAYS1,
DAB_BYTEDISTRIB,
DAB_DCT,
DAB_FILLTREE,
DAB_FILLTREE2,
DAB_MONOBIT2,
DAB_OPSO2,
N_RGB_TESTS
} Rgb_Tests;

Expand Down
2 changes: 2 additions & 0 deletions libdieharder/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ libdieharder_la_SOURCES = \
bits.c \
chisq.c \
countx.c \
dab_birthdays1.c \
dab_bytedistrib.c \
dab_dct.c \
dab_filltree.c \
dab_filltree2.c \
dab_monobit2.c \
dab_opso2.c \
diehard_2dsphere.c \
diehard_3dsphere.c \
diehard_birthdays.c \
Expand Down
46 changes: 46 additions & 0 deletions libdieharder/chisq.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,52 @@ double chisq_binomial(double *observed,double prob,unsigned int kmax,unsigned in

}

/*
 * Perform the g-test (related to Pearson's Chi Sq test) of a set of bin
 * counts against a uniform distribution over kmax bins.  An additional
 * correction is applied to the statistic, which restricts this routine
 * to handling uniform distributions only.
 *
 *   observed  array of kmax bin counts
 *   numItems  total number of items; the expected count per bin is
 *             numItems/kmax
 *   kmax      number of bins (the loop does nothing for kmax < 1)
 *
 * Returns gsl_sf_gamma_inc_Q((kmax-1)/2, |chisq|/2), i.e. the p-value of
 * the corrected statistic for kmax-1 degrees of freedom.
 */
double chisq_uint_uniform_gtest(uint *observed,long numItems,int kmax)
{
  /*
   * Signed index: comparing an unsigned index against a negative kmax
   * would convert kmax to a huge unsigned value and run off the array.
   */
  int k;
  double delchisq,chisq,pvalue;
  /*
   * Work in double from the start.  kmax*kmax evaluated in int overflows
   * (undefined behaviour) as soon as kmax exceeds 46340.
   */
  double bins = (double) kmax;
  double items = (double) numItems;
  double expected = items / bins;

  chisq = 0.0;
  for(k = 0;k < kmax;k++){
    /* Empty bins are skipped: x*log(x) -> 0 as x -> 0, and log(0) is -inf. */
    if (observed[k] == 0) continue;
    delchisq = 2.0 * ((double) observed[k] * log((double) observed[k] / expected));
    chisq += delchisq;
    if(verbose){
      printf("%d: observed = %u, expected = %f, delchisq = %f, chisq = %f\n",
             k,observed[k],expected,delchisq,chisq);
    }
  }

  if(verbose){
    printf("Evaluated chisq = %f for %d k values\n",chisq,kmax);
  }

  /* Apply correction; from Wikipedia, citing Smith, P. J., Rae, D. S., Manderscheid,
   * R. W. and Silbergeld, S. (1981). "Approximating the Moments and Distribution of
   * the Likelihood Ratio Statistic for Multinomial Goodness of Fit"
   */
  chisq /= 1.0 + ((bins + 1.0) / (6.0 * items)) +
           ((bins * bins) / (6.0 * items * items));

  /*
   * Now evaluate the corresponding pvalue.  We have kmax bins, so the
   * number of degrees of freedom is kmax-1.
   */
  pvalue = gsl_sf_gamma_inc_Q((bins - 1.0)/2.0,fabs(chisq)/2.0);
  if(verbose){
    printf("pvalue = %f in chisq_uint_uniform_gtest.\n",pvalue);
  }

  return(pvalue);
}

/*
* Contributed by David Bauer to do a Pearson chisq on a 2D
* histogram.
Expand Down
Loading

0 comments on commit ea8ea11

Please sign in to comment.