forked from lemire/rollinghashcpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrabinkarphash.h
84 lines (68 loc) · 2.87 KB
/
rabinkarphash.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#ifndef KARPRABINHASH
#define KARPRABINHASH
#include "characterhash.h"
/**
* This is a randomized version of the Karp-Rabin hash function.
* Each instance is a rolling hash function meant to hash streams of characters.
* Each new instance of this class comes with new random keys.
*
* Recommended usage to get L-bit hash values over n-grams:
* KarpRabinHash<> hf(n,L );
* for(uint32 k = 0; k<n;++k) {
* unsigned char c = ... ; // grab some character
* hf.eat(c); // feed it to the hasher
* }
* while(...) { // go over your string
* hf.hashvalue; // at all times, this contains the hash value
* unsigned char c = ... ;// points to the next character
* unsigned char out = ...; // character we want to forget
* hf.update(out,c); // update hash value
* }
*/
template <typename hashvaluetype = uint32, typename chartype = unsigned char>
class KarpRabinHash {
public:
// myn is the length of the sequences, e.g., 3 means that you want to hash sequences of 3 characters
// mywordsize is the number of bits you which to receive as hash values, e.g., 19 means that the hash values are 19-bit integers
KarpRabinHash(int myn, int mywordsize=19) : hashvalue(0),n(myn),
wordsize(mywordsize),
hasher( maskfnc<hashvaluetype>(wordsize)),
HASHMASK(maskfnc<hashvaluetype>(wordsize)),BtoN(1) {
for (int i=0; i < n ; ++i) {
BtoN *= B;
BtoN &= HASHMASK;
}
}
// this is a convenience function, use eat,update and .hashvalue to use as a rolling hash function
template<class container>
hashvaluetype hash(container & c) {
hashvaluetype answer(0);
for(uint k = 0; k<c.size(); ++k) {
hashvaluetype x(1);
for(uint j = 0; j< c.size()-1-k; ++j) {
x= (x * B) & HASHMASK;
}
x= (x * hasher.hashvalues[c[k]]) & HASHMASK;
answer=(answer+x) & HASHMASK;
}
return answer;
}
// add inchar as an input, this is used typically only at the start
// the hash value is updated to that of a longer string (one where inchar was appended)
void eat(chartype inchar) {
hashvalue = (B*hashvalue + hasher.hashvalues[inchar] )& HASHMASK;
}
// add inchar as an input and remove outchar, the hashvalue is updated
// this function can be used to update the hash value from the hash value of [outchar]ABC to the hash value of ABC[inchar]
void update(chartype outchar, chartype inchar) {
hashvalue = (B*hashvalue + hasher.hashvalues[inchar] - BtoN * hasher.hashvalues[outchar]) & HASHMASK;
}
hashvaluetype hashvalue;
int n;
const int wordsize;
CharacterHash<hashvaluetype,chartype> hasher;
const hashvaluetype HASHMASK;
hashvaluetype BtoN;
static const hashvaluetype B=37;
};
#endif