1
- -- |
1
+ -- | A fast, space efficient Bloom filter implementation. A Bloom filter is a
2
+ -- set-like data structure that provides a probabilistic membership test.
2
3
--
3
- -- A fast, space efficient Bloom filter implementation. A Bloom
4
- -- filter is a set-like data structure that provides a probabilistic
5
- -- membership test.
4
+ -- * Queries do not give false negatives. When an element is added to a filter,
5
+ -- a subsequent membership test will definitely return 'True'.
6
6
--
7
- -- * Queries do not give false negatives. When an element is added to
8
- -- a filter, a subsequent membership test will definitely return
9
- -- 'True'.
7
+ -- * False positives /are/ possible. If an element has not been added to a
8
+ -- filter, a membership test /may/ nevertheless indicate that the element is
9
+ -- present.
10
10
--
11
- -- * False positives /are/ possible. If an element has not been added
12
- -- to a filter, a membership test /may/ nevertheless indicate that
13
- -- the element is present.
14
- --
15
-
16
11
module Data.BloomFilter.Blocked (
12
+ -- * Overview
13
+ -- $overview
14
+
17
15
-- * Types
18
16
Hash ,
19
17
Salt ,
@@ -57,6 +55,7 @@ module Data.BloomFilter.Blocked (
57
55
maxSizeBits ,
58
56
insert ,
59
57
insertMany ,
58
+ read ,
60
59
61
60
-- ** Conversion
62
61
freeze ,
@@ -68,6 +67,7 @@ module Data.BloomFilter.Blocked (
68
67
hashesWithSalt ,
69
68
insertHashes ,
70
69
elemHashes ,
70
+ readHashes ,
71
71
-- ** Prefetching
72
72
prefetchInsert ,
73
73
prefetchElem ,
@@ -80,23 +80,60 @@ import Data.Bits ((.&.))
80
80
import Data.Primitive.ByteArray (MutableByteArray )
81
81
import qualified Data.Primitive.PrimArray as P
82
82
83
- import Data.BloomFilter.Blocked.Calc
83
+ import Data.BloomFilter.Blocked.Calc (BitsPerEntry , BloomPolicy (.. ),
84
+ BloomSize (.. ), FPR , NumEntries , policyFPR , policyForBits ,
85
+ policyForFPR , sizeForBits , sizeForFPR , sizeForPolicy )
84
86
import Data.BloomFilter.Blocked.Internal hiding (deserialise )
85
87
import qualified Data.BloomFilter.Blocked.Internal as Internal
86
88
import Data.BloomFilter.Hash
87
89
88
- import Prelude hiding (elem , notElem )
90
+ import Prelude hiding (elem , notElem , read )
91
+
92
+ -- $setup
93
+ --
94
+ -- >>> import Text.Printf
95
+
96
+ -- $overview
97
+ --
98
+ -- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The
99
+ -- size determines the number of bits that should be used for the filter. Note
100
+ -- that a filter is fixed in size; it cannot be resized after creation.
101
+ --
102
+ -- The size can be specified by giving either a target false positive rate
103
+ -- (FPR) or a number of bits per element, plus the expected number of elements.
104
+ -- For example:
105
+ --
106
+ -- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements
107
+ -- with a false positive rate of 1 in 1000
108
+ --
109
+ -- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements
110
+ -- with 10 bits per element
111
+ --
112
+ -- Depending on the application it may be more important to target a fixed
113
+ -- amount of memory to use, or target a specific FPR.
114
+ --
115
+ -- As a very rough guide for filter sizes, here is a range of FPRs and the
116
+ -- corresponding bits per element:
117
+ --
118
+ -- * FPR of 1e-1 requires approximately 4.8 bits per element
119
+ -- * FPR of 1e-2 requires approximately 9.8 bits per element
120
+ -- * FPR of 1e-3 requires approximately 15.8 bits per element
121
+ -- * FPR of 1e-4 requires approximately 22.6 bits per element
122
+ -- * FPR of 1e-5 requires approximately 30.2 bits per element
123
+ --
124
+ -- >>> fmap (printf "%0.1f" . policyBits . policyForFPR) [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
125
+ -- ["4.8","9.8","15.8","22.6","30.2"]
89
126
90
127
-- | Create an immutable Bloom filter, using the given setup function
91
128
-- which executes in the 'ST' monad.
92
129
--
93
130
-- Example:
94
131
--
95
- -- @
132
+ -- >>> :{
96
133
-- filter = create (sizeForBits 16 2) 4 $ \mf -> do
97
- -- insert mf \ "foo\ "
98
- -- insert mf \ "bar\ "
99
- -- @
134
+ -- insert mf "foo"
135
+ -- insert mf "bar"
136
+ -- :}
100
137
--
101
138
-- Note that the result of the setup function is not used.
102
139
create :: BloomSize
@@ -141,6 +178,12 @@ elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x)
141
178
notElem :: Hashable a => a -> Bloom a -> Bool
142
179
notElem = \ x b -> not (x `elem` b)
143
180
181
+ -- | Query a mutable Bloom filter for membership. If the value is
182
+ -- present, return @True@. If the value is not present, there is
183
+ -- /still/ some possibility that @True@ will be returned.
+ --
+ -- The element is hashed with 'hashesWithSalt', using the salt stored in the
+ -- filter itself, and the resulting hashes are passed to 'readHashes'.
184
+ read :: Hashable a => MBloom s a -> a -> ST s Bool
185
+ read ! mb ! x = readHashes mb (hashesWithSalt (mbHashSalt mb) x)
186
+
144
187
-- | Build an immutable Bloom filter from a seed value. The seeding
145
188
-- function populates the filter as follows.
146
189
--
@@ -168,6 +211,7 @@ unfold bloomsize bloomsalt f k =
168
211
Nothing -> pure ()
169
212
Just (a, j') -> insert mb a >> loop j'
170
213
214
+ {-# INLINEABLE fromList #-}
171
215
-- | Create a Bloom filter, populating it from a sequence of values.
172
216
--
173
217
-- For example
@@ -185,10 +229,11 @@ fromList policy bloomsalt xs =
185
229
where
186
230
bsize = sizeForPolicy policy (length xs)
187
231
188
- {-# SPECIALISE deserialise :: BloomSize
189
- -> Salt
190
- -> (MutableByteArray RealWorld -> Int -> Int -> IO ())
191
- -> IO (Bloom a) #-}
232
+ {-# SPECIALISE deserialise ::
233
+ BloomSize
234
+ -> Salt
235
+ -> (MutableByteArray RealWorld -> Int -> Int -> IO ())
236
+ -> IO (Bloom a) #-}
192
237
deserialise :: PrimMonad m
193
238
=> BloomSize
194
239
-> Salt
0 commit comments