-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBloomFilter.java
More file actions
317 lines (259 loc) · 7.02 KB
/
BloomFilter.java
File metadata and controls
317 lines (259 loc) · 7.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
package my.based.learn.algorithm;
/*
* Implementation of a Bloom-filter
*
* @param <E>
* Object type is to be insert into the bloom filter,e.g.
* String or Integer or Long
*
* @author : qingwu.fu <qingwufu@gmail.com>
* @date : 2013-06-19
*
*/
import java.io.Serializable;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.BitSet;
import java.util.Collection;
import java.util.Date;
public class BloomFilter<E> implements Serializable {
private int bitSetSize;
private int exepectElementNum;
private int functions;
private double bitPerEle;
private int addEleNum;
public BitSet bitBuf;
static final String hashName = "MD5"; // MD5 gives good enough accuracey in most circumstances. Change to SHA1 if it's needed
static final Charset charset = Charset.forName("UTF-8"); // encoding used for storing hash valuses as strings
static final MessageDigest digestFunction;
static {
MessageDigest tmp;
try{
tmp = MessageDigest.getInstance(hashName);
} catch (NoSuchAlgorithmException e){
tmp = null;
}
digestFunction = tmp;
}
/*
* The total length of the Bloom Filter will be bitPerEle * elementNum
*
* @param bitPerEle
* the number of bits used per element
* @param elementNum
* the expected number of elements the filter will contain
* @param funcNum
* the number of hash functions used
*/
public BloomFilter(double bitPerEle, int elementNum, int funcNum){
this.bitPerEle = bitPerEle;
this.exepectElementNum = elementNum;
this.functions = funcNum;
this.addEleNum = 0;
this.bitSetSize = (int)Math.ceil(bitPerEle * elementNum);
this.bitBuf = new BitSet(this.bitSetSize);
}
/*
* @param bitSetSize
* defines how many bits should be used in total for the filter
* @param expectElementNum
* defines the maximum number of elements the filter is expected to contain
*
* The optimal number of hash functions is estimated from the total size of the Bloom and the number of expected elements.
*
*/
public BloomFilter(int bitSetSize, int expectElementNum){
this((double)bitSetSize/expectElementNum, expectElementNum, (int) Math.round((bitSetSize/(double)expectElementNum)*Math.log(2.0)));
}
/*
* Generates a digest based on the contents of a String
*
* @param val
* specifies the input data
*
* @param charset
* specifies the encoding of the input data
*
*/
public static long createHash(String val, Charset charset){
return createHash(val.getBytes(charset));
}
/*
* @param val
* specifies the input data.The encoding is expected to be UTF-8
*
*/
public static long createHash(String val){
return createHash(val, charset);
}
/*
* @param data
* specifies input data
* @return digest as long
*/
public static long createHash(byte[] data){
long h = 0;
byte[] res;
synchronized (digestFunction){
res = digestFunction.digest(data);
}
//将四个字节的 byte 类型转换成 long 型
for (int i = 0; i < 4; i++){
h <<= 8;
h |= ((int) res[i]) & 0xFF;
}
return h;
}
@Override
public boolean equals(Object obj){
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
final BloomFilter<E> other = (BloomFilter) obj;
if (this.bitSetSize != other.bitSetSize)
return false;
if (this.exepectElementNum != other.exepectElementNum)
return false;
if (this.functions != other.functions)
return false;
if (this.bitBuf != other.bitBuf
&& (this.bitBuf == null || !this.bitBuf.equals(other.bitBuf)))
return false;
return true;
}
@Override
public int hashCode(){
int hash = 7;
hash = 31 * hash + (this.bitBuf != null ? this.bitBuf.hashCode() : 0 );
hash = 31 * hash + this.exepectElementNum;
hash = 31 * hash + this.bitSetSize;
hash = 31 * hash + this.functions;
return hash;
}
/*
* @return expected probability of false positive
*/
public double expectedFalsePositiveProbability(){
return getFalsePositiveProbability(this.exepectElementNum);
}
/*
* @param elementNum
* number of inserted elements
* @return probability of a false positive.
*
*/
public double getFalsePositiveProbability(double elementNum){
return Math.pow((1 - Math.exp(-this.functions * (double) elementNum/(double)this.bitSetSize)), this.functions);
}
/*
* Sets all bits to false in the Bloom filter
*/
public void clear(){
this.bitBuf.clear();
this.addEleNum = 0;
}
/*
* Adds an object to the Bloom filter. The output from the object's toString() method is used as input to the hash functions.
*
* @param element
*/
public void add(E element){
long hash;
String valString = element.toString();
for (int x = 0; x < this.functions; x++){
hash = createHash(valString + Integer.toString(x));
hash = hash % (long) this.bitSetSize;
bitBuf.set(Math.abs((int)hash), true);
}
this.addEleNum++;
}
/*
* Adds all elements from a Collection to the Bloom filter.
*
* @param c
* Collection of elements.
*/
public void addAll(Collection<? extends E> c){
for (E element : c){
add(element);
}
}
/*
* @param element
* element to check
* @return true or false
*/
public boolean contains(E element){
long hash;
String valString = element.toString();
for (int x = 0; x < this.functions; x++){
hash = createHash(valString + Integer.toString(x));
hash = hash % (long) this.bitSetSize;
if (!this.bitBuf.get(Math.abs((int) hash)))
return false;
}
return true;
}
/*
* @param c
* elements to check
* @return true or false
*/
public boolean containsAll(Collection<? extends E> c){
for (E element : c)
if (!contains(element))
return false;
return true;
}
/*
* Get and set a single bit
*/
public boolean getBit(int index){
return this.bitBuf.get(index);
}
public void setBit(int index, boolean value){
this.bitBuf.set(index, value);
}
/*
* Return the bit set used to store the Bloom filter
*
* @return bit set
*/
public BitSet getBitSet() {
return this.bitBuf;
}
/*
* Return the number of bits in the Bloom filter.
*
* @return the size of the bitset usef by the Bloom filter
*/
public int size() {
return this.bitSetSize;
}
/*
* Return the number of elements added to the Bloom filter after it was constructed after clear() was called
*
* @return number of elements added to the Bloom filter.
*/
public int count(){
return this.addEleNum;
}
public int getExpectedNumberOfElements(){
return this.exepectElementNum;
}
public double getExpectedBitsPerElement(){
return this.bitPerEle;
}
public double getBitsPerElement() {
return this.bitSetSize / (double) this.addEleNum;
}
public static void main(String[] args){
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date d = new Date();
Date d1 = new Date(d.getTime() - 24*60*60*1000);
System.out.println(sdf.format(d) + " \t" + sdf.format(d1));
}
}