-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom.go
138 lines (120 loc) · 2.82 KB
/
bloom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package main
import (
"bytes"
"encoding/gob"
"fmt"
"hash"
"hash/fnv"
)
// Bitstring holds bitstrings of arbitrary length.
type Bitstring []byte
// Len returns the length of the bitstring
func (bs Bitstring) Len() uint {
return uint(len(bs) * 8)
}
// Set sets the specified bit to be on.
func (bs Bitstring) Set(n uint) {
pos := n / 8
bit := n % 8
bs[pos] |= 1 << bit
}
// Get returns the bit value at position n.
func (bs Bitstring) Get(n uint) bool {
pos := n / 8
bit := n % 8
return (bs[pos]>>bit)&1 == 1
}
// String returns the bitstring as a string.
func (bs Bitstring) String() string {
buf := bytes.Buffer{}
for _, b := range bs {
buf.WriteString(fmt.Sprintf("%08b", b))
}
return buf.String()
}
// NewBitstring creates a new Bitstring with the requested size (in bytes).
func NewBitstring(siz int) Bitstring {
return make([]byte, siz)
}
// BloomFilter holds a bloom filter.
type BloomFilter struct {
hfuncs [](func() hash.Hash64)
bits Bitstring
}
// NewBloomFilter generates a bloom filter with length 8*n.
func NewBloomFilter(n int) *BloomFilter {
bf := new(BloomFilter)
bf.bits = NewBitstring(n)
bf.hfuncs = [](func() hash.Hash64){fnv.New64, fnv.New64a}
return bf
}
func makeHashes(d []byte, h hash.Hash64) ([]uint32, error) {
_, err := h.Write(d)
if err != nil {
return nil, err
}
hashed := h.Sum64()
lower := uint32(hashed)
upper := uint32(hashed >> 32)
return []uint32{lower, upper}, nil
}
func (bf *BloomFilter) makeAllHashes(v interface{}) ([]uint, error) {
buf := bytes.Buffer{}
enc := gob.NewEncoder(&buf)
err := enc.Encode(v)
data := buf.Bytes()
if err != nil {
return nil, err
}
hashes := []uint{}
for _, hfunc := range bf.hfuncs {
out, err := makeHashes(data, hfunc())
if err != nil {
return nil, err
}
for _, h := range out {
hashes = append(hashes, uint(h))
}
}
return hashes, nil
}
// Add adds v to the set.
func (bf *BloomFilter) Add(v interface{}) error {
hashes, err := bf.makeAllHashes(v)
if err != nil {
return err
}
for _, h := range hashes {
bf.bits.Set(h % bf.bits.Len())
}
return nil
}
// Has checks if v is in the set. Note that false positives may occur.
func (bf *BloomFilter) Has(v interface{}) (bool, error) {
hashes, err := bf.makeAllHashes(v)
if err != nil {
return false, err
}
contains := true
for _, h := range hashes {
contains = contains && bf.bits.Get(h%bf.bits.Len())
}
return contains, nil
}
func (bf *BloomFilter) String() string {
return bf.bits.String()
}
func main() {
bf := NewBloomFilter(10)
fmt.Println(bf)
bf.Add("Happy")
fmt.Println(bf)
bf.Add("Sad")
fmt.Println(bf)
contained, _ := bf.Has("Happy")
fmt.Printf("Contains \"Happy\": %t\n", contained)
contained, _ = bf.Has("Sad")
fmt.Printf("Contains \"Sad\": %t\n", contained)
contained, _ = bf.Has("Not sad")
fmt.Printf("Contains \"Not sad\": %t\n", contained)
}