tapir/primitives/bloom.go

63 lines
1.6 KiB
Go

package primitives
import (
"crypto/sha256"
"math"
"math/big"
"sync"
)
// BloomFilter implements a bloom filter
type BloomFilter struct {
B []bool
lock sync.Mutex
}
// Init constructs a bloom filter of size m
func (bf *BloomFilter) Init(m int64) {
bf.B = make([]bool, m)
}
// Hash transforms a message to a set of bit flips
func (bf *BloomFilter) Hash(msg []byte) []int {
// Not the fastest hash function ever, but cryptographic security is more important than speed.
hash1 := sha256.Sum256(append([]byte("h1"), msg...))
hash2 := sha256.Sum256(append([]byte("h2"), msg...))
hash3 := sha256.Sum256(append([]byte("h3"), msg...))
hash4 := sha256.Sum256(append([]byte("h4"), msg...))
m := int64(len(bf.B))
// Number of bytes needed to pick a position from [0,m)
B := int(math.Ceil(math.Log2(float64(m)) / 8.0))
p1 := big.NewInt(0).SetBytes(hash1[:B]).Int64()
p2 := big.NewInt(0).SetBytes(hash2[:B]).Int64()
p3 := big.NewInt(0).SetBytes(hash3[:B]).Int64()
p4 := big.NewInt(0).SetBytes(hash4[:B]).Int64()
return []int{int(p1), int(p2), int(p3), int(p4)}
}
// Insert updates the BloomFilter (suitable for concurrent use)
func (bf *BloomFilter) Insert(msg []byte) {
pos := bf.Hash(msg)
bf.lock.Lock()
defer bf.lock.Unlock()
bf.B[pos[0]] = true
bf.B[pos[1]] = true
bf.B[pos[2]] = true
bf.B[pos[3]] = true
}
// Check returns true if the messages might be in the BloomFilter
// (No false positives, possible false negatives due to the probabilistic nature of the filter)
func (bf *BloomFilter) Check(msg []byte) bool {
pos := bf.Hash(msg)
if bf.B[pos[0]] && bf.B[pos[1]] && bf.B[pos[2]] && bf.B[pos[3]] {
return true
}
return false
}