BloomFilter.hpp
A Bloom filter implementation.
- Copyright
Arash Partow, 2000 (modified slightly by Emily Dolson)
Note
This file is included in Empirical (https://github.com/devosoft/Empirical) for convenience. The Bloom filter class was written by Arash Partow (http://www.partow.net/programming/hashfunctions/index.html) and distributed under the MIT License.
Functions
-
inline BloomFilter operator&(const BloomFilter &a, const BloomFilter &b)
Calculate the intersection of two Bloom filters.
-
inline BloomFilter operator|(const BloomFilter &a, const BloomFilter &b)
Calculate the union of two Bloom filters.
-
inline BloomFilter operator^(const BloomFilter &a, const BloomFilter &b)
Calculate the difference of two Bloom filters.
Variables
-
static const unsigned char bit_mask[bits_per_char] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}
-
class BloomFilter
- #include <BloomFilter.hpp>
This class implements a Bloom filter, which is a memory-efficient data structure for identifying values that have been seen before (with a tunable probability of a false positive - thinking that we have already seen a value when we actually haven’t)
Subclassed by CompressibleBloomFilter
Public Functions
-
inline BloomFilter()
-
inline BloomFilter(const BloomParameters &p)
-
inline BloomFilter(const BloomFilter &filter)
-
inline bool operator==(const BloomFilter &f) const
-
inline bool operator!=(const BloomFilter &f) const
-
inline BloomFilter &operator=(const BloomFilter &f)
-
inline virtual ~BloomFilter()
-
inline bool operator!() const
-
inline void clear()
Resets table to starting conditions, as if nothing had been added.
-
inline void insert(const unsigned char *key_begin, const std::size_t &length)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
template<typename T>
inline void insert(const T &t) Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
inline void insert(const std::string &key)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
inline void insert(const char *data, const std::size_t &length)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
template<typename InputIterator>
inline void insert(const InputIterator begin, const InputIterator end) Insert a sequence of values into the Bloom filter (i.e. indicate that we have seen it)
-
inline virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
template<typename T>
inline bool contains(const T &t) const - Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
inline bool contains(const std::string &key) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
inline bool contains(const char *data, const std::size_t &length) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
template<typename InputIterator>
inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const Checks whether its possible that the Bloom filter has seen all of the elements within the specified range. If so, returns
end. Otherwise returns an iterator to the first element that has definitely not previously been added to the Bloom filter.
-
template<typename InputIterator>
inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const Checks whether its possible that the Bloom filter has seen none of the elements within the specified range. If so, returns
end. Otherwise returns an iterator to the first element that has possible previously been added to the Bloom filter.
-
inline virtual unsigned long long int size() const
- Returns:
the size of the Bloom filter’s internal table
-
inline unsigned long long int element_count() const
- Returns:
the number of elements that have been added to the Bloom filter
-
inline double effective_fpp() const
Calculate effective false positive probability.
-
inline BloomFilter &operator&=(const BloomFilter &f)
Calculate the intersection of two Bloom filters.
-
inline BloomFilter &operator|=(const BloomFilter &f)
Calculate the union of two Bloom filters.
-
inline BloomFilter &operator^=(const BloomFilter &f)
Calculate the difference of two Bloom filters.
Protected Functions
-
inline virtual void compute_indices(const bloom_type &hash, std::size_t &bit_index, std::size_t &bit) const
-
inline void generate_unique_salt()
-
inline bloom_type hash_ap(const unsigned char *begin, std::size_t remaining_length, bloom_type hash) const
-
inline BloomFilter()
-
class CompressibleBloomFilter : public BloomFilter
- #include <BloomFilter.hpp>
A Bloom filter that can be compressed.
Public Functions
-
inline CompressibleBloomFilter(const BloomParameters &p)
-
inline virtual unsigned long long int size() const
- Returns:
the current size of the bloom filter (i.e. the size of the internal table)
-
inline bool compress(const double &percentage)
Compress the Bloom filter
- Parameters:
percentage – the percentage of the Bloom filter’s current size to compress to (e.g. 50 would reduce the current size by half)
Private Functions
-
inline CompressibleBloomFilter(const BloomParameters &p)