BloomFilter.hpp
A Bloom filter implementation.
- Copyright
Arash Partow, 2000 (modified slightly by Emily Dolson)
Note
This file is included in Empirical (https://github.com/devosoft/Empirical) for convenience. The Bloom filter class was written by Arash Partow (http://www.partow.net/programming/hashfunctions/index.html) and distributed under the MIT License.
Functions
-
inline BloomFilter operator&(const BloomFilter &a, const BloomFilter &b)
Calculate the intersection of two Bloom filters.
-
inline BloomFilter operator|(const BloomFilter &a, const BloomFilter &b)
Calculate the union of two Bloom filters.
-
inline BloomFilter operator^(const BloomFilter &a, const BloomFilter &b)
Calculate the difference of two Bloom filters.
Variables
-
static const unsigned char bit_mask[bits_per_char] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}
-
class BloomFilter
- #include <BloomFilter.hpp>
This class implements a Bloom filter, which is a memory-efficient data structure for identifying values that have been seen before (with a tunable probability of a false positive - thinking that we have already seen a value when we actually haven’t)
Subclassed by CompressibleBloomFilter
Public Functions
-
inline BloomFilter()
-
inline BloomFilter(const BloomParameters &p)
-
inline BloomFilter(const BloomFilter &filter)
-
inline bool operator==(const BloomFilter &f) const
-
inline bool operator!=(const BloomFilter &f) const
-
inline BloomFilter &operator=(const BloomFilter &f)
-
inline virtual ~BloomFilter()
-
inline bool operator!() const
-
inline void clear()
Resets table to starting conditions, as if nothing had been added.
-
inline void insert(const unsigned char *key_begin, const std::size_t &length)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
template<typename T>
inline void insert(const T &t) Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
inline void insert(const std::string &key)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
inline void insert(const char *data, const std::size_t &length)
Insert a value into the Bloom filter (i.e. indicate that we have seen it)
-
template<typename InputIterator>
inline void insert(const InputIterator begin, const InputIterator end) Insert a sequence of values into the Bloom filter (i.e. indicate that we have seen it)
-
inline virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
template<typename T>
inline bool contains(const T &t) const - Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
inline bool contains(const std::string &key) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
inline bool contains(const char *data, const std::size_t &length) const
- Returns:
true if it’s possible that the specified element was previously added to the Bloom filter.
-
template<typename InputIterator>
inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const Checks whether its possible that the Bloom filter has seen all of the elements within the specified range. If so, returns
end
. Otherwise returns an iterator to the first element that has definitely not previously been added to the Bloom filter.
-
template<typename InputIterator>
inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const Checks whether its possible that the Bloom filter has seen none of the elements within the specified range. If so, returns
end
. Otherwise returns an iterator to the first element that has possible previously been added to the Bloom filter.
-
inline virtual unsigned long long int size() const
- Returns:
the size of the Bloom filter’s internal table
-
inline unsigned long long int element_count() const
- Returns:
the number of elements that have been added to the Bloom filter
-
inline double effective_fpp() const
Calculate effective false positive probability.
-
inline BloomFilter &operator&=(const BloomFilter &f)
Calculate the intersection of two Bloom filters.
-
inline BloomFilter &operator|=(const BloomFilter &f)
Calculate the union of two Bloom filters.
-
inline BloomFilter &operator^=(const BloomFilter &f)
Calculate the difference of two Bloom filters.
Protected Functions
-
inline virtual void compute_indices(const bloom_type &hash, std::size_t &bit_index, std::size_t &bit) const
-
inline void generate_unique_salt()
-
inline bloom_type hash_ap(const unsigned char *begin, std::size_t remaining_length, bloom_type hash) const
-
inline BloomFilter()
-
class CompressibleBloomFilter : public BloomFilter
- #include <BloomFilter.hpp>
A Bloom filter that can be compressed.
Public Functions
-
inline CompressibleBloomFilter(const BloomParameters &p)
-
inline virtual unsigned long long int size() const
- Returns:
the current size of the bloom filter (i.e. the size of the internal table)
-
inline bool compress(const double &percentage)
Compress the Bloom filter
- Parameters:
percentage – the percentage of the Bloom filter’s current size to compress to (e.g. 50 would reduce the current size by half)
Private Functions
-
inline CompressibleBloomFilter(const BloomParameters &p)