19 #ifndef INCLUDE_BLOOM_FILTER_HPP
20 #define INCLUDE_BLOOM_FILTER_HPP
49 maximum_size(std::numeric_limits<unsigned long long int>::max()),
113 double min_m = std::numeric_limits<double>::infinity();
121 const double curr_m = numerator / denominator;
123 if (curr_m < min_m) {
135 optp.
table_size =
static_cast<unsigned long long int>(min_m);
238 inline void insert(
const unsigned char *key_begin,
const std::size_t &length) {
239 std::size_t bit_index = 0;
242 for (std::size_t i = 0; i <
salt_.size(); ++i) {
254 insert(
reinterpret_cast<const unsigned char *
>(&t),
sizeof(T));
257 inline void insert(
const std::string &key) {
258 insert(
reinterpret_cast<const unsigned char *
>(key.data()), key.size());
261 inline void insert(
const char *data,
const std::size_t &length) {
262 insert(
reinterpret_cast<const unsigned char *
>(data), length);
265 template<
typename InputIterator>
266 inline void insert(
const InputIterator begin,
const InputIterator end) {
267 InputIterator itr = begin;
274 inline virtual bool contains(
const unsigned char *key_begin,
const std::size_t length)
const {
275 std::size_t bit_index = 0;
278 for (std::size_t i = 0; i <
salt_.size(); ++i) {
291 return contains(
reinterpret_cast<const unsigned char *
>(&t),
static_cast<std::size_t
>(
sizeof(T)));
294 inline bool contains(
const std::string &key)
const {
295 return contains(
reinterpret_cast<const unsigned char *
>(key.c_str()), key.size());
298 inline bool contains(
const char *data,
const std::size_t &length)
const {
299 return contains(
reinterpret_cast<const unsigned char *
>(data), length);
302 template<
typename InputIterator>
303 inline InputIterator
contains_all(
const InputIterator begin,
const InputIterator end)
const {
304 InputIterator itr = begin;
317 template<
typename InputIterator>
318 inline InputIterator
contains_none(
const InputIterator begin,
const InputIterator end)
const {
319 InputIterator itr = begin;
332 inline virtual unsigned long long int size()
const {
358 for (std::size_t i = 0; i <
bit_table_.size(); ++i) {
373 for (std::size_t i = 0; i <
bit_table_.size(); ++i) {
388 for (std::size_t i = 0; i <
bit_table_.size(); ++i) {
418 const unsigned int predef_salt_count = 128;
420 static const bloom_type predef_salt[predef_salt_count] =
422 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
423 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
424 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
425 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
426 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
427 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
428 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
429 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
430 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
431 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
432 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
433 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
434 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
435 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
436 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
437 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
438 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
439 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
440 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
441 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
442 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
443 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
444 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
445 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
446 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
447 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
448 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
449 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
450 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
451 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
452 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
453 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
457 std::copy(predef_salt,
459 std::back_inserter(
salt_));
461 for (std::size_t i = 0; i <
salt_.size(); ++i) {
471 std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(
salt_));
478 if (0 == current_salt)
481 if (
salt_.end() == std::find(
salt_.begin(),
salt_.end(), current_salt)) {
482 salt_.push_back(current_salt);
489 const unsigned char *itr = begin;
490 unsigned int loop = 0;
492 while (remaining_length >= 8) {
493 const unsigned int &i1 = *(
reinterpret_cast<const unsigned int *
>(itr));
494 itr +=
sizeof(
unsigned int);
495 const unsigned int &i2 = *(
reinterpret_cast<const unsigned int *
>(itr));
496 itr +=
sizeof(
unsigned int);
498 hash ^= (hash << 7) ^ i1 * (hash >> 3) ^
499 (~((hash << 11) + (i2 ^ (hash >> 5))));
501 remaining_length -= 8;
504 if (remaining_length) {
505 if (remaining_length >= 4) {
506 const unsigned int &i = *(
reinterpret_cast<const unsigned int *
>(itr));
509 hash ^= (hash << 7) ^ i * (hash >> 3);
511 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
515 remaining_length -= 4;
517 itr +=
sizeof(
unsigned int);
520 if (remaining_length >= 2) {
521 const unsigned short &i = *(
reinterpret_cast<const unsigned short *
>(itr));
524 hash ^= (hash << 7) ^ i * (hash >> 3);
526 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
530 remaining_length -= 2;
532 itr +=
sizeof(
unsigned short);
535 if (remaining_length) {
536 hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
579 inline unsigned long long int size()
const {
580 return size_list.back();
585 (percentage < 0.0) ||
586 (percentage >= 100.0)
591 unsigned long long int original_table_size = size_list.back();
592 unsigned long long int
593 new_table_size =
static_cast<unsigned long long int>((size_list.back() * (1.0 - (percentage / 100.0))));
599 (new_table_size >= original_table_size)
606 const unsigned long long int new_tbl_raw_size = new_table_size /
bits_per_char;
612 typedef table_type::iterator itr_t;
616 itr_t itr_tmp = tmp.begin();
619 *(itr_tmp++) |= (*itr++);
624 size_list.push_back(new_table_size);
631 inline void compute_indices(
const bloom_type &hash, std::size_t &bit_index, std::size_t &bit)
const {
634 for (std::size_t i = 0; i < size_list.size(); ++i) {
635 bit_index %= size_list[i];
641 std::vector<unsigned long long int> size_list;
bloom_filter operator|(const bloom_filter &a, const bloom_filter &b)
Definition: bloom_filter.hpp:559
bloom_filter operator^(const bloom_filter &a, const bloom_filter &b)
Definition: bloom_filter.hpp:565
bloom_filter operator&(const bloom_filter &a, const bloom_filter &b)
Definition: bloom_filter.hpp:553
static const unsigned char bit_mask[bits_per_char]
Definition: bloom_filter.hpp:33
static const std::size_t bits_per_char
Definition: bloom_filter.hpp:31
Definition: bloom_filter.hpp:155
InputIterator contains_all(const InputIterator begin, const InputIterator end) const
Definition: bloom_filter.hpp:303
bloom_filter & operator&=(const bloom_filter &f)
Definition: bloom_filter.hpp:351
double desired_false_positive_probability_
Definition: bloom_filter.hpp:550
unsigned long long int projected_element_count_
Definition: bloom_filter.hpp:547
bloom_filter(const bloom_filter &filter)
Definition: bloom_filter.hpp:185
bool contains(const std::string &key) const
Definition: bloom_filter.hpp:294
unsigned long long int table_size_
Definition: bloom_filter.hpp:546
unsigned long long int element_count() const
Definition: bloom_filter.hpp:336
bool operator!=(const bloom_filter &f) const
Definition: bloom_filter.hpp:205
void insert(const unsigned char *key_begin, const std::size_t &length)
Definition: bloom_filter.hpp:238
void generate_unique_salt()
Definition: bloom_filter.hpp:411
bloom_filter()
Definition: bloom_filter.hpp:164
bool contains(const T &t) const
Definition: bloom_filter.hpp:290
std::size_t hash_count()
Definition: bloom_filter.hpp:400
bool contains(const char *data, const std::size_t &length) const
Definition: bloom_filter.hpp:298
void insert(const T &t)
Definition: bloom_filter.hpp:252
std::vector< unsigned char > table_type
Definition: bloom_filter.hpp:160
InputIterator contains_none(const InputIterator begin, const InputIterator end) const
Definition: bloom_filter.hpp:318
std::vector< bloom_type > salt_
Definition: bloom_filter.hpp:543
void insert(const std::string &key)
Definition: bloom_filter.hpp:257
unsigned long long int random_seed_
Definition: bloom_filter.hpp:549
virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
Definition: bloom_filter.hpp:274
bloom_filter & operator^=(const bloom_filter &f)
Definition: bloom_filter.hpp:381
bool operator!() const
Definition: bloom_filter.hpp:229
std::vector< unsigned char > bit_table_
Definition: bloom_filter.hpp:544
void insert(const InputIterator begin, const InputIterator end)
Definition: bloom_filter.hpp:266
bool operator==(const bloom_filter &f) const
Definition: bloom_filter.hpp:189
bloom_filter & operator=(const bloom_filter &f)
Definition: bloom_filter.hpp:209
unsigned long long int inserted_element_count_
Definition: bloom_filter.hpp:548
virtual unsigned long long int size() const
Definition: bloom_filter.hpp:332
bloom_filter & operator|=(const bloom_filter &f)
Definition: bloom_filter.hpp:366
const cell_type * table() const
Definition: bloom_filter.hpp:396
virtual void compute_indices(const bloom_type &hash, std::size_t &bit_index, std::size_t &bit) const
Definition: bloom_filter.hpp:406
bloom_type hash_ap(const unsigned char *begin, std::size_t remaining_length, bloom_type hash) const
Definition: bloom_filter.hpp:488
double effective_fpp() const
Definition: bloom_filter.hpp:340
bloom_filter(const bloom_parameters &p)
Definition: bloom_filter.hpp:172
void clear()
Definition: bloom_filter.hpp:233
unsigned char cell_type
Definition: bloom_filter.hpp:159
unsigned int bloom_type
Definition: bloom_filter.hpp:158
virtual ~bloom_filter()
Definition: bloom_filter.hpp:227
void insert(const char *data, const std::size_t &length)
Definition: bloom_filter.hpp:261
unsigned int salt_count_
Definition: bloom_filter.hpp:545
Definition: bloom_filter.hpp:44
bloom_parameters()
Definition: bloom_filter.hpp:47
double false_positive_probability
Definition: bloom_filter.hpp:86
unsigned long long int maximum_size
Definition: bloom_filter.hpp:72
unsigned int maximum_number_of_hashes
Definition: bloom_filter.hpp:76
unsigned long long int projected_element_count
Definition: bloom_filter.hpp:81
virtual ~bloom_parameters()
Definition: bloom_filter.hpp:56
bool operator!()
Definition: bloom_filter.hpp:58
virtual bool compute_optimal_parameters()
Definition: bloom_filter.hpp:101
unsigned long long int random_seed
Definition: bloom_filter.hpp:88
unsigned long long int minimum_size
Definition: bloom_filter.hpp:71
unsigned int minimum_number_of_hashes
Definition: bloom_filter.hpp:75
optimal_parameters_t optimal_parameters
Definition: bloom_filter.hpp:99
Definition: bloom_filter.hpp:571
compressible_bloom_filter(const bloom_parameters &p)
Definition: bloom_filter.hpp:574
unsigned long long int size() const
Definition: bloom_filter.hpp:579
bool compress(const double &percentage)
Definition: bloom_filter.hpp:583
void swap(BlockingConcurrentQueue< T, Traits > &a, BlockingConcurrentQueue< T, Traits > &b) MOODYCAMEL_NOEXCEPT
Definition: blockingconcurrentqueue.h:929
Definition: bloom_filter.hpp:90
unsigned int number_of_hashes
Definition: bloom_filter.hpp:95
unsigned long long int table_size
Definition: bloom_filter.hpp:96
optimal_parameters_t()
Definition: bloom_filter.hpp:91