00001
00002
00003
00004
00005
00006
00007
00008 #include "ibrcommon/data/BloomFilter.h"
00009 #include "ibrcommon/Exceptions.h"
00010
00011 namespace ibrcommon
00012 {
00013 DefaultHashProvider::DefaultHashProvider(size_t salt_count)
00014 : salt_count_(salt_count)
00015 {
00016 generate_salt();
00017 }
00018
00019 DefaultHashProvider::~DefaultHashProvider()
00020 {
00021 }
00022
00023 bool DefaultHashProvider::operator==(const DefaultHashProvider &provider) const
00024 {
00025 return ( provider.salt_count_ == salt_count_);
00026 }
00027
00028 void DefaultHashProvider::clear()
00029 {
00030 _salt.clear();
00031 }
00032
00033 void DefaultHashProvider::add(bloom_type hash)
00034 {
00035 _salt.push_back(hash);
00036 }
00037
00038 size_t DefaultHashProvider::count()
00039 {
00040 return _salt.size();
00041 }
00042
00043 const std::list<bloom_type> DefaultHashProvider::hash(const unsigned char* begin, std::size_t remaining_length) const
00044 {
00045 std::list<bloom_type> hashes;
00046
00047 for (std::vector<bloom_type>::const_iterator iter = _salt.begin(); iter != _salt.end(); iter++)
00048 {
00049 hashes.push_back(hash_ap(begin, remaining_length, (*iter)));
00050 }
00051
00052 return hashes;
00053 }
00054
00055
00056 void DefaultHashProvider::generate_salt()
00057 {
00058 const unsigned int predef_salt_count = 64;
00059 static const bloom_type predef_salt[predef_salt_count] =
00060 {
00061 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
00062 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
00063 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
00064 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
00065 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
00066 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
00067 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
00068 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
00069 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
00070 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
00071 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
00072 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
00073 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
00074 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
00075 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
00076 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432
00077 };
00078
00079 if (salt_count_ > predef_salt_count)
00080 {
00081 throw ibrcommon::Exception("Max. 64 hash salts supported!");
00082 }
00083
00084 for(unsigned int i = 0; i < salt_count_; ++i)
00085 {
00086 add(predef_salt[i]);
00087 }
00088 }
00089
00090 bloom_type DefaultHashProvider::hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
00091 {
00092 const unsigned char* it = begin;
00093 while(remaining_length >= 2)
00094 {
00095 hash ^= (hash << 7) ^ (*it++) * (hash >> 3);
00096 hash ^= (~((hash << 11) + ((*it++) ^ (hash >> 5))));
00097 remaining_length -= 2;
00098 }
00099 if (remaining_length)
00100 {
00101 hash ^= (hash << 7) ^ (*it) * (hash >> 3);
00102 }
00103 return hash;
00104 }
00105
00106 const unsigned char BloomFilter::bit_mask[bits_per_char] = {
00107 0x01,
00108 0x02,
00109 0x04,
00110 0x08,
00111 0x10,
00112 0x20,
00113 0x40,
00114 0x80
00115 };
00116
00117 BloomFilter::BloomFilter(std::size_t table_size, std::size_t salt_count)
00118 : _hashp(salt_count), table_size_(table_size)
00119 {
00120 bit_table_ = new cell_type[table_size_ / bits_per_char];
00121 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
00122 }
00123
00124 BloomFilter::BloomFilter(const BloomFilter& filter)
00125 : _hashp(filter._hashp), table_size_(filter.table_size_)
00126 {
00127 bit_table_ = new cell_type[table_size_ / bits_per_char];
00128 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
00129 }
00130
00131 BloomFilter::~BloomFilter()
00132 {
00133 delete[] bit_table_;
00134 }
00135
00136 BloomFilter& BloomFilter::operator=(const BloomFilter& filter)
00137 {
00138 _hashp = filter._hashp;
00139 table_size_ = filter.table_size_;
00140 delete[] bit_table_;
00141 bit_table_ = new cell_type[table_size_ / bits_per_char];
00142 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
00143 return *this;
00144 }
00145
00146 bool BloomFilter::operator!() const
00147 {
00148 return (0 == table_size_);
00149 }
00150
00151 void BloomFilter::clear()
00152 {
00153 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
00154 }
00155
00156 void BloomFilter::insert(const unsigned char* key_begin, const std::size_t length)
00157 {
00158 std::size_t bit_index = 0;
00159 std::size_t bit = 0;
00160
00161 std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
00162
00163 for (std::list<bloom_type>::iterator iter = hashes.begin(); iter != hashes.end(); iter++)
00164 {
00165 compute_indices( (*iter), bit_index, bit );
00166 bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
00167 }
00168 }
00169
00170 void BloomFilter::insert(const std::string& key)
00171 {
00172 insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
00173 }
00174
00175 void BloomFilter::insert(const char* data, const std::size_t& length)
00176 {
00177 insert(reinterpret_cast<const unsigned char*>(data),length);
00178 }
00179
00180 bool BloomFilter::contains(const unsigned char* key_begin, const std::size_t length) const
00181 {
00182 std::size_t bit_index = 0;
00183 std::size_t bit = 0;
00184
00185 const std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
00186
00187 for (std::list<bloom_type>::const_iterator iter = hashes.begin(); iter != hashes.end(); iter++)
00188 {
00189 compute_indices( (*iter), bit_index, bit );
00190 if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
00191 {
00192 return false;
00193 }
00194 }
00195
00196 return true;
00197 }
00198
00199 bool BloomFilter::contains(const std::string& key) const
00200 {
00201 return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
00202 }
00203
00204 bool BloomFilter::contains(const char* data, const std::size_t& length) const
00205 {
00206 return contains(reinterpret_cast<const unsigned char*>(data),length);
00207 }
00208
00209 std::size_t BloomFilter::size() const
00210 {
00211 return (table_size_ / bits_per_char);
00212 }
00213
00214 BloomFilter& BloomFilter::operator &= (const BloomFilter& filter)
00215 {
00216
00217 if (
00218 (_hashp == filter._hashp) &&
00219 (table_size_ == filter.table_size_)
00220 )
00221 {
00222 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00223 {
00224 bit_table_[i] &= filter.bit_table_[i];
00225 }
00226 }
00227 return *this;
00228 }
00229
00230 BloomFilter& BloomFilter::operator |= (const BloomFilter& filter)
00231 {
00232
00233 if (
00234 (_hashp == filter._hashp) &&
00235 (table_size_ == filter.table_size_)
00236 )
00237 {
00238 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00239 {
00240 bit_table_[i] |= filter.bit_table_[i];
00241 }
00242 }
00243 return *this;
00244 }
00245
00246 BloomFilter& BloomFilter::operator ^= (const BloomFilter& filter)
00247 {
00248
00249 if (
00250 (_hashp == filter._hashp) &&
00251 (table_size_ == filter.table_size_)
00252 )
00253 {
00254 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00255 {
00256 bit_table_[i] ^= filter.bit_table_[i];
00257 }
00258 }
00259 return *this;
00260 }
00261
00262 void BloomFilter::load(const cell_type* data, size_t len)
00263 {
00264 if (len < table_size_)
00265 {
00266 std::copy(data, data + len, bit_table_);
00267 }
00268 else
00269 {
00270 std::copy(data, data + table_size_, bit_table_);
00271 }
00272 }
00273
00274 const cell_type* BloomFilter::table() const
00275 {
00276 return bit_table_;
00277 }
00278
00279 void BloomFilter::compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
00280 {
00281 bit_index = hash % table_size_;
00282 bit = bit_index % bits_per_char;
00283 }
00284 }