tbloomfilter.c 5.2 KB
Newer Older
R
root 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#define _DEFAULT_SOURCE

#include "tbloomfilter.h"
#include "taos.h"
#include "taoserror.h"

22 23
#define UNIT_NUM_BITS      64ULL
#define UNIT_ADDR_NUM_BITS 6ULL
R
root 已提交
24 25 26

static FORCE_INLINE bool setBit(uint64_t *buf, uint64_t index) {
  uint64_t unitIndex = index >> UNIT_ADDR_NUM_BITS;
27
  uint64_t mask = 1ULL << (index % UNIT_NUM_BITS);
R
root 已提交
28 29 30 31 32 33 34
  uint64_t old = buf[unitIndex];
  buf[unitIndex] |= mask;
  return buf[unitIndex] != old;
}

static FORCE_INLINE bool getBit(uint64_t *buf, uint64_t index) {
  uint64_t unitIndex = index >> UNIT_ADDR_NUM_BITS;
35
  uint64_t mask = 1ULL << (index % UNIT_NUM_BITS);
R
root 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
  return buf[unitIndex] & mask;
}

SBloomFilter *tBloomFilterInit(uint64_t expectedEntries, double errorRate) {
  if (expectedEntries < 1 || errorRate <= 0 || errorRate >= 1.0) {
    return NULL;
  }
  SBloomFilter *pBF = taosMemoryCalloc(1, sizeof(SBloomFilter));
  if (pBF == NULL) {
    return NULL;
  }
  pBF->expectedEntries = expectedEntries;
  pBF->errorRate = errorRate;

  double lnRate = fabs(log(errorRate));
  // ln(2)^2 = 0.480453013918201
  // m = - n * ln(P) / ( ln(2) )^2
  // m is the size of bloom filter, n is expected entries, P is false positive probability
H
Hongze Cheng 已提交
54
  pBF->numUnits = (uint64_t)ceil(expectedEntries * lnRate / 0.480453013918201 / UNIT_NUM_BITS);
R
root 已提交
55 56 57 58
  pBF->numBits = pBF->numUnits * 64;
  pBF->size = 0;

  // ln(2) = 0.693147180559945
H
Hongze Cheng 已提交
59
  pBF->hashFunctions = (uint32_t)ceil(lnRate / 0.693147180559945);
L
Liu Jicong 已提交
60 61 62 63
  /*pBF->hashFn1 = taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP);*/
  /*pBF->hashFn2 = taosGetDefaultHashFunction(TSDB_DATA_TYPE_NCHAR);*/
  pBF->hashFn1 = taosFastHash;
  pBF->hashFn2 = taosDJB2Hash;
R
root 已提交
64 65 66 67 68 69 70 71 72 73
  pBF->buffer = taosMemoryCalloc(pBF->numUnits, sizeof(uint64_t));
  if (pBF->buffer == NULL) {
    tBloomFilterDestroy(pBF);
    return NULL;
  }
  return pBF;
}

int32_t tBloomFilterPut(SBloomFilter *pBF, const void *keyBuf, uint32_t len) {
  ASSERT(!tBloomFilterIsFull(pBF));
H
Hongze Cheng 已提交
74 75 76
  uint64_t                h1 = (uint64_t)pBF->hashFn1(keyBuf, len);
  uint64_t                h2 = (uint64_t)pBF->hashFn2(keyBuf, len);
  bool                    hasChange = false;
R
root 已提交
77
  const register uint64_t size = pBF->numBits;
H
Hongze Cheng 已提交
78
  uint64_t                cbHash = h1;
R
root 已提交
79 80 81 82 83 84 85 86 87 88 89
  for (uint64_t i = 0; i < pBF->hashFunctions; ++i) {
    hasChange |= setBit(pBF->buffer, cbHash % size);
    cbHash += h2;
  }
  if (hasChange) {
    pBF->size++;
    return TSDB_CODE_SUCCESS;
  }
  return TSDB_CODE_FAILED;
}

H
Hongze Cheng 已提交
90 91 92
int32_t tBloomFilterNoContain(const SBloomFilter *pBF, const void *keyBuf, uint32_t len) {
  uint64_t                h1 = (uint64_t)pBF->hashFn1(keyBuf, len);
  uint64_t                h2 = (uint64_t)pBF->hashFn2(keyBuf, len);
R
root 已提交
93
  const register uint64_t size = pBF->numBits;
H
Hongze Cheng 已提交
94
  uint64_t                cbHash = h1;
R
root 已提交
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
  for (uint64_t i = 0; i < pBF->hashFunctions; ++i) {
    if (!getBit(pBF->buffer, cbHash % size)) {
      return TSDB_CODE_SUCCESS;
    }
    cbHash += h2;
  }
  return TSDB_CODE_FAILED;
}

void tBloomFilterDestroy(SBloomFilter *pBF) {
  if (pBF == NULL) {
    return;
  }
  taosMemoryFree(pBF->buffer);
  taosMemoryFree(pBF);
}

H
Hongze Cheng 已提交
112
int32_t tBloomFilterEncode(const SBloomFilter *pBF, SEncoder *pEncoder) {
5
54liuyao 已提交
113 114 115 116 117 118
  if (tEncodeU32(pEncoder, pBF->hashFunctions) < 0) return -1;
  if (tEncodeU64(pEncoder, pBF->expectedEntries) < 0) return -1;
  if (tEncodeU64(pEncoder, pBF->numUnits) < 0) return -1;
  if (tEncodeU64(pEncoder, pBF->numBits) < 0) return -1;
  if (tEncodeU64(pEncoder, pBF->size) < 0) return -1;
  for (uint64_t i = 0; i < pBF->numUnits; i++) {
H
Hongze Cheng 已提交
119
    uint64_t *pUnits = (uint64_t *)pBF->buffer;
5
54liuyao 已提交
120 121 122 123 124 125
    if (tEncodeU64(pEncoder, pUnits[i]) < 0) return -1;
  }
  if (tEncodeDouble(pEncoder, pBF->errorRate) < 0) return -1;
  return 0;
}

H
Hongze Cheng 已提交
126
SBloomFilter *tBloomFilterDecode(SDecoder *pDecoder) {
5
54liuyao 已提交
127 128 129 130 131 132 133 134 135
  SBloomFilter *pBF = taosMemoryCalloc(1, sizeof(SBloomFilter));
  pBF->buffer = NULL;
  if (tDecodeU32(pDecoder, &pBF->hashFunctions) < 0) goto _error;
  if (tDecodeU64(pDecoder, &pBF->expectedEntries) < 0) goto _error;
  if (tDecodeU64(pDecoder, &pBF->numUnits) < 0) goto _error;
  if (tDecodeU64(pDecoder, &pBF->numBits) < 0) goto _error;
  if (tDecodeU64(pDecoder, &pBF->size) < 0) goto _error;
  pBF->buffer = taosMemoryCalloc(pBF->numUnits, sizeof(uint64_t));
  for (int32_t i = 0; i < pBF->numUnits; i++) {
H
Hongze Cheng 已提交
136
    uint64_t *pUnits = (uint64_t *)pBF->buffer;
5
54liuyao 已提交
137 138 139 140 141 142 143 144 145 146
    if (tDecodeU64(pDecoder, pUnits + i) < 0) goto _error;
  }
  if (tDecodeDouble(pDecoder, &pBF->errorRate) < 0) goto _error;
  pBF->hashFn1 = taosGetDefaultHashFunction(TSDB_DATA_TYPE_TIMESTAMP);
  pBF->hashFn2 = taosGetDefaultHashFunction(TSDB_DATA_TYPE_NCHAR);
  return pBF;

_error:
  tBloomFilterDestroy(pBF);
  return NULL;
R
root 已提交
147 148
}

L
Liu Jicong 已提交
149
bool tBloomFilterIsFull(const SBloomFilter *pBF) { return pBF->size >= pBF->expectedEntries; }