未验证 提交 c63524de 编写于 作者: Y yukun 提交者: GitHub

Add dablooms (#5440)

* Add dablooms cwrapper
Signed-off-by: Nfishpenguin <kun.yu@zilliz.com>

* Fix undefined reference bug
Signed-off-by: Nfishpenguin <kun.yu@zilliz.com>

* Add cwrapper_dablooms_build shell script
Signed-off-by: Nfishpenguin <kun.yu@zilliz.com>
上级 16839f67
......@@ -61,6 +61,7 @@ cmake_build/
.DS_Store
*.sw[po]
cwrapper_build
cwrapper_dablooms_build
**/cwrapper_rocksdb_build/
**/.clangd/*
**/compile_commands.json
......
......@@ -132,6 +132,7 @@ build-go: standalone milvus
build-cpp:
@(env bash $(PWD)/scripts/core_build.sh -f "$(CUSTOM_THIRDPARTY_PATH)")
@(env bash $(PWD)/scripts/cwrapper_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
@(env bash $(PWD)/scripts/cwrapper_dablooms_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
@go env -w CGO_CFLAGS="-I$(PWD)/internal/kv/rocksdb/cwrapper/output/include"
@go env -w CGO_LDFLAGS="-L$(PWD)/internal/kv/rocksdb/cwrapper/output/lib -l:librocksdb.a -lstdc++ -lm -lz"
@(env bash $(PWD)/scripts/cwrapper_rocksdb_build.sh -t Release -f "$(CUSTOM_THIRDPARTY_PATH)")
......
output
cmake-build-debug
.idea
cmake_build
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
cmake_minimum_required(VERSION 3.14...3.17 FATAL_ERROR)
project(dablooms LANGUAGES CXX)

# Default C++ standard for the targets defined below.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Static archive linked by the Go package through cgo (-ldablooms).
# The sources are listed exactly once; they are deliberately NOT re-added via
# target_sources(... PUBLIC ...), which would also place them in
# INTERFACE_SOURCES and make every consumer compile them again.
add_library(dablooms STATIC
  dablooms.cpp
  murmur.cpp
)

# dablooms.h and murmur.h live next to the sources; PUBLIC exposes the
# directory both to this target and to anything that links against it.
target_include_directories(dablooms
  PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}"
)

# Fall back to the build directory when the install prefix is empty.
if(NOT CMAKE_INSTALL_PREFIX)
  set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}")
endif()

# Install the archive directly into the prefix (not <prefix>/lib): the cgo
# LDFLAGS of the Go wrapper point at the prefix directory itself.
install(TARGETS dablooms DESTINATION "${CMAKE_INSTALL_PREFIX}")
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License.
#!/bin/bash
# Configure, build and install the dablooms static library with CMake.
# The build tree is <script dir>/cmake_build and the library is installed
# into <script dir>/output.
#
# Options:
#   -t <type>  CMake build type (default: Debug)
#   -f <path>  custom thirdparty path, forwarded as CUSTOM_THIRDPARTY_DOWNLOAD_PATH
#   -h         print help and exit

SOURCE=${BASH_SOURCE[0]}
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR=$( cd -P "$( dirname "$SOURCE" )" && pwd )
    SOURCE=$(readlink "$SOURCE")
    # if $SOURCE was a relative symlink, resolve it relative to the directory holding the symlink
    [[ $SOURCE != /* ]] && SOURCE=$DIR/$SOURCE
done
DIR=$( cd -P "$( dirname "$SOURCE" )" && pwd )

CMAKE_BUILD=${DIR}/cmake_build
OUTPUT_LIB=${DIR}/output

BUILD_TYPE="Debug"
CUSTOM_THIRDPARTY_PATH=""

# Options are parsed before any directory is touched, so -h or a bad flag has
# no side effects. "h" takes no argument (the old "h:" made plain -h an error).
while getopts "t:f:h" arg; do
    case $arg in
        f)
            CUSTOM_THIRDPARTY_PATH=$OPTARG
            ;;
        t)
            BUILD_TYPE=$OPTARG # BUILD_TYPE
            ;;
        h) # help
            echo "-t: build type(default: Debug)
-f: custom thirdparty path(default:)
-h: help
"
            exit 0
            ;;
        ?)
            echo "ERROR! unknown argument"
            exit 1
            ;;
    esac
done

# Keep the build tree for incremental builds, but always start from a clean output directory.
mkdir -p "${CMAKE_BUILD}" || exit 1
rm -rf "${OUTPUT_LIB}"
mkdir -p "${OUTPUT_LIB}" || exit 1

echo "BUILD_TYPE: " $BUILD_TYPE
echo "CUSTOM_THIRDPARTY_PATH: " $CUSTOM_THIRDPARTY_PATH

pushd "${CMAKE_BUILD}" || exit 1

# An array keeps each argument intact even when a path contains spaces.
CMAKE_CMD=(cmake
    "-DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH}"
    ..)
echo "${CMAKE_CMD[@]}"
# Stop here when configuration fails instead of running make on a stale tree.
"${CMAKE_CMD[@]}" || exit 1

# Honour a caller-provided $jobs; otherwise use the CPU count (1 if it cannot be determined).
if [[ ! ${jobs+1} ]]; then
    jobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)
fi
make -j "${jobs}" && make install
/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
#include <sys/stat.h>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <fcntl.h>
#include <math.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include "murmur.h"
#include "dablooms.h"
#define DABLOOMS_VERSION "0.9.1"
#define ERROR_TIGHTENING_RATIO 0.5
#define SALT_CONSTANT 0x97c29b3a
/* Returns the library version string held in the DABLOOMS_VERSION macro. */
const char *dablooms_version(void)
{
    static const char *const version = DABLOOMS_VERSION;
    return version;
}
/* Releases a bitmap and its byte array; a null bitmap is ignored. */
void free_bitmap(bitmap_t *bitmap)
{
    if (bitmap == nullptr) {
        return;
    }
    free(bitmap->array);
    free(bitmap);
}
/* Resizes the bitmap's byte array to `new_size` bytes and zero-fills any
 * newly added tail. Always returns `bitmap`; when realloc fails the bitmap is
 * left unchanged (its `bytes` field keeps the old size). */
bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t new_size)
{
    const size_t previous = (bitmap->array != nullptr) ? bitmap->bytes : 0;
    char *grown = (char *)realloc(bitmap->array, new_size);
    if (grown != nullptr) {
        bitmap->array = grown;
        bitmap->bytes = new_size;
        if (previous < new_size) {
            /* only the freshly added bytes need clearing */
            memset(grown + previous, 0, new_size - previous);
        }
    }
    /* else: allocation failed, the old array and size are kept as they were */
    return bitmap;
}
/* Allocates a bitmap whose byte array holds `bytes` zeroed bytes. The array
 * stores the 4-bit counters, two per byte. Returns nullptr when either
 * allocation fails. */
bitmap_t *new_bitmap(size_t bytes)
{
    bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t));
    if (bitmap == nullptr) {
        return nullptr;
    }

    char *storage = (char *)malloc(bytes);
    if (storage == nullptr) {
        free(bitmap);
        return nullptr;
    }
    memset(storage, 0, bytes);

    bitmap->array = storage;
    bitmap->bytes = bytes;
    return bitmap;
}
/* Increments the 4-bit counter number `index`, stored `offset` bytes into the
 * bitmap (two counters per byte: even index = high nibble, odd = low nibble).
 * Returns 0 on success, or -1 without modifying anything when the counter is
 * already at its maximum of 15. */
int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long slot = index / 2 + offset;
    const uint8_t packed = bitmap->array[slot];
    const bool low_nibble = (index % 2 != 0);

    const uint8_t counter = low_nibble ? (packed & 0x0f) : ((packed & 0xf0) >> 4);
    if (counter == 0x0f) {
        /* 4-bit counter would overflow */
        return -1;
    }

    /* no carry can leave the nibble because the counter is below 15 */
    const uint8_t step = low_nibble ? 0x01 : 0x10;
    bitmap->array[slot] = (uint8_t)(packed + step);
    return 0;
}
/* Decrements the 4-bit counter number `index`, stored `offset` bytes into the
 * bitmap (two counters per byte: even index = high nibble, odd = low nibble).
 * Returns 0 on success, or -1 without modifying anything when the counter is
 * already zero. */
int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long slot = index / 2 + offset;
    const uint8_t packed = bitmap->array[slot];
    const bool low_nibble = (index % 2 != 0);

    const uint8_t counter = low_nibble ? (packed & 0x0f) : ((packed & 0xf0) >> 4);
    if (counter == 0x00) {
        /* decrementing a zero counter is reported as an error */
        return -1;
    }

    /* no borrow can leave the nibble because the counter is above 0 */
    const uint8_t step = low_nibble ? 0x01 : 0x10;
    bitmap->array[slot] = (uint8_t)(packed - step);
    return 0;
}
/* Reads the 4-bit counter number `index`, stored `offset` bytes into the
 * bitmap. Returns the masked (unshifted) nibble, so the result is non-zero
 * exactly when the counter is non-zero. */
int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset)
{
    const long slot = index / 2 + offset;
    /* odd indexes live in the low nibble, even indexes in the high nibble */
    const int mask = (index % 2 != 0) ? 0x0f : 0xf0;
    return bitmap->array[slot] & mask;
}
/*
 * Perform the actual hashing for `key`
 *
 * Only call the hash once to get a pair of initial values (h1 and
 * h2). Use these values to generate all hashes in a quick loop.
 *
 * See paper by Kirsch, Mitzenmacher [2006]
 * http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
 *
 * bloom   - supplies nfuncs (number of hashes) and counts_per_func (modulus)
 * key     - bytes to hash
 * key_len - number of bytes in key (passed to MurmurHash3_x64_128 as int)
 * hashes  - output array with room for bloom->nfuncs values; entry i
 *           receives (h1 + i * h2) % counts_per_func
 */
void hash_func(counting_bloom_t *bloom, const char *key, size_t key_len, uint32_t *hashes)
{
    /* MurmurHash3_x64_128 stores its 128-bit result through a uint64_t
     * pointer, so the buffer is aligned for 64-bit stores. */
    alignas(uint64_t) uint32_t checksum[4];
    MurmurHash3_x64_128(key, (int)key_len, SALT_CONSTANT, checksum);

    const uint32_t h1 = checksum[0];
    const uint32_t h2 = checksum[1];

    /* A 32-bit unsigned counter keeps `h1 + i * h2` in wrapping uint32_t
     * arithmetic, so the generated hashes are identical to the previous ones
     * while avoiding a signed/unsigned comparison against nfuncs. */
    for (uint32_t i = 0; i < bloom->nfuncs; i++) {
        hashes[i] = (h1 + i * h2) % bloom->counts_per_func;
    }
}
/* Allocates and sizes one counting bloom filter.
 *
 * capacity   - number of elements the filter is sized for
 * error_rate - target false-positive rate used to derive the hash count and
 *              the number of counters per hash function
 * offset     - byte offset of this filter's header inside the shared bitmap;
 *              the stored `offset` field points just past that header
 *
 * The `header` and `bitmap` pointers are set to nullptr here and must be
 * attached by the caller. Returns nullptr when an allocation fails. */
counting_bloom_t *counting_bloom_init(unsigned int capacity, double error_rate, long offset)
{
    counting_bloom_t *bloom = (counting_bloom_t *)malloc(sizeof(counting_bloom_t));
    if (bloom == nullptr) {
        return nullptr;
    }

    /* never leave these dangling: both are wired up later by the caller */
    bloom->header = nullptr;
    bloom->bitmap = nullptr;

    bloom->capacity = capacity;
    bloom->error_rate = error_rate;
    bloom->offset = offset + sizeof(counting_bloom_header_t);
    bloom->nfuncs = (size_t) ceil(log(1 / error_rate) / log(2));
    bloom->counts_per_func = (unsigned int) ceil(capacity * fabs(log(error_rate)) / (bloom->nfuncs * pow(log(2), 2)));
    bloom->size = bloom->nfuncs * bloom->counts_per_func;
    /* rounding-up integer divide by 2 of bloom->size (two 4-bit counters per byte) */
    bloom->num_bytes = ((bloom->size + 1) / 2) + sizeof(counting_bloom_header_t);

    /* scratch buffer that hash_func fills on every add/remove/check */
    bloom->hashes = (uint32_t *)calloc(bloom->nfuncs, sizeof(uint32_t));
    if (bloom->hashes == nullptr) {
        free(bloom);
        return nullptr;
    }
    return bloom;
}
/* Adds the `len`-byte key `s`: increments one counter per hash function and
 * bumps the header's element count. Returns 0 on success, or -1 when at
 * least one counter was already saturated (the remaining counters are still
 * incremented and the count is still bumped). */
int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len)
{
    uint32_t *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    int status = 0;
    for (unsigned int func = 0; func < bloom->nfuncs; ++func) {
        /* each hash function owns its own run of counts_per_func counters */
        const unsigned int counter = slots[func] + func * bloom->counts_per_func;
        if (bitmap_increment(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }

    bloom->header->count++;
    return status;
}
/* Removes the `len`-byte key `s`: decrements one counter per hash function
 * and lowers the header's element count. Returns 0 on success, or -1 when at
 * least one counter was already zero (the remaining counters are still
 * decremented and the count is still lowered). */
int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len)
{
    uint32_t *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    int status = 0;
    for (unsigned int func = 0; func < bloom->nfuncs; ++func) {
        /* each hash function owns its own run of counts_per_func counters */
        const unsigned int counter = slots[func] + func * bloom->counts_per_func;
        if (bitmap_decrement(bloom->bitmap, counter, bloom->offset) == -1) {
            status = -1;
        }
    }

    bloom->header->count--;
    return status;
}
/* Tests whether the `len`-byte key `s` may be present. Returns 1 when the
 * counter selected by every hash function is non-zero, 0 as soon as one of
 * them is zero. */
int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len)
{
    uint32_t *slots = bloom->hashes;
    hash_func(bloom, s, len, slots);

    unsigned int func = 0;
    while (func < bloom->nfuncs) {
        const unsigned int counter = slots[func] + func * bloom->counts_per_func;
        if (bitmap_check(bloom->bitmap, counter, bloom->offset) == 0) {
            return 0;
        }
        ++func;
    }
    return 1;
}
/* Releases a scaling bloom filter: every counting sub-filter (and its hash
 * scratch buffer), the sub-filter pointer array, the backing bitmap and the
 * filter struct itself. A null filter, a null sub-filter array and null
 * entries are tolerated so the function is safe on partially built filters.
 * Always returns 0. */
int free_scaling_bloom(scaling_bloom_t *bloom)
{
    if (bloom == nullptr) {
        return 0;
    }

    if (bloom->blooms != nullptr) {
        /* walk from the newest sub-filter down to the first one */
        for (unsigned int i = bloom->num_blooms; i-- > 0;) {
            counting_bloom_t *cur = bloom->blooms[i];
            if (cur != nullptr) {
                free(cur->hashes);
                free(cur);
            }
        }
        free(bloom->blooms);
    }

    free_bitmap(bloom->bitmap);
    free(bloom);
    return 0;
}
/* Creates a new counting bloom filter and appends it to the given scaling
 * bloom filter.
 *
 * The new sub-filter uses a tightened error rate
 * (error_rate * ERROR_TIGHTENING_RATIO^(num_blooms + 1)) and is placed at the
 * current end of the scaling filter's byte range.
 *
 * extern_bitmap - false: the backing bitmap is grown to hold the new
 *                 sub-filter and all header pointers are refreshed (realloc
 *                 may move the array);
 *                 true: the bitmap already contains the data, so only the new
 *                 sub-filter's header pointer is computed.
 *
 * The caller initialises header->count and header->id. Returns the new
 * sub-filter, or nullptr on allocation failure; in that case num_blooms,
 * num_bytes and the existing sub-filters are left untouched. */
counting_bloom_t *new_counting_bloom_from_scale(scaling_bloom_t *bloom, bool extern_bitmap = false)
{
    const double error_rate = bloom->error_rate * (pow(ERROR_TIGHTENING_RATIO, bloom->num_blooms + 1));

    /* Grow through a temporary so the existing array is not lost (and leaked)
     * when realloc fails. */
    counting_bloom_t **grown = (counting_bloom_t **)realloc(
        bloom->blooms, (bloom->num_blooms + 1) * sizeof(counting_bloom_t *));
    if (grown == nullptr) {
        return nullptr;
    }
    bloom->blooms = grown;

    counting_bloom_t *cur_bloom = counting_bloom_init(bloom->capacity, error_rate, bloom->num_bytes);
    if (cur_bloom == nullptr) {
        return nullptr;
    }

    if (!extern_bitmap) {
        const size_t wanted = bloom->num_bytes + cur_bloom->num_bytes;
        bitmap_t *resized = bitmap_resize(bloom->bitmap, wanted);
        /* bitmap_resize leaves the bitmap at its old size when realloc fails */
        if (resized == nullptr || resized->array == nullptr || resized->bytes < wanted) {
            free(cur_bloom->hashes);
            free(cur_bloom);
            return nullptr;
        }
        bloom->bitmap = resized;
    }

    /* all allocations succeeded: commit the new sub-filter */
    bloom->blooms[bloom->num_blooms] = cur_bloom;
    bloom->num_blooms++;

    if (!extern_bitmap) {
        /* reset header pointer, as realloc may have moved */
        bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;
        /* Set the pointers for these header structs to the right location since realloc may have moved */
        for (unsigned int i = 0; i < bloom->num_blooms; i++) {
            const long offset = bloom->blooms[i]->offset - sizeof(counting_bloom_header_t);
            bloom->blooms[i]->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
        }
    } else {
        const long offset = cur_bloom->offset - sizeof(counting_bloom_header_t);
        cur_bloom->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
    }

    bloom->num_bytes += cur_bloom->num_bytes;
    cur_bloom->bitmap = bloom->bitmap;
    return cur_bloom;
}
/* Resets the header's mem_seqnum to 0 and returns the value it held before. */
uint64_t scaling_bloom_clear_seqnums(scaling_bloom_t *bloom)
{
    scaling_bloom_header_t *hdr = bloom->header;
    const uint64_t previous = hdr->mem_seqnum;
    hdr->mem_seqnum = 0;
    return previous;
}
/* Adds the `len`-byte key `s` with the given `id`.
 *
 * The key goes into the newest sub-filter whose starting id is <= `id` (the
 * first sub-filter when none qualifies). When `id` exceeds the largest id
 * seen so far and the chosen sub-filter has reached its capacity, a new
 * sub-filter is appended and used instead. mem_seqnum is cleared for the
 * duration of the update and then set to its previous value plus one.
 *
 * Returns 1 on success and -1 on error: a saturated counter, a filter without
 * sub-filters, or a failure to allocate a new sub-filter (in which case
 * nothing is added and mem_seqnum is restored). */
int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    int i;
    uint64_t seqnum;
    counting_bloom_t *cur_bloom = nullptr;

    for (i = bloom->num_blooms - 1; i >= 0; i--) {
        cur_bloom = bloom->blooms[i];
        if (id >= cur_bloom->header->id) {
            break;
        }
    }
    if (cur_bloom == nullptr) {
        /* no sub-filter exists, so there is nowhere to store the key */
        return -1;
    }

    seqnum = scaling_bloom_clear_seqnums(bloom);

    if ((id > bloom->header->max_id) && (cur_bloom->header->count >= cur_bloom->capacity)) {
        cur_bloom = new_counting_bloom_from_scale(bloom);
        if (cur_bloom == nullptr) {
            /* growing failed: undo the sequence-number reset and report it */
            bloom->header->mem_seqnum = seqnum;
            return -1;
        }
        cur_bloom->header->count = 0;
        cur_bloom->header->id = bloom->header->max_id + 1;
    }
    if (bloom->header->max_id < id) {
        bloom->header->max_id = id;
    }

    const bool error = (counting_bloom_add(cur_bloom, s, len) == -1);
    bloom->header->mem_seqnum = seqnum + 1;
    return error ? -1 : 1;
}
/* Removes the `len`-byte key `s` from the newest sub-filter whose starting id
 * is <= `id`. mem_seqnum is cleared for the duration of the update and then
 * set to its previous value plus one.
 * Returns 1 on success, -1 when a counter was already zero, and 0 when no
 * sub-filter covers `id`. */
int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
{
    for (int i = bloom->num_blooms - 1; i >= 0; i--) {
        counting_bloom_t *candidate = bloom->blooms[i];
        if (id < candidate->header->id) {
            /* this sub-filter starts after `id`; try an older one */
            continue;
        }

        const uint64_t seqnum = scaling_bloom_clear_seqnums(bloom);
        const int removed = counting_bloom_remove(candidate, s, len);
        bloom->header->mem_seqnum = seqnum + 1;
        return (removed == -1) ? -1 : 1;
    }
    return 0;
}
/* Tests whether the `len`-byte key `s` may be present in any sub-filter,
 * searching from the newest to the oldest. Returns 1 on the first hit and 0
 * when no sub-filter reports the key. */
int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len)
{
    int i = bloom->num_blooms - 1;
    while (i >= 0) {
        if (counting_bloom_check(bloom->blooms[i], s, len)) {
            return 1;
        }
        i--;
    }
    return 0;
}
/* Allocates an empty scaling bloom filter (no sub-filters yet).
 * When `bitmap` is nullptr a fresh bitmap just large enough for the scaling
 * header is created; otherwise the supplied bitmap is used as backing store.
 * Returns nullptr when an allocation fails. */
scaling_bloom_t *scaling_bloom_init(unsigned int capacity, double error_rate, bitmap_t* bitmap = nullptr)
{
    scaling_bloom_t *bloom = (scaling_bloom_t *)malloc(sizeof(scaling_bloom_t));
    if (bloom == nullptr) {
        return nullptr;
    }

    bitmap_t *backing = bitmap;
    if (backing == nullptr) {
        backing = new_bitmap(sizeof(scaling_bloom_header_t));
        if (backing == nullptr) {
            free(bloom);
            return nullptr;
        }
    }

    bloom->bitmap = backing;
    /* the scaling header occupies the first bytes of the bitmap */
    bloom->header = (scaling_bloom_header_t *) backing->array;
    bloom->capacity = capacity;
    bloom->error_rate = error_rate;
    bloom->num_blooms = 0;
    bloom->num_bytes = sizeof(scaling_bloom_header_t);
    bloom->blooms = nullptr;
    return bloom;
}
/* Creates a scaling bloom filter backed by its own in-memory bitmap and
 * containing one empty counting sub-filter (count 0, starting id 0); the
 * header's mem_seqnum starts at 1.
 * Returns nullptr when any allocation fails. */
scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate)
{
    scaling_bloom_t *bloom = scaling_bloom_init(capacity, error_rate);
    if (bloom == nullptr) {
        /* nothing was allocated, so there is nothing to clean up */
        return nullptr;
    }

    counting_bloom_t *cur_bloom = new_counting_bloom_from_scale(bloom);
    if (cur_bloom == nullptr) {
        free_scaling_bloom(bloom);
        return nullptr;
    }

    cur_bloom->header->count = 0;
    cur_bloom->header->id = 0;
    bloom->header->mem_seqnum = 1;
    return bloom;
}
/* Rebuilds a scaling bloom filter on top of an existing bitmap.
 *
 * Sub-filters are appended, using the same sizing sequence as a freshly grown
 * filter, until they account for every byte after the scaling header. The
 * count and id stored in each sub-filter header are left as found in the
 * bitmap.
 *
 * Returns nullptr when `bitmap` is null, when an allocation fails, or when
 * the bitmap size does not match a whole number of sub-filters. In the last
 * two cases the partially built filter is released with free_scaling_bloom,
 * which also frees `bitmap` (unchanged from the previous behaviour). On
 * success the returned filter references `bitmap`, and free_scaling_bloom
 * will free it. */
scaling_bloom_t *new_scaling_bloom_from_bitmap(unsigned int capacity, double error_rate, bitmap_t* bitmap)
{
    if (bitmap == nullptr) {
        return nullptr;
    }

    scaling_bloom_t *bloom = scaling_bloom_init(capacity, error_rate, bitmap);
    if (bloom == nullptr) {
        return nullptr;
    }

    /* Signed 64-bit so that large bitmaps are not truncated and an overshoot
     * shows up as a negative remainder. */
    long long remaining = (long long)bitmap->bytes - (long long)sizeof(scaling_bloom_header_t);
    while (remaining) {
        counting_bloom_t *cur_bloom = new_counting_bloom_from_scale(bloom, true);
        if (cur_bloom == nullptr) {
            free_scaling_bloom(bloom);
            return nullptr;
        }
        // leave count and id as they were set in the file
        remaining -= (long long)cur_bloom->num_bytes;
        if (remaining < 0) {
            free_scaling_bloom(bloom);
            return nullptr;
        }
    }
    return bloom;
}
/* Estimates the memory used by a scaling bloom filter in bytes: the filter
 * struct, its bitmap byte range (num_bytes), one counting_bloom_t plus one
 * pointer per sub-filter, and each sub-filter's hash scratch buffer.
 * Returns 0 for a null filter. */
size_t bloom_size(scaling_bloom_t *bloom) {
    if (bloom == nullptr) {
        return 0;
    }

    size_t total = sizeof(scaling_bloom_t) + bloom->num_bytes;
    total += bloom->num_blooms * (sizeof(counting_bloom_t) + sizeof(void*));

    unsigned int i = 0;
    while (i < bloom->num_blooms) {
        total += bloom->blooms[i]->nfuncs * sizeof(uint32_t);
        ++i;
    }
    return total;
}
\ No newline at end of file
/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#ifndef __BLOOM_H__
#define __BLOOM_H__
#include <stdint.h>
#include <stdlib.h>
/* Returns the dablooms library version string. */
const char *dablooms_version(void);
/* Heap-allocated byte array that stores the filters' 4-bit counters
 * (two counters per byte) together with the filter headers. */
typedef struct {
/* number of bytes allocated for `array` */
size_t bytes;
/* backing storage; freed by free_bitmap */
char *array;
} bitmap_t;
/* Resizes the bitmap's array to new_size bytes, zero-filling any added tail.
 * Returns `bitmap`. (The prototype now matches the two-argument definition.) */
bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t new_size);
/* Allocates a bitmap with `bytes` zeroed bytes; returns NULL on allocation failure. */
bitmap_t *new_bitmap(size_t bytes);
/* Increments 4-bit counter `index` located `offset` bytes into the bitmap.
 * Returns 0, or -1 if the counter is already 15. */
int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset);
/* Decrements 4-bit counter `index` located `offset` bytes into the bitmap.
 * Returns 0, or -1 if the counter is already 0. */
int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset);
/* Returns a non-zero value when 4-bit counter `index` is non-zero, else 0. */
int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset);
/* Frees the bitmap and its array; NULL is ignored. */
void free_bitmap(bitmap_t *bitmap);
/* Per-sub-filter header stored inside the bitmap, directly before the
 * sub-filter's counters. */
typedef struct {
/* starting id of this sub-filter; add/remove pick the newest sub-filter whose id is <= the element id */
uint64_t id;
/* element count, incremented by counting_bloom_add and decremented by counting_bloom_remove */
uint32_t count;
/* padding field; not accessed by the library code */
uint32_t _pad;
} counting_bloom_header_t;
/* In-memory descriptor of one counting bloom filter living inside a shared bitmap. */
typedef struct {
/* points at this filter's header inside the bitmap array */
counting_bloom_header_t *header;
/* number of elements the filter was sized for */
unsigned int capacity;
/* byte offset of the first counter in the bitmap (just past the header) */
long offset;
/* number of counters owned by each hash function */
unsigned int counts_per_func;
/* scratch buffer of nfuncs hash values, refilled on every add/remove/check */
uint32_t *hashes;
/* number of hash functions */
size_t nfuncs;
/* total number of counters: nfuncs * counts_per_func */
size_t size;
/* bytes occupied in the bitmap: ceil(size / 2) plus the header */
size_t num_bytes;
/* target false-positive rate this filter was sized with */
double error_rate;
/* shared backing bitmap; owned by the enclosing scaling filter */
bitmap_t *bitmap;
} counting_bloom_t;
/* Adds key `s` of `len` bytes. Returns 0, or -1 if a counter was already saturated. */
int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len);
/* Removes key `s` of `len` bytes. Returns 0, or -1 if a counter was already zero. */
int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len);
/* Returns 1 if key `s` may be present, 0 if it is definitely absent from this filter. */
int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len);
/* Header stored at the very start of a scaling filter's bitmap. */
typedef struct {
/* largest element id passed to scaling_bloom_add so far */
uint64_t max_id;
/* set to 0 while an add/remove is in progress, then to its previous value + 1 */
uint64_t mem_seqnum;
/* not accessed by the library code */
uint64_t reserved;
} scaling_bloom_header_t;
/* A bloom filter that grows by appending counting sub-filters to one shared bitmap. */
typedef struct {
/* points at the scaling header at the start of the bitmap array */
scaling_bloom_header_t *header;
/* capacity given to every sub-filter */
unsigned int capacity;
/* number of entries in `blooms` */
unsigned int num_blooms;
/* bytes of the bitmap in use: scaling header plus all sub-filters */
size_t num_bytes;
/* base false-positive rate; each new sub-filter uses a tightened rate */
double error_rate;
/* array of num_blooms sub-filter descriptors, oldest first */
counting_bloom_t **blooms;
/* backing bitmap; freed by free_scaling_bloom */
bitmap_t *bitmap;
} scaling_bloom_t;
/* Creates a scaling filter with its own bitmap and one empty sub-filter; NULL on allocation failure. */
scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate);
/* Rebuilds a scaling filter on top of an existing bitmap; NULL if the bitmap size does not fit. */
scaling_bloom_t *new_scaling_bloom_from_bitmap(unsigned int capacity, double error_rate, bitmap_t* bitmap);
/* Frees the filter, its sub-filters and its bitmap. Always returns 0. */
int free_scaling_bloom(scaling_bloom_t *bloom);
/* Adds key `s` with element `id`. Returns 1 on success, -1 if a counter was saturated. */
int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
/* Removes key `s` with element `id`. Returns 1 on success, -1 if a counter was already zero,
 * 0 if no sub-filter covers `id`. */
int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
/* Returns 1 if key `s` may be present in any sub-filter, otherwise 0. */
int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len);
/* Returns an estimate of the filter's memory use in bytes; 0 for NULL. */
size_t bloom_size(scaling_bloom_t *bloom);
#endif
#ifdef __cplusplus
}
#endif
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
#include "murmur.h"
#define FORCE_INLINE inline static
// Rotates the 64-bit value x left by r bits.
FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
{
    const uint64_t shifted_up = x << r;
    const uint64_t wrapped_around = x >> (64 - r);
    return shifted_up | wrapped_around;
}
#define ROTL64(x,y) rotl64(x,y)
#define BIG_CONSTANT(x) (x##LLU)
#define getblock(x, i) (x[i])
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
// Mixes the bits of k with alternating xor-shift (by 33) and multiply steps.
FORCE_INLINE uint64_t fmix64(uint64_t k)
{
    uint64_t mixed = k ^ (k >> 33);
    mixed *= BIG_CONSTANT(0xff51afd7ed558ccd);
    mixed ^= mixed >> 33;
    mixed *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    mixed ^= mixed >> 33;
    return mixed;
}
//-----------------------------------------------------------------------------
// Computes a 128-bit hash of `len` bytes starting at `key`, seeded with `seed`.
// The result is stored through `out` as two consecutive uint64_t values, so
// `out` must provide at least 16 writable bytes.
void MurmurHash3_x64_128 ( const void * key, const int len,
const uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
// number of complete 16-byte blocks in the input
const int nblocks = len / 16;
// both 64-bit state words start from the seed
uint64_t h1 = seed;
uint64_t h2 = seed;
uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
int i;
//----------
// body
// The input is read as 64-bit words straight through a cast pointer.
const uint64_t * blocks = (const uint64_t *)(data);
for(i = 0; i < nblocks; i++) {
// each block contributes two 64-bit words, one per state word
uint64_t k1 = getblock(blocks,i*2+0);
uint64_t k2 = getblock(blocks,i*2+1);
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
// tail
// The remaining 0-15 bytes are folded in byte by byte. The cases have no
// break on purpose: entering at `len & 15` falls through every smaller case.
const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
uint64_t k1 = 0;
uint64_t k2 = 0;
switch(len & 15) {
case 15: k2 ^= ((uint64_t)tail[14]) << 48;
case 14: k2 ^= ((uint64_t)tail[13]) << 40;
case 13: k2 ^= ((uint64_t)tail[12]) << 32;
case 12: k2 ^= ((uint64_t)tail[11]) << 24;
case 11: k2 ^= ((uint64_t)tail[10]) << 16;
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
}
//----------
// finalization
// mix in the length, cross-add the two words and run the final bit mixer
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 += h2;
h2 += h1;
// store the 128-bit result as two 64-bit words
((uint64_t*)out)[0] = h1;
((uint64_t*)out)[1] = h2;
}
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_
#include <stdint.h>
// Hashes `len` bytes at `key` with `seed` and writes a 128-bit result
// (two uint64_t values, 16 bytes) to `out`.
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
#endif // _MURMURHASH3_H_
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
package dablooms
/*
#cgo CFLAGS: -I${SRCDIR}/cwrapper
#cgo LDFLAGS: -L${SRCDIR}/cwrapper/output -ldablooms -lstdc++ -lm
#include <stdlib.h>
#include <dablooms.h>
*/
import "C"
import (
"unsafe"
)
// ScalingBloom is a Go handle around a C scaling_bloom_t created by
// new_scaling_bloom. Call Destroy to release the C memory.
type ScalingBloom struct {
// cfilter is the underlying C filter.
cfilter *C.scaling_bloom_t
}
// NewScalingBloom creates a scaling bloom filter sized for capacity elements
// per sub-filter with the given target false-positive rate. capacity is
// converted to a C unsigned int before being handed to the C library.
//
// It returns nil when the C library fails to allocate the filter, so callers
// never receive a wrapper around a NULL handle.
func NewScalingBloom(capacity uint64, errorRate float64) *ScalingBloom {
	cfilter := C.new_scaling_bloom(C.uint(capacity), C.double(errorRate))
	if cfilter == nil {
		return nil
	}
	return &ScalingBloom{cfilter: cfilter}
}
// Destroy releases the C memory held by the filter. It is safe to call on a
// nil receiver and more than once: the handle is cleared after the first call
// so the C filter is never freed twice.
func (sb *ScalingBloom) Destroy() {
	if sb == nil || sb.cfilter == nil {
		return
	}
	C.free_scaling_bloom(sb.cfilter)
	sb.cfilter = nil
}
// Add inserts key with the given element id and reports whether the C
// library returned 1 (success). An empty key is accepted: it is passed as a
// NULL pointer with length 0 instead of indexing key[0], which would panic.
func (sb *ScalingBloom) Add(key []byte, id int64) bool {
	var cKey *C.char
	if len(key) > 0 {
		cKey = (*C.char)(unsafe.Pointer(&key[0]))
	}
	return C.scaling_bloom_add(sb.cfilter, cKey, C.size_t(len(key)), C.uint64_t(id)) == 1
}
// Remove deletes key with the given element id and reports whether the C
// library returned 1 (success). An empty key is accepted: it is passed as a
// NULL pointer with length 0 instead of indexing key[0], which would panic.
func (sb *ScalingBloom) Remove(key []byte, id int64) bool {
	var cKey *C.char
	if len(key) > 0 {
		cKey = (*C.char)(unsafe.Pointer(&key[0]))
	}
	return C.scaling_bloom_remove(sb.cfilter, cKey, C.size_t(len(key)), C.uint64_t(id)) == 1
}
// Check reports whether key may be present in the filter (false positives
// are possible). An empty key is accepted: it is passed as a NULL pointer
// with length 0 instead of indexing key[0], which would panic.
func (sb *ScalingBloom) Check(key []byte) bool {
	var cKey *C.char
	if len(key) > 0 {
		cKey = (*C.char)(unsafe.Pointer(&key[0]))
	}
	return C.scaling_bloom_check(sb.cfilter, cKey, C.size_t(len(key))) == 1
}
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
package dablooms
import (
"fmt"
"strconv"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// stats tallies the outcome of membership checks performed by the test.
type stats struct {
// keys that were added and reported present
TruePositives int64
// keys that were never added and reported absent
TrueNegatives int64
// keys that were never added but reported present
FalsePositives int64
// keys that were added but reported absent
FalseNegatives int64
}
// Capacity is the filter capacity used by the test; the test loops over 2*Capacity integers.
var Capacity uint64 = 1000000
// ErrorRate is the false-positive rate requested from the filter and the threshold PrintResults compares against.
var ErrorRate float64 = .05
// PrintResults writes the four counters and the observed false-positive rate
// (false positives divided by all negatives) to stdout, followed by a warning
// line when that rate exceeds ErrorRate.
func PrintResults(stats *stats) {
	negatives := stats.FalsePositives + stats.TrueNegatives
	falsePositiveRate := float64(stats.FalsePositives) / float64(negatives)

	rows := []struct {
		format string
		value  int64
	}{
		{"True positives: %7d\n", stats.TruePositives},
		{"True negatives: %7d\n", stats.TrueNegatives},
		{"False positives: %7d\n", stats.FalsePositives},
		{"False negatives: %7d\n", stats.FalseNegatives},
	}
	for _, row := range rows {
		fmt.Printf(row.format, row.value)
	}

	fmt.Printf("False positive rate: %f\n", falsePositiveRate)
	if falsePositiveRate > ErrorRate {
		fmt.Printf("False positive rate too high\n")
	}
}
// TestDablooms_Correctness adds every even integer below 2*Capacity to a
// filter and then checks all integers in that range: even keys must be
// reported present (a miss is a false negative), odd keys were never added
// (a hit is a false positive).
func TestDablooms_Correctness(t *testing.T) {
	sb := NewScalingBloom(Capacity, ErrorRate)
	if !assert.NotNil(t, sb) {
		// nothing to exercise without a filter
		return
	}
	defer sb.Destroy()

	total := int(Capacity * 2)

	start := time.Now()
	for i := 0; i < total; i += 2 {
		sb.Add([]byte(strconv.Itoa(i)), int64(i))
	}
	// Seconds() keeps the fractional part; integer nanosecond division by 1e9
	// truncated every measurement to whole seconds.
	fmt.Printf("The time cost for add: %fs\n", time.Since(start).Seconds())

	results := &stats{}

	start = time.Now()
	for i := 0; i < total; i++ {
		present := sb.Check([]byte(strconv.Itoa(i)))
		added := i%2 == 0
		switch {
		case added && present:
			results.TruePositives++
		case added:
			results.FalseNegatives++
		case present:
			results.FalsePositives++
		default:
			results.TrueNegatives++
		}
	}
	fmt.Printf("Time cost for check: %fs\n", time.Since(start).Seconds())

	PrintResults(results)

	// A bloom filter may report false positives, but it must never miss a key
	// that was added.
	assert.False(t, results.FalseNegatives > 0)
}
#!/bin/bash
# Configure, build and install the dablooms cwrapper static library.
# Paths are relative to the directory that contains this script:
#   build tree:  ../cwrapper_dablooms_build
#   sources:     ../internal/util/dablooms/cwrapper
#   install dir: ../internal/util/dablooms/cwrapper/output
#
# Options:
#   -t <type>  CMake build type (default: Debug)
#   -f <path>  custom thirdparty path, forwarded as CUSTOM_THIRDPARTY_DOWNLOAD_PATH
#   -h         print help and exit

SOURCE=${BASH_SOURCE[0]}
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR=$( cd -P "$( dirname "$SOURCE" )" && pwd )
    SOURCE=$(readlink "$SOURCE")
    # if $SOURCE was a relative symlink, resolve it relative to the directory holding the symlink
    [[ $SOURCE != /* ]] && SOURCE=$DIR/$SOURCE
done
DIR=$( cd -P "$( dirname "$SOURCE" )" && pwd )

CMAKE_BUILD=${DIR}/../cwrapper_dablooms_build
OUTPUT_LIB=${DIR}/../internal/util/dablooms/cwrapper/output
SRC_DIR=${DIR}/../internal/util/dablooms/cwrapper

BUILD_TYPE="Debug"
CUSTOM_THIRDPARTY_PATH=""

# Options are parsed before any directory is touched, so -h or a bad flag has
# no side effects. "h" takes no argument (the old "h:" made plain -h an error).
while getopts "t:f:h" arg; do
    case $arg in
        f)
            CUSTOM_THIRDPARTY_PATH=$OPTARG
            ;;
        t)
            BUILD_TYPE=$OPTARG # BUILD_TYPE
            ;;
        h) # help
            # The inner quotes are escaped so the empty default is actually printed.
            echo "-t: build type(default: Debug)
-f: custom thirdparty path(default: \"\")
-h: help
"
            exit 0
            ;;
        ?)
            echo "ERROR! unknown argument"
            exit 1
            ;;
    esac
done

# Keep the build tree for incremental builds, but always start from a clean output directory.
mkdir -p "${CMAKE_BUILD}" || exit 1
rm -rf "${OUTPUT_LIB}"
mkdir -p "${OUTPUT_LIB}" || exit 1

echo "BUILD_TYPE: " $BUILD_TYPE
echo "CUSTOM_THIRDPARTY_PATH: " $CUSTOM_THIRDPARTY_PATH

pushd "${CMAKE_BUILD}" || exit 1

# An array keeps each argument intact even when a path contains spaces.
CMAKE_CMD=(cmake
    "-DCMAKE_INSTALL_PREFIX=${OUTPUT_LIB}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DCUSTOM_THIRDPARTY_DOWNLOAD_PATH=${CUSTOM_THIRDPARTY_PATH}"
    "${SRC_DIR}")
echo "${CMAKE_CMD[@]}"
# Stop here when configuration fails instead of running make on a stale tree.
"${CMAKE_CMD[@]}" || exit 1

# Honour a caller-provided $jobs; otherwise use the CPU count (1 if it cannot be determined).
if [[ ! ${jobs+1} ]]; then
    jobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)
fi
make -j "${jobs}" && make install
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册