提交 26a6675e 编写于 作者: wmmhello's avatar wmmhello

Add test cases for HyperLogLog

上级 dc02f0cd
...@@ -284,12 +284,11 @@ typedef struct { ...@@ -284,12 +284,11 @@ typedef struct {
uint8_t buckets[HLL_BUCKETS]; // Data bytes. uint8_t buckets[HLL_BUCKETS]; // Data bytes.
} SHLLInfo; } SHLLInfo;
static void hllBucketHisto(uint8_t *buckets, int* bucketHisto) { static void hllBucketHisto(uint8_t *buckets, int32_t* bucketHisto) {
uint64_t *word = (uint64_t*) buckets; uint64_t *word = (uint64_t*) buckets;
uint8_t *bytes; uint8_t *bytes;
int j;
for (j = 0; j < HLL_BUCKETS/8; j++) { for (int32_t j = 0; j < HLL_BUCKETS>>3; j++) {
if (*word == 0) { if (*word == 0) {
bucketHisto[0] += 8; bucketHisto[0] += 8;
} else { } else {
...@@ -321,7 +320,7 @@ static double hllTau(double x) { ...@@ -321,7 +320,7 @@ static double hllTau(double x) {
} }
static double hllSigma(double x) { static double hllSigma(double x) {
if (x == 1.) return INFINITY; if (x == 1.0) return INFINITY;
double zPrime; double zPrime;
double y = 1; double y = 1;
double z = x; double z = x;
...@@ -334,9 +333,10 @@ static double hllSigma(double x) { ...@@ -334,9 +333,10 @@ static double hllSigma(double x) {
return z; return z;
} }
// Estimate the cardinality. The algorithm follows the paper "New cardinality estimation algorithms for HyperLogLog sketches".
static uint64_t hllCountCnt(uint8_t *buckets) { static uint64_t hllCountCnt(uint8_t *buckets) {
double m = HLL_BUCKETS; double m = HLL_BUCKETS;
int buckethisto[64] = {0}; int32_t buckethisto[64] = {0};
hllBucketHisto(buckets,buckethisto); hllBucketHisto(buckets,buckethisto);
double z = m * hllTau((m-buckethisto[HLL_DATA_BITS+1])/(double)m); double z = m * hllTau((m-buckethisto[HLL_DATA_BITS+1])/(double)m);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册