提交 58500a3c 编写于 作者: A AlexDuan

Limit the centroid count to the expected range and speed up handling of identical points

上级 f651179a
...@@ -26,11 +26,11 @@ ...@@ -26,11 +26,11 @@
#define M_PI 3.14159265358979323846264338327950288 /* pi */ #define M_PI 3.14159265358979323846264338327950288 /* pi */
#endif #endif
#define ADDITION_CENTROID_NUM 100 #define ADDITION_CENTROID_NUM 2
#define COMPRESSION 400 #define COMPRESSION 400
#define GET_CENTROID(compression) (ceil(compression * M_PI / 2) + 1 + ADDITION_CENTROID_NUM) #define GET_CENTROID(compression) (ceil(compression * M_PI / 2) + 1 + ADDITION_CENTROID_NUM)
#define GET_THRESHOLD(compression) (7.5 + 0.37 * compression - 2e-4 * pow(compression, 2)) #define GET_THRESHOLD(compression) (7.5 + 0.37 * compression - 2e-4 * pow(compression, 2))
#define TDIGEST_SIZE(compression) (sizeof(TDigest) + sizeof(SPt)*GET_THRESHOLD(compression)) #define TDIGEST_SIZE(compression) (sizeof(TDigest) + sizeof(SCentroid)*GET_CENTROID(compression) + sizeof(SPt)*GET_THRESHOLD(compression))
typedef struct SCentroid { typedef struct SCentroid {
double mean; double mean;
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include "tdigest.h" #include "tdigest.h"
#define INTERPOLATE(x, x0, x1) (((x) - (x0)) / ((x1) - (x0))) #define INTERPOLATE(x, x0, x1) (((x) - (x0)) / ((x1) - (x0)))
#define INTEGRATED_LOCATION(compression, q) ((compression) * (asin(2 * (q) - 1) + M_PI / 2) / M_PI) #define INTEGRATED_LOCATION(compression, q) ((compression) * (asin(2 * (double)(q) - 1)/M_PI + (double)1/2))
#define FLOAT_EQ(f1, f2) (fabs((f1) - (f2)) <= FLT_EPSILON) #define FLOAT_EQ(f1, f2) (fabs((f1) - (f2)) <= FLT_EPSILON)
typedef struct SMergeArgs { typedef struct SMergeArgs {
...@@ -78,26 +78,28 @@ static void mergeCentroid(SMergeArgs *args, SCentroid *merge) { ...@@ -78,26 +78,28 @@ static void mergeCentroid(SMergeArgs *args, SCentroid *merge) {
SCentroid *c = &args->centroids[args->idx]; SCentroid *c = &args->centroids[args->idx];
args->weight_so_far += merge->weight; args->weight_so_far += merge->weight;
k2 = INTEGRATED_LOCATION(args->t->compression, k2 = INTEGRATED_LOCATION(args->t->size,
args->weight_so_far / args->t->total_weight); args->weight_so_far / (args->t->total_weight + merge->weight));
//idx++
if (k2 - args->k1 > 1 && c->weight > 0) { if(k2 - args->k1 > 1 && c->weight > 0) {
if(args->idx + 1 < args->t->size) { // check avoid overflow if(args->idx + 1 < args->t->size
args->idx++; && merge->mean != args->centroids[args->idx].mean) {
} else { args->idx++;
assert(0);
} }
args->k1 = INTEGRATED_LOCATION(args->t->compression, args->k1 = k2;
(args->weight_so_far - merge->weight) / args->t->total_weight);
} }
c = &args->centroids[args->idx]; c = &args->centroids[args->idx];
c->weight += merge->weight; if(c->mean == merge->mean) {
c->mean += (merge->mean - c->mean) * merge->weight / c->weight; c->weight += merge->weight;
} else {
if (merge->weight > 0) { c->weight += merge->weight;
args->min = MIN(merge->mean, args->min); c->mean += (merge->mean - c->mean) * merge->weight / c->weight;
args->max = MAX(merge->mean, args->max);
if (merge->weight > 0) {
args->min = MIN(merge->mean, args->min);
args->max = MAX(merge->mean, args->max);
}
} }
} }
...@@ -163,10 +165,13 @@ void tdigestAdd(TDigest* t, double x, int64_t w) { ...@@ -163,10 +165,13 @@ void tdigestAdd(TDigest* t, double x, int64_t w) {
return; return;
int32_t i = t->num_buffered_pts; int32_t i = t->num_buffered_pts;
t->buffered_pts[i].value = x; if(i > 0 && t->buffered_pts[i-1].value == x ) {
t->buffered_pts[i].weight = w; t->buffered_pts[i].weight = w;
t->num_buffered_pts++; } else {
t->total_weight += w; t->buffered_pts[i].value = x;
t->buffered_pts[i].weight = w;
t->num_buffered_pts++;
}
if (t->num_buffered_pts >= t->threshold) if (t->num_buffered_pts >= t->threshold)
tdigestCompress(t); tdigestCompress(t);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册