提交 c04da26c 编写于 作者: H Haojun Liao

[TD-2634]

上级 84b867e2
......@@ -16,32 +16,36 @@
#ifndef TDENGINE_QPERCENTILE_H
#define TDENGINE_QPERCENTILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "qExtbuffer.h"
#include "qResultbuf.h"
#include "qTsbuf.h"
/*
 * Lower and upper bound of a value range.
 *
 * Each bound is stored once and read through the member that matches the
 * column type: the double view for floating-point data, the signed 64-bit
 * view for signed integers and the unsigned 64-bit view for unsigned integers.
 */
typedef struct MinMaxEntry {
  union {
    double   dMinVal;
    int32_t  iMinVal;    // 32-bit view, kept for code that still refers to it
    int64_t  i64MinVal;
    uint64_t u64MinVal;
  };
  union {
    double   dMaxVal;
    int32_t  iMaxVal;    // 32-bit view, kept for code that still refers to it
    int64_t  i64MaxVal;
    uint64_t u64MaxVal;  // unsigned on purpose: maxima above INT64_MAX must not read back as negative
  };
} MinMaxEntry;
/*
 * Bookkeeping record of one bucket slot: an element counter, a page id and a
 * page pointer. Each member is declared exactly once.
 */
typedef struct {
  int32_t    size;    // incremented by one for every element put into the slot
  int32_t    pageId;  // NOTE(review): presumably the id of the slot's current page in the result buffer - confirm
  tFilePage *data;    // NOTE(review): presumably the page currently being filled - confirm
} SSlotInfo;
/* One bucket slot: its bookkeeping record plus the range of the values routed to it. */
typedef struct tMemBucketSlot {
  SSlotInfo   info;
  MinMaxEntry range;  // widened by tMemBucketUpdateBoundingBox() for each inserted value
} tMemBucketSlot;
struct tMemBucket;
......@@ -52,16 +56,16 @@ typedef struct tMemBucket {
int16_t type;
int16_t bytes;
int32_t total;
int32_t elemPerPage; // number of elements for each object
int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result
int32_t bufPageSize; // disk page size
MinMaxEntry range; // value range
int32_t times; // count that has been checked for deciding the correct data value buckets.
int32_t elemPerPage; // number of elements for each object
int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result
int32_t bufPageSize; // disk page size
MinMaxEntry range; // value range
int32_t times; // count that has been checked for deciding the correct data value buckets.
__compar_fn_t comparFn;
tMemBucketSlot *pSlots;
tMemBucketSlot * pSlots;
SDiskbasedResultBuf *pBuffer;
__perc_hash_func_t hashFunc;
__perc_hash_func_t hashFunc;
} tMemBucket;
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval);
......@@ -73,3 +77,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size);
double getPercentile(tMemBucket *pMemBucket, double percent);
/*
 * The extern "C" block is opened inside the include guard, so it has to be
 * closed inside the guard as well; otherwise a repeated inclusion from C++
 * skips the opening brace but still emits the closing one.
 */
#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_QPERCENTILE_H
\ No newline at end of file
......@@ -2545,7 +2545,7 @@ static void percentile_next_step(SQLFunctionCtx *pCtx) {
if (pInfo->numOfElems == 0) {
pResInfo->complete = true;
} else {
pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, GET_DOUBLE_VAL(&pInfo->minval), GET_DOUBLE_VAL(&pInfo->maxval));
pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, pInfo->minval, pInfo->maxval);
}
pInfo->stage += 1;
......
......@@ -20,6 +20,7 @@
#include "taosdef.h"
#include "tulog.h"
#include "tcompare.h"
#include "ttype.h"
#define DEFAULT_NUM_OF_SLOT 1024
......@@ -48,25 +49,15 @@ static tFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx)
}
/*
 * Put a range into its "empty" state: the minimum is set to the largest
 * representable value and the maximum to the smallest one, using the view
 * that matches the data type (signed 64-bit, unsigned 64-bit, or double for
 * everything else). The first value compared against such a range therefore
 * replaces both bounds.
 */
static void resetBoundingBox(MinMaxEntry* range, int32_t type) {
  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    range->i64MaxVal = INT64_MIN;
    range->i64MinVal = INT64_MAX;
  } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    range->u64MaxVal = 0;
    range->u64MinVal = UINT64_MAX;
  } else {
    range->dMaxVal = -DBL_MAX;
    range->dMinVal = DBL_MAX;
  }
}
......@@ -75,23 +66,15 @@ static int32_t setBoundingBox(MinMaxEntry* range, int16_t type, double minval, d
return -1;
}
switch(type) {
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_INT:
range->iMinVal = (int32_t) minval;
range->iMaxVal = (int32_t) maxval;
break;
case TSDB_DATA_TYPE_BIGINT:
range->i64MinVal = (int64_t) minval;
range->i64MaxVal = (int64_t) maxval;
break;
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_DOUBLE:
range->dMinVal = minval;
range->dMaxVal = maxval;
break;
if (IS_SIGNED_NUMERIC_TYPE(type)) {
range->i64MinVal = (int64_t) minval;
range->i64MaxVal = (int64_t) maxval;
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)){
range->u64MinVal = (uint64_t) minval;
range->u64MaxVal = (uint64_t) maxval;
} else {
range->dMinVal = minval;
range->dMaxVal = maxval;
}
return 0;
......@@ -120,117 +103,56 @@ double findOnlyResult(tMemBucket *pMemBucket) {
tFilePage* pPage = getResBufPage(pMemBucket->pBuffer, pgInfo->pageId);
assert(pPage->num == 1);
switch (pMemBucket->type) {
case TSDB_DATA_TYPE_INT:
return *(int32_t *)pPage->data;
case TSDB_DATA_TYPE_SMALLINT:
return *(int16_t *)pPage->data;
case TSDB_DATA_TYPE_TINYINT:
return *(int8_t *)pPage->data;
case TSDB_DATA_TYPE_BIGINT:
return (double)(*(int64_t *)pPage->data);
case TSDB_DATA_TYPE_DOUBLE: {
double dv = GET_DOUBLE_VAL(pPage->data);
return dv;
}
case TSDB_DATA_TYPE_FLOAT: {
float fv = GET_FLOAT_VAL(pPage->data);
return fv;
}
default:
return 0;
}
double v = 0;
GET_TYPED_DATA(v, double, pMemBucket->type, pPage->data);
return v;
}
return 0;
}
/*
 * Map a signed integer value to a slot index.
 *
 * The value is read from `value` according to pBucket->type and widened to
 * int64_t. The range [i64MinVal, i64MaxVal] of the bucket is divided evenly
 * over pBucket->numOfSlots slots; when the range holds fewer distinct values
 * than there are slots, every value gets its own slot.
 *
 * Returns the slot index in [0, numOfSlots), or -1 when the value lies outside
 * the bucket range (tMemBucketPut treats -1 as "out of range").
 */
int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) {
  int64_t v = 0;
  GET_TYPED_DATA(v, int64_t, pBucket->type, value);

  const int64_t minVal = pBucket->range.i64MinVal;
  const int64_t maxVal = pBucket->range.i64MaxVal;

  // out of range: must not be turned into a slot index, even when asserts are compiled out
  if (v < minVal || v > maxVal) {
    return -1;
  }

  int32_t index = -1;

  // Subtract in the unsigned domain: max - min (and v - min) may not fit in int64_t for a wide range.
  uint64_t span   = (uint64_t)maxVal - (uint64_t)minVal;
  uint64_t offset = (uint64_t)v - (uint64_t)minVal;

  // divide the value range into numOfSlots buckets
  if (span < pBucket->numOfSlots) {
    int32_t delta = (int32_t)offset;
    index = (delta % pBucket->numOfSlots);
  } else {
    double slotSpan = (double)span / pBucket->numOfSlots;
    index = (int32_t)(offset / slotSpan);
    if (v == maxVal) {
      index -= 1;
    }

    // rounding in the double division may push values next to the maximum one slot too far
    if (index >= pBucket->numOfSlots) {
      index = pBucket->numOfSlots - 1;
    }
  }

  assert(index >= 0 && index < pBucket->numOfSlots);
  return index;
}
/*
 * Map an unsigned integer value to a slot index.
 *
 * The value is read from `value` according to pBucket->type and widened to
 * uint64_t. The range [u64MinVal, u64MaxVal] of the bucket is divided evenly
 * over pBucket->numOfSlots slots; when the range holds fewer distinct values
 * than there are slots, every value gets its own slot.
 *
 * Returns the slot index in [0, numOfSlots), or -1 when the value lies outside
 * the bucket range (tMemBucketPut treats -1 as "out of range").
 */
int32_t tBucketUintHash(tMemBucket *pBucket, const void *value) {
  // keep the value unsigned: a signed holder would order values above INT64_MAX before the minimum
  uint64_t v = 0;
  GET_TYPED_DATA(v, uint64_t, pBucket->type, value);

  const uint64_t minVal = pBucket->range.u64MinVal;
  const uint64_t maxVal = pBucket->range.u64MaxVal;

  // out of range: must not be turned into a slot index, even when asserts are compiled out
  if (v < minVal || v > maxVal) {
    return -1;
  }

  int32_t index = -1;

  // divide the value range into numOfSlots buckets
  uint64_t span   = maxVal - minVal;
  uint64_t offset = v - minVal;
  if (span < pBucket->numOfSlots) {
    int32_t delta = (int32_t)offset;
    index = (delta % pBucket->numOfSlots);
  } else {
    double slotSpan = (double)span / pBucket->numOfSlots;
    index = (int32_t)(offset / slotSpan);
    if (v == maxVal) {
      index -= 1;
    }

    // rounding in the double division may push values next to the maximum one slot too far
    if (index >= pBucket->numOfSlots) {
      index = pBucket->numOfSlots - 1;
    }
  }

  assert(index >= 0 && index < pBucket->numOfSlots);
  return index;
}
int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
......@@ -243,62 +165,30 @@ int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
int32_t index = -1;
if (pBucket->range.dMinVal == DBL_MAX) {
/*
* taking negative integer into consideration,
* there is only half of pBucket->segs available for non-negative integer
*/
double x = DBL_MAX / (pBucket->numOfSlots >> 1);
double posx = (v + DBL_MAX) / x;
return ((int32_t)posx) % pBucket->numOfSlots;
// divide a range of [dMinVal, dMaxVal] into 1024 buckets
double span = pBucket->range.dMaxVal - pBucket->range.dMinVal;
if (span < pBucket->numOfSlots) {
int32_t delta = (int32_t)(v - pBucket->range.dMinVal);
index = (delta % pBucket->numOfSlots);
} else {
// out of range
if (v < pBucket->range.dMinVal || v > pBucket->range.dMaxVal) {
return -1;
double slotSpan = span / pBucket->numOfSlots;
index = (int32_t)((v - pBucket->range.dMinVal) / slotSpan);
if (v == pBucket->range.dMaxVal) {
index -= 1;
}
// divide a range of [dMinVal, dMaxVal] into 1024 buckets
double span = pBucket->range.dMaxVal - pBucket->range.dMinVal;
if (span < pBucket->numOfSlots) {
int32_t delta = (int32_t)(v - pBucket->range.dMinVal);
index = (delta % pBucket->numOfSlots);
} else {
double slotSpan = span / pBucket->numOfSlots;
index = (int32_t)((v - pBucket->range.dMinVal) / slotSpan);
if (v == pBucket->range.dMaxVal) {
index -= 1;
}
}
if (index < 0 || index > pBucket->numOfSlots) {
uError("error in hash process. slot id: %d", index);
}
return index;
}
assert(v >= pBucket->range.dMinVal && v <= pBucket->range.dMaxVal && index >= 0 && index < pBucket->numOfSlots);
return index;
}
/*
 * Select the slot hash function for a data type: signed integers use
 * tBucketIntHash, unsigned integers use tBucketUintHash and every other type
 * falls through to tBucketDoubleHash.
 */
static __perc_hash_func_t getHashFunc(int32_t type) {
  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    return tBucketIntHash;
  } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    return tBucketUintHash;
  } else {
    return tBucketDoubleHash;
  }
}
......@@ -372,77 +262,41 @@ void tMemBucketDestroy(tMemBucket *pBucket) {
}
/*
 * Widen the range `r` so that it contains the value stored at `data`.
 *
 * The value is decoded according to dataType and compared through the matching
 * view of the range: signed 64-bit for signed integers, unsigned 64-bit for
 * unsigned integers and double for floating-point types. Any other data type
 * trips the assertion.
 */
void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataType) {
  if (IS_SIGNED_NUMERIC_TYPE(dataType)) {
    int64_t v = 0;
    GET_TYPED_DATA(v, int64_t, dataType, data);

    if (r->i64MinVal > v) {
      r->i64MinVal = v;
    }

    if (r->i64MaxVal < v) {
      r->i64MaxVal = v;
    }
  } else if (IS_UNSIGNED_NUMERIC_TYPE(dataType)) {
    uint64_t v = 0;
    GET_TYPED_DATA(v, uint64_t, dataType, data);

    // use the unsigned views, the ones resetBoundingBox() initialises for unsigned types
    if (r->u64MinVal > v) {
      r->u64MinVal = v;
    }

    if (r->u64MaxVal < v) {
      r->u64MaxVal = v;
    }
  } else if (IS_FLOAT_TYPE(dataType)) {
    double v = 0;
    GET_TYPED_DATA(v, double, dataType, data);

    if (r->dMinVal > v) {
      r->dMinVal = v;
    }

    if (r->dMaxVal < v) {
      r->dMaxVal = v;
    }
  } else {
    assert(0);
  }
}
......@@ -452,16 +306,13 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataT
int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
assert(pBucket != NULL && data != NULL && size > 0);
pBucket->total += (int32_t)size;
int32_t count = 0;
int32_t bytes = pBucket->bytes;
for (int32_t i = 0; i < size; ++i) {
char *d = (char *) data + i * bytes;
count += 1;
int32_t index = (pBucket->hashFunc)(pBucket, d);
if (index == -1) { // the value is out of range, do not add it into bucket
return -1;
}
tMemBucketSlot *pSlot = &pBucket->pSlots[index];
tMemBucketUpdateBoundingBox(&pSlot->range, d, pBucket->type);
......@@ -489,64 +340,11 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
pSlot->info.size += 1;
}
pBucket->total += count;
return 0;
}
////////////////////////////////////////////////////////////////////////////////////////////
/*
 * Scan every non-empty slot of the bucket and report the overall minimum and
 * maximum as doubles.
 *
 * *minVal starts at DBL_MAX and *maxVal at -DBL_MAX, so both keep those
 * sentinel values when no slot contributes. Slots are read through the view
 * matching the bucket type (32-bit int, double or 64-bit int); slots of any
 * other type are ignored.
 */
static UNUSED_FUNC void findMaxMinValue(tMemBucket *pMemBucket, double *maxVal, double *minVal) {
  *minVal = DBL_MAX;
  *maxVal = -DBL_MAX;

  for (int32_t slotIdx = 0; slotIdx < pMemBucket->numOfSlots; ++slotIdx) {
    tMemBucketSlot *slot = &pMemBucket->pSlots[slotIdx];
    if (slot->info.size == 0) {
      continue;
    }

    double lo = 0;
    double hi = 0;

    switch (pMemBucket->type) {
      case TSDB_DATA_TYPE_INT:
      case TSDB_DATA_TYPE_SMALLINT:
      case TSDB_DATA_TYPE_TINYINT:
        lo = slot->range.iMinVal;
        hi = slot->range.iMaxVal;
        break;
      case TSDB_DATA_TYPE_DOUBLE:
      case TSDB_DATA_TYPE_FLOAT:
        lo = slot->range.dMinVal;
        hi = slot->range.dMaxVal;
        break;
      case TSDB_DATA_TYPE_BIGINT:
        lo = (double)slot->range.i64MinVal;
        hi = (double)slot->range.i64MaxVal;
        break;
      default:
        continue;  // unsupported type: this slot contributes nothing
    }

    if (*minVal > lo) {
      *minVal = lo;
    }

    if (*maxVal < hi) {
      *maxVal = hi;
    }
  }
}
/*
*
* now, we need to find the minimum value of the next slot for
......@@ -565,7 +363,6 @@ static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int3
}
static bool isIdenticalData(tMemBucket *pMemBucket, int32_t index);
char *getFirstElemOfMemBuffer(tMemBucketSlot *pSeg, int32_t slotIdx, tFilePage *pPage);
static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) {
assert(isIdenticalData(pMemBucket, slotIndex));
......@@ -573,24 +370,12 @@ static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) {
tMemBucketSlot *pSlot = &pMemBucket->pSlots[slotIndex];
double finalResult = 0.0;
switch (pMemBucket->type) {
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_INT: {
finalResult = pSlot->range.iMinVal;
break;
}
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_DOUBLE: {
finalResult = pSlot->range.dMinVal;
break;
};
case TSDB_DATA_TYPE_BIGINT: {
finalResult = (double)pSlot->range.i64MinVal;
break;
}
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = pSlot->range.i64MinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
finalResult = pSlot->range.u64MinVal;
} else {
finalResult = pSlot->range.dMinVal;
}
return finalResult;
......@@ -616,26 +401,16 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
double maxOfThisSlot = 0;
double minOfNextSlot = 0;
switch (pMemBucket->type) {
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_TINYINT: {
maxOfThisSlot = pSlot->range.iMaxVal;
minOfNextSlot = next.iMinVal;
break;
};
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_DOUBLE: {
maxOfThisSlot = pSlot->range.dMaxVal;
minOfNextSlot = next.dMinVal;
break;
};
case TSDB_DATA_TYPE_BIGINT: {
maxOfThisSlot = (double)pSlot->range.i64MaxVal;
minOfNextSlot = (double)next.i64MinVal;
break;
}
};
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = pSlot->range.i64MaxVal;
minOfNextSlot = next.i64MinVal;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
maxOfThisSlot = pSlot->range.u64MaxVal;
minOfNextSlot = next.u64MinVal;
} else {
maxOfThisSlot = pSlot->range.dMaxVal;
minOfNextSlot = next.dMinVal;
}
assert(minOfNextSlot > maxOfThisSlot);
......@@ -652,38 +427,8 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
char *nextVal = thisVal + pMemBucket->bytes;
double td = 1.0, nd = 1.0;
switch (pMemBucket->type) {
case TSDB_DATA_TYPE_SMALLINT: {
td = *(int16_t *)thisVal;
nd = *(int16_t *)nextVal;
break;
}
case TSDB_DATA_TYPE_TINYINT: {
td = *(int8_t *)thisVal;
nd = *(int8_t *)nextVal;
break;
}
case TSDB_DATA_TYPE_INT: {
td = *(int32_t *)thisVal;
nd = *(int32_t *)nextVal;
break;
};
case TSDB_DATA_TYPE_FLOAT: {
td = GET_FLOAT_VAL(thisVal);
nd = GET_FLOAT_VAL(nextVal);
break;
}
case TSDB_DATA_TYPE_DOUBLE: {
td = GET_DOUBLE_VAL(thisVal);
nd = GET_DOUBLE_VAL(nextVal);
break;
}
case TSDB_DATA_TYPE_BIGINT: {
td = (double)*(int64_t *)thisVal;
nd = (double)*(int64_t *)nextVal;
break;
}
}
GET_TYPED_DATA(td, double, pMemBucket->type, thisVal);
GET_TYPED_DATA(nd, double, pMemBucket->type, nextVal);
double val = (1 - fraction) * td + fraction * nd;
tfree(buffer);
......@@ -741,20 +486,14 @@ double getPercentile(tMemBucket *pMemBucket, double percent) {
if (fabs(percent - 100.0) < DBL_EPSILON || (percent < DBL_EPSILON)) {
MinMaxEntry* pRange = &pMemBucket->range;
switch(pMemBucket->type) {
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_INT:
return fabs(percent - 100) < DBL_EPSILON? pRange->iMaxVal:pRange->iMinVal;
case TSDB_DATA_TYPE_BIGINT: {
double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal);
return v;
}
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_DOUBLE:
return fabs(percent - 100) < DBL_EPSILON? pRange->dMaxVal:pRange->dMinVal;
default:
return -1;
if (IS_SIGNED_NUMERIC_TYPE(pMemBucket->type)) {
double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->i64MaxVal : pRange->i64MinVal);
return v;
} else if (IS_UNSIGNED_NUMERIC_TYPE(pMemBucket->type)) {
double v = (double)(fabs(percent - 100) < DBL_EPSILON ? pRange->u64MaxVal : pRange->u64MinVal);
return v;
} else {
return fabs(percent - 100) < DBL_EPSILON? pRange->dMaxVal:pRange->dMinVal;
}
}
......@@ -771,40 +510,9 @@ double getPercentile(tMemBucket *pMemBucket, double percent) {
/*
 * Tell whether all values recorded for slot `index` are the same, i.e. whether
 * the slot's minimum equals its maximum. Floating-point slots compare the
 * double bounds with a DBL_EPSILON tolerance; every other type compares the
 * 64-bit integer views exactly.
 */
bool isIdenticalData(tMemBucket *pMemBucket, int32_t index) {
  tMemBucketSlot *pSeg = &pMemBucket->pSlots[index];

  if (IS_FLOAT_TYPE(pMemBucket->type)) {
    return fabs(pSeg->range.dMaxVal - pSeg->range.dMinVal) < DBL_EPSILON;
  } else {
    return pSeg->range.i64MinVal == pSeg->range.i64MaxVal;
  }
}
/*
 * Placeholder: no lookup is performed, none of the arguments are read and the
 * function always returns NULL.
 */
char *getFirstElemOfMemBuffer(tMemBucketSlot *pSeg, int32_t slotIdx, tFilePage *pPage) {
  (void)pSeg;
  (void)slotIdx;
  (void)pPage;

  return NULL;
}
}
\ No newline at end of file
#include <gtest/gtest.h>
#include <iostream>
#include "qResultbuf.h"
#include "taos.h"
#include "taosdef.h"
#include "qPercentile.h"
namespace {
// Creates a BIGINT bucket whose value range is [start, end] and inserts every
// integer of that range once, as an int64_t. The caller destroys the bucket.
tMemBucket *createBigIntDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(int64_t), TSDB_DATA_TYPE_BIGINT, start, end);

  int32_t cur = start;
  while (cur <= end) {
    int64_t val = cur;
    tMemBucketPut(bucket, &val, 1);
    ++cur;
  }

  return bucket;
}
// Creates an INT bucket whose value range is [start, end] and inserts every
// integer of that range once, as an int32_t. The caller destroys the bucket.
tMemBucket *createIntDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(int32_t), TSDB_DATA_TYPE_INT, start, end);

  int32_t cur = start;
  while (cur <= end) {
    int32_t val = cur;
    tMemBucketPut(bucket, &val, 1);
    ++cur;
  }

  return bucket;
}
// Creates a DOUBLE bucket whose value range is [start, end] and inserts every
// integer of that range once, converted to double. A non-zero return code from
// tMemBucketPut is reported on stdout; insertion continues with the next value.
// The caller destroys the bucket.
tMemBucket *createDoubleDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(double), TSDB_DATA_TYPE_DOUBLE, start, end);

  int32_t cur = start;
  while (cur <= end) {
    double val = cur;
    if (tMemBucketPut(bucket, &val, 1) != 0) {
      printf("value out of range:%f", val);
    }
    ++cur;
  }

  return bucket;
}
// Creates a bucket of the given unsigned data type whose value range is
// [start, end] and inserts every integer of that range once. The element size
// is taken from tDataTypeDesc[type].nSize. A non-zero return code from
// tMemBucketPut is reported on stdout; insertion continues with the next value.
// The caller destroys the bucket.
tMemBucket *createUnsignedDataBucket(int32_t start, int32_t end, int32_t type) {
  tMemBucket *pBucket = tMemBucketCreate(tDataTypeDesc[type].nSize, type, start, end);
  for (int32_t i = start; i <= end; ++i) {
    // NOTE(review): the value is always held in a uint64_t; for narrower types the
    // bucket presumably reads only the leading nSize bytes, which matches the value
    // on little-endian hosts only - confirm before running on big-endian targets.
    uint64_t k = i;

    int32_t ret = tMemBucketPut(pBucket, &k, 1);
    if (ret != 0) {
      // k is an integer: printing it with %f is undefined behavior
      printf("value out of range:%" PRIu64, k);
    }
  }

  return pBucket;
}
void intDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.;
pBucket = createIntDataBucket(0, 0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(0, 1);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 1);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(-1, 1);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, -1);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 0.5);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 1);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(0, 99999);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 49999.5);
tMemBucketDestroy(pBucket);
}
void bigintDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.0;
pBucket = createBigIntDataBucket(-1000, 1000);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0.);
tMemBucketDestroy(pBucket);
pBucket = createBigIntDataBucket(-10000, 10000);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 10000.0);
tMemBucketDestroy(pBucket);
pBucket = createBigIntDataBucket(-10000, 10000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 5000.0);
tMemBucketDestroy(pBucket);
}
void doubleDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0;
pBucket = createDoubleDataBucket(-10, 10);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, -10.0);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 25);
ASSERT_DOUBLE_EQ(result, -50000);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 50000);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 100000.0);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
}
/*
 * Large data test: the median is computed over the doubles 0..100000000
 * (about 0.1 billion values, roughly 800MB of data) and the elapsed
 * wall-clock time is printed in seconds.
 */
void largeDataTest() {
  printf("running : %s\n", __FUNCTION__);

  tMemBucket *pBucket = NULL;
  double      result = 0;

  struct timeval tv;
  gettimeofday(&tv, NULL);

  // time_t is not guaranteed to be int64_t: convert explicitly so the argument matches PRId64
  int64_t start = (int64_t)tv.tv_sec;
  printf("start time: %" PRId64 "\n", start);

  pBucket = createDoubleDataBucket(0, 100000000);
  result = getPercentile(pBucket, 50);
  ASSERT_DOUBLE_EQ(result, 50000000);

  gettimeofday(&tv, NULL);

  printf("total elapsed time: %" PRId64 " sec.", (int64_t)tv.tv_sec - start);
  printf("the result of %d is: %lf\n", 50, result);
  tMemBucketDestroy(pBucket);
}
void qsortTest() {
printf("running : %s\n", __FUNCTION__);
SSchema field[1] = {
{TSDB_DATA_TYPE_INT, "k", sizeof(int32_t)},
};
const int32_t num = 2000;
int32_t *d = (int32_t *)malloc(sizeof(int32_t) * num);
for (int32_t i = 0; i < num; ++i) {
d[i] = i % 4;
}
const int32_t numOfOrderCols = 1;
int32_t orderColIdx = 0;
SColumnModel * pModel = createColumnModel(field, 1, 1000);
tOrderDescriptor *pDesc = tOrderDesCreate(&orderColIdx, numOfOrderCols, pModel, 1);
tColDataQSort(pDesc, num, 0, num - 1, (char *)d, 1);
for (int32_t i = 0; i < num; ++i) {
printf("%d\t", d[i]);
}
printf("\n");
destroyColumnModel(pModel);
}
void unsignedDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.0;
pBucket = createUnsignedDataBucket(0, 1000, TSDB_DATA_TYPE_UINT);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 500.0);
tMemBucketDestroy(pBucket);
pBucket = createUnsignedDataBucket(0, 10000, TSDB_DATA_TYPE_UBIGINT);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 10000.0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0.0);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 5000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 7500);
tMemBucketDestroy(pBucket);
}
} // namespace
// Single gtest case that runs the percentile helpers one after another:
// signed int, bigint, double, unsigned and finally the large-data run.
TEST(testCase, percentileTest) {
// qsortTest() is not part of the run; its call is left commented out.
// qsortTest();
intDataTest();
bigintDataTest();
doubleDataTest();
unsignedDataTest();
// largeDataTest() inserts the doubles 0..100000000 and is called last.
largeDataTest();
}
......@@ -30,24 +30,32 @@ int32_t compareInt8Val(const void *pLeft, const void *pRight) {
return 0;
}
int32_t compareIntDoubleVal(const void *pLeft, const void *pRight) {
int64_t lhs = GET_INT64_VAL(pLeft);
double rhs = GET_DOUBLE_VAL(pRight);
if (fabs(lhs - rhs) < FLT_EPSILON) {
return 0;
} else {
return (lhs > rhs) ? 1 : -1;
}
/*
 * Three-way comparison of the two values read through GET_UINT32_VAL.
 * Returns 1 when left > right, -1 when left < right and 0 when they are equal.
 */
int32_t compareUint32Val(const void *pLeft, const void *pRight) {
  // keep the operands unsigned: held in int32_t, values above INT32_MAX would compare as negative
  uint32_t left = GET_UINT32_VAL(pLeft), right = GET_UINT32_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
int32_t compareDoubleIntVal(const void *pLeft, const void *pRight) {
double lhs = GET_DOUBLE_VAL(pLeft);
int64_t rhs = GET_INT64_VAL(pRight);
if (fabs(lhs - rhs) < FLT_EPSILON) {
return 0;
} else {
return (lhs > rhs) ? 1 : -1;
}
/*
 * Three-way comparison of the two values read through GET_UINT64_VAL.
 * Returns 1 when left > right, -1 when left < right and 0 when they are equal.
 */
int32_t compareUint64Val(const void *pLeft, const void *pRight) {
  // keep the operands unsigned: held in int64_t, values above INT64_MAX would compare as negative
  uint64_t left = GET_UINT64_VAL(pLeft), right = GET_UINT64_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
/*
 * Three-way comparison of the two values read through GET_UINT16_VAL.
 * Returns 1 when left > right, -1 when left < right and 0 when they are equal.
 */
int32_t compareUint16Val(const void *pLeft, const void *pRight) {
  // keep the operands unsigned: held in int16_t, values above INT16_MAX would compare as negative
  uint16_t left = GET_UINT16_VAL(pLeft), right = GET_UINT16_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
/*
 * Three-way comparison of the two values read through GET_UINT8_VAL.
 * Returns 1 when left > right, -1 when left < right and 0 when they are equal.
 */
int32_t compareUint8Val(const void* pLeft, const void* pRight) {
  const uint8_t lhs = GET_UINT8_VAL(pLeft);
  const uint8_t rhs = GET_UINT8_VAL(pRight);

  // (lhs > rhs) and (lhs < rhs) are each 0 or 1, so the difference is 1, -1 or 0
  return (lhs > rhs) - (lhs < rhs);
}
int32_t compareFloatVal(const void *pLeft, const void *pRight) {
......@@ -369,15 +377,24 @@ __compar_fn_t getKeyComparFunc(int32_t keyType) {
case TSDB_DATA_TYPE_DOUBLE:
comparFn = compareDoubleVal;
break;
case TSDB_DATA_TYPE_UTINYINT:
comparFn = compareUint8Val;
break;
case TSDB_DATA_TYPE_USMALLINT:
comparFn = compareUint16Val;
break;
case TSDB_DATA_TYPE_UINT:
comparFn = compareUint32Val;
break;
case TSDB_DATA_TYPE_UBIGINT:
comparFn = compareUint64Val;
break;
case TSDB_DATA_TYPE_BINARY:
comparFn = compareLenPrefixedStr;
break;
case TSDB_DATA_TYPE_NCHAR:
comparFn = compareLenPrefixedWStr;
break;
default:
comparFn = compareInt32Val;
break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册