提交 c04da26c 编写于 作者: H Haojun Liao

[TD-2634]

上级 84b867e2
......@@ -16,32 +16,36 @@
#ifndef TDENGINE_QPERCENTILE_H
#define TDENGINE_QPERCENTILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "qExtbuffer.h"
#include "qResultbuf.h"
#include "qTsbuf.h"
typedef struct MinMaxEntry {
union {
double dMinVal;
int32_t iMinVal;
int64_t i64MinVal;
double dMinVal;
int64_t i64MinVal;
uint64_t u64MinVal;
};
union {
double dMaxVal;
int32_t iMaxVal;
int64_t i64MaxVal;
int64_t u64MaxVal;
};
} MinMaxEntry;
typedef struct {
int32_t size;
int32_t pageId;
int32_t size;
int32_t pageId;
tFilePage *data;
} SSlotInfo;
typedef struct tMemBucketSlot {
SSlotInfo info;
MinMaxEntry range;
SSlotInfo info;
MinMaxEntry range;
} tMemBucketSlot;
struct tMemBucket;
......@@ -52,16 +56,16 @@ typedef struct tMemBucket {
int16_t type;
int16_t bytes;
int32_t total;
int32_t elemPerPage; // number of elements for each object
int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result
int32_t bufPageSize; // disk page size
MinMaxEntry range; // value range
int32_t times; // count that has been checked for deciding the correct data value buckets.
int32_t elemPerPage; // number of elements for each object
int32_t maxCapacity; // maximum allowed number of elements that can be sort directly to get the result
int32_t bufPageSize; // disk page size
MinMaxEntry range; // value range
int32_t times; // count that has been checked for deciding the correct data value buckets.
__compar_fn_t comparFn;
tMemBucketSlot *pSlots;
tMemBucketSlot * pSlots;
SDiskbasedResultBuf *pBuffer;
__perc_hash_func_t hashFunc;
__perc_hash_func_t hashFunc;
} tMemBucket;
tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, double maxval);
......@@ -73,3 +77,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size);
double getPercentile(tMemBucket *pMemBucket, double percent);
#endif // TDENGINE_QPERCENTILE_H
#ifdef __cplusplus
}
#endif
\ No newline at end of file
......@@ -2545,7 +2545,7 @@ static void percentile_next_step(SQLFunctionCtx *pCtx) {
if (pInfo->numOfElems == 0) {
pResInfo->complete = true;
} else {
pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, GET_DOUBLE_VAL(&pInfo->minval), GET_DOUBLE_VAL(&pInfo->maxval));
pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, pInfo->minval, pInfo->maxval);
}
pInfo->stage += 1;
......
此差异已折叠。
#include <gtest/gtest.h>
#include <iostream>
#include "qResultbuf.h"
#include "taos.h"
#include "taosdef.h"
#include "qPercentile.h"
namespace {
tMemBucket *createBigIntDataBucket(int32_t start, int32_t end) {
tMemBucket *pBucket = tMemBucketCreate(sizeof(int64_t), TSDB_DATA_TYPE_BIGINT, start, end);
for (int32_t i = start; i <= end; ++i) {
int64_t val = i;
tMemBucketPut(pBucket, &val, 1);
}
return pBucket;
}
tMemBucket *createIntDataBucket(int32_t start, int32_t end) {
tMemBucket *pBucket = tMemBucketCreate(sizeof(int32_t), TSDB_DATA_TYPE_INT, start, end);
for (int32_t i = start; i <= end; ++i) {
int32_t val = i;
tMemBucketPut(pBucket, &val, 1);
}
return pBucket;
}
tMemBucket *createDoubleDataBucket(int32_t start, int32_t end) {
tMemBucket *pBucket = tMemBucketCreate(sizeof(double), TSDB_DATA_TYPE_DOUBLE, start, end);
for (int32_t i = start; i <= end; ++i) {
double val = i;
int32_t ret = tMemBucketPut(pBucket, &val, 1);
if (ret != 0) {
printf("value out of range:%f", val);
}
}
return pBucket;
}
tMemBucket *createUnsignedDataBucket(int32_t start, int32_t end, int32_t type) {
tMemBucket *pBucket = tMemBucketCreate(tDataTypeDesc[type].nSize, type, start, end);
for (int32_t i = start; i <= end; ++i) {
uint64_t k = i;
int32_t ret = tMemBucketPut(pBucket, &k, 1);
if (ret != 0) {
printf("value out of range:%f", k);
}
}
return pBucket;
}
void intDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.;
pBucket = createIntDataBucket(0, 0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(0, 1);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 1);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(-1, 1);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, -1);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 0.5);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 1);
tMemBucketDestroy(pBucket);
pBucket = createIntDataBucket(0, 99999);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 49999.5);
tMemBucketDestroy(pBucket);
}
void bigintDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.0;
pBucket = createBigIntDataBucket(-1000, 1000);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0.);
tMemBucketDestroy(pBucket);
pBucket = createBigIntDataBucket(-10000, 10000);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 10000.0);
tMemBucketDestroy(pBucket);
pBucket = createBigIntDataBucket(-10000, 10000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 5000.0);
tMemBucketDestroy(pBucket);
}
void doubleDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0;
pBucket = createDoubleDataBucket(-10, 10);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, -10.0);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 25);
ASSERT_DOUBLE_EQ(result, -50000);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 0);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 50000);
tMemBucketDestroy(pBucket);
pBucket = createDoubleDataBucket(-100000, 100000);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 100000.0);
printf("result is: %lf\n", result);
tMemBucketDestroy(pBucket);
}
/*
* large data test, we employ 0.1billion double data to calculated the percentile
* which is 800MB data
*/
void largeDataTest() {
printf("running : %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0;
struct timeval tv;
gettimeofday(&tv, NULL);
int64_t start = tv.tv_sec;
printf("start time: %" PRId64 "\n", tv.tv_sec);
pBucket = createDoubleDataBucket(0, 100000000);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 50000000);
gettimeofday(&tv, NULL);
printf("total elapsed time: %" PRId64 " sec.", -start + tv.tv_sec);
printf("the result of %d is: %lf\n", 50, result);
tMemBucketDestroy(pBucket);
}
void qsortTest() {
printf("running : %s\n", __FUNCTION__);
SSchema field[1] = {
{TSDB_DATA_TYPE_INT, "k", sizeof(int32_t)},
};
const int32_t num = 2000;
int32_t *d = (int32_t *)malloc(sizeof(int32_t) * num);
for (int32_t i = 0; i < num; ++i) {
d[i] = i % 4;
}
const int32_t numOfOrderCols = 1;
int32_t orderColIdx = 0;
SColumnModel * pModel = createColumnModel(field, 1, 1000);
tOrderDescriptor *pDesc = tOrderDesCreate(&orderColIdx, numOfOrderCols, pModel, 1);
tColDataQSort(pDesc, num, 0, num - 1, (char *)d, 1);
for (int32_t i = 0; i < num; ++i) {
printf("%d\t", d[i]);
}
printf("\n");
destroyColumnModel(pModel);
}
void unsignedDataTest() {
printf("running %s\n", __FUNCTION__);
tMemBucket *pBucket = NULL;
double result = 0.0;
pBucket = createUnsignedDataBucket(0, 1000, TSDB_DATA_TYPE_UINT);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 500.0);
tMemBucketDestroy(pBucket);
pBucket = createUnsignedDataBucket(0, 10000, TSDB_DATA_TYPE_UBIGINT);
result = getPercentile(pBucket, 100);
ASSERT_DOUBLE_EQ(result, 10000.0);
result = getPercentile(pBucket, 0);
ASSERT_DOUBLE_EQ(result, 0.0);
result = getPercentile(pBucket, 50);
ASSERT_DOUBLE_EQ(result, 5000);
result = getPercentile(pBucket, 75);
ASSERT_DOUBLE_EQ(result, 7500);
tMemBucketDestroy(pBucket);
}
} // namespace
TEST(testCase, percentileTest) {
// qsortTest();
intDataTest();
bigintDataTest();
doubleDataTest();
unsignedDataTest();
largeDataTest();
}
......@@ -30,24 +30,32 @@ int32_t compareInt8Val(const void *pLeft, const void *pRight) {
return 0;
}
int32_t compareIntDoubleVal(const void *pLeft, const void *pRight) {
int64_t lhs = GET_INT64_VAL(pLeft);
double rhs = GET_DOUBLE_VAL(pRight);
if (fabs(lhs - rhs) < FLT_EPSILON) {
return 0;
} else {
return (lhs > rhs) ? 1 : -1;
}
int32_t compareUint32Val(const void *pLeft, const void *pRight) {
int32_t left = GET_UINT32_VAL(pLeft), right = GET_UINT32_VAL(pRight);
if (left > right) return 1;
if (left < right) return -1;
return 0;
}
int32_t compareDoubleIntVal(const void *pLeft, const void *pRight) {
double lhs = GET_DOUBLE_VAL(pLeft);
int64_t rhs = GET_INT64_VAL(pRight);
if (fabs(lhs - rhs) < FLT_EPSILON) {
return 0;
} else {
return (lhs > rhs) ? 1 : -1;
}
int32_t compareUint64Val(const void *pLeft, const void *pRight) {
int64_t left = GET_UINT64_VAL(pLeft), right = GET_UINT64_VAL(pRight);
if (left > right) return 1;
if (left < right) return -1;
return 0;
}
int32_t compareUint16Val(const void *pLeft, const void *pRight) {
int16_t left = GET_UINT16_VAL(pLeft), right = GET_UINT16_VAL(pRight);
if (left > right) return 1;
if (left < right) return -1;
return 0;
}
int32_t compareUint8Val(const void* pLeft, const void* pRight) {
uint8_t left = GET_UINT8_VAL(pLeft), right = GET_UINT8_VAL(pRight);
if (left > right) return 1;
if (left < right) return -1;
return 0;
}
int32_t compareFloatVal(const void *pLeft, const void *pRight) {
......@@ -369,15 +377,24 @@ __compar_fn_t getKeyComparFunc(int32_t keyType) {
case TSDB_DATA_TYPE_DOUBLE:
comparFn = compareDoubleVal;
break;
case TSDB_DATA_TYPE_UTINYINT:
comparFn = compareUint8Val;
break;
case TSDB_DATA_TYPE_USMALLINT:
comparFn = compareUint16Val;
break;
case TSDB_DATA_TYPE_UINT:
comparFn = compareUint32Val;
break;
case TSDB_DATA_TYPE_UBIGINT:
comparFn = compareUint64Val;
break;
case TSDB_DATA_TYPE_BINARY:
comparFn = compareLenPrefixedStr;
break;
case TSDB_DATA_TYPE_NCHAR:
comparFn = compareLenPrefixedWStr;
break;
default:
comparFn = compareInt32Val;
break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册