提交 6f77eecc 编写于 作者: H Haojun Liao

[td-13039] support stddev.

上级 29fd5ec1
...@@ -38,9 +38,14 @@ void minFunction(SqlFunctionCtx* pCtx); ...@@ -38,9 +38,14 @@ void minFunction(SqlFunctionCtx* pCtx);
void maxFunction(SqlFunctionCtx *pCtx); void maxFunction(SqlFunctionCtx *pCtx);
bool getStddevFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool getStddevFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
bool stddevFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
void stddevFunction(SqlFunctionCtx* pCtx); void stddevFunction(SqlFunctionCtx* pCtx);
void stddevFinalize(SqlFunctionCtx* pCtx); void stddevFinalize(SqlFunctionCtx* pCtx);
bool getPercentileFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo);
void percentileFunction(SqlFunctionCtx *pCtx);
bool getFirstLastFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool getFirstLastFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv);
void firstFunction(SqlFunctionCtx *pCtx); void firstFunction(SqlFunctionCtx *pCtx);
void lastFunction(SqlFunctionCtx *pCtx); void lastFunction(SqlFunctionCtx *pCtx);
......
...@@ -68,9 +68,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { ...@@ -68,9 +68,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.classification = FUNC_MGT_AGG_FUNC, .classification = FUNC_MGT_AGG_FUNC,
.checkFunc = stubCheckAndGetResultType, .checkFunc = stubCheckAndGetResultType,
.getEnvFunc = getStddevFuncEnv, .getEnvFunc = getStddevFuncEnv,
.initFunc = maxFunctionSetup, .initFunc = stddevFunctionSetup,
.processFunc = maxFunction, .processFunc = stddevFunction,
.finalizeFunc = functionFinalize .finalizeFunc = stddevFinalize
}, },
{ {
.name = "percentile", .name = "percentile",
...@@ -434,6 +434,7 @@ int32_t stubCheckAndGetResultType(SFunctionNode* pFunc) { ...@@ -434,6 +434,7 @@ int32_t stubCheckAndGetResultType(SFunctionNode* pFunc) {
break; break;
} }
case FUNCTION_TYPE_STDDEV:
case FUNCTION_TYPE_SIN: case FUNCTION_TYPE_SIN:
case FUNCTION_TYPE_COS: case FUNCTION_TYPE_COS:
case FUNCTION_TYPE_TAN: case FUNCTION_TYPE_TAN:
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
#include "builtinsimpl.h" #include "builtinsimpl.h"
#include "tpercentile.h"
#include "querynodes.h" #include "querynodes.h"
#include "taggfunction.h" #include "taggfunction.h"
#include "tdatablock.h" #include "tdatablock.h"
...@@ -453,6 +454,7 @@ bool getTopBotFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { ...@@ -453,6 +454,7 @@ bool getTopBotFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
} }
typedef struct SStddevRes { typedef struct SStddevRes {
double result;
int64_t count; int64_t count;
union {double quadraticDSum; int64_t quadraticISum;}; union {double quadraticDSum; int64_t quadraticISum;};
union {double dsum; int64_t isum;}; union {double dsum; int64_t isum;};
...@@ -463,38 +465,129 @@ bool getStddevFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { ...@@ -463,38 +465,129 @@ bool getStddevFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
return true; return true;
} }
bool stddevFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo) {
if (!functionSetup(pCtx, pResultInfo)) {
return false;
}
SStddevRes* pRes = GET_ROWCELL_INTERBUF(pResultInfo);
memset(pRes, 0, sizeof(SStddevRes));
return true;
}
void stddevFunction(SqlFunctionCtx* pCtx) { void stddevFunction(SqlFunctionCtx* pCtx) {
int32_t numOfElem = 0; int32_t numOfElem = 0;
// Only the pre-computing information loaded and actual data does not loaded // Only the pre-computing information loaded and actual data does not loaded
SInputColumnInfoData* pInput = &pCtx->input; SInputColumnInfoData* pInput = &pCtx->input;
SColumnDataAgg *pAgg = pInput->pColumnDataAgg[0]; SColumnDataAgg* pAgg = pInput->pColumnDataAgg[0];
int32_t type = pInput->pData[0]->info.type; int32_t type = pInput->pData[0]->info.type;
SStddevRes* pStddevRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); SStddevRes* pStddevRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
// } else { // computing based on the true data block // computing based on the true data block
SColumnInfoData* pCol = pInput->pData[0]; SColumnInfoData* pCol = pInput->pData[0];
int32_t start = pInput->startRowIndex; int32_t start = pInput->startRowIndex;
int32_t numOfRows = pInput->numOfRows; int32_t numOfRows = pInput->numOfRows;
switch(type) { switch (type) {
case TSDB_DATA_TYPE_INT: { case TSDB_DATA_TYPE_TINYINT: {
int32_t* plist = (int32_t*)pCol->pData; int8_t* plist = (int8_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) { for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) { if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue; continue;
} }
numOfElem += 1;
pStddevRes->count += 1; pStddevRes->count += 1;
pStddevRes->isum += plist[i]; pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i]; pStddevRes->quadraticISum += plist[i] * plist[i];
} }
break;
}
case TSDB_DATA_TYPE_SMALLINT: {
int16_t* plist = (int16_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_INT: {
int32_t* plist = (int32_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_BIGINT: {
int64_t* plist = (int64_t*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_FLOAT: {
float* plist = (float*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
}
break;
}
case TSDB_DATA_TYPE_DOUBLE: {
double* plist = (double*)pCol->pData;
for (int32_t i = start; i < numOfRows + pInput->startRowIndex; ++i) {
if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
continue;
}
numOfElem += 1;
pStddevRes->count += 1;
pStddevRes->isum += plist[i];
pStddevRes->quadraticISum += plist[i] * plist[i];
} }
break; break;
} }
default:
break;
}
// data in the check operation are all null, not output // data in the check operation are all null, not output
SET_VAL(GET_RES_INFO(pCtx), numOfElem, 1); SET_VAL(GET_RES_INFO(pCtx), numOfElem, 1);
} }
...@@ -503,11 +596,122 @@ void stddevFinalize(SqlFunctionCtx* pCtx) { ...@@ -503,11 +596,122 @@ void stddevFinalize(SqlFunctionCtx* pCtx) {
functionFinalize(pCtx); functionFinalize(pCtx);
SStddevRes* pStddevRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); SStddevRes* pStddevRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
double res = pStddevRes->quadraticISum/pStddevRes->count - (pStddevRes->isum / pStddevRes->count) * (pStddevRes->isum / pStddevRes->count); double avg = pStddevRes->isum / ((double) pStddevRes->count);
pStddevRes->result = sqrt(pStddevRes->quadraticISum/((double)pStddevRes->count) - avg*avg);
}
typedef struct SPercentileInfo {
tMemBucket *pMemBucket;
int32_t stage;
double minval;
double maxval;
int64_t numOfElems;
} SPercentileInfo;
bool getPercentileFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
pEnv->calcMemSize = sizeof(SPercentileInfo);
return true;
}
bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo) {
if (!functionSetup(pCtx, pResultInfo)) {
return false;
}
// in the first round, get the min-max value of all involved data
SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResultInfo);
SET_DOUBLE_VAL(&pInfo->minval, DBL_MAX);
SET_DOUBLE_VAL(&pInfo->maxval, -DBL_MAX);
pInfo->numOfElems = 0;
return true;
} }
void percentileFunction(SqlFunctionCtx *pCtx) {
int32_t notNullElems = 0;
#if 0
SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo);
if (pCtx->currentStage == REPEAT_SCAN && pInfo->stage == 0) {
pInfo->stage += 1;
// all data are null, set it completed
if (pInfo->numOfElems == 0) {
pResInfo->complete = true;
return;
} else {
pInfo->pMemBucket = tMemBucketCreate(pCtx->inputBytes, pCtx->inputType, pInfo->minval, pInfo->maxval);
}
}
// the first stage, only acquire the min/max value
if (pInfo->stage == 0) {
if (pCtx->preAggVals.isSet) {
double tmin = 0.0, tmax = 0.0;
if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) {
tmin = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.min);
tmax = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.max);
} else if (IS_FLOAT_TYPE(pCtx->inputType)) {
tmin = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.min);
tmax = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.max);
} else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) {
tmin = (double)GET_UINT64_VAL(&pCtx->preAggVals.statis.min);
tmax = (double)GET_UINT64_VAL(&pCtx->preAggVals.statis.max);
} else {
assert(true);
}
if (GET_DOUBLE_VAL(&pInfo->minval) > tmin) {
SET_DOUBLE_VAL(&pInfo->minval, tmin);
}
if (GET_DOUBLE_VAL(&pInfo->maxval) < tmax) {
SET_DOUBLE_VAL(&pInfo->maxval, tmax);
}
pInfo->numOfElems += (pCtx->size - pCtx->preAggVals.statis.numOfNull);
} else {
for (int32_t i = 0; i < pCtx->size; ++i) {
char *data = GET_INPUT_DATA(pCtx, i);
if (pCtx->hasNull && isNull(data, pCtx->inputType)) {
continue;
}
double v = 0;
GET_TYPED_DATA(v, double, pCtx->inputType, data);
if (v < GET_DOUBLE_VAL(&pInfo->minval)) {
SET_DOUBLE_VAL(&pInfo->minval, v);
}
if (v > GET_DOUBLE_VAL(&pInfo->maxval)) {
SET_DOUBLE_VAL(&pInfo->maxval, v);
}
pInfo->numOfElems += 1;
}
}
return;
}
// the second stage, calculate the true percentile value
for (int32_t i = 0; i < pCtx->size; ++i) {
char *data = GET_INPUT_DATA(pCtx, i);
if (pCtx->hasNull && isNull(data, pCtx->inputType)) {
continue;
}
notNullElems += 1;
tMemBucketPut(pInfo->pMemBucket, data, 1);
}
SET_VAL(pCtx, notNullElems, 1);
pResInfo->hasResult = DATA_SET_FLAG;
#endif
}
bool getFirstLastFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { bool getFirstLastFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) {
SColumnNode* pNode = nodesListGetNode(pFunc->pParameterList, 0); SColumnNode* pNode = nodesListGetNode(pFunc->pParameterList, 0);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册