From e7e5fd43456f263a66255a5fb522eb4b9f12cb7b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 2 Apr 2022 23:42:00 +0800 Subject: [PATCH] [td-14393] support percentile function. --- source/libs/executor/src/executorimpl.c | 1 - source/libs/function/inc/builtinsimpl.h | 1 + source/libs/function/src/builtins.c | 6 +-- source/libs/function/src/builtinsimpl.c | 72 ++++++++++++++++--------- tests/script/tsim/query/session.sim | 27 +++++----- 5 files changed, 65 insertions(+), 42 deletions(-) diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 6838b96892..23dde6b295 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -6104,7 +6104,6 @@ static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { if (pBlock == NULL) { break; } - // if (pAggInfo->current != NULL) { // setTagValue(pOperator, pAggInfo->current->pTable, pInfo->pCtx, pOperator->numOfOutput); // } diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index ab7bfc7767..045bc95515 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -45,6 +45,7 @@ void stddevFinalize(SqlFunctionCtx* pCtx); bool getPercentileFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); void percentileFunction(SqlFunctionCtx *pCtx); +void percentileFinalize(SqlFunctionCtx* pCtx); bool getFirstLastFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); void firstFunction(SqlFunctionCtx *pCtx); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 799c5308d0..a315c6c8a2 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -77,9 +77,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .type = FUNCTION_TYPE_PERCENTILE, .classification = FUNC_MGT_AGG_FUNC, .checkFunc = stubCheckAndGetResultType, - .getEnvFunc = getMinmaxFuncEnv, - .initFunc = maxFunctionSetup, - .processFunc = maxFunction, + .getEnvFunc = getPercentileFuncEnv, + .initFunc = percentileFunctionSetup, + .processFunc = percentileFunction, .finalizeFunc = functionFinalize }, { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 0dc2989f77..a1c5dbce7b 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -480,7 +480,6 @@ void stddevFunction(SqlFunctionCtx* pCtx) { // Only the pre-computing information loaded and actual data does not loaded SInputColumnInfoData* pInput = &pCtx->input; - SColumnDataAgg* pAgg = pInput->pColumnDataAgg[0]; int32_t type = pInput->pData[0]->info.type; SStddevRes* pStddevRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); @@ -601,6 +600,7 @@ void stddevFinalize(SqlFunctionCtx* pCtx) { } typedef struct SPercentileInfo { + double result; tMemBucket *pMemBucket; int32_t stage; double minval; @@ -629,10 +629,15 @@ bool percentileFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultI void percentileFunction(SqlFunctionCtx *pCtx) { int32_t notNullElems = 0; -#if 0 - SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx); - SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); + SResultRowEntryInfo *pResInfo = GET_RES_INFO(pCtx); + + SInputColumnInfoData* pInput = &pCtx->input; + SColumnDataAgg *pAgg = pInput->pColumnDataAgg[0]; + SColumnInfoData *pCol = pInput->pData[0]; + int32_t type = pCol->info.type; + + SPercentileInfo *pInfo = GET_ROWCELL_INTERBUF(pResInfo); if (pCtx->currentStage == REPEAT_SCAN && pInfo->stage == 0) { pInfo->stage += 1; @@ -647,19 +652,17 @@ void percentileFunction(SqlFunctionCtx *pCtx) { // the first stage, only acquire the min/max value if (pInfo->stage == 0) { - if (pCtx->preAggVals.isSet) { + if (pCtx->input.colDataAggIsSet) { double tmin = 0.0, tmax = 0.0; - if (IS_SIGNED_NUMERIC_TYPE(pCtx->inputType)) { - tmin = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.min); - tmax = (double)GET_INT64_VAL(&pCtx->preAggVals.statis.max); - } else if (IS_FLOAT_TYPE(pCtx->inputType)) { - tmin = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.min); - tmax = GET_DOUBLE_VAL(&pCtx->preAggVals.statis.max); - } else if (IS_UNSIGNED_NUMERIC_TYPE(pCtx->inputType)) { - tmin = (double)GET_UINT64_VAL(&pCtx->preAggVals.statis.min); - tmax = (double)GET_UINT64_VAL(&pCtx->preAggVals.statis.max); - } else { - assert(true); + if (IS_SIGNED_NUMERIC_TYPE(type)) { + tmin = (double)GET_INT64_VAL(&pAgg->min); + tmax = (double)GET_INT64_VAL(&pAgg->max); + } else if (IS_FLOAT_TYPE(type)) { + tmin = GET_DOUBLE_VAL(&pAgg->min); + tmax = GET_DOUBLE_VAL(&pAgg->max); + } else if (IS_UNSIGNED_NUMERIC_TYPE(type)) { + tmin = (double)GET_UINT64_VAL(&pAgg->min); + tmax = (double)GET_UINT64_VAL(&pAgg->max); } if (GET_DOUBLE_VAL(&pInfo->minval) > tmin) { @@ -670,17 +673,19 @@ void percentileFunction(SqlFunctionCtx *pCtx) { SET_DOUBLE_VAL(&pInfo->maxval, tmax); } - pInfo->numOfElems += (pCtx->size - pCtx->preAggVals.statis.numOfNull); + pInfo->numOfElems += (pInput->numOfRows - pAgg->numOfNull); } else { - for (int32_t i = 0; i < pCtx->size; ++i) { - char *data = GET_INPUT_DATA(pCtx, i); - if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + // check the valid data one by one + int32_t start = pInput->startRowIndex; + for (int32_t i = start; i < pInput->numOfRows + start; ++i) { + if (colDataIsNull_f(pCol->nullbitmap, i)) { continue; } + char *data = colDataGetData(pCol, i); + double v = 0; GET_TYPED_DATA(v, double, pCtx->inputType, data); - if (v < GET_DOUBLE_VAL(&pInfo->minval)) { SET_DOUBLE_VAL(&pInfo->minval, v); } @@ -697,20 +702,35 @@ void percentileFunction(SqlFunctionCtx *pCtx) { } // the second stage, calculate the true percentile value - for (int32_t i = 0; i < pCtx->size; ++i) { - char *data = GET_INPUT_DATA(pCtx, i); - if (pCtx->hasNull && isNull(data, pCtx->inputType)) { + int32_t start = pInput->startRowIndex; + for (int32_t i = start; i < pInput->numOfRows + start; ++i) { + if (colDataIsNull_f(pCol->nullbitmap, i)) { continue; } + char *data = colDataGetData(pCol, i); + notNullElems += 1; tMemBucketPut(pInfo->pMemBucket, data, 1); } - SET_VAL(pCtx, notNullElems, 1); + SET_VAL(pResInfo, notNullElems, 1); pResInfo->hasResult = DATA_SET_FLAG; -#endif +} + +void percentileFinalize(SqlFunctionCtx* pCtx) { + double v = 50;//pCtx->param[0].nType == TSDB_DATA_TYPE_INT ? pCtx->param[0].i64 : pCtx->param[0].dKey; + SResultRowEntryInfo *pResInfo = GET_RES_INFO(pCtx); + SPercentileInfo* ppInfo = (SPercentileInfo *) GET_ROWCELL_INTERBUF(pResInfo); + + tMemBucket * pMemBucket = ppInfo->pMemBucket; + if (pMemBucket != NULL && pMemBucket->total > 0) { // check for null + SET_DOUBLE_VAL(&ppInfo->result, getPercentile(pMemBucket, v)); + } + + tMemBucketDestroy(pMemBucket); + functionFinalize(pCtx); } bool getFirstLastFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { diff --git a/tests/script/tsim/query/session.sim b/tests/script/tsim/query/session.sim index 635e6a60d7..c1d3437e4a 100644 --- a/tests/script/tsim/query/session.sim +++ b/tests/script/tsim/query/session.sim @@ -284,14 +284,15 @@ print ====> select count(*),first(tagtype),last(tagtype),avg(tagtype),sum(tagtyp # return -1 #endi -sql_error select * from dev_001 session(ts,1w) -sql_error select count(*) from st session(ts,1w) -sql_error select count(*) from dev_001 group by tagtype session(ts,1w) -sql_error select count(*) from dev_001 session(ts,1n) -sql_error select count(*) from dev_001 session(ts,1y) -sql_error select count(*) from dev_001 session(ts,0s) -sql_error select count(*) from dev_001 session(i,1y) -sql_error select count(*) from dev_001 session(ts,1d) where ts <'2020-05-20 0:0:0' +print ================> syntax error check not active ================> reactive +#sql_error select * from dev_001 session(ts,1w) +#sql_error select count(*) from st session(ts,1w) +#sql_error select count(*) from dev_001 group by tagtype session(ts,1w) +#sql_error select count(*) from dev_001 session(ts,1n) +#sql_error select count(*) from dev_001 session(ts,1y) +#sql_error select count(*) from dev_001 session(ts,0s) +#sql_error select count(*) from dev_001 session(i,1y) +#sql_error select count(*) from dev_001 session(ts,1d) where ts <'2020-05-20 0:0:0' print ====> create database d1 precision 'us' sql create database d1 precision 'us' @@ -299,17 +300,19 @@ sql use d1 sql create table dev_001 (ts timestamp ,i timestamp ,j int) sql insert into dev_001 values(1623046993681000,now,1)(1623046993681001,now+1s,2)(1623046993681002,now+2s,3)(1623046993681004,now+5s,4) print ====> select count(*) from dev_001 session(ts,1u) -sql select count(*) from dev_001 session(ts,1u) +sql select _wstartts, count(*) from dev_001 session(ts,1u) if $rows != 2 then + print expect 2, actual: $rows return -1 endi + if $data01 != 3 then return -1 endi -sql_error select count(*) from dev_001 session(i,1s) -sql create table secondts(ts timestamp,t2 timestamp,i int) -sql_error select count(*) from secondts session(t2,2s) +#sql_error select count(*) from dev_001 session(i,1s) +#sql create table secondts(ts timestamp,t2 timestamp,i int) +#sql_error select count(*) from secondts session(t2,2s) if $loop_test == 0 then print =============== stop and restart taosd -- GitLab