提交 49098547 编写于 作者: K kailixu

Merge branch '2.6' into fix/TS-2670-2.6

......@@ -569,18 +569,23 @@ Query OK, 2 row(s) in set (0.000793s)
### PERCENTILE
```
SELECT PERCENTILE(field_name, P) FROM { tb_name } [WHERE clause];
SELECT PERCENTILE(field_name, P [, P1] ...) FROM { tb_name } [WHERE clause];
```
**Description**: The value whose rank in a specific column matches the specified percentage. If such a value matching the specified percentage doesn't exist in the column, an interpolation value will be returned.
**Return value type**: Double precision floating point
**Return value type**: This function takes 2 minumum and 11 maximum parameters, and it can simultaneously return 10 percentiles at most. If 2 parameters are given, a single percentile is returned and the value type is DOUBLE.
If more than 2 parameters are given, the return value type is a VARCHAR string, the format of which is a JSON ARRAY containing all return values.
**Applicable column types**: Data types except for timestamp, binary, nchar and bool
**Applicable table types**: table
**More explanations**: _P_ is in range [0,100], when _P_ is 0, the result is same as using function MIN; when _P_ is 100, the result is same as function MAX.
**More explanations**:
- _P_ is in range [0,100], when _P_ is 0, the result is same as using function MIN; when _P_ is 100, the result is same as function MAX.
- When calculating multiple percentiles of a specific column, a single PERCENTILE function with multiple parameters is adviced, as this can largely reduce the query response time.
For example, using SELECT percentile(col, 90, 95, 99) FROM table will perform better than SELECT percentile(col, 90), percentile(col, 95), percentile(col, 99) from table.
**Examples**:
......
......@@ -564,18 +564,22 @@ Query OK, 2 row(s) in set (0.000793s)
### PERCENTILE
```
SELECT PERCENTILE(field_name, P) FROM { tb_name } [WHERE clause];
SELECT PERCENTILE(field_name, P [, P1] ...) FROM { tb_name } [WHERE clause];
```
**功能说明**:统计表中某列的值百分比分位数。
**返回数据类型** 双精度浮点数 Double
**返回数据类型**该函数最小参数个数为 2 个,最大参数个数为 11 个。可以最多同时返回 10 个百分比分位数。当参数个数为 2 时, 返回一个分位数,类型为DOUBLE,当参数个数大于 2 时,返回类型为VARCHAR, 格式为包含多个返回值的JSON数组
**应用字段**:不能应用在 timestamp、binary、nchar、bool 类型字段。
**适用于**:表。
**使用说明***P*值取值范围 0≤*P*≤100,为 0 的时候等同于 MIN,为 100 的时候等同于 MAX。
**使用说明**
- *P*值取值范围 0≤*P*≤100,为 0 的时候等同于 MIN,为 100 的时候等同于 MAX。
- 同时计算针对同一列的多个分位数时,建议使用一个PERCENTILE函数和多个参数的方式,能很大程度上降低查询的响应时间。
比如,使用查询SELECT percentile(col, 90, 95, 99) FROM table, 性能会优于SELECT percentile(col, 90), percentile(col, 95), percentile(col, 99) from table。
**示例**
......
......@@ -3289,9 +3289,12 @@ int32_t addExprAndResultField(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, int32_t col
} else if (functionId == TSDB_FUNC_APERCT || functionId == TSDB_FUNC_TAIL) {
size_t cnt = taosArrayGetSize(pItem->pNode->Expr.paramList);
if (cnt != 2 && cnt != 3) valid = false;
} else if (functionId == TSDB_FUNC_PERCT) {
size_t cnt = taosArrayGetSize(pItem->pNode->Expr.paramList);
if (cnt < 2 || cnt > 11) valid = false;
} else if (functionId == TSDB_FUNC_UNIQUE) {
if (taosArrayGetSize(pItem->pNode->Expr.paramList) != 1) valid = false;
}else {
} else {
if (taosArrayGetSize(pItem->pNode->Expr.paramList) != 2) valid = false;
}
if (!valid) {
......@@ -3332,7 +3335,7 @@ int32_t addExprAndResultField(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, int32_t col
}
tVariant* pVariant = NULL;
if (functionId != TSDB_FUNC_UNIQUE) {
if (functionId != TSDB_FUNC_UNIQUE && functionId != TSDB_FUNC_PERCT) {
// 3. valid the parameters
if (pParamElem[1].pNode->tokenId == TK_ID) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg2);
......@@ -3348,7 +3351,31 @@ int32_t addExprAndResultField(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, int32_t col
char val[8] = {0};
SExprInfo* pExpr = NULL;
if (functionId == TSDB_FUNC_PERCT || functionId == TSDB_FUNC_APERCT) {
if (functionId == TSDB_FUNC_PERCT) {
int32_t numOfParams = (int32_t)taosArrayGetSize(pItem->pNode->Expr.paramList);
getResultDataInfo(pSchema->type, pSchema->bytes, functionId, numOfParams - 1, &resultType, &resultSize, &interResult, 0,
false, pUdfInfo);
pExpr = tscExprAppend(pQueryInfo, functionId, &idx, resultType, resultSize, getNewResColId(pCmd),
interResult, false);
for (int32_t i = 1; i < numOfParams; ++i) {
pVariant = &pParamElem[i].pNode->value;
if (pVariant->nType != TSDB_DATA_TYPE_DOUBLE && pVariant->nType != TSDB_DATA_TYPE_BIGINT) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg5);
}
tVariantDump(pVariant, val, TSDB_DATA_TYPE_DOUBLE, true);
double dp = GET_DOUBLE_VAL(val);
if (dp < 0 || dp > TOP_BOTTOM_QUERY_LIMIT) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg5);
}
tscExprAddParams(&pExpr->base, val, TSDB_DATA_TYPE_DOUBLE, sizeof(double));
}
tscInsertPrimaryTsSourceColumn(pQueryInfo, pTableMetaInfo->pTableMeta->id.uid);
colIndex += 1; // the first column is ts
} else if (functionId == TSDB_FUNC_APERCT) {
// param1 double
if (pVariant->nType != TSDB_DATA_TYPE_DOUBLE && pVariant->nType != TSDB_DATA_TYPE_BIGINT) {
return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg5);
......
......@@ -3027,6 +3027,11 @@ void tscHandleSubqueryError(SRetrieveSupport *trsupport, SSqlObj *pSql, int numO
}
SSqlObj *pParentSql = trsupport->pParentSql;
if (pParentSql->signature != pParentSql) {
tscError("query 0x%"PRIx64" parent sql obj %p not valid. parent id: 0x%"PRIx64, pSql->self, pParentSql, pParentSql->self);
return;
}
int32_t subqueryIndex = trsupport->subqueryIndex;
assert(pSql != NULL);
......@@ -3134,6 +3139,11 @@ static void tscAllDataRetrievedFromDnode(SRetrieveSupport *trsupport, SSqlObj* p
SSqlObj * pParentSql = trsupport->pParentSql;
tOrderDescriptor *pDesc = trsupport->pOrderDescriptor;
if (pParentSql->signature != pParentSql) {
tscError("query 0x%"PRIx64" parent sql obj %p not valid. parent id: 0x%"PRIx64, pSql->self, pParentSql, pParentSql->self);
return;
}
SSubqueryState* pState = &pParentSql->subState;
SQueryInfo *pQueryInfo = tscGetQueryInfo(&pSql->cmd);
......
......@@ -282,14 +282,15 @@ void taos_cleanup(void) {
#endif
}
int32_t id = tscObjRef;
tscObjRef = -1;
taosCloseRef(id);
void* p = tscQhandle;
tscQhandle = NULL;
taosCleanUpScheduler(p);
int32_t id = tscObjRef;
tscObjRef = -1;
taosCloseRef(id);
id = tscRefId;
tscRefId = -1;
taosCloseRef(id);
......
......@@ -2780,7 +2780,7 @@ void tscExprAddParams(SSqlExpr* pExpr, char* argument, int32_t type, int32_t byt
tVariantCreateFromBinary(&pExpr->param[pExpr->numOfParams], argument, bytes, type);
pExpr->numOfParams += 1;
assert(pExpr->numOfParams <= 3);
assert(pExpr->numOfParams <= 10);
}
SExprInfo* tscExprGet(SQueryInfo* pQueryInfo, int32_t idx) {
......
......@@ -59,7 +59,7 @@ typedef struct SSqlExpr {
// pQueryAttr->interBytesForGlobal
int16_t numOfParams; // argument value of each function
tVariant param[3]; // parameters are not more than 3
tVariant param[10]; // parameters are not more than 10
int32_t offset; // sub result column value of arithmetic expression.
int16_t resColId; // result column id
......
......@@ -210,7 +210,7 @@ typedef struct SQLFunctionCtx {
int64_t startTs; // timestamp range of current query when function is executed on a specific data block
int64_t endTs;
int32_t numOfParams;
tVariant param[4]; // input parameter, e.g., top(k, 20), the number of results for top query is kept in param
tVariant param[10]; // input parameter, e.g., top(k, 20), the number of results for top query is kept in param
int64_t *ptsList; // corresponding timestamp array list
void *ptsOutputBuf; // corresponding output buffer for timestamp of each result, e.g., top/bottom*/
SQLPreAggVal preAggVals;
......
......@@ -700,8 +700,13 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI
*bytes = sizeof(double);
*interBytes = sizeof(SSpreadInfo);
} else if (functionId == TSDB_FUNC_PERCT) {
if (param > 1) {
*type = (int16_t)TSDB_DATA_TYPE_BINARY;
*bytes = 512;
} else {
*type = (int16_t)TSDB_DATA_TYPE_DOUBLE;
*bytes = sizeof(double);
}
*interBytes = sizeof(SPercentileInfo);
} else if (functionId == TSDB_FUNC_LEASTSQR) {
*type = TSDB_DATA_TYPE_BINARY;
......@@ -3096,7 +3101,7 @@ static void percentile_function(SQLFunctionCtx *pCtx) {
}
static void percentile_finalizer(SQLFunctionCtx *pCtx) {
double v = pCtx->param[0].nType == TSDB_DATA_TYPE_INT ? pCtx->param[0].i64 : pCtx->param[0].dKey;
double v = 0;
SResultRowCellInfo *pResInfo = GET_RES_INFO(pCtx);
SPercentileInfo* ppInfo = (SPercentileInfo *) GET_ROWCELL_INTERBUF(pResInfo);
......@@ -3107,8 +3112,26 @@ static void percentile_finalizer(SQLFunctionCtx *pCtx) {
assert(ppInfo->numOfElems == 0);
setNull(pCtx->pOutput, pCtx->outputType, pCtx->outputBytes);
} else {
if (pCtx->numOfParams > 1) {
((char *)varDataVal(pCtx->pOutput))[0] = '[';
size_t len = 1;
size_t maxBufLen = 512;
for (int32_t i = 0; i < pCtx->numOfParams; ++i) {
v = pCtx->param[i].nType == TSDB_DATA_TYPE_INT ? pCtx->param[i].i64 : pCtx->param[i].dKey;
if (i == pCtx->numOfParams - 1) {
len += snprintf((char *)varDataVal(pCtx->pOutput) + len, maxBufLen - len, "%lf]", getPercentile(pMemBucket, v));
} else {
len += snprintf((char *)varDataVal(pCtx->pOutput) + len, maxBufLen - len, "%lf, ", getPercentile(pMemBucket, v));
}
}
varDataSetLen(pCtx->pOutput, len);
} else {
v = pCtx->param[0].nType == TSDB_DATA_TYPE_INT ? pCtx->param[0].i64 : pCtx->param[0].dKey;
SET_DOUBLE_VAL((double *)pCtx->pOutput, getPercentile(pMemBucket, v));
}
}
tMemBucketDestroy(pMemBucket);
doFinalizer(pCtx);
......
......@@ -9775,7 +9775,13 @@ int32_t createQueryFunc(SQueriedTableInfo* pTableInfo, int32_t numOfOutput, SExp
}
}
int32_t param = (int32_t)pExprs[i].base.param[0].i64;
int32_t param;
if (pExprs[i].base.functionId != TSDB_FUNC_PERCT) {
param = (int32_t)pExprs[i].base.param[0].i64;
} else {
param = pExprs[i].base.numOfParams;
}
if (pExprs[i].base.functionId > 0 && pExprs[i].base.functionId != TSDB_FUNC_SCALAR_EXPR &&
!isTimeWindowFunction(pExprs[i].base.functionId) &&
(type != pExprs[i].base.colType || bytes != pExprs[i].base.colBytes)) {
......
......@@ -51,6 +51,19 @@ class TDTestCase:
tdSql.error("select percentile(col9 20) from test")
tdSql.error("select apercentile(col9 20) from test")
tdSql.error(f'select percentile(col1) from test')
tdSql.error(f'select percentile(col1, -1) from test')
tdSql.error(f'select percentile(col1, 101) from test')
tdSql.error(f'select percentile(col1, col2) from test')
tdSql.error(f'select percentile(1, col1) from test')
tdSql.error(f'select percentile(col1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 101) from test')
tdSql.query(f'select percentile(col1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100) from test')
tdSql.checkData(0, 0, '[1.900000, 2.800000, 3.700000, 4.600000, 5.500000, 6.400000, 7.300000, 8.200000, 9.100000, 10.000000]')
tdSql.query(f'select percentile(col1, 9.9, 19.9, 29.9, 39.9, 49.9, 59.9, 69.9, 79.9, 89.9, 99.9) from test')
tdSql.checkData(0, 0, '[1.891000, 2.791000, 3.691000, 4.591000, 5.491000, 6.391000, 7.291000, 8.191000, 9.091000, 9.991000]')
tdSql.query("select percentile(col1, 0) from test")
tdSql.checkData(0, 0, np.percentile(intData, 0))
tdSql.query("select apercentile(col1, 0) from test")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册