From 1eb59a2fedc3a960c6478cba1865924311f54938 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Tue, 17 May 2022 10:34:59 +0800 Subject: [PATCH] feat(query): add hll function --- include/libs/function/functionMgt.h | 1 + source/libs/function/inc/builtinsimpl.h | 4 ++++ source/libs/function/src/builtins.c | 25 +++++++++++++++++++++++++ source/libs/function/src/builtinsimpl.c | 4 ++-- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 2b58ed7c0b..aec1476663 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -41,6 +41,7 @@ typedef enum EFunctionType { FUNCTION_TYPE_SUM, FUNCTION_TYPE_TWA, FUNCTION_TYPE_HISTOGRAM, + FUNCTION_TYPE_HYPERLOGLOG, // nonstandard SQL function FUNCTION_TYPE_BOTTOM = 500, diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index c25d74911c..99313675a5 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -90,6 +90,10 @@ bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultIn int32_t histogramFunction(SqlFunctionCtx* pCtx); int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); +int32_t hllFunction(SqlFunctionCtx* pCtx); +int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); + bool getStateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool stateFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); int32_t stateCountFunction(SqlFunctionCtx* pCtx); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index e41e3c7c39..48165fdd99 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -263,6 +263,21 @@ static int32_t translateHistogram(SFunctionNode* pFunc, char* pErrBuf, int32_t l return TSDB_CODE_SUCCESS; } +static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + if (1 != LIST_LENGTH(pFunc->pParameterList)) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + + SNode* pPara = nodesListGetNode(pFunc->pParameterList, 0); + if (QUERY_NODE_COLUMN != nodeType(pPara)) { + return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR, + "The input parameter of HYPERLOGLOG function can only be column"); + } + + pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_UBIGINT].bytes, .type = TSDB_DATA_TYPE_UBIGINT}; + return TSDB_CODE_SUCCESS; +} + static int32_t translateStateCount(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { if (3 != LIST_LENGTH(pFunc->pParameterList)) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -829,6 +844,16 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = histogramFunction, .finalizeFunc = histogramFinalize }, + { + .name = "hyperloglog", + .type = FUNCTION_TYPE_HYPERLOGLOG, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHLL, + .getEnvFunc = getHLLFuncEnv, + .initFunc = functionSetup, + .processFunc = hllFunction, + .finalizeFunc = hllFinalize + }, { .name = "state_count", .type = FUNCTION_TYPE_STATE_COUNT, diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 556015a2ac..7ff3b6fb05 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -26,7 +26,7 @@ #define MAVG_MAX_POINTS_NUM 1000 #define SAMPLE_MAX_POINTS_NUM 1000 #define TAIL_MAX_POINTS_NUM 100 -#define TAIL_MAX_OFFSET 10 +#define TAIL_MAX_OFFSET 100 #define HLL_BUCKET_BITS 14 // The bits of the bucket #define HLL_DATA_BITS (64-HLL_BUCKET_BITS) @@ -2849,8 +2849,8 @@ int32_t hllFunction(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, i); if (IS_VAR_DATA_TYPE(type)) { + bytes = varDataLen(data); data = varDataVal(data); - bytes -= VARSTR_HEADER_SIZE; } int32_t index = 0; -- GitLab