diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 2b58ed7c0bdc70d6e6437542e2136867d068b645..aec14766637a6f74a1a723f7cffeacbf6eb9c6f8 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -41,6 +41,7 @@ typedef enum EFunctionType { FUNCTION_TYPE_SUM, FUNCTION_TYPE_TWA, FUNCTION_TYPE_HISTOGRAM, + FUNCTION_TYPE_HYPERLOGLOG, // nonstandard SQL function FUNCTION_TYPE_BOTTOM = 500, diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index c25d74911c916ed26c1a04241909975ab8ab793a..99313675a5ad162972907d57b0a4330cb925521d 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -90,6 +90,10 @@ bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultIn int32_t histogramFunction(SqlFunctionCtx* pCtx); int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +bool getHLLFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); +int32_t hllFunction(SqlFunctionCtx* pCtx); +int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); + bool getStateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool stateFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo); int32_t stateCountFunction(SqlFunctionCtx* pCtx); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index e41e3c7c39a2a18e262e892cf659fd3258361ace..48165fdd99f428818598d72c4d8f9bf795b4e831 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -263,6 +263,21 @@ static int32_t translateHistogram(SFunctionNode* pFunc, char* pErrBuf, int32_t l return TSDB_CODE_SUCCESS; } +static int32_t translateHLL(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + if (1 != LIST_LENGTH(pFunc->pParameterList)) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + + SNode* pPara = nodesListGetNode(pFunc->pParameterList, 0); + if (QUERY_NODE_COLUMN != nodeType(pPara)) { + return buildFuncErrMsg(pErrBuf, len, TSDB_CODE_FUNC_FUNTION_ERROR, + "The input parameter of HYPERLOGLOG function can only be column"); + } + + pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_UBIGINT].bytes, .type = TSDB_DATA_TYPE_UBIGINT}; + return TSDB_CODE_SUCCESS; +} + static int32_t translateStateCount(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { if (3 != LIST_LENGTH(pFunc->pParameterList)) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -829,6 +844,16 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = histogramFunction, .finalizeFunc = histogramFinalize }, + { + .name = "hyperloglog", + .type = FUNCTION_TYPE_HYPERLOGLOG, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateHLL, + .getEnvFunc = getHLLFuncEnv, + .initFunc = functionSetup, + .processFunc = hllFunction, + .finalizeFunc = hllFinalize + }, { .name = "state_count", .type = FUNCTION_TYPE_STATE_COUNT, diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 556015a2ac76b541403e5b9c36180b6bebbd56f5..7ff3b6fb052d32b6f52f8694b896a53bacf59226 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -26,7 +26,7 @@ #define MAVG_MAX_POINTS_NUM 1000 #define SAMPLE_MAX_POINTS_NUM 1000 #define TAIL_MAX_POINTS_NUM 100 -#define TAIL_MAX_OFFSET 10 +#define TAIL_MAX_OFFSET 100 #define HLL_BUCKET_BITS 14 // The bits of the bucket #define HLL_DATA_BITS (64-HLL_BUCKET_BITS) @@ -2849,8 +2849,8 @@ int32_t hllFunction(SqlFunctionCtx *pCtx) { char* data = colDataGetData(pCol, i); if (IS_VAR_DATA_TYPE(type)) { + bytes = varDataLen(data); data = varDataVal(data); - bytes -= VARSTR_HEADER_SIZE; } int32_t index = 0;