diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c
index 3c9eca85ddc4fc521227d215414cdff417dc319f..64cca47da07d8bd663d2467c098ba978f3adeeca 100644
--- a/source/libs/function/src/builtinsimpl.c
+++ b/source/libs/function/src/builtinsimpl.c
@@ -14,13 +14,14 @@
  */
 
 #include "builtinsimpl.h"
+#include "cJSON.h"
 #include "function.h"
 #include "querynodes.h"
 #include "taggfunction.h"
 #include "tdatablock.h"
 #include "tpercentile.h"
 
-#define HISTOGRAM_MAX_BINS_NUM 100
+#define HISTOGRAM_MAX_BINS_NUM 100
 
 typedef struct SSumRes {
   union {
@@ -106,6 +107,13 @@ typedef struct SHistoFuncInfo {
   SHistoFuncBin bins[];
 } SHistoFuncInfo;
 
+typedef enum {
+  UNKNOWN_BIN = 0,
+  USER_INPUT_BIN,
+  LINEAR_BIN,
+  LOG_BIN
+} EHistoBinType;
+
 
 #define SET_VAL(_info, numOfElem, res) \
   do { \
@@ -1801,16 +1809,196 @@ bool getHistogramFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv)
   return true;
 }
 
-bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo* pResultInfo) {
+/**
+ * Map a bin type name to an EHistoBinType value.
+ *
+ * @param binTypeStr NUL-terminated bin type name, compared case-insensitively.
+ * @return USER_INPUT_BIN, LINEAR_BIN or LOG_BIN; UNKNOWN_BIN for any other name.
+ */
+static int8_t getHistogramBinType(char *binTypeStr) {
+  if (strcasecmp(binTypeStr, "user_input") == 0) {
+    return USER_INPUT_BIN;
+  }
+  if (strcasecmp(binTypeStr, "linear_bin") == 0) {
+    return LINEAR_BIN;
+  }
+  if (strcasecmp(binTypeStr, "log_bin") == 0) {
+    return LOG_BIN;
+  }
+  return UNKNOWN_BIN;
+}
+
+/**
+ * Parse a JSON bin description and fill pInfo->bins with the resulting ranges.
+ *
+ * Two JSON shapes are accepted:
+ *  - an object with exactly four members: "start", "count", "infinity" and
+ *    either "width" (LINEAR_BIN) or "factor" (LOG_BIN);
+ *  - an array of at least two strictly increasing numbers (USER_INPUT_BIN).
+ * N bin edges always describe N - 1 bins.
+ *
+ * @param pInfo      histogram state to fill; not written on failure
+ * @param binDescStr NUL-terminated JSON text
+ * @param binType    EHistoBinType value the description must match
+ * @param normalized copied into pInfo->normalized
+ * @return true on success, false on invalid input or allocation failure
+ *
+ * NOTE(review): the bin count is not checked against the capacity of
+ * pInfo->bins[]; "count" may reach 1000 while HISTOGRAM_MAX_BINS_NUM is 100.
+ * Confirm the buffer reserved for SHistoFuncInfo is large enough.
+ */
+static bool getHistogramBinDesc(SHistoFuncInfo *pInfo, char *binDescStr, int8_t binType, bool normalized) {
+  bool    ret = false;
+  int32_t numOfEdges = 0;
+  double *intervals = NULL;
+  cJSON  *binDesc = cJSON_Parse(binDescStr);
+
+  if (cJSON_IsObject(binDesc)) { /* linear/log bins */
+    if (cJSON_GetArraySize(binDesc) != 4) {
+      goto cleanup;
+    }
+
+    cJSON *start = cJSON_GetObjectItem(binDesc, "start");
+    cJSON *factor = cJSON_GetObjectItem(binDesc, "factor");
+    cJSON *width = cJSON_GetObjectItem(binDesc, "width");
+    cJSON *count = cJSON_GetObjectItem(binDesc, "count");
+    cJSON *infinity = cJSON_GetObjectItem(binDesc, "infinity");
+
+    if (!cJSON_IsNumber(start) || !cJSON_IsNumber(count) || !cJSON_IsBool(infinity)) {
+      goto cleanup;
+    }
+
+    if (count->valueint <= 0 || count->valueint > 1000) {  // limit count to 1000
+      goto cleanup;
+    }
+
+    if (isinf(start->valuedouble) || (width != NULL && isinf(width->valuedouble)) ||
+        (factor != NULL && isinf(factor->valuedouble)) || isinf(count->valuedouble)) {
+      goto cleanup;
+    }
+
+    int32_t counter = (int32_t)count->valueint;
+    bool    withInf = cJSON_IsTrue(infinity);
+    int32_t startIndex = withInf ? 1 : 0;  // with infinity, slot 0 holds the open-ended edge
+    numOfEdges = withInf ? counter + 3 : counter + 1;
+
+    intervals = taosMemoryCalloc(numOfEdges, sizeof(double));
+    if (intervals == NULL) {
+      goto cleanup;
+    }
+
+    if (cJSON_IsNumber(width) && factor == NULL && binType == LINEAR_BIN) {
+      // linear bins: edge i is start + i * width
+      if (width->valuedouble == 0) {
+        goto cleanup;
+      }
+      for (int32_t i = 0; i < counter + 1; ++i) {
+        intervals[startIndex] = start->valuedouble + i * width->valuedouble;
+        if (isinf(intervals[startIndex])) {
+          goto cleanup;
+        }
+        startIndex++;
+      }
+    } else if (cJSON_IsNumber(factor) && width == NULL && binType == LOG_BIN) {
+      // log bins: edge i is start * factor^i
+      if (start->valuedouble == 0) {
+        goto cleanup;
+      }
+      if (factor->valuedouble <= 0 || factor->valuedouble == 1) {
+        goto cleanup;
+      }
+      for (int32_t i = 0; i < counter + 1; ++i) {
+        intervals[startIndex] = start->valuedouble * pow(factor->valuedouble, i * 1.0);
+        if (isinf(intervals[startIndex])) {
+          goto cleanup;
+        }
+        startIndex++;
+      }
+    } else {
+      goto cleanup;
+    }
+
+    if (withInf) {
+      intervals[0] = -INFINITY;
+      intervals[numOfEdges - 1] = INFINITY;
+      // in case of desc bin orders, -inf/inf should be swapped
+      ASSERT(numOfEdges >= 4);
+      if (intervals[1] > intervals[numOfEdges - 2]) {
+        TSWAP(intervals[0], intervals[numOfEdges - 1]);
+      }
+    }
+  } else if (cJSON_IsArray(binDesc)) { /* user input bins */
+    if (binType != USER_INPUT_BIN) {
+      goto cleanup;
+    }
+    numOfEdges = cJSON_GetArraySize(binDesc);
+    if (numOfEdges < 2) {  // a single edge cannot delimit a bin
+      goto cleanup;
+    }
+    intervals = taosMemoryCalloc(numOfEdges, sizeof(double));
+    if (intervals == NULL) {
+      goto cleanup;
+    }
+    int32_t i = 0;
+    for (cJSON *bin = binDesc->child; bin != NULL; bin = bin->next, ++i) {
+      if (!cJSON_IsNumber(bin)) {
+        goto cleanup;
+      }
+      intervals[i] = bin->valuedouble;
+      if (i != 0 && intervals[i] <= intervals[i - 1]) {
+        goto cleanup;
+      }
+    }
+  } else {
+    goto cleanup;
+  }
+
+  // N edges delimit N - 1 bins; min/max keeps lower <= upper for descending edges
+  pInfo->numOfBins = numOfEdges - 1;
+  pInfo->normalized = normalized;
+  for (int32_t i = 0; i + 1 < numOfEdges; ++i) {
+    double lo = intervals[i];
+    double hi = intervals[i + 1];
+    pInfo->bins[i].lower = lo < hi ? lo : hi;
+    pInfo->bins[i].upper = hi > lo ? hi : lo;
+    pInfo->bins[i].count = 0;
+  }
+  ret = true;
+
+cleanup:
+  if (intervals != NULL) {
+    taosMemoryFree(intervals);
+  }
+  cJSON_Delete(binDesc);
+  return ret;
+}
+
+/**
+ * Set up the histogram result buffer from the function parameters:
+ * param[1] is the bin type name, param[2] the bin description and param[3]
+ * the normalized flag.
+ *
+ * NOTE(review): both strings are obtained with varDataVal() and then used as
+ * NUL-terminated C strings; confirm the var-data payload is terminated.
+ *
+ * @return false if functionSetup() fails or the bin type/description is invalid
+ */
+bool histogramFunctionSetup(SqlFunctionCtx *pCtx, SResultRowEntryInfo *pResultInfo) {
   if (!functionSetup(pCtx, pResultInfo)) {
     return false;
   }
 
 
-  SHistoFuncInfo* pInfo = GET_ROWCELL_INTERBUF(pResultInfo);
-  char* binType = pCtx->param[1].param.pz;
-  char* binDesc = pCtx->param[2].param.pz;
-  int64_t nornalized = pCtx->param[3].param.i;
+  SHistoFuncInfo *pInfo = GET_ROWCELL_INTERBUF(pResultInfo);
+  int8_t binType = getHistogramBinType(varDataVal(pCtx->param[1].param.pz));
+  if (binType == UNKNOWN_BIN) {
+    return false;
+  }
+  char* binDesc = varDataVal(pCtx->param[2].param.pz);
+  int64_t normalized = pCtx->param[3].param.i;
+  if (!getHistogramBinDesc(pInfo, binDesc, binType, (bool)normalized)) {
+    return false;
+  }
 
   return true;
 }