diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index bef1d242c9f1e249dbd7d66b7ad029666faaf2d3..591a6bba344eaffb93117668e7a3dcc91761abad 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -821,17 +821,22 @@ static void doSetupSDataBlock(SSqlRes* pRes, SSDataBlock* pBlock, SFilterInfo* p // filter data if needed if (pFilterInfo) { //doSetFilterColumnInfo(pFilterInfo, numOfFilterCols, pBlock); - doSetFilterColInfo(pFilterInfo, pBlock); + filterSetColFieldData(pFilterInfo, pBlock->info.numOfCols, pBlock->pDataBlock); bool gotNchar = false; filterConverNcharColumns(pFilterInfo, pBlock->info.rows, &gotNchar); - int8_t* p = calloc(pBlock->info.rows, sizeof(int8_t)); + int8_t* p = NULL; //bool all = doFilterDataBlock(pFilterInfo, numOfFilterCols, pBlock->info.rows, p); - bool all = filterExecute(pFilterInfo, pBlock->info.rows, p); + bool all = filterExecute(pFilterInfo, pBlock->info.rows, &p, NULL, 0); if (gotNchar) { filterFreeNcharColumns(pFilterInfo); } if (!all) { - doCompactSDataBlock(pBlock, pBlock->info.rows, p); + if (p) { + doCompactSDataBlock(pBlock, pBlock->info.rows, p); + } else { + pBlock->info.rows = 0; + pBlock->pBlockStatis = NULL; // clean the block statistics info + } } tfree(p); diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 84f00891e5e75bc7b3c7a32f3de0d040bd4c5cb7..a694e9b1dda7e49f2edc778465c3848f5e1c9ad4 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -598,7 +598,6 @@ SSDataBlock* doSLimit(void* param, bool* newgroup); int32_t doCreateFilterInfo(SColumnInfo* pCols, int32_t numOfCols, int32_t numOfFilterCols, SSingleColumnFilterInfo** pFilterInfo, uint64_t qId); void doSetFilterColumnInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, SSDataBlock* pBlock); -void doSetFilterColInfo(SFilterInfo *pFilters, SSDataBlock* pBlock); bool doFilterDataBlock(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFilterCols, int32_t numOfRows, int8_t* p); void doCompactSDataBlock(SSDataBlock* pBlock, int32_t numOfRows, int8_t* p); diff --git a/src/query/inc/qFilter.h b/src/query/inc/qFilter.h index 7a7b3157eaeaf7075b1d856b4e68568f61871885..af45b816f9e6725579403069843295895cf57cc8 100644 --- a/src/query/inc/qFilter.h +++ b/src/query/inc/qFilter.h @@ -31,10 +31,11 @@ extern "C" { #define FILTER_DEFAULT_GROUP_UNIT_SIZE 2 #define FILTER_DUMMY_EMPTY_OPTR 127 -#define FILTER_DUMMY_RANGE_OPTR 126 #define MAX_NUM_STR_SIZE 40 +#define FILTER_RM_UNIT_MIN_ROWS 100 + enum { FLD_TYPE_COLUMN = 1, FLD_TYPE_VALUE = 2, @@ -70,6 +71,12 @@ enum { FI_STATUS_CLONED = 8, }; +enum { + FI_STATUS_BLK_ALL = 1, + FI_STATUS_BLK_EMPTY = 2, + FI_STATUS_BLK_ACTIVE = 4, +}; + enum { RANGE_TYPE_UNIT = 1, RANGE_TYPE_VAR_HASH = 2, @@ -98,7 +105,7 @@ typedef struct SFilterColRange { typedef bool (*rangeCompFunc) (const void *, const void *, const void *, const void *, __compar_fn_t); typedef int32_t(*filter_desc_compare_func)(const void *, const void *); -typedef bool(*filter_exec_func)(void *, int32_t, int8_t*); +typedef bool(*filter_exec_func)(void *, int32_t, int8_t**, SDataStatis *, int16_t); typedef struct SFilterRangeCompare { int64_t s; @@ -197,6 +204,7 @@ typedef struct SFilterComUnit { void *colData; void *valData; void *valData2; + uint16_t colId; uint16_t dataSize; uint8_t dataType; uint8_t optr; @@ -224,7 +232,11 @@ typedef struct SFilterInfo { uint8_t *unitRes; // result uint8_t *unitFlags; // got result SFilterRangeCtx **colRange; - filter_exec_func func; + filter_exec_func func; + uint8_t blkFlag; + uint16_t blkGroupNum; + uint16_t *blkUnits; + int8_t *blkUnitRes; SFilterPCtx pctx; } SFilterInfo; @@ -265,12 +277,13 @@ typedef struct SFilterInfo { #define CHK_RET(c, r) do { if (c) { return r; } } while (0) #define CHK_JMP(c) do { if (c) { goto _return; } } while (0) #define CHK_LRETV(c,...) do { if (c) { qError(__VA_ARGS__); return; } } while (0) -#define CHK_LRET(c, r,...) do { if (c) { qError(__VA_ARGS__); return r; } } while (0) +#define CHK_LRET(c, r,...) do { if (c) { if (r) {qError(__VA_ARGS__); } else { qDebug(__VA_ARGS__); } return r; } } while (0) #define FILTER_GET_FIELD(i, id) (&((i)->fields[(id).type].fields[(id).idx])) #define FILTER_GET_COL_FIELD(i, idx) (&((i)->fields[FLD_TYPE_COLUMN].fields[idx])) #define FILTER_GET_COL_FIELD_TYPE(fi) (((SSchema *)((fi)->desc))->type) #define FILTER_GET_COL_FIELD_SIZE(fi) (((SSchema *)((fi)->desc))->bytes) +#define FILTER_GET_COL_FIELD_ID(fi) (((SSchema *)((fi)->desc))->colId) #define FILTER_GET_COL_FIELD_DESC(fi) ((SSchema *)((fi)->desc)) #define FILTER_GET_COL_FIELD_DATA(fi, ri) ((char *)(fi)->data + ((SSchema *)((fi)->desc))->bytes * (ri)) #define FILTER_GET_VAL_FIELD_TYPE(fi) (((tVariant *)((fi)->desc))->nType) @@ -280,10 +293,12 @@ typedef struct SFilterInfo { #define FILTER_GROUP_UNIT(i, g, uid) ((i)->units + (g)->unitIdxs[uid]) #define FILTER_UNIT_LEFT_FIELD(i, u) FILTER_GET_FIELD(i, (u)->left) #define FILTER_UNIT_RIGHT_FIELD(i, u) FILTER_GET_FIELD(i, (u)->right) +#define FILTER_UNIT_RIGHT2_FIELD(i, u) FILTER_GET_FIELD(i, (u)->right2) #define FILTER_UNIT_DATA_TYPE(u) ((u)->compare.type) #define FILTER_UNIT_COL_DESC(i, u) FILTER_GET_COL_FIELD_DESC(FILTER_UNIT_LEFT_FIELD(i, u)) #define FILTER_UNIT_COL_DATA(i, u, ri) FILTER_GET_COL_FIELD_DATA(FILTER_UNIT_LEFT_FIELD(i, u), ri) #define FILTER_UNIT_COL_SIZE(i, u) FILTER_GET_COL_FIELD_SIZE(FILTER_UNIT_LEFT_FIELD(i, u)) +#define FILTER_UNIT_COL_ID(i, u) FILTER_GET_COL_FIELD_ID(FILTER_UNIT_LEFT_FIELD(i, u)) #define FILTER_UNIT_VAL_DATA(i, u) FILTER_GET_VAL_FIELD_DATA(FILTER_UNIT_RIGHT_FIELD(i, u)) #define FILTER_UNIT_COL_IDX(u) ((u)->left.idx) #define FILTER_UNIT_OPTR(u) ((u)->compare.optr) @@ -309,8 +324,8 @@ typedef struct SFilterInfo { extern int32_t filterInitFromTree(tExprNode* tree, SFilterInfo **pinfo, uint32_t options); -extern bool filterExecute(SFilterInfo *info, int32_t numOfRows, int8_t* p); -extern int32_t filterSetColFieldData(SFilterInfo *info, int16_t colId, void *data); +extern bool filterExecute(SFilterInfo *info, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols); +extern int32_t filterSetColFieldData(SFilterInfo *info, int32_t numOfCols, SArray* pDataBlock); extern int32_t filterGetTimeRange(SFilterInfo *info, STimeWindow *win); extern int32_t filterConverNcharColumns(SFilterInfo* pFilterInfo, int32_t rows, bool *gotNchar); extern int32_t filterFreeNcharColumns(SFilterInfo* pFilterInfo); diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index d799d56cea8e0956ea1abc3fcc898d40f3e38916..fe362f51a9622aaa114f9ece61003e2827065c8d 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -2951,12 +2951,13 @@ void filterRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSingleColumnFilterInf void filterColRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock, bool ascQuery) { int32_t numOfRows = pBlock->info.rows; - int8_t *p = calloc(numOfRows, sizeof(int8_t)); + int8_t *p = NULL; bool all = true; if (pRuntimeEnv->pTsBuf != NULL) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, 0); - + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, 0); + p = calloc(numOfRows, sizeof(int8_t)); + TSKEY* k = (TSKEY*) pColInfoData->pData; for (int32_t i = 0; i < numOfRows; ++i) { int32_t offset = ascQuery? i:(numOfRows - i - 1); @@ -2979,11 +2980,16 @@ void filterColRowsInDataBlock(SQueryRuntimeEnv* pRuntimeEnv, SSDataBlock* pBlock // save the cursor status pRuntimeEnv->current->cur = tsBufGetCursor(pRuntimeEnv->pTsBuf); } else { - all = filterExecute(pRuntimeEnv->pQueryAttr->pFilters, numOfRows, p); + all = filterExecute(pRuntimeEnv->pQueryAttr->pFilters, numOfRows, &p, pBlock->pBlockStatis, pRuntimeEnv->pQueryAttr->numOfCols); } if (!all) { - doCompactSDataBlock(pBlock, numOfRows, p); + if (p) { + doCompactSDataBlock(pBlock, numOfRows, p); + } else { + pBlock->info.rows = 0; + pBlock->pBlockStatis = NULL; // clean the block statistics info + } } tfree(p); @@ -3032,15 +3038,6 @@ void doSetFilterColumnInfo(SSingleColumnFilterInfo* pFilterInfo, int32_t numOfFi } } - -void doSetFilterColInfo(SFilterInfo * pFilters, SSDataBlock* pBlock) { - for (int32_t j = 0; j < pBlock->info.numOfCols; ++j) { - SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, j); - - filterSetColFieldData(pFilters, pColInfo->info.colId, pColInfo->pData); - } -} - int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTableScanInfo, SSDataBlock* pBlock, uint32_t* status) { *status = BLK_DATA_NO_NEEDED; @@ -3195,7 +3192,7 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv* pRuntimeEnv, STableScanInfo* pTa } if (pQueryAttr->pFilters != NULL) { - doSetFilterColInfo(pQueryAttr->pFilters, pBlock); + filterSetColFieldData(pQueryAttr->pFilters, pBlock->info.numOfCols, pBlock->pDataBlock); } if (pQueryAttr->pFilters != NULL || pRuntimeEnv->pTsBuf != NULL) { diff --git a/src/query/src/qFilter.c b/src/query/src/qFilter.c index 1171bb089641909ad8a18ac91866736d23a2eea8..72f8376af6d5b7d25ba154a95e034be4eb0e6f66 100644 --- a/src/query/src/qFilter.c +++ b/src/query/src/qFilter.c @@ -157,7 +157,7 @@ __compar_fn_t gDataCompare[] = {compareInt32Val, compareInt8Val, compareInt16Val compareDoubleVal, compareLenPrefixedStr, compareStrPatternComp, compareFindItemInSet, compareWStrPatternComp, compareLenPrefixedWStr, compareUint8Val, compareUint16Val, compareUint32Val, compareUint64Val, setCompareBytes1, setCompareBytes2, setCompareBytes4, setCompareBytes8 -}; +}; int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) { int8_t comparFn = 0; @@ -1521,7 +1521,7 @@ void filterDumpInfoToString(SFilterInfo *info, const char *msg, int32_t options) int32_t type = FILTER_UNIT_DATA_TYPE(unit); int32_t len = 0; int32_t tlen = 0; - char str[256] = {0}; + char str[512] = {0}; SFilterField *left = FILTER_UNIT_LEFT_FIELD(info, unit); SSchema *sch = left->desc; @@ -1539,6 +1539,24 @@ void filterDumpInfoToString(SFilterInfo *info, const char *msg, int32_t options) strcat(str, "NULL"); } strcat(str, "]"); + + if (unit->compare.optr2) { + strcat(str, " && "); + sprintf(str + strlen(str), "[%d][%s] %s [", sch->colId, sch->name, gOptrStr[unit->compare.optr2].str); + + if (unit->right2.type == FLD_TYPE_VALUE && FILTER_UNIT_OPTR(unit) != TSDB_RELATION_IN) { + SFilterField *right = FILTER_UNIT_RIGHT2_FIELD(info, unit); + char *data = right->data; + if (IS_VAR_DATA_TYPE(type)) { + tlen = varDataLen(data); + data += VARSTR_HEADER_SIZE; + } + converToStr(str + strlen(str), type, data, tlen > 32 ? 32 : tlen, &tlen); + } else { + strcat(str, "NULL"); + } + strcat(str, "]"); + } qDebug("%s", str); //TODO } @@ -1556,37 +1574,63 @@ void filterDumpInfoToString(SFilterInfo *info, const char *msg, int32_t options) return; } - qDebug("%s - RANGE info:", msg); - - qDebug("RANGE Num:%u", info->colRangeNum); - for (uint16_t i = 0; i < info->colRangeNum; ++i) { - SFilterRangeCtx *ctx = info->colRange[i]; - qDebug("Column ID[%d] RANGE: isnull[%d],notnull[%d],range[%d]", ctx->colId, ctx->isnull, ctx->notnull, ctx->isrange); - if (ctx->isrange) { - SFilterRangeNode *r = ctx->rs; - while (r) { - char str[256] = {0}; - int32_t tlen = 0; - if (FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_NULL)) { - strcat(str,"(NULL)"); - } else { - FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? strcat(str,"(") : strcat(str,"["); - converToStr(str + strlen(str), ctx->type, &r->ra.s, tlen > 32 ? 32 : tlen, &tlen); - FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? strcat(str,")") : strcat(str,"]"); - } - strcat(str, " - "); - if (FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_NULL)) { - strcat(str, "(NULL)"); - } else { - FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? strcat(str,"(") : strcat(str,"["); - converToStr(str + strlen(str), ctx->type, &r->ra.e, tlen > 32 ? 32 : tlen, &tlen); - FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? strcat(str,")") : strcat(str,"]"); + if (options == 1) { + qDebug("%s - RANGE info:", msg); + + qDebug("RANGE Num:%u", info->colRangeNum); + for (uint16_t i = 0; i < info->colRangeNum; ++i) { + SFilterRangeCtx *ctx = info->colRange[i]; + qDebug("Column ID[%d] RANGE: isnull[%d],notnull[%d],range[%d]", ctx->colId, ctx->isnull, ctx->notnull, ctx->isrange); + if (ctx->isrange) { + SFilterRangeNode *r = ctx->rs; + while (r) { + char str[256] = {0}; + int32_t tlen = 0; + if (FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_NULL)) { + strcat(str,"(NULL)"); + } else { + FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? strcat(str,"(") : strcat(str,"["); + converToStr(str + strlen(str), ctx->type, &r->ra.s, tlen > 32 ? 32 : tlen, &tlen); + FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? strcat(str,")") : strcat(str,"]"); + } + strcat(str, " - "); + if (FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_NULL)) { + strcat(str, "(NULL)"); + } else { + FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? strcat(str,"(") : strcat(str,"["); + converToStr(str + strlen(str), ctx->type, &r->ra.e, tlen > 32 ? 32 : tlen, &tlen); + FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? strcat(str,")") : strcat(str,"]"); + } + qDebug("range: %s", str); + + r = r->next; } - qDebug("range: %s", str); - - r = r->next; } } + + return; + } + + qDebug("%s - Block Filter info:", msg); + + if (FILTER_GET_FLAG(info->blkFlag, FI_STATUS_BLK_ALL)) { + qDebug("Flag:%s", "ALL"); + return; + } else if (FILTER_GET_FLAG(info->blkFlag, FI_STATUS_BLK_EMPTY)) { + qDebug("Flag:%s", "EMPTY"); + return; + } else if (FILTER_GET_FLAG(info->blkFlag, FI_STATUS_BLK_ACTIVE)){ + qDebug("Flag:%s", "ACTIVE"); + } + + qDebug("GroupNum:%d", info->blkGroupNum); + uint16_t *unitIdx = info->blkUnits; + for (uint16_t i = 0; i < info->blkGroupNum; ++i) { + qDebug("Group[%d] UnitNum: %d:", i, *unitIdx); + uint16_t unitNum = *(unitIdx++); + for (uint16_t m = 0; m < unitNum; ++m) { + qDebug("uidx[%d]", *(unitIdx++)); + } } } } @@ -1674,7 +1718,9 @@ void filterFreeInfo(SFilterInfo *info) { CHK_RETV(info == NULL); tfree(info->cunits); - + tfree(info->blkUnitRes); + tfree(info->blkUnits); + for (int32_t i = 0; i < FLD_TYPE_MAX; ++i) { for (uint16_t f = 0; f < info->fields[i].num; ++f) { filterFreeField(&info->fields[i].fields[f], i); @@ -2485,7 +2531,9 @@ int32_t filterGenerateComInfo(SFilterInfo *info) { uint16_t n = 0; info->cunits = malloc(info->unitNum * sizeof(*info->cunits)); - + info->blkUnitRes = malloc(sizeof(*info->blkUnitRes) * info->unitNum); + info->blkUnits = malloc(sizeof(*info->blkUnits) * (info->unitNum + 1) * info->groupNum); + for (uint16_t i = 0; i < info->unitNum; ++i) { SFilterUnit *unit = &info->units[i]; @@ -2493,6 +2541,7 @@ int32_t filterGenerateComInfo(SFilterInfo *info) { info->cunits[i].rfunc = filterGetRangeCompFuncFromOptrs(unit->compare.optr, unit->compare.optr2); info->cunits[i].optr = FILTER_UNIT_OPTR(unit); info->cunits[i].colData = NULL; + info->cunits[i].colId = FILTER_UNIT_COL_ID(info, unit); if (unit->right.type == FLD_TYPE_VALUE) { info->cunits[i].valData = FILTER_UNIT_VAL_DATA(info, unit); @@ -2541,36 +2590,317 @@ int32_t filterUpdateComUnits(SFilterInfo *info) { } -static FORCE_INLINE bool filterExecuteImplAll(void *info, int32_t numOfRows, int8_t* p) { +int32_t filterRmUnitByRange(SFilterInfo *info, SDataStatis *pDataStatis, int32_t numOfCols, int32_t numOfRows) { + int32_t rmUnit = 0; + + memset(info->blkUnitRes, 0, sizeof(*info->blkUnitRes) * info->unitNum); + + for (int32_t k = 0; k < info->unitNum; ++k) { + int32_t index = -1; + SFilterComUnit *cunit = &info->cunits[k]; + + if (FILTER_NO_MERGE_DATA_TYPE(cunit->dataType)) { + continue; + } + + for(int32_t i = 0; i < numOfCols; ++i) { + if (pDataStatis[i].colId == cunit->colId) { + index = i; + break; + } + } + + if (index == -1) { + continue; + } + + if (pDataStatis[index].numOfNull <= 0) { + if (cunit->optr == TSDB_RELATION_ISNULL) { + info->blkUnitRes[k] = -1; + rmUnit = 1; + continue; + } + + if (cunit->optr == TSDB_RELATION_NOTNULL) { + info->blkUnitRes[k] = 1; + rmUnit = 1; + continue; + } + } else { + if (pDataStatis[index].numOfNull == numOfRows) { + if (cunit->optr == TSDB_RELATION_ISNULL) { + info->blkUnitRes[k] = 1; + rmUnit = 1; + continue; + } + + info->blkUnitRes[k] = -1; + rmUnit = 1; + continue; + } + } + + if (cunit->optr == TSDB_RELATION_ISNULL || cunit->optr == TSDB_RELATION_NOTNULL + || cunit->optr == TSDB_RELATION_IN || cunit->optr == TSDB_RELATION_LIKE + || cunit->optr == TSDB_RELATION_NOT_EQUAL) { + continue; + } + + SDataStatis* pDataBlockst = &pDataStatis[index]; + void *minVal, *maxVal; + + if (cunit->dataType == TSDB_DATA_TYPE_FLOAT) { + float minv = (float)(*(double *)(&pDataBlockst->min)); + float maxv = (float)(*(double *)(&pDataBlockst->max)); + + minVal = &minv; + maxVal = &maxv; + } else { + minVal = &pDataBlockst->min; + maxVal = &pDataBlockst->max; + } + + bool minRes = false, maxRes = false; + + if (cunit->rfunc >= 0) { + minRes = (*gRangeCompare[cunit->rfunc])(minVal, minVal, cunit->valData, cunit->valData2, gDataCompare[cunit->func]); + maxRes = (*gRangeCompare[cunit->rfunc])(maxVal, maxVal, cunit->valData, cunit->valData2, gDataCompare[cunit->func]); + + if (minRes && maxRes) { + info->blkUnitRes[k] = 1; + rmUnit = 1; + } else if ((!minRes) && (!maxRes)) { + minRes = filterDoCompare(gDataCompare[cunit->func], TSDB_RELATION_LESS_EQUAL, minVal, cunit->valData); + maxRes = filterDoCompare(gDataCompare[cunit->func], TSDB_RELATION_GREATER_EQUAL, maxVal, cunit->valData2); + + if (minRes && maxRes) { + continue; + } + + info->blkUnitRes[k] = -1; + rmUnit = 1; + } + } else { + minRes = filterDoCompare(gDataCompare[cunit->func], cunit->optr, minVal, cunit->valData); + maxRes = filterDoCompare(gDataCompare[cunit->func], cunit->optr, maxVal, cunit->valData); + + if (minRes && maxRes) { + info->blkUnitRes[k] = 1; + rmUnit = 1; + } else if ((!minRes) && (!maxRes)) { + if (cunit->optr == TSDB_RELATION_EQUAL) { + minRes = filterDoCompare(gDataCompare[cunit->func], TSDB_RELATION_GREATER, minVal, cunit->valData); + maxRes = filterDoCompare(gDataCompare[cunit->func], TSDB_RELATION_LESS, maxVal, cunit->valData); + if (minRes || maxRes) { + info->blkUnitRes[k] = -1; + rmUnit = 1; + } + + continue; + } + + info->blkUnitRes[k] = -1; + rmUnit = 1; + } + } + + } + + CHK_LRET(rmUnit == 0, TSDB_CODE_SUCCESS, "NO Block Filter APPLY"); + + info->blkGroupNum = info->groupNum; + + uint16_t *unitNum = info->blkUnits; + uint16_t *unitIdx = unitNum + 1; + int32_t all = 0, empty = 0; + + for (uint32_t g = 0; g < info->groupNum; ++g) { + SFilterGroup *group = &info->groups[g]; + *unitNum = group->unitNum; + all = 0; + empty = 0; + + for (uint32_t u = 0; u < group->unitNum; ++u) { + uint16_t uidx = group->unitIdxs[u]; + if (info->blkUnitRes[uidx] == 1) { + --(*unitNum); + all = 1; + continue; + } else if (info->blkUnitRes[uidx] == -1) { + *unitNum = 0; + empty = 1; + break; + } + + *(unitIdx++) = uidx; + } + + if (*unitNum == 0) { + --info->blkGroupNum; + assert(empty || all); + + if (empty) { + FILTER_SET_FLAG(info->blkFlag, FI_STATUS_BLK_EMPTY); + } else { + FILTER_SET_FLAG(info->blkFlag, FI_STATUS_BLK_ALL); + goto _return; + } + + continue; + } + + unitNum = unitIdx; + ++unitIdx; + } + + if (info->blkGroupNum) { + FILTER_CLR_FLAG(info->blkFlag, FI_STATUS_BLK_EMPTY); + FILTER_SET_FLAG(info->blkFlag, FI_STATUS_BLK_ACTIVE); + } + +_return: + + filterDumpInfoToString(info, "Block Filter", 2); + + return TSDB_CODE_SUCCESS; +} + +bool filterExecuteBasedOnStatisImpl(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { + SFilterInfo *info = (SFilterInfo *)pinfo; + bool all = true; + uint16_t *unitIdx = NULL; + + *p = calloc(numOfRows, sizeof(int8_t)); + + for (int32_t i = 0; i < numOfRows; ++i) { + //FILTER_UNIT_CLR_F(info); + + unitIdx = info->blkUnits; + + for (uint32_t g = 0; g < info->blkGroupNum; ++g) { + uint16_t unitNum = *(unitIdx++); + for (uint32_t u = 0; u < unitNum; ++u) { + SFilterComUnit *cunit = &info->cunits[*(unitIdx + u)]; + void *colData = (char *)cunit->colData + cunit->dataSize * i; + + //if (FILTER_UNIT_GET_F(info, uidx)) { + // p[i] = FILTER_UNIT_GET_R(info, uidx); + //} else { + uint8_t optr = cunit->optr; + + if (isNull(colData, cunit->dataType)) { + (*p)[i] = optr == TSDB_RELATION_ISNULL ? true : false; + } else { + if (optr == TSDB_RELATION_NOTNULL) { + (*p)[i] = 1; + } else if (optr == TSDB_RELATION_ISNULL) { + (*p)[i] = 0; + } else if (cunit->rfunc >= 0) { + (*p)[i] = (*gRangeCompare[cunit->rfunc])(colData, colData, cunit->valData, cunit->valData2, gDataCompare[cunit->func]); + } else { + (*p)[i] = filterDoCompare(gDataCompare[cunit->func], cunit->optr, colData, cunit->valData); + } + + //FILTER_UNIT_SET_R(info, uidx, p[i]); + //FILTER_UNIT_SET_F(info, uidx); + } + + if ((*p)[i] == 0) { + break; + } + } + + if ((*p)[i]) { + break; + } + + unitIdx += unitNum; + } + + if ((*p)[i] == 0) { + all = false; + } + } + + return all; +} + + + +int32_t filterExecuteBasedOnStatis(SFilterInfo *info, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols, bool* all) { + if (statis && numOfRows >= FILTER_RM_UNIT_MIN_ROWS) { + info->blkFlag = 0; + + filterRmUnitByRange(info, statis, numOfCols, numOfRows); + + if (info->blkFlag) { + if (FILTER_GET_FLAG(info->blkFlag, FI_STATUS_BLK_ALL)) { + *all = true; + goto _return; + } else if (FILTER_GET_FLAG(info->blkFlag, FI_STATUS_BLK_EMPTY)) { + *all = false; + goto _return; + } + + assert(info->unitNum > 1); + + *all = filterExecuteBasedOnStatisImpl(info, numOfRows, p, statis, numOfCols); + + goto _return; + } + } + + return 1; + +_return: + info->blkFlag = 0; + + return TSDB_CODE_SUCCESS; +} + + +static FORCE_INLINE bool filterExecuteImplAll(void *info, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { return true; } -static FORCE_INLINE bool filterExecuteImplEmpty(void *info, int32_t numOfRows, int8_t* p) { +static FORCE_INLINE bool filterExecuteImplEmpty(void *info, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { return false; } -static FORCE_INLINE bool filterExecuteImplIsNull(void *pinfo, int32_t numOfRows, int8_t* p) { +static FORCE_INLINE bool filterExecuteImplIsNull(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { SFilterInfo *info = (SFilterInfo *)pinfo; bool all = true; + + if (filterExecuteBasedOnStatis(info, numOfRows, p, statis, numOfCols, &all) == 0) { + return all; + } + + *p = calloc(numOfRows, sizeof(int8_t)); for (int32_t i = 0; i < numOfRows; ++i) { uint16_t uidx = info->groups[0].unitIdxs[0]; void *colData = (char *)info->cunits[uidx].colData + info->cunits[uidx].dataSize * i; - p[i] = isNull(colData, info->cunits[uidx].dataType); - if (p[i] == 0) { + (*p)[i] = isNull(colData, info->cunits[uidx].dataType); + if ((*p)[i] == 0) { all = false; } } return all; } -static FORCE_INLINE bool filterExecuteImplNotNull(void *pinfo, int32_t numOfRows, int8_t* p) { +static FORCE_INLINE bool filterExecuteImplNotNull(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { SFilterInfo *info = (SFilterInfo *)pinfo; bool all = true; - + + if (filterExecuteBasedOnStatis(info, numOfRows, p, statis, numOfCols, &all) == 0) { + return all; + } + + *p = calloc(numOfRows, sizeof(int8_t)); + for (int32_t i = 0; i < numOfRows; ++i) { uint16_t uidx = info->groups[0].unitIdxs[0]; void *colData = (char *)info->cunits[uidx].colData + info->cunits[uidx].dataSize * i; - p[i] = !isNull(colData, info->cunits[uidx].dataType); - if (p[i] == 0) { + (*p)[i] = !isNull(colData, info->cunits[uidx].dataType); + if ((*p)[i] == 0) { all = false; } } @@ -2578,7 +2908,7 @@ static FORCE_INLINE bool filterExecuteImplNotNull(void *pinfo, int32_t numOfRows return all; } -bool filterExecuteImplRange(void *pinfo, int32_t numOfRows, int8_t* p) { +bool filterExecuteImplRange(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { SFilterInfo *info = (SFilterInfo *)pinfo; bool all = true; uint16_t dataSize = info->cunits[0].dataSize; @@ -2587,6 +2917,12 @@ bool filterExecuteImplRange(void *pinfo, int32_t numOfRows, int8_t* p) { void *valData = info->cunits[0].valData; void *valData2 = info->cunits[0].valData2; __compar_fn_t func = gDataCompare[info->cunits[0].func]; + + if (filterExecuteBasedOnStatis(info, numOfRows, p, statis, numOfCols, &all) == 0) { + return all; + } + + *p = calloc(numOfRows, sizeof(int8_t)); for (int32_t i = 0; i < numOfRows; ++i) { if (isNull(colData, info->cunits[0].dataType)) { @@ -2595,9 +2931,9 @@ bool filterExecuteImplRange(void *pinfo, int32_t numOfRows, int8_t* p) { continue; } - p[i] = (*rfunc)(colData, colData, valData, valData2, func); + (*p)[i] = (*rfunc)(colData, colData, valData, valData2, func); - if (p[i] == 0) { + if ((*p)[i] == 0) { all = false; } @@ -2607,9 +2943,15 @@ bool filterExecuteImplRange(void *pinfo, int32_t numOfRows, int8_t* p) { return all; } -bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t* p) { +bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { SFilterInfo *info = (SFilterInfo *)pinfo; bool all = true; + + if (filterExecuteBasedOnStatis(info, numOfRows, p, statis, numOfCols, &all) == 0) { + return all; + } + + *p = calloc(numOfRows, sizeof(int8_t)); for (int32_t i = 0; i < numOfRows; ++i) { uint16_t uidx = info->groups[0].unitIdxs[0]; @@ -2619,9 +2961,9 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t* p) { continue; } - p[i] = filterDoCompare(gDataCompare[info->cunits[uidx].func], info->cunits[uidx].optr, colData, info->cunits[uidx].valData); + (*p)[i] = filterDoCompare(gDataCompare[info->cunits[uidx].func], info->cunits[uidx].optr, colData, info->cunits[uidx].valData); - if (p[i] == 0) { + if ((*p)[i] == 0) { all = false; } } @@ -2630,10 +2972,16 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t* p) { } -bool filterExecuteImpl(void *pinfo, int32_t numOfRows, int8_t* p) { +bool filterExecuteImpl(void *pinfo, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { SFilterInfo *info = (SFilterInfo *)pinfo; bool all = true; + if (filterExecuteBasedOnStatis(info, numOfRows, p, statis, numOfCols, &all) == 0) { + return all; + } + + *p = calloc(numOfRows, sizeof(int8_t)); + for (int32_t i = 0; i < numOfRows; ++i) { //FILTER_UNIT_CLR_F(info); @@ -2650,33 +2998,33 @@ bool filterExecuteImpl(void *pinfo, int32_t numOfRows, int8_t* p) { uint8_t optr = cunit->optr; if (isNull(colData, cunit->dataType)) { - p[i] = optr == TSDB_RELATION_ISNULL ? true : false; + (*p)[i] = optr == TSDB_RELATION_ISNULL ? true : false; } else { if (optr == TSDB_RELATION_NOTNULL) { - p[i] = 1; + (*p)[i] = 1; } else if (optr == TSDB_RELATION_ISNULL) { - p[i] = 0; + (*p)[i] = 0; } else if (cunit->rfunc >= 0) { - p[i] = (*gRangeCompare[cunit->rfunc])(colData, colData, cunit->valData, cunit->valData2, gDataCompare[cunit->func]); + (*p)[i] = (*gRangeCompare[cunit->rfunc])(colData, colData, cunit->valData, cunit->valData2, gDataCompare[cunit->func]); } else { - p[i] = filterDoCompare(gDataCompare[cunit->func], cunit->optr, colData, cunit->valData); + (*p)[i] = filterDoCompare(gDataCompare[cunit->func], cunit->optr, colData, cunit->valData); } //FILTER_UNIT_SET_R(info, uidx, p[i]); //FILTER_UNIT_SET_F(info, uidx); } - if (p[i] == 0) { + if ((*p)[i] == 0) { break; } } - if (p[i]) { + if ((*p)[i]) { break; } } - if (p[i] == 0) { + if ((*p)[i] == 0) { all = false; } } @@ -2684,8 +3032,9 @@ bool filterExecuteImpl(void *pinfo, int32_t numOfRows, int8_t* p) { return all; } -FORCE_INLINE bool filterExecute(SFilterInfo *info, int32_t numOfRows, int8_t* p) { - return (*info->func)(info, numOfRows, p); + +FORCE_INLINE bool filterExecute(SFilterInfo *info, int32_t numOfRows, int8_t** p, SDataStatis *statis, int16_t numOfCols) { + return (*info->func)(info, numOfRows, p, statis, numOfCols); } int32_t filterSetExecFunc(SFilterInfo *info) { @@ -2767,7 +3116,7 @@ _return: return TSDB_CODE_SUCCESS; } -int32_t filterSetColFieldData(SFilterInfo *info, int16_t colId, void *data) { +int32_t filterSetColFieldData(SFilterInfo *info, int32_t numOfCols, SArray* pDataBlock) { CHK_LRET(info == NULL, TSDB_CODE_QRY_APP_ERROR, "info NULL"); CHK_LRET(info->fields[FLD_TYPE_COLUMN].num <= 0, TSDB_CODE_QRY_APP_ERROR, "no column fileds"); @@ -2778,10 +3127,14 @@ int32_t filterSetColFieldData(SFilterInfo *info, int16_t colId, void *data) { for (uint16_t i = 0; i < info->fields[FLD_TYPE_COLUMN].num; ++i) { SFilterField* fi = &info->fields[FLD_TYPE_COLUMN].fields[i]; SSchema* sch = fi->desc; - if (sch->colId == colId) { - fi->data = data; - - break; + + for (int32_t j = 0; j < numOfCols; ++j) { + SColumnInfoData* pColInfo = taosArrayGet(pDataBlock, j); + if (sch->colId == pColInfo->info.colId) { + fi->data = pColInfo->pData; + + break; + } } } @@ -2880,16 +3233,17 @@ bool filterRangeExecute(SFilterInfo *info, SDataStatis *pDataStatis, int32_t num // no statistics data, load the true data block if (index == -1) { - return true; + break; } // not support pre-filter operation on binary/nchar data type if (FILTER_NO_MERGE_DATA_TYPE(ctx->type)) { - return true; + break; } if ((pDataStatis[index].numOfNull <= 0) && (ctx->isnull && !ctx->notnull && !ctx->isrange)) { - return false; + ret = false; + break; } // all data in current column are NULL, no need to check its boundary value @@ -2897,7 +3251,8 @@ bool filterRangeExecute(SFilterInfo *info, SDataStatis *pDataStatis, int32_t num // if isNULL query exists, load the null data column if ((ctx->notnull || ctx->isrange) && (!ctx->isnull)) { - return false; + ret = false; + break; } continue; diff --git a/tests/script/general/parser/condition.sim b/tests/script/general/parser/condition.sim index 56706467f1be2aae68244c491badd3604df7d3af..c3aed7e2a3b04c0ca2e27e2e62d92009e8b2fe8e 100644 --- a/tests/script/general/parser/condition.sim +++ b/tests/script/general/parser/condition.sim @@ -3,6 +3,7 @@ system sh/stop_dnodes.sh system sh/deploy.sh -n dnode1 -i 1 system sh/cfg.sh -n dnode1 -c walLevel -v 1 system sh/cfg.sh -n dnode1 -c maxtablespervnode -v 4 +system sh/cfg.sh -n dnode1 -c cache -v 1 system sh/exec.sh -n dnode1 -s start sleep 100 @@ -93,6 +94,47 @@ sql insert into tb3_2 values ('2021-05-06 18:19:28',15,NULL,15,NULL,15,NULL,true sql insert into tb3_2 values ('2021-06-06 18:19:28',NULL,16.0,NULL,16,NULL,16.0,NULL,'16',NULL) sql insert into tb3_2 values ('2021-07-06 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + +sql create table stb4 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9),c10 binary(16300)) TAGS(t1 int, t2 binary(10), t3 double) + +sql create table tb4_0 using stb4 tags(0,'0',0.0) +sql create table tb4_1 using stb4 tags(1,'1',1.0) +sql create table tb4_2 using stb4 tags(2,'2',2.0) +sql create table tb4_3 using stb4 tags(3,'3',3.0) +sql create table tb4_4 using stb4 tags(4,'4',4.0) + +$i = 0 +$ts0 = 1625850000000 +$blockNum = 5 +$delta = 0 +$tbname0 = tb4_ +$a = 0 +$b = 200 +$c = 400 +while $i < $blockNum + $x = 0 + $rowNum = 1200 + while $x < $rowNum + $ts = $ts0 + $x + $a = $a + 1 + $b = $b + 1 + $c = $c + 1 + $d = $x / 10 + $tin = $rowNum + $binary = 'binary . $c + $binary = $binary . ' + $nchar = 'nchar . $c + $nchar = $nchar . ' + $tbname = 'tb4_ . $i + $tbname = $tbname . ' + sql insert into $tbname values ( $ts , $a , $b , $c , $d , $d , $c , true, $binary , $nchar , $binary ) + $x = $x + 1 + endw + + $i = $i + 1 + $ts0 = $ts0 + 259200000 +endw + sleep 100 sql connect diff --git a/tests/script/general/parser/condition_query.sim b/tests/script/general/parser/condition_query.sim index 7600e510d399d461043f196cc42dda825d7dcced..8dfa8dae0c9e0c56116cb4132d1e940e99f45d48 100644 --- a/tests/script/general/parser/condition_query.sim +++ b/tests/script/general/parser/condition_query.sim @@ -1959,6 +1959,117 @@ if $rows != 14 then return -1 endi +sql select ts,c1 from stb4 where c1 = 200; +if $rows != 1 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.199@ then + return -1 +endi + +sql select ts,c1 from stb4 where c1 != 200; +if $rows != 5999 then + return -1 +endi + + + +sql select ts,c1,c2,c3,c4 from stb4 where c1 >= 200 and c2 > 500 and c3 < 800 and c4 between 33 and 37 and c4 != 35 and c2 < 555 and c1 < 339 and c1 in (331,333,335); +if $rows != 3 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.330@ then + return -1 +endi +if $data10 != @21-07-10 01:00:00.332@ then + return -1 +endi +if $data20 != @21-07-10 01:00:00.334@ then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 > -3 and c1 < 5; +if $rows != 4 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.000@ then + return -1 +endi +if $data10 != @21-07-10 01:00:00.001@ then + return -1 +endi +if $data20 != @21-07-10 01:00:00.002@ then + return -1 +endi +if $data30 != @21-07-10 01:00:00.003@ then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 >= 2 and c1 < 5; +if $rows != 3 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.001@ then + return -1 +endi +if $data10 != @21-07-10 01:00:00.002@ then + return -1 +endi +if $data20 != @21-07-10 01:00:00.003@ then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 >= -3 and c1 < 1300; +if $rows != 1299 then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 >= 1298 and c1 < 1300 or c2 > 210 and c2 < 213; +if $rows != 4 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.010@ then + return -1 +endi +if $data10 != @21-07-10 01:00:00.011@ then + return -1 +endi +if $data20 != @21-07-13 01:00:00.097@ then + return -1 +endi +if $data30 != @21-07-13 01:00:00.098@ then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 >= -3; +if $rows != 6000 then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 < 1400; +if $rows != 1399 then + return -1 +endi + +sql select ts,c1,c2,c3,c4 from stb4 where c1 < 1100; +if $rows != 1099 then + return -1 +endi + + +sql select ts,c1,c2,c3,c4 from stb4 where c1 in(10,100, 1100,3300) and c1 != 10; +if $rows != 3 then + return -1 +endi +if $data00 != @21-07-10 01:00:00.099@ then + return -1 +endi +if $data10 != @21-07-10 01:00:01.099@ then + return -1 +endi +if $data20 != @21-07-16 01:00:00.899@ then + return -1 +endi print "ts test"