diff --git a/source/libs/scalar/inc/filterInt.h b/source/libs/scalar/inc/filterInt.h index 87327a365763771775106851f6442a66887d5bbc..e7695b2f04ea4fed2ebf9a77bf00717e1978003e 100644 --- a/source/libs/scalar/inc/filterInt.h +++ b/source/libs/scalar/inc/filterInt.h @@ -276,7 +276,7 @@ struct SFilterInfo { #define FILTER_CLR_FLAG(st, f) st &= (~f) #define SIMPLE_COPY_VALUES(dst, src) *((int64_t *)dst) = *((int64_t *)src) -#define FILTER_PACKAGE_UNIT_HASH_KEY(v, optr, idx1, idx2) do { char *_t = (char *)v; _t[0] = optr; *(uint32_t *)(_t + 1) = idx1; *(uint32_t *)(_t + 3) = idx2; } while (0) +#define FLT_PACKAGE_UNIT_HASH_KEY(v, op1, op2, lidx, ridx, ridx2) do { char *_t = (char *)(v); _t[0] = (op1); _t[1] = (op2); *(uint32_t *)(_t + 2) = (lidx); *(uint32_t *)(_t + 2 + sizeof(uint32_t)) = (ridx); } while (0) #define FILTER_GREATER(cr,sflag,eflag) ((cr > 0) || ((cr == 0) && (FILTER_GET_FLAG(sflag,RANGE_FLG_EXCLUDE) || FILTER_GET_FLAG(eflag,RANGE_FLG_EXCLUDE)))) #define FILTER_COPY_RA(dst, src) do { (dst)->sflag = (src)->sflag; (dst)->eflag = (src)->eflag; (dst)->s = (src)->s; (dst)->e = (src)->e; } while (0) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 5555a52c8e171a50bac4fa9f7b894ce4b8156b60..9e676354374fce6c2e733ac8d42c45baef9bada8 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -1068,14 +1068,14 @@ int32_t filterAddFieldFromNode(SFilterInfo *info, SNode *node, SFilterFieldId *f return TSDB_CODE_SUCCESS; } -int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint32_t *uidx) { +int32_t filterAddUnitImpl(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint8_t optr2, SFilterFieldId *right2, uint32_t *uidx) { if (FILTER_GET_FLAG(info->options, FLT_OPTION_NEED_UNIQE)) { if (info->pctx.unitHash == NULL) { info->pctx.unitHash = taosHashInit(FILTER_DEFAULT_GROUP_SIZE * FILTER_DEFAULT_UNIT_SIZE, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, false); } else { - int64_t v = 0; - FILTER_PACKAGE_UNIT_HASH_KEY(&v, optr, left->idx, right ? right->idx : -1); - void *hu = taosHashGet(info->pctx.unitHash, &v, sizeof(v)); + char v[14] = {0}; + FLT_PACKAGE_UNIT_HASH_KEY(&v, optr, optr2, left->idx, (right ? right->idx : -1), (right2 ? right2->idx : -1)); + void *hu = taosHashGet(info->pctx.unitHash, v, sizeof(v)); if (hu) { *uidx = *(uint32_t *)hu; return TSDB_CODE_SUCCESS; @@ -1097,7 +1097,11 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi if (right) { u->right = *right; } - + u->compare.optr2 = optr2; + if (right2) { + u->right2 = *right2; + } + if (u->right.type == FLD_TYPE_VALUE) { SFilterField *val = FILTER_UNIT_RIGHT_FIELD(info, u); assert(FILTER_GET_FLAG(val->flag, FLD_TYPE_VALUE)); @@ -1118,9 +1122,9 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi *uidx = info->unitNum; if (FILTER_GET_FLAG(info->options, FLT_OPTION_NEED_UNIQE)) { - int64_t v = 0; - FILTER_PACKAGE_UNIT_HASH_KEY(&v, optr, left->idx, right ? right->idx : -1); - taosHashPut(info->pctx.unitHash, &v, sizeof(v), uidx, sizeof(*uidx)); + char v[14] = {0}; + FLT_PACKAGE_UNIT_HASH_KEY(&v, optr, optr2, left->idx, (right ? right->idx : -1), (right2 ? right2->idx : -1)); + taosHashPut(info->pctx.unitHash, v, sizeof(v), uidx, sizeof(*uidx)); } ++info->unitNum; @@ -1129,6 +1133,9 @@ int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFi } +int32_t filterAddUnit(SFilterInfo *info, uint8_t optr, SFilterFieldId *left, SFilterFieldId *right, uint32_t *uidx) { + return filterAddUnitImpl(info, optr, left, right, 0, NULL, uidx); +} int32_t filterAddUnitToGroup(SFilterGroup *group, uint32_t unitIdx) { if (group->unitNum >= group->unitSize) { @@ -1305,8 +1312,8 @@ int32_t filterAddGroupUnitFromCtx(SFilterInfo *dst, SFilterInfo *src, SFilterRan SIMPLE_COPY_VALUES(data2, &ra->e); filterAddField(dst, NULL, &data2, FLD_TYPE_VALUE, &right2, tDataTypes[type].bytes, true); - filterAddUnit(dst, FILTER_GET_FLAG(ra->sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, &uidx); - filterAddUnitRight(dst, FILTER_GET_FLAG(ra->eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, uidx); + filterAddUnitImpl(dst, FILTER_GET_FLAG(ra->sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, + FILTER_GET_FLAG(ra->eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, &uidx); filterAddUnitToGroup(g, uidx); return TSDB_CODE_SUCCESS; } @@ -1380,8 +1387,8 @@ int32_t filterAddGroupUnitFromCtx(SFilterInfo *dst, SFilterInfo *src, SFilterRan SIMPLE_COPY_VALUES(data2, &r->ra.e); filterAddField(dst, NULL, &data2, FLD_TYPE_VALUE, &right2, tDataTypes[type].bytes, true); - filterAddUnit(dst, FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, &uidx); - filterAddUnitRight(dst, FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, uidx); + filterAddUnitImpl(dst, FILTER_GET_FLAG(r->ra.sflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_GREATER_THAN : OP_TYPE_GREATER_EQUAL, &left, &right, + FILTER_GET_FLAG(r->ra.eflag, RANGE_FLG_EXCLUDE) ? OP_TYPE_LOWER_THAN : OP_TYPE_LOWER_EQUAL, &right2, &uidx); filterAddUnitToGroup(g, uidx); } @@ -2231,6 +2238,44 @@ int32_t filterMergeGroupUnits(SFilterInfo *info, SFilterGroupCtx** gRes, int32_t return TSDB_CODE_SUCCESS; } +bool filterIsSameUnits(SFilterColInfo* pCol1, SFilterColInfo* pCol2) { + if (pCol1->type != pCol2->type) { + return false; + } + + if (RANGE_TYPE_MR_CTX == pCol1->type) { + SFilterRangeCtx* pCtx1 = (SFilterRangeCtx*)pCol1->info; + SFilterRangeCtx* pCtx2 = (SFilterRangeCtx*)pCol2->info; + + if ((pCtx1->isnull != pCtx2->isnull) || (pCtx1->notnull != pCtx2->notnull) || (pCtx1->isrange != pCtx2->isrange)) { + return false; + } + + + SFilterRangeNode* pNode1 = pCtx1->rs; + SFilterRangeNode* pNode2 = pCtx2->rs; + + while (true) { + if (NULL == pNode1 && NULL == pNode2) { + break; + } + + if (NULL == pNode1 || NULL == pNode2) { + return false; + } + + if (pNode1->ra.s != pNode2->ra.s || pNode1->ra.e != pNode2->ra.e || pNode1->ra.sflag != pNode2->ra.sflag || pNode1->ra.eflag != pNode2->ra.eflag) { + return false; + } + + pNode1 = pNode1->next; + pNode2 = pNode2->next; + } + } + + return true; +} + void filterCheckColConflict(SFilterGroupCtx* gRes1, SFilterGroupCtx* gRes2, bool *conflict) { uint32_t idx1 = 0, idx2 = 0, m = 0, n = 0; bool equal = false; @@ -2256,6 +2301,11 @@ void filterCheckColConflict(SFilterGroupCtx* gRes1, SFilterGroupCtx* gRes2, bool return; } + if (!filterIsSameUnits(&gRes1->colInfo[idx1], &gRes2->colInfo[idx2])) { + *conflict = true; + return; + } + // for long in operation if (gRes1->colInfo[idx1].optr == OP_TYPE_EQUAL && gRes2->colInfo[idx2].optr == OP_TYPE_EQUAL) { SFilterRangeCtx* ctx = gRes1->colInfo[idx1].info; @@ -2869,17 +2919,22 @@ int32_t filterRmUnitByRange(SFilterInfo *info, SColumnDataAgg *pDataStatis, int3 for (uint32_t g = 0; g < info->groupNum; ++g) { SFilterGroup *group = &info->groups[g]; + // first is block unint num for a group, following append unitNum blkUnitIdx for this group *unitNum = group->unitNum; all = 0; empty = 0; + // save group idx start pointer + uint32_t * pGroupIdx = unitIdx; for (uint32_t u = 0; u < group->unitNum; ++u) { uint32_t uidx = group->unitIdxs[u]; if (info->blkUnitRes[uidx] == 1) { + // blkUnitRes == 1 is always true, so need not compare every time, delete this unit from group --(*unitNum); all = 1; continue; } else if (info->blkUnitRes[uidx] == -1) { + // blkUnitRes == -1 is alwary false, so in group is alwary false, need delete this group from blkGroupNum *unitNum = 0; empty = 1; break; @@ -2889,6 +2944,9 @@ int32_t filterRmUnitByRange(SFilterInfo *info, SColumnDataAgg *pDataStatis, int3 } if (*unitNum == 0) { + // if unit num is zero, reset unitIdx to start on this group + unitIdx = pGroupIdx; + --info->blkGroupNum; assert(empty || all); diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 97295d75e072c3c561f579bfc8cb2c15489da858..46bae734ea72901ef704969045186a10c52a9a72 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -344,6 +344,7 @@ # --- scalar ---- ./test.sh -f tsim/scalar/in.sim ./test.sh -f tsim/scalar/scalar.sim +./test.sh -f tsim/scalar/filter.sim # ---- alter ---- ./test.sh -f tsim/alter/cached_schema_after_alter.sim diff --git a/tests/script/tsim/scalar/filter.sim b/tests/script/tsim/scalar/filter.sim new file mode 100644 index 0000000000000000000000000000000000000000..923166227856189e91848150ed9e848f946b066d --- /dev/null +++ b/tests/script/tsim/scalar/filter.sim @@ -0,0 +1,38 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +print ======== step1 +sql drop database if exists db1; +sql create database db1 vgroups 3; +sql use db1; +sql create stable st1 (fts timestamp, fbool bool, ftiny tinyint, fsmall smallint, fint int, fbig bigint, futiny tinyint unsigned, fusmall smallint unsigned, fuint int unsigned, fubig bigint unsigned, ffloat float, fdouble double, fbin binary(10), fnchar nchar(10)) tags(tts timestamp, tbool bool, ttiny tinyint, tsmall smallint, tint int, tbig bigint, tutiny tinyint unsigned, tusmall smallint unsigned, tuint int unsigned, tubig bigint unsigned, tfloat float, tdouble double, tbin binary(10), tnchar nchar(10)); +sql create table tb1 using st1 tags('2022-07-10 16:31:00', true, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql create table tb2 using st1 tags('2022-07-10 16:32:00', false, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql create table tb3 using st1 tags('2022-07-10 16:33:00', true, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); + +sql insert into tb1 values ('2022-07-10 16:31:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb1 values ('2022-07-10 16:31:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb1 values ('2022-07-10 16:31:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb1 values ('2022-07-10 16:31:04', true, 4, 4, 4, 4, 4, 4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb1 values ('2022-07-10 16:31:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql insert into tb2 values ('2022-07-10 16:32:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb2 values ('2022-07-10 16:32:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb2 values ('2022-07-10 16:32:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb2 values ('2022-07-10 16:32:04', true, 4, 4, 4, 4, 4, 4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb2 values ('2022-07-10 16:32:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql insert into tb3 values ('2022-07-10 16:33:01', false, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 'a', 'a'); +sql insert into tb3 values ('2022-07-10 16:33:02', true, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0, 'b', 'b'); +sql insert into tb3 values ('2022-07-10 16:33:03', false, 3, 3, 3, 3, 3, 3, 3, 3, 3.0, 3.0, 'c', 'c'); +sql insert into tb3 values ('2022-07-10 16:33:04', true, 4, 4, 4, 4, 4, 4, 4, 4, 4.0, 4.0, 'd', 'd'); +sql insert into tb3 values ('2022-07-10 16:33:05', false, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0, 'e', 'e'); + +sql select * from st1 where (ttiny > 2 or ftiny < 5) and ftiny > 2; +if $rows != 7 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT