未验证 提交 e2d45438 编写于 作者: D dapan1121 提交者: GitHub

Merge pull request #21405 from taosdata/szhou/scl-flt

enhance: scalar filter improvement
......@@ -180,6 +180,7 @@ extern int32_t tsRpcRetryInterval;
extern bool tsDisableStream;
extern int64_t tsStreamBufferSize;
extern int64_t tsCheckpointInterval;
extern bool tsFilterScalarMode;
// #define NEEDTO_COMPRESSS_MSG(size) (tsCompressMsgSize != -1 && (size) > tsCompressMsgSize)
......
......@@ -208,6 +208,7 @@ char tsUdfdLdLibPath[512] = "";
bool tsDisableStream = false;
int64_t tsStreamBufferSize = 128 * 1024 * 1024;
int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000;
bool tsFilterScalarMode = false;
#ifndef _STORAGE
int32_t taosSetTfsCfg(SConfig *pCfg) {
......@@ -522,6 +523,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, 0) != 0) return -1;
if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, 0) != 0) return -1;
GRANT_CFG_ADD;
return 0;
}
......@@ -898,6 +901,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) {
tsStreamBufferSize = cfgGetItem(pCfg, "streamBufferSize")->i64;
tsCheckpointInterval = cfgGetItem(pCfg, "checkpointInterval")->i64;
tsFilterScalarMode = cfgGetItem(pCfg, "filterScalarMode")->bval;
GRANT_CFG_GET;
return 0;
}
......
......@@ -573,8 +573,18 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD
if (colDataIsNull_var(pDst, j)) {
colDataSetNull_var(pDst, numOfRows);
} else {
// fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2
char* p1 = colDataGetVarData(pDst, j);
colDataSetVal(pDst, numOfRows, p1, false);
int32_t len = 0;
if (pDst->info.type == TSDB_DATA_TYPE_JSON) {
len = getJsonValueLen(p1);
} else {
len = varDataTLen(p1);
}
char* p2 = taosMemoryMalloc(len);
memcpy(p2, p1, len);
colDataSetVal(pDst, numOfRows, p2, false);
taosMemoryFree(p2);
}
numOfRows += 1;
j += 1;
......
......@@ -600,9 +600,9 @@ SUdf *udfdGetOrCreateUdf(const char *udfName) {
return udf;
} else {
(*pUdfHash)->expired = true;
taosHashRemove(global.udfsHash, udfName, strlen(udfName));
fnInfo("udfd expired, check for new version. existing udf %s udf version %d, udf created time %" PRIx64,
(*pUdfHash)->name, (*pUdfHash)->version, (*pUdfHash)->createdTime);
taosHashRemove(global.udfsHash, udfName, strlen(udfName));
}
}
......
......@@ -227,8 +227,10 @@ typedef struct SFltTreeStat {
SFilterInfo *info;
} SFltTreeStat;
// Scalar-mode filter context embedded in SFilterInfo (as the sclCtx member).
typedef struct SFltScalarCtx {
SNode *node;
// Array of SFltSclColumnRange elements; filled by fltOptimizeNodes and released in filterFreeInfo.
SArray* fltSclRange;
} SFltScalarCtx;
typedef struct SFltBuildGroupCtx {
......@@ -237,6 +239,11 @@ typedef struct SFltBuildGroupCtx {
int32_t code;
} SFltBuildGroupCtx;
// Value range collected for a single column from the filter conditions.
typedef struct {
// Clone of the column node the range belongs to (created with nodesCloneNode, destroyed with nodesDestroyNode).
SColumnNode *colNode;
// Array of SFltSclPoint: interval endpoints stored as start/end pairs.
SArray *points;
} SFltSclColumnRange;
struct SFilterInfo {
bool scalarMode;
SFltScalarCtx sclCtx;
......
......@@ -14,6 +14,7 @@
*/
#include <tlog.h>
#include "os.h"
#include "tglobal.h"
#include "thash.h"
// #include "queryLog.h"
#include "filter.h"
......@@ -22,6 +23,7 @@
#include "sclInt.h"
#include "tcompare.h"
#include "tdatablock.h"
#include "tsimplehash.h"
#include "ttime.h"
bool filterRangeCompGi(const void *minv, const void *maxv, const void *minr, const void *maxr, __compar_fn_t cfunc) {
......@@ -261,7 +263,7 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) {
comparFn = 19;
} else if (optr == OP_TYPE_NMATCH) {
comparFn = 20;
} else if (optr == OP_TYPE_LIKE) { /* wildcard query using like operator */
} else if (optr == OP_TYPE_LIKE) { /* wildcard query using like operator */
comparFn = 7;
} else if (optr == OP_TYPE_NOT_LIKE) { /* wildcard query using like operator */
comparFn = 26;
......@@ -1636,7 +1638,7 @@ void filterDumpInfoToString(SFilterInfo *info, const char *msg, int32_t options)
SDataType *dType = &var->node.resType;
qDebug("VAL%d => [type:%d][val:%" PRIx64 "]", i, dType->type, var->datum.i); // TODO
} else if (field->data) {
qDebug("VAL%d => [type:NIL][val:NIL]", i); // TODO
qDebug("VAL%d => [type:NIL][val:NIL]", i); // TODO
}
}
......@@ -1843,6 +1845,13 @@ void filterFreeInfo(SFilterInfo *info) {
return;
}
for (int32_t i = 0; i < taosArrayGetSize(info->sclCtx.fltSclRange); ++i) {
SFltSclColumnRange *colRange = taosArrayGet(info->sclCtx.fltSclRange, i);
nodesDestroyNode((SNode *)colRange->colNode);
taosArrayDestroy(colRange->points);
}
taosArrayDestroy(info->sclCtx.fltSclRange);
taosMemoryFreeClear(info->cunits);
taosMemoryFreeClear(info->blkUnitRes);
taosMemoryFreeClear(info->blkUnits);
......@@ -3426,8 +3435,356 @@ _return:
return code;
}
// compare ranges, null < min < val < max. null=null, min=min, max=max
// Kind tag of an SFltSclDatum. The enumerator order is significant: the comparison
// helpers compare/subtract the numeric kind values, so NULL must stay the smallest,
// MIN must precede every concrete value kind and MAX must stay the largest.
typedef enum {
FLT_SCL_DATUM_KIND_NULL,
FLT_SCL_DATUM_KIND_MIN,
FLT_SCL_DATUM_KIND_INT64,
FLT_SCL_DATUM_KIND_UINT64,
FLT_SCL_DATUM_KIND_FLOAT64,
FLT_SCL_DATUM_KIND_VARCHAR,
FLT_SCL_DATUM_KIND_NCHAR,
FLT_SCL_DATUM_KIND_MAX,
} SFltSclDatumKind;
// A single comparable value used as a range endpoint. `kind` selects which union
// member is meaningful; NULL/MIN/MAX kinds act as sentinels and carry no payload.
typedef struct {
SFltSclDatumKind kind;
union {
int64_t i; // for int and bool (1 true, 0 false) and ts
uint64_t u; // for uint
double d; // for double
uint8_t *pData; // for varchar, nchar, len prefixed
};
SDataType type; // TODO: original data type, may not be used?
} SFltSclDatum;
// One endpoint of an interval on the value axis.
typedef struct {
// endpoint value
SFltSclDatum val;
// true when the endpoint value itself is excluded from the interval (open end)
bool excl;
// true for the start (lower) endpoint of an interval, false for the end (upper) endpoint
bool start;
} SFltSclPoint;
/*
 * Compares val1 with val2, where the caller guarantees val2 holds a FLOAT64 datum.
 * The result is whatever the type-specific compare helper returns. For a val1 kind
 * without a dedicated helper an error is logged and the difference of the two kind
 * tags is returned instead.
 */
int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) {
  const SFltSclDatumKind lhsKind = val1->kind;

  if (lhsKind == FLT_SCL_DATUM_KIND_UINT64) {
    return compareUint64Double(&val1->u, &val2->d);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_INT64) {
    return compareInt64Double(&val1->i, &val2->d);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_FLOAT64) {
    return compareDoubleVal(&val1->d, &val2->d);
  }

  // TODO: varchar, nchar
  qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind);
  return (val1->kind - val2->kind);
}
/*
 * Compares val1 with val2, where the caller guarantees val2 holds an INT64 datum.
 * The result is whatever the type-specific compare helper returns. For a val1 kind
 * without a dedicated helper an error is logged and the difference of the two kind
 * tags is returned instead.
 */
int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) {
  const SFltSclDatumKind lhsKind = val1->kind;

  if (lhsKind == FLT_SCL_DATUM_KIND_UINT64) {
    return compareUint64Int64(&val1->u, &val2->i);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_INT64) {
    return compareInt64Val(&val1->i, &val2->i);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_FLOAT64) {
    return compareDoubleInt64(&val1->d, &val2->i);
  }

  // TODO: varchar, nchar
  qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind);
  return (val1->kind - val2->kind);
}
/*
 * Compares val1 with val2, where the caller guarantees val2 holds a UINT64 datum.
 * The result is whatever the type-specific compare helper returns. For a val1 kind
 * without a dedicated helper an error is logged and the difference of the two kind
 * tags is returned instead.
 */
int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) {
  const SFltSclDatumKind lhsKind = val1->kind;

  if (lhsKind == FLT_SCL_DATUM_KIND_UINT64) {
    return compareUint64Val(&val1->u, &val2->u);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_INT64) {
    return compareInt64Uint64(&val1->i, &val2->u);
  }
  if (lhsKind == FLT_SCL_DATUM_KIND_FLOAT64) {
    return compareDoubleUint64(&val1->d, &val2->u);
  }

  // TODO: varchar, nchar
  qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind);
  return (val1->kind - val2->kind);
}
/*
 * Three-way comparison of two datums following the ordering
 * NULL < MIN < any concrete value < MAX (sentinels of the same kind are equal).
 *
 * Returns a negative value, zero or a positive value when val1 sorts before,
 * equal to or after val2.
 *
 * If EITHER side is a sentinel (NULL/MIN/MAX) the result is decided by the order
 * of the kind tags alone. Checking val1 as well as val2 keeps perfectly normal
 * comparisons such as "MIN vs. 42" from falling into the typed helpers' error
 * branch, which logged an error on every such comparison. The sign of the result
 * for those inputs is the same as the kind difference that branch returned.
 */
int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) {
  const bool lhsSentinel = (val1->kind == FLT_SCL_DATUM_KIND_NULL || val1->kind == FLT_SCL_DATUM_KIND_MIN ||
                            val1->kind == FLT_SCL_DATUM_KIND_MAX);
  const bool rhsSentinel = (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN ||
                            val2->kind == FLT_SCL_DATUM_KIND_MAX);

  if (lhsSentinel || rhsSentinel) {
    return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 1 : 0);
  }

  // Both sides carry a concrete value: dispatch on the kind of the right-hand side.
  switch (val2->kind) {
    case FLT_SCL_DATUM_KIND_UINT64:
      return fltSclCompareWithUInt64(val1, val2);
    case FLT_SCL_DATUM_KIND_INT64:
      return fltSclCompareWithInt64(val1, val2);
    case FLT_SCL_DATUM_KIND_FLOAT64:
      return fltSclCompareWithFloat64(val1, val2);
    // TODO: varchar/nchar
    default:
      qError("not supported kind when compare datum. kind2 : %d", val2->kind);
      return 0;
  }
}
/*
 * Strict "sorts before" predicate for interval endpoints.
 *
 * Points are ordered by value first. For equal values the tie is broken by the
 * start/end and inclusive/exclusive flags:
 *   - start vs. start: an inclusive start precedes an exclusive start;
 *   - start vs. end:   the start comes first only when both are inclusive;
 *   - end vs. start:   the end comes first when at least one of them is exclusive;
 *   - end vs. end:     an exclusive end precedes an inclusive end.
 */
bool fltSclLessPoint(SFltSclPoint *pt1, SFltSclPoint *pt2) {
  const int32_t order = fltSclCompareDatum(&pt1->val, &pt2->val);
  if (order < 0) {
    return true;
  }
  if (order > 0) {
    return false;
  }

  // Equal values from here on.
  if (pt1->start) {
    if (pt2->start) {
      return !pt1->excl && pt2->excl;
    }
    return !pt1->excl && !pt2->excl;
  }

  if (pt2->start) {
    return pt1->excl || pt2->excl;
  }
  return pt1->excl && !pt2->excl;
}
/*
 * Merges two point arrays into `result` using fltSclLessPoint as the ordering.
 * On a tie (neither point is "less") the point from pts2 is emitted first.
 * Points are appended to `result`; the inputs are left untouched. Always returns 0.
 */
int32_t fltSclMergeSort(SArray *pts1, SArray *pts2, SArray *result) {
  const size_t n1 = taosArrayGetSize(pts1);
  const size_t n2 = taosArrayGetSize(pts2);
  size_t       idx1 = 0;
  size_t       idx2 = 0;

  while (idx1 < n1 && idx2 < n2) {
    SFltSclPoint *lhs = taosArrayGet(pts1, idx1);
    SFltSclPoint *rhs = taosArrayGet(pts2, idx2);
    if (fltSclLessPoint(lhs, rhs)) {
      taosArrayPush(result, lhs);
      ++idx1;
    } else {
      taosArrayPush(result, rhs);
      ++idx2;
    }
  }

  // At most one of the two tails below is non-empty.
  while (idx1 < n1) {
    SFltSclPoint *rest = taosArrayGet(pts1, idx1);
    taosArrayPush(result, rest);
    ++idx1;
  }
  while (idx2 < n2) {
    SFltSclPoint *rest = taosArrayGet(pts2, idx2);
    taosArrayPush(result, rest);
    ++idx2;
  }
  return 0;
}
/*
 * Computes the union (isUnion == true) or intersection (isUnion == false) of two
 * interval sets given as point arrays and appends the resulting points to `merged`.
 *
 * The merged, ordered point stream is swept while counting how many intervals are
 * currently open. A start point is emitted when the counter reaches the required
 * depth (1 for union, 2 for intersection); an end point is emitted when the counter
 * is at that depth just before the interval is closed. Always returns 0.
 */
int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) {
  const size_t total = taosArrayGetSize(pts1) + taosArrayGetSize(pts2);

  // first merge sort pts1 and pts2
  SArray *sorted = taosArrayInit(total, sizeof(SFltSclPoint));
  fltSclMergeSort(pts1, pts2, sorted);

  const int32_t depthRequired = isUnion ? 1 : 2;
  int32_t       depth = 0;
  for (int32_t k = 0; k < taosArrayGetSize(sorted); ++k) {
    SFltSclPoint *pt = taosArrayGet(sorted, k);
    bool          emit = false;
    if (pt->start) {
      ++depth;
      emit = (depth == depthRequired);
    } else {
      emit = (depth == depthRequired);
      --depth;
    }
    if (emit) {
      taosArrayPush(merged, pt);
    }
  }

  taosArrayDestroy(sorted);
  return 0;
}
// Appends the intersection of the interval sets pts1 and pts2 to `merged` (see fltSclMerge).
int32_t fltSclIntersect(SArray *pts1, SArray *pts2, SArray *merged) {
  const bool asUnion = false;
  return fltSclMerge(pts1, pts2, asUnion, merged);
}
// Appends the union of the interval sets pts1 and pts2 to `merged` (see fltSclMerge).
int32_t fltSclUnion(SArray *pts1, SArray *pts2, SArray *merged) {
  const bool asUnion = true;
  return fltSclMerge(pts1, pts2, asUnion, merged);
}
// One "column <op> value" condition collected from the filter expression tree.
typedef struct {
// column operand (left side of the operator node)
SColumnNode *colNode;
// constant operand (right side of the operator node)
SValueNode *valNode;
// operator type, copied from the operator node's opType
EOperatorType type;
} SFltSclOperator;
/*
 * Looks up the range entry for `colNode` in `colRangeList` (matching with
 * nodesEqualNode). When no entry exists, a new one holding a clone of the column
 * node and an empty point array is appended to the list.
 *
 * Returns a pointer to the element stored inside `colRangeList`.
 */
SFltSclColumnRange *fltSclGetOrCreateColumnRange(SColumnNode *colNode, SArray *colRangeList) {
  int32_t idx = 0;
  while (idx < taosArrayGetSize(colRangeList)) {
    SFltSclColumnRange *existing = taosArrayGet(colRangeList, idx);
    if (nodesEqualNode((SNode *)existing->colNode, (SNode *)colNode)) {
      return existing;
    }
    ++idx;
  }

  // Not found: register a fresh entry with its own copy of the column node.
  SFltSclColumnRange fresh = {0};
  fresh.colNode = (SColumnNode *)nodesCloneNode((SNode *)colNode);
  fresh.points = taosArrayInit(4, sizeof(SFltSclPoint));
  taosArrayPush(colRangeList, &fresh);
  return taosArrayGetLast(colRangeList);
}
/*
 * Fills `datum` from a constant value node.
 *
 * datum->type is always set to the value node's result type. A NULL constant
 * (or a NULL-typed node) yields a NULL datum; bool, signed integers and
 * timestamps are stored as INT64, unsigned integers as UINT64, float/double as
 * FLOAT64.
 *
 * Fixes relative to the previous version:
 *   - BOOL was stored inverted (true -> 0, false -> 1), contradicting the
 *     "1 true, 0 false" contract documented on SFltSclDatum.i.
 *   - For unsupported types datum->kind was left uninitialized; it is now set
 *     to NULL, the same fallback fltSclBuildDatumFromBlockSmaValue uses.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) {
  datum->type = valNode->node.resType;

  if (valNode->isNull) {
    datum->kind = FLT_SCL_DATUM_KIND_NULL;
    return TSDB_CODE_SUCCESS;
  }

  switch (valNode->node.resType.type) {
    case TSDB_DATA_TYPE_NULL: {
      datum->kind = FLT_SCL_DATUM_KIND_NULL;
      break;
    }
    case TSDB_DATA_TYPE_BOOL: {
      datum->kind = FLT_SCL_DATUM_KIND_INT64;
      datum->i = (valNode->datum.b) ? 1 : 0;
      break;
    }
    case TSDB_DATA_TYPE_TINYINT:
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_INT:
    case TSDB_DATA_TYPE_BIGINT:
    case TSDB_DATA_TYPE_TIMESTAMP: {
      datum->kind = FLT_SCL_DATUM_KIND_INT64;
      datum->i = valNode->datum.i;
      break;
    }
    case TSDB_DATA_TYPE_UTINYINT:
    case TSDB_DATA_TYPE_USMALLINT:
    case TSDB_DATA_TYPE_UINT:
    case TSDB_DATA_TYPE_UBIGINT: {
      datum->kind = FLT_SCL_DATUM_KIND_UINT64;
      datum->u = valNode->datum.u;
      break;
    }
    case TSDB_DATA_TYPE_FLOAT:
    case TSDB_DATA_TYPE_DOUBLE: {
      datum->kind = FLT_SCL_DATUM_KIND_FLOAT64;
      datum->d = valNode->datum.d;
      break;
    }
    // TODO:varchar/nchar/json
    default: {
      // Keep the datum well-defined so later comparisons never read an indeterminate kind.
      datum->kind = FLT_SCL_DATUM_KIND_NULL;
      qError("not supported type %d when build datum from value node", valNode->node.resType.type);
      break;
    }
  }
  return TSDB_CODE_SUCCESS;
}
/*
 * Fills `datum` from a block-SMA min/max value, which arrives as a raw 64-bit
 * integer and is reinterpreted according to the column `type`:
 *   - bool / signed integers / timestamp: used as-is (INT64);
 *   - unsigned integers: converted to uint64_t (UINT64);
 *   - float / double: the 64 bits are reinterpreted as a double (FLOAT64);
 *   - anything else: NULL datum, and an error is logged.
 *
 * The bit reinterpretation goes through a union instead of the former
 * `*(double *)&val` cast, which read an int64_t object through an incompatible
 * lvalue type (undefined behavior under the strict-aliasing rules). The
 * resulting values are unchanged.
 *
 * Note: datum->type is not written here. Always returns TSDB_CODE_SUCCESS.
 */
int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int64_t val) {
  switch (type) {
    case TSDB_DATA_TYPE_BOOL:
    case TSDB_DATA_TYPE_TINYINT:
    case TSDB_DATA_TYPE_SMALLINT:
    case TSDB_DATA_TYPE_INT:
    case TSDB_DATA_TYPE_BIGINT:
    case TSDB_DATA_TYPE_TIMESTAMP: {
      datum->kind = FLT_SCL_DATUM_KIND_INT64;
      datum->i = val;
      break;
    }
    case TSDB_DATA_TYPE_UTINYINT:
    case TSDB_DATA_TYPE_USMALLINT:
    case TSDB_DATA_TYPE_UINT:
    case TSDB_DATA_TYPE_UBIGINT: {
      datum->kind = FLT_SCL_DATUM_KIND_UINT64;
      // int64_t -> uint64_t conversion is modular, i.e. it preserves the bit pattern.
      datum->u = (uint64_t)val;
      break;
    }
    case TSDB_DATA_TYPE_FLOAT:
    case TSDB_DATA_TYPE_DOUBLE: {
      union {
        int64_t i;
        double  d;
      } bits = {.i = val};
      datum->kind = FLT_SCL_DATUM_KIND_FLOAT64;
      datum->d = bits.d;
      break;
    }
    // TODO:varchar/nchar/json
    default: {
      datum->kind = FLT_SCL_DATUM_KIND_NULL;
      qError("not supported type %d when build datum from block sma value", type);
      break;
    }
  }
  return TSDB_CODE_SUCCESS;
}
/*
 * Describes the values present in a data block, based on its SMA statistics, as a
 * set of intervals appended to `points`:
 *   - a [NULL, NULL] interval when the block contains NULLs (or when numOfNull
 *     equals numOfRows);
 *   - a [min, max] interval, unless numOfNull equals numOfRows.
 * min/max are decoded according to the column's result type.
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange *colRange, SColumnDataAgg *pAgg, int32_t numOfRows,
                                     SArray *points) {
  const bool onlyNulls = (pAgg->numOfNull == numOfRows);

  if (onlyNulls || pAgg->numOfNull > 0) {
    SFltSclDatum nullVal = {.kind = FLT_SCL_DATUM_KIND_NULL};
    SFltSclPoint nullStart = {.start = true, .excl = false, .val = nullVal};
    SFltSclPoint nullEnd = {.start = false, .excl = false, .val = nullVal};
    taosArrayPush(points, &nullStart);
    taosArrayPush(points, &nullEnd);
  }
  if (onlyNulls) {
    // No non-NULL rows: min/max are not used.
    return TSDB_CODE_SUCCESS;
  }

  SFltSclDatum lower;
  SFltSclDatum upper;
  fltSclBuildDatumFromBlockSmaValue(&lower, colRange->colNode->node.resType.type, pAgg->min);
  fltSclBuildDatumFromBlockSmaValue(&upper, colRange->colNode->node.resType.type, pAgg->max);

  SFltSclPoint lowerPt = {.excl = false, .start = true, .val = lower};
  SFltSclPoint upperPt = {.excl = false, .start = false, .val = upper};
  taosArrayPush(points, &lowerPt);
  taosArrayPush(points, &upperPt);
  return TSDB_CODE_SUCCESS;
}
bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t numOfCols, int32_t numOfRows) {
if (info->scalarMode) {
SArray *colRanges = info->sclCtx.fltSclRange;
for (int32_t i = 0; i < taosArrayGetSize(colRanges); ++i) {
SFltSclColumnRange *colRange = taosArrayGet(colRanges, i);
bool foundCol = false;
int32_t j = 0;
for (; j < numOfCols; ++j) {
if (pDataStatis[j] != NULL && pDataStatis[j]->colId == colRange->colNode->colId) {
foundCol = true;
break;
}
}
if (foundCol) {
SColumnDataAgg *pAgg = pDataStatis[j];
SArray *points = taosArrayInit(2, sizeof(SFltSclPoint));
fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points);
qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min %" PRId64, pAgg->numOfNull, numOfRows,
pAgg->max, pAgg->min);
SArray *merged = taosArrayInit(8, sizeof(SFltSclPoint));
fltSclIntersect(points, colRange->points, merged);
bool isIntersect = taosArrayGetSize(merged) != 0;
qDebug("filter range execute, scalar mode, column range found. colId: %d colName: %s has overlap: %d",
colRange->colNode->colId, colRange->colNode->colName, isIntersect);
taosArrayDestroy(merged);
taosArrayDestroy(points);
if (!isIntersect) {
return false;
}
}
}
return true;
}
......@@ -3607,6 +3964,31 @@ _return:
return code;
}
/*
 * Converts a range endpoint into a concrete, inclusive boundary value in `d`.
 *
 * `d` starts as a copy of the point's datum. A NULL datum is returned unchanged.
 * MIN/MAX sentinels are replaced by the minimum/maximum of the datum's data type
 * (written into d->i). For integer and timestamp types an exclusive endpoint is
 * then moved one step inwards (+1 for a start point, -1 for an end point); for
 * any other type an error is logged and no adjustment is made.
 * Always returns TSDB_CODE_SUCCESS.
 */
static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d) {
  *d = point->val;

  switch (point->val.kind) {
    case FLT_SCL_DATUM_KIND_NULL:
      return TSDB_CODE_SUCCESS;
    case FLT_SCL_DATUM_KIND_MAX:
      getDataMax(d->type.type, &(d->i));
      break;
    case FLT_SCL_DATUM_KIND_MIN:
      getDataMin(d->type.type, &(d->i));
      break;
    default:
      break;
  }

  if (!(IS_INTEGER_TYPE(d->type.type) || IS_TIMESTAMP_TYPE(d->type.type))) {
    qError("not supported type %d when get datum from point", d->type.type);
    return TSDB_CODE_SUCCESS;
  }

  if (point->excl) {
    // turn an open boundary into the nearest closed one
    if (point->start) {
      ++d->i;
    } else {
      --d->i;
    }
  }
  return TSDB_CODE_SUCCESS;
}
int32_t filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) {
SFilterInfo *info = NULL;
int32_t code = 0;
......@@ -3616,6 +3998,26 @@ int32_t filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) {
FLT_ERR_RET(filterInitFromNode(pNode, &info, FLT_OPTION_NO_REWRITE | FLT_OPTION_TIMESTAMP));
if (info->scalarMode) {
SArray *colRanges = info->sclCtx.fltSclRange;
if (taosArrayGetSize(colRanges) == 1) {
SFltSclColumnRange *colRange = taosArrayGet(colRanges, 0);
SArray *points = colRange->points;
if (taosArrayGetSize(points) == 2) {
SFltSclPoint *startPt = taosArrayGet(points, 0);
SFltSclPoint *endPt = taosArrayGet(points, 1);
SFltSclDatum start;
SFltSclDatum end;
fltSclGetDatumValueFromPoint(startPt, &start);
fltSclGetDatumValueFromPoint(endPt, &end);
win->skey = start.i;
win->ekey = end.i;
*isStrict = true;
goto _return;
} else if (taosArrayGetSize(points) == 0) {
*win = TSWINDOW_DESC_INITIALIZER;
goto _return;
}
}
*win = TSWINDOW_INITIALIZER;
*isStrict = false;
goto _return;
......@@ -3946,8 +4348,204 @@ _return:
FLT_RET(code);
}
int32_t fltSclBuildRangePoints(SFltSclOperator *oper, SArray *points) {
switch (oper->type) {
case OP_TYPE_GREATER_THAN: {
SFltSclDatum start;
fltSclBuildDatumFromValueNode(&start, oper->valNode);
SFltSclPoint startPt = {.start = true, .excl = true, .val = start};
SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType};
SFltSclPoint endPt = {.start = false, .excl = false, .val = end};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_GREATER_EQUAL: {
SFltSclDatum start;
fltSclBuildDatumFromValueNode(&start, oper->valNode);
SFltSclPoint startPt = {.start = true, .excl = false, .val = start};
SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType};
SFltSclPoint endPt = {.start = false, .excl = false, .val = end};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_LOWER_THAN: {
SFltSclDatum end;
fltSclBuildDatumFromValueNode(&end, oper->valNode);
SFltSclPoint endPt = {.start = false, .excl = true, .val = end};
SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType};
SFltSclPoint startPt = {.start = true, .excl = false, .val = start};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_LOWER_EQUAL: {
SFltSclDatum end;
fltSclBuildDatumFromValueNode(&end, oper->valNode);
SFltSclPoint endPt = {.start = false, .excl = false, .val = end};
SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType};
SFltSclPoint startPt = {.start = true, .excl = false, .val = start};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_EQUAL: {
SFltSclDatum valDatum;
fltSclBuildDatumFromValueNode(&valDatum, oper->valNode);
SFltSclPoint startPt = {.start = true, .excl = false, .val = valDatum};
SFltSclPoint endPt = {.start = false, .excl = false, .val = valDatum};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_NOT_EQUAL: {
SFltSclDatum valDatum;
fltSclBuildDatumFromValueNode(&valDatum, oper->valNode);
{
SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType};
SFltSclPoint startPt = {.start = true, .excl = false, .val = start};
SFltSclPoint endPt = {.start = false, .excl = true, .val = valDatum};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
}
{
SFltSclPoint startPt = {.start = true, .excl = true, .val = valDatum};
SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType};
SFltSclPoint endPt = {.start = false, .excl = false, .val = end};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
}
break;
}
case OP_TYPE_IS_NULL: {
SFltSclDatum nullDatum = {.kind = FLT_SCL_DATUM_KIND_NULL};
SFltSclPoint startPt = {.start = true, .excl = false, .val = nullDatum};
SFltSclPoint endPt = {.start = false, .excl = false, .val = nullDatum};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
case OP_TYPE_IS_NOT_NULL: {
SFltSclDatum minDatum = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType};
SFltSclPoint startPt = {.start = true, .excl = false, .val = minDatum};
SFltSclDatum maxDatum = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType};
SFltSclPoint endPt = {.start = false, .excl = false, .val = maxDatum};
taosArrayPush(points, &startPt);
taosArrayPush(points, &endPt);
break;
}
default: {
qError("not supported operator type : %d when build range points", oper->type);
break;
}
}
return TSDB_CODE_SUCCESS;
}
// TODO: process DNF composed of CNF
/*
 * Folds a list of AND-ed "column <op> value" conditions (SFltSclOperator
 * elements) into one value range per column, stored in `colRangeList`
 * (SFltSclColumnRange elements).
 *
 * The first condition seen for a column defines its range; every further
 * condition on that column is intersected with the range accumulated so far.
 *
 * Fix: "no condition seen yet" and "intersection is empty" used to be told apart
 * by the point count alone (both are zero points). Once two conditions
 * contradicted each other, the next condition on the same column replaced the
 * empty range instead of being intersected with it, e.g.
 * `c > 10 AND c < 5 AND c > 0` produced (0, MAX] instead of the empty set.
 * Whether a column is new is now detected from the growth of `colRangeList`, so
 * an empty range stays empty.
 *
 * NOTE(review): a column entry that already exists in `colRangeList` on entry
 * with zero points is consequently treated as the empty set — confirm no caller
 * pre-populates the list that way.
 *
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t fltSclProcessCNF(SArray *sclOpListCNF, SArray *colRangeList) {
  const size_t numOpers = taosArrayGetSize(sclOpListCNF);

  for (size_t i = 0; i < numOpers; ++i) {
    SFltSclOperator *sclOper = taosArrayGet(sclOpListCNF, i);

    // The list only grows when the lookup had to create a new column entry.
    const size_t        numColsBefore = taosArrayGetSize(colRangeList);
    SFltSclColumnRange *colRange = fltSclGetOrCreateColumnRange(sclOper->colNode, colRangeList);
    const bool          firstCondOnColumn = taosArrayGetSize(colRangeList) > numColsBefore;

    SArray *points = taosArrayInit(4, sizeof(SFltSclPoint));
    fltSclBuildRangePoints(sclOper, points);

    if (firstCondOnColumn) {
      // Adopt the freshly built points as the column's initial range.
      taosArrayDestroy(colRange->points);
      colRange->points = points;
    } else {
      SArray *merged = taosArrayInit(4, sizeof(SFltSclPoint));
      fltSclIntersect(colRange->points, points, merged);
      taosArrayDestroy(colRange->points);
      taosArrayDestroy(points);
      colRange->points = merged;
    }
  }
  return TSDB_CODE_SUCCESS;
}
/*
 * Tells whether `pNode` is a condition the range collector understands: an
 * operator node with both operands present, using one of >, >=, <, <=, <>, =,
 * with a column on the left and a constant value on the right.
 */
static bool fltSclIsCollectableNode(SNode *pNode) {
  if (nodeType(pNode) != QUERY_NODE_OPERATOR) {
    return false;
  }

  SOperatorNode *pOper = (SOperatorNode *)pNode;
  if (NULL == pOper->pLeft || NULL == pOper->pRight) {
    return false;
  }

  switch (pOper->opType) {
    case OP_TYPE_GREATER_THAN:
    case OP_TYPE_GREATER_EQUAL:
    case OP_TYPE_LOWER_THAN:
    case OP_TYPE_LOWER_EQUAL:
    case OP_TYPE_NOT_EQUAL:
    case OP_TYPE_EQUAL:
      break;
    default:
      return false;
  }

  return nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE;
}
/*
 * Appends an SFltSclOperator for `pNode` to `sclOpList` when the node is a
 * collectable condition whose constant has a numeric or timestamp type. The
 * stored column and value nodes are clones of the operator's operands, so the
 * list entries need to be destroyed separately from the expression tree.
 * Nodes that do not qualify are silently ignored. Always returns TSDB_CODE_SUCCESS.
 */
static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) {
  if (!fltSclIsCollectableNode(pNode)) {
    return TSDB_CODE_SUCCESS;
  }

  SOperatorNode *pOper = (SOperatorNode *)pNode;
  SValueNode    *valNode = (SValueNode *)pOper->pRight;

  const bool supportedType =
      IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP;
  if (!supportedType) {
    return TSDB_CODE_SUCCESS;
  }

  SFltSclOperator collected = {0};
  collected.colNode = (SColumnNode *)nodesCloneNode(pOper->pLeft);
  collected.valNode = (SValueNode *)nodesCloneNode(pOper->pRight);
  collected.type = pOper->opType;
  taosArrayPush(sclOpList, &collected);
  return TSDB_CODE_SUCCESS;
}
/*
 * Collects the conditions of an AND logic node into `sclOpList`.
 *
 * Nothing is collected unless `pNode` is an AND condition AND every one of its
 * parameters is a collectable node; a single non-collectable parameter makes the
 * whole condition be skipped. Always returns TSDB_CODE_SUCCESS.
 */
static int32_t fltSclCollectOperatorsFromLogicCond(SNode *pNode, SArray *sclOpList) {
  // TODO: support LOGIC_COND_TYPE_OR
  if (nodeType(pNode) != QUERY_NODE_LOGIC_CONDITION ||
      ((SLogicConditionNode *)pNode)->condType != LOGIC_COND_TYPE_AND) {
    return TSDB_CODE_SUCCESS;
  }

  SLogicConditionNode *pLogicCond = (SLogicConditionNode *)pNode;
  SNode               *pExpr = NULL;

  // Pass 1: bail out if any parameter cannot be expressed as a range condition.
  FOREACH(pExpr, pLogicCond->pParameterList) {
    if (!fltSclIsCollectableNode(pExpr)) {
      return TSDB_CODE_SUCCESS;
    }
  }

  // Pass 2: all parameters qualify, collect them.
  FOREACH(pExpr, pLogicCond->pParameterList) {
    fltSclCollectOperatorFromNode(pExpr, sclOpList);
  }
  return TSDB_CODE_SUCCESS;
}
/*
 * Entry point of the condition collector: a single operator node is collected
 * directly, a logic condition node is handed to the logic-condition collector,
 * any other node type is ignored. Always returns TSDB_CODE_SUCCESS.
 */
static int32_t fltSclCollectOperators(SNode *pNode, SArray *sclOpList) {
  switch (nodeType(pNode)) {
    case QUERY_NODE_OPERATOR:
      fltSclCollectOperatorFromNode(pNode, sclOpList);
      break;
    case QUERY_NODE_LOGIC_CONDITION:
      fltSclCollectOperatorsFromLogicCond(pNode, sclOpList);
      break;
    default:
      break;
  }
  return TSDB_CODE_SUCCESS;
}
/*
 * Derives per-column value ranges from the filter expression `*pNode` and stores
 * them in pInfo->sclCtx.fltSclRange (an array of SFltSclColumnRange).
 *
 * The collected operator list is only a temporary: its cloned column/value nodes
 * and the list itself are released before returning. `pStat` is not used here.
 * Always returns TSDB_CODE_SUCCESS.
 */
int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) {
  // TODO
  SArray *collectedOps = taosArrayInit(16, sizeof(SFltSclOperator));
  fltSclCollectOperators(*pNode, collectedOps);

  SArray *ranges = taosArrayInit(16, sizeof(SFltSclColumnRange));
  fltSclProcessCNF(collectedOps, ranges);
  pInfo->sclCtx.fltSclRange = ranges;

  // The operators own clones of the tree nodes; free them together with the list.
  int32_t idx = 0;
  while (idx < taosArrayGetSize(collectedOps)) {
    SFltSclOperator *op = taosArrayGet(collectedOps, idx);
    nodesDestroyNode((SNode *)op->colNode);
    nodesDestroyNode((SNode *)op->valNode);
    ++idx;
  }
  taosArrayDestroy(collectedOps);
  return TSDB_CODE_SUCCESS;
}
......@@ -4021,8 +4619,11 @@ int32_t filterInitFromNode(SNode *pNode, SFilterInfo **pInfo, uint32_t options)
stat.info = info;
FLT_ERR_JRET(fltReviseNodes(info, &pNode, &stat));
info->scalarMode = stat.scalarMode;
if (tsFilterScalarMode) {
info->scalarMode = true;
} else {
info->scalarMode = stat.scalarMode;
}
fltDebug("scalar mode: %d", info->scalarMode);
if (!info->scalarMode) {
......
......@@ -1791,7 +1791,11 @@ void vectorNotMatch(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOu
/*
 * IS NULL operator: for every row of pLeft writes 1 to the output column when the
 * input value is NULL and 0 otherwise, clears the output row's null bit and counts
 * the rows that evaluated to 1 in pOut->numOfQualified. pRight and _ord are unused.
 */
void vectorIsNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, int32_t _ord) {
  int32_t row = 0;
  while (row < pLeft->numOfRows) {
    int8_t isNull = 0;
    if (IS_HELPER_NULL(pLeft->columnData, row)) {
      isNull = 1;
      ++pOut->numOfQualified;
    }
    colDataSetInt8(pOut->columnData, row, &isNull);
    colDataClearNull_f(pOut->columnData->nullbitmap, row);
    ++row;
  }
  pOut->numOfRows = pLeft->numOfRows;
}
......@@ -1799,7 +1803,11 @@ void vectorIsNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut,
/*
 * IS NOT NULL operator: for every row of pLeft writes 0 to the output column when
 * the input value is NULL and 1 otherwise, clears the output row's null bit and
 * counts the rows that evaluated to 1 in pOut->numOfQualified. pRight and _ord are
 * unused.
 */
void vectorNotNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, int32_t _ord) {
  int32_t row = 0;
  while (row < pLeft->numOfRows) {
    int8_t notNull = 1;
    if (IS_HELPER_NULL(pLeft->columnData, row)) {
      notNull = 0;
    } else {
      ++pOut->numOfQualified;
    }
    colDataSetInt8(pOut->columnData, row, &notNull);
    colDataClearNull_f(pOut->columnData->nullbitmap, row);
    ++row;
  }
  pOut->numOfRows = pLeft->numOfRows;
}
......@@ -1812,6 +1820,13 @@ void vectorIsTrue(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut,
colDataSetInt8(pOut->columnData, i, &v);
colDataClearNull_f(pOut->columnData->nullbitmap, i);
}
{
bool v = false;
GET_TYPED_DATA(v, bool, pOut->columnData->info.type, colDataGetData(pOut->columnData, i));
if (v) {
++pOut->numOfQualified;
}
}
}
pOut->columnData->hasNull = false;
}
......@@ -1851,7 +1866,9 @@ void vectorJsonContains(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam
char *pLeftData = colDataGetVarData(pLeft->columnData, i);
getJsonValue(pLeftData, jsonKey, &isExist);
}
if (isExist) {
++pOut->numOfQualified;
}
colDataSetVal(pOutputCol, i, (const char *)(&isExist), false);
}
taosMemoryFree(jsonKey);
......
......@@ -1274,6 +1274,7 @@
,,y,script,./test.sh -f tsim/parser/columnValue_tinyint.sim
,,y,script,./test.sh -f tsim/parser/columnValue_unsign.sim
,,y,script,./test.sh -f tsim/parser/condition.sim
,,y,script,./test.sh -f tsim/parser/condition_scl.sim
,,y,script,./test.sh -f tsim/parser/constCol.sim
,,y,script,./test.sh -f tsim/parser/create_db.sim
,,y,script,./test.sh -f tsim/parser/create_mt.sim
......@@ -1353,6 +1354,7 @@
,,y,script,./test.sh -f tsim/query/event.sim
,,y,script,./test.sh -f tsim/query/forceFill.sim
,,y,script,./test.sh -f tsim/query/emptyTsRange.sim
,,y,script,./test.sh -f tsim/query/emptyTsRange_scl.sim
,,y,script,./test.sh -f tsim/query/partitionby.sim
,,y,script,./test.sh -f tsim/query/tableCount.sim
,,y,script,./test.sh -f tsim/query/tag_scan.sim
......
# Deploy a single dnode (dnode1) with the filterScalarMode server option set to 1,
# start it, and create an empty test database cdb.
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c filterScalarMode -v 1
system sh/exec.sh -n dnode1 -s start
sql connect
sql drop database if exists cdb
sql create database if not exists cdb
sql use cdb
sql create table stb1 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9)) TAGS(t1 int, t2 binary(10), t3 double)
sql create table tb1 using stb1 tags(1,'1',1.0)
sql create table tb2 using stb1 tags(2,'2',2.0)
sql create table tb3 using stb1 tags(3,'3',3.0)
sql create table tb4 using stb1 tags(4,'4',4.0)
sql create table tb5 using stb1 tags(5,'5',5.0)
sql create table tb6 using stb1 tags(6,'6',6.0)
sql insert into tb1 values ('2021-05-05 18:19:00',1,1.0,1,1,1,1.0,true ,'1','1')
sql insert into tb1 values ('2021-05-05 18:19:01',2,2.0,2,2,2,2.0,true ,'2','2')
sql insert into tb1 values ('2021-05-05 18:19:02',3,3.0,3,3,3,3.0,false,'3','3')
sql insert into tb1 values ('2021-05-05 18:19:03',4,4.0,4,4,4,4.0,false,'4','4')
sql insert into tb1 values ('2021-05-05 18:19:04',11,11.0,11,11,11,11.0,true ,'11','11')
sql insert into tb1 values ('2021-05-05 18:19:05',12,12.0,12,12,12,12.0,true ,'12','12')
sql insert into tb1 values ('2021-05-05 18:19:06',13,13.0,13,13,13,13.0,false,'13','13')
sql insert into tb1 values ('2021-05-05 18:19:07',14,14.0,14,14,14,14.0,false,'14','14')
sql insert into tb2 values ('2021-05-05 18:19:08',21,21.0,21,21,21,21.0,true ,'21','21')
sql insert into tb2 values ('2021-05-05 18:19:09',22,22.0,22,22,22,22.0,true ,'22','22')
sql insert into tb2 values ('2021-05-05 18:19:10',23,23.0,23,23,23,23.0,false,'23','23')
sql insert into tb2 values ('2021-05-05 18:19:11',24,24.0,24,24,24,24.0,false,'24','24')
sql insert into tb3 values ('2021-05-05 18:19:12',31,31.0,31,31,31,31.0,true ,'31','31')
sql insert into tb3 values ('2021-05-05 18:19:13',32,32.0,32,32,32,32.0,true ,'32','32')
sql insert into tb3 values ('2021-05-05 18:19:14',33,33.0,33,33,33,33.0,false,'33','33')
sql insert into tb3 values ('2021-05-05 18:19:15',34,34.0,34,34,34,34.0,false,'34','34')
sql insert into tb4 values ('2021-05-05 18:19:16',41,41.0,41,41,41,41.0,true ,'41','41')
sql insert into tb4 values ('2021-05-05 18:19:17',42,42.0,42,42,42,42.0,true ,'42','42')
sql insert into tb4 values ('2021-05-05 18:19:18',43,43.0,43,43,43,43.0,false,'43','43')
sql insert into tb4 values ('2021-05-05 18:19:19',44,44.0,44,44,44,44.0,false,'44','44')
sql insert into tb5 values ('2021-05-05 18:19:20',51,51.0,51,51,51,51.0,true ,'51','51')
sql insert into tb5 values ('2021-05-05 18:19:21',52,52.0,52,52,52,52.0,true ,'52','52')
sql insert into tb5 values ('2021-05-05 18:19:22',53,53.0,53,53,53,53.0,false,'53','53')
sql insert into tb5 values ('2021-05-05 18:19:23',54,54.0,54,54,54,54.0,false,'54','54')
sql insert into tb6 values ('2021-05-05 18:19:24',61,61.0,61,61,61,61.0,true ,'61','61')
sql insert into tb6 values ('2021-05-05 18:19:25',62,62.0,62,62,62,62.0,true ,'62','62')
sql insert into tb6 values ('2021-05-05 18:19:26',63,63.0,63,63,63,63.0,false,'63','63')
sql insert into tb6 values ('2021-05-05 18:19:27',64,64.0,64,64,64,64.0,false,'64','64')
sql insert into tb6 values ('2021-05-05 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
sql create table stb2 (ts timestamp, u1 int unsigned, u2 bigint unsigned, u3 smallint unsigned, u4 tinyint unsigned, ts2 timestamp) TAGS(t1 int unsigned, t2 bigint unsigned, t3 timestamp, t4 int)
sql create table tb2_1 using stb2 tags(1,1,'2021-05-05 18:38:38',1)
sql create table tb2_2 using stb2 tags(2,2,'2021-05-05 18:58:58',2)
sql insert into tb2_1 values ('2021-05-05 18:19:00',1,2,3,4,'2021-05-05 18:28:01')
sql insert into tb2_1 values ('2021-05-05 18:19:01',5,6,7,8,'2021-05-05 18:28:02')
sql insert into tb2_1 values ('2021-05-05 18:19:02',2,2,3,4,'2021-05-05 18:28:03')
sql insert into tb2_1 values ('2021-05-05 18:19:03',5,6,7,8,'2021-05-05 18:28:04')
sql insert into tb2_1 values ('2021-05-05 18:19:04',3,2,3,4,'2021-05-05 18:28:05')
sql insert into tb2_1 values ('2021-05-05 18:19:05',5,6,7,8,'2021-05-05 18:28:06')
sql insert into tb2_1 values ('2021-05-05 18:19:06',4,2,3,4,'2021-05-05 18:28:07')
sql insert into tb2_1 values ('2021-05-05 18:19:07',5,6,7,8,'2021-05-05 18:28:08')
sql insert into tb2_1 values ('2021-05-05 18:19:08',5,2,3,4,'2021-05-05 18:28:09')
sql insert into tb2_1 values ('2021-05-05 18:19:09',5,6,7,8,'2021-05-05 18:28:10')
sql insert into tb2_1 values ('2021-05-05 18:19:10',6,2,3,4,'2021-05-05 18:28:11')
sql insert into tb2_2 values ('2021-05-05 18:19:11',5,6,7,8,'2021-05-05 18:28:12')
sql insert into tb2_2 values ('2021-05-05 18:19:12',7,2,3,4,'2021-05-05 18:28:13')
sql insert into tb2_2 values ('2021-05-05 18:19:13',5,6,7,8,'2021-05-05 18:28:14')
sql insert into tb2_2 values ('2021-05-05 18:19:14',8,2,3,4,'2021-05-05 18:28:15')
sql insert into tb2_2 values ('2021-05-05 18:19:15',5,6,7,8,'2021-05-05 18:28:16')
sql create table stb3 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9)) TAGS(t1 int, t2 binary(10), t3 double)
sql create table tb3_1 using stb3 tags(1,'1',1.0)
sql create table tb3_2 using stb3 tags(2,'2',2.0)
sql insert into tb3_1 values ('2021-01-05 18:19:00',1,1.0,1,1,1,1.0,true ,'1','1')
sql insert into tb3_1 values ('2021-02-05 18:19:01',2,2.0,2,2,2,2.0,true ,'2','2')
sql insert into tb3_1 values ('2021-03-05 18:19:02',3,3.0,3,3,3,3.0,false,'3','3')
sql insert into tb3_1 values ('2021-04-05 18:19:03',4,4.0,4,4,4,4.0,false,'4','4')
sql insert into tb3_1 values ('2021-05-05 18:19:28',5,NULL,5,NULL,5,NULL,true,NULL,'5')
sql insert into tb3_1 values ('2021-06-05 18:19:28',NULL,6.0,NULL,6,NULL,6.0,NULL,'6',NULL)
sql insert into tb3_1 values ('2021-07-05 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
sql insert into tb3_2 values ('2021-01-06 18:19:00',11,11.0,11,11,11,11.0,true ,'11','11')
sql insert into tb3_2 values ('2021-02-06 18:19:01',12,12.0,12,12,12,12.0,true ,'12','12')
sql insert into tb3_2 values ('2021-03-06 18:19:02',13,13.0,13,13,13,13.0,false,'13','13')
sql insert into tb3_2 values ('2021-04-06 18:19:03',14,14.0,14,14,14,14.0,false,'14','14')
sql insert into tb3_2 values ('2021-05-06 18:19:28',15,NULL,15,NULL,15,NULL,true,NULL,'15')
sql insert into tb3_2 values ('2021-06-06 18:19:28',NULL,16.0,NULL,16,NULL,16.0,NULL,'16',NULL)
sql insert into tb3_2 values ('2021-07-06 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
sql create table stb4 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9),c10 binary(16300)) TAGS(t1 int, t2 binary(10), t3 double)
sql create table tb4_0 using stb4 tags(0,'0',0.0)
sql create table tb4_1 using stb4 tags(1,'1',1.0)
sql create table tb4_2 using stb4 tags(2,'2',2.0)
sql create table tb4_3 using stb4 tags(3,'3',3.0)
sql create table tb4_4 using stb4 tags(4,'4',4.0)
# Bulk-load the stb4 child tables: $blockNum tables (tb4_0 .. tb4_4), $rowNum rows each.
$i = 0
$ts0 = 1625850000000
$blockNum = 5
$delta = 0
$tbname0 = tb4_
# $a/$b/$c are running counters that keep increasing across tables
$a = 0
$b = 200
$c = 400
while $i < $blockNum
$x = 0
$rowNum = 1200
while $x < $rowNum
# timestamps advance by 1 per row, starting at the table's base timestamp $ts0
$ts = $ts0 + $x
$a = $a + 1
$b = $b + 1
$c = $c + 1
$d = $x / 10
$tin = $rowNum
# build the quoted string literals 'binary<c>' and 'nchar<c>' and the quoted table name 'tb4_<i>'
$binary = 'binary . $c
$binary = $binary . '
$nchar = 'nchar . $c
$nchar = $nchar . '
$tbname = 'tb4_ . $i
$tbname = $tbname . '
sql insert into $tbname values ( $ts , $a , $b , $c , $d , $d , $c , true, $binary , $nchar , $binary )
$x = $x + 1
endw
$i = $i + 1
# shift the base timestamp for the next table (259200000 = 3 days, assuming millisecond precision)
$ts0 = $ts0 + 259200000
endw
# Run the shared query checks once before the restart ...
run tsim/parser/condition_query.sim
print ================== restart server to commit data into disk
system sh/exec.sh -n dnode1 -s stop -x SIGINT
system sh/exec.sh -n dnode1 -s start
print ================== server restart completed
sql connect
# ... and once more after the restart
run tsim/parser/condition_query.sim
# Empty timestamp range test with the filterScalarMode server option set to 1.
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/cfg.sh -n dnode1 -c filterScalarMode -v 1
system sh/exec.sh -n dnode1 -s start
sql connect
sql drop database if exists db1;
sql create database if not exists db1;
sql use db1;
sql create stable sta (ts timestamp, f1 double, f2 binary(200)) tags(t1 int);
sql create table tba1 using sta tags(1);
sql insert into tba1 values ('2022-04-26 15:15:01', 1.0, "a");
sql insert into tba1 values ('2022-04-26 15:15:02', 2.0, "b");
sql insert into tba1 values ('2022-04-26 15:15:04', 4.0, "b");
sql insert into tba1 values ('2022-04-26 15:15:05', 5.0, "b");
# The three timestamp conditions contradict each other (ts == ...783 and _c0 <= ...782),
# so the query must return no rows.
sql select last_row(*) from sta where ts >= 1678901803783 and ts <= 1678901803783 and _c0 <= 1678901803782 interval(10d,8d) fill(linear) order by _wstart desc;
if $rows != 0 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册