From a9ab3c8940b43d9a34569443d17f49e798b1233d Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Sun, 24 Apr 2022 16:31:17 +0800 Subject: [PATCH] enh: order by primary key optimize --- source/libs/parser/inc/sql.y | 1 + source/libs/parser/src/parCalcConst.c | 2 +- source/libs/planner/src/planOptimizer.c | 173 ++++++++++++++++++++---- source/libs/scalar/src/sclfunc.c | 20 +-- 4 files changed, 159 insertions(+), 37 deletions(-) diff --git a/source/libs/parser/inc/sql.y b/source/libs/parser/inc/sql.y index b1cf5759b7..20e470bd1d 100644 --- a/source/libs/parser/inc/sql.y +++ b/source/libs/parser/inc/sql.y @@ -611,6 +611,7 @@ function_expression(A) ::= function_name(B) NK_LP expression_list(C) NK_RP(D). function_expression(A) ::= star_func(B) NK_LP star_func_para_list(C) NK_RP(D). { A = createRawExprNodeExt(pCxt, &B, &D, createFunctionNode(pCxt, &B, C)); } function_expression(A) ::= CAST(B) NK_LP expression(C) AS type_name(D) NK_RP(E). { A = createRawExprNodeExt(pCxt, &B, &E, createCastFunctionNode(pCxt, releaseRawExprNode(pCxt, C), D)); } function_expression(A) ::= noarg_func(B) NK_LP NK_RP(C). { A = createRawExprNodeExt(pCxt, &B, &C, createFunctionNodeNoArg(pCxt, &B)); } +//function_expression(A) ::= NOW(B). 
{ A = createRawExprNode(pCxt, &B, createFunctionNode(pCxt, &B, NULL)); } %type noarg_func { SToken } %destructor noarg_func { } diff --git a/source/libs/parser/src/parCalcConst.c b/source/libs/parser/src/parCalcConst.c index ef1f9ada01..51e0daf4ad 100644 --- a/source/libs/parser/src/parCalcConst.c +++ b/source/libs/parser/src/parCalcConst.c @@ -83,7 +83,7 @@ static EDealRes calcConstOperator(SOperatorNode** pNode, void* pContext) { static EDealRes calcConstFunction(SFunctionNode** pNode, void* pContext) { SFunctionNode* pFunc = *pNode; - if (!fmIsScalarFunc(pFunc->funcId)) { + if (!fmIsScalarFunc(pFunc->funcId) || fmIsUserDefinedFunc(pFunc->funcId)) { return DEAL_RES_CONTINUE; } SNode* pParam = NULL; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 832135e90e..477a4b1bb6 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -20,6 +20,8 @@ #define OPTIMIZE_FLAG_MASK(n) (1 << n) #define OPTIMIZE_FLAG_OSD OPTIMIZE_FLAG_MASK(0) +#define OPTIMIZE_FLAG_CPD OPTIMIZE_FLAG_MASK(1) +#define OPTIMIZE_FLAG_OPK OPTIMIZE_FLAG_MASK(2) #define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask) #define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0) @@ -57,7 +59,23 @@ typedef enum ECondAction { // after supporting outer join, there are other possibilities } ECondAction; -EDealRes haveNormalColImpl(SNode* pNode, void* pContext) { +typedef bool (*FMayBeOptimized)(SLogicNode* pNode); + +static SLogicNode* optFindPossibleNode(SLogicNode* pNode, FMayBeOptimized func) { + if (func(pNode)) { + return pNode; + } + SNode* pChild; + FOREACH(pChild, pNode->pChildren) { + SLogicNode* pScanNode = optFindPossibleNode((SLogicNode*)pChild, func); + if (NULL != pScanNode) { + return pScanNode; + } + } + return NULL; +} + +EDealRes osdHaveNormalColImpl(SNode* pNode, void* pContext) { if (QUERY_NODE_COLUMN == nodeType(pNode)) { *((bool*)pContext) = (COLUMN_TYPE_TAG != 
((SColumnNode*)pNode)->colType); return *((bool*)pContext) ? DEAL_RES_END : DEAL_RES_IGNORE_CHILD; @@ -65,9 +83,9 @@ EDealRes haveNormalColImpl(SNode* pNode, void* pContext) { return DEAL_RES_CONTINUE; } -static bool haveNormalCol(SNodeList* pList) { +static bool osdHaveNormalCol(SNodeList* pList) { bool res = false; - nodesWalkExprsPostOrder(pList, haveNormalColImpl, &res); + nodesWalkExprsPostOrder(pList, osdHaveNormalColImpl, &res); return res; } @@ -89,21 +107,7 @@ static bool osdMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_WINDOW == nodeType(pNode->pParent)) { return (WINDOW_TYPE_INTERVAL == ((SWindowLogicNode*)pNode->pParent)->winType); } - return !haveNormalCol(((SAggLogicNode*)pNode->pParent)->pGroupKeys); -} - -static SLogicNode* osdFindPossibleScanNode(SLogicNode* pNode) { - if (osdMayBeOptimized(pNode)) { - return pNode; - } - SNode* pChild; - FOREACH(pChild, pNode->pChildren) { - SLogicNode* pScanNode = osdFindPossibleScanNode((SLogicNode*)pChild); - if (NULL != pScanNode) { - return pScanNode; - } - } - return NULL; + return !osdHaveNormalCol(((SAggLogicNode*)pNode->pParent)->pGroupKeys); } static SNodeList* osdGetAllFuncs(SLogicNode* pNode) { @@ -138,7 +142,7 @@ static int32_t osdGetRelatedFuncs(SScanLogicNode* pScan, SNodeList** pSdrFuncs, } static int32_t osdMatch(SOptimizeContext* pCxt, SLogicNode* pLogicNode, SOsdInfo* pInfo) { - pInfo->pScan = (SScanLogicNode*)osdFindPossibleScanNode(pLogicNode); + pInfo->pScan = (SScanLogicNode*)optFindPossibleNode(pLogicNode, osdMayBeOptimized); if (NULL == pInfo->pScan) { return TSDB_CODE_SUCCESS; } @@ -345,7 +349,7 @@ static int32_t cpdCalcTimeRange(SScanLogicNode* pScan, SNode** pPrimaryKeyCond, } static int32_t cpdOptimizeScanCondition(SOptimizeContext* pCxt, SScanLogicNode* pScan) { - if (NULL == pScan->node.pConditions) { + if (NULL == pScan->node.pConditions || OPTIMIZE_FLAG_TEST_MASK(pScan->node.optimizedFlag, OPTIMIZE_FLAG_CPD)) { return TSDB_CODE_SUCCESS; } @@ -359,7 +363,10 @@ static 
int32_t cpdOptimizeScanCondition(SOptimizeContext* pCxt, SScanLogicNode* pScan->node.pConditions = pOtherCond; } - if (TSDB_CODE_SUCCESS != code) { + if (TSDB_CODE_SUCCESS == code) { + OPTIMIZE_FLAG_SET_MASK(pScan->node.optimizedFlag, OPTIMIZE_FLAG_CPD); + pCxt->optimized = true; + } else { nodesDestroyNode(pPrimaryKeyCond); nodesDestroyNode(pOtherCond); } @@ -367,7 +374,7 @@ static int32_t cpdOptimizeScanCondition(SOptimizeContext* pCxt, SScanLogicNode* return code; } -static bool belongThisTable(SNode* pCondCol, SNodeList* pTableCols) { +static bool cpdBelongThisTable(SNode* pCondCol, SNodeList* pTableCols) { SNode* pTableCol = NULL; FOREACH(pTableCol, pTableCols) { if (nodesEqualNode(pCondCol, pTableCol)) { @@ -380,9 +387,9 @@ static bool belongThisTable(SNode* pCondCol, SNodeList* pTableCols) { static EDealRes cpdIsMultiTableCondImpl(SNode* pNode, void* pContext) { SCpdIsMultiTableCondCxt* pCxt = pContext; if (QUERY_NODE_COLUMN == nodeType(pNode)) { - if (belongThisTable(pNode, pCxt->pLeftCols)) { + if (cpdBelongThisTable(pNode, pCxt->pLeftCols)) { pCxt->havaLeftCol = true; - } else if (belongThisTable(pNode, pCxt->pRightCols)) { + } else if (cpdBelongThisTable(pNode, pCxt->pRightCols)) { pCxt->haveRightCol = true; } return pCxt->havaLeftCol && pCxt->haveRightCol ? 
DEAL_RES_END : DEAL_RES_CONTINUE; @@ -509,7 +516,7 @@ static int32_t cpdPushCondToChild(SOptimizeContext* pCxt, SLogicNode* pChild, SN } static int32_t cpdPushJoinCondition(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) { - if (NULL == pJoin->node.pConditions) { + if (NULL == pJoin->node.pConditions || OPTIMIZE_FLAG_TEST_MASK(pJoin->node.optimizedFlag, OPTIMIZE_FLAG_CPD)) { return TSDB_CODE_SUCCESS; } @@ -527,7 +534,10 @@ static int32_t cpdPushJoinCondition(SOptimizeContext* pCxt, SJoinLogicNode* pJoi code = cpdPushCondToChild(pCxt, (SLogicNode*)nodesListGetNode(pJoin->node.pChildren, 1), &pRightChildCond); } - if (TSDB_CODE_SUCCESS != code) { + if (TSDB_CODE_SUCCESS == code) { + OPTIMIZE_FLAG_SET_MASK(pJoin->node.optimizedFlag, OPTIMIZE_FLAG_CPD); + pCxt->optimized = true; + } else { nodesDestroyNode(pOnCond); nodesDestroyNode(pLeftChildCond); nodesDestroyNode(pRightChildCond); @@ -572,9 +582,118 @@ static int32_t cpdOptimize(SOptimizeContext* pCxt, SLogicNode* pLogicNode) { return cpdPushCondition(pCxt, pLogicNode); } +static bool opkIsPrimaryKeyOrderBy(SNodeList* pSortKeys) { + if (1 != LIST_LENGTH(pSortKeys)) { + return false; + } + SNode* pNode = nodesListGetNode(pSortKeys, 0); + return (QUERY_NODE_COLUMN == nodeType(pNode) ? 
(PRIMARYKEY_TIMESTAMP_COL_ID == ((SColumnNode*)pNode)->colId) : false); +} + +static bool opkSortMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SORT != nodeType(pNode)) { + return false; + } + if (OPTIMIZE_FLAG_TEST_MASK(pNode->optimizedFlag, OPTIMIZE_FLAG_OPK)) { + return false; + } + return true; +} + +static int32_t opkGetScanNodesImpl(SLogicNode* pNode, bool* pNotOptimize, SNodeList** pScanNodes) { + int32_t code = TSDB_CODE_SUCCESS; + + switch (nodeType(pNode)) { + case QUERY_NODE_LOGIC_PLAN_SCAN: + return nodesListMakeAppend(pScanNodes, pNode); + case QUERY_NODE_LOGIC_PLAN_JOIN: + code = opkGetScanNodesImpl(nodesListGetNode(pNode->pChildren, 0), pNotOptimize, pScanNodes); + if (TSDB_CODE_SUCCESS == code) { + code = opkGetScanNodesImpl(nodesListGetNode(pNode->pChildren, 1), pNotOptimize, pScanNodes); + } + return code; + case QUERY_NODE_LOGIC_PLAN_AGG: + *pNotOptimize = true; + return code; + default: + break; + } + + if (1 != LIST_LENGTH(pNode->pChildren)) { + *pNotOptimize = true; + } + + return opkGetScanNodesImpl(nodesListGetNode(pNode->pChildren, 0), pNotOptimize, pScanNodes); +} + +static int32_t opkGetScanNodes(SLogicNode* pNode, SNodeList** pScanNodes) { + bool notOptimize = false; + int32_t code = opkGetScanNodesImpl(pNode, &notOptimize, pScanNodes); + if (TSDB_CODE_SUCCESS != code || notOptimize) { + nodesClearList(*pScanNodes); + } + return code; +} + +static EOrder opkGetPrimaryKeyOrder(SSortLogicNode* pSort) { + return ((SOrderByExprNode*)nodesListGetNode(pSort->pSortKeys, 0))->order; +} + +static SNode* opkRewriteDownNode(SSortLogicNode* pSort) { + SNode* pDownNode = nodesListGetNode(pSort->node.pChildren, 0); + // todo + return pDownNode; +} + +static int32_t opkDoOptimized(SOptimizeContext* pCxt, SSortLogicNode* pSort, SNodeList* pScanNodes) { + EOrder order = opkGetPrimaryKeyOrder(pSort); + SNode* pScan = NULL; + FOREACH(pScan, pScanNodes) { + ((SScanLogicNode*)pScan)->scanFlag = (ORDER_ASC == order ? 
MAIN_SCAN : REVERSE_SCAN); + } + if (NULL == pSort->node.pParent) { + // todo + return TSDB_CODE_SUCCESS; + } + + SNode* pDownNode = opkRewriteDownNode(pSort); + SNode* pNode; + FOREACH(pNode, pSort->node.pParent->pChildren) { + if (nodesEqualNode(pNode, pSort)) { + REPLACE_NODE(pDownNode); + break; + } + } + nodesDestroyNode(pSort); + return TSDB_CODE_SUCCESS; +} + +static int32_t opkOptimizeImpl(SOptimizeContext* pCxt, SSortLogicNode* pSort) { + OPTIMIZE_FLAG_SET_MASK(pSort->node.optimizedFlag, OPTIMIZE_FLAG_OPK); + if (!opkIsPrimaryKeyOrderBy(pSort->pSortKeys) || 1 != LIST_LENGTH(pSort->node.pChildren)) { + return TSDB_CODE_SUCCESS; + } + SNodeList* pScanNodes = NULL; + int32_t code = opkGetScanNodes(nodesListGetNode(pSort->node.pChildren, 0), &pScanNodes); + if (TSDB_CODE_SUCCESS == code && NULL != pScanNodes) { + code = opkDoOptimized(pCxt, pSort, pScanNodes); + } + nodesClearList(pScanNodes); + return code; +} + +static int32_t opkOptimize(SOptimizeContext* pCxt, SLogicNode* pLogicNode) { + SSortLogicNode* pSort = (SSortLogicNode*)optFindPossibleNode(pLogicNode, opkSortMayBeOptimized); + if (NULL == pSort) { + return TSDB_CODE_SUCCESS; + } + return opkOptimizeImpl(pCxt, pSort); +} + static const SOptimizeRule optimizeRuleSet[] = { { .pName = "OptimizeScanData", .optimizeFunc = osdOptimize }, - { .pName = "ConditionPushDown", .optimizeFunc = cpdOptimize } + { .pName = "ConditionPushDown", .optimizeFunc = cpdOptimize }, + { .pName = "OrderByPrimaryKey", .optimizeFunc = opkOptimize } }; static const int32_t optimizeRuleNum = (sizeof(optimizeRuleSet) / sizeof(SOptimizeRule)); diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index cca7f1cbff..156e71a845 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -1247,26 +1247,28 @@ int32_t timeDiffFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *p } int32_t nowFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) { - if 
(inputNum != 1) { - return TSDB_CODE_FAILED; + int64_t ts = taosGetTimestamp(TSDB_TIME_PRECISION_MILLI); + for (int32_t i = 0; i < pInput->numOfRows; ++i) { + colDataAppendInt64(pOutput->columnData, i, &ts); } - colDataAppendInt64(pOutput->columnData, pOutput->numOfRows, (int64_t *)colDataGetData(pInput->columnData, 0)); + pOutput->numOfRows = pInput->numOfRows; return TSDB_CODE_SUCCESS; } int32_t todayFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) { - if (inputNum != 1) { - return TSDB_CODE_FAILED; + int64_t ts = taosGetTimestampToday(TSDB_TIME_PRECISION_MILLI); + for (int32_t i = 0; i < pInput->numOfRows; ++i) { + colDataAppendInt64(pOutput->columnData, i, &ts); } - colDataAppendInt64(pOutput->columnData, pOutput->numOfRows, (int64_t *)colDataGetData(pInput->columnData, 0)); + pOutput->numOfRows = pInput->numOfRows; return TSDB_CODE_SUCCESS; } int32_t timezoneFunction(SScalarParam *pInput, int32_t inputNum, SScalarParam *pOutput) { - if (inputNum != 1) { - return TSDB_CODE_FAILED; + for (int32_t i = 0; i < pInput->numOfRows; ++i) { + colDataAppend(pOutput->columnData, i, tsTimezoneStr, false); } - colDataAppend(pOutput->columnData, pOutput->numOfRows, (char *)colDataGetData(pInput->columnData, 0), false); + pOutput->numOfRows = pInput->numOfRows; return TSDB_CODE_SUCCESS; } -- GitLab