diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 453c5d49142934b79758f3faf5e3c12c80c16a55..821defc7972866510a35a2dd925cb75ff1ebd6ef 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -55,6 +55,7 @@ typedef struct SLogicNode { EGroupAction groupAction; EOrder inputTsOrder; EOrder outputTsOrder; + bool forceCreateNonBlockingOptr; // true if the operator can use non-blocking(pipeline) mode } SLogicNode; typedef enum EScanType { @@ -105,6 +106,7 @@ typedef struct SScanLogicNode { bool hasNormalCols; // neither tag column nor primary key tag column bool sortPrimaryKey; bool igLastNull; + bool groupOrderScan; } SScanLogicNode; typedef struct SJoinLogicNode { @@ -316,6 +318,7 @@ typedef struct SPhysiNode { struct SPhysiNode* pParent; SNode* pLimit; SNode* pSlimit; + bool forceCreateNonBlockingOptr; } SPhysiNode; typedef struct SScanPhysiNode { @@ -326,6 +329,7 @@ typedef struct SScanPhysiNode { uint64_t suid; int8_t tableType; SName tableName; + bool groupOrderScan; } SScanPhysiNode; typedef SScanPhysiNode STagScanPhysiNode; diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index b2f03fa7ba54606a924214f98559c818bbef3ac9..887a11083153c0daa3bf6f06a36ae49d6c65dab5 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -632,7 +632,10 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { pStart += colSize; } } else { - memcpy(pStart, pCol->pData, dataSize); + if (dataSize != 0) { + // ubsan reports error if pCol->pData==NULL && dataSize==0 + memcpy(pStart, pCol->pData, dataSize); + } pStart += dataSize; } } @@ -684,8 +687,10 @@ int32_t blockDataFromBuf(SSDataBlock* pBlock, const char* buf) { return TSDB_CODE_FAILED; } } - - memcpy(pCol->pData, pStart, colLength); + if (colLength != 0) { + // ubsan reports error if colLength==0 && pCol->pData == 0 + memcpy(pCol->pData, pStart, colLength); + } pStart += colLength; } diff --git a/source/dnode/mnode/impl/inc/mndStb.h b/source/dnode/mnode/impl/inc/mndStb.h index 99af413539b850eb4f62808fdf7899ab679933f0..db960d790f708e76ce2921f4ad6b6fefba82e441 100644 --- a/source/dnode/mnode/impl/inc/mndStb.h +++ b/source/dnode/mnode/impl/inc/mndStb.h @@ -39,6 +39,7 @@ int32_t mndBuildSMCreateStbRsp(SMnode *pMnode, char *dbFName, char *stbFName, vo void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst); void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst); +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst); void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize); const char *mndGetStbStr(const char *src); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index d6537ef9926cd01bffd8f2ed985e9d3c7fff7d7f..fa104d06ff5c0393ed3535b2cf0d36570b7b2dc9 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2498,12 +2498,14 @@ static int32_t mndProcessTableCfgReq(SRpcMsg *pReq) { goto _OVER; } - if (0 == strcmp(cfgReq.dbFName, TSDB_INFORMATION_SCHEMA_DB)) { + char dbName[TSDB_DB_NAME_LEN] = {0}; + mndExtractShortDbNameFromDbFullName(cfgReq.dbFName, dbName); + if (0 == strcmp(dbName, TSDB_INFORMATION_SCHEMA_DB)) { mInfo("information_schema table:%s.%s, start to retrieve cfg", cfgReq.dbFName, cfgReq.tbName); if (mndBuildInsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; } - } else if (0 == strcmp(cfgReq.dbFName, TSDB_PERFORMANCE_SCHEMA_DB)) { + } else if (0 == strcmp(dbName, TSDB_PERFORMANCE_SCHEMA_DB)) { mInfo("performance_schema table:%s.%s, start to retrieve cfg", cfgReq.dbFName, cfgReq.tbName); if (mndBuildPerfsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; @@ -2672,6 +2674,13 @@ void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst) { tNameGetDbName(&name, dst); } +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst) { + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&name, dst); +} + void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize) { int32_t pos = -1; int32_t num = 0; diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 562343c9c70b4b8a38c866069e897535c5fdfddd..784126eee941a4c9cd0af7898a58e1b6d5d4648b 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -2090,7 +2090,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { } CTG_CACHE_NHIT_INC(CTG_CI_TBL_CFG, 1); - + if (pCtx->tbType <= 0) { CTG_ERR_JRET(ctgReadTbTypeFromCache(pCtg, dbFName, pCtx->pName->tname, &pCtx->tbType)); if (pCtx->tbType <= 0) { @@ -2102,7 +2102,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { } } - if (TSDB_SUPER_TABLE == pCtx->tbType) { + if (TSDB_SUPER_TABLE == pCtx->tbType || TSDB_SYSTEM_TABLE == pCtx->tbType) { CTG_ERR_JRET(ctgGetTableCfgFromMnode(pCtg, pConn, pCtx->pName, NULL, pTask)); } else { if (NULL == pCtx->pVgInfo) { diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 9c41eb2bf08d3b81c0f86ce46b053a83615d586a..5baf0978cd84a79ab77dd935f2e35bcfa70efa9a 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -232,19 +232,20 @@ typedef struct STableMergeScanInfo { int32_t tableEndIndex; bool hasGroupId; uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond STableScanBase base; int32_t bufPageSize; uint32_t sortBufSize; // max buffer size for in-memory sort SArray* pSortInfo; SSortHandle* pSortHandle; SSDataBlock* pSortInputBlock; + SSDataBlock* pReaderBlock; int64_t startTs; // sort start time SArray* sortSourceParams; SLimitInfo limitInfo; int64_t numOfRows; SScanInfo scanInfo; int32_t scanTimes; + int32_t readIdx; SSDataBlock* pResBlock; SSampleExecInfo sample; // sample execution info SSortExecInfo sortExecInfo; diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index 627aa825c671ee0d87700f62b00e6b3c9fdfef4d..538a9f18f6012a110b52fa8e2d899f551dd88597 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -26,6 +26,7 @@ extern "C" { enum { SORT_MULTISOURCE_MERGE = 0x1, SORT_SINGLESOURCE_SORT = 0x2, + SORT_BLOCK_TS_MERGE = 0x3 }; typedef struct SMultiMergeSource { @@ -53,6 +54,12 @@ typedef struct SMsortComparParam { int32_t numOfSources; SArray* orderInfo; // SArray bool cmpGroupId; + + int32_t sortType; + // the following field to speed up when sortType == SORT_BLOCK_TS_MERGE + int32_t tsSlotId; + int32_t order; + __compar_fn_t cmpFn; } SMsortComparParam; typedef struct SSortHandle SSortHandle; @@ -70,8 +77,8 @@ typedef int32_t (*_sort_merge_compar_fn_t)(const void* p1, const void* p2, void* * @return */ SSortHandle* tsortCreateSortHandle(SArray* pOrderInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize); + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize); void tsortSetForceUsePQSort(SSortHandle* pHandle); @@ -110,6 +117,10 @@ int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetc */ int32_t tsortSetComparFp(SSortHandle* pHandle, _sort_merge_compar_fn_t fp); +/** + * +*/ +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit); /** * */ diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index be0ad1c2399c1b423bf6a09635d6334581c321fe..176c4b53be828a0ad356953793f722cc39d43893 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -45,6 +45,8 @@ typedef struct SAggOperatorInfo { SGroupResInfo groupResInfo; SExprSupp scalarExprSup; bool groupKeyOptimized; + bool hasValidBlock; + SSDataBlock* pNewGroupBlock; } SAggOperatorInfo; static void destroyAggOperatorInfo(void* param); @@ -53,7 +55,6 @@ static void setExecutionContext(SOperatorInfo* pOperator, int32_t numOfOutput, u static int32_t createDataBlockForEmptyInput(SOperatorInfo* pOperator, SSDataBlock** ppBlock); static void destroyDataBlockForEmptyInput(bool blockAllocated, SSDataBlock** ppBlock); -static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator); static int32_t doAggregateImpl(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx); static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator); @@ -111,9 +112,9 @@ SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiN pInfo->binfo.inputTsOrder = pAggNode->node.inputTsOrder; pInfo->binfo.outputTsOrder = pAggNode->node.outputTsOrder; - setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, true, OP_NOT_OPENED, pInfo, - pTaskInfo); - pOperator->fpSet = createOperatorFpSet(doOpenAggregateOptr, getAggregateResult, NULL, destroyAggOperatorInfo, + setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, + !pAggNode->node.forceCreateNonBlockingOptr, OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, getAggregateResult, NULL, destroyAggOperatorInfo, optrDefaultBufFn, NULL); if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { @@ -153,28 +154,42 @@ void destroyAggOperatorInfo(void* param) { taosMemoryFreeClear(param); } -// this is a blocking operator -int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } - +/** + * @brief get blocks from downstream and fill results into groupedRes after aggragation + * @retval false if no more groups + * @retval true if there could have new groups coming + * @note if pOperator.blocking is true, scan all blocks from downstream, all groups are handled + * if false, fill results of ONE GROUP + * */ +static bool nextGroupedResult(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SAggOperatorInfo* pAggInfo = pOperator->info; + if (pOperator->blocking && pAggInfo->hasValidBlock) return false; + SExprSupp* pSup = &pOperator->exprSupp; SOperatorInfo* downstream = pOperator->pDownstream[0]; - int64_t st = taosGetTimestampUs(); - int32_t code = TSDB_CODE_SUCCESS; - int32_t order = pAggInfo->binfo.inputTsOrder; - bool hasValidBlock = false; + int64_t st = taosGetTimestampUs(); + int32_t code = TSDB_CODE_SUCCESS; + int32_t order = pAggInfo->binfo.inputTsOrder; + SSDataBlock* pBlock = pAggInfo->pNewGroupBlock; + if (pBlock) { + pAggInfo->pNewGroupBlock = NULL; + tSimpleHashClear(pAggInfo->aggSup.pResultRowHashTable); + setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); + setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); + code = doAggregateImpl(pOperator, pSup->pCtx); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + } while (1) { bool blockAllocated = false; - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { - if (!hasValidBlock) { + if (!pAggInfo->hasValidBlock) { createDataBlockForEmptyInput(pOperator, &pBlock); if (pBlock == NULL) { break; @@ -184,7 +199,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { break; } } - hasValidBlock = true; + pAggInfo->hasValidBlock = true; pAggInfo->binfo.pRes->info.scanFlag = pBlock->info.scanFlag; // there is an scalar expression that needs to be calculated before apply the group aggregation. @@ -196,7 +211,11 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, code); } } - + // if non-blocking mode and new group arrived, save the block and break + if (!pOperator->blocking && pAggInfo->groupId != UINT64_MAX && pBlock->info.id.groupId != pAggInfo->groupId) { + pAggInfo->pNewGroupBlock = pBlock; + break; + } // the pDataBlock are always the same one, no need to call this again setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); @@ -215,10 +234,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { } initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0); - OPTR_SET_OPENED(pOperator); - - pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; - return pTaskInfo->code; + return pBlock != NULL; } SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { @@ -230,26 +246,25 @@ SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { } SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - pTaskInfo->code = pOperator->fpSet._openFn(pOperator); - if (pTaskInfo->code != TSDB_CODE_SUCCESS) { - setOperatorCompleted(pOperator); - return NULL; - } + bool hasNewGroups = false; + do { + hasNewGroups = nextGroupedResult(pOperator); + blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - while (1) { - doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); - doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); + while (1) { + doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); + doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); - if (!hasRemainResults(&pAggInfo->groupResInfo)) { - setOperatorCompleted(pOperator); - break; - } + if (!hasRemainResults(&pAggInfo->groupResInfo)) { + if (!hasNewGroups) setOperatorCompleted(pOperator); + break; + } - if (pInfo->pRes->info.rows > 0) { - break; + if (pInfo->pRes->info.rows > 0) { + break; + } } - } + } while (pInfo->pRes->info.rows == 0 && hasNewGroups); size_t rows = blockDataGetNumOfRows(pInfo->pRes); pOperator->resultInfo.totalRows += rows; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 832750e967dfd6f996d12281a9ece48b6c6d26c7..e1bf4e7cb0e3b3e2df8cc7931d57c153a952b78b 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -127,6 +127,10 @@ void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, in if (pGroupResInfo->pRows != NULL) { taosArrayDestroy(pGroupResInfo->pRows); } + if (pGroupResInfo->pBuf) { + taosMemoryFree(pGroupResInfo->pBuf); + pGroupResInfo->pBuf = NULL; + } // extract the result rows information from the hash map int32_t size = tSimpleHashGetSize(pHashmap); @@ -2104,6 +2108,8 @@ int32_t buildGroupIdMapForAllTables(STableListInfo* pTableListInfo, SReadHandle* if (groupSort && groupByTbname) { taosArraySort(pTableListInfo->pTableList, orderbyGroupIdComparFn); pTableListInfo->numOfOuputGroups = numOfTables; + } else if (groupByTbname && pScanNode->groupOrderScan){ + pTableListInfo->numOfOuputGroups = numOfTables; } else { pTableListInfo->numOfOuputGroups = 1; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index f1de80ec4cf92adb3bb06646c0787816252308b9..810f9709f5887d9bb526eeccfdd07e10dc782e9d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -13,8 +13,6 @@ * along with this program. If not, see . */ -// clang-format off - #include "executorInt.h" #include "filter.h" #include "function.h" @@ -55,8 +53,7 @@ typedef struct STableMergeScanSortSourceParam { SOperatorInfo* pOperator; int32_t readerIdx; uint64_t uid; - SSDataBlock* inputBlock; - STsdbReader* dataReader; + STsdbReader* reader; } STableMergeScanSortSourceParam; typedef struct STableCountScanOperatorInfo { @@ -2734,32 +2731,17 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pTaskInfo->storageAPI; - int32_t readIdx = source->readerIdx; - SSDataBlock* pBlock = source->inputBlock; + SSDataBlock* pBlock = pInfo->pReaderBlock; int32_t code = 0; - SQueryTableDataCond* pQueryCond = taosArrayGet(pInfo->queryConds, readIdx); - int64_t st = taosGetTimestampUs(); - void* p = tableListGetInfo(pInfo->base.pTableListInfo, readIdx + pInfo->tableStartIndex); - SReadHandle* pHandle = &pInfo->base.readHandle; - if (NULL == source->dataReader) { - code = pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, (void**)&source->dataReader, GET_TASKID(pTaskInfo), false, NULL); - if (code != 0) { - T_LONG_JMP(pTaskInfo->env, code); - } - } - - pInfo->base.dataReader = source->dataReader; - STsdbReader* reader = pInfo->base.dataReader; bool hasNext = false; - qTrace("tsdb/read-table-data: %p, enter next reader", reader); + STsdbReader* reader = pInfo->base.dataReader; while (true) { code = pAPI->tsdReader.tsdNextDataBlock(reader, &hasNext); if (code != 0) { pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; T_LONG_JMP(pTaskInfo->env, code); } @@ -2769,7 +2751,6 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { if (isTaskKilled(pTaskInfo)) { pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); } @@ -2779,12 +2760,6 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { continue; } - if (pQueryCond->order == TSDB_ORDER_ASC) { - pQueryCond->twindows.skey = pBlock->info.window.ekey + 1; - } else { - pQueryCond->twindows.ekey = pBlock->info.window.skey - 1; - } - uint32_t status = 0; code = loadDataBlock(pOperator, &pInfo->base, pBlock, &status); // code = loadDataBlockFromOneTable(pOperator, pTableScanInfo, pBlock, &status); @@ -2806,16 +2781,9 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { pOperator->resultInfo.totalRows += pBlock->info.rows; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; - qTrace("tsdb/read-table-data: %p, close reader", reader); - pInfo->base.dataReader = NULL; return pBlock; } - pAPI->tsdReader.tsdReaderClose(source->dataReader); - source->dataReader = NULL; - pInfo->base.dataReader = NULL; - blockDataDestroy(source->inputBlock); - source->inputBlock = NULL; return NULL; } @@ -2851,6 +2819,8 @@ int32_t dumpQueryTableCond(const SQueryTableDataCond* src, SQueryTableDataCond* int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SReadHandle* pHandle = &pInfo->base.readHandle; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; { size_t numOfTables = tableListGetSize(pInfo->base.pTableListInfo); @@ -2867,53 +2837,29 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->base.dataReader = NULL; - - // todo the total available buffer should be determined by total capacity of buffer of this task. - // the additional one is reserved for merge result - // pInfo->sortBufSize = pInfo->bufPageSize * (tableEndIdx - tableStartIdx + 1 + 1); - int32_t kWay = (TSDB_MAX_BYTES_PER_ROW * 2) / (pInfo->pResBlock->info.rowSize); - if (kWay >= 128) { - kWay = 128; - } else if (kWay <= 2) { - kWay = 2; - } else { - int i = 2; - while (i * 2 <= kWay) i = i * 2; - kWay = i; - } - - pInfo->sortBufSize = pInfo->bufPageSize * (kWay + 1); + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); - + int64_t mergeLimit = -1; + if (pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1) { + mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; + } + tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); tsortSetFetchRawDataFp(pInfo->pSortHandle, getTableDataBlockImpl, NULL, NULL); // one table has one data block int32_t numOfTable = tableEndIdx - tableStartIdx + 1; - pInfo->queryConds = taosArrayInit(numOfTable, sizeof(SQueryTableDataCond)); - - for (int32_t i = 0; i < numOfTable; ++i) { - STableMergeScanSortSourceParam param = {0}; - param.readerIdx = i; - param.pOperator = pOperator; - param.inputBlock = createOneDataBlock(pInfo->pResBlock, false); - taosArrayPush(pInfo->sortSourceParams, ¶m); + STableMergeScanSortSourceParam param = {0}; + param.pOperator = pOperator; + STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); + pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); - SQueryTableDataCond cond; - dumpQueryTableCond(&pInfo->base.cond, &cond); - taosArrayPush(pInfo->queryConds, &cond); - } - - for (int32_t i = 0; i < numOfTable; ++i) { - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - ps->param = param; - ps->onlyRef = true; - tsortAddSource(pInfo->pSortHandle, ps); - } + SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); + ps->param = ¶m; + ps->onlyRef = true; + tsortAddSource(pInfo->pSortHandle, ps); int32_t code = tsortOpen(pInfo->pSortHandle); @@ -2929,8 +2875,6 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pTaskInfo->storageAPI; - int32_t numOfTable = taosArrayGetSize(pInfo->queryConds); - SSortExecInfo sortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); pInfo->sortExecInfo.sortMethod = sortExecInfo.sortMethod; pInfo->sortExecInfo.sortBuffer = sortExecInfo.sortBuffer; @@ -2938,24 +2882,14 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortExecInfo.readBytes += sortExecInfo.readBytes; pInfo->sortExecInfo.writeBytes += sortExecInfo.writeBytes; - for (int32_t i = 0; i < numOfTable; ++i) { - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - blockDataDestroy(param->inputBlock); - pAPI->tsdReader.tsdReaderClose(param->dataReader); - param->dataReader = NULL; + if (pInfo->base.dataReader != NULL) { + pAPI->tsdReader.tsdReaderClose(pInfo->base.dataReader); + pInfo->base.dataReader = NULL; } - taosArrayClear(pInfo->sortSourceParams); tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->queryConds); i++) { - SQueryTableDataCond* cond = taosArrayGet(pInfo->queryConds, i); - taosMemoryFree(cond->colList); - } - taosArrayDestroy(pInfo->queryConds); - pInfo->queryConds = NULL; - resetLimitInfoForNextGroup(&pInfo->limitInfo); return TSDB_CODE_SUCCESS; } @@ -2968,28 +2902,32 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; blockDataCleanup(pResBlock); - + STupleHandle* pTupleHandle = NULL; while (1) { - STupleHandle* pTupleHandle = tsortNextTuple(pHandle); - if (pTupleHandle == NULL) { - break; + while (1) { + pTupleHandle = tsortNextTuple(pHandle); + if (pTupleHandle == NULL) { + break; + } + + appendOneRowToDataBlock(pResBlock, pTupleHandle); + if (pResBlock->info.rows >= capacity) { + break; + } } - appendOneRowToDataBlock(pResBlock, pTupleHandle); - if (pResBlock->info.rows >= capacity) { - break; + if (tsortIsClosed(pHandle)) { + terrno = TSDB_CODE_TSC_QUERY_CANCELLED; + T_LONG_JMP(pOperator->pTaskInfo->env, terrno); } - } - if (tsortIsClosed(pHandle)) { - terrno = TSDB_CODE_TSC_QUERY_CANCELLED; - T_LONG_JMP(pOperator->pTaskInfo->env, terrno); + bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); + qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, + pInfo->limitInfo.numOfOutputRows); + if (pTupleHandle == NULL || limitReached || pResBlock->info.rows > 0) { + break; + } } - - bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); - qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, - pInfo->limitInfo.numOfOutputRows); - return (pResBlock->info.rows > 0) ? pResBlock : NULL; } @@ -3053,14 +2991,7 @@ void destroyTableMergeScanOperatorInfo(void* param) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->base.cond); - int32_t numOfTable = taosArrayGetSize(pTableScanInfo->queryConds); - - for (int32_t i = 0; i < numOfTable; i++) { - STableMergeScanSortSourceParam* p = taosArrayGet(pTableScanInfo->sortSourceParams, i); - blockDataDestroy(p->inputBlock); - pTableScanInfo->base.readerAPI.tsdReaderClose(p->dataReader); - p->dataReader = NULL; - } + int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); pTableScanInfo->base.dataReader = NULL; @@ -3069,16 +3000,11 @@ void destroyTableMergeScanOperatorInfo(void* param) { tsortDestroySortHandle(pTableScanInfo->pSortHandle); pTableScanInfo->pSortHandle = NULL; - for (int i = 0; i < taosArrayGetSize(pTableScanInfo->queryConds); i++) { - SQueryTableDataCond* pCond = taosArrayGet(pTableScanInfo->queryConds, i); - taosMemoryFree(pCond->colList); - } - - taosArrayDestroy(pTableScanInfo->queryConds); destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI); pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); pTableScanInfo->pSortInputBlock = blockDataDestroy(pTableScanInfo->pSortInputBlock); + pTableScanInfo->pReaderBlock = blockDataDestroy(pTableScanInfo->pReaderBlock); taosArrayDestroy(pTableScanInfo->pSortInfo); taosMemoryFreeClear(param); @@ -3140,6 +3066,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->base.scanFlag = MAIN_SCAN; pInfo->base.readHandle = *readHandle; + pInfo->readIdx = -1; + pInfo->base.limitInfo.limit.limit = -1; pInfo->base.limitInfo.slimit.limit = -1; pInfo->base.pTableListInfo = pTableListInfo; @@ -3162,6 +3090,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); + pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); + int32_t rowSize = pInfo->pResBlock->info.rowSize; uint32_t nCols = taosArrayGetSize(pInfo->pResBlock->pDataBlock); pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); @@ -3570,6 +3500,4 @@ static void destoryTableCountScanOperator(void* param) { taosArrayDestroy(pTableCountScanInfo->stbUidList); taosMemoryFreeClear(param); -} - -// clang-format on +} \ No newline at end of file diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index d26db6536facc3d36e9de03afefbf2dc923e32af..30e71487368d719025e4c76c7b1580e816d8e810 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -24,6 +24,7 @@ #include "tpagedbuf.h" #include "tsort.h" #include "tutil.h" +#include "tsimplehash.h" struct STupleHandle { SSDataBlock* pBlock; @@ -42,13 +43,15 @@ struct SSortHandle { int64_t startTs; uint64_t totalElapsed; - uint64_t maxRows; - uint32_t maxTupleLength; - uint32_t sortBufSize; + uint64_t pqMaxRows; + uint32_t pqMaxTupleLength; + uint32_t pqSortBufSize; bool forceUsePQSort; BoundedQueue* pBoundedQueue; uint32_t tmpRowIdx; + int64_t mergeLimit; + int32_t sourceId; SSDataBlock* pDataBlock; SMsortComparParam cmpParam; @@ -173,8 +176,8 @@ void destroyTuple(void* t) { * @return */ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize) { + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize) { SSortHandle* pSortHandle = taosMemoryCalloc(1, sizeof(SSortHandle)); pSortHandle->type = type; @@ -183,10 +186,10 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pSortInfo = pSortInfo; pSortHandle->loops = 0; - pSortHandle->maxTupleLength = maxTupleLength; - if (maxRows != 0) { - pSortHandle->sortBufSize = sortBufSize; - pSortHandle->maxRows = maxRows; + pSortHandle->pqMaxTupleLength = pqMaxTupleLength; + if (pqMaxRows != 0) { + pSortHandle->pqSortBufSize = pqSortBufSize; + pSortHandle->pqMaxRows = pqMaxRows; } pSortHandle->forceUsePQSort = false; @@ -194,10 +197,18 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pDataBlock = createOneDataBlock(pBlock, false); } + pSortHandle->mergeLimit = -1; + pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES); pSortHandle->cmpParam.orderInfo = pSortInfo; pSortHandle->cmpParam.cmpGroupId = false; - + pSortHandle->cmpParam.sortType = type; + if (type == SORT_BLOCK_TS_MERGE) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pSortInfo, 0); + pSortHandle->cmpParam.tsSlotId = pOrder->slotId; + pSortHandle->cmpParam.order = pOrder->order; + pSortHandle->cmpParam.cmpFn = (pOrder->order == TSDB_ORDER_ASC) ? compareInt64Val : compareInt64ValDesc; + } tsortSetComparFp(pSortHandle, msortComparFn); if (idstr != NULL) { @@ -469,11 +480,14 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pHandle->type == SORT_SINGLESOURCE_SORT) { pSource->pageIndex++; if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { + qDebug("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); } else { + if (pSource->pageIndex % 512 == 0) qDebug("begin source %p page %d", pSource, pSource->pageIndex); + int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); void* pPage = getBufPage(pHandle->pBuf, *pPgId); @@ -486,7 +500,6 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (code != TSDB_CODE_SUCCESS) { return code; } - releaseBufPage(pHandle->pBuf, pPage); } } else { @@ -497,6 +510,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; + qDebug("adjust merge tree. %d source completed", *numOfCompleted); } } } @@ -577,53 +591,63 @@ int32_t msortComparFn(const void* pLeft, const void* pRight, void* param) { } } - for (int32_t i = 0; i < pInfo->size; ++i) { - SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); - SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + if (pParam->sortType == SORT_BLOCK_TS_MERGE) { + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pParam->tsSlotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pParam->tsSlotId); + int64_t* left1 = (int64_t*)(pLeftColInfoData->pData) + pLeftSource->src.rowIndex; + int64_t* right1 = (int64_t*)(pRightColInfoData->pData) + pRightSource->src.rowIndex; - bool leftNull = false; - if (pLeftColInfoData->hasNull) { - if (pLeftBlock->pBlockAgg == NULL) { - leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); - } else { - leftNull = - colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, pLeftBlock->pBlockAgg[i]); + int ret = pParam->cmpFn(left1, right1); + return ret; + } else { + for (int32_t i = 0; i < pInfo->size; ++i) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); + + bool leftNull = false; + if (pLeftColInfoData->hasNull) { + if (pLeftBlock->pBlockAgg == NULL) { + leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); + } else { + leftNull = colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, + pLeftBlock->pBlockAgg[i]); + } } - } - SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); - bool rightNull = false; - if (pRightColInfoData->hasNull) { - if (pRightBlock->pBlockAgg == NULL) { - rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); - } else { - rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, - pRightBlock->pBlockAgg[i]); + bool rightNull = false; + if (pRightColInfoData->hasNull) { + if (pRightBlock->pBlockAgg == NULL) { + rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); + } else { + rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, + pRightBlock->pBlockAgg[i]); + } } - } - if (leftNull && rightNull) { - continue; // continue to next slot - } + if (leftNull && rightNull) { + continue; // continue to next slot + } - if (rightNull) { - return pOrder->nullFirst ? 1 : -1; - } + if (rightNull) { + return pOrder->nullFirst ? 1 : -1; + } - if (leftNull) { - return pOrder->nullFirst ? -1 : 1; - } + if (leftNull) { + return pOrder->nullFirst ? -1 : 1; + } - void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); - void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); + void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); + void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); - __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); + __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); - int ret = fn(left1, right1); - if (ret == 0) { - continue; - } else { - return ret; + int ret = fn(left1, right1); + if (ret == 0) { + continue; + } else { + return ret; + } } } return 0; @@ -668,6 +692,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { + qDebug("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -690,13 +715,15 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { return code; } + int nMergedRows = 0; + SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t)); while (1) { if (tsortIsClosed(pHandle)) { code = terrno = TSDB_CODE_TSC_QUERY_CANCELLED; return code; } - + SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows); if (pDataBlock == NULL) { break; @@ -720,8 +747,12 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { setBufPageDirty(pPage, true); releaseBufPage(pHandle->pBuf, pPage); + nMergedRows += pDataBlock->info.rows; blockDataCleanup(pDataBlock); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + break; + } } sortComparCleanup(&pHandle->cmpParam); @@ -769,39 +800,285 @@ int32_t getProperSortPageSize(size_t rowSize, uint32_t numOfCols) { return pgSize; } -static int32_t createInitialSources(SSortHandle* pHandle) { - size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; - int32_t code = 0; +static int32_t createPageBuf(SSortHandle* pHandle) { + if (pHandle->pBuf == NULL) { + if (!osTempSpaceAvailable()) { + terrno = TSDB_CODE_NO_DISKSPACE; + qError("create page buf failed since %s, tempDir:%s", terrstr(), tsTempDir); + return terrno; + } - if (pHandle->type == SORT_SINGLESOURCE_SORT) { - SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); - SSortSource* source = *pSource; - *pSource = NULL; + int32_t code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize, + "tableBlocksBuf", tsTempDir); + dBufSetPrintInfo(pHandle->pBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + return 0; +} - tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); +typedef struct SBlkMergeSupport { + int64_t** aTs; + int32_t* aRowIdx; + int32_t order; +} SBlkMergeSupport; + +static int32_t blockCompareTsFn(const void* pLeft, const void* pRight, void* param) { + int32_t left = *(int32_t*)pLeft; + int32_t right = *(int32_t*)pRight; + + SBlkMergeSupport* pSup = (SBlkMergeSupport*)param; + if (pSup->aRowIdx[left] == -1) { + return 1; + } else if (pSup->aRowIdx[right] == -1) { + return -1; + } + + int64_t leftTs = pSup->aTs[left][pSup->aRowIdx[left]]; + int64_t rightTs = pSup->aTs[right][pSup->aRowIdx[right]]; + + int32_t ret = leftTs>rightTs ? 1 : ((leftTs < rightTs) ? -1 : 0); + if (pSup->order == TSDB_ORDER_DESC) { + ret = -1 * ret; + } + return ret; +} + +static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, SArray* aPgId) { + int32_t pageId = -1; + void* pPage = getNewBufPage(pHandle->pBuf, &pageId); + taosArrayPush(aPgId, &pageId); + + int32_t size = blockDataGetSize(blk) + sizeof(int32_t) + taosArrayGetSize(blk->pDataBlock) * sizeof(int32_t); + ASSERT(size <= getBufPageSize(pHandle->pBuf)); + + blockDataToBuf(pPage, blk); + + setBufPageDirty(pPage, true); + releaseBufPage(pHandle->pBuf, pPage); + + return 0; +} + +static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdxInPage) { + int sz = 0; + int numCols = taosArrayGetSize(blk->pDataBlock); + if (!blk->info.hasVarCol) { + sz += numCols * ((rowIdxInPage & 0x7) == 0 ? 1: 0); + sz += blockDataGetRowSize(blk); + } else { + for (int32_t i = 0; i < numCols; ++i) { + SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(blk->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + if (pColInfoData->varmeta.offset[row] != -1) { + char* p = colDataGetData(pColInfoData, row); + sz += varDataTLen(p); + } + + sz += sizeof(pColInfoData->varmeta.offset[0]); + } else { + sz += pColInfoData->info.bytes; - while (1) { - SSDataBlock* pBlock = pHandle->fetchfp(source->param); - if (pBlock == NULL) { - break; + if (((rowIdxInPage) & 0x07) == 0) { + sz += 1; // bitmap + } } + } + } + return sz; +} + +static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { + int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock); + int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz); + blockDataEnsureCapacity(pHandle->pDataBlock, rowCap); + blockDataCleanup(pHandle->pDataBlock); + int32_t numBlks = taosArrayGetSize(aBlk); + + SBlkMergeSupport sup; + sup.aRowIdx = taosMemoryCalloc(numBlks, sizeof(int32_t)); + sup.aTs = taosMemoryCalloc(numBlks, sizeof(int64_t*)); + sup.order = order->order; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + SColumnInfoData* col = taosArrayGet(blk->pDataBlock, order->slotId); + sup.aTs[i] = (int64_t*)col->pData; + sup.aRowIdx[i] = 0; + } + + int32_t totalRows = 0; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + totalRows += blk->info.rows; + } + + SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); + + SMultiwayMergeTreeInfo* pTree = NULL; + tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); + int32_t nRows = 0; + int32_t nMergedRows = 0; + bool mergeLimitReached = false; + size_t blkPgSz = pgHeaderSz; + + while (nRows < totalRows) { + int32_t minIdx = tMergeTreeGetChosenIndex(pTree); + SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); + int32_t minRow = sup.aRowIdx[minIdx]; + int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); + + if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + + blockDataCleanup(pHandle->pDataBlock); + blkPgSz = pgHeaderSz; + bufInc = getPageBufIncForRow(minBlk, minRow, 0); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + mergeLimitReached = true; + break; + } + } + blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1); + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + blkPgSz += bufInc; - if (pHandle->pDataBlock == NULL) { - uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); + ++nRows; - // todo, number of pages are set according to the total available sort buffer - pHandle->numOfPages = 1024; - sortBufSize = pHandle->numOfPages * pHandle->pageSize; - pHandle->pDataBlock = createOneDataBlock(pBlock, false); + if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { + sup.aRowIdx[minIdx] = -1; + } else { + ++sup.aRowIdx[minIdx]; + } + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); + } + if (pHandle->pDataBlock->info.rows > 0) { + if (!mergeLimitReached) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + } + blockDataCleanup(pHandle->pDataBlock); + } + SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); + doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); + + taosMemoryFree(sup.aRowIdx); + taosMemoryFree(sup.aTs); + + tMergeTreeDestroy(&pTree); + + return 0; +} + +static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); + size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); + SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); + + size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; + createPageBuf(pHandle); + + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); + int32_t szSort = 0; + + SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); + SSHashObj* mUidBlk = tSimpleHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); + while (1) { + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + + if (pBlk != NULL) { + szSort += blockDataGetSize(pBlk); + + void* ppBlk = tSimpleHashGet(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid)); + if (ppBlk != NULL) { + SSDataBlock* tBlk = *(SSDataBlock**)(ppBlk); + blockDataMerge(tBlk, pBlk); + } else { + SSDataBlock* tBlk = createOneDataBlock(pBlk, true); + tSimpleHashPut(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid), &tBlk, POINTER_BYTES); + taosArrayPush(aBlkSort, &tBlk); } + } + + if ((pBlk != NULL && szSort > maxBufSize) || (pBlk == NULL && szSort > 0)) { + tSimpleHashClear(mUidBlk); + + int64_t p = taosGetTimestampUs(); + sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; - if (pHandle->beforeFp != NULL) { - pHandle->beforeFp(pBlock, pHandle->param); + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); } + taosArrayClear(aBlkSort); + szSort = 0; + qDebug("source %zu created", taosArrayGetSize(aExtSrc)); + } + if (pBlk == NULL) { + break; + }; + } + tSimpleHashCleanup(mUidBlk); + taosArrayDestroy(aBlkSort); + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); + taosArrayDestroy(aExtSrc); - code = blockDataMerge(pHandle->pDataBlock, pBlock); - if (code != TSDB_CODE_SUCCESS) { + pHandle->type = SORT_SINGLESOURCE_SORT; + return 0; +} + +static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { + int32_t code = 0; + size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; + + SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); + SSortSource* source = *pSource; + *pSource = NULL; + + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + + while (1) { + SSDataBlock* pBlock = pHandle->fetchfp(source->param); + if (pBlock == NULL) { + break; + } + + if (pHandle->pDataBlock == NULL) { + uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); + + // todo, number of pages are set according to the total available sort buffer + pHandle->numOfPages = 1024; + sortBufSize = pHandle->numOfPages * pHandle->pageSize; + pHandle->pDataBlock = createOneDataBlock(pBlock, false); + } + + if (pHandle->beforeFp != NULL) { + pHandle->beforeFp(pBlock, pHandle->param); + } + + code = blockDataMerge(pHandle->pDataBlock, pBlock); + if (code != TSDB_CODE_SUCCESS) { + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } + taosMemoryFree(source); + return code; + } + + size_t size = blockDataGetSize(pHandle->pDataBlock); + if (size > sortBufSize) { + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + if (code != 0) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } @@ -809,74 +1086,67 @@ static int32_t createInitialSources(SSortHandle* pHandle) { blockDataDestroy(source->src.pBlock); source->src.pBlock = NULL; } + taosMemoryFree(source); return code; } - size_t size = blockDataGetSize(pHandle->pDataBlock); - if (size > sortBufSize) { - // Perform the in-memory sort and then flush data in the buffer into disk. - int64_t p = taosGetTimestampUs(); - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - if (code != 0) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; - } - - taosMemoryFree(source); - return code; - } - - int64_t el = taosGetTimestampUs() - p; - pHandle->sortElapsed += el; - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); - code = doAddToBuf(pHandle->pDataBlock, pHandle); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); + code = doAddToBuf(pHandle->pDataBlock, pHandle); + if (code != TSDB_CODE_SUCCESS) { + return code; } } + } - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } - taosMemoryFree(source); + taosMemoryFree(source); - if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { - size_t size = blockDataGetSize(pHandle->pDataBlock); + if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { + size_t size = blockDataGetSize(pHandle->pDataBlock); - // Perform the in-memory sort and then flush data in the buffer into disk. - int64_t p = taosGetTimestampUs(); + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - if (code != 0) { - return code; - } + code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + if (code != 0) { + return code; + } - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); - int64_t el = taosGetTimestampUs() - p; - pHandle->sortElapsed += el; + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; - // All sorted data can fit in memory, external memory sort is not needed. Return to directly - if (size <= sortBufSize && pHandle->pBuf == NULL) { - pHandle->cmpParam.numOfSources = 1; - pHandle->inMemSort = true; + // All sorted data can fit in memory, external memory sort is not needed. Return to directly + if (size <= sortBufSize && pHandle->pBuf == NULL) { + pHandle->cmpParam.numOfSources = 1; + pHandle->inMemSort = true; - pHandle->loops = 1; - pHandle->tupleHandle.rowIndex = -1; - pHandle->tupleHandle.pBlock = pHandle->pDataBlock; - return 0; - } else { - code = doAddToBuf(pHandle->pDataBlock, pHandle); - } + pHandle->loops = 1; + pHandle->tupleHandle.rowIndex = -1; + pHandle->tupleHandle.pBlock = pHandle->pDataBlock; + return 0; + } else { + code = doAddToBuf(pHandle->pDataBlock, pHandle); } } + return code; +} + +static int32_t createInitialSources(SSortHandle* pHandle) { + int32_t code = 0; + if (pHandle->type == SORT_SINGLESOURCE_SORT) { + code = createBlocksQuickSortInitialSources(pHandle); + } else if (pHandle->type == SORT_BLOCK_TS_MERGE) { + code = createBlocksMergeSortInitialSources(pHandle); + } + qDebug("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); return code; } @@ -923,6 +1193,10 @@ void tsortSetClosed(SSortHandle* pHandle) { atomic_store_8(&pHandle->closed, 2); } +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit) { + pHandle->mergeLimit = mergeLimit; +} + int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetchFp, void (*fp)(SSDataBlock*, void*), void* param) { pHandle->fetchfp = fetchFp; @@ -1002,8 +1276,8 @@ void tsortSetForceUsePQSort(SSortHandle* pHandle) { static bool tsortIsPQSortApplicable(SSortHandle* pHandle) { if (pHandle->type != SORT_SINGLESOURCE_SORT) return false; if (tsortIsForceUsePQSort(pHandle)) return true; - uint64_t maxRowsFitInMemory = pHandle->sortBufSize / (pHandle->maxTupleLength + sizeof(char*)); - return maxRowsFitInMemory > pHandle->maxRows; + uint64_t maxRowsFitInMemory = pHandle->pqSortBufSize / (pHandle->pqMaxTupleLength + sizeof(char*)); + return maxRowsFitInMemory > pHandle->pqMaxRows; } static bool tsortPQCompFn(void* a, void* b, void* param) { @@ -1049,7 +1323,7 @@ static int32_t tupleComparFn(const void* pLeft, const void* pRight, void* param) } static int32_t tsortOpenForPQSort(SSortHandle* pHandle) { - pHandle->pBoundedQueue = createBoundedQueue(pHandle->maxRows, tsortPQCompFn, destroyTuple, pHandle); + pHandle->pBoundedQueue = createBoundedQueue(pHandle->pqMaxRows, tsortPQCompFn, destroyTuple, pHandle); if (NULL == pHandle->pBoundedQueue) return TSDB_CODE_OUT_OF_MEMORY; tsortSetComparFp(pHandle, tupleComparFn); diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 6e4dde4ec1752f4bb3349c9c41658ab0140264ae..f5eacf0bd5d1c15bb7c773ee60caea7abc0dc0b5 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -361,6 +361,7 @@ static int32_t logicNodeCopy(const SLogicNode* pSrc, SLogicNode* pDst) { COPY_SCALAR_FIELD(groupAction); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return TSDB_CODE_SUCCESS; } @@ -397,6 +398,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_FIELD(pSubtable); COPY_SCALAR_FIELD(igLastNull); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } @@ -545,6 +547,7 @@ static int32_t physiNodeCopy(const SPhysiNode* pSrc, SPhysiNode* pDst) { CLONE_NODE_LIST_FIELD(pChildren); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return TSDB_CODE_SUCCESS; } @@ -556,6 +559,7 @@ static int32_t physiScanCopy(const SScanPhysiNode* pSrc, SScanPhysiNode* pDst) { COPY_SCALAR_FIELD(suid); COPY_SCALAR_FIELD(tableType); COPY_OBJECT_FIELD(tableName, sizeof(SName)); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 81116a60b06272a8b9be9a7c2438eec4b317f784..f25616065eb064d0160209320ecb744ba6ac23d8 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1559,6 +1559,7 @@ static const char* jkScanPhysiPlanTableId = "TableId"; static const char* jkScanPhysiPlanSTableId = "STableId"; static const char* jkScanPhysiPlanTableType = "TableType"; static const char* jkScanPhysiPlanTableName = "TableName"; +static const char* jkScanPhysiPlanGroupOrderScan = "GroupOrderScan"; static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { const STagScanPhysiNode* pNode = (const STagScanPhysiNode*)pObj; @@ -1582,6 +1583,9 @@ static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddObject(pJson, jkScanPhysiPlanTableName, nameToJson, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkScanPhysiPlanGroupOrderScan, pNode->groupOrderScan); + } return code; } @@ -1608,6 +1612,9 @@ static int32_t jsonToPhysiScanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonToObject(pJson, jkScanPhysiPlanTableName, jsonToName, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkScanPhysiPlanGroupOrderScan, &pNode->groupOrderScan); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 1ca37defa4a76a6b679e85facae10a6cd758fb80..20e829766dafe75aa2372aa1e4147ac9b856ee71 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -1853,7 +1853,8 @@ enum { PHY_NODE_CODE_LIMIT, PHY_NODE_CODE_SLIMIT, PHY_NODE_CODE_INPUT_TS_ORDER, - PHY_NODE_CODE_OUTPUT_TS_ORDER + PHY_NODE_CODE_OUTPUT_TS_ORDER, + PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR }; static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1878,6 +1879,9 @@ static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeEnum(pEncoder, PHY_NODE_CODE_OUTPUT_TS_ORDER, pNode->outputTsOrder); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR, pNode->forceCreateNonBlockingOptr); + } return code; } @@ -1910,6 +1914,8 @@ static int32_t msgToPhysiNode(STlvDecoder* pDecoder, void* pObj) { case PHY_NODE_CODE_OUTPUT_TS_ORDER: code = tlvDecodeEnum(pTlv, &pNode->outputTsOrder, sizeof(pNode->outputTsOrder)); break; + case PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR: + code = tlvDecodeBool(pTlv, &pNode->forceCreateNonBlockingOptr); default: break; } @@ -1925,7 +1931,8 @@ enum { PHY_SCAN_CODE_BASE_UID, PHY_SCAN_CODE_BASE_SUID, PHY_SCAN_CODE_BASE_TABLE_TYPE, - PHY_SCAN_CODE_BASE_TABLE_NAME + PHY_SCAN_CODE_BASE_TABLE_NAME, + PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN }; static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1950,6 +1957,9 @@ static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeObj(pEncoder, PHY_SCAN_CODE_BASE_TABLE_NAME, nameToMsg, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN, pNode->groupOrderScan); + } return code; } @@ -1982,6 +1992,9 @@ static int32_t msgToPhysiScanNode(STlvDecoder* pDecoder, void* pObj) { case PHY_SCAN_CODE_BASE_TABLE_NAME: code = tlvDecodeObjFromTlv(pTlv, msgToName, &pNode->tableName); break; + case PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN: + code = tlvDecodeBool(pTlv, &pNode->groupOrderScan); + break; default: break; } diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index 82abc5d1a973dbafa820c41e0614e1b2c90a92c3..092fe1741187dcf3706a64a7be64a3032835b44a 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -43,6 +43,9 @@ int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); +bool isPartTableAgg(SAggLogicNode* pAgg); +bool isPartTableWinodw(SWindowLogicNode* pWindow); + #ifdef __cplusplus } #endif diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 05f478b116518ac04d824c9e812440e5976ded87..32721d8060405ae9b21e6cdce06ec78d56bdadeb 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -363,6 +363,18 @@ static void scanPathOptSetScanOrder(EScanOrder scanOrder, SScanLogicNode* pScan) } } +static void scanPathOptSetGroupOrderScan(SScanLogicNode* pScan) { + if (pScan->tableType != TSDB_SUPER_TABLE) return; + + if (pScan->node.pParent && nodeType(pScan->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG) { + SAggLogicNode* pAgg = (SAggLogicNode*)pScan->node.pParent; + bool withSlimit = pAgg->node.pSlimit != NULL || (pAgg->node.pParent && pAgg->node.pParent->pSlimit); + if (withSlimit && isPartTableAgg(pAgg)) { + pScan->groupOrderScan = pAgg->node.forceCreateNonBlockingOptr = true; + } + } +} + static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SOsdInfo info = {.scanOrder = SCAN_ORDER_ASC}; int32_t code = scanPathOptMatch(pCxt, pLogicSubplan->pNode, &info); @@ -371,6 +383,7 @@ static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (!pCxt->pPlanCxt->streamQuery) { scanPathOptSetScanOrder(info.scanOrder, info.pScan); } + scanPathOptSetGroupOrderScan(info.pScan); } if (TSDB_CODE_SUCCESS == code && (NULL != info.pDsoFuncs || NULL != info.pSdrFuncs)) { info.pScan->dataRequired = scanPathOptGetDataRequired(info.pSdrFuncs); @@ -1675,6 +1688,7 @@ static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (TSDB_CODE_SUCCESS == code) { if (QUERY_NODE_LOGIC_PLAN_AGG == pNode->pParent->type) { SAggLogicNode* pParent = (SAggLogicNode*)(pNode->pParent); + scanPathOptSetGroupOrderScan(pScan); pParent->hasGroupKeyOptimized = true; } diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index b3d94a5e47aef06960954b88c612b9568f0e45d4..1b92dcd2e75acc6915c46ec2e509939c2189bc55 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -447,6 +447,7 @@ static int32_t createScanPhysiNodeFinalize(SPhysiPlanContext* pCxt, SSubplan* pS pScanPhysiNode->uid = pScanLogicNode->tableId; pScanPhysiNode->suid = pScanLogicNode->stableId; pScanPhysiNode->tableType = pScanLogicNode->tableType; + pScanPhysiNode->groupOrderScan = pScanLogicNode->groupOrderScan; memcpy(&pScanPhysiNode->tableName, &pScanLogicNode->tableName, sizeof(SName)); if (NULL != pScanLogicNode->pTagCond) { pSubplan->pTagCond = nodesCloneNode(pScanLogicNode->pTagCond); @@ -880,6 +881,7 @@ static int32_t createAggPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, pAgg->mergeDataBlock = (GROUP_ACTION_KEEP == pAggLogicNode->node.groupAction ? false : true); pAgg->groupKeyOptimized = pAggLogicNode->hasGroupKeyOptimized; + pAgg->node.forceCreateNonBlockingOptr = pAggLogicNode->node.forceCreateNonBlockingOptr; SNodeList* pPrecalcExprs = NULL; SNodeList* pGroupKeys = NULL; diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 246ee13fb00aa7d30857e63a03f18262ffb10510..84a486649efe4265794d24adf1bde7a295c779d7 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -306,54 +306,6 @@ static bool stbSplIsTableCountQuery(SLogicNode* pNode) { return QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && SCAN_TYPE_TABLE_COUNT == ((SScanLogicNode*)pChild)->scanType; } -static SNodeList* stbSplGetPartKeys(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return ((SScanLogicNode*)pNode)->pGroupTags; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return ((SPartitionLogicNode*)pNode)->pPartitionKeys; - } else { - return NULL; - } -} - -static bool stbSplHasPartTbname(SNodeList* pPartKeys) { - if (NULL == pPartKeys) { - return false; - } - SNode* pPartKey = NULL; - FOREACH(pPartKey, pPartKeys) { - if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { - pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); - } - if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || - (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { - return true; - } - } - return false; -} - -static bool stbSplNotSystemScan(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); - } else { - return true; - } -} - -static bool stbSplIsPartTableAgg(SAggLogicNode* pAgg) { - if (1 != LIST_LENGTH(pAgg->node.pChildren)) { - return false; - } - if (NULL != pAgg->pGroupKeys) { - return stbSplHasPartTbname(pAgg->pGroupKeys) && - stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); - } - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); -} - static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { switch (nodeType(pNode)) { case QUERY_NODE_LOGIC_PLAN_SCAN: @@ -364,7 +316,7 @@ static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { return streamQuery ? false : stbSplIsMultiTbScanChild(streamQuery, pNode); case QUERY_NODE_LOGIC_PLAN_AGG: return (!stbSplHasGatherExecFunc(((SAggLogicNode*)pNode)->pAggFuncs) || - stbSplIsPartTableAgg((SAggLogicNode*)pNode)) && + isPartTableAgg((SAggLogicNode*)pNode)) && stbSplHasMultiTbScan(streamQuery, pNode) && !stbSplIsTableCountQuery(pNode); case QUERY_NODE_LOGIC_PLAN_WINDOW: return stbSplNeedSplitWindow(streamQuery, pNode); @@ -778,10 +730,6 @@ static int32_t stbSplSplitEvent(SSplitContext* pCxt, SStableSplitInfo* pInfo) { } } -static bool stbSplIsPartTableWinodw(SWindowLogicNode* pWindow) { - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); -} - static int32_t stbSplSplitWindowForCrossTable(SSplitContext* pCxt, SStableSplitInfo* pInfo) { switch (((SWindowLogicNode*)pInfo->pSplitNode)->winType) { case WINDOW_TYPE_INTERVAL: @@ -834,7 +782,7 @@ static int32_t stbSplSplitWindowForPartTable(SSplitContext* pCxt, SStableSplitIn } static int32_t stbSplSplitWindowNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if (stbSplIsPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { + if (isPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { return stbSplSplitWindowForPartTable(pCxt, pInfo); } else { return stbSplSplitWindowForCrossTable(pCxt, pInfo); @@ -920,7 +868,7 @@ static int32_t stbSplSplitAggNodeForCrossTable(SSplitContext* pCxt, SStableSplit } static int32_t stbSplSplitAggNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if (stbSplIsPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { + if (isPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { return stbSplSplitAggNodeForPartTable(pCxt, pInfo); } return stbSplSplitAggNodeForCrossTable(pCxt, pInfo); diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 29e87b34ce046d166678ab54ae5095d6e9a859fe..88086cde1d0edb91e2918a26935495be0b3120ce 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -321,3 +321,57 @@ int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode, EDataOrderLevel requir } return code; } + +static bool stbNotSystemScan(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return stbNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); + } else { + return true; + } +} + +static bool stbHasPartTbname(SNodeList* pPartKeys) { + if (NULL == pPartKeys) { + return false; + } + SNode* pPartKey = NULL; + FOREACH(pPartKey, pPartKeys) { + if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { + pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); + } + if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || + (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { + return true; + } + } + return false; +} + +static SNodeList* stbSplGetPartKeys(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return ((SScanLogicNode*)pNode)->pGroupTags; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return ((SPartitionLogicNode*)pNode)->pPartitionKeys; + } else { + return NULL; + } +} + +bool isPartTableAgg(SAggLogicNode* pAgg) { + if (1 != LIST_LENGTH(pAgg->node.pChildren)) { + return false; + } + if (NULL != pAgg->pGroupKeys) { + return stbHasPartTbname(pAgg->pGroupKeys) && + stbNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); + } + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); +} + +bool isPartTableWinodw(SWindowLogicNode* pWindow) { + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); +} + + diff --git a/tests/system-test/0-others/show.py b/tests/system-test/0-others/show.py index 50a1662ba013d4e4dd52ffbf4bdfb9f47f7c92d7..9d26b3a2aeb679195246e0989605b48f12b91a77 100644 --- a/tests/system-test/0-others/show.py +++ b/tests/system-test/0-others/show.py @@ -95,6 +95,23 @@ class TDTestCase: tdSql.checkEqual(f'{db}',tdSql.queryResult[0][0]) tdSql.checkEqual(f'CREATE DATABASE `{db}`',tdSql.queryResult[0][1]) + def show_create_systb_sql(self): + for param in self.ins_param_list: + tdSql.query(f'show create table information_schema.ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use information_schema') + tdSql.query(f'show create table ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + for param in self.perf_param_list: + tdSql.query(f'show create table performance_schema.perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use performance_schema') + tdSql.query(f'show create table perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + def show_create_sql(self): create_db_sql = self.set_create_database_sql(self.db_param) print(create_db_sql) @@ -200,6 +217,7 @@ class TDTestCase: self.perf_check() self.show_create_sql() self.show_create_sysdb_sql() + self.show_create_systb_sql() def stop(self): tdSql.close() diff --git a/tests/system-test/2-query/columnLenUpdated.py b/tests/system-test/2-query/columnLenUpdated.py index e43b32a716017702382ce0384bb377d692e64684..93d9a492f946db423b78f7891b533af8c9a6da9f 100644 --- a/tests/system-test/2-query/columnLenUpdated.py +++ b/tests/system-test/2-query/columnLenUpdated.py @@ -202,7 +202,7 @@ class TDTestCase: if retCode != "TAOS_OK": tdLog.exit("taos -s fail") - tdSql.query("select count(*) from stb group by tg1") + tdSql.query("select count(*) from stb group by tg1 order by count(*) desc") tdSql.checkData(0, 0, 2) tdSql.checkData(1, 0, 1) diff --git a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py index 60daa8cdc27f5e683239d6722e4f28ae1b8d90d1..b4046b8c98b311d8f73fc811cf4e70a2d2510e38 100644 --- a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py +++ b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py @@ -79,6 +79,11 @@ class TDTestCase: tdSql.query("select count(*) from (select * from meters order by ts desc)") tdSql.checkData(0, 0, allCnt) + rowCnt = tdSql.query("select tbname, count(*) from meters partition by tbname slimit 11") + if rowCnt != 10: + tdLog.exit("partition by tbname should return 10 rows of table data which is " + str(rowCnt)) + return + def run(self): binPath = self.getPath()