From 9b40ec72d6ed24648883ce6e9d90e88718350f7b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 26 Apr 2022 13:09:29 +0800 Subject: [PATCH] fix(query): handle the optimized order by in tablescan operator. --- include/common/tcommon.h | 10 +- include/libs/function/function.h | 2 +- source/dnode/vnode/inc/vnode.h | 15 +-- source/dnode/vnode/src/tsdb/tsdbRead.c | 80 +++++++--------- source/libs/executor/inc/executorimpl.h | 25 +++-- source/libs/executor/src/executorimpl.c | 99 ++++++++++++-------- source/libs/executor/src/scanoperator.c | 116 ++++++++++++------------ 7 files changed, 180 insertions(+), 167 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 7c308f9354..00f9e39c7a 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -99,6 +99,15 @@ typedef struct SColumnInfoData { }; } SColumnInfoData; +typedef struct SQueryTableDataCond { + STimeWindow twindow; + int32_t order; // desc|asc order to iterate the data block + int32_t numOfCols; + SColumnInfo *colList; + bool loadExternalRows; // load external rows or not + int32_t type; // data block load type: +} SQueryTableDataCond; + void* blockDataDestroy(SSDataBlock* pBlock); int32_t tEncodeDataBlock(void** buf, const SSDataBlock* pBlock); void* tDecodeDataBlock(const void* buf, SSDataBlock* pBlock); @@ -229,7 +238,6 @@ typedef struct SResSchame { char name[TSDB_COL_NAME_LEN]; } SResSchema; -// TODO move away to executor.h typedef struct SExprBasicInfo { SResSchema resSchema; int16_t numOfParams; // argument value of each function diff --git a/include/libs/function/function.h b/include/libs/function/function.h index ed8d54cef8..8805dd16eb 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -202,7 +202,7 @@ typedef struct SqlFunctionCtx { SPoint1 end; SFuncExecFuncs fpSet; SScalarFuncExecFuncs sfp; - SExprInfo *pExpr; + struct SExprInfo *pExpr; struct SDiskbasedBuf *pBuf; struct SSDataBlock *pSrcBlock; int32_t curBufPage; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index f4975c183e..28a70b8255 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -77,16 +77,15 @@ char *metaTbCursorNext(SMTbCursor *pTbCur); // tsdb typedef struct STsdb STsdb; -typedef struct STsdbQueryCond STsdbQueryCond; typedef void *tsdbReaderT; #define BLOCK_LOAD_OFFSET_SEQ_ORDER 1 #define BLOCK_LOAD_TABLE_SEQ_ORDER 2 #define BLOCK_LOAD_TABLE_RR_ORDER 3 -tsdbReaderT *tsdbQueryTables(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *tableInfoGroup, uint64_t qId, +tsdbReaderT *tsdbQueryTables(STsdb *tsdb, SQueryTableDataCond *pCond, STableGroupInfo *tableInfoGroup, uint64_t qId, uint64_t taskId); -tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, +tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, SQueryTableDataCond *pCond, STableGroupInfo *groupList, uint64_t qId, void *pMemRef); int32_t tsdbGetFileBlocksDistInfo(tsdbReaderT *pReader, STableBlockDistInfo *pTableBlockInfo); bool isTsdbCacheLastRow(tsdbReaderT *pReader); @@ -98,6 +97,7 @@ bool tsdbNextDataBlock(tsdbReaderT pTsdbReadHandle); void tsdbRetrieveDataBlockInfo(tsdbReaderT *pTsdbReadHandle, SDataBlockInfo *pBlockInfo); int32_t tsdbRetrieveDataBlockStatisInfo(tsdbReaderT *pTsdbReadHandle, SColumnDataAgg **pBlockStatis); SArray *tsdbRetrieveDataBlock(tsdbReaderT *pTsdbReadHandle, SArray *pColumnIdList); +void tsdbResetReadHandle(tsdbReaderT queryHandle, SQueryTableDataCond* pCond); void tsdbDestroyTableGroup(STableGroupInfo *pGroupList); int32_t tsdbGetOneTableGroup(void *pMeta, uint64_t uid, TSKEY startKey, STableGroupInfo *pGroupInfo); int32_t tsdbGetTableGroupFromIdList(STsdb *tsdb, SArray *pTableIdList, STableGroupInfo *pGroupInfo); @@ -157,15 +157,6 @@ struct SVnodeCfg { int8_t hashMethod; }; -struct STsdbQueryCond { - STimeWindow twindow; - int32_t order; // desc|asc order to iterate the data block - int32_t numOfCols; - SColumnInfo *colList; - bool loadExternalRows; // load external rows or not - int32_t type; // data block load type: -}; - typedef struct { TSKEY lastKey; uint64_t uid; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 098b86975b..1e28e27484 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -254,7 +254,7 @@ static SArray* createCheckInfoFromTableGroup(STsdbReadHandle* pTsdbReadHandle, S assert(info.lastKey >= pTsdbReadHandle->window.skey && info.lastKey <= pTsdbReadHandle->window.ekey); } else { - assert(info.lastKey >= pTsdbReadHandle->window.ekey && info.lastKey <= pTsdbReadHandle->window.skey); + info.lastKey = pTsdbReadHandle->window.skey; } taosArrayPush(pTableCheckInfo, &info); @@ -317,7 +317,7 @@ static int64_t getEarliestValidTimestamp(STsdb* pTsdb) { return now - (tsTickPerDay[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick } -static void setQueryTimewindow(STsdbReadHandle* pTsdbReadHandle, STsdbQueryCond* pCond) { +static void setQueryTimewindow(STsdbReadHandle* pTsdbReadHandle, SQueryTableDataCond* pCond) { pTsdbReadHandle->window = pCond->twindow; bool updateTs = false; @@ -343,7 +343,7 @@ static void setQueryTimewindow(STsdbReadHandle* pTsdbReadHandle, STsdbQueryCond* } } -static STsdbReadHandle* tsdbQueryTablesImpl(STsdb* tsdb, STsdbQueryCond* pCond, uint64_t qId, uint64_t taskId) { +static STsdbReadHandle* tsdbQueryTablesImpl(STsdb* tsdb, SQueryTableDataCond* pCond, uint64_t qId, uint64_t taskId) { STsdbReadHandle* pReadHandle = taosMemoryCalloc(1, sizeof(STsdbReadHandle)); if (pReadHandle == NULL) { goto _end; @@ -422,7 +422,7 @@ _end: return NULL; } -tsdbReaderT* tsdbQueryTables(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, +tsdbReaderT* tsdbQueryTables(STsdb* tsdb, SQueryTableDataCond* pCond, STableGroupInfo* groupList, uint64_t qId, uint64_t taskId) { STsdbReadHandle* pTsdbReadHandle = tsdbQueryTablesImpl(tsdb, pCond, qId, taskId); if (pTsdbReadHandle == NULL) { @@ -448,7 +448,7 @@ tsdbReaderT* tsdbQueryTables(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo return (tsdbReaderT)pTsdbReadHandle; } -void tsdbResetQueryHandle(tsdbReaderT queryHandle, STsdbQueryCond* pCond) { +void tsdbResetReadHandle(tsdbReaderT queryHandle, SQueryTableDataCond* pCond) { STsdbReadHandle* pTsdbReadHandle = queryHandle; if (emptyQueryTimewindow(pTsdbReadHandle)) { @@ -460,9 +460,9 @@ void tsdbResetQueryHandle(tsdbReaderT queryHandle, STsdbQueryCond* pCond) { return; } - pTsdbReadHandle->order = pCond->order; - pTsdbReadHandle->window = pCond->twindow; - pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL; + pTsdbReadHandle->order = pCond->order; + pTsdbReadHandle->window = pCond->twindow; + pTsdbReadHandle->type = TSDB_QUERY_TYPE_ALL; pTsdbReadHandle->cur.fid = -1; pTsdbReadHandle->cur.win = TSWINDOW_INITIALIZER; pTsdbReadHandle->checkFiles = true; @@ -485,7 +485,7 @@ void tsdbResetQueryHandle(tsdbReaderT queryHandle, STsdbQueryCond* pCond) { resetCheckInfo(pTsdbReadHandle); } -void tsdbResetQueryHandleForNewTable(tsdbReaderT queryHandle, STsdbQueryCond* pCond, STableGroupInfo* groupList) { +void tsdbResetQueryHandleForNewTable(tsdbReaderT queryHandle, SQueryTableDataCond* pCond, STableGroupInfo* groupList) { STsdbReadHandle* pTsdbReadHandle = queryHandle; pTsdbReadHandle->order = pCond->order; @@ -526,7 +526,7 @@ void tsdbResetQueryHandleForNewTable(tsdbReaderT queryHandle, STsdbQueryCond* pC // pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next); } -tsdbReaderT tsdbQueryLastRow(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, +tsdbReaderT tsdbQueryLastRow(STsdb* tsdb, SQueryTableDataCond* pCond, STableGroupInfo* groupList, uint64_t qId, uint64_t taskId) { pCond->twindow = updateLastrowForEachGroup(groupList); @@ -555,7 +555,7 @@ tsdbReaderT tsdbQueryLastRow(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo } #if 0 -tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, STsdbQueryCond *pCond, STableGroupInfo *groupList, uint64_t qId, STsdbMemTable* pMemRef) { +tsdbReaderT tsdbQueryCacheLast(STsdb *tsdb, SQueryTableDataCond *pCond, STableGroupInfo *groupList, uint64_t qId, STsdbMemTable* pMemRef) { STsdbReadHandle *pTsdbReadHandle = (STsdbReadHandle*) tsdbQueryTables(tsdb, pCond, groupList, qId, pMemRef); if (pTsdbReadHandle == NULL) { return NULL; @@ -618,7 +618,7 @@ static STableGroupInfo* trimTableGroup(STimeWindow* window, STableGroupInfo* pGr return pNew; } -tsdbReaderT tsdbQueryRowsInExternalWindow(STsdb* tsdb, STsdbQueryCond* pCond, STableGroupInfo* groupList, uint64_t qId, +tsdbReaderT tsdbQueryRowsInExternalWindow(STsdb* tsdb, SQueryTableDataCond* pCond, STableGroupInfo* groupList, uint64_t qId, uint64_t taskId) { STableGroupInfo* pNew = trimTableGroup(&pCond->twindow, groupList); @@ -1185,10 +1185,11 @@ static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock* tsdbDebug("%p no data in mem, %s", pTsdbReadHandle, pTsdbReadHandle->idStr); } - if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || - (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { - if ((ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || - (!ASCENDING_TRAVERSE(pTsdbReadHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { + bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order); + + if ((ascScan && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || (!ascScan && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { + + if ((ascScan && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || (!ascScan && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { // do not load file block into buffer int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1; @@ -1225,8 +1226,7 @@ static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock* assert(pTsdbReadHandle->outputCapacity >= binfo.rows); int32_t endPos = getEndPosInDataBlock(pTsdbReadHandle, &binfo); - if ((cur->pos == 0 && endPos == binfo.rows - 1 && ASCENDING_TRAVERSE(pTsdbReadHandle->order)) || - (cur->pos == (binfo.rows - 1) && endPos == 0 && (!ASCENDING_TRAVERSE(pTsdbReadHandle->order)))) { + if ((cur->pos == 0 && endPos == binfo.rows - 1 && ascScan) || (cur->pos == (binfo.rows - 1) && endPos == 0 && (!ascScan))) { pTsdbReadHandle->realNumOfRows = binfo.rows; cur->rows = binfo.rows; @@ -1234,7 +1234,7 @@ static int32_t handleDataMergeIfNeeded(STsdbReadHandle* pTsdbReadHandle, SBlock* cur->mixBlock = false; cur->blockCompleted = true; - if (ASCENDING_TRAVERSE(pTsdbReadHandle->order)) { + if (ascScan) { cur->lastKey = binfo.window.ekey + 1; cur->pos = binfo.rows; } else { @@ -1382,8 +1382,6 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) { - int32_t step = ASCENDING_TRAVERSE(pTsdbReadHandle->order) ? 1 : -1; - SDataCols* pCols = pTsdbReadHandle->rhelper.pDCols[0]; TSKEY* tsArray = pCols->cols[0].pData; @@ -1394,6 +1392,11 @@ static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t return numOfRows; } + bool ascScan = ASCENDING_TRAVERSE(pTsdbReadHandle->order); + int32_t trueStart = ascScan ? start : end; + int32_t trueEnd = ascScan ? end : start; + int32_t step = ascScan ? 1 : -1; + int32_t requiredNumOfCols = (int32_t)taosArrayGetSize(pTsdbReadHandle->pColumns); // data in buffer has greater timestamp, copy data in file block @@ -1411,7 +1414,7 @@ static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t if (!IS_VAR_DATA_TYPE(pColInfo->info.type)) { // todo opt performance // memmove(pData, (char*)src->pData + bytes * start, bytes * num); int32_t rowIndex = numOfRows; - for (int32_t k = start; k <= end; ++k, ++rowIndex) { + for (int32_t k = trueStart; ((ascScan && k <= trueEnd) || (!ascScan && k >= trueEnd)); k += step, ++rowIndex) { SCellVal sVal = {0}; if (tdGetColDataOfRow(&sVal, src, k, pCols->bitmapMode) < 0) { TASSERT(0); @@ -1427,7 +1430,7 @@ static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t int32_t rowIndex = numOfRows; // todo refactor, only copy one-by-one - for (int32_t k = start; k < num + start; ++k, ++rowIndex) { + for (int32_t k = trueStart; ((ascScan && k <= trueEnd) || (!ascScan && k >= trueEnd)); k += step, ++rowIndex) { SCellVal sVal = {0}; if (tdGetColDataOfRow(&sVal, src, k, pCols->bitmapMode) < 0) { TASSERT(0); @@ -1444,26 +1447,19 @@ static int32_t doCopyRowsFromFileBlock(STsdbReadHandle* pTsdbReadHandle, int32_t j++; i++; } else { // pColInfo->info.colId < src->colId, it is a NULL data - int32_t rowIndex = numOfRows; - for (int32_t k = start; k < num + start; ++k, ++rowIndex) { // TODO opt performance - colDataAppend(pColInfo, rowIndex, NULL, true); - } + colDataAppendNNULL(pColInfo, numOfRows, num); i++; } } while (i < requiredNumOfCols) { // the remain columns are all null data SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); - int32_t rowIndex = numOfRows; - - for (int32_t k = start; k < num + start; ++k, ++rowIndex) { - colDataAppend(pColInfo, rowIndex, NULL, true); // TODO add a fast version to set a number of consecutive NULL value. - } + colDataAppendNNULL(pColInfo, numOfRows, num); i++; } - pTsdbReadHandle->cur.win.ekey = tsArray[end]; - pTsdbReadHandle->cur.lastKey = tsArray[end] + step; + pTsdbReadHandle->cur.win.ekey = tsArray[trueEnd]; + pTsdbReadHandle->cur.lastKey = tsArray[trueEnd] + step; return numOfRows + num; } @@ -2966,7 +2962,7 @@ bool tsdbNextDataBlock(tsdbReaderT pHandle) { // } // // // load the previous row -// STsdbQueryCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER}; +// SQueryTableDataCond cond = {.numOfCols = numOfCols, .loadExternalRows = false, .type = BLOCK_LOAD_OFFSET_SEQ_ORDER}; // if (type == TSDB_PREV_ROW) { // cond.order = TSDB_ORDER_DESC; // cond.twindow = (STimeWindow){pTsdbReadHandle->window.skey, INT64_MIN}; @@ -3330,21 +3326,7 @@ SArray* tsdbRetrieveDataBlock(tsdbReaderT* pTsdbReadHandle, SArray* pIdList) { return NULL; } - // todo refactor int32_t numOfRows = doCopyRowsFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); - - // if the buffer is not full in case of descending order query, move the data in the front of the buffer - if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) { - int32_t emptySize = pHandle->outputCapacity - numOfRows; - int32_t reqNumOfCols = (int32_t)taosArrayGetSize(pHandle->pColumns); - - for (int32_t i = 0; i < reqNumOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i); - memmove((char*)pColInfo->pData, (char*)pColInfo->pData + emptySize * pColInfo->info.bytes, - numOfRows * pColInfo->info.bytes); - } - } - return pHandle->pColumns; } } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 363824f2e2..67dd6eeb03 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -319,27 +319,32 @@ typedef struct SColMatchInfo { bool output; } SColMatchInfo; +typedef struct SScanInfo { + int32_t numOfAsc; + int32_t numOfDesc; +} SScanInfo; + typedef struct STableScanInfo { void* dataReader; + int32_t numOfBlocks; // extract basic running information. int32_t numOfSkipped; int32_t numOfBlockStatis; int64_t numOfRows; - int32_t order; // scan order - int32_t times; // repeat counts + int64_t elapsedTime; + int32_t prevGroupId; // previous table group id + SScanInfo scanInfo; int32_t current; - int32_t reverseTimes; // 0 by default - SNode* pFilterNode; // filter operator info - SqlFunctionCtx* pCtx; // next operator query context + SNode* pFilterNode; // filter operator info + SqlFunctionCtx* pCtx; // next operator query context SResultRowInfo* pResultRowInfo; int32_t* rowCellInfoOffset; SExprInfo* pExpr; SSDataBlock* pResBlock; SArray* pColMatchInfo; int32_t numOfOutput; - int64_t elapsedTime; - int32_t prevGroupId; // previous table group id + SQueryTableDataCond cond; int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan int32_t dataBlockLoadFlag; double sampleRatio; // data block sample ratio, 1 by default @@ -627,9 +632,9 @@ SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, SOperatorInfo* createExchangeOperatorInfo(const SNodeList* pSources, SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createTableScanOperatorInfo(void* pReaderHandle, int32_t order, int32_t numOfCols, int32_t dataLoadFlag, int32_t repeatTime, - int32_t reverseTime, SArray* pColMatchInfo, SSDataBlock* pResBlock, SNode* pCondition, - SInterval* pInterval, double ratio, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createTableScanOperatorInfo(void* pDataReader, SQueryTableDataCond* pCond, int32_t numOfOutput, int32_t dataLoadFlag, const uint8_t* scanInfo, + SArray* pColMatchInfo, SSDataBlock* pResBlock, SNode* pCondition, SInterval* pInterval, double sampleRatio, SExecTaskInfo* pTaskInfo); + SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResultBlock, SExprInfo* pScalarExprInfo, int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index d0da970b2c..c7b5e9f392 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -191,7 +191,7 @@ static SColumnInfo* extractColumnFilterInfo(SExprInfo* pExpr, int32_t numOfOutpu static int32_t setTimestampListJoinInfo(STaskRuntimeEnv* pRuntimeEnv, SVariant* pTag, STableQueryInfo* pTableQueryInfo); static void releaseQueryBuf(size_t numOfTables); static int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order); -// static STsdbQueryCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win); +// static SQueryTableDataCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win); static STableIdInfo createTableIdInfo(STableQueryInfo* pTableQueryInfo); static int32_t getNumOfScanTimes(STaskAttr* pQueryAttr); @@ -3587,8 +3587,8 @@ static void doTableQueryInfoTimeWindowCheck(SExecTaskInfo* pTaskInfo, STableQuer #endif } -// STsdbQueryCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win) { -// STsdbQueryCond cond = { +// SQueryTableDataCond createTsdbQueryCond(STaskAttr* pQueryAttr, STimeWindow* win) { +// SQueryTableDataCond cond = { // .colList = pQueryAttr->tableCols, // .order = pQueryAttr->order.order, // .numOfCols = pQueryAttr->numOfCols, @@ -4677,7 +4677,7 @@ _error: return NULL; } -static int32_t getTableScanOrder(STableScanInfo* pTableScanInfo) { return pTableScanInfo->order; } +//static int32_t getTableScanOrder(STableScanInfo* pTableScanInfo) { return pTableScanInfo->order; } // this is a blocking operator static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { @@ -5657,8 +5657,7 @@ static STableQueryInfo* initTableQueryInfo(const STableGroupInfo* pTableGroupInf SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SExprInfo* pExprInfo, int32_t numOfCols, SSDataBlock* pResultBlock, SExprInfo* pScalarExprInfo, - int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo, - const STableGroupInfo* pTableGroupInfo) { + int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo, const STableGroupInfo* pTableGroupInfo) { SAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SAggOperatorInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -6315,6 +6314,19 @@ static SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOu static SArray* createSortInfo(SNodeList* pNodeList, SNodeList* pNodeListTarget); static SArray* createIndexMap(SNodeList* pNodeList); static SArray* extractPartitionColInfo(SNodeList* pNodeList); +static int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode); + +static SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode) { + SInterval interval = { + .interval = pTableScanNode->interval, + .sliding = pTableScanNode->sliding, + .intervalUnit = pTableScanNode->intervalUnit, + .slidingUnit = pTableScanNode->slidingUnit, + .offset = pTableScanNode->offset, + }; + + return interval; +} SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, uint64_t queryId, uint64_t taskId, STableGroupInfo* pTableGroupInfo) { @@ -6325,7 +6337,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo SScanPhysiNode* pScanPhyNode = (SScanPhysiNode*)pPhyNode; STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; - int32_t numOfCols = 0; + int32_t numOfCols = 0; tsdbReaderT pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableGroupInfo, (uint64_t)queryId, taskId); if (pDataReader == NULL && terrno != 0) { return NULL; @@ -6335,16 +6347,14 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo extractColMatchInfo(pScanPhyNode->pScanCols, pScanPhyNode->node.pOutputDataBlockDesc, &numOfCols); SSDataBlock* pResBlock = createResDataBlock(pScanPhyNode->node.pOutputDataBlockDesc); - SInterval interval = { - .interval = pTableScanNode->interval, - .sliding = pTableScanNode->sliding, - .intervalUnit = pTableScanNode->intervalUnit, - .slidingUnit = pTableScanNode->slidingUnit, - .offset = pTableScanNode->offset, - }; + SQueryTableDataCond cond = {0}; + int32_t code = initQueryTableDataCond(&cond, pTableScanNode); + if (code != TSDB_CODE_SUCCESS) { + return NULL; + } - return createTableScanOperatorInfo(pDataReader, pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC, - numOfCols, pTableScanNode->dataRequired, pTableScanNode->scanSeq[0], pTableScanNode->scanSeq[1], pColList, + SInterval interval = extractIntervalInfo(pTableScanNode); + return createTableScanOperatorInfo(pDataReader, &cond, numOfCols, pTableScanNode->dataRequired, pTableScanNode->scanSeq, pColList, pResBlock, pScanPhyNode->node.pConditions, &interval, pTableScanNode->ratio, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_EXCHANGE == type) { SExchangePhysiNode* pExchange = (SExchangePhysiNode*)pPhyNode; @@ -6365,10 +6375,10 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo return pOperator; } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) { SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode; - SSDataBlock* pResBlock = createResDataBlock(pSysScanPhyNode->scan.node.pOutputDataBlockDesc); + SScanPhysiNode* pScanNode = &pSysScanPhyNode->scan; - struct SScanPhysiNode* pScanNode = &pSysScanPhyNode->scan; - SArray* colList = extractScanColumnId(pScanNode->pScanCols); + SSDataBlock* pResBlock = createResDataBlock(pScanNode->node.pOutputDataBlockDesc); + SArray* colList = extractScanColumnId(pScanNode->pScanCols); SOperatorInfo* pOperator = createSysTableScanOperatorInfo( pHandle->meta, pResBlock, &pScanNode->tableName, pScanNode->node.pConditions, pSysScanPhyNode->mgmtEpSet, @@ -6489,38 +6499,47 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo return pOptr; } -static tsdbReaderT createDataReaderImpl(STableScanPhysiNode* pTableScanNode, STableGroupInfo* pGroupInfo, - void* readHandle, uint64_t queryId, uint64_t taskId) { - STsdbQueryCond cond = {.loadExternalRows = false}; - cond.order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; - cond.numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols); - cond.colList = taosMemoryCalloc(cond.numOfCols, sizeof(SColumnInfo)); - if (cond.colList == NULL) { +static int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) { + pCond->loadExternalRows = false; + + pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; + pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols); + pCond->colList = taosMemoryCalloc(pCond->numOfCols, sizeof(SColumnInfo)); + if (pCond->colList == NULL) { terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; - return NULL; + return terrno; + } + + pCond->twindow = pTableScanNode->scanRange; + +#if 1 + //todo work around a problem, remove it later + if ((pCond->order == TSDB_ORDER_ASC && pCond->twindow.skey > pCond->twindow.ekey) || + (pCond->order == TSDB_ORDER_DESC && pCond->twindow.skey < pCond->twindow.ekey)) { + TSWAP(pCond->twindow.skey, pCond->twindow.ekey, int64_t); } +#endif - cond.twindow = pTableScanNode->scanRange; - cond.type = BLOCK_LOAD_OFFSET_SEQ_ORDER; - // cond.type = pTableScanNode->scanFlag; + pCond->type = BLOCK_LOAD_OFFSET_SEQ_ORDER; + // pCond->type = pTableScanNode->scanFlag; int32_t j = 0; - for (int32_t i = 0; i < cond.numOfCols; ++i) { + for (int32_t i = 0; i < pCond->numOfCols; ++i) { STargetNode* pNode = (STargetNode*)nodesListGetNode(pTableScanNode->scan.pScanCols, i); SColumnNode* pColNode = (SColumnNode*)pNode->pExpr; if (pColNode->colType == COLUMN_TYPE_TAG) { continue; } - cond.colList[j].type = pColNode->node.resType.type; - cond.colList[j].bytes = pColNode->node.resType.bytes; - cond.colList[j].colId = pColNode->colId; + pCond->colList[j].type = pColNode->node.resType.type; + pCond->colList[j].bytes = pColNode->node.resType.bytes; + pCond->colList[j].colId = pColNode->colId; j += 1; } - cond.numOfCols = j; - return tsdbQueryTables(readHandle, &cond, pGroupInfo, queryId, taskId); + pCond->numOfCols = j; + return TSDB_CODE_SUCCESS; } SArray* extractScanColumnId(SNodeList* pNodeList) { @@ -6738,7 +6757,13 @@ tsdbReaderT doCreateDataReader(STableScanPhysiNode* pTableScanNode, SReadHandle* goto _error; } - return createDataReaderImpl(pTableScanNode, pTableGroupInfo, pHandle->reader, queryId, taskId); + SQueryTableDataCond cond = {0}; + code = initQueryTableDataCond(&cond, pTableScanNode); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return tsdbQueryTables(pHandle->reader, &cond, pTableGroupInfo, queryId, taskId); _error: terrno = code; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a70e402871..ab7fdb2d92 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -255,17 +255,15 @@ int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableScanInfo, return TSDB_CODE_SUCCESS; } -static void setupEnvForReverseScan(STableScanInfo* pTableScanInfo, SqlFunctionCtx* pCtx, int32_t numOfOutput) { - // reverse order time range +static void prepareForDescendingScan(STableScanInfo* pTableScanInfo, SqlFunctionCtx* pCtx, int32_t numOfOutput) { SET_REVERSE_SCAN_FLAG(pTableScanInfo); switchCtxOrder(pCtx, numOfOutput); - SWITCH_ORDER(pTableScanInfo->order); - setupQueryRangeForReverseScan(pTableScanInfo); +// setupQueryRangeForReverseScan(pTableScanInfo); - pTableScanInfo->times = 1; - pTableScanInfo->current = 0; - pTableScanInfo->reverseTimes = 0; + STimeWindow* pTWindow = &pTableScanInfo->cond.twindow; + TSWAP(pTWindow->skey, pTWindow->ekey, int64_t); + pTableScanInfo->cond.order = TSDB_ORDER_DESC; } static SSDataBlock* doTableScanImpl(SOperatorInfo* pOperator, bool* newgroup) { @@ -311,63 +309,68 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator, bool* newgroup) { return NULL; } - SResultRowInfo* pResultRowInfo = pTableScanInfo->pResultRowInfo; +// SResultRowInfo* pResultRowInfo = pTableScanInfo->pResultRowInfo; *newgroup = false; - while (pTableScanInfo->current < pTableScanInfo->times) { + while (pTableScanInfo->current < pTableScanInfo->scanInfo.numOfAsc) { SSDataBlock* p = doTableScanImpl(pOperator, newgroup); if (p != NULL) { return p; } - if (++pTableScanInfo->current >= pTableScanInfo->times) { - if (pTableScanInfo->reverseTimes <= 0 /* || isTsdbCacheLastRow(pTableScanInfo->pTsdbReadHandle)*/) { - return NULL; - } else { - break; - } + pTableScanInfo->current += 1; + + if (pTableScanInfo->current < pTableScanInfo->scanInfo.numOfAsc) { + setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); + pTableScanInfo->scanFlag = REPEAT_SCAN; + + STimeWindow* pWin = &pTableScanInfo->cond.twindow; + qDebug("%s start to repeat ascending order scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, + GET_TASKID(pTaskInfo), pWin->skey, pWin->ekey); + + // do prepare for the next round table scan operation + tsdbResetReadHandle(pTableScanInfo->dataReader, &pTableScanInfo->cond); } + } - // do prepare for the next round table scan operation - // STsdbQueryCond cond = createTsdbQueryCond(pQueryAttr, &pQueryAttr->window); - // tsdbResetQueryHandle(pTableScanInfo->pTsdbReadHandle, &cond); + int32_t total = pTableScanInfo->scanInfo.numOfAsc + pTableScanInfo->scanInfo.numOfDesc; + if (pTableScanInfo->current < total) { + if (pTableScanInfo->cond.order == TSDB_ORDER_ASC) { + prepareForDescendingScan(pTableScanInfo, pTableScanInfo->pCtx, pTableScanInfo->numOfOutput); + tsdbResetReadHandle(pTableScanInfo->dataReader, &pTableScanInfo->cond); + } - setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); - pTableScanInfo->scanFlag = REPEAT_SCAN; + STimeWindow* pWin = &pTableScanInfo->cond.twindow; + qDebug("%s start to descending order scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, + GET_TASKID(pTaskInfo), pWin->skey, pWin->ekey); - // if (pResultRowInfo->size > 0) { - // pResultRowInfo->curPos = 0; - // } + while (pTableScanInfo->current < total) { + SSDataBlock* p = doTableScanImpl(pOperator, newgroup); + if (p != NULL) { + return p; + } - qDebug("%s start to repeat scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, - GET_TASKID(pTaskInfo), pTaskInfo->window.skey, pTaskInfo->window.ekey); - } + pTableScanInfo->current += 1; - SSDataBlock* p = NULL; - // todo refactor - if (pTableScanInfo->reverseTimes > 0) { - setupEnvForReverseScan(pTableScanInfo, pTableScanInfo->pCtx, pTableScanInfo->numOfOutput); - // STsdbQueryCond cond = createTsdbQueryCond(pQueryAttr, &pQueryAttr->window); - // tsdbResetQueryHandle(pTableScanInfo->pTsdbReadHandle, &cond); + if (pTableScanInfo->current < pTableScanInfo->scanInfo.numOfAsc) { + setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); + pTableScanInfo->scanFlag = REPEAT_SCAN; - qDebug("%s start to reverse scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, - GET_TASKID(pTaskInfo), pTaskInfo->window.skey, pTaskInfo->window.ekey); + qDebug("%s start to repeat descending order scan data blocks due to query func required, qrange:%" PRId64 "-%" PRId64, + GET_TASKID(pTaskInfo), pTaskInfo->window.skey, pTaskInfo->window.ekey); - if (pResultRowInfo->size > 0) { - // pResultRowInfo->curPos = pResultRowInfo->size - 1; + // do prepare for the next round table scan operation + tsdbResetReadHandle(pTableScanInfo->dataReader, &pTableScanInfo->cond); + } } - - p = doTableScanImpl(pOperator, newgroup); } - return p; + setTaskStatus(pTaskInfo, TASK_COMPLETED); + return NULL; } -SOperatorInfo* createTableScanOperatorInfo(void* pDataReader, int32_t order, int32_t numOfOutput, int32_t dataLoadFlag, - int32_t repeatTime, int32_t reverseTime, SArray* pColMatchInfo, SSDataBlock* pResBlock, - SNode* pCondition, SInterval* pInterval, double sampleRatio, SExecTaskInfo* pTaskInfo) { - assert(repeatTime > 0); - +SOperatorInfo* createTableScanOperatorInfo(void* pDataReader, SQueryTableDataCond* pCond, int32_t numOfOutput, int32_t dataLoadFlag, const uint8_t* scanInfo, + SArray* pColMatchInfo, SSDataBlock* pResBlock, SNode* pCondition, SInterval* pInterval, double sampleRatio, SExecTaskInfo* pTaskInfo) { STableScanInfo* pInfo = taosMemoryCalloc(1, sizeof(STableScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -378,18 +381,19 @@ SOperatorInfo* createTableScanOperatorInfo(void* pDataReader, int32_t order, int return NULL; } + pInfo->cond = *pCond; + pInfo->scanInfo = (SScanInfo) {.numOfAsc = scanInfo[0], .numOfDesc = scanInfo[1]}; + pInfo->interval = *pInterval; pInfo->sampleRatio = sampleRatio; pInfo->dataBlockLoadFlag= dataLoadFlag; pInfo->pResBlock = pResBlock; pInfo->pFilterNode = pCondition; pInfo->dataReader = pDataReader; - pInfo->times = repeatTime; - pInfo->reverseTimes = reverseTime; - pInfo->order = order; pInfo->current = 0; pInfo->scanFlag = MAIN_SCAN; pInfo->pColMatchInfo = pColMatchInfo; + pOperator->name = "TableScanOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN; pOperator->blockingOptr = false; @@ -410,19 +414,17 @@ SOperatorInfo* createTableScanOperatorInfo(void* pDataReader, int32_t order, int SOperatorInfo* createTableSeqScanOperatorInfo(void* pTsdbReadHandle) { STableScanInfo* pInfo = taosMemoryCalloc(1, sizeof(STableScanInfo)); - pInfo->dataReader = pTsdbReadHandle; - pInfo->times = 1; - pInfo->reverseTimes = 0; - pInfo->current = 0; + pInfo->dataReader = pTsdbReadHandle; + pInfo->current = 0; pInfo->prevGroupId = -1; SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - pOperator->name = "TableSeqScanOperator"; - pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN; - pOperator->blockingOptr = false; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - pOperator->getNextFn = doTableScanImpl; + pOperator->name = "TableSeqScanOperator"; + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN; + pOperator->blockingOptr = false; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pOperator->getNextFn = doTableScanImpl; return pOperator; } -- GitLab