From bc1af6deaf38f04ef4c9fb9aceee2669d3eeedc5 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 19 Jul 2022 15:52:08 +0800 Subject: [PATCH] refactor(stream): batch optimization for submit msg --- include/util/tarray.h | 8 +++ source/dnode/mnode/impl/src/mndConsumer.c | 2 +- source/dnode/vnode/src/tq/tqRead.c | 1 + source/libs/executor/src/executor.c | 17 +++-- source/libs/executor/src/scanoperator.c | 76 +++++++++++++++-------- source/libs/stream/src/streamData.c | 6 +- source/libs/wal/src/walRead.c | 12 ++-- source/util/src/tarray.c | 40 ++++++++++++ 8 files changed, 123 insertions(+), 39 deletions(-) diff --git a/include/util/tarray.h b/include/util/tarray.h index 482f13de39..7c1bc34d71 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -87,6 +87,14 @@ void taosArrayRemoveBatch(SArray* pArray, const int32_t* pData, int32_t numOfEle */ void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)); +/** + * + * @param pArray + * @param comparFn + * @param fp + */ +void taosArrayRemoveDuplicateP(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)); + /** * add all element from the source array list into the destination * @param pArray diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index a60db8a8c2..315b7c3afc 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -452,7 +452,7 @@ static int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { int32_t code = -1; SArray *newSub = subscribe.topicNames; taosArraySortString(newSub, taosArrayCompareString); - taosArrayRemoveDuplicate(newSub, taosArrayCompareString, taosMemoryFree); + taosArrayRemoveDuplicateP(newSub, taosArrayCompareString, taosMemoryFree); int32_t newTopicNum = taosArrayGetSize(newSub); // check topic existance diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 47b497b480..e4c11c4787 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -299,6 +299,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader) { } if (blockDataEnsureCapacity(pBlock, pReader->msgIter.numOfRows) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 90952d5786..8fc9ee496b 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -43,16 +43,24 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu // TODO: if a block was set but not consumed, // prevent setting a different type of block pInfo->blockType = type; + pInfo->validBlockIndex = 0; + taosArrayClear(pInfo->pBlockLists); if (type == STREAM_INPUT__DATA_SUBMIT) { - if (tqReaderSetDataMsg(pInfo->tqReader, input, 0) < 0) { - qError("submit msg messed up when initing stream block, %s" PRIx64, id); - return TSDB_CODE_QRY_APP_ERROR; + /*if (tqReaderSetDataMsg(pInfo->tqReader, input, 0) < 0) {*/ + /*qError("submit msg messed up when initing stream block, %s" PRIx64, id);*/ + /*return TSDB_CODE_QRY_APP_ERROR;*/ + /*}*/ + taosArrayClear(pInfo->pBlockLists); + for (int32_t i = 0; i < numOfBlocks; i++) { + SSubmitReq* pReq = POINTER_SHIFT(input, i * sizeof(void*)); + taosArrayPush(pInfo->pBlockLists, &pReq); } } else if (type == STREAM_INPUT__DATA_BLOCK) { for (int32_t i = 0; i < numOfBlocks; ++i) { SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + // TODO optimize SSDataBlock* p = createOneDataBlock(pDataBlock, false); p->info = pDataBlock->info; @@ -153,7 +161,8 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers) { return pTaskInfo; } -static SArray* filterQualifiedChildTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList, const char* idstr) { +static SArray* filterQualifiedChildTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList, + const char* idstr) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); // let's discard the tables those are not created according to the queried super table. diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index e60f6f8a5b..7ecd9645e1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1393,24 +1393,47 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { } SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; - blockDataCleanup(pInfo->pRes); - while (tqNextDataBlock(pInfo->tqReader)) { - SSDataBlock block = {0}; + int32_t totBlockNum = taosArrayGetSize(pInfo->pBlockLists); - // todo refactor - int32_t code = tqRetrieveDataBlock(&block, pInfo->tqReader); + while (1) { + if (pInfo->tqReader->pMsg == NULL) { + if (pInfo->validBlockIndex >= totBlockNum) { + return NULL; + } - if (code != TSDB_CODE_SUCCESS || block.info.rows == 0) { - pTaskInfo->code = code; - return NULL; + int32_t current = pInfo->validBlockIndex++; + SSubmitReq* pSubmit = taosArrayGetP(pInfo->pBlockLists, current); + if (tqReaderSetDataMsg(pInfo->tqReader, pSubmit, 0) < 0) { + qError("submit msg messed up when initing stream submit block %p, current %d, total %d", pSubmit, current, + totBlockNum); + pInfo->tqReader->pMsg = NULL; + continue; + } } - setBlockIntoRes(pInfo, &block); + blockDataCleanup(pInfo->pRes); + + while (tqNextDataBlock(pInfo->tqReader)) { + SSDataBlock block = {0}; + int32_t code = tqRetrieveDataBlock(&block, pInfo->tqReader); + + if (code != TSDB_CODE_SUCCESS || block.info.rows == 0) { + continue; + } + + setBlockIntoRes(pInfo, &block); + + if (pBlockInfo->rows > 0) { + break; + } + } if (pBlockInfo->rows > 0) { break; } + /*blockDataCleanup(pInfo->pRes);*/ + pInfo->tqReader->pMsg = NULL; } // record the scan action. @@ -2558,30 +2581,30 @@ typedef struct STableMergeScanInfo { SArray* pSortInfo; SSortHandle* pSortHandle; - SSDataBlock* pSortInputBlock; - int64_t startTs; // sort start time - SArray* sortSourceParams; + SSDataBlock* pSortInputBlock; + int64_t startTs; // sort start time + SArray* sortSourceParams; SFileBlockLoadRecorder readRecorder; - int64_t numOfRows; - SScanInfo scanInfo; - int32_t scanTimes; - SNode* pFilterNode; // filter info, which is push down by optimizer - SqlFunctionCtx* pCtx; // which belongs to the direct upstream operator operator query context - SResultRowInfo* pResultRowInfo; - int32_t* rowEntryInfoOffset; - SExprInfo* pExpr; - SSDataBlock* pResBlock; - SArray* pColMatchInfo; - int32_t numOfOutput; + int64_t numOfRows; + SScanInfo scanInfo; + int32_t scanTimes; + SNode* pFilterNode; // filter info, which is push down by optimizer + SqlFunctionCtx* pCtx; // which belongs to the direct upstream operator operator query context + SResultRowInfo* pResultRowInfo; + int32_t* rowEntryInfoOffset; + SExprInfo* pExpr; + SSDataBlock* pResBlock; + SArray* pColMatchInfo; + int32_t numOfOutput; SExprInfo* pPseudoExpr; int32_t numOfPseudoExpr; SqlFunctionCtx* pPseudoCtx; SQueryTableDataCond cond; - int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan - int32_t dataBlockLoadFlag; + int32_t scanFlag; // table scan flag to denote if it is a repeat/reverse/main scan + int32_t dataBlockLoadFlag; // if the upstream is an interval operator, the interval info is also kept here to get the time // window to check if current data block needs to be loaded. SInterval interval; @@ -2589,7 +2612,8 @@ typedef struct STableMergeScanInfo { } STableMergeScanInfo; int32_t createScanTableListInfo(SScanPhysiNode* pScanNode, SNodeList* pGroupTags, bool groupSort, SReadHandle* pHandle, - STableListInfo* pTableListInfo, SNode* pTagCond, SNode* pTagIndexCond, const char* idStr) { + STableListInfo* pTableListInfo, SNode* pTagCond, SNode* pTagIndexCond, + const char* idStr) { int32_t code = getTableList(pHandle->meta, pHandle->vnode, pScanNode, pTagCond, pTagIndexCond, pTableListInfo); if (code != TSDB_CODE_SUCCESS) { return code; diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 6b447d05ad..6989c36332 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -81,7 +81,7 @@ SStreamMergedSubmit* streamMergedSubmitNew() { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)taosAllocateQitem(sizeof(SStreamMergedSubmit), DEF_QITEM); if (pMerged == NULL) return NULL; pMerged->reqs = taosArrayInit(0, sizeof(void*)); - pMerged->dataRefs = taosArrayInit(0, sizeof(void*)); + pMerged->dataRefs = taosArrayInit(0, sizeof(int32_t*)); if (pMerged->dataRefs == NULL || pMerged->reqs == NULL) goto FAIL; return pMerged; FAIL: @@ -93,7 +93,7 @@ FAIL: int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubmit) { taosArrayPush(pMerged->dataRefs, pSubmit->dataRef); - taosArrayPush(pMerged->reqs, pSubmit->data); + taosArrayPush(pMerged->reqs, &pSubmit->data); pMerged->ver = pSubmit->ver; return 0; } @@ -167,7 +167,7 @@ void streamFreeQitem(SStreamQueueItem* data) { int32_t* ref = taosArrayGet(pMerge->dataRefs, i); (*ref)--; if (*ref == 0) { - void* data = taosArrayGet(pMerge->reqs, i); + void* data = taosArrayGetP(pMerge->reqs, i); taosMemoryFree(data); taosMemoryFree(ref); } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index c47964803a..8b4225c80c 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -417,7 +417,7 @@ int32_t walReadVer(SWalReader *pRead, int64_t ver) { } if (ver > pRead->pWal->vers.lastVer || ver < pRead->pWal->vers.firstVer) { - wError("vgId:%d, invalid index:%" PRId64 ", first index:%" PRId64 ", last index:%" PRId64, pRead->pWal->cfg.vgId, + wDebug("vgId:%d, invalid index:%" PRId64 ", first index:%" PRId64 ", last index:%" PRId64, pRead->pWal->cfg.vgId, ver, pRead->pWal->vers.firstVer, pRead->pWal->vers.lastVer); terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; @@ -425,7 +425,7 @@ int32_t walReadVer(SWalReader *pRead, int64_t ver) { if (pRead->curInvalid || pRead->curVersion != ver) { if (walReadSeekVer(pRead, ver) < 0) { - wError("vgId:%d, unexpected wal log index:%" PRId64 ", since %s", pRead->pWal->cfg.vgId, ver, terrstr()); + wError("vgId:%d, unexpected wal log, index:%" PRId64 ", since %s", pRead->pWal->cfg.vgId, ver, terrstr()); return -1; } seeked = true; @@ -452,7 +452,8 @@ int32_t walReadVer(SWalReader *pRead, int64_t ver) { contLen = walValidHeadCksum(pRead->pHead); if (contLen != 0) { - wError("vgId:%d, unexpected wal log index:%" PRId64 ", since head checksum not passed", pRead->pWal->cfg.vgId, ver); + wError("vgId:%d, unexpected wal log, index:%" PRId64 ", since head checksum not passed", pRead->pWal->cfg.vgId, + ver); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; return -1; } @@ -479,7 +480,7 @@ int32_t walReadVer(SWalReader *pRead, int64_t ver) { } if (pRead->pHead->head.version != ver) { - wError("vgId:%d, unexpected wal log index:%" PRId64 ", read request index:%" PRId64, pRead->pWal->cfg.vgId, + wError("vgId:%d, unexpected wal log, index:%" PRId64 ", read request index:%" PRId64, pRead->pWal->cfg.vgId, pRead->pHead->head.version, ver); pRead->curInvalid = 1; terrno = TSDB_CODE_WAL_FILE_CORRUPTED; @@ -489,7 +490,8 @@ int32_t walReadVer(SWalReader *pRead, int64_t ver) { contLen = walValidBodyCksum(pRead->pHead); if (contLen != 0) { - wError("vgId:%d, unexpected wal log index:%" PRId64 ", since body checksum not passed", pRead->pWal->cfg.vgId, ver); + wError("vgId:%d, unexpected wal log, index:%" PRId64 ", since body checksum not passed", pRead->pWal->cfg.vgId, + ver); pRead->curInvalid = 1; terrno = TSDB_CODE_WAL_FILE_CORRUPTED; ASSERT(0); diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 23e79da948..6095b67588 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -173,6 +173,46 @@ void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp) pArray->size = pos + 1; } +void taosArrayRemoveDuplicateP(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)) { + assert(pArray); + + size_t size = pArray->size; + if (size <= 1) { + return; + } + + int32_t pos = 0; + for (int32_t i = 1; i < size; ++i) { + char* p1 = taosArrayGet(pArray, pos); + char* p2 = taosArrayGet(pArray, i); + + if (comparFn(p1, p2) == 0) { + // do nothing + } else { + if (pos + 1 != i) { + void* p = taosArrayGet(pArray, pos + 1); + if (fp != NULL) { + fp(p); + } + + taosArraySet(pArray, pos + 1, p2); + pos += 1; + } else { + pos += 1; + } + } + } + + if (fp != NULL) { + for (int32_t i = pos + 1; i < pArray->size; ++i) { + void* p = taosArrayGetP(pArray, i); + fp(p); + } + } + + pArray->size = pos + 1; +} + void* taosArrayAddAll(SArray* pArray, const SArray* pInput) { if (pInput) { return taosArrayAddBatch(pArray, pInput->pData, (int32_t)taosArrayGetSize(pInput)); -- GitLab