diff --git a/cmake/rocksdb_CMakeLists.txt.in b/cmake/rocksdb_CMakeLists.txt.in index f238ed20af79ba74d07927eb66b35456a00b279e..45599d82e3a8750f8b6e4823c0f8eb8dc8a79f24 100644 --- a/cmake/rocksdb_CMakeLists.txt.in +++ b/cmake/rocksdb_CMakeLists.txt.in @@ -5,8 +5,8 @@ if (${BUILD_CONTRIB}) URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b DOWNLOAD_NO_PROGRESS 1 - DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" - SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" + DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" + SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" @@ -18,8 +18,8 @@ else() URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b DOWNLOAD_NO_PROGRESS 1 - DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" - SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" + DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" + SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/include/common/tcommon.h b/include/common/tcommon.h index d88228b436e33f84346de56d773c481176f5804d..bdfb1d32b4642e265b468e4f4e3a7bd476b07ff9 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -54,6 +54,11 @@ typedef struct SSessionKey { uint64_t groupId; } SSessionKey; +typedef struct SVersionRange { + uint64_t minVer; + uint64_t maxVer; +} SVersionRange; + static inline int winKeyCmprImpl(const void* pKey1, const void* pKey2) { SWinKey* pWin1 = (SWinKey*)pKey1; SWinKey* pWin2 = (SWinKey*)pKey2; diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index cd8e0642cf2dcae26ae288e421083f8accae54d4..c0412d2617500196d2d4d31f9ff49f7bede9a08d 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -177,7 +177,6 @@ static FORCE_INLINE void colDataSetDouble(SColumnInfoData* pColumnInfoData, uint int32_t getJsonValueLen(const char* data); int32_t 
colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull); -int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull); int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData); int32_t colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, uint32_t numOfRows, bool trimValue); int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int32_t* capacity, @@ -187,6 +186,7 @@ int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* p int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock, int32_t tsColumnIndex); int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows); + int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx); void colDataTrim(SColumnInfoData* pColumnInfoData); @@ -208,7 +208,6 @@ double blockDataGetSerialRowSize(const SSDataBlock* pBlock); size_t blockDataGetSerialMetaSize(uint32_t numOfCols); int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo); -int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst); int32_t colInfoDataEnsureCapacity(SColumnInfoData* pColumn, uint32_t numOfRows, bool clearPayload); int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows); @@ -237,11 +236,10 @@ int32_t blockDataAppendColInfo(SSDataBlock* pBlock, SColumnInfoData* pColIn SColumnInfoData createColumnInfoData(int16_t type, int32_t bytes, int16_t colId); SColumnInfoData* bdGetColumnInfoData(const SSDataBlock* pBlock, int32_t index); +int32_t blockGetEncodeSize(const SSDataBlock* pBlock); int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols); const char* blockDecode(SSDataBlock* pBlock, const char* pData); -void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag); -void 
blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag); // for debug char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf); @@ -251,9 +249,7 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** pReq, const SSDataBlock* pData char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId); int32_t buildCtbNameByGroupIdImpl(const char* stbName, uint64_t groupId, char* pBuf); -static FORCE_INLINE int32_t blockGetEncodeSize(const SSDataBlock* pBlock) { - return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock); -} +void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList); #ifdef __cplusplus } diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 3fc94f440862e843e5c6d503d8488f654d0b6c1c..8ebf07bfccfff6edc638f2adf270db5ded8afdba 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -252,7 +252,9 @@ enum { TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "stream-recover-finish", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TRANSFER_STATE, "stream-transfer-state", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) @@ -297,8 +299,7 @@ enum { TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, "vnode-stream-recover1", NULL, NULL) - 
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, "vnode-stream-recover2", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 3f53976c67834285593ec45e4cfb98001f14ea52..3bef15f3a7c49b7a89112344b67182b3da9f3696 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -55,6 +55,9 @@ typedef struct { void* pStateBackend; struct SStorageAPI api; + + int8_t fillHistory; + STimeWindow winRange; } SReadHandle; // in queue mode, data streams are seperated by msg @@ -193,14 +196,6 @@ int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t le void getNextTimeWindow(const SInterval* pInterval, STimeWindow* tw, int32_t order); void getInitialStartTimeWindow(SInterval* pInterval, TSKEY ts, STimeWindow* w, bool ascQuery); STimeWindow getAlignQueryTimeWindow(const SInterval* pInterval, int64_t key); -/** - * return the scan info, in the form of tuple of two items, including table uid and current timestamp - * @param tinfo - * @param uid - * @param ts - * @return - */ -int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts); SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); @@ -220,15 +215,22 @@ void* qExtractReaderFromStreamScanner(void* scanner); int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner); -int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo); -int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver); -int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver); +int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo); +int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow); +int32_t 
qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); -int32_t qStreamRestoreParam(qTaskInfo_t tinfo); +int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo); bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); -void qStreamCloseTsdbReader(void* task); +bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo); +bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo); +int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo); void resetTaskInfo(qTaskInfo_t tinfo); +void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo); + +int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo); +int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo); + #ifdef __cplusplus } #endif diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 0826df67c0bc487f268a3eb22b0bfbdbd4e1f092..e263c9d236645ed9d3288cfea091152a0e6b19b9 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -234,29 +234,6 @@ typedef struct SStoreSnapshotFn { int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid); } SStoreSnapshotFn; -/** -void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags); -void metaReaderReleaseLock(SMetaReader *pReader); -void metaReaderClear(SMetaReader *pReader); -int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid); -int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid); -int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList); -const void *metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal); -int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName); - -int metaGetTableUidByName(void *meta, char *tbName, uint64_t *uid); -int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType); -bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid); -int32_t 
metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList, - bool *acquired); -int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, - int32_t payloadLen, double selectivityRatio); -tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); -int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); -int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t -payloadLen); - */ - typedef struct SStoreMeta { SMTbCursor* (*openTableMetaCursor)(void* pVnode); // metaOpenTbCursor void (*closeTableMetaCursor)(SMTbCursor* pTbCur); // metaCloseTbCursor @@ -403,7 +380,7 @@ typedef struct SStateStore { SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key); struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize, - uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark); + uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id); void (*streamFileStateDestroy)(struct SStreamFileState* pFileState); void (*streamFileStateClear)(struct SStreamFileState* pFileState); @@ -415,6 +392,7 @@ typedef struct SStateStore { int32_t (*streamStateCommit)(SStreamState* pState); void (*streamStateDestroy)(SStreamState* pState, bool remove); int32_t (*streamStateDeleteCheckPoint)(SStreamState* pState, TSKEY mark); + void (*streamStateReloadInfo)(SStreamState* pState, TSKEY ts); } SStateStore; typedef struct SStorageAPI { diff --git a/include/libs/function/function.h b/include/libs/function/function.h index c92ce254a8bfb157f8f844fbdcc195014ab0c0cb..2e3cd670d73651494aafc148a474dc8d2f48ad3c 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -129,30 +129,38 @@ typedef struct SSerializeDataHandle { } 
SSerializeDataHandle; // incremental state storage + +typedef struct SBackendCfWrapper { + void *rocksdb; + void **pHandle; + void *writeOpts; + void *readOpts; + void **cfOpts; + void *dbOpt; + void *param; + void *env; + SListNode *pComparNode; + void *pBackend; + void *compactFactory; + TdThreadRwlock rwLock; + bool remove; + int64_t backendId; + char idstr[64]; +} SBackendCfWrapper; typedef struct STdbState { - void *rocksdb; - void **pHandle; - void *writeOpts; - void *readOpts; - void **cfOpts; - void *dbOpt; + SBackendCfWrapper *pBackendCfWrapper; + int64_t backendCfWrapperId; + char idstr[64]; + struct SStreamTask *pOwner; - void *param; - void *env; - SListNode *pComparNode; - void *pBackend; - char idstr[64]; - void *compactFactory; - TdThreadRwlock rwLock; - - void *db; - void *pStateDb; - void *pFuncStateDb; - void *pFillStateDb; // todo refactor - void *pSessionStateDb; - void *pParNameDb; - void *pParTagDb; - void *txn; + void *db; + void *pStateDb; + void *pFuncStateDb; + void *pFillStateDb; // todo refactor + void *pSessionStateDb; + void *pParNameDb; + void *pParTagDb; + void *txn; } STdbState; typedef struct { diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 7f9d20a9dd878892e512b170921bdb1794defc52..7747df85956798f6271e06822a4812d03829f89e 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -138,6 +138,8 @@ int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur); int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char* tbname); int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal); +void streamStateReloadInfo(SStreamState* pState, TSKEY ts); + /***compare func **/ typedef struct SStateChekpoint { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 73c88fae8d9e0cec33831d1640b8299e0fcaa89d..dbcc31a35ebf2a0f7595ebe26737fe143a2a5a0a 100644 --- a/include/libs/stream/tstream.h +++ 
b/include/libs/stream/tstream.h @@ -44,10 +44,8 @@ enum { TASK_STATUS__DROPPING, TASK_STATUS__FAIL, TASK_STATUS__STOP, - TASK_STATUS__WAIT_DOWNSTREAM, - TASK_STATUS__RECOVER_PREPARE, - TASK_STATUS__RECOVER1, - TASK_STATUS__RECOVER2, + TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner + TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused TASK_STATUS__PAUSE, }; @@ -133,7 +131,6 @@ typedef struct { // ref data block, for delete typedef struct { int8_t type; - int64_t ver; SSDataBlock* pBlock; } SStreamRefDataBlock; @@ -203,13 +200,11 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) { atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); } -void* streamQueueNextItem(SStreamQueue* queue); +void* streamQueueNextItem(SStreamQueue* pQueue); SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit); -SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit); - typedef struct { char* qmsg; void* pExecutor; // not applicable to encoder and decoder @@ -251,7 +246,7 @@ typedef struct { int8_t reserved; } STaskSinkFetch; -typedef struct { +typedef struct SStreamChildEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; @@ -271,31 +266,55 @@ typedef struct SCheckpointInfo { } SCheckpointInfo; typedef struct SStreamStatus { - int8_t taskStatus; - int8_t schedStatus; - int8_t keepTaskStatus; + int8_t taskStatus; + int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set + int8_t schedStatus; + int8_t keepTaskStatus; + bool transferState; + int8_t timerActive; // timer is active } SStreamStatus; +typedef struct SHistDataRange { + SVersionRange range; + STimeWindow window; +} SHistDataRange; + +typedef struct SSTaskBasicInfo { + int32_t nodeId; // vgroup id or snode id + SEpSet epSet; + int32_t selfChildId; + int32_t totalLevel; + int8_t taskLevel; + int8_t 
fillHistory; // is fill history task or not +} SSTaskBasicInfo; + +typedef struct SDispatchMsgInfo { + void* pData; // current dispatch data + int16_t msgType; // dispatch msg type + int32_t retryCount; // retry send data count + int64_t blockingTs; // output blocking timestamp +} SDispatchMsgInfo; + +typedef struct { + int8_t outputType; + int8_t outputStatus; + SStreamQueue* outputQueue; +} SSTaskOutputInfo; + struct SStreamTask { - SStreamId id; - int32_t totalLevel; - int8_t taskLevel; - int8_t outputType; - int16_t dispatchMsgType; - SStreamStatus status; - int32_t selfChildId; - int32_t nodeId; // vgroup id - SEpSet epSet; - SCheckpointInfo chkInfo; - STaskExec exec; - int8_t fillHistory; // fill history - int64_t ekey; // end ts key - int64_t endVer; // end version - - // children info - SArray* childEpInfo; // SArray - int32_t nextCheckId; - SArray* checkpointInfo; // SArray + SStreamId id; + SSTaskBasicInfo info; + int8_t outputType; + SDispatchMsgInfo msgInfo; + SStreamStatus status; + SCheckpointInfo chkInfo; + STaskExec exec; + SHistDataRange dataRange; + SStreamId historyTaskId; + SStreamId streamTaskId; + SArray* pUpstreamEpInfoList; // SArray, // children info + int32_t nextCheckId; + SArray* checkpointInfo; // SArray // output union { @@ -314,13 +333,14 @@ struct SStreamTask { // trigger int8_t triggerStatus; int64_t triggerParam; - void* timer; + void* schedTimer; + void* launchTaskTimer; SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend // the followings attributes don't be serialized - int32_t recoverTryingDownstream; - int32_t recoverWaitingUpstream; + int32_t notReadyTasks; + int32_t numOfWaitingUpstream; int64_t checkReqId; SArray* checkReqIds; // shuffle int32_t refCnt; @@ -332,21 +352,22 @@ struct SStreamTask { // meta typedef struct SStreamMeta { - char* path; - TDB* db; - TTB* pTaskDb; - TTB* pCheckpointDb; - SHashObj* pTasks; - SArray* pTaskList; // SArray - void* ahandle; - TXN* txn; - FTaskExpand* expandFunc; - 
int32_t vgId; - SRWLatch lock; - int32_t walScanCounter; - void* streamBackend; - int64_t streamBackendRid; - SHashObj* pTaskBackendUnique; + char* path; + TDB* db; + TTB* pTaskDb; + TTB* pCheckpointDb; + SHashObj* pTasks; + SArray* pTaskList; // SArray + void* ahandle; + TXN* txn; + FTaskExpand* expandFunc; + int32_t vgId; + SRWLatch lock; + int32_t walScanCounter; + void* streamBackend; + int64_t streamBackendRid; + SHashObj* pTaskBackendUnique; + TdThreadMutex backendMutex; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -431,16 +452,17 @@ typedef struct { SMsgHead msgHead; int64_t streamId; int32_t taskId; -} SStreamRecoverStep1Req, SStreamRecoverStep2Req; + int8_t igUntreated; +} SStreamScanHistoryReq; typedef struct { int64_t streamId; int32_t taskId; int32_t childId; -} SStreamRecoverFinishReq; +} SStreamRecoverFinishReq, SStreamTransferReq; -int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq); -int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq); +int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq); +int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq); typedef struct { int64_t streamId; @@ -509,11 +531,11 @@ typedef struct { SArray* checkpointVer; // SArray } SStreamRecoverDownstreamRsp; -int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq); -int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq); +int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq); +int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq); -int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp); -int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); +int32_t tEncodeStreamTaskCheckRsp(SEncoder* 
pEncoder, const SStreamTaskCheckRsp* pRsp); +int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp); int32_t tEncodeSStreamTaskRecoverReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq); int32_t tDecodeSStreamTaskRecoverReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq); @@ -525,9 +547,11 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); -void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); +int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, + int64_t dstTaskId); +void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); -int32_t streamSetupTrigger(SStreamTask* pTask); +int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessRunReq(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec); @@ -542,30 +566,44 @@ int32_t streamSchedExec(SStreamTask* pTask); int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); +bool streamTaskIsIdle(const SStreamTask* pTask); int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz); +char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); + // recover and fill history -int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version); -int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version); +void streamPrepareNdoCheckDownstream(SStreamTask* pTask); +int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask); +int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask); -int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* 
pRsp, int64_t version); +int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp); +int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask); +int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); +int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated); +bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask); +bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask); +int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask); // common -int32_t streamSetParamForRecover(SStreamTask* pTask); -int32_t streamRestoreParam(SStreamTask* pTask); -int32_t streamSetStatusNormal(SStreamTask* pTask); +int32_t streamSetParamForScanHistoryData(SStreamTask* pTask); +int32_t streamRestoreParam(SStreamTask* pTask); +int32_t streamSetStatusNormal(SStreamTask* pTask); +const char* streamGetTaskStatusStr(int32_t status); + // source level -int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver); -int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq); -int32_t streamSourceRecoverScanStep1(SStreamTask* pTask); -int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq); -int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver); -int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask); +int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); +int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); +int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); +int32_t streamSourceScanHistoryData(SStreamTask* pTask); +// int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver); +int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); + +int32_t streamDispatchTransferStateMsg(SStreamTask* pTask); + // agg level int32_t 
streamAggRecoverPrepare(SStreamTask* pTask); -// int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask); -int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId); +int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId); void streamMetaInit(); void streamMetaCleanup(); @@ -591,6 +629,9 @@ int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); +int32_t streamTaskReleaseState(SStreamTask* pTask); +int32_t streamTaskReloadState(SStreamTask* pTask); + #ifdef __cplusplus } #endif diff --git a/include/libs/stream/tstreamFileState.h b/include/libs/stream/tstreamFileState.h index 0dbacf6c9f1a52eac62966206ad0b95d52c0620f..b2255013ca5de8246f94eecd13dae4e382c1cd32 100644 --- a/include/libs/stream/tstreamFileState.h +++ b/include/libs/stream/tstreamFileState.h @@ -28,11 +28,10 @@ extern "C" { #endif typedef struct SStreamFileState SStreamFileState; - typedef SList SStreamSnapshot; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark); + GetTsFun fp, void* pFile, TSKEY delMark, const char* id); void streamFileStateDestroy(SStreamFileState* pFileState); void streamFileStateClear(SStreamFileState* pFileState); bool needClearDiskBuff(SStreamFileState* pFileState); @@ -50,6 +49,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState); int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list); int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark); int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState); +void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts); #ifdef __cplusplus } diff --git a/source/common/src/systable.c 
b/source/common/src/systable.c index c2024a9a779661eb1876525be29497664f9c6eaa..5d1854ee2c3b1a50af448508c655388c4c059fcd 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -160,7 +160,7 @@ static const SSysDbTableSchema streamSchema[] = { static const SSysDbTableSchema streamTaskSchema[] = { {.name = "stream_name", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "task_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, + {.name = "task_id", .bytes = 32, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "node_type", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "level", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 96889882b65f75ccb7f7de5095d03286c1a2609d..b2f03fa7ba54606a924214f98559c818bbef3ac9 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -47,7 +47,6 @@ int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRo } } - int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx) { if (colDataIsNull_s(pColumnInfoData, rowIdx)) return 0; @@ -67,10 +66,6 @@ int32_t colDataGetFullLength(const SColumnInfoData* pColumnInfoData, int32_t num } } -void colDataTrim(SColumnInfoData* pColumnInfoData) { - // TODO -} - int32_t getJsonValueLen(const char* data) { int32_t dataLen = 0; if (*data == TSDB_DATA_TYPE_NULL) { @@ -89,10 +84,6 @@ int32_t getJsonValueLen(const char* data) { return dataLen; } -int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) { - return colDataSetVal(pColumnInfoData, rowIndex, pData, isNull); -} - int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const 
char* pData, bool isNull) { if (isNull) { // There is a placehold for each NULL value of binary or nchar type. @@ -174,7 +165,7 @@ int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, } -int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) { +static int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) { if (!IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { return TSDB_CODE_SUCCESS; } @@ -643,7 +634,7 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { } else { memcpy(pStart, pCol->pData, dataSize); pStart += dataSize; - } + } } return 0; @@ -882,41 +873,8 @@ int32_t dataBlockCompar(const void* p1, const void* p2, const void* param) { return 0; } -static int32_t doAssignOneTuple(SColumnInfoData* pDstCols, int32_t numOfRows, const SSDataBlock* pSrcBlock, - int32_t tupleIndex) { - int32_t code = 0; - size_t numOfCols = taosArrayGetSize(pSrcBlock->pDataBlock); - - for (int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pDst = &pDstCols[i]; - SColumnInfoData* pSrc = taosArrayGet(pSrcBlock->pDataBlock, i); - - if (pSrc->hasNull && colDataIsNull(pSrc, pSrcBlock->info.rows, tupleIndex, pSrcBlock->pBlockAgg[i])) { - code = colDataSetVal(pDst, numOfRows, NULL, true); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } else { - char* p = colDataGetData(pSrc, tupleIndex); - code = colDataSetVal(pDst, numOfRows, p, false); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } - - return TSDB_CODE_SUCCESS; -} - static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataBlock, const int32_t* index) { -#if 0 - for (int32_t i = 0; i < pDataBlock->info.rows; ++i) { - int32_t code = doAssignOneTuple(pCols, i, pDataBlock, index[i]); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } -#else + size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock); for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pDst = &pCols[i]; @@ -941,7 +899,7 @@ static 
int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataB } } } -#endif + return TSDB_CODE_SUCCESS; } @@ -1101,114 +1059,6 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo) { return TSDB_CODE_SUCCESS; } -#if 0 -typedef struct SHelper { - int32_t index; - union { - char* pData; - int64_t i64; - double d64; - }; -} SHelper; - -SHelper* createTupleIndex_rv(int32_t numOfRows, SArray* pOrderInfo, SSDataBlock* pBlock) { - int32_t sortValLengthPerRow = 0; - int32_t numOfCols = taosArrayGetSize(pOrderInfo); - - for (int32_t i = 0; i < numOfCols; ++i) { - SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); - SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, pInfo->slotId); - pInfo->pColData = pColInfo; - sortValLengthPerRow += pColInfo->info.bytes; - } - - size_t len = sortValLengthPerRow * pBlock->info.rows; - - char* buf = taosMemoryCalloc(1, len); - SHelper* phelper = taosMemoryCalloc(numOfRows, sizeof(SHelper)); - for (int32_t i = 0; i < numOfRows; ++i) { - phelper[i].index = i; - phelper[i].pData = buf + sortValLengthPerRow * i; - } - - int32_t offset = 0; - for (int32_t i = 0; i < numOfCols; ++i) { - SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i); - for (int32_t j = 0; j < numOfRows; ++j) { - phelper[j].i64 = *(int32_t*)pInfo->pColData->pData + pInfo->pColData->info.bytes * j; - // memcpy(phelper[j].pData + offset, pInfo->pColData->pData + pInfo->pColData->info.bytes * j, - // pInfo->pColData->info.bytes); - } - - offset += pInfo->pColData->info.bytes; - } - - taosMemoryFree(buf); - return phelper; -} - -int32_t dataBlockCompar_rv(const void* p1, const void* p2, const void* param) { - const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*)param; - - SHelper* left = (SHelper*)p1; - SHelper* right = (SHelper*)p2; - - SArray* pInfo = pHelper->orderInfo; - - int32_t offset = 0; - int32_t leftx = *(int32_t*)left->pData; //*(int32_t*)(left->pData + offset); - int32_t rightx = *(int32_t*)right->pData; 
//*(int32_t*)(right->pData + offset); - - if (leftx == rightx) { - return 0; - } else { - return (leftx < rightx) ? -1 : 1; - } - return 0; -} - -int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) { - // Allocate the additional buffer. - int64_t p0 = taosGetTimestampUs(); - - SSDataBlockSortHelper helper = {.pDataBlock = pDataBlock, .orderInfo = pOrderInfo}; - - uint32_t rows = pDataBlock->info.rows; - SHelper* index = createTupleIndex_rv(rows, helper.orderInfo, pDataBlock); - if (index == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; - } - - taosqsort(index, rows, sizeof(SHelper), &helper, dataBlockCompar_rv); - - int64_t p1 = taosGetTimestampUs(); - SColumnInfoData* pCols = createHelpColInfoData(pDataBlock); - if (pCols == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; - } - - int64_t p2 = taosGetTimestampUs(); - - // int32_t code = blockDataAssign(pCols, pDataBlock, index); - // if (code != TSDB_CODE_SUCCESS) { - // terrno = code; - // return code; - // } - - int64_t p3 = taosGetTimestampUs(); - - copyBackToBlock(pDataBlock, pCols); - int64_t p4 = taosGetTimestampUs(); - - printf("sort:%" PRId64 ", create:%" PRId64 ", assign:%" PRId64 ", copyback:%" PRId64 ", rows:%d\n", p1 - p0, p2 - p1, - p3 - p2, p4 - p3, rows); - // destroyTupleIndex(index); - return 0; -} -#endif - void blockDataCleanup(SSDataBlock* pDataBlock) { blockDataEmpty(pDataBlock); SDataBlockInfo* pInfo = &pDataBlock->info; @@ -1345,8 +1195,7 @@ void blockDataFreeRes(SSDataBlock* pBlock) { colDataDestroy(pColInfoData); } - taosArrayDestroy(pBlock->pDataBlock); - pBlock->pDataBlock = NULL; + pBlock->pDataBlock = taosArrayDestroy(pBlock->pDataBlock); taosMemoryFreeClear(pBlock->pBlockAgg); memset(&pBlock->info, 0, sizeof(SDataBlockInfo)); } @@ -1361,6 +1210,7 @@ void* blockDataDestroy(SSDataBlock* pBlock) { return NULL; } +// todo remove it int32_t assignOneDataBlock(SSDataBlock* dst, const SSDataBlock* src) { dst->info = src->info; 
dst->info.rows = 0; @@ -1759,16 +1609,6 @@ static void colDataKeepFirstNRows(SColumnInfoData* pColInfoData, size_t n, size_ if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { // pColInfoData->varmeta.length = colDataMoveVarData(pColInfoData, 0, n); memset(&pColInfoData->varmeta.offset[n], 0, total - n); - } else { // reset the bitmap value - /*int32_t stopIndex = BitmapLen(n) * 8; - for(int32_t i = n; i < stopIndex; ++i) { - colDataClearNull_f(pColInfoData->nullbitmap, i); - } - - int32_t remain = BitmapLen(total) - BitmapLen(n); - if (remain > 0) { - memset(pColInfoData->nullbitmap+BitmapLen(n), 0, remain); - }*/ } } @@ -1875,32 +1715,6 @@ void* tDecodeDataBlock(const void* buf, SSDataBlock* pBlock) { return (void*)buf; } -int32_t tEncodeDataBlocks(void** buf, const SArray* blocks) { - int32_t tlen = 0; - int32_t sz = taosArrayGetSize(blocks); - tlen += taosEncodeFixedI32(buf, sz); - - for (int32_t i = 0; i < sz; i++) { - SSDataBlock* pBlock = taosArrayGet(blocks, i); - tlen += tEncodeDataBlock(buf, pBlock); - } - - return tlen; -} - -void* tDecodeDataBlocks(const void* buf, SArray** blocks) { - int32_t sz; - buf = taosDecodeFixedI32(buf, &sz); - - *blocks = taosArrayInit(sz, sizeof(SSDataBlock)); - for (int32_t i = 0; i < sz; i++) { - SSDataBlock pBlock = {0}; - buf = tDecodeDataBlock(buf, &pBlock); - taosArrayPush(*blocks, &pBlock); - } - return (void*)buf; -} - static char* formatTimestamp(char* buf, int64_t val, int precision) { time_t tt; int32_t ms = 0; @@ -1950,101 +1764,6 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) { return buf; } -#if 0 -void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag) { - SArray* dataBlocks = taosArrayInit(1, sizeof(SSDataBlock*)); - taosArrayPush(dataBlocks, &pBlock); - blockDebugShowDataBlocks(dataBlocks, flag); - taosArrayDestroy(dataBlocks); -} - -void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag) { - char pBuf[128] = {0}; - int32_t sz = taosArrayGetSize(dataBlocks); 
- for (int32_t i = 0; i < sz; i++) { - SSDataBlock* pDataBlock = taosArrayGet(dataBlocks, i); - size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock); - - int32_t rows = pDataBlock->info.rows; - printf("%s |block ver %" PRIi64 " |block type %d |child id %d|group id %" PRIu64 "\n", flag, - pDataBlock->info.version, (int32_t)pDataBlock->info.type, pDataBlock->info.childId, - pDataBlock->info.id.groupId); - for (int32_t j = 0; j < rows; j++) { - printf("%s |", flag); - for (int32_t k = 0; k < numOfCols; k++) { - SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - if (k == 0) { - printf("cols:%d |", (int32_t)numOfCols); - } - if (colDataIsNull(pColInfoData, rows, j, NULL)) { - printf(" %15s |", "NULL"); - continue; - } - - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - formatTimestamp(pBuf, *(uint64_t*)var, TSDB_TIME_PRECISION_MILLI); - printf(" %25s |", pBuf); - break; - case TSDB_DATA_TYPE_BOOL: - printf(" %15" PRIi8 " |", *(int8_t*)var); - break; - case TSDB_DATA_TYPE_TINYINT: - printf(" %15" PRIi8 " |", *(int8_t*)var); - break; - case TSDB_DATA_TYPE_SMALLINT: - printf(" %15" PRIi16 " |", *(int16_t*)var); - break; - case TSDB_DATA_TYPE_INT: - printf(" %15d |", *(int32_t*)var); - break; - case TSDB_DATA_TYPE_UTINYINT: - printf(" %15" PRIu8 " |", *(uint8_t*)var); - break; - case TSDB_DATA_TYPE_USMALLINT: - printf(" %15" PRIu16 " |", *(uint16_t*)var); - break; - case TSDB_DATA_TYPE_UINT: - printf(" %15u |", *(uint32_t*)var); - break; - case TSDB_DATA_TYPE_BIGINT: - printf(" %15" PRId64 " |", *(int64_t*)var); - break; - case TSDB_DATA_TYPE_UBIGINT: - printf(" %15" PRIu64 " |", *(uint64_t*)var); - break; - case TSDB_DATA_TYPE_FLOAT: - printf(" %15f |", *(float*)var); - break; - case TSDB_DATA_TYPE_DOUBLE: - printf(" %15lf |", *(double*)var); - break; - case TSDB_DATA_TYPE_VARCHAR: - case TSDB_DATA_TYPE_GEOMETRY: { - char* pData = 
colDataGetVarData(pColInfoData, j); - int32_t dataSize = TMIN(sizeof(pBuf) - 1, varDataLen(pData)); - memset(pBuf, 0, dataSize + 1); - strncpy(pBuf, varDataVal(pData), dataSize); - printf(" %15s |", pBuf); - } break; - case TSDB_DATA_TYPE_NCHAR: { - char* pData = colDataGetVarData(pColInfoData, j); - int32_t dataSize = TMIN(sizeof(pBuf), varDataLen(pData)); - memset(pBuf, 0, dataSize); - (void)taosUcs4ToMbs((TdUcs4*)varDataVal(pData), dataSize, pBuf); - printf(" %15s |", pBuf); - } break; - default: - break; - } - } - printf("\n"); - } - } -} -#endif - // for debug char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) { int32_t size = 2048*1024; @@ -2153,182 +1872,6 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) return dumpBuf; } -/** - * @brief TODO: Assume that the final generated result it less than 3M - * - * @param pReq - * @param pDataBlocks - * @param vgId - * @param suid - * - */ -#if 0 -int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataBlock, STSchema* pTSchema, int32_t vgId, - tb_uid_t suid) { - int32_t bufSize = sizeof(SSubmitReq); - int32_t sz = 1; - for (int32_t i = 0; i < sz; ++i) { - const SDataBlockInfo* pBlkInfo = &pDataBlock->info; - - int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); - bufSize += pBlkInfo->rows * (TD_ROW_HEAD_LEN + pBlkInfo->rowSize + BitmapLen(colNum)); - bufSize += sizeof(SSubmitBlk); - } - - *pReq = taosMemoryCalloc(1, bufSize); - if (!(*pReq)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return TSDB_CODE_FAILED; - } - void* pDataBuf = *pReq; - - int32_t msgLen = sizeof(SSubmitReq); - int32_t numOfBlks = 0; - SRowBuilder rb = {0}; - tdSRowInit(&rb, pTSchema->version); - - for (int32_t i = 0; i < sz; ++i) { - int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); - int32_t rows = pDataBlock->info.rows; - - if (colNum <= 1) { - // invalid if only with TS col - continue; - } - - if (rb.nCols != colNum) { - tdSRowSetTpInfo(&rb, 
colNum, pTSchema->flen); - } - - SSubmitBlk* pSubmitBlk = POINTER_SHIFT(pDataBuf, msgLen); - pSubmitBlk->suid = suid; - pSubmitBlk->uid = pDataBlock->info.id.groupId; - pSubmitBlk->numOfRows = rows; - pSubmitBlk->sversion = pTSchema->version; - - msgLen += sizeof(SSubmitBlk); - int32_t dataLen = 0; - for (int32_t j = 0; j < rows; ++j) { // iterate by row - tdSRowResetBuf(&rb, POINTER_SHIFT(pDataBuf, msgLen + dataLen)); // set row buf - bool isStartKey = false; - int32_t offset = 0; - for (int32_t k = 0; k < colNum; ++k) { // iterate by column - SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k); - STColumn* pCol = &pTSchema->columns[k]; - void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes); - switch (pColInfoData->info.type) { - case TSDB_DATA_TYPE_TIMESTAMP: - if (!isStartKey) { - isStartKey = true; - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, true, - offset, k); - continue; // offset should keep 0 for next column - - } else if (colDataIsNull_s(pColInfoData, j)) { - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NULL, NULL, - false, offset, k); - } else { - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, - true, offset, k); - } - break; - case TSDB_DATA_TYPE_NCHAR: - case TSDB_DATA_TYPE_VARCHAR: // TSDB_DATA_TYPE_BINARY - case TSDB_DATA_TYPE_GEOMETRY: { - if (colDataIsNull_s(pColInfoData, j)) { - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NULL, NULL, - false, offset, k); - } else { - void* data = colDataGetData(pColInfoData, j); - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NORM, data, - true, offset, k); - } - break; - } - case TSDB_DATA_TYPE_VARBINARY: - case TSDB_DATA_TYPE_DECIMAL: - case TSDB_DATA_TYPE_BLOB: - case TSDB_DATA_TYPE_JSON: - case 
TSDB_DATA_TYPE_MEDIUMBLOB: - uError("the column type %" PRIi16 " is defined but not implemented yet", pColInfoData->info.type); - break; - default: - if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) { - if (colDataIsNull_s(pColInfoData, j)) { - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NULL, NULL, false, - offset, k); - } else if (pCol->type == pColInfoData->info.type) { - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset, - k); - } else { - char tv[8] = {0}; - if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) { - float v = 0; - GET_TYPED_DATA(v, float, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) { - double v = 0; - GET_TYPED_DATA(v, double, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) { - int64_t v = 0; - GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } else { - uint64_t v = 0; - GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var); - SET_TYPED_DATA(&tv, pCol->type, v); - } - tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset, - k); - } - } else { - uError("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type); - } - break; - } - offset += TYPE_BYTES[pCol->type]; // sum/avg would convert to int64_t/uint64_t/double during aggregation - } - tdSRowEnd(&rb); - dataLen += TD_ROW_LEN(rb.pBuf); -#ifdef TD_DEBUG_PRINT_ROW - tdSRowPrint(rb.pBuf, pTSchema, __func__); -#endif - } - - ++numOfBlks; - - pSubmitBlk->dataLen = dataLen; - msgLen += pSubmitBlk->dataLen; - } - - if (numOfBlks > 0) { - (*pReq)->length = msgLen; - - (*pReq)->header.vgId = htonl(vgId); - (*pReq)->header.contLen = htonl(msgLen); - (*pReq)->length = 
(*pReq)->header.contLen; - (*pReq)->numOfBlocks = htonl(numOfBlks); - SSubmitBlk* blk = (SSubmitBlk*)((*pReq) + 1); - while (numOfBlks--) { - int32_t dataLen = blk->dataLen; - blk->uid = htobe64(blk->uid); - blk->suid = htobe64(blk->suid); - blk->sversion = htonl(blk->sversion); - blk->dataLen = htonl(blk->dataLen); - blk->schemaLen = htonl(blk->schemaLen); - blk->numOfRows = htonl(blk->numOfRows); - blk = (SSubmitBlk*)(blk->data + dataLen); - } - } else { - // no valid rows - taosMemoryFreeClear(*pReq); - } - - return TSDB_CODE_SUCCESS; -} -#endif - int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDataBlock, const STSchema* pTSchema, int64_t uid, int32_t vgId, tb_uid_t suid) { SSubmitReq2* pReq = *ppReq; @@ -2732,3 +2275,149 @@ const char* blockDecode(SSDataBlock* pBlock, const char* pData) { ASSERT(pStart - pData == dataLen); return pStart; } + +void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList) { +// int32_t totalRows = pBlock->info.rows; + int32_t bmLen = BitmapLen(totalRows); + char* pBitmap = NULL; + int32_t maxRows = 0; + + size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); + // it is a reserved column for scalar function, and no data in this column yet. + if (pDst->pData == NULL) { + continue; + } + + int32_t numOfRows = 0; + if (IS_VAR_DATA_TYPE(pDst->info.type)) { + int32_t j = 0; + pDst->varmeta.length = 0; + + while (j < totalRows) { + if (pBoolList[j] == 0) { + j += 1; + continue; + } + + if (colDataIsNull_var(pDst, j)) { + colDataSetNull_var(pDst, numOfRows); + } else { + // fix address sanitizer error. 
p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2 + char* p1 = colDataGetVarData(pDst, j); + int32_t len = 0; + if (pDst->info.type == TSDB_DATA_TYPE_JSON) { + len = getJsonValueLen(p1); + } else { + len = varDataTLen(p1); + } + char* p2 = taosMemoryMalloc(len); + memcpy(p2, p1, len); + colDataSetVal(pDst, numOfRows, p2, false); + taosMemoryFree(p2); + } + numOfRows += 1; + j += 1; + } + + if (maxRows < numOfRows) { + maxRows = numOfRows; + } + } else { + if (pBitmap == NULL) { + pBitmap = taosMemoryCalloc(1, bmLen); + } + + memcpy(pBitmap, pDst->nullbitmap, bmLen); + memset(pDst->nullbitmap, 0, bmLen); + + int32_t j = 0; + + switch (pDst->info.type) { + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_TIMESTAMP: + while (j < totalRows) { + if (pBoolList[j] == 0) { + j += 1; + continue; + } + + if (colDataIsNull_f(pBitmap, j)) { + colDataSetNull_f(pDst->nullbitmap, numOfRows); + } else { + ((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j]; + } + numOfRows += 1; + j += 1; + } + break; + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: + while (j < totalRows) { + if (pBoolList[j] == 0) { + j += 1; + continue; + } + if (colDataIsNull_f(pBitmap, j)) { + colDataSetNull_f(pDst->nullbitmap, numOfRows); + } else { + ((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j]; + } + numOfRows += 1; + j += 1; + } + break; + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: + while (j < totalRows) { + if (pBoolList[j] == 0) { + j += 1; + continue; + } + if (colDataIsNull_f(pBitmap, j)) { + colDataSetNull_f(pDst->nullbitmap, numOfRows); + } else { + ((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j]; + } + numOfRows += 1; + j += 1; + } + break; + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: + while (j < totalRows) { + if (pBoolList[j] == 0) { + j += 1; + 
continue; + } + if (colDataIsNull_f(pBitmap, j)) { + colDataSetNull_f(pDst->nullbitmap, numOfRows); + } else { + ((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j]; + } + numOfRows += 1; + j += 1; + } + break; + } + } + + if (maxRows < numOfRows) { + maxRows = numOfRows; + } + } + + pBlock->info.rows = maxRows; + if (pBitmap != NULL) { + taosMemoryFree(pBitmap); + } +} + +int32_t blockGetEncodeSize(const SSDataBlock* pBlock) { + return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock); +} \ No newline at end of file diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index c098d546b693b88eee078dcdc6a67a6a606663d0..b2fb7243ff28a116dbe6bf8b97a57e16aee5207b 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -76,6 +76,9 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 94b804290a18864f170235ec3b51268f0039681e..69fe4f22b56a5f774b61d4a390a18e02423d8507 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -746,9 +746,10 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, 
vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_RECOVER_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TRANSFER_STATE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 76e2f930273ec4558a1c11ffb5b75e9ba3a60505..247c1729a34b32243d01ac7b80c96ad3b606f1a7 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -92,7 +92,7 @@ static void vmProcessStreamQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { const STraceId *trace = &pMsg->info.traceId; dGTrace("vgId:%d, msg:%p get from vnode-stream queue", pVnode->vgId, pMsg); - int32_t code = vnodeProcessFetchMsg(pVnode->pImpl, pMsg, pInfo); + int32_t code = vnodeProcessStreamMsg(pVnode->pImpl, pMsg, pInfo); if (code != 0) { if (terrno != 0) code = terrno; dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType), diff --git 
a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 696549fa05633cde2e0f500272e259f3613fe603..1f4bc19e338a2372846f371b5bb5408e895ce127 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -647,6 +647,14 @@ typedef struct { // SMqSubActionLogEntry* pLogEntry; } SMqRebOutputObj; +typedef struct SStreamConf { + int8_t igExpired; + int8_t trigger; + int8_t fillHistory; + int64_t triggerParam; + int64_t watermark; +} SStreamConf; + typedef struct { char name[TSDB_STREAM_FNAME_LEN]; // ctl @@ -660,12 +668,7 @@ typedef struct { // info int64_t uid; int8_t status; - // config - int8_t igExpired; - int8_t trigger; - int8_t fillHistory; - int64_t triggerParam; - int64_t watermark; + SStreamConf conf; // source and target int64_t sourceDbUid; int64_t targetDbUid; @@ -675,14 +678,18 @@ typedef struct { int64_t targetStbUid; // fixedSinkVg is not applicable for encode and decode - SVgObj fixedSinkVg; + SVgObj fixedSinkVg; int32_t fixedSinkVgId; // 0 for shuffle // transformation char* sql; char* ast; char* physicalPlan; - SArray* tasks; // SArray> + SArray* tasks; // SArray> + + SArray* pHTasksList; // generate the results for already stored ts data + int64_t hTaskUid; // stream task for history ts data + SSchemaWrapper outputSchema; SSchemaWrapper tagSchema; diff --git a/source/dnode/mnode/impl/inc/mndScheduler.h b/source/dnode/mnode/impl/inc/mndScheduler.h index 23085c53eed7ef6234da38f146f962ea96d9fdde..14517a99d39b6c4b1a18b7a3910d5c9d134da59c 100644 --- a/source/dnode/mnode/impl/inc/mndScheduler.h +++ b/source/dnode/mnode/impl/inc/mndScheduler.h @@ -30,7 +30,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark); -int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream); +int32_t mndScheduleStream(SMnode* pMnode, 
SStreamObj* pStream, int64_t nextWindowSkey); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 287b39d8c799c157cc113081cd66963fbdb526d6..a8a719edda4d204baf753d7ccb622ed85f50d9a0 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -30,11 +30,11 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1; if (tEncodeI8(pEncoder, pObj->status) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->igExpired) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1; - if (tEncodeI8(pEncoder, pObj->fillHistory) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1; - if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->conf.igExpired) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->conf.trigger) < 0) return -1; + if (tEncodeI8(pEncoder, pObj->conf.fillHistory) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->conf.triggerParam) < 0) return -1; + if (tEncodeI64(pEncoder, pObj->conf.watermark) < 0) return -1; if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1; if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1; @@ -97,11 +97,11 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1; if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->igExpired) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1; - if (tDecodeI8(pDecoder, &pObj->fillHistory) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1; - if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->conf.igExpired) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->conf.trigger) < 0) return -1; + if (tDecodeI8(pDecoder, &pObj->conf.fillHistory) < 0) return -1; + if 
(tDecodeI64(pDecoder, &pObj->conf.triggerParam) < 0) return -1; + if (tDecodeI64(pDecoder, &pObj->conf.watermark) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1; if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1; @@ -154,18 +154,10 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { return 0; } -void tFreeStreamObj(SStreamObj *pStream) { - taosMemoryFree(pStream->sql); - taosMemoryFree(pStream->ast); - taosMemoryFree(pStream->physicalPlan); - - if (pStream->outputSchema.nCols) { - taosMemoryFree(pStream->outputSchema.pSchema); - } - - int32_t sz = taosArrayGetSize(pStream->tasks); - for (int32_t i = 0; i < sz; i++) { - SArray *pLevel = taosArrayGetP(pStream->tasks, i); +static void* freeStreamTasks(SArray* pTaskLevel) { + int32_t numOfLevel = taosArrayGetSize(pTaskLevel); + for (int32_t i = 0; i < numOfLevel; i++) { + SArray *pLevel = taosArrayGetP(pTaskLevel, i); int32_t taskSz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < taskSz; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); @@ -175,7 +167,20 @@ void tFreeStreamObj(SStreamObj *pStream) { taosArrayDestroy(pLevel); } - taosArrayDestroy(pStream->tasks); + return taosArrayDestroy(pTaskLevel); +} + +void tFreeStreamObj(SStreamObj *pStream) { + taosMemoryFree(pStream->sql); + taosMemoryFree(pStream->ast); + taosMemoryFree(pStream->physicalPlan); + + if (pStream->outputSchema.nCols || pStream->outputSchema.pSchema) { + taosMemoryFree(pStream->outputSchema.pSchema); + } + + pStream->tasks = freeStreamTasks(pStream->tasks); + pStream->pHTasksList = freeStreamTasks(pStream->pHTasksList); // tagSchema.pSchema if (pStream->tagSchema.nCols > 0) { diff --git a/source/dnode/mnode/impl/src/mndDump.c b/source/dnode/mnode/impl/src/mndDump.c index d57053bb5bd1d26827728264277323183b996a3c..62b5cb00e6abd1ff8fd0a567a7679f825f6782c1 100644 --- a/source/dnode/mnode/impl/src/mndDump.c +++ b/source/dnode/mnode/impl/src/mndDump.c @@ -367,10 +367,10 @@ void 
dumpStream(SSdb *pSdb, SJson *json) { tjsonAddStringToObject(item, "smaId", i642str(pObj->smaId)); tjsonAddStringToObject(item, "uid", i642str(pObj->uid)); tjsonAddStringToObject(item, "status", i642str(pObj->status)); - tjsonAddStringToObject(item, "igExpired", i642str(pObj->igExpired)); - tjsonAddStringToObject(item, "trigger", i642str(pObj->trigger)); - tjsonAddStringToObject(item, "triggerParam", i642str(pObj->triggerParam)); - tjsonAddStringToObject(item, "watermark", i642str(pObj->watermark)); + tjsonAddStringToObject(item, "igExpired", i642str(pObj->conf.igExpired)); + tjsonAddStringToObject(item, "trigger", i642str(pObj->conf.trigger)); + tjsonAddStringToObject(item, "triggerParam", i642str(pObj->conf.triggerParam)); + tjsonAddStringToObject(item, "watermark", i642str(pObj->conf.watermark)); tjsonAddStringToObject(item, "sourceDbUid", i642str(pObj->sourceDbUid)); tjsonAddStringToObject(item, "targetDbUid", i642str(pObj->targetDbUid)); tjsonAddStringToObject(item, "sourceDb", mndGetDbStr(pObj->sourceDb)); diff --git a/source/dnode/mnode/impl/src/mndIndex.c b/source/dnode/mnode/impl/src/mndIndex.c index 2d2637b8ce2e334a40e644bb846c726da6c19879..b4de51204fa7a6af147b77eecb9e574c222e324a 100644 --- a/source/dnode/mnode/impl/src/mndIndex.c +++ b/source/dnode/mnode/impl/src/mndIndex.c @@ -542,32 +542,32 @@ int32_t mndRetrieveTagIdx(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, i STR_TO_VARSTR(n3, (char *)tNameGetTableName(&stbName)); SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)n1, false); + colDataSetVal(pColInfo, numOfRows, (const char *)n1, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)n2, false); + colDataSetVal(pColInfo, numOfRows, (const char *)n2, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)n3, false); + colDataSetVal(pColInfo, numOfRows, 
(const char *)n3, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)&invalid, false); + colDataSetVal(pColInfo, numOfRows, (const char *)&invalid, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false); + colDataSetVal(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false); char col[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(col, (char *)pIdx->colName); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)col, false); + colDataSetVal(pColInfo, numOfRows, (const char *)col, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(tag, (char *)"tag_index"); - colDataAppend(pColInfo, numOfRows, (const char *)tag, false); + colDataSetVal(pColInfo, numOfRows, (const char *)tag, false); numOfRows++; sdbRelease(pSdb, pIdx); diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 9a611fe46af1494bdcb9f77530452512a0d978a2..33905bad86c74f9f990512ef06815190a54d720f 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -22,10 +22,12 @@ #include "tname.h" #include "tuuid.h" +#define SINK_NODE_LEVEL (0) extern bool tsDeployOnSnode; -static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup); -static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); +static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, + SVgObj* pVgroup, int32_t fillHistory); +static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t 
watermark, int64_t deleteMark) { @@ -100,18 +102,16 @@ int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) { return 0; } -#define SINK_NODE_LEVEL (0) - -int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStreamTask* pTask) { +int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SArray* pSinkNodeList, + SStreamTask* pTask) { bool isShuffle = false; if (pStream->fixedSinkVgId == 0) { SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb); if (pDb != NULL && pDb->cfg.numOfVgroups > 1) { - isShuffle = true; pTask->outputType = TASK_OUTPUT__SHUFFLE_DISPATCH; - pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; + pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) { return -1; } @@ -120,7 +120,6 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea sdbRelease(pMnode->pSdb, pDb); } - SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList); if (isShuffle) { @@ -133,7 +132,7 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea for (int32_t j = 0; j < numOfSinkNodes; j++) { SStreamTask* pSinkTask = taosArrayGetP(pSinkNodeList, j); - if (pSinkTask->nodeId == pVgInfo->vgId) { + if (pSinkTask->info.nodeId == pVgInfo->vgId) { pVgInfo->taskId = pSinkTask->id.taskId; break; } @@ -150,11 +149,11 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea int32_t mndAssignStreamTaskToVgroup(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) { int32_t msgLen; - pTask->nodeId = pVgroup->vgId; - pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup); + pTask->info.nodeId = pVgroup->vgId; + pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup); - plan->execNode.nodeId = pTask->nodeId; - plan->execNode.epSet = pTask->epSet; + plan->execNode.nodeId = pTask->info.nodeId; + 
plan->execNode.epSet = pTask->info.epSet; if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) { terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; @@ -171,14 +170,15 @@ SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) { return pObj; } -int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) { +int32_t mndAssignStreamTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) { int32_t msgLen; - pTask->nodeId = SNODE_HANDLE; - pTask->epSet = mndAcquireEpFromSnode(pMnode, pSnode); + pTask->info.nodeId = SNODE_HANDLE; + pTask->info.epSet = mndAcquireEpFromSnode(pMnode, pSnode); plan->execNode.nodeId = SNODE_HANDLE; - plan->execNode.epSet = pTask->epSet; + plan->execNode.epSet = pTask->info.epSet; + mDebug("s-task:0x%x set the agg task to snode:%d", pTask->id.taskId, SNODE_HANDLE); if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) { terrno = TSDB_CODE_QRY_INVALID_INPUT; @@ -187,6 +187,7 @@ int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, return 0; } +// todo random choose a node to do compute SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { void* pIter = NULL; SVgObj* pVgroup = NULL; @@ -203,9 +204,9 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { } // create sink node for each vgroup. 
-int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { - SSdb* pSdb = pMnode->pSdb; - void* pIter = NULL; +int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) { + SSdb* pSdb = pMnode->pSdb; + void* pIter = NULL; while (1) { SVgObj* pVgroup = NULL; @@ -219,43 +220,45 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - mndAddSinkTaskToStream(pStream, pMnode, pVgroup->vgId, pVgroup); + mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory); sdbRelease(pSdb, pVgroup); } return 0; } -int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup) { - SArray* pTaskList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); - - SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, pStream->fillHistory, 0, pTaskList); +int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, + int32_t fillHistory) { + SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - pTask->nodeId = vgId; - pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup); + pTask->info.nodeId = vgId; + pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup); mndSetSinkTaskInfo(pStream, pTask); return 0; } -static int32_t mndScheduleFillHistoryStreamTask(SMnode* pMnode, SStreamObj* pStream) { - return 0; -} - -static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SStreamObj* pStream, - SSubplan* plan, uint64_t uid, int8_t taskLevel, int8_t fillHistory, - bool hasExtraSink) { - SStreamTask* pTask = tNewStreamTask(uid, taskLevel, fillHistory, pStream->triggerParam, pTaskList); +static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList, + SStreamObj* pStream, 
SSubplan* plan, uint64_t uid, int8_t fillHistory, + bool hasExtraSink, int64_t firstWindowSkey) { + SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList); if (pTask == NULL) { return terrno; } + // todo set the correct ts, which should be last key of queried table. + STimeWindow* pWindow = &pTask->dataRange.window; + + pWindow->skey = INT64_MIN; + pWindow->ekey = firstWindowSkey - 1; + mDebug("add source task 0x%x window:%" PRId64 " - %" PRId64, pTask->id.taskId, pWindow->skey, pWindow->ekey); + // sink or dispatch if (hasExtraSink) { - mndAddDispatcherForInnerTask(pMnode, pStream, pTask); + mndAddDispatcherForInternalTask(pMnode, pStream, pSinkTaskList, pTask); } else { mndSetSinkTaskInfo(pStream, pTask); } @@ -274,9 +277,9 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) { return NULL; } - pEpInfo->childId = pTask->selfChildId; - pEpInfo->epSet = pTask->epSet; - pEpInfo->nodeId = pTask->nodeId; + pEpInfo->childId = pTask->info.selfChildId; + pEpInfo->epSet = pTask->info.epSet; + pEpInfo->nodeId = pTask->info.nodeId; pEpInfo->taskId = pTask->id.taskId; return pEpInfo; @@ -285,222 +288,377 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) { void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) { STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher; pDispatcher->taskId = pTask->id.taskId; - pDispatcher->nodeId = pTask->nodeId; - pDispatcher->epSet = pTask->epSet; + pDispatcher->nodeId = pTask->info.nodeId; + pDispatcher->epSet = pTask->info.epSet; pDstTask->outputType = TASK_OUTPUT__FIXED_DISPATCH; - pDstTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; + pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; } -int32_t appendToUpstream(SStreamTask* pTask, SStreamTask* pUpstream) { +int32_t setEpToDownstreamTask(SStreamTask* pTask, SStreamTask* pDownstream) { SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask); if 
(pEpInfo == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } - if(pUpstream->childEpInfo == NULL) { - pUpstream->childEpInfo = taosArrayInit(4, POINTER_BYTES); + if (pDownstream->pUpstreamEpInfoList == NULL) { + pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES); } - - taosArrayPush(pUpstream->childEpInfo, &pEpInfo); + + taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo); return TSDB_CODE_SUCCESS; } -int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { - SSdb* pSdb = pMnode->pSdb; +static SArray* addNewTaskList(SArray* pTasksList) { + SArray* pTaskList = taosArrayInit(0, POINTER_BYTES); + taosArrayPush(pTasksList, &pTaskList); + return pTaskList; +} - SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan); - if (pPlan == NULL) { +// set the history task id +static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) { + for (int32_t i = 0; i < taosArrayGetSize(pTaskList); ++i) { + SStreamTask** pStreamTask = taosArrayGet(pTaskList, i); + SStreamTask** pHTask = taosArrayGet(pHTaskList, i); + + (*pStreamTask)->historyTaskId.taskId = (*pHTask)->id.taskId; + (*pStreamTask)->historyTaskId.streamId = (*pHTask)->id.streamId; + + (*pHTask)->streamTaskId.taskId = (*pStreamTask)->id.taskId; + (*pHTask)->streamTaskId.streamId = (*pStreamTask)->id.streamId; + + mDebug("s-task:0x%x related history task:0x%x, level:%d", (*pStreamTask)->id.taskId, (*pHTask)->id.taskId, + (*pHTask)->info.taskLevel); + } +} + +static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream, + bool hasExtraSink, int64_t nextWindowSkey) { + // create exec stream task, since only one level, the exec task is also the source task + SArray* pTaskList = addNewTaskList(pStream->tasks); + SSdb* pSdb = pMnode->pSdb; + + SArray* pHTaskList = NULL; + if (pStream->conf.fillHistory) { + pHTaskList = addNewTaskList(pStream->pHTasksList); + } + + SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0); + if 
(LIST_LENGTH(inner->pNodeList) != 1) { terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; } - int32_t planTotLevel = LIST_LENGTH(pPlan->pSubplans); - pStream->tasks = taosArrayInit(planTotLevel, POINTER_BYTES); - - bool hasExtraSink = false; - bool externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0; - SDbObj* pDbObj = mndAcquireDb(pMnode, pStream->targetDb); - if (pDbObj == NULL) { + SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); + if (plan->subplanType != SUBPLAN_TYPE_SCAN) { terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; } - bool multiTarget = (pDbObj->cfg.numOfVgroups > 1); - sdbRelease(pSdb, pDbObj); + void* pIter = NULL; + while (1) { + SVgObj* pVgroup; + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); + if (pIter == NULL) { + break; + } - if (planTotLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) { - SArray* taskOneLevel = taosArrayInit(0, POINTER_BYTES); - taosArrayPush(pStream->tasks, &taskOneLevel); + if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) { + sdbRelease(pSdb, pVgroup); + continue; + } - // add extra sink - hasExtraSink = true; - if (pStream->fixedSinkVgId == 0) { - if (mndAddShuffleSinkTasksToStream(pMnode, pStream) < 0) { - // TODO free - return -1; - } - } else { - if (mndAddSinkTaskToStream(pStream, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg) < 0) { - // TODO free - return -1; - } + // new stream task + SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL); + int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0, + hasExtraSink, nextWindowSkey); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pSdb, pVgroup); + return -1; + } + + if (pStream->conf.fillHistory) { + SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL); + code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid, + 1, hasExtraSink, nextWindowSkey); 
+ } + + sdbRelease(pSdb, pVgroup); + if (code != TSDB_CODE_SUCCESS) { + return -1; } } - pStream->totalLevel = planTotLevel + hasExtraSink; + if (pStream->conf.fillHistory) { + setHTasksId(pTaskList, pHTaskList); + } - if (planTotLevel > 1) { - SStreamTask* pInnerTask; - // inner level - { - SArray* taskInnerLevel = taosArrayInit(0, POINTER_BYTES); - taosArrayPush(pStream->tasks, &taskInnerLevel); + return TSDB_CODE_SUCCESS; +} - SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0); - SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); - if (plan->subplanType != SUBPLAN_TYPE_MERGE) { - terrno = TSDB_CODE_QRY_INVALID_INPUT; - return -1; - } +static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask, + SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) { + SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList); + if (pTask == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } - pInnerTask = tNewStreamTask(pStream->uid, TASK_LEVEL__AGG, pStream->fillHistory, pStream->triggerParam, taskInnerLevel); - if (pInnerTask == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - qDestroyQueryPlan(pPlan); - return -1; - } + // todo set the correct ts, which should be last key of queried table. 
+ STimeWindow* pWindow = &pTask->dataRange.window; + pWindow->skey = INT64_MIN; + pWindow->ekey = nextWindowSkey - 1; - // dispatch - if (mndAddDispatcherForInnerTask(pMnode, pStream, pInnerTask) < 0) { - qDestroyQueryPlan(pPlan); - return -1; - } + mDebug("s-task:0x%x level:%d set time window:%" PRId64 " - %" PRId64, pTask->id.taskId, pTask->info.taskLevel, + pWindow->skey, pWindow->ekey); - if (tsDeployOnSnode) { - SSnodeObj* pSnode = mndSchedFetchOneSnode(pMnode); - if (pSnode == NULL) { - SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); - if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) { - sdbRelease(pSdb, pVgroup); - qDestroyQueryPlan(pPlan); - return -1; - } - sdbRelease(pSdb, pVgroup); - } else { - if (mndAssignTaskToSnode(pMnode, pInnerTask, plan, pSnode) < 0) { - sdbRelease(pSdb, pSnode); - qDestroyQueryPlan(pPlan); - return -1; - } - } - } else { - SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); - if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) { - sdbRelease(pSdb, pVgroup); - qDestroyQueryPlan(pPlan); - return -1; - } + // all the source tasks dispatch result to a single agg node. 
+ setFixedDownstreamEpInfo(pTask, pDownstreamTask); + if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) { + return -1; + } - sdbRelease(pSdb, pVgroup); - } - } + return setEpToDownstreamTask(pTask, pDownstreamTask); +} - // source level - SArray* taskSourceLevel = taosArrayInit(0, POINTER_BYTES); - taosArrayPush(pStream->tasks, &taskSourceLevel); +static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream, + int32_t fillHistory, SStreamTask** pAggTask) { + *pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList); + if (*pAggTask == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } - SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1); - SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); - if (plan->subplanType != SUBPLAN_TYPE_SCAN) { - terrno = TSDB_CODE_QRY_INVALID_INPUT; - return -1; + // dispatch + if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) { + return -1; + } + + return 0; +} + +static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask, + SStreamTask** pHAggTask) { + SArray* pAggTaskList = addNewTaskList(pStream->tasks); + SSdb* pSdb = pMnode->pSdb; + + SNodeListNode* pInnerNode = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0); + SSubplan* plan = (SSubplan*)nodesListGetNode(pInnerNode->pNodeList, 0); + if (plan->subplanType != SUBPLAN_TYPE_MERGE) { + terrno = TSDB_CODE_QRY_INVALID_INPUT; + return -1; + } + + *pAggTask = NULL; + SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL); + + int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask); + if (code != TSDB_CODE_SUCCESS) { + return -1; + } + + SVgObj* pVgroup = NULL; + SSnodeObj* pSnode = NULL; + + if (tsDeployOnSnode) { + pSnode = mndSchedFetchOneSnode(pMnode); + if (pSnode == NULL) 
{ + pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); } + } else { + pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); + } - void* pIter = NULL; - while (1) { - SVgObj* pVgroup; - pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); - if (pIter == NULL) { - break; - } + if (pSnode != NULL) { + code = mndAssignStreamTaskToSnode(pMnode, *pAggTask, plan, pSnode); + } else { + code = mndAssignStreamTaskToVgroup(pMnode, *pAggTask, plan, pVgroup); + } - if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) { - sdbRelease(pSdb, pVgroup); - continue; - } + if (pStream->conf.fillHistory) { + SArray* pHAggTaskList = addNewTaskList(pStream->pHTasksList); + SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL); - SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, 0, taskSourceLevel); - if (pTask == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + *pHAggTask = NULL; + code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory, + pHAggTask); + if (code != TSDB_CODE_SUCCESS) { + if (pSnode != NULL) { + sdbRelease(pSdb, pSnode); + } else { sdbRelease(pSdb, pVgroup); - qDestroyQueryPlan(pPlan); - return -1; } + return code; + } - // all the source tasks dispatch result to a single agg node. 
- setFixedDownstreamEpInfo(pTask, pInnerTask); + if (pSnode != NULL) { + code = mndAssignStreamTaskToSnode(pMnode, *pHAggTask, plan, pSnode); + } else { + code = mndAssignStreamTaskToVgroup(pMnode, *pHAggTask, plan, pVgroup); + } - if (mndAssignStreamTaskToVgroup(pMnode, pTask, plan, pVgroup) < 0) { - sdbRelease(pSdb, pVgroup); - qDestroyQueryPlan(pPlan); - return -1; - } + setHTasksId(pAggTaskList, pHAggTaskList); + } + + if (pSnode != NULL) { + sdbRelease(pSdb, pSnode); + } else { + sdbRelease(pSdb, pVgroup); + } + + return code; +} + +static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream, + SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) { + SArray* pSourceTaskList = addNewTaskList(pStream->tasks); - int32_t code = appendToUpstream(pTask, pInnerTask); + SArray* pHSourceTaskList = NULL; + if (pStream->conf.fillHistory) { + pHSourceTaskList = addNewTaskList(pStream->pHTasksList); + } + + SSdb* pSdb = pMnode->pSdb; + SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1); + SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); + if (plan->subplanType != SUBPLAN_TYPE_SCAN) { + terrno = TSDB_CODE_QRY_INVALID_INPUT; + return -1; + } + + void* pIter = NULL; + while (1) { + SVgObj* pVgroup; + pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); + if (pIter == NULL) { + break; + } + + if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) { sdbRelease(pSdb, pVgroup); + continue; + } + int32_t code = + doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey); + if (code != TSDB_CODE_SUCCESS) { + sdbRelease(pSdb, pVgroup); + terrno = code; + return -1; + } + + if (pStream->conf.fillHistory) { + code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup, + nextWindowSkey); if (code != TSDB_CODE_SUCCESS) { - terrno = code; - qDestroyQueryPlan(pPlan); - return 
-1; + sdbRelease(pSdb, pVgroup); + return code; } } - } else if (planTotLevel == 1) { - // create exec stream task, since only one level, the exec task is also the source task - SArray* pTaskList = taosArrayInit(0, POINTER_BYTES); - taosArrayPush(pStream->tasks, &pTaskList); - SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0); - if (LIST_LENGTH(inner->pNodeList) != 1) { - terrno = TSDB_CODE_QRY_INVALID_INPUT; + sdbRelease(pSdb, pVgroup); + } + + if (pStream->conf.fillHistory) { + setHTasksId(pSourceTaskList, pHSourceTaskList); + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList, + int32_t fillHistory) { + SArray* pSinkTaskList = addNewTaskList(pTasksList); + if (pStream->fixedSinkVgId == 0) { + if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) { + // TODO free return -1; } - - SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); - if (plan->subplanType != SUBPLAN_TYPE_SCAN) { - terrno = TSDB_CODE_QRY_INVALID_INPUT; + } else { + if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg, + fillHistory) < 0) { + // TODO free return -1; } + } - void* pIter = NULL; - while (1) { - SVgObj* pVgroup; - pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); - if (pIter == NULL) { - break; - } + *pCreatedTaskList = pSinkTaskList; + return TSDB_CODE_SUCCESS; +} - if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) { - sdbRelease(pSdb, pVgroup); - continue; - } +static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) { + SSdb* pSdb = pMnode->pSdb; + int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans); - // new stream task - int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, pStream, plan, pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, hasExtraSink); - sdbRelease(pSdb, 
pVgroup); + bool hasExtraSink = false; + bool externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0; + SDbObj* pDbObj = mndAcquireDb(pMnode, pStream->targetDb); + if (pDbObj == NULL) { + terrno = TSDB_CODE_QRY_INVALID_INPUT; + return -1; + } + + bool multiTarget = (pDbObj->cfg.numOfVgroups > 1); + sdbRelease(pSdb, pDbObj); + pStream->tasks = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES); + pStream->pHTasksList = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES); + + if (numOfPlanLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) { + // add extra sink + hasExtraSink = true; + + SArray* pSinkTaskList = NULL; + int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // check for fill history + if (pStream->conf.fillHistory) { + SArray* pHSinkTaskList = NULL; + code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1); if (code != TSDB_CODE_SUCCESS) { - qDestroyQueryPlan(pPlan); - return -1; + return code; } + + setHTasksId(pSinkTaskList, pHSinkTaskList); } } - qDestroyQueryPlan(pPlan); + pStream->totalLevel = numOfPlanLevel + hasExtraSink; + + if (numOfPlanLevel > 1) { + SStreamTask* pAggTask = NULL; + SStreamTask* pHAggTask = NULL; + + int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // source level + return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey); + } else if (numOfPlanLevel == 1) { + return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey); + } + return 0; } +int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey) { + SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan); + if (pPlan == NULL) { + terrno = TSDB_CODE_QRY_INVALID_INPUT; + return -1; + } + + int32_t code = doScheduleStream(pStream, pMnode, pPlan, 
nextWindowSkey); + qDestroyQueryPlan(pPlan); + + return code; +} + int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) { SSdb* pSdb = pMnode->pSdb; SVgObj* pVgroup = NULL; @@ -513,8 +671,8 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; } - }else if(pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL){ - SNode *pAst = NULL; + } else if (pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL) { + SNode* pAst = NULL; if (nodesStringToNode(pTopic->ast, &pAst) != 0) { mError("topic:%s, failed to create since %s", pTopic->name, terrstr()); return -1; @@ -529,7 +687,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib nodesDestroyNode(pAst); } - if(pPlan){ + if (pPlan) { int32_t levelNum = LIST_LENGTH(pPlan->pSubplans); if (levelNum != 1) { qDestroyQueryPlan(pPlan); diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index c337d85b688da80266899376969d68dc5f2b6855..68b697ca67c576eb9611b6618290cf0e07f570d8 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -555,20 +555,20 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea streamObj.version = 1; streamObj.sql = taosStrdup(pCreate->sql); streamObj.smaId = smaObj.uid; - streamObj.watermark = pCreate->watermark; + streamObj.conf.watermark = pCreate->watermark; streamObj.deleteMark = pCreate->deleteMark; - streamObj.fillHistory = STREAM_FILL_HISTORY_ON; - streamObj.trigger = STREAM_TRIGGER_WINDOW_CLOSE; - streamObj.triggerParam = pCreate->maxDelay; + streamObj.conf.fillHistory = STREAM_FILL_HISTORY_ON; + streamObj.conf.trigger = STREAM_TRIGGER_WINDOW_CLOSE; + streamObj.conf.triggerParam = pCreate->maxDelay; streamObj.ast = taosStrdup(smaObj.ast); // check the maxDelay - if (streamObj.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) { + if 
(streamObj.conf.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) { int64_t msInterval = convertTimeFromPrecisionToUnit(pCreate->interval, pDb->cfg.precision, TIME_UNIT_MILLISECOND); - streamObj.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY; + streamObj.conf.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY; } - if (streamObj.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) { - streamObj.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY; + if (streamObj.conf.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) { + streamObj.conf.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY; } if (mndAllocSmaVgroup(pMnode, pDb, &streamObj.fixedSinkVg) != 0) { @@ -597,8 +597,8 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea .pAstRoot = pAst, .topicQuery = false, .streamQuery = true, - .triggerType = streamObj.trigger, - .watermark = streamObj.watermark, + .triggerType = streamObj.conf.trigger, + .watermark = streamObj.conf.watermark, .deleteMark = streamObj.deleteMark, }; @@ -633,7 +633,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea if (mndSetCreateSmaVgroupCommitLogs(pMnode, pTrans, &streamObj.fixedSinkVg) != 0) goto _OVER; if (mndSetUpdateSmaStbCommitLogs(pMnode, pTrans, pStb) != 0) goto _OVER; if (mndSetCreateSmaVgroupRedoActions(pMnode, pTrans, pDb, &streamObj.fixedSinkVg, &smaObj) != 0) goto _OVER; - if (mndScheduleStream(pMnode, &streamObj) != 0) goto _OVER; + if (mndScheduleStream(pMnode, &streamObj, 1685959190000) != 0) goto _OVER; if (mndPersistStream(pMnode, pTrans, &streamObj) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; @@ -1278,13 +1278,13 @@ static int32_t mndRetrieveSma(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc STR_TO_VARSTR(col, (char *)""); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataAppend(pColInfo, numOfRows, (const char *)col, false); + colDataSetVal(pColInfo, numOfRows, (const char 
*)col, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(tag, (char *)"sma_index"); - colDataAppend(pColInfo, numOfRows, (const char *)tag, false); + colDataSetVal(pColInfo, numOfRows, (const char *)tag, false); numOfRows++; sdbRelease(pSdb, pSma); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 63f49cfe2be3a17ad99db4fa578c60eea6f7dc48..9d8948650a23476461e8787e5ba17c8ecb1c99d1 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -239,7 +239,7 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) { } static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) { - int8_t trigger = pStream->trigger; + int8_t trigger = pStream->conf.trigger; if (trigger == STREAM_TRIGGER_AT_ONCE) { strcpy(dst, "at once"); } else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) { @@ -299,13 +299,18 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, pObj->smaId = 0; pObj->uid = mndGenerateUid(pObj->name, strlen(pObj->name)); + + char p[TSDB_STREAM_FNAME_LEN + 32] = {0}; + snprintf(p, tListLen(p), "%s_%s", pObj->name, "fillhistory"); + + pObj->hTaskUid = mndGenerateUid(pObj->name, strlen(pObj->name)); pObj->status = 0; - pObj->igExpired = pCreate->igExpired; - pObj->trigger = pCreate->triggerType; - pObj->triggerParam = pCreate->maxDelay; - pObj->watermark = pCreate->watermark; - pObj->fillHistory = pCreate->fillHistory; + pObj->conf.igExpired = pCreate->igExpired; + pObj->conf.trigger = pCreate->triggerType; + pObj->conf.triggerParam = pCreate->maxDelay; + pObj->conf.watermark = pCreate->watermark; + pObj->conf.fillHistory = pCreate->fillHistory; pObj->deleteMark = pCreate->deleteMark; pObj->igCheckUpdate = pCreate->igUpdate; @@ -387,9 +392,9 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, .pAstRoot = pAst, .topicQuery = 
false, .streamQuery = true, - .triggerType = pObj->trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->trigger, - .watermark = pObj->watermark, - .igExpired = pObj->igExpired, + .triggerType = pObj->conf.trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->conf.trigger, + .watermark = pObj->conf.watermark, + .igExpired = pObj->conf.igExpired, .deleteMark = pObj->deleteMark, .igCheckUpdate = pObj->igCheckUpdate, }; @@ -428,30 +433,37 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) { SEncoder encoder; tEncoderInit(&encoder, NULL, 0); tEncodeStreamTask(&encoder, pTask); + int32_t size = encoder.pos; int32_t tlen = sizeof(SMsgHead) + size; tEncoderClear(&encoder); + void *buf = taosMemoryCalloc(1, tlen); if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - ((SMsgHead *)buf)->vgId = htonl(pTask->nodeId); + + ((SMsgHead *)buf)->vgId = htonl(pTask->info.nodeId); + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); tEncoderInit(&encoder, abuf, size); + tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); STransAction action = {0}; action.mTraceId = pTrans->mTraceId; - memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); + memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); action.pCont = buf; action.contLen = tlen; action.msgType = TDMT_STREAM_TASK_DEPLOY; + if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); return -1; } + return 0; } @@ -459,14 +471,33 @@ int32_t mndPersistStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStrea int32_t level = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < level; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); - int32_t sz = taosArrayGetSize(pLevel); - for (int32_t j = 0; j < sz; j++) { + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfTasks; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); if (mndPersistTaskDeployReq(pTrans, pTask) < 0) { return -1; 
} } } + + // persistent stream task for already stored ts data + if (pStream->conf.fillHistory) { + level = taosArrayGetSize(pStream->pHTasksList); + + for (int32_t i = 0; i < level; i++) { + SArray *pLevel = taosArrayGetP(pStream->pHTasksList, i); + + int32_t numOfTasks = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < numOfTasks; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + if (mndPersistTaskDeployReq(pTrans, pTask) < 0) { + return -1; + } + } + } + } + return 0; } @@ -474,11 +505,13 @@ int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { if (mndPersistStreamTasks(pMnode, pTrans, pStream) < 0) { return -1; } + SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream); if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); return -1; } + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); return 0; } @@ -490,6 +523,7 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr mndTransDrop(pTrans); return -1; } + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); return 0; } @@ -603,16 +637,17 @@ _OVER: static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { // vnode - /*if (pTask->nodeId > 0) {*/ + /*if (pTask->info.nodeId > 0) {*/ SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - pReq->head.vgId = htonl(pTask->nodeId); + + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); + memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); action.pCont = pReq; action.contLen = sizeof(SVDropStreamTaskReq); action.msgType = TDMT_STREAM_TASK_DROP; @@ -732,6 +767,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mError("stream:%s, failed to create since %s", 
createStreamReq.name, terrstr()); goto _OVER; } + mInfo("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); mndTransSetDbName(pTrans, createStreamReq.sourceDB, streamObj.targetDb); @@ -748,7 +784,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } // schedule stream task for stream obj - if (mndScheduleStream(pMnode, &streamObj) < 0) { + if (mndScheduleStream(pMnode, &streamObj, createStreamReq.lastTs) < 0) { mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr()); mndTransDrop(pTrans); goto _OVER; @@ -834,7 +870,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con SMStreamDoCheckpointMsg *pMsg) { SStreamCheckpointSourceReq req = {0}; req.checkpointId = pMsg->checkpointId; - req.nodeId = pTask->nodeId; + req.nodeId = pTask->info.nodeId; req.expireTime = -1; req.streamId = pTask->streamId; req.taskId = pTask->taskId; @@ -863,7 +899,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con SMsgHead *pMsgHead = (SMsgHead *)buf; pMsgHead->contLen = htonl(tlen); - pMsgHead->vgId = htonl(pTask->nodeId); + pMsgHead->vgId = htonl(pTask->info.nodeId); tEncoderClear(&encoder); @@ -902,12 +938,12 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { for (int32_t i = 0; i < totLevel; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); SStreamTask *pTask = taosArrayGetP(pLevel, 0); - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { int32_t sz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < sz; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - /*A(pTask->nodeId > 0);*/ - SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->nodeId); + /*A(pTask->info.nodeId > 0);*/ + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId); if (pVgObj == NULL) { taosRUnLockLatch(&pStream->lock); mndReleaseStream(pMnode, pStream); @@ -965,8 +1001,6 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg 
*pReq) { static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; - /*SDbObj *pDb = NULL;*/ - /*SUserObj *pUser = NULL;*/ SMDropStreamReq dropReq = {0}; if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { @@ -1157,7 +1191,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB } pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->watermark, false); + colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false); char trigger[20 + VARSTR_HEADER_SIZE] = {0}; char trigger2[20] = {0}; @@ -1187,12 +1221,16 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock while (numOfRows < rowsCapacity) { pShow->pIter = sdbFetch(pSdb, SDB_STREAM, pShow->pIter, (void **)&pStream); - if (pShow->pIter == NULL) break; + if (pShow->pIter == NULL) { + break; + } // lock taosRLockLatch(&pStream->lock); + // count task num int32_t sz = taosArrayGetSize(pStream->tasks); + int32_t count = 0; for (int32_t i = 0; i < sz; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); @@ -1202,10 +1240,12 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock if (numOfRows + count > rowsCapacity) { blockDataEnsureCapacity(pBlock, numOfRows + count); } + // add row for each task for (int32_t i = 0; i < sz; i++) { SArray *pLevel = taosArrayGetP(pStream->tasks, i); int32_t levelCnt = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < levelCnt; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); @@ -1215,18 +1255,25 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // stream name char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName)); + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, 
numOfRows, (const char *)streamName, false); // task id pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false); + + char idstr[128] = {0}; + int32_t len = tintToHex(pTask->id.taskId, &idstr[4]); + idstr[2] = '0'; + idstr[3] = 'x'; + varDataSetLen(idstr, len + 2); + colDataSetVal(pColInfo, numOfRows, idstr, false); // node type char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; varDataSetLen(nodeType, 5); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - if (pTask->nodeId > 0) { + if (pTask->info.nodeId > 0) { memcpy(varDataVal(nodeType), "vnode", 5); } else { memcpy(varDataVal(nodeType), "snode", 5); @@ -1235,30 +1282,50 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // node id pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - int64_t nodeId = TMAX(pTask->nodeId, 0); + int64_t nodeId = TMAX(pTask->info.nodeId, 0); colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false); // level char level[20 + VARSTR_HEADER_SIZE] = {0}; - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { memcpy(varDataVal(level), "source", 6); varDataSetLen(level, 6); - } else if (pTask->taskLevel == TASK_LEVEL__AGG) { + } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { memcpy(varDataVal(level), "agg", 3); varDataSetLen(level, 3); - } else if (pTask->taskLevel == TASK_LEVEL__SINK) { + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { memcpy(varDataVal(level), "sink", 4); varDataSetLen(level, 4); - } else if (pTask->taskLevel == TASK_LEVEL__SINK) { } + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)&level, false); // status char status[20 + VARSTR_HEADER_SIZE] = {0}; - char status2[20] = {0}; - strcpy(status, "normal"); - STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status)); + int8_t taskStatus = atomic_load_8(&pTask->status.taskStatus); + if (taskStatus 
== TASK_STATUS__NORMAL) { + memcpy(varDataVal(status), "normal", 6); + varDataSetLen(status, 6); + } else if (taskStatus == TASK_STATUS__DROPPING) { + memcpy(varDataVal(status), "dropping", 8); + varDataSetLen(status, 8); + } else if (taskStatus == TASK_STATUS__FAIL) { + memcpy(varDataVal(status), "fail", 4); + varDataSetLen(status, 4); + } else if (taskStatus == TASK_STATUS__STOP) { + memcpy(varDataVal(status), "stop", 4); + varDataSetLen(status, 4); + } else if (taskStatus == TASK_STATUS__SCAN_HISTORY) { + memcpy(varDataVal(status), "history", 7); + varDataSetLen(status, 7); + } else if (taskStatus == TASK_STATUS__HALT) { + memcpy(varDataVal(status), "halt", 4); + varDataSetLen(status, 4); + } else if (taskStatus == TASK_STATUS__PAUSE) { + memcpy(varDataVal(status), "pause", 5); + varDataSetLen(status, 5); + } pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)&status, false); @@ -1287,10 +1354,10 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - pReq->head.vgId = htonl(pTask->nodeId); + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; STransAction action = {0}; - memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); + memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); action.pCont = pReq; action.contLen = sizeof(SVPauseStreamTaskReq); action.msgType = TDMT_STREAM_TASK_PAUSE; @@ -1301,21 +1368,36 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) { return 0; } -int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { - int32_t size = taosArrayGetSize(pStream->tasks); +int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray* tasks) { + int32_t size = taosArrayGetSize(tasks); for (int32_t i = 0; i < size; i++) { - SArray *pTasks = taosArrayGetP(pStream->tasks, i); + SArray *pTasks = taosArrayGetP(tasks, i); int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j < 
sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) { + if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) { return -1; } + + if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) { + atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + } } } return 0; } +int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { + int32_t code = mndPauseAllStreamTaskImpl(pTrans, pStream->tasks); + if (code != 0) { + return code; + } + // pStream->pHTasksList is null + // code = mndPauseAllStreamTaskImpl(pTrans, pStream->pHTasksList); + return code; +} + static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, int8_t status) { SStreamObj streamObj = {0}; memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN); @@ -1355,6 +1437,10 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { } } + if (pStream->status == STREAM_STATUS__PAUSE) { + return 0; + } + if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { sdbRelease(pMnode->pSdb, pStream); return -1; @@ -1410,11 +1496,11 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SStreamTask *pTask, int8_t ig terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - pReq->head.vgId = htonl(pTask->nodeId); + pReq->head.vgId = htonl(pTask->info.nodeId); pReq->taskId = pTask->id.taskId; pReq->igUntreated = igUntreated; STransAction action = {0}; - memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); + memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet)); action.pCont = pReq; action.contLen = sizeof(SVResumeStreamTaskReq); action.msgType = TDMT_STREAM_TASK_RESUME; @@ -1432,11 +1518,16 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn int32_t sz = taosArrayGetSize(pTasks); for (int32_t j = 0; j 
< sz; j++) { SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (pTask->taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { + if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) { return -1; } + + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__PAUSE) { + atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); + } } } + // pStream->pHTasksList is null return 0; } @@ -1463,6 +1554,10 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { } } + if (pStream->status != STREAM_STATUS__PAUSE) { + return 0; + } + if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { sdbRelease(pMnode->pSdb, pStream); return -1; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index a82e49f397abe38164e67f89f214c822ae5afdaa..444d066afe8b500eacc6bcea5157b2da39747291 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -875,7 +875,7 @@ static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *p // if (pDb == NULL || pDb->compactStartTime <= 0) { // colDataSetNULL(pColInfo, numOfRows); // } else { - // colDataAppend(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false); + // colDataSetVal(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false); // } numOfRows++; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 678dd34e4a6e12ba3573f4d8103e10bcba754119..3d9adf815693dd969ab7fd7b2bc7da466e192ba8 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -52,23 +52,21 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { FAIL: if (pMsg->info.handle == NULL) return; - SRpcMsg rsp = { - .code = code, - .info = pMsg->info, - }; + SRpcMsg rsp = { .code = code, .info = pMsg->info}; tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); 
taosFreeQitem(pMsg); } int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { - ASSERT(pTask->taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->childEpInfo) != 0); + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0); pTask->refCnt = 1; - pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId); - pTask->inputQueue = streamQueueOpen(0); - pTask->outputQueue = streamQueueOpen(0); + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->inputQueue = streamQueueOpen(512 << 10); + pTask->outputQueue = streamQueueOpen(512 << 10); if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) { return -1; @@ -85,14 +83,18 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { return -1; } - int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo); - SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState }; + int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList); + SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; initStreamStateAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0); ASSERT(pTask->exec.pExecutor); - streamSetupTrigger(pTask); + streamSetupScheduleTrigger(pTask); + + qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE, + pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel); + return 0; } @@ -149,9 +151,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { taosMemoryFree(pTask); return -1; } + tDecoderClear(&decoder); - ASSERT(pTask->taskLevel == TASK_LEVEL__AGG); + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG); // 2.save task 
taosWLockLatch(&pSnode->pMeta->lock); @@ -161,19 +164,20 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { return -1; } + int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); taosWUnLockLatch(&pSnode->pMeta->lock); - // 3.go through recover steps to fill history - if (pTask->fillHistory) { - streamSetParamForRecover(pTask); - streamAggRecoverPrepare(pTask); - } + streamPrepareNdoCheckDownstream(pTask); + qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); return 0; } int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; + qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); + streamMetaRemoveTask(pSnode->pMeta, pReq->taskId); return 0; } @@ -255,13 +259,15 @@ int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) { } int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { - void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); switch (pMsg->msgType) { - case TDMT_STREAM_TASK_DEPLOY: + case TDMT_STREAM_TASK_DEPLOY: { + void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); return sndProcessTaskDeployReq(pSnode, pReq, len); + } + case TDMT_STREAM_TASK_DROP: - return sndProcessTaskDropReq(pSnode, pReq, len); + return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen); default: ASSERT(0); } @@ -277,7 +283,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { SDecoder decoder; tDecoderInit(&decoder, msg, msgLen); - tDecodeSStreamRecoverFinishReq(&decoder, &req); + tDecodeStreamRecoverFinishReq(&decoder, &req); tDecoderClear(&decoder); // find task @@ -286,7 +292,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode 
*pSnode, SRpcMsg *pMsg) { return -1; } // do process request - if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) { + if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) { streamMetaReleaseTask(pSnode->pMeta, pTask); return -1; } @@ -300,6 +306,102 @@ int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) { return 0; } +int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { + char *msgStr = pMsg->pCont; + char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + SStreamTaskCheckReq req; + SDecoder decoder; + + tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen); + tDecodeStreamTaskCheckReq(&decoder, &req); + tDecoderClear(&decoder); + + int32_t taskId = req.downstreamTaskId; + + SStreamTaskCheckRsp rsp = { + .reqId = req.reqId, + .streamId = req.streamId, + .childId = req.childId, + .downstreamNodeId = req.downstreamNodeId, + .downstreamTaskId = req.downstreamTaskId, + .upstreamNodeId = req.upstreamNodeId, + .upstreamTaskId = req.upstreamTaskId, + }; + + SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, taskId); + + if (pTask != NULL) { + rsp.status = streamTaskCheckStatus(pTask); + streamMetaReleaseTask(pSnode->pMeta, pTask); + + qDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d", + pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, + streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status); + } else { + rsp.status = 0; + qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 + ") from task:0x%x (vgId:%d), rsp status %d", + taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + } + + SEncoder encoder; + int32_t code; + int32_t len; + + tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); + if (code < 0) { + qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId); + return -1; + } + + void *buf 
= rpcMallocCont(sizeof(SMsgHead) + len); + ((SMsgHead *)buf)->vgId = htonl(req.upstreamNodeId); + + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + tEncoderInit(&encoder, (uint8_t *)abuf, len); + tEncodeStreamTaskCheckRsp(&encoder, &rsp); + tEncoderClear(&encoder); + + SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info}; + + tmsgSendRsp(&rspMsg); + return 0; +} + +int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + + int32_t code; + SStreamTaskCheckRsp rsp; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pReq, len); + code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); + + if (code < 0) { + tDecoderClear(&decoder); + return -1; + } + + tDecoderClear(&decoder); + qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", + rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); + + SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.upstreamTaskId); + if (pTask == NULL) { + qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, + pSnode->pMeta->vgId); + return -1; + } + + code = streamProcessCheckRsp(pTask, &rsp); + streamMetaReleaseTask(pSnode->pMeta, pTask); + return code; +} + int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { switch (pMsg->msgType) { case TDMT_STREAM_TASK_RUN: @@ -312,10 +414,14 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { return sndProcessTaskRetrieveReq(pSnode, pMsg); case TDMT_STREAM_RETRIEVE_RSP: return sndProcessTaskRetrieveRsp(pSnode, pMsg); - case TDMT_STREAM_RECOVER_FINISH: + case TDMT_STREAM_SCAN_HISTORY_FINISH: return sndProcessTaskRecoverFinishReq(pSnode, pMsg); - case TDMT_STREAM_RECOVER_FINISH_RSP: + case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: return 
sndProcessTaskRecoverFinishRsp(pSnode, pMsg); + case TDMT_STREAM_TASK_CHECK: + return sndProcessStreamTaskCheckReq(pSnode, pMsg); + case TDMT_STREAM_TASK_CHECK_RSP: + return sndProcessStreamTaskCheckRsp(pSnode, pMsg); default: ASSERT(0); } diff --git a/source/dnode/snode/src/snodeInitApi.c b/source/dnode/snode/src/snodeInitApi.c index f5e924525212c34ffd375454703a91ccc160836f..c046505630251092923189eadbc532e87970e4b6 100644 --- a/source/dnode/snode/src/snodeInitApi.c +++ b/source/dnode/snode/src/snodeInitApi.c @@ -101,6 +101,7 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateCommit = streamStateCommit; pStore->streamStateDestroy= streamStateDestroy; pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint; + pStore->streamStateReloadInfo = streamStateReloadInfo; } void initFunctionStateStore(SFunctionStateStore* pStore) { diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 17336d42957410a53a0fde0632ebec310e41ec0d..5fb30a0028871530b1e94715fe9f329306323b95 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -96,6 +96,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp); int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); +int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit); @@ -126,8 +127,6 @@ int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey, int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen); -int64_t metaGetTbNum(SMeta *pMeta); - 
int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables); // tsdb diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index b35dc71ed974e656b94b30562ee895a9f81fcd20..7f95e48c419d38656bb2ab1de4b6f521d97e0195 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -45,27 +45,10 @@ extern "C" { typedef struct STqOffsetStore STqOffsetStore; // tqPush - -// typedef struct { -// // msg info -// int64_t consumerId; -// int64_t reqOffset; -// int64_t processedVer; -// int32_t epoch; -// // rpc info -// int64_t reqId; -// SRpcHandleInfo rpcInfo; -// tmr_h timerId; -// int8_t tmrStopped; -// // exec -// int8_t inputStatus; -// int8_t execStatus; -// SStreamQueue inputQ; -// SRWLatch lock; -// } STqPushHandle; +#define EXTRACT_DATA_FROM_WAL_ID (-1) +#define STREAM_TASK_STATUS_CHECK_ID (-2) // tqExec - typedef struct { char* qmsg; // SubPlanToString } STqExecCol; @@ -184,10 +167,10 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); int32_t tqStreamTasksScanWal(STQ* pTq); +int32_t tqStreamTasksStatusCheck(STQ* pTq); // tq util int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); -char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index d7694ebfd5abc9b3cc7cf1a36d3936182025118b..71af1697528409c6423a4453b0777215bdd74126 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -64,7 +64,6 @@ typedef struct STsdbReadSnap 
STsdbReadSnap; typedef struct SBlockInfo SBlockInfo; typedef struct SSmaInfo SSmaInfo; typedef struct SBlockCol SBlockCol; -typedef struct SVersionRange SVersionRange; typedef struct SLDataIter SLDataIter; typedef struct SDiskCol SDiskCol; typedef struct SDiskData SDiskData; @@ -383,11 +382,6 @@ struct TSDBKEY { TSKEY ts; }; -struct SVersionRange { - uint64_t minVer; - uint64_t maxVer; -}; - typedef struct SMemSkipListNode SMemSkipListNode; struct SMemSkipListNode { int8_t level; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index a9541d8c472ecd5403206687be8a6434a39a5dca..7c6c72e99519fdd5adb394f96b4058f70d41fdab 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -178,7 +178,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta); void* metaGetIdx(SMeta* pMeta); void* metaGetIvtIdx(SMeta* pMeta); -void metaReaderInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags); +int64_t metaGetTbNum(SMeta *pMeta); +void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags); int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg); int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid); @@ -217,6 +218,7 @@ int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. 
+int tqCheckStreamStatus(STQ* pTq); int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); @@ -238,14 +240,14 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, char* msg, int32_t msgLen); +int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, SRpcMsg* pMsg); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t version, char* msg, int32_t msgLen); +int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqCheckLogInWal(STQ* pTq, int64_t version); diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 0e380ea0b2f42614d873a7c41cd32645da35ce84..c26bb45c2b51b91ae4e4d9f53d7a895fc832ec9e 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -17,13 +17,13 @@ #include "osMemory.h" #include "tencode.h" -void _metaReaderInit(SMetaReader *pReader, void *pVnode, int32_t flags, SStoreMeta *pAPI) { - SMeta *pMeta = ((SVnode *)pVnode)->pMeta; - metaReaderInit(pReader, pMeta, flags); +void _metaReaderInit(SMetaReader* pReader, void* 
pVnode, int32_t flags, SStoreMeta* pAPI) { + SMeta* pMeta = ((SVnode*)pVnode)->pMeta; + metaReaderDoInit(pReader, pMeta, flags); pReader->pAPI = pAPI; } -void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) { +void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) { memset(pReader, 0, sizeof(*pReader)); pReader->pMeta = pMeta; pReader->flags = flags; @@ -143,7 +143,7 @@ tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name) { int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) { int code = 0; SMetaReader mr = {0}; - metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); + metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); code = metaReaderGetTableEntryByUid(&mr, uid); if (code < 0) { metaReaderClear(&mr); @@ -159,7 +159,7 @@ int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) { int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) { int code = 0; SMetaReader mr = {0}; - metaReaderInit(&mr, (SMeta *)meta, 0); + metaReaderDoInit(&mr, (SMeta *)meta, 0); code = metaReaderGetTableEntryByUid(&mr, uid); if (code < 0) { metaReaderClear(&mr); @@ -174,7 +174,7 @@ int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) { int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) { int code = 0; SMetaReader mr = {0}; - metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); + metaReaderDoInit(&mr, ((SVnode *)pVnode)->pMeta, 0); SMetaReader *pReader = &mr; @@ -195,7 +195,7 @@ int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) { int metaGetTableTypeByName(void *pVnode, char *tbName, ETableType *tbType) { int code = 0; SMetaReader mr = {0}; - metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0); + metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); code = metaGetTableEntryByName(&mr, tbName); if (code == 0) *tbType = mr.me.type; @@ -215,7 +215,7 @@ int metaReadNext(SMetaReader *pReader) { int metaGetTableTtlByUid(void *meta, uint64_t uid, int64_t *ttlDays) { 
int code = -1; SMetaReader mr = {0}; - metaReaderInit(&mr, (SMeta *)meta, 0); + metaReaderDoInit(&mr, (SMeta *)meta, 0); code = metaReaderGetTableEntryByUid(&mr, uid); if (code < 0) { goto _exit; @@ -244,9 +244,7 @@ SMTbCursor *metaOpenTbCursor(void *pVnode) { return NULL; } - SVnode *pVnodeObj = pVnode; - // metaReaderInit(&pTbCur->mr, pVnodeObj->pMeta, 0); - + SVnode* pVnodeObj = pVnode; // tdbTbcMoveToFirst((TBC *)pTbCur->pDbc); pTbCur->pMeta = pVnodeObj->pMeta; pTbCur->paused = 1; @@ -277,7 +275,7 @@ void metaPauseTbCursor(SMTbCursor *pTbCur) { } void metaResumeTbCursor(SMTbCursor *pTbCur, int8_t first) { if (pTbCur->paused) { - metaReaderInit(&pTbCur->mr, pTbCur->pMeta, 0); + metaReaderDoInit(&pTbCur->mr, pTbCur->pMeta, 0); tdbTbcOpen(((SMeta *)pTbCur->pMeta)->pUidIdx, (TBC **)&pTbCur->pDbc, NULL); @@ -784,7 +782,7 @@ STSmaWrapper *metaGetSmaInfoByTable(SMeta *pMeta, tb_uid_t uid, bool deepCopy) { } SMetaReader mr = {0}; - metaReaderInit(&mr, pMeta, 0); + metaReaderDoInit(&mr, pMeta, 0); int64_t smaId; int smaIdx = 0; STSma *pTSma = NULL; @@ -839,7 +837,7 @@ _err: STSma *metaGetSmaInfoByIndex(SMeta *pMeta, int64_t indexUid) { STSma *pTSma = NULL; SMetaReader mr = {0}; - metaReaderInit(&mr, pMeta, 0); + metaReaderDoInit(&mr, pMeta, 0); if (metaReaderGetTableEntryByUid(&mr, indexUid) < 0) { metaWarn("vgId:%d, failed to get table entry for smaId:%" PRIi64, TD_VID(pMeta->pVnode), indexUid); metaReaderClear(&mr); diff --git a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index a49848f4421e1c0e103d2d083713582574aee609..91704f5c7ab1558a5610496757763e053dad570d 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -37,7 +37,7 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) { // validate req // save smaIndex - metaReaderInit(&mr, pMeta, 0); + metaReaderDoInit(&mr, pMeta, 0); if (metaReaderGetTableEntryByUidCache(&mr, pCfg->indexUid) == 0) { #if 1 terrno = 
TSDB_CODE_TSMA_ALREADY_EXIST; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index eb2d2e267bc4963dcab16b4997fbce6d4261af5d..cb4b3231f61ebce6633dd3180cee13636828ac25 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -709,7 +709,7 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs } // validate req - metaReaderInit(&mr, pMeta, 0); + metaReaderDoInit(&mr, pMeta, 0); if (metaGetTableEntryByName(&mr, pReq->name) == 0) { if (pReq->type == TSDB_CHILD_TABLE && pReq->ctb.suid != mr.me.ctbEntry.suid) { terrno = TSDB_CODE_TDB_TABLE_IN_OTHER_STABLE; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 39aa5c30437a72f6ad26f22f4c2a01aa03a8c7dc..d393f4b6bc191f69d9405686c0fc9bf66e03eed5 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -896,7 +896,7 @@ static int32_t tdRSmaInfoClone(SSma *pSma, SRSmaInfo *pInfo) { return TSDB_CODE_SUCCESS; } - metaReaderInit(&mr, SMA_META(pSma), 0); + metaReaderDoInit(&mr, SMA_META(pSma), 0); smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid); if (metaReaderGetTableEntryByUidCache(&mr, pInfo->suid) < 0) { code = terrno; @@ -1116,7 +1116,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) { } int64_t nRsmaTables = 0; - metaReaderInit(&mr, SMA_META(pSma), 0); + metaReaderDoInit(&mr, SMA_META(pSma), 0); if (!(uidStore.tbUids = taosArrayInit(1024, sizeof(tb_uid_t)))) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 3cb8652a4b77ee2e7698db1c81ae108ba538e8d3..cd4f8795d5ad244db7d8f3d9f45cff5303282aec 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -18,8 +18,6 @@ // 0: not init // 1: already inited // 2: wait to be inited or cleaup 
-#define WAL_READ_TASKS_ID (-1) - static int32_t tqInitialize(STQ* pTq); static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } @@ -158,6 +156,29 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq); } +static bool hasStreamTaskInTimer(SStreamMeta* pMeta) { + bool inTimer = false; + + taosWLockLatch(&pMeta->lock); + + void* pIter = NULL; + while(1) { + pIter = taosHashIterate(pMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->status.timerActive == 1) { + inTimer = true; + } + } + + taosWUnLockLatch(&pMeta->lock); + + return inTimer; +} + void tqNotifyClose(STQ* pTq) { if (pTq != NULL) { taosWLockLatch(&pTq->pStreamMeta->lock); @@ -170,16 +191,29 @@ void tqNotifyClose(STQ* pTq) { } SStreamTask* pTask = *(SStreamTask**)pIter; - tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr); + tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr); pTask->status.taskStatus = TASK_STATUS__STOP; int64_t st = taosGetTimestampMs(); qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + int64_t el = taosGetTimestampMs() - st; tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el); } taosWUnLockLatch(&pTq->pStreamMeta->lock); + + tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId); + + int64_t st = taosGetTimestampMs(); + + while(hasStreamTaskInTimer(pTq->pStreamMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pTq->pStreamMeta->vgId); + taosMsleep(100); + } + + int64_t el = taosGetTimestampMs() - st; + tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el); } } @@ -771,19 +805,32 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; 
pTask->pMeta = pTq->pStreamMeta; + pTask->chkInfo.version = ver; pTask->chkInfo.currentVer = ver; - // expand executor - pTask->status.taskStatus = (pTask->fillHistory) ? TASK_STATUS__WAIT_DOWNSTREAM : TASK_STATUS__NORMAL; + pTask->dataRange.range.maxVer = ver; + pTask->dataRange.range.minVer = ver; - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + SStreamTask* pSateTask = pTask; + SStreamTask task = {0}; + if (pTask->info.fillHistory) { + task.id = pTask->streamTaskId; + task.pMeta = pTask->pMeta; + pSateTask = &task; + } + + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); if (pTask->pState == NULL) { return -1; } - SReadHandle handle = {.vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState}; + SReadHandle handle = {.vnode = pTq->pVnode, + .initTqReader = 1, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window}; initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); @@ -792,14 +839,25 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); - } else if (pTask->taskLevel == TASK_LEVEL__AGG) { - pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); + } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + SStreamTask* pSateTask = pTask; + SStreamTask task = {0}; + if (pTask->info.fillHistory) { + task.id = pTask->streamTaskId; + task.pMeta = pTask->pMeta; + pSateTask = &task; + } + pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1); if (pTask->pState == NULL) { return -1; } - int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo); - SReadHandle handle = {.vnode = NULL, .numOfVgroups = numOfVgroups, 
.pStateBackend = pTask->pState}; + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList); + SReadHandle handle = {.vnode = NULL, + .numOfVgroups = numOfVgroups, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window}; initStorageAPI(&handle.api); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); @@ -834,15 +892,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr); } - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { SWalFilterCond cond = {.deleteMsg = 1}; // delete msg also extract from wal files pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond); } - streamSetupTrigger(pTask); + streamSetupScheduleTrigger(pTask); - tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId, - pTask->id.idStr, pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel); + tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 + " child id:%d, level:%d, scan-history:%d, trigger:%" PRId64 " ms", + vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel, + pTask->info.fillHistory, pTask->triggerParam); // next valid version will add one pTask->chkInfo.version += 1; @@ -858,10 +918,11 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - tDecodeSStreamTaskCheckReq(&decoder, &req); + tDecodeStreamTaskCheckReq(&decoder, &req); tDecoderClear(&decoder); - int32_t taskId = req.downstreamTaskId; + int32_t taskId = req.downstreamTaskId; + SStreamTaskCheckRsp rsp = { .reqId = req.reqId, .streamId = req.streamId, @@ -878,23 +939,20 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { rsp.status = streamTaskCheckStatus(pTask); 
streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 - ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d", - pTask->id.idStr, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus, - rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d", + pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, + streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status); } else { rsp.status = 0; - tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 - ") %d at node %d, check req from task:0x%x at node %d, rsp status %d", - taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, - rsp.status); + tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", + taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } SEncoder encoder; int32_t code; int32_t len; - tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code); + tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); if (code < 0) { tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId); return -1; @@ -905,7 +963,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); tEncoderInit(&encoder, (uint8_t*)abuf, len); - tEncodeSStreamTaskCheckRsp(&encoder, &rsp); + tEncodeStreamTaskCheckRsp(&encoder, &rsp); tEncoderClear(&encoder); SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info}; @@ -914,13 +972,16 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { return 0; } -int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { +int32_t 
tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code; SStreamTaskCheckRsp rsp; SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp); + tDecoderInit(&decoder, (uint8_t*)pReq, len); + code = tDecodeStreamTaskCheckRsp(&decoder, &rsp); if (code < 0) { tDecoderClear(&decoder); @@ -928,17 +989,18 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32 } tDecoderClear(&decoder); - tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task:0x%x at node %d, status %d", - rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); + tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", + rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId); if (pTask == NULL) { - tqError("tq failed to locate the stream task:0x%x vgId:%d, it may have been destroyed", rsp.upstreamTaskId, + tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, pTq->pStreamMeta->vgId); + terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; return -1; } - code = streamProcessTaskCheckRsp(pTask, &rsp, sversion); + code = streamProcessCheckRsp(pTask, &rsp); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return code; } @@ -971,105 +1033,232 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms tDecoderClear(&decoder); + SStreamMeta* pStreamMeta = pTq->pStreamMeta; + // 2.save task, use the newest commit version as the initial start version of stream task. 
- taosWLockLatch(&pTq->pStreamMeta->lock); - code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask); - int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); + taosWLockLatch(&pStreamMeta->lock); + code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask); + + int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta); if (code < 0) { tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks); - taosWUnLockLatch(&pTq->pStreamMeta->lock); + taosWUnLockLatch(&pStreamMeta->lock); return -1; } - taosWUnLockLatch(&pTq->pStreamMeta->lock); + taosWUnLockLatch(&pStreamMeta->lock); - // 3.go through recover steps to fill history - if (pTask->fillHistory) { - streamTaskCheckDownstream(pTask, sversion); - } + // 3. It's an fill history task, do nothing. wait for the main task to start it + streamPrepareNdoCheckDownstream(pTask); + + tqDebug("vgId:%d s-task:%s is deployed and add into meta, status:%s, numOfTasks:%d", vgId, pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); - tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", vgId, pTask->id.idStr, - pTask->status.taskStatus, numOfTasks); return 0; } -int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { - int32_t code; +int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { + int32_t code = TSDB_CODE_SUCCESS; char* msg = pMsg->pCont; - int32_t msgLen = pMsg->contLen; - SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)msg; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); + SStreamMeta* pMeta = pTq->pStreamMeta; + SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)msg; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId); if (pTask == NULL) { + tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed", + pMeta->vgId, pReq->taskId); return -1; } // check param int64_t fillVer1 = pTask->chkInfo.version; 
if (fillVer1 <= 0) { - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + streamMetaReleaseTask(pMeta, pTask); return -1; } // do recovery step 1 - tqDebug("s-task:%s start non-blocking recover stage(step 1) scan", pTask->id.idStr); + const char* pId = pTask->id.idStr; + tqDebug("s-task:%s start history data scan stage(step 1), status:%s", pId, + streamGetTaskStatusStr(pTask->status.taskStatus)); + int64_t st = taosGetTimestampMs(); + int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, + TASK_SCHED_STATUS__WAITING); + if (schedStatus != TASK_SCHED_STATUS__INACTIVE) { + ASSERT(0); + return 0; + } - streamSourceRecoverScanStep1(pTask); - if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { - tqDebug("s-task:%s is dropped, abort recover in step1", pTask->id.idStr); + if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) { + streamSourceScanHistoryData(pTask); + } - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) { + tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + streamMetaReleaseTask(pMeta, pTask); return 0; } double el = (taosGetTimestampMs() - st) / 1000.0; - tqDebug("s-task:%s non-blocking recover stage(step 1) ended, elapsed time:%.2fs", pTask->id.idStr, el); + tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", pId, el); - // build msg to launch next step - SStreamRecoverStep2Req req; - code = streamBuildSourceRecover2Req(pTask, &req); - if (code < 0) { - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return -1; - } + if (pTask->info.fillHistory) { + SVersionRange* pRange = NULL; + SStreamTask* pStreamTask = NULL; - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - if (atomic_load_8(&pTask->status.taskStatus) == 
TASK_STATUS__DROPPING) { - return 0; - } + if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) { + // 1. stop the related stream task, get the current scan wal version of stream task, ver. + pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId); + if (pStreamTask == NULL) { + // todo handle error + } - // serialize msg - int32_t len = sizeof(SStreamRecoverStep1Req); + ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); - void* serializedReq = rpcMallocCont(len); - if (serializedReq == NULL) { - tqError("s-task:%s failed to prepare the step2 stage, out of memory", pTask->id.idStr); - return -1; - } + // wait for the stream task get ready for scan history data + while (((pStreamTask->status.downstreamReady == 0) && (pStreamTask->status.taskStatus != TASK_STATUS__STOP)) || + pStreamTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + tqDebug( + "s-task:%s level:%d related stream task:%s not ready for halt, wait for it continue and recheck in 100ms", + pTask->id.idStr, pTask->info.taskLevel, pStreamTask->id.idStr); + taosMsleep(100); + } + + // now we can stop the stream task execution + pStreamTask->status.taskStatus = TASK_STATUS__HALT; + tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr, + pStreamTask->info.taskLevel, pId); + + // if it's an source task, extract the last version in wal. + pRange = &pTask->dataRange.range; + int64_t latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); + ASSERT(latestVer >= pRange->maxVer); + + int64_t nextStartVer = pRange->maxVer + 1; + if (nextStartVer > latestVer - 1) { + // no input data yet. no need to execute the secondardy scan while stream task halt + streamTaskRecoverSetAllStepFinished(pTask); + tqDebug("s-task:%s no need to perform secondary scan-history-data(step 2), since no data ingest during secondary scan", pId); + } else { + // 2. 
do secondary scan of the history data, the time window remain, and the version range is updated to + // [pTask->dataRange.range.maxVer, ver1] + pRange->minVer = nextStartVer; + pRange->maxVer = latestVer - 1; + } + } + + if (!streamTaskRecoverScanStep1Finished(pTask)) { + tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 + " do secondary scan-history-data after halt the related stream task:%s", + pId, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pStreamTask->id.idStr); + ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); - memcpy(serializedReq, &req, len); + st = taosGetTimestampMs(); + streamSetParamForStreamScannerStep2(pTask, pRange, &pTask->dataRange.window); + } + + if (!streamTaskRecoverScanStep2Finished(pTask)) { + streamSourceScanHistoryData(pTask); + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) { + tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId); + streamMetaReleaseTask(pMeta, pTask); + return 0; + } - // dispatch msg - tqDebug("s-task:%s start recover block stage", pTask->id.idStr); + streamTaskRecoverSetAllStepFinished(pTask); + } + + el = (taosGetTimestampMs() - st) / 1000.0; + tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", pId, el); + + // 3. notify the downstream tasks to transfer executor state after handle all history blocks. + if (!pTask->status.transferState) { + code = streamDispatchTransferStateMsg(pTask); + if (code != TSDB_CODE_SUCCESS) { + // todo handle error + } + + pTask->status.transferState = true; + } + + // 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task. + // 5. resume the related stream task. 
+ streamTryExec(pTask); + + pTask->status.taskStatus = TASK_STATUS__DROPPING; + tqDebug("s-task:%s scan-history-task set status to be dropping", pId); + + streamMetaSaveTask(pMeta, pTask); + streamMetaSaveTask(pMeta, pStreamTask); + + streamMetaReleaseTask(pMeta, pTask); + streamMetaReleaseTask(pMeta, pStreamTask); + + taosWLockLatch(&pMeta->lock); + if (streamMetaCommit(pTask->pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); + } else { + // todo update the chkInfo version for current task. + // this task has an associated history stream task, so we need to scan wal from the end version of + // history scan. The current version of chkInfo.current is not updated during the history scan + STimeWindow* pWindow = &pTask->dataRange.window; + + if (pTask->historyTaskId.taskId == 0) { + *pWindow = (STimeWindow){INT64_MIN, INT64_MAX}; + tqDebug("s-task:%s no associated task, reset the time window:%" PRId64 " - %" PRId64, pId, pWindow->skey, + pWindow->ekey); + } else { + tqDebug("s-task:%s history data scan completed, now start to scan data from wal, start ver:%" PRId64 + ", window:%" PRId64 " - %" PRId64, + pId, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); + } + + code = streamTaskScanHistoryDataComplete(pTask); + streamMetaReleaseTask(pMeta, pTask); + + // let's start the stream task by extracting data from wal + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + tqStartStreamTasks(pTq); + } + + return code; + } - SRpcMsg rpcMsg = { - .code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq}; - tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } -int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - int32_t code = 0; +// notify the downstream tasks to transfer executor state after handle all history blocks. 
+int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { + SStreamTransferReq req; - SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg; + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + int32_t code = tDecodeStreamRecoverFinishReq(&decoder, &req); + tDecoderClear(&decoder); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId); if (pTask == NULL) { + tqError("failed to find task:0x%x, it may have been dropped already", req.taskId); return -1; } - // do recovery step 2 + // transfer the ownership of executor state + streamTaskReleaseState(pTask); + tqDebug("s-task:%s receive state transfer req", pTask->id.idStr); + + SStreamTask* pStreamTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->streamTaskId.taskId); + streamTaskReloadState(pStreamTask); + + ASSERT(pTask->streamTaskId.taskId != 0); + pTask->status.transferState = true; // persistent data? 
+ +#if 0 + // do check if current task handle all data in the input queue int64_t st = taosGetTimestampMs(); tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st); @@ -1108,16 +1297,19 @@ int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el); // dispatch recover finish req to all related downstream task - code = streamDispatchRecoverFinishReq(pTask); + code = streamDispatchScanHistoryFinishMsg(pTask); if (code < 0) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return -1; } - atomic_store_8(&pTask->fillHistory, 0); + atomic_store_8(&pTask->info.fillHistory, 0); streamMetaSaveTask(pTq->pStreamMeta, pTask); +#endif + streamSchedExec(pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; } @@ -1130,7 +1322,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - tDecodeSStreamRecoverFinishReq(&decoder, &req); + tDecodeStreamRecoverFinishReq(&decoder, &req); tDecoderClear(&decoder); // find task @@ -1139,7 +1331,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } // do process request - if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) { + if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return -1; } @@ -1212,22 +1404,31 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = pReq->taskId; int32_t vgId = TD_VID(pTq->pVnode); - if (taskId == WAL_READ_TASKS_ID) { // all tasks are extracted submit data from the wal + if (taskId == STREAM_TASK_STATUS_CHECK_ID) { + tqStreamTasksStatusCheck(pTq); + return 0; + } + + if (taskId == EXTRACT_DATA_FROM_WAL_ID) { // all tasks are extracted submit data from the wal tqStreamTasksScanWal(pTq); return 0; } SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask != 
NULL) { - if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { - tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr, + // even in halt status, the data in inputQ must be processed + int8_t status = pTask->status.taskStatus; + if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) { + tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.version); streamProcessRunReq(pTask); } else { - if (streamTaskShouldPause(&pTask->status)) { +// if (streamTaskShouldPause(&pTask->status)) { atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - } - tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); +// } + + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, + pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); } streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1286,65 +1487,100 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgL return 0; } +int32_t tqProcessTaskPauseImpl(SStreamMeta* pStreamMeta, SStreamTask* pTask) { + if (pTask) { + if (!streamTaskShouldPause(&pTask->status)) { + tqDebug("vgId:%d s-task:%s set pause flag", pStreamMeta->vgId, pTask->id.idStr); + atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + } + streamMetaReleaseTask(pStreamMeta, pTask); + } else { + return -1; + } + return 0; +} + int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); + int32_t code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pTask); + if (code != 0) { + return code; + } + SStreamTask* pHistoryTask = 
streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId); + if (pHistoryTask) { + code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pHistoryTask); + } + return code; +} + +int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int8_t igUntreated) { + int32_t vgId = pTq->pStreamMeta->vgId; if (pTask) { - tqDebug("vgId:%d s-task:%s set pause flag", pTq->pStreamMeta->vgId, pTask->id.idStr); - atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + if (streamTaskShouldPause(&pTask->status)) { + atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); + + // no lock needs to secure the access of the version + if (igUntreated && pTask->info.taskLevel == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) { + // discard all the data when the stream task is suspended. + walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion); + tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64 + ", schedStatus:%d", + vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); + } else { // from the previous paused version and go on + tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d", + vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); + } + + if (pTask->info.fillHistory && pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + streamStartRecoverTask(pTask, igUntreated); + } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) { + tqStartStreamTasks(pTq); + } else { + streamSchedExec(pTask); + } + } streamMetaReleaseTask(pTq->pStreamMeta, pTask); + } else { + return -1; } return 0; } int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - - 
int32_t vgId = pTq->pStreamMeta->vgId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); - if (pTask) { - atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); - - // no lock needs to secure the access of the version - if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) { - // discard all the data when the stream task is suspended. - walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion); - tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64 - ", schedStatus:%d", - vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); - } else { // from the previous paused version and go on - tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d", - vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); - } - - if (pTask->taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) { - tqStartStreamTasks(pTq); - } else { - streamSchedExec(pTask); - } - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - } else { - tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated); + if (code != 0) { + return code; } - return 0; + SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId); + if (pHistoryTask) { + code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated); + } + return code; } int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { - char* msgStr = pMsg->pCont; - char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); - int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + char* msgStr = pMsg->pCont; + char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + SDecoder decoder; + 
SStreamRetrieveReq req; - SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecodeStreamRetrieveReq(&decoder, &req); tDecoderClear(&decoder); + int32_t taskId = req.dstTaskId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + if (pTask) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessRetrieveReq(pTask, &req, &rsp); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); tDeleteStreamRetrieveReq(&req); return 0; @@ -1425,43 +1661,3 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } -int32_t tqStartStreamTasks(STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); - SStreamMeta* pMeta = pTq->pStreamMeta; - - taosWLockLatch(&pMeta->lock); - - int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - if (numOfTasks == 0) { - tqInfo("vgId:%d no stream tasks exist", vgId); - taosWUnLockLatch(&pMeta->lock); - return 0; - } - - pMeta->walScanCounter += 1; - - if (pMeta->walScanCounter > 1) { - tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); - taosWUnLockLatch(&pMeta->lock); - return 0; - } - - SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); - if (pRunReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); - taosWUnLockLatch(&pMeta->lock); - return -1; - } - - tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); - pRunReq->head.vgId = vgId; - pRunReq->streamId = 0; - pRunReq->taskId = WAL_READ_TASKS_ID; - - SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; - tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); - taosWUnLockLatch(&pMeta->lock); - - return 0; -} diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 
44a387a2f8c5dbe61362bc2c444a74475cd6d5bc..ba983b1833fa04ca26d6093d07f9f85eafef36ec 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -114,7 +114,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) { } SMetaReader mr = {0}; - metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0); + metaReaderDoInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0); if (metaGetTableEntryByName(&mr, req.tbName) < 0) { metaReaderClear(&mr); @@ -1109,7 +1109,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); if (code != 0) { tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index fe80f486918413390ee7916fb97fe07c58a1b80d..5db3e735ccbf63c41cc88372b09b9682060334c5 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -16,6 +16,7 @@ #include "tq.h" static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); +static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId); // this function should be executed by stream threads. // extract submit block from WAL, and add them into the input queue for the sources tasks. 
@@ -57,7 +58,111 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { return 0; } -static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { +int32_t tqStreamTasksStatusCheck(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks); + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + SArray* pTaskList = NULL; + taosWLockLatch(&pMeta->lock); + pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosWUnLockLatch(&pMeta->lock); + + for (int32_t i = 0; i < numOfTasks; ++i) { + int32_t* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, *pTaskId); + if (pTask == NULL) { + continue; + } + + streamTaskCheckDownstreamTasks(pTask); + streamMetaReleaseTask(pMeta, pTask); + } + taosArrayDestroy(pTaskList); + + return 0; +} + +int32_t tqCheckStreamStatus(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + + taosWLockLatch(&pMeta->lock); + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + tqInfo("vgId:%d no stream tasks exist", vgId); + taosWUnLockLatch(&pMeta->lock); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + tqDebug("vgId:%d check for stream tasks status, numOfTasks:%d", vgId, numOfTasks); + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = STREAM_TASK_STATUS_CHECK_ID; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + 
taosWUnLockLatch(&pMeta->lock); + + return 0; +} + +int32_t tqStartStreamTasks(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + + taosWLockLatch(&pMeta->lock); + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + tqInfo("vgId:%d no stream tasks exist", vgId); + taosWUnLockLatch(&pMeta->lock); + return 0; + } + + pMeta->walScanCounter += 1; + + if (pMeta->walScanCounter > 1) { + tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); + taosWUnLockLatch(&pMeta->lock); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); + taosWUnLockLatch(&pMeta->lock); + return -1; + } + + tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = EXTRACT_DATA_FROM_WAL_ID; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + taosWUnLockLatch(&pMeta->lock); + + return 0; +} + +int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { // seek the stored version and extract data from WAL int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader); if (pTask->chkInfo.currentVer < firstVer) { @@ -102,7 +207,7 @@ static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = true; - bool noNewDataInWal = true; + bool noDataInWal = true; int32_t vgId = pStreamMeta->vgId; int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList); @@ -129,15 +234,13 @@ int32_t createStreamTaskRunReq(SStreamMeta* 
pStreamMeta, bool* pScanIdle) { } int32_t status = pTask->status.taskStatus; - if (pTask->taskLevel != TASK_LEVEL__SOURCE) { -// tqTrace("s-task:%s level:%d not source task, no need to start", pTask->id.idStr, pTask->taskLevel); + if (pTask->info.taskLevel != TASK_LEVEL__SOURCE) { streamMetaReleaseTask(pStreamMeta, pTask); continue; } - if (streamTaskShouldStop(&pTask->status) || status == TASK_STATUS__RECOVER_PREPARE || - status == TASK_STATUS__WAIT_DOWNSTREAM || streamTaskShouldPause(&pTask->status)) { - tqDebug("s-task:%s not ready for new submit block from wal, status:%d", pTask->id.idStr, status); + if (status != TASK_STATUS__NORMAL) { + tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -157,39 +260,47 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } + int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue); + // append the data for the stream SStreamQueueItem* pItem = NULL; code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, pTask->id.idStr); - if (code != TSDB_CODE_SUCCESS) { // failed, continue - streamMetaReleaseTask(pStreamMeta, pTask); - continue; - } - // delete ignore - if (pItem == NULL) { + if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue streamMetaReleaseTask(pStreamMeta, pTask); continue; } - noNewDataInWal = false; + if (pItem != NULL) { + noDataInWal = false; + code = tAppendDataToInputQueue(pTask, pItem); + if (code == TSDB_CODE_SUCCESS) { + pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, + pTask->chkInfo.currentVer); + } else { + tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr, + pTask->chkInfo.currentVer); + } + } - code 
= tqAddInputBlockNLaunchTask(pTask, pItem); - if (code == TSDB_CODE_SUCCESS) { - pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, - pTask->chkInfo.currentVer); - } else { - tqError("s-task:%s append input queue failed, ver:%" PRId64, pTask->id.idStr, pTask->chkInfo.currentVer); + if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) { + code = streamSchedExec(pTask); + if (code != TSDB_CODE_SUCCESS) { + streamMetaReleaseTask(pStreamMeta, pTask); + return -1; + } } streamMetaReleaseTask(pStreamMeta, pTask); } // all wal are checked, and no new data available in wal. - if (noNewDataInWal) { + if (noDataInWal) { *pScanIdle = true; } taosArrayDestroy(pTaskList); return 0; } + diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index 0c83f1f12c1f85b9959c212745546f4d7748afe9..cbe3ffee9e9fa5f62bad48fc0df07bfedf2ea610 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -48,7 +48,7 @@ static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) { SMetaReader mr = {0}; - metaReaderInit(&mr, pTq->pVnode->pMeta, 0); + metaReaderDoInit(&mr, pTq->pVnode->pMeta, 0); // TODO add reference to gurantee success if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) { diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 650f62828f34c29189f2b7674d54446cc52c37b1..b22650d2498c17677607a99fce0dd4cba312d230 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -309,7 +309,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d tbData.uid = pTableSinkInfo->uid; } else { SMetaReader mr = {0}; - metaReaderInit(&mr, pVnode->pMeta, 0); + metaReaderDoInit(&mr, pVnode->pMeta, 0); if 
(metaGetTableEntryByName(&mr, ctbName) < 0) { metaReaderClear(&mr); taosMemoryFree(pTableSinkInfo); @@ -412,7 +412,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d if (k == 0) { SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); void* colData = colDataGetData(pColData, j); - tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); + tqTrace("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); } if (IS_SET_NULL(pCol)) { SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index ae69c4d456158c0dffd21cfbaac405fa7915a280..c61d42d44e8fbba044164911d5682335035c0bc9 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -20,12 +20,6 @@ static int32_t tqSendMetaPollRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp, int32_t vgId); -char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { - char buf[128] = {0}; - sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId); - return taosStrdup(buf); -} - int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem) { int32_t code = tAppendDataToInputQueue(pTask, pQueueItem); if (code < 0) { diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 31b13b8411127cfc742ed78bfcc56191126b93fc..cde0e6f1b77f1e00ea550d29029ff1484b4032b2 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1610,7 +1610,7 @@ static tb_uid_t getTableSuidByUid(tb_uid_t uid, STsdb *pTsdb) { tb_uid_t suid = 0; SMetaReader mr = {0}; - metaReaderInit(&mr, pTsdb->pVnode->pMeta, 0); + metaReaderDoInit(&mr, pTsdb->pVnode->pMeta, 0); if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) { metaReaderClear(&mr); // table not esist return 0; diff --git 
a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 84dcde06ac348a68b107eec97a79f48d3b8cbde0..165448fb7bcdfc1f1c832070066dfa8bcc2fbeea 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -248,7 +248,7 @@ static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdb STbData* piMemTbData); static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t* pLevel); -static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); +static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); static int32_t doBuildDataBlock(STsdbReader* pReader); static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); @@ -775,7 +775,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void pReader->order = pCond->order; pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL; - pReader->verRange = getQueryVerRange(pVnode, pCond, level); + pReader->verRange = getQueryVerRange(pVnode, pCond, idstr); pReader->type = pCond->type; pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket @@ -3721,7 +3721,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret return VND_TSDB(pVnode); } -SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) { +SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id) { int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; int64_t endVer = 0; @@ -3732,6 +3732,9 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_ endVer = (pCond->endVersion > pVnode->state.applied) ? 
pVnode->state.applied : pCond->endVersion; } + tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion, + pCond->endVersion, startVer, endVer, id); + return (SVersionRange){.minVer = startVer, .maxVer = endVer}; } @@ -5452,7 +5455,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { SMetaReader mr = {0}; - metaReaderInit(&mr, ((SVnode*)pVnode)->pMeta, 0); + metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); if (code != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; @@ -5584,57 +5587,3 @@ void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) { void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } -/*-------------todo:refactor the implementation of those APIs in this file to seperate the API into two files------*/ -// opt perf, do NOT create so many readers -int64_t tsdbGetLastTimestamp(SVnode* pVnode, void* pTableList, int32_t numOfTables, const char* pIdStr) { - SQueryTableDataCond cond = {.type = TIMEWINDOW_RANGE_CONTAINED, .numOfCols = 1, .order = TSDB_ORDER_DESC, - .startVersion = -1, .endVersion = -1}; - cond.twindows.skey = INT64_MIN; - cond.twindows.ekey = INT64_MAX; - - cond.colList = taosMemoryCalloc(1, sizeof(SColumnInfo)); - cond.pSlotList = taosMemoryMalloc(sizeof(int32_t) * cond.numOfCols); - if (cond.colList == NULL || cond.pSlotList == NULL) { - // todo - } - - cond.colList[0].colId = 1; - cond.colList[0].slotId = 0; - cond.colList[0].type = TSDB_DATA_TYPE_TIMESTAMP; - - cond.pSlotList[0] = 0; - - STableKeyInfo* pTableKeyInfo = pTableList; - STsdbReader* pReader = NULL; - SSDataBlock* pBlock = createDataBlock(); - - SColumnInfoData data = {0}; - data.info = (SColumnInfo) {.type = TSDB_DATA_TYPE_TIMESTAMP, .colId = 1, .bytes = TSDB_KEYSIZE}; - 
blockDataAppendColInfo(pBlock, &data); - - int64_t key = INT64_MIN; - - for(int32_t i = 0; i < numOfTables; ++i) { - int32_t code = tsdbReaderOpen(pVnode, &cond, &pTableKeyInfo[i], 1, pBlock, (void**)&pReader, pIdStr, false, NULL); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - bool hasData = false; - code = tsdbNextDataBlock(pReader, &hasData); - if (!hasData || code != TSDB_CODE_SUCCESS) { - continue; - } - - SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, 0); - int64_t k = *(int64_t*)pCol->pData; - - if (key < k) { - key = k; - } - - tsdbReaderClose(pReader); - } - - return 0; -} diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 556ec335266e1019470555a0655ce6d6977a7253..84671197d8f3502eb8d02b1fa31edcb7b9a171ca 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -528,25 +528,25 @@ void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *minK *maxKey = *minKey + tsTickPerMin[precision] * minutes - 1; } -int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now) { +int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t nowSec) { int32_t aFid[3]; TSKEY key; if (pKeepCfg->precision == TSDB_TIME_PRECISION_MILLI) { - now = now * 1000; + nowSec = nowSec * 1000; } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_MICRO) { - now = now * 1000000l; + nowSec = nowSec * 1000000l; } else if (pKeepCfg->precision == TSDB_TIME_PRECISION_NANO) { - now = now * 1000000000l; + nowSec = nowSec * 1000000000l; } else { ASSERT(0); } - key = now - pKeepCfg->keep0 * tsTickPerMin[pKeepCfg->precision]; + key = nowSec - pKeepCfg->keep0 * tsTickPerMin[pKeepCfg->precision]; aFid[0] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); - key = now - pKeepCfg->keep1 * tsTickPerMin[pKeepCfg->precision]; + key = nowSec - pKeepCfg->keep1 * tsTickPerMin[pKeepCfg->precision]; aFid[1] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); - 
key = now - pKeepCfg->keep2 * tsTickPerMin[pKeepCfg->precision]; + key = nowSec - pKeepCfg->keep2 * tsTickPerMin[pKeepCfg->precision]; aFid[2] = tsdbKeyFid(key, pKeepCfg->days, pKeepCfg->precision); if (fid >= aFid[0]) { @@ -640,7 +640,7 @@ SColVal *tsdbRowIterNext(STSDBRowIter *pIter) { int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) { int32_t code = 0; TSDBKEY key = TSDBROW_KEY(pRow); - SColVal * pColVal = &(SColVal){0}; + SColVal *pColVal = &(SColVal){0}; STColumn *pTColumn; int32_t iCol, jCol = 1; @@ -764,7 +764,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) } } -int32_t tsdbRowMergerInit(SRowMerger* pMerger, STSchema *pSchema) { +int32_t tsdbRowMergerInit(SRowMerger *pMerger, STSchema *pSchema) { pMerger->pTSchema = pSchema; pMerger->pArray = taosArrayInit(pSchema->numOfCols, sizeof(SColVal)); if (pMerger->pArray == NULL) { @@ -774,7 +774,7 @@ int32_t tsdbRowMergerInit(SRowMerger* pMerger, STSchema *pSchema) { } } -void tsdbRowMergerClear(SRowMerger* pMerger) { +void tsdbRowMergerClear(SRowMerger *pMerger) { for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); if (IS_VAR_DATA_TYPE(pTColVal->type)) { @@ -785,7 +785,7 @@ void tsdbRowMergerClear(SRowMerger* pMerger) { taosArrayClear(pMerger->pArray); } -void tsdbRowMergerCleanup(SRowMerger* pMerger) { +void tsdbRowMergerCleanup(SRowMerger *pMerger) { int32_t numOfCols = taosArrayGetSize(pMerger->pArray); for (int32_t iCol = 1; iCol < numOfCols; iCol++) { SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); @@ -1041,8 +1041,6 @@ int32_t tsdbBuildDeleteSkyline2(SArray *aDelData, int32_t sidx, int32_t eidx, SA // SBlockData ====================================================== int32_t tBlockDataCreate(SBlockData *pBlockData) { - int32_t code = 0; - pBlockData->suid = 0; pBlockData->uid = 0; pBlockData->nRow = 0; @@ -1051,7 +1049,7 @@ int32_t 
tBlockDataCreate(SBlockData *pBlockData) { pBlockData->aTSKEY = NULL; pBlockData->nColData = 0; pBlockData->aColData = NULL; - return code; + return 0; } void tBlockDataDestroy(SBlockData *pBlockData) { @@ -1107,8 +1105,8 @@ int32_t tBlockDataInit(SBlockData *pBlockData, TABLEID *pId, STSchema *pTSchema, int32_t iColumn = 1; STColumn *pTColumn = &pTSchema->columns[iColumn]; for (int32_t iCid = 0; iCid < nCid; iCid++) { - - // aCid array (from taos client catalog) contains columns that does not exist in the pTSchema. the pTSchema is newer + // aCid array (from taos client catalog) contains columns that does not exist in the pTSchema. the pTSchema is + // newer if (pTColumn == NULL) { continue; } @@ -1239,7 +1237,7 @@ int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTS _exit: return code; } -static int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema) { +int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema) { int32_t code = 0; // version diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index d2db6368a2d041fa25916ae8202c61e7339b6c8e..28a88561af0aea5aa2ad236d0d0d71ff6da43093 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -203,6 +203,7 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateCommit = streamStateCommit; pStore->streamStateDestroy = streamStateDestroy; pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint; + pStore->streamStateReloadInfo = streamStateReloadInfo; } void initMetaReaderAPI(SStoreMetaReader* pMetaReader) { diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index 022fc4c951cf2b1d27326dc4788affc8bb9caf48..c122a98a123850dbc4132cffb6d3719a5ae72e72 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -62,7 +62,7 @@ int 
vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) { } // query meta - metaReaderInit(&mer1, pVnode->pMeta, 0); + metaReaderDoInit(&mer1, pVnode->pMeta, 0); if (metaGetTableEntryByName(&mer1, infoReq.tbName) < 0) { code = terrno; @@ -79,7 +79,7 @@ int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) { schemaTag = mer1.me.stbEntry.schemaTag; metaRsp.suid = mer1.me.uid; } else if (mer1.me.type == TSDB_CHILD_TABLE) { - metaReaderInit(&mer2, pVnode->pMeta, META_READER_NOLOCK); + metaReaderDoInit(&mer2, pVnode->pMeta, META_READER_NOLOCK); if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit; strcpy(metaRsp.stbName, mer2.me.name); @@ -175,7 +175,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) { } // query meta - metaReaderInit(&mer1, pVnode->pMeta, 0); + metaReaderDoInit(&mer1, pVnode->pMeta, 0); if (metaGetTableEntryByName(&mer1, cfgReq.tbName) < 0) { code = terrno; @@ -188,7 +188,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) { code = TSDB_CODE_VND_HASH_MISMATCH; goto _exit; } else if (mer1.me.type == TSDB_CHILD_TABLE) { - metaReaderInit(&mer2, pVnode->pMeta, 0); + metaReaderDoInit(&mer2, pVnode->pMeta, 0); if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit; strcpy(cfgRsp.stbName, mer2.me.name); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 85fcde2a520167b159aff75a1172b11d2ffb4e83..40bca578278328188b5a009bbf568988956c5fcf 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -402,10 +402,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg if (!syncUtilUserCommit(pMsg->msgType)) goto _exit; - if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE || pMsg->msgType == TDMT_STREAM_TASK_CHECK_RSP) { - if (tqCheckLogInWal(pVnode->pTq, ver)) return 0; - } - // skip header pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); 
len = pMsg->contLen - sizeof(SMsgHead); @@ -501,16 +497,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } } break; - case TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE: { - if (tqProcessTaskRecover2Req(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { - goto _err; - } - } break; - case TDMT_STREAM_TASK_CHECK_RSP: { - if (tqProcessStreamTaskCheckRsp(pVnode->pTq, ver, pReq, len) < 0) { - goto _err; - } - } break; case TDMT_VND_ALTER_CONFIRM: needCommit = pVnode->config.hashChange; if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) { @@ -641,26 +627,49 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { // return tqProcessPollReq(pVnode->pTq, pMsg); case TDMT_VND_TMQ_VG_WALINFO: return tqProcessVgWalInfoReq(pVnode->pTq, pMsg); + default: + vError("unknown msg type:%d in fetch queue", pMsg->msgType); + return TSDB_CODE_APP_ERROR; + } +} + +int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { + vTrace("vgId:%d, msg:%p in fetch queue is processing", pVnode->config.vgId, pMsg); + if ((pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_VND_TABLE_META || pMsg->msgType == TDMT_VND_TABLE_CFG || + pMsg->msgType == TDMT_VND_BATCH_META) && + !syncIsReadyForRead(pVnode->sync)) { + vnodeRedirectRpcMsg(pVnode, pMsg, terrno); + return 0; + } + + switch (pMsg->msgType) { case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); - case TDMT_STREAM_TASK_CHECK: - return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH_RSP: return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECK: + return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECK_RSP: + return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg); case TDMT_STREAM_RETRIEVE: return tqProcessTaskRetrieveReq(pVnode->pTq, 
pMsg); case TDMT_STREAM_RETRIEVE_RSP: return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); - case TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE: - return tqProcessTaskRecover1Req(pVnode->pTq, pMsg); - case TDMT_STREAM_RECOVER_FINISH: + case TDMT_VND_STREAM_SCAN_HISTORY: + return tqProcessTaskScanHistory(pVnode->pTq, pMsg); + case TDMT_STREAM_TRANSFER_STATE: { + char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + return tqProcessTaskTransferStateReq(pVnode->pTq, 0, pReq, len); + } + case TDMT_STREAM_SCAN_HISTORY_FINISH: return tqProcessTaskRecoverFinishReq(pVnode->pTq, pMsg); - case TDMT_STREAM_RECOVER_FINISH_RSP: + case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: return tqProcessTaskRecoverFinishRsp(pVnode->pTq, pMsg); default: - vError("unknown msg type:%d in fetch queue", pMsg->msgType); + vError("unknown msg type:%d in stream queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; } } @@ -1695,7 +1704,7 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe tDecodeSBatchDeleteReq(&decoder, &deleteReq); SMetaReader mr = {0}; - metaReaderInit(&mr, pVnode->pMeta, META_READER_NOLOCK); + metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK); int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); for (int32_t i = 0; i < sz; i++) { diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index ff551e6534d1c445c4465e8f2ad3328f03899196..360da41482c43cc13236b104052220c87bd216cf 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -554,7 +554,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId); } else { vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId); - tqStartStreamTasks(pVnode->pTq); + tqCheckStreamStatus(pVnode->pTq); } } diff --git a/source/libs/command/src/command.c 
b/source/libs/command/src/command.c index f59653700b69f830a2c0d5ce939af72942806951..dad20c915cc32dc2163fad8e3a0a0a6a4eb203a9 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -392,7 +392,7 @@ static int32_t setAliveResultIntoDataBlock(int64_t* pConnId, SSDataBlock* pBlock int32_t status = 0; int32_t code = getAliveStatusFromApi(pConnId, dbName, &status); if (code == TSDB_CODE_SUCCESS) { - colDataAppend(pCol1, 0, (const char*)&status, false); + colDataSetVal(pCol1, 0, (const char*)&status, false); } return code; } diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 5d663df50e0e532cc185a1c226a178c600414dda..0ba9aae1336069d24959d73b9c885bae10dce580 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -285,6 +285,8 @@ typedef struct SStreamAggSupporter { int16_t stateKeyType; SDiskbasedBuf* pResultBuf; SStateStore stateStore; + STimeWindow winRange; + SStorageAPI* pSessionAPI; } SStreamAggSupporter; typedef struct SWindowSupporter { @@ -503,6 +505,8 @@ typedef struct SStreamSessionAggOperatorInfo { SArray* pUpdated; SSHashObj* pStUpdated; int64_t dataVersion; + SArray* historyWins; + bool isHistoryOp; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -522,6 +526,8 @@ typedef struct SStreamStateAggOperatorInfo { SArray* pUpdated; SSHashObj* pSeUpdated; int64_t dataVersion; + bool isHistoryOp; + SArray* historyWins; } SStreamStateAggOperatorInfo; typedef struct SStreamPartitionOperatorInfo { @@ -678,6 +684,8 @@ void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExpr void doClearBufferedBlocks(SStreamScanInfo* pInfo); uint64_t calcGroupId(char* pData, int32_t len); +void streamOpReleaseState(struct SOperatorInfo* pOperator); +void streamOpReloadState(struct SOperatorInfo* pOperator); #ifdef __cplusplus } diff --git a/source/libs/executor/inc/operator.h 
b/source/libs/executor/inc/operator.h index 1d2685b8c6f98aa8309c2e9900c4d378ff227e48..e6c3405d7ff1b75326601adc0b436ca8295ee78d 100644 --- a/source/libs/executor/inc/operator.h +++ b/source/libs/executor/inc/operator.h @@ -35,6 +35,7 @@ typedef SSDataBlock* (*__optr_fn_t)(struct SOperatorInfo* pOptr); typedef void (*__optr_close_fn_t)(void* param); typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len); typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr); +typedef void (*__optr_state_fn_t)(struct SOperatorInfo* pOptr); typedef struct SOperatorFpSet { __optr_open_fn_t _openFn; // DO NOT invoke this function directly @@ -45,6 +46,8 @@ typedef struct SOperatorFpSet { __optr_encode_fn_t encodeResultRow; __optr_decode_fn_t decodeResultRow; __optr_explain_fn_t getExplainFn; + __optr_state_fn_t releaseStreamStateFn; + __optr_state_fn_t reloadStreamStateFn; } SOperatorFpSet; enum { @@ -126,13 +129,13 @@ SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); -SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild); +SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle); SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* 
createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle); SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo); @@ -143,6 +146,7 @@ SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNo SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup, __optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain); +void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn); int32_t optrDummyOpenFn(SOperatorInfo* pOperator); int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num); void setOperatorCompleted(SOperatorInfo* pOperator); diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h index 6497bd90b43ee73422c9dbae96f871a27f4e29b0..cdf37bcc6b5a9cd2a06f0398cd17675e2ce62531 100644 --- a/source/libs/executor/inc/querytask.h +++ b/source/libs/executor/inc/querytask.h @@ -62,10 +62,12 @@ typedef struct { SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor int8_t recoverStep; + bool recoverStep1Finished; + bool recoverStep2Finished; int8_t recoverScanFinished; SQueryTableDataCond tableCond; - int64_t fillHistoryVer1; - int64_t fillHistoryVer2; + SVersionRange fillHistoryVer; + STimeWindow fillHistoryWindow; SStreamState* pState; int64_t dataVersion; int64_t checkPointId; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 88e2165a127aff848a72fb8cc82aa48f341f4e34..a3d94a0891e64ed2c2b0f165e0256a915279be0b 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -92,6 +92,7 
@@ static int32_t doSetStreamOpOpen(SOperatorInfo* pOperator, char* id) { qError("join not supported for stream block scan, %s" PRIx64, id); return TSDB_CODE_APP_ERROR; } + pOperator->status = OP_NOT_OPENED; return doSetStreamOpOpen(pOperator->pDownstream[0], id); } @@ -115,6 +116,16 @@ void resetTaskInfo(qTaskInfo_t tinfo) { clearStreamBlock(pTaskInfo->pRoot); } +void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tinfo; + if (pTaskInfo == NULL) { + return; + } + + qDebug("%s set fill history start key:%"PRId64, GET_TASKID(pTaskInfo), INT64_MIN); + pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN; +} + static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, const char* id) { if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { if (pOperator->numOfDownstream == 0) { @@ -130,10 +141,9 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu return doSetStreamBlock(pOperator->pDownstream[0], input, numOfBlocks, type, id); } else { pOperator->status = OP_NOT_OPENED; - SStreamScanInfo* pInfo = pOperator->info; - qDebug("s-task:%s in this batch, all %d blocks need to be processed and dump results", id, (int32_t)numOfBlocks); + qDebug("s-task:%s in this batch, %d blocks need to be processed", id, (int32_t)numOfBlocks); ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0); if (type == STREAM_INPUT__MERGED_SUBMIT) { @@ -265,6 +275,7 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int3 terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + pTaskInfo->pRoot = createRawScanOperatorInfo(pReaderHandle, pTaskInfo); if (NULL == pTaskInfo->pRoot) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -314,8 +325,8 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v return NULL; } - struct SSubplan* pPlan = NULL; - int32_t code = qStringToSubplan(msg, &pPlan); + 
SSubplan* pPlan = NULL; + int32_t code = qStringToSubplan(msg, &pPlan); if (code != TSDB_CODE_SUCCESS) { terrno = code; return NULL; @@ -869,19 +880,41 @@ int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner) { } } -int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver) { +int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM); - pTaskInfo->streamInfo.fillHistoryVer1 = ver; - pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE1; + + SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo; + + pStreamInfo->fillHistoryVer = *pVerRange; + pStreamInfo->fillHistoryWindow = *pWindow; + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE1; + pStreamInfo->recoverStep1Finished = false; + pStreamInfo->recoverStep2Finished = false; + + qDebug("%s step 1. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 + " - %" PRId64, + GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey, + pWindow->ekey); return 0; } -int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver) { +int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM); - pTaskInfo->streamInfo.fillHistoryVer2 = ver; - pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE2; + + SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo; + + pStreamInfo->fillHistoryVer = *pVerRange; + pStreamInfo->fillHistoryWindow = *pWindow; + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE2; + pStreamInfo->recoverStep1Finished = true; + pStreamInfo->recoverStep2Finished = false; + + qDebug("%s step 2. 
set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 + " - %" PRId64, + GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey, + pWindow->ekey); return 0; } @@ -892,55 +925,58 @@ int32_t qStreamRecoverFinish(qTaskInfo_t tinfo) { return 0; } -int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) { +int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SOperatorInfo* pOperator = pTaskInfo->pRoot; while (1) { - if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || - pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL || - pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) { + int32_t type = pOperator->operatorType; + if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; - ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || - pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); - ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0); + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); + ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0); - qInfo("save stream param for interval: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); + qInfo("save stream param for interval: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); - pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; - pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark; - pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; - pInfo->twAggSup.deleteMark = INT64_MAX; + pSup->calTriggerSaved = pSup->calTrigger; + 
pSup->deleteMarkSaved = pSup->deleteMark; + pSup->calTrigger = STREAM_TRIGGER_AT_ONCE; + pSup->deleteMark = INT64_MAX; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredData = false; - } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION || - pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION || - pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { + } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION || + type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || - pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); + STimeWindowAggSupp* pSup = &pInfo->twAggSup; + + ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); + ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0); - ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0); - qInfo("save stream param for session: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); + qInfo("save stream param for session: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); - pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; - pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark; - pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; - pInfo->twAggSup.deleteMark = INT64_MAX; + pSup->calTriggerSaved = pSup->calTrigger; + pSup->deleteMarkSaved = pSup->deleteMark; + pSup->calTrigger = STREAM_TRIGGER_AT_ONCE; + pSup->deleteMark = INT64_MAX; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredData = false; - } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) { + } else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) { SStreamStateAggOperatorInfo* pInfo = 
pOperator->info; - ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE || - pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); - ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0); + STimeWindowAggSupp* pSup = &pInfo->twAggSup; - qInfo("save stream param for state: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark); + ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE); + ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0); - pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger; - pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark; - pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; - pInfo->twAggSup.deleteMark = INT64_MAX; + qInfo("save stream param for state: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark); + + pSup->calTriggerSaved = pSup->calTrigger; + pSup->deleteMarkSaved = pSup->deleteMark; + pSup->calTrigger = STREAM_TRIGGER_AT_ONCE; + pSup->deleteMark = INT64_MAX; pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData; pInfo->ignoreExpiredData = false; } @@ -961,7 +997,7 @@ int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) { return 0; } -int32_t qStreamRestoreParam(qTaskInfo_t tinfo) { +int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SOperatorInfo* pOperator = pTaskInfo->pRoot; @@ -1009,6 +1045,26 @@ bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) { return pTaskInfo->streamInfo.recoverScanFinished; } +bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + return pTaskInfo->streamInfo.recoverStep1Finished; +} + +bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + return pTaskInfo->streamInfo.recoverStep2Finished; +} + +int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = 
(SExecTaskInfo*)tinfo; + pTaskInfo->streamInfo.recoverStep1Finished = true; + pTaskInfo->streamInfo.recoverStep2Finished = true; + + // reset the time window + pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN; + return 0; +} + void* qExtractReaderFromStreamScanner(void* scanner) { SStreamScanInfo* pInfo = scanner; return (void*)pInfo->tqReader; @@ -1323,4 +1379,16 @@ SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo) { SOperatorInfo* pOperator = pTaskInfo->pRoot; extractTableList(pArray, pOperator); return pArray; -} \ No newline at end of file +} + +int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo; + pTaskInfo->pRoot->fpSet.releaseStreamStateFn(pTaskInfo->pRoot); + return 0; +} + +int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo; + pTaskInfo->pRoot->fpSet.reloadStreamStateFn(pTaskInfo->pRoot); + return 0; +} diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 42b8a9d31c4afb42b824f0a458bcd3decd2d98a8..eb55ab5e08fee7cae12db9e634216df6da4f246e 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -540,140 +540,12 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD } int8_t* pIndicator = (int8_t*)p->pData; - int32_t totalRows = pBlock->info.rows; - if (status == FILTER_RESULT_ALL_QUALIFIED) { // here nothing needs to be done } else if (status == FILTER_RESULT_NONE_QUALIFIED) { pBlock->info.rows = 0; } else { - int32_t bmLen = BitmapLen(totalRows); - char* pBitmap = NULL; - int32_t maxRows = 0; - - size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - for (int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i); - // it is a reserved column for scalar function, and no data in this column yet. 
- if (pDst->pData == NULL) { - continue; - } - - int32_t numOfRows = 0; - if (IS_VAR_DATA_TYPE(pDst->info.type)) { - int32_t j = 0; - - while (j < totalRows) { - if (pIndicator[j] == 0) { - j += 1; - continue; - } - - if (colDataIsNull_var(pDst, j)) { - colDataSetNull_var(pDst, numOfRows); - } else { - char* p1 = colDataGetVarData(pDst, j); - colDataReassignVal(pDst, numOfRows, j, p1); - } - numOfRows += 1; - j += 1; - } - - if (maxRows < numOfRows) { - maxRows = numOfRows; - } - } else { - if (pBitmap == NULL) { - pBitmap = taosMemoryCalloc(1, bmLen); - } - - memcpy(pBitmap, pDst->nullbitmap, bmLen); - memset(pDst->nullbitmap, 0, bmLen); - - int32_t j = 0; - - switch (pDst->info.type) { - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_UBIGINT: - case TSDB_DATA_TYPE_DOUBLE: - case TSDB_DATA_TYPE_TIMESTAMP: - while (j < totalRows) { - if (pIndicator[j] == 0) { - j += 1; - continue; - } - - if (colDataIsNull_f(pBitmap, j)) { - colDataSetNull_f(pDst->nullbitmap, numOfRows); - } else { - ((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j]; - } - numOfRows += 1; - j += 1; - } - break; - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_UINT: - while (j < totalRows) { - if (pIndicator[j] == 0) { - j += 1; - continue; - } - if (colDataIsNull_f(pBitmap, j)) { - colDataSetNull_f(pDst->nullbitmap, numOfRows); - } else { - ((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j]; - } - numOfRows += 1; - j += 1; - } - break; - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_USMALLINT: - while (j < totalRows) { - if (pIndicator[j] == 0) { - j += 1; - continue; - } - if (colDataIsNull_f(pBitmap, j)) { - colDataSetNull_f(pDst->nullbitmap, numOfRows); - } else { - ((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j]; - } - numOfRows += 1; - j += 1; - } - break; - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_UTINYINT: - while (j < totalRows) { - if (pIndicator[j] == 0) { - j += 1; - 
continue; - } - if (colDataIsNull_f(pBitmap, j)) { - colDataSetNull_f(pDst->nullbitmap, numOfRows); - } else { - ((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j]; - } - numOfRows += 1; - j += 1; - } - break; - } - } - - if (maxRows < numOfRows) { - maxRows = numOfRows; - } - } - - pBlock->info.rows = maxRows; - if (pBitmap != NULL) { - taosMemoryFree(pBitmap); - } + trimDataBlock(pBlock, pBlock->info.rows, (bool*) pIndicator); } } @@ -1105,9 +977,11 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa int32_t size = 0; void* pVal = NULL; int32_t code = pAPI->stateStore.streamStateSessionGet(pState, pKey, &pVal, &size); - ASSERT(code == 0); + // ASSERT(code == 0); if (code == -1) { - // coverity scan + // for history + qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey, + pKey->win.ekey, pKey->groupId); pGroupResInfo->index += 1; continue; } @@ -1177,40 +1051,16 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa return TSDB_CODE_SUCCESS; } -void qStreamCloseTsdbReader(void* task) { - if (task == NULL) { - return; +void streamOpReleaseState(SOperatorInfo* pOperator) { + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); } +} - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)task; - SOperatorInfo* pOp = pTaskInfo->pRoot; - - qDebug("stream close tsdb reader, reset status uid:%" PRId64 " ts:%" PRId64, pTaskInfo->streamInfo.currentOffset.uid, - pTaskInfo->streamInfo.currentOffset.ts); - - // todo refactor, other thread may already use this read to extract data. 
- pTaskInfo->streamInfo.currentOffset = (STqOffsetVal){0}; - while (pOp->numOfDownstream == 1 && pOp->pDownstream[0]) { - SOperatorInfo* pDownstreamOp = pOp->pDownstream[0]; - if (pDownstreamOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - SStreamScanInfo* pInfo = pDownstreamOp->info; - if (pInfo->pTableScanOp) { - STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; - - setOperatorCompleted(pInfo->pTableScanOp); - while (pTaskInfo->owner != 0) { - taosMsleep(100); - qDebug("wait for the reader stopping"); - } - - pTaskInfo->storageAPI.tsdReader.tsdReaderClose(pTSInfo->base.dataReader); - pTSInfo->base.dataReader = NULL; - - // restore the status, todo refactor. - pInfo->pTableScanOp->status = OP_OPENED; - pTaskInfo->status = TASK_NOT_COMPLETED; - return; - } - } - } +void streamOpReloadState(SOperatorInfo* pOperator) { + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } } diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index 92152924f9dc196149663069643d588c871918ec..7798ded61bd310e5e447a76693f6b58bf5e2fc4f 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -1562,6 +1562,7 @@ SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFi pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamFill, NULL, destroyStreamFillOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState); code = appendDownstream(pOperator, &downstream, 1); if (code != TSDB_CODE_SUCCESS) { diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 57c22d56d38dadf5012a032e7f298f935bbd101e..9228c923a6893ccacbbf51cf3ab90958f7b02bb4 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ 
-1036,7 +1036,7 @@ void appendCreateTableRow(void* pState, SExprSupp* pTableSup, SExprSupp* pTagSup } void* pGpIdCol = taosArrayGet(pDestBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX); - colDataAppend(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false); + colDataSetVal(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false); pDestBlock->info.rows++; blockDataDestroy(pTmpBlock); } else { @@ -1324,6 +1324,7 @@ SOperatorInfo* createStreamPartitionOperatorInfo(SOperatorInfo* downstream, SStr pOperator->exprSupp.pExprInfo = pExprInfo; pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamHashPartition, NULL, destroyStreamPartitionOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState); initParDownStream(downstream, &pInfo->partitionSup, &pInfo->scalarSup); code = appendDownstream(pOperator, &downstream, 1); diff --git a/source/libs/executor/src/operator.c b/source/libs/executor/src/operator.c index 730252c7ee2e4ae6fe5bb36d5ab159cfe7cb966f..2db5ea2f1e2b55cd5faf4fe12f47c6f292122079 100644 --- a/source/libs/executor/src/operator.c +++ b/source/libs/executor/src/operator.c @@ -38,11 +38,18 @@ SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, .closeFn = closeFn, .reqBufFn = reqBufFn, .getExplainFn = explain, + .releaseStreamStateFn = NULL, + .reloadStreamStateFn = NULL, }; return fpSet; } +void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn) { + pOperator->fpSet.releaseStreamStateFn = relaseFn; + pOperator->fpSet.reloadStreamStateFn = reloadFn; +} + int32_t optrDummyOpenFn(SOperatorInfo* pOperator) { OPTR_SET_OPENED(pOperator); pOperator->cost.openCost = 0; @@ -485,13 +492,13 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, 
pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) { - pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); + pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) { int32_t children = 0; - pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) { int32_t children = pHandle->numOfVgroups; - pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children); + pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) { pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) { @@ -500,7 +507,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode; pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) { - pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo); + pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle); } else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) { pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo); } else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) { diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index cd450c5bb7fcfb0f9cbeb4767fa584cc30ecc3aa..412a4bfbc00514410df1d69f65d6233af5be55af 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -73,6 +73,20 @@ 
static void destroyIndefinitOperatorInfo(void* param) { taosMemoryFreeClear(param); } +void streamOperatorReleaseState(SOperatorInfo* pOperator) { + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +void streamOperatorReloadState(SOperatorInfo* pOperator) { + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode, SExecTaskInfo* pTaskInfo) { int32_t code = TSDB_CODE_SUCCESS; @@ -136,6 +150,7 @@ SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhys pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doProjectOperation, NULL, destroyProjectOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamOperatorReleaseState, streamOperatorReloadState); code = appendDownstream(pOperator, &downstream, 1); if (code != TSDB_CODE_SUCCESS) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index fd9b713ee62d133ffc3418a1d197ecffacad89fd..74210ee06e7797f16988c6c14d4af12360c9880b 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -44,6 +44,7 @@ int32_t scanDebug = 0; #define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN) #define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? 
TSDB_ORDER_DESC : TSDB_ORDER_ASC)) #define STREAM_SCAN_OP_NAME "StreamScanOperator" +#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState" typedef struct STableMergeScanExecInfo { SFileBlockLoadRecorder blockRecorder; @@ -489,12 +490,12 @@ int32_t addTagPseudoColumnData(SReadHandle* pHandle, const SExprInfo* pExpr, int } int32_t code = 0; + bool freeReader = false; // backup the rows int32_t backupRows = pBlock->info.rows; pBlock->info.rows = rows; - bool freeReader = false; STableCachedVal val = {0}; SMetaReader mr = {0}; @@ -1558,13 +1559,13 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows); - pInfo->pRes->info.rows = pBlock->info.rows; - pInfo->pRes->info.id.uid = pBlock->info.id.uid; - pInfo->pRes->info.type = STREAM_NORMAL; - pInfo->pRes->info.version = pBlock->info.version; + pBlockInfo->rows = pBlock->info.rows; + pBlockInfo->id.uid = pBlock->info.id.uid; + pBlockInfo->type = STREAM_NORMAL; + pBlockInfo->version = pBlock->info.version; STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; - pInfo->pRes->info.id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid); + pBlockInfo->id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid); // todo extract method for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) { @@ -1594,7 +1595,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock // currently only the tbname pseudo column if (pInfo->numOfPseudoExpr > 0) { int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes, - pInfo->pRes->info.rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache); + pBlockInfo->rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache); // ignore the table not exists error, since this table may have been dropped during the scan procedure. 
if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_PAR_TABLE_NOT_EXIST) { blockDataFreeRes((SSDataBlock*)pBlock); @@ -1611,7 +1612,6 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock pInfo->pRes->info.dataLoad = 1; blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex); -// blockDataFreeRes((SSDataBlock*)pBlock); calBlockTbName(pInfo, pInfo->pRes); return 0; @@ -1769,7 +1769,7 @@ void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo) return; } - void* pUpInfo = pInfo->stateStore.updateInfoInit(0, TSDB_TIME_PRECISION_MILLI, 0); + void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo); if (code == TSDB_CODE_SUCCESS) { pInfo->pUpdateInfo = pUpInfo; @@ -1783,25 +1783,28 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { SStorageAPI* pAPI = &pTaskInfo->storageAPI; SStreamScanInfo* pInfo = pOperator->info; + SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo; - qDebug("stream scan started, %s", GET_TASKID(pTaskInfo)); + qDebug("stream scan started, %s", id); - if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1 || - pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE2) { + if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1 || pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE2) { STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; - memcpy(&pTSInfo->base.cond, &pTaskInfo->streamInfo.tableCond, sizeof(SQueryTableDataCond)); - if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1) { - pTSInfo->base.cond.startVersion = 0; - pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer1; - qDebug("stream recover step1, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion, - pTSInfo->base.cond.endVersion); - pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN1; + memcpy(&pTSInfo->base.cond, &pStreamInfo->tableCond, 
sizeof(SQueryTableDataCond)); + + if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1) { + pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer; + pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer; + + pTSInfo->base.cond.twindows = pStreamInfo->fillHistoryWindow; + qDebug("stream recover step1, verRange:%" PRId64 "-%" PRId64 " window:%"PRId64"-%"PRId64", %s", pTSInfo->base.cond.startVersion, + pTSInfo->base.cond.endVersion, pTSInfo->base.cond.twindows.skey, pTSInfo->base.cond.twindows.ekey, id); + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN1; } else { - pTSInfo->base.cond.startVersion = pTaskInfo->streamInfo.fillHistoryVer1 + 1; - pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer2; - qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion, - pTSInfo->base.cond.endVersion); - pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN2; + pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer; + pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer; + qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64", %s", pTSInfo->base.cond.startVersion, + pTSInfo->base.cond.endVersion, id); + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN2; } pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); @@ -1811,11 +1814,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->scanTimes = 0; pTSInfo->currentGroupId = -1; - pTaskInfo->streamInfo.recoverScanFinished = false; + pStreamInfo->recoverScanFinished = false; } - if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1 || - pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN2) { + if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 || + pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) { if (pInfo->blockRecoverContiCnt > 100) { pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; pInfo->blockRecoverContiCnt = 0; 
@@ -1866,11 +1869,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pInfo->blockRecoverContiCnt++; calBlockTbName(pInfo, pInfo->pRecoverRes); if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { - if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1) { + if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); } else { - pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pTaskInfo->streamInfo.fillHistoryVer2); + pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer); doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes); } } @@ -1884,7 +1887,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { printDataBlock(pInfo->pRecoverRes, "scan recover"); return pInfo->pRecoverRes; } - pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__NONE; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); @@ -1893,7 +1896,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->base.cond.startVersion = -1; pTSInfo->base.cond.endVersion = -1; - pTaskInfo->streamInfo.recoverScanFinished = true; + pStreamInfo->recoverScanFinished = true; return NULL; } @@ -1912,7 +1915,7 @@ FETCH_NEXT_BLOCK: SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = pPacked->pDataBlock; if (pBlock->info.parTbName[0]) { - pAPI->stateStore.streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName); + pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName); } // TODO move into scan @@ -2092,11 +2095,39 @@ 
FETCH_NEXT_BLOCK: doCheckUpdate(pInfo, pBlockInfo->window.ekey, pBlock); doFilter(pBlock, pOperator->exprSupp.pFilterInfo, NULL); + + { // do additional time window filter + STimeWindow* pWindow = &pStreamInfo->fillHistoryWindow; + + if (pWindow->skey != INT64_MIN) { + qDebug("%s filter for additional history window, skey:%"PRId64, id, pWindow->skey); + + bool* p = taosMemoryCalloc(pBlock->info.rows, sizeof(bool)); + bool hasUnqualified = false; + + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex); + for(int32_t i = 0; i < pBlock->info.rows; ++i) { + int64_t* ts = (int64_t*) colDataGetData(pCol, i); + p[i] = (*ts >= pWindow->skey); + + if (!p[i]) { + hasUnqualified = true; + } + } + + if (hasUnqualified) { + trimDataBlock(pBlock, pBlock->info.rows, p); + } + + taosMemoryFree(p); + } + } + pBlock->info.dataLoad = 1; blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex); - qDebug("%" PRId64 " rows in datablock, update res:%" PRId64 " %s", pBlockInfo->rows, - pInfo->pUpdateDataRes->info.rows, id); + qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows, + pInfo->pUpdateDataRes->info.rows); if (pBlockInfo->rows > 0 || pInfo->pUpdateDataRes->info.rows > 0) { break; } @@ -2294,6 +2325,57 @@ static void destroyStreamScanOperatorInfo(void* param) { taosMemoryFree(pStreamScan); } +void streamScanReleaseState(SOperatorInfo* pOperator) { + SStreamScanInfo* pInfo = pOperator->info; + if (!pInfo->pState) { + return; + } + if (!pInfo->pUpdateInfo) { + return; + } + int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo); + void* pBuff = taosMemoryCalloc(1, len); + pInfo->stateStore.updateInfoSerialize(pBuff, len, pInfo->pUpdateInfo); + pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), pBuff, len); + taosMemoryFree(pBuff); +} + +void streamScanReloadState(SOperatorInfo* pOperator) { + SStreamScanInfo* pInfo = pOperator->info; + 
if (!pInfo->pState) { + return; + } + void* pBuff = NULL; + int32_t len = 0; + pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), &pBuff, &len); + SUpdateInfo* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo)); + int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo); + taosMemoryFree(pBuff); + if (code == TSDB_CODE_SUCCESS && pInfo->pUpdateInfo) { + if (pInfo->pUpdateInfo->minTS < 0) { + pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo); + pInfo->pUpdateInfo = pUpInfo; + } else { + pInfo->pUpdateInfo->minTS = TMAX(pInfo->pUpdateInfo->minTS, pUpInfo->minTS); + pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pUpInfo->maxDataVersion); + SHashObj* curMap = pInfo->pUpdateInfo->pMap; + void *pIte = taosHashIterate(curMap, NULL); + while (pIte != NULL) { + size_t keySize = 0; + int64_t* pUid = taosHashGetKey(pIte, &keySize); + taosHashPut(pUpInfo->pMap, pUid, sizeof(int64_t), pIte, sizeof(TSKEY)); + pIte = taosHashIterate(curMap, pIte); + } + taosHashCleanup(curMap); + pInfo->pUpdateInfo->pMap = pUpInfo->pMap; + pUpInfo->pMap = NULL; + pInfo->stateStore.updateInfoDestroy(pUpInfo); + } + } else { + pInfo->stateStore.updateInfoDestroy(pUpInfo); + } +} + SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { SArray* pColIds = NULL; @@ -2475,6 +2557,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys __optr_fn_t nextFn = (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) ? 
doStreamScan : doQueueScan; pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamScanReleaseState, streamScanReloadState); return pOperator; diff --git a/source/libs/executor/src/sysscanoperator.c b/source/libs/executor/src/sysscanoperator.c index 70fe42595e63f1135df3dbf6cab7a27163f0a6c5..a1f83dda2f051f7ea09b4048b0fa5a2634c49d22 100644 --- a/source/libs/executor/src/sysscanoperator.c +++ b/source/libs/executor/src/sysscanoperator.c @@ -966,20 +966,20 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, // table name pColInfoData = taosArrayGet(dataBlock->pDataBlock, 0); - colDataAppend(pColInfoData, numOfRows, tName, false); + colDataSetVal(pColInfoData, numOfRows, tName, false); // database name pColInfoData = taosArrayGet(dataBlock->pDataBlock, 1); - colDataAppend(pColInfoData, numOfRows, dbname, false); + colDataSetVal(pColInfoData, numOfRows, dbname, false); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 2); - colDataAppend(pColInfoData, numOfRows, tableType, false); + colDataSetVal(pColInfoData, numOfRows, tableType, false); // col name char colName[TSDB_COL_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(colName, schemaRow->pSchema[i].name); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 3); - colDataAppend(pColInfoData, numOfRows, colName, false); + colDataSetVal(pColInfoData, numOfRows, colName, false); // col type int8_t colType = schemaRow->pSchema[i].type; @@ -994,10 +994,10 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, (int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); } varDataSetLen(colTypeStr, colTypeLen); - colDataAppend(pColInfoData, numOfRows, (char*)colTypeStr, false); + colDataSetVal(pColInfoData, numOfRows, (char*)colTypeStr, false); pColInfoData = taosArrayGet(dataBlock->pDataBlock, 5); - colDataAppend(pColInfoData, 
numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false); + colDataSetVal(pColInfoData, numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false); for (int32_t j = 6; j <= 8; ++j) { pColInfoData = taosArrayGet(dataBlock->pDataBlock, j); diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index 022440b2ad238fcca62180c4b03174a4dec4eb75..7c009c942a76af58f25ebba4f95a8af4b8c9164d 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -272,7 +272,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp continue; } else if (isIsfilledPseudoColumn(pExprInfo)) { bool isFilled = true; - colDataAppend(pDst, pResBlock->info.rows, (char*)&isFilled, false); + colDataSetVal(pDst, pResBlock->info.rows, (char*)&isFilled, false); continue; } else if (!isInterpFunc(pExprInfo)) { if (isGroupKeyFunc(pExprInfo)) { diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 78d1e97554eb65fd7b2d26afef8d454ca05ec9fc..c4111ded9259ce9238862b53c874433422ef7f33 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -28,6 +28,9 @@ #define IS_FINAL_OP(op) ((op)->isFinal) #define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); +#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" +#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" +#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" typedef struct SStateWindowInfo { SResultWindowInfo winInfo; @@ -2726,6 +2729,38 @@ int32_t getMaxFunResSize(SExprSupp* pSup, int32_t numOfCols) { return size; } +void streamIntervalReleaseState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t resSize = sizeof(TSKEY); + 
pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize); + } + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + pAPI->stateStore.streamStateCommit(pInfo->pState); + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +void streamIntervalReloadState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); + TSKEY ts = *(TSKEY*)pBuf; + taosMemoryFree(pBuf); + pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); + } + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild) { SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; @@ -2823,7 +2858,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->pUpdatedMap = NULL; int32_t funResSize= getMaxFunResSize(&pOperator->exprSupp, numOfCols); pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, - compareTs, pInfo->pState, pInfo->twAggSup.deleteMark); + compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); pInfo->dataVersion = 0; pInfo->statestore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; @@ -2835,6 +2870,7 
@@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { initIntervalDownStream(downstream, pPhyNode->type, pInfo); } @@ -2873,12 +2909,14 @@ void destroyStreamSessionAggOperatorInfo(void* param) { } taosArrayDestroy(pInfo->pChildren); } + colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pWinBlock); blockDataDestroy(pInfo->pUpdateRes); tSimpleHashCleanup(pInfo->pStDeleted); + taosArrayDestroy(pInfo->historyWins); taosMemoryFreeClear(param); } @@ -2928,7 +2966,7 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin } int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap, - SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore) { + SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, SReadHandle* pHandle, SStorageAPI* pApi) { pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput); pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR); pSup->gap = gap; @@ -2970,6 +3008,16 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, pCtx[i].saveHandle.pBuf = pSup->pResultBuf; } + if (pHandle) { + pSup->winRange = pHandle->winRange; + // temporary + if (pSup->winRange.ekey <= 0) { + pSup->winRange.ekey = INT64_MAX; + } + } + + pSup->pSessionAPI = pApi; + return TSDB_CODE_SUCCESS; } @@ -2997,6 +3045,13 @@ void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; } +bool inWinRange(STimeWindow* 
range, STimeWindow* cur) { + if (cur->skey >= range->skey && cur->ekey <= range->ekey) { + return true; + } + return false; +} + void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, SResultWindowInfo* pCurWin) { pCurWin->sessionWin.groupId = groupId; @@ -3005,6 +3060,12 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT int32_t size = pAggSup->resultRowSize; int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, pAggSup->gap, &pCurWin->pOutputBuf, &size); + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { + code = TSDB_CODE_FAILED; + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); + pCurWin->pOutputBuf = taosMemoryMalloc(size); + } + if (code == TSDB_CODE_SUCCESS) { pCurWin->isOutput = true; } else { @@ -3151,7 +3212,8 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC while (1) { SResultWindowInfo winInfo = {0}; SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); - if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap)) { + if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || + !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { taosMemoryFree(winInfo.pOutputBuf); pAPI->stateStore.streamStateFreeCur(pCur); break; @@ -3375,8 +3437,12 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS SResultWindowInfo childWin = {0}; childWin.sessionWin = *pWinKey; int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); - if (code == TSDB_CODE_SUCCESS && pWinKey->win.skey <= childWin.sessionWin.win.skey && - childWin.sessionWin.win.ekey <= pWinKey->win.ekey) { + + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, 
&childWin.sessionWin.win)) { + continue; + } + + if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) { if (num == 0) { setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); @@ -3475,6 +3541,29 @@ void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo* // clear the existed group id pBlock->info.id.groupId = 0; buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); + if (pBlock->info.rows == 0) { + cleanupGroupResInfo(pGroupResInfo); + } +} +void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { + int32_t size = taosArrayGetSize(pAllWins); + if (size == 0) { + return; + } + + SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1); + taosArrayPush(pMaxWins, pSeKey); + if (pSeKey->groupId == 0) { + return; + } + uint64_t preGpId = pSeKey->groupId; + for (int32_t i = size - 2; i >= 0; i--) { + pSeKey = taosArrayGet(pAllWins, i); + if (preGpId != pSeKey->groupId) { + taosArrayPush(pMaxWins, pSeKey); + preGpId = pSeKey->groupId; + } + } } static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { @@ -3554,7 +3643,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { // if chIndex + 1 - size > 0, add new child for (int32_t i = 0; i < chIndex + 1 - size; i++) { SOperatorInfo* pChildOp = - createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0); + createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0, NULL); if (!pChildOp) { T_LONG_JMP(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); } @@ -3576,6 +3665,9 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); tSimpleHashCleanup(pInfo->pStUpdated); pInfo->pStUpdated = NULL; + if(pInfo->isHistoryOp) { + 
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); @@ -3602,8 +3694,51 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { return NULL; } +void streamSessionReleaseState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, resSize); + } + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +void resetWinRange(STimeWindow* winRange) { + winRange->skey = INT64_MIN; /* no lower bound */ + winRange->ekey = INT64_MAX; /* no upper bound, so inWinRange() accepts every window */ +} + +void streamSessionReloadState(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + resetWinRange(&pAggSup->winRange); + + SResultWindowInfo winInfo = {0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + for (int32_t i = 0; i < num; i++) { + SResultWindowInfo winInfo = {0}; + setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted); + } + taosMemoryFree(pBuf); + + SOperatorInfo* downstream = 
pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo) { + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; int32_t numOfCols = 0; int32_t code = TSDB_CODE_OUT_OF_MEMORY; @@ -3634,7 +3769,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh } code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap, - pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore); + pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -3666,11 +3801,19 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh pInfo->pUpdated = NULL; pInfo->pStUpdated = NULL; pInfo->dataVersion = 0; + pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey)); + if (!pInfo->historyWins) { + goto _error; + } + if (pHandle) { + pInfo->isHistoryOp = pHandle->fillHistory; + } setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState); if (downstream) { initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); @@ -3778,7 +3921,6 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); tSimpleHashCleanup(pInfo->pStUpdated); pInfo->pStUpdated = NULL; - 
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); @@ -3809,9 +3951,9 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { } SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, int32_t numOfChild) { + SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle) { int32_t code = TSDB_CODE_OUT_OF_MEMORY; - SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo); + SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo, pHandle); if (pOperator == NULL) { goto _error; } @@ -3828,14 +3970,14 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL); } - + setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState); setOperatorInfo(pOperator, name, pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->operatorType = pPhyNode->type; if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); for (int32_t i = 0; i < numOfChild; i++) { - SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0); + SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL); if (pChildOp == NULL) { goto _error; } @@ -3876,6 +4018,7 @@ void destroyStreamStateOperatorInfo(void* param) { } colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); + taosArrayDestroy(pInfo->historyWins); tSimpleHashCleanup(pInfo->pSeDeleted); taosMemoryFreeClear(param); } @@ -3892,6 +4035,9 @@ bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) { } bool 
compareStateKey(void* data, void* key) { + if (!data || !key) { + return true; + } SStateKeys* stateKey = (SStateKeys*)key; stateKey->pData = (char*)key + sizeof(SStateKeys); return compareVal(data, stateKey); @@ -3913,9 +4059,15 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); pCurWin->pStateKey->isNull = false; + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) { + code = TSDB_CODE_FAILED; + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); + pCurWin->winInfo.pOutputBuf = taosMemoryMalloc(size); + } + if (code == TSDB_CODE_SUCCESS) { pCurWin->winInfo.isOutput = true; - } else { + } else if (pKeyData) { if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { varDataCopy(pCurWin->pStateKey->pData, pKeyData); } else { @@ -4113,6 +4265,10 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { tSimpleHashCleanup(pInfo->pSeUpdated); pInfo->pSeUpdated = NULL; + if(pInfo->isHistoryOp) { + getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); @@ -4138,8 +4294,73 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { return NULL; } +void streamStateReleaseState(SOperatorInfo* pOperator) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, resSize); + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } 
+} + +static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin, + SSHashObj* pStUpdated, SSHashObj* pStDeleted) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + SResultRow* pCurResult = NULL; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); + SResultRow* pWinResult = NULL; + initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); + + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, true); + compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); + tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey)); + if (pNextWin->isOutput && pStDeleted) { + saveDeleteRes(pStDeleted, pNextWin->sessionWin); + } + removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); + doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); + taosMemoryFree(pNextWin->pOutputBuf); + saveSessionOutputBuf(pAggSup, pCurWin); +} + +void streamStateReloadState(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + resetWinRange(&pAggSup->winRange); + + SSessionKey seKey = {.win.skey = INT64_MIN, .win.ekey = INT64_MIN, .groupId = 0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, + strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + for (int32_t i = 
0; i < num; i++) { + SStateWindowInfo curInfo = {0}; + SStateWindowInfo nextInfo = {0}; + setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); + if (compareStateKey(curInfo.pStateKey,nextInfo.pStateKey)) { + compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pStUpdated, pInfo->pStDeleted); + } + } + taosMemoryFree(pBuf); + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo) { + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode; int32_t tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId; SColumnNode* pColNode = (SColumnNode*)((STargetNode*)pStateNode->pStateKey)->pExpr; @@ -4183,7 +4404,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes; int16_t type = pColNode->node.resType.type; code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, - type, &pTaskInfo->storageAPI.stateStore); + type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -4199,11 +4420,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->pUpdated = NULL; pInfo->pSeUpdated = NULL; pInfo->dataVersion = 0; + pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey)); + if (!pInfo->historyWins) { + goto _error; + } + if (pHandle) { + pInfo->isHistoryOp = pHandle->fillHistory; + } setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = 
createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamStateReleaseState, streamStateReloadState); initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); code = appendDownstream(pOperator, &downstream, 1); if (code != TSDB_CODE_SUCCESS) { @@ -5021,13 +5250,16 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->pUpdated = NULL; pInfo->pUpdatedMap = NULL; int32_t funResSize= getMaxFunResSize(pSup, numOfCols); - pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, - compareTs, pInfo->pState, pInfo->twAggSup.deleteMark); + + pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( + tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL); + setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); pInfo->statestore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index daf06c81d13d0d85aa6d47f3147d822a0e915311..c0be5f99c1b8f193cc29c192888fff4572b48b7a 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1063,7 +1063,7 @@ static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) { if (!pData) { colDataSetNULL(bdGetColumnInfoData(pHandle->pDataBlock, i), 0); } else { - 
colDataAppend(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false); + colDataSetVal(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false); } } pHandle->pDataBlock->info.rows++; diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 1c5bd6d59c0efa105b8322dbf2d6adecd47dbf04..21b36d69ec24fbf7c5bd6df80d0aa7088e5b052e 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1712,7 +1712,7 @@ int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); varDataSetLen(buf, len); - colDataAppend(pCol, pBlock->info.rows, buf, false); + colDataSetVal(pCol, pBlock->info.rows, buf, false); tMemBucketDestroy(pMemBucket); return pResInfo->numOfRes; diff --git a/source/libs/geometry/src/geomFunc.c b/source/libs/geometry/src/geomFunc.c index 0e2bac584db4ef8602394a57e0d1fbcfb21fa79a..3588bf8b7d932b9ffc2ce94a47971883380cea4a 100644 --- a/source/libs/geometry/src/geomFunc.c +++ b/source/libs/geometry/src/geomFunc.c @@ -145,7 +145,7 @@ int32_t executeMakePointFunc(SColumnInfoData *pInputData[], int32_t iLeft, int32 goto _exit; } - colDataAppend(pOutputData, TMAX(iLeft, iRight), output, (output == NULL)); + colDataSetVal(pOutputData, TMAX(iLeft, iRight), output, (output == NULL)); _exit: if (output) { @@ -165,7 +165,7 @@ int32_t executeGeomFromTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnI goto _exit; } - colDataAppend(pOutputData, i, output, (output == NULL)); + colDataSetVal(pOutputData, i, output, (output == NULL)); _exit: if (output) { @@ -185,7 +185,7 @@ int32_t executeAsTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnInfoDat goto _exit; } - colDataAppend(pOutputData, i, output, (output == NULL)); + colDataSetVal(pOutputData, i, output, (output == NULL)); _exit: if (output) { @@ -213,7 +213,7 @@ int32_t executeRelationFunc(const GEOSGeometry *geom1, const GEOSPreparedGeometr } } - 
colDataAppend(pOutputData, i, &res, (res==-1)); + colDataSetVal(pOutputData, i, &res, (res==-1)); return code; } diff --git a/source/libs/geometry/test/geomFuncTestUtil.cpp b/source/libs/geometry/test/geomFuncTestUtil.cpp index cb59ea098fdf20112382123196517b404f60ac51..0918781499e98ef32e9ffa2a9a1e717d9266c4eb 100644 --- a/source/libs/geometry/test/geomFuncTestUtil.cpp +++ b/source/libs/geometry/test/geomFuncTestUtil.cpp @@ -84,7 +84,7 @@ void setScalarParam(SScalarParam *sclParam, int32_t type, void *valueArray, TDRo } else { const char *val = (const char *)valueArray + (i * bytes); - colDataAppend(sclParam->columnData, i, val, false); + colDataSetVal(sclParam->columnData, i, val, false); } } } diff --git a/source/libs/planner/test/CMakeLists.txt b/source/libs/planner/test/CMakeLists.txt index b9d5c85717c71cdcb73cf660ac796541896c85e0..73aca8572a0ff5c0dd916ed65a168eedf2424f95 100644 --- a/source/libs/planner/test/CMakeLists.txt +++ b/source/libs/planner/test/CMakeLists.txt @@ -12,10 +12,17 @@ IF(NOT TD_DARWIN) "${SOURCE_LIST}/../../../parser/test/mockCatalogService.cpp" ) - TARGET_LINK_LIBRARIES( - plannerTest - PUBLIC os util common nodes planner parser catalog transport gtest function qcom - ) + IF (TD_GRANT) + TARGET_LINK_LIBRARIES( + plannerTest + PUBLIC os util common nodes planner parser catalog transport gtest function qcom grant + ) + ELSE () + TARGET_LINK_LIBRARIES( + plannerTest + PUBLIC os util common nodes planner parser catalog transport gtest function qcom + ) + ENDIF() TARGET_INCLUDE_DIRECTORIES( plannerTest diff --git a/source/libs/scheduler/test/CMakeLists.txt b/source/libs/scheduler/test/CMakeLists.txt index ce928862215d3efe7dc14c1c50e581416cb18777..703bd5932b2301d0006960f94a0131f792501c43 100644 --- a/source/libs/scheduler/test/CMakeLists.txt +++ b/source/libs/scheduler/test/CMakeLists.txt @@ -7,10 +7,18 @@ IF(NOT TD_DARWIN) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) ADD_EXECUTABLE(schedulerTest ${SOURCE_LIST}) - 
TARGET_LINK_LIBRARIES( - schedulerTest - PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler - ) + + IF (TD_GRANT) + TARGET_LINK_LIBRARIES( + schedulerTest + PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler grant + ) + ELSE () + TARGET_LINK_LIBRARIES( + schedulerTest + PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler + ) + ENDIF() TARGET_INCLUDE_DIRECTORIES( schedulerTest diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index da4e442f1a32d0cec52dcdfc749946bb1daf6a08..b6bc9c888bec1136f82e13d5f072c0acbed9d937 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -42,10 +42,11 @@ typedef struct { TdThreadMutex cfMutex; SHashObj* cfInst; int64_t defaultCfInit; -} SBackendHandle; +} SBackendWrapper; void* streamBackendInit(const char* path); void streamBackendCleanup(void* arg); +void streamBackendHandleCleanup(void* arg); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index c7ee308b6149970fc7f039cb0bc857a8031d4f73..eec37d7dbb6cfaa4e7e3e9cd197c297256593657 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -31,8 +31,9 @@ typedef struct { void* timer; } SStreamGlobalEnv; -static SStreamGlobalEnv streamEnv; +extern SStreamGlobalEnv streamEnv; +void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); int32_t streamDispatchStreamBlock(SStreamTask* pTask); SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); @@ -41,20 +42,20 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT void destroyStreamDataBlock(SStreamDataBlock* pBlock); int32_t 
streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData); -int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* data); - int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock); int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); +int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); -int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet); +int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet); SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); extern int32_t streamBackendId; +extern int32_t streamBackendCfWrapperId; #ifdef __cplusplus } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 93bcd6a4d910bf3140169a3b217911e48a532cdd..691d31e64cd8061f5a4a04507864501078ebc4f5 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,11 +16,13 @@ #include "streamInc.h" #include "ttimer.h" -#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 20480 -#define STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE (50) +#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480 +#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30) #define ONE_MB_F (1048576.0) #define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F) +SStreamGlobalEnv streamEnv; + int32_t streamInit() { int8_t old; while (1) { @@ -36,6 +38,7 @@ int32_t streamInit() { } atomic_store_8(&streamEnv.inited, 1); } + return 0; } @@ -52,17 +55,30 @@ void streamCleanUp() { } } +char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { + char buf[128] = {0}; + sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId); + return 
taosStrdup(buf); +} + void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; + int8_t status = atomic_load_8(&pTask->triggerStatus); + qDebug("s-task:%s in scheduler timer, trigger status:%d", pTask->id.idStr, status); + if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { streamMetaReleaseTask(NULL, pTask); + qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr); return; } - if (atomic_load_8(&pTask->triggerStatus) == TASK_TRIGGER_STATUS__ACTIVE) { + if (status == TASK_TRIGGER_STATUS__ACTIVE) { SStreamTrigger* trigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); - if (trigger == NULL) return; + if (trigger == NULL) { + return; + } + trigger->type = STREAM_INPUT__GET_RES; trigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); if (trigger->pBlock == NULL) { @@ -74,23 +90,28 @@ void streamSchedByTimer(void* param, void* tmrId) { atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) { - taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); + taosFreeQitem(trigger); + taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer); return; } streamSchedExec(pTask); } - taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); + taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer); } -int32_t streamSetupTrigger(SStreamTask* pTask) { +int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { if (pTask->triggerParam != 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); - ASSERT(ref == 2); - pTask->timer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer); + ASSERT(ref == 2 && pTask->schedTimer == NULL); + + qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", 
pTask->id.idStr, pTask->triggerParam); + + pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer); pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE; } + return 0; } @@ -103,16 +124,20 @@ int32_t streamSchedExec(SStreamTask* pTask) { if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + qError("failed to create msg to aunch s-task:%s, reason out of memory", pTask->id.idStr); return -1; } - pRunReq->head.vgId = pTask->nodeId; + pRunReq->head.vgId = pTask->info.nodeId; pRunReq->streamId = pTask->id.streamId; pRunReq->taskId = pTask->id.taskId; + qDebug("trigger to run s-task:%s", pTask->id.idStr); + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); - qDebug("trigger to run s-task:%s", pTask->id.idStr); + } else { + qDebug("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus); } return 0; @@ -142,7 +167,7 @@ int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pR pDispatchRsp->streamId = htobe64(pReq->streamId); pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); - pDispatchRsp->downstreamNodeId = htonl(pTask->nodeId); + pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); pRsp->pCont = buf; @@ -158,21 +183,18 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, // enqueue if (pData != NULL) { - qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x, reqId %" PRId64, pTask->id.idStr, pTask->selfChildId, - pReq->srcTaskId, pReq->reqId); + qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, + pReq->srcTaskId, 
pReq->srcNodeId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; pData->srcVgId = 0; - // decode - /*pData->blocks = pReq->data;*/ - /*pBlock->sourceVer = pReq->sourceVer;*/ streamRetrieveReqToData(pReq, pData); if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; } else { status = TASK_INPUT_STATUS__FAILED; } - } else { + } else { // todo handle oom /*streamTaskInputFail(pTask);*/ /*status = TASK_INPUT_STATUS__FAILED;*/ } @@ -187,6 +209,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, pRsp->pCont = buf; pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamRetrieveRsp); tmsgSendRsp(pRsp); + return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; } @@ -231,9 +254,26 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S return 0; } +// todo record the idle time for dispatch data int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { + if (code != TSDB_CODE_SUCCESS) { + // dispatch message failed: network error, or node not available. + // in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set + // flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure + // happened too fast. 
todo handle the shuffle dispatch failure + qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr, + pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount); + int32_t ret = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); + if (ret != TSDB_CODE_SUCCESS) { + + } + + return TSDB_CODE_SUCCESS; + } + qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code); + // there are other dispatch message not response yet if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp); @@ -242,23 +282,39 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i } } - int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus); - ASSERT(old == TASK_OUTPUT_STATUS__WAIT); + pTask->msgInfo.retryCount = 0; + ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT); + + qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputStatus); - // the input queue of the (down stream) task that receive the output data is full, so the TASK_INPUT_STATUS_BLOCKED is rsp - // todo we need to send EMPTY PACKAGE to detect if the input queue is available for output of upstream task, every 50 ms. 
+ // the input queue of the (down stream) task that receive the output data is full, + // so the TASK_INPUT_STATUS_BLOCKED is rsp + // todo blocking the output status if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { - // TODO: init recover timer - qError("s-task:%s inputQ of downstream task:0x%x is full, need to block output", pTask->id.idStr, pRsp->downstreamTaskId); + pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time + + int32_t waitDuration = 300; // 300 ms + qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 "wait for %dms and retry dispatch data", + pTask->id.idStr, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, waitDuration); + streamRetryDispatchStreamBlock(pTask, waitDuration); + } else { // pipeline send data in output queue + // this message has been sent successfully, let's try next one. + destroyStreamDataBlock(pTask->msgInfo.pData); + pTask->msgInfo.pData = NULL; + + if (pTask->msgInfo.blockingTs != 0) { + int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs; + qDebug("s-task:%s resume to normal from inputQ blocking, idle time:%"PRId64"ms", pTask->id.idStr, el); + pTask->msgInfo.blockingTs = 0; + } + // now ready for next data output atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); - qError("s-task:%s ignore error, and reset task output status:%d", pTask->id.idStr, pTask->outputStatus); - return 0; + // otherwise, continue dispatch the first block to down stream task in pipeline + streamDispatchStreamBlock(pTask); } - // otherwise, continue dispatch the first block to down stream task in pipeline - streamDispatchStreamBlock(pTask); return 0; } @@ -267,25 +323,23 @@ int32_t streamProcessRunReq(SStreamTask* pTask) { return -1; } - /*if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/ + /*if (pTask->dispatchType == TASK_OUTPUT__FIXED_DISPATCH || pTask->dispatchType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/ 
/*streamDispatchStreamBlock(pTask);*/ /*}*/ return 0; } int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { - qDebug("s-task:%s receive retrieve req from node %d taskId:0x%x", pTask->id.idStr, pReq->srcNodeId, pReq->srcTaskId); streamTaskEnqueueRetrieve(pTask, pReq, pRsp); - - ASSERT(pTask->taskLevel != TASK_LEVEL__SINK); + ASSERT(pTask->info.taskLevel != TASK_LEVEL__SINK); streamSchedExec(pTask); return 0; } bool tInputQueueIsFull(const SStreamTask* pTask) { - bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY; + bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUE_CAPACITY; double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue); - return (isFull || size >= STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE); + return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE); } int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { @@ -295,9 +349,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* px = (SStreamDataSubmit*)pItem; - if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) { + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) { qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total, + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); taosFreeQitem(pItem); @@ -315,9 +369,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { px->submit.msgLen, px->submit.ver, total, size + px->submit.msgLen/1048576.0); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type 
== STREAM_INPUT__REF_DATA_BLOCK) { - if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) { + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) { qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", - pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total, + pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); destroyStreamDataBlock((SStreamDataBlock*) pItem); return -1; @@ -346,19 +400,21 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; } -void* streamQueueNextItem(SStreamQueue* queue) { - int8_t dequeueFlag = atomic_exchange_8(&queue->status, STREAM_QUEUE__PROCESSING); - if (dequeueFlag == STREAM_QUEUE__FAILED) { - ASSERT(queue->qItem != NULL); - return streamQueueCurItem(queue); +void* streamQueueNextItem(SStreamQueue* pQueue) { + int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING); + + if (flag == STREAM_QUEUE__FAILED) { + ASSERT(pQueue->qItem != NULL); + return streamQueueCurItem(pQueue); } else { - queue->qItem = NULL; - taosGetQitem(queue->qall, &queue->qItem); - if (queue->qItem == NULL) { - taosReadAllQitems(queue->queue, queue->qall); - taosGetQitem(queue->qall, &queue->qItem); + pQueue->qItem = NULL; + taosGetQitem(pQueue->qall, &pQueue->qItem); + if (pQueue->qItem == NULL) { + taosReadAllQitems(pQueue->queue, pQueue->qall); + taosGetQitem(pQueue->qall, &pQueue->qItem); } - return streamQueueCurItem(queue); + + return streamQueueCurItem(pQueue); } } diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index cebe4e8204777926bf6aabf3352436595b84011d..4646af641f4ebacbea9c0a7a0452c9ac55ed6ac9 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ 
b/source/libs/stream/src/streamBackendRocksdb.c @@ -40,16 +40,8 @@ typedef struct { rocksdb_comparator_t** pCompares; } RocksdbCfInst; -uint32_t nextPow2(uint32_t x) { - x = x - 1; - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - x = x | (x >> 16); - return x + 1; -} -int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); +uint32_t nextPow2(uint32_t x); +int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf); void destroyRocksdbCfInst(RocksdbCfInst* inst); @@ -71,7 +63,22 @@ typedef int (*BackendCmpFunc)(void* state, const char* aBuf, size_t aLen, const typedef void (*DestroyFunc)(void* state); typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest); typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, int64_t* ttl, char** dest); +typedef struct { + const char* key; + int32_t len; + int idx; + BackendCmpFunc cmpFunc; + EncodeFunc enFunc; + DecodeFunc deFunc; + ToStringFunc toStrFunc; + CompareName cmpName; + DestroyFunc detroyFunc; + EncodeValueFunc enValueFunc; + DecodeValueFunc deValueFunc; +} SCfInit; + +#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); const char* compareDefaultName(void* name); const char* compareStateName(void* name); const char* compareWinKeyName(void* name); @@ -80,11 +87,67 @@ const char* compareFuncKeyName(void* name); const char* compareParKeyName(void* name); const char* comparePartagKeyName(void* name); +int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int defaultKeyEncode(void* k, char* buf); +int defaultKeyDecode(void* k, char* buf); +int defaultKeyToString(void* k, char* buf); + +int stateKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int stateKeyEncode(void* k, char* buf); +int stateKeyDecode(void* k, char* buf); +int stateKeyToString(void* k, char* buf); + +int 
stateSessionKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int stateSessionKeyEncode(void* ses, char* buf); +int stateSessionKeyDecode(void* ses, char* buf); +int stateSessionKeyToString(void* k, char* buf); + +int winKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int winKeyEncode(void* k, char* buf); +int winKeyDecode(void* k, char* buf); +int winKeyToString(void* k, char* buf); + +int tupleKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int tupleKeyEncode(void* k, char* buf); +int tupleKeyDecode(void* k, char* buf); +int tupleKeyToString(void* k, char* buf); + +int parKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen); +int parKeyEncode(void* k, char* buf); +int parKeyDecode(void* k, char* buf); +int parKeyToString(void* k, char* buf); + +int stremaValueEncode(void* k, char* buf); +int streamValueDecode(void* k, char* buf); +int32_t streamValueToString(void* k, char* buf); +int32_t streaValueIsStale(void* k, int64_t ts); +void destroyFunc(void* arg); + +int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest); +int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest); + +SCfInit ginitDict[] = { + {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, + destroyFunc, encodeValueFunc, decodeValueFunc}, + {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc, + encodeValueFunc, decodeValueFunc}, + {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc, + encodeValueFunc, decodeValueFunc}, + {"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString, + compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc}, + {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, 
tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc, + encodeValueFunc, decodeValueFunc}, + {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc, + encodeValueFunc, decodeValueFunc}, + {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc, + encodeValueFunc, decodeValueFunc}, +}; + void* streamBackendInit(const char* path) { uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; qDebug("start to init stream backend at %s", path); - SBackendHandle* pHandle = taosMemoryCalloc(1, sizeof(SBackendHandle)); + SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); pHandle->list = tdListNew(sizeof(SCfComparator)); taosThreadMutexInit(&pHandle->mutex, NULL); taosThreadMutexInit(&pHandle->cfMutex, NULL); @@ -154,8 +217,8 @@ _EXIT: return NULL; } void streamBackendCleanup(void* arg) { - SBackendHandle* pHandle = (SBackendHandle*)arg; - RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL); + SBackendWrapper* pHandle = (SBackendWrapper*)arg; + RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL); while (pIter != NULL) { RocksdbCfInst* inst = *pIter; destroyRocksdbCfInst(inst); @@ -194,17 +257,79 @@ void streamBackendCleanup(void* arg) { qDebug("destroy stream backend backend:%p", pHandle); return; } +void streamBackendHandleCleanup(void* arg) { + SBackendCfWrapper* wrapper = arg; + bool remove = wrapper->remove; + qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); + if (wrapper->rocksdb == NULL) { + return; + } + + int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); + + char* err = NULL; + if (remove) { + for (int i = 0; i < cfLen; i++) { + if (wrapper->pHandle[i] != NULL) + rocksdb_drop_column_family(wrapper->rocksdb, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[i], &err); + if (err != NULL) { + // qError("failed to create cf:%s_%s, 
reason:%s", wrapper->idstr, ginitDict[i].key, err); + taosMemoryFreeClear(err); + } + } + } else { + rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + for (int i = 0; i < cfLen; i++) { + if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, flushOpt, wrapper->pHandle[i], &err); + if (err != NULL) { + qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err); + taosMemoryFreeClear(err); + } + } + rocksdb_flushoptions_destroy(flushOpt); + } + + for (int i = 0; i < cfLen; i++) { + if (wrapper->pHandle[i] != NULL) { + rocksdb_column_family_handle_destroy(wrapper->pHandle[i]); + } + } + taosMemoryFreeClear(wrapper->pHandle); + for (int i = 0; i < cfLen; i++) { + rocksdb_options_destroy(wrapper->cfOpts[i]); + rocksdb_block_based_options_destroy(((RocksdbCfParam*)wrapper->param)[i].tableOpt); + } + + if (remove) { + streamBackendDelCompare(wrapper->pBackend, wrapper->pComparNode); + } + rocksdb_writeoptions_destroy(wrapper->writeOpts); + wrapper->writeOpts = NULL; + + rocksdb_readoptions_destroy(wrapper->readOpts); + wrapper->readOpts = NULL; + taosMemoryFreeClear(wrapper->cfOpts); + taosMemoryFreeClear(wrapper->param); + + taosThreadRwlockDestroy(&wrapper->rwLock); + wrapper->rocksdb = NULL; + taosReleaseRef(streamBackendId, wrapper->backendId); + + qDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr); + taosMemoryFree(wrapper); + return; +} SListNode* streamBackendAddCompare(void* backend, void* arg) { - SBackendHandle* pHandle = (SBackendHandle*)backend; - SListNode* node = NULL; + SBackendWrapper* pHandle = (SBackendWrapper*)backend; + SListNode* node = NULL; taosThreadMutexLock(&pHandle->mutex); node = tdListAdd(pHandle->list, arg); taosThreadMutexUnlock(&pHandle->mutex); return node; } void streamBackendDelCompare(void* backend, void* arg) { - SBackendHandle* pHandle = (SBackendHandle*)backend; - SListNode* node = NULL; + SBackendWrapper* pHandle = (SBackendWrapper*)backend; + SListNode* 
node = NULL; taosThreadMutexLock(&pHandle->mutex); node = tdListPopNode(pHandle->list, arg); taosThreadMutexUnlock(&pHandle->mutex); @@ -542,23 +667,6 @@ void destroyFunc(void* arg) { return; } -typedef struct { - const char* key; - int32_t len; - int idx; - BackendCmpFunc cmpFunc; - EncodeFunc enFunc; - DecodeFunc deFunc; - ToStringFunc toStrFunc; - CompareName cmpName; - DestroyFunc detroyFunc; - EncodeValueFunc enValueFunc; - DecodeValueFunc deValueFunc; - -} SCfInit; - -#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); - int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) { SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)}; int32_t len = 0; @@ -613,22 +721,6 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) { } return key.len; } -SCfInit ginitDict[] = { - {"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName, - destroyFunc, encodeValueFunc, decodeValueFunc}, - {"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString, - compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc}, - {"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, - {"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc, - encodeValueFunc, decodeValueFunc}, -}; const 
char* compareDefaultName(void* arg) { (void)arg; @@ -697,11 +789,11 @@ void destroyRocksdbCfInst(RocksdbCfInst* inst) { } int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) { - SBackendHandle* handle = backend; - char* err = NULL; - int64_t streamId; - int32_t taskId, dummy = 0; - char suffix[64] = {0}; + SBackendWrapper* handle = backend; + char* err = NULL; + int64_t streamId; + int32_t taskId, dummy = 0; + char suffix[64] = {0}; rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam*)); @@ -821,23 +913,30 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t int streamStateOpenBackend(void* backend, SStreamState* pState) { qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId); taosAcquireRef(streamBackendId, pState->streamBackendRid); - SBackendHandle* handle = backend; - - sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId); + SBackendWrapper* handle = backend; + SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper)); taosThreadMutexLock(&handle->cfMutex); + RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { RocksdbCfInst* inst = *ppInst; - pState->pTdbState->rocksdb = inst->db; - pState->pTdbState->pHandle = (void**)inst->pHandle; - pState->pTdbState->writeOpts = inst->wOpt; - pState->pTdbState->readOpts = inst->rOpt; - pState->pTdbState->cfOpts = (void**)(inst->cfOpt); - pState->pTdbState->dbOpt = handle->dbOpt; - pState->pTdbState->param = inst->param; - pState->pTdbState->pBackend = handle; - pState->pTdbState->pComparNode = inst->pCompareNode; + pBackendCfWrapper->rocksdb = inst->db; + pBackendCfWrapper->pHandle = (void**)inst->pHandle; + pBackendCfWrapper->writeOpts = 
inst->wOpt; + pBackendCfWrapper->readOpts = inst->rOpt; + pBackendCfWrapper->cfOpts = (void**)(inst->cfOpt); + pBackendCfWrapper->dbOpt = handle->dbOpt; + pBackendCfWrapper->param = inst->param; + pBackendCfWrapper->pBackend = handle; + pBackendCfWrapper->pComparNode = inst->pCompareNode; taosThreadMutexUnlock(&handle->cfMutex); + pBackendCfWrapper->backendId = pState->streamBackendRid; + memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr)); + + int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper); + pState->pTdbState->backendCfWrapperId = id; + pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper; + qInfo("succ to open state %p on backendWrapper, %p, %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); return 0; } taosThreadMutexUnlock(&handle->cfMutex); @@ -870,27 +969,33 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { pCompare[i] = compare; } rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(cfLen, sizeof(rocksdb_column_family_handle_t*)); - pState->pTdbState->rocksdb = handle->db; - pState->pTdbState->pHandle = (void**)cfHandle; - pState->pTdbState->writeOpts = rocksdb_writeoptions_create(); - pState->pTdbState->readOpts = rocksdb_readoptions_create(); - pState->pTdbState->cfOpts = (void**)cfOpt; - pState->pTdbState->dbOpt = handle->dbOpt; - pState->pTdbState->param = param; - pState->pTdbState->pBackend = handle; - - taosThreadRwlockInit(&pState->pTdbState->rwLock, NULL); + pBackendCfWrapper->rocksdb = handle->db; + pBackendCfWrapper->pHandle = (void**)cfHandle; + pBackendCfWrapper->writeOpts = rocksdb_writeoptions_create(); + pBackendCfWrapper->readOpts = rocksdb_readoptions_create(); + pBackendCfWrapper->cfOpts = (void**)cfOpt; + pBackendCfWrapper->dbOpt = handle->dbOpt; + pBackendCfWrapper->param = param; + pBackendCfWrapper->pBackend = handle; + pBackendCfWrapper->backendId = pState->streamBackendRid; + taosThreadRwlockInit(&pBackendCfWrapper->rwLock, NULL); 
SCfComparator compare = {.comp = pCompare, .numOfComp = cfLen}; - pState->pTdbState->pComparNode = streamBackendAddCompare(handle, &compare); - rocksdb_writeoptions_disable_WAL(pState->pTdbState->writeOpts, 1); - qInfo("succ to open state %p on backend, %p, 0x%" PRIx64 "-%d", pState, handle, pState->streamId, pState->taskId); + pBackendCfWrapper->pComparNode = streamBackendAddCompare(handle, &compare); + rocksdb_writeoptions_disable_WAL(pBackendCfWrapper->writeOpts, 1); + memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr)); + + int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper); + pState->pTdbState->backendCfWrapperId = id; + pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper; + qInfo("succ to open state %p on backendWrapper %p %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr); return 0; } void streamStateCloseBackend(SStreamState* pState, bool remove) { - SBackendHandle* pHandle = pState->pTdbState->pBackend; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SBackendWrapper* pHandle = wrapper->pBackend; taosThreadMutexLock(&pHandle->cfMutex); - RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); + RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { RocksdbCfInst* inst = *ppInst; taosMemoryFree(inst); @@ -899,63 +1004,10 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { taosThreadMutexUnlock(&pHandle->cfMutex); char* status[] = {"close", "drop"}; - qInfo("start to close %s state %p on backend %p 0x%" PRIx64 "-%d", status[remove == false ? 
0 : 1], pState, pHandle, - pState->streamId, pState->taskId); - if (pState->pTdbState->rocksdb == NULL) { - return; - } - - int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); - - char* err = NULL; - if (remove) { - for (int i = 0; i < cfLen; i++) { - if (pState->pTdbState->pHandle[i] != NULL) - rocksdb_drop_column_family(pState->pTdbState->rocksdb, - ((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[i], &err); - if (err != NULL) { - qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err); - taosMemoryFreeClear(err); - } - } - } else { - rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); - for (int i = 0; i < cfLen; i++) { - if (pState->pTdbState->pHandle[i] != NULL) - rocksdb_flush_cf(pState->pTdbState->rocksdb, flushOpt, pState->pTdbState->pHandle[i], &err); - if (err != NULL) { - qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err); - taosMemoryFreeClear(err); - } - } - rocksdb_flushoptions_destroy(flushOpt); - } - - for (int i = 0; i < cfLen; i++) { - if (pState->pTdbState->pHandle[i] != NULL) { - rocksdb_column_family_handle_destroy(pState->pTdbState->pHandle[i]); - } - } - taosMemoryFreeClear(pState->pTdbState->pHandle); - for (int i = 0; i < cfLen; i++) { - rocksdb_options_destroy(pState->pTdbState->cfOpts[i]); - rocksdb_block_based_options_destroy(((RocksdbCfParam*)pState->pTdbState->param)[i].tableOpt); - } - - if (remove) { - streamBackendDelCompare(pState->pTdbState->pBackend, pState->pTdbState->pComparNode); - } - rocksdb_writeoptions_destroy(pState->pTdbState->writeOpts); - pState->pTdbState->writeOpts = NULL; - - rocksdb_readoptions_destroy(pState->pTdbState->readOpts); - pState->pTdbState->readOpts = NULL; - taosMemoryFreeClear(pState->pTdbState->cfOpts); - taosMemoryFreeClear(pState->pTdbState->param); - - taosThreadRwlockDestroy(&pState->pTdbState->rwLock); - pState->pTdbState->rocksdb = NULL; - taosReleaseRef(streamBackendId, 
pState->streamBackendRid); + qInfo("start to close %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper, + wrapper->idstr); + wrapper->remove |= remove; // update by other pState + taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId); } void streamStateDestroyCompar(void* arg) { SCfComparator* comp = (SCfComparator*)arg; @@ -975,26 +1027,26 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { } } if (pState != NULL && idx != -1) { + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_column_family_handle_t* cf = NULL; - taosThreadRwlockRdlock(&pState->pTdbState->rwLock); - cf = pState->pTdbState->pHandle[idx]; - taosThreadRwlockUnlock(&pState->pTdbState->rwLock); + taosThreadRwlockRdlock(&wrapper->rwLock); + cf = wrapper->pHandle[idx]; + taosThreadRwlockUnlock(&wrapper->rwLock); if (cf == NULL) { char buf[128] = {0}; - GEN_COLUMN_FAMILY_NAME(buf, pState->pTdbState->idstr, ginitDict[idx].key); + GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key); char* err = NULL; - taosThreadRwlockWrlock(&pState->pTdbState->rwLock); - cf = rocksdb_create_column_family(pState->pTdbState->rocksdb, pState->pTdbState->cfOpts[idx], buf, &err); + taosThreadRwlockWrlock(&wrapper->rwLock); + cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err); if (err != NULL) { idx = -1; - qError("failed to to open cf, %p 0x%" PRIx64 "-%d_%s, reason:%s", pState, pState->streamId, pState->taskId, - funcName, err); + qError("failed to to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err); taosMemoryFree(err); } else { - pState->pTdbState->pHandle[idx] = cf; + wrapper->pHandle[idx] = cf; } - taosThreadRwlockUnlock(&pState->pTdbState->rwLock); + taosThreadRwlockUnlock(&wrapper->rwLock); } } @@ -1014,8 +1066,9 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa rocksdb_readoptions_t** readOpt) { int idx = 
streamStateGetCfIdx(pState, cfName); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; if (snapshot != NULL) { - *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(pState->pTdbState->rocksdb); + *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); } rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); *readOpt = rOpt; @@ -1023,8 +1076,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa rocksdb_readoptions_set_snapshot(rOpt, *snapshot); rocksdb_readoptions_set_fill_cache(rOpt, 0); - return rocksdb_create_iterator_cf(pState->pTdbState->rocksdb, rOpt, - ((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[idx]); + return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); } #define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \ @@ -1038,15 +1090,15 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa code = -1; \ break; \ } \ - char toString[128] = {0}; \ + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ + char toString[128] = {0}; \ if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = \ - ((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = pState->pTdbState->rocksdb; \ - rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \ - char* ttlV = NULL; \ - int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->rocksdb; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ + char* ttlV = NULL; \ + int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ rocksdb_put_cf(db, opts, pHandle, 
(const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ if (err != NULL) { \ taosMemoryFree(err); \ @@ -1058,81 +1110,76 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa taosMemoryFree(ttlV); \ } while (0); -#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - qWarn("streamState failed to get cf name: %s", funcname); \ - code = -1; \ - break; \ - } \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = \ - ((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = pState->pTdbState->rocksdb; \ - rocksdb_readoptions_t* opts = pState->pTdbState->readOpts; \ - size_t len = 0; \ - char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ - if (val == NULL || len == 0) { \ - if (err == NULL) { \ - qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, pState->pTdbState->idstr, \ - funcname); \ - } else { \ - qError("streamState str: %s failed to read from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \ - err); \ - taosMemoryFreeClear(err); \ - } \ - code = -1; \ - } else { \ - char* p = NULL; \ - int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ - if (tlen <= 0) { \ - qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, pState->pTdbState->idstr, \ - funcname); \ - code = -1; \ - } else { \ - qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, pState->pTdbState->idstr, funcname, \ - tlen); \ - } \ - taosMemoryFree(val); \ - if (vLen != NULL) *vLen = tlen; \ - } \ - if (code == 0) \ - qDebug("streamState str: %s succ 
to read from %s_%s", toString, pState->pTdbState->idstr, funcname); \ +#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s", funcname); \ + code = -1; \ + break; \ + } \ + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->rocksdb; \ + rocksdb_readoptions_t* opts = wrapper->readOpts; \ + size_t len = 0; \ + char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \ + if (val == NULL || len == 0) { \ + if (err == NULL) { \ + qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \ + } else { \ + qError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFreeClear(err); \ + } \ + code = -1; \ + } else { \ + char* p = NULL; \ + int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \ + if (tlen <= 0) { \ + qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \ + funcname); \ + code = -1; \ + } else { \ + qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \ + } \ + taosMemoryFree(val); \ + if (vLen != NULL) *vLen = tlen; \ + } \ + if (code == 0) qDebug("streamState str: %s succ to read from %s_%s", toString, wrapper->idstr, funcname); \ } while (0); -#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ - do { \ - code = 0; \ - char buf[128] = {0}; \ - char* err = NULL; \ - 
int i = streamStateGetCfIdx(pState, funcname); \ - if (i < 0) { \ - qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ - code = -1; \ - break; \ - } \ - char toString[128] = {0}; \ - if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ - int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ - rocksdb_column_family_handle_t* pHandle = \ - ((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \ - rocksdb_t* db = pState->pTdbState->rocksdb; \ - rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \ - rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ - if (err != NULL) { \ - qError("streamState str: %s failed to del from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \ - err); \ - taosMemoryFree(err); \ - code = -1; \ - } else { \ - qTrace("streamState str: %s succ to del from %s_%s", toString, pState->pTdbState->idstr, funcname); \ - } \ +#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \ + do { \ + code = 0; \ + char buf[128] = {0}; \ + char* err = NULL; \ + int i = streamStateGetCfIdx(pState, funcname); \ + if (i < 0) { \ + qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \ + code = -1; \ + break; \ + } \ + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \ + char toString[128] = {0}; \ + if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \ + int32_t klen = ginitDict[i].enFunc((void*)key, buf); \ + rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \ + rocksdb_t* db = wrapper->rocksdb; \ + rocksdb_writeoptions_t* opts = wrapper->writeOpts; \ + rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \ + if (err != NULL) { \ + qError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \ + taosMemoryFree(err); \ + code = -1; \ + 
} else { \ + qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \ + } \ } while (0); // state cf @@ -1158,18 +1205,19 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) { int32_t streamStateClear_rocksdb(SStreamState* pState) { qDebug("streamStateClear_rocksdb"); - char sKeyStr[128] = {0}; - char eKeyStr[128] = {0}; - SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; - SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + char sKeyStr[128] = {0}; + char eKeyStr[128] = {0}; + SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number}; + SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number}; int sLen = stateKeyEncode(&sKey, sKeyStr); int eLen = stateKeyEncode(&eKey, eKeyStr); - if (pState->pTdbState->pHandle[1] != NULL) { + if (wrapper->pHandle[1] != NULL) { char* err = NULL; - rocksdb_delete_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, pState->pTdbState->pHandle[1], - sKeyStr, sLen, eKeyStr, eLen, &err); + rocksdb_delete_range_cf(wrapper->rocksdb, wrapper->writeOpts, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen, + &err); if (err != NULL) { char toStringStart[128] = {0}; char toStringEnd[128] = {0}; @@ -1179,7 +1227,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) { qWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err); taosMemoryFree(err); } else { - rocksdb_compact_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); + rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen); } } @@ -1273,8 +1321,9 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin if (pCur == NULL) { return NULL; } + SBackendCfWrapper* wrapper = 
pState->pTdbState->pBackendCfWrapper; pCur->number = pState->number; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1307,15 +1356,16 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); - int32_t code = 0; + int32_t code = 0; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX}; STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0); - char buf[128] = {0}; - int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); - + char buf[128] = {0}; + int32_t klen = stateKeyEncode((void*)&maxStateKey, buf); SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); rocksdb_iter_seek(pCur->iter, buf, (size_t)klen); @@ -1335,10 +1385,11 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateGetCur_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) return NULL; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1426,12 
+1477,14 @@ int32_t streamStateSessionDel_rocksdb(SStreamState* pState, const SSessionKey* k } SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) { qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { return NULL; } pCur->number = pState->number; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1467,11 +1520,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta } SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { return NULL; } - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -1504,11 +1558,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { qDebug("streamStateSessionSeekKeyNext_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { return NULL; 
} - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); pCur->number = pState->number; @@ -1597,11 +1652,12 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillGetCur_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; if (pCur == NULL) return NULL; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1656,12 +1712,13 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillSeekKeyNext_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (!pCur) { return NULL; } - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1692,12 +1749,13 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const } SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillSeekKeyPrev_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; 
+ SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { return NULL; } - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1728,12 +1786,13 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const } int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { qDebug("streamStateSessionGetKeyByRange_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { return -1; } pCur->number = pState->number; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); @@ -1964,6 +2023,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co int code = 0; char* err = NULL; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; rocksdb_snapshot_t* snapshot = NULL; rocksdb_readoptions_t* readopts = NULL; rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts); @@ -1996,15 +2056,16 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co } rocksdb_iter_next(pIter); } - rocksdb_release_snapshot(pState->pTdbState->rocksdb, snapshot); + rocksdb_release_snapshot(wrapper->rocksdb, snapshot); rocksdb_readoptions_destroy(readopts); rocksdb_iter_destroy(pIter); return code; } void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + 
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - pCur->db = pState->pTdbState->rocksdb; + pCur->db = wrapper->rocksdb; pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot, (rocksdb_readoptions_t**)&pCur->readOpt); return pCur; @@ -2051,7 +2112,8 @@ void streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_ void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); } int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key, void* val, int32_t vlen, int64_t ttl) { - int i = streamStateGetCfIdx(pState, cfName); + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + int i = streamStateGetCfIdx(pState, cfName); if (i < 0) { qError("streamState failed to put to cf name:%s", cfName); @@ -2062,7 +2124,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_wr char* ttlV = NULL; int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV); - rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[i].idx]; + rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); taosMemoryFree(ttlV); return 0; @@ -2074,7 +2136,9 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb char* ttlV = tmpBuf; int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV); - rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[cfIdx].idx]; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + + rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx]; rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen); if (tmpBuf == NULL) { @@ -2083,8 +2147,9 @@ int32_t 
streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb return 0; } int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { - char* err = NULL; - rocksdb_write(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); + char* err = NULL; + SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; + rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err); if (err != NULL) { qError("streamState failed to write batch, err:%s", err); taosMemoryFree(err); @@ -2092,3 +2157,13 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) { } return 0; } + +uint32_t nextPow2(uint32_t x) { + x = x - 1; + x = x | (x >> 1); + x = x | (x >> 2); + x = x | (x >> 4); + x = x | (x >> 8); + x = x | (x >> 16); + return x + 1; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 670cfbead1e180061fe0f972290351125eb9852c..722c557b8f1e2b6b44bf851454f60ed6ca14ad23 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -123,7 +123,7 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { if (pTask->checkpointingId == 0) { pTask->checkpointingId = checkpointId; - pTask->checkpointAlignCnt = taosArrayGetSize(pTask->childEpInfo); + pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList); } ASSERT(pTask->checkpointingId == checkpointId); @@ -165,7 +165,7 @@ int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStre int64_t checkpointId = pReq->checkpointId; int32_t childId = pReq->childId; - if (taosArrayGetSize(pTask->childEpInfo) > 0) { + if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) { code = streamAlignCheckpoint(pTask, checkpointId, childId); if (code > 0) { 
return 0; diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 7c06e7deb30501368b3588cf0906841fd8afaf54..37923ca80720e314afe2b272629c1409c2341342 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -64,11 +64,11 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem; - pStreamBlocks->childId = pTask->selfChildId; + pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pSubmit->ver; } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem; - pStreamBlocks->childId = pTask->selfChildId; + pStreamBlocks->childId = pTask->info.selfChildId; pStreamBlocks->sourceVer = pMerged->ver; } @@ -164,26 +164,6 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm return 0; } -static FORCE_INLINE void streamDataSubmitRefInc(SStreamDataSubmit* pDataSubmit) { - atomic_add_fetch_32(pDataSubmit->dataRef, 1); -} - -SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit) { - int32_t len = 0; - if (pSubmit->type == STREAM_INPUT__DATA_SUBMIT) { - len = pSubmit->submit.msgLen; - } - - SStreamDataSubmit* pSubmitClone = taosAllocateQitem(sizeof(SStreamDataSubmit), DEF_QITEM, len); - if (pSubmitClone == NULL) { - return NULL; - } - - streamDataSubmitRefInc(pSubmit); - memcpy(pSubmitClone, pSubmit, sizeof(SStreamDataSubmit)); - return pSubmitClone; -} - SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 7af3219f85f16bc0e87af16e66b2ef576ae2951c..d93de7b1e5d3b0811463643b512c4c8cf9aa341d 
100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -13,16 +13,19 @@ * along with this program. If not, see . */ +#include "ttimer.h" #include "streamInc.h" -#define MAX_BLOCK_NAME_NUM 1024 +#define MAX_BLOCK_NAME_NUM 1024 +#define DISPATCH_RETRY_INTERVAL_MS 300 +#define MAX_CONTINUE_RETRY_COUNT 5 typedef struct SBlockName { uint32_t hashValue; char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; -int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { +static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; @@ -44,6 +47,37 @@ int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* p return pEncoder->pos; } +static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { + int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); + void* buf = taosMemoryCalloc(1, dataStrLen); + if (buf == NULL) return -1; + + SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; + pRetrieve->useconds = 0; + pRetrieve->precision = TSDB_DEFAULT_PRECISION; + pRetrieve->compressed = 0; + pRetrieve->completed = 1; + pRetrieve->streamBlockType = pBlock->info.type; + pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); + pRetrieve->skey = htobe64(pBlock->info.window.skey); + pRetrieve->ekey = htobe64(pBlock->info.window.ekey); + pRetrieve->version = htobe64(pBlock->info.version); + pRetrieve->watermark = htobe64(pBlock->info.watermark); + memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); + + int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); + pRetrieve->numOfCols = htonl(numOfCols); + + int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); + actualLen += sizeof(SRetrieveTableRsp); 
+ ASSERT(actualLen <= dataStrLen); + taosArrayPush(pReq->dataLen, &actualLen); + taosArrayPush(pReq->data, &buf); + + pReq->totalLen += dataStrLen; + return 0; +} + int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; @@ -72,6 +106,27 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { return 0; } +int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, + int64_t dstTaskId) { + pReq->streamId = pTask->id.streamId; + pReq->dataSrcVgId = vgId; + pReq->upstreamTaskId = pTask->id.taskId; + pReq->upstreamChildId = pTask->info.selfChildId; + pReq->upstreamNodeId = pTask->info.nodeId; + pReq->blockNum = numOfBlocks; + pReq->taskId = dstTaskId; + + pReq->data = taosArrayInit(numOfBlocks, POINTER_BYTES); + pReq->dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t)); + if (pReq->data == NULL || pReq->dataLen == NULL) { + taosArrayDestroyP(pReq->data, taosMemoryFree); + taosArrayDestroy(pReq->dataLen); + return TSDB_CODE_OUT_OF_MEMORY; + } + + return TSDB_CODE_SUCCESS; +} + void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq) { taosArrayDestroyP(pReq->data, taosMemoryFree); taosArrayDestroy(pReq->dataLen); @@ -132,17 +187,17 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) SStreamRetrieveReq req = { .streamId = pTask->id.streamId, - .srcNodeId = pTask->nodeId, + .srcNodeId = pTask->info.nodeId, .srcTaskId = pTask->id.taskId, .pRetrieve = pRetrieve, .retrieveLen = dataStrLen, }; - int32_t sz = taosArrayGetSize(pTask->childEpInfo); + int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList); ASSERT(sz > 0); for (int32_t i = 0; i < sz; i++) { req.reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->childEpInfo, i); + SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); 
req.dstNodeId = pEpInfo->nodeId; req.dstTaskId = pEpInfo->taskId; int32_t len; @@ -171,8 +226,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) } buf = NULL; - qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId:0x%" PRIx64, pTask->id.idStr, - pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); + qDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, + pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); } code = 0; @@ -182,44 +237,13 @@ CLEAR: return code; } -static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) { - int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); - void* buf = taosMemoryCalloc(1, dataStrLen); - if (buf == NULL) return -1; - - SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf; - pRetrieve->useconds = 0; - pRetrieve->precision = TSDB_DEFAULT_PRECISION; - pRetrieve->compressed = 0; - pRetrieve->completed = 1; - pRetrieve->streamBlockType = pBlock->info.type; - pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows); - pRetrieve->skey = htobe64(pBlock->info.window.skey); - pRetrieve->ekey = htobe64(pBlock->info.window.ekey); - pRetrieve->version = htobe64(pBlock->info.version); - pRetrieve->watermark = htobe64(pBlock->info.watermark); - memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN); - - int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock); - pRetrieve->numOfCols = htonl(numOfCols); - - int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); - actualLen += sizeof(SRetrieveTableRsp); - ASSERT(actualLen <= dataStrLen); - taosArrayPush(pReq->dataLen, &actualLen); - taosArrayPush(pReq->data, &buf); - - pReq->totalLen += dataStrLen; - return 0; -} - int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { void* buf = NULL; 
int32_t code = -1; SRpcMsg msg = {0}; int32_t tlen; - tEncodeSize(tEncodeSStreamTaskCheckReq, pReq, tlen, code); + tEncodeSize(tEncodeStreamTaskCheckReq, pReq, tlen, code); if (code < 0) { return -1; } @@ -234,7 +258,7 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeSStreamTaskCheckReq(&encoder, pReq)) < 0) { + if ((code = tEncodeStreamTaskCheckReq(&encoder, pReq)) < 0) { rpcFreeCont(buf); return code; } @@ -245,21 +269,21 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR msg.pCont = buf; msg.msgType = TDMT_STREAM_TASK_CHECK; - qDebug("s-task:%s dispatch check msg to downstream s-task:%" PRIx64 ":%d node %d: check msg", pTask->id.idStr, - pReq->streamId, pReq->downstreamTaskId, nodeId); + qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, + pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); return 0; } -int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, - SEpSet* pEpSet) { +int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, + SEpSet* pEpSet) { void* buf = NULL; int32_t code = -1; SRpcMsg msg = {0}; int32_t tlen; - tEncodeSize(tEncodeSStreamRecoverFinishReq, pReq, tlen, code); + tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code); if (code < 0) { return -1; } @@ -275,7 +299,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeSStreamRecoverFinishReq(&encoder, pReq)) < 0) { + if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) { if (buf) { rpcFreeCont(buf); } @@ -286,17 +310,18 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov msg.contLen = 
tlen + sizeof(SMsgHead); msg.pCont = buf; - msg.msgType = TDMT_STREAM_RECOVER_FINISH; + msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH; msg.info.noResp = 1; tmsgSendReq(pEpSet, &msg); - qDebug("s-task:%s dispatch recover finish msg to downstream taskId:0x%x node %d: recover finish msg", pTask->id.idStr, - pReq->taskId, vgId); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + qDebug("s-task:%s status:%s dispatch scan-history-data finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, + pReq->taskId, vgId); return 0; } -int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { +static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) { void* buf = NULL; int32_t code = -1; SRpcMsg msg = {0}; @@ -304,7 +329,10 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in // serialize int32_t tlen; tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code); - if (code < 0) goto FAIL; + if (code < 0) { + goto FAIL; + } + code = -1; buf = rpcMallocCont(sizeof(SMsgHead) + tlen); if (buf == NULL) { @@ -323,16 +351,16 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in msg.contLen = tlen + sizeof(SMsgHead); msg.pCont = buf; - msg.msgType = pTask->dispatchMsgType; + msg.msgType = pTask->msgInfo.msgType; qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); - tmsgSendReq(pEpSet, &msg); - - code = 0; - return 0; + return tmsgSendReq(pEpSet, &msg); FAIL: - if (buf) rpcFreeCont(buf); + if (buf) { + rpcFreeCont(buf); + } + return code; } @@ -365,8 +393,6 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName); } - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - /*uint32_t hashValue = 
MurmurHash3_32(ctbName, strlen(ctbName));*/ SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo; hashValue = @@ -386,13 +412,16 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S for (j = 0; j < vgSz; j++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); ASSERT(pVgInfo->vgId > 0); + if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { return -1; } + if (pReqs[j].blockNum == 0) { atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); } + pReqs[j].blockNum++; found = true; break; @@ -404,25 +433,17 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; + int32_t numOfBlocks = taosArrayGetSize(pData->blocks); ASSERT(numOfBlocks != 0); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - SStreamDispatchReq req = { - .streamId = pTask->id.streamId, - .dataSrcVgId = pData->srcVgId, - .upstreamTaskId = pTask->id.taskId, - .upstreamChildId = pTask->selfChildId, - .upstreamNodeId = pTask->nodeId, - .blockNum = numOfBlocks, - }; - - req.data = taosArrayInit(numOfBlocks, sizeof(void*)); - req.dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t)); - if (req.data == NULL || req.dataLen == NULL) { - taosArrayDestroyP(req.data, taosMemoryFree); - taosArrayDestroy(req.dataLen); - return TSDB_CODE_OUT_OF_MEMORY; + SStreamDispatchReq req = {0}; + + int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; + code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId); + if (code != TSDB_CODE_SUCCESS) { + return code; } for (int32_t i = 0; i < numOfBlocks; i++) { @@ -438,12 +459,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat int32_t vgId = pTask->fixedEpDispatcher.nodeId; SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet; - int32_t downstreamTaskId = 
pTask->fixedEpDispatcher.taskId; - - req.taskId = downstreamTaskId; - qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to down stream s-task:0x%x in vgId:%d", pTask->id.idStr, - pTask->selfChildId, numOfBlocks, downstreamTaskId, vgId); + qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d)", pTask->id.idStr, + pTask->info.selfChildId, numOfBlocks, downstreamTaskId, vgId); code = doSendDispatchMsg(pTask, &req, vgId, pEpSet); taosArrayDestroyP(req.data, taosMemoryFree); @@ -453,8 +471,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt); ASSERT(rspCnt == 0); - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t vgSz = taosArrayGetSize(vgInfo); + SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t vgSz = taosArrayGetSize(vgInfo); + SStreamDispatchReq* pReqs = taosMemoryCalloc(vgSz, sizeof(SStreamDispatchReq)); if (pReqs == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -462,20 +481,11 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat } for (int32_t i = 0; i < vgSz; i++) { - pReqs[i].streamId = pTask->id.streamId; - pReqs[i].dataSrcVgId = pData->srcVgId; - pReqs[i].upstreamTaskId = pTask->id.taskId; - pReqs[i].upstreamChildId = pTask->selfChildId; - pReqs[i].upstreamNodeId = pTask->nodeId; - pReqs[i].blockNum = 0; - pReqs[i].data = taosArrayInit(0, sizeof(void*)); - pReqs[i].dataLen = taosArrayInit(0, sizeof(int32_t)); - if (pReqs[i].data == NULL || pReqs[i].dataLen == NULL) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); + code = tInitStreamDispatchReq(&pReqs[i], pTask, pData->srcVgId, 0, pVgInfo->taskId); + if (code != TSDB_CODE_SUCCESS) { goto FAIL_SHUFFLE_DISPATCH; } - - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - pReqs[i].taskId = pVgInfo->taskId; } for (int32_t i = 0; i < numOfBlocks; i++) { @@ -483,15 +493,18 @@ int32_t 
streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat // TODO: do not use broadcast if (pDataBlock->info.type == STREAM_DELETE_RESULT) { + for (int32_t j = 0; j < vgSz; j++) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { goto FAIL_SHUFFLE_DISPATCH; } + if (pReqs[j].blockNum == 0) { atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); } pReqs[j].blockNum++; } + continue; } @@ -500,16 +513,17 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat } } - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->selfChildId, + qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->info.selfChildId, numOfBlocks, vgSz); for (int32_t i = 0; i < vgSz; i++) { if (pReqs[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->selfChildId, + qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->info.selfChildId, pReqs[i].blockNum, pVgInfo->vgId); - if (doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet) < 0) { + code = doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet); + if (code < 0) { goto FAIL_SHUFFLE_DISPATCH; } } @@ -522,46 +536,87 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat taosArrayDestroyP(pReqs[i].data, taosMemoryFree); taosArrayDestroy(pReqs[i].dataLen); } + taosMemoryFree(pReqs); } + return code; } +static void doRetryDispatchData(void* param, void* tmrId) { + SStreamTask* pTask = param; + ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT); + + int32_t code = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); + if (code != TSDB_CODE_SUCCESS) { + qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); + 
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + } +} + +void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) { + qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration); + taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer); +} + int32_t streamDispatchStreamBlock(SStreamTask* pTask) { - ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH); + ASSERT((pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH)); + int32_t numOfElems = taosQueueItemSize(pTask->outputQueue->queue); if (numOfElems > 0) { qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr, numOfElems); } + // to make sure only one dispatch is running int8_t old = atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); if (old != TASK_OUTPUT_STATUS__NORMAL) { - qDebug("s-task:%s task wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); + qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); return 0; } + ASSERT(pTask->msgInfo.pData == NULL); qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pTask->outputStatus); - SStreamDataBlock* pDispatchedBlock = streamQueueNextItem(pTask->outputQueue); - if (pDispatchedBlock == NULL) { + SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue); + if (pBlock == NULL) { atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s stop dispatching since no output in output queue, output status:%d", pTask->id.idStr, - pTask->outputStatus); + qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, 
pTask->outputStatus); return 0; } - ASSERT(pDispatchedBlock->type == STREAM_INPUT__DATA_BLOCK); + pTask->msgInfo.pData = pBlock; + ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); - int32_t code = streamDispatchAllBlocks(pTask, pDispatchedBlock); - if (code != TSDB_CODE_SUCCESS) { - streamQueueProcessFail(pTask->outputQueue); - atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s failed to dispatch msg to downstream, output status:%d", pTask->id.idStr, pTask->outputStatus); + int32_t retryCount = 0; + + while (1) { + int32_t code = streamDispatchAllBlocks(pTask, pBlock); + if (code == TSDB_CODE_SUCCESS) { + break; + } + + qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr, + tstrerror(terrno), pTask->outputStatus, retryCount); + + // todo deal with only partially success dispatch case + atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); + if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore + destroyStreamDataBlock(pTask->msgInfo.pData); + pTask->msgInfo.pData = NULL; + return code; + } + + if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry + qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms", pTask->id.idStr, + retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS); + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + break; + } } - // this block can be freed only when it has been pushed to down stream. - destroyStreamDataBlock(pDispatchedBlock); - return code; + // this block can not be deleted until it has been sent to downstream task successfully. 
+ return TSDB_CODE_SUCCESS; } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 46290c306f5b641c21309b2dcc9c97554c93e723..6e1804b08ed638c1a8fdb5fcf005162e2188a066 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -108,10 +108,11 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); block.info.type = STREAM_PULL_OVER; - block.info.childId = pTask->selfChildId; + block.info.childId = pTask->info.selfChildId; taosArrayPush(pRes, &block); numOfBlocks += 1; - qDebug("s-task:%s(child %d) processed retrieve, reqId:0x%" PRIx64, pTask->id.idStr, pTask->selfChildId, + + qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId, pRetrieveBlock->reqId); } @@ -127,7 +128,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i SSDataBlock block = {0}; assignOneDataBlock(&block, output); - block.info.childId = pTask->selfChildId; + block.info.childId = pTask->info.selfChildId; size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); numOfBlocks += 1; @@ -135,7 +136,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i taosArrayPush(pRes, &block); qDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr, - pTask->selfChildId, numOfBlocks, size / 1048576.0); + pTask->info.selfChildId, numOfBlocks, size / 1048576.0); // current output should be dispatched to down stream nodes if (numOfBlocks >= MAX_STREAM_RESULT_DUMP_THRESHOLD) { @@ -164,7 +165,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { int32_t code = 0; - ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); + 
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); void* exec = pTask->exec.pExecutor; qSetStreamOpOpen(exec); @@ -179,7 +180,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { int32_t batchCnt = 0; while (1) { - if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { + if (streamTaskShouldStop(&pTask->status)) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); return 0; } @@ -189,18 +190,37 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { if (qExecTask(exec, &output, &ts) < 0) { continue; } + if (output == NULL) { if (qStreamRecoverScanFinished(exec)) { finished = true; } else { qSetStreamOpOpen(exec); + if (streamTaskShouldPause(&pTask->status)) { + SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); + if (qRes == NULL) { + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + qRes->type = STREAM_INPUT__DATA_BLOCK; + qRes->blocks = pRes; + code = streamTaskOutputResultBlock(pTask, qRes); + if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + taosFreeQitem(qRes); + return code; + } + return 0; + } } break; } SSDataBlock block = {0}; assignOneDataBlock(&block, output); - block.info.childId = pTask->selfChildId; + block.info.childId = pTask->info.selfChildId; taosArrayPush(pRes, &block); batchCnt++; @@ -237,11 +257,6 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { taosFreeQitem(qRes); return code; } -// -// if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { -// qDebug("s-task:%s scan exec dispatch blocks:%d", pTask->id.idStr, batchCnt); -// streamDispatchStreamBlock(pTask); -// } if (finished) { break; @@ -275,7 +290,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { return -1; } - if (pTask->taskLevel == TASK_LEVEL__SINK) { + if (pTask->info.taskLevel == 
TASK_LEVEL__SINK) { ASSERT(((SStreamQueueItem*)pItem)->type == STREAM_INPUT__DATA_BLOCK); streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pItem); } @@ -317,6 +332,75 @@ int32_t updateCheckPointInfo(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } +static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { + // wait for the stream task to be idle + int64_t st = taosGetTimestampMs(); + + while (!streamTaskIsIdle(pStreamTask)) { + qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr, + pTask->info.taskLevel, pStreamTask->id.idStr); + taosMsleep(100); + } + + double el = (taosGetTimestampMs() - st) / 1000.0; + if (el > 0) { + qDebug("s-task:%s wait for stream task:%s for %.2fs to handle all data in inputQ", pTask->id.idStr, + pStreamTask->id.idStr, el); + } +} + +static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { + SStreamTask* pStreamTask = streamMetaAcquireTask(pTask->pMeta, pTask->streamTaskId.taskId); + qDebug("s-task:%s scan history task end, update stream task:%s info, transfer exec state", pTask->id.idStr, pStreamTask->id.idStr); + + // todo handle stream task is dropped here + + ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId); + STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; + + // here we need to wait for the stream task handle all data in the input queue. + if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { + ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT); + } else { + ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__NORMAL); + pStreamTask->status.taskStatus = TASK_STATUS__HALT; + } + + // wait for the stream task to be idle + waitForTaskIdle(pTask, pStreamTask); + + if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { + // update the scan data range for source task. 
+ qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " update to %" PRId64 " - %" PRId64 + ", status:%s, sched-status:%d", + pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN, + pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus); + + // todo transfer state + } else { + // for sink tasks, they are continue to execute, no need to be halt. + // the process should be stopped for a while, during the term of transfer task state. + // OR wait for the inputQ && outputQ of agg tasks are all consumed, and then start the state transfer + qDebug("s-task:%s no need to update time window, for non-source task", pStreamTask->id.idStr); + + // todo transfer state + } + + // expand the query time window for stream scanner + pTimeWindow->skey = INT64_MIN; + qResetStreamInfoTimeWindow(pStreamTask->exec.pExecutor); + + // transfer the ownership of executor state + streamTaskReleaseState(pTask); + streamTaskReloadState(pStreamTask); + + streamSetStatusNormal(pStreamTask); + + streamSchedExec(pStreamTask); + streamMetaReleaseTask(pTask->pMeta, pStreamTask); + return TSDB_CODE_SUCCESS; +} + /** * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * appropriate batch of blocks should be handled in 5 to 10 sec. @@ -331,20 +415,23 @@ int32_t streamExecForAll(SStreamTask* pTask) { SStreamQueueItem* pInput = NULL; // merge multiple input data if possible in the input queue. 
- qDebug("s-task:%s start to extract data block from inputQ", id); + qDebug("s-task:%s start to extract data block from inputQ, status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus)); while (1) { - if (streamTaskShouldPause(&pTask->status)) { + // downstream task's input queue is blocked, stop immediately + if (streamTaskShouldPause(&pTask->status) || (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) || + streamTaskShouldStop(&pTask->status)) { if (batchSize > 1) { break; } else { + qDebug("123 %s", pTask->id.idStr); return 0; } } SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { - if (pTask->taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) { times++; taosMsleep(10); qDebug("===stream===try again batchSize:%d", batchSize); @@ -358,7 +445,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { if (pInput == NULL) { pInput = qItem; streamQueueProcessSuccess(pTask->inputQueue); - if (pTask->taskLevel == TASK_LEVEL__SINK) { + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { break; } } else { @@ -389,28 +476,20 @@ int32_t streamExecForAll(SStreamTask* pTask) { } if (pInput == NULL) { + if (pTask->info.fillHistory && pTask->status.transferState) { + int32_t code = streamTransferStateToStreamTask(pTask); + } + break; } - if (pTask->taskLevel == TASK_LEVEL__SINK) { + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput); continue; } - // wait for the task to be ready to go - while (pTask->taskLevel == TASK_LEVEL__SOURCE) { - int8_t status = atomic_load_8(&pTask->status.taskStatus); - if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__PAUSE) { - qError("stream task wait for the end of fill history, 
s-task:%s, status:%d", id, - atomic_load_8(&pTask->status.taskStatus)); - taosMsleep(100); - } else { - break; - } - } - int64_t st = taosGetTimestampMs(); qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize); @@ -423,7 +502,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { - ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, @@ -455,14 +534,34 @@ int32_t streamExecForAll(SStreamTask* pTask) { streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); double el = (taosGetTimestampMs() - st) / 1000.0; - qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", - id, el, resSize / 1048576.0, totalBlocks); + qDebug("s-task:%s batch of (%d)input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", + id, batchSize, el, resSize / 1048576.0, totalBlocks); + streamFreeQitem(pInput); } return 0; } +bool streamTaskIsIdle(const SStreamTask* pTask) { + int32_t numOfItems = taosQueueItemSize(pTask->inputQueue->queue); + if (numOfItems > 0) { + return false; + } + + numOfItems = taosQallItemSize(pTask->inputQueue->qall); + if (numOfItems > 0) { + return false; + } + + // blocked by downstream task + if (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) { + return false; + } + + return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE); +} + int32_t streamTryExec(SStreamTask* pTask) { // this function may be executed by multi-threads, so status check is required. 
int8_t schedStatus = @@ -477,7 +576,8 @@ int32_t streamTryExec(SStreamTask* pTask) { // todo the task should be commit here atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed", pTask->id.idStr); + qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->status.schedStatus); if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) { @@ -487,3 +587,25 @@ int32_t streamTryExec(SStreamTask* pTask) { return 0; } + +int32_t streamTaskReleaseState(SStreamTask* pTask) { + qDebug("s-task:%s release exec state", pTask->id.idStr); + void* pExecutor = pTask->exec.pExecutor; + if (pExecutor != NULL) { + int32_t code = qStreamOperatorReleaseState(pExecutor); + return code; + } else { + return TSDB_CODE_SUCCESS; + } +} + +int32_t streamTaskReloadState(SStreamTask* pTask) { + qDebug("s-task:%s reload exec state", pTask->id.idStr); + void* pExecutor = pTask->exec.pExecutor; + if (pExecutor != NULL) { + int32_t code = qStreamOperatorReloadState(pExecutor); + return code; + } else { + return TSDB_CODE_SUCCESS; + } +} diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 5c31b1dd602595a264693aa965fdba9b7448ae13..8242f84312a073fd0d83a3e1774773bac8ec0ac6 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -21,10 +21,18 @@ static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT; int32_t streamBackendId = 0; -static void streamMetaEnvInit() { streamBackendId = taosOpenRef(20, streamBackendCleanup); } +int32_t streamBackendCfWrapperId = 0; + +static void streamMetaEnvInit() { + streamBackendId = taosOpenRef(64, streamBackendCleanup); + streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup); +} void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } 
-void streamMetaCleanup() { taosCloseRef(streamBackendId); } +void streamMetaCleanup() { + taosCloseRef(streamBackendId); + taosCloseRef(streamBackendCfWrapperId); +} SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) { int32_t code = -1; @@ -93,10 +101,14 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); + pMeta->pTaskBackendUnique = + taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); taosMemoryFree(streamPath); taosInitRWLatch(&pMeta->lock); + taosThreadMutexInit(&pMeta->backendMutex, NULL); + return pMeta; _err: @@ -127,9 +139,14 @@ void streamMetaClose(SStreamMeta* pMeta) { } SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->timer) { - taosTmrStop(pTask->timer); - pTask->timer = NULL; + if (pTask->schedTimer) { + taosTmrStop(pTask->schedTimer); + pTask->schedTimer = NULL; + } + + if (pTask->launchTaskTimer) { + taosTmrStop(pTask->launchTaskTimer); + pTask->launchTaskTimer = NULL; } tFreeStreamTask(pTask); @@ -139,6 +156,8 @@ void streamMetaClose(SStreamMeta* pMeta) { taosRemoveRef(streamBackendId, pMeta->streamBackendRid); pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList); taosMemoryFree(pMeta->path); + taosThreadMutexDestroy(&pMeta->backendMutex); + taosHashCleanup(pMeta->pTaskBackendUnique); taosMemoryFree(pMeta); } @@ -266,18 +285,53 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { - taosWLockLatch(&pMeta->lock); + SStreamTask* pTask = NULL; + // pre-delete operation + taosWLockLatch(&pMeta->lock); SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { - SStreamTask* pTask = *ppTask; + pTask = *ppTask; + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); + } else { + qDebug("vgId:%d 
failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); + taosWUnLockLatch(&pMeta->lock); + return; + } + taosWUnLockLatch(&pMeta->lock); + + qDebug("s-task:0x%x set task status:%s", taskId, streamGetTaskStatusStr(TASK_STATUS__DROPPING)); + while(1) { + taosRLockLatch(&pMeta->lock); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); + + if (ppTask) { + if ((*ppTask)->status.timerActive == 0) { + taosRUnLockLatch(&pMeta->lock); + break; + } + + taosMsleep(10); + qDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr); + taosRUnLockLatch(&pMeta->lock); + } else { + taosRUnLockLatch(&pMeta->lock); + break; + } + } + + // let's do delete of stream task + taosWLockLatch(&pMeta->lock); + ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); + if (ppTask) { taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); - int32_t num = taosArrayGetSize(pMeta->pTaskList); + ASSERT(pTask->status.timerActive == 0); + int32_t num = taosArrayGetSize(pMeta->pTaskList); qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1); for (int32_t i = 0; i < num; ++i) { int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i); @@ -380,6 +434,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { taosMemoryFree(pTask); continue; } + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); @@ -388,10 +443,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - if (pTask->fillHistory) { - ASSERT(pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM); - streamTaskCheckDownstream(pTask, ver); - } + ASSERT(pTask->status.downstreamReady == 0); } tdbFree(pKey); diff --git a/source/libs/stream/src/streamRecover.c 
b/source/libs/stream/src/streamRecover.c index eb2535782ea6810fd82440f814012279a54bd64a..9ded58597ff3233098dfc2e213adc246d636903c 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -14,93 +14,131 @@ */ #include "streamInc.h" +#include "ttimer.h" +#include "wal.h" -int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { - qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); +int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated) { + SStreamScanHistoryReq req; + streamBuildSourceRecover1Req(pTask, &req, igUntreated); + int32_t len = sizeof(SStreamScanHistoryReq); - if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); - qDebug("s-task:%s set task status:%d and start to recover", pTask->id.idStr, pTask->status.taskStatus); + void* serializedReq = rpcMallocCont(len); + if (serializedReq == NULL) { + return -1; + } - streamSetParamForRecover(pTask); - streamSourceRecoverPrepareStep1(pTask, version); + memcpy(serializedReq, &req, len); - SStreamRecoverStep1Req req; - streamBuildSourceRecover1Req(pTask, &req); - int32_t len = sizeof(SStreamRecoverStep1Req); + SRpcMsg rpcMsg = {.contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_SCAN_HISTORY}; + if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) { + /*ASSERT(0);*/ + } - void* serializedReq = rpcMallocCont(len); - if (serializedReq == NULL) { - return -1; - } + return 0; +} - memcpy(serializedReq, &req, len); +const char* streamGetTaskStatusStr(int32_t status) { + switch(status) { + case TASK_STATUS__NORMAL: return "normal"; + case TASK_STATUS__SCAN_HISTORY: return "scan-history"; + case TASK_STATUS__HALT: return "halt"; + case TASK_STATUS__PAUSE: return "paused"; + default:return ""; + } +} - SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE }; - if 
(tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) { - /*ASSERT(0);*/ - } +static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) { + SVersionRange* pRange = &pTask->dataRange.range; - } else if (pTask->taskLevel == TASK_LEVEL__AGG) { - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); - streamSetParamForRecover(pTask); + qDebug("s-task:%s vgId:%d status:%s, start scan-history-data task, verRange:%" PRId64 " - %" PRId64, pTask->id.idStr, + pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus), pRange->minVer, pRange->maxVer); + + streamSetParamForScanHistoryData(pTask); + streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window); + + int32_t code = streamStartRecoverTask(pTask, 0); + return code; +} + +int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + return doLaunchScanHistoryTask(pTask); + } else { + ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL); + qDebug("s-task:%s no need to scan-history-data, status:%s, sched-status:%d, ver:%" PRId64, pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus, + walReaderGetCurrentVer(pTask->exec.pWalReader)); + } + } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + streamSetStatusNormal(pTask); + streamSetParamForScanHistoryData(pTask); streamAggRecoverPrepare(pTask); - } else if (pTask->taskLevel == TASK_LEVEL__SINK) { - // sink nodes has no specified operation for fill history - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); + } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + streamSetStatusNormal(pTask); + qDebug("s-task:%s sink task convert to normal immediately", pTask->id.idStr); } return 0; } -// checkstatus -int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { - qDebug("s-task:%s in fill history stage, ver:%"PRId64, pTask->id.idStr, 
version); +// check status +int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) { + SHistDataRange* pRange = &pTask->dataRange; + STimeWindow* pWindow = &pRange->window; SStreamTaskCheckReq req = { .streamId = pTask->id.streamId, .upstreamTaskId = pTask->id.taskId, - .upstreamNodeId = pTask->nodeId, - .childId = pTask->selfChildId, + .upstreamNodeId = pTask->info.nodeId, + .childId = pTask->info.selfChildId, }; // serialize if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - req.reqId = tGenIdPI64(); req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; pTask->checkReqId = req.reqId; - qDebug("s-task:%s at node %d check downstream task:0x%x at node %d", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, - req.downstreamNodeId); + qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64 + "-%" PRId64 ", req:0x%" PRIx64, + pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer, + pWindow->skey, pWindow->ekey, req.reqId); + streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); - pTask->recoverTryingDownstream = numOfVgs; + pTask->notReadyTasks = numOfVgs; pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t)); + qDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64, + pTask->id.idStr, numOfVgs, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey); + for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); req.reqId = tGenIdPI64(); taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("s-task:%s at 
node %d check downstream task:0x%x at node %d (shuffle)", pTask->id.idStr, pTask->nodeId, - req.downstreamTaskId, req.downstreamNodeId); + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId, + req.downstreamTaskId, req.downstreamNodeId, i); streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("s-task:%s at node %d direct launch recover since no downstream", pTask->id.idStr, pTask->nodeId); - streamTaskLaunchRecover(pTask, version); + pTask->status.downstreamReady = 1; + qDebug("s-task:%s (vgId:%d) no downstream tasks, set downstream checked, try to launch scan-history-data, status:%s", + pTask->id.idStr, pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus)); + + streamTaskLaunchScanHistory(pTask); } return 0; } -int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { +int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { SStreamTaskCheckReq req = { .reqId = pRsp->reqId, .streamId = pRsp->streamId, @@ -111,7 +149,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp .childId = pRsp->childId, }; - qDebug("s-task:%s at node %d check downstream task:0x%x at node %d (recheck)", pTask->id.idStr, pTask->nodeId, + qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { @@ -135,11 +173,9 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask) { return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 
1:0; } -int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { +int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); - - qDebug("s-task:%s at node %d recv check rsp from task:0x%x at node %d: status %d", pTask->id.idStr, - pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status); + const char* id = pTask->id.idStr; if (pRsp->status == 1) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { @@ -158,146 +194,386 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* return -1; } - int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1); + int32_t left = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); ASSERT(left >= 0); if (left == 0) { taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; - qDebug("s-task:%s all %d downstream tasks are ready, now enter into recover stage", pTask->id.idStr, numOfReqs); - streamTaskLaunchRecover(pTask, version); + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + qDebug("s-task:%s all %d downstream tasks are ready, now enter into scan-history-data stage, status:%s", id, numOfReqs, + streamGetTaskStatusStr(pTask->status.taskStatus)); + streamTaskLaunchScanHistory(pTask); + } else { + ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL); + qDebug("s-task:%s fixed downstream task is ready, now ready for data from wal, status:%s", id, + streamGetTaskStatusStr(pTask->status.taskStatus)); + } + } else { + int32_t total = taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); + qDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id, + pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left); } } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { if (pRsp->reqId != pTask->checkReqId) { return -1; } - 
streamTaskLaunchRecover(pTask, version); + // set the downstream tasks have been checked flag + ASSERT(pTask->status.downstreamReady == 0); + pTask->status.downstreamReady = 1; + + ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY || pTask->status.taskStatus == TASK_STATUS__NORMAL); + if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id, + streamGetTaskStatusStr(pTask->status.taskStatus)); + streamTaskLaunchScanHistory(pTask); + } else { + qDebug("s-task:%s fixed downstream task is ready, ready for data from inputQ, status:%s", id, + streamGetTaskStatusStr(pTask->status.taskStatus)); + } } else { ASSERT(0); } - } else { // not ready, wait for 100ms and retry - qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr, - pRsp->downstreamTaskId, pRsp->downstreamNodeId); + } else { // not ready, wait for 100ms and retry + qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId, + pRsp->downstreamNodeId); taosMsleep(100); - streamRecheckOneDownstream(pTask, pRsp); + streamRecheckDownstream(pTask, pRsp); } return 0; } // common -int32_t streamSetParamForRecover(SStreamTask* pTask) { - void* exec = pTask->exec.pExecutor; - return qStreamSetParamForRecover(exec); +int32_t streamSetParamForScanHistoryData(SStreamTask* pTask) { + qDebug("s-task:%s set operator option for scan-history-data", pTask->id.idStr); + return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor); } + int32_t streamRestoreParam(SStreamTask* pTask) { - void* exec = pTask->exec.pExecutor; - return qStreamRestoreParam(exec); + qDebug("s-task:%s restore operator param after scan-history-data", pTask->id.idStr); + return qRestoreStreamOperatorOption(pTask->exec.pExecutor); } int32_t streamSetStatusNormal(SStreamTask* pTask) { + qDebug("s-task:%s set task status to be 
normal, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus)); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); return 0; } // source -int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.pExecutor; - return qStreamSourceRecoverStep1(exec, ver); +int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) { + return qStreamSourceScanParamForHistoryScanStep1(pTask->exec.pExecutor, pVerRange, pWindow); +} + +int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) { + return qStreamSourceScanParamForHistoryScanStep2(pTask->exec.pExecutor, pVerRange, pWindow); } -int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) { - pReq->msgHead.vgId = pTask->nodeId; +int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) { + pReq->msgHead.vgId = pTask->info.nodeId; pReq->streamId = pTask->id.streamId; pReq->taskId = pTask->id.taskId; + pReq->igUntreated = igUntreated; return 0; } -int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) { +int32_t streamSourceScanHistoryData(SStreamTask* pTask) { return streamScanExec(pTask, 100); } -int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) { - pReq->msgHead.vgId = pTask->nodeId; - pReq->streamId = pTask->id.streamId; - pReq->taskId = pTask->id.taskId; +int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { + SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId }; + + // serialize + if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + req.taskId = pTask->fixedEpDispatcher.taskId; + streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + 
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t numOfVgs = taosArrayGetSize(vgInfo); + + qDebug("s-task:%s send scan-history-data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, + numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); + for (int32_t i = 0; i < numOfVgs; i++) { + SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); + req.taskId = pVgInfo->taskId; + streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + } + } + return 0; } -int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.pExecutor; - const char* id = pTask->id.idStr; +static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) { + void* buf = NULL; + int32_t code = -1; + SRpcMsg msg = {0}; + + int32_t tlen; + tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - int64_t st = taosGetTimestampMs(); - qDebug("s-task:%s recover step2(blocking stage) started", id); - if (qStreamSourceRecoverStep2(exec, ver) < 0) { + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) { + if (buf) { + rpcFreeCont(buf); + } + return code; } - int32_t code = streamScanExec(pTask, 100); + tEncoderClear(&encoder); - double el = (taosGetTimestampMs() - st) / 1000.0; - qDebug("s-task:%s recover step2(blocking stage) ended, elapsed time:%.2fs", id, el); + msg.contLen = tlen + sizeof(SMsgHead); + msg.pCont = buf; + msg.msgType = TDMT_STREAM_TRANSFER_STATE; + msg.info.noResp = 1; - return code; + tmsgSendReq(pEpSet, &msg); + qDebug("s-task:%s dispatch transfer state msg to taskId:0x%x (vgId:%d)", 
pTask->id.idStr, pReq->taskId, vgId); + + return 0; } -int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { - SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->selfChildId }; +int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) { + SStreamTransferReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId }; // serialize if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - qDebug("s-task:%s send recover finish msg to downstream (fix-dispatch) to taskId:%d, status:%d", pTask->id.idStr, - pTask->fixedEpDispatcher.taskId, pTask->status.taskStatus); + qDebug("s-task:%s send transfer state msg to downstream (fix-dispatch) to taskId:0x%x, status:%s", pTask->id.idStr, + pTask->fixedEpDispatcher.taskId, streamGetTaskStatusStr(pTask->status.taskStatus)); req.taskId = pTask->fixedEpDispatcher.taskId; - streamDispatchOneRecoverFinishReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + doDispatchTransferMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - int32_t vgSz = taosArrayGetSize(vgInfo); - for (int32_t i = 0; i < vgSz; i++) { + + int32_t numOfVgs = taosArrayGetSize(vgInfo); + for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); req.taskId = pVgInfo->taskId; - streamDispatchOneRecoverFinishReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + doDispatchTransferMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } + return 0; } // agg int32_t streamAggRecoverPrepare(SStreamTask* pTask) { - pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo); - qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream); + pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); + qDebug("s-task:%s agg task is ready and wait for 
%d upstream tasks complete scan-history procedure", pTask->id.idStr, + pTask->numOfWaitingUpstream); return 0; } -int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { +int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) { void* exec = pTask->exec.pExecutor; - if (qStreamRestoreParam(exec) < 0) { + if (qRestoreStreamOperatorOption(exec) < 0) { return -1; } + if (qStreamRecoverFinish(exec) < 0) { return -1; } - streamSetStatusNormal(pTask); + +// streamSetStatusNormal(pTask); return 0; } -int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) { - if (pTask->taskLevel == TASK_LEVEL__AGG) { - int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1); - qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left); +int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId) { + if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1); ASSERT(left >= 0); + if (left == 0) { - streamAggChildrenRecoverFinish(pTask); + int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList); + qDebug("s-task:%s all %d upstream tasks finish scan-history data", pTask->id.idStr, numOfTasks); + streamAggUpstreamScanHistoryFinish(pTask); + } else { + qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", + pTask->id.idStr, taskId, childId, left); + } + + } + return 0; +} + +static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) { + pHTask->dataRange.range.minVer = 0; + pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer; + + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + qDebug("s-task:%s set the launch condition for fill history s-task:%s, window:%" PRId64 " - %" PRId64 + " ver range:%" PRId64 " - %" PRId64, + pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey, + pHTask->dataRange.range.minVer, 
pHTask->dataRange.range.maxVer); + } else { + qDebug("s-task:%s no fill history condition for non-source task:%s", pTask->id.idStr, pHTask->id.idStr); + } + + // check if downstream tasks have been ready + streamTaskCheckDownstreamTasks(pHTask); +} + +typedef struct SStreamTaskRetryInfo { + SStreamMeta* pMeta; + int32_t taskId; +} SStreamTaskRetryInfo; + +static void tryLaunchHistoryTask(void* param, void* tmrId) { + SStreamTaskRetryInfo* pInfo = param; + SStreamMeta* pMeta = pInfo->pMeta; + + qDebug("s-task:0x%x in timer to launch related history task", pInfo->taskId); + + taosWLockLatch(&pMeta->lock); + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &pInfo->taskId, sizeof(int32_t)); + if (ppTask) { + ASSERT((*ppTask)->status.timerActive == 1); + + if (streamTaskShouldStop(&(*ppTask)->status)) { + const char* pStatus = streamGetTaskStatusStr((*ppTask)->status.taskStatus); + qDebug("s-task:%s status:%s quit timer task", (*ppTask)->id.idStr, pStatus); + + (*ppTask)->status.timerActive = 0; + taosWUnLockLatch(&pMeta->lock); + return; + } + } + taosWUnLockLatch(&pMeta->lock); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->taskId); + if (pTask != NULL) { + ASSERT(pTask->status.timerActive == 1); + + // abort the timer if intend to stop task + SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.taskId); + if (pHTask == NULL && (!streamTaskShouldStop(&pTask->status))) { + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + qWarn( + "s-task:%s vgId:%d status:%s failed to launch history task:0x%x, since it may not be built, or have been " + "destroyed, or should stop exec", + pTask->id.idStr, pMeta->vgId, pStatus, pTask->historyTaskId.taskId); + + taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); + streamMetaReleaseTask(pMeta, pTask); + return; + } + + if (pHTask != NULL) { + doCheckDownstreamStatus(pTask, pHTask); + streamMetaReleaseTask(pMeta, pHTask); + } + 
+ // not in timer anymore + pTask->status.timerActive = 0; + streamMetaReleaseTask(pMeta, pTask); + } else { + qError("s-task:0x%x failed to load task, it may have been destroyed", pInfo->taskId); + } + + taosMemoryFree(pInfo); +} + +// todo fix the bug: 2. race condition +// a fill-history task needs to be started. +int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + int32_t hTaskId = pTask->historyTaskId.taskId; + + // Set the execute conditions, including the query time window and the version range + SStreamTask** pHTask = taosHashGet(pMeta->pTasks, &hTaskId, sizeof(hTaskId)); + if (pHTask == NULL) { + qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr, + pMeta->vgId, hTaskId); + + SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo)); + pInfo->taskId = pTask->id.taskId; + pInfo->pMeta = pTask->pMeta; + + if (pTask->launchTaskTimer == NULL) { + pTask->launchTaskTimer = taosTmrStart(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer); + if (pTask->launchTaskTimer == NULL) { + // todo failed to create timer + } else { + pTask->status.timerActive = 1; // timer is active + qDebug("s-task:%s set timer active flag", pTask->id.idStr); + } + } else { // timer exists + pTask->status.timerActive = 1; + qDebug("s-task:%s set timer active flag, task timer not null", pTask->id.idStr); + taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer); } + + // try again in 100ms + return TSDB_CODE_SUCCESS; + } + + doCheckDownstreamStatus(pTask, *pHTask); + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { + return 0; + } + + // restore param + int32_t code = streamRestoreParam(pTask); + if (code < 0) { + return -1; } + + // dispatch recover finish req to all related
downstream task + code = streamDispatchScanHistoryFinishMsg(pTask); + if (code < 0) { + return -1; + } + + ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY); + + // ready to process data from inputQ + streamSetStatusNormal(pTask); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + + // todo check rsp, commit data + streamMetaSaveTask(pMeta, pTask); return 0; } -int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) { +bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask) { + void* exec = pTask->exec.pExecutor; + return qStreamRecoverScanStep1Finished(exec); +} + +bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask) { + void* exec = pTask->exec.pExecutor; + return qStreamRecoverScanStep2Finished(exec); +} + +int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask) { + void* exec = pTask->exec.pExecutor; + return qStreamRecoverSetAllStepFinished(exec); +} + +int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->reqId) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -310,7 +586,7 @@ int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq return pEncoder->pos; } -int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) { +int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->reqId) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; @@ -323,7 +599,7 @@ int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq return 0; } -int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) { +int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) { if (tStartEncode(pEncoder) < 0) return -1; if 
(tEncodeI64(pEncoder, pRsp->reqId) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; @@ -337,7 +613,7 @@ int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp return pEncoder->pos; } -int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) { +int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->reqId) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; @@ -351,7 +627,7 @@ int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp return 0; } -int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) { +int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; @@ -359,7 +635,7 @@ int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverF tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) { +int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; @@ -367,3 +643,41 @@ int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishR tEndDecode(pDecoder); return 0; } + +// todo handle race condition, this task may be destroyed +void streamPrepareNdoCheckDownstream(SStreamTask* pTask) { + if (pTask->info.fillHistory) { + qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr); + } else { + // calculate the correct start time window, and start to handle the history data for the main task.
+ if (pTask->historyTaskId.taskId != 0) { + // check downstream tasks for associated scan-history-data tasks + streamCheckHistoryTaskDownstrem(pTask); + + // launch current task + SHistDataRange* pRange = &pTask->dataRange; + int64_t ekey = pRange->window.ekey + 1; + int64_t ver = pRange->range.minVer; + + pRange->window.skey = ekey; + pRange->window.ekey = INT64_MAX; + pRange->range.minVer = 0; + pRange->range.maxVer = ver; + + qDebug("s-task:%s level:%d fill-history task exists, update stream time window:%" PRId64 " - %" PRId64 + ", ver range:%" PRId64 " - %" PRId64, + pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, + pRange->range.maxVer); + } else { + SHistDataRange* pRange = &pTask->dataRange; + qDebug("s-task:%s no associated scan-history task, stream time window:%" PRId64 " - %" PRId64 + ", ver range:%" PRId64 " - %" PRId64, + pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); + } + + ASSERT(pTask->status.downstreamReady == 0); + + // check downstream tasks for itself + streamTaskCheckDownstreamTasks(pTask); + } +} diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 967c7733c9160b49cabab54b504ad47f520d1d2c..9873e7b4c82e81350deb9a1d198d6c7a016ffc1c 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -116,16 +116,33 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz pState->taskId = pStreamTask->id.taskId; pState->streamId = pStreamTask->id.streamId; + sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId); #ifdef USE_ROCKSDB SStreamMeta* pMeta = pStreamTask->pMeta; pState->streamBackendRid = pMeta->streamBackendRid; - int code = streamStateOpenBackend(pMeta->streamBackend, pState); - if (code == -1) { - taosReleaseRef(streamBackendId, pMeta->streamBackendRid); - taosMemoryFree(pState); - pState = NULL; 
+ // taosWLockLatch(&pMeta->lock); + taosThreadMutexLock(&pMeta->backendMutex); + void* uniqueId = + taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); + if (uniqueId == NULL) { + int code = streamStateOpenBackend(pMeta->streamBackend, pState); + if (code == -1) { + taosReleaseRef(streamBackendId, pState->streamBackendRid); + taosThreadMutexUnlock(&pMeta->backendMutex); + taosMemoryFree(pState); + return NULL; + } + taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1, + &pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId)); + } else { + int64_t id = *(int64_t*)uniqueId; + pState->pTdbState->backendCfWrapperId = id; + pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id); + + taosAcquireRef(streamBackendId, pState->streamBackendRid); } + taosThreadMutexUnlock(&pMeta->backendMutex); pState->pTdbState->pOwner = pTask; pState->pFileState = NULL; @@ -385,8 +402,8 @@ int32_t streamStateClear(SStreamState* pState) { streamStatePut(pState, &key, NULL, 0); while (1) { SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &key); - SWinKey delKey = {0}; - int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0); + SWinKey delKey = {0}; + int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0); streamStateFreeCur(pCur); if (code == 0) { streamStateDel(pState, &delKey); @@ -498,7 +515,7 @@ int32_t streamStateGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const void** return -1; } const SStateKey* pKTmp = NULL; - int32_t kLen; + int32_t kLen; if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) { return -1; } @@ -518,7 +535,7 @@ int32_t streamStateFillGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const vo return -1; } const SWinKey* pKTmp = NULL; - int32_t kLen; + int32_t kLen; if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) { return -1; } @@ -535,7 +552,7 
@@ int32_t streamStateGetGroupKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const v return -1; } uint64_t groupId = pKey->groupId; - int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen); + int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen); if (code == 0) { if (pKey->groupId == groupId) { return 0; @@ -553,7 +570,7 @@ int32_t streamStateGetFirst(SStreamState* pState, SWinKey* key) { SWinKey tmp = {.ts = 0, .groupId = 0}; streamStatePut(pState, &tmp, NULL, 0); SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &tmp); - int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0); + int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0); streamStateFreeCur(pCur); streamStateDel(pState, &tmp); return code; @@ -593,7 +610,7 @@ SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key } SStateKey sKey = {.key = *key, .opNum = pState->number}; - int32_t c = 0; + int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateKey), &c) < 0) { streamStateFreeCur(pCur); return NULL; @@ -726,9 +743,9 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa #else SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key); - SSessionKey resKey = *key; - void* tmp = NULL; - int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen); + SSessionKey resKey = *key; + void* tmp = NULL; + int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen); if (code == 0) { if (key->win.skey != resKey.win.skey) { code = -1; @@ -745,6 +762,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB + qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); return streamStateSessionDel_rocksdb(pState, key); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; @@ -767,7 
+785,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons } SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - int32_t c = 0; + int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { streamStateFreeCur(pCur); return NULL; @@ -798,7 +816,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, cons } SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - int32_t c = 0; + int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { streamStateFreeCur(pCur); return NULL; @@ -830,7 +848,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess } SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - int32_t c = 0; + int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { streamStateFreeCur(pCur); return NULL; @@ -854,7 +872,7 @@ int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, v return -1; } SStateSessionKey* pKTmp = NULL; - int32_t kLen; + int32_t kLen; if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, (const void**)pVal, pVLen) < 0) { return -1; } @@ -873,13 +891,13 @@ int32_t streamStateSessionClear(SStreamState* pState) { #ifdef USE_ROCKSDB return streamStateSessionClear_rocksdb(pState); #else - SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; + SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, &key); while (1) { SSessionKey delKey = {0}; - void* buf = NULL; - int32_t size = 0; - int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); + void* buf = NULL; + int32_t size = 0; + int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size); if (code == 0 && size > 0) { memset(buf, 0, size); streamStateSessionPut(pState, &delKey, buf, size); @@ -908,14 +926,14 @@ int32_t 
streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey* } SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; - int32_t c = 0; + int32_t c = 0; if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) { streamStateFreeCur(pCur); return -1; } SSessionKey resKey = *key; - int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); + int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0); if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) { *curKey = resKey; streamStateFreeCur(pCur); @@ -951,19 +969,19 @@ int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, return streamStateSessionAddIfNotExist_rocksdb(pState, key, gap, pVal, pVLen); #else // todo refactor - int32_t res = 0; + int32_t res = 0; SSessionKey originKey = *key; SSessionKey searchKey = *key; searchKey.win.skey = key->win.skey - gap; searchKey.win.ekey = key->win.ekey + gap; int32_t valSize = *pVLen; - void* tmp = tdbRealloc(NULL, valSize); + void* tmp = tdbRealloc(NULL, valSize); if (!tmp) { return -1; } SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); - int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); + int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); if (code == 0) { if (sessionRangeKeyCmpr(&searchKey, key) == 0) { memcpy(tmp, *pVal, valSize); @@ -1006,16 +1024,16 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch #ifdef USE_ROCKSDB return streamStateStateAddIfNotExist_rocksdb(pState, key, pKeyData, keyDataLen, fn, pVal, pVLen); #else - int32_t res = 0; + int32_t res = 0; SSessionKey tmpKey = *key; - int32_t valSize = *pVLen; - void* tmp = tdbRealloc(NULL, valSize); + int32_t valSize = *pVLen; + void* tmp = tdbRealloc(NULL, valSize); if (!tmp) { return -1; } SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key); - int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); + int32_t 
code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen); if (code == 0) { if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) { memcpy(tmp, *pVal, valSize); @@ -1113,6 +1131,8 @@ int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark) { #endif } +void streamStateReloadInfo(SStreamState* pState, TSKEY ts) { streamFileStateReloadInfo(pState->pFileState, ts); } + #if 0 char* streamStateSessionDump(SStreamState* pState) { SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 284d1ecab63423a7b52e80c23d197639d36b6844..06da72188c4b3252d32bb50d5c2dc61c9f14d2bc 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -17,9 +17,9 @@ #include "tstream.h" #include "wal.h" -static int32_t mndAddToTaskset(SArray* pArray, SStreamTask* pTask) { +static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); - pTask->selfChildId = childId; + pTask->info.selfChildId = childId; taosArrayPush(pArray, &pTask); return 0; } @@ -33,8 +33,8 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->id.taskId = tGenIdPI32(); pTask->id.streamId = streamId; - pTask->taskLevel = taskLevel; - pTask->fillHistory = fillHistory; + pTask->info.taskLevel = taskLevel; + pTask->info.fillHistory = fillHistory; pTask->triggerParam = triggerParam; char buf[128] = {0}; @@ -42,10 +42,11 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->id.idStr = taosStrdup(buf); pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; - mndAddToTaskset(pTaskList, pTask); + addToTaskset(pTaskList, pTask); return pTask; } @@ -71,30 +72,40 @@ int32_t 
tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->info.totalLevel) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->info.taskLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; - if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1; + if (tEncodeI16(pEncoder, pTask->msgInfo.msgType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; - if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1; + if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1; if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1; - int32_t epSz = taosArrayGetSize(pTask->childEpInfo); + if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1; + if (tEncodeI32(pEncoder, pTask->historyTaskId.taskId)) return -1; + if (tEncodeI64(pEncoder, pTask->streamTaskId.streamId)) return -1; + if (tEncodeI32(pEncoder, pTask->streamTaskId.taskId)) return -1; + + if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1; + if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1; + if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1; + if 
(tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1; + + int32_t epSz = taosArrayGetSize(pTask->pUpstreamEpInfoList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->childEpInfo, i); + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } - if (pTask->taskLevel != TASK_LEVEL__SINK) { + if (pTask->info.taskLevel != TASK_LEVEL__SINK) { if (tEncodeCStr(pEncoder, pTask->exec.qmsg) < 0) return -1; } @@ -124,25 +135,36 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->info.totalLevel) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->info.taskLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; - if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1; + if (tDecodeI16(pDecoder, &pTask->msgInfo.msgType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; - if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->info.nodeId) < 0) return -1; + if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1; + if 
(tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1; + + if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1; + if (tDecodeI32(pDecoder, &pTask->historyTaskId.taskId)) return -1; + if (tDecodeI64(pDecoder, &pTask->streamTaskId.streamId)) return -1; + if (tDecodeI32(pDecoder, &pTask->streamTaskId.taskId)) return -1; + + if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1; + if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1; + if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1; int32_t epSz; if (tDecodeI32(pDecoder, &epSz) < 0) return -1; - pTask->childEpInfo = taosArrayInit(epSz, sizeof(void*)); + + pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); if (pInfo == NULL) return -1; @@ -150,10 +172,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { taosMemoryFreeClear(pInfo); return -1; } - taosArrayPush(pTask->childEpInfo, &pInfo); + taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo); } - if (pTask->taskLevel != TASK_LEVEL__SINK) { + if (pTask->info.taskLevel != TASK_LEVEL__SINK) { if (tDecodeCStrAlloc(pDecoder, &pTask->exec.qmsg) < 0) return -1; } @@ -203,7 +225,7 @@ void tFreeStreamTask(SStreamTask* pTask) { walCloseReader(pTask->exec.pWalReader); } - taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); + taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree); if (pTask->outputType == TASK_OUTPUT__TABLE) { tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index c10ef1e5572d692ae430088d4d7884a2ba5be490..dd857141c1288da621b8ed2b58af8373d28fbe31 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ 
b/source/libs/stream/src/tstreamFileState.c @@ -43,12 +43,13 @@ struct SStreamFileState { uint64_t maxRowCount; uint64_t curRowCount; GetTsFun getTs; + char* id; }; typedef SRowBuffPos SRowBuffInfo; SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, - GetTsFun fp, void* pFile, TSKEY delMark) { + GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) { if (memSize <= 0) { memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE; } @@ -70,6 +71,7 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) { goto _error; } + pFileState->keyLen = keySize; pFileState->rowSize = rowSize; pFileState->selectivityRowSize = selectRowSize; @@ -81,6 +83,8 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ pFileState->deleteMark = delMark; pFileState->flushMark = INT64_MIN; pFileState->maxTs = INT64_MIN; + pFileState->id = taosStrdup(idstr); + recoverSnapshot(pFileState); return pFileState; @@ -124,6 +128,8 @@ void streamFileStateDestroy(SStreamFileState* pFileState) { if (!pFileState) { return; } + + taosMemoryFree(pFileState->id); tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr); tdListFreeP(pFileState->freeBuffs, destroyRowBuff); tSimpleHashCleanup(pFileState->rowBuffMap); @@ -177,7 +183,8 @@ void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uin i++; } } - qInfo("do stream state flush %d rows to disck. is used: %d", listNEles(pFlushList), used); + + qInfo("stream state flush %d rows to disk. 
is used:%d", listNEles(pFlushList), used); } int32_t flushRowBuff(SStreamFileState* pFileState) { @@ -185,13 +192,17 @@ int32_t flushRowBuff(SStreamFileState* pFileState) { if (!pFlushList) { return TSDB_CODE_OUT_OF_MEMORY; } + uint64_t num = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO); num = TMAX(num, FLUSH_NUM); popUsedBuffs(pFileState, pFlushList, num, false); + if (isListEmpty(pFlushList)) { popUsedBuffs(pFileState, pFlushList, num, true); } + flushSnapshot(pFileState, pFlushList, false); + SListIter fIter = {0}; tdListInitIter(pFlushList, &fIter, TD_LIST_FORWARD); SListNode* pNode = NULL; @@ -201,6 +212,7 @@ int32_t flushRowBuff(SStreamFileState* pFileState) { tdListAppend(pFileState->freeBuffs, &pPos->pRowBuff); pPos->pRowBuff = NULL; } + tdListFreeP(pFlushList, destroyRowBuffPosPtr); return TSDB_CODE_SUCCESS; } @@ -269,13 +281,13 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi TSKEY ts = pFileState->getTs(pKey); if (ts > pFileState->maxTs - pFileState->deleteMark && ts < pFileState->flushMark) { int32_t len = 0; - void* pVal = NULL; - int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &pVal, &len); + void* p = NULL; + int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &p, &len); qDebug("===stream===get %" PRId64 " from disc, res %d", ts, code); if (code == TSDB_CODE_SUCCESS) { - memcpy(pNewPos->pRowBuff, pVal, len); + memcpy(pNewPos->pRowBuff, p, len); } - taosMemoryFree(pVal); + taosMemoryFree(p); } tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pNewPos, POINTER_BYTES); @@ -348,7 +360,10 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD); const int32_t BATCH_LIMIT = 256; - SListNode* pNode = NULL; + + int64_t st = taosGetTimestampMs(); + int32_t numOfElems = listNEles(pSnapshot); + SListNode* pNode = NULL; int idx = streamStateGetCfIdx(pFileState->pFileStore, "state"); @@ -359,6 +374,7 @@ 
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data; ASSERT(pPos->pRowBuff && pFileState->rowSize > 0); + if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStateClearBatch(batch); @@ -367,16 +383,22 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number}; code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, &sKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); + // todo handle failure memset(buf, 0, len); - qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); +// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); } taosMemoryFree(buf); if (streamStateGetBatchSize(batch) > 0) { streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); } + streamStateClearBatch(batch); + int64_t elapsed = taosGetTimestampMs() - st; + qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems, + BATCH_LIMIT, elapsed); + if (flushState) { const char* taskKey = "streamFileState"; { @@ -398,8 +420,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, } streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); } - streamStateDestroyBatch(batch); + streamStateDestroyBatch(batch); return code; } @@ -502,3 +524,8 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) { } int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState) { return pFileState->selectivityRowSize; } + +void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts) { + pFileState->flushMark = TMAX(pFileState->flushMark, ts); + pFileState->maxTs = TMAX(pFileState->maxTs, ts); +} diff --git 
a/source/libs/stream/test/tstreamUpdateTest.cpp b/source/libs/stream/test/tstreamUpdateTest.cpp index 18c60aff284414e5ba5044d50000a9bd45718965..0e84d6b8bdad0e40e7257a0d60880439d1f3ba37 100644 --- a/source/libs/stream/test/tstreamUpdateTest.cpp +++ b/source/libs/stream/test/tstreamUpdateTest.cpp @@ -158,7 +158,7 @@ TEST(TD_STREAM_UPDATE_TEST, update) { // void *buf = taosMemoryCalloc(1, bufLen); // int32_t resSize = updateInfoSerialize(buf, bufLen, pSU7); - // SUpdateInfo *pSU6 = updateInfoInit(0, TSDB_TIME_PRECISION_MILLI, 0); + // SUpdateInfo *pSU6 = taosMemoryCalloc(1, sizeof(SUpdateInfo)); // int32_t desSize = updateInfoDeserialize(buf, bufLen, pSU6); // GTEST_ASSERT_EQ(desSize, 0); diff --git a/tests/script/sh/deploy.sh b/tests/script/sh/deploy.sh index 7da8da09bfcf3810b692ff25f37333c93ae87497..5b1773e66418e96902c887f455032083bc0ddd2c 100755 --- a/tests/script/sh/deploy.sh +++ b/tests/script/sh/deploy.sh @@ -118,7 +118,7 @@ echo "statusInterval 1" >> $TAOS_CFG echo "dataDir $DATA_DIR" >> $TAOS_CFG echo "logDir $LOG_DIR" >> $TAOS_CFG echo "debugFlag 0" >> $TAOS_CFG -echo "tmrDebugFlag 131" >> $TAOS_CFG +echo "tmrDebugFlag 143" >> $TAOS_CFG echo "uDebugFlag 143" >> $TAOS_CFG echo "rpcDebugFlag 143" >> $TAOS_CFG echo "jniDebugFlag 143" >> $TAOS_CFG diff --git a/tests/script/tsim/stream/fillHistoryTransform.sim b/tests/script/tsim/stream/fillHistoryTransform.sim new file mode 100644 index 0000000000000000000000000000000000000000..fe58b76b78d6a2bd9614d05022b4f2694bf23db4 --- /dev/null +++ b/tests/script/tsim/stream/fillHistoryTransform.sim @@ -0,0 +1,405 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print =============== create database +sql create database test vgroups 1; +sql select * from information_schema.ins_databases +if $rows != 3 then + return -1 +endi + +print $data00 $data01 $data02 + +sql use test; + +print =====step1 + +sql create table t1(ts timestamp, a int, b int , c 
int, d double); + +sql insert into t1 values(1648791213000,10,2,3,1.0); + +sql create stream stream0 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt as select _wstart, sum(a) from t1 interval(10s); + +$loop_count = 0 +loop00: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop00 +endi + +if $data01 != 10 then + print =====data01=$data01 + goto loop00 +endi + +sql insert into t1 values(1648791213000,1,2,3,1.0); + +$loop_count = 0 +loop0: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop0 +endi + +if $data01 != 1 then + print =====data01=$data01 + goto loop0 +endi + +sql insert into t1 values(1648791213001,2,2,3,1.0); + +$loop_count = 0 +loop1: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop1 +endi + +if $data01 != 3 then + print ======$data01 + goto loop1 +endi + + +sql insert into t1 values(1648791223001,3,2,3,1.0); + +sql insert into t1 values(1648791223002,4,2,3,1.0); + +$loop_count = 0 +loop2: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt; + +if $rows != 2 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop2 +endi + +if $data01 != 3 then + print ======$data01 + goto loop2 +endi + +if $data11 != 7 then + print ======$data01 + goto loop2 +endi + +print 
=====step1 over + +print =====step2 + +sql create database test1 vgroups 4; + +sql use test1; + +sql create stable st(ts timestamp,a int,b int,c int,d double) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); + +sql insert into t1 values(1648791213000,10,2,3,1.0); + +sql create stream stream1 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt1 as select _wstart, sum(a) from st interval(10s); + +$loop_count = 0 +loop00: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt1; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop00 +endi + +if $data01 != 10 then + print =====data01=$data01 + goto loop00 +endi + +sql insert into t1 values(1648791213000,1,2,3,1.0); + +$loop_count = 0 +loop0: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt1; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop0 +endi + +if $data01 != 1 then + print =====data01=$data01 + goto loop0 +endi + +sql insert into t1 values(1648791213001,2,2,3,1.0); + +$loop_count = 0 +loop1: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt1; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop1 +endi + +if $data01 != 3 then + print ======$data01 + goto loop1 +endi + + +sql insert into t1 values(1648791223001,3,2,3,1.0); + +sql insert into t1 values(1648791223002,4,2,3,1.0); + +$loop_count = 0 +loop2: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt1; + +if $rows != 2 then + 
print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop2 +endi + +if $data01 != 3 then + print ======$data01 + goto loop2 +endi + +if $data11 != 7 then + print ======$data01 + goto loop2 +endi + +print =====step2 over + +print =====step3 + +sql create database test2 vgroups 4; + +sql use test2; + +sql create stable st(ts timestamp,a int,b int,c int,d double) tags(ta int,tb int,tc int); +sql create table t1 using st tags(1,1,1); +sql create table t2 using st tags(2,2,2); + +sql insert into t1 values(1648791213000,10,2,3,1.0); + +sql create stream stream2 trigger at_once fill_history 1 IGNORE EXPIRED 0 IGNORE UPDATE 0 into streamt2 as select _wstart, sum(a) from st partition by ta interval(10s); + +$loop_count = 0 +loop00: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt2; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop00 +endi + +if $data01 != 10 then + print =====data01=$data01 + goto loop00 +endi + +sql insert into t1 values(1648791213000,1,2,3,1.0); + +$loop_count = 0 +loop0: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt2; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop0 +endi + +if $data01 != 1 then + print =====data01=$data01 + goto loop0 +endi + +sql insert into t1 values(1648791213001,2,2,3,1.0); + +$loop_count = 0 +loop1: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt2; + +if $rows != 1 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop1 +endi + +if $data01 != 3 then + print ======$data01 + goto loop1 +endi + + +sql 
insert into t1 values(1648791223001,3,2,3,1.0); + +sql insert into t1 values(1648791223002,4,2,3,1.0); + +$loop_count = 0 +loop2: + +sleep 1000 + +$loop_count = $loop_count + 1 +if $loop_count == 20 then + return -1 +endi + +sql select * from streamt2; + +if $rows != 2 then + print ======$rows + print data00,data01, data02 + print data10,data11, data12 + print data20,data21, data22 + goto loop2 +endi + +if $data01 != 3 then + print ======$data01 + goto loop2 +endi + +if $data11 != 7 then + print ======$data01 + goto loop2 +endi + +print =====step3 over + +print =====over + + +system sh/stop_dnodes.sh diff --git a/tests/script/tsim/stream/partitionby.sim b/tests/script/tsim/stream/partitionby.sim index df4b60314fc8b0b52fa2f7075262cea2cd0106ce..9a660741e7eab7242564225377ba2fc77691604a 100644 --- a/tests/script/tsim/stream/partitionby.sim +++ b/tests/script/tsim/stream/partitionby.sim @@ -14,6 +14,7 @@ sql create table ts3 using st tags(3,2,2); sql create table ts4 using st tags(4,2,2); sql create stream stream_t1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 into test0.streamtST1 as select _wstart, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5 from st partition by ta,tb,tc interval(10s); +sleep 500 sql insert into ts1 values(1648791213001,1,12,3,1.0); sql insert into ts2 values(1648791213001,1,12,3,1.0); diff --git a/tests/script/tsim/stream/sliding.sim b/tests/script/tsim/stream/sliding.sim index 05eb7dacba92254c947289332a5cbcce8410271a..18893245fa6b735168f388563566537818d4c88f 100644 --- a/tests/script/tsim/stream/sliding.sim +++ b/tests/script/tsim/stream/sliding.sim @@ -575,8 +575,6 @@ endi $loop_count = 0 print step 7 - - sql create database test3 vgroups 6; sql use test3; sql create stable st(ts timestamp, a int, b int, c int, d double) tags(ta int,tb int,tc int); diff --git a/tests/system-test/0-others/backquote_check.py b/tests/system-test/0-others/backquote_check.py index 
be8590f913f110e0156d71fa81560fc99fa39132..7c91fd9e8cb9ef732b51efc32452df5601510555 100644 --- a/tests/system-test/0-others/backquote_check.py +++ b/tests/system-test/0-others/backquote_check.py @@ -22,7 +22,7 @@ class TDTestCase: def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor()) + tdSql.init(conn.cursor(), True) self.dbname = 'db' self.setsql = TDSetSql() self.stbname = 'stb' diff --git a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index 22e319fdaf076190f3fc31390f5ca88d9e5561b1..cd71de0c06caa4fe8e5a64b58ddc25adc5aec643 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -138,9 +138,9 @@ class TDTestCase: tdLog.printNoPrefix(f"==========step1:prepare and check data in old version-{BASEVERSION}") tdLog.info(f" LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ") os.system(f"LD_LIBRARY_PATH=/usr/lib taosBenchmark -t {tableNumbers} -n {recordNumbers1} -y ") - os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ") - os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters partition by tbname;" ') - os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ') + # os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ") + # 
os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters partition by tbname;" ') + # os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ') os.system(f"sed -i 's/\/etc\/taos/{cPath}/' 0-others/tmqBasic.json ") # os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/tmqBasic.json -y ") os.system('LD_LIBRARY_PATH=/usr/lib taos -s "create topic if not exists tmq_test_topic as select current,voltage,phase from test.meters where voltage <= 106 and current <= 5;" ') @@ -224,7 +224,7 @@ class TDTestCase: args = (caller.filename, caller.lineno) tdLog.exit("%s(%d) failed" % args) tdsql.query("show streams;") - tdsql.checkRows(2) + tdsql.checkRows(0) tdsql.query("select *,tbname from d0.almlog where mcid='m0103';") tdsql.checkRows(6) expectList = [0,3003,20031,20032,20033,30031] diff --git a/tests/system-test/1-insert/db_tb_name_check.py b/tests/system-test/1-insert/db_tb_name_check.py index 23bb53962038a416590a28b99bdd2763bf52a954..fa43603e258f67b8cead63f30130424fdbc60461 100644 --- a/tests/system-test/1-insert/db_tb_name_check.py +++ b/tests/system-test/1-insert/db_tb_name_check.py @@ -44,7 +44,7 @@ class TDTestCase: new_dbname = list(dbname) new_dbname.insert(i,j) dbname_1 = ''.join(new_dbname) - tdSql.execute(f'create database if not exists `{dbname_1}`') + tdSql.execute(f'create database if not exists `{dbname_1}` vgroups 1 replica 1') tdSql.query('select * from information_schema.ins_databases') tdSql.checkEqual(tdSql.queryResult[2][0],str(dbname_1)) tdSql.execute(f'drop database `{dbname_1}`') @@ -56,7 +56,7 @@ class TDTestCase: def tb_name_check(self): dbname = tdCom.getLongName(10) - tdSql.execute(f'create database if not exists `{dbname}`') + tdSql.execute(f'create database if not exists `{dbname}` vgroups 1 replica 1') 
tdSql.execute(f'use `{dbname}`') tbname = tdCom.getLongName(5) for i in self.special_name: