提交 441ce216 编写于 作者: C Cary Xu

enh: rsma batch process

上级 f0cdafd6
...@@ -2667,31 +2667,6 @@ typedef struct { ...@@ -2667,31 +2667,6 @@ typedef struct {
int32_t padding; int32_t padding;
} SRSmaExecMsg; } SRSmaExecMsg;
typedef struct {
int64_t suid;
int8_t level;
} SRSmaFetchMsg;
static FORCE_INLINE int32_t tEncodeSRSmaFetchMsg(SEncoder* pCoder, const SRSmaFetchMsg* pReq) {
if (tStartEncode(pCoder) < 0) return -1;
if (tEncodeI64(pCoder, pReq->suid) < 0) return -1;
if (tEncodeI8(pCoder, pReq->level) < 0) return -1;
tEndEncode(pCoder);
return 0;
}
static FORCE_INLINE int32_t tDecodeSRSmaFetchMsg(SDecoder* pCoder, SRSmaFetchMsg* pReq) {
if (tStartDecode(pCoder) < 0) return -1;
if (tDecodeI64(pCoder, &pReq->suid) < 0) return -1;
if (tDecodeI8(pCoder, &pReq->level) < 0) return -1;
tEndDecode(pCoder);
return 0;
}
typedef struct { typedef struct {
int8_t version; // for compatibility(default 0) int8_t version; // for compatibility(default 0)
int8_t intervalUnit; // MACRO: TIME_UNIT_XXX int8_t intervalUnit; // MACRO: TIME_UNIT_XXX
......
...@@ -201,7 +201,7 @@ enum { ...@@ -201,7 +201,7 @@ enum {
TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_SMA, "vnode-cancel-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_SMA, "vnode-cancel-sma", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_SMA, "vnode-drop-sma", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp) TD_DEF_MSG_TYPE(TDMT_VND_SUBMIT_RSMA, "vnode-submit-rsma", SSubmitReq, SSubmitRsp)
TD_DEF_MSG_TYPE(TDMT_VND_FETCH_RSMA, "vnode-fetch-rsma", SRSmaFetchMsg, NULL) TD_DEF_MSG_TYPE(TDMT_VND_FETCH_RSMA, "vnode-fetch-rsma", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_EXEC_RSMA, "vnode-exec-rsma", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_EXEC_RSMA, "vnode-exec-rsma", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_DELETE, "delete-data", SVDeleteReq, SVDeleteRsp)
TD_DEF_MSG_TYPE(TDMT_VND_BATCH_DEL, "batch-delete", SBatchDeleteReq, NULL) TD_DEF_MSG_TYPE(TDMT_VND_BATCH_DEL, "batch-delete", SBatchDeleteReq, NULL)
......
...@@ -76,6 +76,7 @@ void taosFreeQall(STaosQall *qall); ...@@ -76,6 +76,7 @@ void taosFreeQall(STaosQall *qall);
int32_t taosReadAllQitems(STaosQueue *queue, STaosQall *qall); int32_t taosReadAllQitems(STaosQueue *queue, STaosQall *qall);
int32_t taosGetQitem(STaosQall *qall, void **ppItem); int32_t taosGetQitem(STaosQall *qall, void **ppItem);
void taosResetQitems(STaosQall *qall); void taosResetQitems(STaosQall *qall);
int32_t taosQallItemSize(STaosQall *qall);
STaosQset *taosOpenQset(); STaosQset *taosOpenQset();
void taosCloseQset(STaosQset *qset); void taosCloseQset(STaosQset *qset);
......
...@@ -442,6 +442,8 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt ...@@ -442,6 +442,8 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt
if (req.rollup) { if (req.rollup) {
req.rsmaParam.maxdelay[0] = pStb->maxdelay[0]; req.rsmaParam.maxdelay[0] = pStb->maxdelay[0];
req.rsmaParam.maxdelay[1] = pStb->maxdelay[1]; req.rsmaParam.maxdelay[1] = pStb->maxdelay[1];
req.rsmaParam.watermark[0] = pStb->watermark[0];
req.rsmaParam.watermark[1] = pStb->watermark[1];
if (pStb->ast1Len > 0) { if (pStb->ast1Len > 0) {
if (mndConvertRsmaTask(&req.rsmaParam.qmsg[0], &req.rsmaParam.qmsgLen[0], pStb->pAst1, pStb->uid, if (mndConvertRsmaTask(&req.rsmaParam.qmsg[0], &req.rsmaParam.qmsgLen[0], pStb->pAst1, pStb->uid,
STREAM_TRIGGER_WINDOW_CLOSE, req.rsmaParam.watermark[0]) < 0) { STREAM_TRIGGER_WINDOW_CLOSE, req.rsmaParam.watermark[0]) < 0) {
......
...@@ -90,14 +90,14 @@ struct SRSmaStat { ...@@ -90,14 +90,14 @@ struct SRSmaStat {
SSma *pSma; SSma *pSma;
int64_t commitAppliedVer; // vnode applied version for async commit int64_t commitAppliedVer; // vnode applied version for async commit
int64_t refId; // shared by fetch tasks int64_t refId; // shared by fetch tasks
volatile int64_t qBufSize; // queue buffer size volatile int64_t nBufItems; // number of items in queue buffer
SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo)
volatile int8_t nExecutor; // [1, max(half of query threads, 4)]
int8_t triggerStat; // shared by fetch tasks int8_t triggerStat; // shared by fetch tasks
int8_t commitStat; // 0 not in committing, 1 in committing int8_t commitStat; // 0 not in committing, 1 in committing
int8_t execStat; // 0 not in exec , 1 in exec
SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w) SArray *aTaskFile; // qTaskFiles committed recently(for recovery/snapshot r/w)
SHashObj *infoHash; // key: suid, value: SRSmaInfo SHashObj *infoHash; // key: suid, value: SRSmaInfo
SHashObj *fetchHash; // key: suid, value: L1 or L2 or L1|L2 tsem_t notEmpty; // has items in queue buffer
}; };
struct SSmaStat { struct SSmaStat {
...@@ -111,7 +111,6 @@ struct SSmaStat { ...@@ -111,7 +111,6 @@ struct SSmaStat {
#define SMA_STAT_TSMA(s) (&(s)->tsmaStat) #define SMA_STAT_TSMA(s) (&(s)->tsmaStat)
#define SMA_STAT_RSMA(s) (&(s)->rsmaStat) #define SMA_STAT_RSMA(s) (&(s)->rsmaStat)
#define RSMA_INFO_HASH(r) ((r)->infoHash) #define RSMA_INFO_HASH(r) ((r)->infoHash)
#define RSMA_FETCH_HASH(r) ((r)->fetchHash)
#define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat) #define RSMA_TRIGGER_STAT(r) (&(r)->triggerStat)
#define RSMA_COMMIT_STAT(r) (&(r)->commitStat) #define RSMA_COMMIT_STAT(r) (&(r)->commitStat)
#define RSMA_REF_ID(r) ((r)->refId) #define RSMA_REF_ID(r) ((r)->refId)
...@@ -120,8 +119,10 @@ struct SSmaStat { ...@@ -120,8 +119,10 @@ struct SSmaStat {
struct SRSmaInfoItem { struct SRSmaInfoItem {
int8_t level; int8_t level;
int8_t triggerStat; int8_t triggerStat;
uint16_t interval; // second uint8_t nSkipped; // number of skipped to fetch data from all active window
int32_t maxDelay; int8_t fetchLevel;
int32_t maxDelay; // ms
int64_t lastFetch; // ms
tmr_h tmrId; tmr_h tmrId;
}; };
...@@ -129,8 +130,10 @@ struct SRSmaInfo { ...@@ -129,8 +130,10 @@ struct SRSmaInfo {
STSchema *pTSchema; STSchema *pTSchema;
int64_t suid; int64_t suid;
int64_t refId; // refId of SRSmaStat int64_t refId; // refId of SRSmaStat
uint64_t delFlag : 1; int64_t lastRecv; // ms
uint64_t lastReceived : 63; // second int8_t delFlag;
int8_t assigned; // 0 idle, 1 assgined for exec
int16_t padding;
T_REF_DECLARE() T_REF_DECLARE()
SRSmaInfoItem items[TSDB_RETENTION_L2]; SRSmaInfoItem items[TSDB_RETENTION_L2];
void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t
......
...@@ -198,7 +198,6 @@ int32_t smaAsyncPreCommit(SSma* pSma); ...@@ -198,7 +198,6 @@ int32_t smaAsyncPreCommit(SSma* pSma);
int32_t smaAsyncCommit(SSma* pSma); int32_t smaAsyncCommit(SSma* pSma);
int32_t smaAsyncPostCommit(SSma* pSma); int32_t smaAsyncPostCommit(SSma* pSma);
int32_t smaDoRetention(SSma* pSma, int64_t now); int32_t smaDoRetention(SSma* pSma, int64_t now);
int32_t smaProcessFetch(SSma* pSma, void* pMsg);
int32_t smaProcessExec(SSma* pSma, void* pMsg); int32_t smaProcessExec(SSma* pSma, void* pMsg);
int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg);
......
...@@ -321,10 +321,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { ...@@ -321,10 +321,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) {
int32_t nLoops = 0; int32_t nLoops = 0;
while (1) { while (1) {
if (T_REF_VAL_GET(pStat) == 0) { if (T_REF_VAL_GET(pStat) == 0) {
smaDebug("vgId:%d, rsma fetch tasks all finished", SMA_VID(pSma)); smaDebug("vgId:%d, rsma commit, fetch tasks all finished", SMA_VID(pSma));
break; break;
} else { } else {
smaDebug("vgId:%d, rsma fetch tasks not all finished yet", SMA_VID(pSma)); smaDebug("vgId:%d, rsma commit, fetch tasks not all finished yet", SMA_VID(pSma));
} }
++nLoops; ++nLoops;
if (nLoops > 1000) { if (nLoops > 1000) {
...@@ -338,30 +338,25 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { ...@@ -338,30 +338,25 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) {
* 1) This is high cost task and should not put in asyncPreCommit originally. * 1) This is high cost task and should not put in asyncPreCommit originally.
* 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently. * 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently.
*/ */
nLoops = 0; if (tdRSmaProcessExecImpl(pSma, RSMA_EXEC_COMMIT) < 0) {
smaInfo("vgId:%d, start to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); return TSDB_CODE_FAILED;
}
int8_t old; smaInfo("vgId:%d, rsma commit, wait for all items to be consumed, TID:%p", SMA_VID(pSma), (void*)taosGetSelfPthreadId());
while (1) { nLoops = 0;
old = atomic_val_compare_exchange_8(&pRSmaStat->execStat, 0, 1); while (atomic_load_64(&pRSmaStat->nBufItems) > 0) {
if (old == 0) break; ++nLoops;
if (++nLoops > 1000) { if (nLoops > 1000) {
sched_yield(); sched_yield();
nLoops = 0; nLoops = 0;
smaDebug("vgId:%d, wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
}
} }
smaInfo("vgId:%d, end to wait for rsma qtask free, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
if (tdRSmaProcessExecImpl(pSma, RSMA_EXEC_COMMIT) < 0) {
atomic_store_8(&pRSmaStat->execStat, 0);
return TSDB_CODE_FAILED;
} }
smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void*)taosGetSelfPthreadId());
#if 0 // consuming task of qTaskInfo clone
// step 4: swap queue/qall and iQueue/iQall // step 4: swap queue/qall and iQueue/iQall
// lock // lock
taosWLockLatch(SMA_ENV_LOCK(pEnv)); // taosWLockLatch(SMA_ENV_LOCK(pEnv));
ASSERT(RSMA_INFO_HASH(pRSmaStat)); ASSERT(RSMA_INFO_HASH(pRSmaStat));
...@@ -376,11 +371,9 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) { ...@@ -376,11 +371,9 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) {
pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter); pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), pIter);
} }
atomic_store_64(&pRSmaStat->qBufSize, 0);
atomic_store_8(&pRSmaStat->execStat, 0);
// unlock // unlock
taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); // taosWUnLockLatch(SMA_ENV_LOCK(pEnv));
#endif
// step 5: others // step 5: others
pRSmaStat->commitAppliedVer = pSma->pVnode->state.applied; pRSmaStat->commitAppliedVer = pSma->pVnode->state.applied;
...@@ -426,7 +419,7 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { ...@@ -426,7 +419,7 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) {
// step 1: merge qTaskInfo and iQTaskInfo // step 1: merge qTaskInfo and iQTaskInfo
// lock // lock
taosWLockLatch(SMA_ENV_LOCK(pEnv)); // taosWLockLatch(SMA_ENV_LOCK(pEnv));
void *pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL); void *pIter = taosHashIterate(RSMA_INFO_HASH(pRSmaStat), NULL);
while (pIter) { while (pIter) {
...@@ -480,10 +473,9 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) { ...@@ -480,10 +473,9 @@ static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma) {
taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t)); taosHashRemove(RSMA_INFO_HASH(pRSmaStat), pSuid, sizeof(tb_uid_t));
} }
taosArrayDestroy(rsmaDeleted); taosArrayDestroy(rsmaDeleted);
// TODO: remove suid in files?
// unlock // unlock
taosWUnLockLatch(SMA_ENV_LOCK(pEnv)); // taosWUnLockLatch(SMA_ENV_LOCK(pEnv));
// step 2: cleanup outdated qtaskinfo files // step 2: cleanup outdated qtaskinfo files
tdCleanupQTaskInfoFiles(pSma, pRSmaStat); tdCleanupQTaskInfoFiles(pSma, pRSmaStat);
......
...@@ -209,6 +209,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS ...@@ -209,6 +209,7 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS
SRSmaStat *pRSmaStat = (SRSmaStat *)(*pSmaStat); SRSmaStat *pRSmaStat = (SRSmaStat *)(*pSmaStat);
pRSmaStat->pSma = (SSma *)pSma; pRSmaStat->pSma = (SSma *)pSma;
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT);
tsem_init(&pRSmaStat->notEmpty, 0, 0);
// init smaMgmt // init smaMgmt
smaInit(); smaInit();
...@@ -230,12 +231,6 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS ...@@ -230,12 +231,6 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS
if (!RSMA_INFO_HASH(pRSmaStat)) { if (!RSMA_INFO_HASH(pRSmaStat)) {
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;
} }
RSMA_FETCH_HASH(pRSmaStat) = taosHashInit(
RSMA_TASK_INFO_HASH_SLOT, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK);
if (!RSMA_FETCH_HASH(pRSmaStat)) {
return TSDB_CODE_FAILED;
}
} else if (smaType == TSDB_SMA_TYPE_TIME_RANGE) { } else if (smaType == TSDB_SMA_TYPE_TIME_RANGE) {
// TODO // TODO
} else { } else {
...@@ -267,6 +262,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { ...@@ -267,6 +262,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
smaDebug("vgId:%d, destroy rsma stat %p", SMA_VID(pSma), pRSmaStat); smaDebug("vgId:%d, destroy rsma stat %p", SMA_VID(pSma), pRSmaStat);
// step 1: set rsma trigger stat cancelled // step 1: set rsma trigger stat cancelled
atomic_store_8(RSMA_TRIGGER_STAT(pStat), TASK_TRIGGER_STAT_CANCELLED); atomic_store_8(RSMA_TRIGGER_STAT(pStat), TASK_TRIGGER_STAT_CANCELLED);
tsem_destroy(&(pStat->notEmpty));
// step 2: destroy the rsma info and associated fetch tasks // step 2: destroy the rsma info and associated fetch tasks
if (taosHashGetSize(RSMA_INFO_HASH(pStat)) > 0) { if (taosHashGetSize(RSMA_INFO_HASH(pStat)) > 0) {
...@@ -279,10 +275,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) { ...@@ -279,10 +275,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
} }
taosHashCleanup(RSMA_INFO_HASH(pStat)); taosHashCleanup(RSMA_INFO_HASH(pStat));
// step 3: destroy the rsma fetch hash // step 3: wait all triggered fetch tasks finished
taosHashCleanup(RSMA_FETCH_HASH(pStat));
// step 4: wait all triggered fetch tasks finished
int32_t nLoops = 0; int32_t nLoops = 0;
while (1) { while (1) {
if (T_REF_VAL_GET((SSmaStat *)pStat) == 0) { if (T_REF_VAL_GET((SSmaStat *)pStat) == 0) {
......
...@@ -301,8 +301,6 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { ...@@ -301,8 +301,6 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) {
return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg, 0); return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg, 0);
case TDMT_SCH_QUERY_CONTINUE: case TDMT_SCH_QUERY_CONTINUE:
return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0); return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0);
case TDMT_VND_FETCH_RSMA:
return smaProcessFetch(pVnode->pSma, pMsg);
case TDMT_VND_EXEC_RSMA: case TDMT_VND_EXEC_RSMA:
return smaProcessExec(pVnode->pSma, pMsg); return smaProcessExec(pVnode->pSma, pMsg);
default: default:
......
...@@ -3131,6 +3131,7 @@ int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) { ...@@ -3131,6 +3131,7 @@ int32_t aggDecodeResultRow(SOperatorInfo* pOperator, char* result) {
initResultRow(resultRow); initResultRow(resultRow);
pInfo->resultRowInfo.cur = (SResultRowPosition){.pageId = resultRow->pageId, .offset = resultRow->offset}; pInfo->resultRowInfo.cur = (SResultRowPosition){.pageId = resultRow->pageId, .offset = resultRow->offset};
// releaseBufPage(pSup->pResultBuf, getBufPage(pSup->pResultBuf, pageId));
} }
if (offset != length) { if (offset != length) {
......
...@@ -299,6 +299,7 @@ int32_t taosGetQitem(STaosQall *qall, void **ppItem) { ...@@ -299,6 +299,7 @@ int32_t taosGetQitem(STaosQall *qall, void **ppItem) {
} }
void taosResetQitems(STaosQall *qall) { qall->current = qall->start; } void taosResetQitems(STaosQall *qall) { qall->current = qall->start; }
int32_t taosQallItemSize(STaosQall *qall) { return qall->numOfItems; }
STaosQset *taosOpenQset() { STaosQset *taosOpenQset() {
STaosQset *qset = taosMemoryCalloc(sizeof(STaosQset), 1); STaosQset *qset = taosMemoryCalloc(sizeof(STaosQset), 1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册