未验证 提交 caec1313 编写于 作者: H Haojun Liao 提交者: GitHub

Merge pull request #20628 from taosdata/fix/liaohj

fix(tmq): wait for 2mins when subscribe topics.
......@@ -1116,6 +1116,7 @@ _failed:
}
int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) {
const int32_t MAX_RETRY_COUNT = 120 * 2; // let's wait for 2 mins at most
const SArray* container = &topic_list->container;
int32_t sz = taosArrayGetSize(container);
void* buf = NULL;
......@@ -1209,7 +1210,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) {
int32_t retryCnt = 0;
while (TSDB_CODE_MND_CONSUMER_NOT_READY == tmqAskEp(tmq, false)) {
if (retryCnt++ > 40) {
if (retryCnt++ > MAX_RETRY_COUNT) {
goto FAIL;
}
......@@ -1811,7 +1812,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
if (pRspWrapper == NULL) {
taosReadAllQitems(tmq->mqueue, tmq->qall);
taosGetQitem(tmq->qall, (void**)&pRspWrapper);
if (pRspWrapper == NULL) {
return NULL;
}
......@@ -1831,7 +1831,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
SMqDataRsp* pDataRsp = &pollRspWrapper->dataRsp;
if (pDataRsp->head.epoch == consumerEpoch) {
// todo fix it: race condition
SMqClientVg* pVg = pollRspWrapper->vgHandle;
// update the epset
......@@ -1843,6 +1842,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
pVg->epSet = *pollRspWrapper->pEpset;
}
// update the local offset value only for the returned values.
pVg->currentOffset = pDataRsp->rspOffset;
atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE);
......
......@@ -112,19 +112,14 @@ struct STQ {
SVnode* pVnode;
char* path;
int64_t walLogLastVer;
SRWLatch pushLock;
SRWLatch lock;
SHashObj* pPushMgr; // consumerId -> STqPushEntry
SHashObj* pHandle; // subKey -> STqHandle
SHashObj* pCheckInfo; // topic -> SAlterCheckInfo
STqOffsetStore* pOffsetStore;
TDB* pMetaDB;
TTB* pExecStore;
TTB* pCheckStore;
SStreamMeta* pStreamMeta;
};
......@@ -164,7 +159,7 @@ typedef struct {
int32_t size;
} STqOffsetHead;
STqOffsetStore* tqOffsetOpen();
STqOffsetStore* tqOffsetOpen(STQ* pTq);
void tqOffsetClose(STqOffsetStore*);
STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey);
int32_t tqOffsetWrite(STqOffsetStore* pStore, const STqOffset* pOffset);
......
......@@ -51,7 +51,7 @@ void tqCleanUp() {
}
}
static void destroySTqHandle(void* data) {
static void destroyTqHandle(void* data) {
STqHandle* pData = (STqHandle*)data;
qDestroyTask(pData->execHandle.task);
if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
......@@ -89,9 +89,9 @@ STQ* tqOpen(const char* path, SVnode* pVnode) {
pTq->walLogLastVer = pVnode->pWal->vers.lastVer;
pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK);
taosHashSetFreeFp(pTq->pHandle, destroySTqHandle);
taosHashSetFreeFp(pTq->pHandle, destroyTqHandle);
taosInitRWLatch(&pTq->pushLock);
taosInitRWLatch(&pTq->lock);
pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
taosHashSetFreeFp(pTq->pPushMgr, tqPushEntryFree);
......@@ -236,38 +236,6 @@ int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) {
}
#endif
// int32_t len = 0;
// int32_t code = 0;
// tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code);
// if (code < 0) {
// return -1;
// }
//
// int32_t tlen = sizeof(SMqRspHead) + len;
// void* buf = rpcMallocCont(tlen);
// if (buf == NULL) {
// return -1;
// }
//
// memcpy(buf, &pPushEntry->dataRsp.head, sizeof(SMqRspHead));
//
// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
//
// SEncoder encoder = {0};
// tEncoderInit(&encoder, abuf, len);
// tEncodeSMqDataRsp(&encoder, pRsp);
// tEncoderClear(&encoder);
//
// SRpcMsg rsp = {
// .info = pPushEntry->pInfo,
// .pCont = buf,
// .contLen = tlen,
// .code = 0,
// };
//
// tmsgSendRsp(&rsp);
//
SMqRspHead* pHeader = &pPushEntry->pDataRsp->head;
doSendDataRsp(&pPushEntry->info, pRsp, pHeader->epoch, pHeader->consumerId, pHeader->mqMsgType);
......@@ -444,7 +412,7 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand
char formatBuf[80];
tFormatOffset(formatBuf, 80, pOffsetVal);
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, prev offset found, offset reset to %s and continue.",
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue.",
consumerId, pHandle->subKey, vgId, formatBuf);
return 0;
} else {
......@@ -502,65 +470,76 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand
return 0;
}
static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) {
int32_t code = -1;
STqOffsetVal offset = {0};
SWalCkHead* pCkHead = NULL;
int32_t vgId = TD_VID(pTq->pVnode);
#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0)
STqOffsetVal reqOffset = pRequest->reqOffset;
static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest,
SRpcMsg* pMsg, STqOffsetVal* pOffset) {
int32_t code = 0;
uint64_t consumerId = pRequest->consumerId;
int32_t vgId = TD_VID(pTq->pVnode);
// 1. reset the offset if needed
if (reqOffset.type > 0) {
offset = reqOffset;
} else { // handle the reset offset cases, according to the consumer's choice.
bool blockReturned = false;
code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned);
if (code != 0) {
return code;
}
// empty block returned, quit
if (blockReturned) {
return 0;
}
}
// this is a normal subscription requirement
if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
SMqDataRsp dataRsp = {0};
tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType);
// lock
taosWLockLatch(&pTq->pushLock);
taosWLockLatch(&pTq->lock);
qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId);
code = tqScanData(pTq, pHandle, &dataRsp, &offset);
code = tqScanData(pTq, pHandle, &dataRsp, pOffset);
// till now, all data has been transferred to consumer, new data needs to push client once arrived.
if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG &&
dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) {
code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP);
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
return code;
}
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP);
// NOTE: this pHandle->consumerId may have been changed already.
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64
", ts:%" PRId64", reqId:0x%"PRIx64,
", ts:%" PRId64 ", reqId:0x%" PRIx64,
consumerId, pHandle->subKey, vgId, dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid,
dataRsp.rspOffset.ts, pRequest->reqId);
tDeleteSMqDataRsp(&dataRsp);
return code;
}
static int32_t doPollDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) {
int32_t code = -1;
STqOffsetVal offset = {0};
SWalCkHead* pCkHead = NULL;
int32_t vgId = TD_VID(pTq->pVnode);
STqOffsetVal reqOffset = pRequest->reqOffset;
uint64_t consumerId = pRequest->consumerId;
// 1. reset the offset if needed
if (IS_OFFSET_RESET_TYPE(reqOffset.type)) {
// handle the reset offset cases, according to the consumer's choice.
bool blockReturned = false;
code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned);
if (code != 0) {
return code;
}
// empty block returned, quit
if (blockReturned) {
return 0;
}
} else { // use the consumer specified offset
// the offset value can not be monotonious increase??
offset = reqOffset;
}
// this is a normal subscribe requirement
if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset);
} else { // for taosX
// todo handle the case where re-balance occurs.
// for taosx
SMqMetaRsp metaRsp = {0};
STaosxRsp taosxRsp = {0};
tqInitTaosxRsp(&taosxRsp, pRequest);
......@@ -574,8 +553,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq*
code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp);
tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64
",ts:%" PRId64,
consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid,
metaRsp.rspOffset.ts);
consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts);
taosMemoryFree(metaRsp.metaRsp);
tDeleteSTaosxRsp(&taosxRsp);
return code;
......@@ -596,8 +574,6 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq*
}
if (offset.type == TMQ_OFFSET__LOG) {
// if (offset.type == TMQ_OFFSET__LOG) {
int64_t fetchVer = offset.version + 1;
pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048);
if (pCkHead == NULL) {
......@@ -627,8 +603,8 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq*
}
SWalCont* pHead = &pCkHead->head;
tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId,
pRequest->epoch, vgId, fetchVer, pHead->msgType);
tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d",
consumerId, pRequest->epoch, vgId, fetchVer, pHead->msgType);
if (pHead->msgType == TDMT_VND_SUBMIT) {
SPackedData submit = {
......@@ -678,6 +654,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq*
tDeleteSTaosxRsp(&taosxRsp);
taosMemoryFreeClear(pCkHead);
return 0;
}
}
int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
......@@ -702,31 +679,31 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
}
// 2. check re-balance status
taosRLockLatch(&pTq->pushLock);
taosRLockLatch(&pTq->lock);
if (pHandle->consumerId != consumerId) {
tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64,
consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId);
terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH;
taosRUnLockLatch(&pTq->pushLock);
taosRUnLockLatch(&pTq->lock);
return -1;
}
taosRUnLockLatch(&pTq->pushLock);
taosRUnLockLatch(&pTq->lock);
taosWLockLatch(&pTq->pushLock);
// 3. update the epoch value
taosWLockLatch(&pTq->lock);
int32_t savedEpoch = pHandle->epoch;
if (savedEpoch < reqEpoch) {
tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch);
pHandle->epoch = reqEpoch;
}
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
char buf[80];
tFormatOffset(buf, 80, &reqOffset);
tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64,
consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId);
return extractDataForMq(pTq, pHandle, &req, pMsg);
return doPollDataForMq(pTq, pHandle, &req, pMsg);
}
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
......@@ -734,12 +711,12 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
tqDebug("vgId:%d, tq process delete sub req %s", pTq->pVnode->config.vgId, pReq->subKey);
taosWLockLatch(&pTq->pushLock);
taosWLockLatch(&pTq->lock);
int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey));
if (code != 0) {
tqDebug("vgId:%d, tq remove push handle %s", pTq->pVnode->config.vgId, pReq->subKey);
}
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey));
if (pHandle) {
......@@ -803,18 +780,18 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
SVnode* pVnode = pTq->pVnode;
int32_t vgId = TD_VID(pVnode);
tqDebug("vgId:%d, tq process sub req %s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey,
req.oldConsumerId, req.newConsumerId);
STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey));
if (pHandle == NULL) {
if (req.oldConsumerId != -1) {
tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId is %" PRId64 "",
tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId:0x%" PRIx64,
req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId);
}
if (req.newConsumerId == -1) {
tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId);
taosMemoryFree(req.qmsg);
return 0;
}
......@@ -904,16 +881,15 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
atomic_add_fetch_32(&pHandle->epoch, 1);
taosMemoryFree(req.qmsg);
return tqMetaSaveHandle(pTq, req.subKey, pHandle);
}
} else {
tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId,
req.newConsumerId);
taosWLockLatch(&pTq->pushLock);
taosWLockLatch(&pTq->lock);
atomic_store_32(&pHandle->epoch, -1);
// remove if it has been register in the push manager, and return one empty block to consumer
tqRemovePushEntry(pTq, req.subKey, (int32_t) strlen(req.subKey), pHandle->consumerId, true);
tqRemovePushEntry(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true);
atomic_store_64(&pHandle->consumerId, req.newConsumerId);
atomic_add_fetch_32(&pHandle->epoch, 1);
......@@ -922,12 +898,13 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
qStreamCloseTsdbReader(pHandle->execHandle.task);
}
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) {
taosMemoryFree(req.qmsg);
return -1;
}
}
}
taosMemoryFree(req.qmsg);
return 0;
......
......@@ -213,7 +213,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver)
if (msgType == TDMT_VND_SUBMIT) {
// lock push mgr to avoid potential msg lost
taosWLockLatch(&pTq->pushLock);
taosWLockLatch(&pTq->lock);
int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr);
if (numOfRegisteredPush > 0) {
......@@ -231,7 +231,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver)
taosArrayDestroy(cachedKeyLens);
// unlock
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
return -1;
}
......@@ -320,7 +320,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver)
taosMemoryFree(data);
}
// unlock
taosWUnLockLatch(&pTq->pushLock);
taosWUnLockLatch(&pTq->lock);
}
if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode)) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册