提交 37a2977a 编写于 作者: L Liu Jicong

feat(tmq): consumer support recover

上级 19ec7bca
......@@ -1332,7 +1332,7 @@ int32_t tDeserializeSCMCreateTopicRsp(void* buf, int32_t bufLen, SCMCreateTopicR
typedef struct {
int64_t consumerId;
} SMqConsumerLostMsg;
} SMqConsumerLostMsg, SMqConsumerRecoverMsg;
typedef struct {
int64_t consumerId;
......
......@@ -145,10 +145,11 @@ enum {
TD_DEF_MSG_TYPE(TDMT_MND_ALTER_TOPIC, "mnode-alter-topic", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_DROP_TOPIC, "mnode-drop-topic", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_SUBSCRIBE, "mnode-subscribe", SCMSubscribeReq, SCMSubscribeRsp)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_ASK_EP, "mnode-mq-ask-ep", SMqAskEpReq, SMqAskEpReq)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_TIMER, "mnode-mq-tmr", SMTimerReq, SMTimerReq)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_CONSUMER_LOST, "mnode-mq-consumer-lost", SMTimerReq, SMTimerReq)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_DO_REBALANCE, "mnode-mq-do-rebalance", SMqDoRebalanceMsg, SMqDoRebalanceMsg)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_ASK_EP, "mnode-mq-ask-ep", SMqAskEpReq, SMqAskEpRsp)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_TIMER, "mnode-mq-tmr", SMTimerReq, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_CONSUMER_LOST, "mnode-mq-consumer-lost", SMqConsumerLostMsg, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_CONSUMER_RECOVER, "mnode-mq-consumer-recover", SMqConsumerRecoverMsg, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_DO_REBALANCE, "mnode-mq-do-rebalance", SMqDoRebalanceMsg, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_MQ_COMMIT_OFFSET, "mnode-mq-commit-offset", SMqCMCommitOffsetReq, SMqCMCommitOffsetRsp)
TD_DEF_MSG_TYPE(TDMT_MND_CREATE_STREAM, "mnode-create-stream", SCMCreateStreamReq, SCMCreateStreamRsp)
TD_DEF_MSG_TYPE(TDMT_MND_ALTER_STREAM, "mnode-alter-stream", NULL, NULL)
......
......@@ -268,14 +268,13 @@ int32_t* taosGetErrno();
#define TSDB_CODE_MND_TOPIC_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E1)
#define TSDB_CODE_MND_TOO_MANY_TOPICS TAOS_DEF_ERROR_CODE(0, 0x03E2)
#define TSDB_CODE_MND_INVALID_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03E3)
#define TSDB_CODE_MND_INVALID_TOPIC_OPTION TAOS_DEF_ERROR_CODE(0, 0x03E4)
#define TSDB_CODE_MND_TOPIC_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03E5)
#define TSDB_CODE_MND_NAME_CONFLICT_WITH_STB TAOS_DEF_ERROR_CODE(0, 0x03E6)
#define TSDB_CODE_MND_CONSUMER_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E7)
#define TSDB_CODE_MND_UNSUPPORTED_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03E8)
#define TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E9)
#define TSDB_CODE_MND_OFFSET_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03EA)
#define TSDB_CODE_MND_CONSUMER_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x03EB)
#define TSDB_CODE_MND_INVALID_TOPIC_QUERY TAOS_DEF_ERROR_CODE(0, 0x03E4)
#define TSDB_CODE_MND_INVALID_TOPIC_OPTION TAOS_DEF_ERROR_CODE(0, 0x03E5)
#define TSDB_CODE_MND_CONSUMER_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E6)
#define TSDB_CODE_MND_TOPIC_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03E7)
#define TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E8)
#define TSDB_CODE_MND_OFFSET_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03E9)
#define TSDB_CODE_MND_CONSUMER_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x03EA)
// mnode-stream
#define TSDB_CODE_MND_STREAM_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03F0)
......
......@@ -89,6 +89,7 @@ typedef enum {
TRN_TYPE_DROP_STREAM = 1020,
TRN_TYPE_ALTER_STREAM = 1021,
TRN_TYPE_CONSUMER_LOST = 1022,
TRN_TYPE_CONSUMER_RECOVER = 1023,
TRN_TYPE_BASIC_SCOPE_END,
TRN_TYPE_GLOBAL_SCOPE = 2000,
TRN_TYPE_CREATE_DNODE = 2001,
......@@ -463,6 +464,7 @@ enum {
CONSUMER_UPDATE__ADD,
CONSUMER_UPDATE__REMOVE,
CONSUMER_UPDATE__LOST,
CONSUMER_UPDATE__RECOVER,
CONSUMER_UPDATE__MODIFY,
};
......@@ -481,6 +483,9 @@ typedef struct {
SArray* rebNewTopics; // SArray<char*>
SArray* rebRemovedTopics; // SArray<char*>
// subscribed by user
SArray* assignedTopics; // SArray<char*>
// data for display
int32_t pid;
SEpSet ep;
......
......@@ -50,6 +50,7 @@ static int32_t mndProcessSubscribeReq(SNodeMsg *pMsg);
static int32_t mndProcessAskEpReq(SNodeMsg *pMsg);
static int32_t mndProcessMqTimerMsg(SNodeMsg *pMsg);
static int32_t mndProcessConsumerLostMsg(SNodeMsg *pMsg);
static int32_t mndProcessConsumerRecoverMsg(SNodeMsg *pMsg);
int32_t mndInitConsumer(SMnode *pMnode) {
SSdbTable table = {.sdbType = SDB_CONSUMER,
......@@ -64,6 +65,7 @@ int32_t mndInitConsumer(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_MND_MQ_ASK_EP, mndProcessAskEpReq);
mndSetMsgHandle(pMnode, TDMT_MND_MQ_TIMER, mndProcessMqTimerMsg);
mndSetMsgHandle(pMnode, TDMT_MND_MQ_CONSUMER_LOST, mndProcessConsumerLostMsg);
mndSetMsgHandle(pMnode, TDMT_MND_MQ_CONSUMER_RECOVER, mndProcessConsumerRecoverMsg);
mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndRetrieveConsumer);
mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_CONSUMERS, mndCancelGetNextConsumer);
......@@ -97,6 +99,30 @@ FAIL:
return -1;
}
static int32_t mndProcessConsumerRecoverMsg(SNodeMsg *pMsg) {
SMnode *pMnode = pMsg->pNode;
SMqConsumerRecoverMsg *pRecoverMsg = pMsg->rpcMsg.pCont;
SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pRecoverMsg->consumerId);
ASSERT(pConsumer);
SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup);
pConsumerNew->updateType = CONSUMER_UPDATE__RECOVER;
mndReleaseConsumer(pMnode, pConsumer);
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_TYPE_CONSUMER_RECOVER, &pMsg->rpcMsg);
if (pTrans == NULL) goto FAIL;
if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) goto FAIL;
if (mndTransPrepare(pMnode, pTrans) != 0) goto FAIL;
mndTransDrop(pTrans);
return 0;
FAIL:
tDeleteSMqConsumerObj(pConsumerNew);
mndTransDrop(pTrans);
return -1;
}
static SMqRebSubscribe *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) {
SMqRebSubscribe *pRebSub = taosHashGet(pHash, key, strlen(key) + 1);
if (pRebSub == NULL) {
......@@ -222,8 +248,15 @@ static int32_t mndProcessAskEpReq(SNodeMsg *pMsg) {
// 1. check consumer status
int32_t status = atomic_load_32(&pConsumer->status);
if (status == MQ_CONSUMER_STATUS__LOST) {
// TODO: recover consumer
if (status == MQ_CONSUMER_STATUS__LOST_REBD) {
SMqConsumerRecoverMsg *pRecoverMsg = rpcMallocCont(sizeof(SMqConsumerRecoverMsg));
pRecoverMsg->consumerId = consumerId;
SRpcMsg *pRpcMsg = taosMemoryCalloc(1, sizeof(SRpcMsg));
pRpcMsg->msgType = TDMT_MND_MQ_CONSUMER_RECOVER;
pRpcMsg->pCont = pRecoverMsg;
pRpcMsg->contLen = sizeof(SMqConsumerRecoverMsg);
tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, pRpcMsg);
}
if (status != MQ_CONSUMER_STATUS__READY) {
......@@ -375,6 +408,11 @@ static int32_t mndProcessSubscribeReq(SNodeMsg *pMsg) {
pConsumerNew->rebNewTopics = newSub;
subscribe.topicNames = NULL;
for (int32_t i = 0; i < newTopicNum; i++) {
char *newTopicCopy = strdup(taosArrayGetP(newSub, i));
taosArrayPush(pConsumerNew->assignedTopics, &newTopicCopy);
}
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_TYPE_SUBSCRIBE, &pMsg->rpcMsg);
if (pTrans == NULL) goto SUBSCRIBE_OVER;
if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) goto SUBSCRIBE_OVER;
......@@ -395,6 +433,11 @@ static int32_t mndProcessSubscribeReq(SNodeMsg *pMsg) {
}
pConsumerNew->updateType = CONSUMER_UPDATE__MODIFY;
for (int32_t i = 0; i < newTopicNum; i++) {
char *newTopicCopy = strdup(taosArrayGetP(newSub, i));
taosArrayPush(pConsumerNew->assignedTopics, &newTopicCopy);
}
int32_t oldTopicNum = 0;
if (pConsumerOld->currentTopics) {
oldTopicNum = taosArrayGetSize(pConsumerOld->currentTopics);
......@@ -554,6 +597,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
if (pNewConsumer->updateType == CONSUMER_UPDATE__MODIFY) {
ASSERT(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);
ASSERT(taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0);
ASSERT(taosArrayGetSize(pNewConsumer->assignedTopics) != 0);
SArray *tmp = pOldConsumer->rebNewTopics;
pOldConsumer->rebNewTopics = pNewConsumer->rebNewTopics;
pNewConsumer->rebNewTopics = tmp;
......@@ -562,20 +606,41 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer,
pOldConsumer->rebRemovedTopics = pNewConsumer->rebRemovedTopics;
pNewConsumer->rebRemovedTopics = tmp;
tmp = pOldConsumer->assignedTopics;
pOldConsumer->assignedTopics = pNewConsumer->assignedTopics;
pNewConsumer->assignedTopics = tmp;
pOldConsumer->subscribeTime = pNewConsumer->upTime;
pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY;
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__LOST) {
ASSERT(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);
ASSERT(taosArrayGetSize(pOldConsumer->rebRemovedTopics) == 0);
int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics);
pOldConsumer->rebRemovedTopics = taosArrayInit(sz, sizeof(void *));
/*pOldConsumer->rebRemovedTopics = taosArrayInit(sz, sizeof(void *));*/
for (int32_t i = 0; i < sz; i++) {
char *topic = strdup(taosArrayGetP(pOldConsumer->currentTopics, i));
taosArrayPush(pNewConsumer->rebRemovedTopics, &topic);
taosArrayPush(pOldConsumer->rebRemovedTopics, &topic);
}
pOldConsumer->rebalanceTime = pNewConsumer->upTime;
pOldConsumer->status = MQ_CONSUMER_STATUS__LOST;
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__RECOVER) {
ASSERT(taosArrayGetSize(pOldConsumer->currentTopics) == 0);
ASSERT(taosArrayGetSize(pOldConsumer->rebNewTopics) == 0);
ASSERT(taosArrayGetSize(pOldConsumer->assignedTopics) != 0);
int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics);
for (int32_t i = 0; i < sz; i++) {
char *topic = strdup(taosArrayGetP(pOldConsumer->assignedTopics, i));
taosArrayPush(pOldConsumer->rebNewTopics, &topic);
}
pOldConsumer->rebalanceTime = pNewConsumer->upTime;
pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY;
} else if (pNewConsumer->updateType == CONSUMER_UPDATE__TOUCH) {
atomic_add_fetch_32(&pOldConsumer->epoch, 1);
......@@ -750,7 +815,7 @@ static int32_t mndRetrieveConsumer(SNodeMsg *pReq, SShowObj *pShow, SSDataBlock
// subscribed topics
char topics[TSDB_SHOW_LIST_LEN + VARSTR_HEADER_SIZE] = {0};
char *showStr = taosShowStrArray(pConsumer->currentTopics);
char *showStr = taosShowStrArray(pConsumer->assignedTopics);
tstrncpy(varDataVal(topics), showStr, TSDB_SHOW_LIST_LEN);
taosMemoryFree(showStr);
varDataSetLen(topics, strlen(varDataVal(topics)));
......
......@@ -34,11 +34,14 @@ SMqConsumerObj *tNewSMqConsumerObj(int64_t consumerId, char cgroup[TSDB_CGROUP_L
pConsumer->currentTopics = taosArrayInit(0, sizeof(void *));
pConsumer->rebNewTopics = taosArrayInit(0, sizeof(void *));
pConsumer->rebRemovedTopics = taosArrayInit(0, sizeof(void *));
pConsumer->assignedTopics = taosArrayInit(0, sizeof(void *));
if (pConsumer->currentTopics == NULL || pConsumer->rebNewTopics == NULL || pConsumer->rebRemovedTopics == NULL) {
if (pConsumer->currentTopics == NULL || pConsumer->rebNewTopics == NULL || pConsumer->rebRemovedTopics == NULL ||
pConsumer->assignedTopics == NULL) {
taosArrayDestroy(pConsumer->currentTopics);
taosArrayDestroy(pConsumer->rebNewTopics);
taosArrayDestroy(pConsumer->rebRemovedTopics);
taosArrayDestroy(pConsumer->assignedTopics);
taosMemoryFree(pConsumer);
return NULL;
}
......@@ -58,6 +61,9 @@ void tDeleteSMqConsumerObj(SMqConsumerObj *pConsumer) {
if (pConsumer->rebRemovedTopics) {
taosArrayDestroyP(pConsumer->rebRemovedTopics, (FDelete)taosMemoryFree);
}
if (pConsumer->assignedTopics) {
taosArrayDestroyP(pConsumer->assignedTopics, (FDelete)taosMemoryFree);
}
}
int32_t tEncodeSMqConsumerObj(void **buf, const SMqConsumerObj *pConsumer) {
......@@ -111,6 +117,18 @@ int32_t tEncodeSMqConsumerObj(void **buf, const SMqConsumerObj *pConsumer) {
tlen += taosEncodeFixedI32(buf, 0);
}
// lost topics
if (pConsumer->assignedTopics) {
sz = taosArrayGetSize(pConsumer->assignedTopics);
tlen += taosEncodeFixedI32(buf, sz);
for (int32_t i = 0; i < sz; i++) {
char *topic = taosArrayGetP(pConsumer->assignedTopics, i);
tlen += taosEncodeString(buf, topic);
}
} else {
tlen += taosEncodeFixedI32(buf, 0);
}
return tlen;
}
......@@ -155,6 +173,15 @@ void *tDecodeSMqConsumerObj(const void *buf, SMqConsumerObj *pConsumer) {
taosArrayPush(pConsumer->rebRemovedTopics, &topic);
}
// reb removed topics
buf = taosDecodeFixedI32(buf, &sz);
pConsumer->assignedTopics = taosArrayInit(sz, sizeof(void *));
for (int32_t i = 0; i < sz; i++) {
char *topic;
buf = taosDecodeString(buf, &topic);
taosArrayPush(pConsumer->assignedTopics, &topic);
}
return (void *)buf;
}
......
......@@ -489,7 +489,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
if (levelNum != 1) {
qDestroyQueryPlan(pPlan);
terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
terrno = TSDB_CODE_MND_INVALID_TOPIC_QUERY;
return -1;
}
......@@ -498,7 +498,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
int32_t opNum = LIST_LENGTH(inner->pNodeList);
if (opNum != 1) {
qDestroyQueryPlan(pPlan);
terrno = TSDB_CODE_MND_UNSUPPORTED_TOPIC;
terrno = TSDB_CODE_MND_INVALID_TOPIC_QUERY;
return -1;
}
plan = nodesListGetNode(inner->pNodeList, 0);
......
......@@ -261,7 +261,7 @@ static SDDropTopicReq *mndBuildDropTopicMsg(SMnode *pMnode, SVgObj *pVgroup, SMq
static int32_t mndCheckCreateTopicReq(SCMCreateTopicReq *pCreate) {
if (pCreate->name[0] == 0 || pCreate->sql == NULL || pCreate->sql[0] == 0 || pCreate->subscribeDbName[0] == 0) {
terrno = TSDB_CODE_MND_INVALID_TOPIC_OPTION;
terrno = TSDB_CODE_MND_INVALID_TOPIC;
return -1;
}
......
......@@ -271,9 +271,14 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_INVALID_STAGE, "Invalid stage to kill
TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CANT_PARALLEL, "Invalid stage to kill")
// mnode-mq
TAOS_DEFINE_ERROR(TSDB_CODE_MND_UNSUPPORTED_TOPIC, "Topic with aggregation is unsupported")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_CONSUMER_NOT_READY, "Consumer waiting for rebalance")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_NOT_EXIST, "Topic not exist")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOO_MANY_TOPICS, "Too many Topics")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_TOPIC, "Invalid topic")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_TOPIC_QUERY, "Topic with invalid query")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_TOPIC_OPTION, "Topic with invalid option")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_CONSUMER_NOT_EXIST, "Consumer not exist")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_CONSUMER_NOT_READY, "Consumer waiting for rebalance")
// mnode-sma
TAOS_DEFINE_ERROR(TSDB_CODE_MND_SMA_ALREADY_EXIST, "SMA already exists")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册