提交 22a85734 编写于 作者: H Haojun Liao

fix(mq): add more chek for balance couner to avoid the negative value emerges.

上级 b0c06c55
...@@ -558,7 +558,6 @@ int32_t* taosGetErrno(); ...@@ -558,7 +558,6 @@ int32_t* taosGetErrno();
#define TSDB_CODE_TQ_GROUP_NOT_SET TAOS_DEF_ERROR_CODE(0, 0x0A0B) #define TSDB_CODE_TQ_GROUP_NOT_SET TAOS_DEF_ERROR_CODE(0, 0x0A0B)
#define TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0A0C) #define TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0A0C)
#define TSDB_CODE_TQ_NO_COMMITTED_OFFSET TAOS_DEF_ERROR_CODE(0, 0x0A0D) #define TSDB_CODE_TQ_NO_COMMITTED_OFFSET TAOS_DEF_ERROR_CODE(0, 0x0A0D)
#define TSDB_CODE_TQ_NO_SUBSCRIBE_TOPICS TAOS_DEF_ERROR_CODE(0, 0x0A0E)
// wal // wal
// #define TSDB_CODE_WAL_APP_ERROR TAOS_DEF_ERROR_CODE(0, 0x1000) // 2.x // #define TSDB_CODE_WAL_APP_ERROR TAOS_DEF_ERROR_CODE(0, 0x1000) // 2.x
......
...@@ -1076,12 +1076,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { ...@@ -1076,12 +1076,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) {
SCMSubscribeReq req = {0}; SCMSubscribeReq req = {0};
int32_t code = 0; int32_t code = 0;
if (sz == 0) {
tscDebug("consumer:0x%" PRIx64 " cgroup:%s, subscribe %d topics, not allowed", tmq->consumerId, tmq->groupId, sz);
return TSDB_CODE_TQ_NO_SUBSCRIBE_TOPICS;
} else {
tscDebug("consumer:0x%" PRIx64 " cgroup:%s, subscribe %d topics", tmq->consumerId, tmq->groupId, sz); tscDebug("consumer:0x%" PRIx64 " cgroup:%s, subscribe %d topics", tmq->consumerId, tmq->groupId, sz);
}
req.consumerId = tmq->consumerId; req.consumerId = tmq->consumerId;
tstrncpy(req.clientId, tmq->clientId, 256); tstrncpy(req.clientId, tmq->clientId, 256);
......
...@@ -925,7 +925,7 @@ TEST(clientCase, subscription_test) { ...@@ -925,7 +925,7 @@ TEST(clientCase, subscription_test) {
// 创建订阅 topics 列表 // 创建订阅 topics 列表
tmq_list_t* topicList = tmq_list_new(); tmq_list_t* topicList = tmq_list_new();
tmq_list_append(topicList, "topic_t1"); // tmq_list_append(topicList, "topic_t1");
// 启动订阅 // 启动订阅
tmq_subscribe(tmq, topicList); tmq_subscribe(tmq, topicList);
...@@ -938,6 +938,8 @@ TEST(clientCase, subscription_test) { ...@@ -938,6 +938,8 @@ TEST(clientCase, subscription_test) {
int32_t msgCnt = 0; int32_t msgCnt = 0;
int32_t timeout = 5000; int32_t timeout = 5000;
int32_t count = 0;
while (1) { while (1) {
TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout); TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout);
if (pRes) { if (pRes) {
...@@ -952,6 +954,11 @@ TEST(clientCase, subscription_test) { ...@@ -952,6 +954,11 @@ TEST(clientCase, subscription_test) {
printf("db: %s\n", dbName); printf("db: %s\n", dbName);
printf("vgroup id: %d\n", vgroupId); printf("vgroup id: %d\n", vgroupId);
if (count ++ > 20) {
tmq_unsubscribe(tmq);
break;
}
while (1) { while (1) {
TAOS_ROW row = taos_fetch_row(pRes); TAOS_ROW row = taos_fetch_row(pRes);
if (row == NULL) break; if (row == NULL) break;
......
...@@ -82,18 +82,29 @@ bool mndRebTryStart() { ...@@ -82,18 +82,29 @@ bool mndRebTryStart() {
} }
void mndRebEnd() { void mndRebEnd() {
int32_t val = atomic_sub_fetch_32(&mqRebInExecCnt, 1); mndRebCntDec();
mInfo("rebalance end, rebalance count:%d", val);
} }
void mndRebCntInc() { void mndRebCntInc() {
int32_t val = atomic_add_fetch_32(&mqRebInExecCnt, 1); int32_t val = atomic_add_fetch_32(&mqRebInExecCnt, 1);
mInfo("rebalance trans start, rebalance count:%d", val); mInfo("rebalance trans start, rebalance counter:%d", val);
} }
void mndRebCntDec() { void mndRebCntDec() {
int32_t val = atomic_sub_fetch_32(&mqRebInExecCnt, 1); while (1) {
mInfo("rebalance trans end, rebalance count:%d", val); int32_t val = atomic_load_32(&mqRebInExecCnt);
if (val <= 0) {
mError("rebalance trans end, rebalance counter:%d should not be less equalled than 0, ignore counter desc", val);
break;
}
int32_t newVal = val - 1;
int32_t oldVal = atomic_val_compare_exchange_32(&mqRebInExecCnt, val, newVal);
if (oldVal == val) {
mInfo("rebalance trans end, rebalance counter:%d", newVal);
break;
}
}
} }
static int32_t mndProcessConsumerLostMsg(SRpcMsg *pMsg) { static int32_t mndProcessConsumerLostMsg(SRpcMsg *pMsg) {
...@@ -308,6 +319,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { ...@@ -308,6 +319,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) {
taosRUnLockLatch(&pConsumer->lock); taosRUnLockLatch(&pConsumer->lock);
} else if (status == MQ_CONSUMER_STATUS__MODIFY) { } else if (status == MQ_CONSUMER_STATUS__MODIFY) {
taosRLockLatch(&pConsumer->lock); taosRLockLatch(&pConsumer->lock);
int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics); int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics);
for (int32_t i = 0; i < newTopicNum; i++) { for (int32_t i = 0; i < newTopicNum; i++) {
char key[TSDB_SUBSCRIBE_KEY_LEN]; char key[TSDB_SUBSCRIBE_KEY_LEN];
...@@ -700,6 +712,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { ...@@ -700,6 +712,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) {
// no topics need to be rebalanced // no topics need to be rebalanced
if (taosArrayGetSize(pConsumerNew->rebNewTopics) == 0 && taosArrayGetSize(pConsumerNew->rebRemovedTopics) == 0) { if (taosArrayGetSize(pConsumerNew->rebNewTopics) == 0 && taosArrayGetSize(pConsumerNew->rebRemovedTopics) == 0) {
// mInfo();
goto _over; goto _over;
} }
...@@ -1057,7 +1070,6 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * ...@@ -1057,7 +1070,6 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *
} }
taosRLockLatch(&pConsumer->lock); taosRLockLatch(&pConsumer->lock);
mDebug("showing consumer:0x%" PRIx64, pConsumer->consumerId); mDebug("showing consumer:0x%" PRIx64, pConsumer->consumerId);
int32_t topicSz = taosArrayGetSize(pConsumer->assignedTopics); int32_t topicSz = taosArrayGetSize(pConsumer->assignedTopics);
......
...@@ -706,6 +706,9 @@ int32_t mndProcessRpcMsg(SRpcMsg *pMsg) { ...@@ -706,6 +706,9 @@ int32_t mndProcessRpcMsg(SRpcMsg *pMsg) {
} else if (code == 0) { } else if (code == 0) {
mGTrace("msg:%p, successfully processed", pMsg); mGTrace("msg:%p, successfully processed", pMsg);
} else { } else {
if (code == -1) {
code = terrno;
}
mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle, mGError("msg:%p, failed to process since %s, app:%p type:%s", pMsg, tstrerror(code), pMsg->info.ahandle,
TMSG_INFO(pMsg->msgType)); TMSG_INFO(pMsg->msgType));
} }
......
...@@ -263,7 +263,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR ...@@ -263,7 +263,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR
imbConsumerNum = totalVgNum % afterRebConsumerNum; imbConsumerNum = totalVgNum % afterRebConsumerNum;
} }
mInfo("sub:%s mq re-balance %d consumers: at least %d vg each, %d consumer has more vg", sub, mInfo("sub:%s mq re-balance %d consumers: at least %d vgs each, %d consumers has more vgs", sub,
afterRebConsumerNum, minVgCnt, imbConsumerNum); afterRebConsumerNum, minVgCnt, imbConsumerNum);
// 4. first scan: remove consumer more than wanted, put to remove hash // 4. first scan: remove consumer more than wanted, put to remove hash
...@@ -591,13 +591,13 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { ...@@ -591,13 +591,13 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) {
rebOutput.pSub = mndCreateSubscription(pMnode, pTopic, pRebInfo->key); rebOutput.pSub = mndCreateSubscription(pMnode, pTopic, pRebInfo->key);
if (rebOutput.pSub == NULL) { if (rebOutput.pSub == NULL) {
mError("mq rebalance %s failed create sub since %s, abort", pRebInfo->key, terrstr()); mError("mq rebalance %s failed create sub since %s, ignore", pRebInfo->key, terrstr());
taosRUnLockLatch(&pTopic->lock); taosRUnLockLatch(&pTopic->lock);
mndReleaseTopic(pMnode, pTopic); mndReleaseTopic(pMnode, pTopic);
continue; continue;
} }
memcpy(rebOutput.pSub->dbName, pTopic->db, TSDB_DB_FNAME_LEN);
memcpy(rebOutput.pSub->dbName, pTopic->db, TSDB_DB_FNAME_LEN);
taosRUnLockLatch(&pTopic->lock); taosRUnLockLatch(&pTopic->lock);
mndReleaseTopic(pMnode, pTopic); mndReleaseTopic(pMnode, pTopic);
......
...@@ -297,7 +297,7 @@ int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) { ...@@ -297,7 +297,7 @@ int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) {
tqError("tmq poll: wal reader failed to seek to ver:%"PRId64" code:%s, %s", ver, tstrerror(terrno), id); tqError("tmq poll: wal reader failed to seek to ver:%"PRId64" code:%s, %s", ver, tstrerror(terrno), id);
return -1; return -1;
} else { } else {
tqError("tmq poll: wal reader seek to ver:%"PRId64" %s", ver, id); tqDebug("tmq poll: wal reader seek to ver:%"PRId64" %s", ver, id);
return 0; return 0;
} }
} }
...@@ -308,13 +308,12 @@ int32_t tqNextBlock(STqReader* pReader, SFetchRet* ret) { ...@@ -308,13 +308,12 @@ int32_t tqNextBlock(STqReader* pReader, SFetchRet* ret) {
while (1) { while (1) {
if (!fromProcessedMsg) { if (!fromProcessedMsg) {
if (walNextValidMsg(pReader->pWalReader) < 0) { if (walNextValidMsg(pReader->pWalReader) < 0) {
pReader->ver = pReader->ver = pReader->pWalReader->curVersion - pReader->pWalReader->curStopped;
pReader->pWalReader->curVersion - pReader->pWalReader->curStopped;
// pReader->pWalReader->curVersion - (pReader->pWalReader->curInvalid | pReader->pWalReader->curStopped);
ret->offset.type = TMQ_OFFSET__LOG; ret->offset.type = TMQ_OFFSET__LOG;
ret->offset.version = pReader->ver; ret->offset.version = pReader->ver;
ret->fetchType = FETCH_TYPE__NONE; ret->fetchType = FETCH_TYPE__NONE;
tqDebug("return offset %" PRId64 ", no more valid", ret->offset.version); tqDebug("return offset %" PRId64 ", no more valid msg in wal", ret->offset.version);
return -1; return -1;
} }
......
...@@ -1632,8 +1632,12 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { ...@@ -1632,8 +1632,12 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) {
while (1) { while (1) {
SFetchRet ret = {0}; SFetchRet ret = {0};
if (tqNextBlock(pInfo->tqReader, &ret) < 0) { if (tqNextBlock(pInfo->tqReader, &ret) < 0) {
// if the end is reached, terrno is 0
if (terrno != 0) {
qError("failed to get next log block since %s", terrstr()); qError("failed to get next log block since %s", terrstr());
} }
}
if (ret.fetchType == FETCH_TYPE__DATA) { if (ret.fetchType == FETCH_TYPE__DATA) {
blockDataCleanup(pInfo->pRes); blockDataCleanup(pInfo->pRes);
setBlockIntoRes(pInfo, &ret.data, true); setBlockIntoRes(pInfo, &ret.data, true);
......
...@@ -436,7 +436,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TQ_META_KEY_DUP_IN_TXN, "TQ met key dup in txn ...@@ -436,7 +436,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TQ_META_KEY_DUP_IN_TXN, "TQ met key dup in txn
TAOS_DEFINE_ERROR(TSDB_CODE_TQ_GROUP_NOT_SET, "TQ group not exist") TAOS_DEFINE_ERROR(TSDB_CODE_TQ_GROUP_NOT_SET, "TQ group not exist")
TAOS_DEFINE_ERROR(TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND, "TQ table schema not found") TAOS_DEFINE_ERROR(TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND, "TQ table schema not found")
TAOS_DEFINE_ERROR(TSDB_CODE_TQ_NO_COMMITTED_OFFSET, "TQ no committed offset") TAOS_DEFINE_ERROR(TSDB_CODE_TQ_NO_COMMITTED_OFFSET, "TQ no committed offset")
TAOS_DEFINE_ERROR(TSDB_CODE_TQ_NO_SUBSCRIBE_TOPICS, "TQ no topics")
// wal // wal
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_FILE_CORRUPTED, "WAL file is corrupted") TAOS_DEFINE_ERROR(TSDB_CODE_WAL_FILE_CORRUPTED, "WAL file is corrupted")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册