未验证 提交 1d6a5720 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #3883 from taosdata/feature/crash

Feature/crash
......@@ -126,6 +126,8 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) {
balanceAccquireDnodeList();
mDebug("db:%s, try alloc %d vnodes to vgroup, dnodes total:%d, avail:%d", pVgroup->dbName, pVgroup->numOfVnodes,
mnodeGetDnodesNum(), tsBalanceDnodeListSize);
for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) {
for (; dnode < tsBalanceDnodeListSize; ++dnode) {
SDnodeObj *pDnode = tsBalanceDnodeList[dnode];
......@@ -135,17 +137,33 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) {
pVnodeGid->pDnode = pDnode;
dnode++;
vnodes++;
mDebug("dnode:%d, is selected, vnodeIndex:%d", pDnode->dnodeId, i);
break;
} else {
mDebug("dnode:%d, is not selected, status:%s vnodes:%d disk:%fGB role:%d", pDnode->dnodeId,
mnodeGetDnodeStatusStr(pDnode->status), pDnode->openVnodes, pDnode->diskAvailable,
pDnode->alternativeRole);
}
}
}
if (vnodes != pVgroup->numOfVnodes) {
mDebug("vgId:%d, db:%s need vnodes:%d, but alloc:%d, free them", pVgroup->vgId, pVgroup->dbName,
pVgroup->numOfVnodes, vnodes);
balanceReleaseDnodeList();
balanceUnLock();
mDebug("db:%s, need vnodes:%d, but alloc:%d", pVgroup->dbName, pVgroup->numOfVnodes, vnodes);
void * pIter = NULL;
SDnodeObj *pDnode = NULL;
while (1) {
pIter = mnodeGetNextDnode(pIter, &pDnode);
if (pDnode == NULL) break;
mDebug("dnode:%d, status:%s vnodes:%d disk:%fGB role:%d", pDnode->dnodeId, mnodeGetDnodeStatusStr(pDnode->status),
pDnode->openVnodes, pDnode->diskAvailable, pDnode->alternativeRole);
mnodeDecDnodeRef(pDnode);
}
sdbFreeIter(pIter);
if (mnodeGetOnlineDnodesNum() == 0) {
return TSDB_CODE_MND_NOT_READY;
} else {
......@@ -553,7 +571,8 @@ static void balanceCheckDnodeAccess() {
if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) {
pDnode->status = TAOS_DN_STATUS_OFFLINE;
pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT;
mInfo("dnode:%d, set to offline state", pDnode->dnodeId);
mInfo("dnode:%d, set to offline state, access seq:%d, last seq:%d", pDnode->dnodeId, tsAccessSquence,
pDnode->lastAccess);
balanceSetVgroupOffline(pDnode);
}
}
......
......@@ -471,7 +471,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
mnodeGetClusterId());
return TSDB_CODE_MND_INVALID_CLUSTER_ID;
} else {
mTrace("dnode:%d, status received, access times %d", pDnode->dnodeId, pDnode->lastAccess);
mTrace("dnode:%d, status received, access times %d openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess,
htons(pStatus->openVnodes), pDnode->openVnodes);
}
}
......
......@@ -2409,14 +2409,16 @@ static void mnodeProcessCreateChildTableRsp(SRpcMsg *rpcMsg) {
}
} else {
if (mnodeMsg->retry++ < 10) {
mDebug("app:%p:%p, table:%s, create table rsp received, need retry, times:%d result:%s thandle:%p",
mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, mnodeMsg->retry, tstrerror(rpcMsg->code),
mnodeMsg->rpcMsg.handle);
mDebug("app:%p:%p, table:%s, create table rsp received, need retry, times:%d vgId:%d sid:%d uid:%" PRIu64
" result:%s thandle:%p",
mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, mnodeMsg->retry, pTable->vgId, pTable->sid,
pTable->uid, tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle);
dnodeDelayReprocessMnodeWriteMsg(mnodeMsg);
} else {
mError("app:%p:%p, table:%s, failed to create in dnode, result:%s thandle:%p", mnodeMsg->rpcMsg.ahandle, mnodeMsg,
pTable->info.tableId, tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle);
mError("app:%p:%p, table:%s, failed to create in dnode, vgId:%d sid:%d uid:%" PRIu64 ", result:%s thandle:%p",
mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, pTable->vgId, pTable->sid, pTable->uid,
tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle);
SSdbOper oper = {.type = SDB_OPER_GLOBAL, .table = tsChildTableSdb, .pObj = pTable};
sdbDeleteRow(&oper);
......
......@@ -270,31 +270,34 @@ void mnodeUpdateVgroup(SVgObj *pVgroup) {
Traverse all vgroups on mnode, if there no such vgId on a dnode, so send msg to this dnode for re-creating this vgId/vnode
*/
void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t openVnodes) {
SVnodeLoad *pNextV = NULL;
void *pIter = NULL;
while (1) {
SVgObj *pVgroup;
pIter = mnodeGetNextVgroup(pIter, &pVgroup);
if (pVgroup == NULL) break;
pNextV = pVloads;
int32_t i;
for (i = 0; i < openVnodes; ++i) {
if ((pVgroup->vnodeGid[i].pDnode == pDnode) && (pVgroup->vgId == pNextV->vgId)) {
break;
}
pNextV++;
}
if (i == openVnodes) {
if (pVgroup->status == TAOS_VG_STATUS_CREATING || pVgroup->status == TAOS_VG_STATUS_DROPPING) {
mDebug("vgId:%d, not exist in dnode:%d and status is %s, do nothing", pVgroup->vgId, pDnode->dnodeId,
vgroupStatus[pVgroup->status]);
} else {
mDebug("vgId:%d, not exist in dnode:%d and status is %s, send create msg", pVgroup->vgId, pDnode->dnodeId,
vgroupStatus[pVgroup->status]);
mnodeSendCreateVgroupMsg(pVgroup, NULL);
for (int v = 0; v < pVgroup->numOfVnodes; ++v) {
if (pVgroup->vnodeGid[v].dnodeId == pDnode->dnodeId) {
// vgroup should have a vnode on this dnode
bool have = false;
for (int32_t i = 0; i < openVnodes; ++i) {
SVnodeLoad *pVload = pVloads + i;
if (pVgroup->vgId == pVload->vgId) {
have = true;
break;
}
}
if (have) continue;
if (pVgroup->status == TAOS_VG_STATUS_CREATING || pVgroup->status == TAOS_VG_STATUS_DROPPING) {
mDebug("vgId:%d, not exist in dnode:%d and status is %s, do nothing", pVgroup->vgId, pDnode->dnodeId,
vgroupStatus[pVgroup->status]);
} else {
mDebug("vgId:%d, not exist in dnode:%d and status is %s, send create msg", pVgroup->vgId, pDnode->dnodeId,
vgroupStatus[pVgroup->status]);
mnodeSendCreateVgroupMsg(pVgroup, NULL);
}
}
}
......@@ -302,7 +305,6 @@ void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t o
}
sdbFreeIter(pIter);
return;
}
void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVload) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册