未验证 提交 1d6a5720 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #3883 from taosdata/feature/crash

Feature/crash
...@@ -126,6 +126,8 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { ...@@ -126,6 +126,8 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) {
balanceAccquireDnodeList(); balanceAccquireDnodeList();
mDebug("db:%s, try alloc %d vnodes to vgroup, dnodes total:%d, avail:%d", pVgroup->dbName, pVgroup->numOfVnodes,
mnodeGetDnodesNum(), tsBalanceDnodeListSize);
for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) {
for (; dnode < tsBalanceDnodeListSize; ++dnode) { for (; dnode < tsBalanceDnodeListSize; ++dnode) {
SDnodeObj *pDnode = tsBalanceDnodeList[dnode]; SDnodeObj *pDnode = tsBalanceDnodeList[dnode];
...@@ -135,17 +137,33 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) { ...@@ -135,17 +137,33 @@ int32_t balanceAllocVnodes(SVgObj *pVgroup) {
pVnodeGid->pDnode = pDnode; pVnodeGid->pDnode = pDnode;
dnode++; dnode++;
vnodes++; vnodes++;
mDebug("dnode:%d, is selected, vnodeIndex:%d", pDnode->dnodeId, i);
break; break;
} else {
mDebug("dnode:%d, is not selected, status:%s vnodes:%d disk:%fGB role:%d", pDnode->dnodeId,
mnodeGetDnodeStatusStr(pDnode->status), pDnode->openVnodes, pDnode->diskAvailable,
pDnode->alternativeRole);
} }
} }
} }
if (vnodes != pVgroup->numOfVnodes) { if (vnodes != pVgroup->numOfVnodes) {
mDebug("vgId:%d, db:%s need vnodes:%d, but alloc:%d, free them", pVgroup->vgId, pVgroup->dbName,
pVgroup->numOfVnodes, vnodes);
balanceReleaseDnodeList(); balanceReleaseDnodeList();
balanceUnLock(); balanceUnLock();
mDebug("db:%s, need vnodes:%d, but alloc:%d", pVgroup->dbName, pVgroup->numOfVnodes, vnodes);
void * pIter = NULL;
SDnodeObj *pDnode = NULL;
while (1) {
pIter = mnodeGetNextDnode(pIter, &pDnode);
if (pDnode == NULL) break;
mDebug("dnode:%d, status:%s vnodes:%d disk:%fGB role:%d", pDnode->dnodeId, mnodeGetDnodeStatusStr(pDnode->status),
pDnode->openVnodes, pDnode->diskAvailable, pDnode->alternativeRole);
mnodeDecDnodeRef(pDnode);
}
sdbFreeIter(pIter);
if (mnodeGetOnlineDnodesNum() == 0) { if (mnodeGetOnlineDnodesNum() == 0) {
return TSDB_CODE_MND_NOT_READY; return TSDB_CODE_MND_NOT_READY;
} else { } else {
...@@ -553,7 +571,8 @@ static void balanceCheckDnodeAccess() { ...@@ -553,7 +571,8 @@ static void balanceCheckDnodeAccess() {
if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) { if (pDnode->status != TAOS_DN_STATUS_DROPPING && pDnode->status != TAOS_DN_STATUS_OFFLINE) {
pDnode->status = TAOS_DN_STATUS_OFFLINE; pDnode->status = TAOS_DN_STATUS_OFFLINE;
pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT; pDnode->offlineReason = TAOS_DN_OFF_STATUS_MSG_TIMEOUT;
mInfo("dnode:%d, set to offline state", pDnode->dnodeId); mInfo("dnode:%d, set to offline state, access seq:%d, last seq:%d", pDnode->dnodeId, tsAccessSquence,
pDnode->lastAccess);
balanceSetVgroupOffline(pDnode); balanceSetVgroupOffline(pDnode);
} }
} }
......
...@@ -471,7 +471,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { ...@@ -471,7 +471,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
mnodeGetClusterId()); mnodeGetClusterId());
return TSDB_CODE_MND_INVALID_CLUSTER_ID; return TSDB_CODE_MND_INVALID_CLUSTER_ID;
} else { } else {
mTrace("dnode:%d, status received, access times %d", pDnode->dnodeId, pDnode->lastAccess); mTrace("dnode:%d, status received, access times %d openVnodes:%d:%d", pDnode->dnodeId, pDnode->lastAccess,
htons(pStatus->openVnodes), pDnode->openVnodes);
} }
} }
......
...@@ -2409,14 +2409,16 @@ static void mnodeProcessCreateChildTableRsp(SRpcMsg *rpcMsg) { ...@@ -2409,14 +2409,16 @@ static void mnodeProcessCreateChildTableRsp(SRpcMsg *rpcMsg) {
} }
} else { } else {
if (mnodeMsg->retry++ < 10) { if (mnodeMsg->retry++ < 10) {
mDebug("app:%p:%p, table:%s, create table rsp received, need retry, times:%d result:%s thandle:%p", mDebug("app:%p:%p, table:%s, create table rsp received, need retry, times:%d vgId:%d sid:%d uid:%" PRIu64
mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, mnodeMsg->retry, tstrerror(rpcMsg->code), " result:%s thandle:%p",
mnodeMsg->rpcMsg.handle); mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, mnodeMsg->retry, pTable->vgId, pTable->sid,
pTable->uid, tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle);
dnodeDelayReprocessMnodeWriteMsg(mnodeMsg); dnodeDelayReprocessMnodeWriteMsg(mnodeMsg);
} else { } else {
mError("app:%p:%p, table:%s, failed to create in dnode, result:%s thandle:%p", mnodeMsg->rpcMsg.ahandle, mnodeMsg, mError("app:%p:%p, table:%s, failed to create in dnode, vgId:%d sid:%d uid:%" PRIu64 ", result:%s thandle:%p",
pTable->info.tableId, tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle); mnodeMsg->rpcMsg.ahandle, mnodeMsg, pTable->info.tableId, pTable->vgId, pTable->sid, pTable->uid,
tstrerror(rpcMsg->code), mnodeMsg->rpcMsg.handle);
SSdbOper oper = {.type = SDB_OPER_GLOBAL, .table = tsChildTableSdb, .pObj = pTable}; SSdbOper oper = {.type = SDB_OPER_GLOBAL, .table = tsChildTableSdb, .pObj = pTable};
sdbDeleteRow(&oper); sdbDeleteRow(&oper);
......
...@@ -270,24 +270,26 @@ void mnodeUpdateVgroup(SVgObj *pVgroup) { ...@@ -270,24 +270,26 @@ void mnodeUpdateVgroup(SVgObj *pVgroup) {
Traverse all vgroups on the mnode; if there is no such vgId on a dnode, send a msg to this dnode to re-create this vgId/vnode Traverse all vgroups on the mnode; if there is no such vgId on a dnode, send a msg to this dnode to re-create this vgId/vnode
*/ */
void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t openVnodes) { void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t openVnodes) {
SVnodeLoad *pNextV = NULL;
void *pIter = NULL; void *pIter = NULL;
while (1) { while (1) {
SVgObj *pVgroup; SVgObj *pVgroup;
pIter = mnodeGetNextVgroup(pIter, &pVgroup); pIter = mnodeGetNextVgroup(pIter, &pVgroup);
if (pVgroup == NULL) break; if (pVgroup == NULL) break;
pNextV = pVloads; for (int v = 0; v < pVgroup->numOfVnodes; ++v) {
int32_t i; if (pVgroup->vnodeGid[v].dnodeId == pDnode->dnodeId) {
for (i = 0; i < openVnodes; ++i) { // vgroup should have a vnode on this dnode
if ((pVgroup->vnodeGid[i].pDnode == pDnode) && (pVgroup->vgId == pNextV->vgId)) { bool have = false;
for (int32_t i = 0; i < openVnodes; ++i) {
SVnodeLoad *pVload = pVloads + i;
if (pVgroup->vgId == pVload->vgId) {
have = true;
break; break;
} }
pNextV++;
} }
if (i == openVnodes) { if (have) continue;
if (pVgroup->status == TAOS_VG_STATUS_CREATING || pVgroup->status == TAOS_VG_STATUS_DROPPING) { if (pVgroup->status == TAOS_VG_STATUS_CREATING || pVgroup->status == TAOS_VG_STATUS_DROPPING) {
mDebug("vgId:%d, not exist in dnode:%d and status is %s, do nothing", pVgroup->vgId, pDnode->dnodeId, mDebug("vgId:%d, not exist in dnode:%d and status is %s, do nothing", pVgroup->vgId, pDnode->dnodeId,
vgroupStatus[pVgroup->status]); vgroupStatus[pVgroup->status]);
...@@ -297,12 +299,12 @@ void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t o ...@@ -297,12 +299,12 @@ void mnodeCheckUnCreatedVgroup(SDnodeObj *pDnode, SVnodeLoad *pVloads, int32_t o
mnodeSendCreateVgroupMsg(pVgroup, NULL); mnodeSendCreateVgroupMsg(pVgroup, NULL);
} }
} }
}
mnodeDecVgroupRef(pVgroup); mnodeDecVgroupRef(pVgroup);
} }
sdbFreeIter(pIter); sdbFreeIter(pIter);
return;
} }
void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVload) { void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVload) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册