提交 f6a3c243 编写于 作者: H Hui Li

[TD-638]

上级 0b7dea8c
...@@ -617,6 +617,16 @@ static void dnodeSendStatusMsg(void *handle, void *tmrId) { ...@@ -617,6 +617,16 @@ static void dnodeSendStatusMsg(void *handle, void *tmrId) {
pStatus->diskAvailable = tsAvailDataDirGB; pStatus->diskAvailable = tsAvailDataDirGB;
pStatus->alternativeRole = (uint8_t) tsAlternativeRole; pStatus->alternativeRole = (uint8_t) tsAlternativeRole;
// fill cluster cfg parameters
pStatus->ClusterCfgPara.numOfMnodes = tsNumOfMnodes;
pStatus->ClusterCfgPara.mnodeEqualVnodeNum = tsMnodeEqualVnodeNum;
pStatus->ClusterCfgPara.offlineThreshold = tsOfflineThreshold;
pStatus->ClusterCfgPara.statusInterval = tsStatusInterval;
strcpy(pStatus->ClusterCfgPara.arbitrator, tsArbitrator);
strcpy(pStatus->ClusterCfgPara.timezone, tsTimezone);
strcpy(pStatus->ClusterCfgPara.locale, tsLocale);
strcpy(pStatus->ClusterCfgPara.charset, tsCharset);
vnodeBuildStatusMsg(pStatus); vnodeBuildStatusMsg(pStatus);
contLen = sizeof(SDMStatusMsg) + pStatus->openVnodes * sizeof(SVnodeLoad); contLen = sizeof(SDMStatusMsg) + pStatus->openVnodes * sizeof(SVnodeLoad);
pStatus->openVnodes = htons(pStatus->openVnodes); pStatus->openVnodes = htons(pStatus->openVnodes);
......
...@@ -121,6 +121,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_NOT_EXIST, 0, 0x0331, "mnode dnod ...@@ -121,6 +121,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_NOT_EXIST, 0, 0x0331, "mnode dnod
TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, 0, 0x0332, "mnode vgroup not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, 0, 0x0332, "mnode vgroup not exist")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_REMOVE_MASTER, 0, 0x0333, "mnode cant not remove master") TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_REMOVE_MASTER, 0, 0x0333, "mnode cant not remove master")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_ENOUGH_DNODES, 0, 0x0334, "mnode no enough dnodes") TAOS_DEFINE_ERROR(TSDB_CODE_MND_NO_ENOUGH_DNODES, 0, 0x0334, "mnode no enough dnodes")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT, 0, 0x0335, "mnode cluster cfg inconsistent")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_ACCT_ALREADY_EXIST, 0, 0x0340, "mnode accounts already exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_ACCT_ALREADY_EXIST, 0, 0x0340, "mnode accounts already exist")
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ACCT, 0, 0x0341, "mnode invalid account") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ACCT, 0, 0x0341, "mnode invalid account")
......
...@@ -556,6 +556,17 @@ typedef struct { ...@@ -556,6 +556,17 @@ typedef struct {
SDMMnodeInfo nodeInfos[TSDB_MAX_REPLICA]; SDMMnodeInfo nodeInfos[TSDB_MAX_REPLICA];
} SDMMnodeInfos; } SDMMnodeInfos;
typedef struct {
int32_t numOfMnodes; // tsNumOfMnodes
int32_t mnodeEqualVnodeNum; // tsMnodeEqualVnodeNum
int32_t offlineThreshold; // tsOfflineThreshold
int32_t statusInterval; // tsStatusInterval
char arbitrator[TSDB_EP_LEN]; // tsArbitrator
char timezone[64]; // tsTimezone
char locale[TSDB_LOCALE_LEN]; // tsLocale
char charset[TSDB_LOCALE_LEN]; // tsCharset
} SClusterCfg;
typedef struct { typedef struct {
uint32_t version; uint32_t version;
int32_t dnodeId; int32_t dnodeId;
...@@ -568,6 +579,7 @@ typedef struct { ...@@ -568,6 +579,7 @@ typedef struct {
float diskAvailable; // GB float diskAvailable; // GB
uint8_t alternativeRole; uint8_t alternativeRole;
uint8_t reserve[15]; uint8_t reserve[15];
SClusterCfg ClusterCfgPara;
SVnodeLoad load[]; SVnodeLoad load[];
} SDMStatusMsg; } SDMStatusMsg;
......
...@@ -277,6 +277,20 @@ static void mnodeProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) { ...@@ -277,6 +277,20 @@ static void mnodeProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) {
mPrint("cfg dnode rsp is received"); mPrint("cfg dnode rsp is received");
} }
static bool mnodeCheckClusterCfgPara(const SClusterCfg *clusterCfg) {
if (clusterCfg->numOfMnodes != tsNumOfMnodes) return false;
if (clusterCfg->mnodeEqualVnodeNum != tsMnodeEqualVnodeNum) return false;
if (clusterCfg->offlineThreshold != tsOfflineThreshold) return false;
if (clusterCfg->statusInterval != tsStatusInterval) return false;
if (0 != strncasecmp(clusterCfg->arbitrator, tsArbitrator, strlen(tsArbitrator))) return false;
if (0 != strncasecmp(clusterCfg->timezone, tsTimezone, strlen(tsTimezone))) return false;
if (0 != strncasecmp(clusterCfg->locale, tsLocale, strlen(tsLocale))) return false;
if (0 != strncasecmp(clusterCfg->charset, tsCharset, strlen(tsCharset))) return false;
return true;
}
static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
SDMStatusMsg *pStatus = pMsg->rpcMsg.pCont; SDMStatusMsg *pStatus = pMsg->rpcMsg.pCont;
pStatus->dnodeId = htonl(pStatus->dnodeId); pStatus->dnodeId = htonl(pStatus->dnodeId);
...@@ -312,7 +326,6 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { ...@@ -312,7 +326,6 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
pDnode->alternativeRole = pStatus->alternativeRole; pDnode->alternativeRole = pStatus->alternativeRole;
pDnode->totalVnodes = pStatus->numOfTotalVnodes; pDnode->totalVnodes = pStatus->numOfTotalVnodes;
pDnode->moduleStatus = pStatus->moduleStatus; pDnode->moduleStatus = pStatus->moduleStatus;
pDnode->lastAccess = tsAccessSquence;
if (pStatus->dnodeId == 0) { if (pStatus->dnodeId == 0) {
mTrace("dnode:%d %s, first access", pDnode->dnodeId, pDnode->dnodeEp); mTrace("dnode:%d %s, first access", pDnode->dnodeId, pDnode->dnodeEp);
...@@ -338,6 +351,14 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { ...@@ -338,6 +351,14 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
} }
if (pDnode->status == TAOS_DN_STATUS_OFFLINE) { if (pDnode->status == TAOS_DN_STATUS_OFFLINE) {
// Verify whether the cluster parameters are consistent when status change from offline to ready
bool ret = mnodeCheckClusterCfgPara(&(pStatus->ClusterCfgPara));
if (false == ret) {
mnodeDecDnodeRef(pDnode);
mError("dnode %s cluster cfg parameters inconsistent", pStatus->dnodeEp);
return TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT;
}
mTrace("dnode:%d, from offline to online", pDnode->dnodeId); mTrace("dnode:%d, from offline to online", pDnode->dnodeId);
pDnode->status = TAOS_DN_STATUS_READY; pDnode->status = TAOS_DN_STATUS_READY;
balanceUpdateMnode(); balanceUpdateMnode();
...@@ -352,6 +373,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) { ...@@ -352,6 +373,8 @@ static int32_t mnodeProcessDnodeStatusMsg(SMnodeMsg *pMsg) {
return TSDB_CODE_MND_OUT_OF_MEMORY; return TSDB_CODE_MND_OUT_OF_MEMORY;
} }
pDnode->lastAccess = tsAccessSquence;
mnodeGetMnodeInfos(&pRsp->mnodes); mnodeGetMnodeInfos(&pRsp->mnodes);
pRsp->dnodeCfg.dnodeId = htonl(pDnode->dnodeId); pRsp->dnodeCfg.dnodeId = htonl(pDnode->dnodeId);
......
system sh/stop_dnodes.sh
system sh/deploy.sh -n dnode1 -i 1
system sh/deploy.sh -n dnode2 -i 2
system sh/deploy.sh -n dnode3 -i 3
system sh/deploy.sh -n dnode4 -i 4
system sh/deploy.sh -n dnode5 -i 5
system sh/deploy.sh -n dnode6 -i 6
system sh/deploy.sh -n dnode7 -i 7
system sh/cfg.sh -n dnode1 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode1 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode1 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode1 -c statusInterval -v 3
system sh/cfg.sh -n dnode1 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode1 -c timezone -v ""
#system sh/cfg.sh -n dnode1 -c locale -v ""
#system sh/cfg.sh -n dnode1 -c charset -v ""
system sh/cfg.sh -n dnode1 -c balanceInterval -v 10
######## dnode 2 the same with dnode1
system sh/cfg.sh -n dnode2 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode2 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode2 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode2 -c statusInterval -v 3
system sh/cfg.sh -n dnode2 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode2 -c timezone -v ""
#system sh/cfg.sh -n dnode2 -c locale -v ""
#system sh/cfg.sh -n dnode2 -c charset -v ""
system sh/cfg.sh -n dnode2 -c balanceInterval -v 10
######## dnode 3 one para no same with dnode1
system sh/cfg.sh -n dnode3 -c numOfMnodes -v 3
system sh/cfg.sh -n dnode3 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode3 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode3 -c statusInterval -v 3
system sh/cfg.sh -n dnode3 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode3 -c timezone -v ""
#system sh/cfg.sh -n dnode3 -c locale -v ""
#system sh/cfg.sh -n dnode3 -c charset -v ""
system sh/cfg.sh -n dnode3 -c balanceInterval -v 10
######## dnode 4 one para no same with dnode1
system sh/cfg.sh -n dnode4 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode4 -c mnodeEqualVnodeNum -v 5
system sh/cfg.sh -n dnode4 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode4 -c statusInterval -v 3
system sh/cfg.sh -n dnode4 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode4 -c timezone -v ""
#system sh/cfg.sh -n dnode4 -c locale -v ""
#system sh/cfg.sh -n dnode4 -c charset -v ""
system sh/cfg.sh -n dnode4 -c balanceInterval -v 10
######## dnode 5 one para no same with dnode1
system sh/cfg.sh -n dnode5 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode5 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode5 -c offlineThreshold -v 16
system sh/cfg.sh -n dnode5 -c statusInterval -v 3
system sh/cfg.sh -n dnode5 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode5 -c timezone -v ""
#system sh/cfg.sh -n dnode5 -c locale -v ""
#system sh/cfg.sh -n dnode5 -c charset -v ""
system sh/cfg.sh -n dnode5 -c balanceInterval -v 10
######## dnode 6 one para no same with dnode1
system sh/cfg.sh -n dnode6 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode6 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode6 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode6 -c statusInterval -v 2
system sh/cfg.sh -n dnode6 -c arbitrator -v $arbitrator
#system sh/cfg.sh -n dnode6 -c timezone -v ""
#system sh/cfg.sh -n dnode6 -c locale -v ""
#system sh/cfg.sh -n dnode6 -c charset -v ""
system sh/cfg.sh -n dnode6 -c balanceInterval -v 10
######## dnode 7 one para no same with dnode1
system sh/cfg.sh -n dnode7 -c numOfMnodes -v 2
system sh/cfg.sh -n dnode7 -c mnodeEqualVnodeNum -v 4
system sh/cfg.sh -n dnode7 -c offlineThreshold -v 15
system sh/cfg.sh -n dnode7 -c statusInterval -v 3
system sh/cfg.sh -n dnode7 -c arbitrator -v "plum-VirtualBox:8001"
#system sh/cfg.sh -n dnode7 -c timezone -v ""
#system sh/cfg.sh -n dnode7 -c locale -v ""
#system sh/cfg.sh -n dnode7 -c charset -v ""
system sh/cfg.sh -n dnode7 -c balanceInterval -v 10
print ============== step0: start tarbitrator
system sh/exec_tarbitrator.sh -s start
print ============== step1: start dnode1
system sh/exec.sh -n dnode1 -s start
sleep 3000
sql connect
print ============== step2: start dnode2~7 and add into cluster
system sh/exec.sh -n dnode2 -s start
system sh/exec.sh -n dnode3 -s start
system sh/exec.sh -n dnode4 -s start
system sh/exec.sh -n dnode5 -s start
system sh/exec.sh -n dnode6 -s start
system sh/exec.sh -n dnode7 -s start
sql create dnode $hostname2
sql create dnode $hostname3
sql create dnode $hostname4
sql create dnode $hostname5
sql create dnode $hostname6
sql create dnode $hostname7
sleep 10000
wait_dnode_created:
sql show dnodes
if $rows != 7 then
sleep 2000
goto wait_dnode_created
endi
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
$dnode1Status = $data4_1
$dnode2Status = $data4_2
$dnode3Status = $data4_3
$dnode4Status = $data4_4
$dnode5Status = $data4_5
$dnode6Status = $data4_6
$dnode7Status = $data4_7
if $dnode1Status != ready then
return -1
endi
if $dnode2Status != ready then
return -1
endi
if $dnode3Status != offline then
return -1
endi
if $dnode4Status != offline then
return -1
endi
if $dnode5Status != offline then
return -1
endi
if $dnode6Status != offline then
return -1
endi
if $dnode7Status != offline then
return -1
endi
sleep 10000
wait_dnode_offline_overtime_dropped:
sql show dnodes
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
if $rows != 2 then
sleep 2000
goto wait_dnode_offline_overtime_dropped
endi
print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1
print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2
print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3
print $data0_4 $data1_4 $data2_4 $data3_4 $data4_4
print $data0_5 $data1_5 $data2_5 $data3_5 $data4_5
print $data0_6 $data1_6 $data2_6 $data3_6 $data4_6
print $data0_7 $data1_7 $data2_7 $data3_7 $data4_7
$dnode1Status = $data4_1
$dnode2Status = $data4_2
$dnode3Status = $data4_3
$dnode4Status = $data4_4
$dnode5Status = $data4_5
$dnode6Status = $data4_6
$dnode7Status = $data4_7
if $dnode1Status != ready then
return -1
endi
if $dnode2Status != ready then
return -1
endi
...@@ -5,11 +5,11 @@ system sh/deploy.sh -n dnode3 -i 3 ...@@ -5,11 +5,11 @@ system sh/deploy.sh -n dnode3 -i 3
system sh/deploy.sh -n dnode4 -i 4 system sh/deploy.sh -n dnode4 -i 4
system sh/deploy.sh -n dnode5 -i 5 system sh/deploy.sh -n dnode5 -i 5
system sh/cfg.sh -n dnode1 -c numOfMPeers -v 1 system sh/cfg.sh -n dnode1 -c numOfMnodes -v 1
system sh/cfg.sh -n dnode2 -c numOfMPeers -v 1 system sh/cfg.sh -n dnode2 -c numOfMnodes -v 1
system sh/cfg.sh -n dnode3 -c numOfMPeers -v 1 system sh/cfg.sh -n dnode3 -c numOfMnodes -v 1
system sh/cfg.sh -n dnode4 -c numOfMPeers -v 1 system sh/cfg.sh -n dnode4 -c numOfMnodes -v 1
system sh/cfg.sh -n dnode5 -c numOfMPeers -v 1 system sh/cfg.sh -n dnode5 -c numOfMnodes -v 1
system sh/cfg.sh -n dnode1 -c walLevel -v 1 system sh/cfg.sh -n dnode1 -c walLevel -v 1
system sh/cfg.sh -n dnode2 -c walLevel -v 1 system sh/cfg.sh -n dnode2 -c walLevel -v 1
......
...@@ -96,7 +96,12 @@ endi ...@@ -96,7 +96,12 @@ endi
print ============== step3: stop dnode4, and remove its vnodeX subdirector print ============== step3: stop dnode4, and remove its vnodeX subdirector
system sh/exec.sh -n dnode4 -s stop -x SIGINT system sh/exec.sh -n dnode4 -s stop -x SIGINT
sleep $sleepTimer sleep $sleepTimer
$loopCnt = 0
wait_dnode4_offline_0: wait_dnode4_offline_0:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
return -1
endi
sql show dnodes sql show dnodes
if $rows != 4 then if $rows != 4 then
sleep 2000 sleep 2000
...@@ -148,7 +153,14 @@ sleep 1000 ...@@ -148,7 +153,14 @@ sleep 1000
print ============== step4: restart dnode4, waiting sync end print ============== step4: restart dnode4, waiting sync end
system sh/exec.sh -n dnode4 -s start system sh/exec.sh -n dnode4 -s start
sleep $sleepTimer sleep $sleepTimer
$loopCnt = 0
wait_dnode4_reready: wait_dnode4_reready:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
return -1
endi
sql show dnodes sql show dnodes
if $rows != 4 then if $rows != 4 then
sleep 2000 sleep 2000
...@@ -171,7 +183,13 @@ if $dnode4Status != ready then ...@@ -171,7 +183,13 @@ if $dnode4Status != ready then
goto wait_dnode4_reready goto wait_dnode4_reready
endi endi
$loopCnt = 0
wait_dnode4_vgroup_slave: wait_dnode4_vgroup_slave:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
return -1
endi
sql show vgroups sql show vgroups
if $rows != 1 then if $rows != 1 then
sleep 2000 sleep 2000
...@@ -200,7 +218,13 @@ system sh/exec.sh -n dnode2 -s stop ...@@ -200,7 +218,13 @@ system sh/exec.sh -n dnode2 -s stop
system sh/exec.sh -n dnode3 -s stop system sh/exec.sh -n dnode3 -s stop
sleep $sleepTimer sleep $sleepTimer
$loopCnt = 0
wait_dnode23_offline: wait_dnode23_offline:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
return -1
endi
sql show dnodes sql show dnodes
if $rows != 4 then if $rows != 4 then
sleep 2000 sleep 2000
...@@ -231,7 +255,13 @@ if $dnode4Status != ready then ...@@ -231,7 +255,13 @@ if $dnode4Status != ready then
goto wait_dnode23_offline goto wait_dnode23_offline
endi endi
$loopCnt = 0
wait_dnode4_vgroup_master: wait_dnode4_vgroup_master:
$loopCnt = $loopCnt + 1
if $loopCnt == 10 then
return -1
endi
sql show vgroups sql show vgroups
if $rows != 1 then if $rows != 1 then
sleep 2000 sleep 2000
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册