提交 3b78d438 编写于 作者: C cadem

add learner

上级 230adc54
...@@ -1286,6 +1286,9 @@ typedef struct { ...@@ -1286,6 +1286,9 @@ typedef struct {
int16_t hashSuffix; int16_t hashSuffix;
int32_t tsdbPageSize; int32_t tsdbPageSize;
int64_t reserved[8]; int64_t reserved[8];
int8_t learnerReplica;
int8_t learnerSelfIndex;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
} SCreateVnodeReq; } SCreateVnodeReq;
int32_t tSerializeSCreateVnodeReq(void* buf, int32_t bufLen, SCreateVnodeReq* pReq); int32_t tSerializeSCreateVnodeReq(void* buf, int32_t bufLen, SCreateVnodeReq* pReq);
...@@ -1357,7 +1360,10 @@ typedef struct { ...@@ -1357,7 +1360,10 @@ typedef struct {
int8_t replica; int8_t replica;
SReplica replicas[TSDB_MAX_REPLICA]; SReplica replicas[TSDB_MAX_REPLICA];
int64_t reserved[8]; int64_t reserved[8];
} SAlterVnodeReplicaReq; int8_t learnerSelfIndex;
int8_t learnerReplica;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
} SAlterVnodeReplicaReq, SAlterVnodeTypeReq;
int32_t tSerializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq); int32_t tSerializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
int32_t tDeserializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq); int32_t tDeserializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
...@@ -1629,7 +1635,10 @@ int32_t tDeserializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQno ...@@ -1629,7 +1635,10 @@ int32_t tDeserializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQno
typedef struct { typedef struct {
int8_t replica; int8_t replica;
SReplica replicas[TSDB_MAX_REPLICA]; SReplica replicas[TSDB_MAX_REPLICA];
} SDCreateMnodeReq, SDAlterMnodeReq; int8_t learnerReplica;
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
int64_t lastIndex;
} SDCreateMnodeReq, SDAlterMnodeReq, SDAlterMnodeTypeReq;
int32_t tSerializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq); int32_t tSerializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);
int32_t tDeserializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq); int32_t tDeserializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);
......
...@@ -83,6 +83,8 @@ enum { ...@@ -83,6 +83,8 @@ enum {
TD_DEF_MSG_TYPE(TDMT_DND_CONFIG_DNODE, "config-dnode", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_CONFIG_DNODE, "config-dnode", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_SYSTABLE_RETRIEVE, "dnode-retrieve", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_SYSTABLE_RETRIEVE, "dnode-retrieve", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_MAX_MSG, "dnd-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_DND_MAX_MSG, "dnd-max", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_MNODE_TYPE, "dnode-alter-mnode-type", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_VNODE_TYPE, "dnode-alter-vnode-type", NULL, NULL)
TD_NEW_MSG_SEG(TDMT_MND_MSG) TD_NEW_MSG_SEG(TDMT_MND_MSG)
TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL)
......
...@@ -33,8 +33,11 @@ typedef struct { ...@@ -33,8 +33,11 @@ typedef struct {
bool deploy; bool deploy;
int8_t selfIndex; int8_t selfIndex;
int8_t numOfReplicas; int8_t numOfReplicas;
SReplica replicas[TSDB_MAX_REPLICA]; int8_t numOfTotalReplicas;
SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SMsgCb msgCb; SMsgCb msgCb;
int64_t lastIndex;
} SMnodeOpt; } SMnodeOpt;
/* ------------------------ SMnode ------------------------ */ /* ------------------------ SMnode ------------------------ */
...@@ -69,6 +72,8 @@ int32_t mndStart(SMnode *pMnode); ...@@ -69,6 +72,8 @@ int32_t mndStart(SMnode *pMnode);
*/ */
void mndStop(SMnode *pMnode); void mndStop(SMnode *pMnode);
int32_t mndIsCatchUp(SMnode *pMnode);
/** /**
* @brief Get mnode monitor info. * @brief Get mnode monitor info.
* *
......
...@@ -55,6 +55,8 @@ extern "C" { ...@@ -55,6 +55,8 @@ extern "C" {
#define SYNC_INDEX_INVALID -1 #define SYNC_INDEX_INVALID -1
#define SYNC_TERM_INVALID -1 #define SYNC_TERM_INVALID -1
#define SYNC_LEARNER_CATCHUP 10
typedef enum { typedef enum {
SYNC_STRATEGY_NO_SNAPSHOT = 0, SYNC_STRATEGY_NO_SNAPSHOT = 0,
SYNC_STRATEGY_STANDARD_SNAPSHOT = 1, SYNC_STRATEGY_STANDARD_SNAPSHOT = 1,
...@@ -76,19 +78,29 @@ typedef enum { ...@@ -76,19 +78,29 @@ typedef enum {
TAOS_SYNC_STATE_CANDIDATE = 101, TAOS_SYNC_STATE_CANDIDATE = 101,
TAOS_SYNC_STATE_LEADER = 102, TAOS_SYNC_STATE_LEADER = 102,
TAOS_SYNC_STATE_ERROR = 103, TAOS_SYNC_STATE_ERROR = 103,
TAOS_SYNC_STATE_LEARNER = 104,
} ESyncState; } ESyncState;
typedef enum {
TAOS_SYNC_ROLE_VOTER = 0,
TAOS_SYNC_ROLE_LEARNER = 1,
TAOS_SYNC_ROLE_ERROR = 2,
} ESyncRole;
typedef struct SNodeInfo { typedef struct SNodeInfo {
int64_t clusterId; int64_t clusterId;
int32_t nodeId; int32_t nodeId;
uint16_t nodePort; uint16_t nodePort;
char nodeFqdn[TSDB_FQDN_LEN]; char nodeFqdn[TSDB_FQDN_LEN];
ESyncRole nodeRole;
} SNodeInfo; } SNodeInfo;
typedef struct SSyncCfg { typedef struct SSyncCfg {
int32_t totalReplicaNum;
int32_t replicaNum; int32_t replicaNum;
int32_t myIndex; int32_t myIndex;
SNodeInfo nodeInfo[TSDB_MAX_REPLICA]; SNodeInfo nodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex lastIndex;
} SSyncCfg; } SSyncCfg;
typedef struct SFsmCbMeta { typedef struct SFsmCbMeta {
...@@ -155,6 +167,7 @@ typedef struct SSyncFSM { ...@@ -155,6 +167,7 @@ typedef struct SSyncFSM {
void (*FpBecomeLeaderCb)(const struct SSyncFSM* pFsm); void (*FpBecomeLeaderCb)(const struct SSyncFSM* pFsm);
void (*FpBecomeFollowerCb)(const struct SSyncFSM* pFsm); void (*FpBecomeFollowerCb)(const struct SSyncFSM* pFsm);
void (*FpBecomeLearnerCb)(const struct SSyncFSM* pFsm);
int32_t (*FpGetSnapshot)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader); int32_t (*FpGetSnapshot)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader);
void (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot); void (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot);
...@@ -236,6 +249,7 @@ void syncStop(int64_t rid); ...@@ -236,6 +249,7 @@ void syncStop(int64_t rid);
void syncPreStop(int64_t rid); void syncPreStop(int64_t rid);
void syncPostStop(int64_t rid); void syncPostStop(int64_t rid);
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq); int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq);
int32_t syncIsCatchUp(int64_t rid);
int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg); int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg);
int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg); int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg);
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex); int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex);
......
...@@ -285,6 +285,7 @@ typedef enum ELogicConditionType { ...@@ -285,6 +285,7 @@ typedef enum ELogicConditionType {
#define TSDB_DNODE_ROLE_VNODE 2 #define TSDB_DNODE_ROLE_VNODE 2
#define TSDB_MAX_REPLICA 5 #define TSDB_MAX_REPLICA 5
#define TSDB_MAX_LEARNER_REPLICA 10
#define TSDB_SYNC_LOG_BUFFER_SIZE 4096 #define TSDB_SYNC_LOG_BUFFER_SIZE 4096
#define TSDB_SYNC_LOG_BUFFER_RETENTION (TSDB_SYNC_LOG_BUFFER_SIZE >> 4) #define TSDB_SYNC_LOG_BUFFER_RETENTION (TSDB_SYNC_LOG_BUFFER_SIZE >> 4)
......
...@@ -4129,6 +4129,12 @@ int32_t tSerializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *pR ...@@ -4129,6 +4129,12 @@ int32_t tSerializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *pR
for (int32_t i = 0; i < 8; ++i) { for (int32_t i = 0; i < 8; ++i) {
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1; if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
} }
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
tEndEncode(&encoder); tEndEncode(&encoder);
...@@ -4207,6 +4213,12 @@ int32_t tDeserializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq * ...@@ -4207,6 +4213,12 @@ int32_t tDeserializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *
for (int32_t i = 0; i < 8; ++i) { for (int32_t i = 0; i < 8; ++i) {
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1; if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
} }
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
tEndDecode(&decoder); tEndDecode(&decoder);
tDecoderClear(&decoder); tDecoderClear(&decoder);
...@@ -4433,6 +4445,12 @@ int32_t tSerializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnodeRe ...@@ -4433,6 +4445,12 @@ int32_t tSerializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnodeRe
for (int32_t i = 0; i < 8; ++i) { for (int32_t i = 0; i < 8; ++i) {
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1; if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
} }
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
tEndEncode(&encoder); tEndEncode(&encoder);
int32_t tlen = encoder.pos; int32_t tlen = encoder.pos;
...@@ -4456,6 +4474,12 @@ int32_t tDeserializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnode ...@@ -4456,6 +4474,12 @@ int32_t tDeserializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnode
for (int32_t i = 0; i < 8; ++i) { for (int32_t i = 0; i < 8; ++i) {
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1; if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
} }
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
tEndDecode(&decoder); tEndDecode(&decoder);
tDecoderClear(&decoder); tDecoderClear(&decoder);
...@@ -4767,6 +4791,12 @@ int32_t tSerializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq * ...@@ -4767,6 +4791,12 @@ int32_t tSerializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq *
SReplica *pReplica = &pReq->replicas[i]; SReplica *pReplica = &pReq->replicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1; if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
} }
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
}
if (tEncodeI64(&encoder, pReq->lastIndex) < 0) return -1;
tEndEncode(&encoder); tEndEncode(&encoder);
int32_t tlen = encoder.pos; int32_t tlen = encoder.pos;
...@@ -4784,6 +4814,12 @@ int32_t tDeserializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq ...@@ -4784,6 +4814,12 @@ int32_t tDeserializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq
SReplica *pReplica = &pReq->replicas[i]; SReplica *pReplica = &pReq->replicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1; if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
} }
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
SReplica *pReplica = &pReq->learnerReplicas[i];
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
}
if (tDecodeI64(&decoder, &pReq->lastIndex) < 0) return -1;
tEndDecode(&decoder); tEndDecode(&decoder);
tDecoderClear(&decoder); tDecoderClear(&decoder);
......
...@@ -32,6 +32,7 @@ typedef struct SDnodeMgmt { ...@@ -32,6 +32,7 @@ typedef struct SDnodeMgmt {
TdThread crashReportThread; TdThread crashReportThread;
SSingleWorker mgmtWorker; SSingleWorker mgmtWorker;
ProcessCreateNodeFp processCreateNodeFp; ProcessCreateNodeFp processCreateNodeFp;
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
ProcessDropNodeFp processDropNodeFp; ProcessDropNodeFp processDropNodeFp;
SendMonitorReportFp sendMonitorReportFp; SendMonitorReportFp sendMonitorReportFp;
GetVnodeLoadsFp getVnodeLoadsFp; GetVnodeLoadsFp getVnodeLoadsFp;
......
...@@ -352,6 +352,7 @@ SArray *dmGetMsgHandles() { ...@@ -352,6 +352,7 @@ SArray *dmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_SERVER_STATUS, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_SERVER_STATUS, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_SYSTABLE_RETRIEVE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_SYSTABLE_RETRIEVE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
// Requests handled by MNODE // Requests handled by MNODE
if (dmSetMgmtHandle(pArray, TDMT_MND_GRANT, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GRANT, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
......
...@@ -48,6 +48,7 @@ static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { ...@@ -48,6 +48,7 @@ static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
pMgmt->path = pInput->path; pMgmt->path = pInput->path;
pMgmt->name = pInput->name; pMgmt->name = pInput->name;
pMgmt->processCreateNodeFp = pInput->processCreateNodeFp; pMgmt->processCreateNodeFp = pInput->processCreateNodeFp;
pMgmt->processAlterNodeTypeFp = pInput->processAlterNodeTypeFp;
pMgmt->processDropNodeFp = pInput->processDropNodeFp; pMgmt->processDropNodeFp = pInput->processDropNodeFp;
pMgmt->sendMonitorReportFp = pInput->sendMonitorReportFp; pMgmt->sendMonitorReportFp = pInput->sendMonitorReportFp;
pMgmt->getVnodeLoadsFp = pInput->getVnodeLoadsFp; pMgmt->getVnodeLoadsFp = pInput->getVnodeLoadsFp;
......
...@@ -227,6 +227,9 @@ static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { ...@@ -227,6 +227,9 @@ static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
case TDMT_DND_DROP_SNODE: case TDMT_DND_DROP_SNODE:
code = (*pMgmt->processDropNodeFp)(SNODE, pMsg); code = (*pMgmt->processDropNodeFp)(SNODE, pMsg);
break; break;
case TDMT_DND_ALTER_MNODE_TYPE:
code = (*pMgmt->processAlterNodeTypeFp)(MNODE, pMsg);
break;
case TDMT_DND_SERVER_STATUS: case TDMT_DND_SERVER_STATUS:
code = dmProcessServerRunStatus(pMgmt, pMsg); code = dmProcessServerRunStatus(pMgmt, pMsg);
break; break;
......
...@@ -24,12 +24,16 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) { ...@@ -24,12 +24,16 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(pJson, "selfIndex", pOption->selfIndex, code); tjsonGetInt32ValueFromDouble(pJson, "selfIndex", pOption->selfIndex, code);
if (code < 0) return 0; if (code < 0) return 0;
tjsonGetInt32ValueFromDouble(pJson, "lastIndex", pOption->lastIndex, code);
if (code < 0) return 0;
SJson *replicas = tjsonGetObjectItem(pJson, "replicas"); SJson *replicas = tjsonGetObjectItem(pJson, "replicas");
if (replicas == NULL) return 0; if (replicas == NULL) return 0;
pOption->numOfReplicas = tjsonGetArraySize(replicas); pOption->numOfTotalReplicas = tjsonGetArraySize(replicas);
pOption->numOfReplicas = 0;
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
SJson *replica = tjsonGetArrayItem(replicas, i); SJson *replica = tjsonGetArrayItem(replicas, i);
if (replica == NULL) return -1; if (replica == NULL) return -1;
...@@ -40,6 +44,14 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) { ...@@ -40,6 +44,14 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetUInt16ValueFromDouble(replica, "port", pReplica->port, code); tjsonGetUInt16ValueFromDouble(replica, "port", pReplica->port, code);
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(replica, "role", pOption->nodeRoles[i], code);
if (code < 0) return -1;
if(pOption->nodeRoles[i] == TAOS_SYNC_ROLE_VOTER){
pOption->numOfReplicas++;
}
}
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
} }
return 0; return 0;
...@@ -112,14 +124,14 @@ _OVER: ...@@ -112,14 +124,14 @@ _OVER:
} }
static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) { static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
if (pOption->deploy && pOption->numOfReplicas > 0) { if (pOption->deploy && pOption->numOfTotalReplicas > 0) {
if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1; if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1;
SJson *replicas = tjsonCreateArray(); SJson *replicas = tjsonCreateArray();
if (replicas == NULL) return -1; if (replicas == NULL) return -1;
if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1; if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1;
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
SJson *replica = tjsonCreateObject(); SJson *replica = tjsonCreateObject();
if (replica == NULL) return -1; if (replica == NULL) return -1;
...@@ -127,10 +139,13 @@ static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) { ...@@ -127,10 +139,13 @@ static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1; if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1;
if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1; if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1;
if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1; if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1;
if (tjsonAddDoubleToObject(replica, "role", pOption->nodeRoles[i]) < 0) return -1;
if (tjsonAddItemToArray(replicas, replica) < 0) return -1; if (tjsonAddItemToArray(replicas, replica) < 0) return -1;
} }
} }
if (tjsonAddDoubleToObject(pJson, "lastIndex", pOption->lastIndex) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1; if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1;
return 0; return 0;
......
...@@ -33,12 +33,24 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) { ...@@ -33,12 +33,24 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) {
return -1; return -1;
} }
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica, .selfIndex = -1}; SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica,
.numOfTotalReplicas = createReq.replica + createReq.learnerReplica,
.selfIndex = -1, .lastIndex = createReq.lastIndex};
memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas)); memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas));
for (int32_t i = 0; i < option.numOfReplicas; ++i) { for (int32_t i = 0; i < createReq.replica; ++i) {
if (createReq.replicas[i].id == pInput->pData->dnodeId) { if (createReq.replicas[i].id == pInput->pData->dnodeId) {
option.selfIndex = i; option.selfIndex = i;
} }
option.nodeRoles[i] = TAOS_SYNC_ROLE_VOTER;
}
memcpy(&(option.replicas[createReq.replica]), createReq.learnerReplicas, sizeof(createReq.learnerReplicas));
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
if (createReq.learnerReplicas[i].id == pInput->pData->dnodeId) {
option.selfIndex = createReq.replica + i;
}
option.nodeRoles[createReq.replica + i] = TAOS_SYNC_ROLE_LEARNER;
} }
if (option.selfIndex == -1) { if (option.selfIndex == -1) {
...@@ -92,6 +104,8 @@ SArray *mmGetMsgHandles() { ...@@ -92,6 +104,8 @@ SArray *mmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CONNECT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_CONNECT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_ACCT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_ACCT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
......
...@@ -45,9 +45,11 @@ static void mmBuildOptionForDeploy(SMnodeMgmt *pMgmt, const SMgmtInputOpt *pInpu ...@@ -45,9 +45,11 @@ static void mmBuildOptionForDeploy(SMnodeMgmt *pMgmt, const SMgmtInputOpt *pInpu
pOption->dnodeId = pMgmt->pData->dnodeId; pOption->dnodeId = pMgmt->pData->dnodeId;
pOption->selfIndex = 0; pOption->selfIndex = 0;
pOption->numOfReplicas = 1; pOption->numOfReplicas = 1;
pOption->numOfTotalReplicas = 1;
pOption->replicas[0].id = 1; pOption->replicas[0].id = 1;
pOption->replicas[0].port = tsServerPort; pOption->replicas[0].port = tsServerPort;
tstrncpy(pOption->replicas[0].fqdn, tsLocalFqdn, TSDB_FQDN_LEN); tstrncpy(pOption->replicas[0].fqdn, tsLocalFqdn, TSDB_FQDN_LEN);
pOption->lastIndex = SYNC_INDEX_INVALID;
} }
static void mmBuildOptionForOpen(SMnodeMgmt *pMgmt, SMnodeOpt *pOption) { static void mmBuildOptionForOpen(SMnodeMgmt *pMgmt, SMnodeOpt *pOption) {
...@@ -123,9 +125,10 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { ...@@ -123,9 +125,10 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
} }
tmsgReportStartup("mnode-worker", "initialized"); tmsgReportStartup("mnode-worker", "initialized");
if (option.numOfReplicas > 0) { if (option.numOfTotalReplicas > 0) {
option.deploy = true; option.deploy = true;
option.numOfReplicas = 0; option.numOfReplicas = 0;
option.numOfTotalReplicas = 0;
if (mmWriteFile(pMgmt->path, &option) != 0) { if (mmWriteFile(pMgmt->path, &option) != 0) {
dError("failed to write mnode file since %s", terrstr()); dError("failed to write mnode file since %s", terrstr());
return -1; return -1;
...@@ -152,6 +155,10 @@ static void mmStop(SMnodeMgmt *pMgmt) { ...@@ -152,6 +155,10 @@ static void mmStop(SMnodeMgmt *pMgmt) {
mndStop(pMgmt->pMnode); mndStop(pMgmt->pMnode);
} }
static int32_t mmSyncIsCatchUp(SMnodeMgmt *pMgmt) {
return mndIsCatchUp(pMgmt->pMnode);
}
SMgmtFunc mmGetMgmtFunc() { SMgmtFunc mmGetMgmtFunc() {
SMgmtFunc mgmtFunc = {0}; SMgmtFunc mgmtFunc = {0};
mgmtFunc.openFp = mmOpen; mgmtFunc.openFp = mmOpen;
...@@ -162,6 +169,7 @@ SMgmtFunc mmGetMgmtFunc() { ...@@ -162,6 +169,7 @@ SMgmtFunc mmGetMgmtFunc() {
mgmtFunc.dropFp = (NodeDropFp)mmProcessDropReq; mgmtFunc.dropFp = (NodeDropFp)mmProcessDropReq;
mgmtFunc.requiredFp = mmRequire; mgmtFunc.requiredFp = mmRequire;
mgmtFunc.getHandlesFp = mmGetMsgHandles; mgmtFunc.getHandlesFp = mmGetMsgHandles;
mgmtFunc.isCatchUpFp = (NodeIsCatchUpFp)mmSyncIsCatchUp;
return mgmtFunc; return mgmtFunc;
} }
...@@ -90,6 +90,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); ...@@ -90,6 +90,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg); int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
// vmFile.c // vmFile.c
int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t *numOfVnodes); int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t *numOfVnodes);
......
...@@ -131,15 +131,34 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) { ...@@ -131,15 +131,34 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) {
pCfg->tsdbPageSize = pCreate->tsdbPageSize * 1024; pCfg->tsdbPageSize = pCreate->tsdbPageSize * 1024;
pCfg->standby = 0; pCfg->standby = 0;
pCfg->syncCfg.myIndex = pCreate->selfIndex; pCfg->syncCfg.replicaNum = 0;
pCfg->syncCfg.replicaNum = pCreate->replica; pCfg->syncCfg.totalReplicaNum = 0;
memset(&pCfg->syncCfg.nodeInfo, 0, sizeof(pCfg->syncCfg.nodeInfo)); memset(&pCfg->syncCfg.nodeInfo, 0, sizeof(pCfg->syncCfg.nodeInfo));
for (int32_t i = 0; i < pCreate->replica; ++i) { for (int32_t i = 0; i < pCreate->replica; ++i) {
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i]; SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
pNode->nodeId = pCreate->replicas[i].id; pNode->nodeId = pCreate->replicas[pCfg->syncCfg.replicaNum].id;
pNode->nodePort = pCreate->replicas[i].port; pNode->nodePort = pCreate->replicas[pCfg->syncCfg.replicaNum].port;
tstrncpy(pNode->nodeFqdn, pCreate->replicas[i].fqdn, TSDB_FQDN_LEN); pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
tstrncpy(pNode->nodeFqdn, pCreate->replicas[pCfg->syncCfg.replicaNum].fqdn, TSDB_FQDN_LEN);
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
pCfg->syncCfg.replicaNum++;
}
if(pCreate->selfIndex != -1){
pCfg->syncCfg.myIndex = pCreate->selfIndex;
}
for (int32_t i = pCfg->syncCfg.replicaNum; i < pCreate->replica + pCreate->learnerReplica; ++i) {
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
pNode->nodeId = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].id;
pNode->nodePort = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].port;
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
tstrncpy(pNode->nodeFqdn, pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].fqdn, TSDB_FQDN_LEN);
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
pCfg->syncCfg.totalReplicaNum++;
}
pCfg->syncCfg.totalReplicaNum += pCfg->syncCfg.replicaNum;
if(pCreate->learnerSelfIndex != -1){
pCfg->syncCfg.myIndex = pCreate->replica + pCreate->learnerSelfIndex;
} }
} }
...@@ -182,23 +201,35 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -182,23 +201,35 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1; return -1;
} }
dInfo("vgId:%d, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64 dInfo("vgId:%d, vnode management handle msgType:%s, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64
", cacheLast:%d cacheLastSize:%d sstTrigger:%d tsdbPageSize:%d %d dbname:%s dbId:%" PRId64 ", cacheLast:%d cacheLastSize:%d sstTrigger:%d tsdbPageSize:%d %d dbname:%s dbId:%" PRId64
", days:%d keep0:%d keep1:%d keep2:%d tsma:%d precision:%d compression:%d minRows:%d maxRows:%d" ", days:%d keep0:%d keep1:%d keep2:%d tsma:%d precision:%d compression:%d minRows:%d maxRows:%d"
", wal fsync:%d level:%d retentionPeriod:%d retentionSize:%" PRId64 " rollPeriod:%d segSize:%" PRId64 ", wal fsync:%d level:%d retentionPeriod:%d retentionSize:%" PRId64 " rollPeriod:%d segSize:%" PRId64
", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d strict:%d", ", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d "
req.vgId, req.pages, req.pageSize, req.buffer, req.pageSize * 1024, (uint64_t)req.buffer * 1024 * 1024, "learnerReplica:%d learnerSelfIndex:%d strict:%d",
req.vgId, TMSG_INFO(pMsg->msgType), req.pages, req.pageSize, req.buffer, req.pageSize * 1024,
(uint64_t)req.buffer * 1024 * 1024,
req.cacheLast, req.cacheLastSize, req.sstTrigger, req.tsdbPageSize, req.tsdbPageSize * 1024, req.db, req.dbUid, req.cacheLast, req.cacheLastSize, req.sstTrigger, req.tsdbPageSize, req.tsdbPageSize * 1024, req.db, req.dbUid,
req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, req.isTsma, req.precision, req.compression, req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, req.isTsma, req.precision, req.compression,
req.minRows, req.maxRows, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize, req.minRows, req.maxRows, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize,
req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix, req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix,
req.hashSuffix, req.replica, req.selfIndex, req.strict); req.hashSuffix, req.replica, req.selfIndex, req.learnerReplica, req.learnerSelfIndex, req.strict);
for (int32_t i = 0; i < req.replica; ++i) { for (int32_t i = 0; i < req.replica; ++i) {
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port, dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port,
req.replicas[i].id); req.replicas[i].id);
} }
for (int32_t i = 0; i < req.learnerReplica; ++i) {
dInfo("vgId:%d, learnerReplica:%d ep:%s:%u dnode:%d", req.vgId, i, req.learnerReplicas[i].fqdn,
req.learnerReplicas[i].port, req.replicas[i].id);
}
SReplica *pReplica = &req.replicas[req.selfIndex]; SReplica *pReplica = NULL;
if(req.selfIndex != -1){
pReplica = &req.replicas[req.selfIndex];
}
else{
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort || if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) { strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG; terrno = TSDB_CODE_INVALID_MSG;
...@@ -275,7 +306,8 @@ _OVER: ...@@ -275,7 +306,8 @@ _OVER:
vnodeClose(pImpl); vnodeClose(pImpl);
vnodeDestroy(path, pMgmt->pTfs); vnodeDestroy(path, pMgmt->pTfs);
} else { } else {
dInfo("vgId:%d, vnode is created", req.vgId); dInfo("vgId:%d, vnode management handle msgType:%s, end to create vnode, vnode is created",
req.vgId, TMSG_INFO(pMsg->msgType));
} }
tFreeSCreateVnodeReq(&req); tFreeSCreateVnodeReq(&req);
...@@ -283,6 +315,106 @@ _OVER: ...@@ -283,6 +315,106 @@ _OVER:
return code; return code;
} }
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
SAlterVnodeTypeReq req = {0};
if (tDeserializeSAlterVnodeReplicaReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
return -1;
}
dInfo("vgId:%d, vnode management handle msgType:%s, start to process alter-node-type-request",
req.vgId, TMSG_INFO(pMsg->msgType));
SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId);
if (pVnode == NULL) {
dError("vgId:%d, failed to alter hashrange since %s", req.vgId, terrstr());
terrno = TSDB_CODE_VND_NOT_EXIST;
return -1;
}
dInfo("vgId:%d, checking node catch up", req.vgId);
if(!vnodeIsCatchUp(pVnode->pImpl) == 0){
return -1;
}
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pMgmt->name);
int32_t vgId = req.vgId;
dInfo("vgId:%d, start to alter vnode type replica:%d selfIndex:%d strict:%d", vgId, req.replica, req.selfIndex,
req.strict);
for (int32_t i = 0; i < req.replica; ++i) {
SReplica *pReplica = &req.replicas[i];
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
}
for (int32_t i = 0; i < req.learnerReplica; ++i) {
SReplica *pReplica = &req.learnerReplicas[i];
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
}
if (req.replica <= 0 ||
(req.selfIndex < 0 && req.learnerSelfIndex <0)||
req.selfIndex >= req.replica || req.learnerSelfIndex >= req.learnerReplica) {
terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, failed to alter replica since invalid msg", vgId);
return -1;
}
SReplica *pReplica = NULL;
if(req.selfIndex > 0){
pReplica = &req.replicas[req.selfIndex];
}
else{
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, dnodeId:%d ep:%s:%u not matched with local dnode", vgId, pReplica->id, pReplica->fqdn,
pReplica->port);
return -1;
}
dInfo("vgId:%d, start to close vnode", vgId);
SWrapperCfg wrapperCfg = {
.dropped = pVnode->dropped,
.vgId = pVnode->vgId,
.vgVersion = pVnode->vgVersion,
};
tstrncpy(wrapperCfg.path, pVnode->path, sizeof(wrapperCfg.path));
vmCloseVnode(pMgmt, pVnode, false);
char path[TSDB_FILENAME_LEN] = {0};
snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, vgId);
dInfo("vgId:%d, start to alter vnode replica at %s", vgId, path);
if (vnodeAlterReplica(path, &req, pMgmt->pTfs) < 0) {
dError("vgId:%d, failed to alter vnode at %s since %s", vgId, path, terrstr());
return -1;
}
dInfo("vgId:%d, begin to open vnode", vgId);
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
return -1;
}
if (vmOpenVnode(pMgmt, &wrapperCfg, pImpl) != 0) {
dError("vgId:%d, failed to open vnode mgmt since %s", vgId, terrstr());
return -1;
}
if (vnodeStart(pImpl) != 0) {
dError("vgId:%d, failed to start sync since %s", vgId, terrstr());
return -1;
}
dInfo("vgId:%d, vnode management handle msgType:%s, end to process alter-node-type-request, vnode config is altered",
req.vgId, TMSG_INFO(pMsg->msgType));
return 0;
}
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
SDisableVnodeWriteReq req = {0}; SDisableVnodeWriteReq req = {0};
if (tDeserializeSDisableVnodeWriteReq(pMsg->pCont, pMsg->contLen, &req) != 0) { if (tDeserializeSDisableVnodeWriteReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
...@@ -378,20 +510,35 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -378,20 +510,35 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
} }
int32_t vgId = alterReq.vgId; int32_t vgId = alterReq.vgId;
dInfo("vgId:%d, start to alter vnode replica:%d selfIndex:%d strict:%d", vgId, alterReq.replica, alterReq.selfIndex, dInfo("vgId:%d,vnode management handle msgType:%s, start to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
alterReq.strict); "learnerSelfIndex:%d strict:%d",
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
for (int32_t i = 0; i < alterReq.replica; ++i) { for (int32_t i = 0; i < alterReq.replica; ++i) {
SReplica *pReplica = &alterReq.replicas[i]; SReplica *pReplica = &alterReq.replicas[i];
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port); dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
} }
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
if (alterReq.replica <= 0 || alterReq.selfIndex < 0 || alterReq.selfIndex >= alterReq.replica) { SReplica *pReplica = &alterReq.learnerReplicas[i];
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
}
if (alterReq.replica <= 0 ||
(alterReq.selfIndex < 0 && alterReq.learnerSelfIndex <0)||
alterReq.selfIndex >= alterReq.replica || alterReq.learnerSelfIndex >= alterReq.learnerReplica) {
terrno = TSDB_CODE_INVALID_MSG; terrno = TSDB_CODE_INVALID_MSG;
dError("vgId:%d, failed to alter replica since invalid msg", vgId); dError("vgId:%d, failed to alter replica since invalid msg", vgId);
return -1; return -1;
} }
SReplica *pReplica = &alterReq.replicas[alterReq.selfIndex]; SReplica *pReplica = NULL;
if(alterReq.selfIndex != -1){
pReplica = &alterReq.replicas[alterReq.selfIndex];
}
else{
pReplica = &alterReq.learnerReplicas[alterReq.learnerSelfIndex];
}
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort || if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) { strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
terrno = TSDB_CODE_INVALID_MSG; terrno = TSDB_CODE_INVALID_MSG;
...@@ -425,7 +572,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -425,7 +572,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1; return -1;
} }
dInfo("vgId:%d, close vnode", vgId); dInfo("vgId:%d, begin to open vnode", vgId);
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb); SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
if (pImpl == NULL) { if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr()); dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
...@@ -442,7 +589,10 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -442,7 +589,10 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1; return -1;
} }
dInfo("vgId:%d, vnode config is altered", vgId); dInfo("vgId:%d, vnode management handle msgType:%s, end to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
"learnerSelfIndex:%d strict:%d",
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
return 0; return 0;
} }
...@@ -548,6 +698,7 @@ SArray *vmGetMsgHandles() { ...@@ -548,6 +698,7 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT_ELECTION, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT_ELECTION, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;
......
...@@ -49,6 +49,9 @@ static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { ...@@ -49,6 +49,9 @@ static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
case TDMT_VND_ALTER_HASHRANGE: case TDMT_VND_ALTER_HASHRANGE:
code = vmProcessAlterHashRangeReq(pMgmt, pMsg); code = vmProcessAlterHashRangeReq(pMgmt, pMsg);
break; break;
case TDMT_DND_ALTER_VNODE_TYPE:
code = vmProcessAlterVnodeTypeReq(pMgmt, pMsg);
break;
default: default:
terrno = TSDB_CODE_MSG_NOT_PROCESSED; terrno = TSDB_CODE_MSG_NOT_PROCESSED;
dGError("msg:%p, not processed in vnode-mgmt queue", pMsg); dGError("msg:%p, not processed in vnode-mgmt queue", pMsg);
......
...@@ -169,6 +169,8 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) { ...@@ -169,6 +169,8 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
return -1; return -1;
} }
dInfo("start to process create-node-request");
pWrapper = &pDnode->wrappers[ntype]; pWrapper = &pDnode->wrappers[ntype];
if (taosMkDir(pWrapper->path) != 0) { if (taosMkDir(pWrapper->path) != 0) {
dmReleaseWrapper(pWrapper); dmReleaseWrapper(pWrapper);
...@@ -198,6 +200,64 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) { ...@@ -198,6 +200,64 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
return code; return code;
} }
static int32_t dmProcessAlterNodeTypeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
SDnode *pDnode = dmInstance();
SMgmtWrapper *pWrapper = dmAcquireWrapper(pDnode, ntype);
if (pWrapper == NULL) {
dError("fail to process alter node type since node not exist");
return -1;
}
dmReleaseWrapper(pWrapper);
dInfo("node:%s, start to process alter-node-type-request", pWrapper->name);
pWrapper = &pDnode->wrappers[ntype];
if(pWrapper->func.isCatchUpFp != NULL){
dInfo("node:%s, checking node catch up", pWrapper->name);
if(!(*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) == 0){
return -1;
}
}
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pWrapper->name);
taosThreadMutexLock(&pDnode->mutex);
dInfo("node:%s, stopping node", pWrapper->name);
dmStopNode(pWrapper);
dInfo("node:%s, closing node", pWrapper->name);
dmCloseNode(pWrapper);
pWrapper = &pDnode->wrappers[ntype];
if (taosMkDir(pWrapper->path) != 0) {
dmReleaseWrapper(pWrapper);
terrno = TAOS_SYSTEM_ERROR(errno);
dError("failed to create dir:%s since %s", pWrapper->path, terrstr());
return -1;
}
SMgmtInputOpt input = dmBuildMgmtInputOpt(pWrapper);
dInfo("node:%s, start to create", pWrapper->name);
int32_t code = (*pWrapper->func.createFp)(&input, pMsg);
if (code != 0) {
dError("node:%s, failed to create since %s", pWrapper->name, terrstr());
} else {
dInfo("node:%s, has been created", pWrapper->name);
code = dmOpenNode(pWrapper);
if (code == 0) {
code = dmStartNode(pWrapper);
}
pWrapper->deployed = true;
pWrapper->required = true;
}
taosThreadMutexUnlock(&pDnode->mutex);
return code;
}
static int32_t dmProcessDropNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) { static int32_t dmProcessDropNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
SDnode *pDnode = dmInstance(); SDnode *pDnode = dmInstance();
...@@ -251,6 +311,7 @@ SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper) { ...@@ -251,6 +311,7 @@ SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper) {
.name = pWrapper->name, .name = pWrapper->name,
.pData = &pWrapper->pDnode->data, .pData = &pWrapper->pDnode->data,
.processCreateNodeFp = dmProcessCreateNodeReq, .processCreateNodeFp = dmProcessCreateNodeReq,
.processAlterNodeTypeFp = dmProcessAlterNodeTypeReq,
.processDropNodeFp = dmProcessDropNodeReq, .processDropNodeFp = dmProcessDropNodeReq,
.sendMonitorReportFp = dmSendMonitorReport, .sendMonitorReportFp = dmSendMonitorReport,
.getVnodeLoadsFp = dmGetVnodeLoads, .getVnodeLoadsFp = dmGetVnodeLoads,
......
...@@ -89,6 +89,7 @@ typedef void (*SendMonitorReportFp)(); ...@@ -89,6 +89,7 @@ typedef void (*SendMonitorReportFp)();
typedef void (*GetVnodeLoadsFp)(SMonVloadInfo *pInfo); typedef void (*GetVnodeLoadsFp)(SMonVloadInfo *pInfo);
typedef void (*GetMnodeLoadsFp)(SMonMloadInfo *pInfo); typedef void (*GetMnodeLoadsFp)(SMonMloadInfo *pInfo);
typedef void (*GetQnodeLoadsFp)(SQnodeLoad *pInfo); typedef void (*GetQnodeLoadsFp)(SQnodeLoad *pInfo);
typedef int32_t (*ProcessAlterNodeTypeFp)(EDndNodeType ntype, SRpcMsg *pMsg);
typedef struct { typedef struct {
int32_t dnodeId; int32_t dnodeId;
...@@ -112,6 +113,7 @@ typedef struct { ...@@ -112,6 +113,7 @@ typedef struct {
SDnodeData *pData; SDnodeData *pData;
SMsgCb msgCb; SMsgCb msgCb;
ProcessCreateNodeFp processCreateNodeFp; ProcessCreateNodeFp processCreateNodeFp;
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
ProcessDropNodeFp processDropNodeFp; ProcessDropNodeFp processDropNodeFp;
SendMonitorReportFp sendMonitorReportFp; SendMonitorReportFp sendMonitorReportFp;
GetVnodeLoadsFp getVnodeLoadsFp; GetVnodeLoadsFp getVnodeLoadsFp;
...@@ -132,6 +134,7 @@ typedef int32_t (*NodeCreateFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg); ...@@ -132,6 +134,7 @@ typedef int32_t (*NodeCreateFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
typedef int32_t (*NodeDropFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg); typedef int32_t (*NodeDropFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
typedef int32_t (*NodeRequireFp)(const SMgmtInputOpt *pInput, bool *required); typedef int32_t (*NodeRequireFp)(const SMgmtInputOpt *pInput, bool *required);
typedef SArray *(*NodeGetHandlesFp)(); // array of SMgmtHandle typedef SArray *(*NodeGetHandlesFp)(); // array of SMgmtHandle
typedef bool (*NodeIsCatchUpFp)(void *pMgmt);
typedef struct { typedef struct {
NodeOpenFp openFp; NodeOpenFp openFp;
...@@ -142,6 +145,7 @@ typedef struct { ...@@ -142,6 +145,7 @@ typedef struct {
NodeDropFp dropFp; NodeDropFp dropFp;
NodeRequireFp requiredFp; NodeRequireFp requiredFp;
NodeGetHandlesFp getHandlesFp; NodeGetHandlesFp getHandlesFp;
NodeIsCatchUpFp isCatchUpFp;
} SMgmtFunc; } SMgmtFunc;
typedef struct { typedef struct {
......
...@@ -215,6 +215,8 @@ typedef struct { ...@@ -215,6 +215,8 @@ typedef struct {
bool syncRestore; bool syncRestore;
int64_t stateStartTime; int64_t stateStartTime;
SDnodeObj* pDnode; SDnodeObj* pDnode;
int32_t role;
SyncIndex lastIndex;
} SMnodeObj; } SMnodeObj;
typedef struct { typedef struct {
...@@ -341,6 +343,7 @@ typedef struct { ...@@ -341,6 +343,7 @@ typedef struct {
ESyncState syncState; ESyncState syncState;
bool syncRestore; bool syncRestore;
bool syncCanRead; bool syncCanRead;
ESyncRole nodeRole;
} SVnodeGid; } SVnodeGid;
typedef struct { typedef struct {
...@@ -361,7 +364,7 @@ typedef struct { ...@@ -361,7 +364,7 @@ typedef struct {
int8_t compact; int8_t compact;
int8_t isTsma; int8_t isTsma;
int8_t replica; int8_t replica;
SVnodeGid vnodeGid[TSDB_MAX_REPLICA]; SVnodeGid vnodeGid[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
void* pTsma; void* pTsma;
int32_t numOfCachedTables; int32_t numOfCachedTables;
} SVgObj; } SVgObj;
......
...@@ -92,8 +92,11 @@ typedef struct { ...@@ -92,8 +92,11 @@ typedef struct {
int64_t transSeq; int64_t transSeq;
TdThreadMutex lock; TdThreadMutex lock;
int8_t selfIndex; int8_t selfIndex;
int8_t numOfTotalReplicas;
int8_t numOfReplicas; int8_t numOfReplicas;
SReplica replicas[TSDB_MAX_REPLICA]; SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
ESyncRole nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex lastIndex;
} SSyncMgmt; } SSyncMgmt;
typedef struct { typedef struct {
......
...@@ -491,7 +491,10 @@ static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) { ...@@ -491,7 +491,10 @@ static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
pMnode->selfDnodeId = pOption->dnodeId; pMnode->selfDnodeId = pOption->dnodeId;
pMnode->syncMgmt.selfIndex = pOption->selfIndex; pMnode->syncMgmt.selfIndex = pOption->selfIndex;
pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas; pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
pMnode->syncMgmt.lastIndex = pOption->lastIndex;
memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas)); memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
} }
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) { SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
...@@ -582,6 +585,11 @@ int32_t mndStart(SMnode *pMnode) { ...@@ -582,6 +585,11 @@ int32_t mndStart(SMnode *pMnode) {
return mndInitTimer(pMnode); return mndInitTimer(pMnode);
} }
int32_t mndIsCatchUp(SMnode *pMnode) {
int64_t rid = pMnode->syncMgmt.sync;
return syncIsCatchUp(rid);
}
void mndStop(SMnode *pMnode) { void mndStop(SMnode *pMnode) {
mndSetStop(pMnode); mndSetStop(pMnode);
mndSyncStop(pMnode); mndSyncStop(pMnode);
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "mndTrans.h" #include "mndTrans.h"
#include "tmisce.h" #include "tmisce.h"
#define MNODE_VER_NUMBER 1 #define MNODE_VER_NUMBER 2
#define MNODE_RESERVE_SIZE 64 #define MNODE_RESERVE_SIZE 64
static int32_t mndCreateDefaultMnode(SMnode *pMnode); static int32_t mndCreateDefaultMnode(SMnode *pMnode);
...@@ -53,6 +53,7 @@ int32_t mndInitMnode(SMnode *pMnode) { ...@@ -53,6 +53,7 @@ int32_t mndInitMnode(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_MND_CREATE_MNODE, mndProcessCreateMnodeReq); mndSetMsgHandle(pMnode, TDMT_MND_CREATE_MNODE, mndProcessCreateMnodeReq);
mndSetMsgHandle(pMnode, TDMT_DND_CREATE_MNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_DND_CREATE_MNODE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_MNODE_TYPE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE, mndProcessAlterMnodeReq); mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE, mndProcessAlterMnodeReq);
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_DROP_MNODE, mndProcessDropMnodeReq); mndSetMsgHandle(pMnode, TDMT_MND_DROP_MNODE, mndProcessDropMnodeReq);
...@@ -126,6 +127,8 @@ static SSdbRaw *mndMnodeActionEncode(SMnodeObj *pObj) { ...@@ -126,6 +127,8 @@ static SSdbRaw *mndMnodeActionEncode(SMnodeObj *pObj) {
SDB_SET_INT32(pRaw, dataPos, pObj->id, _OVER) SDB_SET_INT32(pRaw, dataPos, pObj->id, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->createdTime, _OVER) SDB_SET_INT64(pRaw, dataPos, pObj->createdTime, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->updateTime, _OVER) SDB_SET_INT64(pRaw, dataPos, pObj->updateTime, _OVER)
SDB_SET_INT32(pRaw, dataPos, pObj->role, _OVER)
SDB_SET_INT64(pRaw, dataPos, pObj->lastIndex, _OVER)
SDB_SET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER) SDB_SET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
terrno = 0; terrno = 0;
...@@ -149,7 +152,7 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) { ...@@ -149,7 +152,7 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
int8_t sver = 0; int8_t sver = 0;
if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL; if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL;
if (sver != MNODE_VER_NUMBER) { if (sver != 1 && sver != 2) {
terrno = TSDB_CODE_SDB_INVALID_DATA_VER; terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
goto _OVER; goto _OVER;
} }
...@@ -164,6 +167,10 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) { ...@@ -164,6 +167,10 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
SDB_GET_INT32(pRaw, dataPos, &pObj->id, _OVER) SDB_GET_INT32(pRaw, dataPos, &pObj->id, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->createdTime, _OVER) SDB_GET_INT64(pRaw, dataPos, &pObj->createdTime, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->updateTime, _OVER) SDB_GET_INT64(pRaw, dataPos, &pObj->updateTime, _OVER)
if(sver >=2){
SDB_GET_INT32(pRaw, dataPos, &pObj->role, _OVER)
SDB_GET_INT64(pRaw, dataPos, &pObj->lastIndex, _OVER)
}
SDB_GET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER) SDB_GET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
terrno = 0; terrno = 0;
...@@ -204,7 +211,9 @@ static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj) { ...@@ -204,7 +211,9 @@ static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj) {
static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew) { static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew) {
mTrace("mnode:%d, perform update action, old row:%p new row:%p", pOld->id, pOld, pNew); mTrace("mnode:%d, perform update action, old row:%p new row:%p", pOld->id, pOld, pNew);
pOld->role = pNew->role;
pOld->updateTime = pNew->updateTime; pOld->updateTime = pNew->updateTime;
pOld->lastIndex = pNew->lastIndex;
mndReloadSyncConfig(pSdb->pMnode); mndReloadSyncConfig(pSdb->pMnode);
return 0; return 0;
...@@ -302,6 +311,27 @@ static int32_t mndBuildCreateMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *p ...@@ -302,6 +311,27 @@ static int32_t mndBuildCreateMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *p
return 0; return 0;
} }
static int32_t mndBuildAlterMnodeTypeRedoAction(STrans *pTrans,
SDAlterMnodeTypeReq *pAlterMnodeTypeReq, SEpSet *pAlterMnodeTypeEpSet) {
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterMnodeTypeReq);
void *pReq = taosMemoryMalloc(contLen);
tSerializeSDCreateMnodeReq(pReq, contLen, pAlterMnodeTypeReq);
STransAction action = {
.epSet = *pAlterMnodeTypeEpSet,
.pCont = pReq,
.contLen = contLen,
.msgType = TDMT_DND_ALTER_MNODE_TYPE,
.acceptableCode = TSDB_CODE_MNODE_ALREADY_DEPLOYED,
};
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pReq);
return -1;
}
return 0;
}
static int32_t mndBuildAlterMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *pAlterReq, SEpSet *pAlterEpSet) { static int32_t mndBuildAlterMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *pAlterReq, SEpSet *pAlterEpSet) {
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterReq); int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterReq);
void *pReq = taosMemoryMalloc(contLen); void *pReq = taosMemoryMalloc(contLen);
...@@ -347,6 +377,7 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno ...@@ -347,6 +377,7 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
SSdb *pSdb = pMnode->pSdb; SSdb *pSdb = pMnode->pSdb;
void *pIter = NULL; void *pIter = NULL;
int32_t numOfReplicas = 0; int32_t numOfReplicas = 0;
int32_t numOfLearnerReplicas = 0;
SDCreateMnodeReq createReq = {0}; SDCreateMnodeReq createReq = {0};
SEpSet createEpset = {0}; SEpSet createEpset = {0};
...@@ -355,18 +386,29 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno ...@@ -355,18 +386,29 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj); pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
if (pIter == NULL) break; if (pIter == NULL) break;
createReq.replicas[numOfReplicas].id = pMObj->id; if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
createReq.replicas[numOfReplicas].port = pMObj->pDnode->port; createReq.replicas[numOfReplicas].id = pMObj->id;
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN); createReq.replicas[numOfReplicas].port = pMObj->pDnode->port;
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
numOfReplicas++;
}
else{
createReq.learnerReplicas[numOfLearnerReplicas].id = pMObj->id;
createReq.learnerReplicas[numOfLearnerReplicas].port = pMObj->pDnode->port;
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
numOfLearnerReplicas++;
}
numOfReplicas++;
sdbRelease(pSdb, pMObj); sdbRelease(pSdb, pMObj);
} }
createReq.replica = numOfReplicas + 1; createReq.replica = numOfReplicas;
createReq.replicas[numOfReplicas].id = pDnode->id; createReq.learnerReplica = numOfLearnerReplicas + 1;
createReq.replicas[numOfReplicas].port = pDnode->port; createReq.learnerReplicas[numOfLearnerReplicas].id = pDnode->id;
memcpy(createReq.replicas[numOfReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN); createReq.learnerReplicas[numOfLearnerReplicas].port = pDnode->port;
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
createReq.lastIndex = pObj->lastIndex;
createEpset.inUse = 0; createEpset.inUse = 0;
createEpset.numOfEps = 1; createEpset.numOfEps = 1;
...@@ -378,23 +420,78 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno ...@@ -378,23 +420,78 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
return 0; return 0;
} }
static int32_t mndSetAlterMnodeTypeRedoActions(SMnode *pMnode, STrans *pTrans, SDnodeObj *pDnode, SMnodeObj *pObj) {
SSdb *pSdb = pMnode->pSdb;
void *pIter = NULL;
SDAlterMnodeTypeReq alterReq = {0};
SEpSet createEpset = {0};
while (1) {
SMnodeObj *pMObj = NULL;
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
if (pIter == NULL) break;
if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
alterReq.replicas[alterReq.replica].id = pMObj->id;
alterReq.replicas[alterReq.replica].port = pMObj->pDnode->port;
memcpy(alterReq.replicas[alterReq.replica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.replica++;
}
else{
alterReq.learnerReplicas[alterReq.learnerReplica].id = pMObj->id;
alterReq.learnerReplicas[alterReq.learnerReplica].port = pMObj->pDnode->port;
memcpy(alterReq.learnerReplicas[alterReq.learnerReplica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.learnerReplica++;
}
sdbRelease(pSdb, pMObj);
}
alterReq.replicas[alterReq.replica].id = pDnode->id;
alterReq.replicas[alterReq.replica].port = pDnode->port;
memcpy(alterReq.replicas[alterReq.replica].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
alterReq.replica++;
alterReq.lastIndex = pObj->lastIndex;
createEpset.inUse = 0;
createEpset.numOfEps = 1;
createEpset.eps[0].port = pDnode->port;
memcpy(createEpset.eps[0].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
if (mndBuildAlterMnodeTypeRedoAction(pTrans, &alterReq, &createEpset) != 0) return -1;
return 0;
}
static int32_t mndCreateMnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SMCreateMnodeReq *pCreate) { static int32_t mndCreateMnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SMCreateMnodeReq *pCreate) {
int32_t code = -1; int32_t code = -1;
SMnodeObj mnodeObj = {0};
mnodeObj.id = pDnode->id;
mnodeObj.createdTime = taosGetTimestampMs();
mnodeObj.updateTime = mnodeObj.createdTime;
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "create-mnode"); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "create-mnode");
if (pTrans == NULL) goto _OVER; if (pTrans == NULL) goto _OVER;
mndTransSetSerial(pTrans); mndTransSetSerial(pTrans);
mInfo("trans:%d, used to create mnode:%d", pTrans->id, pCreate->dnodeId); mInfo("trans:%d, used to create mnode:%d", pTrans->id, pCreate->dnodeId);
if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER; if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER;
SMnodeObj mnodeObj = {0};
mnodeObj.id = pDnode->id;
mnodeObj.createdTime = taosGetTimestampMs();
mnodeObj.updateTime = mnodeObj.createdTime;
mnodeObj.role = TAOS_SYNC_ROLE_LEARNER;
mnodeObj.lastIndex = pMnode->applied;
if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) goto _OVER; if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) goto _OVER;
if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER; if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
SMnodeObj mnodeLeaderObj = {0};
mnodeLeaderObj.id = pDnode->id;
mnodeLeaderObj.createdTime = taosGetTimestampMs();
mnodeLeaderObj.updateTime = mnodeLeaderObj.createdTime;
mnodeLeaderObj.role = TAOS_SYNC_ROLE_VOTER;
mnodeLeaderObj.lastIndex = pMnode->applied + 1;
if (mndSetAlterMnodeTypeRedoActions(pMnode, pTrans, pDnode, &mnodeLeaderObj) != 0) goto _OVER;
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeLeaderObj) != 0) goto _OVER;
if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
code = 0; code = 0;
...@@ -514,6 +611,7 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode ...@@ -514,6 +611,7 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode
int32_t mndSetDropMnodeInfoToTrans(SMnode *pMnode, STrans *pTrans, SMnodeObj *pObj, bool force) { int32_t mndSetDropMnodeInfoToTrans(SMnode *pMnode, STrans *pTrans, SMnodeObj *pObj, bool force) {
if (pObj == NULL) return 0; if (pObj == NULL) return 0;
pObj->lastIndex = pMnode->applied;
if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj, force) != 0) return -1; if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj, force) != 0) return -1;
if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) return -1; if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) return -1;
return 0; return 0;
...@@ -730,7 +828,8 @@ static void mndReloadSyncConfig(SMnode *pMnode) { ...@@ -730,7 +828,8 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
void *pIter = NULL; void *pIter = NULL;
int32_t updatingMnodes = 0; int32_t updatingMnodes = 0;
int32_t readyMnodes = 0; int32_t readyMnodes = 0;
SSyncCfg cfg = {.myIndex = -1}; SSyncCfg cfg = {.myIndex = -1, .lastIndex = 0,};
SyncIndex maxIndex = 0;
while (1) { while (1) {
pIter = sdbFetchAll(pSdb, SDB_MNODE, pIter, (void **)&pObj, &objStatus, false); pIter = sdbFetchAll(pSdb, SDB_MNODE, pIter, (void **)&pObj, &objStatus, false);
...@@ -745,26 +844,41 @@ static void mndReloadSyncConfig(SMnode *pMnode) { ...@@ -745,26 +844,41 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
} }
if (objStatus == SDB_STATUS_READY || objStatus == SDB_STATUS_CREATING) { if (objStatus == SDB_STATUS_READY || objStatus == SDB_STATUS_CREATING) {
SNodeInfo *pNode = &cfg.nodeInfo[cfg.replicaNum]; SNodeInfo *pNode = &cfg.nodeInfo[cfg.totalReplicaNum];
pNode->nodeId = pObj->pDnode->id; pNode->nodeId = pObj->pDnode->id;
pNode->clusterId = mndGetClusterId(pMnode); pNode->clusterId = mndGetClusterId(pMnode);
pNode->nodePort = pObj->pDnode->port; pNode->nodePort = pObj->pDnode->port;
pNode->nodeRole = pObj->role;
tstrncpy(pNode->nodeFqdn, pObj->pDnode->fqdn, TSDB_FQDN_LEN); tstrncpy(pNode->nodeFqdn, pObj->pDnode->fqdn, TSDB_FQDN_LEN);
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
mInfo("vgId:1, ep:%s:%u dnode:%d", pNode->nodeFqdn, pNode->nodePort, pNode->nodeId); mInfo("vgId:1, ep:%s:%u dnode:%d", pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
if (pObj->pDnode->id == pMnode->selfDnodeId) { if (pObj->pDnode->id == pMnode->selfDnodeId) {
cfg.myIndex = cfg.replicaNum; cfg.myIndex = cfg.totalReplicaNum;
}
if(pNode->nodeRole == TAOS_SYNC_ROLE_VOTER){
cfg.replicaNum++;
}
cfg.totalReplicaNum++;
if(pObj->lastIndex > cfg.lastIndex){
cfg.lastIndex = pObj->lastIndex;
}
}
if (objStatus == SDB_STATUS_DROPPING) {
if(pObj->lastIndex > cfg.lastIndex){
cfg.lastIndex = pObj->lastIndex;
} }
cfg.replicaNum++;
} }
mInfo("vgId:1, mnode:%d, role:%d, lastIndex:%" PRId64, pObj->id, pObj->role, pObj->lastIndex);
sdbReleaseLock(pSdb, pObj, false); sdbReleaseLock(pSdb, pObj, false);
} }
if (readyMnodes <= 0 || updatingMnodes <= 0) { //if (readyMnodes <= 0 || updatingMnodes <= 0) {
mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes); // mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes);
return; // return;
} //}
if (cfg.myIndex == -1) { if (cfg.myIndex == -1) {
#if 1 #if 1
...@@ -777,12 +891,14 @@ static void mndReloadSyncConfig(SMnode *pMnode) { ...@@ -777,12 +891,14 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
return; return;
} }
if (updatingMnodes > 0) { if (pMnode->syncMgmt.sync > 0) {
mInfo("vgId:1, mnode sync reconfig, replica:%d myIndex:%d", cfg.replicaNum, cfg.myIndex); mInfo("vgId:1, mnode sync reconfig, totalReplica:%d replica:%d myIndex:%d",
for (int32_t i = 0; i < cfg.replicaNum; ++i) { cfg.totalReplicaNum, cfg.replicaNum, cfg.myIndex);
for (int32_t i = 0; i < cfg.totalReplicaNum; ++i) {
SNodeInfo *pNode = &cfg.nodeInfo[i]; SNodeInfo *pNode = &cfg.nodeInfo[i];
mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId, mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64 " role:%d", i, pNode->nodeFqdn, pNode->nodePort,
pNode->clusterId); pNode->nodeId, pNode->clusterId, pNode->nodeRole);
} }
int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg); int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg);
......
...@@ -237,6 +237,23 @@ static void mndBecomeFollower(const SSyncFSM *pFsm) { ...@@ -237,6 +237,23 @@ static void mndBecomeFollower(const SSyncFSM *pFsm) {
taosThreadMutexUnlock(&pMgmt->lock); taosThreadMutexUnlock(&pMgmt->lock);
} }
static void mndBecomeLearner(const SSyncFSM *pFsm) {
SMnode *pMnode = pFsm->data;
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
mInfo("vgId:1, become learner");
taosThreadMutexLock(&pMgmt->lock);
if (pMgmt->transId != 0) {
mInfo("vgId:1, become learner and post sem, trans:%d, failed to propose since not leader", pMgmt->transId);
pMgmt->transId = 0;
pMgmt->transSec = 0;
pMgmt->transSeq = 0;
pMgmt->errCode = TSDB_CODE_SYN_NOT_LEADER;
tsem_post(&pMgmt->syncSem);
}
taosThreadMutexUnlock(&pMgmt->lock);
}
static void mndBecomeLeader(const SSyncFSM *pFsm) { static void mndBecomeLeader(const SSyncFSM *pFsm) {
mInfo("vgId:1, become leader"); mInfo("vgId:1, become leader");
SMnode *pMnode = pFsm->data; SMnode *pMnode = pFsm->data;
...@@ -278,6 +295,7 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { ...@@ -278,6 +295,7 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) {
pFsm->FpReConfigCb = NULL; pFsm->FpReConfigCb = NULL;
pFsm->FpBecomeLeaderCb = mndBecomeLeader; pFsm->FpBecomeLeaderCb = mndBecomeLeader;
pFsm->FpBecomeFollowerCb = mndBecomeFollower; pFsm->FpBecomeFollowerCb = mndBecomeFollower;
pFsm->FpBecomeLearnerCb = mndBecomeLearner;
pFsm->FpGetSnapshot = mndSyncGetSnapshot; pFsm->FpGetSnapshot = mndSyncGetSnapshot;
pFsm->FpGetSnapshotInfo = mndSyncGetSnapshotInfo; pFsm->FpGetSnapshotInfo = mndSyncGetSnapshotInfo;
pFsm->FpSnapshotStartRead = mndSnapshotStartRead; pFsm->FpSnapshotStartRead = mndSnapshotStartRead;
...@@ -317,13 +335,16 @@ int32_t mndInitSync(SMnode *pMnode) { ...@@ -317,13 +335,16 @@ int32_t mndInitSync(SMnode *pMnode) {
mInfo("vgId:1, start to open sync, replica:%d selfIndex:%d", pMgmt->numOfReplicas, pMgmt->selfIndex); mInfo("vgId:1, start to open sync, replica:%d selfIndex:%d", pMgmt->numOfReplicas, pMgmt->selfIndex);
SSyncCfg *pCfg = &syncInfo.syncCfg; SSyncCfg *pCfg = &syncInfo.syncCfg;
pCfg->totalReplicaNum = pMgmt->numOfTotalReplicas;
pCfg->replicaNum = pMgmt->numOfReplicas; pCfg->replicaNum = pMgmt->numOfReplicas;
pCfg->myIndex = pMgmt->selfIndex; pCfg->myIndex = pMgmt->selfIndex;
for (int32_t i = 0; i < pMgmt->numOfReplicas; ++i) { pCfg->lastIndex = pMgmt->lastIndex;
for (int32_t i = 0; i < pMgmt->numOfTotalReplicas; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i]; SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pMgmt->replicas[i].id; pNode->nodeId = pMgmt->replicas[i].id;
pNode->nodePort = pMgmt->replicas[i].port; pNode->nodePort = pMgmt->replicas[i].port;
tstrncpy(pNode->nodeFqdn, pMgmt->replicas[i].fqdn, sizeof(pNode->nodeFqdn)); tstrncpy(pNode->nodeFqdn, pMgmt->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
pNode->nodeRole = pMgmt->nodeRoles[i];
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
mInfo("vgId:1, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId, mInfo("vgId:1, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId,
pNode->clusterId); pNode->clusterId);
......
...@@ -62,6 +62,7 @@ int32_t mndInitVgroup(SMnode *pMnode) { ...@@ -62,6 +62,7 @@ int32_t mndInitVgroup(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_VNODE_TYPE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg); mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg);
mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg); mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg);
...@@ -203,7 +204,7 @@ static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) { ...@@ -203,7 +204,7 @@ static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) {
pNew->compStorage = pOld->compStorage; pNew->compStorage = pOld->compStorage;
pNew->pointsWritten = pOld->pointsWritten; pNew->pointsWritten = pOld->pointsWritten;
pNew->compact = pOld->compact; pNew->compact = pOld->compact;
memcpy(pOld->vnodeGid, pNew->vnodeGid, TSDB_MAX_REPLICA * sizeof(SVnodeGid)); memcpy(pOld->vnodeGid, pNew->vnodeGid, (TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA) * sizeof(SVnodeGid));
return 0; return 0;
} }
...@@ -244,8 +245,10 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg ...@@ -244,8 +245,10 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
createReq.compression = pDb->cfg.compression; createReq.compression = pDb->cfg.compression;
createReq.strict = pDb->cfg.strict; createReq.strict = pDb->cfg.strict;
createReq.cacheLast = pDb->cfg.cacheLast; createReq.cacheLast = pDb->cfg.cacheLast;
createReq.replica = pVgroup->replica; createReq.replica = 0;
createReq.learnerReplica = 0;
createReq.selfIndex = -1; createReq.selfIndex = -1;
createReq.learnerSelfIndex = -1;
createReq.hashBegin = pVgroup->hashBegin; createReq.hashBegin = pVgroup->hashBegin;
createReq.hashEnd = pVgroup->hashEnd; createReq.hashEnd = pVgroup->hashEnd;
createReq.hashMethod = pDb->cfg.hashMethod; createReq.hashMethod = pDb->cfg.hashMethod;
...@@ -263,7 +266,15 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg ...@@ -263,7 +266,15 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
createReq.tsdbPageSize = pDb->cfg.tsdbPageSize; createReq.tsdbPageSize = pDb->cfg.tsdbPageSize;
for (int32_t v = 0; v < pVgroup->replica; ++v) { for (int32_t v = 0; v < pVgroup->replica; ++v) {
SReplica *pReplica = &createReq.replicas[v]; SReplica *pReplica = NULL;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
pReplica = &createReq.replicas[createReq.replica];
}
else{
pReplica = &createReq.learnerReplicas[createReq.learnerReplica];
}
SVnodeGid *pVgid = &pVgroup->vnodeGid[v]; SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId); SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
if (pVgidDnode == NULL) { if (pVgidDnode == NULL) {
...@@ -275,21 +286,40 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg ...@@ -275,21 +286,40 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN); memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
mndReleaseDnode(pMnode, pVgidDnode); mndReleaseDnode(pMnode, pVgidDnode);
if (pDnode->id == pVgid->dnodeId) { if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
createReq.selfIndex = v; if (pDnode->id == pVgid->dnodeId) {
createReq.selfIndex = createReq.replica;
}
}
else{
if (pDnode->id == pVgid->dnodeId) {
createReq.learnerSelfIndex = createReq.learnerReplica;
}
}
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
createReq.replica++;
}
else{
createReq.learnerReplica++;
} }
} }
if (createReq.selfIndex == -1) { if (createReq.selfIndex == -1 && createReq.learnerSelfIndex == -1) {
terrno = TSDB_CODE_APP_ERROR; terrno = TSDB_CODE_APP_ERROR;
return NULL; return NULL;
} }
mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d strict:%d", createReq.vgId, createReq.replica, mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
createReq.selfIndex, createReq.strict); createReq.vgId, createReq.replica, createReq.selfIndex, createReq.learnerReplica,
createReq.learnerReplica, createReq.strict);
for (int32_t i = 0; i < createReq.replica; ++i) { for (int32_t i = 0; i < createReq.replica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port); mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port);
} }
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.learnerReplicas[i].fqdn,
createReq.learnerReplicas[i].port);
}
int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq); int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq);
if (contLen < 0) { if (contLen < 0) {
...@@ -356,12 +386,24 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p ...@@ -356,12 +386,24 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
SAlterVnodeReplicaReq alterReq = { SAlterVnodeReplicaReq alterReq = {
.vgId = pVgroup->vgId, .vgId = pVgroup->vgId,
.strict = pDb->cfg.strict, .strict = pDb->cfg.strict,
.replica = pVgroup->replica, .replica = 0,
.learnerReplica = 0,
.selfIndex = -1, .selfIndex = -1,
.learnerSelfIndex = -1,
}; };
for (int32_t v = 0; v < pVgroup->replica; ++v) { for (int32_t v = 0; v < pVgroup->replica; ++v) {
SReplica *pReplica = &alterReq.replicas[v]; SReplica *pReplica = NULL;
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
pReplica = &alterReq.replicas[alterReq.replica];
alterReq.replica++;
}
else{
pReplica = &alterReq.learnerReplicas[alterReq.learnerReplica];
alterReq.learnerReplica++;
}
SVnodeGid *pVgid = &pVgroup->vnodeGid[v]; SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId); SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
if (pVgidDnode == NULL) return NULL; if (pVgidDnode == NULL) return NULL;
...@@ -371,18 +413,30 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p ...@@ -371,18 +413,30 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN); memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
mndReleaseDnode(pMnode, pVgidDnode); mndReleaseDnode(pMnode, pVgidDnode);
if (dnodeId == pVgid->dnodeId) { if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
alterReq.selfIndex = v; if (dnodeId == pVgid->dnodeId) {
alterReq.selfIndex = v;
}
}
else{
if (dnodeId == pVgid->dnodeId) {
alterReq.learnerSelfIndex = v;
}
} }
} }
alterReq.replica = pVgroup->replica;
mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica, mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
alterReq.selfIndex, alterReq.strict); alterReq.vgId, alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
alterReq.learnerSelfIndex, alterReq.strict);
for (int32_t i = 0; i < alterReq.replica; ++i) { for (int32_t i = 0; i < alterReq.replica; ++i) {
mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port); mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port);
} }
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", alterReq.vgId, i,
alterReq.learnerReplicas[i].fqdn, alterReq.learnerReplicas[i].port);
}
if (alterReq.selfIndex == -1) { if (alterReq.selfIndex == -1 && alterReq.learnerSelfIndex == -1) {
terrno = TSDB_CODE_APP_ERROR; terrno = TSDB_CODE_APP_ERROR;
return NULL; return NULL;
} }
...@@ -1194,6 +1248,30 @@ int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pD ...@@ -1194,6 +1248,30 @@ int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pD
return 0; return 0;
} }
int32_t mndAddAlterVnodeTypeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
if (pDnode == NULL) return -1;
STransAction action = {0};
action.epSet = mndGetDnodeEpset(pDnode);
mndReleaseDnode(pMnode, pDnode);
int32_t contLen = 0;
void *pReq = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &contLen);
if (pReq == NULL) return -1;
action.pCont = pReq;
action.contLen = contLen;
action.msgType = TDMT_DND_ALTER_VNODE_TYPE;
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pReq);
return -1;
}
return 0;
}
static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
int32_t dnodeId) { int32_t dnodeId) {
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId); SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
...@@ -1953,18 +2031,33 @@ int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb ...@@ -1953,18 +2031,33 @@ int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb
mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId, mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
pVgroup->vnodeGid[0].dnodeId); pVgroup->vnodeGid[0].dnodeId);
//add first
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1; if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0) if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
return -1; return -1;
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[1]) != 0) return -1; if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[1]) != 0) return -1;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
if (mndAddAlterVnodeTypeAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
return -1;
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1; if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
//add second
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1; if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0) if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
return -1; return -1;
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0) if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
return -1; return -1;
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[2]) != 0) return -1; if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[2]) != 0) return -1;
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1; if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
} else if (newVgroup.replica == 3 && pNewDb->cfg.replications == 1) { } else if (newVgroup.replica == 3 && pNewDb->cfg.replications == 1) {
mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName, mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,
......
...@@ -68,6 +68,7 @@ void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot); ...@@ -68,6 +68,7 @@ void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot);
void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId); void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId);
int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen); int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen);
int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list); int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list);
int32_t vnodeIsCatchUp(SVnode *pVnode);
int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list); int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list);
int32_t vnodeGetCtbIdListByFilter(SVnode *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg); int32_t vnodeGetCtbIdListByFilter(SVnode *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg);
......
...@@ -57,6 +57,28 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) { ...@@ -57,6 +57,28 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) {
return 0; return 0;
} }
const char* vnodeRoleToStr(ESyncRole role) {
switch (role) {
case TAOS_SYNC_ROLE_VOTER:
return "voter";
case TAOS_SYNC_ROLE_LEARNER:
return "learner";
default:
return "unknown";
}
}
const ESyncRole vnodeStrToRole(char* str) {
if(strcmp(str, "voter") == 0){
return TAOS_SYNC_ROLE_VOTER;
}
if(strcmp(str, "learner") == 0){
return TAOS_SYNC_ROLE_LEARNER;
}
return TAOS_SYNC_ROLE_ERROR;
}
int vnodeEncodeConfig(const void *pObj, SJson *pJson) { int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
const SVnodeCfg *pCfg = (SVnodeCfg *)pObj; const SVnodeCfg *pCfg = (SVnodeCfg *)pObj;
...@@ -117,6 +139,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { ...@@ -117,6 +139,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
if (tjsonAddIntegerToObject(pJson, "hashSuffix", pCfg->hashSuffix) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "hashSuffix", pCfg->hashSuffix) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex) < 0) return -1;
if (tjsonAddIntegerToObject(pJson, "vndStats.stables", pCfg->vndStats.numOfSTables) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "vndStats.stables", pCfg->vndStats.numOfSTables) < 0) return -1;
...@@ -128,9 +151,9 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { ...@@ -128,9 +151,9 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
SJson *nodeInfo = tjsonCreateArray(); SJson *nodeInfo = tjsonCreateArray();
if (nodeInfo == NULL) return -1; if (nodeInfo == NULL) return -1;
if (tjsonAddItemToObject(pJson, "syncCfg.nodeInfo", nodeInfo) < 0) return -1; if (tjsonAddItemToObject(pJson, "syncCfg.nodeInfo", nodeInfo) < 0) return -1;
vDebug("vgId:%d, encode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum, vDebug("vgId:%d, encode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.myIndex); pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
for (int i = 0; i < pCfg->syncCfg.replicaNum; ++i) { for (int i = 0; i < pCfg->syncCfg.totalReplicaNum; ++i) {
SJson *info = tjsonCreateObject(); SJson *info = tjsonCreateObject();
SNodeInfo *pNode = (SNodeInfo *)&pCfg->syncCfg.nodeInfo[i]; SNodeInfo *pNode = (SNodeInfo *)&pCfg->syncCfg.nodeInfo[i];
if (info == NULL) return -1; if (info == NULL) return -1;
...@@ -138,6 +161,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { ...@@ -138,6 +161,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
if (tjsonAddStringToObject(info, "nodeFqdn", pNode->nodeFqdn) < 0) return -1; if (tjsonAddStringToObject(info, "nodeFqdn", pNode->nodeFqdn) < 0) return -1;
if (tjsonAddIntegerToObject(info, "nodeId", pNode->nodeId) < 0) return -1; if (tjsonAddIntegerToObject(info, "nodeId", pNode->nodeId) < 0) return -1;
if (tjsonAddIntegerToObject(info, "clusterId", pNode->clusterId) < 0) return -1; if (tjsonAddIntegerToObject(info, "clusterId", pNode->clusterId) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeRole", vnodeRoleToStr(pNode->nodeRole)) < 0) return -1;
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1; if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
vDebug("vgId:%d, encode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort, vDebug("vgId:%d, encode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId); pNode->nodeId);
...@@ -235,6 +259,8 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { ...@@ -235,6 +259,8 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
tjsonGetNumberValue(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum, code); tjsonGetNumberValue(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum, code);
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetNumberValue(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum, code);
if (code < 0) return -1;
tjsonGetNumberValue(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex, code); tjsonGetNumberValue(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex, code);
if (code < 0) return -1; if (code < 0) return -1;
...@@ -251,10 +277,10 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { ...@@ -251,10 +277,10 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
SJson *nodeInfo = tjsonGetObjectItem(pJson, "syncCfg.nodeInfo"); SJson *nodeInfo = tjsonGetObjectItem(pJson, "syncCfg.nodeInfo");
int arraySize = tjsonGetArraySize(nodeInfo); int arraySize = tjsonGetArraySize(nodeInfo);
if (arraySize != pCfg->syncCfg.replicaNum) return -1; if (arraySize != pCfg->syncCfg.totalReplicaNum) return -1;
vDebug("vgId:%d, decode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum, vDebug("vgId:%d, decode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
pCfg->syncCfg.myIndex); pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
for (int i = 0; i < arraySize; ++i) { for (int i = 0; i < arraySize; ++i) {
SJson *info = tjsonGetArrayItem(nodeInfo, i); SJson *info = tjsonGetArrayItem(nodeInfo, i);
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i]; SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
...@@ -266,6 +292,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { ...@@ -266,6 +292,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetNumberValue(info, "clusterId", pNode->clusterId, code); tjsonGetNumberValue(info, "clusterId", pNode->clusterId, code);
if (code < 0) return -1; if (code < 0) return -1;
char role[10];
tjsonGetStringValue(info, "nodeRole", role);
pNode->nodeRole = vnodeStrToRole(role);
vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort, vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId); pNode->nodeId);
} }
......
...@@ -76,19 +76,41 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *p ...@@ -76,19 +76,41 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *p
} }
SSyncCfg *pCfg = &info.config.syncCfg; SSyncCfg *pCfg = &info.config.syncCfg;
pCfg->myIndex = pReq->selfIndex;
pCfg->replicaNum = pReq->replica; pCfg->replicaNum = 0;
pCfg->totalReplicaNum = 0;
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo)); memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
vInfo("vgId:%d, save config while alter, replicas:%d selfIndex:%d", pReq->vgId, pCfg->replicaNum, pCfg->myIndex);
for (int i = 0; i < pReq->replica; ++i) { for (int i = 0; i < pReq->replica; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i]; SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pReq->replicas[i].id; pNode->nodeId = pReq->replicas[i].id;
pNode->nodePort = pReq->replicas[i].port; pNode->nodePort = pReq->replicas[i].port;
tstrncpy(pNode->nodeFqdn, pReq->replicas[i].fqdn, sizeof(pNode->nodeFqdn)); tstrncpy(pNode->nodeFqdn, pReq->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort); (void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId); vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
pCfg->replicaNum++;
} }
if(pReq->selfIndex != -1){
pCfg->myIndex = pReq->selfIndex;
}
for (int i = pCfg->replicaNum; i < pReq->replica + pReq->learnerReplica; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i];
pNode->nodeId = pReq->learnerReplicas[pCfg->totalReplicaNum].id;
pNode->nodePort = pReq->learnerReplicas[pCfg->totalReplicaNum].port;
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
tstrncpy(pNode->nodeFqdn, pReq->learnerReplicas[pCfg->totalReplicaNum].fqdn, sizeof(pNode->nodeFqdn));
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
pCfg->totalReplicaNum++;
}
pCfg->totalReplicaNum += pReq->replica;
if(pReq->learnerSelfIndex != -1){
pCfg->myIndex = pReq->replica + pReq->learnerSelfIndex;
}
vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d",
pReq->vgId, pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex);
info.config.syncCfg = *pCfg; info.config.syncCfg = *pCfg;
ret = vnodeSaveInfo(dir, &info); ret = vnodeSaveInfo(dir, &info);
...@@ -181,6 +203,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod ...@@ -181,6 +203,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod
SSyncCfg *pCfg = &info.config.syncCfg; SSyncCfg *pCfg = &info.config.syncCfg;
pCfg->myIndex = 0; pCfg->myIndex = 0;
pCfg->replicaNum = 1; pCfg->replicaNum = 1;
pCfg->totalReplicaNum = 1;
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo)); memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
vInfo("vgId:%d, alter vnode replicas to 1", pReq->srcVgId); vInfo("vgId:%d, alter vnode replicas to 1", pReq->srcVgId);
...@@ -248,7 +271,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { ...@@ -248,7 +271,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) {
// save vnode info on dnode ep changed // save vnode info on dnode ep changed
bool updated = false; bool updated = false;
SSyncCfg *pCfg = &info.config.syncCfg; SSyncCfg *pCfg = &info.config.syncCfg;
for (int32_t i = 0; i < pCfg->replicaNum; ++i) { for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i]; SNodeInfo *pNode = &pCfg->nodeInfo[i];
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) { if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
updated = true; updated = true;
...@@ -406,6 +429,10 @@ void vnodeClose(SVnode *pVnode) { ...@@ -406,6 +429,10 @@ void vnodeClose(SVnode *pVnode) {
// start the sync timer after the queue is ready // start the sync timer after the queue is ready
int32_t vnodeStart(SVnode *pVnode) { return vnodeSyncStart(pVnode); } int32_t vnodeStart(SVnode *pVnode) { return vnodeSyncStart(pVnode); }
int32_t vnodeIsCatchUp(SVnode *pVnode){
return syncIsCatchUp(pVnode->sync);
}
void vnodeStop(SVnode *pVnode) {} void vnodeStop(SVnode *pVnode) {}
int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; } int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; }
......
...@@ -1447,7 +1447,8 @@ int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen) { ...@@ -1447,7 +1447,8 @@ int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen) {
} }
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) {
vInfo("vgId:%d, alter replica confim msg is processed", TD_VID(pVnode)); vInfo("vgId:%d, vnode management handle msgType:alter-confirm, alter replica confim msg is processed",
TD_VID(pVnode));
pRsp->msgType = TDMT_VND_ALTER_CONFIRM_RSP; pRsp->msgType = TDMT_VND_ALTER_CONFIRM_RSP;
pRsp->code = TSDB_CODE_SUCCESS; pRsp->code = TSDB_CODE_SUCCESS;
pRsp->pCont = NULL; pRsp->pCont = NULL;
......
...@@ -571,6 +571,19 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) { ...@@ -571,6 +571,19 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
taosThreadMutexUnlock(&pVnode->lock); taosThreadMutexUnlock(&pVnode->lock);
} }
static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data;
vInfo("vgId:%d, become learner", pVnode->config.vgId);
taosThreadMutexLock(&pVnode->lock);
if (pVnode->blocked) {
pVnode->blocked = false;
vDebug("vgId:%d, become learner and post block", pVnode->config.vgId);
tsem_post(&pVnode->syncSem);
}
taosThreadMutexUnlock(&pVnode->lock);
}
static void vnodeBecomeLeader(const SSyncFSM *pFsm) { static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data; SVnode *pVnode = pFsm->data;
vDebug("vgId:%d, become leader", pVnode->config.vgId); vDebug("vgId:%d, become leader", pVnode->config.vgId);
...@@ -612,6 +625,7 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { ...@@ -612,6 +625,7 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {
pFsm->FpApplyQueueItems = vnodeApplyQueueItems; pFsm->FpApplyQueueItems = vnodeApplyQueueItems;
pFsm->FpBecomeLeaderCb = vnodeBecomeLeader; pFsm->FpBecomeLeaderCb = vnodeBecomeLeader;
pFsm->FpBecomeFollowerCb = vnodeBecomeFollower; pFsm->FpBecomeFollowerCb = vnodeBecomeFollower;
pFsm->FpBecomeLearnerCb = vnodeBecomeLearner;
pFsm->FpReConfigCb = NULL; pFsm->FpReConfigCb = NULL;
pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead; pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead;
pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead; pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead;
...@@ -644,7 +658,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) { ...@@ -644,7 +658,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
SSyncCfg *pCfg = &syncInfo.syncCfg; SSyncCfg *pCfg = &syncInfo.syncCfg;
vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex); vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->replicaNum; ++i) { for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SNodeInfo *pNode = &pCfg->nodeInfo[i]; SNodeInfo *pNode = &pCfg->nodeInfo[i];
vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort, vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId, pNode->clusterId); pNode->nodeId, pNode->clusterId);
......
...@@ -24,12 +24,13 @@ extern "C" { ...@@ -24,12 +24,13 @@ extern "C" {
// SIndexMgr ----------------------------- // SIndexMgr -----------------------------
typedef struct SSyncIndexMgr { typedef struct SSyncIndexMgr {
SRaftId (*replicas)[TSDB_MAX_REPLICA]; SRaftId (*replicas)[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncIndex index[TSDB_MAX_REPLICA]; SyncIndex index[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SyncTerm privateTerm[TSDB_MAX_REPLICA]; // for advanced function SyncTerm privateTerm[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA]; // for advanced function
int64_t startTimeArr[TSDB_MAX_REPLICA]; int64_t startTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int64_t recvTimeArr[TSDB_MAX_REPLICA]; int64_t recvTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t replicaNum; int32_t replicaNum;
int32_t totalReplicaNum;
SSyncNode *pNode; SSyncNode *pNode;
} SSyncIndexMgr; } SSyncIndexMgr;
......
...@@ -54,13 +54,13 @@ typedef struct SSyncLogReplMgr SSyncLogReplMgr; ...@@ -54,13 +54,13 @@ typedef struct SSyncLogReplMgr SSyncLogReplMgr;
#define MAX_CONFIG_INDEX_COUNT 256 #define MAX_CONFIG_INDEX_COUNT 256
typedef struct SRaftCfg { typedef struct SRaftCfg {
SSyncCfg cfg; SSyncCfg cfg;
int32_t batchSize; int32_t batchSize;
int8_t isStandBy; int8_t isStandBy;
int8_t snapshotStrategy; int8_t snapshotStrategy;
SyncIndex lastConfigIndex; SyncIndex lastConfigIndex;
int32_t configIndexCount; int32_t configIndexCount;
SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT]; SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT];
} SRaftCfg; } SRaftCfg;
typedef struct SRaftId { typedef struct SRaftId {
...@@ -127,12 +127,13 @@ typedef struct SSyncNode { ...@@ -127,12 +127,13 @@ typedef struct SSyncNode {
SRaftId myRaftId; SRaftId myRaftId;
int32_t peersNum; int32_t peersNum;
SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA]; SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SEpSet peersEpset[TSDB_MAX_REPLICA]; SEpSet peersEpset[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SRaftId peersId[TSDB_MAX_REPLICA]; SRaftId peersId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
int32_t replicaNum; int32_t replicaNum;
SRaftId replicasId[TSDB_MAX_REPLICA]; int32_t totalReplicaNum;
SRaftId replicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// raft algorithm // raft algorithm
SSyncFSM* pFsm; SSyncFSM* pFsm;
...@@ -188,7 +189,7 @@ typedef struct SSyncNode { ...@@ -188,7 +189,7 @@ typedef struct SSyncNode {
uint64_t heartbeatTimerCounter; uint64_t heartbeatTimerCounter;
// peer heartbeat timer // peer heartbeat timer
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA]; SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// tools // tools
SSyncRespMgr* pSyncRespMgr; SSyncRespMgr* pSyncRespMgr;
...@@ -196,13 +197,13 @@ typedef struct SSyncNode { ...@@ -196,13 +197,13 @@ typedef struct SSyncNode {
// restore state // restore state
bool restoreFinish; bool restoreFinish;
// SSnapshot* pSnapshot; // SSnapshot* pSnapshot;
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; SSyncSnapshotSender* senders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SSyncSnapshotReceiver* pNewNodeReceiver; SSyncSnapshotReceiver* pNewNodeReceiver;
// log replication mgr // log replication mgr
SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA]; SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
SPeerState peerStates[TSDB_MAX_REPLICA]; SPeerState peerStates[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
// is config changing // is config changing
bool changing; bool changing;
...@@ -275,6 +276,7 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term); ...@@ -275,6 +276,7 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term); void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term);
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm); void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm);
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr); void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr); void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr);
void syncNodeCandidate2Leader(SSyncNode* pSyncNode); void syncNodeCandidate2Leader(SSyncNode* pSyncNode);
void syncNodeFollower2Candidate(SSyncNode* pSyncNode); void syncNodeFollower2Candidate(SSyncNode* pSyncNode);
......
...@@ -237,6 +237,7 @@ typedef struct SyncLeaderTransfer { ...@@ -237,6 +237,7 @@ typedef struct SyncLeaderTransfer {
typedef enum { typedef enum {
SYNC_LOCAL_CMD_STEP_DOWN = 100, SYNC_LOCAL_CMD_STEP_DOWN = 100,
SYNC_LOCAL_CMD_FOLLOWER_CMT, SYNC_LOCAL_CMD_FOLLOWER_CMT,
SYNC_LOCAL_CMD_LEARNER_CMT,
} ESyncLocalCmd; } ESyncLocalCmd;
typedef struct SyncLocalCmd { typedef struct SyncLocalCmd {
......
...@@ -56,6 +56,8 @@ typedef struct SSyncLogBuffer { ...@@ -56,6 +56,8 @@ typedef struct SSyncLogBuffer {
int64_t size; int64_t size;
TdThreadMutex mutex; TdThreadMutex mutex;
TdThreadMutexAttr attr; TdThreadMutexAttr attr;
int64_t totalIndex;
bool isCatchup;
} SSyncLogBuffer; } SSyncLogBuffer;
// SSyncLogRepMgr // SSyncLogRepMgr
......
...@@ -137,8 +137,10 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { ...@@ -137,8 +137,10 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
pReply->term = pMsg->term; pReply->term = pMsg->term;
} }
syncNodeStepDown(ths, pMsg->term); if(ths->raftCfg.cfg.nodeInfo[ths->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
resetElect = true; syncNodeStepDown(ths, pMsg->term);
resetElect = true;
}
if (pMsg->dataLen < sizeof(SSyncRaftEntry)) { if (pMsg->dataLen < sizeof(SSyncRaftEntry)) {
sError("vgId:%d, incomplete append entries received. prev index:%" PRId64 ", term:%" PRId64 ", datalen:%d", sError("vgId:%d, incomplete append entries received. prev index:%" PRId64 ", term:%" PRId64 ", datalen:%d",
......
...@@ -41,6 +41,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { ...@@ -41,6 +41,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
int32_t ret = 0; int32_t ret = 0;
for (int i = 0; i < pNode->peersNum; ++i) { for (int i = 0; i < pNode->peersNum; ++i) {
if(pNode->peersNodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;
SRpcMsg rpcMsg = {0}; SRpcMsg rpcMsg = {0};
ret = syncBuildRequestVote(&rpcMsg, pNode->vgId); ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
if (ret < 0) { if (ret < 0) {
......
...@@ -26,6 +26,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) { ...@@ -26,6 +26,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
pIndexMgr->replicas = &pNode->replicasId; pIndexMgr->replicas = &pNode->replicasId;
pIndexMgr->replicaNum = pNode->replicaNum; pIndexMgr->replicaNum = pNode->replicaNum;
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
pIndexMgr->pNode = pNode; pIndexMgr->pNode = pNode;
syncIndexMgrClear(pIndexMgr); syncIndexMgrClear(pIndexMgr);
...@@ -35,6 +36,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) { ...@@ -35,6 +36,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
void syncIndexMgrUpdate(SSyncIndexMgr *pIndexMgr, SSyncNode *pNode) { void syncIndexMgrUpdate(SSyncIndexMgr *pIndexMgr, SSyncNode *pNode) {
pIndexMgr->replicas = &pNode->replicasId; pIndexMgr->replicas = &pNode->replicasId;
pIndexMgr->replicaNum = pNode->replicaNum; pIndexMgr->replicaNum = pNode->replicaNum;
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
pIndexMgr->pNode = pNode; pIndexMgr->pNode = pNode;
syncIndexMgrClear(pIndexMgr); syncIndexMgrClear(pIndexMgr);
} }
...@@ -50,14 +52,14 @@ void syncIndexMgrClear(SSyncIndexMgr *pIndexMgr) { ...@@ -50,14 +52,14 @@ void syncIndexMgrClear(SSyncIndexMgr *pIndexMgr) {
memset(pIndexMgr->privateTerm, 0, sizeof(pIndexMgr->privateTerm)); memset(pIndexMgr->privateTerm, 0, sizeof(pIndexMgr->privateTerm));
int64_t timeNow = taosGetTimestampMs(); int64_t timeNow = taosGetTimestampMs();
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
pIndexMgr->startTimeArr[i] = 0; pIndexMgr->startTimeArr[i] = 0;
pIndexMgr->recvTimeArr[i] = timeNow; pIndexMgr->recvTimeArr[i] = timeNow;
} }
} }
void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncIndex index) { void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncIndex index) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->index)[i] = index; (pIndexMgr->index)[i] = index;
return; return;
...@@ -69,7 +71,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, Sync ...@@ -69,7 +71,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, Sync
} }
SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) { SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
for (int i = 0; i < pNode->replicaNum; i++) { for (int i = 0; i < pNode->totalReplicaNum; i++) {
if (syncUtilSameId(&pNode->replicasId[i], pRaftId)) { if (syncUtilSameId(&pNode->replicasId[i], pRaftId)) {
return pNode->logReplMgrs[i]; return pNode->logReplMgrs[i];
} }
...@@ -80,7 +82,7 @@ SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) { ...@@ -80,7 +82,7 @@ SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
} }
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) { SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
SyncIndex idx = (pIndexMgr->index)[i]; SyncIndex idx = (pIndexMgr->index)[i];
return idx; return idx;
...@@ -93,7 +95,7 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) ...@@ -93,7 +95,7 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId)
} }
void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t startTime) { void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t startTime) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->startTimeArr)[i] = startTime; (pIndexMgr->startTimeArr)[i] = startTime;
return; return;
...@@ -105,7 +107,7 @@ void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, ...@@ -105,7 +107,7 @@ void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId,
} }
int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) { int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
int64_t startTime = (pIndexMgr->startTimeArr)[i]; int64_t startTime = (pIndexMgr->startTimeArr)[i];
return startTime; return startTime;
...@@ -118,7 +120,7 @@ int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftI ...@@ -118,7 +120,7 @@ int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftI
} }
void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t recvTime) { void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t recvTime) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->recvTimeArr)[i] = recvTime; (pIndexMgr->recvTimeArr)[i] = recvTime;
return; return;
...@@ -130,7 +132,7 @@ void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, i ...@@ -130,7 +132,7 @@ void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, i
} }
int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) { int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
int64_t recvTime = (pIndexMgr->recvTimeArr)[i]; int64_t recvTime = (pIndexMgr->recvTimeArr)[i];
return recvTime; return recvTime;
...@@ -143,7 +145,7 @@ int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId ...@@ -143,7 +145,7 @@ int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId
} }
void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncTerm term) { void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncTerm term) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
(pIndexMgr->privateTerm)[i] = term; (pIndexMgr->privateTerm)[i] = term;
return; return;
...@@ -155,7 +157,7 @@ void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncT ...@@ -155,7 +157,7 @@ void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncT
} }
SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) { SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
for (int i = 0; i < pIndexMgr->replicaNum; ++i) { for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) { if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
SyncTerm term = (pIndexMgr->privateTerm)[i]; SyncTerm term = (pIndexMgr->privateTerm)[i];
return term; return term;
......
此差异已折叠。
...@@ -242,6 +242,8 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) { ...@@ -242,6 +242,8 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
// update startIndex // update startIndex
pBuf->startIndex = takeDummy ? index : index + 1; pBuf->startIndex = takeDummy ? index : index + 1;
pBuf->isCatchup = false;
sInfo("vgId:%d, init sync log buffer. buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, sInfo("vgId:%d, init sync log buffer. buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
...@@ -324,6 +326,15 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt ...@@ -324,6 +326,15 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt
goto _out; goto _out;
} }
if(pNode->raftCfg.cfg.nodeInfo[pNode->raftCfg.cfg.myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER &&
index > 0 && index > pBuf->totalIndex){
pBuf->totalIndex = index;
sTrace("vgId:%d, update learner progress. index:%" PRId64 ", term:%" PRId64 ": prevterm:%" PRId64
" != lastmatch:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, pEntry->index, pEntry->term, prevTerm, lastMatchTerm, pBuf->startIndex, pBuf->commitIndex,
pBuf->matchIndex, pBuf->endIndex);
}
if (index - pBuf->startIndex >= pBuf->size) { if (index - pBuf->startIndex >= pBuf->size) {
sWarn("vgId:%d, out of buffer range. index:%" PRId64 ", term:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64 sWarn("vgId:%d, out of buffer range. index:%" PRId64 ", term:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64
" %" PRId64 ", %" PRId64 ")", " %" PRId64 ", %" PRId64 ")",
...@@ -483,7 +494,7 @@ _out: ...@@ -483,7 +494,7 @@ _out:
} }
int32_t syncFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry, int32_t syncFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry,
int32_t applyCode) { int32_t applyCode) {
if (pNode->replicaNum == 1 && pNode->restoreFinish && pNode->vgId != 1) { if (pNode->replicaNum == 1 && pNode->restoreFinish && pNode->vgId != 1) {
return 0; return 0;
} }
...@@ -957,7 +968,7 @@ void syncLogReplDestroy(SSyncLogReplMgr* pMgr) { ...@@ -957,7 +968,7 @@ void syncLogReplDestroy(SSyncLogReplMgr* pMgr) {
} }
int32_t syncNodeLogReplInit(SSyncNode* pNode) { int32_t syncNodeLogReplInit(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) { for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
ASSERT(pNode->logReplMgrs[i] == NULL); ASSERT(pNode->logReplMgrs[i] == NULL);
pNode->logReplMgrs[i] = syncLogReplCreate(); pNode->logReplMgrs[i] = syncLogReplCreate();
if (pNode->logReplMgrs[i] == NULL) { if (pNode->logReplMgrs[i] == NULL) {
...@@ -970,7 +981,7 @@ int32_t syncNodeLogReplInit(SSyncNode* pNode) { ...@@ -970,7 +981,7 @@ int32_t syncNodeLogReplInit(SSyncNode* pNode) {
} }
void syncNodeLogReplDestroy(SSyncNode* pNode) { void syncNodeLogReplDestroy(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) { for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
syncLogReplDestroy(pNode->logReplMgrs[i]); syncLogReplDestroy(pNode->logReplMgrs[i]);
pNode->logReplMgrs[i] = NULL; pNode->logReplMgrs[i] = NULL;
} }
...@@ -1100,7 +1111,7 @@ int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode) { ...@@ -1100,7 +1111,7 @@ int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
pBuf->endIndex = pBuf->matchIndex + 1; pBuf->endIndex = pBuf->matchIndex + 1;
// reset repl mgr // reset repl mgr
for (int i = 0; i < pNode->replicaNum; i++) { for (int i = 0; i < pNode->totalReplicaNum; i++) {
SSyncLogReplMgr* pMgr = pNode->logReplMgrs[i]; SSyncLogReplMgr* pMgr = pNode->logReplMgrs[i];
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
} }
......
...@@ -18,21 +18,45 @@ ...@@ -18,21 +18,45 @@
#include "syncUtil.h" #include "syncUtil.h"
#include "tjson.h" #include "tjson.h"
const char* syncRoleToStr(ESyncRole role) {
switch (role) {
case TAOS_SYNC_ROLE_VOTER:
return "voter";
case TAOS_SYNC_ROLE_LEARNER:
return "learner";
default:
return "unknown";
}
}
const ESyncRole syncStrToRole(char* str) {
if(strcmp(str, "voter") == 0){
return TAOS_SYNC_ROLE_VOTER;
}
if(strcmp(str, "learner") == 0){
return TAOS_SYNC_ROLE_LEARNER;
}
return TAOS_SYNC_ROLE_ERROR;
}
static int32_t syncEncodeSyncCfg(const void *pObj, SJson *pJson) { static int32_t syncEncodeSyncCfg(const void *pObj, SJson *pJson) {
SSyncCfg *pCfg = (SSyncCfg *)pObj; SSyncCfg *pCfg = (SSyncCfg *)pObj;
if (tjsonAddDoubleToObject(pJson, "totalReplicaNum", pCfg->totalReplicaNum) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "replicaNum", pCfg->replicaNum) < 0) return -1; if (tjsonAddDoubleToObject(pJson, "replicaNum", pCfg->replicaNum) < 0) return -1;
if (tjsonAddDoubleToObject(pJson, "myIndex", pCfg->myIndex) < 0) return -1; if (tjsonAddDoubleToObject(pJson, "myIndex", pCfg->myIndex) < 0) return -1;
SJson *nodeInfo = tjsonCreateArray(); SJson *nodeInfo = tjsonCreateArray();
if (nodeInfo == NULL) return -1; if (nodeInfo == NULL) return -1;
if (tjsonAddItemToObject(pJson, "nodeInfo", nodeInfo) < 0) return -1; if (tjsonAddItemToObject(pJson, "nodeInfo", nodeInfo) < 0) return -1;
for (int32_t i = 0; i < pCfg->replicaNum; ++i) { for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SJson *info = tjsonCreateObject(); SJson *info = tjsonCreateObject();
if (info == NULL) return -1; if (info == NULL) return -1;
if (tjsonAddDoubleToObject(info, "nodePort", pCfg->nodeInfo[i].nodePort) < 0) return -1; if (tjsonAddDoubleToObject(info, "nodePort", pCfg->nodeInfo[i].nodePort) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeFqdn", pCfg->nodeInfo[i].nodeFqdn) < 0) return -1; if (tjsonAddStringToObject(info, "nodeFqdn", pCfg->nodeInfo[i].nodeFqdn) < 0) return -1;
if (tjsonAddIntegerToObject(info, "nodeId", pCfg->nodeInfo[i].nodeId) < 0) return -1; if (tjsonAddIntegerToObject(info, "nodeId", pCfg->nodeInfo[i].nodeId) < 0) return -1;
if (tjsonAddIntegerToObject(info, "clusterId", pCfg->nodeInfo[i].clusterId) < 0) return -1; if (tjsonAddIntegerToObject(info, "clusterId", pCfg->nodeInfo[i].clusterId) < 0) return -1;
if (tjsonAddStringToObject(info, "nodeRole", syncRoleToStr(pCfg->nodeInfo[i].nodeRole)) < 0) return -1;
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1; if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
} }
...@@ -90,7 +114,8 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { ...@@ -90,7 +114,8 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) {
if (taosRenameFile(file, realfile) != 0) goto _OVER; if (taosRenameFile(file, realfile) != 0) goto _OVER;
code = 0; code = 0;
sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d", pNode->vgId, realfile, len); sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d, lastConfigIndex:%" PRId64, pNode->vgId,
realfile, len, pNode->raftCfg.lastConfigIndex);
_OVER: _OVER:
if (pJson != NULL) tjsonDelete(pJson); if (pJson != NULL) tjsonDelete(pJson);
...@@ -108,6 +133,7 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) { ...@@ -108,6 +133,7 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
SSyncCfg *pCfg = (SSyncCfg *)pObj; SSyncCfg *pCfg = (SSyncCfg *)pObj;
int32_t code = 0; int32_t code = 0;
tjsonGetInt32ValueFromDouble(pJson, "totalReplicaNum", pCfg->totalReplicaNum, code);
tjsonGetInt32ValueFromDouble(pJson, "replicaNum", pCfg->replicaNum, code); tjsonGetInt32ValueFromDouble(pJson, "replicaNum", pCfg->replicaNum, code);
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetInt32ValueFromDouble(pJson, "myIndex", pCfg->myIndex, code); tjsonGetInt32ValueFromDouble(pJson, "myIndex", pCfg->myIndex, code);
...@@ -115,9 +141,9 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) { ...@@ -115,9 +141,9 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
SJson *nodeInfo = tjsonGetObjectItem(pJson, "nodeInfo"); SJson *nodeInfo = tjsonGetObjectItem(pJson, "nodeInfo");
if (nodeInfo == NULL) return -1; if (nodeInfo == NULL) return -1;
pCfg->replicaNum = tjsonGetArraySize(nodeInfo); pCfg->totalReplicaNum = tjsonGetArraySize(nodeInfo);
for (int32_t i = 0; i < pCfg->replicaNum; ++i) { for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
SJson *info = tjsonGetArrayItem(nodeInfo, i); SJson *info = tjsonGetArrayItem(nodeInfo, i);
if (info == NULL) return -1; if (info == NULL) return -1;
tjsonGetUInt16ValueFromDouble(info, "nodePort", pCfg->nodeInfo[i].nodePort, code); tjsonGetUInt16ValueFromDouble(info, "nodePort", pCfg->nodeInfo[i].nodePort, code);
...@@ -126,6 +152,10 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) { ...@@ -126,6 +152,10 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
if (code < 0) return -1; if (code < 0) return -1;
tjsonGetNumberValue(info, "nodeId", pCfg->nodeInfo[i].nodeId, code); tjsonGetNumberValue(info, "nodeId", pCfg->nodeInfo[i].nodeId, code);
tjsonGetNumberValue(info, "clusterId", pCfg->nodeInfo[i].clusterId, code); tjsonGetNumberValue(info, "clusterId", pCfg->nodeInfo[i].clusterId, code);
char role[10];
code = tjsonGetStringValue(info, "nodeRole", role);
if(code < 0) return -1;
pCfg->nodeInfo[i].nodeRole = syncStrToRole(role);
} }
return 0; return 0;
......
...@@ -66,10 +66,10 @@ int32_t syncNodeReplicate(SSyncNode* pNode) { ...@@ -66,10 +66,10 @@ int32_t syncNodeReplicate(SSyncNode* pNode) {
} }
int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) { int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->replicaNum == 1) { if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->raftCfg.cfg.totalReplicaNum == 1) {
return -1; return -1;
} }
for (int32_t i = 0; i < pNode->replicaNum; i++) { for (int32_t i = 0; i < pNode->totalReplicaNum; i++) {
if (syncUtilSameId(&pNode->replicasId[i], &pNode->myRaftId)) { if (syncUtilSameId(&pNode->replicasId[i], &pNode->myRaftId)) {
continue; continue;
} }
......
...@@ -795,13 +795,18 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { ...@@ -795,13 +795,18 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
return -1; return -1;
} }
if (pMsg->term > raftStoreGetTerm(pSyncNode)) { if(pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
syncNodeStepDown(pSyncNode, pMsg->term); if (pMsg->term > raftStoreGetTerm(pSyncNode)) {
syncNodeStepDown(pSyncNode, pMsg->term);
}
}
else{
syncNodeUpdateTermWithoutStepDown(pSyncNode, pMsg->term);
} }
// state, term, seq/ack // state, term, seq/ack
int32_t code = 0; int32_t code = 0;
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER || pSyncNode->state == TAOS_SYNC_STATE_LEARNER) {
if (pMsg->term == raftStoreGetTerm(pSyncNode)) { if (pMsg->term == raftStoreGetTerm(pSyncNode)) {
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) {
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot"); syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot");
......
...@@ -58,7 +58,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) { ...@@ -58,7 +58,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
static int32_t syncNodeTimerRoutine(SSyncNode* ths) { static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
ths->tmrRoutineNum++; ths->tmrRoutineNum++;
if (ths->tmrRoutineNum % 60 == 0 && ths->replicaNum > 1) { if (ths->tmrRoutineNum % 60 == 0 && ths->totalReplicaNum > 1) {
sNInfo(ths, "timer routines"); sNInfo(ths, "timer routines");
} else { } else {
sNTrace(ths, "timer routines"); sNTrace(ths, "timer routines");
......
...@@ -30,7 +30,7 @@ SVotesGranted *voteGrantedCreate(SSyncNode *pNode) { ...@@ -30,7 +30,7 @@ SVotesGranted *voteGrantedCreate(SSyncNode *pNode) {
return NULL; return NULL;
} }
pVotesGranted->replicas = &pNode->replicasId; pVotesGranted->replicas = (void*)&pNode->replicasId;
pVotesGranted->replicaNum = pNode->replicaNum; pVotesGranted->replicaNum = pNode->replicaNum;
voteGrantedClearVotes(pVotesGranted); voteGrantedClearVotes(pVotesGranted);
...@@ -49,7 +49,7 @@ void voteGrantedDestroy(SVotesGranted *pVotesGranted) { ...@@ -49,7 +49,7 @@ void voteGrantedDestroy(SVotesGranted *pVotesGranted) {
} }
void voteGrantedUpdate(SVotesGranted *pVotesGranted, SSyncNode *pNode) { void voteGrantedUpdate(SVotesGranted *pVotesGranted, SSyncNode *pNode) {
pVotesGranted->replicas = &pNode->replicasId; pVotesGranted->replicas = (void*)&pNode->replicasId;
pVotesGranted->replicaNum = pNode->replicaNum; pVotesGranted->replicaNum = pNode->replicaNum;
voteGrantedClearVotes(pVotesGranted); voteGrantedClearVotes(pVotesGranted);
...@@ -115,7 +115,7 @@ SVotesRespond *votesRespondCreate(SSyncNode *pNode) { ...@@ -115,7 +115,7 @@ SVotesRespond *votesRespondCreate(SSyncNode *pNode) {
return NULL; return NULL;
} }
pVotesRespond->replicas = &pNode->replicasId; pVotesRespond->replicas = (void*)&pNode->replicasId;
pVotesRespond->replicaNum = pNode->replicaNum; pVotesRespond->replicaNum = pNode->replicaNum;
pVotesRespond->term = 0; pVotesRespond->term = 0;
pVotesRespond->pNode = pNode; pVotesRespond->pNode = pNode;
...@@ -130,7 +130,7 @@ void votesRespondDestory(SVotesRespond *pVotesRespond) { ...@@ -130,7 +130,7 @@ void votesRespondDestory(SVotesRespond *pVotesRespond) {
} }
void votesRespondUpdate(SVotesRespond *pVotesRespond, SSyncNode *pNode) { void votesRespondUpdate(SVotesRespond *pVotesRespond, SSyncNode *pNode) {
pVotesRespond->replicas = &pNode->replicasId; pVotesRespond->replicas = (void*)&pNode->replicasId;
pVotesRespond->replicaNum = pNode->replicaNum; pVotesRespond->replicaNum = pNode->replicaNum;
pVotesRespond->term = 0; pVotesRespond->term = 0;
pVotesRespond->pNode = pNode; pVotesRespond->pNode = pNode;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册