From e13de8ce2c41af6cdf389a7ee74b4287d9fc998d Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Mon, 30 Nov 2020 09:12:42 +0000 Subject: [PATCH] return mnode initialization stage to client --- src/dnode/src/dnodeMPeer.c | 11 +++-- src/dnode/src/dnodeMRead.c | 11 +++-- src/dnode/src/dnodeMWrite.c | 18 +++++---- src/dnode/src/dnodeMain.c | 2 +- src/dnode/src/dnodeShell.c | 2 +- src/inc/mnode.h | 3 ++ src/inc/taoserror.h | 3 ++ src/mnode/src/mnodeMain.c | 80 +++++++++++++++++++++++++++---------- src/mnode/src/mnodeSdb.c | 2 +- src/mnode/src/mnodeUser.c | 15 ++++++- src/rpc/src/rpcMain.c | 5 +-- 11 files changed, 111 insertions(+), 41 deletions(-) diff --git a/src/dnode/src/dnodeMPeer.c b/src/dnode/src/dnodeMPeer.c index ee6dc5212e..adeccd1f9c 100644 --- a/src/dnode/src/dnodeMPeer.c +++ b/src/dnode/src/dnodeMPeer.c @@ -122,11 +122,16 @@ void dnodeFreeMPeerQueue() { } void dnodeDispatchToMPeerQueue(SRpcMsg *pMsg) { - if (!mnodeIsRunning() || tsMPeerQueue == NULL) { + if (!mnodeIsRunning()) { dnodeSendRedirectMsg(pMsg, false); } else { - SMnodeMsg *pPeer = mnodeCreateMsg(pMsg); - taosWriteQitem(tsMPeerQueue, TAOS_QTYPE_RPC, pPeer); + if (!mnodeIsReady()) { + SRpcMsg rpcRsp = {.handle = pMsg->handle, .code = mnodeInitCode(), .pCont = NULL}; + rpcSendResponse(&rpcRsp); + } else { + SMnodeMsg *pPeer = mnodeCreateMsg(pMsg); + taosWriteQitem(tsMPeerQueue, TAOS_QTYPE_RPC, pPeer); + } } rpcFreeCont(pMsg->pCont); diff --git a/src/dnode/src/dnodeMRead.c b/src/dnode/src/dnodeMRead.c index 65f3af7b3b..bb0d6f94f2 100644 --- a/src/dnode/src/dnodeMRead.c +++ b/src/dnode/src/dnodeMRead.c @@ -123,11 +123,16 @@ void dnodeFreeMReadQueue() { } void dnodeDispatchToMReadQueue(SRpcMsg *pMsg) { - if (!mnodeIsRunning() || tsMReadQueue == NULL) { + if (!mnodeIsRunning()) { dnodeSendRedirectMsg(pMsg, true); } else { - SMnodeMsg *pRead = mnodeCreateMsg(pMsg); - taosWriteQitem(tsMReadQueue, TAOS_QTYPE_RPC, pRead); + if (!mnodeIsReady()) { + SRpcMsg rpcRsp = {.handle = pMsg->handle, .code = mnodeInitCode(), .pCont = NULL}; + rpcSendResponse(&rpcRsp); + } else { + SMnodeMsg *pRead = mnodeCreateMsg(pMsg); + taosWriteQitem(tsMReadQueue, TAOS_QTYPE_RPC, pRead); + } } rpcFreeCont(pMsg->pCont); diff --git a/src/dnode/src/dnodeMWrite.c b/src/dnode/src/dnodeMWrite.c index ef2d49ef42..3c539ceefa 100644 --- a/src/dnode/src/dnodeMWrite.c +++ b/src/dnode/src/dnodeMWrite.c @@ -123,13 +123,18 @@ void dnodeFreeMWritequeue() { } void dnodeDispatchToMWriteQueue(SRpcMsg *pMsg) { - if (!mnodeIsRunning() || tsMWriteQueue == NULL) { + if (!mnodeIsRunning()) { dnodeSendRedirectMsg(pMsg, true); } else { - SMnodeMsg *pWrite = mnodeCreateMsg(pMsg); - dDebug("msg:%p, app:%p type:%s is put into mwrite queue:%p", pWrite, pWrite->rpcMsg.ahandle, - taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue); - taosWriteQitem(tsMWriteQueue, TAOS_QTYPE_RPC, pWrite); + if (!mnodeIsReady()) { + SRpcMsg rpcRsp = {.handle = pMsg->handle, .code = mnodeInitCode(), .pCont = NULL}; + rpcSendResponse(&rpcRsp); + } else { + SMnodeMsg *pWrite = mnodeCreateMsg(pMsg); + dDebug("msg:%p, app:%p type:%s is put into mwrite queue:%p", pWrite, pWrite->rpcMsg.ahandle, + taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue); + taosWriteQitem(tsMWriteQueue, TAOS_QTYPE_RPC, pWrite); + } } rpcFreeCont(pMsg->pCont); @@ -187,7 +192,7 @@ static void *dnodeProcessMWriteQueue(void *param) { void dnodeReprocessMWriteMsg(void *pMsg) { SMnodeMsg *pWrite = pMsg; - if (!mnodeIsRunning() || tsMWriteQueue == NULL) { + if (!mnodeIsRunning()) { dDebug("msg:%p, app:%p type:%s is redirected for mnode not running, retry times:%d", pWrite, pWrite->rpcMsg.ahandle, taosMsg[pWrite->rpcMsg.msgType], pWrite->retry); @@ -196,7 +201,6 @@ void dnodeReprocessMWriteMsg(void *pMsg) { } else { dDebug("msg:%p, app:%p type:%s is reput into mwrite queue:%p, retry times:%d", pWrite, pWrite->rpcMsg.ahandle, taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue, pWrite->retry); - taosWriteQitem(tsMWriteQueue, TAOS_QTYPE_RPC, pWrite); } } diff --git a/src/dnode/src/dnodeMain.c b/src/dnode/src/dnodeMain.c index 9f52dbd331..ce45105fb3 100644 --- a/src/dnode/src/dnodeMain.c +++ b/src/dnode/src/dnodeMain.c @@ -63,6 +63,7 @@ static const SDnodeComponent tsDnodeComponents[] = { {"dnodeeps", dnodeInitEps, dnodeCleanupEps}, {"globalcfg" ,taosCheckGlobalCfg, NULL}, {"mnodeinfos",dnodeInitMInfos, dnodeCleanupMInfos}, + {"shell", dnodeInitShell, dnodeCleanupShell}, {"wal", walInit, walCleanUp}, {"check", dnodeInitCheck, dnodeCleanupCheck}, // NOTES: dnodeInitCheck must be behind the dnodeinitStorage component !!! {"vread", dnodeInitVRead, dnodeCleanupVRead}, @@ -75,7 +76,6 @@ static const SDnodeComponent tsDnodeComponents[] = { {"mgmt", dnodeInitMgmt, dnodeCleanupMgmt}, {"modules", dnodeInitModules, dnodeCleanupModules}, {"mgmt-tmr", dnodeInitMgmtTimer, dnodeCleanupMgmtTimer}, - {"shell", dnodeInitShell, dnodeCleanupShell}, {"telemetry", dnodeInitTelemetry, dnodeCleanupTelemetry}, }; diff --git a/src/dnode/src/dnodeShell.c b/src/dnode/src/dnodeShell.c index 89f657f789..fa5f9c24e8 100644 --- a/src/dnode/src/dnodeShell.c +++ b/src/dnode/src/dnodeShell.c @@ -144,7 +144,7 @@ static void dnodeProcessMsgFromShell(SRpcMsg *pMsg, SRpcEpSet *pEpSet) { static int dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char *secret, char *ckey) { int code = mnodeRetriveAuth(user, spi, encrypt, secret, ckey); - if (code != TSDB_CODE_APP_NOT_READY) return code; + if (code != TSDB_CODE_RPC_REDIRECT) return code; SAuthMsg *pMsg = rpcMallocCont(sizeof(SAuthMsg)); tstrncpy(pMsg->user, user, sizeof(pMsg->user)); diff --git a/src/inc/mnode.h b/src/inc/mnode.h index bdc30b0c46..453e8ab3f6 100644 --- a/src/inc/mnode.h +++ b/src/inc/mnode.h @@ -65,12 +65,15 @@ void mnodeStopSystem(); void sdbUpdateAsync(); void sdbUpdateSync(void *pMnodes); bool mnodeIsRunning(); +bool mnodeIsReady(); +int32_t mnodeInitCode(); int32_t mnodeProcessRead(SMnodeMsg *pMsg); int32_t mnodeProcessWrite(SMnodeMsg *pMsg); int32_t mnodeProcessPeerReq(SMnodeMsg *pMsg); void mnodeProcessPeerRsp(SRpcMsg *pMsg); int32_t mnodeRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey); + #ifdef __cplusplus } #endif diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 5fde6d40d5..59f25a0765 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -125,6 +125,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_SHOWOBJ, 0, 0x030B, "Data expir TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_QUERY_ID, 0, 0x030C, "Invalid query id") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STREAM_ID, 0, 0x030D, "Invalid stream id") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_CONN_ID, 0, 0x030E, "Invalid connection id") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INIT, 0, 0x030F, "Mnode is initializing") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INIT_SDB, 0, 0x0310, "Mnode is initializing meta data, it takes a while if many tables exists") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INIT_OTHER, 0, 0x0311, "Mnode is initializing other data") TAOS_DEFINE_ERROR(TSDB_CODE_MND_SDB_OBJ_ALREADY_THERE, 0, 0x0320, "Object already there") TAOS_DEFINE_ERROR(TSDB_CODE_MND_SDB_ERROR, 0, 0x0321, "Unexpected generic error in sdb") diff --git a/src/mnode/src/mnodeMain.c b/src/mnode/src/mnodeMain.c index d15b32da54..05ea0b93a1 100644 --- a/src/mnode/src/mnodeMain.c +++ b/src/mnode/src/mnodeMain.c @@ -17,6 +17,7 @@ #include "os.h" #include "taosdef.h" #include "tsched.h" +#include "taoserror.h" #include "tbalance.h" #include "tgrant.h" #include "ttimer.h" @@ -37,30 +38,41 @@ #include "mnodeShow.h" #include "mnodeProfile.h" +typedef enum { + TSDB_MND_STATUS_NOT_RUNNING, + TSDB_MND_STATUS_INIT, + TSDB_MND_STATUS_INIT_SDB, + TSDB_MND_STATUS_INIT_OTHER, + TSDB_MND_STATUS_READY, + TSDB_MND_STATUS_CLEANING, +} EMndStatus; + typedef struct { const char *const name; int (*init)(); void (*cleanup)(); + EMndStatus status; } SMnodeComponent; -void *tsMnodeTmr = NULL; -static bool tsMgmtIsRunning = false; +void *tsMnodeTmr = NULL; +static bool tsMgmtIsRunning = false; +static EMndStatus tsMgmtStatus = TSDB_MND_STATUS_NOT_RUNNING; static const SMnodeComponent tsMnodeComponents[] = { - {"sdbref", sdbInitRef, sdbCleanUpRef}, - {"profile", mnodeInitProfile, mnodeCleanupProfile}, - {"cluster", mnodeInitCluster, mnodeCleanupCluster}, - {"accts", mnodeInitAccts, mnodeCleanupAccts}, - {"users", mnodeInitUsers, mnodeCleanupUsers}, - {"dnodes", mnodeInitDnodes, mnodeCleanupDnodes}, - {"dbs", mnodeInitDbs, mnodeCleanupDbs}, - {"vgroups", mnodeInitVgroups, mnodeCleanupVgroups}, - {"tables", mnodeInitTables, mnodeCleanupTables}, - {"mnodes", mnodeInitMnodes, mnodeCleanupMnodes}, - {"sdb", sdbInit, sdbCleanUp}, - {"balance", balanceInit, balanceCleanUp}, - {"grant", grantInit, grantCleanUp}, - {"show", mnodeInitShow, mnodeCleanUpShow} + {"sdbref", sdbInitRef, sdbCleanUpRef, TSDB_MND_STATUS_INIT}, + {"profile", mnodeInitProfile, mnodeCleanupProfile, TSDB_MND_STATUS_INIT}, + {"cluster", mnodeInitCluster, mnodeCleanupCluster, TSDB_MND_STATUS_INIT}, + {"accts", mnodeInitAccts, mnodeCleanupAccts, TSDB_MND_STATUS_INIT}, + {"users", mnodeInitUsers, mnodeCleanupUsers, TSDB_MND_STATUS_INIT}, + {"dnodes", mnodeInitDnodes, mnodeCleanupDnodes, TSDB_MND_STATUS_INIT}, + {"dbs", mnodeInitDbs, mnodeCleanupDbs, TSDB_MND_STATUS_INIT}, + {"vgroups", mnodeInitVgroups, mnodeCleanupVgroups, TSDB_MND_STATUS_INIT}, + {"tables", mnodeInitTables, mnodeCleanupTables, TSDB_MND_STATUS_INIT}, + {"mnodes", mnodeInitMnodes, mnodeCleanupMnodes, TSDB_MND_STATUS_INIT}, + {"sdb", sdbInit, sdbCleanUp, TSDB_MND_STATUS_INIT_SDB}, + {"balance", balanceInit, balanceCleanUp, TSDB_MND_STATUS_INIT_OTHER}, + {"grant", grantInit, grantCleanUp, TSDB_MND_STATUS_INIT_OTHER}, + {"show", mnodeInitShow, mnodeCleanUpShow, TSDB_MND_STATUS_INIT_OTHER}, }; static void mnodeInitTimer(); @@ -76,21 +88,24 @@ static void mnodeCleanupComponents(int32_t stepId) { static int32_t mnodeInitComponents() { int32_t code = 0; for (int32_t i = 0; i < sizeof(tsMnodeComponents) / sizeof(tsMnodeComponents[0]); i++) { + tsMgmtStatus = tsMnodeComponents[i].status; if (tsMnodeComponents[i].init() != 0) { mnodeCleanupComponents(i); code = -1; break; } + // sleep(3); } return code; } int32_t mnodeStartSystem() { - if (tsMgmtIsRunning) { + if (tsMgmtStatus != TSDB_MND_STATUS_NOT_RUNNING) { mInfo("mnode module already started..."); return 0; } + tsMgmtStatus = TSDB_MND_STATUS_INIT; mInfo("starting to initialize mnode ..."); if (mkdir(tsMnodeDir, 0755) != 0 && errno != EEXIST) { mError("failed to init mnode dir:%s, reason:%s", tsMnodeDir, strerror(errno)); @@ -106,7 +121,7 @@ int32_t mnodeStartSystem() { } grantReset(TSDB_GRANT_ALL, 0); - tsMgmtIsRunning = true; + tsMgmtStatus = TSDB_MND_STATUS_READY; mInfo("mnode is initialized successfully"); @@ -126,7 +141,7 @@ int32_t mnodeInitSystem() { void mnodeCleanupSystem() { if (tsMgmtIsRunning) { mInfo("starting to clean up mnode"); - tsMgmtIsRunning = false; + tsMgmtStatus = TSDB_MND_STATUS_CLEANING; dnodeFreeMWritequeue(); dnodeFreeMReadQueue(); @@ -134,6 +149,7 @@ void mnodeCleanupSystem() { mnodeCleanupTimer(); mnodeCleanupComponents(sizeof(tsMnodeComponents) / sizeof(tsMnodeComponents[0]) - 1); + tsMgmtStatus = TSDB_MND_STATUS_NOT_RUNNING; mInfo("mnode is cleaned up"); } } @@ -184,5 +200,29 @@ static bool mnodeNeedStart() { } bool mnodeIsRunning() { - return tsMgmtIsRunning; + return (tsMgmtStatus != TSDB_MND_STATUS_NOT_RUNNING && tsMgmtStatus != TSDB_MND_STATUS_CLEANING); +} + +bool mnodeIsReady() { + return (tsMgmtStatus == TSDB_MND_STATUS_READY); +} + +int32_t mnodeInitCode() { + int32_t code = -1; + + switch (tsMgmtStatus) { + case TSDB_MND_STATUS_INIT: + code = TSDB_CODE_MND_INIT; + break; + case TSDB_MND_STATUS_INIT_SDB: + code = TSDB_CODE_MND_INIT_SDB; + break; + case TSDB_MND_STATUS_INIT_OTHER: + code = TSDB_CODE_MND_INIT_OTHER; + break; + default: + code = TSDB_CODE_MND_INIT; + } + + return code; } diff --git a/src/mnode/src/mnodeSdb.c b/src/mnode/src/mnodeSdb.c index 6670e87a7c..2e6ba8175d 100644 --- a/src/mnode/src/mnodeSdb.c +++ b/src/mnode/src/mnodeSdb.c @@ -299,7 +299,7 @@ void sdbUpdateAsync() { void sdbUpdateSync(void *pMnodes) { SMnodeInfos *mnodes = pMnodes; - if (!mnodeIsRunning()) { + if (!mnodeIsReady()) { mDebug("vgId:1, mnode not start yet, update sync config later"); return; } diff --git a/src/mnode/src/mnodeUser.c b/src/mnode/src/mnodeUser.c index fb26086d04..8d57ad30dc 100644 --- a/src/mnode/src/mnodeUser.c +++ b/src/mnode/src/mnodeUser.c @@ -585,10 +585,21 @@ void mnodeDropAllUsers(SAcctObj *pAcct) { } int32_t mnodeRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey) { + *secret = 0; + + if (!mnodeIsRunning()) { + mDebug("user:%s, mnode is not running, fail to auth", user); + return TSDB_CODE_RPC_REDIRECT; + } + + if (!mnodeIsReady()) { + mDebug("user:%s, failed to auth user, mnode is not ready", user); + return mnodeInitCode(); + } + if (!sdbIsMaster()) { - *secret = 0; mDebug("user:%s, failed to auth user, mnode is not master", user); - return TSDB_CODE_APP_NOT_READY; + return TSDB_CODE_RPC_REDIRECT; } SUserObj *pUser = mnodeGetUser(user); diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index acceaf9d7a..2b7271459e 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1551,10 +1551,9 @@ static int rpcCheckAuthentication(SRpcConn *pConn, char *msg, int msgLen) { if ( !rpcIsReq(pHead->msgType) ) { // for response, if code is auth failure, it shall bypass the auth process code = htonl(pHead->code); - if (code == TSDB_CODE_RPC_INVALID_TIME_STAMP || code == TSDB_CODE_RPC_AUTH_FAILURE || - code == TSDB_CODE_RPC_AUTH_REQUIRED || code == TSDB_CODE_MND_INVALID_USER || code == TSDB_CODE_RPC_NOT_READY) { - pHead->msgLen = (int32_t)htonl((uint32_t)pHead->msgLen); + if (code != 0) { // tTrace("%s, dont check authentication since code is:0x%x", pConn->info, code); + pHead->msgLen = (int32_t)htonl((uint32_t)pHead->msgLen); return 0; } } -- GitLab