diff --git a/include/common/taosmsg.h b/include/common/taosmsg.h index b238bfa566764ee5cbf870d8d5378458b3e667aa..50594fac0055d4f1847bdf7cd98ec47f383364c5 100644 --- a/include/common/taosmsg.h +++ b/include/common/taosmsg.h @@ -706,41 +706,30 @@ typedef struct { } SStatusRsp; typedef struct { - uint32_t vgId; - int32_t dbCfgVersion; - int32_t maxTables; - int32_t cacheBlockSize; - int32_t totalBlocks; - int32_t daysPerFile; - int32_t daysToKeep; - int32_t daysToKeep1; - int32_t daysToKeep2; - int32_t minRowsPerFileBlock; - int32_t maxRowsPerFileBlock; - int32_t commitTime; - int32_t fsyncPeriod; - int8_t precision; - int8_t compression; - int8_t walLevel; - int8_t vgReplica; - int8_t wals; - int8_t quorum; - int8_t update; - int8_t cacheLastRow; - int32_t vgCfgVersion; - int8_t dbReplica; - int8_t dbType; - int8_t reserved[8]; -} SVnodeCfg; - -typedef struct { - int32_t nodeId; - char nodeEp[TSDB_EP_LEN]; + uint16_t port; + char fqdn[TSDB_FQDN_LEN]; } SVnodeDesc; typedef struct { char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN]; - SVnodeCfg cfg; + uint32_t vgId; + int32_t cacheBlockSize; + int32_t totalBlocks; + int32_t daysPerFile; + int32_t daysToKeep0; + int32_t daysToKeep1; + int32_t daysToKeep2; + int32_t minRowsPerFileBlock; + int32_t maxRowsPerFileBlock; + int8_t precision; + int8_t compression; + int8_t cacheLastRow; + int8_t update; + int8_t walLevel; + int8_t replica; + int8_t quorum; + int8_t reserved[9]; + int32_t fsyncPeriod; SVnodeDesc nodes[TSDB_MAX_REPLICA]; } SCreateVnodeMsg, SAlterVnodeMsg; diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index a0602ec1b05ed1b4b991c8bf4fec7615f4aa97c5..30583686c5e31c276e0761369cf7c8b0106947c2 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -22,7 +22,6 @@ extern "C" { #include #include "taosdef.h" -#include "wal.h" typedef int64_t SyncNodeId; typedef int32_t SyncGroupId; @@ -41,6 +40,7 @@ typedef struct { } SSyncBuffer; typedef struct { + SyncNodeId nodeId; uint16_t nodePort; // 
node sync Port char nodeFqdn[TSDB_FQDN_LEN]; // node FQDN } SNodeInfo; @@ -83,11 +83,38 @@ typedef struct SSyncFSM { } SSyncFSM; +typedef struct SSyncLogStore { + void* pData; + + // write log with given index + int32_t (*logWrite)(struct SSyncLogStore* logStore, SyncIndex index, SSyncBuffer* pBuf); + + // mark log with given index has been committed + int32_t (*logCommit)(struct SSyncLogStore* logStore, SyncIndex index); + + // prune log before given index + int32_t (*logPrune)(struct SSyncLogStore* logStore, SyncIndex index); + + // rollback log after given index + int32_t (*logRollback)(struct SSyncLogStore* logStore, SyncIndex index); +} SSyncLogStore; + typedef struct SSyncServerState { - SNodeInfo voteFor; + SyncNodeId voteFor; SSyncTerm term; } SSyncServerState; +typedef struct SSyncClusterConfig { + // Log index number of current cluster config. + SyncIndex index; + + // Log index number of previous cluster config. + SyncIndex prevIndex; + + // current cluster + const SSyncCluster* cluster; +} SSyncClusterConfig; + typedef struct SStateManager { void* pData; @@ -95,35 +122,38 @@ typedef struct SStateManager { const SSyncServerState* (*readServerState)(struct SStateManager* stateMng); - void (*saveCluster)(struct SStateManager* stateMng, const SSyncCluster* cluster); + void (*saveCluster)(struct SStateManager* stateMng, const SSyncClusterConfig* cluster); - const SSyncCluster* (*readCluster)(struct SStateManager* stateMng); + const SSyncClusterConfig* (*readCluster)(struct SStateManager* stateMng); } SStateManager; typedef struct { SyncGroupId vgId; - twalh walHandle; - SyncIndex snapshotIndex; SSyncCluster syncCfg; SSyncFSM fsm; + SSyncLogStore logStore; + SStateManager stateManager; } SSyncInfo; +struct SSyncNode; +typedef struct SSyncNode SSyncNode; + int32_t syncInit(); void syncCleanUp(); -SyncNodeId syncStart(const SSyncInfo*); +SSyncNode syncStart(const SSyncInfo*); void syncStop(SyncNodeId); -int32_t syncPropose(SyncNodeId nodeId, SSyncBuffer buffer, 
void* pData, bool isWeak); +int32_t syncPropose(SSyncNode syncNode, SSyncBuffer buffer, void* pData, bool isWeak); -int32_t syncAddNode(SyncNodeId nodeId, const SNodeInfo *pNode); +int32_t syncAddNode(SSyncNode syncNode, const SNodeInfo *pNode); -int32_t syncRemoveNode(SyncNodeId nodeId, const SNodeInfo *pNode); +int32_t syncRemoveNode(SSyncNode syncNode, const SNodeInfo *pNode); extern int32_t syncDebugFlag; diff --git a/include/server/dnode/dnode.h b/include/server/dnode/dnode.h index 3499913afaca98a60fb8e0c1d1c668372bf1e39f..bc0d1e89b00be24dedd2b036c66d42d02ff2fbcc 100644 --- a/include/server/dnode/dnode.h +++ b/include/server/dnode/dnode.h @@ -67,6 +67,11 @@ void dnodeSendRedirectMsg(struct SRpcMsg *rpcMsg, bool forShell); */ void dnodeGetEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port); +/** + * Report the startup progress. + */ +void dnodeReportStartup(char *name, char *desc); + #ifdef __cplusplus } #endif diff --git a/include/server/vnode/tq/tq.h b/include/server/vnode/tq/tq.h index f30ba75c429a5116a8f84d8fdb624772168b925d..c90991ae1074b43103bba361ad0ec2dfda470314 100644 --- a/include/server/vnode/tq/tq.h +++ b/include/server/vnode/tq/tq.h @@ -23,64 +23,79 @@ extern "C" { #endif typedef struct tmqMsgHead { - int32_t headLen; int32_t protoVer; + int32_t msgType; int64_t cgId; - int64_t topicId; int64_t clientId; - int32_t checksum; - int32_t msgType; } tmqMsgHead; +typedef struct tmqOneAck { + int64_t topicId; + int64_t consumeOffset; +} tmqOneAck; + +typedef struct tmqAcks { + int32_t ackNum; + //should be sorted + tmqOneAck acks[]; +} tmqAcks; + //TODO: put msgs into common typedef struct tmqConnectReq { tmqMsgHead head; + tmqAcks acks; } tmqConnectReq; -typedef struct tmqConnectResp { +typedef struct tmqConnectRsp { tmqMsgHead head; int8_t status; -} tmqConnectResp; +} tmqConnectRsp; typedef struct tmqDisconnectReq { tmqMsgHead head; } tmqDisconnectReq; -typedef struct tmqDisconnectResp { +typedef struct tmqDisconnectRsp { tmqMsgHead head; int8_t 
status; -} tmqDiconnectResp; +} tmqDiconnectRsp; typedef struct tmqConsumeReq { tmqMsgHead head; - int64_t commitOffset; + tmqAcks acks; } tmqConsumeReq; -typedef struct tmqConsumeResp { - tmqMsgHead head; - char content[]; -} tmqConsumeResp; +typedef struct tmqMsgContent { + int64_t topicId; + int64_t msgLen; + char msg[]; +} tmqMsgContent; + +typedef struct tmqConsumeRsp { + tmqMsgHead head; + int64_t bodySize; + tmqMsgContent msgs[]; +} tmqConsumeRsp; -// typedef struct tmqMnodeSubscribeReq { tmqMsgHead head; int64_t topicLen; char topic[]; } tmqSubscribeReq; -typedef struct tmqMnodeSubscribeResp { +typedef struct tmqMnodeSubscribeRsp { tmqMsgHead head; int64_t vgId; char ep[]; //TSDB_EP_LEN -} tmqSubscribeResp; +} tmqSubscribeRsp; typedef struct tmqHeartbeatReq { } tmqHeartbeatReq; -typedef struct tmqHeartbeatResp { +typedef struct tmqHeartbeatRsp { -} tmqHeartbeatResp; +} tmqHeartbeatRsp; typedef struct tqTopicVhandle { //name @@ -92,33 +107,57 @@ typedef struct tqTopicVhandle { } tqTopicVhandle; typedef struct STQ { - //the set for topics - //key=topicName: str - //value=tqTopicVhandle + //the collection of group handle - //a map - //key= - //value=consumeOffset: int64_t } STQ; #define TQ_BUFFER_SIZE 8 +//TODO: define a serializer and deserializer typedef struct tqBufferItem { int64_t offset; + //executors are identical but not concurrent + //so it must be a copy in each item void* executor; + int64_t size; void* content; } tqBufferItem; -typedef struct tqGroupHandle { - char* topic; //c style, end with '\0' - int64_t cgId; - void* ahandle; - int64_t consumeOffset; +typedef struct tqBufferHandle { + //char* topic; //c style, end with '\0' + //int64_t cgId; + //void* ahandle; + int64_t nextConsumeOffset; + int64_t topicId; int32_t head; int32_t tail; tqBufferItem buffer[TQ_BUFFER_SIZE]; +} tqBufferHandle; + +typedef struct tqListHandle { + tqBufferHandle* bufHandle; + struct tqListHandle* next; +} tqListHandle; + +typedef struct tqGroupHandle { + int64_t cId; 
+ int64_t cgId; + void* ahandle; + int32_t topicNum; + tqListHandle *head; } tqGroupHandle; +typedef struct tqQueryExec { + void* src; + tqBufferItem* dest; + void* executor; +} tqQueryExec; + +typedef struct tqQueryMsg { + tqQueryExec *exec; + struct tqQueryMsg *next; +} tqQueryMsg; + //init in each vnode STQ* tqInit(void* ref_func(void*), void* unref_func(void*)); void tqCleanUp(STQ*); @@ -127,20 +166,33 @@ void tqCleanUp(STQ*); int tqPushMsg(STQ*, void* msg, int64_t version); int tqCommit(STQ*); -//void* will be replace by a msg type -int tqHandleConsumeMsg(STQ*, tmqConsumeReq* msg); +int tqConsume(STQ*, tmqConsumeReq*); -tqGroupHandle* tqFindGHandleBycId(STQ*, int64_t cId); +tqGroupHandle* tqGetGroupHandle(STQ*, int64_t cId); int tqOpenTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId); int tqCloseTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId); int tqMoveOffsetToNext(tqGroupHandle*); int tqResetOffset(STQ*, int64_t topicId, int64_t cgId, int64_t offset); -int tqFetchMsg(tqGroupHandle*, void*); int tqRegisterContext(tqGroupHandle*, void*); -int tqLaunchQuery(STQ*, int64_t topicId, int64_t cgId, void* query); +int tqLaunchQuery(tqGroupHandle*); int tqSendLaunchQuery(STQ*, int64_t topicId, int64_t cgId, void* query); +int tqSerializeGroupHandle(tqGroupHandle *gHandle, void** ppBytes, int32_t offset); +int tqSerializeListHandle(tqListHandle *listHandle, void** ppBytes, int32_t offset); +int tqSerializeBufHandle(tqBufferHandle *bufHandle, void** ppBytes, int32_t offset); +int tqSerializeBufItem(tqBufferItem *bufItem, void** ppBytes, int32_t offset); + +int tqDeserializeGroupHandle(const void* pBytes, tqGroupHandle **pGhandle); +int tqDeserializeListHandle(const void* pBytes, tqListHandle **pListHandle); +int tqDeserializeBufHandle(const void* pBytes, tqBufferHandle **pBufHandle); +int tqDeserializeBufItem(const void* pBytes, tqBufferItem **pBufItem); + +int tqGetGHandleSSize(const tqGroupHandle *gHandle); +int tqListHandleSSize(const tqListHandle 
*listHandle); +int tqBufHandleSSize(const tqBufferHandle *bufHandle); +int tqBufItemSSize(const tqBufferItem *bufItem); + #ifdef __cplusplus } #endif diff --git a/include/server/vnode/vnode.h b/include/server/vnode/vnode.h index 00decfe338d98d6989a21bef5e404372a4d03c63..ecb1412b0639e1688a81657f387e6535cfb4c050 100644 --- a/include/server/vnode/vnode.h +++ b/include/server/vnode/vnode.h @@ -46,6 +46,11 @@ typedef struct { */ void (*GetDnodeEp)(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port); + /** + * Report the startup progress. + */ + void (*ReportStartup)(char *name, char *desc); + } SVnodeFp; typedef struct { diff --git a/include/util/taoserror.h b/include/util/taoserror.h index cf8cd510c5e848ae3310f0e70873c9598b0266c9..76c5f575a5c64bcf98065ba9bb14d2412816dcb6 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -233,11 +233,11 @@ int32_t* taosGetErrno(); #define TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR TAOS_DEF_ERROR_CODE(0, 0x0507) //"Missing data file") #define TSDB_CODE_VND_OUT_OF_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0508) //"Out of memory") #define TSDB_CODE_VND_APP_ERROR TAOS_DEF_ERROR_CODE(0, 0x0509) //"Unexpected generic error in vnode") -#define TSDB_CODE_VND_INVALID_VRESION_FILE TAOS_DEF_ERROR_CODE(0, 0x050A) //"Invalid version file") -#define TSDB_CODE_VND_IS_FULL TAOS_DEF_ERROR_CODE(0, 0x050B) //"Database memory is full for commit failed") -#define TSDB_CODE_VND_IS_FLOWCTRL TAOS_DEF_ERROR_CODE(0, 0x050C) //"Database memory is full for waiting commit") +#define TSDB_CODE_VND_INVALID_CFG_FILE TAOS_DEF_ERROR_CODE(0, 0x050A) //"Invalid config file") +#define TSDB_CODE_VND_INVALID_TERM_FILE TAOS_DEF_ERROR_CODE(0, 0x050B) //"Invalid term file") +#define TSDB_CODE_VND_IS_FLOWCTRL TAOS_DEF_ERROR_CODE(0, 0x050C) //"Database memory is full") #define TSDB_CODE_VND_IS_DROPPING TAOS_DEF_ERROR_CODE(0, 0x050D) //"Database is dropping") -#define TSDB_CODE_VND_IS_BALANCING TAOS_DEF_ERROR_CODE(0, 0x050E) //"Database is balancing") +#define 
TSDB_CODE_VND_IS_UPDATING TAOS_DEF_ERROR_CODE(0, 0x050E) //"Database is updating") #define TSDB_CODE_VND_IS_CLOSING TAOS_DEF_ERROR_CODE(0, 0x0510) //"Database is closing") #define TSDB_CODE_VND_NOT_SYNCED TAOS_DEF_ERROR_CODE(0, 0x0511) //"Database suspended") #define TSDB_CODE_VND_NO_WRITE_AUTH TAOS_DEF_ERROR_CODE(0, 0x0512) //"Database write operation denied") diff --git a/include/util/tdef.h b/include/util/tdef.h index 170502aa9d3e55bf7df7249291eefc8c8686f635..80cd3cf8b8dcc8a954ec6a304b496322e16d69b7 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -382,6 +382,9 @@ do { \ #define TSDB_DATA_TYPE_UINT 13 // 4 bytes #define TSDB_DATA_TYPE_UBIGINT 14 // 8 bytes +enum { TRANS_STAT_INIT = 0, TRANS_STAT_EXECUTING, TRANS_STAT_EXECUTED, TRANS_STAT_ROLLBACKING, TRANS_STAT_ROLLBACKED }; +enum { TRANS_OPER_INIT = 0, TRANS_OPER_EXECUTE, TRANS_OPER_ROLLBACK }; + #ifdef __cplusplus } #endif diff --git a/source/libs/wal/src/wal.c b/source/libs/wal/src/wal.c index 1e74c1613cb34f706ccef733d58e664edf429dfe..8c0fc2b775497c5f1588eb2fa34db7f4eedd00ff 100644 --- a/source/libs/wal/src/wal.c +++ b/source/libs/wal/src/wal.c @@ -15,5 +15,10 @@ #include "wal.h" -int32_t walInit() {return 0;} -void walCleanUp() {} \ No newline at end of file +int32_t walInit() { return 0; } + +void walCleanUp() {} + +twalh walOpen(char *path, SWalCfg *pCfg) { return NULL; } + +int32_t walAlter(twalh pWal, SWalCfg *pCfg) { return 0; } \ No newline at end of file diff --git a/source/server/dnode/src/dnodeInt.c b/source/server/dnode/src/dnodeInt.c index d294143e576cea8842a0c2b4248c94e130b0be4e..7b0b87368ecca768c6b79e83e5a58d7f1366c613 100644 --- a/source/server/dnode/src/dnodeInt.c +++ b/source/server/dnode/src/dnodeInt.c @@ -37,7 +37,7 @@ EDnStat dnodeGetRunStat() { return tsDnode.runStatus; } void dnodeSetRunStat(EDnStat stat) { tsDnode.runStatus = stat; } -static void dnodeReportStartup(char *name, char *desc) { +void dnodeReportStartup(char *name, char *desc) { SStartupStep *startup = 
&tsDnode.startup; tstrncpy(startup->name, name, strlen(startup->name)); tstrncpy(startup->desc, desc, strlen(startup->desc)); @@ -58,6 +58,7 @@ static int32_t dnodeInitVnode() { para.fp.GetDnodeEp = dnodeGetEp; para.fp.SendMsgToDnode = dnodeSendMsgToDnode; para.fp.SendMsgToMnode = dnodeSendMsgToMnode; + para.fp.ReportStartup = dnodeReportStartup; return vnodeInit(para); } diff --git a/source/server/vnode/inc/vnodeCfg.h b/source/server/vnode/inc/vnodeFile.h similarity index 77% rename from source/server/vnode/inc/vnodeCfg.h rename to source/server/vnode/inc/vnodeFile.h index 342d801f44411076deacc300d2305d1deb80b20b..31364d8c03471c703891432c92318b7ea0de61c0 100644 --- a/source/server/vnode/inc/vnodeCfg.h +++ b/source/server/vnode/inc/vnodeFile.h @@ -21,8 +21,10 @@ extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeReadCfg(SVnode *pVnode); -int32_t vnodeWriteCfg(SCreateVnodeMsg *pVnodeCfg); +int32_t vnodeReadCfg(int32_t vgId, SVnodeCfg *pCfg); +int32_t vnodeWriteCfg(int32_t vgId, SVnodeCfg *pCfg); +int32_t vnodeReadTerm(int32_t vgId, SSyncServerState *pState); +int32_t vnodeWriteTerm(int32_t vgid, SSyncServerState *pState); #ifdef __cplusplus } diff --git a/source/server/vnode/inc/vnodeInt.h b/source/server/vnode/inc/vnodeInt.h index 8188cb56a3af9927bcc8408101f6f7aebef78cb5..3c7487f68149e56727102e95c634b6821c534b81 100644 --- a/source/server/vnode/inc/vnodeInt.h +++ b/source/server/vnode/inc/vnodeInt.h @@ -16,11 +16,12 @@ #ifndef _TD_VNODE_INT_H_ #define _TD_VNODE_INT_H_ +#include "os.h" #include "amalloc.h" #include "meta.h" -#include "os.h" #include "sync.h" #include "taosmsg.h" +#include "tglobal.h" #include "tlog.h" #include "tq.h" #include "tqueue.h" @@ -43,57 +44,75 @@ extern int32_t vDebugFlag; #define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} #define vTrace(...) 
{ if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} +typedef struct STsdbCfg { + int32_t cacheBlockSize; // MB + int32_t totalBlocks; + int32_t daysPerFile; + int32_t daysToKeep0; + int32_t daysToKeep1; + int32_t daysToKeep2; + int32_t minRowsPerFileBlock; + int32_t maxRowsPerFileBlock; + uint8_t precision; // time resolution + int8_t compression; + int8_t cacheLastRow; + int8_t update; +} STsdbCfg; + +typedef struct SMetaCfg { +} SMetaCfg; + +typedef struct SSyncCluster { + int8_t replica; + int8_t quorum; + SNodeInfo nodes[TSDB_MAX_REPLICA]; +} SSyncCfg; + +typedef struct SVnodeCfg { + char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN]; + int8_t dropped; + SWalCfg wal; + STsdbCfg tsdb; + SMetaCfg meta; + SSyncCfg sync; +} SVnodeCfg; + typedef struct { - int32_t vgId; // global vnode group ID - int32_t refCount; // reference count - SMemAllocator *allocator; - SMeta *pMeta; - STsdb *pTsdb; - STQ *pTQ; - twalh pWal; - SyncNodeId syncNode; - taos_queue pWriteQ; // write queue - taos_queue pQueryQ; // read query queue - taos_queue pFetchQ; // read fetch/cancel queue - SWalCfg walCfg; - SSyncCluster syncCfg; - char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN]; - int64_t queuedWMsgSize; - int32_t queuedWMsg; - int32_t queuedRMsg; - int32_t numOfQHandle; // current initialized and existed query handle in current dnode - int8_t status; - int8_t role; - int8_t accessState; - int8_t dropped; - pthread_mutex_t statusMutex; + int32_t vgId; // global vnode group ID + int32_t refCount; // reference count + SMemAllocator *allocator; + SMeta *pMeta; + STsdb *pTsdb; + STQ *pTQ; + twalh pWal; + void *pQuery; + SyncNodeId syncNode; + taos_queue pWriteQ; // write queue + taos_queue pQueryQ; // read query queue + taos_queue pFetchQ; // read fetch/cancel queue + SVnodeCfg cfg; + SSyncServerState term; + int64_t queuedWMsgSize; + int32_t queuedWMsg; + int32_t queuedRMsg; + int32_t numOfQHandle; // current initialized and existed query handle in current dnode + 
int8_t role; + int8_t accessState; + int8_t dropped; + int8_t status; + pthread_mutex_t statusMutex; } SVnode; typedef struct { int32_t len; - void * rsp; - void * qhandle; // used by query and retrieve msg + void *rsp; + void *qhandle; // used by query and retrieve msg } SVnRsp; void vnodeSendMsgToDnode(struct SRpcEpSet *epSet, struct SRpcMsg *rpcMsg); void vnodeSendMsgToMnode(struct SRpcMsg *rpcMsg); void vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port); - -int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg); -int32_t vnodeDrop(int32_t vgId); -int32_t vnodeOpen(int32_t vgId); -int32_t vnodeAlter(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg); -int32_t vnodeSync(int32_t vgId); -int32_t vnodeClose(int32_t vgId); -void vnodeCleanUp(SVnode *pVnode); -void vnodeDestroy(SVnode *pVnode); -int32_t vnodeCompact(int32_t vgId); -void vnodeBackup(int32_t vgId); -void vnodeGetStatus(struct SStatusMsg *status); - -SVnode *vnodeAcquire(int32_t vgId); -SVnode *vnodeAcquireNotClose(int32_t vgId); -void vnodeRelease(SVnode *pVnode); +void vnodeReportStartup(char *name, char *desc); #ifdef __cplusplus } diff --git a/source/server/vnode/inc/vnodeMgmtMsg.h b/source/server/vnode/inc/vnodeMain.h similarity index 59% rename from source/server/vnode/inc/vnodeMgmtMsg.h rename to source/server/vnode/inc/vnodeMain.h index 4d5533f2fe0adc25228f616827e2ae74572adb81..d0d84792e8409409e33c087dcd6422505be82100 100644 --- a/source/server/vnode/inc/vnodeMgmtMsg.h +++ b/source/server/vnode/inc/vnodeMain.h @@ -13,23 +13,30 @@ * along with this program. If not, see . 
*/ -#ifndef _TD_VNODE_MGMT_MSG_H_ -#define _TD_VNODE_MGMT_MSG_H_ +#ifndef _TD_VNODE_MAIN_H_ +#define _TD_VNODE_MAIN_H_ + +#include "vnodeInt.h" #ifdef __cplusplus extern "C" { #endif -#include "vnodeInt.h" -int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg); -int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg); -int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg); -int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg); -int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg); -int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg); +int32_t vnodeInitMain(); +void vnodeCleanupMain(); + +SVnode *vnodeAcquireInAllState(int32_t vgId); +SVnode *vnodeAcquire(int32_t vgId); +void vnodeRelease(SVnode *pVnode); + +int32_t vnodeCreateVnode(int32_t vgId, SVnodeCfg *pCfg); +int32_t vnodeAlterVnode(SVnode *pVnode, SVnodeCfg *pCfg); +int32_t vnodeDropVnode(SVnode *pVnode); +int32_t vnodeSyncVnode(SVnode *pVnode); +int32_t vnodeCompactVnode(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif /*_TD_VNODE_MGMT_H_*/ +#endif /*_TD_VNODE_MAIN_H_*/ diff --git a/source/server/vnode/inc/vnodeMgmt.h b/source/server/vnode/inc/vnodeMgmt.h index 23dc826db740ed54fdf61ffe456fe289215272d8..ccd1b28b6118ce3f3b9acc0b8c6e39e05dd7666c 100644 --- a/source/server/vnode/inc/vnodeMgmt.h +++ b/source/server/vnode/inc/vnodeMgmt.h @@ -21,6 +21,14 @@ extern "C" { #endif #include "vnodeInt.h" + +typedef struct { + SVnode *pVnode; + SRpcMsg rpcMsg; + char pCont[]; +} SVnMgmtMsg; + + int32_t vnodeInitMgmt(); void vnodeCleanupMgmt(); void vnodeProcessMgmtMsg(SRpcMsg *pMsg); diff --git a/source/server/vnode/inc/vnodeStatus.h b/source/server/vnode/inc/vnodeStatus.h deleted file mode 100644 index c7f1b4c96d80475305c623f9d77194501e8d132f..0000000000000000000000000000000000000000 --- a/source/server/vnode/inc/vnodeStatus.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_VNODE_STATUS_H_ -#define _TD_VNODE_STATUS_H_ - -#ifdef __cplusplus -extern "C" { -#endif -#include "vnodeInt.h" - -typedef enum _VN_STATUS { - TAOS_VN_STATUS_INIT = 0, - TAOS_VN_STATUS_READY = 1, - TAOS_VN_STATUS_CLOSING = 2, - TAOS_VN_STATUS_UPDATING = 3 -} EVnodeStatus; - -// vnodeStatus -extern char* vnodeStatus[]; - -bool vnodeSetInitStatus(SVnode* pVnode); -bool vnodeSetReadyStatus(SVnode* pVnode); -bool vnodeSetClosingStatus(SVnode* pVnode); -bool vnodeSetUpdatingStatus(SVnode* pVnode); - -bool vnodeInInitStatus(SVnode* pVnode); -bool vnodeInReadyStatus(SVnode* pVnode); -bool vnodeInClosingStatus(SVnode* pVnode); - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_VNODE_STATUS_H_*/ \ No newline at end of file diff --git a/source/server/vnode/inc/vnodeVersion.h b/source/server/vnode/inc/vnodeVersion.h deleted file mode 100644 index 81e6758559f8890afe1f013df75bf6b8263609a9..0000000000000000000000000000000000000000 --- a/source/server/vnode/inc/vnodeVersion.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_VNODE_VERSION_H_ -#define _TD_VNODE_VERSION_H_ - -#ifdef __cplusplus -extern "C" { -#endif -#include "vnodeInt.h" - -int32_t vnodeReadVersion(SVnode *pVnode); -int32_t vnodeSaveVersion(SVnode *pVnode); - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_VNODE_VERSION_H_*/ diff --git a/source/server/vnode/inc/vnodeWorker.h b/source/server/vnode/inc/vnodeWorker.h deleted file mode 100644 index eea35011a8ca0c5da4094cf7d522ae615fc57a9f..0000000000000000000000000000000000000000 --- a/source/server/vnode/inc/vnodeWorker.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef _TD_VNODE_WORKER_H_ -#define _TD_VNODE_WORKER_H_ - -#ifdef __cplusplus -extern "C" { -#endif -#include "vnodeInt.h" - -int32_t vnodeInitWorker(); -void vnodeCleanupWorker(); -void vnodeProcessCleanupTask(SVnode *pVnode); -void vnodeProcessDestroyTask(SVnode *pVnode); -void vnodeProcessBackupTask(SVnode *pVnode); - -#ifdef __cplusplus -} -#endif - -#endif /*_TD_VNODE_WORKER_H_*/ \ No newline at end of file diff --git a/source/server/vnode/src/vnodeCfg.c b/source/server/vnode/src/vnodeCfg.c deleted file mode 100644 index 9c01a47f8f23f08b04b9557b566f4501d587ccf8..0000000000000000000000000000000000000000 --- a/source/server/vnode/src/vnodeCfg.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "cJSON.h" -#include "tglobal.h" -#include "vnodeCfg.h" - -static void vnodeLoadCfg(SVnode *pVnode, SCreateVnodeMsg *vnodeMsg) { -#if 0 - tstrncpy(pVnode->db, vnodeMsg->db, sizeof(pVnode->db)); - pVnode->dbCfgVersion = vnodeMsg->cfg.dbCfgVersion; - pVnode->vgCfgVersion = vnodeMsg->cfg.vgCfgVersion; - pVnode->tsdbCfg.cacheBlockSize = vnodeMsg->cfg.cacheBlockSize; - pVnode->tsdbCfg.totalBlocks = vnodeMsg->cfg.totalBlocks; - pVnode->tsdbCfg.daysPerFile = vnodeMsg->cfg.daysPerFile; - pVnode->tsdbCfg.keep = vnodeMsg->cfg.daysToKeep; - pVnode->tsdbCfg.keep1 = vnodeMsg->cfg.daysToKeep1; - pVnode->tsdbCfg.keep2 = vnodeMsg->cfg.daysToKeep2; - pVnode->tsdbCfg.minRowsPerFileBlock = vnodeMsg->cfg.minRowsPerFileBlock; - pVnode->tsdbCfg.maxRowsPerFileBlock = vnodeMsg->cfg.maxRowsPerFileBlock; - pVnode->tsdbCfg.precision = vnodeMsg->cfg.precision; - pVnode->tsdbCfg.compression = vnodeMsg->cfg.compression; - pVnode->tsdbCfg.update = vnodeMsg->cfg.update; - pVnode->tsdbCfg.cacheLastRow = vnodeMsg->cfg.cacheLastRow; - pVnode->walCfg.walLevel = vnodeMsg->cfg.walLevel; - pVnode->walCfg.fsyncPeriod = vnodeMsg->cfg.fsyncPeriod; - pVnode->walCfg.keep = TAOS_WAL_NOT_KEEP; - pVnode->syncCfg.replica = vnodeMsg->cfg.vgReplica; - pVnode->syncCfg.quorum = vnodeMsg->cfg.quorum; - pVnode->dbReplica = vnodeMsg->cfg.dbReplica; - pVnode->dbType = vnodeMsg->cfg.dbType; - - for (int i = 0; i < pVnode->syncCfg.replica; ++i) { - SVnodeDesc *node = &vnodeMsg->nodes[i]; - pVnode->syncCfg.nodeInfo[i].nodeId = node->nodeId; - taosGetFqdnPortFromEp(node->nodeEp, pVnode->syncCfg.nodeInfo[i].nodeFqdn, &pVnode->syncCfg.nodeInfo[i].nodePort); - pVnode->syncCfg.nodeInfo[i].nodePort += TSDB_PORT_SYNC; - } - - vInfo("vgId:%d, load vnode cfg successfully, replcia:%d", pVnode->vgId, pVnode->syncCfg.replica); - for (int32_t i = 0; i < pVnode->syncCfg.replica; i++) { - SNodeInfo *node = &pVnode->syncCfg.nodeInfo[i]; - vInfo("vgId:%d, dnode:%d, %s:%u", 
pVnode->vgId, node->nodeId, node->nodeFqdn, node->nodePort); - } -#endif -} - -int32_t vnodeReadCfg(SVnode *pVnode) { -#if 0 - int32_t ret = TSDB_CODE_VND_APP_ERROR; - int32_t len = 0; - int maxLen = 1000; - char * content = calloc(1, maxLen + 1); - cJSON * root = NULL; - FILE * fp = NULL; - bool nodeChanged = false; - - SCreateVnodeMsg vnodeMsg; - - char file[TSDB_FILENAME_LEN + 30] = {0}; - sprintf(file, "%s/vnode%d/config.json", tsVnodeDir, pVnode->vgId); - - vnodeMsg.cfg.vgId = pVnode->vgId; - - fp = fopen(file, "r"); - if (!fp) { - vError("vgId:%d, failed to open vnode cfg file:%s to read, error:%s", pVnode->vgId, file, strerror(errno)); - ret = TAOS_SYSTEM_ERROR(errno); - goto PARSE_VCFG_ERROR; - } - - len = (int32_t)fread(content, 1, maxLen, fp); - if (len <= 0) { - vError("vgId:%d, failed to read %s, content is null", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - - content[len] = 0; - root = cJSON_Parse(content); - if (root == NULL) { - vError("vgId:%d, failed to read %s, invalid json format", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - - cJSON *db = cJSON_GetObjectItem(root, "db"); - if (!db || db->type != cJSON_String || db->valuestring == NULL) { - vError("vgId:%d, failed to read %s, db not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - tstrncpy(vnodeMsg.db, db->valuestring, sizeof(vnodeMsg.db)); - - cJSON *dbCfgVersion = cJSON_GetObjectItem(root, "cfgVersion"); - if (!dbCfgVersion || dbCfgVersion->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, cfgVersion not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.dbCfgVersion = (int32_t)dbCfgVersion->valueint; - - cJSON *vgCfgVersion = cJSON_GetObjectItem(root, "vgCfgVersion"); - if (!vgCfgVersion || vgCfgVersion->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, vgCfgVersion not found", pVnode->vgId, file); - vnodeMsg.cfg.vgCfgVersion = 0; - } else { - vnodeMsg.cfg.vgCfgVersion = (int32_t)vgCfgVersion->valueint; - } - - cJSON 
*cacheBlockSize = cJSON_GetObjectItem(root, "cacheBlockSize"); - if (!cacheBlockSize || cacheBlockSize->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, cacheBlockSize not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.cacheBlockSize = (int32_t)cacheBlockSize->valueint; - - cJSON *totalBlocks = cJSON_GetObjectItem(root, "totalBlocks"); - if (!totalBlocks || totalBlocks->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, totalBlocks not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.totalBlocks = (int32_t)totalBlocks->valueint; - - cJSON *daysPerFile = cJSON_GetObjectItem(root, "daysPerFile"); - if (!daysPerFile || daysPerFile->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, daysPerFile not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.daysPerFile = (int32_t)daysPerFile->valueint; - - cJSON *daysToKeep = cJSON_GetObjectItem(root, "daysToKeep"); - if (!daysToKeep || daysToKeep->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, daysToKeep not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.daysToKeep = (int32_t)daysToKeep->valueint; - - cJSON *daysToKeep1 = cJSON_GetObjectItem(root, "daysToKeep1"); - if (!daysToKeep1 || daysToKeep1->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, daysToKeep1 not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.daysToKeep1 = (int32_t)daysToKeep1->valueint; - - cJSON *daysToKeep2 = cJSON_GetObjectItem(root, "daysToKeep2"); - if (!daysToKeep2 || daysToKeep2->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, daysToKeep2 not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.daysToKeep2 = (int32_t)daysToKeep2->valueint; - - cJSON *minRowsPerFileBlock = cJSON_GetObjectItem(root, "minRowsPerFileBlock"); - if (!minRowsPerFileBlock || minRowsPerFileBlock->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, 
minRowsPerFileBlock not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.minRowsPerFileBlock = (int32_t)minRowsPerFileBlock->valueint; - - cJSON *maxRowsPerFileBlock = cJSON_GetObjectItem(root, "maxRowsPerFileBlock"); - if (!maxRowsPerFileBlock || maxRowsPerFileBlock->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, maxRowsPerFileBlock not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.maxRowsPerFileBlock = (int32_t)maxRowsPerFileBlock->valueint; - - cJSON *precision = cJSON_GetObjectItem(root, "precision"); - if (!precision || precision->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, precision not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.precision = (int8_t)precision->valueint; - - cJSON *compression = cJSON_GetObjectItem(root, "compression"); - if (!compression || compression->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, compression not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.compression = (int8_t)compression->valueint; - - cJSON *walLevel = cJSON_GetObjectItem(root, "walLevel"); - if (!walLevel || walLevel->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, walLevel not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.walLevel = (int8_t)walLevel->valueint; - - cJSON *fsyncPeriod = cJSON_GetObjectItem(root, "fsync"); - if (!walLevel || walLevel->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, fsyncPeriod not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.fsyncPeriod = (int32_t)fsyncPeriod->valueint; - - cJSON *wals = cJSON_GetObjectItem(root, "wals"); - if (!wals || wals->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, wals not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.wals = (int8_t)wals->valueint; - - cJSON *vgReplica = cJSON_GetObjectItem(root, "replica"); - if (!vgReplica || 
vgReplica->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, replica not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.vgReplica = (int8_t)vgReplica->valueint; - - cJSON *dbReplica = cJSON_GetObjectItem(root, "dbReplica"); - if (!dbReplica || dbReplica->type != cJSON_Number) { - vWarn("vgId:%d, failed to read %s, dbReplica not found", pVnode->vgId, file); - vnodeMsg.cfg.dbReplica = vnodeMsg.cfg.vgReplica; - vnodeMsg.cfg.vgCfgVersion = 0; - } else { - vnodeMsg.cfg.dbReplica = (int8_t)dbReplica->valueint; - } - - cJSON *quorum = cJSON_GetObjectItem(root, "quorum"); - if (!quorum || quorum->type != cJSON_Number) { - vError("vgId: %d, failed to read %s, quorum not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - vnodeMsg.cfg.quorum = (int8_t)quorum->valueint; - - cJSON *update = cJSON_GetObjectItem(root, "update"); - if (!update || update->type != cJSON_Number) { - vWarn("vgId: %d, failed to read %s, update not found", pVnode->vgId, file); - vnodeMsg.cfg.update = 0; - vnodeMsg.cfg.vgCfgVersion = 0; - } else { - vnodeMsg.cfg.update = (int8_t)update->valueint; - } - - cJSON *cacheLastRow = cJSON_GetObjectItem(root, "cacheLastRow"); - if (!cacheLastRow || cacheLastRow->type != cJSON_Number) { - vWarn("vgId: %d, failed to read %s, cacheLastRow not found", pVnode->vgId, file); - vnodeMsg.cfg.cacheLastRow = 0; - vnodeMsg.cfg.vgCfgVersion = 0; - } else { - vnodeMsg.cfg.cacheLastRow = (int8_t)cacheLastRow->valueint; - } - - cJSON *dbType = cJSON_GetObjectItem(root, "dbType"); - if (!dbType || dbType->type != cJSON_Number) { - vWarn("vgId: %d, failed to read %s, dbType not found", pVnode->vgId, file); - vnodeMsg.cfg.dbType = 0; - } else { - vnodeMsg.cfg.dbType = (int8_t)dbType->valueint; - } - - cJSON *nodeInfos = cJSON_GetObjectItem(root, "nodeInfos"); - if (!nodeInfos || nodeInfos->type != cJSON_Array) { - vError("vgId:%d, failed to read %s, nodeInfos not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - - int size = 
cJSON_GetArraySize(nodeInfos); - if (size != vnodeMsg.cfg.vgReplica) { - vError("vgId:%d, failed to read %s, nodeInfos size not matched", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - - for (int i = 0; i < size; ++i) { - cJSON *nodeInfo = cJSON_GetArrayItem(nodeInfos, i); - if (nodeInfo == NULL) continue; - SVnodeDesc *node = &vnodeMsg.nodes[i]; - - cJSON *nodeId = cJSON_GetObjectItem(nodeInfo, "nodeId"); - if (!nodeId || nodeId->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, nodeId not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - node->nodeId = (int32_t)nodeId->valueint; - - cJSON *nodeEp = cJSON_GetObjectItem(nodeInfo, "nodeEp"); - if (!nodeEp || nodeEp->type != cJSON_String || nodeEp->valuestring == NULL) { - vError("vgId:%d, failed to read %s, nodeFqdn not found", pVnode->vgId, file); - goto PARSE_VCFG_ERROR; - } - tstrncpy(node->nodeEp, nodeEp->valuestring, TSDB_EP_LEN); - - char nodeEpStr[TSDB_EP_LEN]; - vnodeGetDnodeEp(node->nodeId, nodeEpStr, NULL, NULL); - bool changed = (strcmp(node->nodeEp, nodeEpStr) != 0); - if (changed) { - tstrncpy(node->nodeEp, nodeEpStr, TSDB_EP_LEN); - nodeChanged = changed; - } - } - - ret = TSDB_CODE_SUCCESS; - -PARSE_VCFG_ERROR: - if (content != NULL) free(content); - if (root != NULL) cJSON_Delete(root); - if (fp != NULL) fclose(fp); - - if (nodeChanged) { - vnodeWriteCfg(&vnodeMsg); - } - - if (ret == TSDB_CODE_SUCCESS) { - vnodeLoadCfg(pVnode, &vnodeMsg); - } - - terrno = 0; - return ret; -} - -int32_t vnodeWriteCfg(SCreateVnodeMsg *pMsg) { - char file[TSDB_FILENAME_LEN + 30] = {0}; - sprintf(file, "%s/vnode%d/config.json", tsVnodeDir, pMsg->cfg.vgId); - - FILE *fp = fopen(file, "w"); - if (!fp) { - vError("vgId:%d, failed to write %s error:%s", pMsg->cfg.vgId, file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - return terrno; - } - - int32_t len = 0; - int32_t maxLen = 1000; - char * content = calloc(1, maxLen + 1); - - len += snprintf(content + len, maxLen - len, "{\n"); - len 
+= snprintf(content + len, maxLen - len, " \"db\": \"%s\",\n", pMsg->db); - len += snprintf(content + len, maxLen - len, " \"cfgVersion\": %d,\n", pMsg->cfg.dbCfgVersion); - len += snprintf(content + len, maxLen - len, " \"vgCfgVersion\": %d,\n", pMsg->cfg.vgCfgVersion); - len += snprintf(content + len, maxLen - len, " \"cacheBlockSize\": %d,\n", pMsg->cfg.cacheBlockSize); - len += snprintf(content + len, maxLen - len, " \"totalBlocks\": %d,\n", pMsg->cfg.totalBlocks); - len += snprintf(content + len, maxLen - len, " \"daysPerFile\": %d,\n", pMsg->cfg.daysPerFile); - len += snprintf(content + len, maxLen - len, " \"daysToKeep\": %d,\n", pMsg->cfg.daysToKeep); - len += snprintf(content + len, maxLen - len, " \"daysToKeep1\": %d,\n", pMsg->cfg.daysToKeep1); - len += snprintf(content + len, maxLen - len, " \"daysToKeep2\": %d,\n", pMsg->cfg.daysToKeep2); - len += snprintf(content + len, maxLen - len, " \"minRowsPerFileBlock\": %d,\n", pMsg->cfg.minRowsPerFileBlock); - len += snprintf(content + len, maxLen - len, " \"maxRowsPerFileBlock\": %d,\n", pMsg->cfg.maxRowsPerFileBlock); - len += snprintf(content + len, maxLen - len, " \"precision\": %d,\n", pMsg->cfg.precision); - len += snprintf(content + len, maxLen - len, " \"compression\": %d,\n", pMsg->cfg.compression); - len += snprintf(content + len, maxLen - len, " \"walLevel\": %d,\n", pMsg->cfg.walLevel); - len += snprintf(content + len, maxLen - len, " \"fsync\": %d,\n", pMsg->cfg.fsyncPeriod); - len += snprintf(content + len, maxLen - len, " \"replica\": %d,\n", pMsg->cfg.vgReplica); - len += snprintf(content + len, maxLen - len, " \"dbReplica\": %d,\n", pMsg->cfg.dbReplica); - len += snprintf(content + len, maxLen - len, " \"wals\": %d,\n", pMsg->cfg.wals); - len += snprintf(content + len, maxLen - len, " \"quorum\": %d,\n", pMsg->cfg.quorum); - len += snprintf(content + len, maxLen - len, " \"update\": %d,\n", pMsg->cfg.update); - len += snprintf(content + len, maxLen - len, " \"cacheLastRow\": %d,\n", 
pMsg->cfg.cacheLastRow); - len += snprintf(content + len, maxLen - len, " \"dbType\": %d,\n", pMsg->cfg.dbType); - len += snprintf(content + len, maxLen - len, " \"nodeInfos\": [{\n"); - for (int32_t i = 0; i < pMsg->cfg.vgReplica; i++) { - SVnodeDesc *node = &pMsg->nodes[i]; - vnodeGetDnodeEp(node->nodeId, node->nodeEp, NULL, NULL); - len += snprintf(content + len, maxLen - len, " \"nodeId\": %d,\n", node->nodeId); - len += snprintf(content + len, maxLen - len, " \"nodeEp\": \"%s\"\n", node->nodeEp); - if (i < pMsg->cfg.vgReplica - 1) { - len += snprintf(content + len, maxLen - len, " },{\n"); - } else { - len += snprintf(content + len, maxLen - len, " }]\n"); - } - } - len += snprintf(content + len, maxLen - len, "}\n"); - - fwrite(content, 1, len, fp); - taosFsync(fileno(fp)); - fclose(fp); - free(content); - terrno = 0; - - vInfo("vgId:%d, successed to write %s", pMsg->cfg.vgId, file); -#endif - return TSDB_CODE_SUCCESS; -} diff --git a/source/server/vnode/src/vnodeFile.c b/source/server/vnode/src/vnodeFile.c new file mode 100644 index 0000000000000000000000000000000000000000..9835e3e0fbcb8e5604a612bc7f2832bbb326538f --- /dev/null +++ b/source/server/vnode/src/vnodeFile.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+#define _DEFAULT_SOURCE
+#include "cJSON.h"
+#include "vnodeFile.h"
+
+/*
+ * Append formatted text to buf at offset len without ever writing past maxLen.
+ * snprintf returns the length it WOULD have written, so after a truncated append
+ * len becomes >= maxLen; further appends are then skipped and the caller detects
+ * the overflow with a single `len >= maxLen` test once the content is built.
+ */
+#define VNODE_FILE_APPEND(buf, len, maxLen, ...)                           \
+  do {                                                                     \
+    if ((len) < (maxLen)) {                                                \
+      (len) += snprintf((buf) + (len), (maxLen) - (len), __VA_ARGS__);     \
+    }                                                                      \
+  } while (0)
+
+/*
+ * Load the persisted configuration of a vnode from <tsVnodeDir>/vnode<vgId>/config.json.
+ *
+ * Every key read below is mandatory: a missing or mistyped key aborts the load.
+ *
+ * @param vgId  vgroup id, used to build the file path and in log messages
+ * @param pCfg  output; may be partially filled when the call fails
+ * @return TSDB_CODE_SUCCESS on success, TAOS_SYSTEM_ERROR(errno) when the buffer cannot be
+ *         allocated or the file cannot be opened, TSDB_CODE_VND_APP_ERROR on any content error.
+ *         terrno is reset to 0 on every path.
+ */
+int32_t vnodeReadCfg(int32_t vgId, SVnodeCfg *pCfg) {
+  int32_t ret = TSDB_CODE_VND_APP_ERROR;
+  int32_t len = 0;
+  int     maxLen = 4096;  // same limit as vnodeWriteCfg, so anything it writes can be read back
+  char   *content = NULL;
+  cJSON  *root = NULL;
+  FILE   *fp = NULL;
+
+  char file[PATH_MAX + 30] = {0};
+  snprintf(file, sizeof(file), "%s/vnode%d/config.json", tsVnodeDir, vgId);
+
+  content = calloc(1, maxLen + 1);
+  if (content == NULL) {
+    ret = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to read %s since out of memory", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+
+  fp = fopen(file, "r");
+  if (!fp) {
+    // capture errno before the logging call has a chance to overwrite it
+    ret = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to open vnode cfg file:%s to read, error:%s", vgId, file, strerror(errno));
+    goto PARSE_VCFG_ERROR;
+  }
+
+  len = (int32_t)fread(content, 1, maxLen, fp);
+  if (len <= 0) {
+    vError("vgId:%d, failed to read %s, content is null", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+
+  content[len] = 0;
+  root = cJSON_Parse(content);
+  if (root == NULL) {
+    vError("vgId:%d, failed to read %s, invalid json format", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+
+  // vnode
+  cJSON *db = cJSON_GetObjectItem(root, "db");
+  if (!db || db->type != cJSON_String || db->valuestring == NULL) {
+    vError("vgId:%d, failed to read %s, db not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  tstrncpy(pCfg->db, db->valuestring, sizeof(pCfg->db));
+
+  cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
+  if (!dropped || dropped->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, dropped not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->dropped = (int32_t)dropped->valueint;
+
+  // tsdb
+  cJSON *cacheBlockSize = cJSON_GetObjectItem(root, "cacheBlockSize");
+  if (!cacheBlockSize || cacheBlockSize->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, cacheBlockSize not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.cacheBlockSize = (int32_t)cacheBlockSize->valueint;
+
+  cJSON *totalBlocks = cJSON_GetObjectItem(root, "totalBlocks");
+  if (!totalBlocks || totalBlocks->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, totalBlocks not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.totalBlocks = (int32_t)totalBlocks->valueint;
+
+  cJSON *daysPerFile = cJSON_GetObjectItem(root, "daysPerFile");
+  if (!daysPerFile || daysPerFile->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, daysPerFile not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.daysPerFile = (int32_t)daysPerFile->valueint;
+
+  cJSON *daysToKeep0 = cJSON_GetObjectItem(root, "daysToKeep0");
+  if (!daysToKeep0 || daysToKeep0->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, daysToKeep0 not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.daysToKeep0 = (int32_t)daysToKeep0->valueint;
+
+  cJSON *daysToKeep1 = cJSON_GetObjectItem(root, "daysToKeep1");
+  if (!daysToKeep1 || daysToKeep1->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, daysToKeep1 not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.daysToKeep1 = (int32_t)daysToKeep1->valueint;
+
+  cJSON *daysToKeep2 = cJSON_GetObjectItem(root, "daysToKeep2");
+  if (!daysToKeep2 || daysToKeep2->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, daysToKeep2 not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.daysToKeep2 = (int32_t)daysToKeep2->valueint;
+
+  cJSON *minRowsPerFileBlock = cJSON_GetObjectItem(root, "minRowsPerFileBlock");
+  if (!minRowsPerFileBlock || minRowsPerFileBlock->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, minRowsPerFileBlock not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.minRowsPerFileBlock = (int32_t)minRowsPerFileBlock->valueint;
+
+  cJSON *maxRowsPerFileBlock = cJSON_GetObjectItem(root, "maxRowsPerFileBlock");
+  if (!maxRowsPerFileBlock || maxRowsPerFileBlock->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, maxRowsPerFileBlock not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.maxRowsPerFileBlock = (int32_t)maxRowsPerFileBlock->valueint;
+
+  cJSON *precision = cJSON_GetObjectItem(root, "precision");
+  if (!precision || precision->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, precision not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.precision = (int8_t)precision->valueint;
+
+  cJSON *compression = cJSON_GetObjectItem(root, "compression");
+  if (!compression || compression->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, compression not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.compression = (int8_t)compression->valueint;
+
+  cJSON *update = cJSON_GetObjectItem(root, "update");
+  if (!update || update->type != cJSON_Number) {
+    vError("vgId: %d, failed to read %s, update not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.update = (int8_t)update->valueint;
+
+  cJSON *cacheLastRow = cJSON_GetObjectItem(root, "cacheLastRow");
+  if (!cacheLastRow || cacheLastRow->type != cJSON_Number) {
+    vError("vgId: %d, failed to read %s, cacheLastRow not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->tsdb.cacheLastRow = (int8_t)cacheLastRow->valueint;
+
+  // wal
+  cJSON *walLevel = cJSON_GetObjectItem(root, "walLevel");
+  if (!walLevel || walLevel->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, walLevel not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->wal.walLevel = (int8_t)walLevel->valueint;
+
+  // validate fsyncPeriod itself (the check used to test walLevel, so a missing key crashed below)
+  cJSON *fsyncPeriod = cJSON_GetObjectItem(root, "fsyncPeriod");
+  if (!fsyncPeriod || fsyncPeriod->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, fsyncPeriod not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->wal.fsyncPeriod = (int32_t)fsyncPeriod->valueint;
+
+  // sync
+  cJSON *replica = cJSON_GetObjectItem(root, "replica");
+  if (!replica || replica->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s, replica not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->sync.replica = (int8_t)replica->valueint;
+
+  cJSON *quorum = cJSON_GetObjectItem(root, "quorum");
+  if (!quorum || quorum->type != cJSON_Number) {
+    vError("vgId: %d, failed to read %s, quorum not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+  pCfg->sync.quorum = (int8_t)quorum->valueint;
+
+  cJSON *nodes = cJSON_GetObjectItem(root, "nodes");
+  if (!nodes || nodes->type != cJSON_Array) {
+    vError("vgId:%d, failed to read %s, nodes not found", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+
+  // NOTE(review): replica comes straight from the file and is not range-checked against the
+  // capacity of pCfg->sync.nodes, whose declaration is not visible here -- confirm and bound it.
+  int size = cJSON_GetArraySize(nodes);
+  if (size != pCfg->sync.replica) {
+    vError("vgId:%d, failed to read %s, nodes size not matched", vgId, file);
+    goto PARSE_VCFG_ERROR;
+  }
+
+  for (int i = 0; i < size; ++i) {
+    cJSON *nodeInfo = cJSON_GetArrayItem(nodes, i);
+    if (nodeInfo == NULL) {
+      // never report success with a node slot left unfilled
+      vError("vgId:%d, failed to read %s, node %d not found", vgId, file, i);
+      goto PARSE_VCFG_ERROR;
+    }
+    SNodeInfo *node = &pCfg->sync.nodes[i];
+
+    cJSON *port = cJSON_GetObjectItem(nodeInfo, "port");
+    if (!port || port->type != cJSON_Number) {
+      vError("vgId:%d, failed to read %s, port not found", vgId, file);
+      goto PARSE_VCFG_ERROR;
+    }
+    node->nodePort = (uint16_t)port->valueint;
+
+    cJSON *fqdn = cJSON_GetObjectItem(nodeInfo, "fqdn");
+    if (!fqdn || fqdn->type != cJSON_String || fqdn->valuestring == NULL) {
+      vError("vgId:%d, failed to read %s, fqdn not found", vgId, file);
+      goto PARSE_VCFG_ERROR;
+    }
+    tstrncpy(node->nodeFqdn, fqdn->valuestring, TSDB_FQDN_LEN);
+  }
+
+  ret = TSDB_CODE_SUCCESS;
+
+PARSE_VCFG_ERROR:
+  free(content);
+  if (root != NULL) cJSON_Delete(root);
+  if (fp != NULL) fclose(fp);
+
+  terrno = 0;
+  return ret;
+}
+
+/*
+ * Persist the configuration of a vnode to <tsVnodeDir>/vnode<vgId>/config.json.
+ *
+ * The JSON text is built completely in memory before the file is opened, so a failure
+ * while formatting never truncates an existing config file.
+ *
+ * @param vgId  vgroup id, used to build the file path, written as "vgId" and used in logs
+ * @param pCfg  configuration to serialize
+ * @return TSDB_CODE_SUCCESS on success (terrno set to 0); otherwise an error code, which is
+ *         also stored in terrno: TAOS_SYSTEM_ERROR(errno) for allocation/open/write failures,
+ *         TSDB_CODE_VND_APP_ERROR when the content does not fit the buffer.
+ */
+int32_t vnodeWriteCfg(int32_t vgId, SVnodeCfg *pCfg) {
+  int32_t code = TSDB_CODE_SUCCESS;
+  int32_t len = 0;
+  int32_t maxLen = 4096;  // fixed fields plus one fqdn per replica; overflow is detected below
+  char   *content = NULL;
+  FILE   *fp = NULL;
+
+  char file[PATH_MAX + 30] = {0};
+  snprintf(file, sizeof(file), "%s/vnode%d/config.json", tsVnodeDir, vgId);
+
+  content = calloc(1, maxLen + 1);
+  if (content == NULL) {
+    code = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to write %s since out of memory", vgId, file);
+    goto WRITE_VCFG_OVER;
+  }
+
+  VNODE_FILE_APPEND(content, len, maxLen, "{\n");
+  // vnode
+  VNODE_FILE_APPEND(content, len, maxLen, " \"vgId\": %d,\n", vgId);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"db\": \"%s\",\n", pCfg->db);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"dropped\": %d,\n", pCfg->dropped);
+  // tsdb
+  VNODE_FILE_APPEND(content, len, maxLen, " \"cacheBlockSize\": %d,\n", pCfg->tsdb.cacheBlockSize);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"totalBlocks\": %d,\n", pCfg->tsdb.totalBlocks);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"daysPerFile\": %d,\n", pCfg->tsdb.daysPerFile);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"daysToKeep0\": %d,\n", pCfg->tsdb.daysToKeep0);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"daysToKeep1\": %d,\n", pCfg->tsdb.daysToKeep1);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"daysToKeep2\": %d,\n", pCfg->tsdb.daysToKeep2);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"minRowsPerFileBlock\": %d,\n", pCfg->tsdb.minRowsPerFileBlock);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"maxRowsPerFileBlock\": %d,\n", pCfg->tsdb.maxRowsPerFileBlock);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"precision\": %d,\n", pCfg->tsdb.precision);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"compression\": %d,\n", pCfg->tsdb.compression);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"cacheLastRow\": %d,\n", pCfg->tsdb.cacheLastRow);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"update\": %d,\n", pCfg->tsdb.update);
+  // wal
+  VNODE_FILE_APPEND(content, len, maxLen, " \"walLevel\": %d,\n", pCfg->wal.walLevel);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"fsyncPeriod\": %d,\n", pCfg->wal.fsyncPeriod);
+  // sync
+  VNODE_FILE_APPEND(content, len, maxLen, " \"quorum\": %d,\n", pCfg->sync.quorum);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"replica\": %d,\n", pCfg->sync.replica);
+  // Open and close the array outside the loop so that replica == 0 still yields valid JSON
+  // ("[]"); for replica >= 1 the emitted bytes are the same as before.
+  VNODE_FILE_APPEND(content, len, maxLen, " \"nodes\": [");
+  for (int32_t i = 0; i < pCfg->sync.replica; i++) {
+    SNodeInfo *node = &pCfg->sync.nodes[i];
+    VNODE_FILE_APPEND(content, len, maxLen, "%s{\n", i == 0 ? "" : ",");
+    VNODE_FILE_APPEND(content, len, maxLen, " \"port\": %u,\n", node->nodePort);
+    VNODE_FILE_APPEND(content, len, maxLen, " \"fqdn\": \"%s\"\n", node->nodeFqdn);
+    VNODE_FILE_APPEND(content, len, maxLen, " }");
+  }
+  VNODE_FILE_APPEND(content, len, maxLen, "]\n");
+  VNODE_FILE_APPEND(content, len, maxLen, "}\n");
+
+  if (len >= maxLen) {
+    // an append was truncated: refuse to store a corrupt config
+    code = TSDB_CODE_VND_APP_ERROR;
+    vError("vgId:%d, failed to write %s since content is too long", vgId, file);
+    goto WRITE_VCFG_OVER;
+  }
+
+  fp = fopen(file, "w");
+  if (!fp) {
+    code = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to write %s error:%s", vgId, file, strerror(errno));
+    goto WRITE_VCFG_OVER;
+  }
+
+  if (fwrite(content, 1, len, fp) != (size_t)len) {
+    code = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to write %s error:%s", vgId, file, strerror(errno));
+    goto WRITE_VCFG_OVER;
+  }
+
+  taosFsyncFile(fileno(fp));
+
+WRITE_VCFG_OVER:
+  if (fp != NULL) fclose(fp);
+  free(content);
+
+  if (code != TSDB_CODE_SUCCESS) {
+    terrno = code;
+    return code;
+  }
+
+  terrno = 0;
+  vInfo("vgId:%d, successed to write %s", vgId, file);
+  return TSDB_CODE_SUCCESS;
+}
+
+/*
+ * Load the persisted sync state (term and voteFor) of a vnode from
+ * <tsVnodeDir>/vnode<vgId>/term.json.
+ *
+ * @param vgId    vgroup id, used to build the file path and in log messages
+ * @param pState  output; may be partially filled when the call fails
+ * @return TSDB_CODE_SUCCESS on success, TAOS_SYSTEM_ERROR(errno) when the buffer cannot be
+ *         allocated or the file cannot be opened, TSDB_CODE_VND_APP_ERROR on any content error.
+ */
+int32_t vnodeReadTerm(int32_t vgId, SSyncServerState *pState) {
+  int32_t ret = TSDB_CODE_VND_APP_ERROR;
+  int32_t len = 0;
+  int32_t maxLen = 100;
+  char   *content = NULL;
+  cJSON  *root = NULL;
+  FILE   *fp = NULL;
+
+  char file[PATH_MAX + 30] = {0};
+  snprintf(file, sizeof(file), "%s/vnode%d/term.json", tsVnodeDir, vgId);
+
+  content = calloc(1, maxLen + 1);
+  if (content == NULL) {
+    ret = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to read %s since out of memory", vgId, file);
+    goto PARSE_TERM_ERROR;
+  }
+
+  // the file was never opened before, so fread() used to run on a NULL stream
+  fp = fopen(file, "r");
+  if (!fp) {
+    ret = TAOS_SYSTEM_ERROR(errno);
+    vError("vgId:%d, failed to open %s to read since %s", vgId, file, strerror(errno));
+    goto PARSE_TERM_ERROR;
+  }
+
+  len = (int32_t)fread(content, 1, maxLen, fp);
+  if (len <= 0) {
+    vError("vgId:%d, failed to read %s since content is null", vgId, file);
+    goto PARSE_TERM_ERROR;
+  }
+
+  content[len] = 0;
+  root = cJSON_Parse(content);
+  if (root == NULL) {
+    vError("vgId:%d, failed to read %s since invalid json format", vgId, file);
+    goto PARSE_TERM_ERROR;
+  }
+
+  cJSON *term = cJSON_GetObjectItem(root, "term");
+  if (!term || term->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s since term not found", vgId, file);
+    goto PARSE_TERM_ERROR;
+  }
+  pState->term = (uint64_t)term->valueint;
+
+  cJSON *voteFor = cJSON_GetObjectItem(root, "voteFor");
+  if (!voteFor || voteFor->type != cJSON_Number) {
+    vError("vgId:%d, failed to read %s since voteFor not found", vgId, file);
+    goto PARSE_TERM_ERROR;
+  }
+  pState->voteFor = (int64_t)voteFor->valueint;
+
+  // voteFor is a signed node id, so it is printed with PRId64
+  vInfo("vgId:%d, read %s success, voteFor:%" PRId64 ", term:%" PRIu64, vgId, file, (int64_t)pState->voteFor,
+        (uint64_t)pState->term);
+
+  // report success; the function previously fell through with the initial error code
+  ret = TSDB_CODE_SUCCESS;
+
+PARSE_TERM_ERROR:
+  free(content);
+  if (root != NULL) cJSON_Delete(root);
+  if (fp != NULL) fclose(fp);
+
+  return ret;
+}
+
+/*
+ * Persist the sync state (term and voteFor) of a vnode to
+ * <tsVnodeDir>/vnode<vgId>/term.json.
+ *
+ * The JSON text is built in memory before the file is opened, so a formatting failure
+ * never truncates an existing term file.
+ *
+ * @param vgId    vgroup id, used to build the file path and in log messages
+ * @param pState  state to serialize
+ * @return TSDB_CODE_SUCCESS on success, -1 on any failure
+ */
+int32_t vnodeWriteTerm(int32_t vgId, SSyncServerState *pState) {
+  int32_t code = -1;
+  int32_t len = 0;
+  int32_t maxLen = 100;
+  char   *content = NULL;
+  FILE   *fp = NULL;
+
+  char file[PATH_MAX + 30] = {0};
+  snprintf(file, sizeof(file), "%s/vnode%d/term.json", tsVnodeDir, vgId);
+
+  content = calloc(1, maxLen + 1);
+  if (content == NULL) {
+    vError("vgId:%d, failed to write %s since out of memory", vgId, file);
+    goto WRITE_TERM_OVER;
+  }
+
+  VNODE_FILE_APPEND(content, len, maxLen, "{\n");
+  // the comma after "term" is required: without it the file cannot be parsed back by vnodeReadTerm
+  VNODE_FILE_APPEND(content, len, maxLen, " \"term\": %" PRIu64 ",\n", (uint64_t)pState->term);
+  VNODE_FILE_APPEND(content, len, maxLen, " \"voteFor\": %" PRId64 "\n", (int64_t)pState->voteFor);
+  VNODE_FILE_APPEND(content, len, maxLen, "}\n");
+
+  if (len >= maxLen) {
+    vError("vgId:%d, failed to write %s since content is too long", vgId, file);
+    goto WRITE_TERM_OVER;
+  }
+
+  fp = fopen(file, "w");
+  if (!fp) {
+    vError("vgId:%d, failed to write %s since %s", vgId, file, strerror(errno));
+    goto WRITE_TERM_OVER;
+  }
+
+  if (fwrite(content, 1, len, fp) != (size_t)len) {
+    vError("vgId:%d, failed to write %s since %s", vgId, file, strerror(errno));
+    goto WRITE_TERM_OVER;
+  }
+
+  taosFsyncFile(fileno(fp));
+  code = TSDB_CODE_SUCCESS;
+
+WRITE_TERM_OVER:
+  if (fp != NULL) fclose(fp);
+  free(content);
+
+  if (code == TSDB_CODE_SUCCESS) {
+    vInfo("vgId:%d, write %s success, voteFor:%" PRId64 ", term:%" PRIu64, vgId, file, (int64_t)pState->voteFor,
+          (uint64_t)pState->term);
+  }
+  return code;
+}
\ No newline at end of file
diff --git a/source/server/vnode/src/vnodeInt.c b/source/server/vnode/src/vnodeInt.c index 7ec5200e5d961aa86c87f58a41f85109a59e7aa1..9e1739a68e0c4814a3a8f66e62bb878c779df382 100644 --- a/source/server/vnode/src/vnodeInt.c +++ b/source/server/vnode/src/vnodeInt.c @@ -14,935 +14,78 @@ */ #define _DEFAULT_SOURCE -#include "tglobal.h" -#include "ttimer.h" -#include "thash.h" -// #include "query.h" -#include "vnodeCfg.h" -#include "vnodeMgmt.h" -#include "vnodeRead.h" -#include "vnodeStatus.h" -#include "vnodeVersion.h" -#include "vnodeWorker.h" -#include "vnodeWrite.h" +#include "os.h" #include "tstep.h" +#include "vnodeMain.h" #include "vnodeMgmt.h" #include "vnodeRead.h" -#include "vnodeWorker.h" #include "vnodeWrite.h" -typedef struct { - pthread_t thread; - int32_t threadIndex; - int32_t failed; - int32_t opened; - int32_t vnodeNum; - int32_t * vnodeList; -} SOpenVnodeThread; - static struct { SSteps *steps; SVnodeFp fp; - void * timer; - SHashObj *hash; - int32_t openVnodes; - int32_t totalVnodes; void (*msgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); -} tsVnode; +} tsVint; void
vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port) { - return (*tsVnode.fp.GetDnodeEp)(dnodeId, ep, fqdn, port); + return (*tsVint.fp.GetDnodeEp)(dnodeId, ep, fqdn, port); } void vnodeSendMsgToDnode(struct SRpcEpSet *epSet, struct SRpcMsg *rpcMsg) { - (*tsVnode.fp.SendMsgToDnode)(epSet, rpcMsg); -} - -void vnodeSendMsgToMnode(struct SRpcMsg *rpcMsg) { return (*tsVnode.fp.SendMsgToMnode)(rpcMsg); } - -static void vnodeIncRef(void *ptNode) { - assert(ptNode != NULL); - - SVnode **ppVnode = (SVnode **)ptNode; - assert(ppVnode); - assert(*ppVnode); - - SVnode *pVnode = *ppVnode; - atomic_add_fetch_32(&pVnode->refCount, 1); - vTrace("vgId:%d, get vnode, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); -} - -SVnode *vnodeAcquire(int32_t vgId) { - SVnode *pVnode = NULL; - - // taosHashGetClone(tsVnode.hash, &vgId, sizeof(int32_t), vnodeIncRef, (void*)&pVnode); - if (pVnode == NULL) { - terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; - vDebug("vgId:%d, not exist", vgId); - return NULL; - } - - return pVnode; -} - -SVnode *vnodeAcquireNotClose(int32_t vgId) { - // SVnode *pVnode = vnodeAcquire(vgId); - // if (pVnode != NULL && pVnode->preClose == 1) { - // vnodeRelease(pVnode); - // terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; - // vDebug("vgId:%d, not exist, pre closing", vgId); - // return NULL; - // } - - // return pVnode; - return NULL; -} - -void vnodeRelease(SVnode *pVnode) { - if (pVnode == NULL) return; - - int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); - int32_t vgId = pVnode->vgId; - - vTrace("vgId:%d, release vnode, refCount:%d pVnode:%p", vgId, refCount, pVnode); - assert(refCount >= 0); - - if (refCount <= 0) { - vDebug("vgId:%d, vnode will be destroyed, refCount:%d pVnode:%p", vgId, refCount, pVnode); - vnodeProcessDestroyTask(pVnode); - int32_t count = taosHashGetSize(tsVnode.hash); - vDebug("vgId:%d, vnode is destroyed, vnodes:%d", vgId, count); - } -} - -static int32_t vnodeProcessTsdbStatus(void *arg, int32_t 
status, int32_t eno); - -int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg) { - int32_t code; - - SVnode *pVnode = vnodeAcquire(pVnodeCfg->cfg.vgId); - if (pVnode != NULL) { - vDebug("vgId:%d, vnode already exist, refCount:%d pVnode:%p", pVnodeCfg->cfg.vgId, pVnode->refCount, pVnode); - vnodeRelease(pVnode); - return TSDB_CODE_SUCCESS; - } - -#if 0 - if (tfsMkdir("vnode") < 0) { - vError("vgId:%d, failed to create vnode dir, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(terrno)); - return terrno; - } - - char vnodeDir[TSDB_FILENAME_LEN] = "\0"; - snprintf(vnodeDir, TSDB_FILENAME_LEN, "/vnode/vnode%d", pVnodeCfg->cfg.vgId); - if (tfsMkdir(vnodeDir) < 0) { - vError("vgId:%d, failed to create vnode dir %s, reason:%s", pVnodeCfg->cfg.vgId, vnodeDir, strerror(errno)); - return terrno; - } - - code = vnodeWriteCfg(pVnodeCfg); - if (code != TSDB_CODE_SUCCESS) { - vError("vgId:%d, failed to save vnode cfg, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(code)); - return code; - } - - if (tsdbCreateRepo(pVnodeCfg->cfg.vgId) < 0) { - vError("vgId:%d, failed to create tsdb in vnode, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(terrno)); - return TSDB_CODE_VND_INIT_FAILED; - } -#endif - vInfo("vgId:%d, vnode dir is created, walLevel:%d fsyncPeriod:%d", pVnodeCfg->cfg.vgId, pVnodeCfg->cfg.walLevel, - pVnodeCfg->cfg.fsyncPeriod); - code = vnodeOpen(pVnodeCfg->cfg.vgId); - - return code; -} - -int32_t vnodeSync(int32_t vgId) { -#if 0 - SVnode *pVnode = vnodeAcquireNotClose(vgId); - if (pVnode == NULL) { - vDebug("vgId:%d, failed to sync, vnode not find", vgId); - return TSDB_CODE_VND_INVALID_VGROUP_ID; - } - - if (pVnode->role == TAOS_SYNC_ROLE_SLAVE) { - vInfo("vgId:%d, vnode will sync, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - - pVnode->version = 0; - pVnode->fversion = 0; - walResetVersion(pVnode->wal, pVnode->fversion); - - syncRecover(pVnode->sync); - } - - vnodeRelease(pVnode); -#endif - return TSDB_CODE_SUCCESS; -} - -int32_t vnodeDrop(int32_t vgId) { - SVnode 
*pVnode = vnodeAcquireNotClose(vgId); - if (pVnode == NULL) { - vDebug("vgId:%d, failed to drop, vnode not find", vgId); - return TSDB_CODE_VND_INVALID_VGROUP_ID; - } - if (pVnode->dropped) { - vnodeRelease(pVnode); - return TSDB_CODE_SUCCESS; - } - - vInfo("vgId:%d, vnode will be dropped, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - pVnode->dropped = 1; - - vnodeRelease(pVnode); - vnodeProcessCleanupTask(pVnode); - - return TSDB_CODE_SUCCESS; -} - -int32_t vnodeCompact(int32_t vgId) { -#if 0 - SVnode *pVnode = vnodeAcquire(vgId); - if (pVnode != NULL) { - vDebug("vgId:%d, compact vnode msg is received", vgId); - // not care success or not - tsdbCompact(((SVnode *)pVnode)->tsdb); - vnodeRelease(pVnode); - } else { - vInfo("vgId:%d, vnode not exist, can't compact it", vgId); - return TSDB_CODE_VND_INVALID_VGROUP_ID; - } -#endif - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeAlterImp(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg) { -#if 0 - STsdbCfg tsdbCfg = pVnode->tsdbCfg; - SSyncCfg syncCfg = pVnode->syncCfg; - int32_t dbCfgVersion = pVnode->dbCfgVersion; - int32_t vgCfgVersion = pVnode->vgCfgVersion; - - int32_t code = vnodeWriteCfg(pVnodeCfg); - if (code != TSDB_CODE_SUCCESS) { - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - return code; - } - - code = vnodeReadCfg(pVnode); - if (code != TSDB_CODE_SUCCESS) { - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - return code; - } - - code = walAlter(pVnode->wal, &pVnode->walCfg); - if (code != TSDB_CODE_SUCCESS) { - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - return code; - } - - bool tsdbCfgChanged = (memcmp(&tsdbCfg, &pVnode->tsdbCfg, sizeof(STsdbCfg)) != 0); - bool syncCfgChanged = (memcmp(&syncCfg, &pVnode->syncCfg, 
sizeof(SSyncCfg)) != 0); - - vDebug("vgId:%d, tsdbchanged:%d syncchanged:%d while alter vnode", pVnode->vgId, tsdbCfgChanged, syncCfgChanged); - - if (tsdbCfgChanged || syncCfgChanged) { - // vnode in non-ready state and still needs to return success instead of TSDB_CODE_VND_INVALID_STATUS - // dbCfgVersion can be corrected by status msg - if (syncCfgChanged) { - if (!vnodeSetUpdatingStatus(pVnode)) { - vDebug("vgId:%d, vnode is not ready, do alter operation later", pVnode->vgId); - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - return TSDB_CODE_SUCCESS; - } - - code = syncReconfig(pVnode->sync, &pVnode->syncCfg); - if (code != TSDB_CODE_SUCCESS) { - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - vnodeSetReadyStatus(pVnode); - return code; - } - } - - if (tsdbCfgChanged && pVnode->tsdb) { - code = tsdbConfigRepo(pVnode->tsdb, &pVnode->tsdbCfg); - if (code != TSDB_CODE_SUCCESS) { - pVnode->dbCfgVersion = dbCfgVersion; - pVnode->vgCfgVersion = vgCfgVersion; - pVnode->syncCfg = syncCfg; - pVnode->tsdbCfg = tsdbCfg; - vnodeSetReadyStatus(pVnode); - return code; - } - } - - vnodeSetReadyStatus(pVnode); - } -#endif - return 0; -} - -int32_t vnodeAlter(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg) { -#if 0 - vDebug("vgId:%d, current dbCfgVersion:%d vgCfgVersion:%d, input dbCfgVersion:%d vgCfgVersion:%d", pVnode->vgId, - pVnode->dbCfgVersion, pVnode->vgCfgVersion, pVnodeCfg->cfg.dbCfgVersion, pVnodeCfg->cfg.vgCfgVersion); - - if (pVnode->dbCfgVersion == pVnodeCfg->cfg.dbCfgVersion && pVnode->vgCfgVersion == pVnodeCfg->cfg.vgCfgVersion) { - vDebug("vgId:%d, cfg not change", pVnode->vgId); - return TSDB_CODE_SUCCESS; - } - - int32_t code = vnodeAlterImp(pVnode, pVnodeCfg); - - if (code != 0) { - vError("vgId:%d, failed to alter vnode, code:0x%x", pVnode->vgId, code); - } else { - vDebug("vgId:%d, vnode 
is altered", pVnode->vgId); - } - - return code; -#endif - return 0; -} - -static void vnodeFindWalRootDir(int32_t vgId, char *walRootDir) { -#if 0 - char vnodeDir[TSDB_FILENAME_LEN] = "\0"; - snprintf(vnodeDir, TSDB_FILENAME_LEN, "/vnode/vnode%d/wal", vgId); - - TDIR *tdir = tfsOpendir(vnodeDir); - if (!tdir) return; - - const TFILE *tfile = tfsReaddir(tdir); - if (!tfile) { - tfsClosedir(tdir); - return; - } - - sprintf(walRootDir, "%s/vnode/vnode%d", TFS_DISK_PATH(tfile->level, tfile->id), vgId); - - tfsClosedir(tdir); -#endif -} - -int32_t vnodeOpen(int32_t vgId) { -#if 0 - char temp[TSDB_FILENAME_LEN * 3]; - char rootDir[TSDB_FILENAME_LEN * 2]; - char walRootDir[TSDB_FILENAME_LEN * 2] = {0}; - snprintf(rootDir, TSDB_FILENAME_LEN * 2, "%s/vnode%d", tsVnodeDir, vgId); - - SVnode *pVnode = calloc(sizeof(SVnode), 1); - if (pVnode == NULL) { - vError("vgId:%d, failed to open vnode since no enough memory", vgId); - return TAOS_SYSTEM_ERROR(errno); - } - - atomic_add_fetch_32(&pVnode->refCount, 1); - - pVnode->vgId = vgId; - pVnode->fversion = 0; - pVnode->version = 0; - pVnode->tsdbCfg.tsdbId = pVnode->vgId; - pVnode->rootDir = strdup(rootDir); - pVnode->accessState = TSDB_VN_ALL_ACCCESS; - tsem_init(&pVnode->sem, 0, 0); - pthread_mutex_init(&pVnode->statusMutex, NULL); - vnodeSetInitStatus(pVnode); - - tsdbIncCommitRef(pVnode->vgId); - - int32_t code = vnodeReadCfg(pVnode); - if (code != TSDB_CODE_SUCCESS) { - vError("vgId:%d, failed to read config file, set cfgVersion to 0", pVnode->vgId); - vnodeCleanUp(pVnode); - return 0; - } - - code = vnodeReadVersion(pVnode); - if (code != TSDB_CODE_SUCCESS) { - pVnode->version = 0; - vError("vgId:%d, failed to read file version, generate it from data file", pVnode->vgId); - // Allow vnode start even when read file version fails, set file version as wal version or zero - // vnodeCleanUp(pVnode); - // return code; - } - - pVnode->fversion = pVnode->version; - - pVnode->pWriteQ = vnodeAllocWriteQueue(pVnode); - pVnode->pQueryQ 
= vnodeAllocQueryQueue(pVnode); - pVnode->pFetchQ = vnodeAllocFetchQueue(pVnode); - if (pVnode->pWriteQ == NULL || pVnode->pQueryQ == NULL || pVnode->pFetchQ == NULL) { - vnodeCleanUp(pVnode); - return terrno; - } - - STsdbAppH appH = {0}; - appH.appH = (void *)pVnode; - appH.notifyStatus = vnodeProcessTsdbStatus; - appH.cqH = pVnode->cq; - appH.cqCreateFunc = cqCreate; - appH.cqDropFunc = cqDrop; - - terrno = 0; - pVnode->tsdb = tsdbOpenRepo(&(pVnode->tsdbCfg), &appH); - if (pVnode->tsdb == NULL) { - vnodeCleanUp(pVnode); - return terrno; - } else if (tsdbGetState(pVnode->tsdb) != TSDB_STATE_OK) { - vError("vgId:%d, failed to open tsdb(state: %d), replica:%d reason:%s", pVnode->vgId, tsdbGetState(pVnode->tsdb), - pVnode->syncCfg.replica, tstrerror(terrno)); - if (pVnode->syncCfg.replica <= 1) { - vnodeCleanUp(pVnode); - return TSDB_CODE_VND_INVALID_TSDB_STATE; - } else { - pVnode->fversion = 0; - pVnode->version = 0; - } - } - - // walRootDir for wal & syncInfo.path (not empty dir of /vnode/vnode{pVnode->vgId}/wal) - vnodeFindWalRootDir(pVnode->vgId, walRootDir); - if (walRootDir[0] == 0) { - int level = -1, id = -1; - - tfsAllocDisk(TFS_PRIMARY_LEVEL, &level, &id); - if (level < 0 || id < 0) { - vnodeCleanUp(pVnode); - return terrno; - } - - sprintf(walRootDir, "%s/vnode/vnode%d", TFS_DISK_PATH(level, id), vgId); - } - - sprintf(temp, "%s/wal", walRootDir); - pVnode->walCfg.vgId = pVnode->vgId; - pVnode->wal = walOpen(temp, &pVnode->walCfg); - if (pVnode->wal == NULL) { - vnodeCleanUp(pVnode); - return terrno; - } - - walRestore(pVnode->wal, pVnode, (FWalWrite)vnodeProcessWalMsg); - if (pVnode->version == 0) { - pVnode->fversion = 0; - pVnode->version = walGetVersion(pVnode->wal); - } - - code = tsdbSyncCommit(pVnode->tsdb); - if (code != 0) { - vError("vgId:%d, failed to commit after restore from wal since %s", pVnode->vgId, tstrerror(code)); - vnodeCleanUp(pVnode); - return code; - } - - walRemoveAllOldFiles(pVnode->wal); - walRenew(pVnode->wal); - - 
pVnode->qMgmt = qOpenQueryMgmt(pVnode->vgId); - if (pVnode->qMgmt == NULL) { - vnodeCleanUp(pVnode); - return terrno; - } - - pVnode->events = NULL; - - vDebug("vgId:%d, vnode is opened in %s - %s, pVnode:%p", pVnode->vgId, rootDir, walRootDir, pVnode); - - taosHashPut(tsVnode.hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnode *)); - - vnodeSetReadyStatus(pVnode); - pVnode->role = TAOS_SYNC_ROLE_MASTER; -#endif - return TSDB_CODE_SUCCESS; -} - -int32_t vnodeClose(int32_t vgId) { - SVnode *pVnode = vnodeAcquireNotClose(vgId); - if (pVnode == NULL) return 0; - if (pVnode->dropped) { - vnodeRelease(pVnode); - return 0; - } - - // pVnode->preClose = 1; - - vDebug("vgId:%d, vnode will be closed, pVnode:%p", pVnode->vgId, pVnode); - vnodeRelease(pVnode); - vnodeCleanUp(pVnode); - - return 0; -} - -void vnodeDestroy(SVnode *pVnode) { -#if 0 - int32_t code = 0; - int32_t vgId = pVnode->vgId; - - if (pVnode->qMgmt) { - qCleanupQueryMgmt(pVnode->qMgmt); - pVnode->qMgmt = NULL; - } - - if (pVnode->wal) { - walStop(pVnode->wal); - } - - if (pVnode->tsdb) { - // the deleted vnode does not need to commit, so as to speed up the deletion - int toCommit = 1; - if (pVnode->dropped) toCommit = 0; - - code = tsdbCloseRepo(pVnode->tsdb, toCommit); - pVnode->tsdb = NULL; - } - - // stop continuous query - if (pVnode->cq) { - void *cq = pVnode->cq; - pVnode->cq = NULL; - cqClose(cq); - } - - if (pVnode->wal) { - if (code != 0) { - vError("vgId:%d, failed to commit while close tsdb repo, keep wal", pVnode->vgId); - } else { - walRemoveAllOldFiles(pVnode->wal); - } - walClose(pVnode->wal); - pVnode->wal = NULL; - } - - if (pVnode->pWriteQ) { - vnodeFreeWriteQueue(pVnode->pWriteQ); - pVnode->pWriteQ = NULL; - } - - if (pVnode->pQueryQ) { - vnodeFreeQueryQueue(pVnode->pQueryQ); - pVnode->pQueryQ = NULL; - } - - if (pVnode->pFetchQ) { - vnodeFreeFetchQueue(pVnode->pFetchQ); - pVnode->pFetchQ = NULL; - } - - tfree(pVnode->rootDir); - - if (pVnode->dropped) { - char 
rootDir[TSDB_FILENAME_LEN] = {0}; - char stagingDir[TSDB_FILENAME_LEN] = {0}; - sprintf(rootDir, "%s/vnode%d", "vnode", vgId); - sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId); - - tfsRename(rootDir, stagingDir); - - vnodeProcessBackupTask(pVnode); - - // dnodeSendStatusMsgToMnode(); - } - - tsem_destroy(&pVnode->sem); - pthread_mutex_destroy(&pVnode->statusMutex); - free(pVnode); - tsdbDecCommitRef(vgId); -#endif -} - -void vnodeCleanUp(SVnode *pVnode) { -#if 0 - vDebug("vgId:%d, vnode will cleanup, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - - vnodeSetClosingStatus(pVnode); - - taosHashRemove(tsVnode.hash, &pVnode->vgId, sizeof(int32_t)); - - // stop replication module - if (pVnode->sync > 0) { - int64_t sync = pVnode->sync; - pVnode->sync = -1; - syncStop(sync); - } - - vDebug("vgId:%d, vnode is cleaned, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - vnodeRelease(pVnode); -#endif -} - -#if 0 -static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno) { - SVnode *pVnode = arg; - - if (eno != TSDB_CODE_SUCCESS) { - vError("vgId:%d, failed to commit since %s, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, tstrerror(eno), - pVnode->fversion, pVnode->version); - pVnode->isCommiting = 0; - pVnode->isFull = 1; - return 0; - } - - if (status == TSDB_STATUS_COMMIT_START) { - pVnode->isCommiting = 1; - pVnode->cversion = pVnode->version; - vInfo("vgId:%d, start commit, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, pVnode->fversion, pVnode->version); - if (!vnodeInInitStatus(pVnode)) { - return walRenew(pVnode->wal); - } - return 0; - } - - if (status == TSDB_STATUS_COMMIT_OVER) { - pVnode->isCommiting = 0; - pVnode->isFull = 0; - pVnode->fversion = pVnode->cversion; - vInfo("vgId:%d, commit over, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, pVnode->fversion, pVnode->version); - if (!vnodeInInitStatus(pVnode)) { - walRemoveOneOldFile(pVnode->wal); - } - return vnodeSaveVersion(pVnode); - } - - 
// timer thread callback - if (status == TSDB_STATUS_COMMIT_NOBLOCK) { - qSolveCommitNoBlock(pVnode->tsdb, pVnode->qMgmt); - } - - return 0; -} -#endif - -static void *vnodeOpenVnode(void *param) { - SOpenVnodeThread *pThread = param; - - vDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); - setThreadName("vnodeOpenVnode"); - - for (int32_t v = 0; v < pThread->vnodeNum; ++v) { - int32_t vgId = pThread->vnodeList[v]; - - char stepDesc[TSDB_STEP_DESC_LEN] = {0}; - snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", vgId, - tsVnode.openVnodes, tsVnode.totalVnodes); - // (*vnodeInst()->fp.ReportStartup)("open-vnodes", stepDesc); - - if (vnodeOpen(vgId) < 0) { - vError("vgId:%d, failed to open vnode by thread:%d", vgId, pThread->threadIndex); - pThread->failed++; - } else { - vDebug("vgId:%d, is opened by thread:%d", vgId, pThread->threadIndex); - pThread->opened++; - } - - atomic_add_fetch_32(&tsVnode.openVnodes, 1); - } - - vDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, - pThread->failed); - return NULL; -} - -static int32_t vnodeGetVnodeListFromDisk(int32_t vnodeList[], int32_t *numOfVnodes) { -#if 0 - DIR *dir = opendir(tsVnodeDir); - if (dir == NULL) return TSDB_CODE_DND_NO_WRITE_ACCESS; - - *numOfVnodes = 0; - struct dirent *de = NULL; - while ((de = readdir(dir)) != NULL) { - if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; - if (de->d_type & DT_DIR) { - if (strncmp("vnode", de->d_name, 5) != 0) continue; - int32_t vnode = atoi(de->d_name + 5); - if (vnode == 0) continue; - - (*numOfVnodes)++; - - if (*numOfVnodes >= TSDB_MAX_VNODES) { - vError("vgId:%d, too many vnode directory in disk, exist:%d max:%d", vnode, *numOfVnodes, TSDB_MAX_VNODES); - closedir(dir); - return TSDB_CODE_DND_TOO_MANY_VNODES; - } else { - vnodeList[*numOfVnodes - 1] = vnode; - } - } - } - closedir(dir); -#endif - 
return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeOpenVnodes() { - int32_t vnodeList[TSDB_MAX_VNODES] = {0}; - int32_t numOfVnodes = 0; - int32_t status = vnodeGetVnodeListFromDisk(vnodeList, &numOfVnodes); - - if (status != TSDB_CODE_SUCCESS) { - vInfo("failed to get vnode list from disk since code:%d", status); - return status; - } - - tsVnode.totalVnodes = numOfVnodes; - - int32_t threadNum = tsNumOfCores; - int32_t vnodesPerThread = numOfVnodes / threadNum + 1; - - SOpenVnodeThread *threads = calloc(threadNum, sizeof(SOpenVnodeThread)); - for (int32_t t = 0; t < threadNum; ++t) { - threads[t].threadIndex = t; - threads[t].vnodeList = calloc(vnodesPerThread, sizeof(int32_t)); - } - - for (int32_t v = 0; v < numOfVnodes; ++v) { - int32_t t = v % threadNum; - SOpenVnodeThread *pThread = &threads[t]; - pThread->vnodeList[pThread->vnodeNum++] = vnodeList[v]; - } - - vInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes); - - for (int32_t t = 0; t < threadNum; ++t) { - SOpenVnodeThread *pThread = &threads[t]; - if (pThread->vnodeNum == 0) continue; - - pthread_attr_t thAttr; - pthread_attr_init(&thAttr); - pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); - if (pthread_create(&pThread->thread, &thAttr, vnodeOpenVnode, pThread) != 0) { - vError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno)); - } - - pthread_attr_destroy(&thAttr); - } - - int32_t openVnodes = 0; - int32_t failedVnodes = 0; - for (int32_t t = 0; t < threadNum; ++t) { - SOpenVnodeThread *pThread = &threads[t]; - if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) { - pthread_join(pThread->thread, NULL); - } - openVnodes += pThread->opened; - failedVnodes += pThread->failed; - free(pThread->vnodeList); - } - - free(threads); - vInfo("there are total vnodes:%d, opened:%d", numOfVnodes, openVnodes); - - if (failedVnodes != 0) { - vError("there are total vnodes:%d, failed:%d", numOfVnodes, failedVnodes); - 
return -1; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) { - void *pIter = taosHashIterate(tsVnode.hash, NULL); - while (pIter) { - SVnode **pVnode = pIter; - if (*pVnode) { - (*numOfVnodes)++; - if (*numOfVnodes >= TSDB_MAX_VNODES) { - vError("vgId:%d, too many open vnodes, exist:%d max:%d", (*pVnode)->vgId, *numOfVnodes, TSDB_MAX_VNODES); - continue; - } else { - vnodeList[*numOfVnodes - 1] = (*pVnode)->vgId; - } - } - - pIter = taosHashIterate(tsVnode.hash, pIter); - } - - return TSDB_CODE_SUCCESS; + (*tsVint.fp.SendMsgToDnode)(epSet, rpcMsg); } -static void vnodeCleanupVnodes() { - int32_t vnodeList[TSDB_MAX_VNODES] = {0}; - int32_t numOfVnodes = 0; - - int32_t code = vnodeGetVnodeList(vnodeList, &numOfVnodes); +void vnodeSendMsgToMnode(struct SRpcMsg *rpcMsg) { return (*tsVint.fp.SendMsgToMnode)(rpcMsg); } - if (code != TSDB_CODE_SUCCESS) { - vInfo("failed to get dnode list since code %d", code); - return; - } - - for (int32_t i = 0; i < numOfVnodes; ++i) { - vnodeClose(vnodeList[i]); - } - - vInfo("total vnodes:%d are all closed", numOfVnodes); -} - -static void vnodeInitMsgFp() { - tsVnode.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessMgmtMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_SUBMIT] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = 
vnodeProcessWriteMsg; - //mq related - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_CONNECT] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_DISCONNECT] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_ACK] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_RESET] = vnodeProcessWriteMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_QUERY] = vnodeProcessReadMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_MQ_CONSUME] = vnodeProcessReadMsg; - //mq related end - tsVnode.msgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessReadMsg; - tsVnode.msgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessReadMsg; -} +void vnodeReportStartup(char *name, char *desc) { (*tsVint.fp.ReportStartup)(name, desc); } void vnodeProcessMsg(SRpcMsg *pMsg) { - if (tsVnode.msgFp[pMsg->msgType]) { - (*tsVnode.msgFp[pMsg->msgType])(pMsg); + if (tsVint.msgFp[pMsg->msgType]) { + (*tsVint.msgFp[pMsg->msgType])(pMsg); } else { assert(0); } } -int32_t vnodeInitMain() { - vnodeInitMsgFp(); - - tsVnode.timer = taosTmrInit(100, 200, 60000, "VND-TIMER"); - if (tsVnode.timer == NULL) { - vError("failed to init vnode timer"); - return -1; - } - - tsVnode.hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - if (tsVnode.hash == NULL) { - taosTmrCleanUp(tsVnode.timer); - vError("failed to init vnode mgmt"); - return -1; - } - - vInfo("vnode main is initialized"); - return vnodeOpenVnodes(); -} - -void vnodeCleanupMain() { - taosTmrCleanUp(tsVnode.timer); - tsVnode.timer = NULL; - - vnodeCleanupVnodes(); - - taosHashCleanup(tsVnode.hash); - tsVnode.hash = NULL; -} - -static void vnodeBuildVloadMsg(SVnode *pVnode, SStatusMsg *pStatus) { -#if 0 - int64_t totalStorage = 0; - int64_t compStorage = 0; - int64_t pointsWritten = 0; - - if (vnodeInClosingStatus(pVnode)) return; - if (pStatus->openVnodes >= TSDB_MAX_VNODES) return; - - if (pVnode->tsdb) { - tsdbReportStat(pVnode->tsdb, &pointsWritten, &totalStorage, &compStorage); - } - - SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++]; - 
pLoad->vgId = htonl(pVnode->vgId); - pLoad->dbCfgVersion = htonl(pVnode->dbCfgVersion); - pLoad->vgCfgVersion = htonl(pVnode->vgCfgVersion); - pLoad->totalStorage = htobe64(totalStorage); - pLoad->compStorage = htobe64(compStorage); - pLoad->pointsWritten = htobe64(pointsWritten); - pLoad->vnodeVersion = htobe64(pVnode->version); - pLoad->status = pVnode->status; - pLoad->role = pVnode->role; - pLoad->replica = pVnode->syncCfg.replica; - pLoad->compact = (pVnode->tsdb != NULL) ? tsdbGetCompactState(pVnode->tsdb) : 0; -#endif -} - -void vnodeGetStatus(struct SStatusMsg *pStatus) { - void *pIter = taosHashIterate(tsVnode.hash, NULL); - while (pIter) { - SVnode **pVnode = pIter; - if (*pVnode) { - vnodeBuildVloadMsg(*pVnode, pStatus); - } - pIter = taosHashIterate(tsVnode.hash, pIter); - } -} - -void vnodeSetAccess(struct SVgroupAccess *pAccess, int32_t numOfVnodes) { - for (int32_t i = 0; i < numOfVnodes; ++i) { - pAccess[i].vgId = htonl(pAccess[i].vgId); - SVnode *pVnode = vnodeAcquireNotClose(pAccess[i].vgId); - if (pVnode != NULL) { - pVnode->accessState = pAccess[i].accessState; - if (pVnode->accessState != TSDB_VN_ALL_ACCCESS) { - vDebug("vgId:%d, access state is set to %d", pAccess[i].vgId, pVnode->accessState); - } - vnodeRelease(pVnode); - } - } -} - -void vnodeBackup(int32_t vgId) { - char newDir[TSDB_FILENAME_LEN] = {0}; - char stagingDir[TSDB_FILENAME_LEN] = {0}; - - sprintf(newDir, "%s/vnode%d", "vnode_bak", vgId); - sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId); - -#if 0 - if (tsEnableVnodeBak) { - tfsRmdir(newDir); - tfsRename(stagingDir, newDir); - } else { - vInfo("vgId:%d, vnode backup not enabled", vgId); - - tfsRmdir(stagingDir); - } -#endif +static void vnodeInitMsgFp() { + tsVint.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE] = 
vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessMgmtMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_SUBMIT] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = vnodeProcessWriteMsg; + // mq related + tsVint.msgFp[TSDB_MSG_TYPE_MQ_CONNECT] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MQ_DISCONNECT] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MQ_ACK] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MQ_RESET] = vnodeProcessWriteMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MQ_QUERY] = vnodeProcessReadMsg; + tsVint.msgFp[TSDB_MSG_TYPE_MQ_CONSUME] = vnodeProcessReadMsg; + // mq related end + tsVint.msgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessReadMsg; + tsVint.msgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessReadMsg; } int32_t vnodeInit(SVnodePara para) { - tsVnode.fp = para.fp; + vnodeInitMsgFp(); + tsVint.fp = para.fp; struct SSteps *steps = taosStepInit(8, NULL); if (steps == NULL) return -1; taosStepAdd(steps, "vnode-main", vnodeInitMain, vnodeCleanupMain); - taosStepAdd(steps, "vnode-worker", vnodeInitWorker, vnodeCleanupWorker); taosStepAdd(steps, "vnode-read", vnodeInitRead, vnodeCleanupRead); taosStepAdd(steps, "vnode-mgmt", vnodeInitMgmt, vnodeCleanupMgmt); taosStepAdd(steps, "vnode-write", vnodeInitWrite, vnodeCleanupWrite); - tsVnode.steps = steps; - return taosStepExec(tsVnode.steps); + tsVint.steps = steps; + return taosStepExec(tsVint.steps); } -void vnodeCleanup() { taosStepCleanup(tsVnode.steps); } +void vnodeCleanup() { taosStepCleanup(tsVint.steps); } diff --git a/source/server/vnode/src/vnodeMain.c b/source/server/vnode/src/vnodeMain.c new file mode 100644 index 
0000000000000000000000000000000000000000..5143f04c5b4b18f0fcfc2e0ce439b9f5a5b05734 --- /dev/null +++ b/source/server/vnode/src/vnodeMain.c @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "thash.h" +#include "tthread.h" +#include "vnodeFile.h" +#include "vnodeMain.h" +#include "vnodeMgmt.h" +#include "vnodeRead.h" +#include "vnodeWrite.h" + +typedef enum _VN_STATUS { + TAOS_VN_STATUS_INIT = 0, + TAOS_VN_STATUS_READY = 1, + TAOS_VN_STATUS_CLOSING = 2, + TAOS_VN_STATUS_UPDATING = 3 +} EVnodeStatus; + +char *vnodeStatus[] = {"init", "ready", "closing", "updating"}; + +typedef struct { + pthread_t *threadId; + int32_t threadIndex; + int32_t failed; + int32_t opened; + int32_t vnodeNum; + int32_t *vnodeList; +} SOpenVnodeThread; + +static struct { + SHashObj *hash; + int32_t openVnodes; + int32_t totalVnodes; +} tsVnode; + +static bool vnodeSetInitStatus(SVnode *pVnode) { + pthread_mutex_lock(&pVnode->statusMutex); + pVnode->status = TAOS_VN_STATUS_INIT; + pthread_mutex_unlock(&pVnode->statusMutex); + return true; +} + +static bool vnodeSetReadyStatus(SVnode *pVnode) { + bool set = false; + pthread_mutex_lock(&pVnode->statusMutex); + + if (pVnode->status == TAOS_VN_STATUS_INIT || pVnode->status == TAOS_VN_STATUS_UPDATING) { + pVnode->status = TAOS_VN_STATUS_READY; + set = true; + } + + pthread_mutex_unlock(&pVnode->statusMutex); + return set; +} + +static bool 
vnodeSetUpdatingStatus(SVnode *pVnode) { + bool set = false; + pthread_mutex_lock(&pVnode->statusMutex); + + if (pVnode->status == TAOS_VN_STATUS_READY) { + pVnode->status = TAOS_VN_STATUS_UPDATING; + set = true; + } + + pthread_mutex_unlock(&pVnode->statusMutex); + return set; +} + +static bool vnodeSetClosingStatus(SVnode *pVnode) { + bool set = false; + pthread_mutex_lock(&pVnode->statusMutex); + + if (pVnode->status == TAOS_VN_STATUS_INIT || pVnode->status == TAOS_VN_STATUS_READY) { + pVnode->status = TAOS_VN_STATUS_CLOSING; + set = true; + } + + pthread_mutex_unlock(&pVnode->statusMutex); + return set; +} + +static bool vnodeInStatus(SVnode *pVnode, EVnodeStatus status) { + bool in = false; + pthread_mutex_lock(&pVnode->statusMutex); + + if (pVnode->status == status) { + in = true; + } + + pthread_mutex_unlock(&pVnode->statusMutex); + return in; +} + +static void vnodeDestroyVnode(SVnode *pVnode) { + int32_t code = 0; + int32_t vgId = pVnode->vgId; + + if (pVnode->pQuery) { + // todo + } + + if (pVnode->pMeta) { + // todo + } + + if (pVnode->pTsdb) { + // todo + } + + if (pVnode->pTQ) { + // todo + } + + if (pVnode->pWal) { + // todo + } + + if (pVnode->allocator) { + // todo + } + + if (pVnode->pWriteQ) { + vnodeFreeWriteQueue(pVnode->pWriteQ); + pVnode->pWriteQ = NULL; + } + + if (pVnode->pQueryQ) { + vnodeFreeQueryQueue(pVnode->pQueryQ); + pVnode->pQueryQ = NULL; + } + + if (pVnode->pFetchQ) { + vnodeFreeFetchQueue(pVnode->pFetchQ); + pVnode->pFetchQ = NULL; + } + + if (pVnode->dropped) { + // todo + } + + pthread_mutex_destroy(&pVnode->statusMutex); + free(pVnode); +} + +static void vnodeCleanupVnode(SVnode *pVnode) { + vnodeSetClosingStatus(pVnode); + taosHashRemove(tsVnode.hash, &pVnode->vgId, sizeof(int32_t)); + vnodeRelease(pVnode); +} + +static int32_t vnodeOpenVnode(int32_t vgId) { + int32_t code = 0; + + SVnode *pVnode = calloc(sizeof(SVnode), 1); + if (pVnode == NULL) { + vError("vgId:%d, failed to open vnode since no enough memory", vgId); + 
return TAOS_SYSTEM_ERROR(errno); + } + + pVnode->vgId = vgId; + pVnode->accessState = TSDB_VN_ALL_ACCCESS; /* default access state */ + pVnode->status = TAOS_VN_STATUS_INIT; /* EVnodeStatus value: INIT lets the ready/closing transitions succeed */ + pVnode->refCount = 1; + pVnode->role = TAOS_SYNC_ROLE_CANDIDATE; + pthread_mutex_init(&pVnode->statusMutex, NULL); + + code = vnodeReadCfg(vgId, &pVnode->cfg); + if (code != TSDB_CODE_SUCCESS) { + vError("vgId:%d, failed to read config file, set cfgVersion to 0", pVnode->vgId); + pVnode->cfg.dropped = 1; + vnodeCleanupVnode(pVnode); + return 0; + } + + code = vnodeReadTerm(vgId, &pVnode->term); + if (code != TSDB_CODE_SUCCESS) { + vError("vgId:%d, failed to read term file since %s", pVnode->vgId, tstrerror(code)); + pVnode->cfg.dropped = 1; + vnodeCleanupVnode(pVnode); + return code; + } + + pVnode->pWriteQ = vnodeAllocWriteQueue(pVnode); + pVnode->pQueryQ = vnodeAllocQueryQueue(pVnode); + pVnode->pFetchQ = vnodeAllocFetchQueue(pVnode); + if (pVnode->pWriteQ == NULL || pVnode->pQueryQ == NULL || pVnode->pFetchQ == NULL) { + vnodeCleanupVnode(pVnode); + return terrno; + } + + char path[PATH_MAX + 20]; + snprintf(path, sizeof(path), "%s/vnode%d/wal", tsVnodeDir, vgId); + pVnode->pWal = walOpen(path, &pVnode->cfg.wal); + if (pVnode->pWal == NULL) { + vnodeCleanupVnode(pVnode); + return terrno; + } + + vDebug("vgId:%d, vnode is opened", pVnode->vgId); + taosHashPut(tsVnode.hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnode *)); + + vnodeSetReadyStatus(pVnode); + return TSDB_CODE_SUCCESS; +} + +int32_t vnodeCreateVnode(int32_t vgId, SVnodeCfg *pCfg) { + int32_t code = 0; + char path[PATH_MAX + 20] = {0}; + + snprintf(path, sizeof(path), "%s/vnode%d", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, tstrerror(code)); + return code; + } + + snprintf(path, sizeof(path), "%s/vnode%d/cfg", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, 
tstrerror(code)); + return code; + } + + snprintf(path, sizeof(path), "%s/vnode%d/wal", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, tstrerror(code)); + return code; + } + + snprintf(path, sizeof(path), "%s/vnode%d/tq", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, tstrerror(code)); + return code; + } + + snprintf(path, sizeof(path), "%s/vnode%d/tsdb", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, tstrerror(code)); + return code; + } + + snprintf(path, sizeof(path), "%s/vnode%d/meta", tsVnodeDir, vgId); + if (taosMkDir(path) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + vError("vgId:%d, failed to create since %s", vgId, tstrerror(code)); + return code; + } + + code = vnodeWriteCfg(vgId, pCfg); + if (code != 0) { + vError("vgId:%d, failed to save vnode cfg since %s", vgId, tstrerror(code)); + return code; + } + + return vnodeOpenVnode(vgId); +} + +int32_t vnodeAlterVnode(SVnode * pVnode, SVnodeCfg *pCfg) { + int32_t code = 0; + int32_t vgId = pVnode->vgId; + + bool walChanged = (memcmp(&pCfg->wal, &pVnode->cfg.wal, sizeof(SWalCfg)) != 0); + bool tsdbChanged = (memcmp(&pCfg->tsdb, &pVnode->cfg.tsdb, sizeof(STsdbCfg)) != 0); + bool metaChanged = (memcmp(&pCfg->meta, &pVnode->cfg.meta, sizeof(SMetaCfg)) != 0); + bool syncChanged = (memcmp(&pCfg->sync, &pVnode->cfg.sync, sizeof(SSyncCluster)) != 0); + + if (!walChanged && !tsdbChanged && !metaChanged && !syncChanged) { + vDebug("vgId:%d, nothing changed", vgId); + vnodeRelease(pVnode); + return code; + } + + code = vnodeWriteCfg(pVnode->vgId, pCfg); + if (code != 0) { + vError("vgId:%d, failed to write alter msg to file since %s", vgId, tstrerror(code)); + vnodeRelease(pVnode); + return code; + } + + pVnode->cfg = *pCfg; + + if (walChanged) { + code = 
walAlter(pVnode->pWal, &pVnode->cfg.wal); + if (code != 0) { + vDebug("vgId:%d, failed to alter wal since %s", vgId, tstrerror(code)); + vnodeRelease(pVnode); + return code; + } + } + + if (tsdbChanged) { + // todo + } + + if (metaChanged) { + // todo + } + + if (syncChanged) { + // todo + } + + vnodeRelease(pVnode); + return code; +} + +int32_t vnodeDropVnode(SVnode *pVnode) { + if (pVnode->cfg.dropped) { + vInfo("vgId:%d, already set drop flag, ref:%d", pVnode->vgId, pVnode->refCount); + vnodeRelease(pVnode); + return TSDB_CODE_SUCCESS; + } + + pVnode->cfg.dropped = 1; + int32_t code = vnodeWriteCfg(pVnode->vgId, &pVnode->cfg); + if (code == 0) { + vInfo("vgId:%d, set drop flag, ref:%d", pVnode->vgId, pVnode->refCount); + vnodeCleanupVnode(pVnode); + } else { + vError("vgId:%d, failed to set drop flag since %s", pVnode->vgId, tstrerror(code)); + pVnode->cfg.dropped = 0; + } + + vnodeRelease(pVnode); + return code; +} + +int32_t vnodeSyncVnode(SVnode *pVnode) { + return TSDB_CODE_SUCCESS; +} + +int32_t vnodeCompactVnode(SVnode *pVnode) { + return TSDB_CODE_SUCCESS; +} + +static void *vnodeOpenVnodeFunc(void *param) { + SOpenVnodeThread *pThread = param; + + vDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); + setThreadName("vnodeOpenVnode"); + + for (int32_t v = 0; v < pThread->vnodeNum; ++v) { + int32_t vgId = pThread->vnodeList[v]; + + char stepDesc[TSDB_STEP_DESC_LEN] = {0}; + snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", vgId, + tsVnode.openVnodes, tsVnode.totalVnodes); + // (*vnodeInst()->fp.ReportStartup)("open-vnodes", stepDesc); + + if (vnodeOpenVnode(vgId) < 0) { + vError("vgId:%d, failed to open vnode by thread:%d", vgId, pThread->threadIndex); + pThread->failed++; + } else { + vDebug("vgId:%d, is opened by thread:%d", vgId, pThread->threadIndex); + pThread->opened++; + } + + atomic_add_fetch_32(&tsVnode.openVnodes, 1); + } + + vDebug("thread:%d, total vnodes:%d, 
opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, + pThread->failed); + return NULL; +} + +static int32_t vnodeGetVnodeListFromDisk(int32_t vnodeList[], int32_t *numOfVnodes) { +#if 0 + DIR *dir = opendir(tsVnodeDir); + if (dir == NULL) return TSDB_CODE_DND_NO_WRITE_ACCESS; + + *numOfVnodes = 0; + struct dirent *de = NULL; + while ((de = readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; + if (de->d_type & DT_DIR) { + if (strncmp("vnode", de->d_name, 5) != 0) continue; + int32_t vnode = atoi(de->d_name + 5); + if (vnode == 0) continue; + + (*numOfVnodes)++; + + if (*numOfVnodes >= TSDB_MAX_VNODES) { + vError("vgId:%d, too many vnode directory in disk, exist:%d max:%d", vnode, *numOfVnodes, TSDB_MAX_VNODES); + closedir(dir); + return TSDB_CODE_DND_TOO_MANY_VNODES; + } else { + vnodeList[*numOfVnodes - 1] = vnode; + } + } + } + closedir(dir); +#endif + return TSDB_CODE_SUCCESS; +} + +static int32_t vnodeOpenVnodes() { + int32_t vnodeList[TSDB_MAX_VNODES] = {0}; + int32_t numOfVnodes = 0; + int32_t status = vnodeGetVnodeListFromDisk(vnodeList, &numOfVnodes); + + if (status != TSDB_CODE_SUCCESS) { + vInfo("failed to get vnode list from disk since code:%d", status); + return status; + } + + tsVnode.totalVnodes = numOfVnodes; + + int32_t threadNum = tsNumOfCores; + int32_t vnodesPerThread = numOfVnodes / threadNum + 1; + + SOpenVnodeThread *threads = calloc(threadNum, sizeof(SOpenVnodeThread)); + for (int32_t t = 0; t < threadNum; ++t) { + threads[t].threadIndex = t; + threads[t].vnodeList = calloc(vnodesPerThread, sizeof(int32_t)); + } + + for (int32_t v = 0; v < numOfVnodes; ++v) { + int32_t t = v % threadNum; + SOpenVnodeThread *pThread = &threads[t]; + pThread->vnodeList[pThread->vnodeNum++] = vnodeList[v]; + } + + vInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes); + + for (int32_t t = 0; t < threadNum; ++t) { + SOpenVnodeThread *pThread = &threads[t]; + if 
(pThread->vnodeNum == 0) continue; + + pThread->threadId = taosCreateThread(vnodeOpenVnodeFunc, pThread); + if (pThread->threadId == NULL) { + vError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno)); + } + } + + int32_t openVnodes = 0; + int32_t failedVnodes = 0; + for (int32_t t = 0; t < threadNum; ++t) { + SOpenVnodeThread *pThread = &threads[t]; + taosDestoryThread(pThread->threadId); + pThread->threadId = NULL; + + openVnodes += pThread->opened; + failedVnodes += pThread->failed; + free(pThread->vnodeList); + } + + free(threads); + vInfo("there are total vnodes:%d, opened:%d", numOfVnodes, openVnodes); + + if (failedVnodes != 0) { + vError("there are total vnodes:%d, failed:%d", numOfVnodes, failedVnodes); + return -1; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t vnodeGetVnodeList(SVnode *vnodeList[], int32_t *numOfVnodes) { + void *pIter = taosHashIterate(tsVnode.hash, NULL); + while (pIter) { + SVnode **pVnode = pIter; + if (*pVnode) { + (*numOfVnodes)++; + if (*numOfVnodes >= TSDB_MAX_VNODES) { + vError("vgId:%d, too many open vnodes, exist:%d max:%d", (*pVnode)->vgId, *numOfVnodes, TSDB_MAX_VNODES); + (*numOfVnodes)--; /* entry is not stored: undo the count and fall through so the iterator still advances */ + } else { + vnodeList[*numOfVnodes - 1] = (*pVnode); + } + } + + pIter = taosHashIterate(tsVnode.hash, pIter); + } + + return TSDB_CODE_SUCCESS; +} + +static void vnodeCleanupVnodes() { + SVnode* vnodeList[TSDB_MAX_VNODES] = {0}; + int32_t numOfVnodes = 0; + + int32_t code = vnodeGetVnodeList(vnodeList, &numOfVnodes); + + if (code != TSDB_CODE_SUCCESS) { + vInfo("failed to get dnode list since code %d", code); + return; + } + + for (int32_t i = 0; i < numOfVnodes; ++i) { + vnodeCleanupVnode(vnodeList[i]); + } + + vInfo("total vnodes:%d are all closed", numOfVnodes); +} + +static void vnodeIncRef(void *ptNode) { + assert(ptNode != NULL); + + SVnode **ppVnode = (SVnode **)ptNode; + assert(ppVnode); + assert(*ppVnode); + + SVnode *pVnode = *ppVnode; + atomic_add_fetch_32(&pVnode->refCount, 
1); + vTrace("vgId:%d, get vnode, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); +} + +SVnode *vnodeAcquireInAllState(int32_t vgId) { + SVnode *pVnode = NULL; + + // taosHashGetClone(tsVnode.hash, &vgId, sizeof(int32_t), vnodeIncRef, (void*)&pVnode); + if (pVnode == NULL) { + vDebug("vgId:%d, can't accquire since not exist", vgId); + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + return NULL; + } + + return pVnode; +} + +SVnode *vnodeAcquire(int32_t vgId) { + SVnode *pVnode = vnodeAcquireInAllState(vgId); + if (pVnode == NULL) return NULL; + + if (vnodeInStatus(pVnode, TAOS_VN_STATUS_READY)) { + return pVnode; + } else { + vDebug("vgId:%d, can't accquire since not in ready status", vgId); + vnodeRelease(pVnode); + terrno = TSDB_CODE_VND_INVALID_TSDB_STATE; + return NULL; + } +} + +void vnodeRelease(SVnode *pVnode) { + if (pVnode == NULL) return; + + int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); + int32_t vgId = pVnode->vgId; + + vTrace("vgId:%d, release vnode, refCount:%d pVnode:%p", vgId, refCount, pVnode); + assert(refCount >= 0); + + if (refCount <= 0) { + vDebug("vgId:%d, vnode will be destroyed, refCount:%d pVnode:%p", vgId, refCount, pVnode); + vnodeDestroyVnode(pVnode); + int32_t count = taosHashGetSize(tsVnode.hash); + vDebug("vgId:%d, vnode is destroyed, vnodes:%d", vgId, count); + } +} + +int32_t vnodeInitMain() { + tsVnode.hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); + if (tsVnode.hash == NULL) { + vError("failed to init vnode mgmt"); + return -1; + } + + vInfo("vnode main is initialized"); + return vnodeOpenVnodes(); +} + +void vnodeCleanupMain() { + vnodeCleanupVnodes(); + taosHashCleanup(tsVnode.hash); + tsVnode.hash = NULL; +} + +static void vnodeBuildVloadMsg(SVnode *pVnode, SStatusMsg *pStatus) { + int64_t totalStorage = 0; + int64_t compStorage = 0; + int64_t pointsWritten = 0; + + if (pStatus->openVnodes >= TSDB_MAX_VNODES) return; + + // if 
(pVnode->tsdb) { + // tsdbReportStat(pVnode->tsdb, &pointsWritten, &totalStorage, &compStorage); + // } + + SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++]; + pLoad->vgId = htonl(pVnode->vgId); + pLoad->totalStorage = htobe64(totalStorage); + pLoad->compStorage = htobe64(compStorage); + pLoad->pointsWritten = htobe64(pointsWritten); + pLoad->status = pVnode->status; + pLoad->role = pVnode->role; +} + +void vnodeGetStatus(SStatusMsg *pStatus) { + void *pIter = taosHashIterate(tsVnode.hash, NULL); + while (pIter) { + SVnode **pVnode = pIter; + if (*pVnode) { + vnodeBuildVloadMsg(*pVnode, pStatus); + } + pIter = taosHashIterate(tsVnode.hash, pIter); + } +} + +void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes) { + for (int32_t i = 0; i < numOfVnodes; ++i) { + pAccess[i].vgId = htonl(pAccess[i].vgId); + SVnode *pVnode = vnodeAcquire(pAccess[i].vgId); + if (pVnode != NULL) { + pVnode->accessState = pAccess[i].accessState; + if (pVnode->accessState != TSDB_VN_ALL_ACCCESS) { + vDebug("vgId:%d, access state is set to %d", pAccess[i].vgId, pVnode->accessState); + } + vnodeRelease(pVnode); + } + } +} diff --git a/source/server/vnode/src/vnodeMgmt.c b/source/server/vnode/src/vnodeMgmt.c index 8662e5a9203059c0188330051be3a5776177fb0f..d20e36641e2eb8cdda9b48f8fca2a32ab9850183 100644 --- a/source/server/vnode/src/vnodeMgmt.c +++ b/source/server/vnode/src/vnodeMgmt.c @@ -15,21 +15,184 @@ #define _DEFAULT_SOURCE #include "os.h" - +#include "vnodeMain.h" #include "vnodeMgmt.h" -#include "vnodeMgmtMsg.h" - -typedef struct { - SRpcMsg rpcMsg; - char pCont[]; -} SVnMgmtMsg; static struct { - SWorkerPool pool; - taos_queue pQueue; + SWorkerPool createPool; + taos_queue createQueue; + SWorkerPool workerPool; + taos_queue workerQueue; int32_t (*msgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); } tsVmgmt = {0}; +static int32_t vnodeParseCreateVnodeReq(SRpcMsg *rpcMsg, int32_t *vgId, SVnodeCfg *pCfg) { + SCreateVnodeMsg *pCreate = rpcMsg->pCont; + *vgId = htonl(pCreate->vgId); + 
+ pCfg->dropped = 0; + tstrncpy(pCfg->db, pCreate->db, sizeof(pCfg->db)); + + pCfg->tsdb.cacheBlockSize = htonl(pCreate->cacheBlockSize); + pCfg->tsdb.totalBlocks = htonl(pCreate->totalBlocks); + pCfg->tsdb.daysPerFile = htonl(pCreate->daysPerFile); + pCfg->tsdb.daysToKeep1 = htonl(pCreate->daysToKeep1); + pCfg->tsdb.daysToKeep2 = htonl(pCreate->daysToKeep2); + pCfg->tsdb.daysToKeep0 = htonl(pCreate->daysToKeep0); + pCfg->tsdb.minRowsPerFileBlock = htonl(pCreate->minRowsPerFileBlock); + pCfg->tsdb.maxRowsPerFileBlock = htonl(pCreate->maxRowsPerFileBlock); + pCfg->tsdb.precision = pCreate->precision; + pCfg->tsdb.compression = pCreate->compression; + pCfg->tsdb.cacheLastRow = pCreate->cacheLastRow; + pCfg->tsdb.update = pCreate->update; + + pCfg->wal.fsyncPeriod = htonl(pCreate->fsyncPeriod); + pCfg->wal.walLevel = pCreate->walLevel; + + pCfg->sync.replica = pCreate->replica; + pCfg->sync.quorum = pCreate->quorum; + + for (int32_t j = 0; j < pCreate->replica; ++j) { + pCfg->sync.nodes[j].nodePort = htons(pCreate->nodes[j].port); + tstrncpy(pCfg->sync.nodes[j].nodeFqdn, pCreate->nodes[j].fqdn, TSDB_FQDN_LEN); + } + + return 0; +} + +static int32_t vnodeProcessCreateVnodeReq(SRpcMsg *rpcMsg) { + SVnodeCfg vnodeCfg = {0}; + int32_t vgId = 0; + + int32_t code = vnodeParseCreateVnodeReq(rpcMsg, &vgId, &vnodeCfg); + if (code != 0) { + vError("failed to parse create vnode msg since %s", tstrerror(code)); + } + + vDebug("vgId:%d, create vnode req is received", vgId); + + SVnode *pVnode = vnodeAcquireInAllState(vgId); + if (pVnode != NULL) { + vDebug("vgId:%d, already exist, return success", vgId); + vnodeRelease(pVnode); + return code; + } + + code = vnodeCreateVnode(vgId, &vnodeCfg); + if (code != 0) { + vError("vgId:%d, failed to create vnode since %s", vgId, tstrerror(code)); + } + + return code; +} + +static int32_t vnodeProcessAlterVnodeReq(SRpcMsg *rpcMsg) { + SVnodeCfg vnodeCfg = {0}; + int32_t vgId = 0; + + int32_t code = vnodeParseCreateVnodeReq(rpcMsg, &vgId, 
&vnodeCfg); + if (code != 0) { + vError("failed to parse create vnode msg since %s", tstrerror(code)); + } + + vDebug("vgId:%d, alter vnode req is received", vgId); + + SVnode *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + code = terrno; + vDebug("vgId:%d, failed to alter vnode since %s", vgId, tstrerror(code)); + return code; + } + + code = vnodeAlterVnode(pVnode, &vnodeCfg); + if (code != 0) { + vError("vgId:%d, failed to alter vnode since %s", vgId, tstrerror(code)); + } + + vnodeRelease(pVnode); + return code; +} + +static SDropVnodeMsg *vnodeParseDropVnodeReq(SRpcMsg *rpcMsg) { + SDropVnodeMsg *pDrop = rpcMsg->pCont; + pDrop->vgId = htonl(pDrop->vgId); + return pDrop; +} + +static int32_t vnodeProcessSyncVnodeReq(SRpcMsg *rpcMsg) { + SSyncVnodeMsg *pSync = (SSyncVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg); + + int32_t code = 0; + int32_t vgId = pSync->vgId; + vDebug("vgId:%d, sync vnode req is received", vgId); + + SVnode *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + code = terrno; + vDebug("vgId:%d, failed to sync since %s", vgId, tstrerror(code)); + return code; + } + + code = vnodeSyncVnode(pVnode); + if (code != 0) { + vError("vgId:%d, failed to sync vnode since %s", vgId, tstrerror(code)); + } + + vnodeRelease(pVnode); + return code; +} + +static int32_t vnodeProcessCompactVnodeReq(SRpcMsg *rpcMsg) { + SCompactVnodeMsg *pCompact = (SCompactVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg); + + int32_t code = 0; + int32_t vgId = pCompact->vgId; + vDebug("vgId:%d, compact vnode req is received", vgId); + + SVnode *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + code = terrno; + vDebug("vgId:%d, failed to compact since %s", vgId, tstrerror(code)); + return code; + } + + code = vnodeCompactVnode(pVnode); + if (code != 0) { + vError("vgId:%d, failed to compact vnode since %s", vgId, tstrerror(code)); + } + + vnodeRelease(pVnode); + return code; +} + +static int32_t vnodeProcessDropVnodeReq(SRpcMsg *rpcMsg) { + SDropVnodeMsg *pDrop = 
vnodeParseDropVnodeReq(rpcMsg); + + int32_t code = 0; + int32_t vgId = pDrop->vgId; + vDebug("vgId:%d, drop vnode req is received", vgId); + + SVnode *pVnode = vnodeAcquire(vgId); + if (pVnode == NULL) { + code = terrno; + vDebug("vgId:%d, failed to drop since %s", vgId, tstrerror(code)); + return code; + } + + code = vnodeDropVnode(pVnode); + if (code != 0) { + vError("vgId:%d, failed to drop vnode since %s", vgId, tstrerror(code)); + } + + vnodeRelease(pVnode); + return code; +} + +static int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg) { + vError("alter stream msg not processed"); + return TSDB_CODE_VND_MSG_NOT_PROCESSED; +} + static int32_t vnodeProcessMgmtStart(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype) { SRpcMsg *pMsg = &pMgmt->rpcMsg; int32_t msgType = pMsg->msgType; @@ -43,27 +206,21 @@ static int32_t vnodeProcessMgmtStart(void *unused, SVnMgmtMsg *pMgmt, int32_t qt } } -static void vnodeSendMgmtEnd(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype, int32_t code) { +static void vnodeProcessMgmtEnd(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype, int32_t code) { SRpcMsg *pMsg = &pMgmt->rpcMsg; - SRpcMsg rsp = {0}; + vTrace("msg:%p, is processed, result:%s", pMgmt, tstrerror(code)); - rsp.code = code; - vTrace("msg:%p, is processed, code:0x%x", pMgmt, rsp.code); - if (rsp.code != TSDB_CODE_DND_ACTION_IN_PROGRESS) { - rsp.handle = pMsg->handle; - rsp.pCont = NULL; - rpcSendResponse(&rsp); - } - - taosFreeQitem(pMsg); + SRpcMsg rsp = {.code = code, .handle = pMsg->handle}; + rpcSendResponse(&rsp); + taosFreeQitem(pMgmt); } static void vnodeInitMgmtReqFp() { - tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessCreateVnodeMsg; - tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessAlterVnodeMsg; - tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessSyncVnodeMsg; - tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE]= vnodeProcessCompactVnodeMsg; - tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessDropVnodeMsg; + 
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessCreateVnodeReq; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessAlterVnodeReq; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessSyncVnodeReq; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE] = vnodeProcessCompactVnodeReq; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessDropVnodeReq; tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessAlterStreamReq; } @@ -75,14 +232,18 @@ static int32_t vnodeWriteToMgmtQueue(SRpcMsg *pMsg) { pMgmt->rpcMsg = *pMsg; pMgmt->rpcMsg.pCont = pMgmt->pCont; memcpy(pMgmt->pCont, pMsg->pCont, pMsg->contLen); - taosWriteQitem(tsVmgmt.pQueue, TAOS_QTYPE_RPC, pMgmt); - return TSDB_CODE_SUCCESS; + if (pMsg->msgType == TSDB_MSG_TYPE_MD_CREATE_VNODE) { + return taosWriteQitem(tsVmgmt.createQueue, TAOS_QTYPE_RPC, pMgmt); + } else { + return taosWriteQitem(tsVmgmt.workerQueue, TAOS_QTYPE_RPC, pMgmt); + } } void vnodeProcessMgmtMsg(SRpcMsg *pMsg) { int32_t code = vnodeWriteToMgmtQueue(pMsg); if (code != TSDB_CODE_SUCCESS) { + vError("msg, ahandle:%p type:%s not processed since %s", pMsg->ahandle, taosMsg[pMsg->msgType], tstrerror(code)); SRpcMsg rsp = {.handle = pMsg->handle, .code = code}; rpcSendResponse(&rsp); } @@ -93,25 +254,41 @@ void vnodeProcessMgmtMsg(SRpcMsg *pMsg) { int32_t vnodeInitMgmt() { vnodeInitMgmtReqFp(); - SWorkerPool *pPool = &tsVmgmt.pool; - pPool->name = "vmgmt"; + SWorkerPool *pPool = &tsVmgmt.createPool; + pPool->name = "vnode-mgmt-create"; pPool->startFp = (ProcessStartFp)vnodeProcessMgmtStart; - pPool->endFp = (ProcessEndFp)vnodeSendMgmtEnd; + pPool->endFp = (ProcessEndFp)vnodeProcessMgmtEnd; pPool->min = 1; pPool->max = 1; if (tWorkerInit(pPool) != 0) { return TSDB_CODE_VND_OUT_OF_MEMORY; } - tsVmgmt.pQueue = tWorkerAllocQueue(pPool, NULL); + tsVmgmt.createQueue = tWorkerAllocQueue(pPool, NULL); - vInfo("vmgmt is initialized, max worker %d", pPool->max); + pPool = &tsVmgmt.workerPool; + pPool->name = "vnode-mgmt-worker"; + 
pPool->startFp = (ProcessStartFp)vnodeProcessMgmtStart; + pPool->endFp = (ProcessEndFp)vnodeProcessMgmtEnd; + pPool->min = 1; + pPool->max = 1; + if (tWorkerInit(pPool) != 0) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + tsVmgmt.workerQueue = tWorkerAllocQueue(pPool, NULL); + + vInfo("vmgmt is initialized"); return TSDB_CODE_SUCCESS; } void vnodeCleanupMgmt() { - tWorkerFreeQueue(&tsVmgmt.pool, tsVmgmt.pQueue); - tWorkerCleanup(&tsVmgmt.pool); - tsVmgmt.pQueue = NULL; + tWorkerFreeQueue(&tsVmgmt.createPool, tsVmgmt.createQueue); + tWorkerCleanup(&tsVmgmt.createPool); + tsVmgmt.createQueue = NULL; + + tWorkerFreeQueue(&tsVmgmt.workerPool, tsVmgmt.workerQueue); + tWorkerCleanup(&tsVmgmt.workerPool); + tsVmgmt.workerQueue = NULL; vInfo("vmgmt is closed"); } diff --git a/source/server/vnode/src/vnodeMgmtMsg.c b/source/server/vnode/src/vnodeMgmtMsg.c deleted file mode 100644 index 3b3fc530d2c6bfff58fab5df9a2babed706215c3..0000000000000000000000000000000000000000 --- a/source/server/vnode/src/vnodeMgmtMsg.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "vnodeMgmtMsg.h" - -static SCreateVnodeMsg* vnodeParseVnodeMsg(SRpcMsg *rpcMsg) { - SCreateVnodeMsg *pCreate = rpcMsg->pCont; - pCreate->cfg.vgId = htonl(pCreate->cfg.vgId); - pCreate->cfg.dbCfgVersion = htonl(pCreate->cfg.dbCfgVersion); - pCreate->cfg.vgCfgVersion = htonl(pCreate->cfg.vgCfgVersion); - pCreate->cfg.maxTables = htonl(pCreate->cfg.maxTables); - pCreate->cfg.cacheBlockSize = htonl(pCreate->cfg.cacheBlockSize); - pCreate->cfg.totalBlocks = htonl(pCreate->cfg.totalBlocks); - pCreate->cfg.daysPerFile = htonl(pCreate->cfg.daysPerFile); - pCreate->cfg.daysToKeep1 = htonl(pCreate->cfg.daysToKeep1); - pCreate->cfg.daysToKeep2 = htonl(pCreate->cfg.daysToKeep2); - pCreate->cfg.daysToKeep = htonl(pCreate->cfg.daysToKeep); - pCreate->cfg.minRowsPerFileBlock = htonl(pCreate->cfg.minRowsPerFileBlock); - pCreate->cfg.maxRowsPerFileBlock = htonl(pCreate->cfg.maxRowsPerFileBlock); - pCreate->cfg.fsyncPeriod = htonl(pCreate->cfg.fsyncPeriod); - pCreate->cfg.commitTime = htonl(pCreate->cfg.commitTime); - - for (int32_t j = 0; j < pCreate->cfg.vgReplica; ++j) { - pCreate->nodes[j].nodeId = htonl(pCreate->nodes[j].nodeId); - } - - return pCreate; -} - -int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg) { - SCreateVnodeMsg *pCreate = vnodeParseVnodeMsg(rpcMsg); - SVnode *pVnode = vnodeAcquire(pCreate->cfg.vgId); - if (pVnode != NULL) { - vDebug("vgId:%d, already exist, return success", pCreate->cfg.vgId); - vnodeRelease(pVnode); - return TSDB_CODE_SUCCESS; - } else { - vDebug("vgId:%d, create vnode msg is received", pCreate->cfg.vgId); - return vnodeCreate(pCreate); - } -} - -int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg) { - SAlterVnodeMsg *pAlter = vnodeParseVnodeMsg(rpcMsg); - - void *pVnode = vnodeAcquireNotClose(pAlter->cfg.vgId); - if (pVnode != NULL) { - vDebug("vgId:%d, alter vnode msg is received", pAlter->cfg.vgId); - int32_t code = vnodeAlter(pVnode, pAlter); - vnodeRelease(pVnode); - return 
code; - } else { - vInfo("vgId:%d, vnode not exist, can't alter it", pAlter->cfg.vgId); - return TSDB_CODE_VND_INVALID_VGROUP_ID; - } -} - -int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg) { - SSyncVnodeMsg *pSyncVnode = rpcMsg->pCont; - pSyncVnode->vgId = htonl(pSyncVnode->vgId); - - return vnodeSync(pSyncVnode->vgId); -} - -int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg) { - SCompactVnodeMsg *pCompactVnode = rpcMsg->pCont; - pCompactVnode->vgId = htonl(pCompactVnode->vgId); - return vnodeCompact(pCompactVnode->vgId); -} - -int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg) { - SDropVnodeMsg *pDrop = rpcMsg->pCont; - pDrop->vgId = htonl(pDrop->vgId); - - return vnodeDrop(pDrop->vgId); -} - -int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg) { return 0; } diff --git a/source/server/vnode/src/vnodeRead.c b/source/server/vnode/src/vnodeRead.c index dd3b2a7aa06976a6d4c25ef60ca6617ed7bc06a6..2ca2a81739b95d9e35754702d42fcc382747e5c2 100644 --- a/source/server/vnode/src/vnodeRead.c +++ b/source/server/vnode/src/vnodeRead.c @@ -14,14 +14,9 @@ */ #define _DEFAULT_SOURCE -#include "os.h" -#include "taosmsg.h" -#include "tglobal.h" -// #include "query.h" - +#include "vnodeMain.h" #include "vnodeRead.h" #include "vnodeReadMsg.h" -#include "vnodeStatus.h" static struct { SWorkerPool query; @@ -50,11 +45,6 @@ static int32_t vnodeWriteToRQueue(SVnode *pVnode, void *pCont, int32_t contLen, } #endif - if (!vnodeInReadyStatus(pVnode)) { - vDebug("vgId:%d, failed to write into vread queue, vnode status is %s", pVnode->vgId, vnodeStatus[pVnode->status]); - return TSDB_CODE_APP_NOT_READY; - } - int32_t size = sizeof(SReadMsg) + contLen; SReadMsg *pRead = taosAllocateQitem(size); if (pRead == NULL) { @@ -119,7 +109,7 @@ void vnodeProcessReadMsg(SRpcMsg *pMsg) { pHead->contLen = htonl(pHead->contLen); assert(pHead->contLen > 0); - SVnode *pVnode = vnodeAcquireNotClose(pHead->vgId); + SVnode *pVnode = vnodeAcquire(pHead->vgId); if (pVnode != NULL) { code = vnodeWriteToRQueue(pVnode, 
pCont, pHead->contLen, TAOS_QTYPE_RPC, pMsg); if (code == TSDB_CODE_SUCCESS) queuedMsgNum++; diff --git a/source/server/vnode/src/vnodeReadMsg.c b/source/server/vnode/src/vnodeReadMsg.c index 21ecde332646d0af23970b1e456b0379480aff9e..6dbfbf592034feb8746997d2bf94a5a308aad1de 100644 --- a/source/server/vnode/src/vnodeReadMsg.c +++ b/source/server/vnode/src/vnodeReadMsg.c @@ -14,11 +14,7 @@ */ #define _DEFAULT_SOURCE -#include "os.h" -#include "taosmsg.h" -#include "tglobal.h" -// #include "query.h" -#include "vnodeStatus.h" +#include "vnodeMain.h" #include "vnodeRead.h" #include "vnodeReadMsg.h" @@ -225,16 +221,16 @@ int32_t vnodeProcessConsumeMsg(SVnode *pVnode, SReadMsg *pRead) { tmqMsgHead msgHead = pConsumeMsg->head; //extract head STQ *pTq = pVnode->pTQ; - tqGroupHandle *pHandle = tqFindGHandleBycId(pTq, msgHead.clientId); + /*tqBufferHandle *pHandle = tqGetHandle(pTq, msgHead.clientId);*/ //return msg if offset not moved - if(pConsumeMsg->commitOffset == pHandle->consumeOffset) { + /*if(pConsumeMsg->commitOffset == pHandle->consumeOffset) {*/ //return msg - return 0; - } + /*return 0;*/ + /*}*/ //or move offset - tqMoveOffsetToNext(pHandle); + /*tqMoveOffsetToNext(pHandle);*/ //fetch or register context - tqFetchMsg(pHandle, pRead); + /*tqFetchMsg(pHandle, pRead);*/ //judge mode, tail read or catch up read /*int64_t lastVer = walLastVer(pVnode->wal);*/ //launch new query diff --git a/source/server/vnode/src/vnodeStatus.c b/source/server/vnode/src/vnodeStatus.c deleted file mode 100644 index b0b3bbfa498c7435f9c90d38a53bcd6e9a06e5ff..0000000000000000000000000000000000000000 --- a/source/server/vnode/src/vnodeStatus.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "taosmsg.h" -// #include "query.h" -#include "vnodeRead.h" -#include "vnodeStatus.h" -#include "vnodeWrite.h" - -char* vnodeStatus[] = { - "init", - "ready", - "closing", - "updating", - "reset" -}; - -bool vnodeSetInitStatus(SVnode* pVnode) { - pthread_mutex_lock(&pVnode->statusMutex); - pVnode->status = TAOS_VN_STATUS_INIT; - pthread_mutex_unlock(&pVnode->statusMutex); - return true; -} - -bool vnodeSetReadyStatus(SVnode* pVnode) { - bool set = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_INIT || pVnode->status == TAOS_VN_STATUS_READY || - pVnode->status == TAOS_VN_STATUS_UPDATING) { - pVnode->status = TAOS_VN_STATUS_READY; - set = true; - } - -#if 0 - qQueryMgmtReOpen(pVnode->qMgmt); -#endif - - pthread_mutex_unlock(&pVnode->statusMutex); - return set; -} - -static bool vnodeSetClosingStatusImp(SVnode* pVnode) { - bool set = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_READY || pVnode->status == TAOS_VN_STATUS_INIT) { - pVnode->status = TAOS_VN_STATUS_CLOSING; - set = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return set; -} - -bool vnodeSetClosingStatus(SVnode* pVnode) { - if (pVnode->status == TAOS_VN_STATUS_CLOSING) - return true; - - while (!vnodeSetClosingStatusImp(pVnode)) { - taosMsleep(1); - } - -#if 0 - // release local resources only after cutting off outside connections - qQueryMgmtNotifyClosed(pVnode->qMgmt); -#endif - vnodeWaitReadCompleted(pVnode); - vnodeWaitWriteCompleted(pVnode); - - return true; -} - -bool vnodeSetUpdatingStatus(SVnode* pVnode) { - bool set = 
false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_READY) { - pVnode->status = TAOS_VN_STATUS_UPDATING; - set = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return set; -} - -bool vnodeInInitStatus(SVnode* pVnode) { - bool in = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_INIT) { - in = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return in; -} - -bool vnodeInReadyStatus(SVnode* pVnode) { - bool in = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_READY) { - in = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return in; -} - -bool vnodeInClosingStatus(SVnode* pVnode) { - bool in = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_CLOSING) { - in = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return in; -} - diff --git a/source/server/vnode/src/vnodeVersion.c b/source/server/vnode/src/vnodeVersion.c deleted file mode 100644 index 2b3d1b3a00dd2e08f522a6d98bd3814a84fefc62..0000000000000000000000000000000000000000 --- a/source/server/vnode/src/vnodeVersion.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "cJSON.h" -#include "tglobal.h" -#include "vnodeVersion.h" - -int32_t vnodeReadVersion(SVnode *pVnode) { - int32_t len = 0; - int32_t maxLen = 100; - char * content = calloc(1, maxLen + 1); - cJSON * root = NULL; - FILE * fp = NULL; - - terrno = TSDB_CODE_VND_INVALID_VRESION_FILE; - char file[TSDB_FILENAME_LEN + 30] = {0}; - sprintf(file, "%s/vnode%d/version.json", tsVnodeDir, pVnode->vgId); - - fp = fopen(file, "r"); - if (!fp) { - if (errno != ENOENT) { - vError("vgId:%d, failed to read %s, error:%s", pVnode->vgId, file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - } else { - terrno = TSDB_CODE_SUCCESS; - } - goto PARSE_VER_ERROR; - } - - len = (int32_t)fread(content, 1, maxLen, fp); - if (len <= 0) { - vError("vgId:%d, failed to read %s, content is null", pVnode->vgId, file); - goto PARSE_VER_ERROR; - } - - root = cJSON_Parse(content); - if (root == NULL) { - vError("vgId:%d, failed to read %s, invalid json format", pVnode->vgId, file); - goto PARSE_VER_ERROR; - } - - cJSON *ver = cJSON_GetObjectItem(root, "version"); - if (!ver || ver->type != cJSON_Number) { - vError("vgId:%d, failed to read %s, version not found", pVnode->vgId, file); - goto PARSE_VER_ERROR; - } -#if 0 - pVnode->version = (uint64_t)ver->valueint; - - terrno = TSDB_CODE_SUCCESS; - vInfo("vgId:%d, read %s successfully, fver:%" PRIu64, pVnode->vgId, file, pVnode->version); -#endif - -PARSE_VER_ERROR: - if (content != NULL) free(content); - if (root != NULL) cJSON_Delete(root); - if (fp != NULL) fclose(fp); - - return terrno; -} - -int32_t vnodeSaveVersion(SVnode *pVnode) { - char file[TSDB_FILENAME_LEN + 30] = {0}; - sprintf(file, "%s/vnode%d/version.json", tsVnodeDir, pVnode->vgId); - - FILE *fp = fopen(file, "w"); - if (!fp) { - vError("vgId:%d, failed to write %s, reason:%s", pVnode->vgId, file, strerror(errno)); - return -1; - } - - int32_t len = 0; - int32_t maxLen = 100; - char * content = calloc(1, maxLen + 1); - -#if 
0 - len += snprintf(content + len, maxLen - len, "{\n"); - len += snprintf(content + len, maxLen - len, " \"version\": %" PRIu64 "\n", pVnode->fversion); - len += snprintf(content + len, maxLen - len, "}\n"); -#endif - fwrite(content, 1, len, fp); - taosFsyncFile(fileno(fp)); - fclose(fp); - free(content); - terrno = 0; - - // vInfo("vgId:%d, successed to write %s, fver:%" PRIu64, pVnode->vgId, file, pVnode->fversion); - return TSDB_CODE_SUCCESS; -} \ No newline at end of file diff --git a/source/server/vnode/src/vnodeWorker.c b/source/server/vnode/src/vnodeWorker.c deleted file mode 100644 index a8b5723ffb326673aece1cb0fb8649016593bb09..0000000000000000000000000000000000000000 --- a/source/server/vnode/src/vnodeWorker.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" - -#include "vnodeWorker.h" - -enum { CLEANUP_TASK = 0, DESTROY_TASK = 1, BACKUP_TASK = 2 }; - -typedef struct { - int32_t vgId; - int32_t code; - int32_t type; - void * rpcHandle; - SVnode *pVnode; -} SVnTask; - -static struct { - SWorkerPool pool; - taos_queue pQueue; -} tsVworker = {0}; - -static void vnodeProcessTaskStart(void *unused, SVnTask *pTask, int32_t qtype) { - pTask->code = 0; - - switch (pTask->type) { - case CLEANUP_TASK: - vnodeCleanUp(pTask->pVnode); - break; - case DESTROY_TASK: - vnodeDestroy(pTask->pVnode); - break; - case BACKUP_TASK: - vnodeBackup(pTask->vgId); - break; - default: - break; - } -} - -static void vnodeProcessTaskEnd(void *unused, SVnTask *pTask, int32_t qtype, int32_t code) { - if (pTask->rpcHandle != NULL) { - SRpcMsg rpcRsp = {.handle = pTask->rpcHandle, .code = pTask->code}; - rpcSendResponse(&rpcRsp); - } - - taosFreeQitem(pTask); -} - -static int32_t vnodeWriteIntoTaskQueue(SVnode *pVnode, int32_t type, void *rpcHandle) { - SVnTask *pTask = taosAllocateQitem(sizeof(SVnTask)); - if (pTask == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY; - - pTask->vgId = pVnode->vgId; - pTask->pVnode = pVnode; - pTask->rpcHandle = rpcHandle; - pTask->type = type; - - - return taosWriteQitem(tsVworker.pQueue, TAOS_QTYPE_RPC, pTask); -} - -void vnodeProcessCleanupTask(SVnode *pVnode) { - vnodeWriteIntoTaskQueue(pVnode, CLEANUP_TASK, NULL); -} - -void vnodeProcessDestroyTask(SVnode *pVnode) { - vnodeWriteIntoTaskQueue(pVnode, DESTROY_TASK, NULL); -} - -void vnodeProcessBackupTask(SVnode *pVnode) { - vnodeWriteIntoTaskQueue(pVnode, BACKUP_TASK, NULL); -} - -int32_t vnodeInitWorker() { - SWorkerPool *pPool = &tsVworker.pool; - pPool->name = "vworker"; - pPool->startFp = (ProcessStartFp)vnodeProcessTaskStart; - pPool->endFp = (ProcessEndFp)vnodeProcessTaskEnd; - pPool->min = 0; - pPool->max = 1; - if (tWorkerInit(pPool) != 0) { - return TSDB_CODE_VND_OUT_OF_MEMORY; - } - - tsVworker.pQueue = 
tWorkerAllocQueue(pPool, NULL); - - vInfo("vworker is initialized, max worker %d", pPool->max); - return TSDB_CODE_SUCCESS; -} - -void vnodeCleanupWorker() { - tWorkerFreeQueue(&tsVworker.pool, tsVworker.pQueue); - tWorkerCleanup(&tsVworker.pool); - tsVworker.pQueue = NULL; - vInfo("vworker is closed"); -} diff --git a/source/server/vnode/src/vnodeWrite.c b/source/server/vnode/src/vnodeWrite.c index 4c845d96ab502ba0e7dc40a6cecf38c5957ac572..f3258af0bf5ab4c03be211bd4755776b4c5e693a 100644 --- a/source/server/vnode/src/vnodeWrite.c +++ b/source/server/vnode/src/vnodeWrite.c @@ -15,12 +15,7 @@ #define _DEFAULT_SOURCE #include "os.h" -#include "tglobal.h" -#include "tqueue.h" -#include "tworker.h" -#include "taosmsg.h" - -#include "vnodeStatus.h" +#include "vnodeMain.h" #include "vnodeWrite.h" #include "vnodeWriteMsg.h" @@ -68,11 +63,6 @@ static int32_t vnodeWriteToWQueue(SVnode *pVnode, SWalHead *pHead, int32_t qtype return TSDB_CODE_WAL_SIZE_LIMIT; } - if (!vnodeInReadyStatus(pVnode)) { - vError("vgId:%d, failed to write into vwqueue, vstatus is %s", pVnode->vgId, vnodeStatus[pVnode->status]); - return TSDB_CODE_APP_NOT_READY; - } - if (tsVwrite.queuedBytes > tsMaxVnodeQueuedBytes) { vDebug("vgId:%d, too many bytes:%" PRId64 " in vwqueue, flow control", pVnode->vgId, tsVwrite.queuedBytes); return TSDB_CODE_VND_IS_FLOWCTRL; @@ -122,7 +112,7 @@ void vnodeProcessWriteMsg(SRpcMsg *pRpcMsg) { pMsg->vgId = htonl(pMsg->vgId); pMsg->contLen = htonl(pMsg->contLen); - SVnode *pVnode = vnodeAcquireNotClose(pMsg->vgId); + SVnode *pVnode = vnodeAcquire(pMsg->vgId); if (pVnode == NULL) { code = TSDB_CODE_VND_INVALID_VGROUP_ID; } else { diff --git a/source/server/vnode/tq/inc/tqInt.h b/source/server/vnode/tq/inc/tqInt.h index 0896e7afabb4e11666d8522c6a3222c05299e35a..d19e9ec81e3fcc6ceee3c2af43a9c9c91821122b 100644 --- a/source/server/vnode/tq/inc/tqInt.h +++ b/source/server/vnode/tq/inc/tqInt.h @@ -26,13 +26,13 @@ extern "C" { //create persistent storage for meta info such as 
consuming offset //return value > 0: cgId //return value <= 0: error code -int tqCreateTCGroup(STQ*, const char* topic, int cgId, tqGroupHandle** handle); +//int tqCreateTCGroup(STQ*, const char* topic, int cgId, tqBufferHandle** handle); //create ring buffer in memory and load consuming offset //int tqOpenTCGroup(STQ*, const char* topic, int cgId); //destroy ring buffer and persist consuming offset //int tqCloseTCGroup(STQ*, const char* topic, int cgId); //delete persistent storage for meta info -int tqDropTCGroup(STQ*, const char* topic, int cgId); +//int tqDropTCGroup(STQ*, const char* topic, int cgId); #ifdef __cplusplus } diff --git a/source/server/vnode/tq/src/tq.c b/source/server/vnode/tq/src/tq.c index e0f2fc545e73695dc015dfd7bba4f81740cd85cf..52702057d692ce905dff14ea9be05264f0dc42e3 100644 --- a/source/server/vnode/tq/src/tq.c +++ b/source/server/vnode/tq/src/tq.c @@ -21,65 +21,150 @@ //send to fetch queue // //handle management message +// +static int tqProtoCheck(tmqMsgHead *pMsg) { + return pMsg->protoVer == 0; +} -tqGroupHandle* tqLookupGroupHandle(STQ *pTq, const char* topic, int cgId) { - //look in memory - // - //not found, try to restore from disk - // - //still not found - return NULL; +static int tqAckOneTopic(tqBufferHandle *bhandle, tmqOneAck *pAck, tqQueryMsg** ppQuery) { + //clean old item and move forward + int32_t consumeOffset = pAck->consumeOffset; + int idx = consumeOffset % TQ_BUFFER_SIZE; + ASSERT(bhandle->buffer[idx].content && bhandle->buffer[idx].executor); + tfree(bhandle->buffer[idx].content); + if( 1 /* TODO: need to launch new query */) { + tqQueryMsg* pNewQuery = malloc(sizeof(tqQueryMsg)); + if(pNewQuery == NULL) { + //TODO: memory insufficient + return -1; + } + //TODO: lock executor + pNewQuery->exec->executor = bhandle->buffer[idx].executor; + //TODO: read from wal and assign to src + pNewQuery->exec->src = 0; + pNewQuery->exec->dest = &bhandle->buffer[idx]; + pNewQuery->next = *ppQuery; + *ppQuery = pNewQuery; + } + return 
0; +} + +static int tqAck(tqGroupHandle* ghandle, tmqAcks* pAcks) { + int32_t ackNum = pAcks->ackNum; + tmqOneAck *acks = pAcks->acks; + //double ptr for acks and list + int i = 0; + tqListHandle* node = ghandle->head; + int ackCnt = 0; + tqQueryMsg *pQuery = NULL; + while(i < ackNum && node->next) { + if(acks[i].topicId == node->next->bufHandle->topicId) { + ackCnt++; + tqAckOneTopic(node->next->bufHandle, &acks[i++], &pQuery); + } else if(acks[i].topicId < node->next->bufHandle->topicId) { + i++; + } else { + node = node->next; + } + } + if(pQuery) { + //post message + } + return ackCnt; } static int tqCommitTCGroup(tqGroupHandle* handle) { - //persist into disk + //persist modification into disk return 0; } -int tqCreateTCGroup(STQ *pTq, const char* topic, int cgId, tqGroupHandle** handle) { +int tqCreateTCGroup(STQ *pTq, int64_t topicId, int64_t cgId, int64_t cId, tqGroupHandle** handle) { + //create in disk return 0; } -int tqOpenTGroup(STQ* pTq, const char* topic, int cgId) { - int code; - tqGroupHandle* handle = tqLookupGroupHandle(pTq, topic, cgId); - if(handle == NULL) { - code = tqCreateTCGroup(pTq, topic, cgId, &handle); - if(code != 0) { - return code; - } - } - ASSERT(handle != NULL); - - //put into STQ - +int tqOpenTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) { + //look up in disk + //create + //open return 0; } -/*int tqCloseTCGroup(STQ* pTq, const char* topic, int cgId) {*/ - /*tqGroupHandle* handle = tqLookupGroupHandle(pTq, topic, cgId);*/ - /*return tqCommitTCGroup(handle);*/ -/*}*/ +int tqCloseTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) { + return 0; +} -int tqDropTCGroup(STQ* pTq, const char* topic, int cgId) { +int tqDropTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) { //delete from disk return 0; } +static int tqFetch(tqGroupHandle* ghandle, void** msg) { + tqListHandle* head = ghandle->head; + tqListHandle* node = head; + int totSize = 0; + //TODO: make it a macro + int sizeLimit = 4 * 1024; + 
tmqMsgContent* buffer = malloc(sizeLimit); + if(buffer == NULL) { + //TODO:memory insufficient + return -1; + } + //iterate the list to get msgs of all topics + //until all topic iterated or msgs over sizeLimit + while(node->next) { + node = node->next; + tqBufferHandle* bufHandle = node->bufHandle; + int idx = bufHandle->nextConsumeOffset % TQ_BUFFER_SIZE; + if(bufHandle->buffer[idx].content != NULL && + bufHandle->buffer[idx].offset == bufHandle->nextConsumeOffset + ) { + totSize += bufHandle->buffer[idx].size; + if(totSize > sizeLimit) { + void *ptr = realloc(buffer, totSize); + if(ptr == NULL) { + totSize -= bufHandle->buffer[idx].size; + //TODO:memory insufficient + //return msgs already copied + break; + } + } + *((int64_t*)buffer) = bufHandle->topicId; + buffer = POINTER_SHIFT(buffer, sizeof(int64_t)); + *((int64_t*)buffer) = bufHandle->buffer[idx].size; + buffer = POINTER_SHIFT(buffer, sizeof(int64_t)); + memcpy(buffer, bufHandle->buffer[idx].content, bufHandle->buffer[idx].size); + buffer = POINTER_SHIFT(buffer, bufHandle->buffer[idx].size); + if(totSize > sizeLimit) { + break; + } + } + } + if(totSize == 0) { + //no msg + return -1; + } -int tqFetchMsg(tqGroupHandle* handle, void* msg) { - return 0; + return totSize; } -int tqMoveOffsetToNext(tqGroupHandle* handle) { - return 0; + +tqGroupHandle* tqGetGroupHandle(STQ* pTq, int64_t cId) { + return NULL; } +int tqLaunchQuery(tqGroupHandle* ghandle) { + return 0; +} -tqGroupHandle* tqFindGHandleBycId(STQ* pTq, int64_t cId) { - return NULL; +int tqSendLaunchQuery(STQ* pTq, int64_t topicId, int64_t cgId, void* query) { + return 0; } +/*int tqMoveOffsetToNext(tqGroupHandle* ghandle) {*/ + /*return 0;*/ +/*}*/ + int tqPushMsg(STQ* pTq , void* p, int64_t version) { //add reference //judge and launch new query @@ -91,10 +176,121 @@ int tqCommit(STQ* pTq) { return 0; } -int tqHandleConsumeMsg(STQ* pTq, tmqConsumeReq* msg) { - //parse msg and extract topic and cgId - //lookup handle - //confirm message and send to 
consumer +int tqConsume(STQ* pTq, tmqConsumeReq* pMsg) { + if(!tqProtoCheck((tmqMsgHead *)pMsg)) { + //proto version invalid + return -1; + } + int64_t clientId = pMsg->head.clientId; + tqGroupHandle *ghandle = tqGetGroupHandle(pTq, clientId); + if(ghandle == NULL) { + //client not connect + return -1; + } + if(pMsg->acks.ackNum != 0) { + if(tqAck(ghandle, &pMsg->acks) != 0) { + //ack not success + return -1; + } + } + + tmqConsumeRsp *pRsp = (tmqConsumeRsp*) pMsg; + + if(tqFetch(ghandle, (void**)&pRsp->msgs) < 0) { + //fetch error + return -1; + } + //judge and launch new query + if(tqLaunchQuery(ghandle)) { + //launch query error + return -1; + } + return 0; +} + + +int tqSerializeGroupHandle(tqGroupHandle *gHandle, void** ppBytes, int32_t offset) { + //calculate size + int sz = tqGetGHandleSSize(gHandle); + if(sz <= 0) { + //TODO: err + return -1; + } + void* ptr = realloc(*ppBytes, sz); + if(ptr == NULL) { + free(ppBytes); + //TODO: memory err + return -1; + } + *ppBytes = ptr; + //do serialize + *(int64_t*)ptr = gHandle->cId; + ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); + *(int64_t*)ptr = gHandle->cgId; + ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); + *(int32_t*)ptr = gHandle->topicNum; + if(gHandle->topicNum > 0) { + tqSerializeListHandle(gHandle->head, ppBytes, ptr - *ppBytes); + } + return 0; +} + +int tqSerializeListHandle(tqListHandle *listHandle, void** ppBytes, int32_t offset) { + void* ptr = POINTER_SHIFT(*ppBytes, offset); + tqListHandle *node = listHandle; + while(node->next) { + node = node->next; + offset = tqSerializeBufHandle(node->bufHandle, ppBytes, offset); + } + return offset; +} +int tqSerializeBufHandle(tqBufferHandle *bufHandle, void** ppBytes, int32_t offset) { + void *ptr = POINTER_SHIFT(*ppBytes, offset); + *(int64_t*)ptr = bufHandle->nextConsumeOffset; + ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); + *(int64_t*)ptr = bufHandle->topicId; + ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); + *(int32_t*)ptr = bufHandle->head; + ptr = 
POINTER_SHIFT(ptr, sizeof(int32_t)); + *(int32_t*)ptr = bufHandle->tail; + ptr = POINTER_SHIFT(ptr, sizeof(int32_t)); + for(int i = 0; i < TQ_BUFFER_SIZE; i++) { + int sz = tqSerializeBufItem(&bufHandle->buffer[i], ppBytes, ptr - *ppBytes); + ptr = POINTER_SHIFT(ptr, sz); + } + return ptr - *ppBytes; +} + +int tqSerializeBufItem(tqBufferItem *bufItem, void** ppBytes, int32_t offset) { + void *ptr = POINTER_SHIFT(*ppBytes, offset); + //TODO: do we need serialize this? + return 0; +} + +int tqDeserializeGroupHandle(const void* pBytes, tqGroupHandle **pGhandle) { + return 0; +} +int tqDeserializeListHandle(const void* pBytes, tqListHandle **pListHandle) { + return 0; +} +int tqDeserializeBufHandle(const void* pBytes, tqBufferHandle **pBufHandle) { + return 0; +} +int tqDeserializeBufItem(const void* pBytes, tqBufferItem **pBufItem) { + return 0; +} + + +int tqGetGHandleSSize(const tqGroupHandle *gHandle) { + return 0; +} +int tqListHandleSSize(const tqListHandle *listHandle) { + return 0; +} +int tqBufHandleSSize(const tqBufferHandle *bufHandle) { + return 0; +} +int tqBufItemSSize(const tqBufferItem *bufItem) { return 0; } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 68cd067fb970e6e39a7a2c1bf370d860d5514e91..8e5d7a47fd9797d5aba796ab809859238e79dbf0 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -245,11 +245,11 @@ TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_DISK_PERMISSIONS, "No write permission f TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR, "Missing data file") TAOS_DEFINE_ERROR(TSDB_CODE_VND_OUT_OF_MEMORY, "Out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_VND_APP_ERROR, "Unexpected generic error in vnode") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_VRESION_FILE, "Invalid version file") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_FULL, "Database memory is full for commit failed") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_FLOWCTRL, "Database memory is full for waiting commit") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_CFG_FILE, "Invalid 
config file") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_INVALID_TERM_FILE, "Invalid term file") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_FLOWCTRL, "Database memory is full") TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_DROPPING, "Database is dropping") -TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_BALANCING, "Database is balancing") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_UPDATING, "Database is updating") TAOS_DEFINE_ERROR(TSDB_CODE_VND_IS_CLOSING, "Database is closing") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NOT_SYNCED, "Database suspended") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_WRITE_AUTH, "Database write operation denied")