提交 60a0da97 编写于 作者: dengyihao's avatar dengyihao

Merge branch 'develop3' into v3

...@@ -672,16 +672,16 @@ typedef struct { ...@@ -672,16 +672,16 @@ typedef struct {
} SDnodeCfg; } SDnodeCfg;
typedef struct { typedef struct {
int32_t dnodeId; int32_t id;
int8_t isMnode; int8_t isMnode;
int8_t reserved; int8_t reserved;
uint16_t dnodePort; uint16_t port;
char dnodeFqdn[TSDB_FQDN_LEN]; char fqdn[TSDB_FQDN_LEN];
} SDnodeEp; } SDnodeEp;
typedef struct { typedef struct {
int32_t dnodeNum; int32_t num;
SDnodeEp dnodeEps[]; SDnodeEp eps[];
} SDnodeEps; } SDnodeEps;
typedef struct { typedef struct {
...@@ -820,9 +820,9 @@ typedef struct { ...@@ -820,9 +820,9 @@ typedef struct {
} SCreateDnodeMsg, SDropDnodeMsg; } SCreateDnodeMsg, SDropDnodeMsg;
typedef struct { typedef struct {
int32_t dnodeId; int32_t dnodeId;
int8_t replica; int8_t replica;
int8_t reserved[3]; int8_t reserved[3];
SReplica replicas[TSDB_MAX_REPLICA]; SReplica replicas[TSDB_MAX_REPLICA];
} SCreateMnodeMsg, SAlterMnodeMsg, SDropMnodeMsg; } SCreateMnodeMsg, SAlterMnodeMsg, SDropMnodeMsg;
......
...@@ -28,10 +28,6 @@ extern char tsSecond[]; ...@@ -28,10 +28,6 @@ extern char tsSecond[];
extern char tsLocalFqdn[]; extern char tsLocalFqdn[];
extern char tsLocalEp[]; extern char tsLocalEp[];
extern uint16_t tsServerPort; extern uint16_t tsServerPort;
extern uint16_t tsDnodeShellPort;
extern uint16_t tsDnodeDnodePort;
extern uint16_t tsSyncPort;
extern uint16_t tsArbitratorPort;
extern int32_t tsStatusInterval; extern int32_t tsStatusInterval;
extern int32_t tsNumOfMnodes; extern int32_t tsNumOfMnodes;
extern int8_t tsEnableVnodeBak; extern int8_t tsEnableVnodeBak;
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_DNODE_H_
#define _TD_DNODE_H_
#include "tdef.h"
#ifdef __cplusplus
extern "C" {
#endif
/* ------------------------ TYPES EXPOSED ------------------------ */
typedef struct SDnode SDnode;
/**
* @brief Options of a dnode, passed to dndInit().
*
*/
typedef struct {
/**
* @brief software version of the program.
*
*/
int32_t sver;
/**
* @brief number of CPU cores.
*
*/
int32_t numOfCores;
/**
* @brief number of threads per CPU core.
*
*/
float numOfThreadsPerCore;
/**
* @brief the proportion of total CPU cores available for query processing.
*
*/
float ratioOfQueryCores;
/**
* @brief max number of connections allowed in dnode.
*
*/
int32_t maxShellConns;
/**
* @brief time interval of heart beat from shell to dnode, seconds.
*
*/
int32_t shellActivityTimer;
/**
* @brief time interval of dnode status reporting to mnode, seconds, for cluster only.
*
*/
int32_t statusInterval;
/**
* @brief first port number for the connection (12 consecutive UDP/TCP port numbers are used).
*
*/
uint16_t serverPort;
/**
* @brief data file's directory.
*
*/
char dataDir[TSDB_FILENAME_LEN];
/**
* @brief local endpoint.
*
*/
char localEp[TSDB_EP_LEN];
/**
* @brief local fully qualified domain name (FQDN).
*
*/
char localFqdn[TSDB_FQDN_LEN];
/**
* @brief first endpoint for TDengine system.
*
* NOTE(review): the buffer is sized with TSDB_EP_LEN (like localEp), so this
* looks like an endpoint rather than a bare FQDN - confirm.
*/
char firstEp[TSDB_EP_LEN];
/**
* @brief system time zone.
*
*/
char timezone[TSDB_TIMEZONE_LEN];
/**
* @brief system locale.
*
*/
char locale[TSDB_LOCALE_LEN];
/**
* @brief default system charset.
*
* NOTE(review): sized with TSDB_LOCALE_LEN, the same constant as locale -
* confirm this is intended.
*/
char charset[TSDB_LOCALE_LEN];
} SDnodeOpt;
/* ------------------------ SDnode ------------------------ */
/**
* @brief Initialize and start the dnode.
*
* @param pOption Options of the dnode (an SDnodeOpt).
* @return SDnode* The dnode object.
*         NOTE(review): the result on failure (e.g. NULL) is not visible
*         from this declaration - confirm against the implementation.
*/
SDnode *dndInit(SDnodeOpt *pOption);
/**
* @brief Stop and cleanup the dnode.
*
* @param pDnode The dnode object to close (an SDnode*, the type returned
*               by dndInit()).
*/
void dndCleanup(SDnode *pDnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_DNODE_H_*/
...@@ -13,63 +13,265 @@ ...@@ -13,63 +13,265 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_H_ #ifndef _TD_MND_H_
#define _TD_MNODE_H_ #define _TD_MND_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef enum { MN_MSG_TYPE_WRITE = 1, MN_MSG_TYPE_APPLY, MN_MSG_TYPE_SYNC, MN_MSG_TYPE_READ } EMnMsgType; /* ------------------------ TYPES EXPOSED ------------------------ */
typedef struct SDnode SDnode;
typedef struct SMnode SMnode;
typedef struct SMnodeMsg SMnodeMsg; typedef struct SMnodeMsg SMnodeMsg;
typedef void (*SendMsgToDnodeFp)(SDnode *pDnode, struct SEpSet *epSet, struct SRpcMsg *rpcMsg);
typedef void (*SendMsgToMnodeFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
typedef void (*SendRedirectMsgFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
typedef int32_t (*PutMsgToMnodeQFp)(SDnode *pDnode, SMnodeMsg *pMsg);
typedef struct { typedef struct SMnodeLoad {
int8_t replica; /**
int8_t selfIndex; * @brief the number of dnodes in cluster.
SReplica replicas[TSDB_MAX_REPLICA]; *
} SMnodeCfg; */
typedef struct {
int64_t numOfDnode; int64_t numOfDnode;
/**
* @brief the number of mnodes in cluster.
*
*/
int64_t numOfMnode; int64_t numOfMnode;
/**
* @brief the number of vgroups in cluster.
*
*/
int64_t numOfVgroup; int64_t numOfVgroup;
/**
* @brief the number of databases in cluster.
*
*/
int64_t numOfDatabase; int64_t numOfDatabase;
/**
* @brief the number of super tables in cluster.
*
*/
int64_t numOfSuperTable; int64_t numOfSuperTable;
/**
* @brief the number of child tables in cluster.
*
*/
int64_t numOfChildTable; int64_t numOfChildTable;
/**
* @brief the number of normal tables in cluster.
*
*/
int64_t numOfNormalTable;
/**
* @brief the number of time series (columns) in cluster.
*
*/
int64_t numOfColumn; int64_t numOfColumn;
/**
* @brief total points written in cluster.
*
*/
int64_t totalPoints; int64_t totalPoints;
/**
* @brief total storage in cluster.
*
*/
int64_t totalStorage; int64_t totalStorage;
/**
* @brief total compressed storage in cluster.
*
*/
int64_t compStorage; int64_t compStorage;
} SMnodeLoad; } SMnodeLoad;
typedef struct { typedef struct {
/**
* @brief dnodeId of this mnode.
*
*/
int32_t dnodeId; int32_t dnodeId;
/**
* @brief clusterId of this mnode.
*
*/
int64_t clusterId; int64_t clusterId;
void (*SendMsgToDnode)(struct SEpSet *epSet, struct SRpcMsg *rpcMsg);
void (*SendMsgToMnode)(struct SRpcMsg *rpcMsg);
void (*SendRedirectMsg)(struct SRpcMsg *rpcMsg, bool forShell);
int32_t (*PutMsgIntoApplyQueue)(SMnodeMsg *pMsg);
} SMnodePara;
int32_t mnodeInit(SMnodePara para); /**
void mnodeCleanup(); * @brief replica num of this mnode.
*
*/
int8_t replica;
/**
* @brief self index in the array of replicas.
*
*/
int8_t selfIndex;
/**
* @brief detail replica information of this mnode.
*
*/
SReplica replicas[TSDB_MAX_REPLICA];
/**
* @brief the parent dnode of this mnode.
*
*/
SDnode *pDnode;
/**
* @brief put apply msg to the write queue in dnode.
*
*/
PutMsgToMnodeQFp putMsgToApplyMsgFp;
/**
* @brief the callback function while send msg to dnode.
*
*/
SendMsgToDnodeFp sendMsgToDnodeFp;
/**
* @brief the callback function while send msg to mnode.
*
*/
SendMsgToMnodeFp sendMsgToMnodeFp;
/**
* @brief the callback function while send redirect msg to clients or peers.
*
*/
SendRedirectMsgFp sendRedirectMsgFp;
} SMnodeOpt;
/* ------------------------ SMnode ------------------------ */
/**
* @brief Open a mnode.
*
* @param path Path of the mnode.
* @param pOption Option of the mnode.
* @return SMnode* The mnode object.
*/
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption);
/**
* @brief Close a mnode.
*
* @param pMnode The mnode object to close.
*/
void mndClose(SMnode *pMnode);
/**
* @brief Alter a mnode.
*
* @param pMnode The mnode object to alter.
* @param pOption Options of the mnode.
* @return int32_t 0 for success, -1 for failure.
*/
int32_t mndAlter(SMnode *pMnode, const SMnodeOpt *pOption);
/**
* @brief Drop a mnode.
*
* @param path Path of the mnode.
*/
void mndDestroy(const char *path);
/**
* @brief Get mnode statistics info.
*
* @param pMnode The mnode object.
* @param pLoad Statistics of the mnode.
* @return int32_t 0 for success, -1 for failure.
*/
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad);
/**
* @brief Get user authentication info.
*
* @param pMnode The mnode object.
* @param user
* @param spi
* @param encrypt
* @param secret
* @param ckey
* @return int32_t 0 for success, -1 for failure.
*/
int32_t mndRetriveAuth(SMnode *pMnode, char *user, char *spi, char *encrypt, char *secret, char *ckey);
/**
* @brief Initialize mnode msg.
*
* @param pMnode The mnode object.
* @param pRpcMsg The request rpc msg.
* @return SMnodeMsg* The created mnode msg.
*/
SMnodeMsg *mndInitMsg(SMnode *pMnode, SRpcMsg *pRpcMsg);
/**
* @brief Cleanup mnode msg.
*
* @param pMsg The request msg.
*/
void mndCleanupMsg(SMnodeMsg *pMsg);
int32_t mnodeDeploy(SMnodeCfg *pCfg); /**
void mnodeUnDeploy(); * @brief Send the response of a mnode msg.
int32_t mnodeStart(SMnodeCfg *pCfg); *
int32_t mnodeAlter(SMnodeCfg *pCfg); * @param pMsg The request msg.
void mnodeStop(); * @param code The error code.
*/
void mndSendRsp(SMnodeMsg *pMsg, int32_t code);
int32_t mnodeGetLoad(SMnodeLoad *pLoad); /**
int32_t mnodeRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey); * @brief Process the read request.
*
* @param pMsg The request msg.
* @note Declared void; no status code is returned to the caller.
*/
void mndProcessReadMsg(SMnodeMsg *pMsg);
SMnodeMsg *mnodeInitMsg(SRpcMsg *pRpcMsg); /**
void mnodeCleanupMsg(SMnodeMsg *pMsg); * @brief Process the write request.
void mnodeProcessMsg(SMnodeMsg *pMsg, EMnMsgType msgType); *
* @param pMsg The request msg.
* @note Declared void; no status code is returned to the caller.
*/
void mndProcessWriteMsg(SMnodeMsg *pMsg);
/**
* @brief Process the sync request.
*
* @param pMsg The request msg.
* @note Declared void; no status code is returned to the caller.
*/
void mndProcessSyncMsg(SMnodeMsg *pMsg);
/**
* @brief Process the apply request.
*
* @param pMsg The request msg.
* @note Declared void; no status code is returned to the caller.
*/
void mndProcessApplyMsg(SMnodeMsg *pMsg);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_H_*/ #endif /*_TD_MND_H_*/
...@@ -56,39 +56,39 @@ extern "C" { ...@@ -56,39 +56,39 @@ extern "C" {
dataPos += valLen; \ dataPos += valLen; \
} }
#define SDB_SET_INT64(pData, dataPos, val) \ #define SDB_SET_INT64(pRaw, dataPos, val) \
{ \ { \
if (sdbSetRawInt64(pRaw, dataPos, val) != 0) { \ if (sdbSetRawInt64(pRaw, dataPos, val) != 0) { \
sdbFreeRaw(pRaw); \ sdbFreeRaw(pRaw); \
return NULL; \ return NULL; \
}; \ } \
dataPos += sizeof(int64_t); \ dataPos += sizeof(int64_t); \
} }
#define SDB_SET_INT32(pData, dataPos, val) \ #define SDB_SET_INT32(pRaw, dataPos, val) \
{ \ { \
if (sdbSetRawInt32(pRaw, dataPos, val) != 0) { \ if (sdbSetRawInt32(pRaw, dataPos, val) != 0) { \
sdbFreeRaw(pRaw); \ sdbFreeRaw(pRaw); \
return NULL; \ return NULL; \
}; \ } \
dataPos += sizeof(int32_t); \ dataPos += sizeof(int32_t); \
} }
#define SDB_SET_INT8(pData, dataPos, val) \ #define SDB_SET_INT8(pRaw, dataPos, val) \
{ \ { \
if (sdbSetRawInt8(pRaw, dataPos, val) != 0) { \ if (sdbSetRawInt8(pRaw, dataPos, val) != 0) { \
sdbFreeRaw(pRaw); \ sdbFreeRaw(pRaw); \
return NULL; \ return NULL; \
}; \ } \
dataPos += sizeof(int8_t); \ dataPos += sizeof(int8_t); \
} }
#define SDB_SET_BINARY(pRaw, dataPos, val, valLen) \ #define SDB_SET_BINARY(pRaw, dataPos, val, valLen) \
{ \ { \
if (sdbSetRawBinary(pRaw, dataPos, val, valLen) != 0) { \ if (sdbSetRawBinary(pRaw, dataPos, val, valLen) != 0) { \
sdbFreeRaw(pRaw); \ sdbFreeRaw(pRaw); \
return NULL; \ return NULL; \
}; \ } \
dataPos += valLen; \ dataPos += valLen; \
} }
...@@ -97,7 +97,7 @@ extern "C" { ...@@ -97,7 +97,7 @@ extern "C" {
if (sdbSetRawDataLen(pRaw, dataLen) != 0) { \ if (sdbSetRawDataLen(pRaw, dataLen) != 0) { \
sdbFreeRaw(pRaw); \ sdbFreeRaw(pRaw); \
return NULL; \ return NULL; \
}; \ } \
} }
typedef struct SSdbRaw SSdbRaw; typedef struct SSdbRaw SSdbRaw;
/**
 * @brief Identifiers of the sdb tables.
 *
 * The value is what callers pass as the `type` argument of sdbAcquire(),
 * sdbFetch(), sdbGetSize() and sdbAllocRaw(), so every table needs a value
 * of its own. SDB_VGROUP and SDB_STABLE were both assigned 9 while 8 was
 * left unused; SDB_VGROUP now takes 8 so the ids are unique and contiguous
 * again.
 *
 * SDB_MAX is one past the largest table id.
 *
 * NOTE(review): if the numeric id is persisted in existing sdb files,
 * confirm that renumbering SDB_VGROUP is acceptable for them.
 */
typedef enum {
  SDB_START = 0,
  SDB_TRANS = 1,
  SDB_CLUSTER = 2,
  SDB_MNODE = 3,
  SDB_DNODE = 4,
  SDB_USER = 5,
  SDB_AUTH = 6,
  SDB_ACCT = 7,
  SDB_VGROUP = 8,
  SDB_STABLE = 9,
  SDB_DB = 10,
  SDB_FUNC = 11,
  SDB_MAX = 12
} ESdbType;
typedef int32_t (*SdbInsertFp)(void *pObj); typedef struct SSdb SSdb;
typedef int32_t (*SdbUpdateFp)(void *pSrcObj, void *pDstObj); typedef int32_t (*SdbInsertFp)(SSdb *pSdb, void *pObj);
typedef int32_t (*SdbDeleteFp)(void *pObj); typedef int32_t (*SdbUpdateFp)(SSdb *pSdb, void *pSrcObj, void *pDstObj);
typedef int32_t (*SdbDeployFp)(); typedef int32_t (*SdbDeleteFp)(SSdb *pSdb, void *pObj);
typedef int32_t (*SdbDeployFp)(SSdb *pSdb);
typedef SSdbRow *(*SdbDecodeFp)(SSdbRaw *pRaw); typedef SSdbRow *(*SdbDecodeFp)(SSdbRaw *pRaw);
typedef SSdbRaw *(*SdbEncodeFp)(void *pObj); typedef SSdbRaw *(*SdbEncodeFp)(void *pObj);
typedef struct { typedef struct {
ESdbType sdbType; /**
EKeyType keyType; * @brief The sdb type of the table.
*
*/
ESdbType sdbType;
/**
* @brief The key type of the table.
*
*/
EKeyType keyType;
/**
* @brief The callback function when the table is first deployed.
*
*/
SdbDeployFp deployFp; SdbDeployFp deployFp;
/**
* @brief Encode one row of the table into rawdata.
*
*/
SdbEncodeFp encodeFp; SdbEncodeFp encodeFp;
/**
* @brief Decode one row of the table from rawdata.
*
*/
SdbDecodeFp decodeFp; SdbDecodeFp decodeFp;
/**
* @brief The callback function when insert a row to sdb.
*
*/
SdbInsertFp insertFp; SdbInsertFp insertFp;
/**
* @brief The callback function when update a row in sdb.
*
*/
SdbUpdateFp updateFp; SdbUpdateFp updateFp;
/**
* @brief The callback function when delete a row from sdb.
*
*/
SdbDeleteFp deleteFp; SdbDeleteFp deleteFp;
} SSdbTable; } SSdbTable;
int32_t sdbInit(); typedef struct SSdbOpt {
void sdbCleanup(); /**
void sdbSetTable(SSdbTable table); * @brief The path of the sdb file.
*
*/
const char *path;
} SSdbOpt;
int32_t sdbOpen(); /**
void sdbClose(); * @brief Initialize and start the sdb.
int32_t sdbWrite(SSdbRaw *pRaw); *
* @param pOption Option of the sdb.
* @return SSdb* The sdb object.
*/
SSdb *sdbInit(SSdbOpt *pOption);
/**
* @brief Stop and cleanup the sdb.
*
* @param pSdb The sdb object to close.
*/
void sdbCleanup(SSdb *pSdb);
int32_t sdbDeploy(); /**
void sdbUnDeploy(); * @brief Set the properties of sdb table.
*
* @param pSdb The sdb object.
* @param table The properties of the table.
* @return int32_t 0 for success, -1 for failure.
*/
int32_t sdbSetTable(SSdb *pSdb, SSdbTable table);
/**
* @brief Set the initial rows of sdb.
*
* @param pSdb The sdb object.
* @return int32_t 0 for success, -1 for failure.
*/
int32_t sdbDeploy(SSdb *pSdb);
void *sdbAcquire(ESdbType sdb, void *pKey); /**
void sdbRelease(void *pObj); * @brief Load sdb from file.
void *sdbFetch(ESdbType sdb, void *pIter, void **ppObj); *
void sdbCancelFetch(void *pIter); * @param pSdb The sdb object.
int32_t sdbGetSize(ESdbType sdb); * @return int32_t 0 for success, -1 for failure.
*/
int32_t sdbReadFile(SSdb *pSdb);
/**
* @brief Parse and write raw data to sdb.
*
* @param pSdb The sdb object.
* @param pRaw The raw data.
* @return int32_t 0 for success, -1 for failure.
*/
int32_t sdbWrite(SSdb *pSdb, SSdbRaw *pRaw);
/**
* @brief Acquire a row from sdb
*
* @param pSdb The sdb object.
* @param type The type of the row.
* @param pKey The key value of the row.
* @return void* The object of the row.
*/
void *sdbAcquire(SSdb *pSdb, ESdbType type, void *pKey);
/**
* @brief Release a row from sdb.
*
* @param pSdb The sdb object.
* @param pObj The object of the row.
*/
void sdbRelease(SSdb *pSdb, void *pObj);
/**
* @brief Traverse a sdb table
*
* @param pSdb The sdb object.
* @param type The type of the table.
* @param pIter The initial iterator of the table.
* @param ppObj The object of the row just fetched.
* @return void* The next iterator of the table.
*/
void *sdbFetch(SSdb *pSdb, ESdbType type, void *pIter, void **ppObj);
/**
* @brief Cancel a traversal
*
* @param pSdb The sdb object.
* @param pIter The iterator of the table.
*/
void sdbCancelFetch(SSdb *pSdb, void *pIter);
/**
* @brief Get the number of rows in the table
*
* @param pSdb The sdb object.
* @param type The type of the table.
* @return int32_t The number of rows in the table
*/
int32_t sdbGetSize(SSdb *pSdb, ESdbType type);
SSdbRaw *sdbAllocRaw(ESdbType sdb, int8_t sver, int32_t dataLen); SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen);
void sdbFreeRaw(SSdbRaw *pRaw); void sdbFreeRaw(SSdbRaw *pRaw);
int32_t sdbSetRawInt8(SSdbRaw *pRaw, int32_t dataPos, int8_t val); int32_t sdbSetRawInt8(SSdbRaw *pRaw, int32_t dataPos, int8_t val);
int32_t sdbSetRawInt32(SSdbRaw *pRaw, int32_t dataPos, int32_t val); int32_t sdbSetRawInt32(SSdbRaw *pRaw, int32_t dataPos, int32_t val);
......
...@@ -109,11 +109,10 @@ typedef struct TqTopicVhandle { ...@@ -109,11 +109,10 @@ typedef struct TqTopicVhandle {
#define TQ_BUFFER_SIZE 8 #define TQ_BUFFER_SIZE 8
// TODO: define a serializer and deserializer
typedef struct TqBufferItem { typedef struct TqBufferItem {
int64_t offset; int64_t offset;
// executors are identical but not concurrent // executors are identical but not concurrent
// so it must be a copy in each item // so there must be a copy in each item
void* executor; void* executor;
int64_t size; int64_t size;
void* content; void* content;
...@@ -156,23 +155,111 @@ typedef struct TqQueryMsg { ...@@ -156,23 +155,111 @@ typedef struct TqQueryMsg {
typedef struct TqLogReader { typedef struct TqLogReader {
void* logHandle; void* logHandle;
int32_t (*walRead)(void* logHandle, void** data, int64_t ver); int32_t (*logRead)(void* logHandle, void** data, int64_t ver);
int64_t (*walGetFirstVer)(void* logHandle); int64_t (*logGetFirstVer)(void* logHandle);
int64_t (*walGetSnapshotVer)(void* logHandle); int64_t (*logGetSnapshotVer)(void* logHandle);
int64_t (*walGetLastVer)(void* logHandle); int64_t (*logGetLastVer)(void* logHandle);
} TqLogReader; } TqLogReader;
typedef struct TqConfig { typedef struct TqConfig {
// TODO // TODO
} TqConfig; } TqConfig;
typedef struct TqMemRef {
SMemAllocatorFactory *pAlloctorFactory;
SMemAllocator *pAllocator;
} TqMemRef;
typedef struct TqSerializedHead {
int16_t ver;
int16_t action;
int32_t checksum;
int64_t ssize;
char content[];
} TqSerializedHead;
typedef int (*TqSerializeFun)(const void* pObj, TqSerializedHead** ppHead);
typedef const void* (*TqDeserializeFun)(const TqSerializedHead* pHead, void** ppObj);
typedef void (*TqDeleteFun)(void*);
#define TQ_BUCKET_MASK 0xFF
#define TQ_BUCKET_SIZE 256
#define TQ_PAGE_SIZE 4096
//key + offset + size
#define TQ_IDX_SIZE 24
//4096 / 24
#define TQ_MAX_IDX_ONE_PAGE 170
//24 * 170
#define TQ_IDX_PAGE_BODY_SIZE 4080
//4096 - 4080
#define TQ_IDX_PAGE_HEAD_SIZE 16
#define TQ_ACTION_CONST 0
#define TQ_ACTION_INUSE 1
#define TQ_ACTION_INUSE_CONT 2
#define TQ_ACTION_INTXN 3
#define TQ_SVER 0
// Update-mode bit of tqConfigFlag.
// TODO: inplace mode is not implemented
#define TQ_UPDATE_INPLACE 0
#define TQ_UPDATE_APPEND 1

// Duplicate-in-transaction bit of tqConfigFlag.
#define TQ_DUP_INTXN_REWRITE 0
#define TQ_DUP_INTXN_REJECT 2

// Returns true when the TQ_UPDATE_APPEND bit is set in tqConfigFlag.
static inline bool TqUpdateAppend(int32_t tqConfigFlag) {
  const int32_t appendBit = tqConfigFlag & TQ_UPDATE_APPEND;
  return appendBit != 0;
}

// Returns true when the TQ_DUP_INTXN_REJECT bit is set in tqConfigFlag.
static inline bool TqDupIntxnReject(int32_t tqConfigFlag) {
  const int32_t rejectBit = tqConfigFlag & TQ_DUP_INTXN_REJECT;
  return rejectBit != 0;
}
// Object whose address serves as the delete token; its value is
// TQ_ACTION_CONST.
// NOTE(review): a `static const` object defined in a header gives every
// translation unit its own copy, so TQ_DELETE_TOKEN has a different address
// in each .c file that includes this header. If the token is ever compared
// by address across files, this needs a single extern definition - confirm.
static const int8_t TQ_CONST_DELETE = TQ_ACTION_CONST;
// The expansion is fully parenthesized so that it stays one operand wherever
// it is used (e.g. `sizeof TQ_DELETE_TOKEN` or a following postfix operator).
#define TQ_DELETE_TOKEN ((void*)&TQ_CONST_DELETE)
typedef struct TqMetaHandle {
int64_t key;
int64_t offset;
int64_t serializedSize;
void* valueInUse;
void* valueInTxn;
} TqMetaHandle;
typedef struct TqMetaList {
TqMetaHandle handle;
struct TqMetaList* next;
//struct TqMetaList* inTxnPrev;
//struct TqMetaList* inTxnNext;
struct TqMetaList* unpersistPrev;
struct TqMetaList* unpersistNext;
} TqMetaList;
// State of one tq meta store: the bucket table of TqMetaList entries, the
// backing file descriptors and the user-supplied value callbacks.
typedef struct TqMetaStore {
// TQ_BUCKET_SIZE bucket heads; entries in a bucket are chained via TqMetaList.next
// NOTE(review): presumably indexed by key & TQ_BUCKET_MASK - confirm
TqMetaList* bucket[TQ_BUCKET_SIZE];
// a table head
// NOTE(review): presumably the head of the list linked through
// TqMetaList.unpersistPrev / unpersistNext - confirm
TqMetaList* unpersistHead;
//TODO: temporary use, to be replaced by unified tfile
int fileFd;
//TODO: temporary use, to be replaced by unified tfile
int idxFd;
char* dirPath;
// bit flags, see TQ_UPDATE_* and TQ_DUP_INTXN_*
int32_t tqConfigFlag;
// callbacks used to serialize, deserialize and delete the stored values
TqSerializeFun pSerializer;
TqDeserializeFun pDeserializer;
TqDeleteFun pDeleter;
} TqMetaStore;
typedef struct STQ { typedef struct STQ {
// the collection of group handle // the collection of group handle
// the handle of kvstore // the handle of kvstore
const char* path; char* path;
TqConfig* tqConfig; TqConfig* tqConfig;
TqLogReader* tqLogReader; TqLogReader* tqLogReader;
SMemAllocatorFactory* allocFac; TqMemRef tqMemRef;
TqMetaStore* tqMeta;
} STQ; } STQ;
// open in each vnode // open in each vnode
...@@ -187,7 +274,7 @@ int tqConsume(STQ*, TmqConsumeReq*); ...@@ -187,7 +274,7 @@ int tqConsume(STQ*, TmqConsumeReq*);
TqGroupHandle* tqGetGroupHandle(STQ*, int64_t cId); TqGroupHandle* tqGetGroupHandle(STQ*, int64_t cId);
int tqOpenTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId); TqGroupHandle* tqOpenTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId);
int tqCloseTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId); int tqCloseTCGroup(STQ*, int64_t topicId, int64_t cgId, int64_t cId);
int tqMoveOffsetToNext(TqGroupHandle*); int tqMoveOffsetToNext(TqGroupHandle*);
int tqResetOffset(STQ*, int64_t topicId, int64_t cgId, int64_t offset); int tqResetOffset(STQ*, int64_t topicId, int64_t cgId, int64_t offset);
...@@ -195,18 +282,9 @@ int tqRegisterContext(TqGroupHandle*, void* ahandle); ...@@ -195,18 +282,9 @@ int tqRegisterContext(TqGroupHandle*, void* ahandle);
int tqLaunchQuery(TqGroupHandle*); int tqLaunchQuery(TqGroupHandle*);
int tqSendLaunchQuery(TqGroupHandle*); int tqSendLaunchQuery(TqGroupHandle*);
int tqSerializeGroupHandle(TqGroupHandle* gHandle, void** ppBytes); int tqSerializeGroupHandle(const TqGroupHandle* gHandle, TqSerializedHead** ppHead);
void* tqSerializeListHandle(TqListHandle* listHandle, void* ptr);
void* tqSerializeBufHandle(TqBufferHandle* bufHandle, void* ptr);
void* tqSerializeBufItem(TqBufferItem* bufItem, void* ptr);
const void* tqDeserializeGroupHandle(const void* pBytes, TqGroupHandle* ghandle);
const void* tqDeserializeBufHandle(const void* pBytes, TqBufferHandle* bufHandle);
const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem* bufItem);
int tqGetGHandleSSize(const TqGroupHandle* gHandle); const void* tqDeserializeGroupHandle(const TqSerializedHead* pHead, TqGroupHandle** gHandle);
int tqBufHandleSSize();
int tqBufItemSSize();
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -184,21 +184,9 @@ typedef struct { ...@@ -184,21 +184,9 @@ typedef struct {
SRpcMsg rpcMsg[]; SRpcMsg rpcMsg[];
} SVnodeMsg; } SVnodeMsg;
typedef struct {
void (*SendMsgToDnode)(SEpSet *pEpSet, SRpcMsg *pMsg);
void (*SendMsgToMnode)(SRpcMsg *pMsg);
int32_t (*PutMsgIntoApplyQueue)(int32_t vgId, SVnodeMsg *pMsg);
} SVnodePara;
int32_t vnodeInit(SVnodePara);
void vnodeCleanup();
int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg); int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg);
SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg);
void vnodeDrop(SVnode *pVnode);
int32_t vnodeCompact(SVnode *pVnode); int32_t vnodeCompact(SVnode *pVnode);
int32_t vnodeSync(SVnode *pVnode); int32_t vnodeSync(SVnode *pVnode);
int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad); int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad);
SVnodeMsg *vnodeInitMsg(int32_t msgNum); SVnodeMsg *vnodeInitMsg(int32_t msgNum);
......
...@@ -51,7 +51,7 @@ typedef struct SRpcMsg { ...@@ -51,7 +51,7 @@ typedef struct SRpcMsg {
} SRpcMsg; } SRpcMsg;
typedef struct SRpcInit { typedef struct SRpcInit {
uint16_t localPort; // local port uint16_t localPort; // local port
char *label; // for debug purpose char *label; // for debug purpose
int numOfThreads; // number of threads to handle connections int numOfThreads; // number of threads to handle connections
int sessions; // number of sessions allowed int sessions; // number of sessions allowed
...@@ -66,10 +66,12 @@ typedef struct SRpcInit { ...@@ -66,10 +66,12 @@ typedef struct SRpcInit {
char *ckey; // ciphering key char *ckey; // ciphering key
// call back to process incoming msg, code shall be ignored by server app // call back to process incoming msg, code shall be ignored by server app
void (*cfp)(SRpcMsg *, SEpSet *); void (*cfp)(void *parent, SRpcMsg *, SEpSet *);
// call back to retrieve the client auth info, for server app only // call back to retrieve the client auth info, for server app only
int (*afp)(char *tableId, char *spi, char *encrypt, char *secret, char *ckey); int (*afp)(void *parent, char *tableId, char *spi, char *encrypt, char *secret, char *ckey);
void *parent;
} SRpcInit; } SRpcInit;
int32_t rpcInit(); int32_t rpcInit();
......
...@@ -16,11 +16,21 @@ ...@@ -16,11 +16,21 @@
#define _TD_WAL_H_ #define _TD_WAL_H_
#include "os.h" #include "os.h"
#include "tdef.h"
#include "tlog.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
extern int32_t wDebugFlag;
// WAL logging macros. Each one calls taosPrintLog() only when the matching
// DEBUG_* bit is set in wDebugFlag.
// NOTE(review): every macro expands to a bare brace block, not a
// do { ... } while (0) statement. `if (c) wError(...); else ...` therefore
// does not compile, because the `;` after the block ends the if statement.
// Wrap such calls in braces at the call site.
#define wFatal(...) { if (wDebugFlag & DEBUG_FATAL) { taosPrintLog("WAL FATAL ", 255, __VA_ARGS__); }}
#define wError(...) { if (wDebugFlag & DEBUG_ERROR) { taosPrintLog("WAL ERROR ", 255, __VA_ARGS__); }}
#define wWarn(...) { if (wDebugFlag & DEBUG_WARN) { taosPrintLog("WAL WARN ", 255, __VA_ARGS__); }}
#define wInfo(...) { if (wDebugFlag & DEBUG_INFO) { taosPrintLog("WAL ", 255, __VA_ARGS__); }}
#define wDebug(...) { if (wDebugFlag & DEBUG_DEBUG) { taosPrintLog("WAL ", wDebugFlag, __VA_ARGS__); }}
#define wTrace(...) { if (wDebugFlag & DEBUG_TRACE) { taosPrintLog("WAL ", wDebugFlag, __VA_ARGS__); }}
typedef enum { typedef enum {
TAOS_WAL_NOLOG = 0, TAOS_WAL_NOLOG = 0,
TAOS_WAL_WRITE = 1, TAOS_WAL_WRITE = 1,
...@@ -28,9 +38,8 @@ typedef enum { ...@@ -28,9 +38,8 @@ typedef enum {
} EWalType; } EWalType;
typedef struct { typedef struct {
int8_t msgType; int8_t sver;
int8_t sver; // sver 2 for WAL SDataRow/SMemRow compatibility int8_t reserved[3];
int8_t reserved[2];
int32_t len; int32_t len;
int64_t version; int64_t version;
uint32_t signature; uint32_t signature;
...@@ -44,21 +53,63 @@ typedef struct { ...@@ -44,21 +53,63 @@ typedef struct {
EWalType walLevel; // wal level EWalType walLevel; // wal level
} SWalCfg; } SWalCfg;
struct SWal; #define WAL_PREFIX "wal"
typedef struct SWal SWal; // WAL HANDLE #define WAL_LOG_SUFFIX "log"
typedef int32_t (*FWalWrite)(void *ahandle, void *pHead, int32_t qtype, void *pMsg); #define WAL_INDEX_SUFFIX "idx"
#define WAL_PREFIX_LEN 3
#define WAL_REFRESH_MS 1000
#define WAL_MAX_SIZE (TSDB_MAX_WAL_SIZE + sizeof(SWalHead) + 16)
#define WAL_SIGNATURE ((uint32_t)(0xFAFBFDFEUL))
#define WAL_PATH_LEN (TSDB_FILENAME_LEN + 12)
#define WAL_FILE_LEN (WAL_PATH_LEN + 32)
//#define WAL_FILE_NUM 1 // 3
#define WAL_CUR_POS_READ_ONLY 1
#define WAL_CUR_FILE_READ_ONLY 2
// The WAL handle: configuration, open-file state, version bookkeeping and
// the mutex that belongs to one WAL instance.
typedef struct SWal {
// configuration
int32_t vgId;
int32_t fsyncPeriod; // millisecond
EWalType level;
// reference id of this handle
int64_t refId;
// current tfd of the log file and of the index file
int64_t curLogTfd;
int64_t curIdxTfd;
// current version and offset
int64_t curVersion;
int64_t curOffset;
// first and last version of the current file
int64_t curFileFirstVersion;
int64_t curFileLastVersion;
// versions of the whole wal fileset
int64_t firstVersion;
int64_t snapshotVersion;
int64_t lastVersion;
// fsync status
int32_t fsyncSeq;
// control
// NOTE(review): curStatus presumably holds the WAL_CUR_POS_READ_ONLY /
// WAL_CUR_FILE_READ_ONLY flags - confirm
int32_t curStatus;
pthread_mutex_t mutex;
// path, at most WAL_PATH_LEN bytes
char path[WAL_PATH_LEN];
} SWal; // WAL HANDLE
typedef int32_t (*FWalWrite)(void *ahandle, void *pHead);
// module initialization // module initialization
int32_t walInit(); int32_t walInit();
void walCleanUp(); void walCleanUp();
// handle open and ctl // handle open and ctl
SWal *walOpen(char *path, SWalCfg *pCfg); SWal *walOpen(const char *path, SWalCfg *pCfg);
void walStop(SWal *pWal);
int32_t walAlter(SWal *, SWalCfg *pCfg); int32_t walAlter(SWal *, SWalCfg *pCfg);
void walClose(SWal *); void walClose(SWal *);
// write // write
// int64_t walWriteWithMsgType(SWal*, int8_t msgType, void* body, int32_t bodyLen); //int64_t walWriteWithMsgType(SWal*, int8_t msgType, void* body, int32_t bodyLen);
int64_t walWrite(SWal *, int64_t index, void *body, int32_t bodyLen); int64_t walWrite(SWal *, int64_t index, void *body, int32_t bodyLen);
int64_t walWriteBatch(SWal *, void **bodies, int32_t *bodyLen, int32_t batchSize); int64_t walWriteBatch(SWal *, void **bodies, int32_t *bodyLen, int32_t batchSize);
...@@ -68,7 +119,8 @@ int32_t walCommit(SWal *, int64_t ver); ...@@ -68,7 +119,8 @@ int32_t walCommit(SWal *, int64_t ver);
// truncate after // truncate after
int32_t walRollback(SWal *, int64_t ver); int32_t walRollback(SWal *, int64_t ver);
// notify that previous log can be pruned safely // notify that previous log can be pruned safely
int32_t walPrune(SWal *, int64_t ver); int32_t walTakeSnapshot(SWal *, int64_t ver);
//int32_t walDataCorrupted(SWal*);
// read // read
int32_t walRead(SWal *, SWalHead **, int64_t ver); int32_t walRead(SWal *, SWalHead **, int64_t ver);
...@@ -78,7 +130,11 @@ int32_t walReadWithFp(SWal *, FWalWrite writeFp, int64_t verStart, int32_t readN ...@@ -78,7 +130,11 @@ int32_t walReadWithFp(SWal *, FWalWrite writeFp, int64_t verStart, int32_t readN
int64_t walGetFirstVer(SWal *); int64_t walGetFirstVer(SWal *);
int64_t walGetSnapshotVer(SWal *); int64_t walGetSnapshotVer(SWal *);
int64_t walGetLastVer(SWal *); int64_t walGetLastVer(SWal *);
// int32_t walDataCorrupted(SWal*);
//internal
int32_t walGetNextFile(SWal *pWal, int64_t *nextFileId);
int32_t walGetOldFile(SWal *pWal, int64_t curFileId, int32_t minDiff, int64_t *oldFileId);
int32_t walGetNewFile(SWal *pWal, int64_t *newFileId);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -46,6 +46,7 @@ extern "C" { ...@@ -46,6 +46,7 @@ extern "C" {
#include <math.h> #include <math.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <dirent.h>
#include <unistd.h> #include <unistd.h>
#include "osAtomic.h" #include "osAtomic.h"
......
...@@ -20,12 +20,12 @@ ...@@ -20,12 +20,12 @@
extern "C" { extern "C" {
#endif #endif
void taosRemoveDir(const char *dirname); void taosRemoveDir(const char *dirname);
bool taosDirExist(char *dirname); int32_t taosDirExist(char *dirname);
bool taosMkDir(const char *dirname); int32_t taosMkDir(const char *dirname);
void taosRemoveOldFiles(char *dirname, int32_t keepDays); void taosRemoveOldFiles(char *dirname, int32_t keepDays);
bool taosExpandDir(char *dirname, char *outname, int32_t maxlen); int32_t taosExpandDir(char *dirname, char *outname, int32_t maxlen);
bool taosRealPath(char *dirname, int32_t maxlen); int32_t taosRealPath(char *dirname, int32_t maxlen);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -68,12 +68,13 @@ int32_t* taosGetErrno(); ...@@ -68,12 +68,13 @@ int32_t* taosGetErrno();
#define TSDB_CODE_FILE_CORRUPTED TAOS_DEF_ERROR_CODE(0, 0x0106) #define TSDB_CODE_FILE_CORRUPTED TAOS_DEF_ERROR_CODE(0, 0x0106)
#define TSDB_CODE_CHECKSUM_ERROR TAOS_DEF_ERROR_CODE(0, 0x0107) #define TSDB_CODE_CHECKSUM_ERROR TAOS_DEF_ERROR_CODE(0, 0x0107)
#define TSDB_CODE_INVALID_MSG TAOS_DEF_ERROR_CODE(0, 0x0108) #define TSDB_CODE_INVALID_MSG TAOS_DEF_ERROR_CODE(0, 0x0108)
#define TSDB_CODE_REF_NO_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0109) #define TSDB_CODE_MSG_NOT_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0109)
#define TSDB_CODE_REF_FULL TAOS_DEF_ERROR_CODE(0, 0x010A) #define TSDB_CODE_REF_NO_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0110)
#define TSDB_CODE_REF_ID_REMOVED TAOS_DEF_ERROR_CODE(0, 0x010B) #define TSDB_CODE_REF_FULL TAOS_DEF_ERROR_CODE(0, 0x0111)
#define TSDB_CODE_REF_INVALID_ID TAOS_DEF_ERROR_CODE(0, 0x010C) #define TSDB_CODE_REF_ID_REMOVED TAOS_DEF_ERROR_CODE(0, 0x0112)
#define TSDB_CODE_REF_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x010D) #define TSDB_CODE_REF_INVALID_ID TAOS_DEF_ERROR_CODE(0, 0x0113)
#define TSDB_CODE_REF_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x010E) #define TSDB_CODE_REF_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x0114)
#define TSDB_CODE_REF_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x0115)
//client //client
#define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) //"Invalid Operation") #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) //"Invalid Operation")
...@@ -223,20 +224,20 @@ int32_t* taosGetErrno(); ...@@ -223,20 +224,20 @@ int32_t* taosGetErrno();
#define TSDB_CODE_MND_TOPIC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x0395) //"Topic already exists) #define TSDB_CODE_MND_TOPIC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x0395) //"Topic already exists)
// dnode // dnode
#define TSDB_CODE_DND_MSG_NOT_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0400) //"Message not processed") #define TSDB_CODE_DND_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0400)
#define TSDB_CODE_DND_OUT_OF_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0401) //"Dnode out of memory") #define TSDB_CODE_DND_EXITING TAOS_DEF_ERROR_CODE(0, 0x0401)
#define TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE TAOS_DEF_ERROR_CODE(0, 0x0402) //"Mnode Id not match Dnode") #define TSDB_CODE_DND_INVALID_MSG_LEN TAOS_DEF_ERROR_CODE(0, 0x0402)
#define TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0403) //"Mnode already deployed") #define TSDB_CODE_DND_DNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0410)
#define TSDB_CODE_DND_MNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0404) //"Mnode not deployed") #define TSDB_CODE_DND_DNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0411)
#define TSDB_CODE_DND_READ_MNODE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0405) //"Read mnode.json error") #define TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0420)
#define TSDB_CODE_DND_WRITE_MNODE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0406) //"Write mnode.json error") #define TSDB_CODE_DND_MNODE_NOT_DEPLOYED TAOS_DEF_ERROR_CODE(0, 0x0421)
#define TSDB_CODE_DND_NO_WRITE_ACCESS TAOS_DEF_ERROR_CODE(0, 0x0407) //"No permission for disk files in dnode") #define TSDB_CODE_DND_MNODE_ID_INVALID TAOS_DEF_ERROR_CODE(0, 0x0422)
#define TSDB_CODE_DND_INVALID_MSG_LEN TAOS_DEF_ERROR_CODE(0, 0x0408) //"Invalid message length") #define TSDB_CODE_DND_MNODE_ID_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0423)
#define TSDB_CODE_DND_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0409) //"Action in progress") #define TSDB_CODE_DND_MNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0424)
#define TSDB_CODE_DND_TOO_MANY_VNODES TAOS_DEF_ERROR_CODE(0, 0x040A) //"Too many vnode directories") #define TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0425)
#define TSDB_CODE_DND_EXITING TAOS_DEF_ERROR_CODE(0, 0x040B) //"Dnode is exiting" #define TSDB_CODE_DND_VNODE_TOO_MANY_VNODES TAOS_DEF_ERROR_CODE(0, 0x0430)
#define TSDB_CODE_DND_PARSE_VNODE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x040C) //"Parse vnodes.json error") #define TSDB_CODE_DND_VNODE_READ_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0431)
#define TSDB_CODE_DND_PARSE_DNODE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x040D) //"Parse dnodes.json error") #define TSDB_CODE_DND_VNODE_WRITE_FILE_ERROR TAOS_DEF_ERROR_CODE(0, 0x0432)
// vnode // vnode
#define TSDB_CODE_VND_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0500) //"Action in progress") #define TSDB_CODE_VND_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0500) //"Action in progress")
......
...@@ -82,10 +82,10 @@ extern SGlobalCfg tsGlobalConfig[]; ...@@ -82,10 +82,10 @@ extern SGlobalCfg tsGlobalConfig[];
extern int32_t tsGlobalConfigNum; extern int32_t tsGlobalConfigNum;
extern char * tsCfgStatusStr[]; extern char * tsCfgStatusStr[];
void taosReadGlobalLogCfg(); void taosReadGlobalLogCfg();
bool taosReadGlobalCfg(); int32_t taosReadGlobalCfg();
void taosPrintGlobalCfg(); void taosPrintGlobalCfg();
void taosDumpGlobalCfg(); void taosDumpGlobalCfg();
void taosInitConfigOption(SGlobalCfg cfg); void taosInitConfigOption(SGlobalCfg cfg);
SGlobalCfg *taosGetConfigOption(const char *option); SGlobalCfg *taosGetConfigOption(const char *option);
......
...@@ -358,12 +358,6 @@ do { \ ...@@ -358,12 +358,6 @@ do { \
#define TSDB_DEFAULT_STABLES_HASH_SIZE 100 #define TSDB_DEFAULT_STABLES_HASH_SIZE 100
#define TSDB_DEFAULT_CTABLES_HASH_SIZE 20000 #define TSDB_DEFAULT_CTABLES_HASH_SIZE 20000
#define TSDB_PORT_DNODESHELL 0
#define TSDB_PORT_DNODEDNODE 5
#define TSDB_PORT_SYNC 10
#define TSDB_PORT_HTTP 11
#define TSDB_PORT_ARBITRATOR 12
#define TSDB_MAX_WAL_SIZE (1024*1024*3) #define TSDB_MAX_WAL_SIZE (1024*1024*3)
#define TSDB_ARB_DUMMY_TIME 4765104000000 // 2121-01-01 00:00:00.000, :P #define TSDB_ARB_DUMMY_TIME 4765104000000 // 2121-01-01 00:00:00.000, :P
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#ifndef _TD_UTIL_LOG_H #ifndef _TD_UTIL_LOG_H
#define _TD_UTIL_LOG_H #define _TD_UTIL_LOG_H
#include "os.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
......
...@@ -38,9 +38,9 @@ extern SNoteObj tsHttpNote; ...@@ -38,9 +38,9 @@ extern SNoteObj tsHttpNote;
extern SNoteObj tsTscNote; extern SNoteObj tsTscNote;
extern SNoteObj tsInfoNote; extern SNoteObj tsInfoNote;
void taosInitNotes(); int32_t taosInitNotes();
void taosNotePrint(SNoteObj* pNote, const char* const format, ...); void taosNotePrint(SNoteObj* pNote, const char* const format, ...);
void taosNotePrintBuffer(SNoteObj *pNote, char *buffer, int32_t len); void taosNotePrintBuffer(SNoteObj* pNote, char* buffer, int32_t len);
#define nPrintHttp(...) \ #define nPrintHttp(...) \
if (tsHttpEnableRecordSql) { \ if (tsHttpEnableRecordSql) { \
...@@ -53,7 +53,7 @@ void taosNotePrintBuffer(SNoteObj *pNote, char *buffer, int32_t len); ...@@ -53,7 +53,7 @@ void taosNotePrintBuffer(SNoteObj *pNote, char *buffer, int32_t len);
} }
#define nInfo(buffer, len) \ #define nInfo(buffer, len) \
if (tscEmbedded == 1) { \ if (tscEmbeddedInUtil == 1) { \
taosNotePrintBuffer(&tsInfoNote, buffer, len); \ taosNotePrintBuffer(&tsInfoNote, buffer, len); \
} }
......
...@@ -22,10 +22,13 @@ ...@@ -22,10 +22,13 @@
extern "C" { extern "C" {
#endif #endif
typedef struct SWorkerPool SWorkerPool;
typedef struct SMWorkerPool SMWorkerPool;
typedef struct SWorker { typedef struct SWorker {
int32_t id; // worker ID int32_t id; // worker ID
pthread_t thread; // thread pthread_t thread; // thread
struct SWorkerPool *pool; SWorkerPool *pool;
} SWorker; } SWorker;
typedef struct SWorkerPool { typedef struct SWorkerPool {
...@@ -39,11 +42,11 @@ typedef struct SWorkerPool { ...@@ -39,11 +42,11 @@ typedef struct SWorkerPool {
} SWorkerPool; } SWorkerPool;
typedef struct SMWorker { typedef struct SMWorker {
int32_t id; // worker id int32_t id; // worker id
pthread_t thread; // thread pthread_t thread; // thread
taos_qall qall; taos_qall qall;
taos_qset qset; // queue set taos_qset qset; // queue set
struct SMWorkerPool *pool; SMWorkerPool *pool;
} SMWorker; } SMWorker;
typedef struct SMWorkerPool { typedef struct SMWorkerPool {
......
...@@ -20,20 +20,21 @@ ...@@ -20,20 +20,21 @@
extern "C" { extern "C" {
#endif #endif
#include "os.h"
#include "tlog.h" #include "tlog.h"
extern int32_t uDebugFlag; extern int32_t uDebugFlag;
extern int8_t tscEmbedded; extern int8_t tscEmbeddedInUtil;
#define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", tscEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", tscEmbeddedInUtil ? 255 : uDebugFlag, __VA_ARGS__); }}
#define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", tscEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", tscEmbeddedInUtil ? 255 : uDebugFlag, __VA_ARGS__); }}
#define uWarn(...) { if (uDebugFlag & DEBUG_WARN) { taosPrintLog("UTL WARN ", tscEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uWarn(...) { if (uDebugFlag & DEBUG_WARN) { taosPrintLog("UTL WARN ", tscEmbeddedInUtil ? 255 : uDebugFlag, __VA_ARGS__); }}
#define uInfo(...) { if (uDebugFlag & DEBUG_INFO) { taosPrintLog("UTL ", tscEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uInfo(...) { if (uDebugFlag & DEBUG_INFO) { taosPrintLog("UTL ", tscEmbeddedInUtil ? 255 : uDebugFlag, __VA_ARGS__); }}
#define uDebug(...) { if (uDebugFlag & DEBUG_DEBUG) { taosPrintLog("UTL ", uDebugFlag, __VA_ARGS__); }} #define uDebug(...) { if (uDebugFlag & DEBUG_DEBUG) { taosPrintLog("UTL ", uDebugFlag, __VA_ARGS__); }}
#define uTrace(...) { if (uDebugFlag & DEBUG_TRACE) { taosPrintLog("UTL ", uDebugFlag, __VA_ARGS__); }} #define uTrace(...) { if (uDebugFlag & DEBUG_TRACE) { taosPrintLog("UTL ", uDebugFlag, __VA_ARGS__); }}
#define pError(...) { taosPrintLog("APP ERROR ", 255, __VA_ARGS__); } #define pError(...) { taosPrintLog("APP ERROR ", 255, __VA_ARGS__); }
#define pPrint(...) { taosPrintLog("APP ", 255, __VA_ARGS__); } #define pPrint(...) { taosPrintLog("APP ", 255, __VA_ARGS__); }
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -33,10 +33,6 @@ char tsArbitrator[TSDB_EP_LEN] = {0}; ...@@ -33,10 +33,6 @@ char tsArbitrator[TSDB_EP_LEN] = {0};
char tsLocalFqdn[TSDB_FQDN_LEN] = {0}; char tsLocalFqdn[TSDB_FQDN_LEN] = {0};
char tsLocalEp[TSDB_EP_LEN] = {0}; // Local End Point, hostname:port char tsLocalEp[TSDB_EP_LEN] = {0}; // Local End Point, hostname:port
uint16_t tsServerPort = 6030; uint16_t tsServerPort = 6030;
uint16_t tsDnodeShellPort = 6030; // udp[6035-6039] tcp[6035]
uint16_t tsDnodeDnodePort = 6035; // udp/tcp
uint16_t tsSyncPort = 6040;
uint16_t tsArbitratorPort = 6042;
int32_t tsStatusInterval = 1; // second int32_t tsStatusInterval = 1; // second
int32_t tsNumOfMnodes = 1; int32_t tsNumOfMnodes = 1;
int8_t tsEnableVnodeBak = 1; int8_t tsEnableVnodeBak = 1;
...@@ -1679,7 +1675,7 @@ int32_t taosCheckGlobalCfg() { ...@@ -1679,7 +1675,7 @@ int32_t taosCheckGlobalCfg() {
taosCheckDataDirCfg(); taosCheckDataDirCfg();
if (!taosDirExist(tsTempDir)) { if (taosDirExist(tsTempDir) != 0) {
return -1; return -1;
} }
...@@ -1726,11 +1722,6 @@ int32_t taosCheckGlobalCfg() { ...@@ -1726,11 +1722,6 @@ int32_t taosCheckGlobalCfg() {
} }
} }
tsDnodeShellPort = tsServerPort + TSDB_PORT_DNODESHELL; // udp[6035-6039] tcp[6035]
tsDnodeDnodePort = tsServerPort + TSDB_PORT_DNODEDNODE; // udp/tcp
tsSyncPort = tsServerPort + TSDB_PORT_SYNC;
tsHttpPort = tsServerPort + TSDB_PORT_HTTP;
if (tsQueryBufferSize >= 0) { if (tsQueryBufferSize >= 0) {
tsQueryBufferSizeBytes = tsQueryBufferSize * 1048576UL; tsQueryBufferSizeBytes = tsQueryBufferSize * 1048576UL;
} }
......
add_subdirectory(mnode) add_subdirectory(mnode)
add_subdirectory(vnode) add_subdirectory(vnode)
add_subdirectory(qnode) add_subdirectory(qnode)
add_subdirectory(mgmt) add_subdirectory(mgmt)
\ No newline at end of file
aux_source_directory(src DNODE_SRC) add_subdirectory(daemon)
add_executable(taosd ${DNODE_SRC}) add_subdirectory(impl)
target_link_libraries( \ No newline at end of file
taosd
PUBLIC cjson
PUBLIC mnode
PUBLIC vnode
PUBLIC wal
PUBLIC sync
PUBLIC taos
)
target_include_directories(
taosd
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode"
private "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
# Collect every source file under src/ into DAEMON_SRC and build the taosd
# executable from them.
aux_source_directory(src DAEMON_SRC)
add_executable(taosd ${DAEMON_SRC})
# Link the daemon against the dnode, util and os targets.
target_link_libraries(
taosd
PUBLIC dnode
PUBLIC util
PUBLIC os
)
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dnode.h"
#include "os.h"
#include "tconfig.h"
#include "tglobal.h"
#include "tnote.h"
#include "ulog.h"
/*
 * Process-wide state of the daemon entry point.
 *
 * The option flags and configDir are filled in by dmnParseOption(); `stop` is
 * raised by dmnSigintHandle() and polled by dmnWaitSignal().
 */
static struct {
  // Written from a signal handler and read in a polling loop, so it must be
  // volatile to keep the compiler from caching the value across iterations.
  volatile bool stop;
  bool dumpConfig;           // set by "-C"
  bool generateGrant;        // set by "-k"
  bool printAuth;            // set by "-A"
  bool printVersion;         // set by "-V"
  char configDir[PATH_MAX];  // set by "-c <dir>", defaults to "/etc/taos"
} global = {0};
/*
 * Signal handler shared by every signal registered in dmnSetSignalHandle().
 *
 * Logs the received signal number and raises global.stop so that the polling
 * loop in dmnWaitSignal() returns. `info` and `ctx` are not used.
 *
 * NOTE(review): uError() is invoked from signal context; confirm that the
 * logging path is safe to call from a handler.
 */
void dmnSigintHandle(int signum, void *info, void *ctx) {
  uError("signal:%d is received", signum);
  global.stop = true;
}
/*
 * Register dmnSigintHandle() for every signal that should stop the daemon.
 * The signals are installed in the order listed in the table.
 */
void dmnSetSignalHandle() {
  const int stopSignals[] = {SIGTERM, SIGHUP, SIGINT, SIGABRT, SIGBREAK};
  const int numOfSignals = (int)(sizeof(stopSignals) / sizeof(stopSignals[0]));

  for (int idx = 0; idx < numOfSignals; ++idx) {
    taosSetSignal(stopSignals[idx], dmnSigintHandle);
  }
}
/*
 * Parse the daemon command line into `global`.
 *
 * Recognised options:
 *   -c <dir>  configuration directory (default "/etc/taos")
 *   -C        dump the configuration and exit
 *   -k        generate grant information and exit
 *   -A        print auth information
 *   -V        print version information and exit
 * Any other argument is silently ignored.
 *
 * Returns 0 on success, -1 when "-c" has no parameter or its parameter does
 * not fit into global.configDir.
 */
int dmnParseOption(int argc, char const *argv[]) {
  static const char defaultConfigDir[] = "/etc/taos";

  tstrncpy(global.configDir, defaultConfigDir, PATH_MAX);

  for (int i = 1; i < argc; ++i) {
    if (strcmp(argv[i], "-c") == 0) {
      if (i >= argc - 1) {
        // Report the default chosen by this function, not an unrelated symbol.
        printf("'-c' requires a parameter, default is %s\n", defaultConfigDir);
        return -1;
      }

      // Consume the option's parameter.
      ++i;
      if (strlen(argv[i]) >= PATH_MAX) {
        printf("config file path overflow\n");
        return -1;
      }
      tstrncpy(global.configDir, argv[i], PATH_MAX);
    } else if (strcmp(argv[i], "-C") == 0) {
      global.dumpConfig = true;
    } else if (strcmp(argv[i], "-k") == 0) {
      global.generateGrant = true;
    } else if (strcmp(argv[i], "-A") == 0) {
      global.printAuth = true;
    } else if (strcmp(argv[i], "-V") == 0) {
      global.printVersion = true;
    }
  }

  return 0;
}
/*
 * Handler for the "-k" command line option.
 *
 * The only statement in the body is compiled out with `#if 0`, so this
 * function currently does nothing.
 */
void dmnGenerateGrant() {
#if 0
grantParseParameter();
#endif
}
/*
 * Handler for the "-V" command line option: print the release flavour,
 * version strings, git revisions and build information to stdout.
 *
 * The release flavour is "enterprise" when TD_ENTERPRISE is defined at build
 * time and "community" otherwise.
 */
void dmnPrintVersion() {
#ifdef TD_ENTERPRISE
  const char *releaseName = "enterprise";
#else
  const char *releaseName = "community";
#endif
  printf("%s version: %s compatible_version: %s\n", releaseName, version, compatible_version);
  printf("gitinfo: %s\n", gitinfo);
  printf("gitinfoI: %s\n", gitinfoOfInternal);
  printf("buildInfo: %s\n", buildinfo);
}
/*
 * Initialise global configuration, logging and notes for the daemon.
 *
 * Steps, in order: initialise and read the log-related configuration, create
 * the log directory, open the "taosdlog" log file inside it, initialise the
 * note files, read and validate the full global configuration, then apply the
 * core-dump setting.
 *
 * Failures before the log is open are reported with printf(); later ones go
 * through uError().
 *
 * NOTE(review): `path` is not referenced anywhere in this function; confirm
 * how the directory selected with "-c" is meant to reach taosReadGlobalCfg().
 *
 * Returns 0 on success, -1 on the first step that fails.
 */
int dmnReadConfig(const char *path) {
  taosInitGlobalCfg();
  taosReadGlobalLogCfg();

  if (taosMkDir(tsLogDir) != 0) {
    printf("failed to create dir: %s, reason: %s\n", tsLogDir, strerror(errno));
    return -1;
  }

  char temp[PATH_MAX];
  snprintf(temp, PATH_MAX, "%s/taosdlog", tsLogDir);
  if (taosInitLog(temp, tsNumOfLogLines, 1) != 0) {
    printf("failed to init log file\n");
    return -1;
  }

  if (taosInitNotes() != 0) {
    // Distinct message so this failure is not mistaken for the log-file one.
    printf("failed to init note file\n");
    return -1;
  }

  if (taosReadGlobalCfg() != 0) {
    uError("failed to read global config");
    return -1;
  }

  if (taosCheckGlobalCfg() != 0) {
    uError("failed to check global config");
    return -1;
  }

  taosSetCoreDump(tsEnableCoreFile);
  return 0;
}
/*
 * Handler for the "-C" command line option: dump the global configuration
 * by delegating to taosDumpGlobalCfg().
 */
void dmnDumpConfig() {
  taosDumpGlobalCfg();
}
/*
 * Install the stop-signal handlers, then block the calling thread until
 * global.stop is raised, checking it every 100 ms.
 */
void dmnWaitSignal() {
  dmnSetSignalHandle();

  for (;;) {
    if (global.stop) {
      break;
    }
    taosMsleep(100);
  }
}
/*
 * Fill `pOption` with a snapshot of the global (ts*) configuration values
 * that are handed to the dnode at start-up.
 */
void dmnInitOption(SDnodeOpt *pOption) {
  SDnodeOpt *pOpt = pOption;

  // String settings, each copied with the bound of its destination field.
  tstrncpy(pOpt->dataDir, tsDataDir, TSDB_FILENAME_LEN);
  tstrncpy(pOpt->localEp, tsLocalEp, TSDB_EP_LEN);
  tstrncpy(pOpt->localFqdn, tsLocalFqdn, TSDB_FQDN_LEN);
  tstrncpy(pOpt->firstEp, tsFirst, TSDB_EP_LEN);
  tstrncpy(pOpt->timezone, tsTimezone, TSDB_TIMEZONE_LEN);
  tstrncpy(pOpt->locale, tsLocale, TSDB_LOCALE_LEN);
  tstrncpy(pOpt->charset, tsCharset, TSDB_LOCALE_LEN);

  // Scalar settings.
  pOpt->serverPort = tsServerPort;
  pOpt->statusInterval = tsStatusInterval;
  pOpt->shellActivityTimer = tsShellActivityTimer;
  pOpt->maxShellConns = tsMaxShellConns;
  pOpt->ratioOfQueryCores = tsRatioOfQueryCores;
  pOpt->numOfThreadsPerCore = tsNumOfThreadsPerCore;
  pOpt->numOfCores = tsNumOfCores;
  pOpt->sver = tsVersion;
}
/*
 * Start the dnode with options taken from the global configuration, wait for
 * a stop signal, then shut the dnode down and close the log.
 *
 * Returns 0 after a clean shutdown, -1 when dndInit() fails.
 */
int dmnRunDnode() {
  SDnodeOpt option = {0};
  dmnInitOption(&option);

  SDnode *pDnode = dndInit(&option);
  if (pDnode != NULL) {
    uInfo("Started TDengine service successfully.");

    // Blocks here until one of the registered stop signals arrives.
    dmnWaitSignal();
    uInfo("TDengine is shut down!");

    dndCleanup(pDnode);
    taosCloseLog();
    return 0;
  }

  uInfo("Failed to start TDengine, please check the log at %s", tsLogDir);
  return -1;
}
/*
 * Daemon entry point.
 *
 * Order of evaluation: parse the command line; "-k" and "-V" are served
 * without reading the configuration; otherwise the configuration is loaded,
 * "-C" dumps it and exits, and in the remaining case the dnode is run.
 *
 * Returns 0 on success and -1 when option parsing or configuration loading
 * fails; otherwise the result of dmnRunDnode().
 */
int main(int argc, char const *argv[]) {
  int exitCode = 0;

  if (dmnParseOption(argc, argv) != 0) {
    exitCode = -1;
  } else if (global.generateGrant) {
    dmnGenerateGrant();
  } else if (global.printVersion) {
    dmnPrintVersion();
  } else if (dmnReadConfig(global.configDir) != 0) {
    exitCode = -1;
  } else if (global.dumpConfig) {
    dmnDumpConfig();
  } else {
    exitCode = dmnRunDnode();
  }

  return exitCode;
}
# Collect every source file under src/ into DNODE_SRC and build them into the
# static library target "dnode".
aux_source_directory(src DNODE_SRC)
add_library(dnode STATIC ${DNODE_SRC})
# Libraries the dnode target links against.
target_link_libraries(
dnode
PUBLIC cjson
PUBLIC mnode
PUBLIC vnode
PUBLIC wal
PUBLIC sync
PUBLIC taos
)
# include/dnode/mgmt is a PUBLIC include directory; the local inc/ directory
# is PRIVATE to this target.
target_include_directories(
dnode
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mgmt"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
...@@ -13,27 +13,27 @@ ...@@ -13,27 +13,27 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_DNODE_DNODE_H_ #ifndef _TD_DND_DNODE_H_
#define _TD_DNODE_DNODE_H_ #define _TD_DND_DNODE_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "dnodeInt.h" #include "dndInt.h"
int32_t dnodeInitDnode(); int32_t dndInitDnode(SDnode *pDnode);
void dnodeCleanupDnode(); void dndCleanupDnode(SDnode *pDnode);
void dnodeProcessDnodeMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessDnodeReq(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dndProcessDnodeRsp(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
int32_t dnodeGetDnodeId(); int32_t dndGetDnodeId(SDnode *pDnode);
int64_t dnodeGetClusterId(); int64_t dndGetClusterId(SDnode *pDnode);
void dnodeGetDnodeEp(int32_t dnodeId, char *epstr, char *fqdn, uint16_t *port); void dndGetDnodeEp(SDnode *pDnode, int32_t dnodeId, char *pEp, char *pFqdn, uint16_t *pPort);
void dnodeGetMnodeEpSetForPeer(SEpSet *epSet); void dndGetMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet);
void dnodeGetMnodeEpSetForShell(SEpSet *epSet); void dndSendRedirectMsg(SDnode *pDnode, SRpcMsg *pMsg);
void dnodeSendRedirectMsg(SRpcMsg *rpcMsg, bool forShell);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_DNODE_DNODE_H_*/ #endif /*_TD_DND_DNODE_H_*/
\ No newline at end of file \ No newline at end of file
...@@ -13,17 +13,28 @@ ...@@ -13,17 +13,28 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_DNODE_INT_H_ #ifndef _TD_DND_INT_H_
#define _TD_DNODE_INT_H_ #define _TD_DND_INT_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "cJSON.h"
#include "os.h" #include "os.h"
#include "taosmsg.h" #include "taosmsg.h"
#include "tglobal.h" #include "thash.h"
#include "tlockfree.h"
#include "tlog.h" #include "tlog.h"
#include "tqueue.h"
#include "trpc.h" #include "trpc.h"
#include "tthread.h"
#include "ttime.h"
#include "tworker.h"
#include "dnode.h"
#include "mnode.h"
#include "vnode.h"
extern int32_t dDebugFlag; extern int32_t dDebugFlag;
...@@ -34,21 +45,88 @@ extern int32_t dDebugFlag; ...@@ -34,21 +45,88 @@ extern int32_t dDebugFlag;
#define dDebug(...) { if (dDebugFlag & DEBUG_DEBUG) { taosPrintLog("DND ", dDebugFlag, __VA_ARGS__); }} #define dDebug(...) { if (dDebugFlag & DEBUG_DEBUG) { taosPrintLog("DND ", dDebugFlag, __VA_ARGS__); }}
#define dTrace(...) { if (dDebugFlag & DEBUG_TRACE) { taosPrintLog("DND ", dDebugFlag, __VA_ARGS__); }} #define dTrace(...) { if (dDebugFlag & DEBUG_TRACE) { taosPrintLog("DND ", dDebugFlag, __VA_ARGS__); }}
typedef enum { DN_RUN_STAT_INIT, DN_RUN_STAT_RUNNING, DN_RUN_STAT_STOPPED } EDnStat; typedef enum { DND_STAT_INIT, DND_STAT_RUNNING, DND_STAT_STOPPED } EStat;
typedef void (*MsgFp)(SRpcMsg *pMsg, SEpSet *pEpSet); typedef void (*DndMsgFp)(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEps);
/*
 * Three string pointers grouped as the `dir` member of SDnode.
 * NOTE(review): presumably the directory paths for the dnode, mnode and
 * vnodes data; confirm against the code that fills them in.
 */
typedef struct {
char *dnode;
char *mnode;
char *vnodes;
} SDnodeDir;
/*
 * Dnode-level management state, stored as the `dmgmt` member of SDnode.
 *
 * The accessor functions for dnodeId, clusterId, dnodeHash and mnodeEpSet
 * take `latch` in read mode; the mnode endpoint set is replaced with `latch`
 * held in write mode.
 */
typedef struct {
int32_t dnodeId; // id returned by dndGetDnodeId()
int32_t dropped;
uint32_t rebootTime;
int64_t clusterId; // id returned by dndGetClusterId()
SEpSet mnodeEpSet; // mnode endpoints, copied out by dndGetMnodeEpSet()
char *file;
SHashObj *dnodeHash; // dnode id -> SDnodeEp lookup table
SDnodeEps *dnodeEps; // heap-allocated list of known dnode endpoints
pthread_t *threadId;
SRWLatch latch; // reader/writer latch guarding the fields above
} SDnodeMgmt;
/*
 * Mnode-related management state, stored as the `mmgmt` member of SDnode:
 * deployment flags, replica description, the SMnode handle, and the queues
 * and worker pools declared below.
 * NOTE(review): the use of each field is not visible from this header;
 * confirm against the mnode management implementation.
 */
typedef struct {
int32_t refCount;
int8_t deployed;
int8_t dropped;
int8_t replica;
int8_t selfIndex;
SReplica replicas[TSDB_MAX_REPLICA];
char *file;
SMnode *pMnode;
SRWLatch latch;
taos_queue pReadQ;
taos_queue pWriteQ;
taos_queue pApplyQ;
taos_queue pSyncQ;
taos_queue pMgmtQ;
SWorkerPool mgmtPool;
SWorkerPool readPool;
SWorkerPool writePool;
SWorkerPool syncPool;
} SMnodeMgmt;
/*
 * Vnode-related management state, stored as the `vmgmt` member of SDnode:
 * a hash object, vnode counters, a latch, one management queue and the
 * worker pools declared below.
 * NOTE(review): presumably `hash` maps vnode ids to vnode objects; confirm
 * against the vnodes management implementation.
 */
typedef struct {
SHashObj *hash;
int32_t openVnodes;
int32_t totalVnodes;
SRWLatch latch;
taos_queue pMgmtQ;
SWorkerPool mgmtPool;
SWorkerPool queryPool;
SWorkerPool fetchPool;
SMWorkerPool syncPool;
SMWorkerPool writePool;
} SVnodesMgmt;
/*
 * Transport state, stored as the `tmgmt` member of SDnode: two opaque RPC
 * handles and a table of DndMsgFp handlers with TSDB_MSG_TYPE_MAX slots.
 * NOTE(review): presumably msgFp is indexed by message type; confirm against
 * the transport implementation.
 */
typedef struct {
void *serverRpc;
void *clientRpc;
DndMsgFp msgFp[TSDB_MSG_TYPE_MAX];
} STransMgmt;
int32_t dnodeInit(); typedef struct SDnode {
void dnodeCleanup(); EStat stat;
SDnodeOpt opt;
SDnodeDir dir;
SDnodeMgmt dmgmt;
SMnodeMgmt mmgmt;
SVnodesMgmt vmgmt;
STransMgmt tmgmt;
SStartupMsg startup;
} SDnode;
EDnStat dnodeGetRunStat(); EStat dndGetStat(SDnode *pDnode);
void dnodeSetRunStat(); void dndSetStat(SDnode *pDnode, EStat stat);
char *dndStatStr(EStat stat);
void dnodeReportStartup(char *name, char *desc); void dndReportStartup(SDnode *pDnode, char *pName, char *pDesc);
void dnodeReportStartupFinished(char *name, char *desc); void dndGetStartup(SDnode *pDnode, SStartupMsg *pStartup);
void dnodeGetStartup(SStartupMsg *);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_DNODE_INT_H_*/ #endif /*_TD_DND_INT_H_*/
\ No newline at end of file \ No newline at end of file
...@@ -13,26 +13,24 @@ ...@@ -13,26 +13,24 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_DNODE_VNODES_H_ #ifndef _TD_DND_MNODE_H_
#define _TD_DNODE_VNODES_H_ #define _TD_DND_MNODE_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "dnodeInt.h" #include "dndInt.h"
int32_t dnodeInitVnodes(); int32_t dndInitMnode(SDnode *pDnode);
void dnodeCleanupVnodes(); void dndCleanupMnode(SDnode *pDnode);
void dnodeGetVnodeLoads(SVnodeLoads *pVloads); int32_t dndGetUserAuthFromMnode(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey);
void dndProcessMnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessVnodeMgmtMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessMnodeReadMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessVnodeWriteMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessMnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessVnodeSyncMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessMnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessVnodeQueryMsg(SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessVnodeFetchMsg(SRpcMsg *pMsg, SEpSet *pEpSet);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_DNODE_VNODES_H_*/ #endif /*_TD_DND_MNODE_H_*/
\ No newline at end of file \ No newline at end of file
...@@ -13,21 +13,21 @@ ...@@ -13,21 +13,21 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_DNODE_TRANSPORT_H_ #ifndef _TD_DND_TRANSPORT_H_
#define _TD_DNODE_TRANSPORT_H_ #define _TD_DND_TRANSPORT_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "dnodeInt.h" #include "dndInt.h"
int32_t dnodeInitTrans(); int32_t dndInitTrans(SDnode *pDnode);
void dnodeCleanupTrans(); void dndCleanupTrans(SDnode *pDnode);
void dnodeSendMsgToMnode(SRpcMsg *rpcMsg); void dndSendMsgToMnode(SDnode *pDnode, SRpcMsg *pRpcMsg);
void dnodeSendMsgToDnode(SEpSet *epSet, SRpcMsg *rpcMsg); void dndSendMsgToDnode(SDnode *pDnode, SEpSet *pEpSet, SRpcMsg *pRpcMsg);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_DNODE_TRANSPORT_H_*/ #endif /*_TD_DND_TRANSPORT_H_*/
...@@ -13,25 +13,25 @@ ...@@ -13,25 +13,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_DNODE_MNODE_H_ #ifndef _TD_DND_VNODES_H_
#define _TD_DNODE_MNODE_H_ #define _TD_DND_VNODES_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "dnodeInt.h" #include "dndInt.h"
int32_t dnodeInitMnode(); int32_t dndInitVnodes(SDnode *pDnode);
void dnodeCleanupMnode(); void dndCleanupVnodes(SDnode *pDnode);
int32_t dnodeGetUserAuthFromMnode(char *user, char *spi, char *encrypt, char *secret, char *ckey); void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pVloads);
void dndProcessVnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessMnodeMgmtMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessMnodeReadMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessMnodeWriteMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dnodeProcessMnodeSyncMsg(SRpcMsg *pMsg, SEpSet *pEpSet); void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_DNODE_MNODE_H_*/ #endif /*_TD_DND_VNODES_H_*/
\ No newline at end of file \ No newline at end of file
...@@ -14,90 +14,64 @@ ...@@ -14,90 +14,64 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "dnodeDnode.h" #include "dndDnode.h"
#include "dnodeTransport.h" #include "dndTransport.h"
#include "dnodeVnodes.h" #include "dndVnodes.h"
#include "cJSON.h"
#include "thash.h" int32_t dndGetDnodeId(SDnode *pDnode) {
#include "tthread.h" SDnodeMgmt *pMgmt = &pDnode->dmgmt;
#include "ttime.h" taosRLockLatch(&pMgmt->latch);
int32_t dnodeId = pMgmt->dnodeId;
static struct { taosRUnLockLatch(&pMgmt->latch);
int32_t dnodeId;
int64_t clusterId;
SDnodeEps *dnodeEps;
SHashObj *dnodeHash;
SEpSet mnodeEpSetForShell;
SEpSet mnodeEpSetForPeer;
char file[PATH_MAX + 20];
uint32_t rebootTime;
int8_t dropped;
int8_t threadStop;
pthread_t *threadId;
pthread_mutex_t mutex;
} tsDnode = {0};
int32_t dnodeGetDnodeId() {
int32_t dnodeId = 0;
pthread_mutex_lock(&tsDnode.mutex);
dnodeId = tsDnode.dnodeId;
pthread_mutex_unlock(&tsDnode.mutex);
return dnodeId; return dnodeId;
} }
int64_t dnodeGetClusterId() { int64_t dndGetClusterId(SDnode *pDnode) {
int64_t clusterId = 0; SDnodeMgmt *pMgmt = &pDnode->dmgmt;
pthread_mutex_lock(&tsDnode.mutex); taosRLockLatch(&pMgmt->latch);
clusterId = tsDnode.clusterId; int64_t clusterId = pMgmt->clusterId;
pthread_mutex_unlock(&tsDnode.mutex); taosRUnLockLatch(&pMgmt->latch);
return clusterId; return clusterId;
} }
void dnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port) { void dndGetDnodeEp(SDnode *pDnode, int32_t dnodeId, char *pEp, char *pFqdn, uint16_t *pPort) {
pthread_mutex_lock(&tsDnode.mutex); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
taosRLockLatch(&pMgmt->latch);
SDnodeEp *pEp = taosHashGet(tsDnode.dnodeHash, &dnodeId, sizeof(int32_t)); SDnodeEp *pDnodeEp = taosHashGet(pMgmt->dnodeHash, &dnodeId, sizeof(int32_t));
if (pEp != NULL) { if (pDnodeEp != NULL) {
if (port) *port = pEp->dnodePort; if (pPort != NULL) {
if (fqdn) tstrncpy(fqdn, pEp->dnodeFqdn, TSDB_FQDN_LEN); *pPort = pDnodeEp->port;
if (ep) snprintf(ep, TSDB_EP_LEN, "%s:%u", pEp->dnodeFqdn, pEp->dnodePort); }
if (pFqdn != NULL) {
tstrncpy(pFqdn, pDnodeEp->fqdn, TSDB_FQDN_LEN);
}
if (pEp != NULL) {
snprintf(pEp, TSDB_EP_LEN, "%s:%u", pDnodeEp->fqdn, pDnodeEp->port);
}
} }
pthread_mutex_unlock(&tsDnode.mutex); taosRUnLockLatch(&pMgmt->latch);
} }
void dnodeGetMnodeEpSetForPeer(SEpSet *pEpSet) { void dndGetMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) {
pthread_mutex_lock(&tsDnode.mutex); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
*pEpSet = tsDnode.mnodeEpSetForPeer; taosRLockLatch(&pMgmt->latch);
pthread_mutex_unlock(&tsDnode.mutex); *pEpSet = pMgmt->mnodeEpSet;
taosRUnLockLatch(&pMgmt->latch);
} }
void dnodeGetMnodeEpSetForShell(SEpSet *pEpSet) { void dndSendRedirectMsg(SDnode *pDnode, SRpcMsg *pMsg) {
pthread_mutex_lock(&tsDnode.mutex);
*pEpSet = tsDnode.mnodeEpSetForShell;
pthread_mutex_unlock(&tsDnode.mutex);
}
void dnodeSendRedirectMsg(SRpcMsg *pMsg, bool forShell) {
int32_t msgType = pMsg->msgType; int32_t msgType = pMsg->msgType;
SEpSet epSet = {0}; SEpSet epSet = {0};
if (forShell) { dndGetMnodeEpSet(pDnode, &epSet);
dnodeGetMnodeEpSetForShell(&epSet);
} else {
dnodeGetMnodeEpSetForPeer(&epSet);
}
dDebug("RPC %p, msg:%s is redirected, num:%d use:%d", pMsg->handle, taosMsg[msgType], epSet.numOfEps, epSet.inUse);
dDebug("RPC %p, msg:%s is redirected, num:%d inUse:%d", pMsg->handle, taosMsg[msgType], epSet.numOfEps, epSet.inUse);
for (int32_t i = 0; i < epSet.numOfEps; ++i) { for (int32_t i = 0; i < epSet.numOfEps; ++i) {
dDebug("mnode index:%d %s:%u", i, epSet.fqdn[i], epSet.port[i]); dDebug("mnode index:%d %s:%u", i, epSet.fqdn[i], epSet.port[i]);
if (strcmp(epSet.fqdn[i], tsLocalFqdn) == 0) { if (strcmp(epSet.fqdn[i], pDnode->opt.localFqdn) == 0 && epSet.port[i] == pDnode->opt.serverPort) {
if ((epSet.port[i] == tsServerPort + TSDB_PORT_DNODEDNODE && !forShell) || epSet.inUse = (i + 1) % epSet.numOfEps;
(epSet.port[i] == tsServerPort && forShell)) {
epSet.inUse = (i + 1) % epSet.numOfEps;
dDebug("mnode index:%d %s:%d set inUse to %d", i, epSet.fqdn[i], epSet.port[i], epSet.inUse);
}
} }
epSet.port[i] = htons(epSet.port[i]); epSet.port[i] = htons(epSet.port[i]);
...@@ -106,219 +80,225 @@ void dnodeSendRedirectMsg(SRpcMsg *pMsg, bool forShell) { ...@@ -106,219 +80,225 @@ void dnodeSendRedirectMsg(SRpcMsg *pMsg, bool forShell) {
rpcSendRedirectRsp(pMsg->handle, &epSet); rpcSendRedirectRsp(pMsg->handle, &epSet);
} }
static void dnodeUpdateMnodeEpSet(SEpSet *pEpSet) { static void dndUpdateMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) {
if (pEpSet == NULL || pEpSet->numOfEps <= 0) { dInfo("mnode is changed, num:%d inUse:%d", pEpSet->numOfEps, pEpSet->inUse);
dError("mnode is changed, but content is invalid, discard it");
return;
} else {
dInfo("mnode is changed, num:%d use:%d", pEpSet->numOfEps, pEpSet->inUse);
}
pthread_mutex_lock(&tsDnode.mutex); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
taosWLockLatch(&pMgmt->latch);
tsDnode.mnodeEpSetForPeer = *pEpSet; pMgmt->mnodeEpSet = *pEpSet;
for (int32_t i = 0; i < pEpSet->numOfEps; ++i) { for (int32_t i = 0; i < pEpSet->numOfEps; ++i) {
pEpSet->port[i] -= TSDB_PORT_DNODEDNODE;
dInfo("mnode index:%d %s:%u", i, pEpSet->fqdn[i], pEpSet->port[i]); dInfo("mnode index:%d %s:%u", i, pEpSet->fqdn[i], pEpSet->port[i]);
} }
tsDnode.mnodeEpSetForShell = *pEpSet;
pthread_mutex_unlock(&tsDnode.mutex); taosWUnLockLatch(&pMgmt->latch);
} }
static void dnodePrintDnodes() { static void dndPrintDnodes(SDnode *pDnode) {
dDebug("print dnode endpoint list, num:%d", tsDnode.dnodeEps->dnodeNum); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
for (int32_t i = 0; i < tsDnode.dnodeEps->dnodeNum; i++) {
SDnodeEp *ep = &tsDnode.dnodeEps->dnodeEps[i]; dDebug("print dnode ep list, num:%d", pMgmt->dnodeEps->num);
dDebug("dnode:%d, fqdn:%s port:%u isMnode:%d", ep->dnodeId, ep->dnodeFqdn, ep->dnodePort, ep->isMnode); for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
SDnodeEp *pEp = &pMgmt->dnodeEps->eps[i];
dDebug("dnode:%d, fqdn:%s port:%u isMnode:%d", pEp->id, pEp->fqdn, pEp->port, pEp->isMnode);
} }
} }
static void dnodeResetDnodes(SDnodeEps *pEps) { static void dndResetDnodes(SDnode *pDnode, SDnodeEps *pDnodeEps) {
assert(pEps != NULL); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
int32_t size = sizeof(SDnodeEps) + pEps->dnodeNum * sizeof(SDnodeEp);
if (pEps->dnodeNum > tsDnode.dnodeEps->dnodeNum) { int32_t size = sizeof(SDnodeEps) + pDnodeEps->num * sizeof(SDnodeEp);
if (pDnodeEps->num > pMgmt->dnodeEps->num) {
SDnodeEps *tmp = calloc(1, size); SDnodeEps *tmp = calloc(1, size);
if (tmp == NULL) return; if (tmp == NULL) return;
tfree(tsDnode.dnodeEps); tfree(pMgmt->dnodeEps);
tsDnode.dnodeEps = tmp; pMgmt->dnodeEps = tmp;
} }
if (tsDnode.dnodeEps != pEps) { if (pMgmt->dnodeEps != pDnodeEps) {
memcpy(tsDnode.dnodeEps, pEps, size); memcpy(pMgmt->dnodeEps, pDnodeEps, size);
} }
tsDnode.mnodeEpSetForPeer.inUse = 0; pMgmt->mnodeEpSet.inUse = 0;
tsDnode.mnodeEpSetForShell.inUse = 0; pMgmt->mnodeEpSet.numOfEps = 0;
int32_t mIndex = 0; int32_t mIndex = 0;
for (int32_t i = 0; i < tsDnode.dnodeEps->dnodeNum; i++) { for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
SDnodeEp *ep = &tsDnode.dnodeEps->dnodeEps[i]; SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
if (!ep->isMnode) continue; if (!pDnodeEp->isMnode) continue;
if (mIndex >= TSDB_MAX_REPLICA) continue; if (mIndex >= TSDB_MAX_REPLICA) continue;
strcpy(tsDnode.mnodeEpSetForShell.fqdn[mIndex], ep->dnodeFqdn); pMgmt->mnodeEpSet.numOfEps++;
strcpy(tsDnode.mnodeEpSetForPeer.fqdn[mIndex], ep->dnodeFqdn); strcpy(pMgmt->mnodeEpSet.fqdn[mIndex], pDnodeEp->fqdn);
tsDnode.mnodeEpSetForShell.port[mIndex] = ep->dnodePort; pMgmt->mnodeEpSet.port[mIndex] = pDnodeEp->port;
tsDnode.mnodeEpSetForShell.port[mIndex] = ep->dnodePort + tsDnodeDnodePort;
mIndex++; mIndex++;
} }
for (int32_t i = 0; i < tsDnode.dnodeEps->dnodeNum; ++i) { for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
SDnodeEp *ep = &tsDnode.dnodeEps->dnodeEps[i]; SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
taosHashPut(tsDnode.dnodeHash, &ep->dnodeId, sizeof(int32_t), ep, sizeof(SDnodeEp)); taosHashPut(pMgmt->dnodeHash, &pDnodeEp->id, sizeof(int32_t), pDnodeEp, sizeof(SDnodeEp));
} }
dnodePrintDnodes(); dndPrintDnodes(pDnode);
} }
static bool dnodeIsEpChanged(int32_t dnodeId, char *epStr) { static bool dndIsEpChanged(SDnode *pDnode, int32_t dnodeId, char *pEp) {
bool changed = false; bool changed = false;
pthread_mutex_lock(&tsDnode.mutex);
SDnodeEp *pEp = taosHashGet(tsDnode.dnodeHash, &dnodeId, sizeof(int32_t)); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
if (pEp != NULL) { taosRLockLatch(&pMgmt->latch);
char epSaved[TSDB_EP_LEN + 1];
snprintf(epSaved, TSDB_EP_LEN, "%s:%u", pEp->dnodeFqdn, pEp->dnodePort); SDnodeEp *pDnodeEp = taosHashGet(pMgmt->dnodeHash, &dnodeId, sizeof(int32_t));
changed = strcmp(epStr, epSaved) != 0; if (pDnodeEp != NULL) {
char epstr[TSDB_EP_LEN + 1];
snprintf(epstr, TSDB_EP_LEN, "%s:%u", pDnodeEp->fqdn, pDnodeEp->port);
changed = strcmp(pEp, epstr) != 0;
} }
pthread_mutex_unlock(&tsDnode.mutex); taosRUnLockLatch(&pMgmt->latch);
return changed; return changed;
} }
static int32_t dnodeReadDnodes() { static int32_t dndReadDnodes(SDnode *pDnode) {
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
int32_t code = TSDB_CODE_DND_DNODE_READ_FILE_ERROR;
int32_t len = 0; int32_t len = 0;
int32_t maxLen = 30000; int32_t maxLen = 30000;
char *content = calloc(1, maxLen + 1); char *content = calloc(1, maxLen + 1);
cJSON *root = NULL; cJSON *root = NULL;
FILE *fp = NULL; FILE *fp = NULL;
fp = fopen(tsDnode.file, "r"); fp = fopen(pMgmt->file, "r");
if (!fp) { if (fp == NULL) {
dDebug("file %s not exist", tsDnode.file); dDebug("file %s not exist", pMgmt->file);
code = 0;
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
len = (int32_t)fread(content, 1, maxLen, fp); len = (int32_t)fread(content, 1, maxLen, fp);
if (len <= 0) { if (len <= 0) {
dError("failed to read %s since content is null", tsDnode.file); dError("failed to read %s since content is null", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
content[len] = 0; content[len] = 0;
root = cJSON_Parse(content); root = cJSON_Parse(content);
if (root == NULL) { if (root == NULL) {
dError("failed to read %s since invalid json format", tsDnode.file); dError("failed to read %s since invalid json format", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
cJSON *dnodeId = cJSON_GetObjectItem(root, "dnodeId"); cJSON *dnodeId = cJSON_GetObjectItem(root, "dnodeId");
if (!dnodeId || dnodeId->type != cJSON_String) { if (!dnodeId || dnodeId->type != cJSON_String) {
dError("failed to read %s since dnodeId not found", tsDnode.file); dError("failed to read %s since dnodeId not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tsDnode.dnodeId = atoi(dnodeId->valuestring); pMgmt->dnodeId = atoi(dnodeId->valuestring);
cJSON *clusterId = cJSON_GetObjectItem(root, "clusterId"); cJSON *clusterId = cJSON_GetObjectItem(root, "clusterId");
if (!clusterId || clusterId->type != cJSON_String) { if (!clusterId || clusterId->type != cJSON_String) {
dError("failed to read %s since clusterId not found", tsDnode.file); dError("failed to read %s since clusterId not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tsDnode.clusterId = atoll(clusterId->valuestring); pMgmt->clusterId = atoll(clusterId->valuestring);
cJSON *dropped = cJSON_GetObjectItem(root, "dropped"); cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
if (!dropped || dropped->type != cJSON_String) { if (!dropped || dropped->type != cJSON_String) {
dError("failed to read %s since dropped not found", tsDnode.file); dError("failed to read %s since dropped not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tsDnode.dropped = atoi(dropped->valuestring); pMgmt->dropped = atoi(dropped->valuestring);
cJSON *dnodeInfos = cJSON_GetObjectItem(root, "dnodeInfos"); cJSON *dnodeInfos = cJSON_GetObjectItem(root, "dnodeInfos");
if (!dnodeInfos || dnodeInfos->type != cJSON_Array) { if (!dnodeInfos || dnodeInfos->type != cJSON_Array) {
dError("failed to read %s since dnodeInfos not found", tsDnode.file); dError("failed to read %s since dnodeInfos not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
int32_t dnodeInfosSize = cJSON_GetArraySize(dnodeInfos); int32_t dnodeInfosSize = cJSON_GetArraySize(dnodeInfos);
if (dnodeInfosSize <= 0) { if (dnodeInfosSize <= 0) {
dError("failed to read %s since dnodeInfos size:%d invalid", tsDnode.file, dnodeInfosSize); dError("failed to read %s since dnodeInfos size:%d invalid", pMgmt->file, dnodeInfosSize);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tsDnode.dnodeEps = calloc(1, dnodeInfosSize * sizeof(SDnodeEp) + sizeof(SDnodeEps)); pMgmt->dnodeEps = calloc(1, dnodeInfosSize * sizeof(SDnodeEp) + sizeof(SDnodeEps));
if (tsDnode.dnodeEps == NULL) { if (pMgmt->dnodeEps == NULL) {
dError("failed to calloc dnodeEpList since %s", strerror(errno)); dError("failed to calloc dnodeEpList since %s", strerror(errno));
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tsDnode.dnodeEps->dnodeNum = dnodeInfosSize; pMgmt->dnodeEps->num = dnodeInfosSize;
for (int32_t i = 0; i < dnodeInfosSize; ++i) { for (int32_t i = 0; i < dnodeInfosSize; ++i) {
cJSON *dnodeInfo = cJSON_GetArrayItem(dnodeInfos, i); cJSON *dnodeInfo = cJSON_GetArrayItem(dnodeInfos, i);
if (dnodeInfo == NULL) break; if (dnodeInfo == NULL) break;
SDnodeEp *pEp = &tsDnode.dnodeEps->dnodeEps[i]; SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
cJSON *dnodeId = cJSON_GetObjectItem(dnodeInfo, "dnodeId"); cJSON *dnodeId = cJSON_GetObjectItem(dnodeInfo, "dnodeId");
if (!dnodeId || dnodeId->type != cJSON_String) { if (!dnodeId || dnodeId->type != cJSON_String) {
dError("failed to read %s, dnodeId not found", tsDnode.file); dError("failed to read %s, dnodeId not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
pEp->dnodeId = atoi(dnodeId->valuestring); pDnodeEp->id = atoi(dnodeId->valuestring);
cJSON *isMnode = cJSON_GetObjectItem(dnodeInfo, "isMnode"); cJSON *isMnode = cJSON_GetObjectItem(dnodeInfo, "isMnode");
if (!isMnode || isMnode->type != cJSON_String) { if (!isMnode || isMnode->type != cJSON_String) {
dError("failed to read %s, isMnode not found", tsDnode.file); dError("failed to read %s, isMnode not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
pEp->isMnode = atoi(isMnode->valuestring); pDnodeEp->isMnode = atoi(isMnode->valuestring);
cJSON *dnodeFqdn = cJSON_GetObjectItem(dnodeInfo, "dnodeFqdn"); cJSON *dnodeFqdn = cJSON_GetObjectItem(dnodeInfo, "dnodeFqdn");
if (!dnodeFqdn || dnodeFqdn->type != cJSON_String || dnodeFqdn->valuestring == NULL) { if (!dnodeFqdn || dnodeFqdn->type != cJSON_String || dnodeFqdn->valuestring == NULL) {
dError("failed to read %s, dnodeFqdn not found", tsDnode.file); dError("failed to read %s, dnodeFqdn not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
tstrncpy(pEp->dnodeFqdn, dnodeFqdn->valuestring, TSDB_FQDN_LEN); tstrncpy(pDnodeEp->fqdn, dnodeFqdn->valuestring, TSDB_FQDN_LEN);
cJSON *dnodePort = cJSON_GetObjectItem(dnodeInfo, "dnodePort"); cJSON *dnodePort = cJSON_GetObjectItem(dnodeInfo, "dnodePort");
if (!dnodePort || dnodePort->type != cJSON_String) { if (!dnodePort || dnodePort->type != cJSON_String) {
dError("failed to read %s, dnodePort not found", tsDnode.file); dError("failed to read %s, dnodePort not found", pMgmt->file);
goto PRASE_DNODE_OVER; goto PRASE_DNODE_OVER;
} }
pEp->dnodePort = atoi(dnodePort->valuestring); pDnodeEp->port = atoi(dnodePort->valuestring);
} }
dInfo("succcessed to read file %s", tsDnode.file); code = 0;
dnodePrintDnodes(); dInfo("succcessed to read file %s", pMgmt->file);
dndPrintDnodes(pDnode);
PRASE_DNODE_OVER: PRASE_DNODE_OVER:
if (content != NULL) free(content); if (content != NULL) free(content);
if (root != NULL) cJSON_Delete(root); if (root != NULL) cJSON_Delete(root);
if (fp != NULL) fclose(fp); if (fp != NULL) fclose(fp);
if (dnodeIsEpChanged(tsDnode.dnodeId, tsLocalEp)) { if (dndIsEpChanged(pDnode, pMgmt->dnodeId, pDnode->opt.localEp)) {
dError("localEp %s different with %s and need reconfigured", tsLocalEp, tsDnode.file); dError("localEp %s different with %s and need reconfigured", pDnode->opt.localEp, pMgmt->file);
return -1; return -1;
} }
if (tsDnode.dnodeEps == NULL) { if (pMgmt->dnodeEps == NULL) {
tsDnode.dnodeEps = calloc(1, sizeof(SDnodeEps) + sizeof(SDnodeEp)); pMgmt->dnodeEps = calloc(1, sizeof(SDnodeEps) + sizeof(SDnodeEp));
tsDnode.dnodeEps->dnodeNum = 1; pMgmt->dnodeEps->num = 1;
tsDnode.dnodeEps->dnodeEps[0].dnodePort = tsServerPort; pMgmt->dnodeEps->eps[0].isMnode = 1;
tstrncpy(tsDnode.dnodeEps->dnodeEps[0].dnodeFqdn, tsLocalFqdn, TSDB_FQDN_LEN); pMgmt->dnodeEps->eps[0].port = pDnode->opt.serverPort;
tstrncpy(pMgmt->dnodeEps->eps[0].fqdn, pDnode->opt.localFqdn, TSDB_FQDN_LEN);
} }
dnodeResetDnodes(tsDnode.dnodeEps); dndResetDnodes(pDnode, pMgmt->dnodeEps);
terrno = 0; terrno = 0;
return 0; return 0;
} }
static int32_t dnodeWriteDnodes() { static int32_t dndWriteDnodes(SDnode *pDnode) {
FILE *fp = fopen(tsDnode.file, "w"); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
if (!fp) {
dError("failed to write %s since %s", tsDnode.file, strerror(errno)); FILE *fp = fopen(pMgmt->file, "w");
if (fp == NULL) {
dError("failed to write %s since %s", pMgmt->file, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
return -1; return -1;
} }
...@@ -327,17 +307,17 @@ static int32_t dnodeWriteDnodes() { ...@@ -327,17 +307,17 @@ static int32_t dnodeWriteDnodes() {
char *content = calloc(1, maxLen + 1); char *content = calloc(1, maxLen + 1);
len += snprintf(content + len, maxLen - len, "{\n"); len += snprintf(content + len, maxLen - len, "{\n");
len += snprintf(content + len, maxLen - len, " \"dnodeId\": \"%d\",\n", tsDnode.dnodeId); len += snprintf(content + len, maxLen - len, " \"dnodeId\": \"%d\",\n", pMgmt->dnodeId);
len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", tsDnode.clusterId); len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", pMgmt->clusterId);
len += snprintf(content + len, maxLen - len, " \"dropped\": \"%d\",\n", tsDnode.dropped); len += snprintf(content + len, maxLen - len, " \"dropped\": \"%d\",\n", pMgmt->dropped);
len += snprintf(content + len, maxLen - len, " \"dnodeInfos\": [{\n"); len += snprintf(content + len, maxLen - len, " \"dnodeInfos\": [{\n");
for (int32_t i = 0; i < tsDnode.dnodeEps->dnodeNum; ++i) { for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
SDnodeEp *ep = &tsDnode.dnodeEps->dnodeEps[i]; SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
len += snprintf(content + len, maxLen - len, " \"dnodeId\": \"%d\",\n", ep->dnodeId); len += snprintf(content + len, maxLen - len, " \"dnodeId\": \"%d\",\n", pDnodeEp->id);
len += snprintf(content + len, maxLen - len, " \"isMnode\": \"%d\",\n", ep->isMnode); len += snprintf(content + len, maxLen - len, " \"isMnode\": \"%d\",\n", pDnodeEp->isMnode);
len += snprintf(content + len, maxLen - len, " \"dnodeFqdn\": \"%s\",\n", ep->dnodeFqdn); len += snprintf(content + len, maxLen - len, " \"dnodeFqdn\": \"%s\",\n", pDnodeEp->fqdn);
len += snprintf(content + len, maxLen - len, " \"dnodePort\": \"%u\"\n", ep->dnodePort); len += snprintf(content + len, maxLen - len, " \"dnodePort\": \"%u\"\n", pDnodeEp->port);
if (i < tsDnode.dnodeEps->dnodeNum - 1) { if (i < pMgmt->dnodeEps->num - 1) {
len += snprintf(content + len, maxLen - len, " },{\n"); len += snprintf(content + len, maxLen - len, " },{\n");
} else { } else {
len += snprintf(content + len, maxLen - len, " }]\n"); len += snprintf(content + len, maxLen - len, " }]\n");
...@@ -351,11 +331,11 @@ static int32_t dnodeWriteDnodes() { ...@@ -351,11 +331,11 @@ static int32_t dnodeWriteDnodes() {
free(content); free(content);
terrno = 0; terrno = 0;
dInfo("successed to write %s", tsDnode.file); dInfo("successed to write %s", pMgmt->file);
return 0; return 0;
} }
static void dnodeSendStatusMsg() { static void dndSendStatusMsg(SDnode *pDnode) {
int32_t contLen = sizeof(SStatusMsg) + TSDB_MAX_VNODES * sizeof(SVnodeLoad); int32_t contLen = sizeof(SStatusMsg) + TSDB_MAX_VNODES * sizeof(SVnodeLoad);
SStatusMsg *pStatus = rpcMallocCont(contLen); SStatusMsg *pStatus = rpcMallocCont(contLen);
...@@ -364,98 +344,109 @@ static void dnodeSendStatusMsg() { ...@@ -364,98 +344,109 @@ static void dnodeSendStatusMsg() {
return; return;
} }
pStatus->sversion = htonl(tsVersion); bool changed = false;
pStatus->dnodeId = htonl(dnodeGetDnodeId());
pStatus->clusterId = htobe64(dnodeGetClusterId());
pStatus->rebootTime = htonl(tsDnode.rebootTime);
pStatus->numOfCores = htonl(tsNumOfCores);
tstrncpy(pStatus->dnodeEp, tsLocalEp, TSDB_EP_LEN);
pStatus->clusterCfg.statusInterval = htonl(tsStatusInterval); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
taosRLockLatch(&pMgmt->latch);
pStatus->sversion = htonl(pDnode->opt.sver);
pStatus->dnodeId = htonl(pMgmt->dnodeId);
pStatus->clusterId = htobe64(pMgmt->clusterId);
pStatus->rebootTime = htonl(pMgmt->rebootTime);
pStatus->numOfCores = htonl(pDnode->opt.numOfCores);
tstrncpy(pStatus->dnodeEp, pDnode->opt.localEp, TSDB_EP_LEN);
pStatus->clusterCfg.statusInterval = htonl(pDnode->opt.statusInterval);
tstrncpy(pStatus->clusterCfg.timezone, pDnode->opt.timezone, TSDB_TIMEZONE_LEN);
tstrncpy(pStatus->clusterCfg.locale, pDnode->opt.locale, TSDB_LOCALE_LEN);
tstrncpy(pStatus->clusterCfg.charset, pDnode->opt.charset, TSDB_LOCALE_LEN);
pStatus->clusterCfg.checkTime = 0; pStatus->clusterCfg.checkTime = 0;
tstrncpy(pStatus->clusterCfg.timezone, tsTimezone, TSDB_TIMEZONE_LEN);
tstrncpy(pStatus->clusterCfg.locale, tsLocale, TSDB_LOCALE_LEN);
tstrncpy(pStatus->clusterCfg.charset, tsCharset, TSDB_LOCALE_LEN);
char timestr[32] = "1970-01-01 00:00:00.00"; char timestr[32] = "1970-01-01 00:00:00.00";
(void)taosParseTime(timestr, &pStatus->clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0); (void)taosParseTime(timestr, &pStatus->clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
taosRUnLockLatch(&pMgmt->latch);
dnodeGetVnodeLoads(&pStatus->vnodeLoads); dndGetVnodeLoads(pDnode, &pStatus->vnodeLoads);
contLen = sizeof(SStatusMsg) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad); contLen = sizeof(SStatusMsg) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad);
SRpcMsg rpcMsg = {.pCont = pStatus, .contLen = contLen, .msgType = TSDB_MSG_TYPE_STATUS}; SRpcMsg rpcMsg = {.pCont = pStatus, .contLen = contLen, .msgType = TSDB_MSG_TYPE_STATUS};
dnodeSendMsgToMnode(&rpcMsg); dndSendMsgToMnode(pDnode, &rpcMsg);
} }
static void dnodeUpdateCfg(SDnodeCfg *pCfg) { static void dndUpdateDnodeCfg(SDnode *pDnode, SDnodeCfg *pCfg) {
if (tsDnode.dnodeId == 0) return; SDnodeMgmt *pMgmt = &pDnode->dmgmt;
if (tsDnode.dropped) return; if (pMgmt->dnodeId == 0 || pMgmt->dropped != pCfg->dropped) {
dInfo("set dnodeId:%d clusterId:%" PRId64 " dropped:%d", pCfg->dnodeId, pCfg->clusterId, pCfg->dropped);
pthread_mutex_lock(&tsDnode.mutex);
tsDnode.dnodeId = pCfg->dnodeId;
tsDnode.clusterId = pCfg->clusterId;
tsDnode.dropped = pCfg->dropped;
dInfo("dnodeId is set to %d, clusterId is set to %" PRId64, pCfg->dnodeId, pCfg->clusterId);
dnodeWriteDnodes(); taosWLockLatch(&pMgmt->latch);
pthread_mutex_unlock(&tsDnode.mutex); pMgmt->dnodeId = pCfg->dnodeId;
pMgmt->clusterId = pCfg->clusterId;
pMgmt->dropped = pCfg->dropped;
(void)dndWriteDnodes(pDnode);
taosWUnLockLatch(&pMgmt->latch);
}
} }
static void dnodeUpdateDnodeEps(SDnodeEps *pEps) { static void dndUpdateDnodeEps(SDnode *pDnode, SDnodeEps *pDnodeEps) {
if (pEps == NULL || pEps->dnodeNum <= 0) return; if (pDnodeEps == NULL || pDnodeEps->num <= 0) return;
pthread_mutex_lock(&tsDnode.mutex); SDnodeMgmt *pMgmt = &pDnode->dmgmt;
taosWLockLatch(&pMgmt->latch);
if (pEps->dnodeNum != tsDnode.dnodeEps->dnodeNum) { if (pDnodeEps->num != pMgmt->dnodeEps->num) {
dnodeResetDnodes(pEps); dndResetDnodes(pDnode, pDnodeEps);
dnodeWriteDnodes(); dndWriteDnodes(pDnode);
} else { } else {
int32_t size = pEps->dnodeNum * sizeof(SDnodeEp) + sizeof(SDnodeEps); int32_t size = pDnodeEps->num * sizeof(SDnodeEp) + sizeof(SDnodeEps);
if (memcmp(tsDnode.dnodeEps, pEps, size) != 0) { if (memcmp(pMgmt->dnodeEps, pDnodeEps, size) != 0) {
dnodeResetDnodes(pEps); dndResetDnodes(pDnode, pDnodeEps);
dnodeWriteDnodes(); dndWriteDnodes(pDnode);
} }
} }
pthread_mutex_unlock(&tsDnode.mutex); taosWUnLockLatch(&pMgmt->latch);
} }
static void dnodeProcessStatusRsp(SRpcMsg *pMsg) { static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
if (pMsg->code != TSDB_CODE_SUCCESS) return; if (pEpSet && pEpSet->numOfEps > 0) {
dndUpdateMnodeEpSet(pDnode, pEpSet);
}
SStatusRsp *pStatusRsp = pMsg->pCont; if (pMsg->code != TSDB_CODE_SUCCESS) return;
SDnodeCfg *pCfg = &pStatusRsp->dnodeCfg; SStatusRsp *pRsp = pMsg->pCont;
SDnodeCfg *pCfg = &pRsp->dnodeCfg;
pCfg->dnodeId = htonl(pCfg->dnodeId); pCfg->dnodeId = htonl(pCfg->dnodeId);
pCfg->clusterId = htobe64(pCfg->clusterId); pCfg->clusterId = htobe64(pCfg->clusterId);
dnodeUpdateCfg(pCfg); dndUpdateDnodeCfg(pDnode, pCfg);
if (pCfg->dropped) return; if (pCfg->dropped) return;
SDnodeEps *pEps = &pStatusRsp->dnodeEps; SDnodeEps *pDnodeEps = &pRsp->dnodeEps;
pEps->dnodeNum = htonl(pEps->dnodeNum); pDnodeEps->num = htonl(pDnodeEps->num);
for (int32_t i = 0; i < pEps->dnodeNum; ++i) { for (int32_t i = 0; i < pDnodeEps->num; ++i) {
pEps->dnodeEps[i].dnodeId = htonl(pEps->dnodeEps[i].dnodeId); pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id);
pEps->dnodeEps[i].dnodePort = htons(pEps->dnodeEps[i].dnodePort); pDnodeEps->eps[i].port = htons(pDnodeEps->eps[i].port);
} }
dnodeUpdateDnodeEps(pEps); dndUpdateDnodeEps(pDnode, pDnodeEps);
} }
static void dnodeProcessConfigDnodeReq(SRpcMsg *pMsg) { static void dndProcessAuthRsp(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) { assert(1); }
static void dndProcessGrantRsp(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) { assert(1); }
static void dndProcessConfigDnodeReq(SDnode *pDnode, SRpcMsg *pMsg) {
SCfgDnodeMsg *pCfg = pMsg->pCont; SCfgDnodeMsg *pCfg = pMsg->pCont;
int32_t code = taosCfgDynamicOptions(pCfg->config); int32_t code = TSDB_CODE_OPS_NOT_SUPPORT;
SRpcMsg rspMsg = {.handle = pMsg->handle, .pCont = NULL, .contLen = 0, .code = code}; SRpcMsg rspMsg = {.handle = pMsg->handle, .pCont = NULL, .contLen = 0, .code = code};
rpcSendResponse(&rspMsg); rpcSendResponse(&rspMsg);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
} }
static void dnodeProcessStartupReq(SRpcMsg *pMsg) { static void dndProcessStartupReq(SDnode *pDnode, SRpcMsg *pMsg) {
dInfo("startup msg is received, cont:%s", (char *)pMsg->pCont); dInfo("startup msg is received");
SStartupMsg *pStartup = rpcMallocCont(sizeof(SStartupMsg)); SStartupMsg *pStartup = rpcMallocCont(sizeof(SStartupMsg));
dnodeGetStartup(pStartup); dndGetStartup(pDnode, pStartup);
dInfo("startup msg is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished); dInfo("startup msg is sent, step:%s desc:%s finished:%d", pStartup->name, pStartup->desc, pStartup->finished);
...@@ -465,96 +456,117 @@ static void dnodeProcessStartupReq(SRpcMsg *pMsg) { ...@@ -465,96 +456,117 @@ static void dnodeProcessStartupReq(SRpcMsg *pMsg) {
} }
static void *dnodeThreadRoutine(void *param) { static void *dnodeThreadRoutine(void *param) {
int32_t ms = tsStatusInterval * 1000; SDnode *pDnode = param;
int32_t ms = pDnode->opt.statusInterval * 1000;
while (!tsDnode.threadStop) { while (true) {
if (dnodeGetRunStat() != DN_RUN_STAT_RUNNING) {
continue;
} else {
dnodeSendStatusMsg();
}
taosMsleep(ms); taosMsleep(ms);
pthread_testcancel();
if (dndGetStat(pDnode) == DND_STAT_RUNNING) {
// dndSendStatusMsg(pDnode);
}
} }
} }
int32_t dnodeInitDnode() { int32_t dndInitDnode(SDnode *pDnode) {
tsDnode.dnodeId = 0; SDnodeMgmt *pMgmt = &pDnode->dmgmt;
tsDnode.clusterId = 0;
tsDnode.dnodeEps = NULL; pMgmt->dnodeId = 0;
snprintf(tsDnode.file, sizeof(tsDnode.file), "%s/dnode.json", tsDnodeDir); pMgmt->rebootTime = taosGetTimestampSec();
tsDnode.rebootTime = taosGetTimestampSec(); pMgmt->dropped = 0;
tsDnode.dropped = 0; pMgmt->clusterId = 0;
pthread_mutex_init(&tsDnode.mutex, NULL);
tsDnode.threadStop = false; char path[PATH_MAX];
snprintf(path, PATH_MAX, "%s/dnode.json", pDnode->dir.dnode);
tsDnode.dnodeHash = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); pMgmt->file = strdup(path);
if (tsDnode.dnodeHash == NULL) { if (pMgmt->file == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pMgmt->dnodeHash = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
if (pMgmt->dnodeHash == NULL) {
dError("failed to init dnode hash"); dError("failed to init dnode hash");
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
tsDnode.threadId = taosCreateThread(dnodeThreadRoutine, NULL); if (dndReadDnodes(pDnode) != 0) {
if (tsDnode.threadId == NULL) { dError("failed to read file:%s since %s", pMgmt->file, terrstr());
dError("failed to init dnode thread"); return -1;
return TSDB_CODE_DND_OUT_OF_MEMORY;
} }
int32_t code = dnodeReadDnodes(); taosInitRWLatch(&pMgmt->latch);
if (code != 0) {
dError("failed to read file:%s since %s", tsDnode.file, tstrerror(code)); pMgmt->threadId = taosCreateThread(dnodeThreadRoutine, pDnode);
return code; if (pMgmt->threadId == NULL) {
dError("failed to init dnode thread");
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
dInfo("dnode-dnode is initialized"); dInfo("dnode-dnode is initialized");
return 0; return 0;
} }
void dnodeCleanupDnode() { void dndCleanupDnode(SDnode *pDnode) {
if (tsDnode.threadId != NULL) { SDnodeMgmt *pMgmt = &pDnode->dmgmt;
tsDnode.threadStop = true;
taosDestoryThread(tsDnode.threadId); if (pMgmt->threadId != NULL) {
tsDnode.threadId = NULL; taosDestoryThread(pMgmt->threadId);
pMgmt->threadId = NULL;
} }
pthread_mutex_lock(&tsDnode.mutex); taosWLockLatch(&pMgmt->latch);
if (tsDnode.dnodeEps != NULL) { if (pMgmt->dnodeEps != NULL) {
free(tsDnode.dnodeEps); free(pMgmt->dnodeEps);
tsDnode.dnodeEps = NULL; pMgmt->dnodeEps = NULL;
} }
if (tsDnode.dnodeHash) { if (pMgmt->dnodeHash != NULL) {
taosHashCleanup(tsDnode.dnodeHash); taosHashCleanup(pMgmt->dnodeHash);
tsDnode.dnodeHash = NULL; pMgmt->dnodeHash = NULL;
} }
pthread_mutex_unlock(&tsDnode.mutex); if (pMgmt->file != NULL) {
pthread_mutex_destroy(&tsDnode.mutex); free(pMgmt->file);
pMgmt->file = NULL;
}
taosWUnLockLatch(&pMgmt->latch);
dInfo("dnode-dnode is cleaned up"); dInfo("dnode-dnode is cleaned up");
} }
void dnodeProcessDnodeMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { void dndProcessDnodeReq(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
int32_t msgType = pMsg->msgType; switch (pMsg->msgType) {
if (msgType == TSDB_MSG_TYPE_STATUS_RSP && pEpSet) {
dnodeUpdateMnodeEpSet(pEpSet);
}
switch (msgType) {
case TSDB_MSG_TYPE_NETWORK_TEST: case TSDB_MSG_TYPE_NETWORK_TEST:
dnodeProcessStartupReq(pMsg); dndProcessStartupReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_CONFIG_DNODE_IN: case TSDB_MSG_TYPE_CONFIG_DNODE_IN:
dnodeProcessConfigDnodeReq(pMsg); dndProcessConfigDnodeReq(pDnode, pMsg);
break;
case TSDB_MSG_TYPE_STATUS_RSP:
dnodeProcessStatusRsp(pMsg);
break; break;
default: default:
dError("RPC %p, %s not processed", pMsg->handle, taosMsg[msgType]); dError("RPC %p, dnode req:%s not processed", pMsg->handle, taosMsg[pMsg->msgType]);
SRpcMsg rspMsg = {.handle = pMsg->handle, .code = TSDB_CODE_DND_MSG_NOT_PROCESSED}; SRpcMsg rspMsg = {.handle = pMsg->handle, .code = TSDB_CODE_MSG_NOT_PROCESSED};
rpcSendResponse(&rspMsg); rpcSendResponse(&rspMsg);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
} }
} }
void dndProcessDnodeRsp(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
switch (pMsg->msgType) {
case TSDB_MSG_TYPE_STATUS_RSP:
dndProcessStatusRsp(pDnode, pMsg, pEpSet);
break;
case TSDB_MSG_TYPE_AUTH_RSP:
dndProcessAuthRsp(pDnode, pMsg, pEpSet);
break;
case TSDB_MSG_TYPE_GRANT_RSP:
dndProcessGrantRsp(pDnode, pMsg, pEpSet);
break;
default:
dError("RPC %p, dnode rsp:%s not processed", pMsg->handle, taosMsg[pMsg->msgType]);
}
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dndMnode.h"
#include "dndDnode.h"
#include "dndTransport.h"
// Forward declarations for the functions used in this file, grouped by name.

// Worker init/cleanup, one pair per worker kind (read, write, sync, mgmt).
static int32_t dndInitMnodeReadWorker(SDnode *pDnode);
static int32_t dndInitMnodeWriteWorker(SDnode *pDnode);
static int32_t dndInitMnodeSyncWorker(SDnode *pDnode);
static int32_t dndInitMnodeMgmtWorker(SDnode *pDnode);
static void dndCleanupMnodeReadWorker(SDnode *pDnode);
static void dndCleanupMnodeWriteWorker(SDnode *pDnode);
static void dndCleanupMnodeSyncWorker(SDnode *pDnode);
static void dndCleanupMnodeMgmtWorker(SDnode *pDnode);
// Queue alloc/free, one pair per queue kind (read, write, apply, sync, mgmt).
static int32_t dndAllocMnodeReadQueue(SDnode *pDnode);
static int32_t dndAllocMnodeWriteQueue(SDnode *pDnode);
static int32_t dndAllocMnodeApplyQueue(SDnode *pDnode);
static int32_t dndAllocMnodeSyncQueue(SDnode *pDnode);
static int32_t dndAllocMnodeMgmtQueue(SDnode *pDnode);
static void dndFreeMnodeReadQueue(SDnode *pDnode);
static void dndFreeMnodeWriteQueue(SDnode *pDnode);
static void dndFreeMnodeApplyQueue(SDnode *pDnode);
static void dndFreeMnodeSyncQueue(SDnode *pDnode);
static void dndFreeMnodeMgmtQueue(SDnode *pDnode);
// Per-queue processing routines. Note the mgmt queue takes a raw SRpcMsg,
// while the other four take an SMnodeMsg.
static void dndProcessMnodeReadQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void dndProcessMnodeWriteQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void dndProcessMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void dndProcessMnodeSyncQueue(SDnode *pDnode, SMnodeMsg *pMsg);
static void dndProcessMnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg);
static int32_t dndWriteMnodeMsgToQueue(SMnode *pMnode, taos_queue pQueue, SRpcMsg *pRpcMsg);
// The four declarations below are not `static`, so they have external linkage.
// NOTE(review): presumably these are the RPC entry points exported via dndMnode.h -- confirm.
void dndProcessMnodeReadMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dndProcessMnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dndProcessMnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dndProcessMnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
static int32_t dndPutMsgIntoMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg);
// Worker start/stop.
static int32_t dndStartMnodeWorker(SDnode *pDnode);
static void dndStopMnodeWorker(SDnode *pDnode);
// Reference-counted access to the SMnode object held in pDnode->mmgmt.
static SMnode *dndAcquireMnode(SDnode *pDnode);
static void dndReleaseMnode(SDnode *pDnode, SMnode *pMnode);
// Load/store of the mnode state file (pDnode->mmgmt.file).
static int32_t dndReadMnodeFile(SDnode *pDnode);
static int32_t dndWriteMnodeFile(SDnode *pDnode);
// Mnode open/alter/drop and the request handlers named after them.
static int32_t dndOpenMnode(SDnode *pDnode, SMnodeOpt *pOption);
static int32_t dndAlterMnode(SDnode *pDnode, SMnodeOpt *pOption);
static int32_t dndDropMnode(SDnode *pDnode);
static int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);
static int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);
static int32_t dndProcessDropMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg);
// Take a reference on the mnode held in pDnode->mmgmt.
//
// Under the read latch: if the mnode is deployed and not dropped, the
// reference count is atomically incremented and pMgmt->pMnode is returned.
// Otherwise terrno is set to TSDB_CODE_DND_MNODE_NOT_DEPLOYED and NULL is
// returned.
static SMnode *dndAcquireMnode(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pAcquired = NULL;
  int32_t     refs = 0;

  taosRLockLatch(&pMgmt->latch);
  if (!pMgmt->deployed || pMgmt->dropped) {
    terrno = TSDB_CODE_DND_MNODE_NOT_DEPLOYED;
  } else {
    pAcquired = pMgmt->pMnode;
    refs = atomic_add_fetch_32(&pMgmt->refCount, 1);
  }
  taosRUnLockLatch(&pMgmt->latch);

  if (pAcquired == NULL) {
    return NULL;
  }

  dTrace("acquire mnode, refCount:%d", refs);
  return pAcquired;
}
// Drop a reference previously taken on the mnode.
//
// The read latch is always taken; the reference count in pDnode->mmgmt is
// atomically decremented only when pMnode is non-NULL. A NULL pMnode leaves
// the count untouched and logs nothing.
static void dndReleaseMnode(SDnode *pDnode, SMnode *pMnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  int32_t     refs = 0;

  taosRLockLatch(&pMgmt->latch);
  refs = (pMnode != NULL) ? atomic_sub_fetch_32(&pMgmt->refCount, 1) : 0;
  taosRUnLockLatch(&pMgmt->latch);

  if (pMnode == NULL) {
    return;
  }

  dTrace("release mnode, refCount:%d", refs);
}
// Load the mnode state from the JSON file pMgmt->file into pDnode->mmgmt.
//
// Expected layout (all scalar values are stored as JSON strings):
//   { "deployed": "...", "dropped": "...",
//     "nodes": [ { "id": "...", "fqdn": "...", "port": "..." }, ... ] }
//
// Fills pMgmt->deployed, pMgmt->dropped, pMgmt->replica and
// pMgmt->replicas[0 .. replica-1].
//
// Returns 0 on success, and also when the file cannot be opened (treated as
// "nothing saved yet"). Any other problem returns
// TSDB_CODE_DND_MNODE_READ_FILE_ERROR. terrno is always set to the returned
// value. On failure, fields parsed before the failing item have already been
// written to pMgmt.
static int32_t dndReadMnodeFile(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  int32_t     code = TSDB_CODE_DND_MNODE_READ_FILE_ERROR;
  int32_t     len = 0;
  int32_t     maxLen = 4096;
  char       *content = calloc(1, maxLen + 1);
  cJSON      *root = NULL;

  FILE *fp = fopen(pMgmt->file, "r");
  if (fp == NULL) {
    dDebug("file %s not exist", pMgmt->file);
    code = 0;
    goto PRASE_MNODE_OVER;
  }

  // The buffer is allocated before the open; refuse to read into a NULL buffer.
  if (content == NULL) {
    dError("failed to read %s since out of memory", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }

  len = (int32_t)fread(content, 1, maxLen, fp);
  if (len <= 0) {
    dError("failed to read %s since content is null", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }

  content[len] = 0;
  root = cJSON_Parse(content);
  if (root == NULL) {
    dError("failed to read %s since invalid json format", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }

  cJSON *deployed = cJSON_GetObjectItem(root, "deployed");
  if (!deployed || deployed->type != cJSON_String || deployed->valuestring == NULL) {
    dError("failed to read %s since deployed not found", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }
  pMgmt->deployed = atoi(deployed->valuestring);

  cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
  if (!dropped || dropped->type != cJSON_String || dropped->valuestring == NULL) {
    dError("failed to read %s since dropped not found", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }
  pMgmt->dropped = atoi(dropped->valuestring);

  cJSON *nodes = cJSON_GetObjectItem(root, "nodes");
  if (!nodes || nodes->type != cJSON_Array) {
    dError("failed to read %s since nodes not found", pMgmt->file);
    goto PRASE_MNODE_OVER;
  }

  // The replica count is the array length and must fit in pMgmt->replicas[].
  pMgmt->replica = cJSON_GetArraySize(nodes);
  if (pMgmt->replica <= 0 || pMgmt->replica > TSDB_MAX_REPLICA) {
    dError("failed to read %s since nodes size %d invalid", pMgmt->file, pMgmt->replica);
    goto PRASE_MNODE_OVER;
  }

  for (int32_t i = 0; i < pMgmt->replica; ++i) {
    cJSON *node = cJSON_GetArrayItem(nodes, i);
    if (node == NULL) {
      // A missing element would leave replicas[i..] unset; report it instead
      // of returning success with a partially filled replica list.
      dError("failed to read %s since node:%d not found", pMgmt->file, i);
      goto PRASE_MNODE_OVER;
    }

    SReplica *pReplica = &pMgmt->replicas[i];

    cJSON *id = cJSON_GetObjectItem(node, "id");
    if (!id || id->type != cJSON_String || id->valuestring == NULL) {
      dError("failed to read %s since id not found", pMgmt->file);
      goto PRASE_MNODE_OVER;
    }
    pReplica->id = atoi(id->valuestring);

    cJSON *fqdn = cJSON_GetObjectItem(node, "fqdn");
    if (!fqdn || fqdn->type != cJSON_String || fqdn->valuestring == NULL) {
      dError("failed to read %s since fqdn not found", pMgmt->file);
      goto PRASE_MNODE_OVER;
    }
    tstrncpy(pReplica->fqdn, fqdn->valuestring, TSDB_FQDN_LEN);

    cJSON *port = cJSON_GetObjectItem(node, "port");
    if (!port || port->type != cJSON_String || port->valuestring == NULL) {
      dError("failed to read %s since port not found", pMgmt->file);
      goto PRASE_MNODE_OVER;
    }
    pReplica->port = atoi(port->valuestring);
  }

  code = 0;
  dInfo("succcessed to read file %s", pMgmt->file);

PRASE_MNODE_OVER:
  // Single exit point: release whatever was acquired on the way here.
  free(content);
  if (root != NULL) cJSON_Delete(root);
  if (fp != NULL) fclose(fp);

  terrno = code;
  return code;
}
/*
 * Persist the mnode management state (deployed/dropped flags and the replica
 * list) as JSON.
 *
 * The content is written to "<pMgmt->file>.bak", synced, closed, and then
 * renamed over pMgmt->file, so the previous file is only replaced after the
 * new content has been written completely.
 *
 * Returns 0 on success. On failure returns -1 with terrno set to
 * TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR.
 */
static int32_t dndWriteMnodeFile(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  char file[PATH_MAX + 20] = {0};
  snprintf(file, sizeof(file), "%s.bak", pMgmt->file);

  FILE *fp = fopen(file, "w");
  if (fp == NULL) {
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  int32_t len = 0;
  int32_t maxLen = 4096;
  char   *content = calloc(1, maxLen + 1);
  if (content == NULL) {
    /* Without a buffer nothing can be serialized; do not leak the stream. */
    fclose(fp);
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  len += snprintf(content + len, maxLen - len, "{\n");
  len += snprintf(content + len, maxLen - len, " \"deployed\": \"%d\",\n", pMgmt->deployed);
  len += snprintf(content + len, maxLen - len, " \"dropped\": \"%d\",\n", pMgmt->dropped);
  len += snprintf(content + len, maxLen - len, " \"nodes\": [{\n");
  for (int32_t i = 0; i < pMgmt->replica; ++i) {
    SReplica *pReplica = &pMgmt->replicas[i];
    len += snprintf(content + len, maxLen - len, " \"id\": \"%d\",\n", pReplica->id);
    len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pReplica->fqdn);
    len += snprintf(content + len, maxLen - len, " \"port\": \"%u\"\n", pReplica->port);
    if (i < pMgmt->replica - 1) {
      len += snprintf(content + len, maxLen - len, " },{\n");
    } else {
      len += snprintf(content + len, maxLen - len, " }]\n");
    }
  }
  len += snprintf(content + len, maxLen - len, "}\n");

  /* A short write must not be renamed over the last good file. */
  size_t written = fwrite(content, 1, len, fp);
  free(content);
  if (written != (size_t)len) {
    fclose(fp);
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to write %s since %s", file, terrstr());
    return -1;
  }

  taosFsyncFile(fileno(fp));
  fclose(fp);

  if (taosRenameFile(file, pMgmt->file) != 0) {
    terrno = TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR;
    dError("failed to rename %s since %s", pMgmt->file, terrstr());
    return -1;
  }

  dInfo("successed to write %s", pMgmt->file);
  return 0;
}
/*
 * Bring up the mnode read/write/sync worker pools and then allocate the
 * read, write, apply and sync queues, in that order. The first step that
 * fails is logged and ends the sequence.
 *
 * Returns 0 when every step succeeded, -1 otherwise.
 */
static int32_t dndStartMnodeWorker(SDnode *pDnode) {
  if (dndInitMnodeReadWorker(pDnode) != 0) {
    dError("failed to start mnode read worker since %s", terrstr());
  } else if (dndInitMnodeWriteWorker(pDnode) != 0) {
    dError("failed to start mnode write worker since %s", terrstr());
  } else if (dndInitMnodeSyncWorker(pDnode) != 0) {
    dError("failed to start mnode sync worker since %s", terrstr());
  } else if (dndAllocMnodeReadQueue(pDnode) != 0) {
    dError("failed to alloc mnode read queue since %s", terrstr());
  } else if (dndAllocMnodeWriteQueue(pDnode) != 0) {
    dError("failed to alloc mnode write queue since %s", terrstr());
  } else if (dndAllocMnodeApplyQueue(pDnode) != 0) {
    dError("failed to alloc mnode apply queue since %s", terrstr());
  } else if (dndAllocMnodeSyncQueue(pDnode) != 0) {
    dError("failed to alloc mnode sync queue since %s", terrstr());
  } else {
    return 0;
  }

  return -1;
}
/*
 * Stop serving mnode messages and tear down the read/write/sync workers and
 * the read/write/apply/sync queues.
 *
 * Order matters: the mnode is first unpublished under the write latch, then
 * this function polls until the reference count and all four queues have
 * drained, and only then are the pools cleaned up and the queues freed.
 */
static void dndStopMnodeWorker(SDnode *pDnode) {
SMnodeMgmt *pMgmt = &pDnode->mmgmt;
/* Unpublish the mnode: clear the deployed flag and the handle under the write latch. */
taosWLockLatch(&pMgmt->latch);
pMgmt->deployed = 0;
pMgmt->pMnode = NULL;
taosWUnLockLatch(&pMgmt->latch);
/* Poll in 10 ms steps until refCount is at most 1.
 * NOTE(review): the threshold of 1 presumably accounts for a reference held by the caller - confirm. */
while (pMgmt->refCount > 1) taosMsleep(10);
/* Wait for every queue to drain before its workers go away.
 * NOTE(review): the queues are read here without a visible NULL check; confirm this is
 * never reached when dndStartMnodeWorker did not run to completion. */
while (!taosQueueEmpty(pMgmt->pReadQ)) taosMsleep(10);
while (!taosQueueEmpty(pMgmt->pApplyQ)) taosMsleep(10);
while (!taosQueueEmpty(pMgmt->pWriteQ)) taosMsleep(10);
while (!taosQueueEmpty(pMgmt->pSyncQ)) taosMsleep(10);
dndCleanupMnodeReadWorker(pDnode);
dndCleanupMnodeWriteWorker(pDnode);
dndCleanupMnodeSyncWorker(pDnode);
dndFreeMnodeReadQueue(pDnode);
dndFreeMnodeWriteQueue(pDnode);
dndFreeMnodeApplyQueue(pDnode);
dndFreeMnodeSyncQueue(pDnode);
}
/*
 * Decide whether this dnode should deploy an mnode by itself.
 *
 * True only when the dnode id is not positive, the cluster id is not
 * positive, and the local end point equals the configured first end point.
 * The checks are evaluated in that order and stop at the first that fails.
 */
static bool dndNeedDeployMnode(SDnode *pDnode) {
  return !(dndGetDnodeId(pDnode) > 0) &&
         !(dndGetClusterId(pDnode) > 0) &&
         strcmp(pDnode->opt.localEp, pDnode->opt.firstEp) == 0;
}
/*
 * Fill the parts of an mnode option that are the same for every open path:
 * the owning dnode, the dnode/cluster ids, and the callbacks the mnode uses
 * to send messages and to enqueue apply messages.
 */
static void dndInitMnodeOption(SDnode *pDnode, SMnodeOpt *pOption) {
  /* Identity */
  pOption->pDnode = pDnode;

  /* Callbacks into the dnode layer */
  pOption->sendMsgToDnodeFp = dndSendMsgToDnode;
  pOption->sendMsgToMnodeFp = dndSendMsgToMnode;
  pOption->sendRedirectMsgFp = dndSendRedirectMsg;
  pOption->putMsgToApplyMsgFp = dndPutMsgIntoMnodeApplyQueue;

  /* Ids currently known to the dnode */
  pOption->dnodeId = dndGetDnodeId(pDnode);
  pOption->clusterId = dndGetClusterId(pDnode);
}
/*
 * Build the option for a first-time, single-replica deployment: this dnode
 * is replica 0 with id 1, using the local fqdn and server port. The same
 * replica layout is mirrored into the management state.
 */
static void dndBuildMnodeDeployOption(SDnode *pDnode, SMnodeOpt *pOption) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  dndInitMnodeOption(pDnode, pOption);
  pOption->replica = 1;
  pOption->selfIndex = 0;

  SReplica *pSelf = &pOption->replicas[0];
  pSelf->id = 1;
  pSelf->port = pDnode->opt.serverPort;
  tstrncpy(pSelf->fqdn, pDnode->opt.localFqdn, TSDB_FQDN_LEN);

  /* Keep the management state in step with the option handed to the mnode. */
  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->replica = pOption->replica;
  memcpy(&pMgmt->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
}
/*
 * Build the option for re-opening an mnode: the replica layout is copied
 * from the management state into the option.
 */
static void dndBuildMnodeOpenOption(SDnode *pDnode, SMnodeOpt *pOption) {
  const SMnodeMgmt *pSaved = &pDnode->mmgmt;

  dndInitMnodeOption(pDnode, pOption);

  pOption->selfIndex = pSaved->selfIndex;
  pOption->replica = pSaved->replica;
  memcpy(&pOption->replicas, pSaved->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
}
/*
 * Build an mnode option from a create/alter message and mirror the replica
 * layout into the management state.
 *
 * pMsg is expected to be in host byte order already. The replica count comes
 * from the peer, so it is validated against TSDB_MAX_REPLICA before it is
 * used to index the fixed-size replica arrays.
 *
 * Returns 0 on success. Returns -1 with terrno set to
 * TSDB_CODE_DND_MNODE_ID_NOT_FOUND when the replica count is out of range or
 * when no replica carries this dnode's id.
 */
static int32_t dndBuildMnodeOptionFromMsg(SDnode *pDnode, SMnodeOpt *pOption, SCreateMnodeMsg *pMsg) {
  /* Also fills pOption->dnodeId and pOption->clusterId. */
  dndInitMnodeOption(pDnode, pOption);

  if (pMsg->replica > TSDB_MAX_REPLICA) {
    /* No dedicated error code is visible from here; reuse the one this function already reports. */
    terrno = TSDB_CODE_DND_MNODE_ID_NOT_FOUND;
    dError("failed to build mnode options since replica %d is invalid", pMsg->replica);
    return -1;
  }

  pOption->replica = pMsg->replica;
  pOption->selfIndex = -1;
  for (int32_t i = 0; i < pMsg->replica; ++i) {
    SReplica *pReplica = &pOption->replicas[i];
    pReplica->id = pMsg->replicas[i].id;
    pReplica->port = pMsg->replicas[i].port;
    tstrncpy(pReplica->fqdn, pMsg->replicas[i].fqdn, TSDB_FQDN_LEN);
    if (pReplica->id == pOption->dnodeId) {
      pOption->selfIndex = i;
    }
  }

  if (pOption->selfIndex == -1) {
    terrno = TSDB_CODE_DND_MNODE_ID_NOT_FOUND;
    dError("failed to build mnode options since %s", terrstr());
    return -1;
  }

  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  pMgmt->selfIndex = pOption->selfIndex;
  pMgmt->replica = pOption->replica;
  memcpy(&pMgmt->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
  return 0;
}
/*
 * Start the mnode workers, open the mnode with the given option, persist the
 * management file, and finally publish the mnode under the write latch.
 *
 * Returns 0 on success. On failure the steps already taken are undone and the
 * value returned is the result of dndStartMnodeWorker or the terrno captured
 * at the point of failure (terrno is restored to that value before returning).
 */
static int32_t dndOpenMnode(SDnode *pDnode, SMnodeOpt *pOption) {
SMnodeMgmt *pMgmt = &pDnode->mmgmt;
int32_t code = dndStartMnodeWorker(pDnode);
if (code != 0) {
dError("failed to start mnode worker since %s", terrstr());
return code;
}
SMnode *pMnode = mndOpen(pDnode->dir.mnode, pOption);
if (pMnode == NULL) {
dError("failed to open mnode since %s", terrstr());
/* Save terrno before the cleanup runs and restore it afterwards. */
code = terrno;
dndStopMnodeWorker(pDnode);
terrno = code;
return code;
}
/* NOTE(review): the file is written before pMgmt->deployed is set to 1 below, so the
 * persisted "deployed" value is whatever pMgmt->deployed held on entry - confirm this
 * is intended for a first-time deployment. */
if (dndWriteMnodeFile(pDnode) != 0) {
dError("failed to write mnode file since %s", terrstr());
code = terrno;
dndStopMnodeWorker(pDnode);
mndClose(pMnode);
/* NOTE(review): mndDestroy is also reached when re-opening an existing mnode - confirm. */
mndDestroy(pDnode->dir.mnode);
terrno = code;
return code;
}
/* Publish the opened mnode. */
taosWLockLatch(&pMgmt->latch);
pMgmt->pMnode = pMnode;
pMgmt->deployed = 1;
taosWUnLockLatch(&pMgmt->latch);
return 0;
}
/*
 * Apply a new option to the running mnode.
 *
 * Returns 0 on success, -1 when the mnode cannot be acquired or mndAlter
 * fails. The acquired reference is released on both outcomes.
 */
static int32_t dndAlterMnode(SDnode *pDnode, SMnodeOpt *pOption) {
  SMnode *pMnode = dndAcquireMnode(pDnode);
  if (pMnode == NULL) {
    dError("failed to alter mnode since %s", terrstr());
    return -1;
  }

  int32_t result = 0;
  if (mndAlter(pMnode, pOption) != 0) {
    dError("failed to alter mnode since %s", terrstr());
    result = -1;
  }

  dndReleaseMnode(pDnode, pMnode);
  return result;
}
/*
 * Drop the mnode hosted on this dnode: mark it dropped, persist that mark,
 * stop the workers, persist the state again, then close the mnode and
 * destroy its directory.
 *
 * Returns 0 on success, -1 when the mnode cannot be acquired or the first
 * write of the management file fails (the dropped mark is rolled back then).
 */
static int32_t dndDropMnode(SDnode *pDnode) {
SMnodeMgmt *pMgmt = &pDnode->mmgmt;
SMnode *pMnode = dndAcquireMnode(pDnode);
if (pMnode == NULL) {
dError("failed to drop mnode since %s", terrstr());
return -1;
}
/* NOTE(review): pMgmt->dropped is modified while holding the read latch, here and in the
 * rollback below; other writers in this file use taosWLockLatch - confirm. */
taosRLockLatch(&pMgmt->latch);
pMgmt->dropped = 1;
taosRUnLockLatch(&pMgmt->latch);
if (dndWriteMnodeFile(pDnode) != 0) {
/* Persisting failed: undo the in-memory mark and give the reference back. */
taosRLockLatch(&pMgmt->latch);
pMgmt->dropped = 0;
taosRUnLockLatch(&pMgmt->latch);
dndReleaseMnode(pDnode, pMnode);
dError("failed to drop mnode since %s", terrstr());
return -1;
}
/* The reference acquired above is not released on this path.
 * NOTE(review): presumably this is the one reference dndStopMnodeWorker tolerates - confirm. */
dndStopMnodeWorker(pDnode);
/* Written again after the stop, which clears pMgmt->deployed; the result is not checked. */
dndWriteMnodeFile(pDnode);
mndClose(pMnode);
mndDestroy(pDnode->dir.mnode);
return 0;
}
/*
 * Convert the create/alter/drop mnode message carried by pRpcMsg from
 * network to host byte order, in place, and return it.
 *
 * pRpcMsg is the RPC message itself; its pCont is the SCreateMnodeMsg.
 * The replica count comes from the peer, so the conversion loop never walks
 * past the fixed-size replicas array even if the count is too large.
 */
static SCreateMnodeMsg *dndParseCreateMnodeMsg(SRpcMsg *pRpcMsg) {
  SCreateMnodeMsg *pMsg = pRpcMsg->pCont;
  pMsg->dnodeId = ntohl(pMsg->dnodeId);

  int32_t maxReplica = (int32_t)(sizeof(pMsg->replicas) / sizeof(pMsg->replicas[0]));
  int32_t replica = pMsg->replica;
  if (replica > maxReplica) {
    replica = maxReplica;
  }

  for (int32_t i = 0; i < replica; ++i) {
    pMsg->replicas[i].id = ntohl(pMsg->replicas[i].id);
    pMsg->replicas[i].port = ntohs(pMsg->replicas[i].port);
  }

  return pMsg;
}
/*
 * Handle a create-mnode request addressed to this dnode.
 *
 * Returns the result of dndOpenMnode on success. Returns -1 with terrno set
 * when the message targets another dnode (TSDB_CODE_DND_MNODE_ID_INVALID) or
 * the option cannot be built from it.
 */
static int32_t dndProcessCreateMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  /* The parser takes the RPC message and reads its pCont itself. */
  SCreateMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  SMnodeOpt option = {0};
  if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) {
    return -1;
  }
  return dndOpenMnode(pDnode, &option);
}
/*
 * Handle an alter-mnode request addressed to this dnode.
 *
 * Returns the result of dndAlterMnode on success. Returns -1 with terrno set
 * when the message targets another dnode (TSDB_CODE_DND_MNODE_ID_INVALID) or
 * the option cannot be built from it.
 */
static int32_t dndProcessAlterMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  /* The parser takes the RPC message and reads its pCont itself. */
  SAlterMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  SMnodeOpt option = {0};
  if (dndBuildMnodeOptionFromMsg(pDnode, &option, pMsg) != 0) {
    return -1;
  }
  return dndAlterMnode(pDnode, &option);
}
/*
 * Handle a drop-mnode request addressed to this dnode.
 *
 * Returns the result of dndDropMnode, or -1 with terrno set to
 * TSDB_CODE_DND_MNODE_ID_INVALID when the message targets another dnode.
 */
static int32_t dndProcessDropMnodeReq(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  /* The parser takes the RPC message and reads its pCont itself. */
  SDropMnodeMsg *pMsg = dndParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dndGetDnodeId(pDnode)) {
    terrno = TSDB_CODE_DND_MNODE_ID_INVALID;
    return -1;
  }

  return dndDropMnode(pDnode);
}
/*
 * Worker callback for the mnode management queue: dispatch a create/alter/
 * drop mnode request, send the response, and free the message.
 *
 * The handlers signal failure with a non-zero return (often -1) and leave
 * the reason in terrno, so the response carries terrno rather than the raw
 * return value whenever terrno is set.
 */
static void dndProcessMnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg) {
  int32_t code = 0;

  switch (pMsg->msgType) {
    case TSDB_MSG_TYPE_CREATE_MNODE_IN:
      code = dndProcessCreateMnodeReq(pDnode, pMsg);
      break;
    case TSDB_MSG_TYPE_ALTER_MNODE_IN:
      code = dndProcessAlterMnodeReq(pDnode, pMsg);
      break;
    case TSDB_MSG_TYPE_DROP_MNODE_IN:
      code = dndProcessDropMnodeReq(pDnode, pMsg);
      break;
    default:
      terrno = TSDB_CODE_MSG_NOT_PROCESSED;
      code = TSDB_CODE_MSG_NOT_PROCESSED;
      break;
  }

  /* Report the real error code to the peer instead of a bare -1. */
  if (code != 0 && terrno != 0) {
    code = terrno;
  }

  SRpcMsg rsp = {.code = code, .handle = pMsg->handle};
  rpcSendResponse(&rsp);
  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}
/*
 * Worker callback for the mnode read queue. The message is processed while
 * an mnode reference is held; if none can be acquired, terrno is sent back
 * as the response. The message is cleaned up on both paths.
 */
static void dndProcessMnodeReadQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessReadMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}
/*
 * Worker callback for the mnode write queue. The message is processed while
 * an mnode reference is held; if none can be acquired, terrno is sent back
 * as the response. The message is cleaned up on both paths.
 */
static void dndProcessMnodeWriteQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessWriteMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}
/*
 * Worker callback for the mnode apply queue. The message is processed while
 * an mnode reference is held; if none can be acquired, terrno is sent back
 * as the response. The message is cleaned up on both paths.
 */
static void dndProcessMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessApplyMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}
/*
 * Worker callback for the mnode sync queue. The message is processed while
 * an mnode reference is held; if none can be acquired, terrno is sent back
 * as the response. The message is cleaned up on both paths.
 */
static void dndProcessMnodeSyncQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode == NULL) {
    mndSendRsp(pMsg, terrno);
  } else {
    mndProcessSyncMsg(pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  mndCleanupMsg(pMsg);
}
/*
 * Wrap an RPC message into an mnode message and append it to pQueue.
 *
 * Returns 0 once the message is queued. Returns -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY when the wrapper cannot be created or the queue
 * write fails; a wrapper that could not be queued is cleaned up here.
 */
static int32_t dndWriteMnodeMsgToQueue(SMnode *pMnode, taos_queue pQueue, SRpcMsg *pRpcMsg) {
  assert(pQueue);

  SMnodeMsg *pMsg = mndInitMsg(pMnode, pRpcMsg);
  if (pMsg != NULL) {
    if (taosWriteQitem(pQueue, pMsg) == 0) {
      return 0;
    }
    mndCleanupMsg(pMsg);
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/*
 * Entry point for create/alter/drop mnode requests: copy the RPC message
 * into a queue item and hand it to the management queue.
 *
 * The queue item must carry the message itself, because the queue worker
 * reads msgType, handle and pCont from it. No mnode reference is taken here:
 * the message is not routed to the mnode, and a reference that is never
 * released would keep the reference count raised.
 *
 * When the item cannot be allocated or queued, the peer is answered with
 * TSDB_CODE_OUT_OF_MEMORY and the request content is freed.
 */
void dndProcessMnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pRpcMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg));
  if (pMsg != NULL) {
    *pMsg = *pRpcMsg;
    if (taosWriteQitem(pMgmt->pMgmtQ, pMsg) == 0) {
      return;
    }
  }

  SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY};
  rpcSendResponse(&rsp);
  rpcFreeCont(pRpcMsg->pCont);
  if (pMsg != NULL) {
    taosFreeQitem(pMsg);
  }
}
/*
 * Route an RPC message to the mnode write queue. If the mnode cannot be
 * acquired or the message cannot be queued, the peer is answered with terrno
 * and the request content is freed. The reference is released on every path.
 */
void dndProcessMnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  int32_t queued = -1;
  if (pMnode != NULL) {
    queued = dndWriteMnodeMsgToQueue(pMnode, pDnode->mmgmt.pWriteQ, pMsg);
  }

  if (queued != 0) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  dndReleaseMnode(pDnode, pMnode);
}
/*
 * Route an RPC message to the mnode sync queue. If the mnode cannot be
 * acquired or the message cannot be queued, the peer is answered with terrno
 * and the request content is freed. The reference is released on every path.
 */
void dndProcessMnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  int32_t queued = -1;
  if (pMnode != NULL) {
    queued = dndWriteMnodeMsgToQueue(pMnode, pDnode->mmgmt.pSyncQ, pMsg);
  }

  if (queued != 0) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  dndReleaseMnode(pDnode, pMnode);
}
/*
 * Route an RPC message to the mnode read queue. If the mnode cannot be
 * acquired or the message cannot be queued, the peer is answered with terrno
 * and the request content is freed. The reference is released on every path.
 */
void dndProcessMnodeReadMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  SMnode     *pMnode = dndAcquireMnode(pDnode);

  /* Read messages belong on the read queue, whose worker calls the read handler. */
  if (pMnode == NULL || dndWriteMnodeMsgToQueue(pMnode, pMgmt->pReadQ, pMsg) != 0) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
  }

  dndReleaseMnode(pDnode, pMnode);
}
/*
 * Append an mnode message to the apply queue while holding an mnode
 * reference. Returns the result of the queue write, or -1 when no mnode
 * reference can be acquired.
 */
static int32_t dndPutMsgIntoMnodeApplyQueue(SDnode *pDnode, SMnodeMsg *pMsg) {
  int32_t code = -1;

  SMnode *pMnode = dndAcquireMnode(pDnode);
  if (pMnode != NULL) {
    code = taosWriteQitem(pDnode->mmgmt.pApplyQ, pMsg);
    dndReleaseMnode(pDnode, pMnode);
  }

  return code;
}
/*
 * Allocate the management queue from the mgmt pool, served by
 * dndProcessMnodeMgmtQueue. Returns 0 on success, or -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndAllocMnodeMgmtQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pMgmtQ = tWorkerAllocQueue(&pMgmt->mgmtPool, pDnode, (FProcessItem)dndProcessMnodeMgmtQueue);
  if (pMgmt->pMgmtQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Return the management queue to the mgmt pool and clear the stored handle. */
static void dndFreeMnodeMgmtQueue(SDnode *pDnode) {
  tWorkerFreeQueue(&pDnode->mmgmt.mgmtPool, pDnode->mmgmt.pMgmtQ);
  pDnode->mmgmt.pMgmtQ = NULL;
}
/*
 * Configure and start the "mnode-mgmt" worker pool (min 1, max 1).
 * Returns 0 on success, or -1 with terrno set to TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndInitMnodeMgmtWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.mgmtPool;
  pPool->name = "mnode-mgmt";
  pPool->min = 1;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode mgmt worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Shut down the "mnode-mgmt" worker pool. */
static void dndCleanupMnodeMgmtWorker(SDnode *pDnode) {
  tWorkerCleanup(&pDnode->mmgmt.mgmtPool);
  dDebug("mnode mgmt worker is closed");
}
/*
 * Allocate the read queue from the read pool, served by
 * dndProcessMnodeReadQueue. Returns 0 on success, or -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndAllocMnodeReadQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pReadQ = tWorkerAllocQueue(&pMgmt->readPool, pDnode, (FProcessItem)dndProcessMnodeReadQueue);
  if (pMgmt->pReadQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Return the read queue to the read pool and clear the stored handle. */
static void dndFreeMnodeReadQueue(SDnode *pDnode) {
  tWorkerFreeQueue(&pDnode->mmgmt.readPool, pDnode->mmgmt.pReadQ);
  pDnode->mmgmt.pReadQ = NULL;
}
/*
 * Configure and start the "mnode-read" worker pool (min 0, max 1).
 * Returns 0 on success, or -1 with terrno set to TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndInitMnodeReadWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.readPool;
  pPool->name = "mnode-read";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode read worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Shut down the "mnode-read" worker pool. */
static void dndCleanupMnodeReadWorker(SDnode *pDnode) {
  tWorkerCleanup(&pDnode->mmgmt.readPool);
  dDebug("mnode read worker is closed");
}
/*
 * Allocate the write queue from the write pool, served by
 * dndProcessMnodeWriteQueue. Returns 0 on success, or -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndAllocMnodeWriteQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pWriteQ = tWorkerAllocQueue(&pMgmt->writePool, pDnode, (FProcessItem)dndProcessMnodeWriteQueue);
  if (pMgmt->pWriteQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Return the write queue to the write pool and clear the stored handle. */
static void dndFreeMnodeWriteQueue(SDnode *pDnode) {
  tWorkerFreeQueue(&pDnode->mmgmt.writePool, pDnode->mmgmt.pWriteQ);
  pDnode->mmgmt.pWriteQ = NULL;
}
/*
 * Allocate the apply queue, served by dndProcessMnodeApplyQueue. The apply
 * queue is taken from the write pool, the same pool as the write queue.
 * Returns 0 on success, or -1 with terrno set to TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndAllocMnodeApplyQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pApplyQ = tWorkerAllocQueue(&pMgmt->writePool, pDnode, (FProcessItem)dndProcessMnodeApplyQueue);
  if (pMgmt->pApplyQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Return the apply queue to the write pool and clear the stored handle. */
static void dndFreeMnodeApplyQueue(SDnode *pDnode) {
  tWorkerFreeQueue(&pDnode->mmgmt.writePool, pDnode->mmgmt.pApplyQ);
  pDnode->mmgmt.pApplyQ = NULL;
}
/*
 * Configure and start the "mnode-write" worker pool (min 0, max 1).
 * Returns 0 on success, or -1 with terrno set to TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndInitMnodeWriteWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.writePool;
  pPool->name = "mnode-write";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode write worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Shut down the "mnode-write" worker pool. */
static void dndCleanupMnodeWriteWorker(SDnode *pDnode) {
  tWorkerCleanup(&pDnode->mmgmt.writePool);
  dDebug("mnode write worker is closed");
}
/*
 * Allocate the sync queue from the sync pool, served by
 * dndProcessMnodeSyncQueue. Returns 0 on success, or -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndAllocMnodeSyncQueue(SDnode *pDnode) {
  SMnodeMgmt *pMgmt = &pDnode->mmgmt;

  pMgmt->pSyncQ = tWorkerAllocQueue(&pMgmt->syncPool, pDnode, (FProcessItem)dndProcessMnodeSyncQueue);
  if (pMgmt->pSyncQ != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Return the sync queue to the sync pool and clear the stored handle. */
static void dndFreeMnodeSyncQueue(SDnode *pDnode) {
  tWorkerFreeQueue(&pDnode->mmgmt.syncPool, pDnode->mmgmt.pSyncQ);
  pDnode->mmgmt.pSyncQ = NULL;
}
/*
 * Configure and start the "mnode-sync" worker pool (min 0, max 1).
 * Returns 0 on success, or -1 with terrno set to TSDB_CODE_OUT_OF_MEMORY.
 */
static int32_t dndInitMnodeSyncWorker(SDnode *pDnode) {
  SWorkerPool *pPool = &pDnode->mmgmt.syncPool;
  pPool->name = "mnode-sync";
  pPool->min = 0;
  pPool->max = 1;

  if (tWorkerInit(pPool) == 0) {
    dDebug("mnode sync worker is initialized");
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Shut down the "mnode-sync" worker pool. */
static void dndCleanupMnodeSyncWorker(SDnode *pDnode) {
  tWorkerCleanup(&pDnode->mmgmt.syncPool);
  dDebug("mnode sync worker is closed");
}
/*
 * Initialize mnode management for this dnode.
 *
 * Sets up the latch and the management worker, records the path of
 * "<dnode dir>/mnode.json", and reads it. Depending on the state read:
 *   - dropped:       the mnode directory is destroyed and 0 is returned;
 *   - deployed:      the mnode is opened with the saved replica layout;
 *   - not deployed:  the mnode is deployed only if dndNeedDeployMnode says so,
 *                    otherwise 0 is returned without opening anything.
 *
 * Returns 0 on success, -1 (or the result of dndOpenMnode) on failure.
 */
int32_t dndInitMnode(SDnode *pDnode) {
  dInfo("dnode-mnode start to init");

  SMnodeMgmt *pMgmt = &pDnode->mmgmt;
  taosInitRWLatch(&pMgmt->latch);

  if (dndInitMnodeMgmtWorker(pDnode) != 0) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  char path[PATH_MAX];
  snprintf(path, PATH_MAX, "%s/mnode.json", pDnode->dir.dnode);
  pMgmt->file = strdup(path);
  if (pMgmt->file == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  if (dndReadMnodeFile(pDnode) != 0) {
    return -1;
  }

  if (pMgmt->dropped) {
    dInfo("mnode has been deployed and needs to be deleted");
    mndDestroy(pDnode->dir.mnode);
    return 0;
  }

  SMnodeOpt option = {0};
  if (pMgmt->deployed) {
    dInfo("start to open mnode");
    dndBuildMnodeOpenOption(pDnode, &option);
  } else {
    if (!dndNeedDeployMnode(pDnode)) {
      dDebug("mnode does not need to be deployed");
      return 0;
    }
    dInfo("start to deploy mnode");
    dndBuildMnodeDeployOption(pDnode, &option);
  }

  return dndOpenMnode(pDnode, &option);
}
/*
 * Tear down mnode management: stop the mnode workers, shut down the
 * management worker, and free the stored file path.
 */
void dndCleanupMnode(SDnode *pDnode) {
  dInfo("dnode-mnode start to clean up");

  dndStopMnodeWorker(pDnode);
  dndCleanupMnodeMgmtWorker(pDnode);
  tfree(pDnode->mmgmt.file);

  dInfo("dnode-mnode is cleaned up");
}
/*
 * Look up a user's auth data in the mnode hosted on this dnode.
 *
 * Returns the result of mndRetriveAuth, or -1 with terrno set to
 * TSDB_CODE_APP_NOT_READY when no mnode can be acquired.
 */
int32_t dndGetUserAuthFromMnode(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) {
  SMnode *pMnode = dndAcquireMnode(pDnode);

  if (pMnode != NULL) {
    int32_t code = mndRetriveAuth(pMnode, user, spi, encrypt, secret, ckey);
    dndReleaseMnode(pDnode, pMnode);
    return code;
  }

  terrno = TSDB_CODE_APP_NOT_READY;
  dTrace("failed to get user auth since %s", terrstr());
  return -1;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* This file is mainly responsible for the communication between dnodes. Each
 * dnode works as both server and client. A dnode may send status, grant and
 * config messages to the mnode; the mnode may send create/alter/drop
 * table/vnode messages to a dnode. All of these messages are handled here.
 */
#define _DEFAULT_SOURCE
#include "dndTransport.h"
#include "dndDnode.h"
#include "dndMnode.h"
#include "dndVnodes.h"
#define INTERNAL_USER "_internal"
#define INTERNAL_CKEY "_key"
#define INTERNAL_SECRET "_secret"
/*
 * Fill the message dispatch table: pMgmt->msgFp is indexed by message type
 * and holds the handler that dndProcessRequest / dndProcessResponse call for
 * that type. Types left unassigned keep whatever value the table had before
 * (both dispatchers treat NULL as "no handler").
 *
 * Assignments run top to bottom, so a later assignment for the same type
 * replaces an earlier one.
 */
static void dndInitMsgFp(STransMgmt *pMgmt) {
// msg from client to dnode
pMgmt->msgFp[TSDB_MSG_TYPE_SUBMIT] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_QUERY] = dndProcessVnodeQueryMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_FETCH] = dndProcessVnodeFetchMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_TABLE] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_TABLE] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_TABLE] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_TABLE_META] = dndProcessVnodeQueryMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_TABLES_META] = dndProcessVnodeQueryMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_QUERY] = dndProcessVnodeQueryMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_CONSUME] = dndProcessVnodeQueryMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_CONNECT] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_DISCONNECT] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_ACK] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_MQ_RESET] = dndProcessVnodeWriteMsg;
// msg from client to mnode
pMgmt->msgFp[TSDB_MSG_TYPE_CONNECT] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_ACCT] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_ACCT] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_ACCT] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_USER] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_USER] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_USER] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_DNODE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CONFIG_DNODE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_DNODE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_DB] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_DB] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_USE_DB] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_DB] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SYNC_DB] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_TOPIC] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_TOPIC] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_TOPIC] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_FUNCTION] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_FUNCTION] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_FUNCTION] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_STABLE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_STABLE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_STABLE] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_STABLE_VGROUP] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_KILL_QUERY] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_KILL_CONN] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_HEARTBEAT] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SHOW] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SHOW_RETRIEVE] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SHOW_RETRIEVE_FUNC] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_COMPACT_VNODE] = dndProcessMnodeWriteMsg;
// message from client to dnode
pMgmt->msgFp[TSDB_MSG_TYPE_NETWORK_TEST] = dndProcessDnodeReq;
// message from mnode to vnode
// (each *_IN request goes to a vnode handler; its *_IN_RSP goes to the mnode write handler)
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_STABLE_IN] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_STABLE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_STABLE_IN] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_STABLE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_STABLE_IN] = dndProcessVnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_STABLE_IN_RSP] = dndProcessMnodeWriteMsg;
// message from mnode to dnode
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SYNC_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_SYNC_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_AUTH_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_AUTH_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_COMPACT_VNODE_IN] = dndProcessVnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_COMPACT_VNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_MNODE_IN] = dndProcessMnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CREATE_MNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_MNODE_IN] = dndProcessMnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_ALTER_MNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_MNODE_IN] = dndProcessMnodeMgmtMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_DROP_MNODE_IN_RSP] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_CONFIG_DNODE_IN] = dndProcessDnodeReq;
pMgmt->msgFp[TSDB_MSG_TYPE_CONFIG_DNODE_IN_RSP] = dndProcessMnodeWriteMsg;
// message from dnode to mnode
// (requests go to an mnode handler; their *_RSP counterparts go to dndProcessDnodeRsp)
pMgmt->msgFp[TSDB_MSG_TYPE_AUTH] = dndProcessMnodeReadMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_AUTH_RSP] = dndProcessDnodeRsp;
pMgmt->msgFp[TSDB_MSG_TYPE_GRANT] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_GRANT_RSP] = dndProcessDnodeRsp;
pMgmt->msgFp[TSDB_MSG_TYPE_STATUS] = dndProcessMnodeWriteMsg;
pMgmt->msgFp[TSDB_MSG_TYPE_STATUS_RSP] = dndProcessDnodeRsp;
}
/*
 * RPC client callback: dispatch a response to the handler registered for its
 * message type.
 *
 * A NULL message is ignored. While the dnode is stopped, responses are
 * dropped (their content is freed). A response with no registered handler is
 * logged and its content freed.
 */
static void dndProcessResponse(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) {
  /* Must be checked before msgType is read from the message. */
  if (pMsg == NULL) return;

  SDnode     *pDnode = parent;
  STransMgmt *pMgmt = &pDnode->tmgmt;
  int32_t     msgType = pMsg->msgType;

  if (dndGetStat(pDnode) == DND_STAT_STOPPED) {
    if (pMsg->pCont == NULL) return;
    dTrace("RPC %p, rsp:%s is ignored since dnode is stopping", pMsg->handle, taosMsg[msgType]);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  DndMsgFp fp = pMgmt->msgFp[msgType];
  if (fp != NULL) {
    dTrace("RPC %p, rsp:%s will be processed, code:%s", pMsg->handle, taosMsg[msgType], tstrerror(pMsg->code));
    (*fp)(pDnode, pMsg, pEpSet);
  } else {
    dError("RPC %p, rsp:%s not processed", pMsg->handle, taosMsg[msgType]);
    rpcFreeCont(pMsg->pCont);
  }
}
/*
 * Open the RPC client endpoint ("DND-C") that this dnode uses to send
 * requests; responses arrive through dndProcessResponse. The client
 * authenticates with the internal user/ckey/secret.
 *
 * Returns 0 on success, -1 when the endpoint cannot be opened.
 */
static int32_t dndInitClient(SDnode *pDnode) {
  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));

  /* Identity and credentials */
  rpcInit.label = "DND-C";
  rpcInit.connType = TAOS_CONN_CLIENT;
  rpcInit.user = INTERNAL_USER;
  rpcInit.ckey = INTERNAL_CKEY;
  rpcInit.secret = INTERNAL_SECRET;

  /* Capacity and timing */
  rpcInit.numOfThreads = 1;
  rpcInit.sessions = 8;
  rpcInit.idleTime = pDnode->opt.shellActivityTimer * 1000;

  /* Callback and its context */
  rpcInit.cfp = dndProcessResponse;
  rpcInit.parent = pDnode;

  pDnode->tmgmt.clientRpc = rpcOpen(&rpcInit);
  if (pDnode->tmgmt.clientRpc != NULL) {
    dDebug("dnode rpc client is initialized");
    return 0;
  }

  dError("failed to init rpc client");
  return -1;
}
/* Close the RPC client endpoint if it is open and clear the stored handle. */
static void dndCleanupClient(SDnode *pDnode) {
  STransMgmt *pTrans = &pDnode->tmgmt;

  if (!pTrans->clientRpc) {
    return;
  }

  rpcClose(pTrans->clientRpc);
  pTrans->clientRpc = NULL;
  dDebug("dnode rpc client is closed");
}
/*
 * RPC server callback: validate an incoming request and dispatch it to the
 * handler registered for its message type.
 *
 * Network-test requests are handled first, before the dnode state is
 * checked. Otherwise the request is rejected with an error response when the
 * dnode is stopped, is not running, the request has no content, or no
 * handler is registered for its type.
 */
static void dndProcessRequest(void *param, SRpcMsg *pMsg, SEpSet *pEpSet) {
  SDnode *pDnode = param;
  int32_t msgType = pMsg->msgType;

  if (msgType == TSDB_MSG_TYPE_NETWORK_TEST) {
    dTrace("RPC %p, network test req will be processed", pMsg->handle);
    dndProcessDnodeReq(pDnode, pMsg, pEpSet);
    return;
  }

  /* Shared by every rejection below; only the code differs. */
  SRpcMsg rspMsg = {.handle = pMsg->handle};

  if (dndGetStat(pDnode) == DND_STAT_STOPPED) {
    dError("RPC %p, req:%s is ignored since dnode exiting", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_DND_EXITING;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  if (dndGetStat(pDnode) != DND_STAT_RUNNING) {
    dError("RPC %p, req:%s is ignored since dnode not running", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_APP_NOT_READY;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  if (pMsg->pCont == NULL) {
    dTrace("RPC %p, req:%s not processed since content is null", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_DND_INVALID_MSG_LEN;
    rpcSendResponse(&rspMsg);
    return;
  }

  DndMsgFp fp = pDnode->tmgmt.msgFp[msgType];
  if (fp == NULL) {
    dError("RPC %p, req:%s is not processed", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_MSG_NOT_PROCESSED;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  dTrace("RPC %p, req:%s will be processed", pMsg->handle, taosMsg[msgType]);
  (*fp)(pDnode, pMsg, pEpSet);
}
/*
 * Send a request to the current mnode end-point set through the client
 * endpoint using rpcSendRecv, which fills pRpcRsp.
 */
static void dndSendMsgToMnodeRecv(SDnode *pDnode, SRpcMsg *pRpcMsg, SRpcMsg *pRpcRsp) {
  SEpSet mnodeEps = {0};
  dndGetMnodeEpSet(pDnode, &mnodeEps);

  rpcSendRecv(pDnode->tmgmt.clientRpc, &mnodeEps, pRpcMsg, pRpcRsp);
}
/*
 * Answer auth lookups for the two built-in users without asking an mnode.
 *
 * For INTERNAL_USER the secret is derived from INTERNAL_SECRET; for
 * TSDB_NETTEST_USER it is derived from the user name itself. In both cases
 * spi, encrypt and the first byte of ckey are set to 0.
 *
 * Returns 0 for a built-in user, -1 for any other user (outputs untouched).
 */
static int32_t dndAuthInternalMsg(SDnode *pDnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) {
  /* A simple temporary implementation */
  const char *plain = NULL;
  if (strcmp(user, INTERNAL_USER) == 0) {
    plain = INTERNAL_SECRET;
  } else if (strcmp(user, TSDB_NETTEST_USER) == 0) {
    plain = TSDB_NETTEST_USER;
  } else {
    return -1;
  }

  char pass[32] = {0};
  taosEncryptPass((uint8_t *)plain, strlen(plain), pass);
  memcpy(secret, pass, TSDB_KEY_LEN);

  *spi = 0;
  *encrypt = 0;
  *ckey = 0;
  return 0;
}
static int32_t dndRetrieveUserAuthInfo(void *parent, char *user, char *spi, char *encrypt, char *secret, char *ckey) {
SDnode *pDnode = parent;
if (dndAuthInternalMsg(parent, user, spi, encrypt, secret, ckey) == 0) {
// dTrace("get internal auth success");
return 0;
}
if (dndGetUserAuthFromMnode(pDnode, user, spi, encrypt, secret, ckey) == 0) {
// dTrace("get auth from internal mnode");
return 0;
}
if (terrno != TSDB_CODE_APP_NOT_READY) {
dTrace("failed to get user auth from internal mnode since %s", terrstr());
return -1;
}
// dDebug("user:%s, send auth msg to other mnodes", user);
SAuthMsg *pMsg = rpcMallocCont(sizeof(SAuthMsg));
tstrncpy(pMsg->user, user, TSDB_USER_LEN);
SRpcMsg rpcMsg = {.pCont = pMsg, .contLen = sizeof(SAuthMsg), .msgType = TSDB_MSG_TYPE_AUTH};
SRpcMsg rpcRsp = {0};
dndSendMsgToMnodeRecv(pDnode, &rpcMsg, &rpcRsp);
if (rpcRsp.code != 0) {
terrno = rpcRsp.code;
dError("user:%s, failed to get user auth from other mnodes since %s", user, terrstr());
} else {
SAuthRsp *pRsp = rpcRsp.pCont;
memcpy(secret, pRsp->secret, TSDB_KEY_LEN);
memcpy(ckey, pRsp->ckey, TSDB_KEY_LEN);
*spi = pRsp->spi;
*encrypt = pRsp->encrypt;
dDebug("user:%s, success to get user auth from other mnodes", user);
}
rpcFreeCont(rpcRsp.pCont);
return rpcRsp.code;
}
/*
 * Fill the message dispatch table and open the RPC server endpoint ("DND-S")
 * on the configured server port. Requests arrive through dndProcessRequest
 * and users are authenticated through dndRetrieveUserAuthInfo.
 *
 * The thread count is half of numOfCores * numOfThreadsPerCore, truncated to
 * an integer, with a minimum of 1.
 *
 * Returns 0 on success, -1 when the endpoint cannot be opened.
 */
static int32_t dndInitServer(SDnode *pDnode) {
  dndInitMsgFp(&pDnode->tmgmt);

  int32_t threads = (int32_t)((pDnode->opt.numOfCores * pDnode->opt.numOfThreadsPerCore) / 2.0);
  threads = (threads < 1) ? 1 : threads;

  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));

  /* Identity */
  rpcInit.label = "DND-S";
  rpcInit.connType = TAOS_CONN_SERVER;
  rpcInit.localPort = pDnode->opt.serverPort;

  /* Capacity and timing */
  rpcInit.numOfThreads = threads;
  rpcInit.sessions = pDnode->opt.maxShellConns;
  rpcInit.idleTime = pDnode->opt.shellActivityTimer * 1000;

  /* Callbacks and their context */
  rpcInit.cfp = dndProcessRequest;
  rpcInit.afp = dndRetrieveUserAuthInfo;
  rpcInit.parent = pDnode;

  pDnode->tmgmt.serverRpc = rpcOpen(&rpcInit);
  if (pDnode->tmgmt.serverRpc != NULL) {
    dDebug("dnode rpc server is initialized");
    return 0;
  }

  dError("failed to init rpc server");
  return -1;
}
/* Close the RPC server endpoint if it is open and clear the stored handle. */
static void dndCleanupServer(SDnode *pDnode) {
  STransMgmt *pTrans = &pDnode->tmgmt;

  if (!pTrans->serverRpc) {
    return;
  }

  rpcClose(pTrans->serverRpc);
  pTrans->serverRpc = NULL;
  dDebug("dnode rpc server is closed");
}
/*
 * Initialize the dnode transport: the RPC client first, then the RPC server.
 * The server is not attempted when the client fails.
 *
 * Returns 0 when both are up, -1 otherwise.
 */
int32_t dndInitTrans(SDnode *pDnode) {
  if (dndInitClient(pDnode) != 0 || dndInitServer(pDnode) != 0) {
    return -1;
  }

  dInfo("dnode-transport is initialized");
  return 0;
}
/* Shut down the dnode transport: the RPC server first, then the RPC client. */
void dndCleanupTrans(SDnode *pDnode) {
  dInfo("dnode-transport start to clean up");

  dndCleanupServer(pDnode);
  dndCleanupClient(pDnode);

  dInfo("dnode-transport is cleaned up");
}
// Send pMsg to the endpoint set pEpSet through the dnode's client RPC handle.
//
// The last argument of rpcSendRequest() is passed as NULL.
void dndSendMsgToDnode(SDnode *pDnode, SEpSet *pEpSet, SRpcMsg *pMsg) {
  rpcSendRequest(pDnode->tmgmt.clientRpc, pEpSet, pMsg, NULL);
}
// Send pMsg to the mnode endpoint set.
//
// The endpoint set is obtained from dndGetMnodeEpSet() on every call and the
// message is then forwarded through dndSendMsgToDnode().
void dndSendMsgToMnode(SDnode *pDnode, SRpcMsg *pMsg) {
  SEpSet mnodeEpSet = {0};  // zeroed before dndGetMnodeEpSet() fills it in

  dndGetMnodeEpSet(pDnode, &mnodeEpSet);
  dndSendMsgToDnode(pDnode, &mnodeEpSet, pMsg);
}
\ No newline at end of file
...@@ -10,26 +10,19 @@ ...@@ -10,26 +10,19 @@
* FITNESS FOR A PARTICULAR PURPOSE. * FITNESS FOR A PARTICULAR PURPOSE.
* *
* You should have received a copy of the GNU Affero General Public License * You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "dnodeVnodes.h" #include "dndVnodes.h"
#include "dnodeTransport.h" #include "dndTransport.h"
#include "cJSON.h"
#include "thash.h"
#include "tlockfree.h"
#include "tqueue.h"
#include "tstep.h"
#include "tthread.h"
#include "tworker.h"
#include "vnode.h"
typedef struct { typedef struct {
int32_t vgId; int32_t vgId;
int32_t refCount; int32_t refCount;
int8_t dropped; int8_t dropped;
int8_t accessState; int8_t accessState;
char *path;
SVnode *pImpl; SVnode *pImpl;
taos_queue pWriteQ; taos_queue pWriteQ;
taos_queue pSyncQ; taos_queue pSyncQ;
...@@ -39,65 +32,106 @@ typedef struct { ...@@ -39,65 +32,106 @@ typedef struct {
} SVnodeObj; } SVnodeObj;
typedef struct { typedef struct {
pthread_t *threadId;
int32_t threadIndex;
int32_t failed;
int32_t opened;
int32_t vnodeNum; int32_t vnodeNum;
int32_t opened;
int32_t failed;
int32_t threadIndex;
pthread_t *pThreadId;
SVnodeObj *pVnodes; SVnodeObj *pVnodes;
} SVThread; SDnode *pDnode;
} SVnodeThread;
static struct {
SHashObj *hash; static int32_t dndInitVnodeReadWorker(SDnode *pDnode);
SWorkerPool mgmtPool; static int32_t dndInitVnodeWriteWorker(SDnode *pDnode);
SWorkerPool queryPool; static int32_t dndInitVnodeSyncWorker(SDnode *pDnode);
SWorkerPool fetchPool; static int32_t dndInitVnodeMgmtWorker(SDnode *pDnode);
SMWorkerPool syncPool; static void dndCleanupVnodeReadWorker(SDnode *pDnode);
SMWorkerPool writePool; static void dndCleanupVnodeWriteWorker(SDnode *pDnode);
taos_queue pMgmtQ; static void dndCleanupVnodeSyncWorker(SDnode *pDnode);
SSteps *pSteps; static void dndCleanupVnodeMgmtWorker(SDnode *pDnode);
int32_t openVnodes; static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode);
int32_t totalVnodes; static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode);
SRWLatch latch; static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode);
} tsVnodes; static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dnodeAllocVnodeQueryQueue(SVnodeObj *pVnode); static void dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void dnodeFreeVnodeQueryQueue(SVnodeObj *pVnode); static void dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dnodeAllocVnodeFetchQueue(SVnodeObj *pVnode); static void dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void dnodeFreeVnodeFetchQueue(SVnodeObj *pVnode); static void dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dnodeAllocVnodeWriteQueue(SVnodeObj *pVnode); static void dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode);
static void dnodeFreeVnodeWriteQueue(SVnodeObj *pVnode);
static int32_t dnodeAllocVnodeApplyQueue(SVnodeObj *pVnode); static void dndProcessVnodeQueryQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg);
static void dnodeFreeVnodeApplyQueue(SVnodeObj *pVnode); static void dndProcessVnodeFetchQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg);
static int32_t dnodeAllocVnodeSyncQueue(SVnodeObj *pVnode); static void dndProcessVnodeWriteQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs);
static void dnodeFreeVnodeSyncQueue(SVnodeObj *pVnode); static void dndProcessVnodeApplyQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs);
static void dndProcessVnodeSyncQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs);
static SVnodeObj *dnodeAcquireVnode(int32_t vgId) { static void dndProcessVnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg);
SVnodeObj *pVnode = NULL; void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
int32_t refCount = 0; void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
taosRLockLatch(&tsVnodes.latch); void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
taosHashGetClone(tsVnodes.hash, &vgId, sizeof(int32_t), (void *)&pVnode); void dndProcessVnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, SVnodeMsg *pMsg);
static SVnodeObj *dndAcquireVnode(SDnode *pDnode, int32_t vgId);
static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndCreateVnodeWrapper(SDnode *pDnode, int32_t vgId, char *path, SVnode *pImpl);
static void dndDropVnodeWrapper(SDnode *pDnode, SVnodeObj *pVnode);
static SVnodeObj **dndGetVnodesFromHash(SDnode *pDnode, int32_t *numOfVnodes);
static int32_t dndGetVnodesFromFile(SDnode *pDnode, SVnodeObj **ppVnodes, int32_t *numOfVnodes);
static int32_t dndWriteVnodesToFile(SDnode *pDnode);
static int32_t dndCreateVnode(SDnode *pDnode, int32_t vgId, SVnodeCfg *pCfg);
static int32_t dndDropVnode(SDnode *pDnode, SVnodeObj *pVnode);
static int32_t dndOpenVnodes(SDnode *pDnode);
static void dndCloseVnodes(SDnode *pDnode);
static int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg);
static SVnodeObj *dndAcquireVnode(SDnode *pDnode, int32_t vgId) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SVnodeObj *pVnode = NULL;
int32_t refCount = 0;
taosRLockLatch(&pMgmt->latch);
taosHashGetClone(pMgmt->hash, &vgId, sizeof(int32_t), (void *)&pVnode);
if (pVnode == NULL) { if (pVnode == NULL) {
terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
} else { } else {
refCount = atomic_add_fetch_32(&pVnode->refCount, 1); refCount = atomic_add_fetch_32(&pVnode->refCount, 1);
} }
taosRUnLockLatch(&tsVnodes.latch); taosRUnLockLatch(&pMgmt->latch);
dTrace("vgId:%d, accquire vnode, refCount:%d", pVnode->vgId, refCount); dTrace("vgId:%d, acquire vnode, refCount:%d", pVnode->vgId, refCount);
return pVnode; return pVnode;
} }
static void dnodeReleaseVnode(SVnodeObj *pVnode) { static void dndReleaseVnode(SDnode *pDnode, SVnodeObj *pVnode) {
int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
dTrace("vgId:%d, release vnode, refCount:%d", pVnode->vgId, refCount); int32_t refCount = 0;
taosRLockLatch(&pMgmt->latch);
if (pVnode != NULL) {
refCount = atomic_sub_fetch_32(&pVnode->refCount, 1);
}
taosRUnLockLatch(&pMgmt->latch);
if (pVnode != NULL) {
dTrace("vgId:%d, release vnode, refCount:%d", pVnode->vgId, refCount);
}
} }
static int32_t dnodeCreateVnodeWrapper(int32_t vgId, SVnode *pImpl) { static int32_t dndCreateVnodeWrapper(SDnode *pDnode, int32_t vgId, char *path, SVnode *pImpl) {
SVnodeObj *pVnode = calloc(1, sizeof(SVnodeObj)); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SVnodeObj *pVnode = calloc(1, sizeof(SVnodeObj));
if (pVnode == NULL) { if (pVnode == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
pVnode->vgId = vgId; pVnode->vgId = vgId;
...@@ -106,45 +140,49 @@ static int32_t dnodeCreateVnodeWrapper(int32_t vgId, SVnode *pImpl) { ...@@ -106,45 +140,49 @@ static int32_t dnodeCreateVnodeWrapper(int32_t vgId, SVnode *pImpl) {
pVnode->accessState = TSDB_VN_ALL_ACCCESS; pVnode->accessState = TSDB_VN_ALL_ACCCESS;
pVnode->pImpl = pImpl; pVnode->pImpl = pImpl;
int32_t code = dnodeAllocVnodeQueryQueue(pVnode); pVnode->path = tstrdup(path);
if (code != 0) { if (pVnode->path == NULL) {
return code; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
code = dnodeAllocVnodeFetchQueue(pVnode); if (dndAllocVnodeQueryQueue(pDnode, pVnode) != 0) {
if (code != 0) { return -1;
return code;
} }
code = dnodeAllocVnodeWriteQueue(pVnode); if (dndAllocVnodeFetchQueue(pDnode, pVnode) != 0) {
if (code != 0) { return -1;
return code;
} }
code = dnodeAllocVnodeApplyQueue(pVnode); if (dndAllocVnodeWriteQueue(pDnode, pVnode) != 0) {
if (code != 0) { return -1;
return code;
} }
code = dnodeAllocVnodeSyncQueue(pVnode); if (dndAllocVnodeApplyQueue(pDnode, pVnode) != 0) {
if (code != 0) { return -1;
return code; }
if (dndAllocVnodeSyncQueue(pDnode, pVnode) != 0) {
return -1;
} }
taosWLockLatch(&tsVnodes.latch); taosWLockLatch(&pMgmt->latch);
code = taosHashPut(tsVnodes.hash, &vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *)); int32_t code = taosHashPut(pMgmt->hash, &vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *));
taosWUnLockLatch(&tsVnodes.latch); taosWUnLockLatch(&pMgmt->latch);
if (code != 0) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
}
return code; return code;
} }
static void dnodeDropVnodeWrapper(SVnodeObj *pVnode) { static void dndDropVnodeWrapper(SDnode *pDnode, SVnodeObj *pVnode) {
taosWLockLatch(&tsVnodes.latch); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
taosHashRemove(tsVnodes.hash, &pVnode->vgId, sizeof(int32_t)); taosWLockLatch(&pMgmt->latch);
taosWUnLockLatch(&tsVnodes.latch); taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t));
taosWUnLockLatch(&pMgmt->latch);
// wait all queue empty dndReleaseVnode(pDnode, pVnode);
dnodeReleaseVnode(pVnode);
while (pVnode->refCount > 0) taosMsleep(10); while (pVnode->refCount > 0) taosMsleep(10);
while (!taosQueueEmpty(pVnode->pWriteQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pWriteQ)) taosMsleep(10);
while (!taosQueueEmpty(pVnode->pSyncQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pSyncQ)) taosMsleep(10);
...@@ -152,21 +190,22 @@ static void dnodeDropVnodeWrapper(SVnodeObj *pVnode) { ...@@ -152,21 +190,22 @@ static void dnodeDropVnodeWrapper(SVnodeObj *pVnode) {
while (!taosQueueEmpty(pVnode->pQueryQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pQueryQ)) taosMsleep(10);
while (!taosQueueEmpty(pVnode->pFetchQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pFetchQ)) taosMsleep(10);
dnodeFreeVnodeQueryQueue(pVnode); dndFreeVnodeQueryQueue(pDnode, pVnode);
dnodeFreeVnodeFetchQueue(pVnode); dndFreeVnodeFetchQueue(pDnode, pVnode);
dnodeFreeVnodeWriteQueue(pVnode); dndFreeVnodeWriteQueue(pDnode, pVnode);
dnodeFreeVnodeApplyQueue(pVnode); dndFreeVnodeApplyQueue(pDnode, pVnode);
dnodeFreeVnodeSyncQueue(pVnode); dndFreeVnodeSyncQueue(pDnode, pVnode);
} }
static SVnodeObj **dnodeGetVnodesFromHash(int32_t *numOfVnodes) { static SVnodeObj **dndGetVnodesFromHash(SDnode *pDnode, int32_t *numOfVnodes) {
taosRLockLatch(&tsVnodes.latch); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
taosRLockLatch(&pMgmt->latch);
int32_t num = 0; int32_t num = 0;
int32_t size = taosHashGetSize(tsVnodes.hash); int32_t size = taosHashGetSize(pMgmt->hash);
SVnodeObj **pVnodes = calloc(size, sizeof(SVnodeObj *)); SVnodeObj **pVnodes = calloc(size, sizeof(SVnodeObj *));
void *pIter = taosHashIterate(tsVnodes.hash, NULL); void *pIter = taosHashIterate(pMgmt->hash, NULL);
while (pIter) { while (pIter) {
SVnodeObj **ppVnode = pIter; SVnodeObj **ppVnode = pIter;
SVnodeObj *pVnode = *ppVnode; SVnodeObj *pVnode = *ppVnode;
...@@ -174,21 +213,21 @@ static SVnodeObj **dnodeGetVnodesFromHash(int32_t *numOfVnodes) { ...@@ -174,21 +213,21 @@ static SVnodeObj **dnodeGetVnodesFromHash(int32_t *numOfVnodes) {
num++; num++;
if (num < size) { if (num < size) {
int32_t refCount = atomic_add_fetch_32(&pVnode->refCount, 1); int32_t refCount = atomic_add_fetch_32(&pVnode->refCount, 1);
dTrace("vgId:%d, accquire vnode, refCount:%d", pVnode->vgId, refCount); dTrace("vgId:%d, acquire vnode, refCount:%d", pVnode->vgId, refCount);
pVnodes[num] = (*ppVnode); pVnodes[num] = (*ppVnode);
} }
} }
pIter = taosHashIterate(tsVnodes.hash, pIter); pIter = taosHashIterate(pMgmt->hash, pIter);
} }
taosRUnLockLatch(&tsVnodes.latch); taosRUnLockLatch(&pMgmt->latch);
*numOfVnodes = num; *numOfVnodes = num;
return pVnodes; return pVnodes;
} }
static int32_t dnodeGetVnodesFromFile(SVnodeObj **ppVnodes, int32_t *numOfVnodes) { static int32_t dndGetVnodesFromFile(SDnode *pDnode, SVnodeObj **ppVnodes, int32_t *numOfVnodes) {
int32_t code = TSDB_CODE_DND_PARSE_VNODE_FILE_ERROR; int32_t code = TSDB_CODE_DND_VNODE_READ_FILE_ERROR;
int32_t len = 0; int32_t len = 0;
int32_t maxLen = 30000; int32_t maxLen = 30000;
char *content = calloc(1, maxLen + 1); char *content = calloc(1, maxLen + 1);
...@@ -197,10 +236,10 @@ static int32_t dnodeGetVnodesFromFile(SVnodeObj **ppVnodes, int32_t *numOfVnodes ...@@ -197,10 +236,10 @@ static int32_t dnodeGetVnodesFromFile(SVnodeObj **ppVnodes, int32_t *numOfVnodes
char file[PATH_MAX + 20] = {0}; char file[PATH_MAX + 20] = {0};
SVnodeObj *pVnodes = NULL; SVnodeObj *pVnodes = NULL;
snprintf(file, PATH_MAX + 20, "%s/vnodes.json", tsVnodeDir); snprintf(file, PATH_MAX + 20, "%s/vnodes.json", pDnode->dir.vnodes);
fp = fopen(file, "r"); fp = fopen(file, "r");
if (!fp) { if (fp == NULL) {
dDebug("file %s not exist", file); dDebug("file %s not exist", file);
code = 0; code = 0;
goto PRASE_VNODE_OVER; goto PRASE_VNODE_OVER;
...@@ -267,15 +306,16 @@ PRASE_VNODE_OVER: ...@@ -267,15 +306,16 @@ PRASE_VNODE_OVER:
return code; return code;
} }
static int32_t dnodeWriteVnodesToFile() { static int32_t dndWriteVnodesToFile(SDnode *pDnode) {
char file[PATH_MAX + 20] = {0}; char file[PATH_MAX + 20] = {0};
char realfile[PATH_MAX + 20] = {0}; char realfile[PATH_MAX + 20] = {0};
snprintf(file, PATH_MAX + 20, "%s/vnodes.json.bak", tsVnodeDir); snprintf(file, PATH_MAX + 20, "%s/vnodes.json.bak", pDnode->dir.vnodes);
snprintf(realfile, PATH_MAX + 20, "%s/vnodes.json", tsVnodeDir); snprintf(realfile, PATH_MAX + 20, "%s/vnodes.json", pDnode->dir.vnodes);
FILE *fp = fopen(file, "w"); FILE *fp = fopen(file, "w");
if (!fp) { if (fp != NULL) {
dError("failed to write %s since %s", file, strerror(errno)); terrno = TAOS_SYSTEM_ERROR(errno);
dError("failed to write %s since %s", file, terrstr());
return -1; return -1;
} }
...@@ -283,7 +323,7 @@ static int32_t dnodeWriteVnodesToFile() { ...@@ -283,7 +323,7 @@ static int32_t dnodeWriteVnodesToFile() {
int32_t maxLen = 30000; int32_t maxLen = 30000;
char *content = calloc(1, maxLen + 1); char *content = calloc(1, maxLen + 1);
int32_t numOfVnodes = 0; int32_t numOfVnodes = 0;
SVnodeObj **pVnodes = dnodeGetVnodesFromHash(&numOfVnodes); SVnodeObj **pVnodes = dndGetVnodesFromHash(pDnode, &numOfVnodes);
len += snprintf(content + len, maxLen - len, "{\n"); len += snprintf(content + len, maxLen - len, "{\n");
len += snprintf(content + len, maxLen - len, " \"vnodes\": [{\n"); len += snprintf(content + len, maxLen - len, " \"vnodes\": [{\n");
...@@ -307,7 +347,7 @@ static int32_t dnodeWriteVnodesToFile() { ...@@ -307,7 +347,7 @@ static int32_t dnodeWriteVnodesToFile() {
for (int32_t i = 0; i < numOfVnodes; ++i) { for (int32_t i = 0; i < numOfVnodes; ++i) {
SVnodeObj *pVnode = pVnodes[i]; SVnodeObj *pVnode = pVnodes[i];
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
} }
if (pVnodes != NULL) { if (pVnodes != NULL) {
...@@ -318,50 +358,53 @@ static int32_t dnodeWriteVnodesToFile() { ...@@ -318,50 +358,53 @@ static int32_t dnodeWriteVnodesToFile() {
return taosRenameFile(file, realfile); return taosRenameFile(file, realfile);
} }
static int32_t dnodeCreateVnode(int32_t vgId, SVnodeCfg *pCfg) { static int32_t dndCreateVnode(SDnode *pDnode, int32_t vgId, SVnodeCfg *pCfg) {
int32_t code = 0;
char path[PATH_MAX + 20] = {0}; char path[PATH_MAX + 20] = {0};
snprintf(path, sizeof(path),"%s/vnode%d", tsVnodeDir, vgId); snprintf(path, sizeof(path), "%s/vnode%d", pDnode->dir.vnodes, vgId);
SVnode *pImpl = vnodeCreate(vgId, path, pCfg); // SVnode *pImpl = vnodeCreate(vgId, path, pCfg);
SVnode *pImpl = vnodeOpen(path, NULL);
if (pImpl == NULL) { if (pImpl == NULL) {
code = terrno; return -1;
return code;
} }
code = dnodeCreateVnodeWrapper(vgId, pImpl); int32_t code = dndCreateVnodeWrapper(pDnode, vgId, path, pImpl);
if (code != 0) { if (code != 0) {
vnodeDrop(pImpl); vnodeClose(pImpl);
vnodeDestroy(path);
terrno = code;
return code; return code;
} }
code = dnodeWriteVnodesToFile(); code = dndWriteVnodesToFile(pDnode);
if (code != 0) { if (code != 0) {
vnodeDrop(pImpl); vnodeClose(pImpl);
vnodeDestroy(path);
terrno = code;
return code; return code;
} }
return code; return 0;
} }
static int32_t dnodeDropVnode(SVnodeObj *pVnode) { static int32_t dndDropVnode(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->dropped = 1; pVnode->dropped = 1;
if (dndWriteVnodesToFile(pDnode) != 0) {
int32_t code = dnodeWriteVnodesToFile();
if (code != 0) {
pVnode->dropped = 0; pVnode->dropped = 0;
return code; return -1;
} }
dnodeDropVnodeWrapper(pVnode); dndDropVnodeWrapper(pDnode, pVnode);
vnodeDrop(pVnode->pImpl); vnodeClose(pVnode->pImpl);
dnodeWriteVnodesToFile(); vnodeDestroy(pVnode->path);
dndWriteVnodesToFile(pDnode);
return 0; return 0;
} }
static void *dnodeOpenVnodeFunc(void *param) { static void *dnodeOpenVnodeFunc(void *param) {
SVThread *pThread = param; SVnodeThread *pThread = param;
SDnode *pDnode = pThread->pDnode;
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum);
setThreadName("open-vnodes"); setThreadName("open-vnodes");
...@@ -371,22 +414,22 @@ static void *dnodeOpenVnodeFunc(void *param) { ...@@ -371,22 +414,22 @@ static void *dnodeOpenVnodeFunc(void *param) {
char stepDesc[TSDB_STEP_DESC_LEN] = {0}; char stepDesc[TSDB_STEP_DESC_LEN] = {0};
snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", pVnode->vgId, snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", pVnode->vgId,
tsVnodes.openVnodes, tsVnodes.totalVnodes); pMgmt->openVnodes, pMgmt->totalVnodes);
dnodeReportStartup("open-vnodes", stepDesc); dndReportStartup(pDnode, "open-vnodes", stepDesc);
char path[PATH_MAX + 20] = {0}; char path[PATH_MAX + 20] = {0};
snprintf(path, sizeof(path),"%s/vnode%d", tsVnodeDir, pVnode->vgId); snprintf(path, sizeof(path), "%s/vnode%d", pDnode->dir.vnodes, pVnode->vgId);
SVnode *pImpl = vnodeOpen(path, NULL); SVnode *pImpl = vnodeOpen(path, NULL);
if (pImpl == NULL) { if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode by thread:%d", pVnode->vgId, pThread->threadIndex); dError("vgId:%d, failed to open vnode by thread:%d", pVnode->vgId, pThread->threadIndex);
pThread->failed++; pThread->failed++;
} else { } else {
dnodeCreateVnodeWrapper(pVnode->vgId, pImpl); dndCreateVnodeWrapper(pDnode, pVnode->vgId, path, pImpl);
dDebug("vgId:%d, is opened by thread:%d", pVnode->vgId, pThread->threadIndex); dDebug("vgId:%d, is opened by thread:%d", pVnode->vgId, pThread->threadIndex);
pThread->opened++; pThread->opened++;
} }
atomic_add_fetch_32(&tsVnodes.openVnodes, 1); atomic_add_fetch_32(&pMgmt->openVnodes, 1);
} }
dDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, dDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened,
...@@ -394,90 +437,93 @@ static void *dnodeOpenVnodeFunc(void *param) { ...@@ -394,90 +437,93 @@ static void *dnodeOpenVnodeFunc(void *param) {
return NULL; return NULL;
} }
static int32_t dnodeOpenVnodes() { static int32_t dndOpenVnodes(SDnode *pDnode) {
taosInitRWLatch(&tsVnodes.latch); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
taosInitRWLatch(&pMgmt->latch);
tsVnodes.hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); pMgmt->hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK);
if (tsVnodes.hash == NULL) { if (pMgmt->hash == NULL) {
dError("failed to init vnode hash"); dError("failed to init vnode hash");
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
SVnodeObj *pVnodes = NULL; SVnodeObj *pVnodes = NULL;
int32_t numOfVnodes = 0; int32_t numOfVnodes = 0;
int32_t code = dnodeGetVnodesFromFile(&pVnodes, &numOfVnodes); if (dndGetVnodesFromFile(pDnode, &pVnodes, &numOfVnodes) != 0) {
if (code != TSDB_CODE_SUCCESS) { dInfo("failed to get vnode list from disk since %s", terrstr());
dInfo("failed to get vnode list from disk since %s", tstrerror(code)); return -1;
return code;
} }
tsVnodes.totalVnodes = numOfVnodes; pMgmt->totalVnodes = numOfVnodes;
int32_t threadNum = tsNumOfCores; int32_t threadNum = tsNumOfCores;
int32_t vnodesPerThread = numOfVnodes / threadNum + 1; int32_t vnodesPerThread = numOfVnodes / threadNum + 1;
SVThread *threads = calloc(threadNum, sizeof(SVThread)); SVnodeThread *threads = calloc(threadNum, sizeof(SVnodeThread));
for (int32_t t = 0; t < threadNum; ++t) { for (int32_t t = 0; t < threadNum; ++t) {
threads[t].threadIndex = t; threads[t].threadIndex = t;
threads[t].pVnodes = calloc(vnodesPerThread, sizeof(SVnodeObj)); threads[t].pVnodes = calloc(vnodesPerThread, sizeof(SVnodeObj));
} }
for (int32_t v = 0; v < numOfVnodes; ++v) { for (int32_t v = 0; v < numOfVnodes; ++v) {
int32_t t = v % threadNum; int32_t t = v % threadNum;
SVThread *pThread = &threads[t]; SVnodeThread *pThread = &threads[t];
pThread->pVnodes[pThread->vnodeNum++] = pVnodes[v]; pThread->pVnodes[pThread->vnodeNum++] = pVnodes[v];
} }
dInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes); dInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes);
for (int32_t t = 0; t < threadNum; ++t) { for (int32_t t = 0; t < threadNum; ++t) {
SVThread *pThread = &threads[t]; SVnodeThread *pThread = &threads[t];
if (pThread->vnodeNum == 0) continue; if (pThread->vnodeNum == 0) continue;
pThread->threadId = taosCreateThread(dnodeOpenVnodeFunc, pThread); pThread->pThreadId = taosCreateThread(dnodeOpenVnodeFunc, pThread);
if (pThread->threadId == NULL) { if (pThread->pThreadId == NULL) {
dError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno)); dError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno));
} }
} }
for (int32_t t = 0; t < threadNum; ++t) { for (int32_t t = 0; t < threadNum; ++t) {
SVThread *pThread = &threads[t]; SVnodeThread *pThread = &threads[t];
taosDestoryThread(pThread->threadId); taosDestoryThread(pThread->pThreadId);
pThread->threadId = NULL; pThread->pThreadId = NULL;
free(pThread->pVnodes); free(pThread->pVnodes);
} }
free(threads); free(threads);
if (tsVnodes.openVnodes != tsVnodes.totalVnodes) { if (pMgmt->openVnodes != pMgmt->totalVnodes) {
dError("there are total vnodes:%d, opened:%d", tsVnodes.totalVnodes, tsVnodes.openVnodes); dError("there are total vnodes:%d, opened:%d", pMgmt->totalVnodes, pMgmt->openVnodes);
return -1; return -1;
} else { } else {
dInfo("total vnodes:%d open successfully", tsVnodes.totalVnodes); dInfo("total vnodes:%d open successfully", pMgmt->totalVnodes);
return 0;
} }
return TSDB_CODE_SUCCESS;
} }
static void dnodeCloseVnodes() { static void dndCloseVnodes(SDnode *pDnode) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
int32_t numOfVnodes = 0; int32_t numOfVnodes = 0;
SVnodeObj **pVnodes = dnodeGetVnodesFromHash(&numOfVnodes); SVnodeObj **pVnodes = dndGetVnodesFromHash(pDnode, &numOfVnodes);
for (int32_t i = 0; i < numOfVnodes; ++i) { for (int32_t i = 0; i < numOfVnodes; ++i) {
dnodeDropVnodeWrapper(pVnodes[i]); dndDropVnodeWrapper(pDnode, pVnodes[i]);
} }
if (pVnodes != NULL) { if (pVnodes != NULL) {
free(pVnodes); free(pVnodes);
} }
if (tsVnodes.hash != NULL) { if (pMgmt->hash != NULL) {
taosHashCleanup(tsVnodes.hash); taosHashCleanup(pMgmt->hash);
tsVnodes.hash = NULL; pMgmt->hash = NULL;
} }
dInfo("total vnodes:%d are all closed", numOfVnodes); dInfo("total vnodes:%d are all closed", numOfVnodes);
} }
static int32_t dnodeParseCreateVnodeReq(SRpcMsg *rpcMsg, int32_t *vgId, SVnodeCfg *pCfg) { static int32_t dndParseCreateVnodeReq(SRpcMsg *rpcMsg, int32_t *vgId, SVnodeCfg *pCfg) {
SCreateVnodeMsg *pCreate = rpcMsg->pCont; SCreateVnodeMsg *pCreate = rpcMsg->pCont;
*vgId = htonl(pCreate->vgId); *vgId = htonl(pCreate->vgId);
...@@ -519,182 +565,178 @@ static SAuthVnodeMsg *vnodeParseAuthVnodeReq(SRpcMsg *rpcMsg) { ...@@ -519,182 +565,178 @@ static SAuthVnodeMsg *vnodeParseAuthVnodeReq(SRpcMsg *rpcMsg) {
return pAuth; return pAuth;
} }
static int32_t vnodeProcessCreateVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessCreateVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SVnodeCfg vnodeCfg = {0}; SVnodeCfg vnodeCfg = {0};
int32_t vgId = 0; int32_t vgId = 0;
dnodeParseCreateVnodeReq(rpcMsg, &vgId, &vnodeCfg); dndParseCreateVnodeReq(rpcMsg, &vgId, &vnodeCfg);
dDebug("vgId:%d, create vnode req is received", vgId); dDebug("vgId:%d, create vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode != NULL) { if (pVnode != NULL) {
dDebug("vgId:%d, already exist, return success", vgId); dDebug("vgId:%d, already exist, return success", vgId);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return 0; return 0;
} }
int32_t code = dnodeCreateVnode(vgId, &vnodeCfg); if (dndCreateVnode(pDnode, vgId, &vnodeCfg) != 0) {
if (code != 0) { dError("vgId:%d, failed to create vnode since %s", vgId, terrstr());
dError("vgId:%d, failed to create vnode since %s", vgId, tstrerror(code)); return terrno;
} }
return code; return 0;
} }
static int32_t vnodeProcessAlterVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessAlterVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SVnodeCfg vnodeCfg = {0}; SVnodeCfg vnodeCfg = {0};
int32_t vgId = 0; int32_t vgId = 0;
int32_t code = 0;
dnodeParseCreateVnodeReq(rpcMsg, &vgId, &vnodeCfg); dndParseCreateVnodeReq(rpcMsg, &vgId, &vnodeCfg);
dDebug("vgId:%d, alter vnode req is received", vgId); dDebug("vgId:%d, alter vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
code = terrno; dDebug("vgId:%d, failed to alter vnode since %s", vgId, terrstr());
dDebug("vgId:%d, failed to alter vnode since %s", vgId, tstrerror(code)); return terrno;
return code;
} }
code = vnodeAlter(pVnode->pImpl, &vnodeCfg); if (vnodeAlter(pVnode->pImpl, &vnodeCfg) != 0) {
if (code != 0) { dError("vgId:%d, failed to alter vnode since %s", vgId, terrstr());
dError("vgId:%d, failed to alter vnode since %s", vgId, tstrerror(code)); dndReleaseVnode(pDnode, pVnode);
return terrno;
} }
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return code; return 0;
} }
static int32_t vnodeProcessDropVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessDropVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SDropVnodeMsg *pDrop = vnodeParseDropVnodeReq(rpcMsg); SDropVnodeMsg *pDrop = vnodeParseDropVnodeReq(rpcMsg);
int32_t code = 0;
int32_t vgId = pDrop->vgId; int32_t vgId = pDrop->vgId;
dDebug("vgId:%d, drop vnode req is received", vgId); dDebug("vgId:%d, drop vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
code = terrno; dDebug("vgId:%d, failed to drop since %s", vgId, terrstr());
dDebug("vgId:%d, failed to drop since %s", vgId, tstrerror(code)); return terrno;
return code;
} }
code = dnodeDropVnode(pVnode); if (dndDropVnode(pDnode, pVnode) != 0) {
if (code != 0) { dError("vgId:%d, failed to drop vnode since %s", vgId, terrstr());
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
dError("vgId:%d, failed to drop vnode since %s", vgId, tstrerror(code)); return terrno;
} }
return code; return 0;
} }
static int32_t vnodeProcessAuthVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessAuthVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg); SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg);
int32_t code = 0; int32_t code = 0;
int32_t vgId = pAuth->vgId; int32_t vgId = pAuth->vgId;
dDebug("vgId:%d, auth vnode req is received", vgId); dDebug("vgId:%d, auth vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
code = terrno; dDebug("vgId:%d, failed to auth since %s", vgId, terrstr());
dDebug("vgId:%d, failed to auth since %s", vgId, tstrerror(code)); return terrno;
return code;
} }
pVnode->accessState = pAuth->accessState; pVnode->accessState = pAuth->accessState;
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return code; return 0;
} }
static int32_t vnodeProcessSyncVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessSyncVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg); SAuthVnodeMsg *pAuth = (SAuthVnodeMsg *)vnodeParseAuthVnodeReq(rpcMsg);
int32_t code = 0;
int32_t vgId = pAuth->vgId; int32_t vgId = pAuth->vgId;
dDebug("vgId:%d, auth vnode req is received", vgId); dDebug("vgId:%d, auth vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
code = terrno; dDebug("vgId:%d, failed to auth since %s", vgId, terrstr());
dDebug("vgId:%d, failed to auth since %s", vgId, tstrerror(code)); return terrno;
return code;
} }
code = vnodeSync(pVnode->pImpl); if (vnodeSync(pVnode->pImpl) != 0) {
if (code != 0) { dError("vgId:%d, failed to auth vnode since %s", vgId, terrstr());
dError("vgId:%d, failed to auth vnode since %s", vgId, tstrerror(code)); dndReleaseVnode(pDnode, pVnode);
return terrno;
} }
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return code; return 0;
} }
static int32_t vnodeProcessCompactVnodeReq(SRpcMsg *rpcMsg) { static int32_t dndProcessCompactVnodeReq(SDnode *pDnode, SRpcMsg *rpcMsg) {
SCompactVnodeMsg *pCompact = (SCompactVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg); SCompactVnodeMsg *pCompact = (SCompactVnodeMsg *)vnodeParseDropVnodeReq(rpcMsg);
int32_t code = 0;
int32_t vgId = pCompact->vgId; int32_t vgId = pCompact->vgId;
dDebug("vgId:%d, compact vnode req is received", vgId); dDebug("vgId:%d, compact vnode req is received", vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
code = terrno; dDebug("vgId:%d, failed to compact since %s", vgId, terrstr());
dDebug("vgId:%d, failed to compact since %s", vgId, tstrerror(code)); return terrno;
return code;
} }
code = vnodeCompact(pVnode->pImpl); if (vnodeCompact(pVnode->pImpl) != 0) {
if (code != 0) { dError("vgId:%d, failed to compact vnode since %s", vgId, terrstr());
dError("vgId:%d, failed to compact vnode since %s", vgId, tstrerror(code)); dndReleaseVnode(pDnode, pVnode);
return terrno;
} }
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return code; return 0;
} }
static void dnodeProcessVnodeMgmtQueue(void *unused, SRpcMsg *pMsg) { static void dndProcessVnodeMgmtQueue(SDnode *pDnode, SRpcMsg *pMsg) {
int32_t code = 0; int32_t code = 0;
switch (pMsg->msgType) { switch (pMsg->msgType) {
case TSDB_MSG_TYPE_CREATE_VNODE_IN: case TSDB_MSG_TYPE_CREATE_VNODE_IN:
code = vnodeProcessCreateVnodeReq(pMsg); code = dndProcessCreateVnodeReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_ALTER_VNODE_IN: case TSDB_MSG_TYPE_ALTER_VNODE_IN:
code = vnodeProcessAlterVnodeReq(pMsg); code = dndProcessAlterVnodeReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_DROP_VNODE_IN: case TSDB_MSG_TYPE_DROP_VNODE_IN:
code = vnodeProcessDropVnodeReq(pMsg); code = dndProcessDropVnodeReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_AUTH_VNODE_IN: case TSDB_MSG_TYPE_AUTH_VNODE_IN:
code = vnodeProcessAuthVnodeReq(pMsg); code = dndProcessAuthVnodeReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_SYNC_VNODE_IN: case TSDB_MSG_TYPE_SYNC_VNODE_IN:
code = vnodeProcessSyncVnodeReq(pMsg); code = dndProcessSyncVnodeReq(pDnode, pMsg);
break; break;
case TSDB_MSG_TYPE_COMPACT_VNODE_IN: case TSDB_MSG_TYPE_COMPACT_VNODE_IN:
code = vnodeProcessCompactVnodeReq(pMsg); code = dndProcessCompactVnodeReq(pDnode, pMsg);
break; break;
default: default:
code = TSDB_CODE_DND_MSG_NOT_PROCESSED; code = TSDB_CODE_MSG_NOT_PROCESSED;
break; break;
} }
SRpcMsg rsp = {.code = code, .handle = pMsg->handle}; if (code != 0) {
rpcSendResponse(&rsp); SRpcMsg rsp = {.code = code, .handle = pMsg->handle};
rpcFreeCont(pMsg->pCont); rpcSendResponse(&rsp);
taosFreeQitem(pMsg); rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg);
}
} }
static void dnodeProcessVnodeQueryQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg) { static void dndProcessVnodeQueryQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg) {
vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_QUERY); vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_QUERY);
} }
static void dnodeProcessVnodeFetchQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg) { static void dndProcessVnodeFetchQueue(SVnodeObj *pVnode, SVnodeMsg *pMsg) {
vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_FETCH); vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_FETCH);
} }
static void dnodeProcessVnodeWriteQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) { static void dndProcessVnodeWriteQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) {
SVnodeMsg *pMsg = vnodeInitMsg(numOfMsgs); SVnodeMsg *pMsg = vnodeInitMsg(numOfMsgs);
SRpcMsg *pRpcMsg = NULL; SRpcMsg *pRpcMsg = NULL;
...@@ -707,7 +749,7 @@ static void dnodeProcessVnodeWriteQueue(SVnodeObj *pVnode, taos_qall qall, int32 ...@@ -707,7 +749,7 @@ static void dnodeProcessVnodeWriteQueue(SVnodeObj *pVnode, taos_qall qall, int32
vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_WRITE); vnodeProcessMsg(pVnode->pImpl, pMsg, VN_MSG_TYPE_WRITE);
} }
static void dnodeProcessVnodeApplyQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) { static void dndProcessVnodeApplyQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) {
SVnodeMsg *pMsg = NULL; SVnodeMsg *pMsg = NULL;
for (int32_t i = 0; i < numOfMsgs; ++i) { for (int32_t i = 0; i < numOfMsgs; ++i) {
taosGetQitem(qall, (void **)&pMsg); taosGetQitem(qall, (void **)&pMsg);
...@@ -715,7 +757,7 @@ static void dnodeProcessVnodeApplyQueue(SVnodeObj *pVnode, taos_qall qall, int32 ...@@ -715,7 +757,7 @@ static void dnodeProcessVnodeApplyQueue(SVnodeObj *pVnode, taos_qall qall, int32
} }
} }
static void dnodeProcessVnodeSyncQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) { static void dndProcessVnodeSyncQueue(SVnodeObj *pVnode, taos_qall qall, int32_t numOfMsgs) {
SVnodeMsg *pMsg = NULL; SVnodeMsg *pMsg = NULL;
for (int32_t i = 0; i < numOfMsgs; ++i) { for (int32_t i = 0; i < numOfMsgs; ++i) {
taosGetQitem(qall, (void **)&pMsg); taosGetQitem(qall, (void **)&pMsg);
...@@ -723,18 +765,20 @@ static void dnodeProcessVnodeSyncQueue(SVnodeObj *pVnode, taos_qall qall, int32_ ...@@ -723,18 +765,20 @@ static void dnodeProcessVnodeSyncQueue(SVnodeObj *pVnode, taos_qall qall, int32_
} }
} }
static int32_t dnodeWriteRpcMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) { static int32_t dndWriteRpcMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) {
int32_t code = 0; int32_t code = 0;
if (pQueue == NULL) { if (pQueue == NULL) {
code = TSDB_CODE_DND_MSG_NOT_PROCESSED; code = TSDB_CODE_MSG_NOT_PROCESSED;
} else { } else {
SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg)); SRpcMsg *pMsg = taosAllocateQitem(sizeof(SRpcMsg));
if (pMsg == NULL) { if (pMsg == NULL) {
code = TSDB_CODE_DND_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
} else { } else {
*pMsg = *pRpcMsg; *pMsg = *pRpcMsg;
code = taosWriteQitem(pQueue, pMsg); if (taosWriteQitem(pQueue, pMsg) != 0) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
} }
} }
...@@ -745,18 +789,23 @@ static int32_t dnodeWriteRpcMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) ...@@ -745,18 +789,23 @@ static int32_t dnodeWriteRpcMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg)
} }
} }
static int32_t dnodeWriteVnodeMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) { static int32_t dndWriteVnodeMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) {
int32_t code = 0; int32_t code = 0;
if (pQueue == NULL) { if (pQueue == NULL) {
code = TSDB_CODE_DND_MSG_NOT_PROCESSED; code = TSDB_CODE_MSG_NOT_PROCESSED;
} else { } else {
SVnodeMsg *pMsg = vnodeInitMsg(1); SVnodeMsg *pMsg = vnodeInitMsg(1);
if (pMsg == NULL) { if (pMsg == NULL) {
code = TSDB_CODE_DND_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
} else { } else {
vnodeAppendMsg(pMsg, pRpcMsg); if (vnodeAppendMsg(pMsg, pRpcMsg) != 0) {
code = taosWriteQitem(pQueue, pMsg); code = terrno;
} else {
if (taosWriteQitem(pQueue, pMsg) != 0) {
code = TSDB_CODE_OUT_OF_MEMORY;
}
}
} }
} }
...@@ -767,11 +816,11 @@ static int32_t dnodeWriteVnodeMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMs ...@@ -767,11 +816,11 @@ static int32_t dnodeWriteVnodeMsgToVnodeQueue(taos_queue pQueue, SRpcMsg *pRpcMs
} }
} }
static SVnodeObj *dnodeAcquireVnodeFromMsg(SRpcMsg *pMsg) { static SVnodeObj *dndAcquireVnodeFromMsg(SDnode *pDnode, SRpcMsg *pMsg) {
SMsgHead *pHead = (SMsgHead *)pMsg->pCont; SMsgHead *pHead = (SMsgHead *)pMsg->pCont;
pHead->vgId = htonl(pHead->vgId); pHead->vgId = htonl(pHead->vgId);
SVnodeObj *pVnode = dnodeAcquireVnode(pHead->vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, pHead->vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno}; SRpcMsg rsp = {.handle = pMsg->handle, .code = terrno};
rpcSendResponse(&rsp); rpcSendResponse(&rsp);
...@@ -781,242 +830,295 @@ static SVnodeObj *dnodeAcquireVnodeFromMsg(SRpcMsg *pMsg) { ...@@ -781,242 +830,295 @@ static SVnodeObj *dnodeAcquireVnodeFromMsg(SRpcMsg *pMsg) {
return pVnode; return pVnode;
} }
void dnodeProcessVnodeMgmtMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { dnodeWriteRpcMsgToVnodeQueue(tsVnodes.pMgmtQ, pMsg); } void dndProcessVnodeMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
dndWriteRpcMsgToVnodeQueue(pMgmt->pMgmtQ, pMsg);
}
void dnodeProcessVnodeWriteMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
SVnodeObj *pVnode = dnodeAcquireVnodeFromMsg(pMsg); SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
if (pVnode != NULL) { if (pVnode != NULL) {
dnodeWriteRpcMsgToVnodeQueue(pVnode->pWriteQ, pMsg); dndWriteRpcMsgToVnodeQueue(pVnode->pWriteQ, pMsg);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
} }
} }
void dnodeProcessVnodeSyncMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
SVnodeObj *pVnode = dnodeAcquireVnodeFromMsg(pMsg); SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
if (pVnode != NULL) { if (pVnode != NULL) {
dnodeWriteVnodeMsgToVnodeQueue(pVnode->pSyncQ, pMsg); dndWriteVnodeMsgToVnodeQueue(pVnode->pSyncQ, pMsg);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
} }
} }
void dnodeProcessVnodeQueryMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
SVnodeObj *pVnode = dnodeAcquireVnodeFromMsg(pMsg); SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
if (pVnode != NULL) { if (pVnode != NULL) {
dnodeWriteVnodeMsgToVnodeQueue(pVnode->pQueryQ, pMsg); dndWriteVnodeMsgToVnodeQueue(pVnode->pQueryQ, pMsg);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
} }
} }
void dnodeProcessVnodeFetchMsg(SRpcMsg *pMsg, SEpSet *pEpSet) { void dndProcessVnodeFetchMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) {
SVnodeObj *pVnode = dnodeAcquireVnodeFromMsg(pMsg); SVnodeObj *pVnode = dndAcquireVnodeFromMsg(pDnode, pMsg);
if (pVnode != NULL) { if (pVnode != NULL) {
dnodeWriteVnodeMsgToVnodeQueue(pVnode->pFetchQ, pMsg); dndWriteVnodeMsgToVnodeQueue(pVnode->pFetchQ, pMsg);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
} }
} }
static int32_t dnodePutMsgIntoVnodeApplyQueue(int32_t vgId, SVnodeMsg *pMsg) { static int32_t dndPutMsgIntoVnodeApplyQueue(SDnode *pDnode, int32_t vgId, SVnodeMsg *pMsg) {
SVnodeObj *pVnode = dnodeAcquireVnode(vgId); SVnodeObj *pVnode = dndAcquireVnode(pDnode, vgId);
if (pVnode == NULL) { if (pVnode == NULL) {
return terrno; return -1;
} }
int32_t code = taosWriteQitem(pVnode->pApplyQ, pMsg); int32_t code = taosWriteQitem(pVnode->pApplyQ, pMsg);
dnodeReleaseVnode(pVnode); dndReleaseVnode(pDnode, pVnode);
return code; return code;
} }
static int32_t dnodeInitVnodeMgmtWorker() { static int32_t dndInitVnodeMgmtWorker(SDnode *pDnode) {
SWorkerPool *pPool = &tsVnodes.mgmtPool; SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SWorkerPool *pPool = &pMgmt->mgmtPool;
pPool->name = "vnode-mgmt"; pPool->name = "vnode-mgmt";
pPool->min = 1; pPool->min = 1;
pPool->max = 1; pPool->max = 1;
if (tWorkerInit(pPool) != 0) { if (tWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
tsVnodes.pMgmtQ = tWorkerAllocQueue(pPool, NULL, (FProcessItem)dnodeProcessVnodeMgmtQueue); pMgmt->pMgmtQ = tWorkerAllocQueue(pPool, pDnode, (FProcessItem)dndProcessVnodeMgmtQueue);
if (tsVnodes.pMgmtQ == NULL) { if (pMgmt->pMgmtQ == NULL) {
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeCleanupVnodeMgmtWorker() { static void dndCleanupVnodeMgmtWorker(SDnode *pDnode) {
tWorkerFreeQueue(&tsVnodes.mgmtPool, tsVnodes.pMgmtQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tWorkerCleanup(&tsVnodes.mgmtPool); tWorkerFreeQueue(&pMgmt->mgmtPool, pMgmt->pMgmtQ);
tsVnodes.pMgmtQ = NULL; tWorkerCleanup(&pMgmt->mgmtPool);
pMgmt->pMgmtQ = NULL;
} }
static int32_t dnodeAllocVnodeQueryQueue(SVnodeObj *pVnode) { static int32_t dndAllocVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->pQueryQ = tWorkerAllocQueue(&tsVnodes.queryPool, pVnode, (FProcessItem)dnodeProcessVnodeQueryQueue); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
pVnode->pQueryQ = tWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FProcessItem)dndProcessVnodeQueryQueue);
if (pVnode->pQueryQ == NULL) { if (pVnode->pQueryQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeFreeVnodeQueryQueue(SVnodeObj *pVnode) { static void dndFreeVnodeQueryQueue(SDnode *pDnode, SVnodeObj *pVnode) {
tWorkerFreeQueue(&tsVnodes.queryPool, pVnode->pQueryQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ);
pVnode->pQueryQ = NULL; pVnode->pQueryQ = NULL;
} }
static int32_t dnodeAllocVnodeFetchQueue(SVnodeObj *pVnode) { static int32_t dndAllocVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->pFetchQ = tWorkerAllocQueue(&tsVnodes.fetchPool, pVnode, (FProcessItem)dnodeProcessVnodeFetchQueue); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
pVnode->pFetchQ = tWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FProcessItem)dndProcessVnodeFetchQueue);
if (pVnode->pFetchQ == NULL) { if (pVnode->pFetchQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeFreeVnodeFetchQueue(SVnodeObj *pVnode) { static void dndFreeVnodeFetchQueue(SDnode *pDnode, SVnodeObj *pVnode) {
tWorkerFreeQueue(&tsVnodes.fetchPool, pVnode->pFetchQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ);
pVnode->pFetchQ = NULL; pVnode->pFetchQ = NULL;
} }
static int32_t dnodeInitVnodeReadWorker() { static int32_t dndInitVnodeReadWorker(SDnode *pDnode) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
int32_t maxFetchThreads = 4; int32_t maxFetchThreads = 4;
float threadsForQuery = MAX(tsNumOfCores * tsRatioOfQueryCores, 1); float threadsForQuery = MAX(pDnode->opt.numOfCores * pDnode->opt.ratioOfQueryCores, 1);
SWorkerPool *pPool = &tsVnodes.queryPool; SWorkerPool *pPool = &pMgmt->queryPool;
pPool->name = "vnode-query"; pPool->name = "vnode-query";
pPool->min = (int32_t)threadsForQuery; pPool->min = (int32_t)threadsForQuery;
pPool->max = pPool->min; pPool->max = pPool->min;
if (tWorkerInit(pPool) != 0) { if (tWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
pPool = &tsVnodes.fetchPool; pPool = &pMgmt->fetchPool;
pPool->name = "vnode-fetch"; pPool->name = "vnode-fetch";
pPool->min = MIN(maxFetchThreads, tsNumOfCores); pPool->min = MIN(maxFetchThreads, pDnode->opt.numOfCores);
pPool->max = pPool->min; pPool->max = pPool->min;
if (tWorkerInit(pPool) != 0) { if (tWorkerInit(pPool) != 0) {
TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeCleanupVnodeReadWorker() { static void dndCleanupVnodeReadWorker(SDnode *pDnode) {
tWorkerCleanup(&tsVnodes.fetchPool); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tWorkerCleanup(&tsVnodes.queryPool); tWorkerCleanup(&pMgmt->fetchPool);
tWorkerCleanup(&pMgmt->queryPool);
} }
static int32_t dnodeAllocVnodeWriteQueue(SVnodeObj *pVnode) { static int32_t dndAllocVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->pWriteQ = tMWorkerAllocQueue(&tsVnodes.writePool, pVnode, (FProcessItems)dnodeProcessVnodeWriteQueue); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
pVnode->pWriteQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeWriteQueue);
if (pVnode->pWriteQ == NULL) { if (pVnode->pWriteQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeFreeVnodeWriteQueue(SVnodeObj *pVnode) { static void dndFreeVnodeWriteQueue(SDnode *pDnode, SVnodeObj *pVnode) {
tMWorkerFreeQueue(&tsVnodes.writePool, pVnode->pWriteQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pWriteQ);
pVnode->pWriteQ = NULL; pVnode->pWriteQ = NULL;
} }
static int32_t dnodeAllocVnodeApplyQueue(SVnodeObj *pVnode) { static int32_t dndAllocVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->pApplyQ = tMWorkerAllocQueue(&tsVnodes.writePool, pVnode, (FProcessItems)dnodeProcessVnodeApplyQueue); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
pVnode->pApplyQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeApplyQueue);
if (pVnode->pApplyQ == NULL) { if (pVnode->pApplyQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeFreeVnodeApplyQueue(SVnodeObj *pVnode) { static void dndFreeVnodeApplyQueue(SDnode *pDnode, SVnodeObj *pVnode) {
tMWorkerFreeQueue(&tsVnodes.writePool, pVnode->pApplyQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pApplyQ);
pVnode->pApplyQ = NULL; pVnode->pApplyQ = NULL;
} }
static int32_t dnodeInitVnodeWriteWorker() { static int32_t dndInitVnodeWriteWorker(SDnode *pDnode) {
SMWorkerPool *pPool = &tsVnodes.writePool; SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SMWorkerPool *pPool = &pMgmt->writePool;
pPool->name = "vnode-write"; pPool->name = "vnode-write";
pPool->max = tsNumOfCores; pPool->max = tsNumOfCores;
if (tMWorkerInit(pPool) != 0) { if (tMWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeCleanupVnodeWriteWorker() { tMWorkerCleanup(&tsVnodes.writePool); } static void dndCleanupVnodeWriteWorker(SDnode *pDnode) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tMWorkerCleanup(&pMgmt->writePool);
}
static int32_t dnodeAllocVnodeSyncQueue(SVnodeObj *pVnode) { static int32_t dndAllocVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) {
pVnode->pSyncQ = tMWorkerAllocQueue(&tsVnodes.writePool, pVnode, (FProcessItems)dnodeProcessVnodeSyncQueue); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
pVnode->pSyncQ = tMWorkerAllocQueue(&pMgmt->writePool, pVnode, (FProcessItems)dndProcessVnodeSyncQueue);
if (pVnode->pSyncQ == NULL) { if (pVnode->pSyncQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeFreeVnodeSyncQueue(SVnodeObj *pVnode) { static void dndFreeVnodeSyncQueue(SDnode *pDnode, SVnodeObj *pVnode) {
tMWorkerFreeQueue(&tsVnodes.writePool, pVnode->pSyncQ); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
tMWorkerFreeQueue(&pMgmt->writePool, pVnode->pSyncQ);
pVnode->pSyncQ = NULL; pVnode->pSyncQ = NULL;
} }
static int32_t dnodeInitVnodeSyncWorker() { static int32_t dndInitVnodeSyncWorker(SDnode *pDnode) {
int32_t maxThreads = tsNumOfCores / 2; int32_t maxThreads = tsNumOfCores / 2;
if (maxThreads < 1) maxThreads = 1; if (maxThreads < 1) maxThreads = 1;
SMWorkerPool *pPool = &tsVnodes.writePool; SVnodesMgmt *pMgmt = &pDnode->vmgmt;
SMWorkerPool *pPool = &pMgmt->writePool;
pPool->name = "vnode-sync"; pPool->name = "vnode-sync";
pPool->max = maxThreads; pPool->max = maxThreads;
if (tMWorkerInit(pPool) != 0) { if (tMWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
} }
return 0; return 0;
} }
static void dnodeCleanupVnodeSyncWorker() { tMWorkerCleanup(&tsVnodes.syncPool); } static void dndCleanupVnodeSyncWorker(SDnode *pDnode) {
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
static int32_t dnodeInitVnodeModule() { tMWorkerCleanup(&pMgmt->syncPool);
SVnodePara para;
para.SendMsgToDnode = dnodeSendMsgToDnode;
para.SendMsgToMnode = dnodeSendMsgToMnode;
para.PutMsgIntoApplyQueue = dnodePutMsgIntoVnodeApplyQueue;
return vnodeInit(para);
} }
int32_t dnodeInitVnodes() { int32_t dndInitVnodes(SDnode *pDnode) {
dInfo("dnode-vnodes start to init"); dInfo("dnode-vnodes start to init");
SSteps *pSteps = taosStepInit(6, dnodeReportStartup); if (dndInitVnodeReadWorker(pDnode) != 0) {
taosStepAdd(pSteps, "dnode-vnode-env", dnodeInitVnodeModule, vnodeCleanup); dError("failed to init vnodes read worker since %s", terrstr());
taosStepAdd(pSteps, "dnode-vnode-mgmt", dnodeInitVnodeMgmtWorker, dnodeCleanupVnodeMgmtWorker); return -1;
taosStepAdd(pSteps, "dnode-vnode-read", dnodeInitVnodeReadWorker, dnodeCleanupVnodeReadWorker); }
taosStepAdd(pSteps, "dnode-vnode-write", dnodeInitVnodeWriteWorker, dnodeCleanupVnodeWriteWorker);
taosStepAdd(pSteps, "dnode-vnode-sync", dnodeInitVnodeSyncWorker, dnodeCleanupVnodeSyncWorker);
taosStepAdd(pSteps, "dnode-vnodes", dnodeOpenVnodes, dnodeCleanupVnodes);
tsVnodes.pSteps = pSteps; if (dndInitVnodeWriteWorker(pDnode) != 0) {
return taosStepExec(pSteps); dError("failed to init vnodes write worker since %s", terrstr());
} return -1;
}
void dnodeCleanupVnodes() { if (dndInitVnodeSyncWorker(pDnode) != 0) {
if (tsVnodes.pSteps != NULL) { dError("failed to init vnodes sync worker since %s", terrstr());
dInfo("dnode-vnodes start to clean up"); return -1;
taosStepCleanup(tsVnodes.pSteps); }
tsVnodes.pSteps = NULL;
dInfo("dnode-vnodes is cleaned up"); if (dndInitVnodeMgmtWorker(pDnode) != 0) {
dError("failed to init vnodes mgmt worker since %s", terrstr());
return -1;
} }
if (dndOpenVnodes(pDnode) != 0) {
dError("failed to open vnodes since %s", terrstr());
return -1;
}
dInfo("dnode-vnodes is initialized");
return 0;
}
void dndCleanupVnodes(SDnode *pDnode) {
dInfo("dnode-vnodes start to clean up");
dndCloseVnodes(pDnode);
dndCleanupVnodeReadWorker(pDnode);
dndCleanupVnodeWriteWorker(pDnode);
dndCleanupVnodeSyncWorker(pDnode);
dndCleanupVnodeMgmtWorker(pDnode);
dInfo("dnode-vnodes is cleaned up");
} }
void dnodeGetVnodeLoads(SVnodeLoads *pLoads) { void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pLoads) {
pLoads->num = taosHashGetSize(tsVnodes.hash); SVnodesMgmt *pMgmt = &pDnode->vmgmt;
taosRLockLatch(&pMgmt->latch);
pLoads->num = taosHashGetSize(pMgmt->hash);
int32_t v = 0; int32_t v = 0;
void *pIter = taosHashIterate(tsVnodes.hash, NULL); void *pIter = taosHashIterate(pMgmt->hash, NULL);
while (pIter) { while (pIter) {
SVnodeObj **ppVnode = pIter; SVnodeObj **ppVnode = pIter;
if (ppVnode == NULL) continue; if (ppVnode == NULL || *ppVnode == NULL) continue;
SVnodeObj *pVnode = *ppVnode;
if (pVnode == NULL) continue;
SVnodeObj *pVnode = *ppVnode;
SVnodeLoad *pLoad = &pLoads->data[v++]; SVnodeLoad *pLoad = &pLoads->data[v++];
vnodeGetLoad(pVnode->pImpl, pLoad); vnodeGetLoad(pVnode->pImpl, pLoad);
pLoad->vgId = htonl(pLoad->vgId); pLoad->vgId = htonl(pLoad->vgId);
pLoad->totalStorage = htobe64(pLoad->totalStorage); pLoad->totalStorage = htobe64(pLoad->totalStorage);
...@@ -1024,6 +1126,8 @@ void dnodeGetVnodeLoads(SVnodeLoads *pLoads) { ...@@ -1024,6 +1126,8 @@ void dnodeGetVnodeLoads(SVnodeLoads *pLoads) {
pLoad->pointsWritten = htobe64(pLoad->pointsWritten); pLoad->pointsWritten = htobe64(pLoad->pointsWritten);
pLoad->tablesNum = htobe64(pLoad->tablesNum); pLoad->tablesNum = htobe64(pLoad->tablesNum);
pIter = taosHashIterate(tsVnodes.hash, pIter); pIter = taosHashIterate(pMgmt->hash, pIter);
} }
taosRUnLockLatch(&pMgmt->latch);
} }
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dndDnode.h"
#include "dndMnode.h"
#include "dndTransport.h"
#include "dndVnodes.h"
#include "sync.h"
#include "tcache.h"
#include "tcrc32c.h"
#include "wal.h"
/* Returns the run state currently stored in the dnode object. */
EStat dndGetStat(SDnode *pDnode) {
  EStat current = pDnode->stat;
  return current;
}
/*
 * Stores `stat` as the dnode's run state.
 * The transition (previous state -> new state) is written to the debug log
 * before the field is updated.
 */
void dndSetStat(SDnode *pDnode, EStat stat) {
  char *fromStr = dndStatStr(pDnode->stat);
  char *toStr = dndStatStr(stat);

  dDebug("dnode status set from %s to %s", fromStr, toStr);
  pDnode->stat = stat;
}
/*
 * Maps a run state to a short human-readable name.
 * Returns a pointer to a string literal; values other than the three known
 * states yield "unknown". The result must not be modified or freed.
 */
char *dndStatStr(EStat stat) {
  if (stat == DND_STAT_INIT) {
    return "init";
  }
  if (stat == DND_STAT_RUNNING) {
    return "running";
  }
  if (stat == DND_STAT_STOPPED) {
    return "stopped";
  }
  return "unknown";
}
/*
 * Records the startup step currently in progress.
 * Copies `pName` and `pDesc` into the dnode's startup record (bounded by
 * TSDB_STEP_NAME_LEN / TSDB_STEP_DESC_LEN) and clears the finished flag.
 */
void dndReportStartup(SDnode *pDnode, char *pName, char *pDesc) {
  tstrncpy(pDnode->startup.name, pName, TSDB_STEP_NAME_LEN);
  tstrncpy(pDnode->startup.desc, pDesc, TSDB_STEP_DESC_LEN);
  pDnode->startup.finished = 0;
}
/*
 * Copies the dnode's startup record into `pStartup`.
 * The `finished` field of the copy is then overwritten: it is non-zero only
 * when the dnode's run state is DND_STAT_RUNNING.
 */
void dndGetStartup(SDnode *pDnode, SStartupMsg *pStartup) {
  const SStartupMsg *pSrc = &pDnode->startup;

  memcpy(pStartup, pSrc, sizeof(SStartupMsg));
  pStartup->finished = (DND_STAT_RUNNING == dndGetStat(pDnode));
}
/*
 * Creates (or truncates) "<dataDir>/.running" and takes a lock on it.
 *
 * Returns 0 on success. On failure returns -1 with terrno set to the system
 * error of the call that failed.
 *
 * NOTE(review): the descriptor is not closed on success; presumably this is
 * deliberate so the lock stays held for the life of the process - confirm.
 */
static int32_t dndCheckRunning(char *dataDir) {
  char filepath[PATH_MAX] = {0};
  snprintf(filepath, sizeof(filepath), "%s/.running", dataDir);

  FileFd fd = taosOpenFileCreateWriteTrunc(filepath);
  if (fd < 0) {
    // Capture errno first: the logging call below may overwrite it.
    int32_t err = errno;
    dError("failed to open lock file:%s since %s, quit", filepath, strerror(err));
    terrno = TAOS_SYSTEM_ERROR(err);
    return -1;
  }

  if (taosLockFile(fd) != 0) {
    int32_t err = errno;
    dError("failed to lock file:%s since %s, quit", filepath, strerror(err));
    terrno = TAOS_SYSTEM_ERROR(err);
    taosCloseFile(fd);
    return -1;
  }

  return 0;
}
static int32_t dndInitEnv(SDnode *pDnode, SDnodeOpt *pOption) {
if (dndCheckRunning(pOption->dataDir) != 0) {
return -1;
}
char path[PATH_MAX + 100];
snprintf(path, sizeof(path), "%s%smnode", pOption->dataDir, TD_DIRSEP);
pDnode->dir.mnode = tstrdup(path);
snprintf(path, sizeof(path), "%s%svnode", pOption->dataDir, TD_DIRSEP);
pDnode->dir.vnodes = tstrdup(path);
snprintf(path, sizeof(path), "%s%sdnode", pOption->dataDir, TD_DIRSEP);
pDnode->dir.dnode = tstrdup(path);
if (pDnode->dir.mnode == NULL || pDnode->dir.vnodes == NULL || pDnode->dir.dnode == NULL) {
dError("failed to malloc dir object");
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
if (taosMkDir(pDnode->dir.dnode) != 0) {
dError("failed to create dir:%s since %s", pDnode->dir.dnode, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (taosMkDir(pDnode->dir.mnode) != 0) {
dError("failed to create dir:%s since %s", pDnode->dir.mnode, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (taosMkDir(pDnode->dir.vnodes) != 0) {
dError("failed to create dir:%s since %s", pDnode->dir.vnodes, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
memcpy(&pDnode->opt, pOption, sizeof(SDnodeOpt));
return 0;
}
/*
 * Releases the directory path strings held in pDnode->dir (those that are
 * set) and then stops the cache refresh worker.
 */
static void dndCleanupEnv(SDnode *pDnode) {
  if (pDnode->dir.mnode) {
    tfree(pDnode->dir.mnode);
  }

  if (pDnode->dir.vnodes) {
    tfree(pDnode->dir.vnodes);
  }

  if (pDnode->dir.dnode) {
    tfree(pDnode->dir.dnode);
  }

  taosStopCacheRefreshWorker();
}
/*
 * Allocates a dnode object and brings up its subsystems.
 *
 * Initialization order: environment, rpc, wal, dnode, vnodes, mnode,
 * transport. If any stage fails, the failure is logged, dndCleanup() is
 * invoked on the partially initialized object and NULL is returned.
 *
 * Returns the new dnode (in DND_STAT_RUNNING state) on success, or NULL on
 * failure. When the allocation itself fails terrno is set to
 * TSDB_CODE_OUT_OF_MEMORY.
 */
SDnode *dndInit(SDnodeOpt *pOption) {
  taosIgnSIGPIPE();
  taosBlockSIGPIPE();
  taosResolveCRC();

  SDnode *pDnode = calloc(1, sizeof(SDnode));
  if (pDnode == NULL) {
    dError("failed to create dnode object");
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  dInfo("start to initialize TDengine");
  dndSetStat(pDnode, DND_STAT_INIT);

  if (dndInitEnv(pDnode, pOption) != 0) {
    dError("failed to init env");
    goto initFailed;
  }

  if (rpcInit() != 0) {
    dError("failed to init rpc env");
    goto initFailed;
  }

  if (walInit() != 0) {
    dError("failed to init wal env");
    goto initFailed;
  }

  if (dndInitDnode(pDnode) != 0) {
    dError("failed to init dnode");
    goto initFailed;
  }

  if (dndInitVnodes(pDnode) != 0) {
    dError("failed to init vnodes");
    goto initFailed;
  }

  if (dndInitMnode(pDnode) != 0) {
    dError("failed to init mnode");
    goto initFailed;
  }

  if (dndInitTrans(pDnode) != 0) {
    dError("failed to init transport");
    goto initFailed;
  }

  dndSetStat(pDnode, DND_STAT_RUNNING);
  dndReportStartup(pDnode, "TDengine", "initialized successfully");
  dInfo("TDengine is initialized successfully");
  return pDnode;

initFailed:
  // Single exit for every failed stage: tear down and report failure.
  dndCleanup(pDnode);
  return NULL;
}
/*
 * Shuts down a dnode and frees the object.
 *
 * A NULL `pDnode` is ignored, so the NULL that dndInit() returns on failure
 * can be passed here safely. If the dnode is already in DND_STAT_STOPPED
 * state the call only logs an error and returns without touching anything.
 *
 * Otherwise the state is set to stopped and the subsystems are torn down in
 * the reverse of dndInit()'s start order (transport, mnode, vnodes, dnode,
 * wal, rpc, environment) before the object itself is freed. `pDnode` must not
 * be used after this call returns.
 */
void dndCleanup(SDnode *pDnode) {
  if (pDnode == NULL) {
    return;
  }

  if (dndGetStat(pDnode) == DND_STAT_STOPPED) {
    dError("dnode is shutting down");
    return;
  }

  dInfo("start to cleanup TDengine");
  dndSetStat(pDnode, DND_STAT_STOPPED);

  dndCleanupTrans(pDnode);
  dndCleanupMnode(pDnode);
  dndCleanupVnodes(pDnode);
  dndCleanupDnode(pDnode);
  walCleanUp();
  rpcCleanup();
  dndCleanupEnv(pDnode);

  free(pDnode);
  dInfo("TDengine is cleaned up successfully");
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dnodeInt.h"
#include <signal.h>
/*
 * Shutdown request flag: set by sigintHandler() and polled by main().
 * Declared volatile sig_atomic_t because it is written from a signal handler.
 */
static volatile sig_atomic_t stop = 0;
/*
 * Signal callback installed by setSignalHandler().
 * Only raises the `stop` flag; none of the arguments are used.
 */
static void sigintHandler(int32_t signum, void *info, void *ctx) {
  (void)signum;
  (void)info;
  (void)ctx;
  stop = true;
}
/*
 * Registers sigintHandler() for every signal that should stop the process:
 * SIGTERM, SIGHUP, SIGINT, SIGABRT and SIGBREAK, in that order.
 */
static void setSignalHandler() {
  const int32_t stopSignals[] = {SIGTERM, SIGHUP, SIGINT, SIGABRT, SIGBREAK};
  const int32_t numSignals = (int32_t)(sizeof(stopSignals) / sizeof(stopSignals[0]));

  for (int32_t i = 0; i < numSignals; ++i) {
    taosSetSignal(stopSignals[i], sigintHandler);
  }
}
/*
 * Process entry point.
 * Installs the signal handlers, initializes the dnode, then sleeps in 100 ms
 * slices until a handled signal raises `stop`, and finally cleans up.
 * Exits with EXIT_FAILURE if initialization fails; returns 0 otherwise.
 */
int main(int argc, char const *argv[]) {
  setSignalHandler();

  if (dnodeInit() != 0) {
    dInfo("Failed to start TDengine, please check the log at:%s", tsLogDir);
    exit(EXIT_FAILURE);
  }

  // Idle until a signal asks us to shut down.
  for (;;) {
    if (stop) {
      break;
    }
    taosMsleep(100);
  }

  dInfo("TDengine is shut down!");
  dnodeCleanup();

  return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dnodeDnode.h"
#include "dnodeMnode.h"
#include "dnodeTransport.h"
#include "dnodeVnodes.h"
#include "sync.h"
#include "tcache.h"
#include "tconfig.h"
#include "tnote.h"
#include "tstep.h"
#include "wal.h"
/* File-scope state of the dnode bootstrap code in this file. */
static struct {
  /* Startup progress record: written by dnodeReportStartup() and
   * dnodeReportStartupFinished(), copied out by dnodeGetStartup(). */
  SStartupMsg startup;
  /* Current run state: accessed through dnodeGetRunStat()/dnodeSetRunStat()
   * and set to DN_RUN_STAT_STOPPED at the start of dnodeInitMain(). */
  EDnStat runStat;
  /* Step list built in dnodeInit() and released in dnodeCleanup(). */
  SSteps *steps;
} tsInt;
/* Returns the run state stored in the file-scope tsInt record. */
EDnStat dnodeGetRunStat() {
  EDnStat current = tsInt.runStat;
  return current;
}
/* Overwrites the run state stored in the file-scope tsInt record. */
void dnodeSetRunStat(EDnStat stat) {
  tsInt.runStat = stat;
}
/*
 * Records the startup step currently in progress.
 *
 * `name` and `desc` are copied into the startup record and the finished flag
 * is cleared. The copy is bounded by the capacity of the destination fields.
 * The previous code passed strlen() of the destination, i.e. the length of
 * whatever text was already stored there (zero on a fresh record), so the new
 * text was truncated to the old text's length or not stored at all.
 *
 * NOTE(review): sizeof is used on the assumption that `name` and `desc` are
 * character arrays embedded in SStartupMsg - confirm against its definition.
 */
void dnodeReportStartup(char *name, char *desc) {
  SStartupMsg *pStartup = &tsInt.startup;
  tstrncpy(pStartup->name, name, sizeof(pStartup->name));
  tstrncpy(pStartup->desc, desc, sizeof(pStartup->desc));
  pStartup->finished = 0;
}
/*
 * Records the final startup step and marks startup as finished.
 *
 * `name` and `desc` are copied into the startup record and the finished flag
 * is set to 1. The copy is bounded by the capacity of the destination fields.
 * The previous code passed strlen() of the destination, i.e. the length of
 * whatever text was already stored there, which is not the buffer size.
 *
 * NOTE(review): sizeof is used on the assumption that `name` and `desc` are
 * character arrays embedded in SStartupMsg - confirm against its definition.
 */
void dnodeReportStartupFinished(char *name, char *desc) {
  SStartupMsg *pStartup = &tsInt.startup;
  tstrncpy(pStartup->name, name, sizeof(pStartup->name));
  tstrncpy(pStartup->desc, desc, sizeof(pStartup->desc));
  pStartup->finished = 1;
}
/* Copies the file-scope startup record into the caller's `pStartup`. */
void dnodeGetStartup(SStartupMsg *pStartup) {
  const SStartupMsg *pSrc = &tsInt.startup;
  memcpy(pStartup, pSrc, sizeof(SStartupMsg));
}
/*
 * Creates (or truncates) "<dir>/.running" and takes a lock on it.
 * Returns 0 when the lock was obtained, -1 when the file could not be opened
 * or locked (the reason is logged). The descriptor is closed only on the
 * lock-failure path.
 */
static int32_t dnodeCheckRunning(char *dir) {
  char filepath[256] = {0};
  snprintf(filepath, sizeof(filepath), "%s/.running", dir);

  FileFd fd = taosOpenFileCreateWriteTrunc(filepath);
  if (fd < 0) {
    dError("failed to open lock file:%s since %s, quit", filepath, strerror(errno));
    return -1;
  }

  if (taosLockFile(fd) == 0) {
    return 0;
  }

  dError("failed to lock file:%s since %s, quit", filepath, strerror(errno));
  taosCloseFile(fd);
  return -1;
}
static int32_t dnodeInitDir() {
sprintf(tsMnodeDir, "%s/mnode", tsDataDir);
sprintf(tsVnodeDir, "%s/vnode", tsDataDir);
sprintf(tsDnodeDir, "%s/dnode", tsDataDir);
if (!taosMkDir(tsDnodeDir)) {
dError("failed to create dir:%s since %s", tsDnodeDir, strerror(errno));
return -1;
}
if (!taosMkDir(tsMnodeDir)) {
dError("failed to create dir:%s since %s", tsMnodeDir, strerror(errno));
return -1;
}
if (!taosMkDir(tsVnodeDir)) {
dError("failed to create dir:%s since %s", tsVnodeDir, strerror(errno));
return -1;
}
if (dnodeCheckRunning(tsDnodeDir) != 0) {
return -1;
}
return 0;
}
/*
 * First startup step: process-wide setup.
 *
 * Marks the dnode as stopped, configures signal and CRC handling, loads the
 * global and log configuration, creates the log directory, opens the log
 * file, validates the configuration and prepares the data directories.
 *
 * Returns 0 on success, -1 when the log directory cannot be created, the
 * global configuration cannot be read or fails validation, or the data
 * directories cannot be prepared / locked.
 */
static int32_t dnodeInitMain() {
  tsInt.runStat = DN_RUN_STAT_STOPPED;
  tscEmbedded = 1;
  taosIgnSIGPIPE();
  taosBlockSIGPIPE();
  taosResolveCRC();
  taosInitGlobalCfg();
  taosReadGlobalLogCfg();
  taosSetCoreDump(tsEnableCoreFile);

  if (!taosMkDir(tsLogDir)) {
    printf("failed to create dir: %s, reason: %s\n", tsLogDir, strerror(errno));
    return -1;
  }

  char temp[TSDB_FILENAME_LEN];
  // Bounded formatting: tsLogDir must not be able to overrun `temp`.
  snprintf(temp, sizeof(temp), "%s/taosdlog", tsLogDir);
  if (taosInitLog(temp, tsNumOfLogLines, 1) < 0) {
    // Kept as best-effort, as before: startup continues without a log file.
    printf("failed to init log file\n");
  }

  if (!taosReadGlobalCfg()) {
    taosPrintGlobalCfg();
    dError("TDengine read global config failed");
    return -1;
  }

  dInfo("start to initialize TDengine");

  taosInitNotes();

  if (taosCheckGlobalCfg() != 0) {
    return -1;
  }

  // Previously the result was dropped, so a failed mkdir or a data directory
  // already locked by another instance went unnoticed. dnodeInitDir() logs
  // the reason itself.
  if (dnodeInitDir() != 0) {
    return -1;
  }

  return 0;
}
/*
 * Cleanup counterpart of dnodeInitMain(); the two are registered together as
 * the "dnode-main" step in dnodeInit().
 * Calls taos_cleanup(), closes the log, then stops the cache refresh worker,
 * in that order.
 */
static void dnodeCleanupMain() {
  taos_cleanup();
  taosCloseLog();
  taosStopCacheRefreshWorker();
}
/*
 * Builds the ordered list of startup steps and executes it.
 *
 * Each step pairs an init function with its cleanup function. The steps are
 * registered in this order: main, rpc, tfs (placeholder with no callbacks),
 * wal, dnode, vnodes, mnode, transport.
 *
 * Returns 0 once every step has run and the dnode is marked running.
 * Returns -1 if the step list cannot be allocated, or the non-zero result of
 * taosStepExec() if executing the steps fails. In the failure case the run
 * state is left untouched and startup is not reported as finished. Previously
 * the result of taosStepExec() was discarded and success was always reported.
 *
 * NOTE(review): on failure the step list stays in tsInt.steps; whether
 * taosStepExec() rolls back already-executed steps is not visible here.
 */
int32_t dnodeInit() {
  SSteps *steps = taosStepInit(10, dnodeReportStartup);
  if (steps == NULL) return -1;

  taosStepAdd(steps, "dnode-main", dnodeInitMain, dnodeCleanupMain);
  taosStepAdd(steps, "dnode-rpc", rpcInit, rpcCleanup);
  taosStepAdd(steps, "dnode-tfs", NULL, NULL);
  taosStepAdd(steps, "dnode-wal", walInit, walCleanUp);
  //taosStepAdd(steps, "dnode-sync", syncInit, syncCleanUp);
  taosStepAdd(steps, "dnode-dnode", dnodeInitDnode, dnodeCleanupDnode);
  taosStepAdd(steps, "dnode-vnodes", dnodeInitVnodes, dnodeCleanupVnodes);
  taosStepAdd(steps, "dnode-mnode", dnodeInitMnode, dnodeCleanupMnode);
  taosStepAdd(steps, "dnode-trans", dnodeInitTrans, dnodeCleanupTrans);

  tsInt.steps = steps;

  int32_t code = taosStepExec(tsInt.steps);
  if (code != 0) {
    // Propagate the failure so the caller can abort startup.
    return code;
  }

  dnodeSetRunStat(DN_RUN_STAT_RUNNING);
  dnodeReportStartupFinished("TDengine", "initialized successfully");
  dInfo("TDengine is initialized successfully");

  return 0;
}
/*
 * Tear down the dnode: mark it stopped and run the cleanup side of the
 * startup steps. Does nothing when the dnode is already stopped.
 */
void dnodeCleanup() {
  if (dnodeGetRunStat() == DN_RUN_STAT_STOPPED) {
    return;
  }

  dnodeSetRunStat(DN_RUN_STAT_STOPPED);
  taosStepCleanup(tsInt.steps);
  tsInt.steps = NULL;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "dnodeMnode.h"
#include "cJSON.h"
#include "dnodeDnode.h"
#include "dnodeTransport.h"
#include "mnode.h"
#include "tlockfree.h"
#include "tqueue.h"
#include "tstep.h"
#include "tworker.h"
/* File-local state of the mnode hosted by this dnode. */
static struct {
/* Number of in-flight users; raised in dnodeAcquireMnode, lowered in dnodeReleaseMnode. */
int32_t refCount;
/* Non-zero while the mnode is deployed; persisted as "deployed" in mnode.json. */
int8_t deployed;
/* Non-zero once the mnode has been dropped; persisted as "dropped" in mnode.json. */
int8_t dropped;
/* Worker pools; the write pool serves both the write and the apply queue. */
SWorkerPool mgmtPool;
SWorkerPool readPool;
SWorkerPool writePool;
SWorkerPool syncPool;
/* Queues allocated from the pools above; NULL when not allocated. */
taos_queue pReadQ;
taos_queue pWriteQ;
taos_queue pApplyQ;
taos_queue pSyncQ;
taos_queue pMgmtQ;
/* Init/cleanup step list built by dnodeInitMnode. */
SSteps *pSteps;
/* Guards reads and writes of the deployed flag. */
SRWLatch latch;
} tsMnode = {0};
static int32_t dnodeAllocMnodeReadQueue();
static void dnodeFreeMnodeReadQueue();
static int32_t dnodeAllocMnodeWriteQueue();
static void dnodeFreeMnodeWriteQueue();
static int32_t dnodeAllocMnodeApplyQueue();
static void dnodeFreeMnodeApplyQueue();
static int32_t dnodeAllocMnodeSyncQueue();
static void dnodeFreeMnodeSyncQueue();
/*
 * Take a reference on the local mnode if it is deployed.
 *
 * Returns 0 and increments refCount when deployed, otherwise
 * TSDB_CODE_DND_MNODE_NOT_DEPLOYED. The deployed flag is read under the
 * read latch.
 */
static int32_t dnodeAcquireMnode() {
  int32_t code = TSDB_CODE_DND_MNODE_NOT_DEPLOYED;

  taosRLockLatch(&tsMnode.latch);
  if (tsMnode.deployed) {
    code = 0;
    atomic_add_fetch_32(&tsMnode.refCount, 1);
  }
  taosRUnLockLatch(&tsMnode.latch);

  return code;
}
/* Drop one reference previously taken with dnodeAcquireMnode. */
static void dnodeReleaseMnode() {
  atomic_sub_fetch_32(&tsMnode.refCount, 1);
}
static int32_t dnodeReadMnodeFile() {
int32_t code = TSDB_CODE_DND_READ_MNODE_FILE_ERROR;
int32_t len = 0;
int32_t maxLen = 300;
char *content = calloc(1, maxLen + 1);
cJSON *root = NULL;
FILE *fp = NULL;
char file[PATH_MAX + 20] = {0};
snprintf(file, sizeof(file), "%s/mnode.json", tsDnodeDir);
fp = fopen(file, "r");
if (!fp) {
dDebug("file %s not exist", file);
code = 0;
goto PRASE_MNODE_OVER;
}
len = (int32_t)fread(content, 1, maxLen, fp);
if (len <= 0) {
dError("failed to read %s since content is null", file);
goto PRASE_MNODE_OVER;
}
content[len] = 0;
root = cJSON_Parse(content);
if (root == NULL) {
dError("failed to read %s since invalid json format", file);
goto PRASE_MNODE_OVER;
}
cJSON *deployed = cJSON_GetObjectItem(root, "deployed");
if (!deployed || deployed->type != cJSON_String) {
dError("failed to read %s since deployed not found", file);
goto PRASE_MNODE_OVER;
}
tsMnode.deployed = atoi(deployed->valuestring);
cJSON *dropped = cJSON_GetObjectItem(root, "dropped");
if (!dropped || dropped->type != cJSON_String) {
dError("failed to read %s since dropped not found", file);
goto PRASE_MNODE_OVER;
}
tsMnode.dropped = atoi(dropped->valuestring);
code = 0;
dInfo("succcessed to read file %s", file);
PRASE_MNODE_OVER:
if (content != NULL) free(content);
if (root != NULL) cJSON_Delete(root);
if (fp != NULL) fclose(fp);
return code;
}
static int32_t dnodeWriteMnodeFile() {
char file[PATH_MAX + 20] = {0};
char realfile[PATH_MAX + 20] = {0};
snprintf(file, sizeof(file), "%s/mnode.json.bak", tsDnodeDir);
snprintf(realfile, sizeof(realfile), "%s/mnode.json", tsDnodeDir);
FILE *fp = fopen(file, "w");
if (!fp) {
dError("failed to write %s since %s", file, strerror(errno));
return TSDB_CODE_DND_WRITE_MNODE_FILE_ERROR;
}
int32_t len = 0;
int32_t maxLen = 300;
char *content = calloc(1, maxLen + 1);
len += snprintf(content + len, maxLen - len, "{\n");
len += snprintf(content + len, maxLen - len, " \"deployed\": \"%d\",\n", tsMnode.deployed);
len += snprintf(content + len, maxLen - len, " \"dropped\": \"%d\"\n", tsMnode.dropped);
len += snprintf(content + len, maxLen - len, "}\n");
fwrite(content, 1, len, fp);
taosFsyncFile(fileno(fp));
fclose(fp);
free(content);
int32_t code = taosRenameFile(file, realfile);
if (code != 0) {
dError("failed to rename %s since %s", file, tstrerror(code));
return TSDB_CODE_DND_WRITE_MNODE_FILE_ERROR;
}
dInfo("successed to write %s", realfile);
return 0;
}
/*
 * Allocate the read, write, apply and sync queues (in that order), mark the
 * mnode as deployed under the write latch, and start it.
 *
 * Returns the first non-zero allocation error, otherwise the result of
 * mnodeStart(NULL).
 */
static int32_t dnodeStartMnode() {
  /* Each allocation runs only if every previous one succeeded. */
  int32_t code = dnodeAllocMnodeReadQueue();
  if (code == 0) code = dnodeAllocMnodeWriteQueue();
  if (code == 0) code = dnodeAllocMnodeApplyQueue();
  if (code == 0) code = dnodeAllocMnodeSyncQueue();
  if (code != 0) {
    return code;
  }

  taosWLockLatch(&tsMnode.latch);
  tsMnode.deployed = 1;
  taosWUnLockLatch(&tsMnode.latch);

  return mnodeStart(NULL);
}
/*
 * Stop the local mnode: clear the deployed flag so no new reference can be
 * acquired, wait for outstanding references and queued items to drain, then
 * free the read/write/apply/sync queues.
 */
static void dnodeStopMnode() {
/* Flip the flag under the write latch; dnodeAcquireMnode reads it under the read latch. */
taosWLockLatch(&tsMnode.latch);
tsMnode.deployed = 0;
taosWUnLockLatch(&tsMnode.latch);
/* NOTE(review): this release pairs with the acquire done in dnodeCloseMnode;
 * dnodeUnDeployMnode calls this function without acquiring first, which looks
 * like it drives refCount below zero — confirm. */
dnodeReleaseMnode();
/* Poll every 10 ms until all users are gone and every queue is empty. */
while (tsMnode.refCount > 0) taosMsleep(10);
while (!taosQueueEmpty(tsMnode.pReadQ)) taosMsleep(10);
while (!taosQueueEmpty(tsMnode.pApplyQ)) taosMsleep(10);
while (!taosQueueEmpty(tsMnode.pWriteQ)) taosMsleep(10);
while (!taosQueueEmpty(tsMnode.pSyncQ)) taosMsleep(10);
dnodeFreeMnodeReadQueue();
dnodeFreeMnodeWriteQueue();
dnodeFreeMnodeApplyQueue();
dnodeFreeMnodeSyncQueue();
}
/*
 * Undeploy the local mnode. The dropped flag is persisted first; only when
 * that succeeds is the mnode stopped and undeployed, and the state file is
 * then written once more.
 *
 * Returns 0 on success, or the error of the first dnodeWriteMnodeFile() call
 * (in which case the dropped flag is rolled back in memory).
 */
static int32_t dnodeUnDeployMnode() {
  tsMnode.dropped = 1;

  int32_t code = dnodeWriteMnodeFile();
  if (code == 0) {
    dnodeStopMnode();
    mnodeUnDeploy();
    dnodeWriteMnodeFile(); /* result intentionally not inspected, as before */
    return code;
  }

  tsMnode.dropped = 0;
  dError("failed to undeploy mnode since %s", tstrerror(code));
  return code;
}
/*
 * Deploy an mnode with the given configuration, start it, and persist the
 * new state. If starting or persisting fails, the mnode is undeployed again.
 *
 * Returns 0 on success, otherwise the error of the failing stage.
 */
static int32_t dnodeDeployMnode(SMnodeCfg *pCfg) {
  int32_t code = mnodeDeploy(pCfg);
  if (code != 0) {
    dError("failed to deploy mnode since %s", tstrerror(code));
    return code;
  }

  /* Start, then persist; the state file is written only after a clean start. */
  code = dnodeStartMnode();
  if (code == 0) {
    code = dnodeWriteMnodeFile();
  }

  if (code != 0) {
    dnodeUnDeployMnode();
    dError("failed to deploy mnode since %s", tstrerror(code));
    return code;
  }

  dInfo("deploy mnode success");
  return code;
}
/*
 * Apply a new configuration to the running mnode.
 *
 * Returns the acquire error when the mnode is not deployed, otherwise the
 * result of mnodeAlter().
 */
static int32_t dnodeAlterMnode(SMnodeCfg *pCfg) {
  int32_t code = dnodeAcquireMnode();
  if (code != 0) {
    return code;
  }

  code = mnodeAlter(pCfg);
  dnodeReleaseMnode();
  return code;
}
/*
 * Convert the body of a create/alter mnode request to host byte order, in
 * place.
 *
 * pRpcMsg: RPC message whose pCont points at an SCreateMnodeMsg.
 * Returns the converted message (same memory as pRpcMsg->pCont).
 */
static SCreateMnodeMsg *dnodeParseCreateMnodeMsg(SRpcMsg *pRpcMsg) {
  SCreateMnodeMsg *pMsg = pRpcMsg->pCont;
  pMsg->dnodeId = ntohl(pMsg->dnodeId);

  /* replica comes from the wire; never index past the fixed-size array. */
  int32_t replica = pMsg->replica;
  if (replica > TSDB_MAX_REPLICA) {
    replica = TSDB_MAX_REPLICA;
  }

  for (int32_t i = 0; i < replica; ++i) {
    pMsg->replicas[i].port = ntohs(pMsg->replicas[i].port);
  }

  return pMsg;
}
/*
 * Handle a create-mnode request sent to this dnode.
 *
 * Returns TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE when the request targets a
 * different dnode, otherwise the result of dnodeDeployMnode().
 */
static int32_t dnodeProcessCreateMnodeReq(SRpcMsg *pRpcMsg) {
  /* The parser takes the RPC message itself; passing pCont made it
   * dereference the payload as if it were an SRpcMsg. */
  SAlterMnodeMsg *pMsg = (SAlterMnodeMsg *)dnodeParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dnodeGetDnodeId()) {
    return TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE;
  }

  SMnodeCfg cfg = {0};
  cfg.replica = pMsg->replica;
  /* Copy TSDB_MAX_REPLICA entries; the old size used sizeof(TSDB_MAX_REPLICA),
   * i.e. the size of an int, as the element count. */
  memcpy(cfg.replicas, pMsg->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
  return dnodeDeployMnode(&cfg);
}
/*
 * Handle an alter-mnode request sent to this dnode.
 *
 * Returns TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE when the request targets a
 * different dnode, otherwise the result of dnodeAlterMnode().
 */
static int32_t dnodeProcessAlterMnodeReq(SRpcMsg *pRpcMsg) {
  /* The parser takes the RPC message itself; passing pCont made it
   * dereference the payload as if it were an SRpcMsg. */
  SAlterMnodeMsg *pMsg = (SAlterMnodeMsg *)dnodeParseCreateMnodeMsg(pRpcMsg);

  if (pMsg->dnodeId != dnodeGetDnodeId()) {
    return TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE;
  }

  SMnodeCfg cfg = {0};
  cfg.replica = pMsg->replica;
  /* Copy TSDB_MAX_REPLICA entries; the old size used sizeof(TSDB_MAX_REPLICA),
   * i.e. the size of an int, as the element count. */
  memcpy(cfg.replicas, pMsg->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
  return dnodeAlterMnode(&cfg);
}
/*
 * Handle a drop-mnode request: byte-swap the dnode id in place, verify it
 * addresses this dnode, and undeploy the mnode.
 */
static int32_t dnodeProcessDropMnodeReq(SRpcMsg *pMsg) {
  SAlterMnodeMsg *pDrop = pMsg->pCont;
  pDrop->dnodeId = htonl(pDrop->dnodeId);

  if (pDrop->dnodeId == dnodeGetDnodeId()) {
    return dnodeUnDeployMnode();
  }

  return TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE;
}
/*
 * Worker callback of the mnode management queue: dispatches
 * create/alter/drop mnode requests, sends the result code back to the
 * sender, and releases the message.
 */
static void dnodeProcessMnodeMgmtQueue(void *unused, SRpcMsg *pMsg) {
  int32_t code = 0;

  if (pMsg->msgType == TSDB_MSG_TYPE_CREATE_MNODE_IN) {
    code = dnodeProcessCreateMnodeReq(pMsg);
  } else if (pMsg->msgType == TSDB_MSG_TYPE_ALTER_MNODE_IN) {
    code = dnodeProcessAlterMnodeReq(pMsg);
  } else if (pMsg->msgType == TSDB_MSG_TYPE_DROP_MNODE_IN) {
    code = dnodeProcessDropMnodeReq(pMsg);
  } else {
    code = TSDB_CODE_DND_MSG_NOT_PROCESSED;
  }

  SRpcMsg rsp = {.code = code, .handle = pMsg->handle};
  rpcSendResponse(&rsp);

  rpcFreeCont(pMsg->pCont);
  taosFreeQitem(pMsg);
}
/* Read-queue worker callback: hands the item to the mnode as a READ message. */
static void dnodeProcessMnodeReadQueue(void *unused, SMnodeMsg *pMsg) {
  mnodeProcessMsg(pMsg, MN_MSG_TYPE_READ);
}
/* Write-queue worker callback: hands the item to the mnode as a WRITE message. */
static void dnodeProcessMnodeWriteQueue(void *unused, SMnodeMsg *pMsg) {
  mnodeProcessMsg(pMsg, MN_MSG_TYPE_WRITE);
}
/* Apply-queue worker callback: hands the item to the mnode as an APPLY message. */
static void dnodeProcessMnodeApplyQueue(void *unused, SMnodeMsg *pMsg) {
  mnodeProcessMsg(pMsg, MN_MSG_TYPE_APPLY);
}
/* Sync-queue worker callback: hands the item to the mnode as a SYNC message. */
static void dnodeProcessMnodeSyncQueue(void *unused, SMnodeMsg *pMsg) {
  mnodeProcessMsg(pMsg, MN_MSG_TYPE_SYNC);
}
/*
 * Wrap an RPC message into an SMnodeMsg and put it on the given queue.
 *
 * pQueue:  destination queue; NULL yields TSDB_CODE_DND_MSG_NOT_PROCESSED.
 * pRpcMsg: incoming RPC message.
 *
 * Returns 0 when the message was queued. On any failure an error response is
 * sent to the peer, the RPC content is freed, and the error code is returned.
 */
static int32_t dnodeWriteMnodeMsgToQueue(taos_queue pQueue, SRpcMsg *pRpcMsg) {
  int32_t code = 0;

  if (pQueue == NULL) {
    code = TSDB_CODE_DND_MSG_NOT_PROCESSED;
  } else {
    SMnodeMsg *pMsg = mnodeInitMsg(pRpcMsg);
    if (pMsg == NULL) {
      code = terrno;
    } else {
      /* The wrapped message used to be created and then dropped without ever
       * reaching the queue.
       * NOTE(review): the mgmt queue callback takes SRpcMsg items while this
       * enqueues SMnodeMsg — confirm the intended item type for pMgmtQ. */
      code = taosWriteQitem(pQueue, pMsg);
    }
  }

  if (code != TSDB_CODE_SUCCESS) {
    SRpcMsg rsp = {.handle = pRpcMsg->handle, .code = code};
    rpcSendResponse(&rsp);
    rpcFreeCont(pRpcMsg->pCont);
  }

  /* The function is declared int32_t but previously fell off the end. */
  return code;
}
/* Entry point for mnode management messages: forwards to the mgmt queue. */
void dnodeProcessMnodeMgmtMsg(SRpcMsg *pMsg, SEpSet *pEpSet) {
  dnodeWriteMnodeMsgToQueue(tsMnode.pMgmtQ, pMsg);
}
/*
 * Entry point for mnode write messages: queue the message when the local
 * mnode is deployed, otherwise redirect the sender.
 */
void dnodeProcessMnodeWriteMsg(SRpcMsg *pMsg, SEpSet *pEpSet) {
  if (dnodeAcquireMnode() != 0) {
    dnodeSendRedirectMsg(pMsg, 0);
    return;
  }

  dnodeWriteMnodeMsgToQueue(tsMnode.pWriteQ, pMsg);
  dnodeReleaseMnode();
}
/*
 * Entry point for mnode sync messages: queue the message when the local
 * mnode is deployed, otherwise answer with the acquire error and free the
 * message content.
 */
void dnodeProcessMnodeSyncMsg(SRpcMsg *pMsg, SEpSet *pEpSet) {
  int32_t code = dnodeAcquireMnode();
  if (code != 0) {
    SRpcMsg rsp = {.handle = pMsg->handle, .code = code};
    rpcSendResponse(&rsp);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  dnodeWriteMnodeMsgToQueue(tsMnode.pSyncQ, pMsg);
  dnodeReleaseMnode();
}
/*
 * Entry point for mnode read messages: queue the message when the local
 * mnode is deployed, otherwise redirect the sender.
 */
void dnodeProcessMnodeReadMsg(SRpcMsg *pMsg, SEpSet *pEpSet) {
  if (dnodeAcquireMnode() != 0) {
    dnodeSendRedirectMsg(pMsg, 0);
    return;
  }

  dnodeWriteMnodeMsgToQueue(tsMnode.pReadQ, pMsg);
  dnodeReleaseMnode();
}
/*
 * Put an mnode message on the apply queue while holding a reference on the
 * mnode. Returns the acquire error, or the result of taosWriteQitem().
 */
static int32_t dnodePutMsgIntoMnodeApplyQueue(SMnodeMsg *pMsg) {
  int32_t code = dnodeAcquireMnode();
  if (code == 0) {
    code = taosWriteQitem(tsMnode.pApplyQ, pMsg);
    dnodeReleaseMnode();
  }
  return code;
}
static int32_t dnodeAllocMnodeMgmtQueue() {
tsMnode.pMgmtQ = tWorkerAllocQueue(&tsMnode.mgmtPool, NULL, (FProcessItem)dnodeProcessMnodeMgmtQueue);
if (tsMnode.pMgmtQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY;
}
return 0;
}
static void dnodeFreeMnodeMgmtQueue() {
tWorkerFreeQueue(&tsMnode.mgmtPool, tsMnode.pMgmtQ);
tsMnode.pMgmtQ = NULL;
}
/* Configure the "mnode-mgmt" pool (min 1, max 1) and initialize it. */
static int32_t dnodeInitMnodeMgmtWorker() {
  tsMnode.mgmtPool.name = "mnode-mgmt";
  tsMnode.mgmtPool.min = 1;
  tsMnode.mgmtPool.max = 1;
  return tWorkerInit(&tsMnode.mgmtPool);
}
/* Tear down the mgmt worker pool. */
static void dnodeCleanupMnodeMgmtWorker() {
  tWorkerCleanup(&tsMnode.mgmtPool);
}
static int32_t dnodeAllocMnodeReadQueue() {
tsMnode.pReadQ = tWorkerAllocQueue(&tsMnode.readPool, NULL, (FProcessItem)dnodeProcessMnodeReadQueue);
if (tsMnode.pReadQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY;
}
return 0;
}
static void dnodeFreeMnodeReadQueue() {
tWorkerFreeQueue(&tsMnode.readPool, tsMnode.pReadQ);
tsMnode.pReadQ = NULL;
}
/* Configure the "mnode-read" pool (min 0, max 1) and initialize it. */
static int32_t dnodeInitMnodeReadWorker() {
  tsMnode.readPool.name = "mnode-read";
  tsMnode.readPool.min = 0;
  tsMnode.readPool.max = 1;
  return tWorkerInit(&tsMnode.readPool);
}
/* Tear down the read worker pool. */
static void dnodeCleanupMnodeReadWorker() {
  tWorkerCleanup(&tsMnode.readPool);
}
static int32_t dnodeAllocMnodeWriteQueue() {
tsMnode.pWriteQ = tWorkerAllocQueue(&tsMnode.writePool, NULL, (FProcessItem)dnodeProcessMnodeWriteQueue);
if (tsMnode.pWriteQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY;
}
return 0;
}
static void dnodeFreeMnodeWriteQueue() {
tWorkerFreeQueue(&tsMnode.writePool, tsMnode.pWriteQ);
tsMnode.pWriteQ = NULL;
}
static int32_t dnodeAllocMnodeApplyQueue() {
tsMnode.pApplyQ = tWorkerAllocQueue(&tsMnode.writePool, NULL, (FProcessItem)dnodeProcessMnodeApplyQueue);
if (tsMnode.pApplyQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY;
}
return 0;
}
static void dnodeFreeMnodeApplyQueue() {
tWorkerFreeQueue(&tsMnode.writePool, tsMnode.pApplyQ);
tsMnode.pApplyQ = NULL;
}
/* Configure the "mnode-write" pool (min 0, max 1) and initialize it. */
static int32_t dnodeInitMnodeWriteWorker() {
  tsMnode.writePool.name = "mnode-write";
  tsMnode.writePool.min = 0;
  tsMnode.writePool.max = 1;
  return tWorkerInit(&tsMnode.writePool);
}
/* Tear down the write worker pool. */
static void dnodeCleanupMnodeWriteWorker() {
  tWorkerCleanup(&tsMnode.writePool);
}
static int32_t dnodeAllocMnodeSyncQueue() {
tsMnode.pSyncQ = tWorkerAllocQueue(&tsMnode.syncPool, NULL, (FProcessItem)dnodeProcessMnodeSyncQueue);
if (tsMnode.pSyncQ == NULL) {
return TSDB_CODE_DND_OUT_OF_MEMORY;
}
return 0;
}
static void dnodeFreeMnodeSyncQueue() {
tWorkerFreeQueue(&tsMnode.syncPool, tsMnode.pSyncQ);
tsMnode.pSyncQ = NULL;
}
/* Configure the "mnode-sync" pool (min 0, max 1) and initialize it. */
static int32_t dnodeInitMnodeSyncWorker() {
  tsMnode.syncPool.name = "mnode-sync";
  tsMnode.syncPool.min = 0;
  tsMnode.syncPool.max = 1;
  return tWorkerInit(&tsMnode.syncPool);
}
/* Tear down the sync worker pool. */
static void dnodeCleanupMnodeSyncWorker() {
  tWorkerCleanup(&tsMnode.syncPool);
}
/*
 * Initialize the latch guarding the mnode state and hand the dnode identity
 * and message-sending callbacks to the mnode module.
 *
 * Returns the result of mnodeInit().
 */
static int32_t dnodeInitMnodeModule() {
  taosInitRWLatch(&tsMnode.latch);

  /* Zero-initialize: the struct is passed by value, so any member not set
   * below would otherwise carry indeterminate stack contents. */
  SMnodePara para = {0};
  para.dnodeId = dnodeGetDnodeId();
  para.clusterId = dnodeGetClusterId();
  para.SendMsgToDnode = dnodeSendMsgToDnode;
  para.SendMsgToMnode = dnodeSendMsgToMnode;
  para.SendRedirectMsg = dnodeSendRedirectMsg;

  return mnodeInit(para);
}
/* Cleanup counterpart of dnodeInitMnodeModule. */
static void dnodeCleanupMnodeModule() {
  mnodeCleanup();
}
/*
 * Decide whether this dnode should deploy an mnode by itself: only when it
 * has no dnode id, no cluster id, and its local endpoint equals tsFirst.
 */
static bool dnodeNeedDeployMnode() {
  /* Short-circuit evaluation keeps the original check order. */
  return !(dnodeGetDnodeId() > 0) &&
         !(dnodeGetClusterId() > 0) &&
         strcmp(tsFirst, tsLocalEp) == 0;
}
/*
 * Open the local mnode according to the persisted state:
 *  - dropped:      finish undeploying it;
 *  - not deployed: deploy a single-replica mnode if dnodeNeedDeployMnode()
 *                  says so, otherwise do nothing;
 *  - deployed:     start it.
 *
 * Returns 0 on success or the error of the failing operation.
 */
static int32_t dnodeOpenMnode() {
  int32_t code = dnodeReadMnodeFile();
  if (code != 0) {
    dError("failed to read open mnode since %s", tstrerror(code));
    return code;
  }

  if (tsMnode.dropped) {
    dInfo("mnode already dropped, undeploy it");
    return dnodeUnDeployMnode();
  }

  if (tsMnode.deployed) {
    dInfo("start to open mnode");
    return dnodeStartMnode();
  }

  if (!dnodeNeedDeployMnode()) {
    return 0;
  }

  dInfo("start to deploy mnode");
  SMnodeCfg cfg = {.replica = 1};
  cfg.replicas[0].port = tsServerPort;
  tstrncpy(cfg.replicas[0].fqdn, tsLocalFqdn, TSDB_FQDN_LEN);

  /* This branch used to fall off the end of a non-void function, losing the
   * deploy result. */
  return dnodeDeployMnode(&cfg);
}
/* Stop the mnode if it is currently deployed; otherwise do nothing. */
static void dnodeCloseMnode() {
  if (dnodeAcquireMnode() != 0) {
    return;
  }
  dnodeStopMnode();
}
int32_t dnodeInitMnode() {
dInfo("dnode-mnode start to init");
SSteps *pSteps = taosStepInit(6, dnodeReportStartup);
taosStepAdd(pSteps, "dnode-mnode-env", dnodeInitMnodeModule, dnodeCleanupMnodeModule);
taosStepAdd(pSteps, "dnode-mnode-mgmt", dnodeInitMnodeMgmtWorker, dnodeCleanupMnodeMgmtWorker);
taosStepAdd(pSteps, "dnode-mnode-read", dnodeInitMnodeReadWorker, dnodeCleanupMnodeReadWorker);
taosStepAdd(pSteps, "dnode-mnode-write", dnodeInitMnodeWriteWorker, dnodeCleanupMnodeWriteWorker);
taosStepAdd(pSteps, "dnode-mnode-sync", dnodeInitMnodeSyncWorker, dnodeCleanupMnodeSyncWorker);
taosStepAdd(pSteps, "dnode-mnode", dnodeOpenMnode, dnodeCloseMnode);
tsMnode.pSteps = pSteps;
int32_t code = taosStepExec(pSteps);
if (code != 0) {
dError("dnode-mnode init failed since %s", tstrerror(code));
} else {
dInfo("dnode-mnode is initialized");
}
}
/*
 * Run the cleanup side of the mnode sub-steps and drop the step list.
 * Safe to call more than once: it does nothing when no step list exists.
 */
void dnodeCleanupMnode() {
  /* The test was inverted (== NULL): cleanup only ran when there was nothing
   * to clean and was skipped for a real step list. */
  if (tsMnode.pSteps != NULL) {
    dInfo("dnode-mnode start to clean up");
    taosStepCleanup(tsMnode.pSteps);
    tsMnode.pSteps = NULL;
    dInfo("dnode-mnode is cleaned up");
  }
}
/*
 * Look up a user's auth info in the local mnode.
 *
 * Returns the acquire error when the mnode is not deployed, otherwise the
 * result of mnodeRetriveAuth(), which receives the output parameters
 * spi/encrypt/secret/ckey.
 */
int32_t dnodeGetUserAuthFromMnode(char *user, char *spi, char *encrypt, char *secret, char *ckey) {
  int32_t code = dnodeAcquireMnode();
  if (code == 0) {
    code = mnodeRetriveAuth(user, spi, encrypt, secret, ckey);
    dnodeReleaseMnode();
    return code;
  }

  dTrace("failed to get user auth since mnode not deployed");
  return code;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* This file is mainly responsible for the communication between dnodes. Each
 * dnode works as both server and client. A dnode may send status, grant, and
 * config messages to the mnode; the mnode may send create/alter/drop
 * table/vnode messages to a dnode. All of these messages are handled here.
 */
#define _DEFAULT_SOURCE
#include "dnodeTransport.h"
#include "dnodeDnode.h"
#include "dnodeMnode.h"
#include "dnodeVnodes.h"
/* File-local transport state: the three RPC endpoints and the dispatch table. */
static struct {
/* Server endpoint opened by dnodeInitPeerServer (label "DND-S"). */
void *peerRpc;
/* Server endpoint opened by dnodeInitShellServer (label "SHELL"). */
void *shellRpc;
/* Client endpoint opened by dnodeInitClient (label "DND-C"). */
void *clientRpc;
/* Handler per message type, filled by dnodeInitMsgFp; NULL means "not processed". */
MsgFp msgFp[TSDB_MSG_TYPE_MAX];
} tsTrans;
static void dnodeInitMsgFp() {
// msg from client to dnode
tsTrans.msgFp[TSDB_MSG_TYPE_SUBMIT] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_QUERY] = dnodeProcessVnodeQueryMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_FETCH] = dnodeProcessVnodeFetchMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_TABLE] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_TABLE] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_TABLE] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_TABLE_META] = dnodeProcessVnodeQueryMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_TABLES_META] = dnodeProcessVnodeQueryMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_QUERY] = dnodeProcessVnodeQueryMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_CONSUME] = dnodeProcessVnodeQueryMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_CONNECT] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_DISCONNECT] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_ACK] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_MQ_RESET] = dnodeProcessVnodeWriteMsg;
// msg from client to mnode
tsTrans.msgFp[TSDB_MSG_TYPE_CONNECT] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_ACCT] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_ACCT] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_ACCT] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_USER] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_USER] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_USER] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_DNODE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CONFIG_DNODE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_DNODE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_DB] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_DB] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_USE_DB] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_DB] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SYNC_DB] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_TOPIC] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_TOPIC] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_TOPIC] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_FUNCTION] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_FUNCTION] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_FUNCTION] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_STABLE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_STABLE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_STABLE] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_STABLE_VGROUP] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_KILL_QUERY] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_KILL_CONN] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_HEARTBEAT] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SHOW] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SHOW_RETRIEVE] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SHOW_RETRIEVE_FUNC] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_COMPACT_VNODE] = dnodeProcessMnodeWriteMsg;
// message from client to dnode
tsTrans.msgFp[TSDB_MSG_TYPE_NETWORK_TEST] = dnodeProcessDnodeMsg;
// message from mnode to vnode
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_STABLE_IN] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_STABLE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_STABLE_IN] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_STABLE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_STABLE_IN] = dnodeProcessVnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_STABLE_IN] = dnodeProcessMnodeWriteMsg;
// message from mnode to dnode
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SYNC_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_SYNC_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_AUTH_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_AUTH_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_COMPACT_VNODE_IN] = dnodeProcessVnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_COMPACT_VNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_MNODE_IN] = dnodeProcessMnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CREATE_MNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_MNODE_IN] = dnodeProcessMnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_ALTER_MNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_MNODE_IN] = dnodeProcessMnodeMgmtMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_DROP_MNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CONFIG_DNODE_IN] = dnodeProcessDnodeMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_CONFIG_DNODE_IN_RSP] = dnodeProcessMnodeWriteMsg;
// message from dnode to mnode
tsTrans.msgFp[TSDB_MSG_TYPE_AUTH] = dnodeProcessMnodeReadMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_AUTH_RSP] = dnodeProcessDnodeMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_GRANT] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_GRANT_RSP] = dnodeProcessDnodeMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_STATUS] = dnodeProcessMnodeWriteMsg;
tsTrans.msgFp[TSDB_MSG_TYPE_STATUS_RSP] = dnodeProcessDnodeMsg;
}
/*
 * Callback of the peer RPC server. Network-test messages are always handled;
 * everything else is rejected while the dnode is not running or when the
 * message has no content, and is otherwise dispatched through the handler
 * table.
 */
static void dnodeProcessPeerReq(SRpcMsg *pMsg, SEpSet *pEpSet) {
  int32_t msgType = pMsg->msgType;
  SRpcMsg rspMsg = {.handle = pMsg->handle};

  if (msgType == TSDB_MSG_TYPE_NETWORK_TEST) {
    dnodeProcessDnodeMsg(pMsg, pEpSet);
    return;
  }

  if (dnodeGetRunStat() != DN_RUN_STAT_RUNNING) {
    rspMsg.code = TSDB_CODE_APP_NOT_READY;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    dTrace("RPC %p, peer req:%s is ignored since dnode not running", pMsg->handle, taosMsg[msgType]);
    return;
  }

  if (pMsg->pCont == NULL) {
    rspMsg.code = TSDB_CODE_DND_INVALID_MSG_LEN;
    rpcSendResponse(&rspMsg);
    return;
  }

  MsgFp fp = tsTrans.msgFp[msgType];
  if (fp == NULL) {
    /* No handler registered for this type: reject and free the content. */
    dError("RPC %p, peer req:%s not processed", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_DND_MSG_NOT_PROCESSED;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  dTrace("RPC %p, peer req:%s will be processed", pMsg->handle, taosMsg[msgType]);
  (*fp)(pMsg, pEpSet);
}
/*
 * Open the peer RPC server endpoint on tsDnodeDnodePort.
 * Returns 0 on success, -1 when rpcOpen() fails.
 */
static int32_t dnodeInitPeerServer() {
  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));

  rpcInit.label = "DND-S";
  rpcInit.localPort = tsDnodeDnodePort;
  rpcInit.connType = TAOS_CONN_SERVER;
  rpcInit.numOfThreads = 1;
  rpcInit.sessions = TSDB_MAX_VNODES << 4;
  rpcInit.idleTime = tsShellActivityTimer * 1000;
  rpcInit.cfp = dnodeProcessPeerReq;

  tsTrans.peerRpc = rpcOpen(&rpcInit);
  if (tsTrans.peerRpc != NULL) {
    dInfo("dnode peer rpc server is initialized");
    return 0;
  }

  dError("failed to init peer rpc server");
  return -1;
}
/* Close the peer RPC server endpoint if it is open. */
static void dnodeCleanupPeerServer() {
  if (!tsTrans.peerRpc) {
    return;
  }

  rpcClose(tsTrans.peerRpc);
  tsTrans.peerRpc = NULL;
  dInfo("dnode peer server is closed");
}
/*
 * Callback of the RPC client: dispatches a peer response through the handler
 * table and frees its content afterwards. Responses arriving while the dnode
 * is stopped are only freed.
 */
static void dnodeProcessPeerRsp(SRpcMsg *pMsg, SEpSet *pEpSet) {
  /* The NULL test used to come after pMsg->msgType had already been read. */
  if (pMsg == NULL) return;

  int32_t msgType = pMsg->msgType;

  if (dnodeGetRunStat() == DN_RUN_STAT_STOPPED) {
    if (pMsg->pCont == NULL) return;
    dTrace("RPC %p, peer rsp:%s is ignored since dnode is stopping", pMsg->handle, taosMsg[msgType]);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  MsgFp fp = tsTrans.msgFp[msgType];
  if (fp != NULL) {
    dTrace("RPC %p, peer rsp:%s will be processed, code:%s", pMsg->handle, taosMsg[msgType], tstrerror(pMsg->code));
    (*fp)(pMsg, pEpSet);
  } else {
    dDebug("RPC %p, peer rsp:%s not processed", pMsg->handle, taosMsg[msgType]);
  }

  rpcFreeCont(pMsg->pCont);
}
/*
 * Open the RPC client endpoint used to talk to other dnodes.
 * Returns 0 on success, -1 when rpcOpen() fails.
 */
static int32_t dnodeInitClient() {
  char secret[TSDB_KEY_LEN] = "secret";

  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));

  rpcInit.label = "DND-C";
  rpcInit.connType = TAOS_CONN_CLIENT;
  rpcInit.numOfThreads = 1;
  rpcInit.sessions = TSDB_MAX_VNODES << 4;
  rpcInit.idleTime = tsShellActivityTimer * 1000;
  rpcInit.cfp = dnodeProcessPeerRsp;
  /* Fixed credentials, exactly as in the original code. */
  rpcInit.user = "t";
  rpcInit.ckey = "key";
  rpcInit.secret = secret;

  tsTrans.clientRpc = rpcOpen(&rpcInit);
  if (tsTrans.clientRpc != NULL) {
    dInfo("dnode peer rpc client is initialized");
    return 0;
  }

  dError("failed to init peer rpc client");
  return -1;
}
/* Close the RPC client endpoint if it is open. */
static void dnodeCleanupClient() {
  if (!tsTrans.clientRpc) {
    return;
  }

  rpcClose(tsTrans.clientRpc);
  tsTrans.clientRpc = NULL;
  dInfo("dnode peer rpc client is closed");
}
/*
 * Callback of the shell RPC server. Requests are rejected while the dnode is
 * stopped or not yet running, or when they carry no content; otherwise they
 * are dispatched through the handler table.
 */
static void dnodeProcessShellReq(SRpcMsg *pMsg, SEpSet *pEpSet) {
  int32_t msgType = pMsg->msgType;
  SRpcMsg rspMsg = {.handle = pMsg->handle};

  if (dnodeGetRunStat() == DN_RUN_STAT_STOPPED) {
    dError("RPC %p, shell req:%s is ignored since dnode exiting", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_DND_EXITING;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  /* The run state is queried a second time, as in the original else-if. */
  if (dnodeGetRunStat() != DN_RUN_STAT_RUNNING) {
    dError("RPC %p, shell req:%s is ignored since dnode not running", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_APP_NOT_READY;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  if (pMsg->pCont == NULL) {
    rspMsg.code = TSDB_CODE_DND_INVALID_MSG_LEN;
    rpcSendResponse(&rspMsg);
    return;
  }

  MsgFp fp = tsTrans.msgFp[msgType];
  if (fp == NULL) {
    dError("RPC %p, shell req:%s is not processed", pMsg->handle, taosMsg[msgType]);
    rspMsg.code = TSDB_CODE_DND_MSG_NOT_PROCESSED;
    rpcSendResponse(&rspMsg);
    rpcFreeCont(pMsg->pCont);
    return;
  }

  dTrace("RPC %p, shell req:%s will be processed", pMsg->handle, taosMsg[msgType]);
  (*fp)(pMsg, pEpSet);
}
/* Send a request to the mnode endpoint set and wait for the response in rpcRsp. */
static void dnodeSendMsgToMnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp) {
  SEpSet mnodeEpSet = {0};

  dnodeGetMnodeEpSetForPeer(&mnodeEpSet);
  rpcSendRecv(tsTrans.clientRpc, &mnodeEpSet, rpcMsg, rpcRsp);
}
static int32_t dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char *secret, char *ckey) {
int32_t code = dnodeGetUserAuthFromMnode(user, spi, encrypt, secret, ckey);
if (code != TSDB_CODE_APP_NOT_READY) return code;
SAuthMsg *pMsg = rpcMallocCont(sizeof(SAuthMsg));
tstrncpy(pMsg->user, user, sizeof(pMsg->user));
dDebug("user:%s, send auth msg to mnodes", user);
SRpcMsg rpcMsg = {.pCont = pMsg, .contLen = sizeof(SAuthMsg), .msgType = TSDB_MSG_TYPE_AUTH};
SRpcMsg rpcRsp = {0};
dnodeSendMsgToMnodeRecv(&rpcMsg, &rpcRsp);
if (rpcRsp.code != 0) {
dError("user:%s, auth msg received from mnodes, error:%s", user, tstrerror(rpcRsp.code));
} else {
dDebug("user:%s, auth msg received from mnodes", user);
SAuthRsp *pRsp = rpcRsp.pCont;
memcpy(secret, pRsp->secret, TSDB_KEY_LEN);
memcpy(ckey, pRsp->ckey, TSDB_KEY_LEN);
*spi = pRsp->spi;
*encrypt = pRsp->encrypt;
}
rpcFreeCont(rpcRsp.pCont);
return rpcRsp.code;
}
/*
 * Open the shell RPC server endpoint on tsDnodeShellPort, using half of
 * (cores * threads-per-core) threads, at least one.
 * Returns 0 on success, -1 when rpcOpen() fails.
 */
static int32_t dnodeInitShellServer() {
  int32_t numOfThreads = (int32_t)((tsNumOfCores * tsNumOfThreadsPerCore) / 2.0);
  numOfThreads = (numOfThreads < 1) ? 1 : numOfThreads;

  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));

  rpcInit.label = "SHELL";
  rpcInit.localPort = tsDnodeShellPort;
  rpcInit.connType = TAOS_CONN_SERVER;
  rpcInit.numOfThreads = numOfThreads;
  rpcInit.sessions = tsMaxShellConns;
  rpcInit.idleTime = tsShellActivityTimer * 1000;
  rpcInit.cfp = dnodeProcessShellReq;
  rpcInit.afp = dnodeRetrieveUserAuthInfo;

  tsTrans.shellRpc = rpcOpen(&rpcInit);
  if (tsTrans.shellRpc != NULL) {
    dInfo("dnode shell rpc server is initialized");
    return 0;
  }

  dError("failed to init shell rpc server");
  return -1;
}
/* Close the shell RPC server endpoint if it is open. */
static void dnodeCleanupShellServer() {
  if (!tsTrans.shellRpc) {
    return;
  }

  rpcClose(tsTrans.shellRpc);
  tsTrans.shellRpc = NULL;
}
/*
 * Bring up the transport layer: fill the message dispatch table, then open
 * the RPC client, the peer server and the shell server, in that order.
 *
 * Returns 0 on success, -1 on failure. Endpoints opened before the failing
 * one are closed again; the cleanup helpers are no-ops on unopened endpoints.
 */
int32_t dnodeInitTrans() {
  /* The dispatch table was never filled, so every request came back as
   * "not processed". Fill it before any endpoint can deliver a message. */
  dnodeInitMsgFp();

  if (dnodeInitClient() != 0) {
    return -1;
  }

  if (dnodeInitPeerServer() != 0) {
    dnodeCleanupClient();
    return -1;
  }

  if (dnodeInitShellServer() != 0) {
    dnodeCleanupPeerServer();
    dnodeCleanupClient();
    return -1;
  }

  return 0;
}
/*
 * Shut down the transport layer in reverse order of initialization:
 * shell server, peer server, then the client.
 */
void dnodeCleanupTrans() {
  dnodeCleanupShellServer();
  dnodeCleanupPeerServer();
  dnodeCleanupClient();
}
/* Send an RPC request to the given endpoint set through the client endpoint. */
void dnodeSendMsgToDnode(SEpSet *epSet, SRpcMsg *rpcMsg) {
  rpcSendRequest(tsTrans.clientRpc, epSet, rpcMsg, NULL);
}
/* Send an RPC request to the current mnode endpoint set. */
void dnodeSendMsgToMnode(SRpcMsg *rpcMsg) {
  SEpSet mnodeEpSet = {0};

  dnodeGetMnodeEpSetForPeer(&mnodeEpSet);
  dnodeSendMsgToDnode(&mnodeEpSet, rpcMsg);
}
\ No newline at end of file
add_subdirectory(impl) add_subdirectory(impl)
add_subdirectory(sdb) add_subdirectory(sdb)
add_subdirectory(transaction)
...@@ -3,12 +3,11 @@ add_library(mnode ${MNODE_SRC}) ...@@ -3,12 +3,11 @@ add_library(mnode ${MNODE_SRC})
target_include_directories( target_include_directories(
mnode mnode
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mnode" PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mnode"
private "${CMAKE_CURRENT_SOURCE_DIR}/inc" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
) )
target_link_libraries( target_link_libraries(
mnode mnode
PRIVATE sdb PRIVATE sdb
PRIVATE transaction PRIVATE transport
PUBLIC transport PRIVATE cjson
PUBLIC cjson
) )
\ No newline at end of file
...@@ -13,20 +13,20 @@ ...@@ -13,20 +13,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_ACCT_H_ #ifndef _TD_MND_ACCT_H_
#define _TD_MNODE_ACCT_H_ #define _TD_MND_ACCT_H_
#include "mnodeInt.h" #include "mndInt.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int32_t mnodeInitAcct(); int32_t mndInitAcct(SMnode *pMnode);
void mnodeCleanupAcct(); void mndCleanupAcct(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_ACCT_H_*/ #endif /*_TD_MND_ACCT_H_*/
...@@ -13,20 +13,20 @@ ...@@ -13,20 +13,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_AUTH_H_ #ifndef _TD_MND_AUTH_H_
#define _TD_MNODE_AUTH_H_ #define _TD_MND_AUTH_H_
#include "mnodeInt.h" #include "mndInt.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int32_t mnodeInitAuth(); int32_t mndInitAuth(SMnode *pMnode);
void mnodeCleanupAuth(); void mndCleanupAuth(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_AUTH_H_*/ #endif /*_TD_MND_AUTH_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_BALANCE_H_
#define _TD_MND_BALANCE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode balance module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitBalance(SMnode *pMnode);
/* Cleanup counterpart of mndInitBalance for the same pMnode; returns nothing. */
void mndCleanupBalance(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_BALANCE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_CLUSTER_H_
#define _TD_MND_CLUSTER_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode cluster module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitCluster(SMnode *pMnode);
/* Cleanup counterpart of mndInitCluster for the same pMnode; returns nothing. */
void mndCleanupCluster(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_CLUSTER_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_DATABASE_H_
#define _TD_MND_DATABASE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode database (Db) module. Their signatures match
 * the MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitDb(SMnode *pMnode);
/* Cleanup counterpart of mndInitDb for the same pMnode; returns nothing. */
void mndCleanupDb(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_DATABASE_H_*/
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_DEF_H_ #ifndef _TD_MND_DEF_H_
#define _TD_MNODE_DEF_H_ #define _TD_MND_DEF_H_
#include "os.h" #include "os.h"
#include "taosmsg.h" #include "taosmsg.h"
...@@ -39,44 +39,57 @@ extern int32_t mDebugFlag; ...@@ -39,44 +39,57 @@ extern int32_t mDebugFlag;
#define mDebug(...) { if (mDebugFlag & DEBUG_DEBUG) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }} #define mDebug(...) { if (mDebugFlag & DEBUG_DEBUG) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }}
#define mTrace(...) { if (mDebugFlag & DEBUG_TRACE) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }} #define mTrace(...) { if (mDebugFlag & DEBUG_TRACE) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }}
// #define mLError(...) { monSaveLog(2, __VA_ARGS__); mError(__VA_ARGS__) } typedef struct SClusterObj SClusterObj;
// #define mLWarn(...) { monSaveLog(1, __VA_ARGS__); mWarn(__VA_ARGS__) } typedef struct SDnodeObj SDnodeObj;
// #define mLInfo(...) { monSaveLog(0, __VA_ARGS__); mInfo(__VA_ARGS__) } typedef struct SMnodeObj SMnodeObj;
typedef struct SAcctObj SAcctObj;
typedef struct SUserObj SUserObj;
typedef struct SDbObj SDbObj;
typedef struct SVgObj SVgObj;
typedef struct SSTableObj SSTableObj;
typedef struct SFuncObj SFuncObj;
typedef struct SOperObj SOperObj;
#define mLError(...) {mError(__VA_ARGS__) } typedef enum {
#define mLWarn(...) {mWarn(__VA_ARGS__) } MND_AUTH_ACCT_START = 0,
#define mLInfo(...) {mInfo(__VA_ARGS__) } MND_AUTH_ACCT_USER,
MND_AUTH_ACCT_DNODE,
typedef struct SClusterObj SClusterObj; MND_AUTH_ACCT_MNODE,
typedef struct SDnodeObj SDnodeObj; MND_AUTH_ACCT_DB,
typedef struct SMnodeObj SMnodeObj; MND_AUTH_ACCT_TABLE,
typedef struct SAcctObj SAcctObj; MND_AUTH_ACCT_MAX
typedef struct SUserObj SUserObj; } EAuthAcct;
typedef struct SDbObj SDbObj;
typedef struct SVgObj SVgObj;
typedef struct SSTableObj SSTableObj;
typedef struct SFuncObj SFuncObj;
typedef struct SOperObj SOperObj;
typedef enum { typedef enum {
MN_AUTH_ACCT_START = 0, MND_AUTH_OP_START = 0,
MN_AUTH_ACCT_USER, MND_AUTH_OP_CREATE_USER,
MN_AUTH_ACCT_DNODE, MND_AUTH_OP_ALTER_USER,
MN_AUTH_ACCT_MNODE, MND_AUTH_OP_DROP_USER,
MN_AUTH_ACCT_DB, MND_AUTH_MAX
MN_AUTH_ACCT_TABLE, } EAuthOp;
MN_AUTH_ACCT_MAX
} EMnAuthAcct;
typedef enum { typedef enum {
MN_AUTH_OP_START = 0, TRN_STAGE_PREPARE = 1,
MN_AUTH_OP_CREATE_USER, TRN_STAGE_EXECUTE = 2,
MN_AUTH_OP_ALTER_USER, TRN_STAGE_COMMIT = 3,
MN_AUTH_OP_DROP_USER, TRN_STAGE_ROLLBACK = 4,
MN_AUTH_MAX TRN_STAGE_RETRY = 5
} EMnAuthOp; } ETrnStage;
typedef enum { TRN_POLICY_ROLLBACK = 1, TRN_POLICY_RETRY = 2 } ETrnPolicy;
typedef struct STrans {
int32_t id;
ETrnStage stage;
ETrnPolicy policy;
SMnode *pMnode;
void *rpcHandle;
SArray *redoLogs;
SArray *undoLogs;
SArray *commitLogs;
SArray *redoActions;
SArray *undoActions;
} STrans;
typedef struct SClusterObj { typedef struct SClusterObj {
int64_t id; int64_t id;
...@@ -180,6 +193,7 @@ typedef struct SDbObj { ...@@ -180,6 +193,7 @@ typedef struct SDbObj {
int64_t createdTime; int64_t createdTime;
int64_t updateTime; int64_t updateTime;
SDbCfg cfg; SDbCfg cfg;
int64_t uid;
int8_t status; int8_t status;
int32_t numOfVgroups; int32_t numOfVgroups;
int32_t numOfTables; int32_t numOfTables;
...@@ -218,13 +232,13 @@ typedef struct SVgObj { ...@@ -218,13 +232,13 @@ typedef struct SVgObj {
} SVgObj; } SVgObj;
typedef struct SSTableObj { typedef struct SSTableObj {
char tableId[TSDB_TABLE_NAME_LEN]; char tableId[TSDB_TABLE_NAME_LEN];
uint64_t uid; uint64_t uid;
int64_t createdTime; int64_t createdTime;
int64_t updateTime; int64_t updateTime;
int32_t numOfColumns; // used by normal table int32_t numOfColumns; // used by normal table
int32_t numOfTags; int32_t numOfTags;
SSchema * schema; SSchema *schema;
} SSTableObj; } SSTableObj;
typedef struct SFuncObj { typedef struct SFuncObj {
...@@ -262,25 +276,26 @@ typedef struct { ...@@ -262,25 +276,26 @@ typedef struct {
typedef struct { typedef struct {
int32_t len; int32_t len;
void *rsp; void *rsp;
} SMnRsp; } SMnodeRsp;
typedef struct SMnodeMsg { typedef struct SMnodeMsg {
SMnode *pMnode;
void (*fp)(SMnodeMsg *pMsg, int32_t code); void (*fp)(SMnodeMsg *pMsg, int32_t code);
SRpcConnInfo conn; SRpcConnInfo conn;
SUserObj *pUser; SUserObj *pUser;
int16_t received; int16_t received;
int16_t successed; int16_t successed;
int16_t expected; int16_t expected;
int16_t retry; int16_t retry;
int32_t code; int32_t code;
int64_t createdTime; int64_t createdTime;
SMnRsp rpcRsp; SMnodeRsp rpcRsp;
SRpcMsg rpcMsg; SRpcMsg rpcMsg;
char pCont[]; char pCont[];
} SMnodeMsg; } SMnodeMsg;
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_DEF_H_*/ #endif /*_TD_MND_DEF_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_DNODE_H_
#define _TD_MND_DNODE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode dnode module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitDnode(SMnode *pMnode);
/* Cleanup counterpart of mndInitDnode for the same pMnode; returns nothing. */
void mndCleanupDnode(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_DNODE_H_*/
...@@ -13,20 +13,20 @@ ...@@ -13,20 +13,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_CLUSTER_H_ #ifndef _TD_MND_FUNC_H_
#define _TD_MNODE_CLUSTER_H_ #define _TD_MND_FUNC_H_
#include "mnodeInt.h" #include "mndInt.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int32_t mnodeInitCluster(); int32_t mndInitFunc(SMnode *pMnode);
void mnodeCleanupCluster(); void mndCleanupFunc(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_CLUSTER_H_*/ #endif /*_TD_MND_FUNC_H_*/
...@@ -13,31 +13,56 @@ ...@@ -13,31 +13,56 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_INT_H_ #ifndef _TD_MND_INT_H_
#define _TD_MNODE_INT_H_ #define _TD_MND_INT_H_
#include "mnodeDef.h" #include "mndDef.h"
#include "sdb.h" #include "sdb.h"
#include "trn.h" #include "tqueue.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef void (*MnodeRpcFp)(SMnodeMsg *pMsg); typedef int32_t (*MndMsgFp)(SMnode *pMnode, SMnodeMsg *pMsg);
typedef int32_t (*MndInitFp)(SMnode *pMnode);
typedef void (*MndCleanupFp)(SMnode *pMnode);
tmr_h mnodeGetTimer(); typedef struct {
int32_t mnodeGetDnodeId(); const char *name;
int64_t mnodeGetClusterId(); MndInitFp initFp;
MndCleanupFp cleanupFp;
} SMnodeStep;
void mnodeSendMsgToDnode(struct SEpSet *epSet, struct SRpcMsg *rpcMsg); typedef struct SMnode {
void mnodeSendMsgToMnode(struct SRpcMsg *rpcMsg); int32_t dnodeId;
void mnodeSendRedirectMsg(struct SRpcMsg *rpcMsg, bool forShell); int64_t clusterId;
int8_t replica;
int8_t selfIndex;
SReplica replicas[TSDB_MAX_REPLICA];
tmr_h timer;
char *path;
SSdb *pSdb;
SDnode *pDnode;
SArray *pSteps;
MndMsgFp msgFp[TSDB_MSG_TYPE_MAX];
SendMsgToDnodeFp sendMsgToDnodeFp;
SendMsgToMnodeFp sendMsgToMnodeFp;
SendRedirectMsgFp sendRedirectMsgFp;
PutMsgToMnodeQFp putMsgToApplyMsgFp;
} SMnode;
void mnodeSetMsgFp(int32_t msgType, MnodeRpcFp fp); tmr_h mndGetTimer(SMnode *pMnode);
int32_t mndGetDnodeId(SMnode *pMnode);
int64_t mndGetClusterId(SMnode *pMnode);
void mndSendMsgToDnode(SMnode *pMnode, SEpSet *pEpSet, SRpcMsg *rpcMsg);
void mndSendMsgToMnode(SMnode *pMnode, SRpcMsg *pMsg);
void mndSendRedirectMsg(SMnode *pMnode, SRpcMsg *pMsg);
void mndSetMsgHandle(SMnode *pMnode, int32_t msgType, MndMsgFp fp);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_INT_H_*/ #endif /*_TD_MND_INT_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_MNODE_H_
#define _TD_MND_MNODE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode "mnode" module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitMnode(SMnode *pMnode);
/* Cleanup counterpart of mndInitMnode for the same pMnode; returns nothing. */
void mndCleanupMnode(SMnode *pMnode);
/*
 * Endpoint-set accessors: epSet is the caller-supplied SEpSet (presumably an
 * output parameter -- confirm), redirect is a flag whose effect is defined by
 * the implementation.
 * NOTE(review): unlike every other mnd* function in these headers, these two
 * take no SMnode * argument -- confirm that this is intentional.
 */
void mndGetMnodeEpSetForPeer(SEpSet *epSet, bool redirect);
void mndGetMnodeEpSetForShell(SEpSet *epSet, bool redirect);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_MNODE_H_*/
...@@ -13,18 +13,18 @@ ...@@ -13,18 +13,18 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_OPER_H_ #ifndef _TD_MND_OPER_H_
#define _TD_MNODE_OPER_H_ #define _TD_MND_OPER_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int32_t mnodeInitOper(); int32_t mndInitOper(SMnode *pMnode);
void mnodeCleanupOper(); void mndCleanupOper(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_OPER_H_*/ #endif /*_TD_MND_OPER_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_PROFILE_H_
#define _TD_MND_PROFILE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode profile module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitProfile(SMnode *pMnode);
/* Cleanup counterpart of mndInitProfile for the same pMnode; returns nothing. */
void mndCleanupProfile(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_PROFILE_H_*/
...@@ -13,20 +13,20 @@ ...@@ -13,20 +13,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_BALANCE_H_ #ifndef _TD_MND_SHOW_H_
#define _TD_MNODE_BALANCE_H_ #define _TD_MND_SHOW_H_
#include "mnodeInt.h" #include "mndInt.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int32_t mnodeInitBalance(); int32_t mndInitShow(SMnode *pMnode);
void mnodeCleanupBalance(); void mndCleanupShow(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_BALANCE_H_*/ #endif /*_TD_MND_SHOW_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_STABLE_H_
#define _TD_MND_STABLE_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode stable module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitStable(SMnode *pMnode);
/* Cleanup counterpart of mndInitStable for the same pMnode; returns nothing. */
void mndCleanupStable(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_STABLE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_SYNC_H_
#define _TD_MND_SYNC_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode sync module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitSync(SMnode *pMnode);
/* Cleanup counterpart of mndInitSync for the same pMnode; returns nothing. */
void mndCleanupSync(SMnode *pMnode);
/* Reports, as a bool, whether pMnode is currently the master
 * (semantics inferred from the name -- confirm against the implementation). */
bool mndIsMaster(SMnode *pMnode);
/*
 * Sync proposal entry point. Its signature matches the syncfp callback
 * parameter of mndTransPrepare: int32_t (*)(SSdbRaw *pRaw, void *pData).
 * NOTE(review): takes no SMnode * -- confirm how the target mnode is resolved.
 */
int32_t mndSyncPropose(SSdbRaw *pRaw, void *pData);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_SYNC_H_*/
...@@ -13,19 +13,19 @@ ...@@ -13,19 +13,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_MNODE_TELEMETRY_H_ #ifndef _TD_MND_TELEMETRY_H_
#define _TD_MNODE_TELEMETRY_H_ #define _TD_MND_TELEMETRY_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitTelem(); int32_t mndInitTelem(SMnode *pMnode);
void mnodeCleanupTelem(); void mndCleanupTelem(SMnode *pMnode);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_MNODE_TELEMETRY_H_*/ #endif /*_TD_MND_TELEMETRY_H_*/
...@@ -13,37 +13,35 @@ ...@@ -13,37 +13,35 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_TRANSACTION_H_ #ifndef _TD_TRANSACTION_INT_H_
#define _TD_TRANSACTION_H_ #define _TD_TRANSACTION_INT_H_
#include "sdb.h" #include "mndInt.h"
#include "taosmsg.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
typedef struct STrans STrans; int32_t mndInitTrans(SMnode *pMnode);
typedef enum { TRN_POLICY_ROLLBACK = 1, TRN_POLICY_RETRY = 2 } ETrnPolicy; void mndCleanupTrans(SMnode *pMnode);
int32_t trnInit(); STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, void *rpcHandle);
void trnCleanup(); void mndTransDrop(STrans *pTrans);
int32_t mndTransAppendRedolog(STrans *pTrans, SSdbRaw *pRaw);
int32_t mndTransAppendUndolog(STrans *pTrans, SSdbRaw *pRaw);
int32_t mndTransAppendCommitlog(STrans *pTrans, SSdbRaw *pRaw);
int32_t mndTransAppendRedoAction(STrans *pTrans, SEpSet *, void *pMsg);
int32_t mndTransAppendUndoAction(STrans *pTrans, SEpSet *, void *pMsg);
STrans *trnCreate(ETrnPolicy); int32_t mndTransPrepare(STrans *pTrans, int32_t (*syncfp)(SSdbRaw *pRaw, void *pData));
void trnDrop(STrans *pTrans); int32_t mndTransApply(SMnode *pMnode, SSdbRaw *pRaw, void *pData, int32_t code);
void trnSetRpcHandle(STrans *pTrans, void *rpcHandle); int32_t mndTransExecute(SSdb *pSdb, int32_t tranId);
int32_t trnAppendRedoLog(STrans *pTrans, SSdbRaw *pRaw);
int32_t trnAppendUndoLog(STrans *pTrans, SSdbRaw *pRaw);
int32_t trnAppendCommitLog(STrans *pTrans, SSdbRaw *pRaw);
int32_t trnAppendRedoAction(STrans *pTrans, SEpSet *, void *pMsg);
int32_t trnAppendUndoAction(STrans *pTrans, SEpSet *, void *pMsg);
int32_t trnPrepare(STrans *pTrans, int32_t (*syncfp)(SSdbRaw *pRaw, void *pData)); SSdbRaw *mndTransActionEncode(STrans *pTrans);
int32_t trnApply(SSdbRaw *pRaw, void *pData, int32_t code); SSdbRow *mndTransActionDecode(SSdbRaw *pRaw);
int32_t trnExecute(int32_t tranId);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_TRANSACTION_H_*/ #endif /*_TD_TRANSACTION_INT_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_USER_H_
#define _TD_MND_USER_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode user module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitUser(SMnode *pMnode);
/* Cleanup counterpart of mndInitUser for the same pMnode; returns nothing. */
void mndCleanupUser(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_USER_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MND_VGROUP_H_
#define _TD_MND_VGROUP_H_
#include "mndInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Lifecycle hooks of the mnode vgroup module. Their signatures match the
 * MndInitFp / MndCleanupFp function-pointer types declared in mndInt.h.
 */
/* Init hook for pMnode; returns an int32_t status code
 * (presumably 0 on success -- confirm against the implementation). */
int32_t mndInitVgroup(SMnode *pMnode);
/* Cleanup counterpart of mndInitVgroup for the same pMnode; returns nothing. */
void mndCleanupVgroup(SMnode *pMnode);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MND_VGROUP_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_DATABASE_H_
#define _TD_MNODE_DATABASE_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode database module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitDb(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupDb(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_DATABASE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_DNODE_H_
#define _TD_MNODE_DNODE_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode dnode module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitDnode(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupDnode(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_DNODE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_FUNC_H_
#define _TD_MNODE_FUNC_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode func module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitFunc(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupFunc(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_FUNC_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_MNODE_H_
#define _TD_MNODE_MNODE_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode "mnode" module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitMnode(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupMnode(void);
/*
 * Endpoint-set accessors: epSet is the caller-supplied SEpSet (presumably an
 * output parameter -- confirm), redirect is a flag whose effect is defined by
 * the implementation. Both return nothing.
 */
void mnodeGetMnodeEpSetForPeer(SEpSet *epSet, bool redirect);
void mnodeGetMnodeEpSetForShell(SEpSet *epSet, bool redirect);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_MNODE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_PROFILE_H_
#define _TD_MNODE_PROFILE_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode profile module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitProfile(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupProfile(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_PROFILE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_SHOW_H_
#define _TD_MNODE_SHOW_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode show module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 * NOTE(review): "CleanUp" is capitalised differently from the "Cleanup"
 * spelling used by most sibling modules; the name is kept for callers.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitShow(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanUpShow(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_SHOW_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_STABLE_H_
#define _TD_MNODE_STABLE_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode stable module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitStable(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupStable(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_STABLE_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_SYNC_H_
#define _TD_MNODE_SYNC_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode sync module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitSync(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanUpSync(void);
/*
 * Sync proposal entry point; returns an int32_t status code. Its signature
 * matches the syncfp callback parameter of trnPrepare:
 * int32_t (*)(SSdbRaw *pRaw, void *pData).
 * NOTE(review): ownership of pData is not stated in this header -- confirm
 * against the definition whether the callee releases it.
 */
int32_t mnodeSyncPropose(SSdbRaw *pRaw, void *pData);
/*
 * Reports, as a bool, whether this mnode is the master (semantics inferred
 * from the name -- confirm against the definition). Declared with (void) so
 * it is a true prototype: before C23 an empty parameter list leaves call
 * arguments unchecked by the compiler.
 */
bool mnodeIsMaster(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_SYNC_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_USER_H_
#define _TD_MNODE_USER_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode user module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitUser(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupUser(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_USER_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_MNODE_VGROUP_H_
#define _TD_MNODE_VGROUP_H_
#include "mnodeInt.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Init / cleanup entry points of the mnode vgroup module.
 * Declared with (void) so they are true prototypes: before C23 an empty
 * parameter list leaves call arguments unchecked by the compiler.
 */
/* Takes no arguments; returns an int32_t status code. */
int32_t mnodeInitVgroup(void);
/* Takes no arguments; returns nothing. */
void mnodeCleanupVgroup(void);
#ifdef __cplusplus
}
#endif
#endif /*_TD_MNODE_VGROUP_H_*/
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "mnodeInt.h" #include "mndInt.h"
#define SDB_ACCT_VER 1 #define SDB_ACCT_VER 1
...@@ -48,10 +48,10 @@ static SSdbRow *mnodeAcctActionDecode(SSdbRaw *pRaw) { ...@@ -48,10 +48,10 @@ static SSdbRow *mnodeAcctActionDecode(SSdbRaw *pRaw) {
return NULL; return NULL;
} }
SSdbRow *pRow = sdbAllocRow(sizeof(SAcctObj)); SSdbRow *pRow = sdbAllocRow(sizeof(SAcctObj));
SAcctObj *pAcct = sdbGetRowObj(pRow); SAcctObj *pAcct = sdbGetRowObj(pRow);
if (pAcct == NULL) return NULL; if (pAcct == NULL) return NULL;
int32_t dataPos = 0; int32_t dataPos = 0;
SDB_GET_BINARY(pRaw, pRow, dataPos, pAcct->acct, TSDB_USER_LEN) SDB_GET_BINARY(pRaw, pRow, dataPos, pAcct->acct, TSDB_USER_LEN)
SDB_GET_INT64(pRaw, pRow, dataPos, &pAcct->createdTime) SDB_GET_INT64(pRaw, pRow, dataPos, &pAcct->createdTime)
...@@ -68,18 +68,18 @@ static SSdbRow *mnodeAcctActionDecode(SSdbRaw *pRaw) { ...@@ -68,18 +68,18 @@ static SSdbRow *mnodeAcctActionDecode(SSdbRaw *pRaw) {
return pRow; return pRow;
} }
static int32_t mnodeAcctActionInsert(SAcctObj *pAcct) { return 0; } static int32_t mnodeAcctActionInsert(SSdb *pSdb, SAcctObj *pAcct) { return 0; }
static int32_t mnodeAcctActionDelete(SAcctObj *pAcct) { return 0; } static int32_t mnodeAcctActionDelete(SSdb *pSdb, SAcctObj *pAcct) { return 0; }
static int32_t mnodeAcctActionUpdate(SAcctObj *pSrcAcct, SAcctObj *pDstAcct) { static int32_t mnodeAcctActionUpdate(SSdb *pSdb, SAcctObj *pSrcAcct, SAcctObj *pDstAcct) {
SAcctObj tObj; SAcctObj tObj;
int32_t len = (int32_t)((int8_t *)&tObj.info - (int8_t *)&tObj); int32_t len = (int32_t)((int8_t *)&tObj.info - (int8_t *)&tObj);
memcpy(pDstAcct, pSrcAcct, len); memcpy(pDstAcct, pSrcAcct, len);
return 0; return 0;
} }
static int32_t mnodeCreateDefaultAcct() { static int32_t mnodeCreateDefaultAcct(SSdb *pSdb) {
int32_t code = 0; int32_t code = 0;
SAcctObj acctObj = {0}; SAcctObj acctObj = {0};
...@@ -98,21 +98,20 @@ static int32_t mnodeCreateDefaultAcct() { ...@@ -98,21 +98,20 @@ static int32_t mnodeCreateDefaultAcct() {
if (pRaw == NULL) return -1; if (pRaw == NULL) return -1;
sdbSetRawStatus(pRaw, SDB_STATUS_READY); sdbSetRawStatus(pRaw, SDB_STATUS_READY);
return sdbWrite(pRaw); return sdbWrite(pSdb, pRaw);
} }
int32_t mnodeInitAcct() { int32_t mndInitAcct(SMnode *pMnode) {
SSdbTable table = {.sdbType = SDB_ACCT, SSdbTable table = {.sdbType = SDB_ACCT,
.keyType = SDB_KEY_BINARY, .keyType = SDB_KEY_BINARY,
.deployFp = (SdbDeployFp)mnodeCreateDefaultAcct, .deployFp = mnodeCreateDefaultAcct,
.encodeFp = (SdbEncodeFp)mnodeAcctActionEncode, .encodeFp = (SdbEncodeFp)mnodeAcctActionEncode,
.decodeFp = (SdbDecodeFp)mnodeAcctActionDecode, .decodeFp = (SdbDecodeFp)mnodeAcctActionDecode,
.insertFp = (SdbInsertFp)mnodeAcctActionInsert, .insertFp = (SdbInsertFp)mnodeAcctActionInsert,
.updateFp = (SdbUpdateFp)mnodeAcctActionUpdate, .updateFp = (SdbUpdateFp)mnodeAcctActionUpdate,
.deleteFp = (SdbDeleteFp)mnodeAcctActionDelete}; .deleteFp = (SdbDeleteFp)mnodeAcctActionDelete};
sdbSetTable(table);
return 0; return sdbSetTable(pMnode->pSdb, table);
} }
void mnodeCleanupAcct() {} void mndCleanupAcct(SMnode *pMnode) {}
...@@ -15,15 +15,11 @@ ...@@ -15,15 +15,11 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndAuth.h"
int32_t mnodeInitSync() { return 0; } int32_t mndInitAuth(SMnode *pMnode) { return 0; }
void mnodeCleanUpSync() {} void mndCleanupAuth(SMnode *pMnode) {}
int32_t mnodeSyncPropose(SSdbRaw *pRaw, void *pData) { int32_t mndRetriveAuth(SMnode *pMnode, char *user, char *spi, char *encrypt, char *secret, char *ckey) {
trnApply(pData, pData, 0);
free(pData);
return 0; return 0;
} }
\ No newline at end of file
bool mnodeIsMaster() { return true; }
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
// Start-up hook of the balance module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitBalance(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the balance module. Currently there is nothing to release.
void mndCleanupBalance(SMnode *pMnode) {
  (void)pMnode;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
// Start-up hook of the cluster module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitCluster(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the cluster module. Currently there is nothing to release.
void mndCleanupCluster(SMnode *pMnode) {
  (void)pMnode;
}
\ No newline at end of file
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitBalance() { return 0; } int32_t mndInitDb(SMnode *pMnode) { return 0; }
void mnodeCleanupBalance() {} void mndCleanupDb(SMnode *pMnode) {}
\ No newline at end of file \ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
// Start-up hook of the dnode module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitDnode(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the dnode module. Currently there is nothing to release.
void mndCleanupDnode(SMnode *pMnode) {
  (void)pMnode;
}
\ No newline at end of file
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitCluster() { return 0; } int32_t mndInitFunc(SMnode *pMnode) { return 0; }
void mnodeCleanupCluster() {} void mndCleanupFunc(SMnode *pMnode) {}
\ No newline at end of file \ No newline at end of file
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitMnode() { return 0; } int32_t mndInitMnode(SMnode *pMnode) { return 0; }
void mnodeCleanupMnode() {} void mndCleanupMnode(SMnode *pMnode) {}
void mnodeGetMnodeEpSetForPeer(SEpSet *epSet, bool redirect) {} void mndGetMnodeEpSetForPeer(SEpSet *epSet, bool redirect) {}
void mnodeGetMnodeEpSetForShell(SEpSet *epSet, bool redirect) {} void mndGetMnodeEpSetForShell(SEpSet *epSet, bool redirect) {}
\ No newline at end of file \ No newline at end of file
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitDb() { return 0; } int32_t mndInitOper(SMnode *pMnode) { return 0; }
void mnodeCleanupDb() {} void mndCleanupOper(SMnode *pMnode) {}
\ No newline at end of file \ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
// Start-up hook of the profile module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitProfile(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the profile module. Currently there is nothing to release.
void mndCleanupProfile(SMnode *pMnode) {
  (void)pMnode;
}
\ No newline at end of file
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "mnodeInt.h" #include "mndInt.h"
int32_t mnodeInitDnode() { return 0; } int32_t mndInitShow(SMnode *pMnode) { return 0; }
void mnodeCleanupDnode() {} void mndCleanupShow(SMnode *pMnode) {}
\ No newline at end of file \ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
// Start-up hook of the stable module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitStable(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the stable module. Currently there is nothing to release.
void mndCleanupStable(SMnode *pMnode) {
  (void)pMnode;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
#include "mndTrans.h"
// Start-up hook of the sync module. Nothing is set up yet, so it always
// reports success (0).
int32_t mndInitSync(SMnode *pMnode) {
  (void)pMnode;
  return 0;
}
// Shutdown hook of the sync module. Currently there is nothing to release.
void mndCleanupSync(SMnode *pMnode) {
  (void)pMnode;
}
// Proposes a record to the sync layer. In this placeholder the record is
// applied immediately through mndTransApply() with a success code, after which
// pData is released with free(). Always returns 0.
//
// NOTE(review): pRaw is never used and pData is handed to mndTransApply() as
// both the raw record and the payload; the result of mndTransApply() is also
// discarded. Confirm this is intentional for the stub.
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, void *pData) {
  (void)pRaw;
  (void)mndTransApply(pMnode, pData, pData, 0);

  free(pData);
  return 0;
}
// Reports whether this mnode is the master. The placeholder answers true
// unconditionally and does not look at pMnode.
bool mndIsMaster(SMnode *pMnode) {
  (void)pMnode;
  return true;
}
\ No newline at end of file
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "mnodeTelem.h" #include "mndTelem.h"
#include "tbuffer.h" #include "tbuffer.h"
#include "tglobal.h" #include "tglobal.h"
#include "mnodeSync.h" #include "mndSync.h"
#define TELEMETRY_SERVER "telemetry.taosdata.com" #define TELEMETRY_SERVER "telemetry.taosdata.com"
#define TELEMETRY_PORT 80 #define TELEMETRY_PORT 80
...@@ -36,9 +36,9 @@ static struct { ...@@ -36,9 +36,9 @@ static struct {
char email[TSDB_FQDN_LEN]; char email[TSDB_FQDN_LEN];
} tsTelem; } tsTelem;
static void mnodeBeginObject(SBufferWriter* bw) { tbufWriteChar(bw, '{'); } static void mndBeginObject(SBufferWriter* bw) { tbufWriteChar(bw, '{'); }
static void mnodeCloseObject(SBufferWriter* bw) { static void mndCloseObject(SBufferWriter* bw) {
size_t len = tbufTell(bw); size_t len = tbufTell(bw);
if (tbufGetData(bw, false)[len - 1] == ',') { if (tbufGetData(bw, false)[len - 1] == ',') {
tbufWriteCharAt(bw, len - 1, '}'); tbufWriteCharAt(bw, len - 1, '}');
...@@ -64,14 +64,14 @@ static void closeArray(SBufferWriter* bw) { ...@@ -64,14 +64,14 @@ static void closeArray(SBufferWriter* bw) {
} }
#endif #endif
static void mnodeWriteString(SBufferWriter* bw, const char* str) { static void mndWriteString(SBufferWriter* bw, const char* str) {
tbufWriteChar(bw, '"'); tbufWriteChar(bw, '"');
tbufWrite(bw, str, strlen(str)); tbufWrite(bw, str, strlen(str));
tbufWriteChar(bw, '"'); tbufWriteChar(bw, '"');
} }
static void mnodeAddIntField(SBufferWriter* bw, const char* k, int64_t v) { static void mndAddIntField(SBufferWriter* bw, const char* k, int64_t v) {
mnodeWriteString(bw, k); mndWriteString(bw, k);
tbufWriteChar(bw, ':'); tbufWriteChar(bw, ':');
char buf[32]; char buf[32];
sprintf(buf, "%" PRId64, v); sprintf(buf, "%" PRId64, v);
...@@ -79,14 +79,14 @@ static void mnodeAddIntField(SBufferWriter* bw, const char* k, int64_t v) { ...@@ -79,14 +79,14 @@ static void mnodeAddIntField(SBufferWriter* bw, const char* k, int64_t v) {
tbufWriteChar(bw, ','); tbufWriteChar(bw, ',');
} }
static void mnodeAddStringField(SBufferWriter* bw, const char* k, const char* v) { static void mndAddStringField(SBufferWriter* bw, const char* k, const char* v) {
mnodeWriteString(bw, k); mndWriteString(bw, k);
tbufWriteChar(bw, ':'); tbufWriteChar(bw, ':');
mnodeWriteString(bw, v); mndWriteString(bw, v);
tbufWriteChar(bw, ','); tbufWriteChar(bw, ',');
} }
static void mnodeAddCpuInfo(SBufferWriter* bw) { static void mndAddCpuInfo(SBufferWriter* bw) {
char* line = NULL; char* line = NULL;
size_t size = 0; size_t size = 0;
int32_t done = 0; int32_t done = 0;
...@@ -100,11 +100,11 @@ static void mnodeAddCpuInfo(SBufferWriter* bw) { ...@@ -100,11 +100,11 @@ static void mnodeAddCpuInfo(SBufferWriter* bw) {
line[size - 1] = '\0'; line[size - 1] = '\0';
if (((done & 1) == 0) && strncmp(line, "model name", 10) == 0) { if (((done & 1) == 0) && strncmp(line, "model name", 10) == 0) {
const char* v = strchr(line, ':') + 2; const char* v = strchr(line, ':') + 2;
mnodeAddStringField(bw, "cpuModel", v); mndAddStringField(bw, "cpuModel", v);
done |= 1; done |= 1;
} else if (((done & 2) == 0) && strncmp(line, "cpu cores", 9) == 0) { } else if (((done & 2) == 0) && strncmp(line, "cpu cores", 9) == 0) {
const char* v = strchr(line, ':') + 2; const char* v = strchr(line, ':') + 2;
mnodeWriteString(bw, "numOfCpu"); mndWriteString(bw, "numOfCpu");
tbufWriteChar(bw, ':'); tbufWriteChar(bw, ':');
tbufWrite(bw, v, strlen(v)); tbufWrite(bw, v, strlen(v));
tbufWriteChar(bw, ','); tbufWriteChar(bw, ',');
...@@ -116,7 +116,7 @@ static void mnodeAddCpuInfo(SBufferWriter* bw) { ...@@ -116,7 +116,7 @@ static void mnodeAddCpuInfo(SBufferWriter* bw) {
fclose(fp); fclose(fp);
} }
static void mnodeAddOsInfo(SBufferWriter* bw) { static void mndAddOsInfo(SBufferWriter* bw) {
char* line = NULL; char* line = NULL;
size_t size = 0; size_t size = 0;
...@@ -133,7 +133,7 @@ static void mnodeAddOsInfo(SBufferWriter* bw) { ...@@ -133,7 +133,7 @@ static void mnodeAddOsInfo(SBufferWriter* bw) {
p++; p++;
line[size - 2] = 0; line[size - 2] = 0;
} }
mnodeAddStringField(bw, "os", p); mndAddStringField(bw, "os", p);
break; break;
} }
} }
...@@ -142,7 +142,7 @@ static void mnodeAddOsInfo(SBufferWriter* bw) { ...@@ -142,7 +142,7 @@ static void mnodeAddOsInfo(SBufferWriter* bw) {
fclose(fp); fclose(fp);
} }
static void mnodeAddMemoryInfo(SBufferWriter* bw) { static void mndAddMemoryInfo(SBufferWriter* bw) {
char* line = NULL; char* line = NULL;
size_t size = 0; size_t size = 0;
...@@ -156,7 +156,7 @@ static void mnodeAddMemoryInfo(SBufferWriter* bw) { ...@@ -156,7 +156,7 @@ static void mnodeAddMemoryInfo(SBufferWriter* bw) {
if (strncmp(line, "MemTotal", 8) == 0) { if (strncmp(line, "MemTotal", 8) == 0) {
const char* p = strchr(line, ':') + 1; const char* p = strchr(line, ':') + 1;
while (*p == ' ') p++; while (*p == ' ') p++;
mnodeAddStringField(bw, "memory", p); mndAddStringField(bw, "memory", p);
break; break;
} }
} }
...@@ -165,32 +165,32 @@ static void mnodeAddMemoryInfo(SBufferWriter* bw) { ...@@ -165,32 +165,32 @@ static void mnodeAddMemoryInfo(SBufferWriter* bw) {
fclose(fp); fclose(fp);
} }
static void mnodeAddVersionInfo(SBufferWriter* bw) { static void mndAddVersionInfo(SBufferWriter* bw) {
mnodeAddStringField(bw, "version", version); mndAddStringField(bw, "version", version);
mnodeAddStringField(bw, "buildInfo", buildinfo); mndAddStringField(bw, "buildInfo", buildinfo);
mnodeAddStringField(bw, "gitInfo", gitinfo); mndAddStringField(bw, "gitInfo", gitinfo);
mnodeAddStringField(bw, "email", tsTelem.email); mndAddStringField(bw, "email", tsTelem.email);
} }
static void mnodeAddRuntimeInfo(SBufferWriter* bw) { static void mndAddRuntimeInfo(SBufferWriter* bw) {
SMnodeLoad load = {0}; SMnodeLoad load = {0};
if (mnodeGetLoad(&load) != 0) { if (mndGetLoad(NULL, &load) != 0) {
return; return;
} }
mnodeAddIntField(bw, "numOfDnode", load.numOfDnode); mndAddIntField(bw, "numOfDnode", load.numOfDnode);
mnodeAddIntField(bw, "numOfMnode", load.numOfMnode); mndAddIntField(bw, "numOfMnode", load.numOfMnode);
mnodeAddIntField(bw, "numOfVgroup", load.numOfVgroup); mndAddIntField(bw, "numOfVgroup", load.numOfVgroup);
mnodeAddIntField(bw, "numOfDatabase", load.numOfDatabase); mndAddIntField(bw, "numOfDatabase", load.numOfDatabase);
mnodeAddIntField(bw, "numOfSuperTable", load.numOfSuperTable); mndAddIntField(bw, "numOfSuperTable", load.numOfSuperTable);
mnodeAddIntField(bw, "numOfChildTable", load.numOfChildTable); mndAddIntField(bw, "numOfChildTable", load.numOfChildTable);
mnodeAddIntField(bw, "numOfColumn", load.numOfColumn); mndAddIntField(bw, "numOfColumn", load.numOfColumn);
mnodeAddIntField(bw, "numOfPoint", load.totalPoints); mndAddIntField(bw, "numOfPoint", load.totalPoints);
mnodeAddIntField(bw, "totalStorage", load.totalStorage); mndAddIntField(bw, "totalStorage", load.totalStorage);
mnodeAddIntField(bw, "compStorage", load.compStorage); mndAddIntField(bw, "compStorage", load.compStorage);
} }
static void mnodeSendTelemetryReport() { static void mndSendTelemetryReport() {
char buf[128] = {0}; char buf[128] = {0};
uint32_t ip = taosGetIpv4FromFqdn(TELEMETRY_SERVER); uint32_t ip = taosGetIpv4FromFqdn(TELEMETRY_SERVER);
if (ip == 0xffffffff) { if (ip == 0xffffffff) {
...@@ -203,20 +203,20 @@ static void mnodeSendTelemetryReport() { ...@@ -203,20 +203,20 @@ static void mnodeSendTelemetryReport() {
return; return;
} }
int64_t clusterId = mnodeGetClusterId(); int64_t clusterId = mndGetClusterId(NULL);
char clusterIdStr[20] = {0}; char clusterIdStr[20] = {0};
snprintf(clusterIdStr, sizeof(clusterIdStr), "%" PRId64, clusterId); snprintf(clusterIdStr, sizeof(clusterIdStr), "%" PRId64, clusterId);
SBufferWriter bw = tbufInitWriter(NULL, false); SBufferWriter bw = tbufInitWriter(NULL, false);
mnodeBeginObject(&bw); mndBeginObject(&bw);
mnodeAddStringField(&bw, "instanceId", clusterIdStr); mndAddStringField(&bw, "instanceId", clusterIdStr);
mnodeAddIntField(&bw, "reportVersion", 1); mndAddIntField(&bw, "reportVersion", 1);
mnodeAddOsInfo(&bw); mndAddOsInfo(&bw);
mnodeAddCpuInfo(&bw); mndAddCpuInfo(&bw);
mnodeAddMemoryInfo(&bw); mndAddMemoryInfo(&bw);
mnodeAddVersionInfo(&bw); mndAddVersionInfo(&bw);
mnodeAddRuntimeInfo(&bw); mndAddRuntimeInfo(&bw);
mnodeCloseObject(&bw); mndCloseObject(&bw);
const char* header = const char* header =
"POST /report HTTP/1.1\n" "POST /report HTTP/1.1\n"
...@@ -240,12 +240,12 @@ static void mnodeSendTelemetryReport() { ...@@ -240,12 +240,12 @@ static void mnodeSendTelemetryReport() {
taosCloseSocket(fd); taosCloseSocket(fd);
} }
static void* mnodeTelemThreadFp(void* param) { static void* mndTelemThreadFp(void* param) {
struct timespec end = {0}; struct timespec end = {0};
clock_gettime(CLOCK_REALTIME, &end); clock_gettime(CLOCK_REALTIME, &end);
end.tv_sec += 300; // wait 5 minutes before send first report end.tv_sec += 300; // wait 5 minutes before send first report
setThreadName("mnode-telem"); setThreadName("mnd-telem");
while (!tsTelem.exit) { while (!tsTelem.exit) {
int32_t r = 0; int32_t r = 0;
...@@ -256,8 +256,8 @@ static void* mnodeTelemThreadFp(void* param) { ...@@ -256,8 +256,8 @@ static void* mnodeTelemThreadFp(void* param) {
if (r == 0) break; if (r == 0) break;
if (r != ETIMEDOUT) continue; if (r != ETIMEDOUT) continue;
if (mnodeIsMaster()) { if (mndIsMaster(NULL)) {
mnodeSendTelemetryReport(); mndSendTelemetryReport();
} }
end.tv_sec += REPORT_INTERVAL; end.tv_sec += REPORT_INTERVAL;
} }
...@@ -265,7 +265,7 @@ static void* mnodeTelemThreadFp(void* param) { ...@@ -265,7 +265,7 @@ static void* mnodeTelemThreadFp(void* param) {
return NULL; return NULL;
} }
static void mnodeGetEmail(char* filepath) { static void mndGetEmail(char* filepath) {
int32_t fd = taosOpenFileRead(filepath); int32_t fd = taosOpenFileRead(filepath);
if (fd < 0) { if (fd < 0) {
return; return;
...@@ -278,7 +278,7 @@ static void mnodeGetEmail(char* filepath) { ...@@ -278,7 +278,7 @@ static void mnodeGetEmail(char* filepath) {
taosCloseFile(fd); taosCloseFile(fd);
} }
int32_t mnodeInitTelem() { int32_t mndInitTelem(SMnode *pMnode) {
tsTelem.enable = tsEnableTelemetryReporting; tsTelem.enable = tsEnableTelemetryReporting;
if (!tsTelem.enable) return 0; if (!tsTelem.enable) return 0;
...@@ -287,23 +287,23 @@ int32_t mnodeInitTelem() { ...@@ -287,23 +287,23 @@ int32_t mnodeInitTelem() {
pthread_cond_init(&tsTelem.cond, NULL); pthread_cond_init(&tsTelem.cond, NULL);
tsTelem.email[0] = 0; tsTelem.email[0] = 0;
mnodeGetEmail("/usr/local/taos/email"); mndGetEmail("/usr/local/taos/email");
pthread_attr_t attr; pthread_attr_t attr;
pthread_attr_init(&attr); pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
int32_t code = pthread_create(&tsTelem.thread, &attr, mnodeTelemThreadFp, NULL); int32_t code = pthread_create(&tsTelem.thread, &attr, mndTelemThreadFp, NULL);
pthread_attr_destroy(&attr); pthread_attr_destroy(&attr);
if (code != 0) { if (code != 0) {
mTrace("failed to create telemetry thread since :%s", strerror(code)); mTrace("failed to create telemetry thread since :%s", strerror(code));
} }
mInfo("mnode telemetry is initialized"); mInfo("mnd telemetry is initialized");
return 0; return 0;
} }
void mnodeCleanupTelem() { void mndCleanupTelem(SMnode *pMnode) {
if (!tsTelem.enable) return; if (!tsTelem.enable) return;
if (taosCheckPthreadValid(tsTelem.thread)) { if (taosCheckPthreadValid(tsTelem.thread)) {
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "mndTrans.h"
#include "trpc.h"
#define SDB_TRANS_VER 1
#define TRN_DEFAULT_ARRAY_SIZE 8
SSdbRaw *mndTransActionEncode(STrans *pTrans) {
int32_t rawDataLen = 10 * sizeof(int32_t);
int32_t redoLogNum = taosArrayGetSize(pTrans->redoLogs);
int32_t undoLogNum = taosArrayGetSize(pTrans->undoLogs);
int32_t commitLogNum = taosArrayGetSize(pTrans->commitLogs);
int32_t redoActionNum = taosArrayGetSize(pTrans->redoActions);
int32_t undoActionNum = taosArrayGetSize(pTrans->undoActions);
for (int32_t i = 0; i < redoLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->redoLogs, i);
rawDataLen += sdbGetRawTotalSize(pTmp);
}
for (int32_t i = 0; i < undoLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->undoLogs, i);
rawDataLen += sdbGetRawTotalSize(pTmp);
}
for (int32_t i = 0; i < commitLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->commitLogs, i);
rawDataLen += sdbGetRawTotalSize(pTmp);
}
SSdbRaw *pRaw = sdbAllocRaw(SDB_TRANS, SDB_TRANS_VER, rawDataLen);
if (pRaw == NULL) {
mError("trn:%d, failed to alloc raw since %s", pTrans->id, terrstr());
return NULL;
}
int32_t dataPos = 0;
SDB_SET_INT32(pRaw, dataPos, pTrans->id)
SDB_SET_INT8(pRaw, dataPos, pTrans->stage)
SDB_SET_INT8(pRaw, dataPos, pTrans->policy)
SDB_SET_INT32(pRaw, dataPos, redoLogNum)
SDB_SET_INT32(pRaw, dataPos, undoLogNum)
SDB_SET_INT32(pRaw, dataPos, commitLogNum)
SDB_SET_INT32(pRaw, dataPos, redoActionNum)
SDB_SET_INT32(pRaw, dataPos, undoActionNum)
for (int32_t i = 0; i < redoLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->redoLogs, i);
int32_t len = sdbGetRawTotalSize(pTmp);
SDB_SET_INT32(pRaw, dataPos, len)
SDB_SET_BINARY(pRaw, dataPos, (void *)pTmp, len)
}
for (int32_t i = 0; i < undoLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->undoLogs, i);
int32_t len = sdbGetRawTotalSize(pTmp);
SDB_SET_INT32(pRaw, dataPos, len)
SDB_SET_BINARY(pRaw, dataPos, (void *)pTmp, len)
}
for (int32_t i = 0; i < commitLogNum; ++i) {
SSdbRaw *pTmp = taosArrayGet(pTrans->commitLogs, i);
int32_t len = sdbGetRawTotalSize(pTmp);
SDB_SET_INT32(pRaw, dataPos, len)
SDB_SET_BINARY(pRaw, dataPos, (void *)pTmp, len)
}
mDebug("trn:%d, is encoded as raw:%p, len:%d", pTrans->id, pRaw, dataPos);
return pRaw;
}
// Rebuilds a transaction row from its raw form.
//
// Checks the soft version of the raw, allocates a row holding an STrans,
// creates the five log/action arrays, then reads the header fields and the
// redo logs. Returns the row on success and NULL on failure.
//
// NOTE(review): only the redo logs are read back; the undo and commit logs
// that the encoder writes are not decoded here - confirm whether that is
// intended.
// NOTE(review): the early return taken when an array cannot be created does
// not release pRow or the arrays that were created - confirm the proper
// release call for a row.
// NOTE(review): the SDB_GET_* macros are defined elsewhere; if they return
// from this function on a read error, the pData buffer of that iteration is
// not released - confirm.
SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) {
  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) {
    mError("failed to get soft ver from raw:%p since %s", pRaw, terrstr());
    return NULL;
  }

  if (sver != SDB_TRANS_VER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    mError("failed to get check soft ver from raw:%p since %s", pRaw, terrstr());
    return NULL;
  }

  SSdbRow *pRow = sdbAllocRow(sizeof(STrans));
  STrans  *pTrans = sdbGetRowObj(pRow);
  if (pTrans == NULL) {
    mError("failed to alloc trans from raw:%p since %s", pRaw, terrstr());
    return NULL;
  }

  pTrans->redoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->commitLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->redoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));

  if (pTrans->redoLogs == NULL || pTrans->undoLogs == NULL || pTrans->commitLogs == NULL ||
      pTrans->redoActions == NULL || pTrans->undoActions == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    mDebug("trn:%d, failed to create array while parsed from raw:%p", pTrans->id, pRaw);
    return NULL;
  }

  int32_t redoLogNum = 0;
  int32_t undoLogNum = 0;
  int32_t commitLogNum = 0;
  int32_t redoActionNum = 0;
  int32_t undoActionNum = 0;

  int32_t dataPos = 0;
  SDB_GET_INT32(pRaw, pRow, dataPos, &pTrans->id)
  SDB_GET_INT8(pRaw, pRow, dataPos, (int8_t *)&pTrans->stage)
  SDB_GET_INT8(pRaw, pRow, dataPos, (int8_t *)&pTrans->policy)
  SDB_GET_INT32(pRaw, pRow, dataPos, &redoLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &undoLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &commitLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &redoActionNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &undoActionNum)

  int32_t code = 0;
  for (int32_t i = 0; i < redoLogNum; ++i) {
    int32_t dataLen = 0;
    SDB_GET_INT32(pRaw, pRow, dataPos, &dataLen)

    char *pData = malloc(dataLen);
    if (pData == NULL) {
      // A failed allocation must not reach the copy below.
      code = TSDB_CODE_OUT_OF_MEMORY;
      break;
    }
    SDB_GET_BINARY(pRaw, pRow, dataPos, pData, dataLen);

    // The array holds pointers (element size is sizeof(void *)), so the
    // address of the pointer is pushed, not the payload bytes.
    void *ret = taosArrayPush(pTrans->redoLogs, &pData);
    if (ret == NULL) {
      free(pData);  // not owned by the array, release it here
      code = TSDB_CODE_OUT_OF_MEMORY;
      break;
    }
  }

  if (code != 0) {
    terrno = code;
    mError("trn:%d, failed to parse from raw:%p since %s", pTrans->id, pRaw, terrstr());
    mndTransDrop(pTrans);
    return NULL;
  }

  mDebug("trn:%d, is parsed from raw:%p", pTrans->id, pRaw);
  return pRow;
}
// Insert callback registered for the transaction table: writes every redo log
// of the transaction to the sdb. Stops at the first failing write and returns
// its code; returns 0 when all writes succeed.
static int32_t mndTransActionInsert(SSdb *pSdb, STrans *pTrans) {
  SArray *pArray = pTrans->redoLogs;
  int32_t arraySize = taosArrayGetSize(pArray);

  for (int32_t i = 0; i < arraySize; ++i) {
    // The array stores SSdbRaw pointers (element size sizeof(void *)), so the
    // stored pointer is fetched rather than the address of the slot.
    SSdbRaw *pRaw = taosArrayGetP(pArray, i);
    int32_t  code = sdbWrite(pSdb, pRaw);
    if (code != 0) {
      mError("trn:%d, failed to write raw:%p to sdb since %s", pTrans->id, pRaw, terrstr());
      return code;
    }
  }

  mDebug("trn:%d, write to sdb", pTrans->id);
  return 0;
}
// Delete callback registered for the transaction table: writes every redo log
// of the transaction to the sdb. Stops at the first failing write and returns
// its code; returns 0 when all writes succeed.
//
// NOTE(review): this replays the redo logs exactly like the insert callback;
// confirm that is what a delete is supposed to do.
static int32_t mndTransActionDelete(SSdb *pSdb, STrans *pTrans) {
  SArray *pArray = pTrans->redoLogs;
  int32_t arraySize = taosArrayGetSize(pArray);

  for (int32_t i = 0; i < arraySize; ++i) {
    // The array stores SSdbRaw pointers (element size sizeof(void *)), so the
    // stored pointer is fetched rather than the address of the slot.
    SSdbRaw *pRaw = taosArrayGetP(pArray, i);
    int32_t  code = sdbWrite(pSdb, pRaw);
    if (code != 0) {
      mError("trn:%d, failed to write raw:%p to sdb since %s", pTrans->id, pRaw, terrstr());
      return code;
    }
  }

  mDebug("trn:%d, delete from sdb", pTrans->id);
  return 0;
}
// Update callback registered for the transaction table: writes every redo log
// of pTrans to the sdb, then copies the stage of pDstTrans into pTrans.
// Stops at the first failing write and returns its code (the stage is then
// left untouched); returns 0 otherwise.
static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *pTrans, STrans *pDstTrans) {
  SArray *pArray = pTrans->redoLogs;
  int32_t arraySize = taosArrayGetSize(pArray);

  for (int32_t i = 0; i < arraySize; ++i) {
    // The array stores SSdbRaw pointers (element size sizeof(void *)), so the
    // stored pointer is fetched rather than the address of the slot.
    SSdbRaw *pRaw = taosArrayGetP(pArray, i);
    int32_t  code = sdbWrite(pSdb, pRaw);
    if (code != 0) {
      mError("trn:%d, failed to write raw:%p to sdb since %s", pTrans->id, pRaw, terrstr());
      return code;
    }
  }

  pTrans->stage = pDstTrans->stage;
  mDebug("trn:%d, update in sdb", pTrans->id);
  return 0;
}
// Produces the id for a new transaction. Placeholder: every call returns 1.
// Declared with (void) so the function has a real prototype instead of an
// unspecified parameter list.
static int32_t trnGenerateTransId(void) { return 1; }
// Allocates a transaction in the PREPARE stage.
//
// pMnode    - owning mnode; stored in the transaction because later steps
//             reach the sdb through pTrans->pMnode.
// policy    - stored as the transaction policy.
// rpcHandle - stored as the handle used for responding.
//
// Returns the new transaction, or NULL with terrno set to
// TSDB_CODE_OUT_OF_MEMORY when an allocation fails. On failure everything
// that was allocated so far is released.
STrans *mndTransCreate(SMnode *pMnode, ETrnPolicy policy, void *rpcHandle) {
  STrans *pTrans = calloc(1, sizeof(STrans));
  if (pTrans == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    mError("failed to create transaction since %s", terrstr());
    return NULL;
  }

  pTrans->id = trnGenerateTransId();
  pTrans->stage = TRN_STAGE_PREPARE;
  pTrans->policy = policy;
  pTrans->pMnode = pMnode;
  pTrans->rpcHandle = rpcHandle;
  pTrans->redoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->commitLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->redoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));

  if (pTrans->redoLogs == NULL || pTrans->undoLogs == NULL || pTrans->commitLogs == NULL ||
      pTrans->redoActions == NULL || pTrans->undoActions == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    mError("failed to create transaction since %s", terrstr());

    // The arrays are still empty here, so destroying the containers is enough.
    if (pTrans->redoLogs != NULL) taosArrayDestroy(pTrans->redoLogs);
    if (pTrans->undoLogs != NULL) taosArrayDestroy(pTrans->undoLogs);
    if (pTrans->commitLogs != NULL) taosArrayDestroy(pTrans->commitLogs);
    if (pTrans->redoActions != NULL) taosArrayDestroy(pTrans->redoActions);
    if (pTrans->undoActions != NULL) taosArrayDestroy(pTrans->undoActions);
    free(pTrans);
    return NULL;
  }

  mDebug("trn:%d, is created, %p", pTrans->id, pTrans);
  return pTrans;
}
// Releases every entry held by a log/action array and then the array itself.
// A NULL array is ignored.
static void trnDropArray(SArray *pArray) {
  if (pArray == NULL) return;

  for (int32_t i = 0; i < pArray->size; ++i) {
    // The arrays hold pointers (element size sizeof(void *)); the stored
    // pointer is what must be freed, not the address of the slot inside the
    // array's own buffer.
    SSdbRaw *pRaw = taosArrayGetP(pArray, i);
    tfree(pRaw);
  }

  taosArrayDestroy(pArray);
}
// Destroys a transaction: drops its five log/action arrays (redo logs, undo
// logs, commit logs, redo actions, undo actions, in that order), logs the
// drop, and finally releases the transaction object itself.
void mndTransDrop(STrans *pTrans) {
  SArray *owned[] = {pTrans->redoLogs, pTrans->undoLogs, pTrans->commitLogs, pTrans->redoActions,
                     pTrans->undoActions};

  for (size_t k = 0; k < sizeof(owned) / sizeof(owned[0]); ++k) {
    trnDropArray(owned[k]);
  }

  mDebug("trn:%d, is dropped, %p", pTrans->id, pTrans);
  tfree(pTrans);
}
// Replaces the rpc handle stored in the transaction and traces the new value.
void mndTransSetRpcHandle(STrans *pTrans, void *rpcHandle) {
  mTrace("trn:%d, set rpc handle:%p", pTrans->id, rpcHandle);
  pTrans->rpcHandle = rpcHandle;
}
// Appends a raw pointer to one of the transaction's arrays.
//
// Returns 0 on success. Returns -1 with terrno set to
// TSDB_CODE_OUT_OF_MEMORY when either argument is NULL or the array cannot
// take the element.
static int32_t mndTransAppendArray(SArray *pArray, SSdbRaw *pRaw) {
  if (pArray == NULL || pRaw == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // The arrays are created with sizeof(void *) elements: the element to copy
  // in is the pointer itself, so its address is passed. Passing pRaw would
  // copy the leading bytes of the raw instead.
  void *ptr = taosArrayPush(pArray, &pRaw);
  if (ptr == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  return 0;
}
// Adds a raw to the redo logs of the transaction and traces the outcome.
// Returns the result of mndTransAppendArray().
int32_t mndTransAppendRedolog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray       *pLogs = pTrans->redoLogs;
  const int32_t ret = mndTransAppendArray(pLogs, pRaw);

  mTrace("trn:%d, raw:%p append to redo logs, code:%d", pTrans->id, pRaw, ret);
  return ret;
}
// Adds a raw to the undo logs of the transaction and traces the outcome.
// Returns the result of mndTransAppendArray().
int32_t mndTransAppendUndolog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray       *pLogs = pTrans->undoLogs;
  const int32_t ret = mndTransAppendArray(pLogs, pRaw);

  mTrace("trn:%d, raw:%p append to undo logs, code:%d", pTrans->id, pRaw, ret);
  return ret;
}
// Adds a raw to the commit logs of the transaction and traces the outcome.
// Returns the result of mndTransAppendArray().
int32_t mndTransAppendCommitlog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray       *pLogs = pTrans->commitLogs;
  const int32_t ret = mndTransAppendArray(pLogs, pRaw);

  mTrace("trn:%d, raw:%p append to commit logs, code:%d", pTrans->id, pRaw, ret);
  return ret;
}
// Adds a message to the redo actions of the transaction and traces it.
// Returns the result of mndTransAppendArray(). pEpSet is not used by the
// current implementation.
int32_t mndTransAppendRedoAction(STrans *pTrans, SEpSet *pEpSet, void *pMsg) {
  SArray       *pActions = pTrans->redoActions;
  const int32_t ret = mndTransAppendArray(pActions, pMsg);

  mTrace("trn:%d, msg:%p append to redo actions", pTrans->id, pMsg);
  return ret;
}
// Adds a message to the undo actions of the transaction and traces it.
// Returns the result of mndTransAppendArray(). pEpSet is not used by the
// current implementation.
int32_t mndTransAppendUndoAction(STrans *pTrans, SEpSet *pEpSet, void *pMsg) {
  SArray       *pActions = pTrans->undoActions;
  const int32_t ret = mndTransAppendArray(pActions, pMsg);

  mTrace("trn:%d, msg:%p append to undo actions", pTrans->id, pMsg);
  return ret;
}
// Registers the transaction table (SDB_TRANS, keyed by int32) and its
// encode/decode/insert/update/delete callbacks with the mnode's sdb.
// No deploy callback is set. Returns the result of sdbSetTable().
int32_t mndInitTrans(SMnode *pMnode) {
  SSdbTable table = {0};

  table.sdbType = SDB_TRANS;
  table.keyType = SDB_KEY_INT32;
  table.encodeFp = (SdbEncodeFp)mndTransActionEncode;
  table.decodeFp = (SdbDecodeFp)mndTransActionDecode;
  table.insertFp = (SdbInsertFp)mndTransActionInsert;
  table.updateFp = (SdbUpdateFp)mndTransActionUpdate;
  table.deleteFp = (SdbDeleteFp)mndTransActionDelete;

  return sdbSetTable(pMnode->pSdb, table);
}
// Shutdown hook of the transaction module. Currently there is nothing to
// release.
void mndCleanupTrans(SMnode *pMnode) {
  (void)pMnode;
}
// Persists a transaction and hands it to the sync layer.
//
// Encodes the transaction, marks the raw as SDB_STATUS_CREATING, writes it to
// the sdb reached through pTrans->pMnode, and finally calls syncfp with the
// raw and the transaction's rpc handle.
//
// Returns 0 on success and -1 when syncfp is NULL or any step fails.
// NOTE(review): ownership of pRaw after a failed sdbWrite()/syncfp call is
// not visible here, so it is not released on those paths - confirm.
int32_t mndTransPrepare(STrans *pTrans, int32_t (*syncfp)(SSdbRaw *pRaw, void *pData)) {
  if (syncfp == NULL) return -1;

  SSdbRaw *pRaw = mndTransActionEncode(pTrans);
  if (pRaw == NULL) {
    // The failing step is the encoding, so the message says so.
    mError("trn:%d, failed to encode trans since %s", pTrans->id, terrstr());
    return -1;
  }
  sdbSetRawStatus(pRaw, SDB_STATUS_CREATING);

  if (sdbWrite(pTrans->pMnode->pSdb, pRaw) != 0) {
    mError("trn:%d, failed to write trans since %s", pTrans->id, terrstr());
    return -1;
  }

  if ((*syncfp)(pRaw, pTrans->rpcHandle) != 0) {
    mError("trn:%d, failed to sync trans since %s", pTrans->id, terrstr());
    return -1;
  }

  return 0;
}
// Sends an rpc response carrying `code` on the given handle. Does nothing
// when the handle is NULL.
static void trnSendRpcRsp(void *rpcHandle, int32_t code) {
  if (rpcHandle == NULL) return;

  // Report the code the caller asked for; the parameter used to be ignored in
  // favour of whatever terrno held at this point.
  SRpcMsg rspMsg = {.handle = rpcHandle, .code = code};
  rpcSendResponse(&rspMsg);
}
// Applies the outcome of a proposal.
//
// pRaw  - record to write to the mnode's sdb when the proposal succeeded.
// pData - handle passed to trnSendRpcRsp() when an error has to be reported.
// code  - outcome of the proposal; non-zero means it failed.
//
// When code is non-zero, that code is sent as the response and 0 is returned.
// Otherwise pRaw is written to the sdb; if the write fails, the failure code
// is sent as the response, terrno is restored to it, and -1 is returned.
int32_t mndTransApply(SMnode *pMnode, SSdbRaw *pRaw, void *pData, int32_t code) {
  if (code != 0) {
    // Forward the code reported by the caller rather than the current terrno.
    trnSendRpcRsp(pData, code);
    return 0;
  }

  // Write the record itself; pData is the response handle, not the record.
  if (sdbWrite(pMnode->pSdb, pRaw) != 0) {
    code = terrno;
    trnSendRpcRsp(pData, code);
    terrno = code;  // sending the response may have changed terrno
    return -1;
  }

  return 0;
}
// Writes every pointer stored in pArray to the mnode's sdb, in order.
// Returns -1 as soon as one write fails, 0 when all of them succeed.
static int32_t trnExecuteArray(SMnode *pMnode, SArray *pArray) {
  int32_t idx = 0;

  while (idx < pArray->size) {
    SSdbRaw *pEntry = taosArrayGetP(pArray, idx);
    if (sdbWrite(pMnode->pSdb, pEntry) != 0) {
      return -1;
    }
    ++idx;
  }

  return 0;
}
// Writes the redo logs of the transaction to the sdb via trnExecuteArray().
static int32_t trnExecuteRedoLogs(STrans *pTrans) {
  SArray *pLogs = pTrans->redoLogs;
  return trnExecuteArray(pTrans->pMnode, pLogs);
}
// Writes the undo logs of the transaction to the sdb via trnExecuteArray().
static int32_t trnExecuteUndoLogs(STrans *pTrans) {
  SArray *pLogs = pTrans->undoLogs;
  return trnExecuteArray(pTrans->pMnode, pLogs);
}
// Writes the commit logs of the transaction to the sdb via trnExecuteArray().
static int32_t trnExecuteCommitLogs(STrans *pTrans) {
  SArray *pLogs = pTrans->commitLogs;
  return trnExecuteArray(pTrans->pMnode, pLogs);
}
// Passes the redo actions of the transaction to trnExecuteArray().
// NOTE(review): trnExecuteArray() writes each entry to the sdb as a raw;
// confirm that is the intended handling for action messages.
static int32_t trnExecuteRedoActions(STrans *pTrans) {
  SArray *pActions = pTrans->redoActions;
  return trnExecuteArray(pTrans->pMnode, pActions);
}
// Passes the undo actions of the transaction to trnExecuteArray().
// NOTE(review): trnExecuteArray() writes each entry to the sdb as a raw;
// confirm that is the intended handling for action messages.
static int32_t trnExecuteUndoActions(STrans *pTrans) {
  SArray *pActions = pTrans->undoActions;
  return trnExecuteArray(pTrans->pMnode, pActions);
}
// PREPARE stage: replays the redo logs. On success the transaction moves to
// EXECUTE and 0 is returned; on failure it moves to ROLLBACK and -1 is
// returned.
static int32_t trnPerformPrepareStage(STrans *pTrans) {
  if (trnExecuteRedoLogs(pTrans) != 0) {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }

  pTrans->stage = TRN_STAGE_EXECUTE;
  return 0;
}
// EXECUTE stage: runs the redo actions.
//   - result 0: stage becomes COMMIT, returns 0.
//   - result TSDB_CODE_MND_ACTION_IN_PROGRESS: stage unchanged, returns -1.
//   - any other result: stage becomes RETRY when the policy is
//     TRN_POLICY_RETRY, ROLLBACK otherwise; returns 0.
static int32_t trnPerformExecuteStage(STrans *pTrans) {
  const int32_t result = trnExecuteRedoActions(pTrans);

  if (result == 0) {
    pTrans->stage = TRN_STAGE_COMMIT;
    return 0;
  }

  if (result == TSDB_CODE_MND_ACTION_IN_PROGRESS) {
    return -1;
  }

  pTrans->stage = (pTrans->policy == TRN_POLICY_RETRY) ? TRN_STAGE_RETRY : TRN_STAGE_ROLLBACK;
  return 0;
}
// COMMIT stage: replays the commit logs. On success the stage is set to
// EXECUTE and 0 is returned; on failure the stage is set to ROLLBACK and -1
// is returned.
// NOTE(review): going back to EXECUTE after a successful commit looks like a
// placeholder - confirm the intended final stage.
static int32_t trnPerformCommitStage(STrans *pTrans) {
  const bool failed = (trnExecuteCommitLogs(pTrans) != 0);

  pTrans->stage = failed ? TRN_STAGE_ROLLBACK : TRN_STAGE_EXECUTE;
  return failed ? -1 : 0;
}
// ROLLBACK stage: replays the undo logs so the effects of the transaction are
// reverted. (The commit logs used to be replayed here, which would apply the
// transaction instead of undoing it.)
// On success the stage is set to EXECUTE and 0 is returned; on failure the
// stage stays ROLLBACK and -1 is returned.
// NOTE(review): the stage chosen after a successful rollback is kept as it
// was and looks like a placeholder - confirm the intended final stage.
static int32_t trnPerformRollbackStage(STrans *pTrans) {
  if (trnExecuteUndoLogs(pTrans) == 0) {
    pTrans->stage = TRN_STAGE_EXECUTE;
    return 0;
  } else {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }
}
/*
 * Retry stage: write the commit logs.
 * On success the stage is set to TRN_STAGE_EXECUTE and 0 is returned;
 * on failure the stage is set to TRN_STAGE_ROLLBACK and -1 is returned.
 *
 * NOTE(review): this body is identical to the commit stage; presumably a retry
 * should re-run the redo actions instead -- confirm.
 */
static int32_t trnPerformRetryStage(STrans *pTrans) {
  if (trnExecuteCommitLogs(pTrans) != 0) {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }

  pTrans->stage = TRN_STAGE_EXECUTE;
  return 0;
}
/*
 * Drive the transaction identified by tranId through its stages.
 *
 * The transaction is acquired from pSdb (SDB_TRANS table) and released before
 * returning. Stages are examined in the fixed order PREPARE, EXECUTE, COMMIT,
 * ROLLBACK, RETRY; because each handler may change pTrans->stage, a single call
 * can pass through several stages.
 *
 * Returns 0 when every stage handler that ran succeeded, -1 when the transaction
 * cannot be acquired or a stage handler fails.
 */
int32_t mndTransExecute(SSdb *pSdb, int32_t tranId) {
  STrans *pTrans = sdbAcquire(pSdb, SDB_TRANS, &tranId);
  if (pTrans == NULL) {
    return -1;
  }

  int32_t result = -1;

  if (pTrans->stage == TRN_STAGE_PREPARE && trnPerformPrepareStage(pTrans) != 0) goto _OVER;
  if (pTrans->stage == TRN_STAGE_EXECUTE && trnPerformExecuteStage(pTrans) != 0) goto _OVER;
  if (pTrans->stage == TRN_STAGE_COMMIT && trnPerformCommitStage(pTrans) != 0) goto _OVER;
  if (pTrans->stage == TRN_STAGE_ROLLBACK && trnPerformRollbackStage(pTrans) != 0) goto _OVER;
  if (pTrans->stage == TRN_STAGE_RETRY && trnPerformRetryStage(pTrans) != 0) goto _OVER;

  result = 0;

_OVER:
  // Single release point for both the success and the failure paths.
  sdbRelease(pSdb, pTrans);
  return result;
}
\ No newline at end of file
...@@ -14,14 +14,13 @@ ...@@ -14,14 +14,13 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "mnodeSync.h" #include "mndSync.h"
#include "os.h"
#include "tglobal.h"
#include "tkey.h" #include "tkey.h"
#include "mndTrans.h"
#define SDB_USER_VER 1 #define SDB_USER_VER 1
static SSdbRaw *mnodeUserActionEncode(SUserObj *pUser) { static SSdbRaw *mndUserActionEncode(SUserObj *pUser) {
SSdbRaw *pRaw = sdbAllocRaw(SDB_USER, SDB_USER_VER, sizeof(SAcctObj)); SSdbRaw *pRaw = sdbAllocRaw(SDB_USER, SDB_USER_VER, sizeof(SAcctObj));
if (pRaw == NULL) return NULL; if (pRaw == NULL) return NULL;
...@@ -37,7 +36,7 @@ static SSdbRaw *mnodeUserActionEncode(SUserObj *pUser) { ...@@ -37,7 +36,7 @@ static SSdbRaw *mnodeUserActionEncode(SUserObj *pUser) {
return pRaw; return pRaw;
} }
static SSdbRow *mnodeUserActionDecode(SSdbRaw *pRaw) { static SSdbRow *mndUserActionDecode(SSdbRaw *pRaw) {
int8_t sver = 0; int8_t sver = 0;
if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL; if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL;
...@@ -61,14 +60,14 @@ static SSdbRow *mnodeUserActionDecode(SSdbRaw *pRaw) { ...@@ -61,14 +60,14 @@ static SSdbRow *mnodeUserActionDecode(SSdbRaw *pRaw) {
return pRow; return pRow;
} }
static int32_t mnodeUserActionInsert(SUserObj *pUser) { static int32_t mndUserActionInsert(SSdb *pSdb, SUserObj *pUser) {
pUser->prohibitDbHash = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); pUser->prohibitDbHash = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (pUser->prohibitDbHash == NULL) { if (pUser->prohibitDbHash == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
pUser->pAcct = sdbAcquire(SDB_ACCT, pUser->acct); pUser->pAcct = sdbAcquire(pSdb, SDB_ACCT, pUser->acct);
if (pUser->pAcct == NULL) { if (pUser->pAcct == NULL) {
terrno = TSDB_CODE_MND_ACCT_NOT_EXIST; terrno = TSDB_CODE_MND_ACCT_NOT_EXIST;
return -1; return -1;
...@@ -77,28 +76,28 @@ static int32_t mnodeUserActionInsert(SUserObj *pUser) { ...@@ -77,28 +76,28 @@ static int32_t mnodeUserActionInsert(SUserObj *pUser) {
return 0; return 0;
} }
static int32_t mnodeUserActionDelete(SUserObj *pUser) { static int32_t mndUserActionDelete(SSdb *pSdb, SUserObj *pUser) {
if (pUser->prohibitDbHash) { if (pUser->prohibitDbHash) {
taosHashCleanup(pUser->prohibitDbHash); taosHashCleanup(pUser->prohibitDbHash);
pUser->prohibitDbHash = NULL; pUser->prohibitDbHash = NULL;
} }
if (pUser->acct != NULL) { if (pUser->acct != NULL) {
sdbRelease(pUser->pAcct); sdbRelease(pSdb, pUser->pAcct);
pUser->pAcct = NULL; pUser->pAcct = NULL;
} }
return 0; return 0;
} }
static int32_t mnodeUserActionUpdate(SUserObj *pSrcUser, SUserObj *pDstUser) { static int32_t mndUserActionUpdate(SSdb *pSdb, SUserObj *pSrcUser, SUserObj *pDstUser) {
SUserObj tObj; SUserObj tObj;
int32_t len = (int32_t)((int8_t *)tObj.prohibitDbHash - (int8_t *)&tObj); int32_t len = (int32_t)((int8_t *)tObj.prohibitDbHash - (int8_t *)&tObj);
memcpy(pDstUser, pSrcUser, len); memcpy(pDstUser, pSrcUser, len);
return 0; return 0;
} }
static int32_t mnodeCreateDefaultUser(char *acct, char *user, char *pass) { static int32_t mndCreateDefaultUser(SSdb *pSdb, char *acct, char *user, char *pass) {
SUserObj userObj = {0}; SUserObj userObj = {0};
tstrncpy(userObj.user, user, TSDB_USER_LEN); tstrncpy(userObj.user, user, TSDB_USER_LEN);
tstrncpy(userObj.acct, acct, TSDB_USER_LEN); tstrncpy(userObj.acct, acct, TSDB_USER_LEN);
...@@ -110,30 +109,26 @@ static int32_t mnodeCreateDefaultUser(char *acct, char *user, char *pass) { ...@@ -110,30 +109,26 @@ static int32_t mnodeCreateDefaultUser(char *acct, char *user, char *pass) {
userObj.rootAuth = 1; userObj.rootAuth = 1;
} }
SSdbRaw *pRaw = mnodeUserActionEncode(&userObj); SSdbRaw *pRaw = mndUserActionEncode(&userObj);
if (pRaw == NULL) return -1; if (pRaw == NULL) return -1;
sdbSetRawStatus(pRaw, SDB_STATUS_READY); sdbSetRawStatus(pRaw, SDB_STATUS_READY);
return sdbWrite(pRaw); return sdbWrite(pSdb, pRaw);
} }
static int32_t mnodeCreateDefaultUsers() { static int32_t mndCreateDefaultUsers(SSdb *pSdb) {
if (mnodeCreateDefaultUser(TSDB_DEFAULT_USER, TSDB_DEFAULT_USER, TSDB_DEFAULT_PASS) != 0) { if (mndCreateDefaultUser(pSdb, TSDB_DEFAULT_USER, TSDB_DEFAULT_USER, TSDB_DEFAULT_PASS) != 0) {
return -1; return -1;
} }
if (mnodeCreateDefaultUser(TSDB_DEFAULT_USER, "monitor", tsInternalPass) != 0) { if (mndCreateDefaultUser(pSdb, TSDB_DEFAULT_USER, "_" TSDB_DEFAULT_USER, TSDB_DEFAULT_PASS) != 0) {
return -1;
}
if (mnodeCreateDefaultUser(TSDB_DEFAULT_USER, "_" TSDB_DEFAULT_USER, tsInternalPass) != 0) {
return -1; return -1;
} }
return 0; return 0;
} }
static int32_t mnodeCreateUser(char *acct, char *user, char *pass, SMnodeMsg *pMsg) { static int32_t mndCreateUser(SMnode *pMnode, char *acct, char *user, char *pass, SMnodeMsg *pMsg) {
SUserObj userObj = {0}; SUserObj userObj = {0};
tstrncpy(userObj.user, user, TSDB_USER_LEN); tstrncpy(userObj.user, user, TSDB_USER_LEN);
tstrncpy(userObj.acct, acct, TSDB_USER_LEN); tstrncpy(userObj.acct, acct, TSDB_USER_LEN);
...@@ -142,41 +137,43 @@ static int32_t mnodeCreateUser(char *acct, char *user, char *pass, SMnodeMsg *pM ...@@ -142,41 +137,43 @@ static int32_t mnodeCreateUser(char *acct, char *user, char *pass, SMnodeMsg *pM
userObj.updateTime = userObj.createdTime; userObj.updateTime = userObj.createdTime;
userObj.rootAuth = 0; userObj.rootAuth = 0;
STrans *pTrans = trnCreate(TRN_POLICY_ROLLBACK); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, pMsg->rpcMsg.handle);
if (pTrans == NULL) return -1; if (pTrans == NULL) return -1;
trnSetRpcHandle(pTrans, pMsg->rpcMsg.handle);
SSdbRaw *pRedoRaw = mnodeUserActionEncode(&userObj); SSdbRaw *pRedoRaw = mndUserActionEncode(&userObj);
if (pRedoRaw == NULL || trnAppendRedoLog(pTrans, pRedoRaw) != 0) { if (pRedoRaw == NULL || mndTransAppendRedolog(pTrans, pRedoRaw) != 0) {
trnDrop(pTrans); mError("failed to append redo log since %s", terrstr());
mndTransDrop(pTrans);
return -1; return -1;
} }
sdbSetRawStatus(pRedoRaw, SDB_STATUS_CREATING); sdbSetRawStatus(pRedoRaw, SDB_STATUS_CREATING);
SSdbRaw *pUndoRaw = mnodeUserActionEncode(&userObj); SSdbRaw *pUndoRaw = mndUserActionEncode(&userObj);
if (pUndoRaw == NULL || trnAppendUndoLog(pTrans, pUndoRaw) != 0) { if (pUndoRaw == NULL || mndTransAppendUndolog(pTrans, pUndoRaw) != 0) {
trnDrop(pTrans); mError("failed to append undo log since %s", terrstr());
mndTransDrop(pTrans);
return -1; return -1;
} }
sdbSetRawStatus(pUndoRaw, SDB_STATUS_DROPPED); sdbSetRawStatus(pUndoRaw, SDB_STATUS_DROPPED);
SSdbRaw *pCommitRaw = mnodeUserActionEncode(&userObj); SSdbRaw *pCommitRaw = mndUserActionEncode(&userObj);
if (pCommitRaw == NULL || trnAppendCommitLog(pTrans, pCommitRaw) != 0) { if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) {
trnDrop(pTrans); mError("failed to append commit log since %s", terrstr());
mndTransDrop(pTrans);
return -1; return -1;
} }
sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY);
if (trnPrepare(pTrans, mnodeSyncPropose) != 0) { if (mndTransPrepare(pTrans, mndSyncPropose) != 0) {
trnDrop(pTrans); mndTransDrop(pTrans);
return -1; return -1;
} }
trnDrop(pTrans); mndTransDrop(pTrans);
return 0; return 0;
} }
static int32_t mnodeProcessCreateUserMsg(SMnodeMsg *pMsg) { static int32_t mndProcessCreateUserMsg(SMnode *pMnode, SMnodeMsg *pMsg) {
SCreateUserMsg *pCreate = pMsg->rpcMsg.pCont; SCreateUserMsg *pCreate = pMsg->rpcMsg.pCont;
if (pCreate->user[0] == 0) { if (pCreate->user[0] == 0) {
...@@ -191,23 +188,23 @@ static int32_t mnodeProcessCreateUserMsg(SMnodeMsg *pMsg) { ...@@ -191,23 +188,23 @@ static int32_t mnodeProcessCreateUserMsg(SMnodeMsg *pMsg) {
return -1; return -1;
} }
SUserObj *pUser = sdbAcquire(SDB_USER, pCreate->user); SUserObj *pUser = sdbAcquire(pMnode->pSdb, SDB_USER, pCreate->user);
if (pUser != NULL) { if (pUser != NULL) {
sdbRelease(pUser); sdbRelease(pMnode->pSdb, pUser);
terrno = TSDB_CODE_MND_USER_ALREADY_EXIST; terrno = TSDB_CODE_MND_USER_ALREADY_EXIST;
mError("user:%s, failed to create since %s", pCreate->user, terrstr()); mError("user:%s, failed to create since %s", pCreate->user, terrstr());
return -1; return -1;
} }
SUserObj *pOperUser = sdbAcquire(SDB_USER, pMsg->conn.user); SUserObj *pOperUser = sdbAcquire(pMnode->pSdb, SDB_USER, pMsg->conn.user);
if (pOperUser == NULL) { if (pOperUser == NULL) {
terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; terrno = TSDB_CODE_MND_NO_USER_FROM_CONN;
mError("user:%s, failed to create since %s", pCreate->user, terrstr()); mError("user:%s, failed to create since %s", pCreate->user, terrstr());
return -1; return -1;
} }
int32_t code = mnodeCreateUser(pOperUser->acct, pCreate->user, pCreate->pass, pMsg); int32_t code = mndCreateUser(pMnode, pOperUser->acct, pCreate->user, pCreate->pass, pMsg);
sdbRelease(pOperUser); sdbRelease(pMnode->pSdb, pOperUser);
if (code != 0) { if (code != 0) {
mError("user:%s, failed to create since %s", pCreate->user, terrstr()); mError("user:%s, failed to create since %s", pCreate->user, terrstr());
...@@ -217,18 +214,19 @@ static int32_t mnodeProcessCreateUserMsg(SMnodeMsg *pMsg) { ...@@ -217,18 +214,19 @@ static int32_t mnodeProcessCreateUserMsg(SMnodeMsg *pMsg) {
return TSDB_CODE_MND_ACTION_IN_PROGRESS; return TSDB_CODE_MND_ACTION_IN_PROGRESS;
} }
int32_t mnodeInitUser() { int32_t mndInitUser(SMnode *pMnode) {
SSdbTable table = {.sdbType = SDB_USER, SSdbTable table = {.sdbType = SDB_USER,
.keyType = SDB_KEY_BINARY, .keyType = SDB_KEY_BINARY,
.deployFp = (SdbDeployFp)mnodeCreateDefaultUsers, .deployFp = (SdbDeployFp)mndCreateDefaultUsers,
.encodeFp = (SdbEncodeFp)mnodeUserActionEncode, .encodeFp = (SdbEncodeFp)mndUserActionEncode,
.decodeFp = (SdbDecodeFp)mnodeUserActionDecode, .decodeFp = (SdbDecodeFp)mndUserActionDecode,
.insertFp = (SdbInsertFp)mnodeUserActionInsert, .insertFp = (SdbInsertFp)mndUserActionInsert,
.updateFp = (SdbUpdateFp)mnodeUserActionUpdate, .updateFp = (SdbUpdateFp)mndUserActionUpdate,
.deleteFp = (SdbDeleteFp)mnodeUserActionDelete}; .deleteFp = (SdbDeleteFp)mndUserActionDelete};
sdbSetTable(table);
return 0; mndSetMsgHandle(pMnode, TSDB_MSG_TYPE_CREATE_USER, mndProcessCreateUserMsg);
return sdbSetTable(pMnode->pSdb, table);
} }
void mnodeCleanupUser() {} void mndCleanupUser(SMnode *pMnode) {}
\ No newline at end of file \ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mndInt.h"
/* Vgroup module initialization: currently a stub that does nothing and reports success (0). */
int32_t mndInitVgroup(SMnode *pMnode) {
  return 0;
}
/* Vgroup module cleanup: currently a stub with nothing to release. */
void mndCleanupVgroup(SMnode *pMnode) {
}
\ No newline at end of file
...@@ -14,380 +14,409 @@ ...@@ -14,380 +14,409 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "mndAcct.h"
#include "tglobal.h" #include "mndAuth.h"
#include "tstep.h" #include "mndBalance.h"
#include "tqueue.h" #include "mndCluster.h"
#include "mnodeAcct.h" #include "mndDb.h"
#include "mnodeAuth.h" #include "mndDnode.h"
#include "mnodeBalance.h" #include "mndFunc.h"
#include "mnodeCluster.h" #include "mndMnode.h"
#include "mnodeDb.h" #include "mndOper.h"
#include "mnodeDnode.h" #include "mndProfile.h"
#include "mnodeFunc.h" #include "mndShow.h"
#include "mnodeMnode.h" #include "mndStable.h"
#include "mnodeOper.h" #include "mndSync.h"
#include "mnodeProfile.h" #include "mndTelem.h"
#include "mnodeShow.h" #include "mndTrans.h"
#include "mnodeStable.h" #include "mndUser.h"
#include "mnodeSync.h" #include "mndVgroup.h"
#include "mnodeTelem.h"
#include "mnodeUser.h" int32_t mndGetDnodeId(SMnode *pMnode) {
#include "mnodeVgroup.h" if (pMnode != NULL) {
return pMnode->dnodeId;
static struct {
int32_t dnodeId;
int64_t clusterId;
tmr_h timer;
SSteps *pInitSteps;
SSteps *pStartSteps;
SMnodePara para;
MnodeRpcFp msgFp[TSDB_MSG_TYPE_MAX];
} tsMint;
int32_t mnodeGetDnodeId() { return tsMint.para.dnodeId; }
int64_t mnodeGetClusterId() { return tsMint.para.clusterId; }
void mnodeSendMsgToDnode(struct SEpSet *epSet, struct SRpcMsg *rpcMsg) { (*tsMint.para.SendMsgToDnode)(epSet, rpcMsg); }
void mnodeSendMsgToMnode(struct SRpcMsg *rpcMsg) { return (*tsMint.para.SendMsgToMnode)(rpcMsg); }
void mnodeSendRedirectMsg(struct SRpcMsg *rpcMsg, bool forShell) { (*tsMint.para.SendRedirectMsg)(rpcMsg, forShell); }
static int32_t mnodeInitTimer() {
if (tsMint.timer == NULL) {
tsMint.timer = taosTmrInit(tsMaxShellConns, 200, 3600000, "MND");
}
if (tsMint.timer == NULL) {
return -1;
} }
return 0; return -1;
} }
static void mnodeCleanupTimer() { int64_t mndGetClusterId(SMnode *pMnode) {
if (tsMint.timer != NULL) { if (pMnode != NULL) {
taosTmrCleanUp(tsMint.timer); return pMnode->clusterId;
tsMint.timer = NULL;
} }
return -1;
} }
tmr_h mnodeGetTimer() { return tsMint.timer; } tmr_h mndGetTimer(SMnode *pMnode) {
if (pMnode != NULL) {
return pMnode->timer;
}
static int32_t mnodeSetPara(SMnodePara para) { return NULL;
tsMint.para = para; }
if (tsMint.para.SendMsgToDnode == NULL) { void mndSendMsgToDnode(SMnode *pMnode, SEpSet *pEpSet, SRpcMsg *pMsg) {
terrno = TSDB_CODE_MND_APP_ERROR; if (pMnode != NULL && pMnode->sendMsgToDnodeFp != NULL) {
return -1; (*pMnode->sendMsgToDnodeFp)(pMnode->pDnode, pEpSet, pMsg);
} }
}
if (tsMint.para.SendMsgToMnode == NULL) { void mndSendMsgToMnode(SMnode *pMnode, SRpcMsg *pMsg) {
terrno = TSDB_CODE_MND_APP_ERROR; if (pMnode != NULL && pMnode->sendMsgToMnodeFp != NULL) {
return -1; (*pMnode->sendMsgToMnodeFp)(pMnode->pDnode, pMsg);
} }
}
if (tsMint.para.SendRedirectMsg == NULL) { void mndSendRedirectMsg(SMnode *pMnode, SRpcMsg *pMsg) {
terrno = TSDB_CODE_MND_APP_ERROR; if (pMnode != NULL && pMnode->sendRedirectMsgFp != NULL) {
return -1; (*pMnode->sendRedirectMsgFp)(pMnode->pDnode, pMsg);
} }
}
if (tsMint.para.PutMsgIntoApplyQueue == NULL) { static int32_t mndInitTimer(SMnode *pMnode) {
terrno = TSDB_CODE_MND_APP_ERROR; if (pMnode->timer == NULL) {
pMnode->timer = taosTmrInit(5000, 200, 3600000, "MND");
}
if (pMnode->timer == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
if (tsMint.para.dnodeId < 0) { return 0;
terrno = TSDB_CODE_MND_APP_ERROR; }
static void mndCleanupTimer(SMnode *pMnode) {
if (pMnode->timer != NULL) {
taosTmrCleanUp(pMnode->timer);
pMnode->timer = NULL;
}
}
static int32_t mnodeCreateDir(SMnode *pMnode, const char *path) {
pMnode->path = strdup(path);
if (pMnode->path == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1; return -1;
} }
if (tsMint.para.clusterId < 0) { if (taosMkDir(pMnode->path) != 0) {
terrno = TSDB_CODE_MND_APP_ERROR; terrno = TAOS_SYSTEM_ERROR(errno);
return -1; return -1;
} }
return 0; return 0;
} }
static int32_t mnodeAllocInitSteps() { static int32_t mndInitSdb(SMnode *pMnode) {
struct SSteps *steps = taosStepInit(16, NULL); SSdbOpt opt = {0};
if (steps == NULL) return -1; opt.path = pMnode->path;
if (taosStepAdd(steps, "mnode-trans", trnInit, trnCleanup) != 0) return -1; pMnode->pSdb = sdbInit(&opt);
if (taosStepAdd(steps, "mnode-cluster", mnodeInitCluster, mnodeCleanupCluster) != 0) return -1; if (pMnode->pSdb == NULL) {
if (taosStepAdd(steps, "mnode-dnode", mnodeInitDnode, mnodeCleanupDnode) != 0) return -1; return -1;
if (taosStepAdd(steps, "mnode-mnode", mnodeInitMnode, mnodeCleanupMnode) != 0) return -1; }
if (taosStepAdd(steps, "mnode-acct", mnodeInitAcct, mnodeCleanupAcct) != 0) return -1;
if (taosStepAdd(steps, "mnode-auth", mnodeInitAuth, mnodeCleanupAuth) != 0) return -1;
if (taosStepAdd(steps, "mnode-user", mnodeInitUser, mnodeCleanupUser) != 0) return -1;
if (taosStepAdd(steps, "mnode-db", mnodeInitDb, mnodeCleanupDb) != 0) return -1;
if (taosStepAdd(steps, "mnode-vgroup", mnodeInitVgroup, mnodeCleanupVgroup) != 0) return -1;
if (taosStepAdd(steps, "mnode-stable", mnodeInitStable, mnodeCleanupStable) != 0) return -1;
if (taosStepAdd(steps, "mnode-func", mnodeInitFunc, mnodeCleanupFunc) != 0) return -1;
if (taosStepAdd(steps, "mnode-sdb", sdbInit, sdbCleanup) != 0) return -1;
tsMint.pInitSteps = steps;
return 0; return 0;
} }
static int32_t mnodeAllocStartSteps() { static int32_t mndDeploySdb(SMnode *pMnode) { return sdbDeploy(pMnode->pSdb); }
struct SSteps *steps = taosStepInit(8, NULL); static int32_t mndReadSdb(SMnode *pMnode) { return sdbReadFile(pMnode->pSdb); }
if (steps == NULL) return -1;
static void mndCleanupSdb(SMnode *pMnode) {
if (pMnode->pSdb) {
sdbCleanup(pMnode->pSdb);
pMnode->pSdb = NULL;
}
}
taosStepAdd(steps, "mnode-timer", mnodeInitTimer, NULL); static int32_t mndAllocStep(SMnode *pMnode, char *name, MndInitFp initFp, MndCleanupFp cleanupFp) {
taosStepAdd(steps, "mnode-sdb-file", sdbOpen, sdbClose); SMnodeStep step = {0};
taosStepAdd(steps, "mnode-balance", mnodeInitBalance, mnodeCleanupBalance); step.name = name;
taosStepAdd(steps, "mnode-profile", mnodeInitProfile, mnodeCleanupProfile); step.initFp = initFp;
taosStepAdd(steps, "mnode-show", mnodeInitShow, mnodeCleanUpShow); step.cleanupFp = cleanupFp;
taosStepAdd(steps, "mnode-sync", mnodeInitSync, mnodeCleanUpSync); if (taosArrayPush(pMnode->pSteps, &step) == NULL) {
taosStepAdd(steps, "mnode-telem", mnodeInitTelem, mnodeCleanupTelem); terrno = TSDB_CODE_OUT_OF_MEMORY;
taosStepAdd(steps, "mnode-timer", NULL, mnodeCleanupTimer); return -1;
}
tsMint.pStartSteps = steps;
return 0; return 0;
} }
int32_t mnodeInit(SMnodePara para) { static int32_t mndInitSteps(SMnode *pMnode) {
if (mnodeSetPara(para) != 0) { if (mndAllocStep(pMnode, "mnode-sdb", mndInitSdb, mndCleanupSdb) != 0) return -1;
mError("failed to init mnode para since %s", terrstr()); if (mndAllocStep(pMnode, "mnode-trans", mndInitTrans, mndCleanupTrans) != 0) return -1;
return -1; if (mndAllocStep(pMnode, "mnode-cluster", mndInitCluster, mndCleanupCluster) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-dnode", mndInitDnode, mndCleanupDnode) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-mnode", mndInitMnode, mndCleanupMnode) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-acct", mndInitAcct, mndCleanupAcct) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-auth", mndInitAuth, mndCleanupAuth) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-user", mndInitUser, mndCleanupUser) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-db", mndInitDb, mndCleanupDb) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-vgroup", mndInitVgroup, mndCleanupVgroup) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-stable", mndInitStable, mndCleanupStable) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-func", mndInitFunc, mndCleanupFunc) != 0) return -1;
if (pMnode->clusterId <= 0) {
if (mndAllocStep(pMnode, "mnode-sdb-deploy", mndDeploySdb, NULL) != 0) return -1;
} else {
if (mndAllocStep(pMnode, "mnode-sdb-read", mndReadSdb, NULL) != 0) return -1;
} }
if (mndAllocStep(pMnode, "mnode-timer", mndInitTimer, NULL) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-balance", mndInitBalance, mndCleanupBalance) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-profile", mndInitProfile, mndCleanupProfile) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-show", mndInitShow, mndCleanupShow) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-sync", mndInitSync, mndCleanupSync) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-telem", mndInitTelem, mndCleanupTelem) != 0) return -1;
if (mndAllocStep(pMnode, "mnode-timer", NULL, mndCleanupTimer) != 0) return -1;
if (mnodeAllocInitSteps() != 0) { return 0;
mError("failed to alloc init steps since %s", terrstr()); }
return -1;
static void mndCleanupSteps(SMnode *pMnode, int32_t pos) {
if (pMnode->pSteps == NULL) return;
if (pos == -1) {
pos = taosArrayGetSize(pMnode->pSteps);
} }
if (mnodeAllocStartSteps() != 0) { for (int32_t s = pos; s >= 0; s--) {
mError("failed to alloc start steps since %s", terrstr()); SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
return -1; mDebug("step:%s will cleanup", pStep->name);
if (pStep->cleanupFp != NULL) {
(*pStep->cleanupFp)(pMnode);
}
} }
return taosStepExec(tsMint.pInitSteps); taosArrayClear(pMnode->pSteps);
pMnode->pSteps = NULL;
} }
void mnodeCleanup() { taosStepCleanup(tsMint.pInitSteps); } static int32_t mndExecSteps(SMnode *pMnode) {
int32_t size = taosArrayGetSize(pMnode->pSteps);
for (int32_t pos = 0; pos < size; pos++) {
SMnodeStep *pStep = taosArrayGet(pMnode->pSteps, pos);
if (pStep->initFp == NULL) continue;
// (*pMnode->reportProgress)(pStep->name, "start initialize");
int32_t mnodeDeploy(SMnodeCfg *pCfg) { if ((*pStep->initFp)(pMnode) != 0) {
if (tsMint.para.dnodeId <= 0 && tsMint.para.clusterId <= 0) { mError("step:%s exec failed since %s, start to cleanup", pStep->name, terrstr());
if (sdbDeploy() != 0) { mndCleanupSteps(pMnode, pos);
mError("failed to deploy sdb since %s", terrstr());
return -1; return -1;
} else {
mDebug("step:%s is initialized", pStep->name);
} }
// (*pMnode->reportProgress)(pStep->name, "initialize completed");
} }
mDebug("mnode is deployed");
return 0; return 0;
} }
void mnodeUnDeploy() { sdbUnDeploy(); } static int32_t mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
pMnode->dnodeId = pOption->dnodeId;
int32_t mnodeStart(SMnodeCfg *pCfg) { return taosStepExec(tsMint.pStartSteps); } pMnode->clusterId = pOption->clusterId;
pMnode->replica = pOption->replica;
pMnode->selfIndex = pOption->selfIndex;
memcpy(&pMnode->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
pMnode->pDnode = pOption->pDnode;
pMnode->putMsgToApplyMsgFp = pOption->putMsgToApplyMsgFp;
pMnode->sendMsgToDnodeFp = pOption->sendMsgToDnodeFp;
pMnode->sendMsgToMnodeFp = pOption->sendMsgToMnodeFp;
pMnode->sendRedirectMsgFp = pOption->sendRedirectMsgFp;
if (pMnode->sendMsgToDnodeFp == NULL || pMnode->sendMsgToMnodeFp == NULL || pMnode->sendRedirectMsgFp == NULL ||
pMnode->putMsgToApplyMsgFp == NULL) {
terrno = TSDB_CODE_MND_APP_ERROR;
return -1;
}
int32_t mnodeAlter(SMnodeCfg *pCfg) { return 0; } if (pMnode->dnodeId < 0 || pMnode->clusterId < 0) {
terrno = TSDB_CODE_MND_APP_ERROR;
return -1;
}
void mnodeStop() { taosStepCleanup(tsMint.pStartSteps); } return 0;
}
int32_t mnodeGetLoad(SMnodeLoad *pLoad) { return 0; } SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
mDebug("start to open mnode in %s", path);
SMnodeMsg *mnodeInitMsg(SRpcMsg *pRpcMsg) { SMnode *pMnode = calloc(1, sizeof(SMnode));
SMnodeMsg *pMsg = taosAllocateQitem(sizeof(SMnodeMsg)); if (pMnode == NULL) {
if (pMsg == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed to open mnode since %s", terrstr());
return NULL; return NULL;
} }
if (rpcGetConnInfo(pRpcMsg->handle, &pMsg->conn) != 0) { pMnode->pSteps = taosArrayInit(24, sizeof(SMnodeStep));
mnodeCleanupMsg(pMsg); if (pMnode->pSteps == NULL) {
mError("can not get user from conn:%p", pMsg->rpcMsg.handle); free(pMnode);
terrno = TSDB_CODE_MND_NO_USER_FROM_CONN; terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed to open mnode since %s", terrstr());
return NULL; return NULL;
} }
pMsg->rpcMsg = *pRpcMsg; int32_t code = mnodeCreateDir(pMnode, path);
pMsg->createdTime = taosGetTimestampSec(); if (mnodeCreateDir(pMnode, path) != 0) {
mError("failed to open mnode since %s", tstrerror(code));
return pMsg; mndClose(pMnode);
} terrno = code;
return NULL;
void mnodeCleanupMsg(SMnodeMsg *pMsg) {
if (pMsg->pUser != NULL) {
sdbRelease(pMsg->pUser);
} }
taosFreeQitem(pMsg); code = mndSetOptions(pMnode, pOption);
} if (code != 0) {
mError("failed to open mnode since %s", tstrerror(code));
mndClose(pMnode);
terrno = code;
return NULL;
}
static void mnodeProcessRpcMsg(SMnodeMsg *pMsg) { code = mndInitSteps(pMnode);
int32_t msgType = pMsg->rpcMsg.msgType; if (code != 0) {
mError("failed to open mnode since %s", tstrerror(code));
mndClose(pMnode);
terrno = code;
return NULL;
}
if (tsMint.msgFp[msgType] == NULL) { code = mndExecSteps(pMnode);
if (code != 0) {
mError("failed to open mnode since %s", tstrerror(code));
mndClose(pMnode);
terrno = code;
return NULL;
} }
(*tsMint.msgFp[msgType])(pMsg); mDebug("mnode open successfully ");
return pMnode;
} }
void mnodeSetMsgFp(int32_t msgType, MnodeRpcFp fp) { void mndClose(SMnode *pMnode) {
if (msgType > 0 || msgType < TSDB_MSG_TYPE_MAX) { if (pMnode != NULL) {
tsMint.msgFp[msgType] = fp; mDebug("start to close mnode");
mndCleanupSteps(pMnode, -1);
tfree(pMnode->path);
tfree(pMnode);
mDebug("mnode is closed");
} }
} }
void mnodeProcessMsg(SMnodeMsg *pMsg, EMnMsgType msgType) { int32_t mndAlter(SMnode *pMnode, const SMnodeOpt *pOption) {
if (!mnodeIsMaster()) { mDebug("start to alter mnode");
mnodeSendRedirectMsg(&pMsg->rpcMsg, true); mDebug("mnode is altered");
mnodeCleanupMsg(pMsg); return 0;
return;
}
switch (msgType) {
case MN_MSG_TYPE_READ:
case MN_MSG_TYPE_WRITE:
case MN_MSG_TYPE_SYNC:
mnodeProcessRpcMsg(pMsg);
break;
case MN_MSG_TYPE_APPLY:
break;
default:
break;
}
} }
#if 0 void mndDestroy(const char *path) {
mDebug("start to destroy mnode at %s", path);
static void mnodeProcessWriteReq(SMnodeMsg *pMsg, void *unused) { taosRemoveDir(path);
int32_t msgType = pMsg->rpcMsg.msgType; mDebug("mnode is destroyed");
void *ahandle = pMsg->rpcMsg.ahandle; }
int32_t code = 0;
if (pMsg->rpcMsg.pCont == NULL) {
mError("msg:%p, app:%p type:%s content is null", pMsg, ahandle, taosMsg[msgType]);
code = TSDB_CODE_MND_INVALID_MSG_LEN;
goto PROCESS_WRITE_REQ_END;
}
if (!mnodeIsMaster()) { int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) {
SMnRsp *rpcRsp = &pMsg->rpcRsp; pLoad->numOfDnode = 0;
SEpSet *epSet = rpcMallocCont(sizeof(SEpSet)); pLoad->numOfMnode = 0;
mnodeGetMnodeEpSetForShell(epSet, true); pLoad->numOfVgroup = 0;
rpcRsp->rsp = epSet; pLoad->numOfDatabase = 0;
rpcRsp->len = sizeof(SEpSet); pLoad->numOfSuperTable = 0;
pLoad->numOfChildTable = 0;
pLoad->numOfColumn = 0;
pLoad->totalPoints = 0;
pLoad->totalStorage = 0;
pLoad->compStorage = 0;
mDebug("msg:%p, app:%p type:%s in write queue, is redirected, numOfEps:%d inUse:%d", pMsg, ahandle, return 0;
taosMsg[msgType], epSet->numOfEps, epSet->inUse); }
code = TSDB_CODE_RPC_REDIRECT; SMnodeMsg *mndInitMsg(SMnode *pMnode, SRpcMsg *pRpcMsg) {
goto PROCESS_WRITE_REQ_END; SMnodeMsg *pMsg = taosAllocateQitem(sizeof(SMnodeMsg));
if (pMsg == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
} }
if (tsMworker.writeMsgFp[msgType] == NULL) { if (rpcGetConnInfo(pRpcMsg->handle, &pMsg->conn) != 0) {
mError("msg:%p, app:%p type:%s not processed", pMsg, ahandle, taosMsg[msgType]); mndCleanupMsg(pMsg);
code = TSDB_CODE_MND_MSG_NOT_PROCESSED; mError("can not get user from conn:%p", pMsg->rpcMsg.handle);
goto PROCESS_WRITE_REQ_END; terrno = TSDB_CODE_MND_NO_USER_FROM_CONN;
return NULL;
} }
code = (*tsMworker.writeMsgFp[msgType])(pMsg); pMsg->rpcMsg = *pRpcMsg;
pMsg->createdTime = taosGetTimestampSec();
PROCESS_WRITE_REQ_END: return pMsg;
mnodeSendRsp(pMsg, code);
} }
static void mnodeProcessReadReq(SMnodeMsg *pMsg, void *unused) { void mndCleanupMsg(SMnodeMsg *pMsg) {
int32_t msgType = pMsg->rpcMsg.msgType; if (pMsg->pUser != NULL) {
void *ahandle = pMsg->rpcMsg.ahandle; sdbRelease(pMsg->pMnode->pSdb, pMsg->pUser);
int32_t code = 0;
if (pMsg->rpcMsg.pCont == NULL) {
mError("msg:%p, app:%p type:%s in mread queue, content is null", pMsg, ahandle, taosMsg[msgType]);
code = TSDB_CODE_MND_INVALID_MSG_LEN;
goto PROCESS_READ_REQ_END;
}
if (!mnodeIsMaster()) {
SMnRsp *rpcRsp = &pMsg->rpcRsp;
SEpSet *epSet = rpcMallocCont(sizeof(SEpSet));
if (!epSet) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto PROCESS_READ_REQ_END;
}
mnodeGetMnodeEpSetForShell(epSet, true);
rpcRsp->rsp = epSet;
rpcRsp->len = sizeof(SEpSet);
mDebug("msg:%p, app:%p type:%s in mread queue is redirected, numOfEps:%d inUse:%d", pMsg, ahandle, taosMsg[msgType],
epSet->numOfEps, epSet->inUse);
code = TSDB_CODE_RPC_REDIRECT;
goto PROCESS_READ_REQ_END;
}
if (tsMworker.readMsgFp[msgType] == NULL) {
mError("msg:%p, app:%p type:%s in mread queue, not processed", pMsg, ahandle, taosMsg[msgType]);
code = TSDB_CODE_MND_MSG_NOT_PROCESSED;
goto PROCESS_READ_REQ_END;
} }
mTrace("msg:%p, app:%p type:%s will be processed in mread queue", pMsg, ahandle, taosMsg[msgType]); taosFreeQitem(pMsg);
code = (*tsMworker.readMsgFp[msgType])(pMsg);
PROCESS_READ_REQ_END:
mnodeSendRsp(pMsg, code);
} }
static void mnodeProcessPeerReq(SMnodeMsg *pMsg, void *unused) { void mndSendRsp(SMnodeMsg *pMsg, int32_t code) {}
static void mndProcessRpcMsg(SMnodeMsg *pMsg) {
SMnode *pMnode = pMsg->pMnode;
int32_t code = 0;
int32_t msgType = pMsg->rpcMsg.msgType; int32_t msgType = pMsg->rpcMsg.msgType;
void *ahandle = pMsg->rpcMsg.ahandle; void *ahandle = pMsg->rpcMsg.ahandle;
int32_t code = 0; bool isReq = (msgType % 2 == 1);
if (pMsg->rpcMsg.pCont == NULL) { if (isReq && !mndIsMaster(pMnode)) {
mError("msg:%p, ahandle:%p type:%s in mpeer queue, content is null", pMsg, ahandle, taosMsg[msgType]); code = TSDB_CODE_APP_NOT_READY;
code = TSDB_CODE_MND_INVALID_MSG_LEN; goto PROCESS_RPC_END;
goto PROCESS_PEER_REQ_END;
} }
if (!mnodeIsMaster()) { if (isReq && pMsg->rpcMsg.pCont == NULL) {
SMnRsp *rpcRsp = &pMsg->rpcRsp; mError("msg:%p, app:%p type:%s content is null", pMsg, ahandle, taosMsg[msgType]);
SEpSet *epSet = rpcMallocCont(sizeof(SEpSet)); code = TSDB_CODE_MND_INVALID_MSG_LEN;
mnodeGetMnodeEpSetForPeer(epSet, true); goto PROCESS_RPC_END;
rpcRsp->rsp = epSet; }
rpcRsp->len = sizeof(SEpSet);
mDebug("msg:%p, ahandle:%p type:%s in mpeer queue is redirected, numOfEps:%d inUse:%d", pMsg, ahandle, MndMsgFp fp = pMnode->msgFp[msgType];
taosMsg[msgType], epSet->numOfEps, epSet->inUse); if (fp == NULL) {
mError("msg:%p, app:%p type:%s not processed", pMsg, ahandle, taosMsg[msgType]);
code = TSDB_CODE_MSG_NOT_PROCESSED;
goto PROCESS_RPC_END;
}
code = TSDB_CODE_RPC_REDIRECT; code = (*fp)(pMnode, pMsg);
goto PROCESS_PEER_REQ_END; if (code != 0) {
code = terrno;
mError("msg:%p, app:%p type:%s failed to process since %s", pMsg, ahandle, taosMsg[msgType], terrstr());
goto PROCESS_RPC_END;
} }
if (tsMworker.peerReqFp[msgType] == NULL) { PROCESS_RPC_END:
mError("msg:%p, ahandle:%p type:%s in mpeer queue, not processed", pMsg, ahandle, taosMsg[msgType]); if (isReq) {
code = TSDB_CODE_MND_MSG_NOT_PROCESSED; if (code == TSDB_CODE_APP_NOT_READY) {
goto PROCESS_PEER_REQ_END; mndSendRedirectMsg(pMnode, &pMsg->rpcMsg);
} else if (code != 0) {
SRpcMsg rspMsg = {.handle = pMsg->rpcMsg.handle, .code = code};
rpcSendResponse(&rspMsg);
} else {
}
} }
code = (*tsMworker.peerReqFp[msgType])(pMsg); mndCleanupMsg(pMsg);
}
PROCESS_PEER_REQ_END: void mndSetMsgHandle(SMnode *pMnode, int32_t msgType, MndMsgFp fp) {
mnodeSendRsp(pMsg, code); if (msgType >= 0 && msgType < TSDB_MSG_TYPE_MAX) {
pMnode->msgFp[msgType] = fp;
}
} }
static void mnodeProcessPeerRsp(SMnodeMsg *pMsg, void *unused) { void mndProcessReadMsg(SMnodeMsg *pMsg) { mndProcessRpcMsg(pMsg); }
int32_t msgType = pMsg->rpcMsg.msgType;
SRpcMsg *pRpcMsg = &pMsg->rpcMsg;
if (!mnodeIsMaster()) { void mndProcessWriteMsg(SMnodeMsg *pMsg) { mndProcessRpcMsg(pMsg); }
mError("msg:%p, ahandle:%p type:%s not processed for not master", pRpcMsg, pRpcMsg->ahandle, taosMsg[msgType]);
mnodeCleanupMsg2(pMsg);
}
if (tsMworker.peerRspFp[msgType]) { void mndProcessSyncMsg(SMnodeMsg *pMsg) { mndProcessRpcMsg(pMsg); }
(*tsMworker.peerRspFp[msgType])(pRpcMsg);
} else {
mError("msg:%p, ahandle:%p type:%s is not processed", pRpcMsg, pRpcMsg->ahandle, taosMsg[msgType]);
}
mnodeCleanupMsg2(pMsg); void mndProcessApplyMsg(SMnodeMsg *pMsg) {}
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeAuth.h"
/*
 * Start-up hook for the mnode auth module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitAuth() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode auth module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupAuth() {
  return;
}
/*
 * Fills in the authentication outputs for `user`.
 *
 * When `user` equals TSDB_NETTEST_USER, the user name is passed through
 * taosEncryptPass(), *spi, *encrypt and *ckey are set to 0, and TSDB_KEY_LEN
 * bytes of the encrypted result are copied into `secret`. For any other user
 * none of the output arguments are written.
 *
 * Always returns 0.
 */
int32_t mnodeRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey) {
  /* Only the net-test account is handled here; every other user falls through. */
  if (strcmp(user, TSDB_NETTEST_USER) != 0) {
    return 0;
  }

  char encrypted[32] = {0};
  taosEncryptPass((uint8_t *)user, strlen(user), encrypted);

  *spi = 0;
  *encrypt = 0;
  *ckey = 0;
  memcpy(secret, encrypted, TSDB_KEY_LEN);

  mDebug("nettest user is authorized");
  return 0;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode function module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitFunc() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode function module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupFunc() {
  return;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode oper module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitOper() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode oper module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupOper() {
  return;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode profile module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitProfile() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode profile module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupProfile() {
  return;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode show module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitShow() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode show module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanUpShow() {
  return;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode stable module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitStable() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode stable module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupStable() {
  return;
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "mnodeInt.h"
/*
 * Start-up hook for the mnode vgroup module.
 * Placeholder: performs no work and always returns 0.
 */
int32_t mnodeInitVgroup() {
  const int32_t code = 0;
  return code;
}
/*
 * Shutdown hook for the mnode vgroup module.
 * Placeholder: the body does nothing.
 */
void mnodeCleanupVgroup() {
  return;
}
\ No newline at end of file
...@@ -3,7 +3,7 @@ add_library(sdb ${MNODE_SRC}) ...@@ -3,7 +3,7 @@ add_library(sdb ${MNODE_SRC})
target_include_directories( target_include_directories(
sdb sdb
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mnode/sdb" PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mnode/sdb"
private "${CMAKE_CURRENT_SOURCE_DIR}/inc" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
) )
target_link_libraries( target_link_libraries(
sdb sdb
......
...@@ -37,7 +37,7 @@ extern "C" { ...@@ -37,7 +37,7 @@ extern "C" {
#define SDB_MAX_SIZE (32 * 1024) #define SDB_MAX_SIZE (32 * 1024)
typedef struct SSdbRaw { typedef struct SSdbRaw {
int8_t sdb; int8_t type;
int8_t sver; int8_t sver;
int8_t status; int8_t status;
int8_t reserved; int8_t reserved;
...@@ -46,13 +46,13 @@ typedef struct SSdbRaw { ...@@ -46,13 +46,13 @@ typedef struct SSdbRaw {
} SSdbRaw; } SSdbRaw;
typedef struct SSdbRow { typedef struct SSdbRow {
ESdbType sdb; ESdbType type;
ESdbStatus status; ESdbStatus status;
int32_t refCount; int32_t refCount;
char pObj[]; char pObj[];
} SSdbRow; } SSdbRow;
typedef struct { typedef struct SSdb {
char *currDir; char *currDir;
char *syncDir; char *syncDir;
char *tmpDir; char *tmpDir;
...@@ -67,11 +67,10 @@ typedef struct { ...@@ -67,11 +67,10 @@ typedef struct {
SdbDeployFp deployFps[SDB_MAX]; SdbDeployFp deployFps[SDB_MAX];
SdbEncodeFp encodeFps[SDB_MAX]; SdbEncodeFp encodeFps[SDB_MAX];
SdbDecodeFp decodeFps[SDB_MAX]; SdbDecodeFp decodeFps[SDB_MAX];
} SSdbMgr; } SSdb;
extern SSdbMgr tsSdb; int32_t sdbWriteFile(SSdb *pSdb);
int32_t sdbWriteRaw(SSdb *pSdb, SSdbRaw *pRaw);
int32_t sdbWriteImp(SSdbRaw *pRaw);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -15,77 +15,100 @@ ...@@ -15,77 +15,100 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "sdbInt.h" #include "sdbInt.h"
#include "tglobal.h"
SSdbMgr tsSdb = {0}; SSdb *sdbInit(SSdbOpt *pOption) {
mDebug("start to init sdb in %s", pOption->path);
int32_t sdbInit() { SSdb *pSdb = calloc(1, sizeof(SSdb));
char path[PATH_MAX + 100]; if (pSdb == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
snprintf(path, PATH_MAX + 100, "%s%scur%s", tsMnodeDir, TD_DIRSEP, TD_DIRSEP); mError("failed to init sdb since %s", terrstr());
tsSdb.currDir = strdup(path); return NULL;
}
snprintf(path, PATH_MAX + 100, "%s%ssync%s", tsMnodeDir, TD_DIRSEP, TD_DIRSEP);
tsSdb.syncDir = strdup(path);
snprintf(path, PATH_MAX + 100, "%s%stmp%s", tsMnodeDir, TD_DIRSEP, TD_DIRSEP);
tsSdb.tmpDir = strdup(path);
if (tsSdb.currDir == NULL || tsSdb.currDir == NULL || tsSdb.currDir == NULL) { char path[PATH_MAX + 100];
return TSDB_CODE_OUT_OF_MEMORY; snprintf(path, PATH_MAX + 100, "%s", pOption->path);
pSdb->currDir = strdup(path);
snprintf(path, PATH_MAX + 100, "%s%ssync", pOption->path, TD_DIRSEP);
pSdb->syncDir = strdup(path);
snprintf(path, PATH_MAX + 100, "%s%stmp", pOption->path, TD_DIRSEP);
pSdb->tmpDir = strdup(path);
if (pSdb->currDir == NULL || pSdb->currDir == NULL || pSdb->currDir == NULL) {
sdbCleanup(pSdb);
terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed to init sdb since %s", terrstr());
return NULL;
} }
for (int32_t i = 0; i < SDB_MAX; ++i) { for (int32_t i = 0; i < SDB_MAX; ++i) {
int32_t type; taosInitRWLatch(&pSdb->locks[i]);
if (tsSdb.keyTypes[i] == SDB_KEY_INT32) {
type = TSDB_DATA_TYPE_INT;
} else if (tsSdb.keyTypes[i] == SDB_KEY_INT64) {
type = TSDB_DATA_TYPE_BIGINT;
} else {
type = TSDB_DATA_TYPE_BINARY;
}
SHashObj *hash = taosHashInit(64, taosGetDefaultHashFunction(type), true, HASH_NO_LOCK);
if (hash == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
tsSdb.hashObjs[i] = hash;
taosInitRWLatch(&tsSdb.locks[i]);
} }
return 0; mDebug("sdb init successfully");
return pSdb;
} }
void sdbCleanup() { void sdbCleanup(SSdb *pSdb) {
if (tsSdb.currDir != NULL) { mDebug("start to cleanup sdb");
tfree(tsSdb.currDir);
if (pSdb->curVer != pSdb->lastCommitVer) {
mDebug("write sdb file for curVer:% " PRId64 " and lastVer:%" PRId64, pSdb->curVer, pSdb->lastCommitVer);
sdbWriteFile(pSdb);
} }
if (tsSdb.syncDir != NULL) { if (pSdb->currDir != NULL) {
tfree(tsSdb.syncDir); tfree(pSdb->currDir);
} }
if (tsSdb.tmpDir != NULL) { if (pSdb->syncDir != NULL) {
tfree(tsSdb.tmpDir); tfree(pSdb->syncDir);
}
if (pSdb->tmpDir != NULL) {
tfree(pSdb->tmpDir);
} }
for (int32_t i = 0; i < SDB_MAX; ++i) { for (int32_t i = 0; i < SDB_MAX; ++i) {
SHashObj *hash = tsSdb.hashObjs[i]; SHashObj *hash = pSdb->hashObjs[i];
if (hash != NULL) { if (hash != NULL) {
taosHashClear(hash);
taosHashCleanup(hash); taosHashCleanup(hash);
} }
tsSdb.hashObjs[i] = NULL; pSdb->hashObjs[i] = NULL;
} }
mDebug("sdb is cleaned up");
} }
void sdbSetTable(SSdbTable table) { int32_t sdbSetTable(SSdb *pSdb, SSdbTable table) {
ESdbType sdb = table.sdbType; ESdbType sdb = table.sdbType;
tsSdb.keyTypes[sdb] = table.keyType; pSdb->keyTypes[sdb] = table.keyType;
tsSdb.insertFps[sdb] = table.insertFp; pSdb->insertFps[sdb] = table.insertFp;
tsSdb.updateFps[sdb] = table.updateFp; pSdb->updateFps[sdb] = table.updateFp;
tsSdb.deleteFps[sdb] = table.deleteFp; pSdb->deleteFps[sdb] = table.deleteFp;
tsSdb.deployFps[sdb] = table.deployFp; pSdb->deployFps[sdb] = table.deployFp;
tsSdb.encodeFps[sdb] = table.encodeFp; pSdb->encodeFps[sdb] = table.encodeFp;
tsSdb.decodeFps[sdb] = table.decodeFp; pSdb->decodeFps[sdb] = table.decodeFp;
for (int32_t i = 0; i < SDB_MAX; ++i) {
int32_t type;
if (pSdb->keyTypes[i] == SDB_KEY_INT32) {
type = TSDB_DATA_TYPE_INT;
} else if (pSdb->keyTypes[i] == SDB_KEY_INT64) {
type = TSDB_DATA_TYPE_BIGINT;
} else {
type = TSDB_DATA_TYPE_BINARY;
}
SHashObj *hash = taosHashInit(64, taosGetDefaultHashFunction(type), true, HASH_NO_LOCK);
if (hash == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pSdb->hashObjs[i] = hash;
taosInitRWLatch(&pSdb->locks[i]);
}
return 0;
} }
\ No newline at end of file
...@@ -15,70 +15,71 @@ ...@@ -15,70 +15,71 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "sdbInt.h" #include "sdbInt.h"
#include "tglobal.h"
#include "tchecksum.h" #include "tchecksum.h"
static int32_t sdbCreateDir() { static int32_t sdbCreateDir(SSdb *pSdb) {
mDebug("start to create mnode at %s", tsMnodeDir); if (taosMkDir(pSdb->currDir) != 0) {
if (!taosMkDir(tsSdb.currDir)) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
mError("failed to create dir:%s since %s", tsSdb.currDir, terrstr()); mError("failed to create dir:%s since %s", pSdb->currDir, terrstr());
return -1; return -1;
} }
if (!taosMkDir(tsSdb.syncDir)) { if (taosMkDir(pSdb->syncDir) != 0) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
mError("failed to create dir:%s since %s", tsSdb.syncDir, terrstr()); mError("failed to create dir:%s since %s", pSdb->syncDir, terrstr());
return -1; return -1;
} }
if (!taosMkDir(tsSdb.tmpDir)) { if (taosMkDir(pSdb->tmpDir) != 0) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
mError("failed to create dir:%s since %s", tsSdb.tmpDir, terrstr()); mError("failed to create dir:%s since %s", pSdb->tmpDir, terrstr());
return -1; return -1;
} }
return 0; return 0;
} }
static int32_t sdbRunDeployFp() { static int32_t sdbRunDeployFp(SSdb *pSdb) {
mDebug("start to run deploy functions"); mDebug("start to deploy sdb");
for (int32_t i = SDB_START; i < SDB_MAX; ++i) { for (int32_t i = SDB_MAX - 1; i > SDB_START; --i) {
SdbDeployFp fp = tsSdb.deployFps[i]; SdbDeployFp fp = pSdb->deployFps[i];
if (fp == NULL) continue; if (fp == NULL) continue;
if ((*fp)() != 0) {
if ((*fp)(pSdb) != 0) {
mError("failed to deploy sdb:%d since %s", i, terrstr()); mError("failed to deploy sdb:%d since %s", i, terrstr());
return -1; return -1;
} }
} }
mDebug("sdb deploy successfully");
return 0; return 0;
} }
static int32_t sdbReadDataFile() { int32_t sdbReadFile(SSdb *pSdb) {
int64_t offset = 0;
int32_t code = 0;
int32_t readLen = 0;
int64_t ret = 0;
SSdbRaw *pRaw = malloc(SDB_MAX_SIZE); SSdbRaw *pRaw = malloc(SDB_MAX_SIZE);
if (pRaw == NULL) { if (pRaw == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed read file since %s", terrstr());
return -1; return -1;
} }
char file[PATH_MAX] = {0}; char file[PATH_MAX] = {0};
snprintf(file, sizeof(file), "%ssdb.data", tsSdb.currDir); snprintf(file, sizeof(file), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP);
FileFd fd = taosOpenFileRead(file); FileFd fd = taosOpenFileRead(file);
if (fd <= 0) { if (fd <= 0) {
free(pRaw); free(pRaw);
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
mError("failed to open file:%s for read since %s", file, terrstr()); mError("failed to read file:%s since %s", file, terrstr());
return -1; return -1;
} }
int64_t offset = 0;
int32_t code = 0;
int32_t readLen = 0;
int64_t ret = 0;
while (1) { while (1) {
readLen = sizeof(SSdbRaw); readLen = sizeof(SSdbRaw);
ret = taosReadFile(fd, pRaw, readLen); ret = taosReadFile(fd, pRaw, readLen);
...@@ -117,7 +118,7 @@ static int32_t sdbReadDataFile() { ...@@ -117,7 +118,7 @@ static int32_t sdbReadDataFile() {
break; break;
} }
code = sdbWriteImp(pRaw); code = sdbWriteRaw(pSdb, pRaw);
if (code != 0) { if (code != 0) {
mError("failed to read file:%s since %s", file, terrstr()); mError("failed to read file:%s since %s", file, terrstr());
goto PARSE_SDB_DATA_ERROR; goto PARSE_SDB_DATA_ERROR;
...@@ -129,13 +130,18 @@ static int32_t sdbReadDataFile() { ...@@ -129,13 +130,18 @@ static int32_t sdbReadDataFile() {
PARSE_SDB_DATA_ERROR: PARSE_SDB_DATA_ERROR:
taosCloseFile(fd); taosCloseFile(fd);
sdbFreeRaw(pRaw); sdbFreeRaw(pRaw);
terrno = code; terrno = code;
return code; return code;
} }
static int32_t sdbWriteDataFile() { int32_t sdbWriteFile(SSdb *pSdb) {
int32_t code = 0;
char tmpfile[PATH_MAX] = {0}; char tmpfile[PATH_MAX] = {0};
snprintf(tmpfile, sizeof(tmpfile), "%ssdb.data", tsSdb.tmpDir); snprintf(tmpfile, sizeof(tmpfile), "%s%ssdb.data", pSdb->tmpDir, TD_DIRSEP);
char curfile[PATH_MAX] = {0};
snprintf(curfile, sizeof(curfile), "%s%ssdb.data", pSdb->currDir, TD_DIRSEP);
FileFd fd = taosOpenFileCreateWrite(tmpfile); FileFd fd = taosOpenFileCreateWrite(tmpfile);
if (fd <= 0) { if (fd <= 0) {
...@@ -144,14 +150,12 @@ static int32_t sdbWriteDataFile() { ...@@ -144,14 +150,12 @@ static int32_t sdbWriteDataFile() {
return -1; return -1;
} }
int32_t code = 0;
for (int32_t i = SDB_MAX - 1; i > SDB_START; --i) { for (int32_t i = SDB_MAX - 1; i > SDB_START; --i) {
SdbEncodeFp encodeFp = tsSdb.encodeFps[i]; SdbEncodeFp encodeFp = pSdb->encodeFps[i];
if (encodeFp == NULL) continue; if (encodeFp == NULL) continue;
SHashObj *hash = tsSdb.hashObjs[i]; SHashObj *hash = pSdb->hashObjs[i];
SRWLatch *pLock = &tsSdb.locks[i]; SRWLatch *pLock = &pSdb->locks[i];
taosWLockLatch(pLock); taosWLockLatch(pLock);
SSdbRow **ppRow = taosHashIterate(hash, NULL); SSdbRow **ppRow = taosHashIterate(hash, NULL);
...@@ -191,68 +195,44 @@ static int32_t sdbWriteDataFile() { ...@@ -191,68 +195,44 @@ static int32_t sdbWriteDataFile() {
if (code == 0) { if (code == 0) {
code = taosFsyncFile(fd); code = taosFsyncFile(fd);
if (code != 0) {
code = TAOS_SYSTEM_ERROR(errno);
mError("failed to write file:%s since %s", tmpfile, tstrerror(code));
}
} }
taosCloseFile(fd); taosCloseFile(fd);
if (code == 0) { if (code == 0) {
char curfile[PATH_MAX] = {0};
snprintf(curfile, sizeof(curfile), "%ssdb.data", tsSdb.currDir);
code = taosRenameFile(tmpfile, curfile); code = taosRenameFile(tmpfile, curfile);
if (code != 0) {
code = TAOS_SYSTEM_ERROR(errno);
mError("failed to write file:%s since %s", curfile, tstrerror(code));
}
} }
if (code != 0) { if (code != 0) {
terrno = code; mError("failed to write file:%s since %s", curfile, tstrerror(code));
mError("failed to write sdb file since %s", terrstr());
} else { } else {
mDebug("write sdb file successfully"); mDebug("write file:%s successfully", curfile);
} }
terrno = code;
return code; return code;
} }
int32_t sdbOpen() { int32_t sdbDeploy(SSdb *pSdb) {
mDebug("start to read mnode file"); if (sdbCreateDir(pSdb) != 0) {
if (sdbReadDataFile() != 0) {
return -1;
}
return 0;
}
void sdbClose() {
if (tsSdb.curVer != tsSdb.lastCommitVer) {
mDebug("start to write mnode file");
sdbWriteDataFile();
}
for (int32_t i = 0; i < SDB_MAX; ++i) {
SHashObj *hash = tsSdb.hashObjs[i];
if (hash != NULL) {
taosHashClear(hash);
}
}
}
int32_t sdbDeploy() {
if (sdbCreateDir() != 0) {
return -1; return -1;
} }
if (sdbRunDeployFp() != 0) { if (sdbRunDeployFp(pSdb) != 0) {
return -1; return -1;
} }
if (sdbWriteDataFile() != 0) { if (sdbWriteFile(pSdb) != 0) {
return -1; return -1;
} }
sdbClose();
return 0; return 0;
} }
void sdbUnDeploy() {
mDebug("start to undeploy mnode");
taosRemoveDir(tsMnodeDir);
}
...@@ -15,15 +15,14 @@ ...@@ -15,15 +15,14 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "sdbInt.h" #include "sdbInt.h"
#include "tglobal.h"
static SHashObj *sdbGetHash(int32_t sdb) { static SHashObj *sdbGetHash(SSdb *pSdb, int32_t type) {
if (sdb >= SDB_MAX || sdb <= SDB_START) { if (type >= SDB_MAX || type <= SDB_START) {
terrno = TSDB_CODE_SDB_INVALID_TABLE_TYPE; terrno = TSDB_CODE_SDB_INVALID_TABLE_TYPE;
return NULL; return NULL;
} }
SHashObj *hash = tsSdb.hashObjs[sdb]; SHashObj *hash = pSdb->hashObjs[type];
if (hash == NULL) { if (hash == NULL) {
terrno = TSDB_CODE_SDB_APP_ERROR; terrno = TSDB_CODE_SDB_APP_ERROR;
return NULL; return NULL;
...@@ -32,9 +31,9 @@ static SHashObj *sdbGetHash(int32_t sdb) { ...@@ -32,9 +31,9 @@ static SHashObj *sdbGetHash(int32_t sdb) {
return hash; return hash;
} }
static int32_t sdbGetkeySize(ESdbType sdb, void *pKey) { static int32_t sdbGetkeySize(SSdb *pSdb, ESdbType type, void *pKey) {
int32_t keySize; int32_t keySize;
EKeyType keyType = tsSdb.keyTypes[sdb]; EKeyType keyType = pSdb->keyTypes[type];
if (keyType == SDB_KEY_INT32) { if (keyType == SDB_KEY_INT32) {
keySize = sizeof(int32_t); keySize = sizeof(int32_t);
...@@ -47,77 +46,81 @@ static int32_t sdbGetkeySize(ESdbType sdb, void *pKey) { ...@@ -47,77 +46,81 @@ static int32_t sdbGetkeySize(ESdbType sdb, void *pKey) {
return keySize; return keySize;
} }
static int32_t sdbInsertRow(SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) { static int32_t sdbInsertRow(SSdb *pSdb, SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) {
SRWLatch *pLock = &tsSdb.locks[pRow->sdb]; int32_t code = 0;
SRWLatch *pLock = &pSdb->locks[pRow->type];
taosWLockLatch(pLock); taosWLockLatch(pLock);
SSdbRow *pDstRow = taosHashGet(hash, pRow->pObj, keySize); SSdbRow *pDstRow = taosHashGet(hash, pRow->pObj, keySize);
if (pDstRow != NULL) { if (pDstRow != NULL) {
terrno = TSDB_CODE_SDB_OBJ_ALREADY_THERE;
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
sdbFreeRow(pRow); sdbFreeRow(pRow);
return -1; return TSDB_CODE_SDB_OBJ_ALREADY_THERE;
} }
pRow->refCount = 1; pRow->refCount = 1;
pRow->status = pRaw->status; pRow->status = pRaw->status;
if (taosHashPut(hash, pRow->pObj, keySize, &pRow, sizeof(void *)) != 0) { if (taosHashPut(hash, pRow->pObj, keySize, &pRow, sizeof(void *)) != 0) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
sdbFreeRow(pRow); sdbFreeRow(pRow);
return -1; return TSDB_CODE_SDB_OBJ_ALREADY_THERE;
} }
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
SdbInsertFp insertFp = tsSdb.insertFps[pRow->sdb]; SdbInsertFp insertFp = pSdb->insertFps[pRow->type];
if (insertFp != NULL) { if (insertFp != NULL) {
if ((*insertFp)(pRow->pObj) != 0) { code = (*insertFp)(pSdb, pRow->pObj);
if (code != 0) {
taosWLockLatch(pLock); taosWLockLatch(pLock);
taosHashRemove(hash, pRow->pObj, keySize); taosHashRemove(hash, pRow->pObj, keySize);
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
sdbFreeRow(pRow); sdbFreeRow(pRow);
return -1; return code;
} }
} }
return 0; return 0;
} }
static int32_t sdbUpdateRow(SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) { static int32_t sdbUpdateRow(SSdb *pSdb, SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) {
SRWLatch *pLock = &tsSdb.locks[pRow->sdb]; int32_t code = 0;
SRWLatch *pLock = &pSdb->locks[pRow->type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
SSdbRow **ppDstRow = taosHashGet(hash, pRow->pObj, keySize); SSdbRow **ppDstRow = taosHashGet(hash, pRow->pObj, keySize);
if (ppDstRow == NULL || *ppDstRow == NULL) { if (ppDstRow == NULL || *ppDstRow == NULL) {
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
return sdbInsertRow(hash, pRaw, pRow, keySize); return sdbInsertRow(pSdb, hash, pRaw, pRow, keySize);
} }
SSdbRow *pDstRow = *ppDstRow; SSdbRow *pDstRow = *ppDstRow;
pRow->status = pRaw->status; pRow->status = pRaw->status;
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
SdbUpdateFp updateFp = tsSdb.updateFps[pRow->sdb]; SdbUpdateFp updateFp = pSdb->updateFps[pRow->type];
if (updateFp != NULL) { if (updateFp != NULL) {
(*updateFp)(pRow->pObj, pDstRow->pObj); code = (*updateFp)(pSdb, pRow->pObj, pDstRow->pObj);
} }
sdbFreeRow(pRow); sdbFreeRow(pRow);
return 0; return code;
} }
static int32_t sdbDeleteRow(SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) { static int32_t sdbDeleteRow(SSdb *pSdb, SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_t keySize) {
SRWLatch *pLock = &tsSdb.locks[pRow->sdb]; int32_t code = 0;
SRWLatch *pLock = &pSdb->locks[pRow->type];
taosWLockLatch(pLock); taosWLockLatch(pLock);
SSdbRow **ppDstRow = taosHashGet(hash, pRow->pObj, keySize); SSdbRow **ppDstRow = taosHashGet(hash, pRow->pObj, keySize);
if (ppDstRow == NULL || *ppDstRow == NULL) { if (ppDstRow == NULL || *ppDstRow == NULL) {
terrno = TSDB_CODE_SDB_OBJ_NOT_THERE;
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
sdbFreeRow(pRow); sdbFreeRow(pRow);
return -1; return TSDB_CODE_SDB_OBJ_NOT_THERE;
} }
SSdbRow *pDstRow = *ppDstRow; SSdbRow *pDstRow = *ppDstRow;
...@@ -125,71 +128,67 @@ static int32_t sdbDeleteRow(SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_ ...@@ -125,71 +128,67 @@ static int32_t sdbDeleteRow(SHashObj *hash, SSdbRaw *pRaw, SSdbRow *pRow, int32_
taosHashRemove(hash, pDstRow->pObj, keySize); taosHashRemove(hash, pDstRow->pObj, keySize);
taosWUnLockLatch(pLock); taosWUnLockLatch(pLock);
SdbDeleteFp deleteFp = tsSdb.deleteFps[pDstRow->sdb]; SdbDeleteFp deleteFp = pSdb->deleteFps[pDstRow->type];
if (deleteFp != NULL) { if (deleteFp != NULL) {
(void)(*deleteFp)(pDstRow->pObj); code = (*deleteFp)(pSdb, pDstRow->pObj);
} }
sdbRelease(pDstRow->pObj); sdbRelease(pSdb, pDstRow->pObj);
sdbFreeRow(pRow); sdbFreeRow(pRow);
return 0; return code;
} }
int32_t sdbWriteImp(SSdbRaw *pRaw) { int32_t sdbWriteRaw(SSdb *pSdb, SSdbRaw *pRaw) {
SHashObj *hash = sdbGetHash(pRaw->sdb); SHashObj *hash = sdbGetHash(pSdb, pRaw->type);
if (hash == NULL) return -1; if (hash == NULL) return terrno;
SdbDecodeFp decodeFp = tsSdb.decodeFps[pRaw->sdb]; SdbDecodeFp decodeFp = pSdb->decodeFps[pRaw->type];
SSdbRow *pRow = (*decodeFp)(pRaw); SSdbRow *pRow = (*decodeFp)(pRaw);
if (pRow == NULL) { if (pRow == NULL) {
terrno = TSDB_CODE_SDB_INVALID_DATA_CONTENT; return terrno;
return -1;
} }
pRow->sdb = pRaw->sdb; pRow->type = pRaw->type;
int32_t keySize = sdbGetkeySize(pRow->sdb, pRow->pObj); int32_t keySize = sdbGetkeySize(pSdb, pRow->type, pRow->pObj);
int32_t code = -1; int32_t code = TSDB_CODE_SDB_INVALID_ACTION_TYPE;
switch (pRaw->status) { switch (pRaw->status) {
case SDB_STATUS_CREATING: case SDB_STATUS_CREATING:
code = sdbInsertRow(hash, pRaw, pRow, keySize); code = sdbInsertRow(pSdb, hash, pRaw, pRow, keySize);
break; break;
case SDB_STATUS_READY: case SDB_STATUS_READY:
case SDB_STATUS_DROPPING: case SDB_STATUS_DROPPING:
code = sdbUpdateRow(hash, pRaw, pRow, keySize); code = sdbUpdateRow(pSdb, hash, pRaw, pRow, keySize);
break; break;
case SDB_STATUS_DROPPED: case SDB_STATUS_DROPPED:
code = sdbDeleteRow(hash, pRaw, pRow, keySize); code = sdbDeleteRow(pSdb, hash, pRaw, pRow, keySize);
break;
default:
terrno = TSDB_CODE_SDB_INVALID_ACTION_TYPE;
break; break;
} }
return code; return code;
} }
int32_t sdbWrite(SSdbRaw *pRaw) { int32_t sdbWrite(SSdb *pSdb, SSdbRaw *pRaw) {
int32_t code = sdbWriteImp(pRaw); int32_t code = sdbWriteRaw(pSdb, pRaw);
sdbFreeRaw(pRaw); sdbFreeRaw(pRaw);
return code; return code;
} }
void *sdbAcquire(ESdbType sdb, void *pKey) { void *sdbAcquire(SSdb *pSdb, ESdbType type, void *pKey) {
SHashObj *hash = sdbGetHash(sdb); SHashObj *hash = sdbGetHash(pSdb, type);
if (hash == NULL) return NULL; if (hash == NULL) return NULL;
void *pRet = NULL; void *pRet = NULL;
int32_t keySize = sdbGetkeySize(sdb, pKey); int32_t keySize = sdbGetkeySize(pSdb, type, pKey);
SRWLatch *pLock = &tsSdb.locks[sdb]; SRWLatch *pLock = &pSdb->locks[type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
SSdbRow **ppRow = taosHashGet(hash, pKey, keySize); SSdbRow **ppRow = taosHashGet(hash, pKey, keySize);
if (ppRow == NULL || *ppRow == NULL) { if (ppRow == NULL || *ppRow == NULL) {
terrno = TSDB_CODE_SDB_OBJ_NOT_THERE;
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
terrno = TSDB_CODE_SDB_OBJ_NOT_THERE;
return NULL; return NULL;
} }
...@@ -214,13 +213,13 @@ void *sdbAcquire(ESdbType sdb, void *pKey) { ...@@ -214,13 +213,13 @@ void *sdbAcquire(ESdbType sdb, void *pKey) {
return pRet; return pRet;
} }
void sdbRelease(void *pObj) { void sdbRelease(SSdb *pSdb, void *pObj) {
if (pObj == NULL) return; if (pObj == NULL) return;
SSdbRow *pRow = (SSdbRow *)((char *)pObj - sizeof(SSdbRow)); SSdbRow *pRow = (SSdbRow *)((char *)pObj - sizeof(SSdbRow));
if (pRow->sdb >= SDB_MAX || pRow->sdb <= SDB_START) return; if (pRow->type >= SDB_MAX || pRow->type <= SDB_START) return;
SRWLatch *pLock = &tsSdb.locks[pRow->sdb]; SRWLatch *pLock = &pSdb->locks[pRow->type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
int32_t ref = atomic_sub_fetch_32(&pRow->refCount, 1); int32_t ref = atomic_sub_fetch_32(&pRow->refCount, 1);
...@@ -231,11 +230,11 @@ void sdbRelease(void *pObj) { ...@@ -231,11 +230,11 @@ void sdbRelease(void *pObj) {
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
} }
void *sdbFetch(ESdbType sdb, void *pIter, void **ppObj) { void *sdbFetch(SSdb *pSdb, ESdbType type, void *pIter, void **ppObj) {
SHashObj *hash = sdbGetHash(sdb); SHashObj *hash = sdbGetHash(pSdb, type);
if (hash == NULL) return NULL; if (hash == NULL) return NULL;
SRWLatch *pLock = &tsSdb.locks[sdb]; SRWLatch *pLock = &pSdb->locks[type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
SSdbRow **ppRow = taosHashIterate(hash, ppRow); SSdbRow **ppRow = taosHashIterate(hash, ppRow);
...@@ -255,23 +254,23 @@ void *sdbFetch(ESdbType sdb, void *pIter, void **ppObj) { ...@@ -255,23 +254,23 @@ void *sdbFetch(ESdbType sdb, void *pIter, void **ppObj) {
return ppRow; return ppRow;
} }
void sdbCancelFetch(void *pIter) { void sdbCancelFetch(SSdb *pSdb, void *pIter) {
if (pIter == NULL) return; if (pIter == NULL) return;
SSdbRow *pRow = *(SSdbRow **)pIter; SSdbRow *pRow = *(SSdbRow **)pIter;
SHashObj *hash = sdbGetHash(pRow->sdb); SHashObj *hash = sdbGetHash(pSdb, pRow->type);
if (hash == NULL) return; if (hash == NULL) return;
SRWLatch *pLock = &tsSdb.locks[pRow->sdb]; SRWLatch *pLock = &pSdb->locks[pRow->type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
taosHashCancelIterate(hash, pIter); taosHashCancelIterate(hash, pIter);
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
} }
int32_t sdbGetSize(ESdbType sdb) { int32_t sdbGetSize(SSdb *pSdb, ESdbType type) {
SHashObj *hash = sdbGetHash(sdb); SHashObj *hash = sdbGetHash(pSdb, type);
if (hash == NULL) return 0; if (hash == NULL) return 0;
SRWLatch *pLock = &tsSdb.locks[sdb]; SRWLatch *pLock = &pSdb->locks[type];
taosRLockLatch(pLock); taosRLockLatch(pLock);
int32_t size = taosHashGetSize(hash); int32_t size = taosHashGetSize(hash);
taosRUnLockLatch(pLock); taosRUnLockLatch(pLock);
......
...@@ -16,14 +16,14 @@ ...@@ -16,14 +16,14 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "sdbInt.h" #include "sdbInt.h"
SSdbRaw *sdbAllocRaw(ESdbType sdb, int8_t sver, int32_t dataLen) { SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) {
SSdbRaw *pRaw = calloc(1, dataLen + sizeof(SSdbRaw)); SSdbRaw *pRaw = calloc(1, dataLen + sizeof(SSdbRaw));
if (pRaw == NULL) { if (pRaw == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL; return NULL;
} }
pRaw->sdb = sdb; pRaw->type = type;
pRaw->sver = sver; pRaw->sver = sver;
pRaw->dataLen = dataLen; pRaw->dataLen = dataLen;
return pRaw; return pRaw;
......
...@@ -35,4 +35,4 @@ void *sdbGetRowObj(SSdbRow *pRow) { ...@@ -35,4 +35,4 @@ void *sdbGetRowObj(SSdbRow *pRow) {
return pRow->pObj; return pRow->pObj;
} }
void sdbFreeRow(SSdbRow *pRow) { free(pRow); } void sdbFreeRow(SSdbRow *pRow) { tfree(pRow); }
# Collect every source file under src/ into MNODE_SRC and build the
# `transaction` library from it.
aux_source_directory(src MNODE_SRC)
add_library(transaction ${MNODE_SRC})

# The public API headers are exported to consumers of the target; inc/ is
# visible to this target only. The scope keywords (PUBLIC / PRIVATE) must be
# upper case to be recognised by CMake.
target_include_directories(
  transaction
  PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/mnode/transaction"
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)

# Libraries needed to build the target itself; all are linked PRIVATE, so none
# are propagated to consumers.
target_link_libraries(
  transaction
  PRIVATE os
  PRIVATE common
  PRIVATE util
  PRIVATE sdb
  PRIVATE transport
)
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TRANSACTION_INT_H_
#define _TD_TRANSACTION_INT_H_
#include "os.h"
#include "trn.h"
#include "tglobal.h"
#include "tarray.h"
#include "tlog.h"
#ifdef __cplusplus
extern "C" {
#endif
// Logging helpers for this module. Each one calls taosPrintLog with a fixed
// prefix, and only when the matching DEBUG_* bit is set in mDebugFlag.
// mFatal/mError/mWarn/mInfo pass 255 as the second argument, while
// mDebug/mTrace pass mDebugFlag itself.
// NOTE(review): each macro expands to a bare `{ ... }` block, so
// `if (x) mError(...); else ...` does not parse; brace such call sites.
#define mFatal(...) { if (mDebugFlag & DEBUG_FATAL) { taosPrintLog("MND FATAL ", 255, __VA_ARGS__); }}
#define mError(...) { if (mDebugFlag & DEBUG_ERROR) { taosPrintLog("MND ERROR ", 255, __VA_ARGS__); }}
#define mWarn(...) { if (mDebugFlag & DEBUG_WARN) { taosPrintLog("MND WARN ", 255, __VA_ARGS__); }}
#define mInfo(...) { if (mDebugFlag & DEBUG_INFO) { taosPrintLog("MND ", 255, __VA_ARGS__); }}
#define mDebug(...) { if (mDebugFlag & DEBUG_DEBUG) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }}
#define mTrace(...) { if (mDebugFlag & DEBUG_TRACE) { taosPrintLog("MND ", mDebugFlag, __VA_ARGS__); }}
// Version number of the transaction module.
// NOTE(review): the encoder uses its own SDB_TRANS_VER; confirm which one is authoritative.
#define TRN_VER 1
// First argument passed to taosArrayInit for every per-transaction array
// (presumably the initial capacity; confirm against tarray.h).
#define TRN_DEFAULT_ARRAY_SIZE 8
// Stages a transaction moves through; the current one is kept in STrans.stage.
// trnExecute dispatches on this value, testing the stages in the order listed.
typedef enum {
TRN_STAGE_PREPARE = 1,
TRN_STAGE_EXECUTE = 2,
TRN_STAGE_COMMIT = 3,
TRN_STAGE_ROLLBACK = 4,
TRN_STAGE_RETRY = 5
} ETrnStage;
// In-memory representation of one transaction.
typedef struct STrans {
// Transaction id, assigned from trnGenerateTransId() when the object is created.
int32_t id;
// Current ETrnStage value.
int8_t stage;
// ETrnPolicy value supplied to trnCreate.
int8_t policy;
// Opaque handle stored by trnSetRpcHandle.
void *rpcHandle;
// The five arrays below are created with pointer-sized elements and are
// filled through the trnAppend* functions.
SArray *redoLogs;
SArray *undoLogs;
SArray *commitLogs;
SArray *redoActions;
SArray *undoActions;
} STrans;
// Callbacks registered with sdb for the SDB_TRANS table (see trnInit).
// Serialize a transaction into a newly allocated raw record; NULL on failure.
SSdbRaw *trnActionEncode(STrans *pTrans);
// Rebuild a transaction object from a raw record; NULL on failure.
STrans *trnActionDecode(SSdbRaw *pRaw);
// Row insert/delete/update hooks; each returns 0 on success.
int32_t trnActionInsert(STrans *pTrans);
int32_t trnActionDelete(STrans *pTrans);
int32_t trnActionUpdate(STrans *pSrcTrans, STrans *pDstTrans);
// Returns the id to assign to a newly created transaction.
int32_t trnGenerateTransId();
#ifdef __cplusplus
}
#endif
#endif /*_TD_TRANSACTION_INT_H_*/
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "trnInt.h"
#define SDB_TRANS_VER 1
/*
 * Serialize a transaction into a newly allocated SSdbRaw.
 *
 * The buffer is sized for the fixed header plus the total size of every redo,
 * undo and commit log entry. Only the header (id, stage, policy and the five
 * element counts) is written; the log payloads themselves are not emitted yet.
 *
 * Returns the raw record, or NULL when allocation fails.
 * NOTE(review): the SDB_SET_* macros are defined elsewhere and may return
 * early on failure; confirm.
 */
SSdbRaw *trnActionEncode(STrans *pTrans) {
  int32_t rawDataLen = 10 * sizeof(int32_t);
  int32_t redoLogNum = taosArrayGetSize(pTrans->redoLogs);
  int32_t undoLogNum = taosArrayGetSize(pTrans->undoLogs);
  int32_t commitLogNum = taosArrayGetSize(pTrans->commitLogs);
  int32_t redoActionNum = taosArrayGetSize(pTrans->redoActions);
  int32_t undoActionNum = taosArrayGetSize(pTrans->undoActions);

  // The arrays hold SSdbRaw pointers, so the element itself must be fetched
  // with taosArrayGetP; taosArrayGet would return the address of the slot.
  for (int32_t index = 0; index < redoLogNum; ++index) {
    SSdbRaw *pLog = taosArrayGetP(pTrans->redoLogs, index);
    rawDataLen += sdbGetRawTotalSize(pLog);
  }

  for (int32_t index = 0; index < undoLogNum; ++index) {
    SSdbRaw *pLog = taosArrayGetP(pTrans->undoLogs, index);
    rawDataLen += sdbGetRawTotalSize(pLog);
  }

  for (int32_t index = 0; index < commitLogNum; ++index) {
    SSdbRaw *pLog = taosArrayGetP(pTrans->commitLogs, index);
    rawDataLen += sdbGetRawTotalSize(pLog);
  }

  SSdbRaw *pRaw = sdbAllocRaw(SDB_TRANS, SDB_TRANS_VER, rawDataLen);
  if (pRaw == NULL) return NULL;

  // Write into the record that was just allocated; no pData exists in this scope.
  int32_t dataPos = 0;
  SDB_SET_INT32(pRaw, dataPos, pTrans->id)
  SDB_SET_INT8(pRaw, dataPos, pTrans->stage)
  SDB_SET_INT8(pRaw, dataPos, pTrans->policy)
  SDB_SET_INT32(pRaw, dataPos, redoLogNum)
  SDB_SET_INT32(pRaw, dataPos, undoLogNum)
  SDB_SET_INT32(pRaw, dataPos, commitLogNum)
  SDB_SET_INT32(pRaw, dataPos, redoActionNum)
  SDB_SET_INT32(pRaw, dataPos, undoActionNum)
  SDB_SET_DATALEN(pRaw, dataPos);

  return pRaw;
}
/*
 * Rebuild a transaction object from a raw sdb record.
 *
 * Checks the record's software version, allocates a row holding an STrans,
 * reads the header fields, then reads redoLogNum length-prefixed buffers and
 * appends a pointer to each one to pTrans->redoLogs.
 *
 * Returns the decoded transaction, or NULL with terrno set on failure.
 *
 * NOTE(review): pTrans->redoLogs is not created in this function; confirm that
 * sdbAllocRow or the caller initialises it, otherwise the push always fails.
 * NOTE(review): the SDB_GET_* macros are defined elsewhere; if they return
 * early on failure, the buffer allocated just before SDB_GET_BINARY leaks.
 */
STrans *trnActionDecode(SSdbRaw *pRaw) {
  int8_t sver = 0;
  if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL;

  if (sver != SDB_TRANS_VER) {
    terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
    return NULL;
  }

  // sdbGetRowObj dereferences the row, so a failed allocation must stop here.
  SSdbRow *pRow = sdbAllocRow(sizeof(STrans));
  if (pRow == NULL) return NULL;

  STrans *pTrans = sdbGetRowObj(pRow);
  if (pTrans == NULL) return NULL;

  int32_t redoLogNum = 0;
  int32_t undoLogNum = 0;
  int32_t commitLogNum = 0;
  int32_t redoActionNum = 0;
  int32_t undoActionNum = 0;

  int32_t dataPos = 0;
  SDB_GET_INT32(pRaw, pRow, dataPos, &pTrans->id)
  SDB_GET_INT8(pRaw, pRow, dataPos, &pTrans->stage)
  SDB_GET_INT8(pRaw, pRow, dataPos, &pTrans->policy)
  SDB_GET_INT32(pRaw, pRow, dataPos, &redoLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &undoLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &commitLogNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &redoActionNum)
  SDB_GET_INT32(pRaw, pRow, dataPos, &undoActionNum)

  int32_t code = 0;
  for (int32_t index = 0; index < redoLogNum; ++index) {
    int32_t dataLen = 0;
    SDB_GET_INT32(pRaw, pRow, dataPos, &dataLen)

    char *pData = malloc(dataLen);
    if (pData == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      break;
    }
    SDB_GET_BINARY(pRaw, pRow, dataPos, pData, dataLen);

    // The array stores pointers, so push the address of the pointer; pushing
    // pData itself would copy the first bytes of the buffer into the slot.
    if (taosArrayPush(pTrans->redoLogs, &pData) == NULL) {
      free(pData);
      code = TSDB_CODE_OUT_OF_MEMORY;
      break;
    }
  }

  // Do not hand a half-decoded transaction back as if decoding had succeeded.
  if (code != 0) {
    sdbFreeRow(pRow);
    terrno = code;
    return NULL;
  }

  return pTrans;
}
/*
 * Insert hook for the transaction table: writes every redo log entry of the
 * transaction through sdbWrite, in order.
 *
 * Returns 0 when all writes succeed, otherwise the first non-zero code
 * returned by sdbWrite (remaining entries are not written).
 */
int32_t trnActionInsert(STrans *pTrans) {
  SArray *pLogs = pTrans->redoLogs;
  int32_t total = taosArrayGetSize(pLogs);
  int32_t code = 0;

  for (int32_t i = 0; i < total && code == 0; ++i) {
    SSdbRaw *pEntry = taosArrayGetP(pLogs, i);
    code = sdbWrite(pEntry);
  }

  return code;
}
/*
 * Delete hook for the transaction table: writes every entry of the
 * transaction's redo log array through sdbWrite, in order.
 *
 * Returns 0 when all writes succeed, otherwise the first non-zero sdbWrite code.
 * NOTE(review): this walks redoLogs exactly like trnActionInsert; confirm that
 * is the intended array for a delete.
 */
int32_t trnActionDelete(STrans *pTrans) {
  SArray *pLogs = pTrans->redoLogs;
  int32_t remaining = taosArrayGetSize(pLogs);
  int32_t pos = 0;

  while (pos < remaining) {
    SSdbRaw *pEntry = taosArrayGetP(pLogs, pos);
    int32_t rc = sdbWrite(pEntry);
    if (rc != 0) {
      return rc;
    }
    ++pos;
  }

  return 0;
}
/* Update hook for the transaction table. Currently a no-op that reports success. */
int32_t trnActionUpdate(STrans *pSrcTrans, STrans *pDstTrans) {
  (void)pSrcTrans;
  (void)pDstTrans;
  return 0;
}
/*
 * Returns the id for a new transaction.
 * NOTE(review): always yields 1, so every transaction shares the same id;
 * looks like a placeholder.
 */
int32_t trnGenerateTransId() {
  const int32_t transId = 1;
  return transId;
}
/*
 * Allocate a transaction in the PREPARE stage with the given policy and
 * create its five (initially empty) pointer arrays.
 *
 * Returns the new transaction, or NULL with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY. On failure nothing is leaked: arrays that were
 * created and the transaction object itself are released.
 */
STrans *trnCreate(ETrnPolicy policy) {
  STrans *pTrans = calloc(1, sizeof(STrans));
  if (pTrans == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  pTrans->id = trnGenerateTransId();
  pTrans->stage = TRN_STAGE_PREPARE;
  pTrans->policy = policy;
  pTrans->redoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->commitLogs = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->redoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));
  pTrans->undoActions = taosArrayInit(TRN_DEFAULT_ARRAY_SIZE, sizeof(void *));

  if (pTrans->redoLogs == NULL || pTrans->undoLogs == NULL || pTrans->commitLogs == NULL ||
      pTrans->redoActions == NULL || pTrans->undoActions == NULL) {
    // Release whatever was created before reporting the failure.
    SArray *arrays[] = {pTrans->redoLogs, pTrans->undoLogs, pTrans->commitLogs, pTrans->redoActions,
                        pTrans->undoActions};
    for (size_t i = 0; i < sizeof(arrays) / sizeof(arrays[0]); ++i) {
      if (arrays[i] != NULL) {
        taosArrayDestroy(arrays[i]);
      }
    }
    free(pTrans);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  return pTrans;
}
/*
 * Free every pointer stored in the array, then destroy the array itself.
 * A NULL array is accepted and ignored, so a partially constructed
 * transaction can be dropped safely.
 */
static void trnDropArray(SArray *pArray) {
  if (pArray == NULL) return;

  for (int32_t index = 0; index < pArray->size; ++index) {
    SSdbRaw *pRaw = taosArrayGetP(pArray, index);
    tfree(pRaw);
  }

  taosArrayDestroy(pArray);
}
/*
 * Release a transaction: frees the entries of all five arrays, the arrays
 * themselves, and finally the transaction object. Passing NULL is a no-op.
 */
void trnDrop(STrans *pTrans) {
  if (pTrans == NULL) return;

  trnDropArray(pTrans->redoLogs);
  trnDropArray(pTrans->undoLogs);
  trnDropArray(pTrans->commitLogs);
  trnDropArray(pTrans->redoActions);
  trnDropArray(pTrans->undoActions);
  tfree(pTrans);
}
/* Store the rpc handle on the transaction; trnPrepare later passes it to the sync callback. */
void trnSetRpcHandle(STrans *pTrans, void *rpcHandle) {
  pTrans->rpcHandle = rpcHandle;
}
/*
 * Append the pointer pRaw to pArray.
 *
 * Returns 0 on success. Returns -1 with terrno set to
 * TSDB_CODE_OUT_OF_MEMORY when either argument is NULL or the push fails.
 */
static int32_t trnAppendArray(SArray *pArray, SSdbRaw *pRaw) {
  // Short-circuit keeps the push from running when an argument is NULL.
  if (pArray != NULL && pRaw != NULL && taosArrayPush(pArray, &pRaw) != NULL) {
    return 0;
  }

  terrno = TSDB_CODE_OUT_OF_MEMORY;
  return -1;
}
/* Append a raw record to the transaction's redo log array; 0 on success, -1 on failure. */
int32_t trnAppendRedoLog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray *pTarget = pTrans->redoLogs;
  return trnAppendArray(pTarget, pRaw);
}

/* Append a raw record to the transaction's undo log array; 0 on success, -1 on failure. */
int32_t trnAppendUndoLog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray *pTarget = pTrans->undoLogs;
  return trnAppendArray(pTarget, pRaw);
}

/* Append a raw record to the transaction's commit log array; 0 on success, -1 on failure. */
int32_t trnAppendCommitLog(STrans *pTrans, SSdbRaw *pRaw) {
  SArray *pTarget = pTrans->commitLogs;
  return trnAppendArray(pTarget, pRaw);
}
/*
 * Append a message pointer to the transaction's redo action array.
 * pEpSet is accepted but not used here. Returns 0 on success, -1 on failure.
 */
int32_t trnAppendRedoAction(STrans *pTrans, SEpSet *pEpSet, void *pMsg) {
  SArray *pActions = pTrans->redoActions;
  return trnAppendArray(pActions, pMsg);
}
/*
 * Append a message pointer to the transaction's undo action array.
 * pEpSet is accepted but not used here. Returns 0 on success, -1 on failure.
 */
int32_t trnAppendUndoAction(STrans *pTrans, SEpSet *pEpSet, void *pMsg) {
  SArray *pActions = pTrans->undoActions;
  return trnAppendArray(pActions, pMsg);
}
int32_t trnInit() {
SSdbTable table = {.sdbType = SDB_TRANS,
.keyType = SDB_KEY_INT32,
.encodeFp = (SdbEncodeFp)trnActionEncode,
.decodeFp = (SdbDecodeFp)trnActionDecode,
.insertFp = (SdbInsertFp)trnActionInsert,
.updateFp = (SdbUpdateFp)trnActionUpdate,
.deleteFp = (SdbDeleteFp)trnActionDelete};
sdbSetTable(table);
return 0;
}
// Counterpart of trnInit; currently has nothing to release.
void trnCleanup() {}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "trnInt.h"
#include "trpc.h"
/*
 * Persist a freshly built transaction and hand it to the sync layer.
 *
 * Encodes the transaction, writes the record through sdbWrite, then invokes
 * syncfp with the record and the transaction's rpc handle.
 *
 * Returns 0 on success and -1 on any failure (including a NULL syncfp).
 * NOTE(review): ownership of pRaw on the failure paths is not visible here;
 * confirm whether sdbWrite/syncfp take it over or it must be freed.
 */
int32_t trnPrepare(STrans *pTrans, int32_t (*syncfp)(SSdbRaw *pRaw, void *pData)) {
  if (syncfp == NULL) return -1;

  SSdbRaw *pRaw = trnActionEncode(pTrans);
  if (pRaw == NULL) {
    // This is the encode step; the message previously said "decode".
    mError("tranId:%d, failed to encode trans since %s", pTrans->id, terrstr());
    return -1;
  }

  if (sdbWrite(pRaw) != 0) {
    mError("tranId:%d, failed to write trans since %s", pTrans->id, terrstr());
    return -1;
  }

  if ((*syncfp)(pRaw, pTrans->rpcHandle) != 0) {
    mError("tranId:%d, failed to sync trans since %s", pTrans->id, terrstr());
    return -1;
  }

  return 0;
}
/*
 * Send an rpc response carrying `code` on the given handle.
 * Does nothing when rpcHandle is NULL.
 */
static void trnSendRpcRsp(void *rpcHandle, int32_t code) {
  if (rpcHandle != NULL) {
    // Report the code the caller asked for, not whatever terrno holds now.
    SRpcMsg rspMsg = {.handle = rpcHandle, .code = code};
    rpcSendResponse(&rspMsg);
  }
}
/*
 * Completion callback for a synced transaction record.
 *
 * pRaw  - the record that was synced
 * pData - the rpc handle that trnPrepare passed to the sync callback
 * code  - result of the sync; non-zero means it failed
 *
 * On a failed sync the error is reported on the rpc handle and 0 is returned.
 * Otherwise the record is written through sdbWrite; if that fails, the error
 * is reported, terrno is restored and -1 is returned.
 */
int32_t trnApply(SSdbRaw *pRaw, void *pData, int32_t code) {
  if (code != 0) {
    // Forward the sync result itself rather than an unrelated terrno value.
    trnSendRpcRsp(pData, code);
    return 0;
  }

  // pData is the rpc handle; the record to persist is pRaw.
  if (sdbWrite(pRaw) != 0) {
    code = terrno;
    trnSendRpcRsp(pData, code);
    terrno = code;
    return -1;
  }

  return 0;
}
/*
 * Write every pointer stored in pArray through sdbWrite, in order.
 * Returns 0 when all writes succeed, or -1 as soon as one fails.
 */
static int32_t trnExecuteArray(SArray *pArray) {
  int32_t pos = 0;

  while (pos < pArray->size) {
    SSdbRaw *pEntry = taosArrayGetP(pArray, pos);
    if (sdbWrite(pEntry) != 0) {
      return -1;
    }
    ++pos;
  }

  return 0;
}
/* Each helper below runs trnExecuteArray over one array of the transaction. */

static int32_t trnExecuteRedoLogs(STrans *pTrans) {
  SArray *pItems = pTrans->redoLogs;
  return trnExecuteArray(pItems);
}

static int32_t trnExecuteUndoLogs(STrans *pTrans) {
  SArray *pItems = pTrans->undoLogs;
  return trnExecuteArray(pItems);
}

static int32_t trnExecuteCommitLogs(STrans *pTrans) {
  SArray *pItems = pTrans->commitLogs;
  return trnExecuteArray(pItems);
}

static int32_t trnExecuteRedoActions(STrans *pTrans) {
  SArray *pItems = pTrans->redoActions;
  return trnExecuteArray(pItems);
}

static int32_t trnExecuteUndoActions(STrans *pTrans) {
  SArray *pItems = pTrans->undoActions;
  return trnExecuteArray(pItems);
}
/*
 * PREPARE stage: apply the redo logs.
 * Success moves the transaction to EXECUTE and returns 0; failure moves it to
 * ROLLBACK and returns -1.
 */
static int32_t trnPerformPrepareStage(STrans *pTrans) {
  if (trnExecuteRedoLogs(pTrans) != 0) {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }

  pTrans->stage = TRN_STAGE_EXECUTE;
  return 0;
}
/*
 * EXECUTE stage: run the redo actions.
 *   - success: move to COMMIT, return 0
 *   - TSDB_CODE_MND_ACTION_IN_PROGRESS: leave the stage untouched, return -1
 *   - any other failure: move to RETRY (retry policy) or ROLLBACK, return 0
 *
 * NOTE(review): trnExecuteRedoActions only returns 0 or -1 in this file, so
 * the in-progress comparison can only match if that constant equals -1.
 */
static int32_t trnPerformExecuteStage(STrans *pTrans) {
  int32_t code = trnExecuteRedoActions(pTrans);

  if (code == 0) {
    pTrans->stage = TRN_STAGE_COMMIT;
    return 0;
  }

  if (code == TSDB_CODE_MND_ACTION_IN_PROGRESS) {
    return -1;
  }

  pTrans->stage = (pTrans->policy == TRN_POLICY_RETRY) ? TRN_STAGE_RETRY : TRN_STAGE_ROLLBACK;
  return 0;
}
/*
 * COMMIT stage: apply the commit logs.
 * Success sets the stage to EXECUTE and returns 0; failure sets it to
 * ROLLBACK and returns -1.
 * NOTE(review): returning to EXECUTE after a successful commit looks like a
 * placeholder; confirm the intended final stage.
 */
static int32_t trnPerformCommitStage(STrans *pTrans) {
  int32_t failed = (trnExecuteCommitLogs(pTrans) != 0);

  pTrans->stage = failed ? TRN_STAGE_ROLLBACK : TRN_STAGE_EXECUTE;
  return failed ? -1 : 0;
}
/*
 * ROLLBACK stage: apply the undo logs to revert what the redo logs wrote.
 * (Replaying the commit logs here would finalise the very changes that are
 * being rolled back.)
 *
 * Success sets the stage to EXECUTE and returns 0; failure keeps the
 * transaction in ROLLBACK and returns -1.
 * NOTE(review): the stage transitions are kept as they were; moving back to
 * EXECUTE after a rollback looks like a placeholder and should be confirmed.
 */
static int32_t trnPerformRollbackStage(STrans *pTrans) {
  if (trnExecuteUndoLogs(pTrans) == 0) {
    pTrans->stage = TRN_STAGE_EXECUTE;
    return 0;
  } else {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }
}
/*
 * RETRY stage: applies the commit logs.
 * Success sets the stage to EXECUTE and returns 0; failure sets it to
 * ROLLBACK and returns -1.
 * NOTE(review): the body is identical to the commit stage; confirm what a
 * retry is meant to re-run.
 */
static int32_t trnPerformRetryStage(STrans *pTrans) {
  int32_t rc = trnExecuteCommitLogs(pTrans);

  if (rc != 0) {
    pTrans->stage = TRN_STAGE_ROLLBACK;
    return -1;
  }

  pTrans->stage = TRN_STAGE_EXECUTE;
  return 0;
}
/*
 * Drive the transaction with the given id through its stages.
 *
 * The stage is re-read before each step, so one call can advance through
 * several stages in the order PREPARE, EXECUTE, COMMIT, ROLLBACK, RETRY.
 * The first stage handler that fails stops the run.
 *
 * Returns 0 on success; -1 when the transaction cannot be acquired or a stage
 * handler fails. The acquired transaction is always released before returning.
 */
int32_t trnExecute(int32_t tranId) {
  STrans *pTrans = sdbAcquire(SDB_TRANS, &tranId);
  if (pTrans == NULL) {
    return -1;
  }

  int32_t failed = 0;

  if (!failed && pTrans->stage == TRN_STAGE_PREPARE) {
    failed = (trnPerformPrepareStage(pTrans) != 0);
  }
  if (!failed && pTrans->stage == TRN_STAGE_EXECUTE) {
    failed = (trnPerformExecuteStage(pTrans) != 0);
  }
  if (!failed && pTrans->stage == TRN_STAGE_COMMIT) {
    failed = (trnPerformCommitStage(pTrans) != 0);
  }
  if (!failed && pTrans->stage == TRN_STAGE_ROLLBACK) {
    failed = (trnPerformRollbackStage(pTrans) != 0);
  }
  if (!failed && pTrans->stage == TRN_STAGE_RETRY) {
    failed = (trnPerformRetryStage(pTrans) != 0);
  }

  sdbRelease(pTrans);
  return failed ? -1 : 0;
}
\ No newline at end of file
...@@ -17,9 +17,6 @@ ...@@ -17,9 +17,6 @@
#include "vnodeInt.h" #include "vnodeInt.h"
#include "tqueue.h" #include "tqueue.h"
int32_t vnodeInit(SVnodePara para) { return 0; }
void vnodeCleanup() {}
int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg) { return 0; } int32_t vnodeAlter(SVnode *pVnode, const SVnodeCfg *pCfg) { return 0; }
SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg) { return NULL; } SVnode *vnodeCreate(int32_t vgId, const char *path, const SVnodeCfg *pCfg) { return NULL; }
void vnodeDrop(SVnode *pVnode) {} void vnodeDrop(SVnode *pVnode) {}
...@@ -31,7 +28,7 @@ int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad) { return 0; } ...@@ -31,7 +28,7 @@ int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad) { return 0; }
SVnodeMsg *vnodeInitMsg(int32_t msgNum) { SVnodeMsg *vnodeInitMsg(int32_t msgNum) {
SVnodeMsg *pMsg = taosAllocateQitem(msgNum * sizeof(SRpcMsg *) + sizeof(SVnodeMsg)); SVnodeMsg *pMsg = taosAllocateQitem(msgNum * sizeof(SRpcMsg *) + sizeof(SVnodeMsg));
if (pMsg == NULL) { if (pMsg == NULL) {
terrno = TSDB_CODE_VND_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL; return NULL;
} else { } else {
pMsg->allocNum = msgNum; pMsg->allocNum = msgNum;
...@@ -41,7 +38,7 @@ SVnodeMsg *vnodeInitMsg(int32_t msgNum) { ...@@ -41,7 +38,7 @@ SVnodeMsg *vnodeInitMsg(int32_t msgNum) {
int32_t vnodeAppendMsg(SVnodeMsg *pMsg, SRpcMsg *pRpcMsg) { int32_t vnodeAppendMsg(SVnodeMsg *pMsg, SRpcMsg *pRpcMsg) {
if (pMsg->curNum >= pMsg->allocNum) { if (pMsg->curNum >= pMsg->allocNum) {
return TSDB_CODE_VND_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
pMsg->rpcMsg[pMsg->curNum++] = *pRpcMsg; pMsg->rpcMsg[pMsg->curNum++] = *pRpcMsg;
......
...@@ -17,97 +17,22 @@ ...@@ -17,97 +17,22 @@
#define _TQ_META_STORE_H_ #define _TQ_META_STORE_H_
#include "os.h" #include "os.h"
#include "tq.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#define TQ_BUCKET_MASK 0xFF
#define TQ_BUCKET_SIZE 256
#define TQ_PAGE_SIZE 4096
//key + offset + size
#define TQ_IDX_SIZE 24
//4096 / 24
#define TQ_MAX_IDX_ONE_PAGE 170
//24 * 170
#define TQ_IDX_PAGE_BODY_SIZE 4080
//4096 - 4080
#define TQ_IDX_PAGE_HEAD_SIZE 16
#define TQ_ACTION_CONST 0
#define TQ_ACTION_INUSE 1
#define TQ_ACTION_INUSE_CONT 2
#define TQ_ACTION_INTXN 3
#define TQ_SVER 0
//TODO: inplace mode is not implemented
#define TQ_UPDATE_INPLACE 0
#define TQ_UPDATE_APPEND 1
#define TQ_DUP_INTXN_REWRITE 0
#define TQ_DUP_INTXN_REJECT 2
static inline bool TqUpdateAppend(int32_t tqConfigFlag) {
return tqConfigFlag & TQ_UPDATE_APPEND;
}
static inline bool TqDupIntxnReject(int32_t tqConfigFlag) {
return tqConfigFlag & TQ_DUP_INTXN_REJECT;
}
static const int8_t TQ_CONST_DELETE = TQ_ACTION_CONST;
#define TQ_DELETE_TOKEN (void*)&TQ_CONST_DELETE
typedef struct TqSerializedHead {
int16_t ver;
int16_t action;
int32_t checksum;
int64_t ssize;
char content[];
} TqSerializedHead;
typedef struct TqMetaHandle {
int64_t key;
int64_t offset;
int64_t serializedSize;
void* valueInUse;
void* valueInTxn;
} TqMetaHandle;
typedef struct TqMetaList {
TqMetaHandle handle;
struct TqMetaList* next;
//struct TqMetaList* inTxnPrev;
//struct TqMetaList* inTxnNext;
struct TqMetaList* unpersistPrev;
struct TqMetaList* unpersistNext;
} TqMetaList;
typedef struct TqMetaStore {
TqMetaList* bucket[TQ_BUCKET_SIZE];
//a table head
TqMetaList* unpersistHead;
int fileFd; //TODO:temporaral use, to be replaced by unified tfile
int idxFd; //TODO:temporaral use, to be replaced by unified tfile
char* dirPath;
int32_t tqConfigFlag;
int (*serializer)(const void* pObj, TqSerializedHead** ppHead);
const void* (*deserializer)(const TqSerializedHead* pHead, void** ppObj);
void (*deleter)(void*);
} TqMetaStore;
TqMetaStore* tqStoreOpen(const char* path, TqMetaStore* tqStoreOpen(const char* path,
int serializer(const void* pObj, TqSerializedHead** ppHead), TqSerializeFun pSerializer,
const void* deserializer(const TqSerializedHead* pHead, void** ppObj), TqDeserializeFun pDeserializer,
void deleter(void* pObj), TqDeleteFun pDeleter,
int32_t tqConfigFlag int32_t tqConfigFlag
); );
int32_t tqStoreClose(TqMetaStore*); int32_t tqStoreClose(TqMetaStore*);
//int32_t tqStoreDelete(TqMetaStore*); //int32_t tqStoreDelete(TqMetaStore*);
//int32_t TqStoreCommitAll(TqMetaStore*); //int32_t tqStoreCommitAll(TqMetaStore*);
int32_t tqStorePersist(TqMetaStore*); int32_t tqStorePersist(TqMetaStore*);
//clean deleted idx and data from persistent file //clean deleted idx and data from persistent file
int32_t tqStoreCompact(TqMetaStore*); int32_t tqStoreCompact(TqMetaStore*);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
#include "tqInt.h" #include "tqInt.h"
#include "tqMetaStore.h"
//static //static
//read next version data //read next version data
...@@ -24,6 +25,46 @@ ...@@ -24,6 +25,46 @@
// //
int tqGetgHandleSSize(const TqGroupHandle *gHandle); int tqGetgHandleSSize(const TqGroupHandle *gHandle);
int tqBufHandleSSize();
int tqBufItemSSize();
// Look up the group handle for (topicId, cgId, cId).
// Not implemented yet: always returns NULL.
TqGroupHandle* tqFindHandle(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) {
  return NULL;
}
void* tqSerializeListHandle(TqListHandle* listHandle, void* ptr);
void* tqSerializeBufHandle(TqBufferHandle* bufHandle, void* ptr);
void* tqSerializeBufItem(TqBufferItem* bufItem, void* ptr);
const void* tqDeserializeBufHandle(const void* pBytes, TqBufferHandle* bufHandle);
const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem* bufItem);
// Create a TQ instance rooted at `path`.
// Stores the config, log reader and allocator factory, creates an allocator
// from the factory, and opens the meta store with the group-handle
// serializer/deserializer.
// Returns the new instance, or NULL when allocation or the meta store open fails.
STQ* tqOpen(const char* path, TqConfig* tqConfig, TqLogReader* tqLogReader, SMemAllocatorFactory *allocFac) {
  STQ* pTq = malloc(sizeof(STQ));
  if(pTq == NULL) {
    //TODO: memory error
    return NULL;
  }
  // NOTE(review): assumes pTq->path is an embedded buffer large enough for
  // `path`; if it is a pointer, it is uninitialized at this point. Confirm.
  strcpy(pTq->path, path);
  pTq->tqConfig = tqConfig;
  pTq->tqLogReader = tqLogReader;
  pTq->tqMemRef.pAlloctorFactory = allocFac;
  pTq->tqMemRef.pAllocator = allocFac->create();
  if(pTq->tqMemRef.pAllocator == NULL) {
    //TODO
  }
  pTq->tqMeta = tqStoreOpen(path,
      (TqSerializeFun)tqSerializeGroupHandle,
      (TqDeserializeFun)tqDeserializeGroupHandle,
      free,
      0);
  if(pTq->tqMeta == NULL) {
    // Do not leak the instance when the meta store cannot be opened.
    //TODO: the allocator created above also needs to be released
    free(pTq);
    return NULL;
  }
  return pTq;
}
static int tqProtoCheck(TmqMsgHead *pMsg) { static int tqProtoCheck(TmqMsgHead *pMsg) {
return pMsg->protoVer == 0; return pMsg->protoVer == 0;
...@@ -83,14 +124,29 @@ static int tqCommitTCGroup(TqGroupHandle* handle) { ...@@ -83,14 +124,29 @@ static int tqCommitTCGroup(TqGroupHandle* handle) {
int tqCreateTCGroup(STQ *pTq, int64_t topicId, int64_t cgId, int64_t cId, TqGroupHandle** handle) { int tqCreateTCGroup(STQ *pTq, int64_t topicId, int64_t cgId, int64_t cId, TqGroupHandle** handle) {
//create in disk //create in disk
TqGroupHandle* gHandle = (TqGroupHandle*)malloc(sizeof(TqGroupHandle));
if(gHandle == NULL) {
//TODO
return -1;
}
memset(gHandle, 0, sizeof(TqGroupHandle));
return 0; return 0;
} }
int tqOpenTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) { TqGroupHandle* tqOpenTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) {
//look up in disk TqGroupHandle* gHandle = tqHandleGet(pTq->tqMeta, cId);
if(gHandle == NULL) {
int code = tqCreateTCGroup(pTq, topicId, cgId, cId, &gHandle);
if(code != 0) {
//TODO
return NULL;
}
}
//create //create
//open //open
return 0; return gHandle;
} }
int tqCloseTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) { int tqCloseTCGroup(STQ* pTq, int64_t topicId, int64_t cgId, int64_t cId) {
...@@ -207,16 +263,20 @@ int tqConsume(STQ* pTq, TmqConsumeReq* pMsg) { ...@@ -207,16 +263,20 @@ int tqConsume(STQ* pTq, TmqConsumeReq* pMsg) {
return 0; return 0;
} }
int tqSerializeGroupHandle(TqGroupHandle *gHandle, void** ppBytes) { int tqSerializeGroupHandle(const TqGroupHandle *gHandle, TqSerializedHead** ppHead) {
//calculate size //calculate size
int sz = tqGetgHandleSSize(gHandle); int sz = tqGetgHandleSSize(gHandle) + sizeof(TqSerializedHead);
void* ptr = realloc(*ppBytes, sz); if(sz > (*ppHead)->ssize) {
if(ptr == NULL) { void* tmpPtr = realloc(*ppHead, sz);
free(ppBytes); if(tmpPtr == NULL) {
//TODO: memory err free(*ppHead);
return -1; //TODO: memory err
return -1;
}
*ppHead = tmpPtr;
(*ppHead)->ssize = sz;
} }
*ppBytes = ptr; void* ptr = (*ppHead)->content;
//do serialization //do serialization
*(int64_t*)ptr = gHandle->cId; *(int64_t*)ptr = gHandle->cId;
ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); ptr = POINTER_SHIFT(ptr, sizeof(int64_t));
...@@ -261,8 +321,9 @@ void* tqSerializeBufItem(TqBufferItem *bufItem, void* ptr) { ...@@ -261,8 +321,9 @@ void* tqSerializeBufItem(TqBufferItem *bufItem, void* ptr) {
return ptr; return ptr;
} }
const void* tqDeserializeGroupHandle(const void* pBytes, TqGroupHandle *gHandle) { const void* tqDeserializeGroupHandle(const TqSerializedHead* pHead, TqGroupHandle **ppGHandle) {
const void* ptr = pBytes; TqGroupHandle *gHandle = *ppGHandle;
const void* ptr = pHead->content;
gHandle->cId = *(int64_t*)ptr; gHandle->cId = *(int64_t*)ptr;
ptr = POINTER_SHIFT(ptr, sizeof(int64_t)); ptr = POINTER_SHIFT(ptr, sizeof(int64_t));
gHandle->cgId = *(int64_t*)ptr; gHandle->cgId = *(int64_t*)ptr;
...@@ -317,15 +378,15 @@ const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem *bufItem) { ...@@ -317,15 +378,15 @@ const void* tqDeserializeBufItem(const void* pBytes, TqBufferItem *bufItem) {
//TODO: make this a macro //TODO: make this a macro
int tqGetgHandleSSize(const TqGroupHandle *gHandle) { int tqGetgHandleSSize(const TqGroupHandle *gHandle) {
return sizeof(int64_t) * 2 return sizeof(int64_t) * 2 //cId + cgId
+ sizeof(int32_t) + sizeof(int32_t) //topicNum
+ gHandle->topicNum * tqBufHandleSSize(); + gHandle->topicNum * tqBufHandleSSize();
} }
//TODO: make this a macro //TODO: make this a macro
int tqBufHandleSSize() { int tqBufHandleSSize() {
return sizeof(int64_t) * 2 return sizeof(int64_t) * 2 // nextConsumeOffset + topicId
+ sizeof(int32_t) * 2 + sizeof(int32_t) * 2 // head + tail
+ TQ_BUFFER_SIZE * tqBufItemSSize(); + TQ_BUFFER_SIZE * tqBufItemSSize();
} }
......
...@@ -69,10 +69,10 @@ static inline int tqReadLastPage(int fd, TqIdxPageBuf* pBuf) { ...@@ -69,10 +69,10 @@ static inline int tqReadLastPage(int fd, TqIdxPageBuf* pBuf) {
} }
TqMetaStore* tqStoreOpen(const char* path, TqMetaStore* tqStoreOpen(const char* path,
int serializer(const void* pObj, TqSerializedHead** ppHead), TqSerializeFun serializer,
const void* deserializer(const TqSerializedHead* pHead, void** ppObj), TqDeserializeFun deserializer,
void deleter(void* pObj), TqDeleteFun deleter,
int32_t tqConfigFlag int32_t tqConfigFlag
) { ) {
TqMetaStore* pMeta = malloc(sizeof(TqMetaStore)); TqMetaStore* pMeta = malloc(sizeof(TqMetaStore));
if(pMeta == NULL) { if(pMeta == NULL) {
...@@ -92,7 +92,7 @@ TqMetaStore* tqStoreOpen(const char* path, ...@@ -92,7 +92,7 @@ TqMetaStore* tqStoreOpen(const char* path,
char name[pathLen+10]; char name[pathLen+10];
strcpy(name, path); strcpy(name, path);
if(!taosDirExist(name) && !taosMkDir(name)) { if (taosDirExist(name) != 0 && taosMkDir(name) != 0) {
ASSERT(false); ASSERT(false);
} }
strcat(name, "/" TQ_IDX_NAME); strcat(name, "/" TQ_IDX_NAME);
...@@ -127,9 +127,9 @@ TqMetaStore* tqStoreOpen(const char* path, ...@@ -127,9 +127,9 @@ TqMetaStore* tqStoreOpen(const char* path,
pMeta->fileFd = fileFd; pMeta->fileFd = fileFd;
pMeta->serializer = serializer; pMeta->pSerializer = serializer;
pMeta->deserializer = deserializer; pMeta->pDeserializer = deserializer;
pMeta->deleter = deleter; pMeta->pDeleter = deleter;
pMeta->tqConfigFlag = tqConfigFlag; pMeta->tqConfigFlag = tqConfigFlag;
//read idx file and load into memory //read idx file and load into memory
...@@ -171,25 +171,25 @@ TqMetaStore* tqStoreOpen(const char* path, ...@@ -171,25 +171,25 @@ TqMetaStore* tqStoreOpen(const char* path,
} }
if(serializedObj->action == TQ_ACTION_INUSE) { if(serializedObj->action == TQ_ACTION_INUSE) {
if(serializedObj->ssize != sizeof(TqSerializedHead)) { if(serializedObj->ssize != sizeof(TqSerializedHead)) {
pMeta->deserializer(serializedObj, &pNode->handle.valueInUse); pMeta->pDeserializer(serializedObj, &pNode->handle.valueInUse);
} else { } else {
pNode->handle.valueInUse = TQ_DELETE_TOKEN; pNode->handle.valueInUse = TQ_DELETE_TOKEN;
} }
} else if(serializedObj->action == TQ_ACTION_INTXN) { } else if(serializedObj->action == TQ_ACTION_INTXN) {
if(serializedObj->ssize != sizeof(TqSerializedHead)) { if(serializedObj->ssize != sizeof(TqSerializedHead)) {
pMeta->deserializer(serializedObj, &pNode->handle.valueInTxn); pMeta->pDeserializer(serializedObj, &pNode->handle.valueInTxn);
} else { } else {
pNode->handle.valueInTxn = TQ_DELETE_TOKEN; pNode->handle.valueInTxn = TQ_DELETE_TOKEN;
} }
} else if(serializedObj->action == TQ_ACTION_INUSE_CONT) { } else if(serializedObj->action == TQ_ACTION_INUSE_CONT) {
if(serializedObj->ssize != sizeof(TqSerializedHead)) { if(serializedObj->ssize != sizeof(TqSerializedHead)) {
pMeta->deserializer(serializedObj, &pNode->handle.valueInUse); pMeta->pDeserializer(serializedObj, &pNode->handle.valueInUse);
} else { } else {
pNode->handle.valueInUse = TQ_DELETE_TOKEN; pNode->handle.valueInUse = TQ_DELETE_TOKEN;
} }
TqSerializedHead* ptr = POINTER_SHIFT(serializedObj, serializedObj->ssize); TqSerializedHead* ptr = POINTER_SHIFT(serializedObj, serializedObj->ssize);
if(ptr->ssize != sizeof(TqSerializedHead)) { if(ptr->ssize != sizeof(TqSerializedHead)) {
pMeta->deserializer(ptr, &pNode->handle.valueInTxn); pMeta->pDeserializer(ptr, &pNode->handle.valueInTxn);
} else { } else {
pNode->handle.valueInTxn = TQ_DELETE_TOKEN; pNode->handle.valueInTxn = TQ_DELETE_TOKEN;
} }
...@@ -225,11 +225,11 @@ TqMetaStore* tqStoreOpen(const char* path, ...@@ -225,11 +225,11 @@ TqMetaStore* tqStoreOpen(const char* path,
if(pBucketNode) { if(pBucketNode) {
if(pBucketNode->handle.valueInUse if(pBucketNode->handle.valueInUse
&& pBucketNode->handle.valueInUse != TQ_DELETE_TOKEN) { && pBucketNode->handle.valueInUse != TQ_DELETE_TOKEN) {
pMeta->deleter(pBucketNode->handle.valueInUse); pMeta->pDeleter(pBucketNode->handle.valueInUse);
} }
if(pBucketNode->handle.valueInTxn if(pBucketNode->handle.valueInTxn
&& pBucketNode->handle.valueInTxn != TQ_DELETE_TOKEN) { && pBucketNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
pMeta->deleter(pBucketNode->handle.valueInTxn); pMeta->pDeleter(pBucketNode->handle.valueInTxn);
} }
free(pBucketNode); free(pBucketNode);
} }
...@@ -253,11 +253,11 @@ int32_t tqStoreClose(TqMetaStore* pMeta) { ...@@ -253,11 +253,11 @@ int32_t tqStoreClose(TqMetaStore* pMeta) {
ASSERT(pNode->unpersistPrev == NULL); ASSERT(pNode->unpersistPrev == NULL);
if(pNode->handle.valueInTxn if(pNode->handle.valueInTxn
&& pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { && pNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInTxn); pMeta->pDeleter(pNode->handle.valueInTxn);
} }
if(pNode->handle.valueInUse if(pNode->handle.valueInUse
&& pNode->handle.valueInUse != TQ_DELETE_TOKEN) { && pNode->handle.valueInUse != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInUse); pMeta->pDeleter(pNode->handle.valueInUse);
} }
TqMetaList* next = pNode->next; TqMetaList* next = pNode->next;
free(pNode); free(pNode);
...@@ -280,11 +280,11 @@ int32_t tqStoreDelete(TqMetaStore* pMeta) { ...@@ -280,11 +280,11 @@ int32_t tqStoreDelete(TqMetaStore* pMeta) {
while(pNode) { while(pNode) {
if(pNode->handle.valueInTxn if(pNode->handle.valueInTxn
&& pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { && pNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInTxn); pMeta->pDeleter(pNode->handle.valueInTxn);
} }
if(pNode->handle.valueInUse if(pNode->handle.valueInUse
&& pNode->handle.valueInUse != TQ_DELETE_TOKEN) { && pNode->handle.valueInUse != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInUse); pMeta->pDeleter(pNode->handle.valueInUse);
} }
TqMetaList* next = pNode->next; TqMetaList* next = pNode->next;
free(pNode); free(pNode);
...@@ -338,7 +338,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) { ...@@ -338,7 +338,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
if(pNode->handle.valueInUse == TQ_DELETE_TOKEN) { if(pNode->handle.valueInUse == TQ_DELETE_TOKEN) {
pSHead->ssize = sizeof(TqSerializedHead); pSHead->ssize = sizeof(TqSerializedHead);
} else { } else {
pMeta->serializer(pNode->handle.valueInUse, &pSHead); pMeta->pSerializer(pNode->handle.valueInUse, &pSHead);
} }
nBytes = write(pMeta->fileFd, pSHead, pSHead->ssize); nBytes = write(pMeta->fileFd, pSHead, pSHead->ssize);
ASSERT(nBytes == pSHead->ssize); ASSERT(nBytes == pSHead->ssize);
...@@ -349,7 +349,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) { ...@@ -349,7 +349,7 @@ int32_t tqStorePersist(TqMetaStore* pMeta) {
if(pNode->handle.valueInTxn == TQ_DELETE_TOKEN) { if(pNode->handle.valueInTxn == TQ_DELETE_TOKEN) {
pSHead->ssize = sizeof(TqSerializedHead); pSHead->ssize = sizeof(TqSerializedHead);
} else { } else {
pMeta->serializer(pNode->handle.valueInTxn, &pSHead); pMeta->pSerializer(pNode->handle.valueInTxn, &pSHead);
} }
int nBytesTxn = write(pMeta->fileFd, pSHead, pSHead->ssize); int nBytesTxn = write(pMeta->fileFd, pSHead, pSHead->ssize);
ASSERT(nBytesTxn == pSHead->ssize); ASSERT(nBytesTxn == pSHead->ssize);
...@@ -423,7 +423,7 @@ static int32_t tqHandlePutCommitted(TqMetaStore* pMeta, int64_t key, void* value ...@@ -423,7 +423,7 @@ static int32_t tqHandlePutCommitted(TqMetaStore* pMeta, int64_t key, void* value
//TODO: think about thread safety //TODO: think about thread safety
if(pNode->handle.valueInUse if(pNode->handle.valueInUse
&& pNode->handle.valueInUse != TQ_DELETE_TOKEN) { && pNode->handle.valueInUse != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInUse); pMeta->pDeleter(pNode->handle.valueInUse);
} }
//change pointer ownership //change pointer ownership
pNode->handle.valueInUse = value; pNode->handle.valueInUse = value;
...@@ -496,7 +496,7 @@ static inline int32_t tqHandlePutImpl(TqMetaStore* pMeta, int64_t key, void* val ...@@ -496,7 +496,7 @@ static inline int32_t tqHandlePutImpl(TqMetaStore* pMeta, int64_t key, void* val
return -2; return -2;
} }
if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInTxn); pMeta->pDeleter(pNode->handle.valueInTxn);
} }
} }
pNode->handle.valueInTxn = value; pNode->handle.valueInTxn = value;
...@@ -562,7 +562,7 @@ int32_t tqHandleCommit(TqMetaStore* pMeta, int64_t key) { ...@@ -562,7 +562,7 @@ int32_t tqHandleCommit(TqMetaStore* pMeta, int64_t key) {
} }
if(pNode->handle.valueInUse if(pNode->handle.valueInUse
&& pNode->handle.valueInUse != TQ_DELETE_TOKEN) { && pNode->handle.valueInUse != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInUse); pMeta->pDeleter(pNode->handle.valueInUse);
} }
pNode->handle.valueInUse = pNode->handle.valueInTxn; pNode->handle.valueInUse = pNode->handle.valueInTxn;
pNode->handle.valueInTxn = NULL; pNode->handle.valueInTxn = NULL;
...@@ -582,7 +582,7 @@ int32_t tqHandleAbort(TqMetaStore* pMeta, int64_t key) { ...@@ -582,7 +582,7 @@ int32_t tqHandleAbort(TqMetaStore* pMeta, int64_t key) {
if(pNode->handle.key == key) { if(pNode->handle.key == key) {
if(pNode->handle.valueInTxn) { if(pNode->handle.valueInTxn) {
if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
pMeta->deleter(pNode->handle.valueInTxn); pMeta->pDeleter(pNode->handle.valueInTxn);
} }
pNode->handle.valueInTxn = NULL; pNode->handle.valueInTxn = NULL;
tqLinkUnpersist(pMeta, pNode); tqLinkUnpersist(pMeta, pNode);
...@@ -602,7 +602,7 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) { ...@@ -602,7 +602,7 @@ int32_t tqHandleDel(TqMetaStore* pMeta, int64_t key) {
while(pNode) { while(pNode) {
if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) { if(pNode->handle.valueInTxn != TQ_DELETE_TOKEN) {
if(pNode->handle.valueInTxn) { if(pNode->handle.valueInTxn) {
pMeta->deleter(pNode->handle.valueInTxn); pMeta->pDeleter(pNode->handle.valueInTxn);
} }
pNode->handle.valueInTxn = TQ_DELETE_TOKEN; pNode->handle.valueInTxn = TQ_DELETE_TOKEN;
tqLinkUnpersist(pMeta, pNode); tqLinkUnpersist(pMeta, pNode);
......
#include <gtest/gtest.h>
#include <cstring>
#include <iostream>
#include <queue>
#include "tq.h"
using namespace std;
// Smoke test placeholder: allocates a TqGroupHandle and releases it again.
// No serializer routine is exercised here yet; the allocation is checked and
// freed so the test does not leak under sanitizers/valgrind.
TEST(TqSerializerTest, basicTest) {
  TqGroupHandle* gHandle = (TqGroupHandle*)malloc(sizeof(TqGroupHandle));
  ASSERT_TRUE(gHandle != NULL);
  free(gHandle);
}
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "sync.h" #include "sync.h"
#include "sync_type.h" #include "sync_type.h"
#include "thash.h"
#include "raft_message.h" #include "raft_message.h"
#include "sync_raft_impl.h" #include "sync_raft_impl.h"
#include "sync_raft_quorum.h" #include "sync_raft_quorum.h"
...@@ -43,9 +44,9 @@ struct SSyncRaft { ...@@ -43,9 +44,9 @@ struct SSyncRaft {
// owner sync node // owner sync node
SSyncNode* pNode; SSyncNode* pNode;
SSyncCluster cluster; // hash map nodeId -> SNodeInfo*
SHashObj* nodeInfoMap;
int selfIndex;
SyncNodeId selfId; SyncNodeId selfId;
SyncGroupId selfGroupId; SyncGroupId selfGroupId;
......
...@@ -39,8 +39,6 @@ struct SSyncRaftLog { ...@@ -39,8 +39,6 @@ struct SSyncRaftLog {
SyncIndex commitIndex; SyncIndex commitIndex;
SyncIndex appliedIndex; SyncIndex appliedIndex;
}; };
SSyncRaftLog* syncRaftLogOpen(); SSyncRaftLog* syncRaftLogOpen();
......
...@@ -20,11 +20,11 @@ ...@@ -20,11 +20,11 @@
#include "syncInt.h" #include "syncInt.h"
#include "sync_type.h" #include "sync_type.h"
// syncRaftReplicate sends an append RPC with new entries to the given peer, // syncRaftMaybeSendAppend sends an append RPC with new entries to the given peer,
// if necessary. Returns true if a message was sent. The sendIfEmpty // if necessary. Returns true if a message was sent. The sendIfEmpty
// argument controls whether messages with no entries will be sent // argument controls whether messages with no entries will be sent
// ("empty" messages are useful to convey updated Commit indexes, but // ("empty" messages are useful to convey updated Commit indexes, but
// are undesirable when we're sending multiple messages in a batch). // are undesirable when we're sending multiple messages in a batch).
bool syncRaftReplicate(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty); bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty);
#endif /* TD_SYNC_RAFT_REPLICATION_H */ #endif /* TD_SYNC_RAFT_REPLICATION_H */
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "raft_configuration.h" #ifndef _TD_LIBS_SYNC_CONST_H
#include "raft.h" #define _TD_LIBS_SYNC_CONST_H
int syncRaftConfigurationIndexOfNode(SSyncRaft *pRaft, SyncNodeId id) { #include "sync.h"
return (int)(id);
}
int syncRaftConfigurationVoterCount(SSyncRaft *pRaft) { static int kSyncRaftMaxInflghtMsgs = 20;
return pRaft->cluster.replica;
} static SyncIndex kMaxCommitIndex = UINT64_MAX;
\ No newline at end of file
#endif /* _TD_LIBS_SYNC_CONST_H */
...@@ -33,6 +33,11 @@ struct SSyncRaftChanger { ...@@ -33,6 +33,11 @@ struct SSyncRaftChanger {
typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
// Simple carries out a series of configuration changes that (in aggregate)
// mutates the incoming majority config Voters[0] by at most one. This method
// will return an error if that is not the case, if the resulting quorum is
// zero, or if the configuration is in a joint state (i.e. if there is an
// outgoing configuration).
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
......
...@@ -28,6 +28,8 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft); ...@@ -28,6 +28,8 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft);
void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType); void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType);
void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType);
void syncRaftTriggerHeartbeat(SSyncRaft* pRaft); void syncRaftTriggerHeartbeat(SSyncRaft* pRaft);
void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft);
...@@ -51,4 +53,6 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState); ...@@ -51,4 +53,6 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState);
void syncRaftBroadcastAppend(SSyncRaft* pRaft); void syncRaftBroadcastAppend(SSyncRaft* pRaft);
SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id);
#endif /* _TD_LIBS_SYNC_RAFT_IMPL_H */ #endif /* _TD_LIBS_SYNC_RAFT_IMPL_H */
...@@ -18,54 +18,47 @@ ...@@ -18,54 +18,47 @@
#include "sync.h" #include "sync.h"
/** // Inflights limits the number of MsgApp (represented by the largest index
* SSyncRaftInflights limits the number of MsgApp (represented by the largest index // contained within) sent to followers but not yet acknowledged by them. Callers
* contained within) sent to followers but not yet acknowledged by them. Callers // use Full() to check whether more messages can be sent, call Add() whenever
* use syncRaftInflightFull() to check whether more messages can be sent, // they are sending a new append, and release "quota" via FreeLE() whenever an
* call syncRaftInflightAdd() whenever they are sending a new append, // ack is received.
* and release "quota" via FreeLE() whenever an ack is received.
**/
typedef struct SSyncRaftInflights { typedef struct SSyncRaftInflights {
/* the starting index in the buffer */ // the starting index in the buffer
int start; int start;
/* number of inflights in the buffer */ // number of inflights in the buffer
int count; int count;
/* the size of the buffer */ // the size of the buffer
int size; int size;
/** // buffer contains the index of the last entry
* buffer contains the index of the last entry // inside one message.
* inside one message.
**/
SyncIndex* buffer; SyncIndex* buffer;
} SSyncRaftInflights; } SSyncRaftInflights;
SSyncRaftInflights* syncRaftOpenInflights(int size); SSyncRaftInflights* syncRaftOpenInflights(int size);
void syncRaftCloseInflights(SSyncRaftInflights*); void syncRaftCloseInflights(SSyncRaftInflights*);
// reset frees all inflights.
static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) { static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) {
inflights->count = 0; inflights->count = 0;
inflights->start = 0; inflights->start = 0;
} }
// Full returns true if no more messages can be sent at the moment.
static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) { static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) {
return inflights->count == inflights->size; return inflights->count == inflights->size;
} }
/** // Add notifies the Inflights that a new message with the given index is being
* syncRaftInflightAdd notifies the Inflights that a new message with the given index is being // dispatched. Full() must be called prior to Add() to verify that there is room
* dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd() // for one more message, and consecutive calls to add Add() must provide a
* to verify that there is room for one more message, // monotonic sequence of indexes.
* and consecutive calls to add syncRaftInflightAdd() must provide a
* monotonic sequence of indexes.
**/
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex);
/** // FreeLE frees the inflights smaller or equal to the given `to` flight.
* syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight.
**/
void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex); void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex);
/** /**
......
...@@ -13,15 +13,37 @@ ...@@ -13,15 +13,37 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef _TD_LIBS_SYNC_RAFT_CONFIGURATION_H #ifndef _TD_LIBS_SYNC_RAFT_NODE_MAP_H
#define _TD_LIBS_SYNC_RAFT_CONFIGURATION_H #define _TD_LIBS_SYNC_RAFT_NODE_MAP_H
#include "thash.h"
#include "sync.h" #include "sync.h"
#include "sync_type.h" #include "sync_type.h"
// return -1 if cannot find this id struct SSyncRaftNodeMap {
int syncRaftConfigurationIndexOfNode(SSyncRaft *pRaft, SyncNodeId id); SHashObj* nodeIdMap;
};
int syncRaftConfigurationVoterCount(SSyncRaft *pRaft); void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap);
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap);
#endif /* _TD_LIBS_SYNC_RAFT_CONFIGURATION_H */ void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap);
\ No newline at end of file
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to);
void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to);
void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap);
// returns true once iteration has reached the end
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId);
bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap);
#endif /* _TD_LIBS_SYNC_RAFT_NODE_MAP_H */
\ No newline at end of file
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_inflights.h" #include "sync_raft_inflights.h"
#include "thash.h"
/** /**
* State defines how the leader should interact with the follower. * State defines how the leader should interact with the follower.
...@@ -64,141 +65,123 @@ static const char* kProgressStateString[] = { ...@@ -64,141 +65,123 @@ static const char* kProgressStateString[] = {
"Snapshot", "Snapshot",
}; };
/** // Progress represents a follower’s progress in the view of the leader. Leader
* Progress represents a follower’s progress in the view of the leader. Leader maintains // maintains progresses of all followers, and sends entries to the follower
* progresses of all followers, and sends entries to the follower based on its progress. // based on its progress.
**/ //
// NB(tbg): Progress is basically a state machine whose transitions are mostly
// strewn around `*raft.raft`. Additionally, some fields are only used when in a
// certain State. All of this isn't ideal.
struct SSyncRaftProgress { struct SSyncRaftProgress {
// index in raft cluster config SyncGroupId groupId;
int selfIndex;
SyncNodeId id; SyncNodeId id;
int16_t refCount;
SyncIndex nextIndex; SyncIndex nextIndex;
SyncIndex matchIndex; SyncIndex matchIndex;
/** // State defines how the leader should interact with the follower.
* State defines how the leader should interact with the follower. //
* // When in StateProbe, leader sends at most one replication message
* When in StateProbe, leader sends at most one replication message // per heartbeat interval. It also probes actual progress of the follower.
* per heartbeat interval. It also probes actual progress of the follower. //
* // When in StateReplicate, leader optimistically increases next
* When in StateReplicate, leader optimistically increases next // to the latest entry sent after sending replication message. This is
* to the latest entry sent after sending replication message. This is // an optimized state for fast replicating log entries to the follower.
* an optimized state for fast replicating log entries to the follower. //
* // When in StateSnapshot, leader should have sent out snapshot
* When in StateSnapshot, leader should have sent out snapshot // before and stops sending any replication message.
* before and stops sending any replication message.
**/
ESyncRaftProgressState state; ESyncRaftProgressState state;
/** // PendingSnapshot is used in StateSnapshot.
* pendingSnapshotIndex is used in PROGRESS_STATE_SNAPSHOT. // If there is a pending snapshot, the pendingSnapshot will be set to the
* If there is a pending snapshot, the pendingSnapshotIndex will be set to the // index of the snapshot. If pendingSnapshot is set, the replication process of
* index of the snapshot. If pendingSnapshotIndex is set, the replication process of // this Progress will be paused. raft will not resend snapshot until the pending one
* this Progress will be paused. raft will not resend snapshot until the pending one // is reported to be failed.
* is reported to be failed.
**/
SyncIndex pendingSnapshotIndex; SyncIndex pendingSnapshotIndex;
/** // RecentActive is true if the progress is recently active. Receiving any messages
* recentActive is true if the progress is recently active. Receiving any messages // from the corresponding follower indicates the progress is active.
* from the corresponding follower indicates the progress is active. // RecentActive can be reset to false after an election timeout.
* RecentActive can be reset to false after an election timeout. //
**/ // TODO(tbg): the leader should always have this set to true.
bool recentActive; bool recentActive;
/** // ProbeSent is used while this follower is in StateProbe. When ProbeSent is
* probeSent is used while this follower is in StateProbe. When probeSent is // true, raft should pause sending replication message to this peer until
* true, raft should pause sending replication message to this peer until // ProbeSent is reset. See ProbeAcked() and IsPaused().
* probeSent is reset. See ProbeAcked() and IsPaused().
**/
bool probeSent; bool probeSent;
/** // Inflights is a sliding window for the inflight messages.
* inflights is a sliding window for the inflight messages. // Each inflight message contains one or more log entries.
* Each inflight message contains one or more log entries. // The max number of entries per message is defined in raft config as MaxSizePerMsg.
* The max number of entries per message is defined in raft config as MaxSizePerMsg. // Thus inflight effectively limits both the number of inflight messages
* Thus inflight effectively limits both the number of inflight messages // and the bandwidth each Progress can use.
* and the bandwidth each Progress can use. // When inflights is Full, no more message should be sent.
* When inflights is Full, no more message should be sent. // When a leader sends out a message, the index of the last
* When a leader sends out a message, the index of the last // entry should be added to inflights. The index MUST be added
* entry should be added to inflights. The index MUST be added // into inflights in order.
* into inflights in order. // When a leader receives a reply, the previous inflights should
* When a leader receives a reply, the previous inflights should // be freed by calling inflights.FreeLE with the index of the last
* be freed by calling inflights.FreeLE with the index of the last // received entry.
* received entry.
**/
SSyncRaftInflights* inflights; SSyncRaftInflights* inflights;
/** // IsLearner is true if this progress is tracked for a learner.
* IsLearner is true if this progress is tracked for a learner.
**/
bool isLearner; bool isLearner;
}; };
struct SSyncRaftProgressMap { struct SSyncRaftProgressMap {
SSyncRaftProgress progress[TSDB_MAX_REPLICA]; // map nodeId -> SSyncRaftProgress*
SHashObj* progressMap;
}; };
static FORCE_INLINE const char* syncRaftProgressStateString(const SSyncRaftProgress* progress) { static FORCE_INLINE const char* syncRaftProgressStateString(const SSyncRaftProgress* progress) {
return kProgressStateString[progress->state]; return kProgressStateString[progress->state];
} }
void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress); void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress);
/** // BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or, // optionally and if larger, the index of the pending snapshot.
* optionally and if larger, the index of the pending snapshot.
**/
void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress); void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress);
/** // BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress); void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress);
/** // MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the // index acked by it. The method returns false if the given n index comes from
* index acked by it. The method returns false if the given n index comes from // an outdated message. Otherwise it updates the progress and returns true.
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex); bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex);
/** // OptimisticUpdate signals that appends all the way up to and including index n
* syncRaftProgressOptimisticNextIndex signals that appends all the way up to and including index n // are in-flight. As a result, Next is increased to n+1.
* are in-flight. As a result, Next is increased to n+1.
**/
static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) {
progress->nextIndex = nextIndex + 1; progress->nextIndex = nextIndex + 1;
} }
/** // MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
* syncRaftProgressMaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The // arguments are the index of the append message rejected by the follower, and
* arguments are the index of the append message rejected by the follower, and // the hint that we want to decrease to.
* the hint that we want to decrease to. //
* // Rejections can happen spuriously as messages are sent out of order or
* Rejections can happen spuriously as messages are sent out of order or // duplicated. In such cases, the rejection pertains to an index that the
* duplicated. In such cases, the rejection pertains to an index that the // Progress already knows were previously acknowledged, and false is returned
* Progress already knows were previously acknowledged, and false is returned // without changing the Progress.
* without changing the Progress. //
* // If the rejection is genuine, Next is lowered sensibly, and the Progress is
* If the rejection is genuine, Next is lowered sensibly, and the Progress is // cleared for sending log entries.
* cleared for sending log entries.
**/
bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
SyncIndex rejected, SyncIndex matchHint); SyncIndex rejected, SyncIndex matchHint);
/** // IsPaused returns whether sending log entries to this node has been throttled.
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled. // This is done when a node has rejected recent MsgApps, is currently waiting
* This is done when a node has rejected recent MsgApps, is currently waiting // for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal // operation, this is false. A throttled node will be contacted less frequently
* operation, this is false. A throttled node will be contacted less frequently // until it has reached a state in which it's able to accept a steady stream of
* until it has reached a state in which it's able to accept a steady stream of // log entries again.
* log entries again.
**/
bool syncRaftProgressIsPaused(SSyncRaftProgress* progress); bool syncRaftProgressIsPaused(SSyncRaftProgress* progress);
static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) { static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) {
...@@ -221,22 +204,35 @@ static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progres ...@@ -221,22 +204,35 @@ static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progres
return progress->recentActive; return progress->recentActive;
} }
int syncRaftFindProgressIndexByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id); void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap);
void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap);
void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap);
void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to);
SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id);
int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id); int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress);
void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id); void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id);
bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id);
/** /**
* return true if progress's log is up-todate * return true if progress's log is up-todate
**/ **/
bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress);
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
// snapshot index.
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex);
void syncRaftCopyProgress(const SSyncRaftProgress* from, SSyncRaftProgress* to); void syncRaftCopyProgress(const SSyncRaftProgress* from, SSyncRaftProgress* to);
void syncRaftProgressMapCopy(const SSyncRaftProgressMap* from, SSyncRaftProgressMap* to); // return true if reach the end
bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress);
bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg);
#if 0 #if 0
......
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
#include "sync_raft_quorum_joint.h" #include "sync_raft_quorum_joint.h"
#include "sync_raft_progress.h" #include "sync_raft_progress.h"
#include "sync_raft_proto.h" #include "sync_raft_proto.h"
#include "thash.h"
// Config reflects the configuration tracked in a ProgressTracker.
struct SSyncRaftProgressTrackerConfig { struct SSyncRaftProgressTrackerConfig {
SSyncRaftQuorumJointConfig voters; SSyncRaftQuorumJointConfig voters;
...@@ -83,34 +85,47 @@ struct SSyncRaftProgressTracker { ...@@ -83,34 +85,47 @@ struct SSyncRaftProgressTracker {
SSyncRaftProgressMap progressMap; SSyncRaftProgressMap progressMap;
ESyncRaftVoteType votes[TSDB_MAX_REPLICA]; // nodeid -> ESyncRaftVoteType map
SHashObj* votesMap;
int maxInflightMsgs; int maxInflightMsgs;
SSyncRaft* pRaft;
}; };
SSyncRaftProgressTracker* syncRaftOpenProgressTracker(); SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft);
// Lifecycle pair for a SSyncRaftProgressTrackerConfig. The definitions are not
// part of this header; judging by the names, Init prepares a config for use and
// Free releases what Init set up (TODO confirm against the implementation).
void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config);
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config);
// ResetVotes prepares for a new round of vote counting via recordVote.
void syncRaftResetVotes(SSyncRaftProgressTracker*); void syncRaftResetVotes(SSyncRaftProgressTracker*);
typedef void (*visitProgressFp)(int i, SSyncRaftProgress* progress, void* arg);
void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg); void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg);
/** // RecordVote records that the node with the given id voted for this Raft
* syncRaftRecordVote records that the node with the given id voted for this Raft // instance if v == true (and declined it otherwise).
* instance if v == true (and declined it otherwise). void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant);
**/
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant);
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressTrackerConfig* result); void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to);
int syncRaftCheckProgress(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
/** // TallyVotes returns the number of granted and rejected Votes, and whether the
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the // election outcome is known.
* election outcome is known.
**/
ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted); ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted);
void syncRaftConfigState(const SSyncRaftProgressTracker* tracker, SSyncConfigState* cs); void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs);
// Committed returns the largest log index known to be committed based on what
// the voting members of the group have acknowledged.
SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker);
// QuorumActive returns true if the quorum is active from the view of the local
// raft state machine. Otherwise, it returns false.
bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker);
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#define TD_SYNC_RAFT_PROTO_H #define TD_SYNC_RAFT_PROTO_H
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_node_map.h"
typedef enum ESyncRaftConfChangeType { typedef enum ESyncRaftConfChangeType {
SYNC_RAFT_Conf_AddNode = 0, SYNC_RAFT_Conf_AddNode = 0,
...@@ -58,4 +59,19 @@ typedef struct SSyncConfigState { ...@@ -58,4 +59,19 @@ typedef struct SSyncConfigState {
bool autoLeave; bool autoLeave;
} SSyncConfigState; } SSyncConfigState;
// Reports whether the change array holds no entries, i.e. its count `n` is zero.
static FORCE_INLINE bool syncRaftConfArrayIsEmpty(const SSyncConfChangeSingleArray* ary) {
  if (ary->n != 0) {
    return false;
  }
  return true;
}
// Puts `ary` into the empty state: no change buffer and a count of zero.
// The whole struct is overwritten, so any member not named here is zeroed too.
static FORCE_INLINE void syncRaftInitConfArray(SSyncConfChangeSingleArray* ary) {
  const SSyncConfChangeSingleArray emptyArray = { .n = 0, .changes = NULL };
  *ary = emptyArray;
}
// Releases the change buffer held by `ary` and resets it to the empty state.
// free(NULL) is a no-op, so no guard is needed. Clearing `changes` and `n`
// afterwards keeps a repeated call (or a later emptiness check) safe instead
// of leaving a dangling pointer and a stale count behind.
static FORCE_INLINE void syncRaftFreeConfArray(SSyncConfChangeSingleArray* ary) {
  free(ary->changes);
  ary->changes = NULL;
  ary->n = 0;
}
#endif /* TD_SYNC_RAFT_PROTO_H */ #endif /* TD_SYNC_RAFT_PROTO_H */
...@@ -19,24 +19,31 @@ ...@@ -19,24 +19,31 @@
#include "taosdef.h" #include "taosdef.h"
#include "sync.h" #include "sync.h"
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_node_map.h"
#include "thash.h"
/** // JointConfig is a configuration of two groups of (possibly overlapping)
* SSyncRaftQuorumJointConfig is a configuration of two groups of (possibly overlapping) // majority configurations. Decisions require the support of both majorities.
* majority configurations. Decisions require the support of both majorities.
**/
typedef struct SSyncRaftQuorumJointConfig { typedef struct SSyncRaftQuorumJointConfig {
SSyncRaftNodeMap outgoing; SSyncRaftNodeMap outgoing;
SSyncRaftNodeMap incoming; SSyncRaftNodeMap incoming;
} SSyncRaftQuorumJointConfig; } SSyncRaftQuorumJointConfig;
/** // IDs returns a newly initialized map representing the set of voters present
* syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns // in the joint configuration.
* a result indicating whether the vote is pending, lost, or won. A joint quorum void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap);
* requires both majority quorums to vote in favor.
**/
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes);
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); // CommittedIndex returns the largest committed index for the given joint
// quorum. An index is jointly committed if it is committed in both constituent
// majorities.
SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg);
// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
// a result indicating whether the vote is pending, lost, or won. A joint quorum
// requires both majority quorums to vote in favor.
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap);
void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config);
static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
return syncRaftIsInNodeMap(&config->outgoing, id); return syncRaftIsInNodeMap(&config->outgoing, id);
...@@ -59,7 +66,19 @@ static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SS ...@@ -59,7 +66,19 @@ static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SS
} }
static FORCE_INLINE void syncRaftJointConfigClearOutgoing(SSyncRaftQuorumJointConfig* config) { static FORCE_INLINE void syncRaftJointConfigClearOutgoing(SSyncRaftQuorumJointConfig* config) {
memset(&config->outgoing, 0, sizeof(SSyncCluster)); syncRaftClearNodeMap(&config->outgoing);
}
// Reports whether the incoming node map of the joint config has size zero.
static FORCE_INLINE bool syncRaftJointConfigIsIncomingEmpty(const SSyncRaftQuorumJointConfig* config) {
  int32_t numIncoming = syncRaftNodeMapSize(&config->incoming);
  return 0 == numIncoming;
}
// Reports whether the outgoing node map of the joint config has size zero.
static FORCE_INLINE bool syncRaftJointConfigIsOutgoingEmpty(const SSyncRaftQuorumJointConfig* config) {
  int32_t numOutgoing = syncRaftNodeMapSize(&config->outgoing);
  return 0 == numOutgoing;
}
// Reports whether node `id` is a member of the outgoing node map.
// Thin wrapper over syncRaftIsInNodeMap() applied to config->outgoing.
static FORCE_INLINE bool syncRaftJointConfigIsInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  bool inOutgoing = syncRaftIsInNodeMap(&config->outgoing, id);
  return inOutgoing;
}
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */ #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "sync.h" #include "sync.h"
#include "sync_type.h" #include "sync_type.h"
#include "sync_raft_quorum.h" #include "sync_raft_quorum.h"
#include "thash.h"
/** /**
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns * syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
...@@ -26,6 +27,10 @@ ...@@ -26,6 +27,10 @@
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a * yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* quorum of no has been reached). * quorum of no has been reached).
**/ **/
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, const ESyncRaftVoteType* votes); ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap);
// CommittedIndex computes the committed index from those supplied via the
// provided AckedIndexer (for the active config).
SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg);
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */ #endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
// the Changer only needs a ProgressMap (not a whole Tracker) at which point // the Changer only needs a ProgressMap (not a whole Tracker) at which point
// this can just take LastIndex and MaxInflight directly instead and cook up // this can just take LastIndex and MaxInflight directly instead and cook up
// the results from that alone. // the results from that alone.
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs); int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
#endif /* TD_SYNC_RAFT_RESTORE_H */ #endif /* TD_SYNC_RAFT_RESTORE_H */
...@@ -32,6 +32,8 @@ typedef struct SSyncRaftProgress SSyncRaftProgress; ...@@ -32,6 +32,8 @@ typedef struct SSyncRaftProgress SSyncRaftProgress;
typedef struct SSyncRaftProgressMap SSyncRaftProgressMap; typedef struct SSyncRaftProgressMap SSyncRaftProgressMap;
typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig; typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig;
typedef struct SSyncRaftNodeMap SSyncRaftNodeMap;
typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker; typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker;
typedef struct SSyncRaftChanger SSyncRaftChanger; typedef struct SSyncRaftChanger SSyncRaftChanger;
...@@ -68,11 +70,6 @@ typedef struct SSyncClusterConfig { ...@@ -68,11 +70,6 @@ typedef struct SSyncClusterConfig {
const SSyncCluster* cluster; const SSyncCluster* cluster;
} SSyncClusterConfig; } SSyncClusterConfig;
// Fixed-capacity collection of raft node ids (at most TSDB_MAX_REPLICA entries).
typedef struct {
// Entry count. NOTE(review): appears to track the number of occupied nodeId
// slots; confirm against the code that adds/removes ids.
int32_t replica;
// Node id slots. NOTE(review): unused slots presumably hold SYNC_NON_NODE_ID
// and may be interleaved with used ones; confirm against the add/delete helpers.
SyncNodeId nodeId[TSDB_MAX_REPLICA];
} SSyncRaftNodeMap;
typedef enum { typedef enum {
SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0,
SYNC_RAFT_CAMPAIGN_ELECTION = 1, SYNC_RAFT_CAMPAIGN_ELECTION = 1,
...@@ -80,9 +77,6 @@ typedef enum { ...@@ -80,9 +77,6 @@ typedef enum {
} ESyncRaftElectionType; } ESyncRaftElectionType;
typedef enum { typedef enum {
// the init vote resp status
SYNC_RAFT_VOTE_RESP_UNKNOWN = 0,
// grant the vote request // grant the vote request
SYNC_RAFT_VOTE_RESP_GRANT = 1, SYNC_RAFT_VOTE_RESP_GRANT = 1,
...@@ -90,4 +84,8 @@ typedef enum { ...@@ -90,4 +84,8 @@ typedef enum {
SYNC_RAFT_VOTE_RESP_REJECT = 2, SYNC_RAFT_VOTE_RESP_REJECT = 2,
} ESyncRaftVoteType; } ESyncRaftVoteType;
typedef void (*visitProgressFp)(SSyncRaftProgress* progress, void* arg);
typedef void (*matchAckIndexerFp)(SyncNodeId id, void* arg, SyncIndex* index);
#endif /* _TD_LIBS_SYNC_TYPE_H */ #endif /* _TD_LIBS_SYNC_TYPE_H */
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
*/ */
#include "raft.h" #include "raft.h"
#include "raft_configuration.h" #include "sync_raft_impl.h"
#include "raft_log.h" #include "raft_log.h"
#include "sync_raft_restore.h" #include "sync_raft_restore.h"
#include "raft_replication.h" #include "raft_replication.h"
...@@ -59,8 +59,13 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { ...@@ -59,8 +59,13 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
logStore = &(pRaft->logStore); logStore = &(pRaft->logStore);
fsm = &(pRaft->fsm); fsm = &(pRaft->fsm);
pRaft->nodeInfoMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
if (pRaft->nodeInfoMap == NULL) {
return -1;
}
// init progress tracker // init progress tracker
pRaft->tracker = syncRaftOpenProgressTracker(); pRaft->tracker = syncRaftOpenProgressTracker(pRaft);
if (pRaft->tracker == NULL) { if (pRaft->tracker == NULL) {
return -1; return -1;
} }
...@@ -96,11 +101,22 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { ...@@ -96,11 +101,22 @@ int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
.tracker = pRaft->tracker, .tracker = pRaft->tracker,
.lastIndex = syncRaftLogLastIndex(pRaft->log), .lastIndex = syncRaftLogLastIndex(pRaft->log),
}; };
if (syncRaftRestoreConfig(&changer, &confState) < 0) { SSyncRaftProgressTrackerConfig config;
SSyncRaftProgressMap progressMap;
if (syncRaftRestoreConfig(&changer, &confState, &config, &progressMap) < 0) {
syncError("syncRaftRestoreConfig for vgid %d fail", pInfo->vgId); syncError("syncRaftRestoreConfig for vgid %d fail", pInfo->vgId);
return -1; return -1;
} }
// save restored config and progress map to tracker
syncRaftCopyProgressMap(&progressMap, &pRaft->tracker->progressMap);
syncRaftCopyTrackerConfig(&config, &pRaft->tracker->config);
// free progress map and config
syncRaftFreeProgressMap(&progressMap);
syncRaftFreeTrackConfig(&config);
if (!syncRaftIsEmptyServerState(&serverState)) { if (!syncRaftIsEmptyServerState(&serverState)) {
syncRaftLoadState(pRaft, &serverState); syncRaftLoadState(pRaft, &serverState);
} }
...@@ -140,6 +156,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -140,6 +156,7 @@ int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
int32_t syncRaftTick(SSyncRaft* pRaft) { int32_t syncRaftTick(SSyncRaft* pRaft) {
pRaft->currentTick += 1; pRaft->currentTick += 1;
pRaft->tickFp(pRaft);
return 0; return 0;
} }
...@@ -151,8 +168,8 @@ static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const ch ...@@ -151,8 +168,8 @@ static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const ch
return 0; return 0;
} }
static void visitProgressMaybeSendAppend(int i, SSyncRaftProgress* progress, void* arg) { static void visitProgressMaybeSendAppend(SSyncRaftProgress* progress, void* arg) {
syncRaftReplicate(arg, progress, false); syncRaftMaybeSendAppend(arg, progress, false);
} }
// switchToConfig reconfigures this node to use the provided configuration. It // switchToConfig reconfigures this node to use the provided configuration. It
...@@ -169,13 +186,12 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi ...@@ -169,13 +186,12 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
SSyncRaftProgress* progress = NULL; SSyncRaftProgress* progress = NULL;
syncRaftConfigState(pRaft->tracker, cs); syncRaftConfigState(pRaft->tracker, cs);
i = syncRaftFindProgressIndexByNodeId(&pRaft->tracker->progressMap, selfId); progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, selfId);
exist = (i != -1); exist = (progress != NULL);
// Update whether the node itself is a learner, resetting to false when the // Update whether the node itself is a learner, resetting to false when the
// node is removed. // node is removed.
if (exist) { if (exist) {
progress = &pRaft->tracker->progressMap.progress[i];
pRaft->isLearner = progress->isLearner; pRaft->isLearner = progress->isLearner;
} else { } else {
pRaft->isLearner = false; pRaft->isLearner = false;
...@@ -196,7 +212,7 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi ...@@ -196,7 +212,7 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
// The remaining steps only make sense if this node is the leader and there // The remaining steps only make sense if this node is the leader and there
// are other nodes. // are other nodes.
if (pRaft->state != TAOS_SYNC_STATE_LEADER || cs->voters.replica == 0) { if (pRaft->state != TAOS_SYNC_STATE_LEADER || syncRaftNodeMapSize(&cs->voters) == 0) {
return; return;
} }
...@@ -212,8 +228,11 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi ...@@ -212,8 +228,11 @@ static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfi
// If the leadTransferee was removed or demoted, abort the leadership transfer. // If the leadTransferee was removed or demoted, abort the leadership transfer.
SyncNodeId leadTransferee = pRaft->leadTransferee; SyncNodeId leadTransferee = pRaft->leadTransferee;
if (leadTransferee != SYNC_NON_NODE_ID && !syncRaftIsInNodeMap(&pRaft->tracker->config.voters, leadTransferee)) { if (leadTransferee != SYNC_NON_NODE_ID) {
abortLeaderTransfer(pRaft); if (!syncRaftIsInNodeMap(&pRaft->tracker->config.voters.incoming, leadTransferee) &&
!syncRaftIsInNodeMap(&pRaft->tracker->config.voters.outgoing, leadTransferee)) {
abortLeaderTransfer(pRaft);
}
} }
} }
} }
...@@ -286,8 +305,8 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) ...@@ -286,8 +305,8 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
* but it will not receive MsgApp or MsgHeartbeat, so it will not create * but it will not receive MsgApp or MsgHeartbeat, so it will not create
* disruptive term increases * disruptive term increases
**/ **/
int peerIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from);
if (peerIndex < 0) { if (pNode == NULL) {
return true; return true;
} }
SSyncMessage* msg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term); SSyncMessage* msg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term);
...@@ -295,7 +314,7 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) ...@@ -295,7 +314,7 @@ static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg)
return true; return true;
} }
pRaft->io.send(msg, &(pRaft->cluster.nodeInfo[peerIndex])); pRaft->io.send(msg, pNode);
} else { } else {
// ignore other cases // ignore other cases
syncInfo("[%d:%d] [term:%" PRId64 "] ignored a %d message with lower term from %d [term:%" PRId64 "]", syncInfo("[%d:%d] [term:%" PRId64 "] ignored a %d message with lower term from %d [term:%" PRId64 "]",
......
...@@ -16,15 +16,14 @@ ...@@ -16,15 +16,14 @@
#include "syncInt.h" #include "syncInt.h"
#include "raft.h" #include "raft.h"
#include "raft_log.h" #include "raft_log.h"
#include "raft_configuration.h" #include "sync_raft_impl.h"
#include "raft_message.h" #include "raft_message.h"
int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
const RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries); const RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries);
int peerIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from);
if (pNode == NULL) {
if (peerIndex < 0) {
return 0; return 0;
} }
...@@ -44,6 +43,6 @@ int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMs ...@@ -44,6 +43,6 @@ int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMs
pRaft->selfGroupId, pRaft->selfId, pMsg->from, appendEntries->index); pRaft->selfGroupId, pRaft->selfId, pMsg->from, appendEntries->index);
out: out:
pRaft->io.send(pRespMsg, &(pRaft->cluster.nodeInfo[peerIndex])); pRaft->io.send(pRespMsg, pNode);
return 0; return 0;
} }
\ No newline at end of file
...@@ -19,24 +19,6 @@ ...@@ -19,24 +19,6 @@
#include "raft_message.h" #include "raft_message.h"
int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
if (pRaft->state == TAOS_SYNC_STATE_LEADER) {
syncDebug("[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfGroupId, pRaft->selfId);
return 0;
}
if (!syncRaftIsPromotable(pRaft)) {
syncDebug("[%d:%d] is unpromotable and can not campaign", pRaft->selfGroupId, pRaft->selfId);
return 0;
}
// if there is pending uncommitted config,cannot start election
if (syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log)) {
syncWarn("[%d:%d] cannot syncRaftStartElection at term %" PRId64 " since there are still pending configuration changes to apply",
pRaft->selfGroupId, pRaft->selfId, pRaft->term);
return 0;
}
syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
if (pRaft->preVote) { if (pRaft->preVote) {
syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION); syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION);
} else { } else {
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "syncInt.h" #include "syncInt.h"
#include "raft.h" #include "raft.h"
#include "raft_configuration.h" #include "sync_raft_impl.h"
#include "raft_log.h" #include "raft_log.h"
#include "raft_message.h" #include "raft_message.h"
...@@ -23,10 +23,11 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); ...@@ -23,10 +23,11 @@ static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
SSyncMessage* pRespMsg; SSyncMessage* pRespMsg;
int voteIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from);
if (voteIndex == -1) { if (pNode == NULL) {
return 0; return 0;
} }
bool grant; bool grant;
SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log);
...@@ -42,17 +43,19 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -42,17 +43,19 @@ int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
grant ? "grant" : "reject", grant ? "grant" : "reject",
pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term); pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term);
pRaft->io.send(pRespMsg, &(pRaft->cluster.nodeInfo[voteIndex])); pRaft->io.send(pRespMsg, pNode);
return 0; return 0;
} }
static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
if (!(pRaft->voteFor == SYNC_NON_NODE_ID || pMsg->term > pRaft->term || pRaft->voteFor == pMsg->from)) { bool canVote =
return false; // We can vote if this is a repeat of a vote we've already cast...
} pRaft->voteFor == pMsg->from ||
if (!syncRaftLogIsUptodate(pRaft->log, pMsg->vote.lastIndex, pMsg->vote.lastTerm)) { // ...we haven't voted and we don't think there's a leader yet in this term...
return false; (pRaft->voteFor == SYNC_NON_NODE_ID && pRaft->leaderId == SYNC_NON_NODE_ID) ||
} // ...or this is a PreVote for a future term...
(pMsg->vote.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION && pMsg->term > pRaft->term);
return true; // ...and we believe the candidate is up to date.
return canVote && syncRaftLogIsUptodate(pRaft->log, pMsg->vote.lastIndex, pMsg->vote.lastTerm);
} }
\ No newline at end of file
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "syncInt.h" #include "syncInt.h"
#include "raft.h" #include "raft.h"
#include "raft_configuration.h" #include "sync_raft_impl.h"
#include "raft_message.h" #include "raft_message.h"
int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
...@@ -25,8 +25,8 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -25,8 +25,8 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
assert(pRaft->state == TAOS_SYNC_STATE_CANDIDATE); assert(pRaft->state == TAOS_SYNC_STATE_CANDIDATE);
voterIndex = syncRaftConfigurationIndexOfNode(pRaft, pMsg->from); SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from);
if (voterIndex == -1) { if (pNode == NULL) {
syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from); syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from);
return 0; return 0;
} }
...@@ -45,12 +45,14 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -45,12 +45,14 @@ int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
if (result == SYNC_RAFT_VOTE_WON) { if (result == SYNC_RAFT_VOTE_WON) {
if (pRaft->candidateState.inPreVote) { if (pRaft->candidateState.inPreVote) {
syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); syncRaftCampaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION);
} else { } else {
syncRaftBecomeLeader(pRaft); syncRaftBecomeLeader(pRaft);
syncRaftBroadcastAppend(pRaft);
} }
} else if (result == SYNC_RAFT_VOTE_LOST) { } else if (result == SYNC_RAFT_VOTE_LOST) {
// pb.MsgPreVoteResp contains future term of pre-candidate
// m.Term > r.Term; reuse r.Term
syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID);
} }
......
...@@ -22,14 +22,14 @@ ...@@ -22,14 +22,14 @@
static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress); static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress);
static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress, static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
SyncIndex prevIndex, SyncTerm prevTerm, SyncIndex prevIndex, SyncTerm prevTerm,
const SSyncRaftEntry *entries, int nEntry); SSyncRaftEntry *entries, int nEntry);
// syncRaftReplicate sends an append RPC with new entries to the given peer, // maybeSendAppend sends an append RPC with new entries to the given peer,
// if necessary. Returns true if a message was sent. The sendIfEmpty // if necessary. Returns true if a message was sent. The sendIfEmpty
// argument controls whether messages with no entries will be sent // argument controls whether messages with no entries will be sent
// ("empty" messages are useful to convey updated Commit indexes, but // ("empty" messages are useful to convey updated Commit indexes, but
// are undesirable when we're sending multiple messages in a batch). // are undesirable when we're sending multiple messages in a batch).
bool syncRaftReplicate(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty) { bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty) {
assert(pRaft->state == TAOS_SYNC_STATE_LEADER); assert(pRaft->state == TAOS_SYNC_STATE_LEADER);
SyncNodeId nodeId = progress->id; SyncNodeId nodeId = progress->id;
...@@ -68,10 +68,13 @@ static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) { ...@@ -68,10 +68,13 @@ static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress, static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
SyncIndex prevIndex, SyncTerm prevTerm, SyncIndex prevIndex, SyncTerm prevTerm,
const SSyncRaftEntry *entries, int nEntry) { SSyncRaftEntry *entries, int nEntry) {
SNodeInfo* pNode = syncRaftGetNodeById(pRaft, progress->id);
if (pNode == NULL) {
return false;
}
SyncIndex lastIndex; SyncIndex lastIndex;
SyncTerm logTerm = prevTerm; SyncTerm logTerm = prevTerm;
SNodeInfo* pNode = &(pRaft->cluster.nodeInfo[progress->selfIndex]);
SSyncMessage* msg = syncNewAppendMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term, SSyncMessage* msg = syncNewAppendMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term,
prevIndex, prevTerm, pRaft->log->commitIndex, prevIndex, prevTerm, pRaft->log->commitIndex,
...@@ -87,7 +90,7 @@ static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress, ...@@ -87,7 +90,7 @@ static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
case PROGRESS_STATE_REPLICATE: case PROGRESS_STATE_REPLICATE:
lastIndex = entries[nEntry - 1].index; lastIndex = entries[nEntry - 1].index;
syncRaftProgressOptimisticNextIndex(progress, lastIndex); syncRaftProgressOptimisticNextIndex(progress, lastIndex);
syncRaftInflightAdd(&progress->inflights, lastIndex); syncRaftInflightAdd(progress->inflights, lastIndex);
break; break;
case PROGRESS_STATE_PROBE: case PROGRESS_STATE_PROBE:
progress->probeSent = true; progress->probeSent = true;
......
...@@ -99,7 +99,7 @@ void syncCleanUp() { ...@@ -99,7 +99,7 @@ void syncCleanUp() {
SSyncNode* syncStart(const SSyncInfo* pInfo) { SSyncNode* syncStart(const SSyncInfo* pInfo) {
pthread_mutex_lock(&gSyncManager->mutex); pthread_mutex_lock(&gSyncManager->mutex);
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId)); SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId*));
if (ppNode != NULL) { if (ppNode != NULL) {
syncInfo("vgroup %d already exist", pInfo->vgId); syncInfo("vgroup %d already exist", pInfo->vgId);
pthread_mutex_unlock(&gSyncManager->mutex); pthread_mutex_unlock(&gSyncManager->mutex);
...@@ -140,7 +140,7 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) { ...@@ -140,7 +140,7 @@ SSyncNode* syncStart(const SSyncInfo* pInfo) {
void syncStop(const SSyncNode* pNode) { void syncStop(const SSyncNode* pNode) {
pthread_mutex_lock(&gSyncManager->mutex); pthread_mutex_lock(&gSyncManager->mutex);
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId*));
if (ppNode == NULL) { if (ppNode == NULL) {
syncInfo("vgroup %d not exist", pNode->vgId); syncInfo("vgroup %d not exist", pNode->vgId);
pthread_mutex_unlock(&gSyncManager->mutex); pthread_mutex_unlock(&gSyncManager->mutex);
...@@ -288,7 +288,7 @@ static void *syncWorkerMain(void *argv) { ...@@ -288,7 +288,7 @@ static void *syncWorkerMain(void *argv) {
static void syncNodeTick(void *param, void *tmrId) { static void syncNodeTick(void *param, void *tmrId) {
SyncGroupId vgId = (SyncGroupId)param; SyncGroupId vgId = (SyncGroupId)param;
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &vgId, sizeof(SyncGroupId)); SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &vgId, sizeof(SyncGroupId*));
if (ppNode == NULL) { if (ppNode == NULL) {
return; return;
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "raft.h"
#include "syncInt.h" #include "syncInt.h"
#include "sync_raft_config_change.h" #include "sync_raft_config_change.h"
#include "sync_raft_progress.h" #include "sync_raft_progress.h"
...@@ -40,40 +41,7 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* ...@@ -40,40 +41,7 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id); SSyncRaftProgressMap* progressMap, SyncNodeId id);
static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id); SSyncRaftProgressMap* progressMap, SyncNodeId id);
// syncRaftChangerSimpleConfig applies the batch of single config changes in
// `css` to a copy of the tracker's current config/progress, written into
// `config` and `progressMap`.
//
// Returns 0 on success. Fails (non-zero) when:
//   - copying/validating the current state fails,
//   - the copied config is already joint (outgoing voters present),
//   - applying the changes fails,
//   - the incoming voter set would differ from the tracker's by more than one id,
//   - the resulting config/progress do not pass the final invariant check.
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
                                SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  int code = checkAndCopy(changer, config, progressMap);
  if (code != 0) {
    return code;
  }

  // A simple change is only legal outside of a joint configuration.
  if (hasJointConfig(config)) {
    syncError("can't apply simple config change in joint config");
    return -1;
  }

  code = applyConfig(changer, config, progressMap, css);
  if (code != 0) {
    return code;
  }

  // Compare the incoming voters before (tracker) and after (working copy).
  const int changedVoters = symDiff(syncRaftJointConfigIncoming(&changer->tracker->config.voters),
                                    syncRaftJointConfigIncoming(&config->voters));
  if (changedVoters > 1) {
    syncError("more than one voter changed without entering joint config");
    return -1;
  }

  return checkAndReturn(config, progressMap);
}
// EnterJoint verifies that the outgoing (=right) majority config of the joint // EnterJoint verifies that the outgoing (=right) majority config of the joint
// config is empty and initializes it with a copy of the incoming (=left) // config is empty and initializes it with a copy of the incoming (=left)
...@@ -96,12 +64,13 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S ...@@ -96,12 +64,13 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
if (hasJointConfig(config)) { if (hasJointConfig(config)) {
syncError("config is already joint"); syncError("config is already joint");
return -1; return -1;
} }
if(config->voters.incoming.replica == 0) { if(syncRaftJointConfigIsIncomingEmpty(&config->voters) == 0) {
// We allow adding nodes to an empty config for convenience (testing and // We allow adding nodes to an empty config for convenience (testing and
// bootstrap), but you can't enter a joint state. // bootstrap), but you can't enter a joint state.
syncError("can't make a zero-voter config joint"); syncError("can't make a zero-voter config joint");
...@@ -112,7 +81,7 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S ...@@ -112,7 +81,7 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
syncRaftJointConfigClearOutgoing(&config->voters); syncRaftJointConfigClearOutgoing(&config->voters);
// Copy incoming to outgoing. // Copy incoming to outgoing.
memcpy(&config->voters.outgoing, &config->voters.incoming, sizeof(SSyncCluster)); syncRaftCopyNodeMap(&config->voters.incoming, &config->voters.outgoing);
ret = applyConfig(changer, config, progressMap, css); ret = applyConfig(changer, config, progressMap, css);
if (ret != 0) { if (ret != 0) {
...@@ -123,84 +92,43 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S ...@@ -123,84 +92,43 @@ int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const S
return checkAndReturn(config, progressMap); return checkAndReturn(config, progressMap);
} }
// checkAndCopy copies the tracker's config and progress map (deeply enough for // Simple carries out a series of configuration changes that (in aggregate)
// the purposes of the Changer) and returns those copies. It returns an error // mutates the incoming majority config Voters[0] by at most one. This method
// if checkInvariants does. // will return an error if that is not the case, if the resulting quorum is
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { // zero, or if the configuration is in a joint state (i.e. if there is an
syncRaftCloneTrackerConfig(&changer->tracker->config, config); // outgoing configuration).
int i; int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
for (i = 0; i < TSDB_MAX_REPLICA; ++i) { SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
SSyncRaftProgress* progress = &(changer->tracker->progressMap.progress[i]); int ret;
if (progress->id == SYNC_NON_NODE_ID) {
continue; ret = checkAndCopy(changer, config, progressMap);
} if (ret != 0) {
syncRaftCopyProgress(progress, &(progressMap->progress[i])); return ret;
} }
return checkAndReturn(config, progressMap);
}
// checkAndReturn calls checkInvariants on the input and returns either the if (hasJointConfig(config)) {
// resulting error or the input. syncError("can't apply simple config change in joint config");
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
if (checkInvariants(config, progressMap) != 0) {
return -1; return -1;
} }
return 0; ret = applyConfig(changer, config, progressMap, css);
}
// checkInvariants makes sure that the config and progress are compatible with
// each other. This is used to check both what the Changer is initialized with,
// as well as what it returns.
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
int ret = syncRaftCheckProgress(config, progressMap);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
int i; int n = symDiff(syncRaftJointConfigIncoming(&changer->tracker->config.voters),
// Any staged learner was staged because it could not be directly added due syncRaftJointConfigIncoming(&config->voters));
// to a conflicting voter in the outgoing config. if (n > 1) {
for (i = 0; i < TSDB_MAX_REPLICA; ++i) { syncError("more than one voter changed without entering joint config");
if (!syncRaftJointConfigInOutgoing(&config->voters, config->learnersNext.nodeId[i])) { return -1;
return -1;
}
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && progressMap->progress[i].isLearner) {
syncError("%d is in LearnersNext, but is already marked as learner", progressMap->progress[i].id);
return -1;
}
}
// Conversely Learners and Voters doesn't intersect at all.
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
if (syncRaftJointConfigInIncoming(&config->voters, config->learners.nodeId[i])) {
syncError("%d is in Learners and voter.incoming", progressMap->progress[i].id);
return -1;
}
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && !progressMap->progress[i].isLearner) {
syncError("%d is in Learners, but is not marked as learner", progressMap->progress[i].id);
return -1;
}
}
if (!hasJointConfig(config)) {
// We enforce that empty maps are nil instead of zero.
if (config->learnersNext.replica > 0) {
syncError("cfg.LearnersNext must be nil when not joint");
return -1;
}
if (config->autoLeave) {
syncError("AutoLeave must be false when not joint");
return -1;
}
} }
return 0; return checkAndReturn(config, progressMap);
}
// hasJointConfig reports whether the outgoing voter set of `config` has a
// positive replica count.
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
  const bool outgoingPopulated = (config->voters.outgoing.replica > 0);
  return outgoingPopulated;
}
// apply a change to the configuration. By convention, changes to voters are
// always made to the incoming majority config Voters[0]. Voters[1] is either
// empty or preserves the outgoing majority configuration while in a joint state.
static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) { SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) {
int i; int i;
...@@ -227,7 +155,7 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig ...@@ -227,7 +155,7 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
} }
} }
if (config->voters.incoming.replica == 0) { if (syncRaftJointConfigIsIncomingEmpty(&config->voters)) {
syncError("removed all voters"); syncError("removed all voters");
return -1; return -1;
} }
...@@ -235,86 +163,16 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig ...@@ -235,86 +163,16 @@ static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig
return 0; return 0;
} }
// symDiff returns the size of the symmetric difference between the two node
// id sets, i.e. |(l - r) U (r - l)|: the number of ids present in exactly one
// of the maps. Slots holding SYNC_NON_NODE_ID are treated as empty.
//
// The previous implementation added one for every entry of the other map that
// differed from the id being examined, so two identical multi-node sets
// produced a non-zero result. An id now contributes exactly one, and only when
// no slot of the other map holds it.
static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r) {
  int n = 0;
  int i;
  int j0, j1;

  const SSyncRaftNodeMap* pairs[2][2] = {
    {l, r},  // count elems in l but not in r
    {r, l},  // count elems in r but not in l
  };

  for (i = 0; i < 2; ++i) {
    const SSyncRaftNodeMap* from  = pairs[i][0];
    const SSyncRaftNodeMap* other = pairs[i][1];

    for (j0 = 0; j0 < TSDB_MAX_REPLICA; ++j0) {
      SyncNodeId id = from->nodeId[j0];
      if (id == SYNC_NON_NODE_ID) {
        continue;
      }

      // Scan every slot rather than the first `replica` ones: occupied slots
      // are not guaranteed to be contiguous once an id has been removed.
      int found = 0;
      for (j1 = 0; j1 < TSDB_MAX_REPLICA; ++j1) {
        if (other->nodeId[j1] == id) {
          found = 1;
          break;
        }
      }

      if (!found) {
        n += 1;
      }
    }
  }

  return n;
}
// initProgress: placeholder definition. The body is empty in this revision, so
// calling it changes neither config nor progressMap.
static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner) {
}
// nilAwareDelete removes `id` from nodeMap: the first slot holding `id` is
// reset to SYNC_NON_NODE_ID and the replica count is decremented. When no slot
// holds `id` the map is left untouched.
static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
  int slot = 0;

  // Locate the first slot that stores `id`, if any.
  while (slot < TSDB_MAX_REPLICA && nodeMap->nodeId[slot] != id) {
    ++slot;
  }

  if (slot < TSDB_MAX_REPLICA) {
    nodeMap->nodeId[slot] = SYNC_NON_NODE_ID;
    nodeMap->replica -= 1;
  }

  assert(nodeMap->replica >= 0);
}
// nilAwareAdd inserts `id` into nodeMap unless it is already present. The id
// goes into the first empty slot (one holding SYNC_NON_NODE_ID) and the
// replica count is incremented; a full map trips the assert.
static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
  int freeSlot = -1;

  for (int k = 0; k < TSDB_MAX_REPLICA; ++k) {
    SyncNodeId current = nodeMap->nodeId[k];
    if (current == id) {
      // Already a member: nothing to do.
      return;
    }
    if (freeSlot < 0 && current == SYNC_NON_NODE_ID) {
      // Remember only the first empty slot.
      freeSlot = k;
    }
  }

  assert(freeSlot != -1);
  nodeMap->replica += 1;
  nodeMap->nodeId[freeSlot] = id;
}
// makeVoter adds or promotes the given ID to be a voter in the incoming // makeVoter adds or promotes the given ID to be a voter in the incoming
// majority config. // majority config.
static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id) { SSyncRaftProgressMap* progressMap, SyncNodeId id) {
int i = syncRaftFindProgressIndexByNodeId(progressMap, id); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
if (i == -1) { if (progress == NULL) {
initProgress(changer, config, progressMap, id, false); initProgress(changer, config, progressMap, id, false);
i = syncRaftFindProgressIndexByNodeId(progressMap, id); return;
} }
assert(i != -1);
SSyncRaftProgress* progress = &(progressMap->progress[i]);
progress->isLearner = false; progress->isLearner = false;
nilAwareDelete(&config->learners, id); nilAwareDelete(&config->learners, id);
...@@ -337,14 +195,12 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* ...@@ -337,14 +195,12 @@ static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig*
// LeaveJoint(). // LeaveJoint().
static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id) { SSyncRaftProgressMap* progressMap, SyncNodeId id) {
int i = syncRaftFindProgressIndexByNodeId(progressMap, id); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
if (i == -1) { if (progress == NULL) {
initProgress(changer, config, progressMap, id, false); initProgress(changer, config, progressMap, id, true);
i = syncRaftFindProgressIndexByNodeId(progressMap, id); return;
} }
assert(i != -1);
SSyncRaftProgress* progress = &(progressMap->progress[i]);
if (progress->isLearner) { if (progress->isLearner) {
return; return;
} }
...@@ -352,15 +208,15 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi ...@@ -352,15 +208,15 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
removeNodeId(changer, config, progressMap, id); removeNodeId(changer, config, progressMap, id);
// ... but save the Progress. // ... but save the Progress.
syncRaftAddToProgressMap(progressMap, id); syncRaftAddToProgressMap(progressMap, progress);
// Use LearnersNext if we can't add the learner to Learners directly, i.e. // Use LearnersNext if we can't add the learner to Learners directly, i.e.
// if the peer is still tracked as a voter in the outgoing config. It will // if the peer is still tracked as a voter in the outgoing config. It will
// be turned into a learner in LeaveJoint(). // be turned into a learner in LeaveJoint().
// //
// Otherwise, add a regular learner right away. // Otherwise, add a regular learner right away.
bool inOutgoing = syncRaftJointConfigInCluster(&config->voters.outgoing, id); bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id);
if (inOutgoing) { if (inInOutgoing) {
nilAwareAdd(&config->learnersNext, id); nilAwareAdd(&config->learnersNext, id);
} else { } else {
nilAwareAdd(&config->learners, id); nilAwareAdd(&config->learners, id);
...@@ -371,8 +227,8 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi ...@@ -371,8 +227,8 @@ static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfi
// removeNodeId this peer as a voter or learner from the incoming config. // removeNodeId this peer as a voter or learner from the incoming config.
static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
SSyncRaftProgressMap* progressMap, SyncNodeId id) { SSyncRaftProgressMap* progressMap, SyncNodeId id) {
int i = syncRaftFindProgressIndexByNodeId(progressMap, id); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
if (i == -1) { if (progress == NULL) {
return; return;
} }
...@@ -381,8 +237,173 @@ static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConf ...@@ -381,8 +237,173 @@ static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConf
nilAwareDelete(&config->learnersNext, id); nilAwareDelete(&config->learnersNext, id);
// If the peer is still a voter in the outgoing config, keep the Progress. // If the peer is still a voter in the outgoing config, keep the Progress.
bool inOutgoing = syncRaftJointConfigInCluster(&config->voters.outgoing, id); bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id);
if (!inOutgoing) { if (!inInOutgoing) {
syncRaftRemoveFromProgressMap(progressMap, id); syncRaftRemoveFromProgressMap(progressMap, id);
} }
}
// initProgress starts tracking a node that has no progress record yet. The id
// is first registered in the config -- through nilAwareAdd on the learners
// when isLearner is set, otherwise through syncRaftJointConfigAddToIncoming on
// the voters -- and a newly allocated SSyncRaftProgress is then handed to
// syncRaftAddToProgressMap.
static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
                        SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner) {
  if (isLearner) {
    nilAwareAdd(&config->learners, id);
  } else {
    syncRaftJointConfigAddToIncoming(&config->voters, id);
  }

  SSyncRaftProgress* fresh = (SSyncRaftProgress*)malloc(sizeof(SSyncRaftProgress));
  assert (fresh != NULL);

  // Built as a designated initializer so that every member not named here is
  // zero-initialized before the record is copied into the allocation.
  const SSyncRaftProgress initial = {
    .id = id,
    .groupId = changer->tracker->pRaft->selfGroupId,
    // Starting nextIndex at the last index means that the follower can be
    // probed (with the last index).
    //
    // TODO(tbg): seems awfully optimistic. Using the first index would be
    // better. The general expectation here is that the follower has no log
    // at all (and will thus likely need a snapshot), though the app may
    // have applied a snapshot out of band before adding the replica (thus
    // making the first index the better choice).
    .nextIndex = changer->lastIndex,
    .matchIndex = 0,
    .state = PROGRESS_STATE_PROBE,
    .pendingSnapshotIndex = 0,
    .probeSent = false,
    .inflights = syncRaftOpenInflights(changer->tracker->maxInflightMsgs),
    .isLearner = isLearner,
    // A node that was just added is marked as recently active. Otherwise,
    // CheckQuorum may cause us to step down if it is invoked before the
    // added node has had a chance to communicate with us.
    .recentActive = true,
    .refCount = 0,
  };
  *fresh = initial;

  syncRaftAddToProgressMap(progressMap, fresh);
}
// checkInvariants makes sure that the config and progress are compatible with
// each other. This is used to check both what the Changer is initialized with,
// as well as what it returns.
//
// Returns 0 when every check passes. A non-zero result from
// syncRaftCheckTrackerConfigInProgress is returned unchanged; every violation
// detected in this function is logged through syncError and reported as -1.
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
int ret = syncRaftCheckTrackerConfigInProgress(config, progressMap);
if (ret != 0) {
return ret;
}
// Any staged learner was staged because it could not be directly added due
// to a conflicting voter in the outgoing config.
// NOTE(review): pNodeId is handed to syncRaftIterateNodeMap by value, so this
// local is still NULL when the loop body dereferences it -- confirm the
// intended iterator contract (it presumably needs a SyncNodeId** cursor).
SyncNodeId* pNodeId = NULL;
while (!syncRaftIterateNodeMap(&config->learnersNext, pNodeId)) {
SyncNodeId nodeId = *pNodeId;
if (!syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
syncError("[%d] is in LearnersNext, but not outgoing", nodeId);
return -1;
}
// A staged learner must have a progress record that is not yet flagged as a
// learner.
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
assert(progress);
assert(progress->id == nodeId);
if (progress->isLearner) {
syncError("[%d:%d] is in LearnersNext, but is already marked as learner", progress->groupId, nodeId);
return -1;
}
}
// Conversely, Learners and the outgoing voters must not intersect.
// NOTE(review): only the outgoing side is tested here; presumably the incoming
// voters should be checked as well -- confirm.
pNodeId = NULL;
while (!syncRaftIterateNodeMap(&config->learners, pNodeId)) {
SyncNodeId nodeId = *pNodeId;
if (syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
syncError("%d is in Learners and outgoing", nodeId);
return -1;
}
// Every learner must have a progress record flagged as a learner.
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
assert(progress);
assert(progress->id == nodeId);
if (!progress->isLearner) {
syncError("[%d:%d] is in Learners, but is not marked as learner", progress->groupId, nodeId);
return -1;
}
}
if (!hasJointConfig(config)) {
// Outside a joint config LearnersNext must be empty and AutoLeave unset.
if (syncRaftNodeMapSize(&config->learnersNext) > 0) {
syncError("cfg.LearnersNext must be nil when not joint");
return -1;
}
if (config->autoLeave) {
syncError("AutoLeave must be false when not joint");
return -1;
}
}
return 0;
}
// checkAndCopy fills `config` and `progressMap` from the changer's tracker and
// validates the result. The tracker config goes through
// syncRaftCopyTrackerConfig; progressMap is cleared and then receives every
// entry produced by iterating the tracker's progress map. The return value is
// that of checkAndReturn on the filled-in pair.
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  // NOTE(review): the cursor is passed to syncRaftIterateProgressMap by value;
  // unless that is a macro, this local stays NULL for every iteration --
  // confirm the iterator contract.
  SSyncRaftProgress* cursor = NULL;

  syncRaftCopyTrackerConfig(&changer->tracker->config, config);
  syncRaftClearProgressMap(progressMap);

  for (;;) {
    if (syncRaftIterateProgressMap(&changer->tracker->progressMap, cursor)) {
      break;
    }
    syncRaftAddToProgressMap(progressMap, cursor);
  }

  return checkAndReturn(config, progressMap);
}
// checkAndReturn runs checkInvariants on the given config/progress pair and
// collapses its result to a status code: 0 when the invariants hold, -1 for
// any non-zero result.
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  int rc = checkInvariants(config, progressMap);
  return (rc == 0) ? 0 : -1;
}
// hasJointConfig returns true when syncRaftJointConfigIsOutgoingEmpty reports
// that the outgoing side of config->voters is not empty.
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
return !syncRaftJointConfigIsOutgoingEmpty(&config->voters);
}
// symDiff returns the size of the symmetric difference of the two node sets,
// i.e. len( (l - r) \union (r - l) ): every id found in one map but not in the
// other adds one to the result.
static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r) {
  const SSyncRaftNodeMap* pairs[2][2] = {
    {l, r},  // count elems in l but not in r
    {r, l},  // count elems in r but not in l
  };
  int n = 0;

  for (int i = 0; i < 2; ++i) {
    const SSyncRaftNodeMap* p0 = pairs[i][0];
    const SSyncRaftNodeMap* p1 = pairs[i][1];

    // Start every pass from a NULL cursor, as the other callers of
    // syncRaftIterateNodeMap do, instead of passing an indeterminate pointer.
    // NOTE(review): the cursor is passed by value, so it is still NULL when it
    // is dereferenced below -- confirm the intended iterator contract.
    SyncNodeId* pNodeId = NULL;
    while (!syncRaftIterateNodeMap(p0, pNodeId)) {
      if (!syncRaftIsInNodeMap(p1, *pNodeId)) {
        n += 1;
      }
    }
  }

  return n;
}
// nilAwareDelete removes `id` from nodeMap by delegating to
// syncRaftRemoveFromNodeMap. The name is kept from the array-based
// implementation; no extra handling of an empty map happens here.
static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
syncRaftRemoveFromNodeMap(nodeMap, id);
}
// nilAwareAdd populates a map entry, creating the map if necessary.
static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
syncRaftAddToNodeMap(nodeMap, id);
} }
\ No newline at end of file
...@@ -17,15 +17,40 @@ ...@@ -17,15 +17,40 @@
#include "raft.h" #include "raft.h"
#include "raft_log.h" #include "raft_log.h"
#include "raft_message.h" #include "raft_message.h"
#include "sync_raft_progress_tracker.h"
void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
SyncTerm term; if (pRaft->state == TAOS_SYNC_STATE_LEADER) {
syncDebug("[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfGroupId, pRaft->selfId);
return;
}
if (!syncRaftIsPromotable(pRaft)) {
syncWarn("[%d:%d] is unpromotable and can not syncRaftCampaign", pRaft->selfGroupId, pRaft->selfId);
return;
}
// if there is pending uncommitted config,cannot start election
if (syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log)) {
syncWarn("[%d:%d] cannot syncRaftStartElection at term %" PRId64 " since there are still pending configuration changes to apply",
pRaft->selfGroupId, pRaft->selfId, pRaft->term);
return;
}
syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
syncRaftCampaign(pRaft, cType);
}
// syncRaftCampaign transitions the raft instance to candidate state. This must only be
// called after verifying that this is a legitimate transition.
void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
bool preVote; bool preVote;
ESyncRaftMessageType voteMsgType; SyncTerm term;
if (syncRaftIsPromotable(pRaft)) { if (syncRaftIsPromotable(pRaft)) {
syncDebug("[%d:%d] is unpromotable; campaign() should have been called", pRaft->selfGroupId, pRaft->selfId); syncDebug("[%d:%d] is unpromotable; syncRaftCampaign() should have been called", pRaft->selfGroupId, pRaft->selfId);
return 0; return;
} }
if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) {
...@@ -35,7 +60,6 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { ...@@ -35,7 +60,6 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
term = pRaft->term + 1; term = pRaft->term + 1;
} else { } else {
syncRaftBecomeCandidate(pRaft); syncRaftBecomeCandidate(pRaft);
voteMsgType = RAFT_MSG_VOTE;
term = pRaft->term; term = pRaft->term;
preVote = false; preVote = false;
} }
...@@ -43,10 +67,8 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { ...@@ -43,10 +67,8 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
int quorum = syncRaftQuorum(pRaft); int quorum = syncRaftQuorum(pRaft);
ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pRaft->selfId, preVote, true, NULL, NULL); ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pRaft->selfId, preVote, true, NULL, NULL);
if (result == SYNC_RAFT_VOTE_WON) { if (result == SYNC_RAFT_VOTE_WON) {
/** // We won the election after voting for ourselves (which must mean that
* We won the election after voting for ourselves (which must mean that // this is a single-node cluster). Advance to the next state.
* this is a single-node cluster). Advance to the next state.
**/
if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) {
syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION);
} else { } else {
...@@ -59,12 +81,23 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { ...@@ -59,12 +81,23 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
int i; int i;
SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log);
for (i = 0; i < pRaft->cluster.replica; ++i) { SSyncRaftNodeMap nodeMap;
if (i == pRaft->cluster.selfIndex) { syncRaftJointConfigIDs(&pRaft->tracker->config.voters, &nodeMap);
SyncNodeId *pNodeId = NULL;
while (!syncRaftIterateNodeMap(&nodeMap, pNodeId)) {
SyncNodeId nodeId = *pNodeId;
if (nodeId == SYNC_NON_NODE_ID) {
continue; continue;
} }
SyncNodeId nodeId = pRaft->cluster.nodeInfo[i].nodeId; if (nodeId == pRaft->selfId) {
continue;
}
SNodeInfo* pNode = syncRaftGetNodeById(pRaft, nodeId);
if (pNode == NULL) {
continue;
}
SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId,
term, cType, lastIndex, lastTerm); term, cType, lastIndex, lastTerm);
...@@ -72,10 +105,10 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { ...@@ -72,10 +105,10 @@ void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
continue; continue;
} }
syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 "] sent %d request to %d at term %" PRId64 "", syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 "] sent vote request to %d at term %" PRId64 "",
pRaft->selfGroupId, pRaft->selfId, lastTerm, pRaft->selfGroupId, pRaft->selfId, lastTerm,
lastIndex, voteMsgType, nodeId, pRaft->term); lastIndex, nodeId, pRaft->term);
pRaft->io.send(pMsg, &(pRaft->cluster.nodeInfo[i])); pRaft->io.send(pMsg, pNode);
} }
} }
\ No newline at end of file
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
*/ */
#include "raft.h" #include "raft.h"
#include "raft_configuration.h" #include "sync_raft_impl.h"
#include "raft_log.h" #include "raft_log.h"
#include "raft_replication.h" #include "raft_replication.h"
#include "sync_raft_progress_tracker.h" #include "sync_raft_progress_tracker.h"
...@@ -25,6 +25,8 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg); ...@@ -25,6 +25,8 @@ static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg);
static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg);
static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg); static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg);
static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n);
static int triggerAll(SSyncRaft* pRaft); static int triggerAll(SSyncRaft* pRaft);
static void tickElection(SSyncRaft* pRaft); static void tickElection(SSyncRaft* pRaft);
...@@ -82,13 +84,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) { ...@@ -82,13 +84,22 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
resetRaft(pRaft, pRaft->term); resetRaft(pRaft, pRaft->term);
pRaft->leaderId = pRaft->leaderId; pRaft->leaderId = pRaft->leaderId;
pRaft->state = TAOS_SYNC_STATE_LEADER; pRaft->state = TAOS_SYNC_STATE_LEADER;
// TODO: check if there is pending config log
int nPendingConf = syncRaftLogNumOfPendingConf(pRaft->log);
if (nPendingConf > 1) {
syncFatal("unexpected multiple uncommitted config entry");
}
syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId);
assert(progress != NULL);
// Followers enter replicate mode when they've been successfully probed
// (perhaps after having received a snapshot as a result). The leader is
// trivially in this state. Note that r.reset() has initialized this
// progress with the last index already.
syncRaftProgressBecomeReplicate(progress);
// Conservatively set the pendingConfIndex to the last index in the
// log. There may or may not be a pending config change, but it's
// safe to delay any future proposals until we commit all our
// pending log entries, and scanning the entire tail of the log
// could be expensive.
SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
pRaft->pendingConfigIndex = lastIndex;
// after become leader, send a no-op log // after become leader, send a no-op log
SSyncRaftEntry* entry = (SSyncRaftEntry*)malloc(sizeof(SSyncRaftEntry)); SSyncRaftEntry* entry = (SSyncRaftEntry*)malloc(sizeof(SSyncRaftEntry));
...@@ -103,6 +114,7 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) { ...@@ -103,6 +114,7 @@ void syncRaftBecomeLeader(SSyncRaft* pRaft) {
}; };
appendEntries(pRaft, entry, 1); appendEntries(pRaft, entry, 1);
//syncRaftTriggerHeartbeat(pRaft); //syncRaftTriggerHeartbeat(pRaft);
syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
} }
void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) { void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) {
...@@ -123,15 +135,16 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { ...@@ -123,15 +135,16 @@ bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) {
} }
int syncRaftQuorum(SSyncRaft* pRaft) { int syncRaftQuorum(SSyncRaft* pRaft) {
return pRaft->cluster.replica / 2 + 1; return 0;
//return pRaft->cluster.replica / 2 + 1;
} }
ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id, ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
bool preVote, bool grant, bool preVote, bool grant,
int* rejected, int *granted) { int* rejected, int *granted) {
int voterIndex = syncRaftConfigurationIndexOfNode(pRaft, id); SNodeInfo* pNode = syncRaftGetNodeById(pRaft, id);
if (voterIndex == -1) { if (pNode == NULL) {
return SYNC_RAFT_VOTE_PENDING; return true;
} }
if (grant) { if (grant) {
...@@ -142,7 +155,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id, ...@@ -142,7 +155,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term);
} }
syncRaftRecordVote(pRaft->tracker, voterIndex, grant); syncRaftRecordVote(pRaft->tracker, pNode->nodeId, grant);
return syncRaftTallyVotes(pRaft->tracker, rejected, granted); return syncRaftTallyVotes(pRaft->tracker, rejected, granted);
} }
/* /*
...@@ -154,7 +167,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id, ...@@ -154,7 +167,7 @@ ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
pRaft->selfGroupId, pRaft->selfId, id, pRaft->term); pRaft->selfGroupId, pRaft->selfId, id, pRaft->term);
} }
int voteIndex = syncRaftConfigurationIndexOfNode(pRaft, id); int voteIndex = syncRaftGetNodeById(pRaft, id);
assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0); assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0);
assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN); assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN);
...@@ -185,19 +198,30 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) { ...@@ -185,19 +198,30 @@ void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) {
pRaft->voteFor = serverState->voteFor; pRaft->voteFor = serverState->voteFor;
} }
static void visitProgressSendAppend(int i, SSyncRaftProgress* progress, void* arg) { static void visitProgressSendAppend(SSyncRaftProgress* progress, void* arg) {
SSyncRaft* pRaft = (SSyncRaft*)arg; SSyncRaft* pRaft = (SSyncRaft*)arg;
if (pRaft->selfId == progress->id) { if (pRaft->selfId == progress->id) {
return; return;
} }
syncRaftReplicate(arg, progress, true); syncRaftMaybeSendAppend(arg, progress, true);
} }
// bcastAppend sends RPC, with entries to all peers that are not up-to-date
// according to the progress recorded in r.prs.
void syncRaftBroadcastAppend(SSyncRaft* pRaft) { void syncRaftBroadcastAppend(SSyncRaft* pRaft) {
syncRaftProgressVisit(pRaft->tracker, visitProgressSendAppend, pRaft); syncRaftProgressVisit(pRaft->tracker, visitProgressSendAppend, pRaft);
} }
// syncRaftGetNodeById looks `id` up in pRaft->nodeInfoMap and returns the
// SNodeInfo pointer stored for it, or NULL when the map has no entry.
// NOTE(review): the key length passed to taosHashGet is sizeof(SyncNodeId*)
// while the key is a SyncNodeId variable -- confirm it matches the length used
// when nodeInfoMap is populated.
SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id) {
  SNodeInfo **slot = taosHashGet(pRaft->nodeInfoMap, &id, sizeof(SyncNodeId*));
  return (slot == NULL) ? NULL : *slot;
}
static int convertClear(SSyncRaft* pRaft) { static int convertClear(SSyncRaft* pRaft) {
} }
...@@ -223,7 +247,7 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -223,7 +247,7 @@ static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
syncRaftHandleVoteRespMessage(pRaft, pMsg); syncRaftHandleVoteRespMessage(pRaft, pMsg);
return 0; return 0;
} else if (msgType == RAFT_MSG_APPEND) { } else if (msgType == RAFT_MSG_APPEND) {
syncRaftBecomeFollower(pRaft, pRaft->term, pMsg->from); syncRaftBecomeFollower(pRaft, pMsg->term, pMsg->from);
syncRaftHandleAppendEntriesMessage(pRaft, pMsg); syncRaftHandleAppendEntriesMessage(pRaft, pMsg);
} }
return 0; return 0;
...@@ -234,9 +258,7 @@ static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) { ...@@ -234,9 +258,7 @@ static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
return 0; return 0;
} }
/** // tickElection is run by followers and candidates after r.electionTimeout.
* tickElection is run by followers and candidates per tick.
**/
static void tickElection(SSyncRaft* pRaft) { static void tickElection(SSyncRaft* pRaft) {
pRaft->electionElapsed += 1; pRaft->electionElapsed += 1;
...@@ -254,10 +276,16 @@ static void tickElection(SSyncRaft* pRaft) { ...@@ -254,10 +276,16 @@ static void tickElection(SSyncRaft* pRaft) {
syncRaftStep(pRaft, syncInitElectionMsg(&msg, pRaft->selfId)); syncRaftStep(pRaft, syncInitElectionMsg(&msg, pRaft->selfId));
} }
// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout.
static void tickHeartbeat(SSyncRaft* pRaft) { static void tickHeartbeat(SSyncRaft* pRaft) {
} }
// increaseUncommittedSize decides whether a proposal of `n` entries may be
// appended; appendEntries drops the proposal when this returns false.
//
// TODO: track pRaft->uncommittedSize and refuse proposals that exceed a
// configured limit. Until that accounting exists every proposal is accepted,
// because a stub that always refuses would make appendEntries return before
// syncRaftLogAppend for every proposal.
static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
  (void)pRaft;
  (void)entries;
  (void)n;
  return true;
}
static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
SyncTerm term = pRaft->term; SyncTerm term = pRaft->term;
...@@ -268,9 +296,16 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { ...@@ -268,9 +296,16 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
entries[i].index = lastIndex + 1 + i; entries[i].index = lastIndex + 1 + i;
} }
// Track the size of this uncommitted proposal.
if (!increaseUncommittedSize(pRaft, entries, n)) {
// Drop the proposal.
return;
}
syncRaftLogAppend(pRaft->log, entries, n); syncRaftLogAppend(pRaft->log, entries, n);
SSyncRaftProgress* progress = &(pRaft->tracker->progressMap.progress[pRaft->cluster.selfIndex]); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId);
assert(progress != NULL);
syncRaftProgressMaybeUpdate(progress, lastIndex); syncRaftProgressMaybeUpdate(progress, lastIndex);
// Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend. // Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend.
syncRaftMaybeCommit(pRaft); syncRaftMaybeCommit(pRaft);
...@@ -297,7 +332,7 @@ static int triggerAll(SSyncRaft* pRaft) { ...@@ -297,7 +332,7 @@ static int triggerAll(SSyncRaft* pRaft) {
continue; continue;
} }
syncRaftReplicate(pRaft, pRaft->tracker->progressMap.progress[i], true); syncRaftMaybeSendAppend(pRaft, pRaft->tracker->progressMap.progress[i], true);
} }
#endif #endif
return 0; return 0;
...@@ -307,8 +342,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) { ...@@ -307,8 +342,8 @@ static void abortLeaderTransfer(SSyncRaft* pRaft) {
pRaft->leadTransferee = SYNC_NON_NODE_ID; pRaft->leadTransferee = SYNC_NON_NODE_ID;
} }
static void initProgress(int i, SSyncRaftProgress* progress, void* arg) { static void resetProgress(SSyncRaftProgress* progress, void* arg) {
syncRaftInitProgress(i, (SSyncRaft*)arg, progress); syncRaftResetProgress((SSyncRaft*)arg, progress);
} }
static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { static void resetRaft(SSyncRaft* pRaft, SyncTerm term) {
...@@ -327,7 +362,7 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { ...@@ -327,7 +362,7 @@ static void resetRaft(SSyncRaft* pRaft, SyncTerm term) {
abortLeaderTransfer(pRaft); abortLeaderTransfer(pRaft);
syncRaftResetVotes(pRaft->tracker); syncRaftResetVotes(pRaft->tracker);
syncRaftProgressVisit(pRaft->tracker, initProgress, pRaft); syncRaftProgressVisit(pRaft->tracker, resetProgress, pRaft);
pRaft->pendingConfigIndex = 0; pRaft->pendingConfigIndex = 0;
pRaft->uncommittedSize = 0; pRaft->uncommittedSize = 0;
......
...@@ -40,19 +40,16 @@ void syncRaftCloseInflights(SSyncRaftInflights* inflights) { ...@@ -40,19 +40,16 @@ void syncRaftCloseInflights(SSyncRaftInflights* inflights) {
free(inflights); free(inflights);
} }
/** // Add notifies the Inflights that a new message with the given index is being
* syncRaftInflightAdd notifies the Inflights that a new message with the given index is being // dispatched. Full() must be called prior to Add() to verify that there is room
* dispatched. syncRaftInflightFull() must be called prior to syncRaftInflightAdd() // for one more message, and consecutive calls to add Add() must provide a
* to verify that there is room for one more message, // monotonic sequence of indexes.
* and consecutive calls to add syncRaftInflightAdd() must provide a
* monotonic sequence of indexes.
**/
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) { void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) {
assert(!syncRaftInflightFull(inflights)); assert(!syncRaftInflightFull(inflights));
int next = inflights->start + inflights->count; int next = inflights->start + inflights->count;
int size = inflights->size; int size = inflights->size;
/* is next wrapped around buffer? */
if (next >= size) { if (next >= size) {
next -= size; next -= size;
} }
...@@ -61,12 +58,10 @@ void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) ...@@ -61,12 +58,10 @@ void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex)
inflights->count++; inflights->count++;
} }
/** // FreeLE frees the inflights smaller or equal to the given `to` flight.
* syncRaftInflightFreeLE frees the inflights smaller or equal to the given `to` flight.
**/
void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) { void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) { if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) {
/* out of the left side of the window */ // out of the left side of the window
return; return;
} }
...@@ -95,10 +90,8 @@ void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) { ...@@ -95,10 +90,8 @@ void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
} }
} }
/** // FreeFirstOne releases the first inflight. This is a no-op if nothing is
* syncRaftInflightFreeFirstOne releases the first inflight. // inflight.
* This is a no-op if nothing is inflight.
**/
void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) {
syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]); syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]);
} }
/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "sync_raft_node_map.h"
#include "sync_type.h"
#include "sync_raft_progress.h"
// syncRaftInitNodeMap creates the hash table behind nodeMap with taosHashInit,
// using the default hash function for TSDB_DATA_TYPE_INT and HASH_ENTRY_LOCK.
// The result of taosHashInit is stored without a NULL check.
void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap) {
nodeMap->nodeIdMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
}
/**
 * syncRaftFreeNodeMap hands the hash table behind `nodeMap` to
 * taosHashCleanup. The `nodeIdMap` field itself is left untouched.
 **/
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap) {
  SHashObj* idMap = nodeMap->nodeIdMap;
  taosHashCleanup(idMap);
}
/**
 * syncRaftClearNodeMap passes the hash table behind `nodeMap` to
 * taosHashClear; the table object itself is kept in `nodeMap`.
 **/
void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap) {
  SHashObj* idMap = nodeMap->nodeIdMap;
  taosHashClear(idMap);
}
/**
 * syncRaftIsInNodeMap reports whether `nodeId` is a member of `nodeMap`.
 **/
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  // The key is the id value itself, so its length is sizeof(SyncNodeId).
  // Using sizeof(SyncNodeId*) makes the hash read past `nodeId` wherever a
  // pointer is wider than a SyncNodeId, and the extra bytes are arbitrary.
  return taosHashGet(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId)) != NULL;
}

/**
 * syncRaftCopyNodeMap adds every id stored in `from` to `to`.
 *
 * `to` is not cleared first, so ids already present in `to` are kept
 * (this is what lets syncRaftUnionNodeMap reuse it).
 **/
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to) {
  // Walk the table with taosHashIterate directly: the cursor it returns has
  // to be fed back into the next call, which syncRaftIterateNodeMap cannot do
  // for a caller that starts from a NULL pointer passed by value.
  SyncNodeId* pId = NULL;
  while ((pId = (SyncNodeId*)taosHashIterate(from->nodeIdMap, pId)) != NULL) {
    // Key and value are both the id itself (not the address of a local pointer).
    taosHashPut(to->nodeIdMap, pId, sizeof(SyncNodeId), pId, sizeof(SyncNodeId));
  }
}

/**
 * syncRaftIterateNodeMap fetches the entry following `pId` and stores its id
 * in `*pId`.
 *
 * Returns true when the end of the map has been reached, false when `*pId`
 * has been filled in.
 *
 * NOTE(review): `pId` is used both as the cursor handed to taosHashIterate
 * and as the output slot, and it is passed by value. A caller that starts
 * with `SyncNodeId* pId = NULL` therefore can neither receive an id nor
 * advance, and a pointer to caller-owned storage is not a cursor returned by
 * taosHashIterate. Making this usable needs a signature change (for example
 * a separate in/out cursor), which is out of scope here; the functions in
 * this file iterate the table directly instead.
 **/
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId) {
  // Entries hold the SyncNodeId itself (see syncRaftAddToNodeMap), so the
  // pointer returned by the hash is a SyncNodeId*, not a SyncNodeId**.
  SyncNodeId* pNext = (SyncNodeId*)taosHashIterate(nodeMap->nodeIdMap, pId);
  if (pNext == NULL) {
    return true;
  }

  *pId = *pNext;
  return false;
}

/**
 * syncRaftIsAllNodeInProgressMap returns true when every id in `nodeMap` is
 * also present in `progressMap` (trivially true for an empty `nodeMap`).
 **/
bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap) {
  bool allPresent = true;
  SyncNodeId* pId = NULL;

  // NOTE(review): the walk is deliberately run to completion instead of
  // returning at the first miss. taosHashIterate appears to keep per-entry
  // state between calls, so abandoning it midway may need an explicit cancel;
  // confirm against the hash API before adding an early exit.
  while ((pId = (SyncNodeId*)taosHashIterate(nodeMap->nodeIdMap, pId)) != NULL) {
    if (!syncRaftIsInProgressMap(progressMap, *pId)) {
      allPresent = false;
    }
  }

  return allPresent;
}

/**
 * syncRaftUnionNodeMap merges the ids of `nodeMap` into `to`.
 **/
void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to) {
  syncRaftCopyNodeMap(nodeMap, to);
}

/**
 * syncRaftAddToNodeMap inserts `nodeId` into `nodeMap`, keyed by the id and
 * storing the id as the entry value.
 **/
void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  // Both lengths describe a SyncNodeId object, so nothing beyond `nodeId` is read.
  taosHashPut(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId), &nodeId, sizeof(SyncNodeId));
}

/**
 * syncRaftRemoveFromNodeMap removes `nodeId` from `nodeMap`.
 **/
void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  // Same key length as syncRaftAddToNodeMap so the entry can be found.
  taosHashRemove(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId));
}
/**
 * syncRaftNodeMapSize returns the entry count that taosHashGetSize reports
 * for the hash table behind `nodeMap`.
 **/
int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap) {
  int32_t numOfNodes = taosHashGetSize(nodeMap->nodeIdMap);
  return numOfNodes;
}
\ No newline at end of file
...@@ -20,18 +20,26 @@ ...@@ -20,18 +20,26 @@
#include "sync.h" #include "sync.h"
#include "syncInt.h" #include "syncInt.h"
static void copyProgress(SSyncRaftProgress* progress, void* arg);
static void refProgress(SSyncRaftProgress* progress);
static void unrefProgress(SSyncRaftProgress* progress, void*);
static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state); static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state);
static void probeAcked(SSyncRaftProgress* progress); static void probeAcked(SSyncRaftProgress* progress);
static void resumeProgress(SSyncRaftProgress* progress); static void resumeProgress(SSyncRaftProgress* progress);
void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress) { void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
if (progress->inflights) {
syncRaftCloseInflights(progress->inflights);
}
SSyncRaftInflights* inflights = syncRaftOpenInflights(pRaft->tracker->maxInflightMsgs); SSyncRaftInflights* inflights = syncRaftOpenInflights(pRaft->tracker->maxInflightMsgs);
if (inflights == NULL) { if (inflights == NULL) {
return; return;
} }
*progress = (SSyncRaftProgress) { *progress = (SSyncRaftProgress) {
.matchIndex = i == pRaft->selfIndex ? syncRaftLogLastIndex(pRaft->log) : 0, .matchIndex = progress->id == pRaft->selfId ? syncRaftLogLastIndex(pRaft->log) : 0,
.nextIndex = syncRaftLogLastIndex(pRaft->log) + 1, .nextIndex = syncRaftLogLastIndex(pRaft->log) + 1,
.inflights = inflights, .inflights = inflights,
.isLearner = false, .isLearner = false,
...@@ -39,11 +47,9 @@ void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress) ...@@ -39,11 +47,9 @@ void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress)
}; };
} }
/** // MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
* syncRaftProgressMaybeUpdate is called when an MsgAppResp arrives from the follower, with the // index acked by it. The method returns false if the given n index comes from
* index acked by it. The method returns false if the given n index comes from // an outdated message. Otherwise it updates the progress and returns true.
* an outdated message. Otherwise it updates the progress and returns true.
**/
bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) { bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) {
bool updated = false; bool updated = false;
...@@ -58,27 +64,36 @@ bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastInde ...@@ -58,27 +64,36 @@ bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastInde
return updated; return updated;
} }
// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
// arguments are the index of the append message rejected by the follower, and
// the hint that we want to decrease to.
//
// Rejections can happen spuriously as messages are sent out of order or
// duplicated. In such cases, the rejection pertains to an index that the
// Progress already knows were previously acknowledged, and false is returned
// without changing the Progress.
//
// If the rejection is genuine, Next is lowered sensibly, and the Progress is
// cleared for sending log entries.
bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
SyncIndex rejected, SyncIndex matchHint) { SyncIndex rejected, SyncIndex matchHint) {
if (progress->state == PROGRESS_STATE_REPLICATE) { if (progress->state == PROGRESS_STATE_REPLICATE) {
/** // The rejection must be stale if the progress has matched and "rejected"
* the rejection must be stale if the progress has matched and "rejected" // is smaller than "match".
* is smaller than "match".
**/
if (rejected <= progress->matchIndex) { if (rejected <= progress->matchIndex) {
syncDebug("match index is up to date,ignore"); syncDebug("match index is up to date,ignore");
return false; return false;
} }
/* directly decrease next to match + 1 */ // Directly decrease next to match + 1.
//
// TODO(tbg): why not use matchHint if it's larger?
progress->nextIndex = progress->matchIndex + 1; progress->nextIndex = progress->matchIndex + 1;
return true; return true;
} }
/** // The rejection must be stale if "rejected" does not match next - 1. This
* The rejection must be stale if "rejected" does not match next - 1. This // is because non-replicating followers are probed one entry at a time.
* is because non-replicating followers are probed one entry at a time.
**/
if (rejected != progress->nextIndex - 1) { if (rejected != progress->nextIndex - 1) {
syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore" syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore"
, rejected, progress->nextIndex); , rejected, progress->nextIndex);
...@@ -91,14 +106,12 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, ...@@ -91,14 +106,12 @@ bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
return true; return true;
} }
/** // IsPaused returns whether sending log entries to this node has been throttled.
* syncRaftProgressIsPaused returns whether sending log entries to this node has been throttled. // This is done when a node has rejected recent MsgApps, is currently waiting
* This is done when a node has rejected recent MsgApps, is currently waiting // for a snapshot, or has reached the MaxInflightMsgs limit. In normal
* for a snapshot, or has reached the MaxInflightMsgs limit. In normal // operation, this is false. A throttled node will be contacted less frequently
* operation, this is false. A throttled node will be contacted less frequently // until it has reached a state in which it's able to accept a steady stream of
* until it has reached a state in which it's able to accept a steady stream of // log entries again.
* log entries again.
**/
bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) {
switch (progress->state) { switch (progress->state) {
case PROGRESS_STATE_PROBE: case PROGRESS_STATE_PROBE:
...@@ -112,58 +125,44 @@ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { ...@@ -112,58 +125,44 @@ bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) {
} }
} }
int syncRaftFindProgressIndexByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id) { SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id) {
int i; SSyncRaftProgress** ppProgress = (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*));
for (i = 0; i < TSDB_MAX_REPLICA; ++i) { if (ppProgress == NULL) {
if (progressMap->progress[i].id == id) { return NULL;
return i;
}
} }
return -1;
return *ppProgress;
} }
int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) { int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress) {
int i, j; refProgress(progress);
taosHashPut(progressMap->progressMap, &progress->id, sizeof(SyncNodeId*), &progress, sizeof(SSyncRaftProgress*));
}
for (i = 0, j = -1; i < TSDB_MAX_REPLICA; ++i) { void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) {
if (progressMap->progress[i].id == id) { SSyncRaftProgress** ppProgress = (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*));
return i; if (ppProgress == NULL) {
} return;
if (j == -1 && progressMap->progress[i].id == SYNC_NON_NODE_ID) {
j = i;
}
} }
unrefProgress(*ppProgress, NULL);
assert(j != -1); taosHashRemove(progressMap->progressMap, &id, sizeof(SyncNodeId*));
progressMap->progress[i].id = id;
} }
void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) { bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) {
int i; return taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*)) != NULL;
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
if (progressMap->progress[i].id == id) {
progressMap->progress[i].id = SYNC_NON_NODE_ID;
break;
}
}
} }
bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) { bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
return syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex; return syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex;
} }
/** // BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
* syncRaftProgressBecomeProbe transitions into StateProbe. Next is reset to Match+1 or, // optionally and if larger, the index of the pending snapshot.
* optionally and if larger, the index of the pending snapshot.
**/
void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) { void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) {
/** // If the original state is StateSnapshot, progress knows that
* If the original state is ProgressStateSnapshot, progress knows that // the pending snapshot has been sent to this peer successfully, then
* the pending snapshot has been sent to this peer successfully, then // probes from pendingSnapshot + 1.
* probes from pendingSnapshot + 1.
**/
if (progress->state == PROGRESS_STATE_SNAPSHOT) { if (progress->state == PROGRESS_STATE_SNAPSHOT) {
SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex;
resetProgressState(progress, PROGRESS_STATE_PROBE); resetProgressState(progress, PROGRESS_STATE_PROBE);
...@@ -174,111 +173,88 @@ void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) { ...@@ -174,111 +173,88 @@ void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) {
} }
} }
/** // BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
* syncRaftProgressBecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
**/
void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) { void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) {
resetProgressState(progress, PROGRESS_STATE_REPLICATE); resetProgressState(progress, PROGRESS_STATE_REPLICATE);
progress->nextIndex = progress->matchIndex + 1; progress->nextIndex = progress->matchIndex + 1;
} }
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
// snapshot index.
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) { void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) {
resetProgressState(progress, PROGRESS_STATE_SNAPSHOT); resetProgressState(progress, PROGRESS_STATE_SNAPSHOT);
progress->pendingSnapshotIndex = snapshotIndex; progress->pendingSnapshotIndex = snapshotIndex;
} }
void syncRaftCopyProgress(const SSyncRaftProgress* progress, SSyncRaftProgress* out) { void syncRaftCopyProgress(const SSyncRaftProgress* progress, SSyncRaftProgress* out) {
memcpy(out, progress, sizeof(SSyncRaftProgress));
}
/**
* ResetState moves the Progress into the specified State, resetting ProbeSent,
* PendingSnapshot, and Inflights.
**/
static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state) {
progress->probeSent = false;
progress->pendingSnapshotIndex = 0;
progress->state = state;
syncRaftInflightReset(progress->inflights);
} }
/** void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap) {
* probeAcked is called when this peer has accepted an append. It resets progressMap->progressMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
* ProbeSent to signal that additional append messages should be sent without
* further delay.
**/
static void probeAcked(SSyncRaftProgress* progress) {
progress->probeSent = false;
} }
#if 0 void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap) {
syncRaftVisitProgressMap(progressMap, unrefProgress, NULL);
SyncIndex syncRaftProgressNextIndex(SSyncRaft* pRaft, int i) { taosHashCleanup(progressMap->progressMap);
return pRaft->leaderState.progress[i].nextIndex;
} }
SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i) { void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap) {
return pRaft->leaderState.progress[i].matchIndex; taosHashClear(progressMap->progressMap);
} }
void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i) { void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to) {
pRaft->leaderState.progress[i].lastSend = pRaft->io.time(pRaft); syncRaftVisitProgressMap(from, copyProgress, to);
} }
void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i) { bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress) {
pRaft->leaderState.progress[i].lastSendSnapshot = pRaft->io.time(pRaft); SSyncRaftProgress **ppProgress = taosHashIterate(progressMap->progressMap, pProgress);
} if (ppProgress == NULL) {
return true;
}
bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i) { *pProgress = *(*ppProgress);
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); return false;
bool prev = progress->recentRecv;
progress->recentRecv = false;
return prev;
} }
void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i) { bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg) {
pRaft->leaderState.progress[i].recentRecv = true; SSyncRaftProgress *pProgress;
while (!syncRaftIterateProgressMap(progressMap, pProgress)) {
fp(pProgress, arg);
}
} }
bool syncRaftProgressGetRecentRecv(SSyncRaft* pRaft, int i) { static void copyProgress(SSyncRaftProgress* progress, void* arg) {
return pRaft->leaderState.progress[i].recentRecv; assert(progress->refCount > 0);
SSyncRaftProgressMap* to = (SSyncRaftProgressMap*)arg;
syncRaftAddToProgressMap(to, progress);
} }
void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i) { static void refProgress(SSyncRaftProgress* progress) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); progress->refCount += 1;
resetProgressState(progress, PROGRESS_STATE_SNAPSHOT);
progress->pendingSnapshotIndex = raftLogSnapshotIndex(pRaft->log);
} }
void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i) { static void unrefProgress(SSyncRaftProgress* progress, void* arg) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]); (void)arg;
progress->refCount -= 1;
if (progress->state == PROGRESS_STATE_SNAPSHOT) { assert(progress->refCount >= 0);
assert(progress->pendingSnapshotIndex > 0); if (progress->refCount == 0) {
SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; free(progress);
resetProgressState(progress, PROGRESS_STATE_PROBE);
progress->nextIndex = max(progress->matchIndex + 1, pendingSnapshotIndex);
} else {
resetProgressState(progress, PROGRESS_STATE_PROBE);
progress->nextIndex = progress->matchIndex + 1;
} }
} }
void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i) { // ResetState moves the Progress into the specified State, resetting ProbeSent,
resetProgressState(pRaft->leaderState.progress, PROGRESS_STATE_REPLICATE); // PendingSnapshot, and Inflights.
pRaft->leaderState.progress->nextIndex = pRaft->leaderState.progress->matchIndex + 1; static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state) {
} progress->probeSent = false;
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i) {
SSyncRaftProgress* progress = &(pRaft->leaderState.progress[i]);
progress->pendingSnapshotIndex = 0; progress->pendingSnapshotIndex = 0;
progress->state = PROGRESS_STATE_PROBE; progress->state = state;
syncRaftInflightReset(progress->inflights);
} }
ESyncRaftProgressState syncRaftProgressState(SSyncRaft* pRaft, int i) { // ProbeAcked is called when this peer has accepted an append. It resets
return pRaft->leaderState.progress[i].state; // ProbeSent to signal that additional append messages should be sent without
// further delay.
static void probeAcked(SSyncRaftProgress* progress) {
progress->probeSent = false;
} }
#endif
\ No newline at end of file
...@@ -13,62 +13,99 @@ ...@@ -13,62 +13,99 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "raft.h"
#include "sync_const.h"
#include "sync_raft_progress_tracker.h" #include "sync_raft_progress_tracker.h"
#include "sync_raft_proto.h" #include "sync_raft_proto.h"
SSyncRaftProgressTracker* syncRaftOpenProgressTracker() { SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft) {
SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)malloc(sizeof(SSyncRaftProgressTracker)); SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)malloc(sizeof(SSyncRaftProgressTracker));
if (tracker == NULL) { if (tracker == NULL) {
return NULL; return NULL;
} }
tracker->votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
syncRaftInitTrackConfig(&tracker->config);
tracker->pRaft = pRaft;
tracker->maxInflightMsgs = kSyncRaftMaxInflghtMsgs;
return tracker; return tracker;
} }
/**
 * syncRaftInitTrackConfig puts `config` into its initial state: autoLeave is
 * off, and the joint voter config plus both learner node maps are initialized.
 **/
void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  config->autoLeave = false;

  // Voters (incoming/outgoing) are owned by the joint quorum config.
  syncRaftInitQuorumJointConfig(&config->voters);

  syncRaftInitNodeMap(&config->learners);
  syncRaftInitNodeMap(&config->learnersNext);
}
/**
 * syncRaftFreeTrackConfig releases the four node maps held by `config`:
 * learners, learnersNext, and the incoming and outgoing voter sets.
 **/
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  SSyncRaftNodeMap* const nodeMaps[] = {
    &config->learners,
    &config->learnersNext,
    &config->voters.incoming,
    &config->voters.outgoing,
  };
  const int numOfMaps = (int)(sizeof(nodeMaps) / sizeof(nodeMaps[0]));

  for (int i = 0; i < numOfMaps; ++i) {
    syncRaftFreeNodeMap(nodeMaps[i]);
  }
}
// ResetVotes prepares for a new round of vote counting via recordVote.
void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) { void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) {
memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteType) * TSDB_MAX_REPLICA); taosHashClear(tracker->votesMap);
} }
void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) { void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) {
int i; syncRaftVisitProgressMap(&tracker->progressMap, visit, arg);
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
SSyncRaftProgress* progress = &(tracker->progressMap.progress[i]);
visit(i, progress, arg);
}
} }
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant) { // RecordVote records that the node with the given id voted for this Raft
if (tracker->votes[i] != SYNC_RAFT_VOTE_RESP_UNKNOWN) { // instance if v == true (and declined it otherwise).
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant) {
ESyncRaftVoteType* pType = taosHashGet(tracker->votesMap, &id, sizeof(SyncNodeId*));
if (pType != NULL) {
return; return;
} }
tracker->votes[i] = grant ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT; taosHashPut(tracker->votesMap, &id, sizeof(SyncNodeId), &grant, sizeof(bool*));
} }
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) { void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) {
memcpy(to, from, sizeof(SSyncRaftProgressTrackerConfig));
}
/**
 * syncRaftCheckTrackerConfigInProgress verifies that every node id referenced
 * by `config` (incoming voters, outgoing voters, learners, learnersNext) is
 * present in `progressMap`.
 *
 * Returns 0 when all ids are present, -1 as soon as one of the four sets
 * fails the check.
 **/
int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  // NB: the empty config is intentionally accepted. An empty config is never
  // expected in production (its creation is prevented), but an initial config
  // still has to be *created*, for example during bootstrap or in tests.
  // Allowing the transition from an empty config into any other legal,
  // non-empty config avoids having to hand-code that case.
  SSyncRaftNodeMap* const nodeMaps[] = {
    &config->voters.incoming,
    &config->voters.outgoing,
    &config->learners,
    &config->learnersNext,
  };
  const int numOfMaps = (int)(sizeof(nodeMaps) / sizeof(nodeMaps[0]));

  for (int i = 0; i < numOfMaps; ++i) {
    if (!syncRaftIsAllNodeInProgressMap(nodeMaps[i], progressMap)) {
      return -1;
    }
  }

  return 0;
}
/** // TallyVotes returns the number of granted and rejected Votes, and whether the
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the // election outcome is known.
* election outcome is known.
**/
ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted) { ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted) {
int i; SSyncRaftProgress* progress = NULL;
SSyncRaftProgress* progress;
int r, g; int r, g;
for (i = 0, r = 0, g = 0; i < TSDB_MAX_REPLICA; ++i) { // Make sure to populate granted/rejected correctly even if the Votes slice
progress = &(tracker->progressMap.progress[i]); // contains members no longer part of the configuration. This doesn't really
// matter in the way the numbers are used (they're informational), but might
// as well get it right.
while (!syncRaftIterateProgressMap(&tracker->progressMap, progress)) {
if (progress->id == SYNC_NON_NODE_ID) { if (progress->id == SYNC_NON_NODE_ID) {
continue; continue;
} }
if (tracker->votes[i] == SYNC_RAFT_VOTE_RESP_UNKNOWN) { bool* v = taosHashGet(tracker->votesMap, &progress->id, sizeof(SyncNodeId*));
if (v == NULL) {
continue; continue;
} }
if (tracker->votes[i] == SYNC_RAFT_VOTE_RESP_GRANT) { if (*v) {
g++; g++;
} else { } else {
r++; r++;
...@@ -77,12 +114,43 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r ...@@ -77,12 +114,43 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r
if (rejected) *rejected = r; if (rejected) *rejected = r;
if (granted) *granted = g; if (granted) *granted = g;
return syncRaftVoteResult(&(tracker->config.voters), tracker->votes); return syncRaftVoteResult(&(tracker->config.voters), tracker->votesMap);
}
/**
 * syncRaftConfigState fills `cs` from the tracker's current configuration:
 * the autoLeave flag is copied, and the ids of the incoming voters, outgoing
 * voters, learners and learnersNext are copied into the matching node maps
 * of `cs` via syncRaftCopyNodeMap.
 **/
void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs) {
  SSyncRaftProgressTrackerConfig* trackerConfig = &tracker->config;

  cs->autoLeave = trackerConfig->autoLeave;

  syncRaftCopyNodeMap(&trackerConfig->voters.incoming, &cs->voters);
  syncRaftCopyNodeMap(&trackerConfig->voters.outgoing, &cs->votersOutgoing);
  syncRaftCopyNodeMap(&trackerConfig->learners, &cs->learners);
  syncRaftCopyNodeMap(&trackerConfig->learnersNext, &cs->learnersNext);
}
void syncRaftConfigState(const SSyncRaftProgressTracker* tracker, SSyncConfigState* cs) { static void matchAckIndexer(SyncNodeId id, void* arg, SyncIndex* index) {
memcpy(&cs->voters, &tracker->config.voters.incoming, sizeof(SSyncRaftNodeMap)); SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)arg;
memcpy(&cs->votersOutgoing, &tracker->config.voters.outgoing, sizeof(SSyncRaftNodeMap)); SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&tracker->progressMap, id);
memcpy(&cs->learners, &tracker->config.learners, sizeof(SSyncRaftNodeMap)); if (progress == NULL) {
memcpy(&cs->learnersNext, &tracker->config.learnersNext, sizeof(SSyncRaftNodeMap)); *index = 0;
return;
}
*index = progress->matchIndex;
}
/**
 * syncRaftCommittedIndex returns the largest log index known to be committed,
 * based on what the voting members of the group have acknowledged.
 *
 * The computation is delegated to the joint voter config; matchAckIndexer is
 * passed as the callback that supplies the acknowledged index for a node id,
 * with `tracker` as its argument.
 **/
SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker) {
  SSyncRaftQuorumJointConfig* voters = &tracker->config.voters;
  SyncIndex committed = syncRaftJointConfigCommittedIndex(voters, matchAckIndexer, tracker);
  return committed;
}
/**
 * visitProgressActive is a progress-map visitor: it records the visited
 * node's recentActive flag in the hash table passed through `arg`, keyed by
 * the node id.
 **/
static void visitProgressActive(SSyncRaftProgress* progress, void* arg) {
  SHashObj* activeByNode = arg;
  taosHashPut(activeByNode,
              &progress->id, sizeof(SyncNodeId),
              &progress->recentActive, sizeof(bool));
}
/**
 * syncRaftQuorumActive returns true if the quorum is active from the view of
 * the local raft state machine, and false otherwise.
 *
 * Each node's recentActive flag is collected into a temporary "votes" table
 * (node id -> bool) and evaluated against the voter configuration exactly
 * like an election: the quorum is active when that vote is won.
 *
 * Returns false when the temporary table cannot be allocated.
 **/
bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker) {
  SHashObj* votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
  if (votesMap == NULL) {
    // Without the table no activity can be recorded, so no quorum can be shown.
    return false;
  }

  syncRaftVisitProgressMap(&tracker->progressMap, visitProgressActive, votesMap);
  bool active = syncRaftVoteResult(&tracker->config.voters, votesMap) == SYNC_RAFT_VOTE_WON;

  // The table only lives for this call; release it so each call does not leak.
  taosHashCleanup(votesMap);
  return active;
}
\ No newline at end of file
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "sync_raft_node_map.h"
#include "sync_raft_quorum_majority.h" #include "sync_raft_quorum_majority.h"
#include "sync_raft_quorum_joint.h" #include "sync_raft_quorum_joint.h"
#include "sync_raft_quorum.h" #include "sync_raft_quorum.h"
...@@ -22,9 +23,9 @@ ...@@ -22,9 +23,9 @@
* a result indicating whether the vote is pending, lost, or won. A joint quorum * a result indicating whether the vote is pending, lost, or won. A joint quorum
* requires both majority quorums to vote in favor. * requires both majority quorums to vote in favor.
**/ **/
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes) { ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap) {
ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votes); ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votesMap);
ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votes); ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votesMap);
if (r1 == r2) { if (r1 == r2) {
// If they agree, return the agreed state. // If they agree, return the agreed state.
...@@ -40,46 +41,35 @@ ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const E ...@@ -40,46 +41,35 @@ ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const E
return SYNC_RAFT_VOTE_PENDING; return SYNC_RAFT_VOTE_PENDING;
} }
void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) { void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config) {
int i, min; syncRaftInitNodeMap(&config->incoming);
syncRaftInitNodeMap(&config->outgoing);
}
for (i = 0, min = -1; i < TSDB_MAX_REPLICA; ++i) { void syncRaftFreeQuorumJointConfig(SSyncRaftQuorumJointConfig* config) {
if (config->incoming.nodeId[i] == id) { syncRaftFreeNodeMap(&config->incoming);
return; syncRaftFreeNodeMap(&config->outgoing);
} }
if (min == -1 && config->incoming.nodeId[i] == SYNC_NON_NODE_ID) {
min = i;
}
}
assert(min != -1); void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
config->incoming.nodeId[min] = id; syncRaftAddToNodeMap(&config->incoming, id);
config->incoming.replica += 1;
} }
void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) { void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
int i; syncRaftRemoveFromNodeMap(&config->incoming, id);
}
for (i = 0; i < TSDB_MAX_REPLICA; ++i) { void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap) {
if (config->incoming.nodeId[i] == id) { syncRaftCopyNodeMap(&config->incoming, nodeMap);
config->incoming.replica -= 1;
config->incoming.nodeId[i] = SYNC_NON_NODE_ID;
break;
}
}
assert(config->incoming.replica >= 0); syncRaftUnionNodeMap(&config->outgoing, nodeMap);
} }
SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg) {
SyncIndex index0, index1;
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) { index0 = syncRaftMajorityConfigCommittedIndex(&config->incoming, indexer, arg);
int i; index1 = syncRaftMajorityConfigCommittedIndex(&config->outgoing, indexer, arg);
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
if (nodeId == nodeMap->nodeId[i]) {
return true;
}
}
return false; return index0 < index1 ? index0 : index1;
} }
\ No newline at end of file
...@@ -13,42 +13,109 @@ ...@@ -13,42 +13,109 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "sync_const.h"
#include "sync_raft_quorum.h" #include "sync_raft_quorum.h"
#include "sync_raft_quorum_majority.h" #include "sync_raft_quorum_majority.h"
#include "sync_raft_node_map.h"
/** // VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns // a result indicating whether the vote is pending (i.e. neither a quorum of
* a result indicating whether the vote is pending (i.e. neither a quorum of // yes/no has been reached), won (a quorum of yes has been reached), or lost (a
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a // quorum of no has been reached).
* quorum of no has been reached). ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap) {
**/ int n = syncRaftNodeMapSize(config);
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, const ESyncRaftVoteType* votes) { if (n == 0) {
if (config->replica == 0) { // By convention, the elections on an empty config win. This comes in
// handy with joint quorums because it'll make a half-populated joint
// quorum behave like a majority quorum.
return SYNC_RAFT_VOTE_WON; return SYNC_RAFT_VOTE_WON;
} }
int i, g, r, missing; int i, g, r, missing;
for (i = g = r = missing = 0; i < TSDB_MAX_REPLICA; ++i) { i = g = r = missing = 0;
if (config->nodeId[i] == SYNC_NON_NODE_ID) { SyncNodeId* pId = NULL;
while (!syncRaftIterateNodeMap(config, pId)) {
const bool* v = (const bool*)taosHashGet(votesMap, pId, sizeof(SyncNodeId*));
if (v == NULL) {
missing += 1;
continue; continue;
} }
if (votes[i] == SYNC_RAFT_VOTE_RESP_UNKNOWN) { if (*v) {
missing += 1;
} else if (votes[i] == SYNC_RAFT_VOTE_RESP_GRANT) {
g +=1; g +=1;
} else { } else {
r += 1; r += 1;
} }
} }
int quorum = config->replica / 2 + 1; int quorum = n / 2 + 1;
if (g >= quorum) { if (g >= quorum) {
return SYNC_RAFT_VOTE_WON; return SYNC_RAFT_VOTE_WON;
} }
if (r + missing >= quorum) { if (g + missing >= quorum) {
return SYNC_RAFT_VOTE_PENDING; return SYNC_RAFT_VOTE_PENDING;
} }
return SYNC_RAFT_VOTE_LOST; return SYNC_RAFT_VOTE_LOST;
}
// compSyncIndex is a qsort() comparator that orders SyncIndex values ascending.
//
// elem1, elem2: pointers to the two SyncIndex elements being compared.
// Returns 1 when *elem1 > *elem2, -1 when *elem1 < *elem2 and 0 when equal.
int compSyncIndex(const void * elem1, const void * elem2) {
  const SyncIndex index1 = *((const SyncIndex*)elem1);
  // The right-hand operand must come from elem2; reading elem1 for both sides
  // makes every comparison return 0, which leaves the array unsorted.
  const SyncIndex index2 = *((const SyncIndex*)elem2);

  if (index1 > index2) return 1;
  if (index1 < index2) return -1;
  return 0;
}
// syncRaftMajorityConfigCommittedIndex computes the index acknowledged by a
// quorum of the nodes in config.
//
// config:  node map describing the majority config.
// indexer: callback invoked as indexer(nodeId, arg, &index) to obtain the
//          index for one node.
// arg:     opaque pointer handed through to indexer.
//
// Returns kMaxCommitIndex when the config is empty or when the heap buffer
// cannot be allocated; otherwise returns the element at position
// n - (n/2 + 1) of the collected indexes after sorting them with
// compSyncIndex.
SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg) {
int n = syncRaftNodeMapSize(config);
if (n == 0) {
// This plays well with joint quorums which, when one half is the empty
// majority config, should behave like the other half.
return kMaxCommitIndex;
}
// Collect the indexes in the on-stack array srk when n <= TSDB_MAX_REPLICA;
// only larger configs fall back to a heap allocation, which is released
// before returning.
SyncIndex* srt = NULL;
SyncIndex srk[TSDB_MAX_REPLICA];
if (n > TSDB_MAX_REPLICA) {
srt = (SyncIndex*)malloc(sizeof(SyncIndex) * n);
if (srt == NULL) {
// NOTE(review): allocation failure is reported with the same value as the
// empty-config case — confirm callers can tolerate that.
return kMaxCommitIndex;
}
} else {
srt = &srk[0];
}
// Fill the buffer with one index per node, starting at slot 0 and moving
// upwards in iteration order.
// NOTE(review): srk is not zero-initialised and i is not checked against n,
// so this relies on the iteration yielding exactly n entries — confirm.
// NOTE(review): pId starts as NULL, is passed by value and is never
// reassigned in this function, yet it is dereferenced in the loop body —
// confirm how syncRaftIterateNodeMap is expected to advance it.
SyncNodeId *pId = NULL;
int i = 0;
SyncIndex index;
while (!syncRaftIterateNodeMap(config, pId)) {
indexer(*pId, arg, &index);
srt[i++] = index;
}
// Sort the collected indexes with qsort(), using compSyncIndex as comparator.
qsort(srt, n, sizeof(SyncIndex), compSyncIndex);
// Pick the smallest index that is acked by a quorum: from the end of the
// sorted buffer, move n/2+1 entries to the left (accounting for
// zero-indexing).
index = srt[n - (n/2 + 1)];
// Release the buffer only if it came from malloc() rather than the stack.
if (srt != &srk[0]) {
free(srt);
}
return index;
} }
\ No newline at end of file
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "sync_raft_restore.h" #include "sync_raft_restore.h"
#include "sync_raft_progress_tracker.h" #include "sync_raft_progress_tracker.h"
static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t);
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in); static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in);
// syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and // syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and
...@@ -27,21 +28,26 @@ static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleA ...@@ -27,21 +28,26 @@ static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleA
// the Changer only needs a ProgressMap (not a whole Tracker) at which point // the Changer only needs a ProgressMap (not a whole Tracker) at which point
// this can just take LastIndex and MaxInflight directly instead and cook up // this can just take LastIndex and MaxInflight directly instead and cook up
// the results from that alone. // the results from that alone.
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs) { int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
SSyncConfChangeSingleArray outgoing; SSyncConfChangeSingleArray outgoing;
SSyncConfChangeSingleArray incoming; SSyncConfChangeSingleArray incoming;
SSyncConfChangeSingleArray css; SSyncConfChangeSingleArray css;
SSyncRaftProgressTracker* tracker = changer->tracker; SSyncRaftProgressTracker* tracker = changer->tracker;
SSyncRaftProgressTrackerConfig* config = &tracker->config;
SSyncRaftProgressMap* progressMap = &tracker->progressMap;
int i, ret; int i, ret;
syncRaftInitConfArray(&outgoing);
syncRaftInitConfArray(&incoming);
syncRaftInitTrackConfig(config);
syncRaftInitProgressMap(progressMap);
ret = toConfChangeSingle(cs, &outgoing, &incoming); ret = toConfChangeSingle(cs, &outgoing, &incoming);
if (ret != 0) { if (ret != 0) {
goto out; goto out;
} }
if (outgoing.n == 0) { if (syncRaftConfArrayIsEmpty(&outgoing)) {
// No outgoing config, so just apply the incoming changes one by one. // No outgoing config, so just apply the incoming changes one by one.
for (i = 0; i < incoming.n; ++i) { for (i = 0; i < incoming.n; ++i) {
css = (SSyncConfChangeSingleArray) { css = (SSyncConfChangeSingleArray) {
...@@ -52,6 +58,9 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs) ...@@ -52,6 +58,9 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs)
if (ret != 0) { if (ret != 0) {
goto out; goto out;
} }
syncRaftCopyTrackerConfig(config, &changer->tracker->config);
syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap);
} }
} else { } else {
// The ConfState describes a joint configuration. // The ConfState describes a joint configuration.
...@@ -68,6 +77,8 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs) ...@@ -68,6 +77,8 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs)
if (ret != 0) { if (ret != 0) {
goto out; goto out;
} }
syncRaftCopyTrackerConfig(config, &changer->tracker->config);
syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap);
} }
ret = syncRaftChangerEnterJoint(changer, cs->autoLeave, &incoming, config, progressMap); ret = syncRaftChangerEnterJoint(changer, cs->autoLeave, &incoming, config, progressMap);
...@@ -77,11 +88,24 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs) ...@@ -77,11 +88,24 @@ int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs)
} }
out: out:
if (incoming.n != 0) free(incoming.changes); syncRaftFreeConfArray(&incoming);
if (outgoing.n != 0) free(outgoing.changes); syncRaftFreeConfArray(&outgoing);
return ret; return ret;
} }
// addToConfChangeSingleArray writes one change of type t into out->changes for
// every node id produced while iterating nodeMap. Writing starts at slot *i and
// *i is advanced by one for each entry written; no bounds check is performed
// here, so the caller has to size out->changes beforehand.
// NOTE(review): the cursor starts as NULL and is only ever passed by value to
// syncRaftIterateNodeMap — confirm how that function is expected to advance it.
static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t) {
  SyncNodeId* cursor = NULL;

  for (;;) {
    // A true return value signals that the iteration is finished.
    if (syncRaftIterateNodeMap(nodeMap, cursor)) {
      break;
    }

    SSyncConfChangeSingle change = {
      .type   = t,
      .nodeId = *cursor,
    };
    out->changes[*i] = change;
    ++(*i);
  }
}
// toConfChangeSingle translates a conf state into 1) a slice of operations creating // toConfChangeSingle translates a conf state into 1) a slice of operations creating
// first the config that will become the outgoing one, and then the incoming one, and // first the config that will become the outgoing one, and then the incoming one, and
// b) another slice that, when applied to the config resulted from 1), represents the // b) another slice that, when applied to the config resulted from 1), represents the
...@@ -89,15 +113,16 @@ out: ...@@ -89,15 +113,16 @@ out:
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) { static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) {
int i; int i;
out->n = in->n = 0; out->n = syncRaftNodeMapSize(&cs->votersOutgoing);
out->n = cs->votersOutgoing.replica;
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n); out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n);
if (out->changes == NULL) { if (out->changes == NULL) {
out->n = 0; out->n = 0;
return -1; return -1;
} }
in->n = cs->votersOutgoing.replica + cs->voters.replica + cs->learners.replica + cs->learnersNext.replica; in->n = syncRaftNodeMapSize(&cs->votersOutgoing) +
syncRaftNodeMapSize(&cs->voters) +
syncRaftNodeMapSize(&cs->learners) +
syncRaftNodeMapSize(&cs->learnersNext);
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n); out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n);
if (in->changes == NULL) { if (in->changes == NULL) {
in->n = 0; in->n = 0;
...@@ -132,50 +157,24 @@ static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleA ...@@ -132,50 +157,24 @@ static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleA
// //
// as desired. // as desired.
for (i = 0; i < cs->votersOutgoing.replica; ++i) { // If there are outgoing voters, first add them one by one so that the
// If there are outgoing voters, first add them one by one so that the // (non-joint) config has them all.
// (non-joint) config has them all. i = 0;
out->changes[i] = (SSyncConfChangeSingle) { addToConfChangeSingleArray(out, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_AddNode);
.type = SYNC_RAFT_Conf_AddNode, assert(i == out->n);
.nodeId = cs->votersOutgoing.nodeId[i],
};
}
// We're done constructing the outgoing slice, now on to the incoming one // We're done constructing the outgoing slice, now on to the incoming one
// (which will apply on top of the config created by the outgoing slice). // (which will apply on top of the config created by the outgoing slice).
i = 0;
// First, we'll remove all of the outgoing voters. // First, we'll remove all of the outgoing voters.
int j = 0; addToConfChangeSingleArray(in, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_RemoveNode);
for (i = 0; i < cs->votersOutgoing.replica; ++i) {
in->changes[j] = (SSyncConfChangeSingle) {
.type = SYNC_RAFT_Conf_RemoveNode,
.nodeId = cs->votersOutgoing.nodeId[i],
};
j += 1;
}
// Then we'll add the incoming voters and learners. // Then we'll add the incoming voters and learners.
for (i = 0; i < cs->voters.replica; ++i) { addToConfChangeSingleArray(in, &i, &cs->voters, SYNC_RAFT_Conf_AddNode);
in->changes[j] = (SSyncConfChangeSingle) { addToConfChangeSingleArray(in, &i, &cs->learners, SYNC_RAFT_Conf_AddLearnerNode);
.type = SYNC_RAFT_Conf_AddNode, addToConfChangeSingleArray(in, &i, &cs->learnersNext, SYNC_RAFT_Conf_AddLearnerNode);
.nodeId = cs->voters.nodeId[i], assert(i == in->n);
};
j += 1;
}
for (i = 0; i < cs->learners.replica; ++i) {
in->changes[j] = (SSyncConfChangeSingle) {
.type = SYNC_RAFT_Conf_AddLearnerNode,
.nodeId = cs->learners.nodeId[i],
};
j += 1;
}
// Same for LearnersNext; these are nodes we want to be learners but which
// are currently voters in the outgoing config.
for (i = 0; i < cs->learnersNext.replica; ++i) {
in->changes[j] = (SSyncConfChangeSingle) {
.type = SYNC_RAFT_Conf_AddLearnerNode,
.nodeId = cs->learnersNext.nodeId[i],
};
j += 1;
}
return 0; return 0;
} }
\ No newline at end of file
...@@ -54,10 +54,11 @@ typedef struct { ...@@ -54,10 +54,11 @@ typedef struct {
char secret[TSDB_KEY_LEN]; // secret for the link char secret[TSDB_KEY_LEN]; // secret for the link
char ckey[TSDB_KEY_LEN]; // ciphering key char ckey[TSDB_KEY_LEN]; // ciphering key
void (*cfp)(SRpcMsg *, SEpSet *); void (*cfp)(void *parent, SRpcMsg *, SEpSet *);
int (*afp)(char *user, char *spi, char *encrypt, char *secret, char *ckey); int (*afp)(void *parent, char *user, char *spi, char *encrypt, char *secret, char *ckey);
int32_t refCount; int32_t refCount;
void *parent;
void *idPool; // handle to ID pool void *idPool; // handle to ID pool
void *tmrCtrl; // handle to timer void *tmrCtrl; // handle to timer
SHashObj *hash; // handle returned by hash utility SHashObj *hash; // handle returned by hash utility
...@@ -260,6 +261,7 @@ void *rpcOpen(const SRpcInit *pInit) { ...@@ -260,6 +261,7 @@ void *rpcOpen(const SRpcInit *pInit) {
pRpc->spi = pInit->spi; pRpc->spi = pInit->spi;
pRpc->cfp = pInit->cfp; pRpc->cfp = pInit->cfp;
pRpc->afp = pInit->afp; pRpc->afp = pInit->afp;
pRpc->parent = pInit->parent;
pRpc->refCount = 1; pRpc->refCount = 1;
atomic_add_fetch_32(&tsRpcNum, 1); atomic_add_fetch_32(&tsRpcNum, 1);
...@@ -505,14 +507,18 @@ void rpcSendRedirectRsp(void *thandle, const SEpSet *pEpSet) { ...@@ -505,14 +507,18 @@ void rpcSendRedirectRsp(void *thandle, const SEpSet *pEpSet) {
} }
int rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo) { int rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo) {
#if 0
SRpcConn *pConn = (SRpcConn *)thandle; SRpcConn *pConn = (SRpcConn *)thandle;
if (pConn->user[0] == 0) return -1; if (pConn->user[0] == 0) return -1;
pInfo->clientIp = pConn->peerIp; pInfo->clientIp = pConn->peerIp;
pInfo->clientPort = pConn->peerPort; pInfo->clientPort = pConn->peerPort;
// pInfo->serverIp = pConn->destIp; // pInfo->serverIp = pConn->destIp;
tstrncpy(pInfo->user, pConn->user, sizeof(pInfo->user)); tstrncpy(pInfo->user, pConn->user, sizeof(pInfo->user));
#else
strcpy(pInfo->user, "root");
#endif
return 0; return 0;
} }
...@@ -740,7 +746,7 @@ static SRpcConn *rpcAllocateServerConn(SRpcInfo *pRpc, SRecvInfo *pRecv) { ...@@ -740,7 +746,7 @@ static SRpcConn *rpcAllocateServerConn(SRpcInfo *pRpc, SRecvInfo *pRecv) {
if (pConn->user[0] == 0) { if (pConn->user[0] == 0) {
terrno = TSDB_CODE_RPC_AUTH_REQUIRED; terrno = TSDB_CODE_RPC_AUTH_REQUIRED;
} else { } else {
terrno = (*pRpc->afp)(pConn->user, &pConn->spi, &pConn->encrypt, pConn->secret, pConn->ckey); terrno = (*pRpc->afp)(pRpc->parent, pConn->user, &pConn->spi, &pConn->encrypt, pConn->secret, pConn->ckey);
} }
if (terrno != 0) { if (terrno != 0) {
...@@ -1020,8 +1026,8 @@ static void doRpcReportBrokenLinkToServer(void *param, void *id) { ...@@ -1020,8 +1026,8 @@ static void doRpcReportBrokenLinkToServer(void *param, void *id) {
SRpcMsg *pRpcMsg = (SRpcMsg *)(param); SRpcMsg *pRpcMsg = (SRpcMsg *)(param);
SRpcConn *pConn = (SRpcConn *)(pRpcMsg->handle); SRpcConn *pConn = (SRpcConn *)(pRpcMsg->handle);
SRpcInfo *pRpc = pConn->pRpc; SRpcInfo *pRpc = pConn->pRpc;
(*(pRpc->cfp))(pRpcMsg, NULL); (*(pRpc->cfp))(pRpc->parent, pRpcMsg, NULL);
free(pRpcMsg); free(pRpcMsg);
} }
static void rpcReportBrokenLinkToServer(SRpcConn *pConn) { static void rpcReportBrokenLinkToServer(SRpcConn *pConn) {
SRpcInfo *pRpc = pConn->pRpc; SRpcInfo *pRpc = pConn->pRpc;
...@@ -1133,9 +1139,9 @@ static void rpcNotifyClient(SRpcReqContext *pContext, SRpcMsg *pMsg) { ...@@ -1133,9 +1139,9 @@ static void rpcNotifyClient(SRpcReqContext *pContext, SRpcMsg *pMsg) {
// for asynchronous API // for asynchronous API
SEpSet *pEpSet = NULL; SEpSet *pEpSet = NULL;
if (pContext->epSet.inUse != pContext->oldInUse || pContext->redirect) if (pContext->epSet.inUse != pContext->oldInUse || pContext->redirect)
pEpSet = &pContext->epSet; pEpSet = &pContext->epSet;
(*pRpc->cfp)(pMsg, pEpSet); (*pRpc->cfp)(pRpc->parent, pMsg, pEpSet);
} }
// free the request message // free the request message
...@@ -1151,15 +1157,15 @@ static void rpcProcessIncomingMsg(SRpcConn *pConn, SRpcHead *pHead, SRpcReqConte ...@@ -1151,15 +1157,15 @@ static void rpcProcessIncomingMsg(SRpcConn *pConn, SRpcHead *pHead, SRpcReqConte
rpcMsg.contLen = rpcContLenFromMsg(pHead->msgLen); rpcMsg.contLen = rpcContLenFromMsg(pHead->msgLen);
rpcMsg.pCont = pHead->content; rpcMsg.pCont = pHead->content;
rpcMsg.msgType = pHead->msgType; rpcMsg.msgType = pHead->msgType;
rpcMsg.code = pHead->code; rpcMsg.code = pHead->code;
if ( rpcIsReq(pHead->msgType) ) { if (rpcIsReq(pHead->msgType)) {
rpcMsg.ahandle = pConn->ahandle; rpcMsg.ahandle = pConn->ahandle;
rpcMsg.handle = pConn; rpcMsg.handle = pConn;
rpcAddRef(pRpc); // add the refCount for requests rpcAddRef(pRpc); // add the refCount for requests
// notify the server app // notify the server app
(*(pRpc->cfp))(&rpcMsg, NULL); (*(pRpc->cfp))(pRpc->parent, &rpcMsg, NULL);
} else { } else {
// it's a response // it's a response
rpcMsg.handle = pContext; rpcMsg.handle = pContext;
......
...@@ -9,4 +9,5 @@ target_include_directories( ...@@ -9,4 +9,5 @@ target_include_directories(
target_link_libraries( target_link_libraries(
wal wal
PUBLIC os PUBLIC os
PUBLIC util
) )
...@@ -16,12 +16,17 @@ ...@@ -16,12 +16,17 @@
#ifndef _TD_WAL_INT_H_ #ifndef _TD_WAL_INT_H_
#define _TD_WAL_INT_H_ #define _TD_WAL_INT_H_
#include "wal.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int walRotate(SWal* pWal);
int walGetFile(SWal* pWal, int32_t version);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*_TD_WAL_INT_H_*/ #endif /*_TD_WAL_INT_H_*/
\ No newline at end of file
...@@ -15,32 +15,34 @@ ...@@ -15,32 +15,34 @@
#include "wal.h" #include "wal.h"
int32_t walInit() { return 0; } int32_t walCommit(SWal *pWal, int64_t ver) {
return 0;
void walCleanUp() {} }
SWal *walOpen(char *path, SWalCfg *pCfg) { return NULL; }
int32_t walAlter(SWal *pWal, SWalCfg *pCfg) { return 0; }
void walClose(SWal *pWal) {}
void walFsync(SWal *pWal, bool force) {}
int64_t walWrite(SWal *pWal, int64_t index, void *body, int32_t bodyLen) { int32_t walRollback(SWal *pWal, int64_t ver) {
return 0; return 0;
} }
int32_t walCommit(SWal *pWal, int64_t ver) { return 0; } int32_t walTakeSnapshot(SWal *pWal, int64_t ver) {
return 0;
}
int32_t walRollback(SWal *pWal, int64_t ver) { return 0; } int32_t walRead(SWal *pWal, SWalHead **ppHead, int64_t ver) {
return 0;
}
int32_t walPrune(SWal *pWal, int64_t ver) { return 0; } int32_t walReadWithFp(SWal *pWal, FWalWrite writeFp, int64_t verStart, int32_t readNum) {
return 0;
}
int64_t walGetFirstVer(SWal *pWal) {
return 0;
}
int32_t walRead(SWal *, SWalHead **, int64_t ver); int64_t walGetSnapshotVer(SWal *pWal) {
int32_t walReadWithFp(SWal *, FWalWrite writeFp, int64_t verStart, int32_t readNum); return 0;
}
int64_t walGetFirstVer(SWal *); int64_t walGetLastVer(SWal *pWal) {
int64_t walGetSnapshotVer(SWal *); return 0;
int64_t walGetLastVer(SWal *); }
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taoserror.h"
#include "tref.h"
#include "tfile.h"
#include "walInt.h"
// walSetCurVerImpl is currently a placeholder: it does not touch pWal or ver
// and always returns 0. The comments below list the steps that are still to be
// implemented.
int walSetCurVerImpl(SWal *pWal, int64_t ver) {
// TODO: close the old file
// TODO: iterate over all files
// TODO: open the file that contains ver
// TODO: set the current version, the current file version and the current status
return 0;
}
// walSetCurVer validates ver against the version range tracked in pWal and,
// when ver lies outside the file that is currently open, asks
// walSetCurVerImpl to switch files.
//
// Returns 0 when ver already lies inside the current file or when
// walSetCurVerImpl succeeds. Returns -1 when ver is greater than
// lastVersion + 1, when ver is smaller than firstVersion, or when
// walSetCurVerImpl reports a negative result; in that last case the four
// cursor fields saved beforehand are written back to pWal.
int walSetCurVer(SWal *pWal, int64_t ver) {
  if (pWal->lastVersion + 1 < ver) {
    //TODO: some records are skipped
    return -1;
  }
  if (pWal->firstVersion > ver) {
    //TODO: try to seek pruned log
    return -1;
  }
  if (pWal->snapshotVersion > ver) {
    //TODO: seek snapshotted log
  }

  // A curFileLastVersion of -1 is treated as "no upper bound" for the current file.
  const int inCurrentFile =
      ver >= pWal->curFileFirstVersion &&
      (pWal->curFileLastVersion == -1 || ver <= pWal->curFileLastVersion);
  if (inCurrentFile) {
    return 0;
  }

  // Snapshot the cursor so it can be put back if switching files fails.
  int64_t savedVersion = pWal->curVersion;
  int64_t savedOffset = pWal->curOffset;
  int64_t savedFirstVersion = pWal->curFileFirstVersion;
  int64_t savedLastVersion = pWal->curFileLastVersion;

  if (walSetCurVerImpl(pWal, ver) >= 0) {
    return 0;
  }

  //TODO: errno
  pWal->curVersion = savedVersion;
  pWal->curOffset = savedOffset;
  pWal->curFileFirstVersion = savedFirstVersion;
  pWal->curFileLastVersion = savedLastVersion;
  return -1;
}
// walWriteIndex is meant to append an index entry for version ver to the
// current index file; the checksum check and the append itself are not
// implemented yet, and offset is not used so far.
//
// pWal:   WAL object whose curIdxTfd / curVersion are inspected.
// ver:    version the entry belongs to.
// offset: position of that version in the log file (currently unused).
//
// Returns 0 on success, the TAOS_SYSTEM_ERROR(errno) code when the index file
// handle is not valid, and -1 when the WAL cannot be positioned at ver.
int walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
  //get index file
  if (!tfValid(pWal->curIdxTfd)) {
    int code = TAOS_SYSTEM_ERROR(errno);
    wError("vgId:%d, file:%"PRId64".idx, failed to open since %s", pWal->vgId, pWal->curFileFirstVersion, strerror(errno));
    // Stop here instead of carrying on and reporting success: without a valid
    // index file there is nothing the entry could be written to.
    return code;
  }

  // Reposition the WAL only when it is not already at the requested version.
  if (pWal->curVersion != ver && walSetCurVer(pWal, ver) != 0) {
    //TODO: some records are skipped
    return -1;
  }

  //TODO: check file checksum
  //TODO: append index
  return 0;
}
// walRotateIndex is currently a placeholder: it does not touch pWal and always
// returns 0. The comments below list the steps that are still to be
// implemented.
int walRotateIndex(SWal *pWal) {
// TODO: check the file checksum
// TODO: create the new file
// TODO: switch over to the new file
return 0;
}
...@@ -18,11 +18,10 @@ ...@@ -18,11 +18,10 @@
#include "taoserror.h" #include "taoserror.h"
#include "tref.h" #include "tref.h"
#include "tfile.h" #include "tfile.h"
#include "twal.h"
#include "walInt.h" #include "walInt.h"
typedef struct { typedef struct {
int32_t refId; int32_t refSetId;
int32_t seq; int32_t seq;
int8_t stop; int8_t stop;
pthread_t thread; pthread_t thread;
...@@ -37,7 +36,7 @@ static void walFreeObj(void *pWal); ...@@ -37,7 +36,7 @@ static void walFreeObj(void *pWal);
int32_t walInit() { int32_t walInit() {
int32_t code = 0; int32_t code = 0;
tsWal.refId = taosOpenRef(TSDB_MIN_VNODES, walFreeObj); tsWal.refSetId = taosOpenRef(TSDB_MIN_VNODES, walFreeObj);
code = pthread_mutex_init(&tsWal.mutex, NULL); code = pthread_mutex_init(&tsWal.mutex, NULL);
if (code) { if (code) {
...@@ -46,34 +45,33 @@ int32_t walInit() { ...@@ -46,34 +45,33 @@ int32_t walInit() {
} }
code = walCreateThread(); code = walCreateThread();
if (code != TSDB_CODE_SUCCESS) { if (code != 0) {
wError("failed to init wal module since %s", tstrerror(code)); wError("failed to init wal module since %s", tstrerror(code));
return code; return code;
} }
wInfo("wal module is initialized, rsetId:%d", tsWal.refId); wInfo("wal module is initialized, rsetId:%d", tsWal.refSetId);
return code; return code;
} }
void walCleanUp() { void walCleanUp() {
walStopThread(); walStopThread();
taosCloseRef(tsWal.refId); taosCloseRef(tsWal.refSetId);
pthread_mutex_destroy(&tsWal.mutex); pthread_mutex_destroy(&tsWal.mutex);
wInfo("wal module is cleaned up"); wInfo("wal module is cleaned up");
} }
void *walOpen(char *path, SWalCfg *pCfg) { SWal *walOpen(const char *path, SWalCfg *pCfg) {
SWal *pWal = tcalloc(1, sizeof(SWal)); SWal *pWal = malloc(sizeof(SWal));
if (pWal == NULL) { if (pWal == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return NULL; return NULL;
} }
pWal->vgId = pCfg->vgId; pWal->vgId = pCfg->vgId;
pWal->tfd = -1; pWal->curLogTfd = -1;
pWal->fileId = -1; /*pWal->curFileId = -1;*/
pWal->level = pCfg->walLevel; pWal->level = pCfg->walLevel;
pWal->keep = pCfg->keep;
pWal->fsyncPeriod = pCfg->fsyncPeriod; pWal->fsyncPeriod = pCfg->fsyncPeriod;
tstrncpy(pWal->path, path, sizeof(pWal->path)); tstrncpy(pWal->path, path, sizeof(pWal->path));
pthread_mutex_init(&pWal->mutex, NULL); pthread_mutex_init(&pWal->mutex, NULL);
...@@ -81,13 +79,13 @@ void *walOpen(char *path, SWalCfg *pCfg) { ...@@ -81,13 +79,13 @@ void *walOpen(char *path, SWalCfg *pCfg) {
pWal->fsyncSeq = pCfg->fsyncPeriod / 1000; pWal->fsyncSeq = pCfg->fsyncPeriod / 1000;
if (pWal->fsyncSeq <= 0) pWal->fsyncSeq = 1; if (pWal->fsyncSeq <= 0) pWal->fsyncSeq = 1;
if (walInitObj(pWal) != TSDB_CODE_SUCCESS) { if (walInitObj(pWal) != 0) {
walFreeObj(pWal); walFreeObj(pWal);
return NULL; return NULL;
} }
pWal->rid = taosAddRef(tsWal.refId, pWal); pWal->refId = taosAddRef(tsWal.refSetId, pWal);
if (pWal->rid < 0) { if (pWal->refId < 0) {
walFreeObj(pWal); walFreeObj(pWal);
return NULL; return NULL;
} }
...@@ -97,14 +95,13 @@ void *walOpen(char *path, SWalCfg *pCfg) { ...@@ -97,14 +95,13 @@ void *walOpen(char *path, SWalCfg *pCfg) {
return pWal; return pWal;
} }
int32_t walAlter(void *handle, SWalCfg *pCfg) { int32_t walAlter(SWal *pWal, SWalCfg *pCfg) {
if (handle == NULL) return TSDB_CODE_WAL_APP_ERROR; if (pWal == NULL) return TSDB_CODE_WAL_APP_ERROR;
SWal *pWal = handle;
if (pWal->level == pCfg->walLevel && pWal->fsyncPeriod == pCfg->fsyncPeriod) { if (pWal->level == pCfg->walLevel && pWal->fsyncPeriod == pCfg->fsyncPeriod) {
wDebug("vgId:%d, old walLevel:%d fsync:%d, new walLevel:%d fsync:%d not change", pWal->vgId, pWal->level, wDebug("vgId:%d, old walLevel:%d fsync:%d, new walLevel:%d fsync:%d not change", pWal->vgId, pWal->level,
pWal->fsyncPeriod, pCfg->walLevel, pCfg->fsyncPeriod); pWal->fsyncPeriod, pCfg->walLevel, pCfg->fsyncPeriod);
return TSDB_CODE_SUCCESS; return 0;
} }
wInfo("vgId:%d, change old walLevel:%d fsync:%d, new walLevel:%d fsync:%d", pWal->vgId, pWal->level, wInfo("vgId:%d, change old walLevel:%d fsync:%d, new walLevel:%d fsync:%d", pWal->vgId, pWal->level,
...@@ -115,44 +112,33 @@ int32_t walAlter(void *handle, SWalCfg *pCfg) { ...@@ -115,44 +112,33 @@ int32_t walAlter(void *handle, SWalCfg *pCfg) {
pWal->fsyncSeq = pCfg->fsyncPeriod / 1000; pWal->fsyncSeq = pCfg->fsyncPeriod / 1000;
if (pWal->fsyncSeq <= 0) pWal->fsyncSeq = 1; if (pWal->fsyncSeq <= 0) pWal->fsyncSeq = 1;
return TSDB_CODE_SUCCESS; return 0;
} }
void walStop(void *handle) { void walClose(SWal *pWal) {
if (handle == NULL) return; if (pWal == NULL) return;
SWal *pWal = handle;
pthread_mutex_lock(&pWal->mutex); pthread_mutex_lock(&pWal->mutex);
pWal->stop = 1; tfClose(pWal->curLogTfd);
pthread_mutex_unlock(&pWal->mutex); pthread_mutex_unlock(&pWal->mutex);
wDebug("vgId:%d, stop write wal", pWal->vgId); taosRemoveRef(tsWal.refSetId, pWal->refId);
}
void walClose(void *handle) {
if (handle == NULL) return;
SWal *pWal = handle;
pthread_mutex_lock(&pWal->mutex);
tfClose(pWal->tfd);
pthread_mutex_unlock(&pWal->mutex);
taosRemoveRef(tsWal.refId, pWal->rid);
} }
static int32_t walInitObj(SWal *pWal) { static int32_t walInitObj(SWal *pWal) {
if (!taosMkDir(pWal->path)) { if (taosMkDir(pWal->path) != 0) {
wError("vgId:%d, path:%s, failed to create directory since %s", pWal->vgId, pWal->path, strerror(errno)); wError("vgId:%d, path:%s, failed to create directory since %s", pWal->vgId, pWal->path, strerror(errno));
return TAOS_SYSTEM_ERROR(errno); return TAOS_SYSTEM_ERROR(errno);
} }
wDebug("vgId:%d, object is initialized", pWal->vgId); wDebug("vgId:%d, object is initialized", pWal->vgId);
return TSDB_CODE_SUCCESS; return 0;
} }
static void walFreeObj(void *wal) { static void walFreeObj(void *wal) {
SWal *pWal = wal; SWal *pWal = wal;
wDebug("vgId:%d, wal:%p is freed", pWal->vgId, pWal); wDebug("vgId:%d, wal:%p is freed", pWal->vgId, pWal);
tfClose(pWal->tfd); tfClose(pWal->curLogTfd);
pthread_mutex_destroy(&pWal->mutex); pthread_mutex_destroy(&pWal->mutex);
tfree(pWal); tfree(pWal);
} }
...@@ -177,16 +163,16 @@ static void walUpdateSeq() { ...@@ -177,16 +163,16 @@ static void walUpdateSeq() {
} }
static void walFsyncAll() { static void walFsyncAll() {
SWal *pWal = taosIterateRef(tsWal.refId, 0); SWal *pWal = taosIterateRef(tsWal.refSetId, 0);
while (pWal) { while (pWal) {
if (walNeedFsync(pWal)) { if (walNeedFsync(pWal)) {
wTrace("vgId:%d, do fsync, level:%d seq:%d rseq:%d", pWal->vgId, pWal->level, pWal->fsyncSeq, tsWal.seq); wTrace("vgId:%d, do fsync, level:%d seq:%d rseq:%d", pWal->vgId, pWal->level, pWal->fsyncSeq, tsWal.seq);
int32_t code = tfFsync(pWal->tfd); int32_t code = tfFsync(pWal->curLogTfd);
if (code != 0) { if (code != 0) {
wError("vgId:%d, file:%s, failed to fsync since %s", pWal->vgId, pWal->name, strerror(code)); wError("vgId:%d, file:%"PRId64".log, failed to fsync since %s", pWal->vgId, pWal->curFileFirstVersion, strerror(code));
} }
} }
pWal = taosIterateRef(tsWal.refId, pWal->rid); pWal = taosIterateRef(tsWal.refSetId, pWal->refId);
} }
} }
...@@ -219,7 +205,7 @@ static int32_t walCreateThread() { ...@@ -219,7 +205,7 @@ static int32_t walCreateThread() {
pthread_attr_destroy(&thAttr); pthread_attr_destroy(&thAttr);
wDebug("wal thread is launched, thread:0x%08" PRIx64, taosGetPthreadId(tsWal.thread)); wDebug("wal thread is launched, thread:0x%08" PRIx64, taosGetPthreadId(tsWal.thread));
return TSDB_CODE_SUCCESS; return 0;
} }
static void walStopThread() { static void walStopThread() {
......
...@@ -115,4 +115,4 @@ int32_t walGetNewFile(SWal *pWal, int64_t *newFileId) { ...@@ -115,4 +115,4 @@ int32_t walGetNewFile(SWal *pWal, int64_t *newFileId) {
wTrace("vgId:%d, path:%s, newFileId:%" PRId64, pWal->vgId, pWal->path, *newFileId); wTrace("vgId:%d, path:%s, newFileId:%" PRId64, pWal->vgId, pWal->path, *newFileId);
return 0; return 0;
} }
\ No newline at end of file
...@@ -14,15 +14,14 @@ ...@@ -14,15 +14,14 @@
*/ */
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#define TAOS_RANDOM_FILE_FAIL_TEST
#include "os.h" #include "os.h"
#include "taoserror.h" #include "taoserror.h"
#include "taosmsg.h"
#include "tchecksum.h" #include "tchecksum.h"
#include "tfile.h" #include "tfile.h"
#include "twal.h"
#include "walInt.h" #include "walInt.h"
#if 0
static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId); static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId);
int32_t walRenew(void *handle) { int32_t walRenew(void *handle) {
...@@ -31,33 +30,33 @@ int32_t walRenew(void *handle) { ...@@ -31,33 +30,33 @@ int32_t walRenew(void *handle) {
SWal * pWal = handle; SWal * pWal = handle;
int32_t code = 0; int32_t code = 0;
if (pWal->stop) { /*if (pWal->stop) {*/
wDebug("vgId:%d, do not create a new wal file", pWal->vgId); /*wDebug("vgId:%d, do not create a new wal file", pWal->vgId);*/
return 0; /*return 0;*/
} /*}*/
pthread_mutex_lock(&pWal->mutex); pthread_mutex_lock(&pWal->mutex);
if (tfValid(pWal->tfd)) { if (tfValid(pWal->logTfd)) {
tfClose(pWal->tfd); tfClose(pWal->logTfd);
wDebug("vgId:%d, file:%s, it is closed while renew", pWal->vgId, pWal->name); wDebug("vgId:%d, file:%s, it is closed while renew", pWal->vgId, pWal->logName);
} }
if (pWal->keep == TAOS_WAL_KEEP) { /*if (pWal->keep == TAOS_WAL_KEEP) {*/
pWal->fileId = 0; /*pWal->fileId = 0;*/
} else { /*} else {*/
if (walGetNewFile(pWal, &pWal->fileId) != 0) pWal->fileId = 0; /*if (walGetNewFile(pWal, &pWal->fileId) != 0) pWal->fileId = 0;*/
pWal->fileId++; /*pWal->fileId++;*/
} /*}*/
snprintf(pWal->name, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->fileId); snprintf(pWal->logName, sizeof(pWal->logName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->curFileId);
pWal->tfd = tfOpenCreateWrite(pWal->name); pWal->logTfd = tfOpenCreateWrite(pWal->logName);
if (!tfValid(pWal->tfd)) { if (!tfValid(pWal->logTfd)) {
code = TAOS_SYSTEM_ERROR(errno); code = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->name, strerror(errno)); wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->logName, strerror(errno));
} else { } else {
wDebug("vgId:%d, file:%s, it is created and open while renew", pWal->vgId, pWal->name); wDebug("vgId:%d, file:%s, it is created and open while renew", pWal->vgId, pWal->logName);
} }
pthread_mutex_unlock(&pWal->mutex); pthread_mutex_unlock(&pWal->mutex);
...@@ -68,14 +67,14 @@ int32_t walRenew(void *handle) { ...@@ -68,14 +67,14 @@ int32_t walRenew(void *handle) {
void walRemoveOneOldFile(void *handle) { void walRemoveOneOldFile(void *handle) {
SWal *pWal = handle; SWal *pWal = handle;
if (pWal == NULL) return; if (pWal == NULL) return;
if (pWal->keep == TAOS_WAL_KEEP) return; /*if (pWal->keep == TAOS_WAL_KEEP) return;*/
if (!tfValid(pWal->tfd)) return; if (!tfValid(pWal->logTfd)) return;
pthread_mutex_lock(&pWal->mutex); pthread_mutex_lock(&pWal->mutex);
// remove the oldest wal file // remove the oldest wal file
int64_t oldFileId = -1; int64_t oldFileId = -1;
if (walGetOldFile(pWal, pWal->fileId, WAL_FILE_NUM, &oldFileId) == 0) { if (walGetOldFile(pWal, pWal->curFileId, WAL_FILE_NUM, &oldFileId) == 0) {
char walName[WAL_FILE_LEN] = {0}; char walName[WAL_FILE_LEN] = {0};
snprintf(walName, sizeof(walName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, oldFileId); snprintf(walName, sizeof(walName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, oldFileId);
...@@ -97,27 +96,25 @@ void walRemoveAllOldFiles(void *handle) { ...@@ -97,27 +96,25 @@ void walRemoveAllOldFiles(void *handle) {
pthread_mutex_lock(&pWal->mutex); pthread_mutex_lock(&pWal->mutex);
tfClose(pWal->tfd); tfClose(pWal->logTfd);
wDebug("vgId:%d, file:%s, it is closed before remove all wals", pWal->vgId, pWal->name); wDebug("vgId:%d, file:%s, it is closed before remove all wals", pWal->vgId, pWal->logName);
while (walGetNextFile(pWal, &fileId) >= 0) { while (walGetNextFile(pWal, &fileId) >= 0) {
snprintf(pWal->name, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, fileId); snprintf(pWal->logName, sizeof(pWal->logName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, fileId);
if (remove(pWal->name) < 0) { if (remove(pWal->logName) < 0) {
wError("vgId:%d, wal:%p file:%s, failed to remove since %s", pWal->vgId, pWal, pWal->name, strerror(errno)); wError("vgId:%d, wal:%p file:%s, failed to remove since %s", pWal->vgId, pWal, pWal->logName, strerror(errno));
} else { } else {
wInfo("vgId:%d, wal:%p file:%s, it is removed", pWal->vgId, pWal, pWal->name); wInfo("vgId:%d, wal:%p file:%s, it is removed", pWal->vgId, pWal, pWal->logName);
} }
} }
pthread_mutex_unlock(&pWal->mutex); pthread_mutex_unlock(&pWal->mutex);
} }
#endif
#if defined(WAL_CHECKSUM_WHOLE)
static void walUpdateChecksum(SWalHead *pHead) { static void walUpdateChecksum(SWalHead *pHead) {
pHead->sver = 2; pHead->sver = 2;
pHead->cksum = 0; pHead->cksum = taosCalcChecksum(0, (uint8_t *)pHead, sizeof(SWalHead) + pHead->len);
pHead->cksum = taosCalcChecksum(0, (uint8_t *)pHead, sizeof(*pHead) + pHead->len);
} }
static int walValidateChecksum(SWalHead *pHead) { static int walValidateChecksum(SWalHead *pHead) {
...@@ -132,38 +129,38 @@ static int walValidateChecksum(SWalHead *pHead) { ...@@ -132,38 +129,38 @@ static int walValidateChecksum(SWalHead *pHead) {
return 0; return 0;
} }
#endif int64_t walWrite(SWal *pWal, int64_t index, void *body, int32_t bodyLen) {
if (pWal == NULL) return -1;
int32_t walWrite(void *handle, SWalHead *pHead) {
if (handle == NULL) return -1;
SWal * pWal = handle; SWalHead *pHead = malloc(sizeof(SWalHead) + bodyLen);
if(pHead == NULL) {
return -1;
}
pHead->version = index;
int32_t code = 0; int32_t code = 0;
// no wal // no wal
if (!tfValid(pWal->tfd)) return 0; if (!tfValid(pWal->curLogTfd)) return 0;
if (pWal->level == TAOS_WAL_NOLOG) return 0; if (pWal->level == TAOS_WAL_NOLOG) return 0;
if (pHead->version <= pWal->version) return 0; if (pHead->version <= pWal->curVersion) return 0;
pHead->signature = WAL_SIGNATURE; pHead->signature = WAL_SIGNATURE;
#if defined(WAL_CHECKSUM_WHOLE) pHead->len = bodyLen;
memcpy(pHead->cont, body, bodyLen);
walUpdateChecksum(pHead); walUpdateChecksum(pHead);
#else
pHead->sver = 0;
taosCalcChecksumAppend(0, (uint8_t *)pHead, sizeof(SWalHead));
#endif
int32_t contLen = pHead->len + sizeof(SWalHead); int32_t contLen = pHead->len + sizeof(SWalHead);
pthread_mutex_lock(&pWal->mutex); pthread_mutex_lock(&pWal->mutex);
if (tfWrite(pWal->tfd, pHead, contLen) != contLen) { if (tfWrite(pWal->curLogTfd, pHead, contLen) != contLen) {
code = TAOS_SYSTEM_ERROR(errno); code = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, file:%s, failed to write since %s", pWal->vgId, pWal->name, strerror(errno)); wError("vgId:%d, file:%"PRId64".log, failed to write since %s", pWal->vgId, pWal->curFileFirstVersion, strerror(errno));
} else { } else {
wTrace("vgId:%d, write wal, fileId:%" PRId64 " tfd:%" PRId64 " hver:%" PRId64 " wver:%" PRIu64 " len:%d", pWal->vgId, /*wTrace("vgId:%d, write wal, fileId:%" PRId64 " tfd:%" PRId64 " hver:%" PRId64 " wver:%" PRIu64 " len:%d", pWal->vgId,*/
pWal->fileId, pWal->tfd, pHead->version, pWal->version, pHead->len); /*pWal->curFileId, pWal->logTfd, pHead->version, pWal->curVersion, pHead->len);*/
pWal->version = pHead->version; pWal->curVersion = pHead->version;
} }
pthread_mutex_unlock(&pWal->mutex); pthread_mutex_unlock(&pWal->mutex);
...@@ -173,18 +170,18 @@ int32_t walWrite(void *handle, SWalHead *pHead) { ...@@ -173,18 +170,18 @@ int32_t walWrite(void *handle, SWalHead *pHead) {
return code; return code;
} }
void walFsync(void *handle, bool forceFsync) { void walFsync(SWal *pWal, bool forceFsync) {
SWal *pWal = handle; if (pWal == NULL || !tfValid(pWal->curLogTfd)) return;
if (pWal == NULL || !tfValid(pWal->tfd)) return;
if (forceFsync || (pWal->level == TAOS_WAL_FSYNC && pWal->fsyncPeriod == 0)) { if (forceFsync || (pWal->level == TAOS_WAL_FSYNC && pWal->fsyncPeriod == 0)) {
wTrace("vgId:%d, fileId:%" PRId64 ", do fsync", pWal->vgId, pWal->fileId); wTrace("vgId:%d, fileId:%"PRId64".log, do fsync", pWal->vgId, pWal->curFileFirstVersion);
if (tfFsync(pWal->tfd) < 0) { if (tfFsync(pWal->curLogTfd) < 0) {
wError("vgId:%d, fileId:%" PRId64 ", fsync failed since %s", pWal->vgId, pWal->fileId, strerror(errno)); wError("vgId:%d, file:%"PRId64".log, fsync failed since %s", pWal->vgId, pWal->curFileFirstVersion, strerror(errno));
} }
} }
} }
#if 0
int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) { int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) {
if (handle == NULL) return -1; if (handle == NULL) return -1;
...@@ -194,10 +191,10 @@ int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) { ...@@ -194,10 +191,10 @@ int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) {
int64_t fileId = -1; int64_t fileId = -1;
while ((code = walGetNextFile(pWal, &fileId)) >= 0) { while ((code = walGetNextFile(pWal, &fileId)) >= 0) {
if (fileId == pWal->fileId) continue; /*if (fileId == pWal->curFileId) continue;*/
char walName[WAL_FILE_LEN]; char walName[WAL_FILE_LEN];
snprintf(walName, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, fileId); snprintf(walName, sizeof(pWal->logName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, fileId);
wInfo("vgId:%d, file:%s, will be restored", pWal->vgId, walName); wInfo("vgId:%d, file:%s, will be restored", pWal->vgId, walName);
code = walRestoreWalFile(pWal, pVnode, writeFp, walName, fileId); code = walRestoreWalFile(pWal, pVnode, writeFp, walName, fileId);
...@@ -206,26 +203,26 @@ int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) { ...@@ -206,26 +203,26 @@ int32_t walRestore(void *handle, void *pVnode, FWalWrite writeFp) {
continue; continue;
} }
wInfo("vgId:%d, file:%s, restore success, wver:%" PRIu64, pWal->vgId, walName, pWal->version); wInfo("vgId:%d, file:%s, restore success, wver:%" PRIu64, pWal->vgId, walName, pWal->curVersion);
count++; count++;
} }
if (pWal->keep != TAOS_WAL_KEEP) return TSDB_CODE_SUCCESS; /*if (pWal->keep != TAOS_WAL_KEEP) return TSDB_CODE_SUCCESS;*/
if (count == 0) { if (count == 0) {
wDebug("vgId:%d, wal file not exist, renew it", pWal->vgId); wDebug("vgId:%d, wal file not exist, renew it", pWal->vgId);
return walRenew(pWal); return walRenew(pWal);
} else { } else {
// open the existing WAL file in append mode // open the existing WAL file in append mode
pWal->fileId = 0; /*pWal->curFileId = 0;*/
snprintf(pWal->name, sizeof(pWal->name), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->fileId); snprintf(pWal->logName, sizeof(pWal->logName), "%s/%s%" PRId64, pWal->path, WAL_PREFIX, pWal->curFileId);
pWal->tfd = tfOpenCreateWriteAppend(pWal->name); pWal->logTfd = tfOpenCreateWriteAppend(pWal->logName);
if (!tfValid(pWal->tfd)) { if (!tfValid(pWal->logTfd)) {
wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->name, strerror(errno)); wError("vgId:%d, file:%s, failed to open since %s", pWal->vgId, pWal->logName, strerror(errno));
return TAOS_SYSTEM_ERROR(errno); return TAOS_SYSTEM_ERROR(errno);
} }
wDebug("vgId:%d, file:%s, it is created and open while restore", pWal->vgId, pWal->name); wDebug("vgId:%d, file:%s, it is created and open while restore", pWal->vgId, pWal->logName);
} }
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
...@@ -242,14 +239,15 @@ int32_t walGetWalFile(void *handle, char *fileName, int64_t *fileId) { ...@@ -242,14 +239,15 @@ int32_t walGetWalFile(void *handle, char *fileName, int64_t *fileId) {
int32_t code = walGetNextFile(pWal, fileId); int32_t code = walGetNextFile(pWal, fileId);
if (code >= 0) { if (code >= 0) {
sprintf(fileName, "wal/%s%" PRId64, WAL_PREFIX, *fileId); sprintf(fileName, "wal/%s%" PRId64, WAL_PREFIX, *fileId);
code = (*fileId == pWal->fileId) ? 0 : 1; /*code = (*fileId == pWal->curFileId) ? 0 : 1;*/
} }
wDebug("vgId:%d, get wal file, code:%d curId:%" PRId64 " outId:%" PRId64, pWal->vgId, code, pWal->fileId, *fileId); wDebug("vgId:%d, get wal file, code:%d curId:%" PRId64 " outId:%" PRId64, pWal->vgId, code, pWal->curFileId, *fileId);
pthread_mutex_unlock(&(pWal->mutex)); pthread_mutex_unlock(&(pWal->mutex));
return code; return code;
} }
#endif
static void walFtruncate(SWal *pWal, int64_t tfd, int64_t offset) { static void walFtruncate(SWal *pWal, int64_t tfd, int64_t offset) {
tfFtruncate(tfd, offset); tfFtruncate(tfd, offset);
...@@ -275,13 +273,6 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, ...@@ -275,13 +273,6 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd,
continue; continue;
} }
#if defined(WAL_CHECKSUM_WHOLE)
if (pHead->sver == 0 && walValidateChecksum(pHead)) {
wInfo("vgId:%d, wal head cksum check passed, offset:%" PRId64, pWal->vgId, pos);
*offset = pos;
return TSDB_CODE_SUCCESS;
}
if (pHead->sver >= 1) { if (pHead->sver >= 1) {
if (tfRead(tfd, pHead->cont, pHead->len) < pHead->len) { if (tfRead(tfd, pHead->cont, pHead->len) < pHead->len) {
wError("vgId:%d, read to end of corrupted wal file, offset:%" PRId64, pWal->vgId, pos); wError("vgId:%d, read to end of corrupted wal file, offset:%" PRId64, pWal->vgId, pos);
...@@ -294,132 +285,14 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, ...@@ -294,132 +285,14 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd,
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
} }
#else
if (taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
wInfo("vgId:%d, wal head cksum check passed, offset:%" PRId64, pWal->vgId, pos);
*offset = pos;
return TSDB_CODE_SUCCESS;
}
#endif
} }
return TSDB_CODE_WAL_FILE_CORRUPTED; return TSDB_CODE_WAL_FILE_CORRUPTED;
} }
// Add SMemRowType ahead of SDataRow
static void expandSubmitBlk(SSubmitBlk *pDest, SSubmitBlk *pSrc, int32_t *lenExpand) {
// copy the header firstly
memcpy(pDest, pSrc, sizeof(SSubmitBlk));
int32_t nRows = htons(pDest->numOfRows);
int32_t dataLen = htonl(pDest->dataLen);
if ((nRows <= 0) || (dataLen <= 0)) {
return;
}
char *pDestData = pDest->data;
char *pSrcData = pSrc->data;
for (int32_t i = 0; i < nRows; ++i) {
memRowSetType(pDestData, SMEM_ROW_DATA);
memcpy(memRowDataBody(pDestData), pSrcData, dataRowLen(pSrcData));
pDestData = POINTER_SHIFT(pDestData, memRowTLen(pDestData));
pSrcData = POINTER_SHIFT(pSrcData, dataRowLen(pSrcData));
++(*lenExpand);
}
pDest->dataLen = htonl(dataLen + nRows * sizeof(uint8_t));
}
// Check SDataRow by comparing the SDataRow len and SSubmitBlk dataLen
static bool walIsSDataRow(void *pBlkData, int nRows, int32_t dataLen) {
if ((nRows <= 0) || (dataLen <= 0)) {
return true;
}
int32_t len = 0, kvLen = 0;
for (int i = 0; i < nRows; ++i) {
len += dataRowLen(pBlkData);
if (len > dataLen) {
return false;
}
/**
* For SDataRow between version [2.1.5.0 and 2.1.6.X], it would never conflict.
* For SKVRow between version [2.1.5.0 and 2.1.6.X], it may conflict in below scenario
* - with 1st type byte 0x01 and sversion 0x0101(257), thus do further check
*/
if (dataRowLen(pBlkData) == 257) {
SMemRow memRow = pBlkData;
SKVRow kvRow = memRowKvBody(memRow);
int nCols = kvRowNCols(kvRow);
uint16_t calcTsOffset = (uint16_t)(TD_KV_ROW_HEAD_SIZE + sizeof(SColIdx) * nCols);
uint16_t realTsOffset = (kvRowColIdx(kvRow))->offset;
if (calcTsOffset == realTsOffset) {
kvLen += memRowKvTLen(memRow);
}
}
pBlkData = POINTER_SHIFT(pBlkData, dataRowLen(pBlkData));
}
if (len != dataLen) {
return false;
}
if (kvLen == dataLen) {
return false;
}
return true;
}
// for WAL SMemRow/SDataRow compatibility
static int walSMemRowCheck(SWalHead *pHead) {
if ((pHead->sver < 2) && (pHead->msgType == TSDB_MSG_TYPE_SUBMIT)) {
SSubmitMsg *pMsg = (SSubmitMsg *)pHead->cont;
int32_t numOfBlocks = htonl(pMsg->numOfBlocks);
if (numOfBlocks <= 0) {
return 0;
}
int32_t nTotalRows = 0;
SSubmitBlk *pBlk = (SSubmitBlk *)pMsg->blocks;
for (int32_t i = 0; i < numOfBlocks; ++i) {
int32_t dataLen = htonl(pBlk->dataLen);
int32_t nRows = htons(pBlk->numOfRows);
nTotalRows += nRows;
if (!walIsSDataRow(pBlk->data, nRows, dataLen)) {
return 0;
}
pBlk = (SSubmitBlk *)POINTER_SHIFT(pBlk, sizeof(SSubmitBlk) + dataLen);
}
ASSERT(nTotalRows >= 0);
SWalHead *pWalHead = (SWalHead *)calloc(sizeof(SWalHead) + pHead->len + nTotalRows * sizeof(uint8_t), 1);
if (pWalHead == NULL) {
return -1;
}
memcpy(pWalHead, pHead, sizeof(SWalHead) + sizeof(SSubmitMsg));
SSubmitMsg *pDestMsg = (SSubmitMsg *)pWalHead->cont;
SSubmitBlk *pDestBlks = (SSubmitBlk *)pDestMsg->blocks;
SSubmitBlk *pSrcBlks = (SSubmitBlk *)pMsg->blocks;
int32_t lenExpand = 0;
for (int32_t i = 0; i < numOfBlocks; ++i) {
expandSubmitBlk(pDestBlks, pSrcBlks, &lenExpand);
pDestBlks = POINTER_SHIFT(pDestBlks, htonl(pDestBlks->dataLen) + sizeof(SSubmitBlk));
pSrcBlks = POINTER_SHIFT(pSrcBlks, htonl(pSrcBlks->dataLen) + sizeof(SSubmitBlk));
}
if (lenExpand > 0) {
pDestMsg->header.contLen = htonl(pDestMsg->length) + lenExpand;
pDestMsg->length = htonl(pDestMsg->header.contLen);
pWalHead->len = pWalHead->len + lenExpand;
}
memcpy(pHead, pWalHead, sizeof(SWalHead) + pWalHead->len);
tfree(pWalHead);
}
return 0;
}
static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId) { static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId) {
int32_t size = WAL_MAX_SIZE; int32_t size = WAL_MAX_SIZE;
void * buffer = tmalloc(size); void * buffer = malloc(size);
if (buffer == NULL) { if (buffer == NULL) {
wError("vgId:%d, file:%s, failed to open for restore since %s", pWal->vgId, name, strerror(errno)); wError("vgId:%d, file:%s, failed to open for restore since %s", pWal->vgId, name, strerror(errno));
return TAOS_SYSTEM_ERROR(errno); return TAOS_SYSTEM_ERROR(errno);
...@@ -454,7 +327,6 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch ...@@ -454,7 +327,6 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch
break; break;
} }
#if defined(WAL_CHECKSUM_WHOLE)
if ((pHead->sver == 0 && !walValidateChecksum(pHead)) || pHead->sver < 0 || pHead->sver > 2) { if ((pHead->sver == 0 && !walValidateChecksum(pHead)) || pHead->sver < 0 || pHead->sver > 2) {
wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name,
pHead->version, pHead->len, offset); pHead->version, pHead->len, offset);
...@@ -498,57 +370,15 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch ...@@ -498,57 +370,15 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch
} }
} }
#else
if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name,
pHead->version, pHead->len, offset);
code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset);
if (code != TSDB_CODE_SUCCESS) {
walFtruncate(pWal, tfd, offset);
break;
}
}
if (pHead->len < 0 || pHead->len > size - sizeof(SWalHead)) {
wError("vgId:%d, file:%s, wal head len out of range, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name,
pHead->version, pHead->len, offset);
code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset);
if (code != TSDB_CODE_SUCCESS) {
walFtruncate(pWal, tfd, offset);
break;
}
}
ret = (int32_t)tfRead(tfd, pHead->cont, pHead->len);
if (ret < 0) {
wError("vgId:%d, file:%s, failed to read wal body since %s", pWal->vgId, name, strerror(errno));
code = TAOS_SYSTEM_ERROR(errno);
break;
}
if (ret < pHead->len) {
wError("vgId:%d, file:%s, failed to read wal body, ret:%d len:%d", pWal->vgId, name, ret, pHead->len);
offset += sizeof(SWalHead);
continue;
}
#endif
offset = offset + sizeof(SWalHead) + pHead->len; offset = offset + sizeof(SWalHead) + pHead->len;
wTrace("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64, wTrace("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64,
pWal->vgId, fileId, pHead->version, pWal->version, pHead->len, offset); pWal->vgId, fileId, pHead->version, pWal->curVersion, pHead->len, offset);
pWal->version = pHead->version; pWal->curVersion = pHead->version;
// wInfo("writeFp: %ld", offset); // wInfo("writeFp: %ld", offset);
if (0 != walSMemRowCheck(pHead)) { (*writeFp)(pVnode, pHead);
wError("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64,
pWal->vgId, fileId, pHead->version, pWal->version, pHead->len, offset);
tfClose(tfd);
tfree(buffer);
return TAOS_SYSTEM_ERROR(errno);
}
(*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL, NULL);
} }
tfClose(tfd); tfClose(tfd);
...@@ -558,11 +388,10 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch ...@@ -558,11 +388,10 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch
return code; return code;
} }
uint64_t walGetVersion(twalh param) { uint64_t walGetVersion(SWal *pWal) {
SWal *pWal = param; if (pWal == NULL) return 0;
if (pWal == 0) return 0;
return pWal->version; return pWal->curVersion;
} }
// Wal version in slave (dnode1) must be reset. // Wal version in slave (dnode1) must be reset.
...@@ -570,10 +399,9 @@ uint64_t walGetVersion(twalh param) { ...@@ -570,10 +399,9 @@ uint64_t walGetVersion(twalh param) {
// Some new wal record cannot be written to the wal file in dnode1 for wal version not reset, then fversion and the record in wal file may inconsistent, // Some new wal record cannot be written to the wal file in dnode1 for wal version not reset, then fversion and the record in wal file may inconsistent,
// At this time, if dnode2 down, dnode1 switched to master. After dnode2 start and restore data from dnode1, data loss will occur // At this time, if dnode2 down, dnode1 switched to master. After dnode2 start and restore data from dnode1, data loss will occur
void walResetVersion(twalh param, uint64_t newVer) { void walResetVersion(SWal *pWal, uint64_t newVer) {
SWal *pWal = param; if (pWal == NULL) return;
if (pWal == 0) return; wInfo("vgId:%d, version reset from %" PRIu64 " to %" PRIu64, pWal->vgId, pWal->curVersion, newVer);
wInfo("vgId:%d, version reset from %" PRIu64 " to %" PRIu64, pWal->vgId, pWal->version, newVer);
pWal->version = newVer; pWal->curVersion = newVer;
} }
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//#define _DEFAULT_SOURCE
#include "os.h"
#include "tutil.h"
#include "tglobal.h"
#include "tlog.h"
#include "twal.h"
#include "tfile.h"
// Highest WAL record version known to this test: seeded by the -v option,
// raised by writeToQueue() for records seen during restore, and incremented
// by main() for every new record it writes.
int64_t ver = 0;
// WAL handle returned by walOpen() in main(); also used by writeToQueue().
void *pWal = NULL;
/*
 * Restore callback passed to walRestore() by main().
 *
 * Interprets `data` as a SWalHead record, raises the global `ver` to the
 * record's version when that version is larger, and then writes the record
 * through walWrite() on the global `pWal` handle.
 *
 * pVnode, type and pMsg are not used. The result of walWrite() is not
 * inspected; the function always returns 0.
 */
int writeToQueue(void *pVnode, void *data, int type, void *pMsg) {
  SWalHead *pRecord = data;

  // Track the largest version seen so main() continues numbering after it.
  if (pRecord->version > ver) {
    ver = pRecord->version;
  }

  walWrite(pWal, pRecord);
  return 0;
}
/*
 * WAL test driver.
 *
 * Steps:
 *   1. Parse command-line options (see the usage text below).
 *   2. Initialise logging, the tfile layer and the WAL module, then open a
 *      WAL at `path`.
 *   3. Restore existing WAL content through writeToQueue(), which also
 *      advances the global `ver`.
 *   4. Write `total` batches of `rows` records (each with a `size`-byte body),
 *      calling walRenew() after every batch.
 *   5. List WAL files via walGetWalFile() until it returns 0 or -1.
 *   6. Block on getchar(), then close and clean up.
 *
 * Returns 0 on normal completion; exits with -1 if the record size is
 * negative, the WAL cannot be opened or restored, or the record buffer
 * cannot be allocated. Any unrecognised option (including -h) prints the
 * usage text and exits with 0.
 */
int main(int argc, char *argv[]) {
  char path[128] = "/tmp/wal";
  int  level = 2;
  int  total = 5;
  int  rows = 10000;
  int  size = 128;
  int  keep = 0;

  for (int i = 1; i < argc; ++i) {
    if (strcmp(argv[i], "-p") == 0 && i < argc - 1) {
      tstrncpy(path, argv[++i], sizeof(path));
    } else if (strcmp(argv[i], "-l") == 0 && i < argc - 1) {
      level = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-r") == 0 && i < argc - 1) {
      rows = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-k") == 0 && i < argc - 1) {
      keep = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-t") == 0 && i < argc - 1) {
      total = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-s") == 0 && i < argc - 1) {
      size = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-v") == 0 && i < argc - 1) {
      ver = atoll(argv[++i]);
    } else if (strcmp(argv[i], "-d") == 0 && i < argc - 1) {
      dDebugFlag = atoi(argv[++i]);
    } else {
      printf("\nusage: %s [options] \n", argv[0]);
      printf(" [-p path]: wal file path default is:%s\n", path);
      printf(" [-l level]: log level, default is:%d\n", level);
      printf(" [-t total]: total wal files, default is:%d\n", total);
      printf(" [-r rows]: rows of records per wal file, default is:%d\n", rows);
      // -s is parsed above but was missing from the usage text.
      printf(" [-s size]: body size of each record in bytes, default is:%d\n", size);
      printf(" [-k keep]: keep the wal after closing, default is:%d\n", keep);
      printf(" [-v version]: initial version, default is:%" PRId64 "\n", ver);
      printf(" [-d debugFlag]: debug flag, default:%d\n", dDebugFlag);
      printf(" [-h help]: print out this help\n\n");
      exit(0);
    }
  }

  // A negative size would make the record buffer smaller than its own header.
  if (size < 0) {
    printf("invalid record size:%d\n", size);
    exit(-1);
  }

  taosInitLog("wal.log", 100000, 10);

  tfInit();
  walInit();

  SWalCfg walCfg = {0};
  walCfg.walLevel = level;
  walCfg.keep = keep;

  pWal = walOpen(path, &walCfg);
  if (pWal == NULL) {
    printf("failed to open wal\n");
    exit(-1);
  }

  int ret = walRestore(pWal, NULL, writeToQueue);
  if (ret < 0) {
    printf("failed to restore wal\n");
    exit(-1);
  }

  printf("version starts from:%" PRId64 "\n", ver);

  // Zero-initialised so that no indeterminate header or body bytes are
  // passed to walWrite(); only version and len are set explicitly below.
  int       contLen = sizeof(SWalHead) + size;
  SWalHead *pHead = calloc(1, contLen);
  if (pHead == NULL) {
    printf("failed to allocate wal record, size:%d\n", contLen);
    exit(-1);
  }

  for (int i = 0; i < total; ++i) {
    for (int k = 0; k < rows; ++k) {
      pHead->version = ++ver;
      pHead->len = size;
      walWrite(pWal, pHead);
    }

    printf("renew a wal, i:%d\n", i);
    walRenew(pWal);
  }

  printf("%d wal files are written\n", total);

  int64_t index = 0;
  char    name[256];

  while (1) {
    int code = walGetWalFile(pWal, name, &index);
    if (code == -1) {
      printf("failed to get wal file, index:%" PRId64 "\n", index);
      break;
    }

    printf("index:%" PRId64 " wal:%s\n", index, name);
    if (code == 0) break;
  }

  // Block until a character is read from stdin before closing the WAL.
  getchar();

  free(pHead);
  walClose(pWal);
  walCleanUp();
  tfCleanup();

  return 0;
}
...@@ -48,25 +48,25 @@ void taosRemoveDir(const char *dirname) { ...@@ -48,25 +48,25 @@ void taosRemoveDir(const char *dirname) {
taosRemoveDir(filename); taosRemoveDir(filename);
} else { } else {
(void)remove(filename); (void)remove(filename);
printf("file:%s is removed\n", filename); //printf("file:%s is removed\n", filename);
} }
} }
closedir(dir); closedir(dir);
rmdir(dirname); rmdir(dirname);
printf("dir:%s is removed\n", dirname); //printf("dir:%s is removed\n", dirname);
} }
bool taosDirExist(char *dirname) { return access(dirname, F_OK) == 0; } int32_t taosDirExist(char *dirname) { return access(dirname, F_OK); }
bool taosMkDir(const char *dirname) { int32_t taosMkDir(const char *dirname) {
int32_t code = mkdir(dirname, 0755); int32_t code = mkdir(dirname, 0755);
if (code < 0 && errno == EEXIST) { if (code < 0 && errno == EEXIST) {
return true; return 0;
} }
return code == 0; return code;
} }
void taosRemoveOldFiles(char *dirname, int32_t keepDays) { void taosRemoveOldFiles(char *dirname, int32_t keepDays) {
...@@ -101,9 +101,9 @@ void taosRemoveOldFiles(char *dirname, int32_t keepDays) { ...@@ -101,9 +101,9 @@ void taosRemoveOldFiles(char *dirname, int32_t keepDays) {
int32_t days = (int32_t)(ABS(sec - fileSec) / 86400 + 1); int32_t days = (int32_t)(ABS(sec - fileSec) / 86400 + 1);
if (days > keepDays) { if (days > keepDays) {
(void)remove(filename); (void)remove(filename);
printf("file:%s is removed, days:%d keepDays:%d", filename, days, keepDays); //printf("file:%s is removed, days:%d keepDays:%d", filename, days, keepDays);
} else { } else {
printf("file:%s won't be removed, days:%d keepDays:%d", filename, days, keepDays); //printf("file:%s won't be removed, days:%d keepDays:%d", filename, days, keepDays);
} }
} }
} }
...@@ -112,12 +112,12 @@ void taosRemoveOldFiles(char *dirname, int32_t keepDays) { ...@@ -112,12 +112,12 @@ void taosRemoveOldFiles(char *dirname, int32_t keepDays) {
rmdir(dirname); rmdir(dirname);
} }
bool taosExpandDir(char *dirname, char *outname, int32_t maxlen) { int32_t taosExpandDir(char *dirname, char *outname, int32_t maxlen) {
wordexp_t full_path; wordexp_t full_path;
if (0 != wordexp(dirname, &full_path, 0)) { if (0 != wordexp(dirname, &full_path, 0)) {
printf("failed to expand path:%s since %s", dirname, strerror(errno)); //printf("failed to expand path:%s since %s", dirname, strerror(errno));
wordfree(&full_path); wordfree(&full_path);
return false; return -1;
} }
if (full_path.we_wordv != NULL && full_path.we_wordv[0] != NULL) { if (full_path.we_wordv != NULL && full_path.we_wordv[0] != NULL) {
...@@ -126,16 +126,16 @@ bool taosExpandDir(char *dirname, char *outname, int32_t maxlen) { ...@@ -126,16 +126,16 @@ bool taosExpandDir(char *dirname, char *outname, int32_t maxlen) {
wordfree(&full_path); wordfree(&full_path);
return true; return 0;
} }
bool taosRealPath(char *dirname, int32_t maxlen) { int32_t taosRealPath(char *dirname, int32_t maxlen) {
char tmp[PATH_MAX] = {0}; char tmp[PATH_MAX] = {0};
if (realpath(dirname, tmp) != NULL) { if (realpath(dirname, tmp) != NULL) {
strncpy(dirname, tmp, maxlen); strncpy(dirname, tmp, maxlen);
} }
return true; return 0;
} }
#endif #endif
...@@ -404,14 +404,14 @@ int32_t taosRenameFile(char *oldName, char *newName) { ...@@ -404,14 +404,14 @@ int32_t taosRenameFile(char *oldName, char *newName) {
#if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) #if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32)
int32_t code = MoveFileEx(oldName, newName, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED); int32_t code = MoveFileEx(oldName, newName, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED);
if (code < 0) { if (code < 0) {
printf("failed to rename file %s to %s, reason:%s", oldName, newName, strerror(errno)); //printf("failed to rename file %s to %s, reason:%s", oldName, newName, strerror(errno));
} }
return code; return code;
#else #else
int32_t code = rename(oldName, newName); int32_t code = rename(oldName, newName);
if (code < 0) { if (code < 0) {
printf("failed to rename file %s to %s, reason:%s", oldName, newName, strerror(errno)); //printf("failed to rename file %s to %s, reason:%s", oldName, newName, strerror(errno));
} }
return code; return code;
......
...@@ -95,7 +95,7 @@ void taosShutDownSocketWR(SOCKET fd) { ...@@ -95,7 +95,7 @@ void taosShutDownSocketWR(SOCKET fd) {
int32_t taosSetNonblocking(SOCKET sock, int32_t on) { int32_t taosSetNonblocking(SOCKET sock, int32_t on) {
int32_t flags = 0; int32_t flags = 0;
if ((flags = fcntl(sock, F_GETFL, 0)) < 0) { if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
printf("fcntl(F_GETFL) error: %d (%s)\n", errno, strerror(errno)); //printf("fcntl(F_GETFL) error: %d (%s)\n", errno, strerror(errno));
return 1; return 1;
} }
...@@ -105,7 +105,7 @@ int32_t taosSetNonblocking(SOCKET sock, int32_t on) { ...@@ -105,7 +105,7 @@ int32_t taosSetNonblocking(SOCKET sock, int32_t on) {
flags &= ~O_NONBLOCK; flags &= ~O_NONBLOCK;
if ((flags = fcntl(sock, F_SETFL, flags)) < 0) { if ((flags = fcntl(sock, F_SETFL, flags)) < 0) {
printf("fcntl(F_SETFL) error: %d (%s)\n", errno, strerror(errno)); //printf("fcntl(F_SETFL) error: %d (%s)\n", errno, strerror(errno));
return 1; return 1;
} }
...@@ -120,7 +120,7 @@ void taosBlockSIGPIPE() { ...@@ -120,7 +120,7 @@ void taosBlockSIGPIPE() {
sigaddset(&signal_mask, SIGPIPE); sigaddset(&signal_mask, SIGPIPE);
int32_t rc = pthread_sigmask(SIG_BLOCK, &signal_mask, NULL); int32_t rc = pthread_sigmask(SIG_BLOCK, &signal_mask, NULL);
if (rc != 0) { if (rc != 0) {
printf("failed to block SIGPIPE"); //printf("failed to block SIGPIPE");
} }
} }
...@@ -130,7 +130,7 @@ void taosSetMaskSIGPIPE() { ...@@ -130,7 +130,7 @@ void taosSetMaskSIGPIPE() {
sigaddset(&signal_mask, SIGPIPE); sigaddset(&signal_mask, SIGPIPE);
int32_t rc = pthread_sigmask(SIG_SETMASK, &signal_mask, NULL); int32_t rc = pthread_sigmask(SIG_SETMASK, &signal_mask, NULL);
if (rc != 0) { if (rc != 0) {
printf("failed to setmask SIGPIPE"); //printf("failed to setmask SIGPIPE");
} }
} }
...@@ -277,7 +277,7 @@ int32_t taosGetFqdn(char *fqdn) { ...@@ -277,7 +277,7 @@ int32_t taosGetFqdn(char *fqdn) {
char hostname[1024]; char hostname[1024];
hostname[1023] = '\0'; hostname[1023] = '\0';
if (gethostname(hostname, 1023) == -1) { if (gethostname(hostname, 1023) == -1) {
printf("failed to get hostname, reason:%s", strerror(errno)); //printf("failed to get hostname, reason:%s", strerror(errno));
return -1; return -1;
} }
...@@ -294,7 +294,7 @@ int32_t taosGetFqdn(char *fqdn) { ...@@ -294,7 +294,7 @@ int32_t taosGetFqdn(char *fqdn) {
#endif // __APPLE__ #endif // __APPLE__
int32_t ret = getaddrinfo(hostname, NULL, &hints, &result); int32_t ret = getaddrinfo(hostname, NULL, &hints, &result);
if (!result) { if (!result) {
printf("failed to get fqdn, code:%d, reason:%s", ret, gai_strerror(ret)); //printf("failed to get fqdn, code:%d, reason:%s", ret, gai_strerror(ret));
return -1; return -1;
} }
...@@ -326,12 +326,12 @@ uint32_t taosGetIpv4FromFqdn(const char *fqdn) { ...@@ -326,12 +326,12 @@ uint32_t taosGetIpv4FromFqdn(const char *fqdn) {
} else { } else {
#ifdef EAI_SYSTEM #ifdef EAI_SYSTEM
if (ret == EAI_SYSTEM) { if (ret == EAI_SYSTEM) {
printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, strerror(errno)); //printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, strerror(errno));
} else { } else {
printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, gai_strerror(ret)); //printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, gai_strerror(ret));
} }
#else #else
printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, gai_strerror(ret)); //printf("failed to get the ip address, fqdn:%s, since:%s", fqdn, gai_strerror(ret));
#endif #endif
return 0xFFFFFFFF; return 0xFFFFFFFF;
} }
...@@ -437,13 +437,13 @@ int32_t taosNonblockwrite(SOCKET fd, char *ptr, int32_t nbytes) { ...@@ -437,13 +437,13 @@ int32_t taosNonblockwrite(SOCKET fd, char *ptr, int32_t nbytes) {
FD_SET(fd, &fset); FD_SET(fd, &fset);
if ((nready = select((int32_t)(fd + 1), NULL, &fset, NULL, &tv)) == 0) { if ((nready = select((int32_t)(fd + 1), NULL, &fset, NULL, &tv)) == 0) {
errno = ETIMEDOUT; errno = ETIMEDOUT;
printf("fd %d timeout, no enough space to write", fd); //printf("fd %d timeout, no enough space to write", fd);
break; break;
} else if (nready < 0) { } else if (nready < 0) {
if (errno == EINTR) continue; if (errno == EINTR) continue;
printf("select error, %d (%s)", errno, strerror(errno)); //printf("select error, %d (%s)", errno, strerror(errno));
return -1; return -1;
} }
...@@ -451,7 +451,7 @@ int32_t taosNonblockwrite(SOCKET fd, char *ptr, int32_t nbytes) { ...@@ -451,7 +451,7 @@ int32_t taosNonblockwrite(SOCKET fd, char *ptr, int32_t nbytes) {
if (nwritten <= 0) { if (nwritten <= 0) {
if (errno == EAGAIN || errno == EINTR) continue; if (errno == EAGAIN || errno == EINTR) continue;
printf("write error, %d (%s)", errno, strerror(errno)); //printf("write error, %d (%s)", errno, strerror(errno));
return -1; return -1;
} }
...@@ -477,21 +477,21 @@ int32_t taosReadn(SOCKET fd, char *ptr, int32_t nbytes) { ...@@ -477,21 +477,21 @@ int32_t taosReadn(SOCKET fd, char *ptr, int32_t nbytes) {
FD_SET(fd, &fset); FD_SET(fd, &fset);
if ((nready = select((int32_t)(fd + 1), NULL, &fset, NULL, &tv)) == 0) { if ((nready = select((int32_t)(fd + 1), NULL, &fset, NULL, &tv)) == 0) {
errno = ETIMEDOUT; errno = ETIMEDOUT;
printf("fd %d timeout\n", fd); //printf("fd %d timeout\n", fd);
break; break;
} else if (nready < 0) { } else if (nready < 0) {
if (errno == EINTR) continue; if (errno == EINTR) continue;
printf("select error, %d (%s)", errno, strerror(errno)); //printf("select error, %d (%s)", errno, strerror(errno));
return -1; return -1;
} }
if ((nread = (int32_t)taosReadSocket(fd, ptr, (size_t)nleft)) < 0) { if ((nread = (int32_t)taosReadSocket(fd, ptr, (size_t)nleft)) < 0) {
if (errno == EINTR) continue; if (errno == EINTR) continue;
printf("read error, %d (%s)", errno, strerror(errno)); //printf("read error, %d (%s)", errno, strerror(errno));
return -1; return -1;
} else if (nread == 0) { } else if (nread == 0) {
printf("fd %d EOF", fd); //printf("fd %d EOF", fd);
break; // EOF break; // EOF
} }
...@@ -507,7 +507,7 @@ SOCKET taosOpenUdpSocket(uint32_t ip, uint16_t port) { ...@@ -507,7 +507,7 @@ SOCKET taosOpenUdpSocket(uint32_t ip, uint16_t port) {
SOCKET sockFd; SOCKET sockFd;
int32_t bufSize = 1024000; int32_t bufSize = 1024000;
printf("open udp socket:0x%x:%hu", ip, port); //printf("open udp socket:0x%x:%hu", ip, port);
memset((char *)&localAddr, 0, sizeof(localAddr)); memset((char *)&localAddr, 0, sizeof(localAddr));
localAddr.sin_family = AF_INET; localAddr.sin_family = AF_INET;
...@@ -515,26 +515,26 @@ SOCKET taosOpenUdpSocket(uint32_t ip, uint16_t port) { ...@@ -515,26 +515,26 @@ SOCKET taosOpenUdpSocket(uint32_t ip, uint16_t port) {
localAddr.sin_port = (uint16_t)htons(port); localAddr.sin_port = (uint16_t)htons(port);
if ((sockFd = socket(AF_INET, SOCK_DGRAM, 0)) <= 2) { if ((sockFd = socket(AF_INET, SOCK_DGRAM, 0)) <= 2) {
printf("failed to open udp socket: %d (%s)", errno, strerror(errno)); //printf("failed to open udp socket: %d (%s)", errno, strerror(errno));
taosCloseSocketNoCheck(sockFd); taosCloseSocketNoCheck(sockFd);
return -1; return -1;
} }
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_SNDBUF, (void *)&bufSize, sizeof(bufSize)) != 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_SNDBUF, (void *)&bufSize, sizeof(bufSize)) != 0) {
printf("failed to set the send buffer size for UDP socket\n"); //printf("failed to set the send buffer size for UDP socket\n");
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_RCVBUF, (void *)&bufSize, sizeof(bufSize)) != 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_RCVBUF, (void *)&bufSize, sizeof(bufSize)) != 0) {
printf("failed to set the receive buffer size for UDP socket\n"); //printf("failed to set the receive buffer size for UDP socket\n");
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
/* bind socket to local address */ /* bind socket to local address */
if (bind(sockFd, (struct sockaddr *)&localAddr, sizeof(localAddr)) < 0) { if (bind(sockFd, (struct sockaddr *)&localAddr, sizeof(localAddr)) < 0) {
printf("failed to bind udp socket: %d (%s), 0x%x:%hu", errno, strerror(errno), ip, port); //printf("failed to bind udp socket: %d (%s), 0x%x:%hu", errno, strerror(errno), ip, port);
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -551,7 +551,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -551,7 +551,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
sockFd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); sockFd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if (sockFd <= 2) { if (sockFd <= 2) {
printf("failed to open the socket: %d (%s)", errno, strerror(errno)); //printf("failed to open the socket: %d (%s)", errno, strerror(errno));
taosCloseSocketNoCheck(sockFd); taosCloseSocketNoCheck(sockFd);
return -1; return -1;
} }
...@@ -559,19 +559,19 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -559,19 +559,19 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
/* set REUSEADDR option, so the portnumber can be re-used */ /* set REUSEADDR option, so the portnumber can be re-used */
int32_t reuse = 1; int32_t reuse = 1;
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0) {
printf("setsockopt SO_REUSEADDR failed: %d (%s)", errno, strerror(errno)); //printf("setsockopt SO_REUSEADDR failed: %d (%s)", errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_SNDBUF, (void *)&bufSize, sizeof(bufSize)) != 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_SNDBUF, (void *)&bufSize, sizeof(bufSize)) != 0) {
printf("failed to set the send buffer size for TCP socket\n"); //printf("failed to set the send buffer size for TCP socket\n");
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_RCVBUF, (void *)&bufSize, sizeof(bufSize)) != 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_RCVBUF, (void *)&bufSize, sizeof(bufSize)) != 0) {
printf("failed to set the receive buffer size for TCP socket\n"); //printf("failed to set the receive buffer size for TCP socket\n");
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -584,8 +584,8 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -584,8 +584,8 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
/* bind socket to client address */ /* bind socket to client address */
if (bind(sockFd, (struct sockaddr *)&clientAddr, sizeof(clientAddr)) < 0) { if (bind(sockFd, (struct sockaddr *)&clientAddr, sizeof(clientAddr)) < 0) {
printf("bind tcp client socket failed, client(0x%x:0), dest(0x%x:%d), reason:(%s)", clientIp, destIp, destPort, //printf("bind tcp client socket failed, client(0x%x:0), dest(0x%x:%d), reason:(%s)", clientIp, destIp, destPort,
strerror(errno)); // strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -601,7 +601,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -601,7 +601,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
ret = connect(sockFd, (struct sockaddr *)&serverAddr, sizeof(serverAddr)); ret = connect(sockFd, (struct sockaddr *)&serverAddr, sizeof(serverAddr));
if (ret == -1) { if (ret == -1) {
if (errno == EHOSTUNREACH) { if (errno == EHOSTUNREACH) {
printf("failed to connect socket, ip:0x%x, port:%hu(%s)", destIp, destPort, strerror(errno)); //printf("failed to connect socket, ip:0x%x, port:%hu(%s)", destIp, destPort, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} else if (errno == EINPROGRESS || errno == EAGAIN || errno == EWOULDBLOCK) { } else if (errno == EINPROGRESS || errno == EAGAIN || errno == EWOULDBLOCK) {
...@@ -612,19 +612,19 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -612,19 +612,19 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
int res = poll(wfd, 1, TCP_CONN_TIMEOUT); int res = poll(wfd, 1, TCP_CONN_TIMEOUT);
if (res == -1 || res == 0) { if (res == -1 || res == 0) {
printf("failed to connect socket, ip:0x%x, port:%hu(poll error/conn timeout)", destIp, destPort); //printf("failed to connect socket, ip:0x%x, port:%hu(poll error/conn timeout)", destIp, destPort);
taosCloseSocket(sockFd); // taosCloseSocket(sockFd); //
return -1; return -1;
} }
int optVal = -1, optLen = sizeof(int); int optVal = -1, optLen = sizeof(int);
if ((0 != taosGetSockOpt(sockFd, SOL_SOCKET, SO_ERROR, &optVal, &optLen)) || (optVal != 0)) { if ((0 != taosGetSockOpt(sockFd, SOL_SOCKET, SO_ERROR, &optVal, &optLen)) || (optVal != 0)) {
printf("failed to connect socket, ip:0x%x, port:%hu(connect host error)", destIp, destPort); //printf("failed to connect socket, ip:0x%x, port:%hu(connect host error)", destIp, destPort);
taosCloseSocket(sockFd); // taosCloseSocket(sockFd); //
return -1; return -1;
} }
ret = 0; ret = 0;
} else { // Other error } else { // Other error
printf("failed to connect socket, ip:0x%x, port:%hu(target host cannot be reached)", destIp, destPort); //printf("failed to connect socket, ip:0x%x, port:%hu(target host cannot be reached)", destIp, destPort);
taosCloseSocket(sockFd); // taosCloseSocket(sockFd); //
return -1; return -1;
} }
...@@ -636,7 +636,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -636,7 +636,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
#endif #endif
if (ret != 0) { if (ret != 0) {
printf("failed to connect socket, ip:0x%x, port:%hu(%s)", destIp, destPort, strerror(errno)); //printf("failed to connect socket, ip:0x%x, port:%hu(%s)", destIp, destPort, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
sockFd = -1; sockFd = -1;
} else { } else {
...@@ -649,7 +649,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie ...@@ -649,7 +649,7 @@ SOCKET taosOpenTcpClientSocket(uint32_t destIp, uint16_t destPort, uint32_t clie
int32_t taosKeepTcpAlive(SOCKET sockFd) { int32_t taosKeepTcpAlive(SOCKET sockFd) {
int32_t alive = 1; int32_t alive = 1;
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_KEEPALIVE, (void *)&alive, sizeof(alive)) < 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_KEEPALIVE, (void *)&alive, sizeof(alive)) < 0) {
printf("fd:%d setsockopt SO_KEEPALIVE failed: %d (%s)", sockFd, errno, strerror(errno)); //printf("fd:%d setsockopt SO_KEEPALIVE failed: %d (%s)", sockFd, errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -658,21 +658,21 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) { ...@@ -658,21 +658,21 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) {
// all fails on macosx // all fails on macosx
int32_t probes = 3; int32_t probes = 3;
if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPCNT, (void *)&probes, sizeof(probes)) < 0) { if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPCNT, (void *)&probes, sizeof(probes)) < 0) {
printf("fd:%d setsockopt SO_KEEPCNT failed: %d (%s)", sockFd, errno, strerror(errno)); //printf("fd:%d setsockopt SO_KEEPCNT failed: %d (%s)", sockFd, errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
int32_t alivetime = 10; int32_t alivetime = 10;
if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPIDLE, (void *)&alivetime, sizeof(alivetime)) < 0) { if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPIDLE, (void *)&alivetime, sizeof(alivetime)) < 0) {
printf("fd:%d setsockopt SO_KEEPIDLE failed: %d (%s)", sockFd, errno, strerror(errno)); //printf("fd:%d setsockopt SO_KEEPIDLE failed: %d (%s)", sockFd, errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
int32_t interval = 3; int32_t interval = 3;
if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPINTVL, (void *)&interval, sizeof(interval)) < 0) { if (taosSetSockOpt(sockFd, SOL_TCP, TCP_KEEPINTVL, (void *)&interval, sizeof(interval)) < 0) {
printf("fd:%d setsockopt SO_KEEPINTVL failed: %d (%s)", sockFd, errno, strerror(errno)); //printf("fd:%d setsockopt SO_KEEPINTVL failed: %d (%s)", sockFd, errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -680,7 +680,7 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) { ...@@ -680,7 +680,7 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) {
int32_t nodelay = 1; int32_t nodelay = 1;
if (taosSetSockOpt(sockFd, IPPROTO_TCP, TCP_NODELAY, (void *)&nodelay, sizeof(nodelay)) < 0) { if (taosSetSockOpt(sockFd, IPPROTO_TCP, TCP_NODELAY, (void *)&nodelay, sizeof(nodelay)) < 0) {
printf("fd:%d setsockopt TCP_NODELAY failed %d (%s)", sockFd, errno, strerror(errno)); //printf("fd:%d setsockopt TCP_NODELAY failed %d (%s)", sockFd, errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -689,7 +689,7 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) { ...@@ -689,7 +689,7 @@ int32_t taosKeepTcpAlive(SOCKET sockFd) {
linger.l_onoff = 1; linger.l_onoff = 1;
linger.l_linger = 3; linger.l_linger = 3;
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_LINGER, (void *)&linger, sizeof(linger)) < 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_LINGER, (void *)&linger, sizeof(linger)) < 0) {
printf("setsockopt SO_LINGER failed: %d (%s)", errno, strerror(errno)); //printf("setsockopt SO_LINGER failed: %d (%s)", errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -702,7 +702,7 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) { ...@@ -702,7 +702,7 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) {
SOCKET sockFd; SOCKET sockFd;
int32_t reuse; int32_t reuse;
printf("open tcp server socket:0x%x:%hu", ip, port); //printf("open tcp server socket:0x%x:%hu", ip, port);
bzero((char *)&serverAdd, sizeof(serverAdd)); bzero((char *)&serverAdd, sizeof(serverAdd));
serverAdd.sin_family = AF_INET; serverAdd.sin_family = AF_INET;
...@@ -710,7 +710,7 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) { ...@@ -710,7 +710,7 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) {
serverAdd.sin_port = (uint16_t)htons(port); serverAdd.sin_port = (uint16_t)htons(port);
if ((sockFd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) <= 2) { if ((sockFd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) <= 2) {
printf("failed to open TCP socket: %d (%s)", errno, strerror(errno)); //printf("failed to open TCP socket: %d (%s)", errno, strerror(errno));
taosCloseSocketNoCheck(sockFd); taosCloseSocketNoCheck(sockFd);
return -1; return -1;
} }
...@@ -718,26 +718,26 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) { ...@@ -718,26 +718,26 @@ SOCKET taosOpenTcpServerSocket(uint32_t ip, uint16_t port) {
/* set REUSEADDR option, so the portnumber can be re-used */ /* set REUSEADDR option, so the portnumber can be re-used */
reuse = 1; reuse = 1;
if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0) { if (taosSetSockOpt(sockFd, SOL_SOCKET, SO_REUSEADDR, (void *)&reuse, sizeof(reuse)) < 0) {
printf("setsockopt SO_REUSEADDR failed: %d (%s)", errno, strerror(errno)); //printf("setsockopt SO_REUSEADDR failed: %d (%s)", errno, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
/* bind socket to server address */ /* bind socket to server address */
if (bind(sockFd, (struct sockaddr *)&serverAdd, sizeof(serverAdd)) < 0) { if (bind(sockFd, (struct sockaddr *)&serverAdd, sizeof(serverAdd)) < 0) {
printf("bind tcp server socket failed, 0x%x:%hu(%s)", ip, port, strerror(errno)); //printf("bind tcp server socket failed, 0x%x:%hu(%s)", ip, port, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
if (taosKeepTcpAlive(sockFd) < 0) { if (taosKeepTcpAlive(sockFd) < 0) {
printf("failed to set tcp server keep-alive option, 0x%x:%hu(%s)", ip, port, strerror(errno)); //printf("failed to set tcp server keep-alive option, 0x%x:%hu(%s)", ip, port, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
if (listen(sockFd, 1024) < 0) { if (listen(sockFd, 1024) < 0) {
printf("listen tcp server socket failed, 0x%x:%hu(%s)", ip, port, strerror(errno)); //printf("listen tcp server socket failed, 0x%x:%hu(%s)", ip, port, strerror(errno));
taosCloseSocket(sockFd); taosCloseSocket(sockFd);
return -1; return -1;
} }
...@@ -767,16 +767,16 @@ int64_t taosCopyFds(SOCKET sfd, int32_t dfd, int64_t len) { ...@@ -767,16 +767,16 @@ int64_t taosCopyFds(SOCKET sfd, int32_t dfd, int64_t len) {
int64_t retLen = taosReadMsg(sfd, temp, (int32_t)readLen); int64_t retLen = taosReadMsg(sfd, temp, (int32_t)readLen);
if (readLen != retLen) { if (readLen != retLen) {
printf("read error, readLen:%" PRId64 " retLen:%" PRId64 " len:%" PRId64 " leftLen:%" PRId64 ", reason:%s", //printf("read error, readLen:%" PRId64 " retLen:%" PRId64 " len:%" PRId64 " leftLen:%" PRId64 ", reason:%s",
readLen, retLen, len, leftLen, strerror(errno)); // readLen, retLen, len, leftLen, strerror(errno));
return -1; return -1;
} }
writeLen = taosWriteMsg(dfd, temp, (int32_t)readLen); writeLen = taosWriteMsg(dfd, temp, (int32_t)readLen);
if (readLen != writeLen) { if (readLen != writeLen) {
printf("copy error, readLen:%" PRId64 " writeLen:%" PRId64 " len:%" PRId64 " leftLen:%" PRId64 ", reason:%s", //printf("copy error, readLen:%" PRId64 " writeLen:%" PRId64 " len:%" PRId64 " leftLen:%" PRId64 ", reason:%s",
readLen, writeLen, len, leftLen, strerror(errno)); // readLen, writeLen, len, leftLen, strerror(errno));
return -1; return -1;
} }
......
...@@ -277,7 +277,7 @@ char *strsep(char **stringp, const char *delim) { ...@@ -277,7 +277,7 @@ char *strsep(char **stringp, const char *delim) {
char *getpass(const char *prefix) { char *getpass(const char *prefix) {
static char passwd[TSDB_KEY_LEN] = {0}; static char passwd[TSDB_KEY_LEN] = {0};
memset(passwd, 0, TSDB_KEY_LEN); memset(passwd, 0, TSDB_KEY_LEN);
printf("%s", prefix); //printf("%s", prefix);
int32_t index = 0; int32_t index = 0;
char ch; char ch;
......
...@@ -134,7 +134,7 @@ int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) { ...@@ -134,7 +134,7 @@ int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) {
diskSize->used = (int64_t)(i64TotalBytes - i64FreeBytes); diskSize->used = (int64_t)(i64TotalBytes - i64FreeBytes);
return 0; return 0;
} else { } else {
printf("failed to get disk size, dataDir:%s errno:%s", tsDataDir, strerror(errno)); //printf("failed to get disk size, dataDir:%s errno:%s", tsDataDir, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return -1; return -1;
} }
...@@ -205,12 +205,12 @@ void taosGetSystemInfo() { ...@@ -205,12 +205,12 @@ void taosGetSystemInfo() {
} }
void taosKillSystem() { void taosKillSystem() {
printf("function taosKillSystem, exit!"); //printf("function taosKillSystem, exit!");
exit(0); exit(0);
} }
int taosSystem(const char *cmd) { int taosSystem(const char *cmd) {
printf("taosSystem not support"); //printf("taosSystem not support");
return -1; return -1;
} }
...@@ -280,7 +280,7 @@ static void taosGetSystemTimezone() { ...@@ -280,7 +280,7 @@ static void taosGetSystemTimezone() {
{ {
int n = readlink("/etc/localtime", buf, sizeof(buf)); int n = readlink("/etc/localtime", buf, sizeof(buf));
if (n < 0) { if (n < 0) {
printf("read /etc/localtime error, reason:%s", strerror(errno)); //printf("read /etc/localtime error, reason:%s", strerror(errno));
return; return;
} }
buf[n] = '\0'; buf[n] = '\0';
...@@ -294,7 +294,7 @@ static void taosGetSystemTimezone() { ...@@ -294,7 +294,7 @@ static void taosGetSystemTimezone() {
} }
} }
if (!tz || 0 == strchr(tz, '/')) { if (!tz || 0 == strchr(tz, '/')) {
printf("parsing /etc/localtime failed"); //printf("parsing /etc/localtime failed");
return; return;
} }
...@@ -321,7 +321,7 @@ static void taosGetSystemTimezone() { ...@@ -321,7 +321,7 @@ static void taosGetSystemTimezone() {
-timezone / 3600); -timezone / 3600);
// cfg_timezone->cfgStatus = TAOS_CFG_CSTATUS_DEFAULT; // cfg_timezone->cfgStatus = TAOS_CFG_CSTATUS_DEFAULT;
printf("timezone not configured, set to system default:%s", tsTimezone); //printf("timezone not configured, set to system default:%s", tsTimezone);
} }
/* /*
...@@ -348,11 +348,11 @@ static void taosGetSystemLocale() { // get and set default locale ...@@ -348,11 +348,11 @@ static void taosGetSystemLocale() { // get and set default locale
locale = setlocale(LC_CTYPE, ""); locale = setlocale(LC_CTYPE, "");
if (locale == NULL) { if (locale == NULL) {
printf("can't get locale from system, set it to en_US.UTF-8 since error:%d:%s", errno, strerror(errno)); //printf("can't get locale from system, set it to en_US.UTF-8 since error:%d:%s", errno, strerror(errno));
strcpy(tsLocale, "en_US.UTF-8"); strcpy(tsLocale, "en_US.UTF-8");
} else { } else {
tstrncpy(tsLocale, locale, TSDB_LOCALE_LEN); tstrncpy(tsLocale, locale, TSDB_LOCALE_LEN);
printf("locale not configured, set to system default:%s", tsLocale); //printf("locale not configured, set to system default:%s", tsLocale);
} }
/* if user does not specify the charset, extract it from locale */ /* if user does not specify the charset, extract it from locale */
...@@ -364,15 +364,15 @@ static void taosGetSystemLocale() { // get and set default locale ...@@ -364,15 +364,15 @@ static void taosGetSystemLocale() { // get and set default locale
tstrncpy(tsCharset, revisedCharset, TSDB_LOCALE_LEN); tstrncpy(tsCharset, revisedCharset, TSDB_LOCALE_LEN);
free(revisedCharset); free(revisedCharset);
printf("charset not configured, set to system default:%s", tsCharset); //printf("charset not configured, set to system default:%s", tsCharset);
} else { } else {
strcpy(tsCharset, "UTF-8"); strcpy(tsCharset, "UTF-8");
printf("can't get locale and charset from system, set it to UTF-8"); //printf("can't get locale and charset from system, set it to UTF-8");
} }
} }
void taosKillSystem() { void taosKillSystem() {
printf("function taosKillSystem, exit!"); //printf("function taosKillSystem, exit!");
exit(0); exit(0);
} }
...@@ -432,7 +432,7 @@ bool taosGetSysMemory(float *memoryUsedMB) { ...@@ -432,7 +432,7 @@ bool taosGetSysMemory(float *memoryUsedMB) {
} }
int taosSystem(const char *cmd) { int taosSystem(const char *cmd) {
printf("un support funtion"); //printf("un support funtion");
return -1; return -1;
} }
...@@ -441,7 +441,7 @@ void taosSetCoreDump() {} ...@@ -441,7 +441,7 @@ void taosSetCoreDump() {}
int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) { int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) {
struct statvfs info; struct statvfs info;
if (statvfs(dataDir, &info)) { if (statvfs(dataDir, &info)) {
printf("failed to get disk size, dataDir:%s errno:%s", tsDataDir, strerror(errno)); //printf("failed to get disk size, dataDir:%s errno:%s", tsDataDir, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return -1; return -1;
} else { } else {
...@@ -535,7 +535,7 @@ bool taosGetSysMemory(float *memoryUsedMB) { ...@@ -535,7 +535,7 @@ bool taosGetSysMemory(float *memoryUsedMB) {
bool taosGetProcMemory(float *memoryUsedMB) { bool taosGetProcMemory(float *memoryUsedMB) {
FILE *fp = fopen(tsProcMemFile, "r"); FILE *fp = fopen(tsProcMemFile, "r");
if (fp == NULL) { if (fp == NULL) {
printf("open file:%s failed", tsProcMemFile); //printf("open file:%s failed", tsProcMemFile);
return false; return false;
} }
...@@ -555,7 +555,7 @@ bool taosGetProcMemory(float *memoryUsedMB) { ...@@ -555,7 +555,7 @@ bool taosGetProcMemory(float *memoryUsedMB) {
} }
if (line == NULL) { if (line == NULL) {
printf("read file:%s failed", tsProcMemFile); //printf("read file:%s failed", tsProcMemFile);
fclose(fp); fclose(fp);
return false; return false;
} }
...@@ -573,7 +573,7 @@ bool taosGetProcMemory(float *memoryUsedMB) { ...@@ -573,7 +573,7 @@ bool taosGetProcMemory(float *memoryUsedMB) {
static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) { static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) {
FILE *fp = fopen(tsSysCpuFile, "r"); FILE *fp = fopen(tsSysCpuFile, "r");
if (fp == NULL) { if (fp == NULL) {
printf("open file:%s failed", tsSysCpuFile); //printf("open file:%s failed", tsSysCpuFile);
return false; return false;
} }
...@@ -581,7 +581,7 @@ static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) { ...@@ -581,7 +581,7 @@ static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) {
char * line = NULL; char * line = NULL;
ssize_t _bytes = getline(&line, &len, fp); ssize_t _bytes = getline(&line, &len, fp);
if ((_bytes < 0) || (line == NULL)) { if ((_bytes < 0) || (line == NULL)) {
printf("read file:%s failed", tsSysCpuFile); //printf("read file:%s failed", tsSysCpuFile);
fclose(fp); fclose(fp);
return false; return false;
} }
...@@ -598,7 +598,7 @@ static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) { ...@@ -598,7 +598,7 @@ static bool taosGetSysCpuInfo(SysCpuInfo *cpuInfo) {
static bool taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) { static bool taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) {
FILE *fp = fopen(tsProcCpuFile, "r"); FILE *fp = fopen(tsProcCpuFile, "r");
if (fp == NULL) { if (fp == NULL) {
printf("open file:%s failed", tsProcCpuFile); //printf("open file:%s failed", tsProcCpuFile);
return false; return false;
} }
...@@ -606,7 +606,7 @@ static bool taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) { ...@@ -606,7 +606,7 @@ static bool taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) {
char * line = NULL; char * line = NULL;
ssize_t _bytes = getline(&line, &len, fp); ssize_t _bytes = getline(&line, &len, fp);
if ((_bytes < 0) || (line == NULL)) { if ((_bytes < 0) || (line == NULL)) {
printf("read file:%s failed", tsProcCpuFile); //printf("read file:%s failed", tsProcCpuFile);
fclose(fp); fclose(fp);
return false; return false;
} }
...@@ -642,7 +642,7 @@ static void taosGetSystemTimezone() { ...@@ -642,7 +642,7 @@ static void taosGetSystemTimezone() {
int len = fread(buf, 64, 1, f); int len = fread(buf, 64, 1, f);
if (len < 64 && ferror(f)) { if (len < 64 && ferror(f)) {
fclose(f); fclose(f);
printf("read /etc/timezone error, reason:%s", strerror(errno)); //printf("read /etc/timezone error, reason:%s", strerror(errno));
return; return;
} }
...@@ -681,7 +681,7 @@ static void taosGetSystemTimezone() { ...@@ -681,7 +681,7 @@ static void taosGetSystemTimezone() {
snprintf(tsTimezone, TSDB_TIMEZONE_LEN, "%s (%s, %s%02d00)", buf, tzname[daylight], tz >= 0 ? "+" : "-", abs(tz)); snprintf(tsTimezone, TSDB_TIMEZONE_LEN, "%s (%s, %s%02d00)", buf, tzname[daylight], tz >= 0 ? "+" : "-", abs(tz));
// cfg_timezone->cfgStatus = TAOS_CFG_CSTATUS_DEFAULT; // cfg_timezone->cfgStatus = TAOS_CFG_CSTATUS_DEFAULT;
printf("timezone not configured, set to system default:%s", tsTimezone); //printf("timezone not configured, set to system default:%s", tsTimezone);
} }
/* /*
...@@ -707,11 +707,11 @@ static void taosGetSystemLocale() { // get and set default locale ...@@ -707,11 +707,11 @@ static void taosGetSystemLocale() { // get and set default locale
locale = setlocale(LC_CTYPE, ""); locale = setlocale(LC_CTYPE, "");
if (locale == NULL) { if (locale == NULL) {
printf("can't get locale from system, set it to en_US.UTF-8 since error:%d:%s", errno, strerror(errno)); //printf("can't get locale from system, set it to en_US.UTF-8 since error:%d:%s", errno, strerror(errno));
strcpy(tsLocale, "en_US.UTF-8"); strcpy(tsLocale, "en_US.UTF-8");
} else { } else {
tstrncpy(tsLocale, locale, TSDB_LOCALE_LEN); tstrncpy(tsLocale, locale, TSDB_LOCALE_LEN);
printf("locale not configured, set to system default:%s", tsLocale); //printf("locale not configured, set to system default:%s", tsLocale);
} }
/* if user does not specify the charset, extract it from locale */ /* if user does not specify the charset, extract it from locale */
...@@ -723,10 +723,10 @@ static void taosGetSystemLocale() { // get and set default locale ...@@ -723,10 +723,10 @@ static void taosGetSystemLocale() { // get and set default locale
tstrncpy(tsCharset, revisedCharset, TSDB_LOCALE_LEN); tstrncpy(tsCharset, revisedCharset, TSDB_LOCALE_LEN);
free(revisedCharset); free(revisedCharset);
printf("charset not configured, set to system default:%s", tsCharset); //printf("charset not configured, set to system default:%s", tsCharset);
} else { } else {
strcpy(tsCharset, "UTF-8"); strcpy(tsCharset, "UTF-8");
printf("can't get locale and charset from system, set it to UTF-8"); //printf("can't get locale and charset from system, set it to UTF-8");
} }
} }
...@@ -774,7 +774,7 @@ bool taosGetCpuUsage(float *sysCpuUsage, float *procCpuUsage) { ...@@ -774,7 +774,7 @@ bool taosGetCpuUsage(float *sysCpuUsage, float *procCpuUsage) {
int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) { int32_t taosGetDiskSize(char *dataDir, SysDiskSize *diskSize) {
struct statvfs info; struct statvfs info;
if (statvfs(dataDir, &info)) { if (statvfs(dataDir, &info)) {
printf("failed to get disk size, dataDir:%s errno:%s", dataDir, strerror(errno)); //printf("failed to get disk size, dataDir:%s errno:%s", dataDir, strerror(errno));
return -1; return -1;
} else { } else {
diskSize->tsize = info.f_blocks * info.f_frsize; diskSize->tsize = info.f_blocks * info.f_frsize;
...@@ -788,7 +788,7 @@ bool taosGetCardInfo(int64_t *bytes, int64_t *rbytes, int64_t *tbytes) { ...@@ -788,7 +788,7 @@ bool taosGetCardInfo(int64_t *bytes, int64_t *rbytes, int64_t *tbytes) {
*bytes = 0; *bytes = 0;
FILE *fp = fopen(tsSysNetFile, "r"); FILE *fp = fopen(tsSysNetFile, "r");
if (fp == NULL) { if (fp == NULL) {
printf("open file:%s failed", tsSysNetFile); //printf("open file:%s failed", tsSysNetFile);
return false; return false;
} }
...@@ -864,7 +864,7 @@ bool taosGetBandSpeed(float *bandSpeedKb) { ...@@ -864,7 +864,7 @@ bool taosGetBandSpeed(float *bandSpeedKb) {
double totalBytes = (double)(curBytes - lastBytes) / 1024 * 8; // Kb double totalBytes = (double)(curBytes - lastBytes) / 1024 * 8; // Kb
*bandSpeedKb = (float)(totalBytes / (double)(curTime - lastTime)); *bandSpeedKb = (float)(totalBytes / (double)(curTime - lastTime));
// printf("bandwidth lastBytes:%ld, lastTime:%ld, curBytes:%ld, curTime:%ld, // //printf("bandwidth lastBytes:%ld, lastTime:%ld, curBytes:%ld, curTime:%ld,
// speed:%f", lastBytes, lastTime, curBytes, curTime, *bandSpeed); // speed:%f", lastBytes, lastTime, curBytes, curTime, *bandSpeed);
lastTime = curTime; lastTime = curTime;
...@@ -876,7 +876,7 @@ bool taosGetBandSpeed(float *bandSpeedKb) { ...@@ -876,7 +876,7 @@ bool taosGetBandSpeed(float *bandSpeedKb) {
bool taosReadProcIO(int64_t *rchars, int64_t *wchars) { bool taosReadProcIO(int64_t *rchars, int64_t *wchars) {
FILE *fp = fopen(tsProcIOFile, "r"); FILE *fp = fopen(tsProcIOFile, "r");
if (fp == NULL) { if (fp == NULL) {
printf("open file:%s failed", tsProcIOFile); //printf("open file:%s failed", tsProcIOFile);
return false; return false;
} }
...@@ -909,7 +909,7 @@ bool taosReadProcIO(int64_t *rchars, int64_t *wchars) { ...@@ -909,7 +909,7 @@ bool taosReadProcIO(int64_t *rchars, int64_t *wchars) {
fclose(fp); fclose(fp);
if (readIndex < 2) { if (readIndex < 2) {
printf("read file:%s failed", tsProcIOFile); //printf("read file:%s failed", tsProcIOFile);
return false; return false;
} }
...@@ -964,7 +964,7 @@ void taosGetSystemInfo() { ...@@ -964,7 +964,7 @@ void taosGetSystemInfo() {
void taosKillSystem() { void taosKillSystem() {
// SIGINT // SIGINT
printf("taosd will shut down soon"); //printf("taosd will shut down soon");
kill(tsProcId, 2); kill(tsProcId, 2);
} }
...@@ -973,22 +973,22 @@ int taosSystem(const char *cmd) { ...@@ -973,22 +973,22 @@ int taosSystem(const char *cmd) {
int res; int res;
char buf[1024]; char buf[1024];
if (cmd == NULL) { if (cmd == NULL) {
printf("taosSystem cmd is NULL!"); //printf("taosSystem cmd is NULL!");
return -1; return -1;
} }
if ((fp = popen(cmd, "r")) == NULL) { if ((fp = popen(cmd, "r")) == NULL) {
printf("popen cmd:%s error: %s", cmd, strerror(errno)); //printf("popen cmd:%s error: %s", cmd, strerror(errno));
return -1; return -1;
} else { } else {
while (fgets(buf, sizeof(buf), fp)) { while (fgets(buf, sizeof(buf), fp)) {
printf("popen result:%s", buf); //printf("popen result:%s", buf);
} }
if ((res = pclose(fp)) == -1) { if ((res = pclose(fp)) == -1) {
printf("close popen file pointer fp error!"); //printf("close popen file pointer fp error!");
} else { } else {
printf("popen res is :%d", res); //printf("popen res is :%d", res);
} }
return res; return res;
...@@ -1003,14 +1003,14 @@ void taosSetCoreDump(bool enable) { ...@@ -1003,14 +1003,14 @@ void taosSetCoreDump(bool enable) {
struct rlimit rlim_new; struct rlimit rlim_new;
if (getrlimit(RLIMIT_CORE, &rlim) == 0) { if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
#ifndef _ALPINE #ifndef _ALPINE
printf("the old unlimited para: rlim_cur=%" PRIu64 ", rlim_max=%" PRIu64, rlim.rlim_cur, rlim.rlim_max); //printf("the old unlimited para: rlim_cur=%" PRIu64 ", rlim_max=%" PRIu64, rlim.rlim_cur, rlim.rlim_max);
#else #else
printf("the old unlimited para: rlim_cur=%llu, rlim_max=%llu", rlim.rlim_cur, rlim.rlim_max); //printf("the old unlimited para: rlim_cur=%llu, rlim_max=%llu", rlim.rlim_cur, rlim.rlim_max);
#endif #endif
rlim_new.rlim_cur = RLIM_INFINITY; rlim_new.rlim_cur = RLIM_INFINITY;
rlim_new.rlim_max = RLIM_INFINITY; rlim_new.rlim_max = RLIM_INFINITY;
if (setrlimit(RLIMIT_CORE, &rlim_new) != 0) { if (setrlimit(RLIMIT_CORE, &rlim_new) != 0) {
printf("set unlimited fail, error: %s", strerror(errno)); //printf("set unlimited fail, error: %s", strerror(errno));
rlim_new.rlim_cur = rlim.rlim_max; rlim_new.rlim_cur = rlim.rlim_max;
rlim_new.rlim_max = rlim.rlim_max; rlim_new.rlim_max = rlim.rlim_max;
(void)setrlimit(RLIMIT_CORE, &rlim_new); (void)setrlimit(RLIMIT_CORE, &rlim_new);
...@@ -1019,9 +1019,9 @@ void taosSetCoreDump(bool enable) { ...@@ -1019,9 +1019,9 @@ void taosSetCoreDump(bool enable) {
if (getrlimit(RLIMIT_CORE, &rlim) == 0) { if (getrlimit(RLIMIT_CORE, &rlim) == 0) {
#ifndef _ALPINE #ifndef _ALPINE
printf("the new unlimited para: rlim_cur=%" PRIu64 ", rlim_max=%" PRIu64, rlim.rlim_cur, rlim.rlim_max); //printf("the new unlimited para: rlim_cur=%" PRIu64 ", rlim_max=%" PRIu64, rlim.rlim_cur, rlim.rlim_max);
#else #else
printf("the new unlimited para: rlim_cur=%llu, rlim_max=%llu", rlim.rlim_cur, rlim.rlim_max); //printf("the new unlimited para: rlim_cur=%llu, rlim_max=%llu", rlim.rlim_cur, rlim.rlim_max);
#endif #endif
} }
...@@ -1047,10 +1047,10 @@ void taosSetCoreDump(bool enable) { ...@@ -1047,10 +1047,10 @@ void taosSetCoreDump(bool enable) {
old_len = sizeof(old_usespid); old_len = sizeof(old_usespid);
if (syscall(SYS__sysctl, &args) == -1) { if (syscall(SYS__sysctl, &args) == -1) {
printf("_sysctl(kern_core_uses_pid) set fail: %s", strerror(errno)); //printf("_sysctl(kern_core_uses_pid) set fail: %s", strerror(errno));
} }
printf("The old core_uses_pid[%" PRIu64 "]: %d", old_len, old_usespid); //printf("The old core_uses_pid[%" PRIu64 "]: %d", old_len, old_usespid);
old_usespid = 0; old_usespid = 0;
old_len = 0; old_len = 0;
...@@ -1063,10 +1063,10 @@ void taosSetCoreDump(bool enable) { ...@@ -1063,10 +1063,10 @@ void taosSetCoreDump(bool enable) {
old_len = sizeof(old_usespid); old_len = sizeof(old_usespid);
if (syscall(SYS__sysctl, &args) == -1) { if (syscall(SYS__sysctl, &args) == -1) {
printf("_sysctl(kern_core_uses_pid) get fail: %s", strerror(errno)); //printf("_sysctl(kern_core_uses_pid) get fail: %s", strerror(errno));
} }
printf("The new core_uses_pid[%" PRIu64 "]: %d", old_len, old_usespid); //printf("The new core_uses_pid[%" PRIu64 "]: %d", old_len, old_usespid);
#endif #endif
} }
......
...@@ -87,11 +87,11 @@ int taosSetConsoleEcho(bool on) { ...@@ -87,11 +87,11 @@ int taosSetConsoleEcho(bool on) {
void* taosLoadDll(const char* filename) { void* taosLoadDll(const char* filename) {
void* handle = dlopen(filename, RTLD_LAZY); void* handle = dlopen(filename, RTLD_LAZY);
if (!handle) { if (!handle) {
printf("load dll:%s failed, error:%s", filename, dlerror()); //printf("load dll:%s failed, error:%s", filename, dlerror());
return NULL; return NULL;
} }
printf("dll %s loaded", filename); //printf("dll %s loaded", filename);
return handle; return handle;
} }
...@@ -101,11 +101,11 @@ void* taosLoadSym(void* handle, char* name) { ...@@ -101,11 +101,11 @@ void* taosLoadSym(void* handle, char* name) {
char* error = NULL; char* error = NULL;
if ((error = dlerror()) != NULL) { if ((error = dlerror()) != NULL) {
printf("load sym:%s failed, error:%s", name, dlerror()); //printf("load sym:%s failed, error:%s", name, dlerror());
return NULL; return NULL;
} }
printf("sym %s loaded", name); //printf("sym %s loaded", name);
return sym; return sym;
} }
...@@ -133,7 +133,7 @@ int taosSetConsoleEcho(bool on) { ...@@ -133,7 +133,7 @@ int taosSetConsoleEcho(bool on) {
err = tcsetattr(STDIN_FILENO, TCSAFLUSH, &term); err = tcsetattr(STDIN_FILENO, TCSAFLUSH, &term);
if (err == -1 || err == EINTR) { if (err == -1 || err == EINTR) {
printf("Cannot set the attribution of the terminal"); //printf("Cannot set the attribution of the terminal");
return -1; return -1;
} }
......
...@@ -170,7 +170,7 @@ static void *taosProcessAlarmSignal(void *tharg) { ...@@ -170,7 +170,7 @@ static void *taosProcessAlarmSignal(void *tharg) {
sevent.sigev_signo = SIGALRM; sevent.sigev_signo = SIGALRM;
if (timer_create(CLOCK_REALTIME, &sevent, &timerId) == -1) { if (timer_create(CLOCK_REALTIME, &sevent, &timerId) == -1) {
printf("Failed to create timer"); //printf("Failed to create timer");
} }
pthread_cleanup_push(taosDeleteTimer, &timerId); pthread_cleanup_push(taosDeleteTimer, &timerId);
...@@ -182,17 +182,17 @@ static void *taosProcessAlarmSignal(void *tharg) { ...@@ -182,17 +182,17 @@ static void *taosProcessAlarmSignal(void *tharg) {
ts.it_interval.tv_nsec = 1000000 * MSECONDS_PER_TICK; ts.it_interval.tv_nsec = 1000000 * MSECONDS_PER_TICK;
if (timer_settime(timerId, 0, &ts, NULL)) { if (timer_settime(timerId, 0, &ts, NULL)) {
printf("Failed to init timer"); //printf("Failed to init timer");
return NULL; return NULL;
} }
int signo; int signo;
while (!stopTimer) { while (!stopTimer) {
if (sigwait(&sigset, &signo)) { if (sigwait(&sigset, &signo)) {
printf("Failed to wait signal: number %d", signo); //printf("Failed to wait signal: number %d", signo);
continue; continue;
} }
/* printf("Signal handling: number %d ......\n", signo); */ /* //printf("Signal handling: number %d ......\n", signo); */
callback(0); callback(0);
} }
...@@ -208,10 +208,10 @@ int taosInitTimer(void (*callback)(int), int ms) { ...@@ -208,10 +208,10 @@ int taosInitTimer(void (*callback)(int), int ms) {
int code = pthread_create(&timerThread, &tattr, taosProcessAlarmSignal, callback); int code = pthread_create(&timerThread, &tattr, taosProcessAlarmSignal, callback);
pthread_attr_destroy(&tattr); pthread_attr_destroy(&tattr);
if (code != 0) { if (code != 0) {
printf("failed to create timer thread"); //printf("failed to create timer thread");
return -1; return -1;
} else { } else {
printf("timer thread:0x%08" PRIx64 " is created", taosGetPthreadId(timerThread)); //printf("timer thread:0x%08" PRIx64 " is created", taosGetPthreadId(timerThread));
} }
return 0; return 0;
...@@ -220,7 +220,7 @@ int taosInitTimer(void (*callback)(int), int ms) { ...@@ -220,7 +220,7 @@ int taosInitTimer(void (*callback)(int), int ms) {
void taosUninitTimer() { void taosUninitTimer() {
stopTimer = true; stopTimer = true;
printf("join timer thread:0x%08" PRIx64, taosGetPthreadId(timerThread)); //printf("join timer thread:0x%08" PRIx64, taosGetPthreadId(timerThread));
pthread_join(timerThread, NULL); pthread_join(timerThread, NULL);
} }
......
...@@ -154,7 +154,7 @@ static bool taosReadDirectoryConfig(SGlobalCfg *cfg, char *input_value) { ...@@ -154,7 +154,7 @@ static bool taosReadDirectoryConfig(SGlobalCfg *cfg, char *input_value) {
taosExpandDir(input_value, option, cfg->ptrLength); taosExpandDir(input_value, option, cfg->ptrLength);
taosRealPath(option, cfg->ptrLength); taosRealPath(option, cfg->ptrLength);
if (!taosMkDir(option)) { if (taosMkDir(option) != 0) {
uError("config option:%s, input value:%s, directory not exist, create fail:%s", cfg->option, input_value, uError("config option:%s, input value:%s, directory not exist, create fail:%s", cfg->option, input_value,
strerror(errno)); strerror(errno));
return false; return false;
...@@ -335,7 +335,7 @@ void taosReadGlobalLogCfg() { ...@@ -335,7 +335,7 @@ void taosReadGlobalLogCfg() {
fclose(fp); fclose(fp);
} }
bool taosReadGlobalCfg() { int32_t taosReadGlobalCfg() {
char * line, *option, *value, *value2, *value3; char * line, *option, *value, *value2, *value3;
int olen, vlen, vlen2, vlen3; int olen, vlen, vlen2, vlen3;
char fileName[PATH_MAX] = {0}; char fileName[PATH_MAX] = {0};
...@@ -345,7 +345,7 @@ bool taosReadGlobalCfg() { ...@@ -345,7 +345,7 @@ bool taosReadGlobalCfg() {
if (fp == NULL) { if (fp == NULL) {
fp = fopen(configDir, "r"); fp = fopen(configDir, "r");
if (fp == NULL) { if (fp == NULL) {
return false; return -1;
} }
} }
...@@ -393,7 +393,7 @@ bool taosReadGlobalCfg() { ...@@ -393,7 +393,7 @@ bool taosReadGlobalCfg() {
// taosSetAllDebugFlag(); // taosSetAllDebugFlag();
// } // }
return true; return 0;
} }
void taosPrintGlobalCfg() { void taosPrintGlobalCfg() {
...@@ -402,7 +402,7 @@ void taosPrintGlobalCfg() { ...@@ -402,7 +402,7 @@ void taosPrintGlobalCfg() {
for (int i = 0; i < tsGlobalConfigNum; ++i) { for (int i = 0; i < tsGlobalConfigNum; ++i) {
SGlobalCfg *cfg = tsGlobalConfig + i; SGlobalCfg *cfg = tsGlobalConfig + i;
if (tscEmbedded == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue; if (tscEmbeddedInUtil == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue;
if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue; if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue;
int optionLen = (int)strlen(cfg->option); int optionLen = (int)strlen(cfg->option);
...@@ -487,7 +487,7 @@ void taosDumpGlobalCfg() { ...@@ -487,7 +487,7 @@ void taosDumpGlobalCfg() {
printf("==================================\n"); printf("==================================\n");
for (int i = 0; i < tsGlobalConfigNum; ++i) { for (int i = 0; i < tsGlobalConfigNum; ++i) {
SGlobalCfg *cfg = tsGlobalConfig + i; SGlobalCfg *cfg = tsGlobalConfig + i;
if (tscEmbedded == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue; if (tscEmbeddedInUtil == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue;
if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue; if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue;
if (!(cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW)) continue; if (!(cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW)) continue;
...@@ -499,7 +499,7 @@ void taosDumpGlobalCfg() { ...@@ -499,7 +499,7 @@ void taosDumpGlobalCfg() {
for (int i = 0; i < tsGlobalConfigNum; ++i) { for (int i = 0; i < tsGlobalConfigNum; ++i) {
SGlobalCfg *cfg = tsGlobalConfig + i; SGlobalCfg *cfg = tsGlobalConfig + i;
if (tscEmbedded == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue; if (tscEmbeddedInUtil == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue;
if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue; if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue;
if (cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW) continue; if (cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW) continue;
......
...@@ -78,6 +78,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MEMORY_CORRUPTED, "Memory corrupted") ...@@ -78,6 +78,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MEMORY_CORRUPTED, "Memory corrupted")
TAOS_DEFINE_ERROR(TSDB_CODE_FILE_CORRUPTED, "Data file corrupted") TAOS_DEFINE_ERROR(TSDB_CODE_FILE_CORRUPTED, "Data file corrupted")
TAOS_DEFINE_ERROR(TSDB_CODE_CHECKSUM_ERROR, "Checksum error") TAOS_DEFINE_ERROR(TSDB_CODE_CHECKSUM_ERROR, "Checksum error")
TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_MSG, "Invalid config message") TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_MSG, "Invalid config message")
TAOS_DEFINE_ERROR(TSDB_CODE_MSG_NOT_PROCESSED, "Message not processed")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_NO_MEMORY, "Ref out of memory") TAOS_DEFINE_ERROR(TSDB_CODE_REF_NO_MEMORY, "Ref out of memory")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_FULL, "too many Ref Objs") TAOS_DEFINE_ERROR(TSDB_CODE_REF_FULL, "too many Ref Objs")
TAOS_DEFINE_ERROR(TSDB_CODE_REF_ID_REMOVED, "Ref ID is removed") TAOS_DEFINE_ERROR(TSDB_CODE_REF_ID_REMOVED, "Ref ID is removed")
...@@ -235,20 +236,20 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_TOPIC_PARTITONS, "Invalid topic partito ...@@ -235,20 +236,20 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_TOPIC_PARTITONS, "Invalid topic partito
TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOPIC_ALREADY_EXIST, "Topic already exists")
// dnode // dnode
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MSG_NOT_PROCESSED, "Message not processed")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_OUT_OF_MEMORY, "Dnode out of memory")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ID_NOT_MATCH_DNODE, "Mnode Id not match Dnode")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED, "Mnode already deployed")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_NOT_DEPLOYED, "Mnode not deployed")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_READ_MNODE_FILE_ERROR, "Read mnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_WRITE_MNODE_FILE_ERROR, "Write mnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_NO_WRITE_ACCESS, "No permission for disk files in dnode")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_INVALID_MSG_LEN, "Invalid message length")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_ACTION_IN_PROGRESS, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_DND_ACTION_IN_PROGRESS, "Action in progress")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_TOO_MANY_VNODES, "Too many vnode directories")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_EXITING, "Dnode is exiting") TAOS_DEFINE_ERROR(TSDB_CODE_DND_EXITING, "Dnode is exiting")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_PARSE_VNODE_FILE_ERROR, "Parse vnodes.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_INVALID_MSG_LEN, "Invalid message length")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_PARSE_DNODE_FILE_ERROR, "Parse dnodes.json error") TAOS_DEFINE_ERROR(TSDB_CODE_DND_DNODE_READ_FILE_ERROR, "Read dnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_DNODE_WRITE_FILE_ERROR, "Write dnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ALREADY_DEPLOYED, "Mnode already deployed")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_NOT_DEPLOYED, "Mnode not deployed")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ID_INVALID, "Mnode Id invalid")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_ID_NOT_FOUND, "Mnode Id not found")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_READ_FILE_ERROR, "Read mnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_MNODE_WRITE_FILE_ERROR, "Write mnode.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_TOO_MANY_VNODES, "Too many vnode directories")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_READ_FILE_ERROR, "Read vnodes.json error")
TAOS_DEFINE_ERROR(TSDB_CODE_DND_VNODE_WRITE_FILE_ERROR, "Write vnodes.json error")
// vnode // vnode
TAOS_DEFINE_ERROR(TSDB_CODE_VND_ACTION_IN_PROGRESS, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_VND_ACTION_IN_PROGRESS, "Action in progress")
......
...@@ -68,6 +68,8 @@ typedef struct { ...@@ -68,6 +68,8 @@ typedef struct {
pthread_mutex_t logMutex; pthread_mutex_t logMutex;
} SLogObj; } SLogObj;
int8_t tscEmbeddedInUtil = 0;
int32_t tsLogKeepDays = 0; int32_t tsLogKeepDays = 0;
int8_t tsAsyncLog = 1; int8_t tsAsyncLog = 1;
float tsTotalLogDirGB = 0; float tsTotalLogDirGB = 0;
......
...@@ -40,7 +40,7 @@ static void taosInitNote(int32_t numOfLines, int32_t maxNotes, SNoteObj *pNote, ...@@ -40,7 +40,7 @@ static void taosInitNote(int32_t numOfLines, int32_t maxNotes, SNoteObj *pNote,
taosNotePrint(pNote, "=================================================="); taosNotePrint(pNote, "==================================================");
} }
void taosInitNotes() { int32_t taosInitNotes() {
char name[TSDB_FILENAME_LEN * 2] = {0}; char name[TSDB_FILENAME_LEN * 2] = {0};
#if 0 #if 0
...@@ -58,7 +58,8 @@ void taosInitNotes() { ...@@ -58,7 +58,8 @@ void taosInitNotes() {
snprintf(name, TSDB_FILENAME_LEN * 2, "%s/taosinfo", tsLogDir); snprintf(name, TSDB_FILENAME_LEN * 2, "%s/taosinfo", tsLogDir);
taosInitNote(tsNumOfLogLines, 1, &tsInfoNote, name); taosInitNote(tsNumOfLogLines, 1, &tsInfoNote, name);
} }
#endif #endif
return 0;
} }
static bool taosLockNote(int32_t fd, SNoteObj *pNote) { static bool taosLockNote(int32_t fd, SNoteObj *pNote) {
......
...@@ -107,7 +107,7 @@ bool taosQueueEmpty(taos_queue param) { ...@@ -107,7 +107,7 @@ bool taosQueueEmpty(taos_queue param) {
if (queue->head == NULL && queue->tail == NULL) { if (queue->head == NULL && queue->tail == NULL) {
empty = true; empty = true;
} }
pthread_mutex_destroy(&queue->mutex); pthread_mutex_unlock(&queue->mutex);
return empty; return empty;
} }
......
...@@ -50,7 +50,7 @@ void tWorkerCleanup(SWorkerPool *pool) { ...@@ -50,7 +50,7 @@ void tWorkerCleanup(SWorkerPool *pool) {
} }
} }
free(pool->workers); tfree(pool->workers);
taosCloseQset(pool->qset); taosCloseQset(pool->qset);
pthread_mutex_destroy(&pool->mutex); pthread_mutex_destroy(&pool->mutex);
...@@ -159,7 +159,7 @@ void tMWorkerCleanup(SMWorkerPool *pool) { ...@@ -159,7 +159,7 @@ void tMWorkerCleanup(SMWorkerPool *pool) {
} }
} }
free(pool->workers); tfree(pool->workers);
pthread_mutex_destroy(&pool->mutex); pthread_mutex_destroy(&pool->mutex);
uInfo("worker:%s is closed", pool->name); uInfo("worker:%s is closed", pool->name);
......
...@@ -371,7 +371,7 @@ void tscSaveSubscriptionProgress(void* sub) { ...@@ -371,7 +371,7 @@ void tscSaveSubscriptionProgress(void* sub) {
char path[256]; char path[256];
sprintf(path, "%s/subscribe", tsDataDir); sprintf(path, "%s/subscribe", tsDataDir);
if (!taosMkDir(path)) { if (taosMkDir(path) != 0) {
tscError("failed to create subscribe dir: %s", path); tscError("failed to create subscribe dir: %s", path);
} }
......
...@@ -148,7 +148,7 @@ void taos_init_imp(void) { ...@@ -148,7 +148,7 @@ void taos_init_imp(void) {
} }
taosReadGlobalCfg(); taosReadGlobalCfg();
if (taosCheckGlobalCfg()) { if (taosCheckGlobalCfg() != 0) {
tscInitRes = -1; tscInitRes = -1;
return; return;
} }
......
...@@ -106,7 +106,7 @@ int main(int argc, char* argv[]) { ...@@ -106,7 +106,7 @@ int main(int argc, char* argv[]) {
taosInitGlobalCfg(); taosInitGlobalCfg();
taosReadGlobalLogCfg(); taosReadGlobalLogCfg();
if (!taosReadGlobalCfg()) { if (taosReadGlobalCfg() != 0) {
printf("TDengine read global config failed"); printf("TDengine read global config failed");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
......
...@@ -252,7 +252,7 @@ int tfsMkdirAt(const char *rname, int level, int id) { ...@@ -252,7 +252,7 @@ int tfsMkdirAt(const char *rname, int level, int id) {
char aname[TMPNAME_LEN]; char aname[TMPNAME_LEN];
snprintf(aname, TMPNAME_LEN, "%s/%s", DISK_DIR(pDisk), rname); snprintf(aname, TMPNAME_LEN, "%s/%s", DISK_DIR(pDisk), rname);
if (!taosMkDir(aname)) { if (taosMkDir(aname) != 0) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return -1; return -1;
} }
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_WAL_INT_H
#define TDENGINE_WAL_INT_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tlog.h"
extern int32_t wDebugFlag;
/*
 * WAL module logging macros. Each one forwards its arguments to
 * taosPrintLog() only when the matching level bit is set in wDebugFlag.
 * Fatal/error/warn/info pass 255 as the second argument; debug/trace pass
 * wDebugFlag itself.
 * NOTE(review): every macro expands to a bare `{ ... }` block rather than
 * `do { ... } while (0)`, so `if (x) wError(...); else ...` will not compile.
 */
#define wFatal(...)                                 \
  {                                                 \
    if (wDebugFlag & DEBUG_FATAL) {                 \
      taosPrintLog("WAL FATAL ", 255, __VA_ARGS__); \
    }                                               \
  }
#define wError(...)                                 \
  {                                                 \
    if (wDebugFlag & DEBUG_ERROR) {                 \
      taosPrintLog("WAL ERROR ", 255, __VA_ARGS__); \
    }                                               \
  }
#define wWarn(...)                                  \
  {                                                 \
    if (wDebugFlag & DEBUG_WARN) {                  \
      taosPrintLog("WAL WARN ", 255, __VA_ARGS__);  \
    }                                               \
  }
#define wInfo(...)                                  \
  {                                                 \
    if (wDebugFlag & DEBUG_INFO) {                  \
      taosPrintLog("WAL ", 255, __VA_ARGS__);       \
    }                                               \
  }
#define wDebug(...)                                    \
  {                                                    \
    if (wDebugFlag & DEBUG_DEBUG) {                    \
      taosPrintLog("WAL ", wDebugFlag, __VA_ARGS__);   \
    }                                                  \
  }
#define wTrace(...)                                    \
  {                                                    \
    if (wDebugFlag & DEBUG_TRACE) {                    \
      taosPrintLog("WAL ", wDebugFlag, __VA_ARGS__);   \
    }                                                  \
  }
/* Literal prefix string for WAL files, and its length without the NUL. */
#define WAL_PREFIX "wal"
#define WAL_PREFIX_LEN 3
/* NOTE(review): presumably a refresh/timer period in milliseconds -- confirm against its user. */
#define WAL_REFRESH_MS 1000
/* TSDB_MAX_WAL_SIZE plus one SWalHead plus 16 extra bytes. */
#define WAL_MAX_SIZE (TSDB_MAX_WAL_SIZE + sizeof(SWalHead) + 16)
/* 32-bit signature constant. NOTE(review): presumably stamped into each record head -- confirm. */
#define WAL_SIGNATURE ((uint32_t)(0xFAFBFDFE))
/* Capacity of SWal.path. */
#define WAL_PATH_LEN (TSDB_FILENAME_LEN + 12)
/* Capacity of SWal.name: WAL_PATH_LEN plus 32 extra bytes. */
#define WAL_FILE_LEN (WAL_PATH_LEN + 32)
/* Currently 1; the trailing comment records an earlier value of 3. */
#define WAL_FILE_NUM 1 // 3
/*
 * State object for one write-ahead log instance.
 * NOTE(review): the per-field notes below are inferred from field names and
 * from the buffer-size macros only; confirm against the WAL implementation.
 */
typedef struct {
/* presumably the latest record version handled by this WAL -- TODO confirm */
uint64_t version;
/* presumably the id of the WAL file currently in use -- TODO confirm */
int64_t fileId;
int64_t rid;
/* presumably a handle/descriptor for the open WAL file -- TODO confirm */
int64_t tfd;
int32_t vgId;
int32_t keep;
int32_t level;
int32_t fsyncPeriod;
int32_t fsyncSeq;
int8_t stop;
int8_t reserved[3];
/* buffer of WAL_PATH_LEN bytes */
char path[WAL_PATH_LEN];
/* buffer of WAL_FILE_LEN bytes */
char name[WAL_FILE_LEN];
pthread_mutex_t mutex;
} SWal;
/*
 * WAL file-id lookup helpers; each takes the WAL object and reports an id
 * through its int64_t out-parameter.
 * NOTE(review): only the declarations are visible here -- return-code
 * conventions and the exact meaning of minDiff must be confirmed against the
 * definitions.
 */
int32_t walGetNextFile(SWal *pWal, int64_t *nextFileId);
int32_t walGetOldFile(SWal *pWal, int64_t curFileId, int32_t minDiff, int64_t *oldFileId);
int32_t walGetNewFile(SWal *pWal, int64_t *newFileId);
#ifdef __cplusplus
}
#endif
#endif
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//#define _DEFAULT_SOURCE
#include "os.h"
#include "tutil.h"
#include "tglobal.h"
#include "tlog.h"
#include "twal.h"
#include "tfile.h"
/* Highest record version seen so far: raised by writeToQueue() during restore,
 * optionally seeded by the -v option, and pre-incremented for every record
 * written in main(). */
int64_t ver = 0;
/* WAL handle returned by walOpen() in main(); shared with writeToQueue(). */
void *pWal = NULL;
/*
 * Restore callback handed to walRestore().
 *
 * Treats `data` as a SWalHead, raises the global `ver` to the record's
 * version when that version is larger, and writes the record back through
 * walWrite() on the global `pWal` handle. `pVnode`, `type` and `pMsg` are not
 * used. The result of walWrite() is ignored and the function always returns 0.
 */
int writeToQueue(void *pVnode, void *data, int type, void *pMsg) {
  SWalHead *restored = data;

  if (restored->version > ver) {
    ver = restored->version;
  }

  walWrite(pWal, restored);
  return 0;
}
/*
 * WAL test driver.
 *
 * Parses the command-line options, opens the WAL at `path`, replays any
 * existing records through writeToQueue(), then writes `total` rounds of
 * `rows` records (each carrying `size` payload bytes) and calls walRenew()
 * after every round. Finally it lists the WAL files via walGetWalFile(),
 * waits for a key press and shuts the WAL down.
 *
 * Any unrecognised option (including -h) prints the usage text and exits
 * with status 0. Failures to open/restore the WAL, an invalid record size or
 * a failed allocation exit with status -1. Returns 0 on normal completion.
 */
int main(int argc, char *argv[]) {
  char path[128] = "/tmp/wal";
  int  level = 2;
  int  total = 5;
  int  rows = 10000;
  int  size = 128;
  int  keep = 0;

  for (int i = 1; i < argc; ++i) {
    if (strcmp(argv[i], "-p") == 0 && i < argc - 1) {
      tstrncpy(path, argv[++i], sizeof(path));
    } else if (strcmp(argv[i], "-l") == 0 && i < argc - 1) {
      level = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-r") == 0 && i < argc - 1) {
      rows = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-k") == 0 && i < argc - 1) {
      keep = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-t") == 0 && i < argc - 1) {
      total = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-s") == 0 && i < argc - 1) {
      size = atoi(argv[++i]);
    } else if (strcmp(argv[i], "-v") == 0 && i < argc - 1) {
      ver = atoll(argv[++i]);
    } else if (strcmp(argv[i], "-d") == 0 && i < argc - 1) {
      dDebugFlag = atoi(argv[++i]);
    } else {
      printf("\nusage: %s [options] \n", argv[0]);
      printf(" [-p path]: wal file path default is:%s\n", path);
      printf(" [-l level]: log level, default is:%d\n", level);
      printf(" [-t total]: total wal files, default is:%d\n", total);
      printf(" [-r rows]: rows of records per wal file, default is:%d\n", rows);
      printf(" [-s size]: payload bytes per record, default is:%d\n", size);
      printf(" [-k keep]: keep the wal after closing, default is:%d\n", keep);
      printf(" [-v version]: initial version, default is:%" PRId64 "\n", ver);
      printf(" [-d debugFlag]: debug flag, default:%d\n", dDebugFlag);
      printf(" [-h help]: print out this help\n\n");
      exit(0);
    }
  }

  // A negative payload size would make the allocation size below meaningless.
  if (size < 0) {
    printf("invalid record size:%d\n", size);
    exit(-1);
  }

  taosInitLog("wal.log", 100000, 10);
  tfInit();
  walInit();

  SWalCfg walCfg = {0};
  walCfg.walLevel = level;
  walCfg.keep = keep;

  pWal = walOpen(path, &walCfg);
  if (pWal == NULL) {
    printf("failed to open wal\n");
    exit(-1);
  }

  int ret = walRestore(pWal, NULL, writeToQueue);
  if (ret < 0) {
    printf("failed to restore wal\n");
    exit(-1);
  }

  printf("version starts from:%" PRId64 "\n", ver);

  // Zero-filled so that header fields this test does not set, and the payload
  // bytes, are not indeterminate when handed to walWrite().
  int       contLen = sizeof(SWalHead) + size;
  SWalHead *pHead = calloc(1, (size_t)contLen);
  if (pHead == NULL) {
    printf("failed to allocate wal record, size:%d\n", contLen);
    exit(-1);
  }

  for (int i = 0; i < total; ++i) {
    for (int k = 0; k < rows; ++k) {
      pHead->version = ++ver;
      pHead->len = size;
      walWrite(pWal, pHead);
    }

    printf("renew a wal, i:%d\n", i);
    walRenew(pWal);
  }

  // NOTE(review): assumes walWrite() does not retain the record pointer after
  // it returns -- confirm against the WAL implementation.
  free(pHead);
  pHead = NULL;

  printf("%d wal files are written\n", total);

  int64_t index = 0;
  char    name[256];

  while (1) {
    int code = walGetWalFile(pWal, name, &index);
    if (code == -1) {
      printf("failed to get wal file, index:%" PRId64 "\n", index);
      break;
    }

    printf("index:%" PRId64 " wal:%s\n", index, name);
    if (code == 0) break;
  }

  getchar();

  walClose(pWal);
  walCleanUp();
  tfCleanup();

  return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册