From 28341e3182f58cc34ffbd854fb1d068072b2d7d9 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sun, 10 Oct 2021 21:36:32 +0800 Subject: [PATCH] [TD-10432] add vnode files to vnode module --- include/common/taosmsg.h | 16 + include/common/tglobal.h | 1 + source/common/src/tglobal.c | 1 + source/server/vnode/CMakeLists.txt | 2 + source/server/vnode/inc/vnodeInt.h | 68 ++- {src => source/server}/vnode/inc/vnodeMain.h | 22 +- .../server/vnode/inc/vnodeMgmt.h | 11 +- .../server/vnode/inc/vnodeMgmtMsg.h | 19 +- .../server/vnode/inc/vnodeRead.h | 28 +- .../server/vnode/inc/vnodeReadMsg.h | 26 +- .../server}/vnode/inc/vnodeStatus.h | 29 +- .../server}/vnode/inc/vnodeVersion.h | 10 +- .../server}/vnode/inc/vnodeWorker.h | 15 +- .../server/vnode/inc/vnodeWrite.h | 20 +- source/server/vnode/inc/vnodeWriteMsg.h | 22 +- {src => source/server}/vnode/src/vnodeCfg.c | 21 +- source/server/vnode/src/vnodeInt.c | 37 +- {src => source/server}/vnode/src/vnodeMain.c | 516 +++++++++++++++--- source/server/vnode/src/vnodeMgmt.c | 117 ++++ source/server/vnode/src/vnodeMgmtMsg.c | 93 ++++ source/server/vnode/src/vnodeRead.c | 221 ++++++++ .../server/vnode/src/vnodeReadMsg.c | 255 ++------- .../server}/vnode/src/vnodeStatus.c | 77 +-- .../server}/vnode/src/vnodeVersion.c | 6 +- source/server/vnode/src/vnodeWorker.c | 110 ++++ source/server/vnode/src/vnodeWrite.c | 236 ++++++++ source/server/vnode/src/vnodeWriteMsg.c | 22 +- src/inc/twal.h | 74 --- src/raft/CMakeLists.txt | 7 - src/vnode/CMakeLists.txt | 14 - src/vnode/inc/vnodeInt.h | 83 --- src/vnode/inc/vnodeSync.h | 40 -- src/vnode/src/vnodeBackup.c | 174 ------ src/vnode/src/vnodeMgmt.c | 214 -------- src/vnode/src/vnodeSync.c | 180 ------ src/vnode/src/vnodeWorker.c | 206 ------- src/vnode/src/vnodeWrite.c | 408 -------------- 37 files changed, 1549 insertions(+), 1852 deletions(-) rename {src => source/server}/vnode/inc/vnodeMain.h (63%) rename src/vnode/inc/vnodeCfg.h => source/server/vnode/inc/vnodeMgmt.h (80%) rename 
src/vnode/inc/vnodeRead.h => source/server/vnode/inc/vnodeMgmtMsg.h (64%) rename src/vnode/inc/vnodeMgmt.h => source/server/vnode/inc/vnodeRead.h (55%) rename src/vnode/inc/vnodeWrite.h => source/server/vnode/inc/vnodeReadMsg.h (61%) rename {src => source/server}/vnode/inc/vnodeStatus.h (59%) rename {src => source/server}/vnode/inc/vnodeVersion.h (80%) rename {src => source/server}/vnode/inc/vnodeWorker.h (71%) rename src/vnode/inc/vnodeBackup.h => source/server/vnode/inc/vnodeWrite.h (60%) rename {src => source/server}/vnode/src/vnodeCfg.c (97%) rename {src => source/server}/vnode/src/vnodeMain.c (52%) create mode 100644 source/server/vnode/src/vnodeMgmt.c create mode 100644 source/server/vnode/src/vnodeMgmtMsg.c create mode 100644 source/server/vnode/src/vnodeRead.c rename src/vnode/src/vnodeRead.c => source/server/vnode/src/vnodeReadMsg.c (53%) rename {src => source/server}/vnode/src/vnodeStatus.c (61%) rename {src => source/server}/vnode/src/vnodeVersion.c (96%) create mode 100644 source/server/vnode/src/vnodeWorker.c create mode 100644 source/server/vnode/src/vnodeWrite.c delete mode 100644 src/inc/twal.h delete mode 100644 src/raft/CMakeLists.txt delete mode 100644 src/vnode/CMakeLists.txt delete mode 100644 src/vnode/inc/vnodeInt.h delete mode 100644 src/vnode/inc/vnodeSync.h delete mode 100644 src/vnode/src/vnodeBackup.c delete mode 100644 src/vnode/src/vnodeMgmt.c delete mode 100644 src/vnode/src/vnodeSync.c delete mode 100644 src/vnode/src/vnodeWorker.c delete mode 100644 src/vnode/src/vnodeWrite.c diff --git a/include/common/taosmsg.h b/include/common/taosmsg.h index 72ce3a946c..8f89df40d0 100644 --- a/include/common/taosmsg.h +++ b/include/common/taosmsg.h @@ -990,6 +990,22 @@ typedef struct { /* data */ } SAlterTableRsp; +typedef struct { + /* data */ +} SDropStableReq; + +typedef struct { + /* data */ +} SDropStableRsp; + +typedef struct { + /* data */ +} SUpdateTagValReq; + +typedef struct { + /* data */ +} SUpdateTagValRsp; + #pragma pack(pop) 
#ifdef __cplusplus diff --git a/include/common/tglobal.h b/include/common/tglobal.h index ffabe0d935..ad2924b1e4 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -57,6 +57,7 @@ extern int32_t tsCompressMsgSize; extern int32_t tsCompressColData; extern int32_t tsMaxNumOfDistinctResults; extern char tsTempDir[]; +extern int64_t tsMaxVnodeQueuedBytes; //query buffer management extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9eab95cd41..745e3ad1ee 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -60,6 +60,7 @@ float tsRatioOfQueryCores = 1.0f; int8_t tsDaylight = 0; int8_t tsEnableCoreFile = 0; int32_t tsMaxBinaryDisplayWidth = 30; +int64_t tsMaxVnodeQueuedBytes = 1024*1024*1024; //1GB /* * denote if the server needs to compress response message at the application layer to client, including query rsp, diff --git a/source/server/vnode/CMakeLists.txt b/source/server/vnode/CMakeLists.txt index 03421b2628..04bc1f698d 100644 --- a/source/server/vnode/CMakeLists.txt +++ b/source/server/vnode/CMakeLists.txt @@ -15,4 +15,6 @@ target_link_libraries( PUBLIC meta PUBLIC tq PUBLIC tsdb + PUBLIC wal + PUBLIC cjson ) \ No newline at end of file diff --git a/source/server/vnode/inc/vnodeInt.h b/source/server/vnode/inc/vnodeInt.h index 545d376f49..5f07c5819c 100644 --- a/source/server/vnode/inc/vnodeInt.h +++ b/source/server/vnode/inc/vnodeInt.h @@ -16,7 +16,6 @@ #ifndef _TD_VNODE_INT_H_ #define _TD_VNODE_INT_H_ - #include "os.h" #include "amalloc.h" #include "meta.h" @@ -25,20 +24,83 @@ #include "trpc.h" #include "tsdb.h" #include "vnode.h" +#include "tlog.h" +#include "tqueue.h" +#include "wal.h" +#include "tworker.h" #ifdef __cplusplus extern "C" { #endif -typedef struct SVnode { +extern int32_t vDebugFlag; + +#define vFatal(...) 
{ if (vDebugFlag & DEBUG_FATAL) { taosPrintLog("VND FATAL ", 255, __VA_ARGS__); }} +#define vError(...) { if (vDebugFlag & DEBUG_ERROR) { taosPrintLog("VND ERROR ", 255, __VA_ARGS__); }} +#define vWarn(...) { if (vDebugFlag & DEBUG_WARN) { taosPrintLog("VND WARN ", 255, __VA_ARGS__); }} +#define vInfo(...) { if (vDebugFlag & DEBUG_INFO) { taosPrintLog("VND ", 255, __VA_ARGS__); }} +#define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} +#define vTrace(...) { if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} + +typedef struct { SMeta * pMeta; STsdb * pTsdb; STQ * pTQ; SMemAllocator *allocator; + + int32_t vgId; // global vnode group ID + int32_t refCount; // reference count + int64_t queuedWMsgSize; + int32_t queuedWMsg; + int32_t queuedRMsg; + int32_t numOfExistQHandle; // current initialized and existed query handle in current dnode + int32_t flowctrlLevel; + int8_t preClose; // drop and close switch + int8_t reserved[3]; + int64_t sequence; // for topic + int8_t status; + int8_t role; + int8_t accessState; + int8_t isFull; + int8_t isCommiting; + int8_t dbReplica; + int8_t dropped; + int8_t dbType; + uint64_t version; // current version + uint64_t cversion; // version while commit start + uint64_t fversion; // version on saved data file + void * wqueue; // write queue + void * qqueue; // read query queue + void * fqueue; // read fetch/cancel queue + void * wal; + void * tsdb; + int64_t sync; + void * events; + void * cq; // continuous query + int32_t dbCfgVersion; + int32_t vgCfgVersion; + STsdbCfg tsdbCfg; +#if 0 + SSyncCfg syncCfg; +#endif + SWalCfg walCfg; + void * qMgmt; + char * rootDir; + tsem_t sem; + char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN]; + pthread_mutex_t statusMutex; } SVnode; +typedef struct { + int32_t len; + void * rsp; + void * qhandle; // used by query and retrieve msg +} SVnRsp; + +void vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port); + 
#ifdef __cplusplus } #endif -#endif /*_TD_VNODE_INT_H_*/ \ No newline at end of file +#endif /*_TD_VNODE_INT_H_*/ diff --git a/src/vnode/inc/vnodeMain.h b/source/server/vnode/inc/vnodeMain.h similarity index 63% rename from src/vnode/inc/vnodeMain.h rename to source/server/vnode/inc/vnodeMain.h index 91a5d632cd..093d07b013 100644 --- a/src/vnode/inc/vnodeMain.h +++ b/source/server/vnode/inc/vnodeMain.h @@ -13,25 +13,35 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef TDENGINE_VNODE_MAIN_H -#define TDENGINE_VNODE_MAIN_H +#ifndef _TD_VNODE_MAIN_H_ +#define _TD_VNODE_MAIN_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" +int32_t vnodeInitMain(); +void vnodeCleanupMain(); + int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg); int32_t vnodeDrop(int32_t vgId); int32_t vnodeOpen(int32_t vgId); -int32_t vnodeAlter(void *pVnode, SCreateVnodeMsg *pVnodeCfg); +int32_t vnodeAlter(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg); int32_t vnodeSync(int32_t vgId); int32_t vnodeClose(int32_t vgId); -void vnodeCleanUp(SVnodeObj *pVnode); -void vnodeDestroy(SVnodeObj *pVnode); +void vnodeCleanUp(SVnode *pVnode); +void vnodeDestroy(SVnode *pVnode); +int32_t vnodeCompact(int32_t vgId); +void vnodeBackup(int32_t vgId); +void vnodeGetStatus(struct SStatusMsg *status); + +SVnode *vnodeAcquire(int32_t vgId); +SVnode *vnodeAcquireNotClose(int32_t vgId); +void vnodeRelease(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_MAIN_H_*/ diff --git a/src/vnode/inc/vnodeCfg.h b/source/server/vnode/inc/vnodeMgmt.h similarity index 80% rename from src/vnode/inc/vnodeCfg.h rename to source/server/vnode/inc/vnodeMgmt.h index ba148c07c1..23dc826db7 100644 --- a/src/vnode/inc/vnodeCfg.h +++ b/source/server/vnode/inc/vnodeMgmt.h @@ -13,19 +13,20 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef TDENGINE_VNODE_CFG_H -#define TDENGINE_VNODE_CFG_H +#ifndef _TD_VNODE_MGMT_H_ +#define _TD_VNODE_MGMT_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeReadCfg(SVnodeObj *pVnode); -int32_t vnodeWriteCfg(SCreateVnodeMsg *pVnodeCfg); +int32_t vnodeInitMgmt(); +void vnodeCleanupMgmt(); +void vnodeProcessMgmtMsg(SRpcMsg *pMsg); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_MGMT_H_*/ diff --git a/src/vnode/inc/vnodeRead.h b/source/server/vnode/inc/vnodeMgmtMsg.h similarity index 64% rename from src/vnode/inc/vnodeRead.h rename to source/server/vnode/inc/vnodeMgmtMsg.h index 0e9655f837..4d5533f2fe 100644 --- a/src/vnode/inc/vnodeRead.h +++ b/source/server/vnode/inc/vnodeMgmtMsg.h @@ -13,24 +13,23 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef TDENGINE_VNODE_READ_H -#define TDENGINE_VNODE_READ_H +#ifndef _TD_VNODE_MGMT_MSG_H_ +#define _TD_VNODE_MGMT_MSG_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeInitRead(void); -void vnodeCleanupRead(void); - -int32_t vnodeWriteToRQueue(void *pVnode, void *pCont, int32_t contLen, int8_t qtype, void *rparam); -void vnodeFreeFromRQueue(void *pVnode, SVReadMsg *pRead); -int32_t vnodeProcessRead(void *pVnode, SVReadMsg *pRead); -void vnodeWaitReadCompleted(SVnodeObj *pVnode); +int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg); +int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg); +int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg); +int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg); +int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg); +int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_MGMT_MSG_H_*/ diff --git a/src/vnode/inc/vnodeMgmt.h b/source/server/vnode/inc/vnodeRead.h similarity index 55% rename from src/vnode/inc/vnodeMgmt.h rename to source/server/vnode/inc/vnodeRead.h index 5a7e745619..e5efae3d93 100644 --- a/src/vnode/inc/vnodeMgmt.h +++ b/source/server/vnode/inc/vnodeRead.h @@ -13,30
+13,30 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef TDENGINE_VNODE_MGMT_H -#define TDENGINE_VNODE_MGMT_H +#ifndef _TD_VNODE_READ_H_ +#define _TD_VNODE_READ_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeInitMgmt(); -void vnodeCleanupMgmt(); +int32_t vnodeInitRead(); +void vnodeCleanupRead(); +taos_queue vnodeAllocQueryQueue(SVnode *pVnode); +taos_queue vnodeAllocFetchQueue(SVnode *pVnode); +void vnodeFreeQueryQueue(taos_queue pQueue); +void vnodeFreeFetchQueue(taos_queue pQueue); -void* vnodeAcquire(int32_t vgId); -void vnodeRelease(void *pVnode); -void* vnodeGetWal(void *pVnode); +void vnodeProcessReadMsg(SRpcMsg *pRpcMsg); +int32_t vnodeReputPutToRQueue(SVnode *pVnode, void **qhandle, void *ahandle); -int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes); -void vnodeBuildStatusMsg(void *pStatus); -void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes); - -void vnodeAddIntoHash(SVnodeObj* pVnode); -void vnodeRemoveFromHash(SVnodeObj * pVnode); +void vnodeStartRead(SVnode *pVnode); +void vnodeStopRead(SVnode *pVnode); +void vnodeWaitReadCompleted(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_READ_H_*/ diff --git a/src/vnode/inc/vnodeWrite.h b/source/server/vnode/inc/vnodeReadMsg.h similarity index 61% rename from src/vnode/inc/vnodeWrite.h rename to source/server/vnode/inc/vnodeReadMsg.h index e996bc0b06..a1efb729e1 100644 --- a/src/vnode/inc/vnodeWrite.h +++ b/source/server/vnode/inc/vnodeReadMsg.h @@ -13,24 +13,32 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef TDENGINE_VNODE_WRITE_H -#define TDENGINE_VNODE_WRITE_H +#ifndef _TD_VNODE_READ_MSG_H_ +#define _TD_VNODE_READ_MSG_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeInitWrite(void); -void vnodeCleanupWrite(void); +typedef struct SReadMsg { + int32_t code; + int32_t contLen; + int8_t qtype; + int8_t msgType; + SVnode *pVnode; + SVnRsp rspRet; + void * rpcHandle; + void * rpcAhandle; + void * qhandle; + char pCont[]; +} SReadMsg; -int32_t vnodeWriteToWQueue(void *pVnode, void *pHead, int32_t qtype, void *pRpcMsg); -void vnodeFreeFromWQueue(void *pVnode, SVWriteMsg *pWrite); -int32_t vnodeProcessWrite(void *pVnode, void *pHead, int32_t qtype, void *pRspRet); -void vnodeWaitWriteCompleted(SVnodeObj *pVnode); +int32_t vnodeProcessQueryMsg(SVnode *pVnode, SReadMsg *pRead); +int32_t vnodeProcessFetchMsg(SVnode *pVnode, SReadMsg *pRead); #ifdef __cplusplus } #endif -#endif \ No newline at end of file +#endif /*_TD_VNODE_READ_MSG_H_*/ diff --git a/src/vnode/inc/vnodeStatus.h b/source/server/vnode/inc/vnodeStatus.h similarity index 59% rename from src/vnode/inc/vnodeStatus.h rename to source/server/vnode/inc/vnodeStatus.h index 910a6d71b2..c7f1b4c96d 100644 --- a/src/vnode/inc/vnodeStatus.h +++ b/source/server/vnode/inc/vnodeStatus.h @@ -13,8 +13,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef TDENGINE_VNODE_STATUS_H -#define TDENGINE_VNODE_STATUS_H +#ifndef _TD_VNODE_STATUS_H_ +#define _TD_VNODE_STATUS_H_ #ifdef __cplusplus extern "C" { @@ -25,24 +25,23 @@ typedef enum _VN_STATUS { TAOS_VN_STATUS_INIT = 0, TAOS_VN_STATUS_READY = 1, TAOS_VN_STATUS_CLOSING = 2, - TAOS_VN_STATUS_UPDATING = 3, - TAOS_VN_STATUS_RESET = 4, + TAOS_VN_STATUS_UPDATING = 3 } EVnodeStatus; -bool vnodeSetInitStatus(SVnodeObj* pVnode); -bool vnodeSetReadyStatus(SVnodeObj* pVnode); -bool vnodeSetClosingStatus(SVnodeObj* pVnode); -bool vnodeSetUpdatingStatus(SVnodeObj* pVnode); -bool vnodeSetResetStatus(SVnodeObj* pVnode); +// vnodeStatus +extern char* vnodeStatus[]; -bool vnodeInInitStatus(SVnodeObj* pVnode); -bool vnodeInReadyStatus(SVnodeObj* pVnode); -bool vnodeInReadyOrUpdatingStatus(SVnodeObj* pVnode); -bool vnodeInClosingStatus(SVnodeObj* pVnode); -bool vnodeInResetStatus(SVnodeObj* pVnode); +bool vnodeSetInitStatus(SVnode* pVnode); +bool vnodeSetReadyStatus(SVnode* pVnode); +bool vnodeSetClosingStatus(SVnode* pVnode); +bool vnodeSetUpdatingStatus(SVnode* pVnode); + +bool vnodeInInitStatus(SVnode* pVnode); +bool vnodeInReadyStatus(SVnode* pVnode); +bool vnodeInClosingStatus(SVnode* pVnode); #ifdef __cplusplus } #endif -#endif \ No newline at end of file +#endif /*_TD_VNODE_STATUS_H_*/ \ No newline at end of file diff --git a/src/vnode/inc/vnodeVersion.h b/source/server/vnode/inc/vnodeVersion.h similarity index 80% rename from src/vnode/inc/vnodeVersion.h rename to source/server/vnode/inc/vnodeVersion.h index 913e3915ab..81e6758559 100644 --- a/src/vnode/inc/vnodeVersion.h +++ b/source/server/vnode/inc/vnodeVersion.h @@ -13,19 +13,19 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef TDENGINE_VNODE_VERSION_H -#define TDENGINE_VNODE_VERSION_H +#ifndef _TD_VNODE_VERSION_H_ +#define _TD_VNODE_VERSION_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeReadVersion(SVnodeObj *pVnode); -int32_t vnodeSaveVersion(SVnodeObj *pVnode); +int32_t vnodeReadVersion(SVnode *pVnode); +int32_t vnodeSaveVersion(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_VERSION_H_*/ diff --git a/src/vnode/inc/vnodeWorker.h b/source/server/vnode/inc/vnodeWorker.h similarity index 71% rename from src/vnode/inc/vnodeWorker.h rename to source/server/vnode/inc/vnodeWorker.h index 01d9d42900..eea35011a8 100644 --- a/src/vnode/inc/vnodeWorker.h +++ b/source/server/vnode/inc/vnodeWorker.h @@ -13,21 +13,22 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef TDENGINE_VNODE_WORKER_H -#define TDENGINE_VNODE_WORKER_H +#ifndef _TD_VNODE_WORKER_H_ +#define _TD_VNODE_WORKER_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeInitMWorker(); -void vnodeCleanupMWorker(); -int32_t vnodeCleanupInMWorker(SVnodeObj *pVnode); -int32_t vnodeDestroyInMWorker(SVnodeObj *pVnode); +int32_t vnodeInitWorker(); +void vnodeCleanupWorker(); +void vnodeProcessCleanupTask(SVnode *pVnode); +void vnodeProcessDestroyTask(SVnode *pVnode); +void vnodeProcessBackupTask(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif \ No newline at end of file +#endif /*_TD_VNODE_WORKER_H_*/ \ No newline at end of file diff --git a/src/vnode/inc/vnodeBackup.h b/source/server/vnode/inc/vnodeWrite.h similarity index 60% rename from src/vnode/inc/vnodeBackup.h rename to source/server/vnode/inc/vnodeWrite.h index 0a6b26546c..48acf750c1 100644 --- a/src/vnode/inc/vnodeBackup.h +++ b/source/server/vnode/inc/vnodeWrite.h @@ -13,20 +13,28 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef TDENGINE_VNODE_BACKUP_H -#define TDENGINE_VNODE_BACKUP_H +#ifndef _TD_VNODE_WRITE_H_ +#define _TD_VNODE_WRITE_H_ #ifdef __cplusplus extern "C" { #endif #include "vnodeInt.h" -int32_t vnodeInitBackup(); -void vnodeCleanupBackup(); -int32_t vnodeBackup(int32_t vgId); +int32_t vnodeInitWrite(); +void vnodeCleanupWrite(); +taos_queue vnodeAllocWriteQueue(SVnode *pVnode); +void vnodeFreeWriteQueue(taos_queue pQueue); + +void vnodeProcessWriteMsg(SRpcMsg *pRpcMsg); +int32_t vnodeProcessWalMsg(SVnode *pVnode, SWalHead *pHead); + +void vnodeStartWrite(SVnode *pVnode); +void vnodeStopWrite(SVnode *pVnode); +void vnodeWaitWriteCompleted(SVnode *pVnode); #ifdef __cplusplus } #endif -#endif +#endif /*_TD_VNODE_WRITE_H_*/ \ No newline at end of file diff --git a/source/server/vnode/inc/vnodeWriteMsg.h b/source/server/vnode/inc/vnodeWriteMsg.h index b4430cd255..86cdba6946 100644 --- a/source/server/vnode/inc/vnodeWriteMsg.h +++ b/source/server/vnode/inc/vnodeWriteMsg.h @@ -13,9 +13,23 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#ifndef _TD_VNODE_WRITE_H_ -#define _TD_VNODE_WRITE_H_ +#ifndef _TD_VNODE_WRITE_MSG_H_ +#define _TD_VNODE_WRITE_MSG_H_ -int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp); +#ifdef __cplusplus +extern "C" { +#endif +#include "vnodeInt.h" + +int32_t vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp); +int32_t vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp); +int32_t vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp); +int32_t vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp); +int32_t vnodeProcessDropStableReq(SVnode *pVnode, SDropStableReq *pReq, SDropStableRsp *pRsp); +int32_t vnodeProcessUpdateTagValReq(SVnode *pVnode, SUpdateTagValReq *pReq, SUpdateTagValRsp *pRsp); -#endif /*_TD_VNODE_WRITE_H_*/ \ No newline at end of file +#ifdef __cplusplus +} +#endif + +#endif /*_TD_VNODE_WRITE_MSG_H_*/ \ No newline at end of file diff --git a/src/vnode/src/vnodeCfg.c b/source/server/vnode/src/vnodeCfg.c similarity index 97% rename from src/vnode/src/vnodeCfg.c rename to source/server/vnode/src/vnodeCfg.c index 2e1d761fcf..9c01a47f8f 100644 --- a/src/vnode/src/vnodeCfg.c +++ b/source/server/vnode/src/vnodeCfg.c @@ -17,10 +17,10 @@ #include "os.h" #include "cJSON.h" #include "tglobal.h" -#include "dnode.h" #include "vnodeCfg.h" -static void vnodeLoadCfg(SVnodeObj *pVnode, SCreateVnodeMsg* vnodeMsg) { +static void vnodeLoadCfg(SVnode *pVnode, SCreateVnodeMsg *vnodeMsg) { +#if 0 tstrncpy(pVnode->db, vnodeMsg->db, sizeof(pVnode->db)); pVnode->dbCfgVersion = vnodeMsg->cfg.dbCfgVersion; pVnode->vgCfgVersion = vnodeMsg->cfg.vgCfgVersion; @@ -56,9 +56,11 @@ static void vnodeLoadCfg(SVnodeObj *pVnode, SCreateVnodeMsg* vnodeMsg) { SNodeInfo *node = &pVnode->syncCfg.nodeInfo[i]; vInfo("vgId:%d, dnode:%d, %s:%u", pVnode->vgId, node->nodeId, node->nodeFqdn, node->nodePort); } +#endif } -int32_t vnodeReadCfg(SVnodeObj *pVnode) { 
+int32_t vnodeReadCfg(SVnode *pVnode) { +#if 0 int32_t ret = TSDB_CODE_VND_APP_ERROR; int32_t len = 0; int maxLen = 1000; @@ -66,6 +68,7 @@ int32_t vnodeReadCfg(SVnodeObj *pVnode) { cJSON * root = NULL; FILE * fp = NULL; bool nodeChanged = false; + SCreateVnodeMsg vnodeMsg; char file[TSDB_FILENAME_LEN + 30] = {0}; @@ -286,8 +289,13 @@ int32_t vnodeReadCfg(SVnodeObj *pVnode) { } tstrncpy(node->nodeEp, nodeEp->valuestring, TSDB_EP_LEN); - bool changed = dnodeCheckEpChanged(node->nodeId, node->nodeEp); - if (changed) nodeChanged = changed; + char nodeEpStr[TSDB_EP_LEN]; + vnodeGetDnodeEp(node->nodeId, nodeEpStr, NULL, NULL); + bool changed = (strcmp(node->nodeEp, nodeEpStr) != 0); + if (changed) { + tstrncpy(node->nodeEp, nodeEpStr, TSDB_EP_LEN); + nodeChanged = changed; + } } ret = TSDB_CODE_SUCCESS; @@ -350,7 +358,7 @@ int32_t vnodeWriteCfg(SCreateVnodeMsg *pMsg) { len += snprintf(content + len, maxLen - len, " \"nodeInfos\": [{\n"); for (int32_t i = 0; i < pMsg->cfg.vgReplica; i++) { SVnodeDesc *node = &pMsg->nodes[i]; - dnodeUpdateEp(node->nodeId, node->nodeEp, NULL, NULL); + vnodeGetDnodeEp(node->nodeId, node->nodeEp, NULL, NULL); len += snprintf(content + len, maxLen - len, " \"nodeId\": %d,\n", node->nodeId); len += snprintf(content + len, maxLen - len, " \"nodeEp\": \"%s\"\n", node->nodeEp); if (i < pMsg->cfg.vgReplica - 1) { @@ -368,5 +376,6 @@ int32_t vnodeWriteCfg(SCreateVnodeMsg *pMsg) { terrno = 0; vInfo("vgId:%d, successed to write %s", pMsg->cfg.vgId, file); +#endif return TSDB_CODE_SUCCESS; } diff --git a/source/server/vnode/src/vnodeInt.c b/source/server/vnode/src/vnodeInt.c index ac14d2a756..a10c35fd98 100644 --- a/source/server/vnode/src/vnodeInt.c +++ b/source/server/vnode/src/vnodeInt.c @@ -13,16 +13,39 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -#include "vnodeInt.h" +#define _DEFAULT_SOURCE +#include "os.h" +#include "tstep.h" +#include "vnodeMain.h" +#include "vnodeMgmt.h" +#include "vnodeRead.h" +#include "vnodeWorker.h" +#include "vnodeWrite.h" -int32_t vnodeInit(SVnodePara para) { return 0; } +static struct { + struct SSteps *steps; + SVnodeFp fp; +} tsVint; -void vnodeCleanup() {} +int32_t vnodeInit(SVnodePara para) { + tsVint.fp = para.fp; -int32_t vnodeGetStatistics(SVnodeStat *stat) { return 0; } + struct SSteps *steps = taosStepInit(8, NULL); + if (steps == NULL) return -1; -void vnodeGetStatus(struct SStatusMsg *status) {} + taosStepAdd(steps, "vnode-main", vnodeInitMain, vnodeCleanupMain); + taosStepAdd(steps, "vnode-worker",vnodeInitWorker, vnodeCleanupWorker); + taosStepAdd(steps, "vnode-read", vnodeInitRead, vnodeCleanupRead); + taosStepAdd(steps, "vnode-mgmt", vnodeInitMgmt, vnodeCleanupMgmt); + taosStepAdd(steps, "vnode-write", vnodeInitWrite, vnodeCleanupWrite); + // taosStepAdd(steps, "vnode-queue", tsdbInitCommitQueue, tsdbDestroyCommitQueue); -void vnodeSetAccess(struct SVgroupAccess *access, int32_t numOfVnodes) {} + tsVint.steps = steps; + return taosStepExec(tsVint.steps); +} -void vnodeProcessMsg(SRpcMsg *msg) {} +void vnodeCleanup() { taosStepCleanup(tsVint.steps); } + +void vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port) { + return (*tsVint.fp.GetDnodeEp)(dnodeId, ep, fqdn, port); +} \ No newline at end of file diff --git a/src/vnode/src/vnodeMain.c b/source/server/vnode/src/vnodeMain.c similarity index 52% rename from src/vnode/src/vnodeMain.c rename to source/server/vnode/src/vnodeMain.c index c823880ae2..d9c1a88d15 100644 --- a/src/vnode/src/vnodeMain.c +++ b/source/server/vnode/src/vnodeMain.c @@ -18,30 +18,104 @@ #include "taoserror.h" #include "taosmsg.h" #include "tglobal.h" -#include "tfs.h" -#include "query.h" -#include "dnode.h" +#include "ttimer.h" +#include "thash.h" +// #include "query.h" #include "vnodeCfg.h" +#include "vnodeMain.h" 
+#include "vnodeMgmt.h" +#include "vnodeRead.h" #include "vnodeStatus.h" -#include "vnodeSync.h" #include "vnodeVersion.h" -#include "vnodeMgmt.h" #include "vnodeWorker.h" -#include "vnodeBackup.h" -#include "vnodeMain.h" +#include "vnodeWrite.h" + +typedef struct { + pthread_t thread; + int32_t threadIndex; + int32_t failed; + int32_t opened; + int32_t vnodeNum; + int32_t * vnodeList; +} SOpenVnodeThread; + +static struct { + void * timer; + SHashObj *hash; + int32_t openVnodes; + int32_t totalVnodes; + void (*msgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); +} tsVmain; + +static void vnodeIncRef(void *ptNode) { + assert(ptNode != NULL); + + SVnode **ppVnode = (SVnode **)ptNode; + assert(ppVnode); + assert(*ppVnode); + + SVnode *pVnode = *ppVnode; + atomic_add_fetch_32(&pVnode->refCount, 1); + vTrace("vgId:%d, get vnode, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); +} + +SVnode *vnodeAcquire(int32_t vgId) { + SVnode *pVnode = NULL; + +#if 0 + taosHashGetClone(tsVmain.hash, &vgId, sizeof(int32_t), vnodeIncRef, &pVnode); +#endif + if (pVnode == NULL) { + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + vDebug("vgId:%d, not exist", vgId); + return NULL; + } + + return pVnode; +} + +SVnode *vnodeAcquireNotClose(int32_t vgId) { + SVnode *pVnode = vnodeAcquire(vgId); + if (pVnode != NULL && pVnode->preClose == 1) { + vnodeRelease(pVnode); + terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; + vDebug("vgId:%d, not exist, pre closing", vgId); + return NULL; + } + + return pVnode; +} + +void vnodeRelease(SVnode *pVnode) { + if (pVnode == NULL) return; + + int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); + int32_t vgId = pVnode->vgId; + + vTrace("vgId:%d, release vnode, refCount:%d pVnode:%p", vgId, refCount, pVnode); + assert(refCount >= 0); + + if (refCount <= 0) { + vDebug("vgId:%d, vnode will be destroyed, refCount:%d pVnode:%p", vgId, refCount, pVnode); + vnodeProcessDestroyTask(pVnode); + int32_t count = taosHashGetSize(tsVmain.hash); + vDebug("vgId:%d, vnode 
is destroyed, vnodes:%d", vgId, count); + } +} static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno); int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg) { int32_t code; - SVnodeObj *pVnode = vnodeAcquire(pVnodeCfg->cfg.vgId); + SVnode *pVnode = vnodeAcquire(pVnodeCfg->cfg.vgId); if (pVnode != NULL) { vDebug("vgId:%d, vnode already exist, refCount:%d pVnode:%p", pVnodeCfg->cfg.vgId, pVnode->refCount, pVnode); vnodeRelease(pVnode); return TSDB_CODE_SUCCESS; } +#if 0 if (tfsMkdir("vnode") < 0) { vError("vgId:%d, failed to create vnode dir, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(terrno)); return terrno; @@ -64,7 +138,7 @@ int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg) { vError("vgId:%d, failed to create tsdb in vnode, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(terrno)); return TSDB_CODE_VND_INIT_FAILED; } - +#endif vInfo("vgId:%d, vnode dir is created, walLevel:%d fsyncPeriod:%d", pVnodeCfg->cfg.vgId, pVnodeCfg->cfg.walLevel, pVnodeCfg->cfg.fsyncPeriod); code = vnodeOpen(pVnodeCfg->cfg.vgId); @@ -73,7 +147,8 @@ int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg) { } int32_t vnodeSync(int32_t vgId) { - SVnodeObj *pVnode = vnodeAcquireNotClose(vgId); +#if 0 + SVnode *pVnode = vnodeAcquireNotClose(vgId); if (pVnode == NULL) { vDebug("vgId:%d, failed to sync, vnode not find", vgId); return TSDB_CODE_VND_INVALID_VGROUP_ID; @@ -90,13 +165,12 @@ int32_t vnodeSync(int32_t vgId) { } vnodeRelease(pVnode); - +#endif return TSDB_CODE_SUCCESS; } - int32_t vnodeDrop(int32_t vgId) { - SVnodeObj *pVnode = vnodeAcquireNotClose(vgId); + SVnode *pVnode = vnodeAcquireNotClose(vgId); if (pVnode == NULL) { vDebug("vgId:%d, failed to drop, vnode not find", vgId); return TSDB_CODE_VND_INVALID_VGROUP_ID; @@ -110,25 +184,29 @@ int32_t vnodeDrop(int32_t vgId) { pVnode->dropped = 1; vnodeRelease(pVnode); - vnodeCleanupInMWorker(pVnode); + vnodeProcessCleanupTask(pVnode); return TSDB_CODE_SUCCESS; } + int32_t vnodeCompact(int32_t vgId) { - void *pVnode = vnodeAcquire(vgId); 
+#if 0 + SVnode *pVnode = vnodeAcquire(vgId); if (pVnode != NULL) { vDebug("vgId:%d, compact vnode msg is received", vgId); - //not care success or not - tsdbCompact(((SVnodeObj*)pVnode)->tsdb); + // not care success or not + tsdbCompact(((SVnode *)pVnode)->tsdb); vnodeRelease(pVnode); } else { vInfo("vgId:%d, vnode not exist, can't compact it", vgId); return TSDB_CODE_VND_INVALID_VGROUP_ID; } - return TSDB_CODE_SUCCESS; +#endif + return TSDB_CODE_SUCCESS; } -static int32_t vnodeAlterImp(SVnodeObj *pVnode, SCreateVnodeMsg *pVnodeCfg) { +static int32_t vnodeAlterImp(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg) { +#if 0 STsdbCfg tsdbCfg = pVnode->tsdbCfg; SSyncCfg syncCfg = pVnode->syncCfg; int32_t dbCfgVersion = pVnode->dbCfgVersion; @@ -204,13 +282,11 @@ static int32_t vnodeAlterImp(SVnodeObj *pVnode, SCreateVnodeMsg *pVnodeCfg) { vnodeSetReadyStatus(pVnode); } - +#endif return 0; } -int32_t vnodeAlter(void *vparam, SCreateVnodeMsg *pVnodeCfg) { - SVnodeObj *pVnode = vparam; - +int32_t vnodeAlter(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg) { vDebug("vgId:%d, current dbCfgVersion:%d vgCfgVersion:%d, input dbCfgVersion:%d vgCfgVersion:%d", pVnode->vgId, pVnode->dbCfgVersion, pVnode->vgCfgVersion, pVnodeCfg->cfg.dbCfgVersion, pVnodeCfg->cfg.vgCfgVersion); @@ -231,6 +307,7 @@ int32_t vnodeAlter(void *vparam, SCreateVnodeMsg *pVnodeCfg) { } static void vnodeFindWalRootDir(int32_t vgId, char *walRootDir) { +#if 0 char vnodeDir[TSDB_FILENAME_LEN] = "\0"; snprintf(vnodeDir, TSDB_FILENAME_LEN, "/vnode/vnode%d/wal", vgId); @@ -246,15 +323,17 @@ static void vnodeFindWalRootDir(int32_t vgId, char *walRootDir) { sprintf(walRootDir, "%s/vnode/vnode%d", TFS_DISK_PATH(tfile->level, tfile->id), vgId); tfsClosedir(tdir); +#endif } int32_t vnodeOpen(int32_t vgId) { +#if 0 char temp[TSDB_FILENAME_LEN * 3]; char rootDir[TSDB_FILENAME_LEN * 2]; char walRootDir[TSDB_FILENAME_LEN * 2] = {0}; snprintf(rootDir, TSDB_FILENAME_LEN * 2, "%s/vnode%d", tsVnodeDir, vgId); - SVnodeObj *pVnode = 
calloc(sizeof(SVnodeObj), 1); + SVnode *pVnode = calloc(sizeof(SVnode), 1); if (pVnode == NULL) { vError("vgId:%d, failed to open vnode since no enough memory", vgId); return TAOS_SYSTEM_ERROR(errno); @@ -262,9 +341,9 @@ int32_t vnodeOpen(int32_t vgId) { atomic_add_fetch_32(&pVnode->refCount, 1); - pVnode->vgId = vgId; + pVnode->vgId = vgId; pVnode->fversion = 0; - pVnode->version = 0; + pVnode->version = 0; pVnode->tsdbCfg.tsdbId = pVnode->vgId; pVnode->rootDir = strdup(rootDir); pVnode->accessState = TSDB_VN_ALL_ACCCESS; @@ -279,7 +358,7 @@ int32_t vnodeOpen(int32_t vgId) { vError("vgId:%d, failed to read config file, set cfgVersion to 0", pVnode->vgId); vnodeCleanUp(pVnode); return 0; - } + } code = vnodeReadVersion(pVnode); if (code != TSDB_CODE_SUCCESS) { @@ -291,29 +370,15 @@ int32_t vnodeOpen(int32_t vgId) { } pVnode->fversion = pVnode->version; - - pVnode->wqueue = dnodeAllocVWriteQueue(pVnode); - pVnode->qqueue = dnodeAllocVQueryQueue(pVnode); - pVnode->fqueue = dnodeAllocVFetchQueue(pVnode); + + pVnode->wqueue = vnodeAllocWriteQueue(pVnode); + pVnode->qqueue = vnodeAllocQueryQueue(pVnode); + pVnode->fqueue = vnodeAllocFetchQueue(pVnode); if (pVnode->wqueue == NULL || pVnode->qqueue == NULL || pVnode->fqueue == NULL) { vnodeCleanUp(pVnode); return terrno; } - if (tsEnableStream) { - SCqCfg cqCfg = {0}; - sprintf(cqCfg.user, "_root"); - strcpy(cqCfg.pass, tsInternalPass); - strcpy(cqCfg.db, pVnode->db); - cqCfg.vgId = vgId; - cqCfg.cqWrite = vnodeWriteToCache; - pVnode->cq = cqOpen(pVnode, &cqCfg); - if (pVnode->cq == NULL) { - vnodeCleanUp(pVnode); - return terrno; - } - } - STsdbAppH appH = {0}; appH.appH = (void *)pVnode; appH.notifyStatus = vnodeProcessTsdbStatus; @@ -327,8 +392,8 @@ int32_t vnodeOpen(int32_t vgId) { vnodeCleanUp(pVnode); return terrno; } else if (tsdbGetState(pVnode->tsdb) != TSDB_STATE_OK) { - vError("vgId:%d, failed to open tsdb(state: %d), replica:%d reason:%s", pVnode->vgId, - tsdbGetState(pVnode->tsdb), pVnode->syncCfg.replica, 
tstrerror(terrno)); + vError("vgId:%d, failed to open tsdb(state: %d), replica:%d reason:%s", pVnode->vgId, tsdbGetState(pVnode->tsdb), + pVnode->syncCfg.replica, tstrerror(terrno)); if (pVnode->syncCfg.replica <= 1) { vnodeCleanUp(pVnode); return TSDB_CODE_VND_INVALID_TSDB_STATE; @@ -355,12 +420,12 @@ int32_t vnodeOpen(int32_t vgId) { sprintf(temp, "%s/wal", walRootDir); pVnode->walCfg.vgId = pVnode->vgId; pVnode->wal = walOpen(temp, &pVnode->walCfg); - if (pVnode->wal == NULL) { + if (pVnode->wal == NULL) { vnodeCleanUp(pVnode); return terrno; } - walRestore(pVnode->wal, pVnode, vnodeProcessWrite); + walRestore(pVnode->wal, pVnode, (FWalWrite)vnodeProcessWalMsg); if (pVnode->version == 0) { pVnode->fversion = 0; pVnode->version = walGetVersion(pVnode->wal); @@ -386,40 +451,16 @@ int32_t vnodeOpen(int32_t vgId) { vDebug("vgId:%d, vnode is opened in %s - %s, pVnode:%p", pVnode->vgId, rootDir, walRootDir, pVnode); - vnodeAddIntoHash(pVnode); - - SSyncInfo syncInfo; - syncInfo.vgId = pVnode->vgId; - syncInfo.version = pVnode->version; - syncInfo.syncCfg = pVnode->syncCfg; - tstrncpy(syncInfo.path, walRootDir, TSDB_FILENAME_LEN); - syncInfo.getWalInfoFp = vnodeGetWalInfo; - syncInfo.writeToCacheFp = vnodeWriteToCache; - syncInfo.confirmForward = vnodeConfirmForard; - syncInfo.notifyRoleFp = vnodeNotifyRole; - syncInfo.notifyFlowCtrlFp = vnodeCtrlFlow; - syncInfo.startSyncFileFp = vnodeStartSyncFile; - syncInfo.stopSyncFileFp = vnodeStopSyncFile; - syncInfo.getVersionFp = vnodeGetVersion; - syncInfo.sendFileFp = tsdbSyncSend; - syncInfo.recvFileFp = tsdbSyncRecv; - syncInfo.pTsdb = pVnode->tsdb; - pVnode->sync = syncStart(&syncInfo); - - if (pVnode->sync <= 0) { - vError("vgId:%d, failed to open sync, replica:%d reason:%s", pVnode->vgId, pVnode->syncCfg.replica, - tstrerror(terrno)); - vnodeRemoveFromHash(pVnode); - vnodeCleanUp(pVnode); - return terrno; - } + taosHashPut(tsVmain.hash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnode *)); 
vnodeSetReadyStatus(pVnode); + pVnode->role = TAOS_SYNC_ROLE_MASTER; +#endif return TSDB_CODE_SUCCESS; } int32_t vnodeClose(int32_t vgId) { - SVnodeObj *pVnode = vnodeAcquireNotClose(vgId); + SVnode *pVnode = vnodeAcquireNotClose(vgId); if (pVnode == NULL) return 0; if (pVnode->dropped) { vnodeRelease(pVnode); @@ -435,10 +476,11 @@ int32_t vnodeClose(int32_t vgId) { return 0; } -void vnodeDestroy(SVnodeObj *pVnode) { +void vnodeDestroy(SVnode *pVnode) { +#if 0 int32_t code = 0; int32_t vgId = pVnode->vgId; - + if (pVnode->qMgmt) { qCleanupQueryMgmt(pVnode->qMgmt); pVnode->qMgmt = NULL; @@ -475,47 +517,49 @@ void vnodeDestroy(SVnodeObj *pVnode) { } if (pVnode->wqueue) { - dnodeFreeVWriteQueue(pVnode->wqueue); + vnodeFreeWriteQueue(pVnode->wqueue); pVnode->wqueue = NULL; } if (pVnode->qqueue) { - dnodeFreeVQueryQueue(pVnode->qqueue); + vnodeFreeQueryQueue(pVnode->qqueue); pVnode->qqueue = NULL; } if (pVnode->fqueue) { - dnodeFreeVFetchQueue(pVnode->fqueue); + vnodeFreeFetchQueue(pVnode->fqueue); pVnode->fqueue = NULL; } tfree(pVnode->rootDir); if (pVnode->dropped) { - char rootDir[TSDB_FILENAME_LEN] = {0}; + char rootDir[TSDB_FILENAME_LEN] = {0}; char stagingDir[TSDB_FILENAME_LEN] = {0}; sprintf(rootDir, "%s/vnode%d", "vnode", vgId); sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId); tfsRename(rootDir, stagingDir); - vnodeBackup(vgId); + vnodeProcessBackupTask(pVnode); - dnodeSendStatusMsgToMnode(); + // dnodeSendStatusMsgToMnode(); } tsem_destroy(&pVnode->sem); pthread_mutex_destroy(&pVnode->statusMutex); free(pVnode); tsdbDecCommitRef(vgId); +#endif } -void vnodeCleanUp(SVnodeObj *pVnode) { +void vnodeCleanUp(SVnode *pVnode) { +#if 0 vDebug("vgId:%d, vnode will cleanup, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); vnodeSetClosingStatus(pVnode); - vnodeRemoveFromHash(pVnode); + taosHashRemove(tsVmain.hash, &pVnode->vgId, sizeof(int32_t)); // stop replication module if (pVnode->sync > 0) { @@ -526,10 +570,12 @@ void 
vnodeCleanUp(SVnodeObj *pVnode) { vDebug("vgId:%d, vnode is cleaned, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); vnodeRelease(pVnode); +#endif } +#if 0 static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno) { - SVnodeObj *pVnode = arg; + SVnode *pVnode = arg; if (eno != TSDB_CODE_SUCCESS) { vError("vgId:%d, failed to commit since %s, fver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, tstrerror(eno), @@ -561,9 +607,301 @@ static int32_t vnodeProcessTsdbStatus(void *arg, int32_t status, int32_t eno) { } // timer thread callback - if(status == TSDB_STATUS_COMMIT_NOBLOCK) { + if (status == TSDB_STATUS_COMMIT_NOBLOCK) { qSolveCommitNoBlock(pVnode->tsdb, pVnode->qMgmt); } return 0; } +#endif + +static void *vnodeOpenVnode(void *param) { + SOpenVnodeThread *pThread = param; + + vDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); + setThreadName("vnodeOpenVnode"); + + for (int32_t v = 0; v < pThread->vnodeNum; ++v) { + int32_t vgId = pThread->vnodeList[v]; + + char stepDesc[TSDB_STEP_DESC_LEN] = {0}; + snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been opened", vgId, + tsVmain.openVnodes, tsVmain.totalVnodes); + // (*vnodeInst()->fp.ReportStartup)("open-vnodes", stepDesc); + + if (vnodeOpen(vgId) < 0) { + vError("vgId:%d, failed to open vnode by thread:%d", vgId, pThread->threadIndex); + pThread->failed++; + } else { + vDebug("vgId:%d, is opened by thread:%d", vgId, pThread->threadIndex); + pThread->opened++; + } + + atomic_add_fetch_32(&tsVmain.openVnodes, 1); + } + + vDebug("thread:%d, total vnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, + pThread->failed); + return NULL; +} + +static int32_t vnodeGetVnodeListFromDisk(int32_t vnodeList[], int32_t *numOfVnodes) { +#if 0 + DIR *dir = opendir(tsVnodeDir); + if (dir == NULL) return TSDB_CODE_DND_NO_WRITE_ACCESS; + + *numOfVnodes = 0; + struct dirent *de = NULL; + while ((de 
= readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; + if (de->d_type & DT_DIR) { + if (strncmp("vnode", de->d_name, 5) != 0) continue; + int32_t vnode = atoi(de->d_name + 5); + if (vnode == 0) continue; + + (*numOfVnodes)++; + + if (*numOfVnodes >= TSDB_MAX_VNODES) { + vError("vgId:%d, too many vnode directory in disk, exist:%d max:%d", vnode, *numOfVnodes, TSDB_MAX_VNODES); + closedir(dir); + return TSDB_CODE_DND_TOO_MANY_VNODES; + } else { + vnodeList[*numOfVnodes - 1] = vnode; + } + } + } + closedir(dir); +#endif + return TSDB_CODE_SUCCESS; +} + +static int32_t vnodeOpenVnodes() { + int32_t vnodeList[TSDB_MAX_VNODES] = {0}; + int32_t numOfVnodes = 0; + int32_t status = vnodeGetVnodeListFromDisk(vnodeList, &numOfVnodes); + + if (status != TSDB_CODE_SUCCESS) { + vInfo("failed to get vnode list from disk since code:%d", status); + return status; + } + + tsVmain.totalVnodes = numOfVnodes; + + int32_t threadNum = tsNumOfCores; + int32_t vnodesPerThread = numOfVnodes / threadNum + 1; + + SOpenVnodeThread *threads = calloc(threadNum, sizeof(SOpenVnodeThread)); + for (int32_t t = 0; t < threadNum; ++t) { + threads[t].threadIndex = t; + threads[t].vnodeList = calloc(vnodesPerThread, sizeof(int32_t)); + } + + for (int32_t v = 0; v < numOfVnodes; ++v) { + int32_t t = v % threadNum; + SOpenVnodeThread *pThread = &threads[t]; + pThread->vnodeList[pThread->vnodeNum++] = vnodeList[v]; + } + + vInfo("start %d threads to open %d vnodes", threadNum, numOfVnodes); + + for (int32_t t = 0; t < threadNum; ++t) { + SOpenVnodeThread *pThread = &threads[t]; + if (pThread->vnodeNum == 0) continue; + + pthread_attr_t thAttr; + pthread_attr_init(&thAttr); + pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); + if (pthread_create(&pThread->thread, &thAttr, vnodeOpenVnode, pThread) != 0) { + vError("thread:%d, failed to create thread to open vnode, reason:%s", pThread->threadIndex, strerror(errno)); + } + + 
pthread_attr_destroy(&thAttr); + } + + int32_t openVnodes = 0; + int32_t failedVnodes = 0; + for (int32_t t = 0; t < threadNum; ++t) { + SOpenVnodeThread *pThread = &threads[t]; + if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) { + pthread_join(pThread->thread, NULL); + } + openVnodes += pThread->opened; + failedVnodes += pThread->failed; + free(pThread->vnodeList); + } + + free(threads); + vInfo("there are total vnodes:%d, opened:%d", numOfVnodes, openVnodes); + + if (failedVnodes != 0) { + vError("there are total vnodes:%d, failed:%d", numOfVnodes, failedVnodes); + return -1; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) { + void *pIter = taosHashIterate(tsVmain.hash, NULL); + while (pIter) { + SVnode **pVnode = pIter; + if (*pVnode) { + (*numOfVnodes)++; + if (*numOfVnodes >= TSDB_MAX_VNODES) { + vError("vgId:%d, too many open vnodes, exist:%d max:%d", (*pVnode)->vgId, *numOfVnodes, TSDB_MAX_VNODES); + continue; + } else { + vnodeList[*numOfVnodes - 1] = (*pVnode)->vgId; + } + } + + pIter = taosHashIterate(tsVmain.hash, pIter); + } + + return TSDB_CODE_SUCCESS; +} + +static void vnodeCleanupVnodes() { + int32_t vnodeList[TSDB_MAX_VNODES] = {0}; + int32_t numOfVnodes = 0; + + int32_t code = vnodeGetVnodeList(vnodeList, &numOfVnodes); + + if (code != TSDB_CODE_SUCCESS) { + vInfo("failed to get dnode list since code %d", code); + return; + } + + for (int32_t i = 0; i < numOfVnodes; ++i) { + vnodeClose(vnodeList[i]); + } + + vInfo("total vnodes:%d are all closed", numOfVnodes); +} + +static void vnodeInitMsgFp() { + tsVmain.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessMgmtMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessMgmtMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessMgmtMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE] = vnodeProcessMgmtMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessMgmtMsg; + 
tsVmain.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessMgmtMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_SUBMIT] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = vnodeProcessWriteMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessReadMsg; + tsVmain.msgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessReadMsg; +} + +void vnodeProcessMsg(SRpcMsg *pMsg) { + if (tsVmain.msgFp[pMsg->msgType]) { + (*tsVmain.msgFp[pMsg->msgType])(pMsg); + } else { + assert(0); + } +} + +int32_t vnodeInitMain() { + vnodeInitMsgFp(); + + tsVmain.timer = taosTmrInit(100, 200, 60000, "VND-TIMER"); + if (tsVmain.timer == NULL) { + vError("failed to init vnode timer"); + return -1; + } + + tsVmain.hash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); + if (tsVmain.hash == NULL) { + taosTmrCleanUp(tsVmain.timer); + vError("failed to init vnode mgmt"); + return -1; + } + + vInfo("vnode main is initialized"); + return vnodeOpenVnodes(); +} + +void vnodeCleanupMain() { + taosTmrCleanUp(tsVmain.timer); + tsVmain.timer = NULL; + + vnodeCleanupVnodes(); + + taosHashCleanup(tsVmain.hash); + tsVmain.hash = NULL; +} + +static void vnodeBuildVloadMsg(SVnode *pVnode, SStatusMsg *pStatus) { +#if 0 + int64_t totalStorage = 0; + int64_t compStorage = 0; + int64_t pointsWritten = 0; + + if (vnodeInClosingStatus(pVnode)) return; + if (pStatus->openVnodes >= TSDB_MAX_VNODES) return; + + if (pVnode->tsdb) { + tsdbReportStat(pVnode->tsdb, &pointsWritten, &totalStorage, &compStorage); + } + + SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++]; + pLoad->vgId = htonl(pVnode->vgId); + pLoad->dbCfgVersion = htonl(pVnode->dbCfgVersion); + pLoad->vgCfgVersion = 
htonl(pVnode->vgCfgVersion); + pLoad->totalStorage = htobe64(totalStorage); + pLoad->compStorage = htobe64(compStorage); + pLoad->pointsWritten = htobe64(pointsWritten); + pLoad->vnodeVersion = htobe64(pVnode->version); + pLoad->status = pVnode->status; + pLoad->role = pVnode->role; + pLoad->replica = pVnode->syncCfg.replica; + pLoad->compact = (pVnode->tsdb != NULL) ? tsdbGetCompactState(pVnode->tsdb) : 0; +#endif +} + +void vnodeGetStatus(struct SStatusMsg *pStatus) { + void *pIter = taosHashIterate(tsVmain.hash, NULL); + while (pIter) { + SVnode **pVnode = pIter; + if (*pVnode) { + vnodeBuildVloadMsg(*pVnode, pStatus); + } + pIter = taosHashIterate(tsVmain.hash, pIter); + } +} + +void vnodeSetAccess(struct SVgroupAccess *pAccess, int32_t numOfVnodes) { + for (int32_t i = 0; i < numOfVnodes; ++i) { + pAccess[i].vgId = htonl(pAccess[i].vgId); + SVnode *pVnode = vnodeAcquireNotClose(pAccess[i].vgId); + if (pVnode != NULL) { + pVnode->accessState = pAccess[i].accessState; + if (pVnode->accessState != TSDB_VN_ALL_ACCCESS) { + vDebug("vgId:%d, access state is set to %d", pAccess[i].vgId, pVnode->accessState); + } + vnodeRelease(pVnode); + } + } +} + +void vnodeBackup(int32_t vgId) { + char newDir[TSDB_FILENAME_LEN] = {0}; + char stagingDir[TSDB_FILENAME_LEN] = {0}; + + sprintf(newDir, "%s/vnode%d", "vnode_bak", vgId); + sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId); + +#if 0 + if (tsEnableVnodeBak) { + tfsRmdir(newDir); + tfsRename(stagingDir, newDir); + } else { + vInfo("vgId:%d, vnode backup not enabled", vgId); + + tfsRmdir(stagingDir); + } +#endif +} diff --git a/source/server/vnode/src/vnodeMgmt.c b/source/server/vnode/src/vnodeMgmt.c new file mode 100644 index 0000000000..4158b0f6aa --- /dev/null +++ b/source/server/vnode/src/vnodeMgmt.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "vnodeMain.h" +#include "vnodeMgmt.h" +#include "vnodeMgmtMsg.h" + +typedef struct { + SRpcMsg rpcMsg; + char pCont[]; +} SVnMgmtMsg; + +static struct { + SWorkerPool pool; + taos_queue pQueue; + int32_t (*msgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); +} tsVmgmt = {0}; + +static int32_t vnodeProcessMgmtStart(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype) { + SRpcMsg *pMsg = &pMgmt->rpcMsg; + int32_t msgType = pMsg->msgType; + + if (tsVmgmt.msgFp[msgType]) { + vTrace("msg:%p, ahandle:%p type:%s will be processed", pMgmt, pMsg->ahandle, taosMsg[msgType]); + return (*tsVmgmt.msgFp[msgType])(pMsg); + } else { + vError("msg:%p, ahandle:%p type:%s not processed since no handle", pMgmt, pMsg->ahandle, taosMsg[msgType]); + return TSDB_CODE_DND_MSG_NOT_PROCESSED; + } +} + +static void vnodeSendMgmtEnd(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype, int32_t code) { + SRpcMsg *pMsg = &pMgmt->rpcMsg; + SRpcMsg rsp = {0}; + + rsp.code = code; + vTrace("msg:%p, is processed, code:0x%x", pMgmt, rsp.code); + if (rsp.code != TSDB_CODE_DND_ACTION_IN_PROGRESS) { + rsp.handle = pMsg->handle; + rsp.pCont = NULL; + rpcSendResponse(&rsp); + } + + taosFreeQitem(pMsg); +} + +static void vnodeInitMgmtReqFp() { + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessCreateVnodeMsg; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessAlterVnodeMsg; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = 
vnodeProcessSyncVnodeMsg; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE]= vnodeProcessCompactVnodeMsg; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessDropVnodeMsg; + tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessAlterStreamReq; +} + +static int32_t vnodeWriteToMgmtQueue(SRpcMsg *pMsg) { + int32_t size = sizeof(SVnMgmtMsg) + pMsg->contLen; + SVnMgmtMsg *pMgmt = taosAllocateQitem(size); + if (pMgmt == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY; + + pMgmt->rpcMsg = *pMsg; + pMgmt->rpcMsg.pCont = pMgmt->pCont; + memcpy(pMgmt->pCont, pMsg->pCont, pMsg->contLen); + taosWriteQitem(tsVmgmt.pQueue, TAOS_QTYPE_RPC, pMgmt); + + return TSDB_CODE_SUCCESS; +} + +void vnodeProcessMgmtMsg(SRpcMsg *pMsg) { + int32_t code = vnodeWriteToMgmtQueue(pMsg); + if (code != TSDB_CODE_SUCCESS) { + SRpcMsg rsp = {.handle = pMsg->handle, .code = code}; + rpcSendResponse(&rsp); + } + + rpcFreeCont(pMsg->pCont); +} + +int32_t vnodeInitMgmt() { + vnodeInitMgmtReqFp(); + + SWorkerPool *pPool = &tsVmgmt.pool; + pPool->name = "vmgmt"; + pPool->startFp = (ProcessStartFp)vnodeProcessMgmtStart; + pPool->endFp = (ProcessEndFp)vnodeSendMgmtEnd; + pPool->min = 1; + pPool->max = 1; + if (tWorkerInit(pPool) != 0) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + tsVmgmt.pQueue = tWorkerAllocQueue(pPool, NULL); + + vInfo("vmgmt is initialized, max worker %d", pPool->max); + return TSDB_CODE_SUCCESS; +} + +void vnodeCleanupMgmt() { + tWorkerFreeQueue(&tsVmgmt.pool, tsVmgmt.pQueue); + tWorkerCleanup(&tsVmgmt.pool); + tsVmgmt.pQueue = NULL; + vInfo("vmgmt is closed"); +} diff --git a/source/server/vnode/src/vnodeMgmtMsg.c b/source/server/vnode/src/vnodeMgmtMsg.c new file mode 100644 index 0000000000..d67fa11ece --- /dev/null +++ b/source/server/vnode/src/vnodeMgmtMsg.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "vnodeMain.h" +#include "vnodeMgmtMsg.h" + +static SCreateVnodeMsg* vnodeParseVnodeMsg(SRpcMsg *rpcMsg) { + SCreateVnodeMsg *pCreate = rpcMsg->pCont; + pCreate->cfg.vgId = htonl(pCreate->cfg.vgId); + pCreate->cfg.dbCfgVersion = htonl(pCreate->cfg.dbCfgVersion); + pCreate->cfg.vgCfgVersion = htonl(pCreate->cfg.vgCfgVersion); + pCreate->cfg.maxTables = htonl(pCreate->cfg.maxTables); + pCreate->cfg.cacheBlockSize = htonl(pCreate->cfg.cacheBlockSize); + pCreate->cfg.totalBlocks = htonl(pCreate->cfg.totalBlocks); + pCreate->cfg.daysPerFile = htonl(pCreate->cfg.daysPerFile); + pCreate->cfg.daysToKeep1 = htonl(pCreate->cfg.daysToKeep1); + pCreate->cfg.daysToKeep2 = htonl(pCreate->cfg.daysToKeep2); + pCreate->cfg.daysToKeep = htonl(pCreate->cfg.daysToKeep); + pCreate->cfg.minRowsPerFileBlock = htonl(pCreate->cfg.minRowsPerFileBlock); + pCreate->cfg.maxRowsPerFileBlock = htonl(pCreate->cfg.maxRowsPerFileBlock); + pCreate->cfg.fsyncPeriod = htonl(pCreate->cfg.fsyncPeriod); + pCreate->cfg.commitTime = htonl(pCreate->cfg.commitTime); + + for (int32_t j = 0; j < pCreate->cfg.vgReplica; ++j) { + pCreate->nodes[j].nodeId = htonl(pCreate->nodes[j].nodeId); + } + + return pCreate; +} + +int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg) { + SCreateVnodeMsg *pCreate = vnodeParseVnodeMsg(rpcMsg); + SVnode *pVnode = vnodeAcquire(pCreate->cfg.vgId); + if (pVnode != NULL) { + vDebug("vgId:%d, 
already exist, return success", pCreate->cfg.vgId); + vnodeRelease(pVnode); + return TSDB_CODE_SUCCESS; + } else { + vDebug("vgId:%d, create vnode msg is received", pCreate->cfg.vgId); + return vnodeCreate(pCreate); + } +} + +int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg) { + SAlterVnodeMsg *pAlter = vnodeParseVnodeMsg(rpcMsg); + + void *pVnode = vnodeAcquireNotClose(pAlter->cfg.vgId); + if (pVnode != NULL) { + vDebug("vgId:%d, alter vnode msg is received", pAlter->cfg.vgId); + int32_t code = vnodeAlter(pVnode, pAlter); + vnodeRelease(pVnode); + return code; + } else { + vInfo("vgId:%d, vnode not exist, can't alter it", pAlter->cfg.vgId); + return TSDB_CODE_VND_INVALID_VGROUP_ID; + } +} + +int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg) { + SSyncVnodeMsg *pSyncVnode = rpcMsg->pCont; + pSyncVnode->vgId = htonl(pSyncVnode->vgId); + + return vnodeSync(pSyncVnode->vgId); +} + +int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg) { + SCompactVnodeMsg *pCompactVnode = rpcMsg->pCont; + pCompactVnode->vgId = htonl(pCompactVnode->vgId); + return vnodeCompact(pCompactVnode->vgId); +} + +int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg) { + SDropVnodeMsg *pDrop = rpcMsg->pCont; + pDrop->vgId = htonl(pDrop->vgId); + + return vnodeDrop(pDrop->vgId); +} + +int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg) { return 0; } diff --git a/source/server/vnode/src/vnodeRead.c b/source/server/vnode/src/vnodeRead.c new file mode 100644 index 0000000000..39b6983b7d --- /dev/null +++ b/source/server/vnode/src/vnodeRead.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "taosmsg.h" +#include "tglobal.h" +// #include "query.h" +#include "vnodeMain.h" +#include "vnodeRead.h" +#include "vnodeReadMsg.h" +#include "vnodeStatus.h" + +static struct { + SWorkerPool query; + SWorkerPool fetch; + int32_t (*msgFp[TSDB_MSG_TYPE_MAX])(SVnode *, struct SReadMsg *); +} tsVread = {0}; + +void vnodeStartRead(SVnode *pVnode) {} +void vnodeStopRead(SVnode *pVnode) {} + +void vnodeWaitReadCompleted(SVnode *pVnode) { + while (pVnode->queuedRMsg > 0) { + vTrace("vgId:%d, queued rmsg num:%d", pVnode->vgId, pVnode->queuedRMsg); + taosMsleep(10); + } +} + +static int32_t vnodeWriteToRQueue(SVnode *pVnode, void *pCont, int32_t contLen, int8_t qtype, SRpcMsg *pRpcMsg) { + if (pVnode->dropped) { + return TSDB_CODE_APP_NOT_READY; + } + +#if 0 + if (!((pVnode->role == TAOS_SYNC_ROLE_MASTER) || (tsEnableSlaveQuery && pVnode->role == TAOS_SYNC_ROLE_SLAVE))) { + return TSDB_CODE_APP_NOT_READY; + } +#endif + + if (!vnodeInReadyStatus(pVnode)) { + vDebug("vgId:%d, failed to write into vread queue, vnode status is %s", pVnode->vgId, vnodeStatus[pVnode->status]); + return TSDB_CODE_APP_NOT_READY; + } + + int32_t size = sizeof(SReadMsg) + contLen; + SReadMsg *pRead = taosAllocateQitem(size); + if (pRead == NULL) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + if (pRpcMsg != NULL) { + pRead->rpcHandle = pRpcMsg->handle; + pRead->rpcAhandle = pRpcMsg->ahandle; + pRead->msgType = pRpcMsg->msgType; + pRead->code = pRpcMsg->code; + } + + if (contLen != 0) { + pRead->contLen = contLen; + memcpy(pRead->pCont, pCont, contLen); + } else { + pRead->qhandle = pCont; + } + + pRead->qtype = qtype; + + 
atomic_add_fetch_32(&pVnode->refCount, 1); + atomic_add_fetch_32(&pVnode->queuedRMsg, 1); + + if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_MSG_TYPE_FETCH) { + return taosWriteQitem(pVnode->fqueue, qtype, pRead); + } else { + return taosWriteQitem(pVnode->qqueue, qtype, pRead); + } +} + +static void vnodeFreeFromRQueue(SVnode *pVnode, SReadMsg *pRead) { + atomic_sub_fetch_32(&pVnode->queuedRMsg, 1); + + taosFreeQitem(pRead); + vnodeRelease(pVnode); +} + +int32_t vnodeReputPutToRQueue(SVnode *pVnode, void **qhandle, void *ahandle) { + SRpcMsg rpcMsg = {0}; + rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; + rpcMsg.ahandle = ahandle; + + int32_t code = vnodeWriteToRQueue(pVnode, qhandle, 0, TAOS_QTYPE_QUERY, &rpcMsg); + if (code == TSDB_CODE_SUCCESS) { + vTrace("QInfo:%p add to vread queue for exec query", *qhandle); + } + + return code; +} + +void vnodeProcessReadMsg(SRpcMsg *pMsg) { + int32_t queuedMsgNum = 0; + int32_t leftLen = pMsg->contLen; + int32_t code = TSDB_CODE_VND_INVALID_VGROUP_ID; + char * pCont = pMsg->pCont; + + while (leftLen > 0) { + SMsgHead *pHead = (SMsgHead *)pCont; + pHead->vgId = htonl(pHead->vgId); + pHead->contLen = htonl(pHead->contLen); + + assert(pHead->contLen > 0); + SVnode *pVnode = vnodeAcquireNotClose(pHead->vgId); + if (pVnode != NULL) { + code = vnodeWriteToRQueue(pVnode, pCont, pHead->contLen, TAOS_QTYPE_RPC, pMsg); + if (code == TSDB_CODE_SUCCESS) queuedMsgNum++; + vnodeRelease(pVnode); + } + + leftLen -= pHead->contLen; + pCont -= pHead->contLen; + } + + if (queuedMsgNum == 0) { + SRpcMsg rpcRsp = {.handle = pMsg->handle, .code = code}; + rpcSendResponse(&rpcRsp); + } + + rpcFreeCont(pMsg->pCont); +} + +static void vnodeInitReadMsgFp() { + tsVread.msgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessQueryMsg; + tsVread.msgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessFetchMsg; +} + +static int32_t vnodeProcessReadStart(SVnode *pVnode, SReadMsg *pRead, int32_t qtype) { + int32_t msgType = pRead->msgType; + if 
(tsVread.msgFp[msgType] == NULL) { + vDebug("vgId:%d, msgType:%s not processed, no handle", pVnode->vgId, taosMsg[msgType]); + return TSDB_CODE_VND_MSG_NOT_PROCESSED; + } else { + vTrace("msg:%p, app:%p type:%s will be processed", pRead, pRead->rpcAhandle, taosMsg[msgType]); + } + + return (*tsVread.msgFp[msgType])(pVnode, pRead); +} + +static void vnodeSendReadRsp(SReadMsg *pRead, int32_t code) { + SRpcMsg rpcRsp = { + .handle = pRead->rpcHandle, + .pCont = pRead->rspRet.rsp, + .contLen = pRead->rspRet.len, + .code = code, + }; + + rpcSendResponse(&rpcRsp); +} + +static void vnodeProcessReadEnd(SVnode *pVnode, SReadMsg *pRead, int32_t qtype, int32_t code) { + if (qtype == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) { + vnodeSendReadRsp(pRead, code); + } else { + if (code == TSDB_CODE_QRY_HAS_RSP) { + vnodeSendReadRsp(pRead, pRead->code); + } else { // code == TSDB_CODE_QRY_NOT_READY, do not return msg to client + assert(pRead->rpcHandle == NULL || (pRead->rpcHandle != NULL && pRead->msgType == 5)); + } + } + + vnodeFreeFromRQueue(pVnode, pRead); +} + +int32_t vnodeInitRead() { + vnodeInitReadMsgFp(); + + int32_t maxFetchThreads = 4; + float threadsForQuery = MAX(tsNumOfCores * tsRatioOfQueryCores, 1); + + SWorkerPool *pPool = &tsVread.query; + pPool->name = "vquery"; + pPool->startFp = (ProcessStartFp)vnodeProcessReadStart; + pPool->endFp = (ProcessEndFp)vnodeProcessReadEnd; + pPool->min = (int32_t)threadsForQuery; + pPool->max = pPool->min; + if (tWorkerInit(pPool) != 0) return -1; + + pPool = &tsVread.fetch; + pPool->name = "vfetch"; + pPool->startFp = (ProcessStartFp)vnodeProcessReadStart; + pPool->endFp = (ProcessEndFp)vnodeProcessReadEnd; + pPool->min = MIN(maxFetchThreads, tsNumOfCores); + pPool->max = pPool->min; + if (tWorkerInit(pPool) != 0) return -1; + + vInfo("vread is initialized, max worker %d", pPool->max); + return 0; +} + +void vnodeCleanupRead() { + tWorkerCleanup(&tsVread.fetch); + tWorkerCleanup(&tsVread.query); + vInfo("vread is 
closed"); +} + +taos_queue vnodeAllocQueryQueue(SVnode *pVnode) { return tWorkerAllocQueue(&tsVread.query, pVnode); } + +taos_queue vnodeAllocFetchQueue(SVnode *pVnode) { return tWorkerAllocQueue(&tsVread.fetch, pVnode); } + +void vnodeFreeQueryQueue(taos_queue pQueue) { tWorkerFreeQueue(&tsVread.query, pQueue); } + +void vnodeFreeFetchQueue(taos_queue pQueue) { tWorkerFreeQueue(&tsVread.fetch, pQueue); } diff --git a/src/vnode/src/vnodeRead.c b/source/server/vnode/src/vnodeReadMsg.c similarity index 53% rename from src/vnode/src/vnodeRead.c rename to source/server/vnode/src/vnodeReadMsg.c index 64f87ba5ca..8a0f4b2e0f 100644 --- a/src/vnode/src/vnodeRead.c +++ b/source/server/vnode/src/vnodeReadMsg.c @@ -16,155 +16,26 @@ #define _DEFAULT_SOURCE #include "os.h" #include "taosmsg.h" -#include "tqueue.h" #include "tglobal.h" -#include "query.h" +// #include "query.h" #include "vnodeStatus.h" +#include "vnodeRead.h" +#include "vnodeReadMsg.h" -int32_t vNumOfExistedQHandle; // current initialized and existed query handle in current dnode - -static int32_t (*vnodeProcessReadMsgFp[TSDB_MSG_TYPE_MAX])(SVnodeObj *pVnode, SVReadMsg *pRead); -static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead); -static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead); - -static int32_t vnodeNotifyCurrentQhandle(void* handle, uint64_t qId, void* qhandle, int32_t vgId); - -int32_t vnodeInitRead(void) { - vnodeProcessReadMsgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessQueryMsg; - vnodeProcessReadMsgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessFetchMsg; - return 0; -} - -void vnodeCleanupRead() {} - -// -// After the fetch request enters the vnode queue, if the vnode cannot provide services, the process function are -// still required, or there will be a deadlock, so we don’t do any check here, but put the check codes before the -// request enters the queue -// -int32_t vnodeProcessRead(void *vparam, SVReadMsg *pRead) { - SVnodeObj *pVnode = vparam; - int32_t msgType = 
pRead->msgType; - - if (vnodeProcessReadMsgFp[msgType] == NULL) { - vDebug("vgId:%d, msgType:%s not processed, no handle", pVnode->vgId, taosMsg[msgType]); - return TSDB_CODE_VND_MSG_NOT_PROCESSED; - } - - return (*vnodeProcessReadMsgFp[msgType])(pVnode, pRead); -} - -static int32_t vnodeCheckRead(SVnodeObj *pVnode) { - if (!vnodeInReadyStatus(pVnode)) { - vDebug("vgId:%d, vnode status is %s, refCount:%d pVnode:%p", pVnode->vgId, vnodeStatus[pVnode->status], - pVnode->refCount, pVnode); - return TSDB_CODE_APP_NOT_READY; - } - - // tsdb may be in reset state - if (pVnode->tsdb == NULL) { - vDebug("vgId:%d, tsdb is null, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - return TSDB_CODE_APP_NOT_READY; - } - - if (pVnode->role == TAOS_SYNC_ROLE_MASTER) { - return TSDB_CODE_SUCCESS; - } - - if (tsEnableSlaveQuery && pVnode->role == TAOS_SYNC_ROLE_SLAVE) { - return TSDB_CODE_SUCCESS; - } - - vDebug("vgId:%d, replica:%d role:%s, refCount:%d pVnode:%p, cant provide query service", pVnode->vgId, pVnode->syncCfg.replica, - syncRole[pVnode->role], pVnode->refCount, pVnode); - return TSDB_CODE_APP_NOT_READY; -} - -void vnodeFreeFromRQueue(void *vparam, SVReadMsg *pRead) { - SVnodeObj *pVnode = vparam; - - atomic_sub_fetch_32(&pVnode->queuedRMsg, 1); - vTrace("vgId:%d, free from vrqueue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, pVnode->queuedRMsg); - - taosFreeQitem(pRead); - vnodeRelease(pVnode); -} - -static SVReadMsg *vnodeBuildVReadMsg(SVnodeObj *pVnode, void *pCont, int32_t contLen, int8_t qtype, SRpcMsg *pRpcMsg) { - int32_t size = sizeof(SVReadMsg) + contLen; - SVReadMsg *pRead = taosAllocateQitem(size); - if (pRead == NULL) { - terrno = TSDB_CODE_VND_OUT_OF_MEMORY; - return NULL; - } - - if (pRpcMsg != NULL) { - pRead->rpcHandle = pRpcMsg->handle; - pRead->rpcAhandle = pRpcMsg->ahandle; - pRead->msgType = pRpcMsg->msgType; - pRead->code = pRpcMsg->code; - } - - if (contLen != 0) { - pRead->contLen = contLen; - memcpy(pRead->pCont, pCont, 
contLen); - } else { - pRead->qhandle = pCont; - } - - pRead->qtype = qtype; - atomic_add_fetch_32(&pVnode->refCount, 1); - - return pRead; -} - -int32_t vnodeWriteToRQueue(void *vparam, void *pCont, int32_t contLen, int8_t qtype, void *rparam) { - SVnodeObj *pVnode = vparam; - if (pVnode->dropped) { - return TSDB_CODE_APP_NOT_READY; - } - - SVReadMsg *pRead = vnodeBuildVReadMsg(vparam, pCont, contLen, qtype, rparam); - if (pRead == NULL) { - assert(terrno != 0); - return terrno; - } - - int32_t code = vnodeCheckRead(pVnode); - if (code != TSDB_CODE_SUCCESS) { - taosFreeQitem(pRead); - vnodeRelease(pVnode); - return code; - } - - atomic_add_fetch_32(&pVnode->queuedRMsg, 1); - - if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_MSG_TYPE_FETCH) { - vTrace("vgId:%d, write into vfetch queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, - pVnode->queuedRMsg); - return taosWriteQitem(pVnode->fqueue, qtype, pRead); - } else { - vTrace("vgId:%d, write into vquery queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, - pVnode->queuedRMsg); - return taosWriteQitem(pVnode->qqueue, qtype, pRead); - } -} - -static int32_t vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void **qhandle, void *ahandle) { - SRpcMsg rpcMsg = {0}; - rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; - rpcMsg.ahandle = ahandle; - - int32_t code = vnodeWriteToRQueue(pVnode, qhandle, 0, TAOS_QTYPE_QUERY, &rpcMsg); - if (code == TSDB_CODE_SUCCESS) { - vTrace("QInfo:%p add to vread queue for exec query", *qhandle); - } +#if 0 +// notify connection(handle) that current qhandle is created, if current connection from +// client is broken, the query needs to be killed immediately. 
+static int32_t vnodeNotifyCurrentQhandle(void *handle, uint64_t qId, void *qhandle, int32_t vgId) { + SRetrieveTableMsg *pMsg = rpcMallocCont(sizeof(SRetrieveTableMsg)); + pMsg->qId = htobe64(qId); + pMsg->header.vgId = htonl(vgId); + pMsg->header.contLen = htonl(sizeof(SRetrieveTableMsg)); - return code; + vTrace("QInfo:0x%" PRIx64 "-%p register qhandle to connect:%p", qId, qhandle, handle); + return rpcReportProgress(handle, (char *)pMsg, sizeof(SRetrieveTableMsg)); } /** - * * @param pRet response message object * @param pVnode the vnode object * @param handle qhandle for executing query @@ -172,14 +43,16 @@ static int32_t vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void **qhandle, void * @param ahandle sqlObj address at client side * @return */ -static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, void **handle, bool *freeHandle, void *ahandle) { +static int32_t vnodeDumpQueryResult(SVnRsp *pRet, void *pVnode, uint64_t qId, void **handle, bool *freeHandle, + void *ahandle) { bool continueExec = false; int32_t code = TSDB_CODE_SUCCESS; - if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == + TSDB_CODE_SUCCESS) { if (continueExec) { *freeHandle = false; - code = vnodePutItemIntoReadQueue(pVnode, handle, ahandle); + code = vnodeReputPutToRQueue(pVnode, handle, ahandle); if (code != TSDB_CODE_SUCCESS) { *freeHandle = true; return code; @@ -188,7 +61,7 @@ static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, v } } else { *freeHandle = true; - vTrace("QInfo:0x%"PRIx64"-%p exec completed, free handle:%d", qId, *handle, *freeHandle); + vTrace("QInfo:0x%" PRIx64 "-%p exec completed, free handle:%d", qId, *handle, *freeHandle); } } else { SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); @@ -203,7 
+76,7 @@ static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, v return code; } -static void vnodeBuildNoResultQueryRsp(SRspRet *pRet) { +static void vnodeBuildNoResultQueryRsp(SVnRsp *pRet) { pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); pRet->len = sizeof(SRetrieveTableRsp); @@ -212,15 +85,16 @@ static void vnodeBuildNoResultQueryRsp(SRspRet *pRet) { pRsp->completed = true; } +#endif - -static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { +int32_t vnodeProcessQueryMsg(SVnode *pVnode, SReadMsg *pRead) { +#if 0 void * pCont = pRead->pCont; int32_t contLen = pRead->contLen; - SRspRet *pRet = &pRead->rspRet; + SVnRsp *pRet = &pRead->rspRet; SQueryTableMsg *pQueryTableMsg = (SQueryTableMsg *)pCont; - memset(pRet, 0, sizeof(SRspRet)); + memset(pRet, 0, sizeof(SVnRsp)); // qHandle needs to be freed correctly if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { @@ -231,13 +105,13 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { void ** handle = NULL; if (contLen != 0) { - qinfo_t pQInfo = NULL; + qinfo_t pQInfo = NULL; uint64_t qId = genQueryId(); code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo, qId); SQueryTableRsp *pRsp = (SQueryTableRsp *)rpcMallocCont(sizeof(SQueryTableRsp)); pRsp->code = code; - pRsp->qId = 0; + pRsp->qId = 0; pRet->len = sizeof(SQueryTableRsp); pRet->rsp = pRsp; @@ -250,8 +124,8 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { pRsp->code = terrno; terrno = 0; - vError("vgId:%d, QInfo:0x%"PRIx64 "-%p register qhandle failed, return to app, code:%s,", pVnode->vgId, qId, (void *)pQInfo, - tstrerror(pRsp->code)); + vError("vgId:%d, QInfo:0x%" PRIx64 "-%p register qhandle failed, return to app, code:%s,", pVnode->vgId, qId, + (void *)pQInfo, tstrerror(pRsp->code)); qDestroyQueryInfo(pQInfo); // destroy it directly return pRsp->code; } else { @@ -261,7 +135,7 @@ static int32_t 
vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { if (handle != NULL && vnodeNotifyCurrentQhandle(pRead->rpcHandle, qId, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { - vError("vgId:%d, QInfo:0x%"PRIx64 "-%p, query discarded since link is broken, %p", pVnode->vgId, qId, *handle, + vError("vgId:%d, QInfo:0x%" PRIx64 "-%p, query discarded since link is broken, %p", pVnode->vgId, qId, *handle, pRead->rpcHandle); pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; @@ -274,8 +148,9 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { } if (handle != NULL) { - vTrace("vgId:%d, QInfo:0x%"PRIx64 "-%p, dnode query msg disposed, create qhandle and returns to app", vgId, qId, *handle); - code = vnodePutItemIntoReadQueue(pVnode, handle, pRead->rpcHandle); + vTrace("vgId:%d, QInfo:0x%" PRIx64 "-%p, query msg disposed, create qhandle and returns to app", vgId, qId, + *handle); + code = vnodeReputPutToRQueue(pVnode, handle, pRead->rpcHandle); if (code != TSDB_CODE_SUCCESS) { pRsp->code = code; qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true); @@ -283,14 +158,14 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { } } - int32_t remain = atomic_add_fetch_32(&vNumOfExistedQHandle, 1); + int32_t remain = atomic_add_fetch_32(&pVnode->numOfExistQHandle, 1); vTrace("vgId:%d, new qhandle created, total qhandle:%d", pVnode->vgId, remain); } else { assert(pCont != NULL); - void **qhandle = (void **)pRead->qhandle; + void ** qhandle = (void **)pRead->qhandle; uint64_t qId = 0; - vTrace("vgId:%d, QInfo:%p, dnode continues to exec query", pVnode->vgId, *qhandle); + vTrace("vgId:%d, QInfo:%p, continues to exec query", pVnode->vgId, *qhandle); // In the retrieve blocking model, only 50% CPU will be used in query processing if (tsRetrieveBlockingModel) { @@ -315,10 +190,11 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { // NOTE: set return code to be TSDB_CODE_QRY_HAS_RSP to notify dnode to return msg to 
client code = TSDB_CODE_QRY_HAS_RSP; } else { - //void *h1 = qGetResultRetrieveMsg(*qhandle); + // void *h1 = qGetResultRetrieveMsg(*qhandle); - /* remove this assert, one possible case that will cause h1 not NULL: query thread unlock pQInfo->lock, and then FETCH thread execute twice before query thread reach here */ - //assert(h1 == NULL); + /* remove this assert, one possible case that will cause h1 not NULL: query thread unlock pQInfo->lock, and then + * FETCH thread execute twice before query thread reach here */ + // assert(h1 == NULL); freehandle = qQueryCompleted(*qhandle); } @@ -327,22 +203,24 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { // If the building of result is not required, simply free it. Otherwise, mandatorily free the qhandle if (freehandle || (!buildRes)) { if (freehandle) { - int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1); + int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1); vTrace("vgId:%d, QInfo:%p, start to free qhandle, remain qhandle:%d", pVnode->vgId, *qhandle, remain); } qReleaseQInfo(pVnode->qMgmt, (void **)&qhandle, freehandle); } - } } return code; +#endif + return 0; } -static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { - void *pCont = pRead->pCont; - SRspRet *pRet = &pRead->rspRet; +int32_t vnodeProcessFetchMsg(SVnode *pVnode, SReadMsg *pRead) { +#if 0 + void * pCont = pRead->pCont; + SVnRsp *pRet = &pRead->rspRet; SRetrieveTableMsg *pRetrieve = pCont; pRetrieve->free = htons(pRetrieve->free); @@ -351,7 +229,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { vTrace("vgId:%d, qId:0x%" PRIx64 ", retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, pRetrieve->qId, pRetrieve->free, pRead->rpcHandle); - memset(pRet, 0, sizeof(SRspRet)); + memset(pRet, 0, sizeof(SVnRsp)); terrno = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS; @@ -364,16 +242,17 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, 
SVReadMsg *pRead) { } if (code != TSDB_CODE_SUCCESS) { - vError("vgId:%d, invalid qId in retrieving result, code:%s, QInfo:%" PRIu64, pVnode->vgId, tstrerror(code), pRetrieve->qId); + vError("vgId:%d, invalid qId in retrieving result, code:%s, QInfo:%" PRIu64, pVnode->vgId, tstrerror(code), + pRetrieve->qId); vnodeBuildNoResultQueryRsp(pRet); return code; } // kill current query and free corresponding resources. if (pRetrieve->free == 1) { - int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1); - vWarn("vgId:%d, QInfo:%"PRIx64 "-%p, retrieve msg received to kill query and free qhandle, remain qhandle:%d", pVnode->vgId, pRetrieve->qId, - *handle, remain); + int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1); + vWarn("vgId:%d, QInfo:%" PRIx64 "-%p, retrieve msg received to kill query and free qhandle, remain qhandle:%d", + pVnode->vgId, pRetrieve->qId, *handle, remain); qKillQuery(*handle); qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true); @@ -385,9 +264,9 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { // register the qhandle to connect to quit query immediate if connection is broken if (vnodeNotifyCurrentQhandle(pRead->rpcHandle, pRetrieve->qId, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { - int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1); - vError("vgId:%d, QInfo:%"PRIu64 "-%p, retrieve discarded since link is broken, conn:%p, remain qhandle:%d", pVnode->vgId, pRetrieve->qhandle, - *handle, pRead->rpcHandle, remain); + int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1); + vError("vgId:%d, QInfo:%" PRIu64 "-%p, retrieve discarded since link is broken, conn:%p, remain qhandle:%d", + pVnode->vgId, pRetrieve->qhandle, *handle, pRead->rpcHandle, remain); code = TSDB_CODE_RPC_NETWORK_UNAVAIL; qKillQuery(*handle); @@ -422,29 +301,13 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { // If qhandle is not added into vread queue, the query should be 
completed already or paused with error. // Here free qhandle immediately if (freeHandle) { - int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1); + int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1); vTrace("vgId:%d, QInfo:%p, start to free qhandle, remain qhandle:%d", pVnode->vgId, *handle, remain); qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true); } return code; +#endif + return 0; } -// notify connection(handle) that current qhandle is created, if current connection from -// client is broken, the query needs to be killed immediately. -int32_t vnodeNotifyCurrentQhandle(void *handle, uint64_t qId, void *qhandle, int32_t vgId) { - SRetrieveTableMsg *pMsg = rpcMallocCont(sizeof(SRetrieveTableMsg)); - pMsg->qId = htobe64(qId); - pMsg->header.vgId = htonl(vgId); - pMsg->header.contLen = htonl(sizeof(SRetrieveTableMsg)); - - vTrace("QInfo:0x%"PRIx64"-%p register qhandle to connect:%p", qId, qhandle, handle); - return rpcReportProgress(handle, (char *)pMsg, sizeof(SRetrieveTableMsg)); -} - -void vnodeWaitReadCompleted(SVnodeObj *pVnode) { - while (pVnode->queuedRMsg > 0) { - vTrace("vgId:%d, queued rmsg num:%d", pVnode->vgId, pVnode->queuedRMsg); - taosMsleep(10); - } -} diff --git a/src/vnode/src/vnodeStatus.c b/source/server/vnode/src/vnodeStatus.c similarity index 61% rename from src/vnode/src/vnodeStatus.c rename to source/server/vnode/src/vnodeStatus.c index 1eaddc3d25..b0b3bbfa49 100644 --- a/src/vnode/src/vnodeStatus.c +++ b/source/server/vnode/src/vnodeStatus.c @@ -16,9 +16,9 @@ #define _DEFAULT_SOURCE #include "os.h" #include "taosmsg.h" -#include "query.h" -#include "vnodeStatus.h" +// #include "query.h" #include "vnodeRead.h" +#include "vnodeStatus.h" #include "vnodeWrite.h" char* vnodeStatus[] = { @@ -29,30 +29,32 @@ char* vnodeStatus[] = { "reset" }; -bool vnodeSetInitStatus(SVnodeObj* pVnode) { +bool vnodeSetInitStatus(SVnode* pVnode) { pthread_mutex_lock(&pVnode->statusMutex); pVnode->status = TAOS_VN_STATUS_INIT; 
pthread_mutex_unlock(&pVnode->statusMutex); return true; } -bool vnodeSetReadyStatus(SVnodeObj* pVnode) { +bool vnodeSetReadyStatus(SVnode* pVnode) { bool set = false; pthread_mutex_lock(&pVnode->statusMutex); if (pVnode->status == TAOS_VN_STATUS_INIT || pVnode->status == TAOS_VN_STATUS_READY || - pVnode->status == TAOS_VN_STATUS_UPDATING || pVnode->status == TAOS_VN_STATUS_RESET) { + pVnode->status == TAOS_VN_STATUS_UPDATING) { pVnode->status = TAOS_VN_STATUS_READY; set = true; } +#if 0 qQueryMgmtReOpen(pVnode->qMgmt); +#endif pthread_mutex_unlock(&pVnode->statusMutex); return set; } -static bool vnodeSetClosingStatusImp(SVnodeObj* pVnode) { +static bool vnodeSetClosingStatusImp(SVnode* pVnode) { bool set = false; pthread_mutex_lock(&pVnode->statusMutex); @@ -65,7 +67,7 @@ static bool vnodeSetClosingStatusImp(SVnodeObj* pVnode) { return set; } -bool vnodeSetClosingStatus(SVnodeObj* pVnode) { +bool vnodeSetClosingStatus(SVnode* pVnode) { if (pVnode->status == TAOS_VN_STATUS_CLOSING) return true; @@ -73,15 +75,17 @@ bool vnodeSetClosingStatus(SVnodeObj* pVnode) { taosMsleep(1); } +#if 0 // release local resources only after cutting off outside connections qQueryMgmtNotifyClosed(pVnode->qMgmt); +#endif vnodeWaitReadCompleted(pVnode); vnodeWaitWriteCompleted(pVnode); return true; } -bool vnodeSetUpdatingStatus(SVnodeObj* pVnode) { +bool vnodeSetUpdatingStatus(SVnode* pVnode) { bool set = false; pthread_mutex_lock(&pVnode->statusMutex); @@ -94,35 +98,7 @@ bool vnodeSetUpdatingStatus(SVnodeObj* pVnode) { return set; } -static bool vnodeSetResetStatusImp(SVnodeObj* pVnode) { - bool set = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_READY || pVnode->status == TAOS_VN_STATUS_INIT) { - pVnode->status = TAOS_VN_STATUS_RESET; - set = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return set; -} - -bool vnodeSetResetStatus(SVnodeObj* pVnode) { - while (!vnodeSetResetStatusImp(pVnode)) { - taosMsleep(1); - } - - 
vInfo("vgId:%d, set to reset status", pVnode->vgId); - - // release local resources only after cutting off outside connections - qQueryMgmtNotifyClosed(pVnode->qMgmt); - vnodeWaitReadCompleted(pVnode); - vnodeWaitWriteCompleted(pVnode); - - return true; -} - -bool vnodeInInitStatus(SVnodeObj* pVnode) { +bool vnodeInInitStatus(SVnode* pVnode) { bool in = false; pthread_mutex_lock(&pVnode->statusMutex); @@ -134,7 +110,7 @@ bool vnodeInInitStatus(SVnodeObj* pVnode) { return in; } -bool vnodeInReadyStatus(SVnodeObj* pVnode) { +bool vnodeInReadyStatus(SVnode* pVnode) { bool in = false; pthread_mutex_lock(&pVnode->statusMutex); @@ -146,19 +122,7 @@ bool vnodeInReadyStatus(SVnodeObj* pVnode) { return in; } -bool vnodeInReadyOrUpdatingStatus(SVnodeObj* pVnode) { - bool in = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_READY || pVnode->status == TAOS_VN_STATUS_UPDATING) { - in = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return in; -} - -bool vnodeInClosingStatus(SVnodeObj* pVnode) { +bool vnodeInClosingStatus(SVnode* pVnode) { bool in = false; pthread_mutex_lock(&pVnode->statusMutex); @@ -170,14 +134,3 @@ bool vnodeInClosingStatus(SVnodeObj* pVnode) { return in; } -bool vnodeInResetStatus(SVnodeObj* pVnode) { - bool in = false; - pthread_mutex_lock(&pVnode->statusMutex); - - if (pVnode->status == TAOS_VN_STATUS_RESET) { - in = true; - } - - pthread_mutex_unlock(&pVnode->statusMutex); - return in; -} diff --git a/src/vnode/src/vnodeVersion.c b/source/server/vnode/src/vnodeVersion.c similarity index 96% rename from src/vnode/src/vnodeVersion.c rename to source/server/vnode/src/vnodeVersion.c index d1aee5a3d3..c330533885 100644 --- a/src/vnode/src/vnodeVersion.c +++ b/source/server/vnode/src/vnodeVersion.c @@ -19,7 +19,7 @@ #include "tglobal.h" #include "vnodeVersion.h" -int32_t vnodeReadVersion(SVnodeObj *pVnode) { +int32_t vnodeReadVersion(SVnode *pVnode) { int32_t len = 0; int32_t maxLen = 100; char * content 
= calloc(1, maxLen + 1); @@ -71,7 +71,7 @@ PARSE_VER_ERROR: return terrno; } -int32_t vnodeSaveVersion(SVnodeObj *pVnode) { +int32_t vnodeSaveVersion(SVnode *pVnode) { char file[TSDB_FILENAME_LEN + 30] = {0}; sprintf(file, "%s/vnode%d/version.json", tsVnodeDir, pVnode->vgId); @@ -90,7 +90,7 @@ int32_t vnodeSaveVersion(SVnodeObj *pVnode) { len += snprintf(content + len, maxLen - len, "}\n"); fwrite(content, 1, len, fp); - taosFsync(fileno(fp)); + taosFsyncFile(fileno(fp)); fclose(fp); free(content); terrno = 0; diff --git a/source/server/vnode/src/vnodeWorker.c b/source/server/vnode/src/vnodeWorker.c new file mode 100644 index 0000000000..4a8a3a7049 --- /dev/null +++ b/source/server/vnode/src/vnodeWorker.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "vnodeMain.h" +#include "vnodeWorker.h" + +enum { CLEANUP_TASK = 0, DESTROY_TASK = 1, BACKUP_TASK = 2 }; + +typedef struct { + int32_t vgId; + int32_t code; + int32_t type; + void * rpcHandle; + SVnode *pVnode; +} SVnTask; + +static struct { + SWorkerPool pool; + taos_queue pQueue; +} tsVworker = {0}; + +static void vnodeProcessTaskStart(void *unused, SVnTask *pTask, int32_t qtype) { + pTask->code = 0; + + switch (pTask->type) { + case CLEANUP_TASK: + vnodeCleanUp(pTask->pVnode); + break; + case DESTROY_TASK: + vnodeDestroy(pTask->pVnode); + break; + case BACKUP_TASK: + vnodeBackup(pTask->vgId); + break; + default: + break; + } +} + +static void vnodeProcessTaskEnd(void *unused, SVnTask *pTask, int32_t qtype, int32_t code) { + if (pTask->rpcHandle != NULL) { + SRpcMsg rpcRsp = {.handle = pTask->rpcHandle, .code = pTask->code}; + rpcSendResponse(&rpcRsp); + } + + taosFreeQitem(pTask); +} + +static int32_t vnodeWriteIntoTaskQueue(SVnode *pVnode, int32_t type, void *rpcHandle) { + SVnTask *pTask = taosAllocateQitem(sizeof(SVnTask)); + if (pTask == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY; + + pTask->vgId = pVnode->vgId; + pTask->pVnode = pVnode; + pTask->rpcHandle = rpcHandle; + pTask->type = type; + + + return taosWriteQitem(tsVworker.pQueue, TAOS_QTYPE_RPC, pTask); +} + +void vnodeProcessCleanupTask(SVnode *pVnode) { + vnodeWriteIntoTaskQueue(pVnode, CLEANUP_TASK, NULL); +} + +void vnodeProcessDestroyTask(SVnode *pVnode) { + vnodeWriteIntoTaskQueue(pVnode, DESTROY_TASK, NULL); +} + +void vnodeProcessBackupTask(SVnode *pVnode) { + vnodeWriteIntoTaskQueue(pVnode, BACKUP_TASK, NULL); +} + +int32_t vnodeInitWorker() { + SWorkerPool *pPool = &tsVworker.pool; + pPool->name = "vworker"; + pPool->startFp = (ProcessStartFp)vnodeProcessTaskStart; + pPool->endFp = (ProcessEndFp)vnodeProcessTaskEnd; + pPool->min = 0; + pPool->max = 1; + if (tWorkerInit(pPool) != 0) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + 
tsVworker.pQueue = tWorkerAllocQueue(pPool, NULL); + + vInfo("vworker is initialized, max worker %d", pPool->max); + return TSDB_CODE_SUCCESS; +} + +void vnodeCleanupWorker() { + tWorkerFreeQueue(&tsVworker.pool, tsVworker.pQueue); + tWorkerCleanup(&tsVworker.pool); + tsVworker.pQueue = NULL; + vInfo("vworker is closed"); +} diff --git a/source/server/vnode/src/vnodeWrite.c b/source/server/vnode/src/vnodeWrite.c new file mode 100644 index 0000000000..3c2634a2cf --- /dev/null +++ b/source/server/vnode/src/vnodeWrite.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "tglobal.h" +#include "tqueue.h" +#include "tworker.h" +#include "taosmsg.h" +#include "vnodeMain.h" +#include "vnodeStatus.h" +#include "vnodeWrite.h" +#include "vnodeWriteMsg.h" + +typedef int32_t (*WriteMsgFp)(SVnode *, void *pCont, SVnRsp *); + +typedef struct { + int32_t code; + int8_t qtype; + SVnode * pVnode; + SRpcMsg rpcMsg; + SVnRsp rspRet; + char reserveForSync[24]; + SWalHead walHead; +} SVnWriteMsg; + +static struct { + SWriteWorkerPool pool; + int64_t queuedBytes; + int32_t queuedMsgs; +} tsVwrite = {0}; + +void vnodeStartWrite(SVnode *pVnode) {} +void vnodeStoprite(SVnode *pVnode) {} + +void vnodeWaitWriteCompleted(SVnode *pVnode) { + while (pVnode->queuedWMsg > 0) { + vTrace("vgId:%d, queued wmsg num:%d", pVnode->vgId, pVnode->queuedWMsg); + taosMsleep(10); + } +} + +static int32_t vnodeWriteToWQueue(SVnode *pVnode, SWalHead *pHead, int32_t qtype, SRpcMsg *pRpcMsg) { + if (!(pVnode->accessState & TSDB_VN_WRITE_ACCCESS)) { + vWarn("vgId:%d, no write auth", pVnode->vgId); + return TSDB_CODE_VND_NO_WRITE_AUTH; + } + + if (tsAvailDataDirGB <= tsMinimalDataDirGB) { + vWarn("vgId:%d, failed to write into vwqueue since no diskspace, avail:%fGB", pVnode->vgId, tsAvailDataDirGB); + return TSDB_CODE_VND_NO_DISKSPACE; + } + + if (pHead->len > TSDB_MAX_WAL_SIZE) { + vError("vgId:%d, wal len:%d exceeds limit, hver:%" PRIu64, pVnode->vgId, pHead->len, pHead->version); + return TSDB_CODE_WAL_SIZE_LIMIT; + } + + if (!vnodeInReadyStatus(pVnode)) { + vError("vgId:%d, failed to write into vwqueue, vstatus is %s", pVnode->vgId, vnodeStatus[pVnode->status]); + return TSDB_CODE_APP_NOT_READY; + } + + if (tsVwrite.queuedBytes > tsMaxVnodeQueuedBytes) { + vDebug("vgId:%d, too many bytes:%" PRId64 " in vwqueue, flow control", pVnode->vgId, tsVwrite.queuedBytes); + return TSDB_CODE_VND_IS_FLOWCTRL; + } + + int32_t size = sizeof(SVnWriteMsg) + pHead->len; + SVnWriteMsg *pWrite = taosAllocateQitem(size); + if (pWrite 
== NULL) { + return TSDB_CODE_VND_OUT_OF_MEMORY; + } + + if (pRpcMsg != NULL) { + pWrite->rpcMsg = *pRpcMsg; + } + + memcpy(&pWrite->walHead, pHead, sizeof(SWalHead) + pHead->len); + pWrite->pVnode = pVnode; + pWrite->qtype = qtype; + + atomic_add_fetch_64(&tsVwrite.queuedBytes, size); + atomic_add_fetch_32(&tsVwrite.queuedMsgs, 1); + atomic_add_fetch_32(&pVnode->refCount, 1); + atomic_add_fetch_32(&pVnode->queuedWMsg, 1); + taosWriteQitem(pVnode->wqueue, pWrite->qtype, pWrite); + + return TSDB_CODE_SUCCESS; +} + +static void vnodeFreeFromWQueue(SVnode *pVnode, SVnWriteMsg *pWrite) { + int64_t size = sizeof(SVnWriteMsg) + pWrite->walHead.len; + atomic_sub_fetch_64(&tsVwrite.queuedBytes, size); + atomic_sub_fetch_32(&tsVwrite.queuedMsgs, 1); + atomic_sub_fetch_32(&pVnode->queuedWMsg, 1); + + taosFreeQitem(pWrite); + vnodeRelease(pVnode); +} + +int32_t vnodeProcessWalMsg(SVnode *pVnode, SWalHead *pHead) { + return vnodeWriteToWQueue(pVnode, pHead, TAOS_QTYPE_WAL, NULL); +} + +void vnodeProcessWriteMsg(SRpcMsg *pRpcMsg) { + int32_t code; + + SMsgHead *pMsg = pRpcMsg->pCont; + pMsg->vgId = htonl(pMsg->vgId); + pMsg->contLen = htonl(pMsg->contLen); + + SVnode *pVnode = vnodeAcquireNotClose(pMsg->vgId); + if (pVnode == NULL) { + code = TSDB_CODE_VND_INVALID_VGROUP_ID; + } else { + SWalHead *pHead = (SWalHead *)((char *)pRpcMsg->pCont - sizeof(SWalHead)); + pHead->msgType = pRpcMsg->msgType; + pHead->version = 0; + pHead->len = pMsg->contLen; + code = vnodeWriteToWQueue(pVnode, pHead, TAOS_QTYPE_RPC, pRpcMsg); + } + + if (code != TSDB_CODE_SUCCESS) { + SRpcMsg rpcRsp = {.handle = pRpcMsg->handle, .code = code}; + rpcSendResponse(&rpcRsp); + } + + vnodeRelease(pVnode); + rpcFreeCont(pRpcMsg->pCont); +} + +static bool vnodeProcessWriteStart(SVnode *pVnode, SVnWriteMsg *pWrite, int32_t qtype) { + SWalHead *pHead = &pWrite->walHead; + SVnRsp * pRet = &pWrite->rspRet; + int32_t msgType = pHead->msgType; + + vTrace("vgId:%d, msg:%s will be processed, hver:%" PRIu64, 
pVnode->vgId, taosMsg[pHead->msgType], pHead->version); + + // write into WAL +#if 0 + pWrite->code = walWrite(pVnode->wal, pHead); + if (pWrite->code < 0) return false; +#endif + + pVnode->version = pHead->version; + + // write data locally + switch (msgType) { + case TSDB_MSG_TYPE_SUBMIT: + pRet->len = sizeof(SSubmitRsp); + pRet->rsp = rpcMallocCont(pRet->len); + pWrite->code = vnodeProcessSubmitReq(pVnode, (void*)pHead->cont, pRet->rsp); + break; + case TSDB_MSG_TYPE_MD_CREATE_TABLE: + pWrite->code = vnodeProcessCreateTableReq(pVnode, (void*)pHead->cont, NULL); + break; + case TSDB_MSG_TYPE_MD_DROP_TABLE: + pWrite->code = vnodeProcessDropTableReq(pVnode, (void*)pHead->cont, NULL); + break; + case TSDB_MSG_TYPE_MD_ALTER_TABLE: + pWrite->code = vnodeProcessAlterTableReq(pVnode, (void*)pHead->cont, NULL); + break; + case TSDB_MSG_TYPE_MD_DROP_STABLE: + pWrite->code = vnodeProcessDropStableReq(pVnode, (void*)pHead->cont, NULL); + break; + case TSDB_MSG_TYPE_UPDATE_TAG_VAL: + pWrite->code = vnodeProcessUpdateTagValReq(pVnode, (void*)pHead->cont, NULL); + break; + default: + pWrite->code = TSDB_CODE_VND_MSG_NOT_PROCESSED; + break; + } + + if (pWrite->code < 0) return false; + + // update fync + return (pWrite->code == 0 && msgType != TSDB_MSG_TYPE_SUBMIT); +} + +static void vnodeFsync(SVnode *pVnode, bool fsync) { +#if 0 + walFsync(pVnode->wal, fsync); +#endif +} + +static void vnodeProcessWriteEnd(SVnode *pVnode, SVnWriteMsg *pWrite, int32_t qtype, int32_t code) { + if (qtype == TAOS_QTYPE_RPC) { + SRpcMsg rpcRsp = { + .handle = pWrite->rpcMsg.handle, + .pCont = pWrite->rspRet.rsp, + .contLen = pWrite->rspRet.len, + .code = pWrite->code, + }; + rpcSendResponse(&rpcRsp); + } else { + if (pWrite->rspRet.rsp) { + rpcFreeCont(pWrite->rspRet.rsp); + } + } + vnodeFreeFromWQueue(pVnode, pWrite); +} + +int32_t vnodeInitWrite() { + SWriteWorkerPool *pPool = &tsVwrite.pool; + pPool->name = "vwrite"; + pPool->max = tsNumOfCores; + pPool->startFp = 
(ProcessWriteStartFp)vnodeProcessWriteStart; + pPool->syncFp = (ProcessWriteSyncFp)vnodeFsync; + pPool->endFp = (ProcessWriteEndFp)vnodeProcessWriteEnd; + if (tWriteWorkerInit(pPool) != 0) return -1; + + vInfo("vwrite is initialized, max worker %d", pPool->max); + return TSDB_CODE_SUCCESS; +} + +void vnodeCleanupWrite() { + tWriteWorkerCleanup(&tsVwrite.pool); + vInfo("vwrite is closed"); +} + +taos_queue vnodeAllocWriteQueue(SVnode *pVnode) { return tWriteWorkerAllocQueue(&tsVwrite.pool, pVnode); } + +void vnodeFreeWriteQueue(taos_queue pQueue) { tWriteWorkerFreeQueue(&tsVwrite.pool, pQueue); } \ No newline at end of file diff --git a/source/server/vnode/src/vnodeWriteMsg.c b/source/server/vnode/src/vnodeWriteMsg.c index d4ef4ab337..0fe6fa2bc9 100644 --- a/source/server/vnode/src/vnodeWriteMsg.c +++ b/source/server/vnode/src/vnodeWriteMsg.c @@ -13,9 +13,11 @@ * along with this program. If not, see . */ -#include "vnodeInt.h" +#define _DEFAULT_SOURCE +#include "os.h" +#include "vnodeWriteMsg.h" -int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) { +int32_t vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) { // TODO: Check inputs #if 0 @@ -51,17 +53,27 @@ int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) { return 0; } -int vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp) { +int32_t vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp) { // TODO return 0; } -int vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp) { + int32_t vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp) { // TODO return 0; } -int vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp) { +int32_t vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp) { + // TODO + return 0; +} + +int32_t vnodeProcessDropStableReq(SVnode 
*pVnode, SDropStableReq *pReq, SDropStableRsp *pRsp) { + // TODO + return 0; +} + +int32_t vnodeProcessUpdateTagValReq(SVnode *pVnode, SUpdateTagValReq *pReq, SUpdateTagValRsp *pRsp) { // TODO return 0; } diff --git a/src/inc/twal.h b/src/inc/twal.h deleted file mode 100644 index 868a1fbd78..0000000000 --- a/src/inc/twal.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -#ifndef _TD_WAL_H_ -#define _TD_WAL_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - TAOS_WAL_NOLOG = 0, - TAOS_WAL_WRITE = 1, - TAOS_WAL_FSYNC = 2 -} EWalType; - -typedef enum { - TAOS_WAL_NOT_KEEP = 0, - TAOS_WAL_KEEP = 1 -} EWalKeep; - -typedef struct { - int8_t msgType; - int8_t sver; // sver 2 for WAL SDataRow/SMemRow compatibility - int8_t reserved[2]; - int32_t len; - uint64_t version; - uint32_t signature; - uint32_t cksum; - char cont[]; -} SWalHead; - -typedef struct { - int32_t vgId; - int32_t fsyncPeriod; // millisecond - EWalType walLevel; // wal level - EWalKeep keep; // keep the wal file when closed -} SWalCfg; - -typedef void * twalh; // WAL HANDLE -typedef int32_t FWalWrite(void *ahandle, void *pHead, int32_t qtype, void *pMsg); - -int32_t walInit(); -void walCleanUp(); -twalh walOpen(char *path, SWalCfg *pCfg); -int32_t walAlter(twalh pWal, SWalCfg *pCfg); -void walStop(twalh); -void walClose(twalh); -int32_t walRenew(twalh); -void walRemoveOneOldFile(twalh); -void walRemoveAllOldFiles(twalh); 
-int32_t walWrite(twalh, SWalHead *); -void walFsync(twalh, bool forceFsync); -int32_t walRestore(twalh, void *pVnode, FWalWrite writeFp); -int32_t walGetWalFile(twalh, char *fileName, int64_t *fileId); -uint64_t walGetVersion(twalh); -void walResetVersion(twalh, uint64_t newVer); - -#ifdef __cplusplus -} -#endif - -#endif // _TD_WAL_H_ diff --git a/src/raft/CMakeLists.txt b/src/raft/CMakeLists.txt deleted file mode 100644 index 3dc6601038..0000000000 --- a/src/raft/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -aux_source_directory(source RAFT_SRC) -add_library(raft ${RAFT_SRC}) -target_include_directories( - raft - PUBLIC "${CMAKE_SOURCE_DIR}/include/raft" - PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include" -) \ No newline at end of file diff --git a/src/vnode/CMakeLists.txt b/src/vnode/CMakeLists.txt deleted file mode 100644 index 6238f43d32..0000000000 --- a/src/vnode/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8...3.20) -PROJECT(TDengine) - -INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/cJson/inc) -INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/query/inc) -INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/tsdb/inc) -INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/dnode/inc) -INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/sync/inc) -INCLUDE_DIRECTORIES(${TD_ENTERPRISE_DIR}/src/inc) -INCLUDE_DIRECTORIES(inc) -AUX_SOURCE_DIRECTORY(src SRC) - -ADD_LIBRARY(vnode ${SRC}) -TARGET_LINK_LIBRARIES(vnode tsdb tcq common) diff --git a/src/vnode/inc/vnodeInt.h b/src/vnode/inc/vnodeInt.h deleted file mode 100644 index 4864b79dc4..0000000000 --- a/src/vnode/inc/vnodeInt.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_VNODE_INT_H -#define TDENGINE_VNODE_INT_H - -#ifdef __cplusplus -extern "C" { -#endif -#include "tlog.h" -#include "tsync.h" -#include "tcq.h" -#include "tsdb.h" -#include "vnode.h" - -extern int32_t vDebugFlag; -extern int32_t vNumOfExistedQHandle; // current initialized and existed query handle in current dnode - -#define vFatal(...) { if (vDebugFlag & DEBUG_FATAL) { taosPrintLog("VND FATAL ", 255, __VA_ARGS__); }} -#define vError(...) { if (vDebugFlag & DEBUG_ERROR) { taosPrintLog("VND ERROR ", 255, __VA_ARGS__); }} -#define vWarn(...) { if (vDebugFlag & DEBUG_WARN) { taosPrintLog("VND WARN ", 255, __VA_ARGS__); }} -#define vInfo(...) { if (vDebugFlag & DEBUG_INFO) { taosPrintLog("VND ", 255, __VA_ARGS__); }} -#define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} -#define vTrace(...) 
{ if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }} - -typedef struct { - int32_t vgId; // global vnode group ID - int32_t refCount; // reference count - int64_t queuedWMsgSize; - int32_t queuedWMsg; - int32_t queuedRMsg; - int32_t flowctrlLevel; - int8_t preClose; // drop and close switch - int8_t reserved[3]; - int64_t sequence; // for topic - int8_t status; - int8_t role; - int8_t accessState; - int8_t isFull; - int8_t isCommiting; - int8_t dbReplica; - int8_t dropped; - int8_t dbType; - uint64_t version; // current version - uint64_t cversion; // version while commit start - uint64_t fversion; // version on saved data file - void * wqueue; // write queue - void * qqueue; // read query queue - void * fqueue; // read fetch/cancel queue - void * wal; - void * tsdb; - int64_t sync; - void * events; - void * cq; // continuous query - int32_t dbCfgVersion; - int32_t vgCfgVersion; - STsdbCfg tsdbCfg; - SSyncCfg syncCfg; - SWalCfg walCfg; - void * qMgmt; - char * rootDir; - tsem_t sem; - char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN]; - pthread_mutex_t statusMutex; -} SVnodeObj; - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/vnode/inc/vnodeSync.h b/src/vnode/inc/vnodeSync.h deleted file mode 100644 index 28fb63dd6a..0000000000 --- a/src/vnode/inc/vnodeSync.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_VNODE_SYNC_H -#define TDENGINE_VNODE_SYNC_H - -#ifdef __cplusplus -extern "C" { -#endif -#include "vnodeInt.h" - -uint32_t vnodeGetFileInfo(int32_t vgId, char *name, uint32_t *index, uint32_t eindex, int64_t *size, uint64_t *fver); -int32_t vnodeGetWalInfo(int32_t vgId, char *fileName, int64_t *fileId); -void vnodeNotifyRole(int32_t vgId, int8_t role); -void vnodeCtrlFlow(int32_t vgId, int32_t level); -void vnodeStartSyncFile(int32_t vgId); -void vnodeStopSyncFile(int32_t vgId, uint64_t fversion); -void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code); -int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam); -int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver); - -void vnodeConfirmForward(void *pVnode, uint64_t version, int32_t code, bool force); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file diff --git a/src/vnode/src/vnodeBackup.c b/src/vnode/src/vnodeBackup.c deleted file mode 100644 index 801af42e0e..0000000000 --- a/src/vnode/src/vnodeBackup.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "taoserror.h" -#include "taosmsg.h" -#include "tutil.h" -#include "tqueue.h" -#include "tglobal.h" -#include "tfs.h" -#include "vnodeBackup.h" -#include "vnodeMain.h" - -typedef struct { - int32_t vgId; -} SVBackupMsg; - -typedef struct { - pthread_t thread; - int32_t workerId; -} SVBackupWorker; - -typedef struct { - int32_t num; - SVBackupWorker *worker; -} SVBackupWorkerPool; - -static SVBackupWorkerPool tsVBackupPool; -static taos_qset tsVBackupQset; -static taos_queue tsVBackupQueue; - -static void vnodeProcessBackupMsg(SVBackupMsg *pMsg) { - int32_t vgId = pMsg->vgId; - char newDir[TSDB_FILENAME_LEN] = {0}; - char stagingDir[TSDB_FILENAME_LEN] = {0}; - - sprintf(newDir, "%s/vnode%d", "vnode_bak", vgId); - sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId); - - if (tsEnableVnodeBak) { - tfsRmdir(newDir); - tfsRename(stagingDir, newDir); - } else { - vInfo("vgId:%d, vnode backup not enabled", vgId); - - tfsRmdir(stagingDir); - } -} - -static void *vnodeBackupFunc(void *param) { - setThreadName("vnodeBackup"); - - while (1) { - SVBackupMsg *pMsg = NULL; - if (taosReadQitemFromQset(tsVBackupQset, NULL, (void **)&pMsg, NULL) == 0) { - vDebug("qset:%p, vbackup got no message from qset, exiting", tsVBackupQset); - break; - } - - vTrace("vgId:%d, will be processed in vbackup queue", pMsg->vgId); - vnodeProcessBackupMsg(pMsg); - - vTrace("vgId:%d, disposed in vbackup worker", pMsg->vgId); - taosFreeQitem(pMsg); - } - - return NULL; -} - -static int32_t vnodeStartBackup() { - tsVBackupQueue = taosOpenQueue(); - if (tsVBackupQueue == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY; - - taosAddIntoQset(tsVBackupQset, tsVBackupQueue, NULL); - - for (int32_t i = 0; i < tsVBackupPool.num; ++i) { - SVBackupWorker *pWorker = tsVBackupPool.worker + i; - pWorker->workerId = i; - - pthread_attr_t thAttr; - pthread_attr_init(&thAttr); - pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); - - if 
(pthread_create(&pWorker->thread, &thAttr, vnodeBackupFunc, pWorker) != 0) { - vError("failed to create thread to process vbackup queue, reason:%s", strerror(errno)); - } - - pthread_attr_destroy(&thAttr); - - vDebug("vbackup:%d is launched, total:%d", pWorker->workerId, tsVBackupPool.num); - } - - vDebug("vbackup queue:%p is allocated", tsVBackupQueue); - - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeWriteIntoBackupWorker(int32_t vgId) { - SVBackupMsg *pMsg = taosAllocateQitem(sizeof(SVBackupMsg)); - if (pMsg == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY; - - pMsg->vgId = vgId; - - int32_t code = taosWriteQitem(tsVBackupQueue, TAOS_QTYPE_RPC, pMsg); - if (code == 0) code = TSDB_CODE_DND_ACTION_IN_PROGRESS; - - return code; -} - -int32_t vnodeBackup(int32_t vgId) { - vTrace("vgId:%d, will backup", vgId); - return vnodeWriteIntoBackupWorker(vgId); -} - -int32_t vnodeInitBackup() { - tsVBackupQset = taosOpenQset(); - - tsVBackupPool.num = 1; - tsVBackupPool.worker = calloc(sizeof(SVBackupWorker), tsVBackupPool.num); - - if (tsVBackupPool.worker == NULL) return -1; - for (int32_t i = 0; i < tsVBackupPool.num; ++i) { - SVBackupWorker *pWorker = tsVBackupPool.worker + i; - pWorker->workerId = i; - vDebug("vbackup:%d is created", i); - } - - vDebug("vbackup is initialized, num:%d qset:%p", tsVBackupPool.num, tsVBackupQset); - - return vnodeStartBackup(); -} - -void vnodeCleanupBackup() { - for (int32_t i = 0; i < tsVBackupPool.num; ++i) { - SVBackupWorker *pWorker = tsVBackupPool.worker + i; - if (taosCheckPthreadValid(pWorker->thread)) { - taosQsetThreadResume(tsVBackupQset); - } - vDebug("vbackup:%d is closed", i); - } - - for (int32_t i = 0; i < tsVBackupPool.num; ++i) { - SVBackupWorker *pWorker = tsVBackupPool.worker + i; - vDebug("vbackup:%d start to join", i); - if (taosCheckPthreadValid(pWorker->thread)) { - pthread_join(pWorker->thread, NULL); - } - vDebug("vbackup:%d join success", i); - } - - vDebug("vbackup is closed, qset:%p", tsVBackupQset); - - 
taosCloseQset(tsVBackupQset); - tsVBackupQset = NULL; - - tfree(tsVBackupPool.worker); - - vDebug("vbackup queue:%p is freed", tsVBackupQueue); - taosCloseQueue(tsVBackupQueue); - tsVBackupQueue = NULL; -} diff --git a/src/vnode/src/vnodeMgmt.c b/src/vnode/src/vnodeMgmt.c deleted file mode 100644 index 8d699cb100..0000000000 --- a/src/vnode/src/vnodeMgmt.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "dnode.h" -#include "vnodeStatus.h" -#include "vnodeBackup.h" -#include "vnodeWorker.h" -#include "vnodeRead.h" -#include "vnodeWrite.h" -#include "vnodeMain.h" - -static SHashObj *tsVnodesHash = NULL; - -static int32_t vnodeInitHash(void); -static void vnodeCleanupHash(void); -static void vnodeIncRef(void *ptNode); - -static SStep tsVnodeSteps[] = { - {"vnode-backup", vnodeInitBackup, vnodeCleanupBackup}, - {"vnode-worker", vnodeInitMWorker, vnodeCleanupMWorker}, - {"vnode-write", vnodeInitWrite, vnodeCleanupWrite}, - {"vnode-read", vnodeInitRead, vnodeCleanupRead}, - {"vnode-hash", vnodeInitHash, vnodeCleanupHash}, - {"tsdb-queue", tsdbInitCommitQueue, tsdbDestroyCommitQueue} -}; - -int32_t vnodeInitMgmt() { - int32_t stepSize = sizeof(tsVnodeSteps) / sizeof(SStep); - return dnodeStepInit(tsVnodeSteps, stepSize); -} - -void vnodeCleanupMgmt() { - int32_t stepSize = sizeof(tsVnodeSteps) / sizeof(SStep); - dnodeStepCleanup(tsVnodeSteps, stepSize); 
-} - -static int32_t vnodeInitHash() { - tsVnodesHash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - if (tsVnodesHash == NULL) { - vError("failed to init vnode mgmt"); - return -1; - } - - return 0; -} - -static void vnodeCleanupHash() { - if (tsVnodesHash != NULL) { - vDebug("vnode mgmt is cleanup"); - taosHashCleanup(tsVnodesHash); - tsVnodesHash = NULL; - } -} - -void *vnodeGetWal(void *pVnode) { - return ((SVnodeObj *)pVnode)->wal; -} - -void vnodeAddIntoHash(SVnodeObj *pVnode) { - taosHashPut(tsVnodesHash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *)); -} - -void vnodeRemoveFromHash(SVnodeObj *pVnode) { - taosHashRemove(tsVnodesHash, &pVnode->vgId, sizeof(int32_t)); -} - -static void vnodeIncRef(void *ptNode) { - assert(ptNode != NULL); - - SVnodeObj **ppVnode = (SVnodeObj **)ptNode; - assert(ppVnode); - assert(*ppVnode); - - SVnodeObj *pVnode = *ppVnode; - atomic_add_fetch_32(&pVnode->refCount, 1); - vTrace("vgId:%d, get vnode, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); -} - -void *vnodeAcquire(int32_t vgId) { - SVnodeObj *pVnode = NULL; - if (tsVnodesHash != NULL) { - taosHashGetClone(tsVnodesHash, &vgId, sizeof(int32_t), vnodeIncRef, &pVnode); - } - - if (pVnode == NULL) { - terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; - vDebug("vgId:%d, not exist", vgId); - return NULL; - } - - return pVnode; -} - -void vnodeRelease(void *vparam) { - SVnodeObj *pVnode = vparam; - if (vparam == NULL) return; - - int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1); - int32_t vgId = pVnode->vgId; - - vTrace("vgId:%d, release vnode, refCount:%d pVnode:%p", vgId, refCount, pVnode); - assert(refCount >= 0); - - if (refCount > 0) { - if (vnodeInResetStatus(pVnode) && refCount <= 3) { - tsem_post(&pVnode->sem); - } - } else { - vDebug("vgId:%d, vnode will be destroyed, refCount:%d pVnode:%p", vgId, refCount, pVnode); - vnodeDestroyInMWorker(pVnode); - int32_t count = 
taosHashGetSize(tsVnodesHash); - vDebug("vgId:%d, vnode is destroyed, vnodes:%d", vgId, count); - } -} - -void *vnodeAcquireNotClose(int32_t vgId) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode != NULL && pVnode->preClose == 1) { - vnodeRelease(pVnode); - terrno = TSDB_CODE_VND_INVALID_VGROUP_ID; - vDebug("vgId:%d, not exist, pre closing", vgId); - return NULL; - } - - return pVnode; -} - -static void vnodeBuildVloadMsg(SVnodeObj *pVnode, SStatusMsg *pStatus) { - int64_t totalStorage = 0; - int64_t compStorage = 0; - int64_t pointsWritten = 0; - - if (vnodeInClosingStatus(pVnode)) return; - if (pStatus->openVnodes >= TSDB_MAX_VNODES) return; - - if (pVnode->tsdb) { - tsdbReportStat(pVnode->tsdb, &pointsWritten, &totalStorage, &compStorage); - } - - SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++]; - pLoad->vgId = htonl(pVnode->vgId); - pLoad->dbCfgVersion = htonl(pVnode->dbCfgVersion); - pLoad->vgCfgVersion = htonl(pVnode->vgCfgVersion); - pLoad->totalStorage = htobe64(totalStorage); - pLoad->compStorage = htobe64(compStorage); - pLoad->pointsWritten = htobe64(pointsWritten); - pLoad->vnodeVersion = htobe64(pVnode->version); - pLoad->status = pVnode->status; - pLoad->role = pVnode->role; - pLoad->replica = pVnode->syncCfg.replica; - pLoad->compact = (pVnode->tsdb != NULL) ? 
tsdbGetCompactState(pVnode->tsdb) : 0; -} - -int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) { - void *pIter = taosHashIterate(tsVnodesHash, NULL); - while (pIter) { - SVnodeObj **pVnode = pIter; - if (*pVnode) { - - (*numOfVnodes)++; - if (*numOfVnodes >= TSDB_MAX_VNODES) { - vError("vgId:%d, too many open vnodes, exist:%d max:%d", (*pVnode)->vgId, *numOfVnodes, TSDB_MAX_VNODES); - continue; - } else { - vnodeList[*numOfVnodes - 1] = (*pVnode)->vgId; - } - - } - - pIter = taosHashIterate(tsVnodesHash, pIter); - } - return TSDB_CODE_SUCCESS; -} - -void vnodeBuildStatusMsg(void *param) { - SStatusMsg *pStatus = param; - - void *pIter = taosHashIterate(tsVnodesHash, NULL); - while (pIter) { - SVnodeObj **pVnode = pIter; - if (*pVnode) { - vnodeBuildVloadMsg(*pVnode, pStatus); - } - pIter = taosHashIterate(tsVnodesHash, pIter); - } -} - -void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes) { - for (int32_t i = 0; i < numOfVnodes; ++i) { - pAccess[i].vgId = htonl(pAccess[i].vgId); - SVnodeObj *pVnode = vnodeAcquireNotClose(pAccess[i].vgId); - if (pVnode != NULL) { - pVnode->accessState = pAccess[i].accessState; - if (pVnode->accessState != TSDB_VN_ALL_ACCCESS) { - vDebug("vgId:%d, access state is set to %d", pAccess[i].vgId, pVnode->accessState); - } - vnodeRelease(pVnode); - } - } -} diff --git a/src/vnode/src/vnodeSync.c b/src/vnode/src/vnodeSync.c deleted file mode 100644 index 2bdfd2ead3..0000000000 --- a/src/vnode/src/vnodeSync.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
- * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "taosmsg.h" -#include "query.h" -#include "dnode.h" -#include "vnodeVersion.h" -#include "vnodeMain.h" -#include "vnodeStatus.h" - -uint32_t vnodeGetFileInfo(int32_t vgId, char *name, uint32_t *index, uint32_t eindex, int64_t *size, uint64_t *fver) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while get file info", vgId); - return 0; - } - - *fver = pVnode->fversion; - uint32_t ret = tsdbGetFileInfo(pVnode->tsdb, name, index, eindex, size); - - vnodeRelease(pVnode); - return ret; -} - -int32_t vnodeGetWalInfo(int32_t vgId, char *fileName, int64_t *fileId) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while get wal info", vgId); - return -1; - } - - int32_t code = walGetWalFile(pVnode->wal, fileName, fileId); - - vnodeRelease(pVnode); - return code; -} - -void vnodeNotifyRole(int32_t vgId, int8_t role) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vTrace("vgId:%d, vnode not found while notify role", vgId); - return; - } - if (pVnode->dropped) { - vTrace("vgId:%d, vnode dropped while notify role", vgId); - vnodeRelease(pVnode); - return; - } - - vInfo("vgId:%d, sync role changed from %s to %s", pVnode->vgId, syncRole[pVnode->role], syncRole[role]); - pVnode->role = role; - dnodeSendStatusMsgToMnode(); - - if (pVnode->role == TAOS_SYNC_ROLE_MASTER) { - cqStart(pVnode->cq); - } else { - cqStop(pVnode->cq); - } - - vnodeRelease(pVnode); -} - -void vnodeCtrlFlow(int32_t vgId, int32_t level) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vTrace("vgId:%d, vnode not found while flow ctrl", vgId); - return; - } - if (pVnode->dropped) { - vTrace("vgId:%d, vnode dropped while flow ctrl", vgId); - vnodeRelease(pVnode); - return; - } - - if 
(pVnode->flowctrlLevel != level) { - vDebug("vgId:%d, set flowctrl level from %d to %d", pVnode->vgId, pVnode->flowctrlLevel, level); - pVnode->flowctrlLevel = level; - } - - vnodeRelease(pVnode); -} - -void vnodeStartSyncFile(int32_t vgId) { - SVnodeObj *pVnode = vnodeAcquireNotClose(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while start filesync", vgId); - return; - } - - vInfo("vgId:%d, datafile will be synced", vgId); - vnodeSetResetStatus(pVnode); - - vnodeRelease(pVnode); -} - -void vnodeStopSyncFile(int32_t vgId, uint64_t fversion) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while stop filesync", vgId); - return; - } - - pVnode->fversion = fversion; - pVnode->version = fversion; - vnodeSaveVersion(pVnode); - walResetVersion(pVnode->wal, fversion); - - vInfo("vgId:%d, datafile is synced, fver:%" PRIu64 " vver:%" PRIu64, vgId, fversion, fversion); - vnodeSetReadyStatus(pVnode); - - vnodeRelease(pVnode); -} - -void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while confirm forward", vgId); - } - - if (code == TSDB_CODE_SYN_CONFIRM_EXPIRED && pVnode->status == TAOS_VN_STATUS_CLOSING) { - vDebug("vgId:%d, db:%s, vnode is closing while confirm forward", vgId, pVnode->db); - code = TSDB_CODE_VND_IS_CLOSING; - } - - dnodeSendRpcVWriteRsp(pVnode, wparam, code); - vnodeRelease(pVnode); -} - -int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam) { - SVnodeObj *pVnode = vnodeAcquire(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while write to cache", vgId); - vnodeRelease(pVnode); - return TSDB_CODE_VND_INVALID_VGROUP_ID; - } - - int32_t code = vnodeWriteToWQueue(pVnode, wparam, qtype, rparam); - - vnodeRelease(pVnode); - return code; -} - -int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver) { - SVnodeObj 
*pVnode = vnodeAcquireNotClose(vgId); - if (pVnode == NULL) { - vError("vgId:%d, vnode not found while write to cache", vgId); - return -1; - } - - int32_t code = 0; - if (pVnode->isCommiting) { - vInfo("vgId:%d, vnode is commiting while get version", vgId); - code = -1; - } else { - *fver = pVnode->fversion; - *wver = pVnode->version; - } - - vnodeRelease(pVnode); - return code; -} - -void vnodeConfirmForward(void *vparam, uint64_t version, int32_t code, bool force) { - SVnodeObj *pVnode = vparam; - syncConfirmForward(pVnode->sync, version, code, force); -} diff --git a/src/vnode/src/vnodeWorker.c b/src/vnode/src/vnodeWorker.c deleted file mode 100644 index 7fcc393746..0000000000 --- a/src/vnode/src/vnodeWorker.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "taoserror.h" -#include "taosmsg.h" -#include "tutil.h" -#include "tqueue.h" -#include "tglobal.h" -#include "vnodeWorker.h" -#include "vnodeMain.h" - -typedef enum { - VNODE_WORKER_ACTION_CLEANUP, - VNODE_WORKER_ACTION_DESTROY -} EVMWorkerAction; - -typedef struct { - int32_t vgId; - int32_t code; - void * rpcHandle; - SVnodeObj *pVnode; - EVMWorkerAction action; -} SVMWorkerMsg; - -typedef struct { - pthread_t thread; - int32_t workerId; -} SVMWorker; - -typedef struct { - int32_t curNum; - int32_t maxNum; - SVMWorker *worker; -} SVMWorkerPool; - -static SVMWorkerPool tsVMWorkerPool; -static taos_qset tsVMWorkerQset; -static taos_queue tsVMWorkerQueue; - -static void *vnodeMWorkerFunc(void *param); - -static int32_t vnodeStartMWorker() { - tsVMWorkerQueue = taosOpenQueue(); - if (tsVMWorkerQueue == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY; - - taosAddIntoQset(tsVMWorkerQset, tsVMWorkerQueue, NULL); - - for (int32_t i = tsVMWorkerPool.curNum; i < tsVMWorkerPool.maxNum; ++i) { - SVMWorker *pWorker = tsVMWorkerPool.worker + i; - pWorker->workerId = i; - - pthread_attr_t thAttr; - pthread_attr_init(&thAttr); - pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); - - if (pthread_create(&pWorker->thread, &thAttr, vnodeMWorkerFunc, pWorker) != 0) { - vError("failed to create thread to process vmworker queue, reason:%s", strerror(errno)); - } - - pthread_attr_destroy(&thAttr); - - tsVMWorkerPool.curNum = i + 1; - vDebug("vmworker:%d is launched, total:%d", pWorker->workerId, tsVMWorkerPool.maxNum); - } - - vDebug("vmworker queue:%p is allocated", tsVMWorkerQueue); - return TSDB_CODE_SUCCESS; -} - -int32_t vnodeInitMWorker() { - tsVMWorkerQset = taosOpenQset(); - - tsVMWorkerPool.maxNum = 1; - tsVMWorkerPool.curNum = 0; - tsVMWorkerPool.worker = calloc(sizeof(SVMWorker), tsVMWorkerPool.maxNum); - - if (tsVMWorkerPool.worker == NULL) return -1; - for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) { - 
SVMWorker *pWorker = tsVMWorkerPool.worker + i; - pWorker->workerId = i; - vDebug("vmworker:%d is created", i); - } - - vDebug("vmworker is initialized, num:%d qset:%p", tsVMWorkerPool.maxNum, tsVMWorkerQset); - - return vnodeStartMWorker(); -} - -static void vnodeStopMWorker() { - vDebug("vmworker queue:%p is freed", tsVMWorkerQueue); - taosCloseQueue(tsVMWorkerQueue); - tsVMWorkerQueue = NULL; -} - -void vnodeCleanupMWorker() { - for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) { - SVMWorker *pWorker = tsVMWorkerPool.worker + i; - if (taosCheckPthreadValid(pWorker->thread)) { - taosQsetThreadResume(tsVMWorkerQset); - } - vDebug("vmworker:%d is closed", i); - } - - for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) { - SVMWorker *pWorker = tsVMWorkerPool.worker + i; - vDebug("vmworker:%d start to join", i); - if (taosCheckPthreadValid(pWorker->thread)) { - pthread_join(pWorker->thread, NULL); - } - vDebug("vmworker:%d join success", i); - } - - vDebug("vmworker is closed, qset:%p", tsVMWorkerQset); - - taosCloseQset(tsVMWorkerQset); - tsVMWorkerQset = NULL; - tfree(tsVMWorkerPool.worker); - - vnodeStopMWorker(); -} - -static int32_t vnodeWriteIntoMWorker(SVnodeObj *pVnode, EVMWorkerAction action, void *rpcHandle) { - SVMWorkerMsg *pMsg = taosAllocateQitem(sizeof(SVMWorkerMsg)); - if (pMsg == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY; - - pMsg->vgId = pVnode->vgId; - pMsg->pVnode = pVnode; - pMsg->rpcHandle = rpcHandle; - pMsg->action = action; - - int32_t code = taosWriteQitem(tsVMWorkerQueue, TAOS_QTYPE_RPC, pMsg); - if (code == 0) code = TSDB_CODE_DND_ACTION_IN_PROGRESS; - - return code; -} - -int32_t vnodeCleanupInMWorker(SVnodeObj *pVnode) { - vTrace("vgId:%d, will cleanup in vmworker", pVnode->vgId); - return vnodeWriteIntoMWorker(pVnode, VNODE_WORKER_ACTION_CLEANUP, NULL); -} - -int32_t vnodeDestroyInMWorker(SVnodeObj *pVnode) { - vTrace("vgId:%d, will destroy in vmworker", pVnode->vgId); - return vnodeWriteIntoMWorker(pVnode, VNODE_WORKER_ACTION_DESTROY, 
NULL); -} - -static void vnodeFreeMWorkerMsg(SVMWorkerMsg *pMsg) { - vTrace("vgId:%d, disposed in vmworker", pMsg->vgId); - taosFreeQitem(pMsg); -} - -static void vnodeSendVMWorkerRpcRsp(SVMWorkerMsg *pMsg) { - if (pMsg->rpcHandle != NULL) { - SRpcMsg rpcRsp = {.handle = pMsg->rpcHandle, .code = pMsg->code}; - rpcSendResponse(&rpcRsp); - } - - vnodeFreeMWorkerMsg(pMsg); -} - -static void vnodeProcessMWorkerMsg(SVMWorkerMsg *pMsg) { - pMsg->code = 0; - - switch (pMsg->action) { - case VNODE_WORKER_ACTION_CLEANUP: - vnodeCleanUp(pMsg->pVnode); - break; - case VNODE_WORKER_ACTION_DESTROY: - vnodeDestroy(pMsg->pVnode); - break; - default: - break; - } -} - -static void *vnodeMWorkerFunc(void *param) { - setThreadName("vnodeMWorker"); - - while (1) { - SVMWorkerMsg *pMsg = NULL; - if (taosReadQitemFromQset(tsVMWorkerQset, NULL, (void **)&pMsg, NULL) == 0) { - vDebug("qset:%p, vmworker got no message from qset, exiting", tsVMWorkerQset); - break; - } - - vTrace("vgId:%d, action:%d will be processed in vmworker queue", pMsg->vgId, pMsg->action); - vnodeProcessMWorkerMsg(pMsg); - vnodeSendVMWorkerRpcRsp(pMsg); - } - - return NULL; -} diff --git a/src/vnode/src/vnodeWrite.c b/src/vnode/src/vnodeWrite.c deleted file mode 100644 index 743398d834..0000000000 --- a/src/vnode/src/vnodeWrite.c +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "tp.h" -#include "taosmsg.h" -#include "taoserror.h" -#include "tglobal.h" -#include "tqueue.h" -#include "ttimer.h" -#include "dnode.h" -#include "vnodeStatus.h" - -#define MAX_QUEUED_MSG_NUM 100000 -#define MAX_QUEUED_MSG_SIZE 1024*1024*1024 //1GB - -extern void * tsDnodeTmr; -static int32_t (*vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MAX])(SVnodeObj *, void *pCont, SRspRet *); -static int32_t vnodeProcessSubmitMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodeProcessCreateTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodeProcessDropTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodeProcessAlterTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodeProcessDropStableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodeProcessUpdateTagValMsg(SVnodeObj *pVnode, void *pCont, SRspRet *); -static int32_t vnodePerformFlowCtrl(SVWriteMsg *pWrite); - -int32_t vnodeInitWrite(void) { - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_SUBMIT] = vnodeProcessSubmitMsg; - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = vnodeProcessCreateTableMsg; - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = vnodeProcessDropTableMsg; - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = vnodeProcessAlterTableMsg; - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = vnodeProcessDropStableMsg; - vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = vnodeProcessUpdateTagValMsg; - - return 0; -} - -void vnodeCleanupWrite() {} - -int32_t vnodeProcessWrite(void *vparam, void *wparam, int32_t qtype, void *rparam) { - int32_t code = 0; - SVnodeObj *pVnode = vparam; - SWalHead * pHead = wparam; - SVWriteMsg*pWrite = rparam; - - SRspRet *pRspRet = NULL; - if (pWrite != NULL) pRspRet = &pWrite->rspRet; - - if (vnodeProcessWriteMsgFp[pHead->msgType] == NULL) { - vError("vgId:%d, msg:%s not processed since no handle, qtype:%s hver:%" PRIu64, 
pVnode->vgId, - taosMsg[pHead->msgType], qtypeStr[qtype], pHead->version); - return TSDB_CODE_VND_MSG_NOT_PROCESSED; - } - - vTrace("vgId:%d, msg:%s will be processed in vnode, qtype:%s hver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId, - taosMsg[pHead->msgType], qtypeStr[qtype], pHead->version, pVnode->version); - - if (pHead->version == 0) { // from client or CQ - if (!vnodeInReadyStatus(pVnode)) { - vDebug("vgId:%d, msg:%s not processed since vstatus:%d, qtype:%s hver:%" PRIu64, pVnode->vgId, - taosMsg[pHead->msgType], pVnode->status, qtypeStr[qtype], pHead->version); - return TSDB_CODE_APP_NOT_READY; // it may be in deleting or closing state - } - - if (pVnode->role != TAOS_SYNC_ROLE_MASTER) { - vDebug("vgId:%d, msg:%s not processed since replica:%d role:%s, qtype:%s hver:%" PRIu64, pVnode->vgId, - taosMsg[pHead->msgType], pVnode->syncCfg.replica, syncRole[pVnode->role], qtypeStr[qtype], pHead->version); - return TSDB_CODE_APP_NOT_READY; - } - - // assign version - pHead->version = pVnode->version + 1; - } else { // from wal or forward - // for data from WAL or forward, version may be smaller - if (pHead->version <= pVnode->version) return 0; - } - - // forward to peers, even it is WAL/FWD, it shall be called to update version in sync - int32_t syncCode = 0; - bool force = (pWrite == NULL ? 
false : pWrite->walHead.msgType != TSDB_MSG_TYPE_SUBMIT); - syncCode = syncForwardToPeer(pVnode->sync, pHead, pWrite, qtype, force); - if (syncCode < 0) { - pHead->version = 0; - return syncCode; - } - - // write into WAL - code = walWrite(pVnode->wal, pHead); - if (code < 0) { - if (syncCode > 0) atomic_sub_fetch_32(&pWrite->processedCount, 1); - vError("vgId:%d, hver:%" PRIu64 " vver:%" PRIu64 " code:0x%x", pVnode->vgId, pHead->version, pVnode->version, code); - pHead->version = 0; - return code; - } - - pVnode->version = pHead->version; - - // write data locally - code = (*vnodeProcessWriteMsgFp[pHead->msgType])(pVnode, pHead->cont, pRspRet); - if (code < 0) { - if (syncCode > 0) atomic_sub_fetch_32(&pWrite->processedCount, 1); - return code; - } - - return syncCode; -} - -static int32_t vnodeCheckWrite(SVnodeObj *pVnode) { - if (!(pVnode->accessState & TSDB_VN_WRITE_ACCCESS)) { - vDebug("vgId:%d, no write auth, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - return TSDB_CODE_VND_NO_WRITE_AUTH; - } - - if (pVnode->dbReplica != pVnode->syncCfg.replica && - pVnode->syncCfg.nodeInfo[pVnode->syncCfg.replica - 1].nodeId == dnodeGetDnodeId()) { - vDebug("vgId:%d, vnode is balancing and will be dropped, dbReplica:%d vgReplica:%d, refCount:%d pVnode:%p", - pVnode->vgId, pVnode->dbReplica, pVnode->syncCfg.replica, pVnode->refCount, pVnode); - return TSDB_CODE_VND_IS_BALANCING; - } - - // tsdb may be in reset state - if (pVnode->tsdb == NULL) { - vDebug("vgId:%d, tsdb is null, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode); - return TSDB_CODE_APP_NOT_READY; - } - - if (pVnode->isFull) { - vDebug("vgId:%d, vnode is full, refCount:%d", pVnode->vgId, pVnode->refCount); - return TSDB_CODE_VND_IS_FULL; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeProcessSubmitMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - int32_t code = TSDB_CODE_SUCCESS; - - vTrace("vgId:%d, submit msg is processed", pVnode->vgId); - - if 
(pVnode->dbType == TSDB_DB_TYPE_TOPIC && pVnode->role == TAOS_SYNC_ROLE_MASTER) { - tpUpdateTs(pVnode->vgId, &pVnode->sequence, pCont); - } - - // save insert result into item - SShellSubmitRspMsg *pRsp = NULL; - if (pRet) { - pRet->len = sizeof(SShellSubmitRspMsg); - pRet->rsp = rpcMallocCont(pRet->len); - pRsp = pRet->rsp; - } - - if (tsdbInsertData(pVnode->tsdb, pCont, pRsp) < 0) code = terrno; - - return code; -} - -static int32_t vnodeProcessCreateTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - int code = TSDB_CODE_SUCCESS; - - STableCfg *pCfg = tsdbCreateTableCfgFromMsg((SMDCreateTableMsg *)pCont); - if (pCfg == NULL) { - ASSERT(terrno != 0); - return terrno; - } - - if (tsdbCreateTable(pVnode->tsdb, pCfg) < 0) { - code = terrno; - ASSERT(code != 0); - } - - tsdbClearTableCfg(pCfg); - return code; -} - -static int32_t vnodeProcessDropTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - SMDDropTableMsg *pTable = pCont; - int32_t code = TSDB_CODE_SUCCESS; - - vDebug("vgId:%d, table:%s, start to drop", pVnode->vgId, pTable->tableFname); - STableId tableId = {.uid = htobe64(pTable->uid), .tid = htonl(pTable->tid)}; - - if (tsdbDropTable(pVnode->tsdb, tableId) < 0) code = terrno; - - return code; -} - -static int32_t vnodeProcessAlterTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - // TODO: disposed in tsdb - // STableCfg *pCfg = tsdbCreateTableCfgFromMsg((SMDCreateTableMsg *)pCont); - // if (pCfg == NULL) return terrno; - // if (tsdbCreateTable(pVnode->tsdb, pCfg) < 0) code = terrno; - - // tsdbClearTableCfg(pCfg); - vDebug("vgId:%d, alter table msg is received", pVnode->vgId); - return TSDB_CODE_SUCCESS; -} - -static int32_t vnodeProcessDropStableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - SDropSTableMsg *pTable = pCont; - int32_t code = TSDB_CODE_SUCCESS; - - vDebug("vgId:%d, stable:%s, start to drop", pVnode->vgId, pTable->tableFname); - - STableId stableId = {.uid = htobe64(pTable->uid), .tid = -1}; - - if 
(tsdbDropTable(pVnode->tsdb, stableId) < 0) code = terrno; - - vDebug("vgId:%d, stable:%s, drop stable result:%s", pVnode->vgId, pTable->tableFname, tstrerror(code)); - - return code; -} - -static int32_t vnodeProcessUpdateTagValMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) { - if (tsdbUpdateTableTagValue(pVnode->tsdb, (SUpdateTableTagValMsg *)pCont) < 0) { - return terrno; - } - return TSDB_CODE_SUCCESS; -} - -static SVWriteMsg *vnodeBuildVWriteMsg(SVnodeObj *pVnode, SWalHead *pHead, int32_t qtype, SRpcMsg *pRpcMsg) { - if (pHead->len > TSDB_MAX_WAL_SIZE) { - vError("vgId:%d, wal len:%d exceeds limit, hver:%" PRIu64, pVnode->vgId, pHead->len, pHead->version); - terrno = TSDB_CODE_WAL_SIZE_LIMIT; - return NULL; - } - - int32_t size = sizeof(SVWriteMsg) + pHead->len; - SVWriteMsg *pWrite = taosAllocateQitem(size); - if (pWrite == NULL) { - terrno = TSDB_CODE_VND_OUT_OF_MEMORY; - return NULL; - } - - if (pRpcMsg != NULL) { - pWrite->rpcMsg = *pRpcMsg; - } - - memcpy(&pWrite->walHead, pHead, sizeof(SWalHead) + pHead->len); - pWrite->pVnode = pVnode; - pWrite->qtype = qtype; - - atomic_add_fetch_32(&pVnode->refCount, 1); - - return pWrite; -} - -static int32_t vnodeWriteToWQueueImp(SVWriteMsg *pWrite) { - SVnodeObj *pVnode = pWrite->pVnode; - - if (pWrite->qtype == TAOS_QTYPE_RPC) { - int32_t code = vnodeCheckWrite(pVnode); - if (code != TSDB_CODE_SUCCESS) { - vError("vgId:%d, failed to write into vwqueue since %s", pVnode->vgId, tstrerror(code)); - taosFreeQitem(pWrite); - vnodeRelease(pVnode); - return code; - } - } - - if (tsAvailDataDirGB <= tsMinimalDataDirGB) { - vError("vgId:%d, failed to write into vwqueue since no diskspace, avail:%fGB", pVnode->vgId, tsAvailDataDirGB); - taosFreeQitem(pWrite); - vnodeRelease(pVnode); - return TSDB_CODE_VND_NO_DISKSPACE; - } - - if (!vnodeInReadyOrUpdatingStatus(pVnode)) { - vError("vgId:%d, failed to write into vwqueue, vstatus is %s, refCount:%d pVnode:%p", pVnode->vgId, - vnodeStatus[pVnode->status], pVnode->refCount, 
pVnode); - taosFreeQitem(pWrite); - vnodeRelease(pVnode); - return TSDB_CODE_APP_NOT_READY; - } - - int32_t queued = atomic_add_fetch_32(&pVnode->queuedWMsg, 1); - int64_t queuedSize = atomic_add_fetch_64(&pVnode->queuedWMsgSize, pWrite->walHead.len); - - if (queued > MAX_QUEUED_MSG_NUM || queuedSize > MAX_QUEUED_MSG_SIZE) { - int32_t ms = (queued / MAX_QUEUED_MSG_NUM) * 10 + 3; - if (ms > 100) ms = 100; - vDebug("vgId:%d, too many msg:%d in vwqueue, flow control %dms", pVnode->vgId, queued, ms); - taosMsleep(ms); - } - - vTrace("vgId:%d, write into vwqueue, refCount:%d queued:%d size:%" PRId64, pVnode->vgId, pVnode->refCount, - pVnode->queuedWMsg, pVnode->queuedWMsgSize); - - taosWriteQitem(pVnode->wqueue, pWrite->qtype, pWrite); - return TSDB_CODE_SUCCESS; -} - -int32_t vnodeWriteToWQueue(void *vparam, void *wparam, int32_t qtype, void *rparam) { - SVnodeObj *pVnode = vparam; - if (qtype == TAOS_QTYPE_RPC) { - if (!vnodeInReadyStatus(pVnode)) { - return TSDB_CODE_APP_NOT_READY; // it may be in deleting or closing state - } - - if (pVnode->role != TAOS_SYNC_ROLE_MASTER) { - return TSDB_CODE_APP_NOT_READY; - } - } - - SVWriteMsg *pWrite = vnodeBuildVWriteMsg(vparam, wparam, qtype, rparam); - if (pWrite == NULL) { - assert(terrno != 0); - return terrno; - } - - int32_t code = vnodePerformFlowCtrl(pWrite); - if (code != 0) return 0; - - return vnodeWriteToWQueueImp(pWrite); -} - -void vnodeFreeFromWQueue(void *vparam, SVWriteMsg *pWrite) { - SVnodeObj *pVnode = vparam; - if (pVnode) { - int32_t queued = atomic_sub_fetch_32(&pVnode->queuedWMsg, 1); - int64_t queuedSize = atomic_sub_fetch_64(&pVnode->queuedWMsgSize, pWrite->walHead.len); - - vTrace("vgId:%d, msg:%p, app:%p, free from vwqueue, queued:%d size:%" PRId64, pVnode->vgId, pWrite, - pWrite->rpcMsg.ahandle, queued, queuedSize); - } - - taosFreeQitem(pWrite); - vnodeRelease(pVnode); -} - -static void vnodeFlowCtrlMsgToWQueue(void *param, void *tmrId) { - SVWriteMsg *pWrite = param; - SVnodeObj * pVnode = 
pWrite->pVnode; - int32_t code = TSDB_CODE_VND_IS_SYNCING; - - if (pVnode->flowctrlLevel <= 0) code = TSDB_CODE_VND_IS_FLOWCTRL; - - pWrite->processedCount++; - if (pWrite->processedCount >= 100) { - vError("vgId:%d, msg:%p, failed to process since %s, retry:%d", pVnode->vgId, pWrite, tstrerror(code), - pWrite->processedCount); - void *handle = pWrite->rpcMsg.handle; - taosFreeQitem(pWrite); - vnodeRelease(pVnode); - SRpcMsg rpcRsp = {.handle = handle, .code = code}; - rpcSendResponse(&rpcRsp); - } else { - code = vnodePerformFlowCtrl(pWrite); - if (code == 0) { - vDebug("vgId:%d, msg:%p, write into vwqueue after flowctrl, retry:%d", pVnode->vgId, pWrite, - pWrite->processedCount); - pWrite->processedCount = 0; - void *handle = pWrite->rpcMsg.handle; - code = vnodeWriteToWQueueImp(pWrite); - if (code != TSDB_CODE_SUCCESS) { - SRpcMsg rpcRsp = {.handle = handle, .code = code}; - rpcSendResponse(&rpcRsp); - } - } - } -} - -static int32_t vnodePerformFlowCtrl(SVWriteMsg *pWrite) { - SVnodeObj *pVnode = pWrite->pVnode; - if (pWrite->qtype != TAOS_QTYPE_RPC) return 0; - if (pVnode->queuedWMsg < MAX_QUEUED_MSG_NUM && pVnode->queuedWMsgSize < MAX_QUEUED_MSG_SIZE && - pVnode->flowctrlLevel <= 0) - return 0; - - if (tsEnableFlowCtrl == 0) { - int32_t ms = (int32_t)pow(2, pVnode->flowctrlLevel + 2); - if (ms > 100) ms = 100; - vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl for %d ms", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle, ms); - taosMsleep(ms); - return 0; - } else { - void *unUsedTimerId = NULL; - taosTmrReset(vnodeFlowCtrlMsgToWQueue, 100, pWrite, tsDnodeTmr, &unUsedTimerId); - - vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl, retry:%d", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle, - pWrite->processedCount); - return TSDB_CODE_VND_ACTION_IN_PROGRESS; - } -} - -void vnodeWaitWriteCompleted(SVnodeObj *pVnode) { - int32_t extraSleep = 0; - while (pVnode->queuedWMsg > 0) { - vTrace("vgId:%d, queued wmsg num:%d", pVnode->vgId, pVnode->queuedWMsg); - 
taosMsleep(10); - extraSleep = 1; - } - - if (extraSleep) - taosMsleep(900); -} -- GitLab