提交 28341e31 编写于 作者: S Shengliang Guan

[TD-10432] add vnode files to vnode module

上级 a2e4026d
......@@ -990,6 +990,22 @@ typedef struct {
/* data */
} SAlterTableRsp;
typedef struct {
/* data */
} SDropStableReq;
typedef struct {
/* data */
} SDropStableRsp;
typedef struct {
/* data */
} SUpdateTagValReq;
typedef struct {
/* data */
} SUpdateTagValRsp;
#pragma pack(pop)
#ifdef __cplusplus
......
......@@ -57,6 +57,7 @@ extern int32_t tsCompressMsgSize;
extern int32_t tsCompressColData;
extern int32_t tsMaxNumOfDistinctResults;
extern char tsTempDir[];
extern int64_t tsMaxVnodeQueuedBytes;
//query buffer management
extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing
......
......@@ -60,6 +60,7 @@ float tsRatioOfQueryCores = 1.0f;
int8_t tsDaylight = 0;
int8_t tsEnableCoreFile = 0;
int32_t tsMaxBinaryDisplayWidth = 30;
int64_t tsMaxVnodeQueuedBytes = 1024*1024*1024; //1GB
/*
* denote if the server needs to compress response message at the application layer to client, including query rsp,
......
......@@ -15,4 +15,6 @@ target_link_libraries(
PUBLIC meta
PUBLIC tq
PUBLIC tsdb
PUBLIC wal
PUBLIC cjson
)
\ No newline at end of file
......@@ -16,7 +16,6 @@
#ifndef _TD_VNODE_INT_H_
#define _TD_VNODE_INT_H_
#include "os.h"
#include "amalloc.h"
#include "meta.h"
......@@ -25,20 +24,83 @@
#include "trpc.h"
#include "tsdb.h"
#include "vnode.h"
#include "tlog.h"
#include "tqueue.h"
#include "wal.h"
#include "tworker.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct SVnode {
extern int32_t vDebugFlag;
#define vFatal(...) { if (vDebugFlag & DEBUG_FATAL) { taosPrintLog("VND FATAL ", 255, __VA_ARGS__); }}
#define vError(...) { if (vDebugFlag & DEBUG_ERROR) { taosPrintLog("VND ERROR ", 255, __VA_ARGS__); }}
#define vWarn(...) { if (vDebugFlag & DEBUG_WARN) { taosPrintLog("VND WARN ", 255, __VA_ARGS__); }}
#define vInfo(...) { if (vDebugFlag & DEBUG_INFO) { taosPrintLog("VND ", 255, __VA_ARGS__); }}
#define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }}
#define vTrace(...) { if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }}
typedef struct {
SMeta * pMeta;
STsdb * pTsdb;
STQ * pTQ;
SMemAllocator *allocator;
int32_t vgId; // global vnode group ID
int32_t refCount; // reference count
int64_t queuedWMsgSize;
int32_t queuedWMsg;
int32_t queuedRMsg;
int32_t numOfExistQHandle; // current initialized and existed query handle in current dnode
int32_t flowctrlLevel;
int8_t preClose; // drop and close switch
int8_t reserved[3];
int64_t sequence; // for topic
int8_t status;
int8_t role;
int8_t accessState;
int8_t isFull;
int8_t isCommiting;
int8_t dbReplica;
int8_t dropped;
int8_t dbType;
uint64_t version; // current version
uint64_t cversion; // version while commit start
uint64_t fversion; // version on saved data file
void * wqueue; // write queue
void * qqueue; // read query queue
void * fqueue; // read fetch/cancel queue
void * wal;
void * tsdb;
int64_t sync;
void * events;
void * cq; // continuous query
int32_t dbCfgVersion;
int32_t vgCfgVersion;
STsdbCfg tsdbCfg;
#if 0
SSyncCfg syncCfg;
#endif
SWalCfg walCfg;
void * qMgmt;
char * rootDir;
tsem_t sem;
char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN];
pthread_mutex_t statusMutex;
} SVnode;
typedef struct {
int32_t len;
void * rsp;
void * qhandle; // used by query and retrieve msg
} SVnRsp;
void vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port);
#ifdef __cplusplus
}
#endif
#endif /*_TD_VNODE_INT_H_*/
\ No newline at end of file
#endif /*_TD_VNODE_INT_H_*/
......@@ -13,25 +13,35 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_MAIN_H
#define TDENGINE_VNODE_MAIN_H
#ifndef _TD_VNODE_MAIN_H_
#define _TD_VNODE_MAIN_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitMain();
void vnodeCleanupMain();
int32_t vnodeCreate(SCreateVnodeMsg *pVnodeCfg);
int32_t vnodeDrop(int32_t vgId);
int32_t vnodeOpen(int32_t vgId);
int32_t vnodeAlter(void *pVnode, SCreateVnodeMsg *pVnodeCfg);
int32_t vnodeAlter(SVnode *pVnode, SCreateVnodeMsg *pVnodeCfg);
int32_t vnodeSync(int32_t vgId);
int32_t vnodeClose(int32_t vgId);
void vnodeCleanUp(SVnodeObj *pVnode);
void vnodeDestroy(SVnodeObj *pVnode);
void vnodeCleanUp(SVnode *pVnode);
void vnodeDestroy(SVnode *pVnode);
int32_t vnodeCompact(int32_t vgId);
void vnodeBackup(int32_t vgId);
void vnodeGetStatus(struct SStatusMsg *status);
SVnode *vnodeAcquire(int32_t vgId);
SVnode *vnodeAcquireNotClose(int32_t vgId);
void vnodeRelease(SVnode *pVnode);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_MAIN_H_*/
......@@ -13,19 +13,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_CFG_H
#define TDENGINE_VNODE_CFG_H
#ifndef _TD_VNODE_MGMT_H_
#define _TD_VNODE_MGMT_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeReadCfg(SVnodeObj *pVnode);
int32_t vnodeWriteCfg(SCreateVnodeMsg *pVnodeCfg);
int32_t vnodeInitMgmt();
void vnodeCleanupMgmt();
void vnodeProcessMgmtMsg(SRpcMsg *pMsg);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_MGMT_H_*/
......@@ -13,24 +13,23 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_READ_H
#define TDENGINE_VNODE_READ_H
#ifndef _TD_VNODE_MGMT_MSG_H_
#define _TD_VNODE_MGMT_MSG_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitRead(void);
void vnodeCleanupRead(void);
int32_t vnodeWriteToRQueue(void *pVnode, void *pCont, int32_t contLen, int8_t qtype, void *rparam);
void vnodeFreeFromRQueue(void *pVnode, SVReadMsg *pRead);
int32_t vnodeProcessRead(void *pVnode, SVReadMsg *pRead);
void vnodeWaitReadCompleted(SVnodeObj *pVnode);
int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg);
int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg);
int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg);
int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg);
int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg);
int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_MGMT_H_*/
......@@ -13,30 +13,30 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_MGMT_H
#define TDENGINE_VNODE_MGMT_H
#ifndef _TD_VNODE_READ_H_
#define _TD_VNODE_READ_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitMgmt();
void vnodeCleanupMgmt();
int32_t vnodeInitRead();
void vnodeCleanupRead();
taos_queue vnodeAllocQueryQueue(SVnode *pVnode);
taos_queue vnodeAllocFetchQueue(SVnode *pVnode);
void vnodeFreeQueryQueue(taos_queue pQueue);
void vnodeFreeFetchQueue(taos_queue pQueue);
void* vnodeAcquire(int32_t vgId);
void vnodeRelease(void *pVnode);
void* vnodeGetWal(void *pVnode);
void vnodeProcessReadMsg(SRpcMsg *pRpcMsg);
int32_t vnodeReputPutToRQueue(SVnode *pVnode, void **qhandle, void *ahandle);
int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes);
void vnodeBuildStatusMsg(void *pStatus);
void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes);
void vnodeAddIntoHash(SVnodeObj* pVnode);
void vnodeRemoveFromHash(SVnodeObj * pVnode);
void vnodeStartRead(SVnode *pVnode);
void vnodeStopRead(SVnode *pVnode);
void vnodeWaitReadCompleted(SVnode *pVnode);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_READ_H_*/
......@@ -13,24 +13,32 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_WRITE_H
#define TDENGINE_VNODE_WRITE_H
#ifndef _TD_VNODE_READ_MSG_H_
#define _TD_VNODE_READ_MSG_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitWrite(void);
void vnodeCleanupWrite(void);
typedef struct SReadMsg {
int32_t code;
int32_t contLen;
int8_t qtype;
int8_t msgType;
SVnode *pVnode;
SVnRsp rspRet;
void * rpcHandle;
void * rpcAhandle;
void * qhandle;
char pCont[];
} SReadMsg;
int32_t vnodeWriteToWQueue(void *pVnode, void *pHead, int32_t qtype, void *pRpcMsg);
void vnodeFreeFromWQueue(void *pVnode, SVWriteMsg *pWrite);
int32_t vnodeProcessWrite(void *pVnode, void *pHead, int32_t qtype, void *pRspRet);
void vnodeWaitWriteCompleted(SVnodeObj *pVnode);
int32_t vnodeProcessQueryMsg(SVnode *pVnode, SReadMsg *pRead);
int32_t vnodeProcessFetchMsg(SVnode *pVnode, SReadMsg *pRead);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#endif /*_TD_VNODE_READ_MSG_H_*/
......@@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_STATUS_H
#define TDENGINE_VNODE_STATUS_H
#ifndef _TD_VNODE_STATUS_H_
#define _TD_VNODE_STATUS_H_
#ifdef __cplusplus
extern "C" {
......@@ -25,24 +25,23 @@ typedef enum _VN_STATUS {
TAOS_VN_STATUS_INIT = 0,
TAOS_VN_STATUS_READY = 1,
TAOS_VN_STATUS_CLOSING = 2,
TAOS_VN_STATUS_UPDATING = 3,
TAOS_VN_STATUS_RESET = 4,
TAOS_VN_STATUS_UPDATING = 3
} EVnodeStatus;
bool vnodeSetInitStatus(SVnodeObj* pVnode);
bool vnodeSetReadyStatus(SVnodeObj* pVnode);
bool vnodeSetClosingStatus(SVnodeObj* pVnode);
bool vnodeSetUpdatingStatus(SVnodeObj* pVnode);
bool vnodeSetResetStatus(SVnodeObj* pVnode);
// vnodeStatus
extern char* vnodeStatus[];
bool vnodeInInitStatus(SVnodeObj* pVnode);
bool vnodeInReadyStatus(SVnodeObj* pVnode);
bool vnodeInReadyOrUpdatingStatus(SVnodeObj* pVnode);
bool vnodeInClosingStatus(SVnodeObj* pVnode);
bool vnodeInResetStatus(SVnodeObj* pVnode);
bool vnodeSetInitStatus(SVnode* pVnode);
bool vnodeSetReadyStatus(SVnode* pVnode);
bool vnodeSetClosingStatus(SVnode* pVnode);
bool vnodeSetUpdatingStatus(SVnode* pVnode);
bool vnodeInInitStatus(SVnode* pVnode);
bool vnodeInReadyStatus(SVnode* pVnode);
bool vnodeInClosingStatus(SVnode* pVnode);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#endif /*_TD_VNODE_STATUS_H_*/
\ No newline at end of file
......@@ -13,19 +13,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_VERSION_H
#define TDENGINE_VNODE_VERSION_H
#ifndef _TD_VNODE_VERSION_H_
#define _TD_VNODE_VERSION_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeReadVersion(SVnodeObj *pVnode);
int32_t vnodeSaveVersion(SVnodeObj *pVnode);
int32_t vnodeReadVersion(SVnode *pVnode);
int32_t vnodeSaveVersion(SVnode *pVnode);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_VERSION_H_*/
......@@ -13,21 +13,22 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_WORKER_H
#define TDENGINE_VNODE_WORKER_H
#ifndef _TD_VNODE_WORKER_H_
#define _TD_VNODE_WORKER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitMWorker();
void vnodeCleanupMWorker();
int32_t vnodeCleanupInMWorker(SVnodeObj *pVnode);
int32_t vnodeDestroyInMWorker(SVnodeObj *pVnode);
int32_t vnodeInitWorker();
void vnodeCleanupWorker();
void vnodeProcessCleanupTask(SVnode *pVnode);
void vnodeProcessDestroyTask(SVnode *pVnode);
void vnodeProcessBackupTask(SVnode *pVnode);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
#endif /*_TD_VNODE_WORKER_H_*/
\ No newline at end of file
......@@ -13,20 +13,28 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_BACKUP_H
#define TDENGINE_VNODE_BACKUP_H
#ifndef _TD_VNODE_WRITE_H_
#define _TD_VNODE_WRITE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeInitBackup();
void vnodeCleanupBackup();
int32_t vnodeBackup(int32_t vgId);
int32_t vnodeInitWrite();
void vnodeCleanupWrite();
taos_queue vnodeAllocWriteQueue(SVnode *pVnode);
void vnodeFreeWriteQueue(taos_queue pQueue);
void vnodeProcessWriteMsg(SRpcMsg *pRpcMsg);
int32_t vnodeProcessWalMsg(SVnode *pVnode, SWalHead *pHead);
void vnodeStartWrite(SVnode *pVnode);
void vnodeStopWrite(SVnode *pVnode);
void vnodeWaitWriteCompleted(SVnode *pVnode);
#ifdef __cplusplus
}
#endif
#endif
#endif /*_TD_VNODE_WRITE_H_*/
\ No newline at end of file
......@@ -13,9 +13,23 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_VNODE_WRITE_H_
#define _TD_VNODE_WRITE_H_
#ifndef _TD_VNODE_WRITE_MSG_H_
#define _TD_VNODE_WRITE_MSG_H_
int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp);
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
int32_t vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp);
int32_t vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp);
int32_t vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp);
int32_t vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp);
int32_t vnodeProcessDropStableReq(SVnode *pVnode, SDropStableReq *pReq, SDropStableRsp *pRsp);
int32_t vnodeProcessUpdateTagValReq(SVnode *pVnode, SUpdateTagValReq *pReq, SUpdateTagValRsp *pRsp);
#endif /*_TD_VNODE_WRITE_H_*/
\ No newline at end of file
#ifdef __cplusplus
}
#endif
#endif /*_TD_VNODE_WRITE_MSG_H_*/
\ No newline at end of file
......@@ -17,10 +17,10 @@
#include "os.h"
#include "cJSON.h"
#include "tglobal.h"
#include "dnode.h"
#include "vnodeCfg.h"
static void vnodeLoadCfg(SVnodeObj *pVnode, SCreateVnodeMsg* vnodeMsg) {
static void vnodeLoadCfg(SVnode *pVnode, SCreateVnodeMsg *vnodeMsg) {
#if 0
tstrncpy(pVnode->db, vnodeMsg->db, sizeof(pVnode->db));
pVnode->dbCfgVersion = vnodeMsg->cfg.dbCfgVersion;
pVnode->vgCfgVersion = vnodeMsg->cfg.vgCfgVersion;
......@@ -56,9 +56,11 @@ static void vnodeLoadCfg(SVnodeObj *pVnode, SCreateVnodeMsg* vnodeMsg) {
SNodeInfo *node = &pVnode->syncCfg.nodeInfo[i];
vInfo("vgId:%d, dnode:%d, %s:%u", pVnode->vgId, node->nodeId, node->nodeFqdn, node->nodePort);
}
#endif
}
int32_t vnodeReadCfg(SVnodeObj *pVnode) {
int32_t vnodeReadCfg(SVnode *pVnode) {
#if 0
int32_t ret = TSDB_CODE_VND_APP_ERROR;
int32_t len = 0;
int maxLen = 1000;
......@@ -66,6 +68,7 @@ int32_t vnodeReadCfg(SVnodeObj *pVnode) {
cJSON * root = NULL;
FILE * fp = NULL;
bool nodeChanged = false;
SCreateVnodeMsg vnodeMsg;
char file[TSDB_FILENAME_LEN + 30] = {0};
......@@ -286,8 +289,13 @@ int32_t vnodeReadCfg(SVnodeObj *pVnode) {
}
tstrncpy(node->nodeEp, nodeEp->valuestring, TSDB_EP_LEN);
bool changed = dnodeCheckEpChanged(node->nodeId, node->nodeEp);
if (changed) nodeChanged = changed;
char nodeEpStr[TSDB_EP_LEN];
vnodeGetDnodeEp(node->nodeId, nodeEpStr, NULL, NULL);
bool changed = (strcmp(node->nodeEp, nodeEpStr) != 0);
if (changed) {
tstrncpy(node->nodeEp, nodeEpStr, TSDB_EP_LEN);
nodeChanged = changed;
}
}
ret = TSDB_CODE_SUCCESS;
......@@ -350,7 +358,7 @@ int32_t vnodeWriteCfg(SCreateVnodeMsg *pMsg) {
len += snprintf(content + len, maxLen - len, " \"nodeInfos\": [{\n");
for (int32_t i = 0; i < pMsg->cfg.vgReplica; i++) {
SVnodeDesc *node = &pMsg->nodes[i];
dnodeUpdateEp(node->nodeId, node->nodeEp, NULL, NULL);
vnodeGetDnodeEp(node->nodeId, node->nodeEp, NULL, NULL);
len += snprintf(content + len, maxLen - len, " \"nodeId\": %d,\n", node->nodeId);
len += snprintf(content + len, maxLen - len, " \"nodeEp\": \"%s\"\n", node->nodeEp);
if (i < pMsg->cfg.vgReplica - 1) {
......@@ -368,5 +376,6 @@ int32_t vnodeWriteCfg(SCreateVnodeMsg *pMsg) {
terrno = 0;
vInfo("vgId:%d, successed to write %s", pMsg->cfg.vgId, file);
#endif
return TSDB_CODE_SUCCESS;
}
......@@ -13,16 +13,39 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "vnodeInt.h"
#define _DEFAULT_SOURCE
#include "os.h"
#include "tstep.h"
#include "vnodeMain.h"
#include "vnodeMgmt.h"
#include "vnodeRead.h"
#include "vnodeWorker.h"
#include "vnodeWrite.h"
int32_t vnodeInit(SVnodePara para) { return 0; }
static struct {
struct SSteps *steps;
SVnodeFp fp;
} tsVint;
void vnodeCleanup() {}
int32_t vnodeInit(SVnodePara para) {
tsVint.fp = para.fp;
int32_t vnodeGetStatistics(SVnodeStat *stat) { return 0; }
struct SSteps *steps = taosStepInit(8, NULL);
if (steps == NULL) return -1;
void vnodeGetStatus(struct SStatusMsg *status) {}
taosStepAdd(steps, "vnode-main", vnodeInitMain, vnodeCleanupMain);
taosStepAdd(steps, "vnode-worker",vnodeInitWorker, vnodeCleanupWorker);
taosStepAdd(steps, "vnode-read", vnodeInitRead, vnodeCleanupRead);
taosStepAdd(steps, "vnode-mgmt", vnodeInitMgmt, vnodeCleanupMgmt);
taosStepAdd(steps, "vnode-write", vnodeInitWrite, vnodeCleanupWrite);
// taosStepAdd(steps, "vnode-queue", tsdbInitCommitQueue, tsdbDestroyCommitQueue);
void vnodeSetAccess(struct SVgroupAccess *access, int32_t numOfVnodes) {}
tsVint.steps = steps;
return taosStepExec(tsVint.steps);
}
void vnodeProcessMsg(SRpcMsg *msg) {}
void vnodeCleanup() { taosStepCleanup(tsVint.steps); }
void vnodeGetDnodeEp(int32_t dnodeId, char *ep, char *fqdn, uint16_t *port) {
return (*tsVint.fp.GetDnodeEp)(dnodeId, ep, fqdn, port);
}
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnodeMain.h"
#include "vnodeMgmt.h"
#include "vnodeMgmtMsg.h"
typedef struct {
SRpcMsg rpcMsg;
char pCont[];
} SVnMgmtMsg;
static struct {
SWorkerPool pool;
taos_queue pQueue;
int32_t (*msgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *);
} tsVmgmt = {0};
static int32_t vnodeProcessMgmtStart(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype) {
SRpcMsg *pMsg = &pMgmt->rpcMsg;
int32_t msgType = pMsg->msgType;
if (tsVmgmt.msgFp[msgType]) {
vTrace("msg:%p, ahandle:%p type:%s will be processed", pMgmt, pMsg->ahandle, taosMsg[msgType]);
return (*tsVmgmt.msgFp[msgType])(pMsg);
} else {
vError("msg:%p, ahandle:%p type:%s not processed since no handle", pMgmt, pMsg->ahandle, taosMsg[msgType]);
return TSDB_CODE_DND_MSG_NOT_PROCESSED;
}
}
static void vnodeSendMgmtEnd(void *unused, SVnMgmtMsg *pMgmt, int32_t qtype, int32_t code) {
SRpcMsg *pMsg = &pMgmt->rpcMsg;
SRpcMsg rsp = {0};
rsp.code = code;
vTrace("msg:%p, is processed, code:0x%x", pMgmt, rsp.code);
if (rsp.code != TSDB_CODE_DND_ACTION_IN_PROGRESS) {
rsp.handle = pMsg->handle;
rsp.pCont = NULL;
rpcSendResponse(&rsp);
}
taosFreeQitem(pMsg);
}
static void vnodeInitMgmtReqFp() {
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = vnodeProcessCreateVnodeMsg;
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = vnodeProcessAlterVnodeMsg;
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_SYNC_VNODE] = vnodeProcessSyncVnodeMsg;
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_COMPACT_VNODE]= vnodeProcessCompactVnodeMsg;
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = vnodeProcessDropVnodeMsg;
tsVmgmt.msgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = vnodeProcessAlterStreamReq;
}
static int32_t vnodeWriteToMgmtQueue(SRpcMsg *pMsg) {
int32_t size = sizeof(SVnMgmtMsg) + pMsg->contLen;
SVnMgmtMsg *pMgmt = taosAllocateQitem(size);
if (pMgmt == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY;
pMgmt->rpcMsg = *pMsg;
pMgmt->rpcMsg.pCont = pMgmt->pCont;
memcpy(pMgmt->pCont, pMsg->pCont, pMsg->contLen);
taosWriteQitem(tsVmgmt.pQueue, TAOS_QTYPE_RPC, pMgmt);
return TSDB_CODE_SUCCESS;
}
void vnodeProcessMgmtMsg(SRpcMsg *pMsg) {
int32_t code = vnodeWriteToMgmtQueue(pMsg);
if (code != TSDB_CODE_SUCCESS) {
SRpcMsg rsp = {.handle = pMsg->handle, .code = code};
rpcSendResponse(&rsp);
}
rpcFreeCont(pMsg->pCont);
}
int32_t vnodeInitMgmt() {
vnodeInitMgmtReqFp();
SWorkerPool *pPool = &tsVmgmt.pool;
pPool->name = "vmgmt";
pPool->startFp = (ProcessStartFp)vnodeProcessMgmtStart;
pPool->endFp = (ProcessEndFp)vnodeSendMgmtEnd;
pPool->min = 1;
pPool->max = 1;
if (tWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY;
}
tsVmgmt.pQueue = tWorkerAllocQueue(pPool, NULL);
vInfo("vmgmt is initialized, max worker %d", pPool->max);
return TSDB_CODE_SUCCESS;
}
void vnodeCleanupMgmt() {
tWorkerFreeQueue(&tsVmgmt.pool, tsVmgmt.pQueue);
tWorkerCleanup(&tsVmgmt.pool);
tsVmgmt.pQueue = NULL;
vInfo("vmgmt is closed");
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnodeMain.h"
#include "vnodeMgmtMsg.h"
static SCreateVnodeMsg* vnodeParseVnodeMsg(SRpcMsg *rpcMsg) {
SCreateVnodeMsg *pCreate = rpcMsg->pCont;
pCreate->cfg.vgId = htonl(pCreate->cfg.vgId);
pCreate->cfg.dbCfgVersion = htonl(pCreate->cfg.dbCfgVersion);
pCreate->cfg.vgCfgVersion = htonl(pCreate->cfg.vgCfgVersion);
pCreate->cfg.maxTables = htonl(pCreate->cfg.maxTables);
pCreate->cfg.cacheBlockSize = htonl(pCreate->cfg.cacheBlockSize);
pCreate->cfg.totalBlocks = htonl(pCreate->cfg.totalBlocks);
pCreate->cfg.daysPerFile = htonl(pCreate->cfg.daysPerFile);
pCreate->cfg.daysToKeep1 = htonl(pCreate->cfg.daysToKeep1);
pCreate->cfg.daysToKeep2 = htonl(pCreate->cfg.daysToKeep2);
pCreate->cfg.daysToKeep = htonl(pCreate->cfg.daysToKeep);
pCreate->cfg.minRowsPerFileBlock = htonl(pCreate->cfg.minRowsPerFileBlock);
pCreate->cfg.maxRowsPerFileBlock = htonl(pCreate->cfg.maxRowsPerFileBlock);
pCreate->cfg.fsyncPeriod = htonl(pCreate->cfg.fsyncPeriod);
pCreate->cfg.commitTime = htonl(pCreate->cfg.commitTime);
for (int32_t j = 0; j < pCreate->cfg.vgReplica; ++j) {
pCreate->nodes[j].nodeId = htonl(pCreate->nodes[j].nodeId);
}
return pCreate;
}
int32_t vnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg) {
SCreateVnodeMsg *pCreate = vnodeParseVnodeMsg(rpcMsg);
SVnode *pVnode = vnodeAcquire(pCreate->cfg.vgId);
if (pVnode != NULL) {
vDebug("vgId:%d, already exist, return success", pCreate->cfg.vgId);
vnodeRelease(pVnode);
return TSDB_CODE_SUCCESS;
} else {
vDebug("vgId:%d, create vnode msg is received", pCreate->cfg.vgId);
return vnodeCreate(pCreate);
}
}
int32_t vnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg) {
SAlterVnodeMsg *pAlter = vnodeParseVnodeMsg(rpcMsg);
void *pVnode = vnodeAcquireNotClose(pAlter->cfg.vgId);
if (pVnode != NULL) {
vDebug("vgId:%d, alter vnode msg is received", pAlter->cfg.vgId);
int32_t code = vnodeAlter(pVnode, pAlter);
vnodeRelease(pVnode);
return code;
} else {
vInfo("vgId:%d, vnode not exist, can't alter it", pAlter->cfg.vgId);
return TSDB_CODE_VND_INVALID_VGROUP_ID;
}
}
int32_t vnodeProcessSyncVnodeMsg(SRpcMsg *rpcMsg) {
SSyncVnodeMsg *pSyncVnode = rpcMsg->pCont;
pSyncVnode->vgId = htonl(pSyncVnode->vgId);
return vnodeSync(pSyncVnode->vgId);
}
int32_t vnodeProcessCompactVnodeMsg(SRpcMsg *rpcMsg) {
SCompactVnodeMsg *pCompactVnode = rpcMsg->pCont;
pCompactVnode->vgId = htonl(pCompactVnode->vgId);
return vnodeCompact(pCompactVnode->vgId);
}
int32_t vnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg) {
SDropVnodeMsg *pDrop = rpcMsg->pCont;
pDrop->vgId = htonl(pDrop->vgId);
return vnodeDrop(pDrop->vgId);
}
int32_t vnodeProcessAlterStreamReq(SRpcMsg *pMsg) { return 0; }
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "tglobal.h"
// #include "query.h"
#include "vnodeMain.h"
#include "vnodeRead.h"
#include "vnodeReadMsg.h"
#include "vnodeStatus.h"
static struct {
SWorkerPool query;
SWorkerPool fetch;
int32_t (*msgFp[TSDB_MSG_TYPE_MAX])(SVnode *, struct SReadMsg *);
} tsVread = {0};
void vnodeStartRead(SVnode *pVnode) {}
void vnodeStopRead(SVnode *pVnode) {}
void vnodeWaitReadCompleted(SVnode *pVnode) {
while (pVnode->queuedRMsg > 0) {
vTrace("vgId:%d, queued rmsg num:%d", pVnode->vgId, pVnode->queuedRMsg);
taosMsleep(10);
}
}
static int32_t vnodeWriteToRQueue(SVnode *pVnode, void *pCont, int32_t contLen, int8_t qtype, SRpcMsg *pRpcMsg) {
if (pVnode->dropped) {
return TSDB_CODE_APP_NOT_READY;
}
#if 0
if (!((pVnode->role == TAOS_SYNC_ROLE_MASTER) || (tsEnableSlaveQuery && pVnode->role == TAOS_SYNC_ROLE_SLAVE))) {
return TSDB_CODE_APP_NOT_READY;
}
#endif
if (!vnodeInReadyStatus(pVnode)) {
vDebug("vgId:%d, failed to write into vread queue, vnode status is %s", pVnode->vgId, vnodeStatus[pVnode->status]);
return TSDB_CODE_APP_NOT_READY;
}
int32_t size = sizeof(SReadMsg) + contLen;
SReadMsg *pRead = taosAllocateQitem(size);
if (pRead == NULL) {
return TSDB_CODE_VND_OUT_OF_MEMORY;
}
if (pRpcMsg != NULL) {
pRead->rpcHandle = pRpcMsg->handle;
pRead->rpcAhandle = pRpcMsg->ahandle;
pRead->msgType = pRpcMsg->msgType;
pRead->code = pRpcMsg->code;
}
if (contLen != 0) {
pRead->contLen = contLen;
memcpy(pRead->pCont, pCont, contLen);
} else {
pRead->qhandle = pCont;
}
pRead->qtype = qtype;
atomic_add_fetch_32(&pVnode->refCount, 1);
atomic_add_fetch_32(&pVnode->queuedRMsg, 1);
if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_MSG_TYPE_FETCH) {
return taosWriteQitem(pVnode->fqueue, qtype, pRead);
} else {
return taosWriteQitem(pVnode->qqueue, qtype, pRead);
}
}
static void vnodeFreeFromRQueue(SVnode *pVnode, SReadMsg *pRead) {
atomic_sub_fetch_32(&pVnode->queuedRMsg, 1);
taosFreeQitem(pRead);
vnodeRelease(pVnode);
}
int32_t vnodeReputPutToRQueue(SVnode *pVnode, void **qhandle, void *ahandle) {
SRpcMsg rpcMsg = {0};
rpcMsg.msgType = TSDB_MSG_TYPE_QUERY;
rpcMsg.ahandle = ahandle;
int32_t code = vnodeWriteToRQueue(pVnode, qhandle, 0, TAOS_QTYPE_QUERY, &rpcMsg);
if (code == TSDB_CODE_SUCCESS) {
vTrace("QInfo:%p add to vread queue for exec query", *qhandle);
}
return code;
}
void vnodeProcessReadMsg(SRpcMsg *pMsg) {
int32_t queuedMsgNum = 0;
int32_t leftLen = pMsg->contLen;
int32_t code = TSDB_CODE_VND_INVALID_VGROUP_ID;
char * pCont = pMsg->pCont;
while (leftLen > 0) {
SMsgHead *pHead = (SMsgHead *)pCont;
pHead->vgId = htonl(pHead->vgId);
pHead->contLen = htonl(pHead->contLen);
assert(pHead->contLen > 0);
SVnode *pVnode = vnodeAcquireNotClose(pHead->vgId);
if (pVnode != NULL) {
code = vnodeWriteToRQueue(pVnode, pCont, pHead->contLen, TAOS_QTYPE_RPC, pMsg);
if (code == TSDB_CODE_SUCCESS) queuedMsgNum++;
vnodeRelease(pVnode);
}
leftLen -= pHead->contLen;
pCont -= pHead->contLen;
}
if (queuedMsgNum == 0) {
SRpcMsg rpcRsp = {.handle = pMsg->handle, .code = code};
rpcSendResponse(&rpcRsp);
}
rpcFreeCont(pMsg->pCont);
}
static void vnodeInitReadMsgFp() {
tsVread.msgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessQueryMsg;
tsVread.msgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessFetchMsg;
}
static int32_t vnodeProcessReadStart(SVnode *pVnode, SReadMsg *pRead, int32_t qtype) {
int32_t msgType = pRead->msgType;
if (tsVread.msgFp[msgType] == NULL) {
vDebug("vgId:%d, msgType:%s not processed, no handle", pVnode->vgId, taosMsg[msgType]);
return TSDB_CODE_VND_MSG_NOT_PROCESSED;
} else {
vTrace("msg:%p, app:%p type:%s will be processed", pRead, pRead->rpcAhandle, taosMsg[msgType]);
}
return (*tsVread.msgFp[msgType])(pVnode, pRead);
}
static void vnodeSendReadRsp(SReadMsg *pRead, int32_t code) {
SRpcMsg rpcRsp = {
.handle = pRead->rpcHandle,
.pCont = pRead->rspRet.rsp,
.contLen = pRead->rspRet.len,
.code = code,
};
rpcSendResponse(&rpcRsp);
}
static void vnodeProcessReadEnd(SVnode *pVnode, SReadMsg *pRead, int32_t qtype, int32_t code) {
if (qtype == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) {
vnodeSendReadRsp(pRead, code);
} else {
if (code == TSDB_CODE_QRY_HAS_RSP) {
vnodeSendReadRsp(pRead, pRead->code);
} else { // code == TSDB_CODE_QRY_NOT_READY, do not return msg to client
assert(pRead->rpcHandle == NULL || (pRead->rpcHandle != NULL && pRead->msgType == 5));
}
}
vnodeFreeFromRQueue(pVnode, pRead);
}
int32_t vnodeInitRead() {
vnodeInitReadMsgFp();
int32_t maxFetchThreads = 4;
float threadsForQuery = MAX(tsNumOfCores * tsRatioOfQueryCores, 1);
SWorkerPool *pPool = &tsVread.query;
pPool->name = "vquery";
pPool->startFp = (ProcessStartFp)vnodeProcessReadStart;
pPool->endFp = (ProcessEndFp)vnodeProcessReadEnd;
pPool->min = (int32_t)threadsForQuery;
pPool->max = pPool->min;
if (tWorkerInit(pPool) != 0) return -1;
pPool = &tsVread.fetch;
pPool->name = "vfetch";
pPool->startFp = (ProcessStartFp)vnodeProcessReadStart;
pPool->endFp = (ProcessEndFp)vnodeProcessReadEnd;
pPool->min = MIN(maxFetchThreads, tsNumOfCores);
pPool->max = pPool->min;
if (tWorkerInit(pPool) != 0) return -1;
vInfo("vread is initialized, max worker %d", pPool->max);
return 0;
}
void vnodeCleanupRead() {
tWorkerCleanup(&tsVread.fetch);
tWorkerCleanup(&tsVread.query);
vInfo("vread is closed");
}
taos_queue vnodeAllocQueryQueue(SVnode *pVnode) { return tWorkerAllocQueue(&tsVread.query, pVnode); }
taos_queue vnodeAllocFetchQueue(SVnode *pVnode) { return tWorkerAllocQueue(&tsVread.fetch, pVnode); }
void vnodeFreeQueryQueue(taos_queue pQueue) { tWorkerFreeQueue(&tsVread.query, pQueue); }
void vnodeFreeFetchQueue(taos_queue pQueue) { tWorkerFreeQueue(&tsVread.fetch, pQueue); }
......@@ -16,155 +16,26 @@
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "tqueue.h"
#include "tglobal.h"
#include "query.h"
// #include "query.h"
#include "vnodeStatus.h"
#include "vnodeRead.h"
#include "vnodeReadMsg.h"
int32_t vNumOfExistedQHandle; // current initialized and existed query handle in current dnode
static int32_t (*vnodeProcessReadMsgFp[TSDB_MSG_TYPE_MAX])(SVnodeObj *pVnode, SVReadMsg *pRead);
static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead);
static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead);
static int32_t vnodeNotifyCurrentQhandle(void* handle, uint64_t qId, void* qhandle, int32_t vgId);
int32_t vnodeInitRead(void) {
vnodeProcessReadMsgFp[TSDB_MSG_TYPE_QUERY] = vnodeProcessQueryMsg;
vnodeProcessReadMsgFp[TSDB_MSG_TYPE_FETCH] = vnodeProcessFetchMsg;
return 0;
}
void vnodeCleanupRead() {}
//
// After the fetch request enters the vnode queue, if the vnode cannot provide services, the process function are
// still required, or there will be a deadlock, so we don’t do any check here, but put the check codes before the
// request enters the queue
//
int32_t vnodeProcessRead(void *vparam, SVReadMsg *pRead) {
SVnodeObj *pVnode = vparam;
int32_t msgType = pRead->msgType;
if (vnodeProcessReadMsgFp[msgType] == NULL) {
vDebug("vgId:%d, msgType:%s not processed, no handle", pVnode->vgId, taosMsg[msgType]);
return TSDB_CODE_VND_MSG_NOT_PROCESSED;
}
return (*vnodeProcessReadMsgFp[msgType])(pVnode, pRead);
}
static int32_t vnodeCheckRead(SVnodeObj *pVnode) {
if (!vnodeInReadyStatus(pVnode)) {
vDebug("vgId:%d, vnode status is %s, refCount:%d pVnode:%p", pVnode->vgId, vnodeStatus[pVnode->status],
pVnode->refCount, pVnode);
return TSDB_CODE_APP_NOT_READY;
}
// tsdb may be in reset state
if (pVnode->tsdb == NULL) {
vDebug("vgId:%d, tsdb is null, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode);
return TSDB_CODE_APP_NOT_READY;
}
if (pVnode->role == TAOS_SYNC_ROLE_MASTER) {
return TSDB_CODE_SUCCESS;
}
if (tsEnableSlaveQuery && pVnode->role == TAOS_SYNC_ROLE_SLAVE) {
return TSDB_CODE_SUCCESS;
}
vDebug("vgId:%d, replica:%d role:%s, refCount:%d pVnode:%p, cant provide query service", pVnode->vgId, pVnode->syncCfg.replica,
syncRole[pVnode->role], pVnode->refCount, pVnode);
return TSDB_CODE_APP_NOT_READY;
}
void vnodeFreeFromRQueue(void *vparam, SVReadMsg *pRead) {
SVnodeObj *pVnode = vparam;
atomic_sub_fetch_32(&pVnode->queuedRMsg, 1);
vTrace("vgId:%d, free from vrqueue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, pVnode->queuedRMsg);
taosFreeQitem(pRead);
vnodeRelease(pVnode);
}
static SVReadMsg *vnodeBuildVReadMsg(SVnodeObj *pVnode, void *pCont, int32_t contLen, int8_t qtype, SRpcMsg *pRpcMsg) {
int32_t size = sizeof(SVReadMsg) + contLen;
SVReadMsg *pRead = taosAllocateQitem(size);
if (pRead == NULL) {
terrno = TSDB_CODE_VND_OUT_OF_MEMORY;
return NULL;
}
if (pRpcMsg != NULL) {
pRead->rpcHandle = pRpcMsg->handle;
pRead->rpcAhandle = pRpcMsg->ahandle;
pRead->msgType = pRpcMsg->msgType;
pRead->code = pRpcMsg->code;
}
if (contLen != 0) {
pRead->contLen = contLen;
memcpy(pRead->pCont, pCont, contLen);
} else {
pRead->qhandle = pCont;
}
pRead->qtype = qtype;
atomic_add_fetch_32(&pVnode->refCount, 1);
return pRead;
}
int32_t vnodeWriteToRQueue(void *vparam, void *pCont, int32_t contLen, int8_t qtype, void *rparam) {
SVnodeObj *pVnode = vparam;
if (pVnode->dropped) {
return TSDB_CODE_APP_NOT_READY;
}
SVReadMsg *pRead = vnodeBuildVReadMsg(vparam, pCont, contLen, qtype, rparam);
if (pRead == NULL) {
assert(terrno != 0);
return terrno;
}
int32_t code = vnodeCheckRead(pVnode);
if (code != TSDB_CODE_SUCCESS) {
taosFreeQitem(pRead);
vnodeRelease(pVnode);
return code;
}
atomic_add_fetch_32(&pVnode->queuedRMsg, 1);
if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_MSG_TYPE_FETCH) {
vTrace("vgId:%d, write into vfetch queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount,
pVnode->queuedRMsg);
return taosWriteQitem(pVnode->fqueue, qtype, pRead);
} else {
vTrace("vgId:%d, write into vquery queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount,
pVnode->queuedRMsg);
return taosWriteQitem(pVnode->qqueue, qtype, pRead);
}
}
static int32_t vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void **qhandle, void *ahandle) {
SRpcMsg rpcMsg = {0};
rpcMsg.msgType = TSDB_MSG_TYPE_QUERY;
rpcMsg.ahandle = ahandle;
int32_t code = vnodeWriteToRQueue(pVnode, qhandle, 0, TAOS_QTYPE_QUERY, &rpcMsg);
if (code == TSDB_CODE_SUCCESS) {
vTrace("QInfo:%p add to vread queue for exec query", *qhandle);
}
#if 0
// notify connection(handle) that current qhandle is created, if current connection from
// client is broken, the query needs to be killed immediately.
static int32_t vnodeNotifyCurrentQhandle(void *handle, uint64_t qId, void *qhandle, int32_t vgId) {
SRetrieveTableMsg *pMsg = rpcMallocCont(sizeof(SRetrieveTableMsg));
pMsg->qId = htobe64(qId);
pMsg->header.vgId = htonl(vgId);
pMsg->header.contLen = htonl(sizeof(SRetrieveTableMsg));
return code;
vTrace("QInfo:0x%" PRIx64 "-%p register qhandle to connect:%p", qId, qhandle, handle);
return rpcReportProgress(handle, (char *)pMsg, sizeof(SRetrieveTableMsg));
}
/**
*
* @param pRet response message object
* @param pVnode the vnode object
* @param handle qhandle for executing query
......@@ -172,14 +43,16 @@ static int32_t vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void **qhandle, void
* @param ahandle sqlObj address at client side
* @return
*/
static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, void **handle, bool *freeHandle, void *ahandle) {
static int32_t vnodeDumpQueryResult(SVnRsp *pRet, void *pVnode, uint64_t qId, void **handle, bool *freeHandle,
void *ahandle) {
bool continueExec = false;
int32_t code = TSDB_CODE_SUCCESS;
if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) {
if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) ==
TSDB_CODE_SUCCESS) {
if (continueExec) {
*freeHandle = false;
code = vnodePutItemIntoReadQueue(pVnode, handle, ahandle);
code = vnodeReputPutToRQueue(pVnode, handle, ahandle);
if (code != TSDB_CODE_SUCCESS) {
*freeHandle = true;
return code;
......@@ -188,7 +61,7 @@ static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, v
}
} else {
*freeHandle = true;
vTrace("QInfo:0x%"PRIx64"-%p exec completed, free handle:%d", qId, *handle, *freeHandle);
vTrace("QInfo:0x%" PRIx64 "-%p exec completed, free handle:%d", qId, *handle, *freeHandle);
}
} else {
SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
......@@ -203,7 +76,7 @@ static int32_t vnodeDumpQueryResult(SRspRet *pRet, void *pVnode, uint64_t qId, v
return code;
}
static void vnodeBuildNoResultQueryRsp(SRspRet *pRet) {
static void vnodeBuildNoResultQueryRsp(SVnRsp *pRet) {
pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp));
pRet->len = sizeof(SRetrieveTableRsp);
......@@ -212,15 +85,16 @@ static void vnodeBuildNoResultQueryRsp(SRspRet *pRet) {
pRsp->completed = true;
}
#endif
static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
int32_t vnodeProcessQueryMsg(SVnode *pVnode, SReadMsg *pRead) {
#if 0
void * pCont = pRead->pCont;
int32_t contLen = pRead->contLen;
SRspRet *pRet = &pRead->rspRet;
SVnRsp *pRet = &pRead->rspRet;
SQueryTableMsg *pQueryTableMsg = (SQueryTableMsg *)pCont;
memset(pRet, 0, sizeof(SRspRet));
memset(pRet, 0, sizeof(SVnRsp));
// qHandle needs to be freed correctly
if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) {
......@@ -231,13 +105,13 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
void ** handle = NULL;
if (contLen != 0) {
qinfo_t pQInfo = NULL;
qinfo_t pQInfo = NULL;
uint64_t qId = genQueryId();
code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo, qId);
SQueryTableRsp *pRsp = (SQueryTableRsp *)rpcMallocCont(sizeof(SQueryTableRsp));
pRsp->code = code;
pRsp->qId = 0;
pRsp->qId = 0;
pRet->len = sizeof(SQueryTableRsp);
pRet->rsp = pRsp;
......@@ -250,8 +124,8 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
pRsp->code = terrno;
terrno = 0;
vError("vgId:%d, QInfo:0x%"PRIx64 "-%p register qhandle failed, return to app, code:%s,", pVnode->vgId, qId, (void *)pQInfo,
tstrerror(pRsp->code));
vError("vgId:%d, QInfo:0x%" PRIx64 "-%p register qhandle failed, return to app, code:%s,", pVnode->vgId, qId,
(void *)pQInfo, tstrerror(pRsp->code));
qDestroyQueryInfo(pQInfo); // destroy it directly
return pRsp->code;
} else {
......@@ -261,7 +135,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
if (handle != NULL &&
vnodeNotifyCurrentQhandle(pRead->rpcHandle, qId, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) {
vError("vgId:%d, QInfo:0x%"PRIx64 "-%p, query discarded since link is broken, %p", pVnode->vgId, qId, *handle,
vError("vgId:%d, QInfo:0x%" PRIx64 "-%p, query discarded since link is broken, %p", pVnode->vgId, qId, *handle,
pRead->rpcHandle);
pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL;
......@@ -274,8 +148,9 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
}
if (handle != NULL) {
vTrace("vgId:%d, QInfo:0x%"PRIx64 "-%p, dnode query msg disposed, create qhandle and returns to app", vgId, qId, *handle);
code = vnodePutItemIntoReadQueue(pVnode, handle, pRead->rpcHandle);
vTrace("vgId:%d, QInfo:0x%" PRIx64 "-%p, query msg disposed, create qhandle and returns to app", vgId, qId,
*handle);
code = vnodeReputPutToRQueue(pVnode, handle, pRead->rpcHandle);
if (code != TSDB_CODE_SUCCESS) {
pRsp->code = code;
qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true);
......@@ -283,14 +158,14 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
}
}
int32_t remain = atomic_add_fetch_32(&vNumOfExistedQHandle, 1);
int32_t remain = atomic_add_fetch_32(&pVnode->numOfExistQHandle, 1);
vTrace("vgId:%d, new qhandle created, total qhandle:%d", pVnode->vgId, remain);
} else {
assert(pCont != NULL);
void **qhandle = (void **)pRead->qhandle;
void ** qhandle = (void **)pRead->qhandle;
uint64_t qId = 0;
vTrace("vgId:%d, QInfo:%p, dnode continues to exec query", pVnode->vgId, *qhandle);
vTrace("vgId:%d, QInfo:%p, continues to exec query", pVnode->vgId, *qhandle);
// In the retrieve blocking model, only 50% CPU will be used in query processing
if (tsRetrieveBlockingModel) {
......@@ -315,10 +190,11 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
// NOTE: set return code to be TSDB_CODE_QRY_HAS_RSP to notify dnode to return msg to client
code = TSDB_CODE_QRY_HAS_RSP;
} else {
//void *h1 = qGetResultRetrieveMsg(*qhandle);
// void *h1 = qGetResultRetrieveMsg(*qhandle);
/* remove this assert, one possible case that will cause h1 not NULL: query thread unlock pQInfo->lock, and then FETCH thread execute twice before query thread reach here */
//assert(h1 == NULL);
/* remove this assert, one possible case that will cause h1 not NULL: query thread unlock pQInfo->lock, and then
* FETCH thread execute twice before query thread reach here */
// assert(h1 == NULL);
freehandle = qQueryCompleted(*qhandle);
}
......@@ -327,22 +203,24 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
// If the building of result is not required, simply free it. Otherwise, mandatorily free the qhandle
if (freehandle || (!buildRes)) {
if (freehandle) {
int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1);
int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1);
vTrace("vgId:%d, QInfo:%p, start to free qhandle, remain qhandle:%d", pVnode->vgId, *qhandle, remain);
}
qReleaseQInfo(pVnode->qMgmt, (void **)&qhandle, freehandle);
}
}
}
return code;
#endif
return 0;
}
static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
void *pCont = pRead->pCont;
SRspRet *pRet = &pRead->rspRet;
int32_t vnodeProcessFetchMsg(SVnode *pVnode, SReadMsg *pRead) {
#if 0
void * pCont = pRead->pCont;
SVnRsp *pRet = &pRead->rspRet;
SRetrieveTableMsg *pRetrieve = pCont;
pRetrieve->free = htons(pRetrieve->free);
......@@ -351,7 +229,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
vTrace("vgId:%d, qId:0x%" PRIx64 ", retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, pRetrieve->qId,
pRetrieve->free, pRead->rpcHandle);
memset(pRet, 0, sizeof(SRspRet));
memset(pRet, 0, sizeof(SVnRsp));
terrno = TSDB_CODE_SUCCESS;
int32_t code = TSDB_CODE_SUCCESS;
......@@ -364,16 +242,17 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
}
if (code != TSDB_CODE_SUCCESS) {
vError("vgId:%d, invalid qId in retrieving result, code:%s, QInfo:%" PRIu64, pVnode->vgId, tstrerror(code), pRetrieve->qId);
vError("vgId:%d, invalid qId in retrieving result, code:%s, QInfo:%" PRIu64, pVnode->vgId, tstrerror(code),
pRetrieve->qId);
vnodeBuildNoResultQueryRsp(pRet);
return code;
}
// kill current query and free corresponding resources.
if (pRetrieve->free == 1) {
int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1);
vWarn("vgId:%d, QInfo:%"PRIx64 "-%p, retrieve msg received to kill query and free qhandle, remain qhandle:%d", pVnode->vgId, pRetrieve->qId,
*handle, remain);
int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1);
vWarn("vgId:%d, QInfo:%" PRIx64 "-%p, retrieve msg received to kill query and free qhandle, remain qhandle:%d",
pVnode->vgId, pRetrieve->qId, *handle, remain);
qKillQuery(*handle);
qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true);
......@@ -385,9 +264,9 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
// register the qhandle to connect to quit query immediate if connection is broken
if (vnodeNotifyCurrentQhandle(pRead->rpcHandle, pRetrieve->qId, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) {
int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1);
vError("vgId:%d, QInfo:%"PRIu64 "-%p, retrieve discarded since link is broken, conn:%p, remain qhandle:%d", pVnode->vgId, pRetrieve->qhandle,
*handle, pRead->rpcHandle, remain);
int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1);
vError("vgId:%d, QInfo:%" PRIu64 "-%p, retrieve discarded since link is broken, conn:%p, remain qhandle:%d",
pVnode->vgId, pRetrieve->qhandle, *handle, pRead->rpcHandle, remain);
code = TSDB_CODE_RPC_NETWORK_UNAVAIL;
qKillQuery(*handle);
......@@ -422,29 +301,13 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SVReadMsg *pRead) {
// If qhandle is not added into vread queue, the query should be completed already or paused with error.
// Here free qhandle immediately
if (freeHandle) {
int32_t remain = atomic_sub_fetch_32(&vNumOfExistedQHandle, 1);
int32_t remain = atomic_sub_fetch_32(&pVnode->numOfExistQHandle, 1);
vTrace("vgId:%d, QInfo:%p, start to free qhandle, remain qhandle:%d", pVnode->vgId, *handle, remain);
qReleaseQInfo(pVnode->qMgmt, (void **)&handle, true);
}
return code;
#endif
return 0;
}
// notify connection(handle) that current qhandle is created, if current connection from
// client is broken, the query needs to be killed immediately.
int32_t vnodeNotifyCurrentQhandle(void *handle, uint64_t qId, void *qhandle, int32_t vgId) {
SRetrieveTableMsg *pMsg = rpcMallocCont(sizeof(SRetrieveTableMsg));
pMsg->qId = htobe64(qId);
pMsg->header.vgId = htonl(vgId);
pMsg->header.contLen = htonl(sizeof(SRetrieveTableMsg));
vTrace("QInfo:0x%"PRIx64"-%p register qhandle to connect:%p", qId, qhandle, handle);
return rpcReportProgress(handle, (char *)pMsg, sizeof(SRetrieveTableMsg));
}
void vnodeWaitReadCompleted(SVnodeObj *pVnode) {
while (pVnode->queuedRMsg > 0) {
vTrace("vgId:%d, queued rmsg num:%d", pVnode->vgId, pVnode->queuedRMsg);
taosMsleep(10);
}
}
......@@ -16,9 +16,9 @@
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "query.h"
#include "vnodeStatus.h"
// #include "query.h"
#include "vnodeRead.h"
#include "vnodeStatus.h"
#include "vnodeWrite.h"
char* vnodeStatus[] = {
......@@ -29,30 +29,32 @@ char* vnodeStatus[] = {
"reset"
};
bool vnodeSetInitStatus(SVnodeObj* pVnode) {
bool vnodeSetInitStatus(SVnode* pVnode) {
pthread_mutex_lock(&pVnode->statusMutex);
pVnode->status = TAOS_VN_STATUS_INIT;
pthread_mutex_unlock(&pVnode->statusMutex);
return true;
}
bool vnodeSetReadyStatus(SVnodeObj* pVnode) {
bool vnodeSetReadyStatus(SVnode* pVnode) {
bool set = false;
pthread_mutex_lock(&pVnode->statusMutex);
if (pVnode->status == TAOS_VN_STATUS_INIT || pVnode->status == TAOS_VN_STATUS_READY ||
pVnode->status == TAOS_VN_STATUS_UPDATING || pVnode->status == TAOS_VN_STATUS_RESET) {
pVnode->status == TAOS_VN_STATUS_UPDATING) {
pVnode->status = TAOS_VN_STATUS_READY;
set = true;
}
#if 0
qQueryMgmtReOpen(pVnode->qMgmt);
#endif
pthread_mutex_unlock(&pVnode->statusMutex);
return set;
}
static bool vnodeSetClosingStatusImp(SVnodeObj* pVnode) {
static bool vnodeSetClosingStatusImp(SVnode* pVnode) {
bool set = false;
pthread_mutex_lock(&pVnode->statusMutex);
......@@ -65,7 +67,7 @@ static bool vnodeSetClosingStatusImp(SVnodeObj* pVnode) {
return set;
}
bool vnodeSetClosingStatus(SVnodeObj* pVnode) {
bool vnodeSetClosingStatus(SVnode* pVnode) {
if (pVnode->status == TAOS_VN_STATUS_CLOSING)
return true;
......@@ -73,15 +75,17 @@ bool vnodeSetClosingStatus(SVnodeObj* pVnode) {
taosMsleep(1);
}
#if 0
// release local resources only after cutting off outside connections
qQueryMgmtNotifyClosed(pVnode->qMgmt);
#endif
vnodeWaitReadCompleted(pVnode);
vnodeWaitWriteCompleted(pVnode);
return true;
}
bool vnodeSetUpdatingStatus(SVnodeObj* pVnode) {
bool vnodeSetUpdatingStatus(SVnode* pVnode) {
bool set = false;
pthread_mutex_lock(&pVnode->statusMutex);
......@@ -94,35 +98,7 @@ bool vnodeSetUpdatingStatus(SVnodeObj* pVnode) {
return set;
}
static bool vnodeSetResetStatusImp(SVnodeObj* pVnode) {
bool set = false;
pthread_mutex_lock(&pVnode->statusMutex);
if (pVnode->status == TAOS_VN_STATUS_READY || pVnode->status == TAOS_VN_STATUS_INIT) {
pVnode->status = TAOS_VN_STATUS_RESET;
set = true;
}
pthread_mutex_unlock(&pVnode->statusMutex);
return set;
}
bool vnodeSetResetStatus(SVnodeObj* pVnode) {
while (!vnodeSetResetStatusImp(pVnode)) {
taosMsleep(1);
}
vInfo("vgId:%d, set to reset status", pVnode->vgId);
// release local resources only after cutting off outside connections
qQueryMgmtNotifyClosed(pVnode->qMgmt);
vnodeWaitReadCompleted(pVnode);
vnodeWaitWriteCompleted(pVnode);
return true;
}
bool vnodeInInitStatus(SVnodeObj* pVnode) {
bool vnodeInInitStatus(SVnode* pVnode) {
bool in = false;
pthread_mutex_lock(&pVnode->statusMutex);
......@@ -134,7 +110,7 @@ bool vnodeInInitStatus(SVnodeObj* pVnode) {
return in;
}
bool vnodeInReadyStatus(SVnodeObj* pVnode) {
bool vnodeInReadyStatus(SVnode* pVnode) {
bool in = false;
pthread_mutex_lock(&pVnode->statusMutex);
......@@ -146,19 +122,7 @@ bool vnodeInReadyStatus(SVnodeObj* pVnode) {
return in;
}
bool vnodeInReadyOrUpdatingStatus(SVnodeObj* pVnode) {
bool in = false;
pthread_mutex_lock(&pVnode->statusMutex);
if (pVnode->status == TAOS_VN_STATUS_READY || pVnode->status == TAOS_VN_STATUS_UPDATING) {
in = true;
}
pthread_mutex_unlock(&pVnode->statusMutex);
return in;
}
bool vnodeInClosingStatus(SVnodeObj* pVnode) {
bool vnodeInClosingStatus(SVnode* pVnode) {
bool in = false;
pthread_mutex_lock(&pVnode->statusMutex);
......@@ -170,14 +134,3 @@ bool vnodeInClosingStatus(SVnodeObj* pVnode) {
return in;
}
bool vnodeInResetStatus(SVnodeObj* pVnode) {
bool in = false;
pthread_mutex_lock(&pVnode->statusMutex);
if (pVnode->status == TAOS_VN_STATUS_RESET) {
in = true;
}
pthread_mutex_unlock(&pVnode->statusMutex);
return in;
}
......@@ -19,7 +19,7 @@
#include "tglobal.h"
#include "vnodeVersion.h"
int32_t vnodeReadVersion(SVnodeObj *pVnode) {
int32_t vnodeReadVersion(SVnode *pVnode) {
int32_t len = 0;
int32_t maxLen = 100;
char * content = calloc(1, maxLen + 1);
......@@ -71,7 +71,7 @@ PARSE_VER_ERROR:
return terrno;
}
int32_t vnodeSaveVersion(SVnodeObj *pVnode) {
int32_t vnodeSaveVersion(SVnode *pVnode) {
char file[TSDB_FILENAME_LEN + 30] = {0};
sprintf(file, "%s/vnode%d/version.json", tsVnodeDir, pVnode->vgId);
......@@ -90,7 +90,7 @@ int32_t vnodeSaveVersion(SVnodeObj *pVnode) {
len += snprintf(content + len, maxLen - len, "}\n");
fwrite(content, 1, len, fp);
taosFsync(fileno(fp));
taosFsyncFile(fileno(fp));
fclose(fp);
free(content);
terrno = 0;
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnodeMain.h"
#include "vnodeWorker.h"
enum { CLEANUP_TASK = 0, DESTROY_TASK = 1, BACKUP_TASK = 2 };
typedef struct {
int32_t vgId;
int32_t code;
int32_t type;
void * rpcHandle;
SVnode *pVnode;
} SVnTask;
static struct {
SWorkerPool pool;
taos_queue pQueue;
} tsVworker = {0};
static void vnodeProcessTaskStart(void *unused, SVnTask *pTask, int32_t qtype) {
pTask->code = 0;
switch (pTask->type) {
case CLEANUP_TASK:
vnodeCleanUp(pTask->pVnode);
break;
case DESTROY_TASK:
vnodeDestroy(pTask->pVnode);
break;
case BACKUP_TASK:
vnodeBackup(pTask->vgId);
break;
default:
break;
}
}
static void vnodeProcessTaskEnd(void *unused, SVnTask *pTask, int32_t qtype, int32_t code) {
if (pTask->rpcHandle != NULL) {
SRpcMsg rpcRsp = {.handle = pTask->rpcHandle, .code = pTask->code};
rpcSendResponse(&rpcRsp);
}
taosFreeQitem(pTask);
}
static int32_t vnodeWriteIntoTaskQueue(SVnode *pVnode, int32_t type, void *rpcHandle) {
SVnTask *pTask = taosAllocateQitem(sizeof(SVnTask));
if (pTask == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY;
pTask->vgId = pVnode->vgId;
pTask->pVnode = pVnode;
pTask->rpcHandle = rpcHandle;
pTask->type = type;
return taosWriteQitem(tsVworker.pQueue, TAOS_QTYPE_RPC, pTask);
}
void vnodeProcessCleanupTask(SVnode *pVnode) {
vnodeWriteIntoTaskQueue(pVnode, CLEANUP_TASK, NULL);
}
void vnodeProcessDestroyTask(SVnode *pVnode) {
vnodeWriteIntoTaskQueue(pVnode, DESTROY_TASK, NULL);
}
void vnodeProcessBackupTask(SVnode *pVnode) {
vnodeWriteIntoTaskQueue(pVnode, BACKUP_TASK, NULL);
}
int32_t vnodeInitWorker() {
SWorkerPool *pPool = &tsVworker.pool;
pPool->name = "vworker";
pPool->startFp = (ProcessStartFp)vnodeProcessTaskStart;
pPool->endFp = (ProcessEndFp)vnodeProcessTaskEnd;
pPool->min = 0;
pPool->max = 1;
if (tWorkerInit(pPool) != 0) {
return TSDB_CODE_VND_OUT_OF_MEMORY;
}
tsVworker.pQueue = tWorkerAllocQueue(pPool, NULL);
vInfo("vworker is initialized, max worker %d", pPool->max);
return TSDB_CODE_SUCCESS;
}
void vnodeCleanupWorker() {
tWorkerFreeQueue(&tsVworker.pool, tsVworker.pQueue);
tWorkerCleanup(&tsVworker.pool);
tsVworker.pQueue = NULL;
vInfo("vworker is closed");
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "tglobal.h"
#include "tqueue.h"
#include "tworker.h"
#include "taosmsg.h"
#include "vnodeMain.h"
#include "vnodeStatus.h"
#include "vnodeWrite.h"
#include "vnodeWriteMsg.h"
typedef int32_t (*WriteMsgFp)(SVnode *, void *pCont, SVnRsp *);
typedef struct {
int32_t code;
int8_t qtype;
SVnode * pVnode;
SRpcMsg rpcMsg;
SVnRsp rspRet;
char reserveForSync[24];
SWalHead walHead;
} SVnWriteMsg;
static struct {
SWriteWorkerPool pool;
int64_t queuedBytes;
int32_t queuedMsgs;
} tsVwrite = {0};
void vnodeStartWrite(SVnode *pVnode) {}
void vnodeStoprite(SVnode *pVnode) {}
void vnodeWaitWriteCompleted(SVnode *pVnode) {
while (pVnode->queuedWMsg > 0) {
vTrace("vgId:%d, queued wmsg num:%d", pVnode->vgId, pVnode->queuedWMsg);
taosMsleep(10);
}
}
static int32_t vnodeWriteToWQueue(SVnode *pVnode, SWalHead *pHead, int32_t qtype, SRpcMsg *pRpcMsg) {
if (!(pVnode->accessState & TSDB_VN_WRITE_ACCCESS)) {
vWarn("vgId:%d, no write auth", pVnode->vgId);
return TSDB_CODE_VND_NO_WRITE_AUTH;
}
if (tsAvailDataDirGB <= tsMinimalDataDirGB) {
vWarn("vgId:%d, failed to write into vwqueue since no diskspace, avail:%fGB", pVnode->vgId, tsAvailDataDirGB);
return TSDB_CODE_VND_NO_DISKSPACE;
}
if (pHead->len > TSDB_MAX_WAL_SIZE) {
vError("vgId:%d, wal len:%d exceeds limit, hver:%" PRIu64, pVnode->vgId, pHead->len, pHead->version);
return TSDB_CODE_WAL_SIZE_LIMIT;
}
if (!vnodeInReadyStatus(pVnode)) {
vError("vgId:%d, failed to write into vwqueue, vstatus is %s", pVnode->vgId, vnodeStatus[pVnode->status]);
return TSDB_CODE_APP_NOT_READY;
}
if (tsVwrite.queuedBytes > tsMaxVnodeQueuedBytes) {
vDebug("vgId:%d, too many bytes:%" PRId64 " in vwqueue, flow control", pVnode->vgId, tsVwrite.queuedBytes);
return TSDB_CODE_VND_IS_FLOWCTRL;
}
int32_t size = sizeof(SVnWriteMsg) + pHead->len;
SVnWriteMsg *pWrite = taosAllocateQitem(size);
if (pWrite == NULL) {
return TSDB_CODE_VND_OUT_OF_MEMORY;
}
if (pRpcMsg != NULL) {
pWrite->rpcMsg = *pRpcMsg;
}
memcpy(&pWrite->walHead, pHead, sizeof(SWalHead) + pHead->len);
pWrite->pVnode = pVnode;
pWrite->qtype = qtype;
atomic_add_fetch_64(&tsVwrite.queuedBytes, size);
atomic_add_fetch_32(&tsVwrite.queuedMsgs, 1);
atomic_add_fetch_32(&pVnode->refCount, 1);
atomic_add_fetch_32(&pVnode->queuedWMsg, 1);
taosWriteQitem(pVnode->wqueue, pWrite->qtype, pWrite);
return TSDB_CODE_SUCCESS;
}
static void vnodeFreeFromWQueue(SVnode *pVnode, SVnWriteMsg *pWrite) {
int64_t size = sizeof(SVnWriteMsg) + pWrite->walHead.len;
atomic_sub_fetch_64(&tsVwrite.queuedBytes, size);
atomic_sub_fetch_32(&tsVwrite.queuedMsgs, 1);
atomic_sub_fetch_32(&pVnode->queuedWMsg, 1);
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
}
int32_t vnodeProcessWalMsg(SVnode *pVnode, SWalHead *pHead) {
return vnodeWriteToWQueue(pVnode, pHead, TAOS_QTYPE_WAL, NULL);
}
void vnodeProcessWriteMsg(SRpcMsg *pRpcMsg) {
int32_t code;
SMsgHead *pMsg = pRpcMsg->pCont;
pMsg->vgId = htonl(pMsg->vgId);
pMsg->contLen = htonl(pMsg->contLen);
SVnode *pVnode = vnodeAcquireNotClose(pMsg->vgId);
if (pVnode == NULL) {
code = TSDB_CODE_VND_INVALID_VGROUP_ID;
} else {
SWalHead *pHead = (SWalHead *)((char *)pRpcMsg->pCont - sizeof(SWalHead));
pHead->msgType = pRpcMsg->msgType;
pHead->version = 0;
pHead->len = pMsg->contLen;
code = vnodeWriteToWQueue(pVnode, pHead, TAOS_QTYPE_RPC, pRpcMsg);
}
if (code != TSDB_CODE_SUCCESS) {
SRpcMsg rpcRsp = {.handle = pRpcMsg->handle, .code = code};
rpcSendResponse(&rpcRsp);
}
vnodeRelease(pVnode);
rpcFreeCont(pRpcMsg->pCont);
}
static bool vnodeProcessWriteStart(SVnode *pVnode, SVnWriteMsg *pWrite, int32_t qtype) {
SWalHead *pHead = &pWrite->walHead;
SVnRsp * pRet = &pWrite->rspRet;
int32_t msgType = pHead->msgType;
vTrace("vgId:%d, msg:%s will be processed, hver:%" PRIu64, pVnode->vgId, taosMsg[pHead->msgType], pHead->version);
// write into WAL
#if 0
pWrite->code = walWrite(pVnode->wal, pHead);
if (pWrite->code < 0) return false;
#endif
pVnode->version = pHead->version;
// write data locally
switch (msgType) {
case TSDB_MSG_TYPE_SUBMIT:
pRet->len = sizeof(SSubmitRsp);
pRet->rsp = rpcMallocCont(pRet->len);
pWrite->code = vnodeProcessSubmitReq(pVnode, (void*)pHead->cont, pRet->rsp);
break;
case TSDB_MSG_TYPE_MD_CREATE_TABLE:
pWrite->code = vnodeProcessCreateTableReq(pVnode, (void*)pHead->cont, NULL);
break;
case TSDB_MSG_TYPE_MD_DROP_TABLE:
pWrite->code = vnodeProcessDropTableReq(pVnode, (void*)pHead->cont, NULL);
break;
case TSDB_MSG_TYPE_MD_ALTER_TABLE:
pWrite->code = vnodeProcessAlterTableReq(pVnode, (void*)pHead->cont, NULL);
break;
case TSDB_MSG_TYPE_MD_DROP_STABLE:
pWrite->code = vnodeProcessDropStableReq(pVnode, (void*)pHead->cont, NULL);
break;
case TSDB_MSG_TYPE_UPDATE_TAG_VAL:
pWrite->code = vnodeProcessUpdateTagValReq(pVnode, (void*)pHead->cont, NULL);
break;
default:
pWrite->code = TSDB_CODE_VND_MSG_NOT_PROCESSED;
break;
}
if (pWrite->code < 0) return false;
// update fync
return (pWrite->code == 0 && msgType != TSDB_MSG_TYPE_SUBMIT);
}
static void vnodeFsync(SVnode *pVnode, bool fsync) {
#if 0
walFsync(pVnode->wal, fsync);
#endif
}
static void vnodeProcessWriteEnd(SVnode *pVnode, SVnWriteMsg *pWrite, int32_t qtype, int32_t code) {
if (qtype == TAOS_QTYPE_RPC) {
SRpcMsg rpcRsp = {
.handle = pWrite->rpcMsg.handle,
.pCont = pWrite->rspRet.rsp,
.contLen = pWrite->rspRet.len,
.code = pWrite->code,
};
rpcSendResponse(&rpcRsp);
} else {
if (pWrite->rspRet.rsp) {
rpcFreeCont(pWrite->rspRet.rsp);
}
}
vnodeFreeFromWQueue(pVnode, pWrite);
}
int32_t vnodeInitWrite() {
SWriteWorkerPool *pPool = &tsVwrite.pool;
pPool->name = "vwrite";
pPool->max = tsNumOfCores;
pPool->startFp = (ProcessWriteStartFp)vnodeProcessWriteStart;
pPool->syncFp = (ProcessWriteSyncFp)vnodeFsync;
pPool->endFp = (ProcessWriteEndFp)vnodeProcessWriteEnd;
if (tWriteWorkerInit(pPool) != 0) return -1;
vInfo("vwrite is initialized, max worker %d", pPool->max);
return TSDB_CODE_SUCCESS;
}
void vnodeCleanupWrite() {
tWriteWorkerCleanup(&tsVwrite.pool);
vInfo("vwrite is closed");
}
taos_queue vnodeAllocWriteQueue(SVnode *pVnode) { return tWriteWorkerAllocQueue(&tsVwrite.pool, pVnode); }
void vnodeFreeWriteQueue(taos_queue pQueue) { tWriteWorkerFreeQueue(&tsVwrite.pool, pQueue); }
\ No newline at end of file
......@@ -13,9 +13,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "vnodeInt.h"
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnodeWriteMsg.h"
int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) {
int32_t vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) {
// TODO: Check inputs
#if 0
......@@ -51,17 +53,27 @@ int vnodeProcessSubmitReq(SVnode *pVnode, SSubmitReq *pReq, SSubmitRsp *pRsp) {
return 0;
}
int vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp) {
int32_t vnodeProcessCreateTableReq(SVnode *pVnode, SCreateTableReq *pReq, SCreateTableRsp *pRsp) {
// TODO
return 0;
}
int vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp) {
int32_t vnodeProcessDropTableReq(SVnode *pVnode, SDropTableReq *pReq, SDropTableRsp *pRsp) {
// TODO
return 0;
}
int vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp) {
int32_t vnodeProcessAlterTableReq(SVnode *pVnode, SAlterTableReq *pReq, SAlterTableRsp *pRsp) {
// TODO
return 0;
}
int32_t vnodeProcessDropStableReq(SVnode *pVnode, SDropStableReq *pReq, SDropStableRsp *pRsp) {
// TODO
return 0;
}
int32_t vnodeProcessUpdateTagValReq(SVnode *pVnode, SUpdateTagValReq *pReq, SUpdateTagValRsp *pRsp) {
// TODO
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_WAL_H_
#define _TD_WAL_H_
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
TAOS_WAL_NOLOG = 0,
TAOS_WAL_WRITE = 1,
TAOS_WAL_FSYNC = 2
} EWalType;
typedef enum {
TAOS_WAL_NOT_KEEP = 0,
TAOS_WAL_KEEP = 1
} EWalKeep;
typedef struct {
int8_t msgType;
int8_t sver; // sver 2 for WAL SDataRow/SMemRow compatibility
int8_t reserved[2];
int32_t len;
uint64_t version;
uint32_t signature;
uint32_t cksum;
char cont[];
} SWalHead;
typedef struct {
int32_t vgId;
int32_t fsyncPeriod; // millisecond
EWalType walLevel; // wal level
EWalKeep keep; // keep the wal file when closed
} SWalCfg;
typedef void * twalh; // WAL HANDLE
typedef int32_t FWalWrite(void *ahandle, void *pHead, int32_t qtype, void *pMsg);
int32_t walInit();
void walCleanUp();
twalh walOpen(char *path, SWalCfg *pCfg);
int32_t walAlter(twalh pWal, SWalCfg *pCfg);
void walStop(twalh);
void walClose(twalh);
int32_t walRenew(twalh);
void walRemoveOneOldFile(twalh);
void walRemoveAllOldFiles(twalh);
int32_t walWrite(twalh, SWalHead *);
void walFsync(twalh, bool forceFsync);
int32_t walRestore(twalh, void *pVnode, FWalWrite writeFp);
int32_t walGetWalFile(twalh, char *fileName, int64_t *fileId);
uint64_t walGetVersion(twalh);
void walResetVersion(twalh, uint64_t newVer);
#ifdef __cplusplus
}
#endif
#endif // _TD_WAL_H_
aux_source_directory(source RAFT_SRC)
add_library(raft ${RAFT_SRC})
target_include_directories(
raft
PUBLIC "${CMAKE_SOURCE_DIR}/include/raft"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include"
)
\ No newline at end of file
CMAKE_MINIMUM_REQUIRED(VERSION 2.8...3.20)
PROJECT(TDengine)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/cJson/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/query/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/tsdb/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/dnode/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/sync/inc)
INCLUDE_DIRECTORIES(${TD_ENTERPRISE_DIR}/src/inc)
INCLUDE_DIRECTORIES(inc)
AUX_SOURCE_DIRECTORY(src SRC)
ADD_LIBRARY(vnode ${SRC})
TARGET_LINK_LIBRARIES(vnode tsdb tcq common)
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_INT_H
#define TDENGINE_VNODE_INT_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tlog.h"
#include "tsync.h"
#include "tcq.h"
#include "tsdb.h"
#include "vnode.h"
extern int32_t vDebugFlag;
extern int32_t vNumOfExistedQHandle; // current initialized and existed query handle in current dnode
#define vFatal(...) { if (vDebugFlag & DEBUG_FATAL) { taosPrintLog("VND FATAL ", 255, __VA_ARGS__); }}
#define vError(...) { if (vDebugFlag & DEBUG_ERROR) { taosPrintLog("VND ERROR ", 255, __VA_ARGS__); }}
#define vWarn(...) { if (vDebugFlag & DEBUG_WARN) { taosPrintLog("VND WARN ", 255, __VA_ARGS__); }}
#define vInfo(...) { if (vDebugFlag & DEBUG_INFO) { taosPrintLog("VND ", 255, __VA_ARGS__); }}
#define vDebug(...) { if (vDebugFlag & DEBUG_DEBUG) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }}
#define vTrace(...) { if (vDebugFlag & DEBUG_TRACE) { taosPrintLog("VND ", vDebugFlag, __VA_ARGS__); }}
typedef struct {
int32_t vgId; // global vnode group ID
int32_t refCount; // reference count
int64_t queuedWMsgSize;
int32_t queuedWMsg;
int32_t queuedRMsg;
int32_t flowctrlLevel;
int8_t preClose; // drop and close switch
int8_t reserved[3];
int64_t sequence; // for topic
int8_t status;
int8_t role;
int8_t accessState;
int8_t isFull;
int8_t isCommiting;
int8_t dbReplica;
int8_t dropped;
int8_t dbType;
uint64_t version; // current version
uint64_t cversion; // version while commit start
uint64_t fversion; // version on saved data file
void * wqueue; // write queue
void * qqueue; // read query queue
void * fqueue; // read fetch/cancel queue
void * wal;
void * tsdb;
int64_t sync;
void * events;
void * cq; // continuous query
int32_t dbCfgVersion;
int32_t vgCfgVersion;
STsdbCfg tsdbCfg;
SSyncCfg syncCfg;
SWalCfg walCfg;
void * qMgmt;
char * rootDir;
tsem_t sem;
char db[TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN];
pthread_mutex_t statusMutex;
} SVnodeObj;
#ifdef __cplusplus
}
#endif
#endif
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_SYNC_H
#define TDENGINE_VNODE_SYNC_H
#ifdef __cplusplus
extern "C" {
#endif
#include "vnodeInt.h"
uint32_t vnodeGetFileInfo(int32_t vgId, char *name, uint32_t *index, uint32_t eindex, int64_t *size, uint64_t *fver);
int32_t vnodeGetWalInfo(int32_t vgId, char *fileName, int64_t *fileId);
void vnodeNotifyRole(int32_t vgId, int8_t role);
void vnodeCtrlFlow(int32_t vgId, int32_t level);
void vnodeStartSyncFile(int32_t vgId);
void vnodeStopSyncFile(int32_t vgId, uint64_t fversion);
void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code);
int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam);
int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver);
void vnodeConfirmForward(void *pVnode, uint64_t version, int32_t code, bool force);
#ifdef __cplusplus
}
#endif
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taoserror.h"
#include "taosmsg.h"
#include "tutil.h"
#include "tqueue.h"
#include "tglobal.h"
#include "tfs.h"
#include "vnodeBackup.h"
#include "vnodeMain.h"
typedef struct {
int32_t vgId;
} SVBackupMsg;
typedef struct {
pthread_t thread;
int32_t workerId;
} SVBackupWorker;
typedef struct {
int32_t num;
SVBackupWorker *worker;
} SVBackupWorkerPool;
static SVBackupWorkerPool tsVBackupPool;
static taos_qset tsVBackupQset;
static taos_queue tsVBackupQueue;
static void vnodeProcessBackupMsg(SVBackupMsg *pMsg) {
int32_t vgId = pMsg->vgId;
char newDir[TSDB_FILENAME_LEN] = {0};
char stagingDir[TSDB_FILENAME_LEN] = {0};
sprintf(newDir, "%s/vnode%d", "vnode_bak", vgId);
sprintf(stagingDir, "%s/.staging/vnode%d", "vnode_bak", vgId);
if (tsEnableVnodeBak) {
tfsRmdir(newDir);
tfsRename(stagingDir, newDir);
} else {
vInfo("vgId:%d, vnode backup not enabled", vgId);
tfsRmdir(stagingDir);
}
}
static void *vnodeBackupFunc(void *param) {
setThreadName("vnodeBackup");
while (1) {
SVBackupMsg *pMsg = NULL;
if (taosReadQitemFromQset(tsVBackupQset, NULL, (void **)&pMsg, NULL) == 0) {
vDebug("qset:%p, vbackup got no message from qset, exiting", tsVBackupQset);
break;
}
vTrace("vgId:%d, will be processed in vbackup queue", pMsg->vgId);
vnodeProcessBackupMsg(pMsg);
vTrace("vgId:%d, disposed in vbackup worker", pMsg->vgId);
taosFreeQitem(pMsg);
}
return NULL;
}
static int32_t vnodeStartBackup() {
tsVBackupQueue = taosOpenQueue();
if (tsVBackupQueue == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY;
taosAddIntoQset(tsVBackupQset, tsVBackupQueue, NULL);
for (int32_t i = 0; i < tsVBackupPool.num; ++i) {
SVBackupWorker *pWorker = tsVBackupPool.worker + i;
pWorker->workerId = i;
pthread_attr_t thAttr;
pthread_attr_init(&thAttr);
pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);
if (pthread_create(&pWorker->thread, &thAttr, vnodeBackupFunc, pWorker) != 0) {
vError("failed to create thread to process vbackup queue, reason:%s", strerror(errno));
}
pthread_attr_destroy(&thAttr);
vDebug("vbackup:%d is launched, total:%d", pWorker->workerId, tsVBackupPool.num);
}
vDebug("vbackup queue:%p is allocated", tsVBackupQueue);
return TSDB_CODE_SUCCESS;
}
static int32_t vnodeWriteIntoBackupWorker(int32_t vgId) {
SVBackupMsg *pMsg = taosAllocateQitem(sizeof(SVBackupMsg));
if (pMsg == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY;
pMsg->vgId = vgId;
int32_t code = taosWriteQitem(tsVBackupQueue, TAOS_QTYPE_RPC, pMsg);
if (code == 0) code = TSDB_CODE_DND_ACTION_IN_PROGRESS;
return code;
}
int32_t vnodeBackup(int32_t vgId) {
vTrace("vgId:%d, will backup", vgId);
return vnodeWriteIntoBackupWorker(vgId);
}
int32_t vnodeInitBackup() {
tsVBackupQset = taosOpenQset();
tsVBackupPool.num = 1;
tsVBackupPool.worker = calloc(sizeof(SVBackupWorker), tsVBackupPool.num);
if (tsVBackupPool.worker == NULL) return -1;
for (int32_t i = 0; i < tsVBackupPool.num; ++i) {
SVBackupWorker *pWorker = tsVBackupPool.worker + i;
pWorker->workerId = i;
vDebug("vbackup:%d is created", i);
}
vDebug("vbackup is initialized, num:%d qset:%p", tsVBackupPool.num, tsVBackupQset);
return vnodeStartBackup();
}
void vnodeCleanupBackup() {
for (int32_t i = 0; i < tsVBackupPool.num; ++i) {
SVBackupWorker *pWorker = tsVBackupPool.worker + i;
if (taosCheckPthreadValid(pWorker->thread)) {
taosQsetThreadResume(tsVBackupQset);
}
vDebug("vbackup:%d is closed", i);
}
for (int32_t i = 0; i < tsVBackupPool.num; ++i) {
SVBackupWorker *pWorker = tsVBackupPool.worker + i;
vDebug("vbackup:%d start to join", i);
if (taosCheckPthreadValid(pWorker->thread)) {
pthread_join(pWorker->thread, NULL);
}
vDebug("vbackup:%d join success", i);
}
vDebug("vbackup is closed, qset:%p", tsVBackupQset);
taosCloseQset(tsVBackupQset);
tsVBackupQset = NULL;
tfree(tsVBackupPool.worker);
vDebug("vbackup queue:%p is freed", tsVBackupQueue);
taosCloseQueue(tsVBackupQueue);
tsVBackupQueue = NULL;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "dnode.h"
#include "vnodeStatus.h"
#include "vnodeBackup.h"
#include "vnodeWorker.h"
#include "vnodeRead.h"
#include "vnodeWrite.h"
#include "vnodeMain.h"
static SHashObj *tsVnodesHash = NULL;
static int32_t vnodeInitHash(void);
static void vnodeCleanupHash(void);
static void vnodeIncRef(void *ptNode);
static SStep tsVnodeSteps[] = {
{"vnode-backup", vnodeInitBackup, vnodeCleanupBackup},
{"vnode-worker", vnodeInitMWorker, vnodeCleanupMWorker},
{"vnode-write", vnodeInitWrite, vnodeCleanupWrite},
{"vnode-read", vnodeInitRead, vnodeCleanupRead},
{"vnode-hash", vnodeInitHash, vnodeCleanupHash},
{"tsdb-queue", tsdbInitCommitQueue, tsdbDestroyCommitQueue}
};
int32_t vnodeInitMgmt() {
int32_t stepSize = sizeof(tsVnodeSteps) / sizeof(SStep);
return dnodeStepInit(tsVnodeSteps, stepSize);
}
void vnodeCleanupMgmt() {
int32_t stepSize = sizeof(tsVnodeSteps) / sizeof(SStep);
dnodeStepCleanup(tsVnodeSteps, stepSize);
}
static int32_t vnodeInitHash() {
tsVnodesHash = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
if (tsVnodesHash == NULL) {
vError("failed to init vnode mgmt");
return -1;
}
return 0;
}
static void vnodeCleanupHash() {
if (tsVnodesHash != NULL) {
vDebug("vnode mgmt is cleanup");
taosHashCleanup(tsVnodesHash);
tsVnodesHash = NULL;
}
}
void *vnodeGetWal(void *pVnode) {
return ((SVnodeObj *)pVnode)->wal;
}
void vnodeAddIntoHash(SVnodeObj *pVnode) {
taosHashPut(tsVnodesHash, &pVnode->vgId, sizeof(int32_t), &pVnode, sizeof(SVnodeObj *));
}
void vnodeRemoveFromHash(SVnodeObj *pVnode) {
taosHashRemove(tsVnodesHash, &pVnode->vgId, sizeof(int32_t));
}
static void vnodeIncRef(void *ptNode) {
assert(ptNode != NULL);
SVnodeObj **ppVnode = (SVnodeObj **)ptNode;
assert(ppVnode);
assert(*ppVnode);
SVnodeObj *pVnode = *ppVnode;
atomic_add_fetch_32(&pVnode->refCount, 1);
vTrace("vgId:%d, get vnode, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode);
}
void *vnodeAcquire(int32_t vgId) {
SVnodeObj *pVnode = NULL;
if (tsVnodesHash != NULL) {
taosHashGetClone(tsVnodesHash, &vgId, sizeof(int32_t), vnodeIncRef, &pVnode);
}
if (pVnode == NULL) {
terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
vDebug("vgId:%d, not exist", vgId);
return NULL;
}
return pVnode;
}
void vnodeRelease(void *vparam) {
SVnodeObj *pVnode = vparam;
if (vparam == NULL) return;
int32_t refCount = atomic_sub_fetch_32(&pVnode->refCount, 1);
int32_t vgId = pVnode->vgId;
vTrace("vgId:%d, release vnode, refCount:%d pVnode:%p", vgId, refCount, pVnode);
assert(refCount >= 0);
if (refCount > 0) {
if (vnodeInResetStatus(pVnode) && refCount <= 3) {
tsem_post(&pVnode->sem);
}
} else {
vDebug("vgId:%d, vnode will be destroyed, refCount:%d pVnode:%p", vgId, refCount, pVnode);
vnodeDestroyInMWorker(pVnode);
int32_t count = taosHashGetSize(tsVnodesHash);
vDebug("vgId:%d, vnode is destroyed, vnodes:%d", vgId, count);
}
}
void *vnodeAcquireNotClose(int32_t vgId) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode != NULL && pVnode->preClose == 1) {
vnodeRelease(pVnode);
terrno = TSDB_CODE_VND_INVALID_VGROUP_ID;
vDebug("vgId:%d, not exist, pre closing", vgId);
return NULL;
}
return pVnode;
}
static void vnodeBuildVloadMsg(SVnodeObj *pVnode, SStatusMsg *pStatus) {
int64_t totalStorage = 0;
int64_t compStorage = 0;
int64_t pointsWritten = 0;
if (vnodeInClosingStatus(pVnode)) return;
if (pStatus->openVnodes >= TSDB_MAX_VNODES) return;
if (pVnode->tsdb) {
tsdbReportStat(pVnode->tsdb, &pointsWritten, &totalStorage, &compStorage);
}
SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++];
pLoad->vgId = htonl(pVnode->vgId);
pLoad->dbCfgVersion = htonl(pVnode->dbCfgVersion);
pLoad->vgCfgVersion = htonl(pVnode->vgCfgVersion);
pLoad->totalStorage = htobe64(totalStorage);
pLoad->compStorage = htobe64(compStorage);
pLoad->pointsWritten = htobe64(pointsWritten);
pLoad->vnodeVersion = htobe64(pVnode->version);
pLoad->status = pVnode->status;
pLoad->role = pVnode->role;
pLoad->replica = pVnode->syncCfg.replica;
pLoad->compact = (pVnode->tsdb != NULL) ? tsdbGetCompactState(pVnode->tsdb) : 0;
}
int32_t vnodeGetVnodeList(int32_t vnodeList[], int32_t *numOfVnodes) {
void *pIter = taosHashIterate(tsVnodesHash, NULL);
while (pIter) {
SVnodeObj **pVnode = pIter;
if (*pVnode) {
(*numOfVnodes)++;
if (*numOfVnodes >= TSDB_MAX_VNODES) {
vError("vgId:%d, too many open vnodes, exist:%d max:%d", (*pVnode)->vgId, *numOfVnodes, TSDB_MAX_VNODES);
continue;
} else {
vnodeList[*numOfVnodes - 1] = (*pVnode)->vgId;
}
}
pIter = taosHashIterate(tsVnodesHash, pIter);
}
return TSDB_CODE_SUCCESS;
}
void vnodeBuildStatusMsg(void *param) {
SStatusMsg *pStatus = param;
void *pIter = taosHashIterate(tsVnodesHash, NULL);
while (pIter) {
SVnodeObj **pVnode = pIter;
if (*pVnode) {
vnodeBuildVloadMsg(*pVnode, pStatus);
}
pIter = taosHashIterate(tsVnodesHash, pIter);
}
}
void vnodeSetAccess(SVgroupAccess *pAccess, int32_t numOfVnodes) {
for (int32_t i = 0; i < numOfVnodes; ++i) {
pAccess[i].vgId = htonl(pAccess[i].vgId);
SVnodeObj *pVnode = vnodeAcquireNotClose(pAccess[i].vgId);
if (pVnode != NULL) {
pVnode->accessState = pAccess[i].accessState;
if (pVnode->accessState != TSDB_VN_ALL_ACCCESS) {
vDebug("vgId:%d, access state is set to %d", pAccess[i].vgId, pVnode->accessState);
}
vnodeRelease(pVnode);
}
}
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "query.h"
#include "dnode.h"
#include "vnodeVersion.h"
#include "vnodeMain.h"
#include "vnodeStatus.h"
uint32_t vnodeGetFileInfo(int32_t vgId, char *name, uint32_t *index, uint32_t eindex, int64_t *size, uint64_t *fver) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while get file info", vgId);
return 0;
}
*fver = pVnode->fversion;
uint32_t ret = tsdbGetFileInfo(pVnode->tsdb, name, index, eindex, size);
vnodeRelease(pVnode);
return ret;
}
int32_t vnodeGetWalInfo(int32_t vgId, char *fileName, int64_t *fileId) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while get wal info", vgId);
return -1;
}
int32_t code = walGetWalFile(pVnode->wal, fileName, fileId);
vnodeRelease(pVnode);
return code;
}
void vnodeNotifyRole(int32_t vgId, int8_t role) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vTrace("vgId:%d, vnode not found while notify role", vgId);
return;
}
if (pVnode->dropped) {
vTrace("vgId:%d, vnode dropped while notify role", vgId);
vnodeRelease(pVnode);
return;
}
vInfo("vgId:%d, sync role changed from %s to %s", pVnode->vgId, syncRole[pVnode->role], syncRole[role]);
pVnode->role = role;
dnodeSendStatusMsgToMnode();
if (pVnode->role == TAOS_SYNC_ROLE_MASTER) {
cqStart(pVnode->cq);
} else {
cqStop(pVnode->cq);
}
vnodeRelease(pVnode);
}
void vnodeCtrlFlow(int32_t vgId, int32_t level) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vTrace("vgId:%d, vnode not found while flow ctrl", vgId);
return;
}
if (pVnode->dropped) {
vTrace("vgId:%d, vnode dropped while flow ctrl", vgId);
vnodeRelease(pVnode);
return;
}
if (pVnode->flowctrlLevel != level) {
vDebug("vgId:%d, set flowctrl level from %d to %d", pVnode->vgId, pVnode->flowctrlLevel, level);
pVnode->flowctrlLevel = level;
}
vnodeRelease(pVnode);
}
void vnodeStartSyncFile(int32_t vgId) {
SVnodeObj *pVnode = vnodeAcquireNotClose(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while start filesync", vgId);
return;
}
vInfo("vgId:%d, datafile will be synced", vgId);
vnodeSetResetStatus(pVnode);
vnodeRelease(pVnode);
}
void vnodeStopSyncFile(int32_t vgId, uint64_t fversion) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while stop filesync", vgId);
return;
}
pVnode->fversion = fversion;
pVnode->version = fversion;
vnodeSaveVersion(pVnode);
walResetVersion(pVnode->wal, fversion);
vInfo("vgId:%d, datafile is synced, fver:%" PRIu64 " vver:%" PRIu64, vgId, fversion, fversion);
vnodeSetReadyStatus(pVnode);
vnodeRelease(pVnode);
}
void vnodeConfirmForard(int32_t vgId, void *wparam, int32_t code) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while confirm forward", vgId);
}
if (code == TSDB_CODE_SYN_CONFIRM_EXPIRED && pVnode->status == TAOS_VN_STATUS_CLOSING) {
vDebug("vgId:%d, db:%s, vnode is closing while confirm forward", vgId, pVnode->db);
code = TSDB_CODE_VND_IS_CLOSING;
}
dnodeSendRpcVWriteRsp(pVnode, wparam, code);
vnodeRelease(pVnode);
}
int32_t vnodeWriteToCache(int32_t vgId, void *wparam, int32_t qtype, void *rparam) {
SVnodeObj *pVnode = vnodeAcquire(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while write to cache", vgId);
vnodeRelease(pVnode);
return TSDB_CODE_VND_INVALID_VGROUP_ID;
}
int32_t code = vnodeWriteToWQueue(pVnode, wparam, qtype, rparam);
vnodeRelease(pVnode);
return code;
}
int32_t vnodeGetVersion(int32_t vgId, uint64_t *fver, uint64_t *wver) {
SVnodeObj *pVnode = vnodeAcquireNotClose(vgId);
if (pVnode == NULL) {
vError("vgId:%d, vnode not found while write to cache", vgId);
return -1;
}
int32_t code = 0;
if (pVnode->isCommiting) {
vInfo("vgId:%d, vnode is commiting while get version", vgId);
code = -1;
} else {
*fver = pVnode->fversion;
*wver = pVnode->version;
}
vnodeRelease(pVnode);
return code;
}
void vnodeConfirmForward(void *vparam, uint64_t version, int32_t code, bool force) {
SVnodeObj *pVnode = vparam;
syncConfirmForward(pVnode->sync, version, code, force);
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taoserror.h"
#include "taosmsg.h"
#include "tutil.h"
#include "tqueue.h"
#include "tglobal.h"
#include "vnodeWorker.h"
#include "vnodeMain.h"
typedef enum {
VNODE_WORKER_ACTION_CLEANUP,
VNODE_WORKER_ACTION_DESTROY
} EVMWorkerAction;
typedef struct {
int32_t vgId;
int32_t code;
void * rpcHandle;
SVnodeObj *pVnode;
EVMWorkerAction action;
} SVMWorkerMsg;
typedef struct {
pthread_t thread;
int32_t workerId;
} SVMWorker;
typedef struct {
int32_t curNum;
int32_t maxNum;
SVMWorker *worker;
} SVMWorkerPool;
static SVMWorkerPool tsVMWorkerPool;
static taos_qset tsVMWorkerQset;
static taos_queue tsVMWorkerQueue;
static void *vnodeMWorkerFunc(void *param);
static int32_t vnodeStartMWorker() {
tsVMWorkerQueue = taosOpenQueue();
if (tsVMWorkerQueue == NULL) return TSDB_CODE_DND_OUT_OF_MEMORY;
taosAddIntoQset(tsVMWorkerQset, tsVMWorkerQueue, NULL);
for (int32_t i = tsVMWorkerPool.curNum; i < tsVMWorkerPool.maxNum; ++i) {
SVMWorker *pWorker = tsVMWorkerPool.worker + i;
pWorker->workerId = i;
pthread_attr_t thAttr;
pthread_attr_init(&thAttr);
pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE);
if (pthread_create(&pWorker->thread, &thAttr, vnodeMWorkerFunc, pWorker) != 0) {
vError("failed to create thread to process vmworker queue, reason:%s", strerror(errno));
}
pthread_attr_destroy(&thAttr);
tsVMWorkerPool.curNum = i + 1;
vDebug("vmworker:%d is launched, total:%d", pWorker->workerId, tsVMWorkerPool.maxNum);
}
vDebug("vmworker queue:%p is allocated", tsVMWorkerQueue);
return TSDB_CODE_SUCCESS;
}
int32_t vnodeInitMWorker() {
tsVMWorkerQset = taosOpenQset();
tsVMWorkerPool.maxNum = 1;
tsVMWorkerPool.curNum = 0;
tsVMWorkerPool.worker = calloc(sizeof(SVMWorker), tsVMWorkerPool.maxNum);
if (tsVMWorkerPool.worker == NULL) return -1;
for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) {
SVMWorker *pWorker = tsVMWorkerPool.worker + i;
pWorker->workerId = i;
vDebug("vmworker:%d is created", i);
}
vDebug("vmworker is initialized, num:%d qset:%p", tsVMWorkerPool.maxNum, tsVMWorkerQset);
return vnodeStartMWorker();
}
static void vnodeStopMWorker() {
vDebug("vmworker queue:%p is freed", tsVMWorkerQueue);
taosCloseQueue(tsVMWorkerQueue);
tsVMWorkerQueue = NULL;
}
void vnodeCleanupMWorker() {
for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) {
SVMWorker *pWorker = tsVMWorkerPool.worker + i;
if (taosCheckPthreadValid(pWorker->thread)) {
taosQsetThreadResume(tsVMWorkerQset);
}
vDebug("vmworker:%d is closed", i);
}
for (int32_t i = 0; i < tsVMWorkerPool.maxNum; ++i) {
SVMWorker *pWorker = tsVMWorkerPool.worker + i;
vDebug("vmworker:%d start to join", i);
if (taosCheckPthreadValid(pWorker->thread)) {
pthread_join(pWorker->thread, NULL);
}
vDebug("vmworker:%d join success", i);
}
vDebug("vmworker is closed, qset:%p", tsVMWorkerQset);
taosCloseQset(tsVMWorkerQset);
tsVMWorkerQset = NULL;
tfree(tsVMWorkerPool.worker);
vnodeStopMWorker();
}
static int32_t vnodeWriteIntoMWorker(SVnodeObj *pVnode, EVMWorkerAction action, void *rpcHandle) {
SVMWorkerMsg *pMsg = taosAllocateQitem(sizeof(SVMWorkerMsg));
if (pMsg == NULL) return TSDB_CODE_VND_OUT_OF_MEMORY;
pMsg->vgId = pVnode->vgId;
pMsg->pVnode = pVnode;
pMsg->rpcHandle = rpcHandle;
pMsg->action = action;
int32_t code = taosWriteQitem(tsVMWorkerQueue, TAOS_QTYPE_RPC, pMsg);
if (code == 0) code = TSDB_CODE_DND_ACTION_IN_PROGRESS;
return code;
}
int32_t vnodeCleanupInMWorker(SVnodeObj *pVnode) {
vTrace("vgId:%d, will cleanup in vmworker", pVnode->vgId);
return vnodeWriteIntoMWorker(pVnode, VNODE_WORKER_ACTION_CLEANUP, NULL);
}
int32_t vnodeDestroyInMWorker(SVnodeObj *pVnode) {
vTrace("vgId:%d, will destroy in vmworker", pVnode->vgId);
return vnodeWriteIntoMWorker(pVnode, VNODE_WORKER_ACTION_DESTROY, NULL);
}
static void vnodeFreeMWorkerMsg(SVMWorkerMsg *pMsg) {
vTrace("vgId:%d, disposed in vmworker", pMsg->vgId);
taosFreeQitem(pMsg);
}
static void vnodeSendVMWorkerRpcRsp(SVMWorkerMsg *pMsg) {
if (pMsg->rpcHandle != NULL) {
SRpcMsg rpcRsp = {.handle = pMsg->rpcHandle, .code = pMsg->code};
rpcSendResponse(&rpcRsp);
}
vnodeFreeMWorkerMsg(pMsg);
}
static void vnodeProcessMWorkerMsg(SVMWorkerMsg *pMsg) {
pMsg->code = 0;
switch (pMsg->action) {
case VNODE_WORKER_ACTION_CLEANUP:
vnodeCleanUp(pMsg->pVnode);
break;
case VNODE_WORKER_ACTION_DESTROY:
vnodeDestroy(pMsg->pVnode);
break;
default:
break;
}
}
static void *vnodeMWorkerFunc(void *param) {
setThreadName("vnodeMWorker");
while (1) {
SVMWorkerMsg *pMsg = NULL;
if (taosReadQitemFromQset(tsVMWorkerQset, NULL, (void **)&pMsg, NULL) == 0) {
vDebug("qset:%p, vmworker got no message from qset, exiting", tsVMWorkerQset);
break;
}
vTrace("vgId:%d, action:%d will be processed in vmworker queue", pMsg->vgId, pMsg->action);
vnodeProcessMWorkerMsg(pMsg);
vnodeSendVMWorkerRpcRsp(pMsg);
}
return NULL;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "tp.h"
#include "taosmsg.h"
#include "taoserror.h"
#include "tglobal.h"
#include "tqueue.h"
#include "ttimer.h"
#include "dnode.h"
#include "vnodeStatus.h"
#define MAX_QUEUED_MSG_NUM 100000
#define MAX_QUEUED_MSG_SIZE 1024*1024*1024 //1GB
extern void * tsDnodeTmr;
static int32_t (*vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MAX])(SVnodeObj *, void *pCont, SRspRet *);
static int32_t vnodeProcessSubmitMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodeProcessCreateTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodeProcessDropTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodeProcessAlterTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodeProcessDropStableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodeProcessUpdateTagValMsg(SVnodeObj *pVnode, void *pCont, SRspRet *);
static int32_t vnodePerformFlowCtrl(SVWriteMsg *pWrite);
int32_t vnodeInitWrite(void) {
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_SUBMIT] = vnodeProcessSubmitMsg;
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = vnodeProcessCreateTableMsg;
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = vnodeProcessDropTableMsg;
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = vnodeProcessAlterTableMsg;
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = vnodeProcessDropStableMsg;
vnodeProcessWriteMsgFp[TSDB_MSG_TYPE_UPDATE_TAG_VAL] = vnodeProcessUpdateTagValMsg;
return 0;
}
void vnodeCleanupWrite() {}
int32_t vnodeProcessWrite(void *vparam, void *wparam, int32_t qtype, void *rparam) {
int32_t code = 0;
SVnodeObj *pVnode = vparam;
SWalHead * pHead = wparam;
SVWriteMsg*pWrite = rparam;
SRspRet *pRspRet = NULL;
if (pWrite != NULL) pRspRet = &pWrite->rspRet;
if (vnodeProcessWriteMsgFp[pHead->msgType] == NULL) {
vError("vgId:%d, msg:%s not processed since no handle, qtype:%s hver:%" PRIu64, pVnode->vgId,
taosMsg[pHead->msgType], qtypeStr[qtype], pHead->version);
return TSDB_CODE_VND_MSG_NOT_PROCESSED;
}
vTrace("vgId:%d, msg:%s will be processed in vnode, qtype:%s hver:%" PRIu64 " vver:%" PRIu64, pVnode->vgId,
taosMsg[pHead->msgType], qtypeStr[qtype], pHead->version, pVnode->version);
if (pHead->version == 0) { // from client or CQ
if (!vnodeInReadyStatus(pVnode)) {
vDebug("vgId:%d, msg:%s not processed since vstatus:%d, qtype:%s hver:%" PRIu64, pVnode->vgId,
taosMsg[pHead->msgType], pVnode->status, qtypeStr[qtype], pHead->version);
return TSDB_CODE_APP_NOT_READY; // it may be in deleting or closing state
}
if (pVnode->role != TAOS_SYNC_ROLE_MASTER) {
vDebug("vgId:%d, msg:%s not processed since replica:%d role:%s, qtype:%s hver:%" PRIu64, pVnode->vgId,
taosMsg[pHead->msgType], pVnode->syncCfg.replica, syncRole[pVnode->role], qtypeStr[qtype], pHead->version);
return TSDB_CODE_APP_NOT_READY;
}
// assign version
pHead->version = pVnode->version + 1;
} else { // from wal or forward
// for data from WAL or forward, version may be smaller
if (pHead->version <= pVnode->version) return 0;
}
// forward to peers, even it is WAL/FWD, it shall be called to update version in sync
int32_t syncCode = 0;
bool force = (pWrite == NULL ? false : pWrite->walHead.msgType != TSDB_MSG_TYPE_SUBMIT);
syncCode = syncForwardToPeer(pVnode->sync, pHead, pWrite, qtype, force);
if (syncCode < 0) {
pHead->version = 0;
return syncCode;
}
// write into WAL
code = walWrite(pVnode->wal, pHead);
if (code < 0) {
if (syncCode > 0) atomic_sub_fetch_32(&pWrite->processedCount, 1);
vError("vgId:%d, hver:%" PRIu64 " vver:%" PRIu64 " code:0x%x", pVnode->vgId, pHead->version, pVnode->version, code);
pHead->version = 0;
return code;
}
pVnode->version = pHead->version;
// write data locally
code = (*vnodeProcessWriteMsgFp[pHead->msgType])(pVnode, pHead->cont, pRspRet);
if (code < 0) {
if (syncCode > 0) atomic_sub_fetch_32(&pWrite->processedCount, 1);
return code;
}
return syncCode;
}
static int32_t vnodeCheckWrite(SVnodeObj *pVnode) {
if (!(pVnode->accessState & TSDB_VN_WRITE_ACCCESS)) {
vDebug("vgId:%d, no write auth, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode);
return TSDB_CODE_VND_NO_WRITE_AUTH;
}
if (pVnode->dbReplica != pVnode->syncCfg.replica &&
pVnode->syncCfg.nodeInfo[pVnode->syncCfg.replica - 1].nodeId == dnodeGetDnodeId()) {
vDebug("vgId:%d, vnode is balancing and will be dropped, dbReplica:%d vgReplica:%d, refCount:%d pVnode:%p",
pVnode->vgId, pVnode->dbReplica, pVnode->syncCfg.replica, pVnode->refCount, pVnode);
return TSDB_CODE_VND_IS_BALANCING;
}
// tsdb may be in reset state
if (pVnode->tsdb == NULL) {
vDebug("vgId:%d, tsdb is null, refCount:%d pVnode:%p", pVnode->vgId, pVnode->refCount, pVnode);
return TSDB_CODE_APP_NOT_READY;
}
if (pVnode->isFull) {
vDebug("vgId:%d, vnode is full, refCount:%d", pVnode->vgId, pVnode->refCount);
return TSDB_CODE_VND_IS_FULL;
}
return TSDB_CODE_SUCCESS;
}
static int32_t vnodeProcessSubmitMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
int32_t code = TSDB_CODE_SUCCESS;
vTrace("vgId:%d, submit msg is processed", pVnode->vgId);
if (pVnode->dbType == TSDB_DB_TYPE_TOPIC && pVnode->role == TAOS_SYNC_ROLE_MASTER) {
tpUpdateTs(pVnode->vgId, &pVnode->sequence, pCont);
}
// save insert result into item
SShellSubmitRspMsg *pRsp = NULL;
if (pRet) {
pRet->len = sizeof(SShellSubmitRspMsg);
pRet->rsp = rpcMallocCont(pRet->len);
pRsp = pRet->rsp;
}
if (tsdbInsertData(pVnode->tsdb, pCont, pRsp) < 0) code = terrno;
return code;
}
static int32_t vnodeProcessCreateTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
int code = TSDB_CODE_SUCCESS;
STableCfg *pCfg = tsdbCreateTableCfgFromMsg((SMDCreateTableMsg *)pCont);
if (pCfg == NULL) {
ASSERT(terrno != 0);
return terrno;
}
if (tsdbCreateTable(pVnode->tsdb, pCfg) < 0) {
code = terrno;
ASSERT(code != 0);
}
tsdbClearTableCfg(pCfg);
return code;
}
static int32_t vnodeProcessDropTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
SMDDropTableMsg *pTable = pCont;
int32_t code = TSDB_CODE_SUCCESS;
vDebug("vgId:%d, table:%s, start to drop", pVnode->vgId, pTable->tableFname);
STableId tableId = {.uid = htobe64(pTable->uid), .tid = htonl(pTable->tid)};
if (tsdbDropTable(pVnode->tsdb, tableId) < 0) code = terrno;
return code;
}
static int32_t vnodeProcessAlterTableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
// TODO: disposed in tsdb
// STableCfg *pCfg = tsdbCreateTableCfgFromMsg((SMDCreateTableMsg *)pCont);
// if (pCfg == NULL) return terrno;
// if (tsdbCreateTable(pVnode->tsdb, pCfg) < 0) code = terrno;
// tsdbClearTableCfg(pCfg);
vDebug("vgId:%d, alter table msg is received", pVnode->vgId);
return TSDB_CODE_SUCCESS;
}
static int32_t vnodeProcessDropStableMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
SDropSTableMsg *pTable = pCont;
int32_t code = TSDB_CODE_SUCCESS;
vDebug("vgId:%d, stable:%s, start to drop", pVnode->vgId, pTable->tableFname);
STableId stableId = {.uid = htobe64(pTable->uid), .tid = -1};
if (tsdbDropTable(pVnode->tsdb, stableId) < 0) code = terrno;
vDebug("vgId:%d, stable:%s, drop stable result:%s", pVnode->vgId, pTable->tableFname, tstrerror(code));
return code;
}
static int32_t vnodeProcessUpdateTagValMsg(SVnodeObj *pVnode, void *pCont, SRspRet *pRet) {
if (tsdbUpdateTableTagValue(pVnode->tsdb, (SUpdateTableTagValMsg *)pCont) < 0) {
return terrno;
}
return TSDB_CODE_SUCCESS;
}
static SVWriteMsg *vnodeBuildVWriteMsg(SVnodeObj *pVnode, SWalHead *pHead, int32_t qtype, SRpcMsg *pRpcMsg) {
if (pHead->len > TSDB_MAX_WAL_SIZE) {
vError("vgId:%d, wal len:%d exceeds limit, hver:%" PRIu64, pVnode->vgId, pHead->len, pHead->version);
terrno = TSDB_CODE_WAL_SIZE_LIMIT;
return NULL;
}
int32_t size = sizeof(SVWriteMsg) + pHead->len;
SVWriteMsg *pWrite = taosAllocateQitem(size);
if (pWrite == NULL) {
terrno = TSDB_CODE_VND_OUT_OF_MEMORY;
return NULL;
}
if (pRpcMsg != NULL) {
pWrite->rpcMsg = *pRpcMsg;
}
memcpy(&pWrite->walHead, pHead, sizeof(SWalHead) + pHead->len);
pWrite->pVnode = pVnode;
pWrite->qtype = qtype;
atomic_add_fetch_32(&pVnode->refCount, 1);
return pWrite;
}
static int32_t vnodeWriteToWQueueImp(SVWriteMsg *pWrite) {
SVnodeObj *pVnode = pWrite->pVnode;
if (pWrite->qtype == TAOS_QTYPE_RPC) {
int32_t code = vnodeCheckWrite(pVnode);
if (code != TSDB_CODE_SUCCESS) {
vError("vgId:%d, failed to write into vwqueue since %s", pVnode->vgId, tstrerror(code));
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
return code;
}
}
if (tsAvailDataDirGB <= tsMinimalDataDirGB) {
vError("vgId:%d, failed to write into vwqueue since no diskspace, avail:%fGB", pVnode->vgId, tsAvailDataDirGB);
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
return TSDB_CODE_VND_NO_DISKSPACE;
}
if (!vnodeInReadyOrUpdatingStatus(pVnode)) {
vError("vgId:%d, failed to write into vwqueue, vstatus is %s, refCount:%d pVnode:%p", pVnode->vgId,
vnodeStatus[pVnode->status], pVnode->refCount, pVnode);
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
return TSDB_CODE_APP_NOT_READY;
}
int32_t queued = atomic_add_fetch_32(&pVnode->queuedWMsg, 1);
int64_t queuedSize = atomic_add_fetch_64(&pVnode->queuedWMsgSize, pWrite->walHead.len);
if (queued > MAX_QUEUED_MSG_NUM || queuedSize > MAX_QUEUED_MSG_SIZE) {
int32_t ms = (queued / MAX_QUEUED_MSG_NUM) * 10 + 3;
if (ms > 100) ms = 100;
vDebug("vgId:%d, too many msg:%d in vwqueue, flow control %dms", pVnode->vgId, queued, ms);
taosMsleep(ms);
}
vTrace("vgId:%d, write into vwqueue, refCount:%d queued:%d size:%" PRId64, pVnode->vgId, pVnode->refCount,
pVnode->queuedWMsg, pVnode->queuedWMsgSize);
taosWriteQitem(pVnode->wqueue, pWrite->qtype, pWrite);
return TSDB_CODE_SUCCESS;
}
int32_t vnodeWriteToWQueue(void *vparam, void *wparam, int32_t qtype, void *rparam) {
SVnodeObj *pVnode = vparam;
if (qtype == TAOS_QTYPE_RPC) {
if (!vnodeInReadyStatus(pVnode)) {
return TSDB_CODE_APP_NOT_READY; // it may be in deleting or closing state
}
if (pVnode->role != TAOS_SYNC_ROLE_MASTER) {
return TSDB_CODE_APP_NOT_READY;
}
}
SVWriteMsg *pWrite = vnodeBuildVWriteMsg(vparam, wparam, qtype, rparam);
if (pWrite == NULL) {
assert(terrno != 0);
return terrno;
}
int32_t code = vnodePerformFlowCtrl(pWrite);
if (code != 0) return 0;
return vnodeWriteToWQueueImp(pWrite);
}
void vnodeFreeFromWQueue(void *vparam, SVWriteMsg *pWrite) {
SVnodeObj *pVnode = vparam;
if (pVnode) {
int32_t queued = atomic_sub_fetch_32(&pVnode->queuedWMsg, 1);
int64_t queuedSize = atomic_sub_fetch_64(&pVnode->queuedWMsgSize, pWrite->walHead.len);
vTrace("vgId:%d, msg:%p, app:%p, free from vwqueue, queued:%d size:%" PRId64, pVnode->vgId, pWrite,
pWrite->rpcMsg.ahandle, queued, queuedSize);
}
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
}
static void vnodeFlowCtrlMsgToWQueue(void *param, void *tmrId) {
SVWriteMsg *pWrite = param;
SVnodeObj * pVnode = pWrite->pVnode;
int32_t code = TSDB_CODE_VND_IS_SYNCING;
if (pVnode->flowctrlLevel <= 0) code = TSDB_CODE_VND_IS_FLOWCTRL;
pWrite->processedCount++;
if (pWrite->processedCount >= 100) {
vError("vgId:%d, msg:%p, failed to process since %s, retry:%d", pVnode->vgId, pWrite, tstrerror(code),
pWrite->processedCount);
void *handle = pWrite->rpcMsg.handle;
taosFreeQitem(pWrite);
vnodeRelease(pVnode);
SRpcMsg rpcRsp = {.handle = handle, .code = code};
rpcSendResponse(&rpcRsp);
} else {
code = vnodePerformFlowCtrl(pWrite);
if (code == 0) {
vDebug("vgId:%d, msg:%p, write into vwqueue after flowctrl, retry:%d", pVnode->vgId, pWrite,
pWrite->processedCount);
pWrite->processedCount = 0;
void *handle = pWrite->rpcMsg.handle;
code = vnodeWriteToWQueueImp(pWrite);
if (code != TSDB_CODE_SUCCESS) {
SRpcMsg rpcRsp = {.handle = handle, .code = code};
rpcSendResponse(&rpcRsp);
}
}
}
}
static int32_t vnodePerformFlowCtrl(SVWriteMsg *pWrite) {
SVnodeObj *pVnode = pWrite->pVnode;
if (pWrite->qtype != TAOS_QTYPE_RPC) return 0;
if (pVnode->queuedWMsg < MAX_QUEUED_MSG_NUM && pVnode->queuedWMsgSize < MAX_QUEUED_MSG_SIZE &&
pVnode->flowctrlLevel <= 0)
return 0;
if (tsEnableFlowCtrl == 0) {
int32_t ms = (int32_t)pow(2, pVnode->flowctrlLevel + 2);
if (ms > 100) ms = 100;
vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl for %d ms", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle, ms);
taosMsleep(ms);
return 0;
} else {
void *unUsedTimerId = NULL;
taosTmrReset(vnodeFlowCtrlMsgToWQueue, 100, pWrite, tsDnodeTmr, &unUsedTimerId);
vTrace("vgId:%d, msg:%p, app:%p, perform flowctrl, retry:%d", pVnode->vgId, pWrite, pWrite->rpcMsg.ahandle,
pWrite->processedCount);
return TSDB_CODE_VND_ACTION_IN_PROGRESS;
}
}
void vnodeWaitWriteCompleted(SVnodeObj *pVnode) {
int32_t extraSleep = 0;
while (pVnode->queuedWMsg > 0) {
vTrace("vgId:%d, queued wmsg num:%d", pVnode->vgId, pVnode->queuedWMsg);
taosMsleep(10);
extraSleep = 1;
}
if (extraSleep)
taosMsleep(900);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册