diff --git a/.gitmodules b/.gitmodules index a2266c46afd180b52d3aa19003380078894f6a4b..656478a47e2b810204fa6d4b6a00ece2e12acbc8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "src/connector/go"] path = src/connector/go url = git@github.com:taosdata/driver-go.git -[submodule "src/connector/grafanaplugin"] - path = src/connector/grafanaplugin - url = git@github.com:taosdata/grafanaplugin.git [submodule "src/connector/hivemq-tdengine-extension"] path = src/connector/hivemq-tdengine-extension url = git@github.com:taosdata/hivemq-tdengine-extension.git diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 70e76517c6e1d8a30658b15ff053cc0632fe8a8c..808c7c8fa648dc00bf18f88ad11f8f869df57b7c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -111,8 +111,8 @@ typedef enum _mgmt_table { TSDB_MGMT_TABLE_MAX, } EShowType; -#define TSDB_ALTER_TABLE_ADD_TAG_COLUMN 1 -#define TSDB_ALTER_TABLE_DROP_TAG_COLUMN 2 +#define TSDB_ALTER_TABLE_ADD_TAG 1 +#define TSDB_ALTER_TABLE_DROP_TAG 2 #define TSDB_ALTER_TABLE_UPDATE_TAG_NAME 3 #define TSDB_ALTER_TABLE_UPDATE_TAG_VAL 4 @@ -726,6 +726,7 @@ typedef struct { char tbName[TSDB_TABLE_NAME_LEN]; char stbName[TSDB_TABLE_NAME_LEN]; char dbFName[TSDB_DB_FNAME_LEN]; + uint64_t dbId; int32_t numOfTags; int32_t numOfColumns; int8_t precision; @@ -1198,8 +1199,8 @@ typedef struct { int32_t tSerializeSVCreateTbBatchReq(void** buf, SVCreateTbBatchReq* pReq); void* tDeserializeSVCreateTbBatchReq(void* buf, SVCreateTbBatchReq* pReq); -int32_t tSerializeSVCreateTbBatchReqp(void** buf, SVCreateTbBatchReq* pRsp); -void* tDeserializeSVCreateTbBatchReq(void* buf, SVCreateTbBatchReq* pRsp); +int32_t tSerializeSVCreateTbBatchRsp(void** buf, SVCreateTbBatchRsp* pRsp); +void* tDeserializeSVCreateTbBatchRsp(void* buf, SVCreateTbBatchRsp* pRsp); typedef struct { uint64_t ver; @@ -1209,7 +1210,6 @@ typedef struct { } SVDropTbReq; typedef struct { - uint64_t ver; } SVDropTbRsp; int32_t tSerializeSVDropTbReq(void** buf, SVDropTbReq* pReq); diff --git a/include/libs/catalog/catalog.h b/include/libs/catalog/catalog.h index c291ebd8fd3563761434f9ba03869271341ff432..ab1298785ae8030c43566e3fcd7dcc4d600c27e1 100644 --- a/include/libs/catalog/catalog.h +++ b/include/libs/catalog/catalog.h @@ -32,6 +32,15 @@ extern "C" { struct SCatalog; +enum { + CTG_DBG_DB_NUM = 1, + CTG_DBG_META_NUM, + CTG_DBG_STB_NUM, + CTG_DBG_DB_RENT_NUM, + CTG_DBG_STB_RENT_NUM, +}; + + typedef struct SCatalogReq { SArray *pTableName; // element is SNAME SArray *pUdf; // udf name @@ -99,7 +108,7 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, */ int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const char* pDBName, bool forceUpdate, SArray** pVgroupList); -int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDBVgroupInfo* dbInfo); +int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId, SDBVgroupInfo* dbInfo); int32_t catalogRemoveDB(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId); @@ -127,6 +136,8 @@ int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void * pTransporter, cons */ int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta); +int32_t catalogUpdateSTableMeta(struct SCatalog* pCatalog, STableMetaRsp *rspMsg); + /** * Force renew a table's local cached meta data. diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index 2e4093590ddb05dbd14f8d3c26939c9185c0d54a..549f36a8980c5db945b2b471f94911e9c9b8d1f8 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -81,7 +81,6 @@ typedef struct STableMeta { } STableMeta; typedef struct SDBVgroupInfo { - uint64_t dbId; int32_t vgVersion; int8_t hashMethod; SHashObj *vgHash; //key:vgId, value:SVgroupInfo @@ -89,6 +88,7 @@ typedef struct SDBVgroupInfo { typedef struct SUseDbOutput { char db[TSDB_DB_FNAME_LEN]; + uint64_t dbId; SDBVgroupInfo *dbVgroup; } SUseDbOutput; @@ -102,6 +102,7 @@ enum { typedef struct STableMetaOutput { int32_t metaType; + uint64_t dbId; char dbFName[TSDB_DB_FNAME_LEN]; char ctbName[TSDB_TABLE_NAME_LEN]; char tbName[TSDB_TABLE_NAME_LEN]; @@ -159,6 +160,8 @@ void initQueryModuleMsgHandle(); const SSchema* tGetTbnameColumnSchema(); bool tIsValidSchema(struct SSchema* pSchema, int32_t numOfCols, int32_t numOfTags); +int32_t queryCreateTableMetaFromMsg(STableMetaRsp* msg, bool isSuperTable, STableMeta **pMeta); + extern int32_t (*queryBuildMsg[TDMT_MAX])(void* input, char **msg, int32_t msgSize, int32_t *msgLen); extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char *msg, int32_t msgSize); diff --git a/include/nodes/nodes.h b/include/nodes/nodes.h index 73082825efc9c46bccec06ffae96ba80f0d8ff85..ae29445865976874cb84ecf0a3cc3a6559c59209 100644 --- a/include/nodes/nodes.h +++ b/include/nodes/nodes.h @@ -55,7 +55,7 @@ typedef enum ENodeType { QUERY_NODE_FILL, // only for parser - QUERY_NODE_TARGET_EXPR, + QUERY_NODE_RAW_EXPR, QUERY_NODE_SET_OPERATOR, QUERY_NODE_SELECT_STMT, @@ -81,6 +81,13 @@ typedef struct SNodeList { SListCell* pTail; } SNodeList; +typedef struct SRawExprNode { + ENodeType nodeType; + char* p; + uint32_t n; + SNode* pNode; +} SRawExprNode; + typedef struct SDataType { uint8_t type; uint8_t precision; diff --git a/include/util/taoserror.h b/include/util/taoserror.h index b19215c79d973e9221a026ecdc61958b3610bd4e..33d993324523133f205aeace338f63bc1b8ca00d 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -218,18 +218,19 @@ int32_t* taosGetErrno(); // mnode-stable #define TSDB_CODE_MND_STB_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A0) #define TSDB_CODE_MND_STB_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A1) -#define TSDB_CODE_MND_TOO_MANY_STBS TAOS_DEF_ERROR_CODE(0, 0x03A2) -#define TSDB_CODE_MND_INVALID_STB TAOS_DEF_ERROR_CODE(0, 0x03A3) -#define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A4) -#define TSDB_CODE_MND_STB_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03A5) -#define TSDB_CODE_MND_TOO_MANY_TAGS TAOS_DEF_ERROR_CODE(0, 0x03A6) -#define TSDB_CODE_MND_TAG_ALREAY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A7) -#define TSDB_CODE_MND_TAG_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A8) -#define TSDB_CODE_MND_TOO_MANY_COLUMNS TAOS_DEF_ERROR_CODE(0, 0x03A9) -#define TSDB_CODE_MND_COLUMN_ALREAY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AA) -#define TSDB_CODE_MND_COLUMN_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AB) -#define TSDB_CODE_MND_INVALID_ROW_BYTES TAOS_DEF_ERROR_CODE(0, 0x03AC) -#define TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03AD) +#define TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03A2) +#define TSDB_CODE_MND_TOO_MANY_STBS TAOS_DEF_ERROR_CODE(0, 0x03A3) +#define TSDB_CODE_MND_INVALID_STB TAOS_DEF_ERROR_CODE(0, 0x03A4) +#define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A5) +#define TSDB_CODE_MND_INVALID_STB_ALTER_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A6) +#define TSDB_CODE_MND_STB_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03A7) +#define TSDB_CODE_MND_INVALID_ROW_BYTES TAOS_DEF_ERROR_CODE(0, 0x03A8) +#define TSDB_CODE_MND_TOO_MANY_TAGS TAOS_DEF_ERROR_CODE(0, 0x03A9) +#define TSDB_CODE_MND_TAG_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AA) +#define TSDB_CODE_MND_TAG_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AB) +#define TSDB_CODE_MND_TOO_MANY_COLUMNS TAOS_DEF_ERROR_CODE(0, 0x03AC) +#define TSDB_CODE_MND_COLUMN_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AD) +#define TSDB_CODE_MND_COLUMN_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AE) // mnode-func #define TSDB_CODE_MND_FUNC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03C0) @@ -435,6 +436,8 @@ int32_t* taosGetErrno(); #define TSDB_CODE_CTG_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x2402) //catalog is not ready #define TSDB_CODE_CTG_MEM_ERROR TAOS_DEF_ERROR_CODE(0, 0x2403) //catalog memory error #define TSDB_CODE_CTG_SYS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2404) //catalog system error +#define TSDB_CODE_CTG_DB_DROPPED TAOS_DEF_ERROR_CODE(0, 0x2405) //Database is dropped +#define TSDB_CODE_CTG_OUT_OF_SERVICE TAOS_DEF_ERROR_CODE(0, 0x2406) //catalog is out of service //scheduler #define TSDB_CODE_SCH_STATUS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2501) //scheduler status error diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index d265ffaa94d9712bc2ea081d74bdfc8ffe1d5adc..3e1af765b0ae4e725025d651a2cb6c8360aeaf47 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -44,7 +44,6 @@ static int32_t hbProcessDBInfoRsp(void *value, int32_t valueLen, struct SCatalog code = catalogRemoveDB(pCatalog, rsp->db, rsp->uid); } else { SDBVgroupInfo vgInfo = {0}; - vgInfo.dbId = rsp->uid; vgInfo.vgVersion = rsp->vgVersion; vgInfo.hashMethod = rsp->hashMethod; vgInfo.vgHash = taosHashInit(rsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); @@ -69,7 +68,7 @@ static int32_t hbProcessDBInfoRsp(void *value, int32_t valueLen, struct SCatalog } } - code = catalogUpdateDBVgroup(pCatalog, rsp->db, &vgInfo); + code = catalogUpdateDBVgroup(pCatalog, rsp->db, rsp->uid, &vgInfo); if (code) { taosHashCleanup(vgInfo.vgHash); } @@ -101,50 +100,33 @@ static int32_t hbProcessStbInfoRsp(void *value, int32_t valueLen, struct SCatalo tscDebug("hb remove stb, db:%s, stb:%s", rsp->dbFName, rsp->stbName); - code = catalogRemoveSTableMeta(pCatalog, rsp->dbFName, rsp->stbName, rsp->suid); + catalogRemoveSTableMeta(pCatalog, rsp->dbFName, rsp->stbName, rsp->suid); } else { + tscDebug("hb update stb, db:%s, stb:%s", rsp->dbFName, rsp->stbName); + rsp->numOfTags = ntohl(rsp->numOfTags); + rsp->sversion = ntohl(rsp->sversion); + rsp->tversion = ntohl(rsp->tversion); + rsp->tuid = be64toh(rsp->tuid); + rsp->vgId = ntohl(rsp->vgId); + + SSchema* pSchema = rsp->pSchema; schemaNum = rsp->numOfColumns + rsp->numOfTags; -/* - rsp->vgNum = ntohl(rsp->vgNum); - rsp->uid = be64toh(rsp->uid); - - SDBVgroupInfo vgInfo = {0}; - vgInfo.dbId = rsp->uid; - vgInfo.vgVersion = rsp->vgVersion; - vgInfo.hashMethod = rsp->hashMethod; - vgInfo.vgHash = taosHashInit(rsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - if (NULL == vgInfo.vgHash) { - tscError("hash init[%d] failed", rsp->vgNum); - return TSDB_CODE_TSC_OUT_OF_MEMORY; - } - - for (int32_t i = 0; i < rsp->vgNum; ++i) { - rsp->vgroupInfo[i].vgId = ntohl(rsp->vgroupInfo[i].vgId); - rsp->vgroupInfo[i].hashBegin = ntohl(rsp->vgroupInfo[i].hashBegin); - rsp->vgroupInfo[i].hashEnd = ntohl(rsp->vgroupInfo[i].hashEnd); - for (int32_t n = 0; n < rsp->vgroupInfo[i].epset.numOfEps; ++n) { - rsp->vgroupInfo[i].epset.eps[n].port = ntohs(rsp->vgroupInfo[i].epset.eps[n].port); - } + for (int i = 0; i < schemaNum; ++i) { + pSchema->bytes = ntohl(pSchema->bytes); + pSchema->colId = ntohl(pSchema->colId); - if (0 != taosHashPut(vgInfo.vgHash, &rsp->vgroupInfo[i].vgId, sizeof(rsp->vgroupInfo[i].vgId), &rsp->vgroupInfo[i], sizeof(rsp->vgroupInfo[i]))) { - tscError("hash push failed, errno:%d", errno); - taosHashCleanup(vgInfo.vgHash); - return TSDB_CODE_TSC_OUT_OF_MEMORY; - } - } - - code = catalogUpdateDBVgroup(pCatalog, rsp->db, &vgInfo); - if (code) { - taosHashCleanup(vgInfo.vgHash); + pSchema++; } -*/ - } - if (code) { - return code; + if (rsp->pSchema[0].colId != PRIMARYKEY_TIMESTAMP_COL_ID) { + tscError("invalid colId[%d] for the first column in table meta rsp msg", rsp->pSchema[0].colId); + return TSDB_CODE_TSC_INVALID_VALUE; + } + + catalogUpdateSTableMeta(pCatalog, rsp); } msgLen += sizeof(STableMetaRsp) + schemaNum * sizeof(SSchema); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index e45b61554c0ba434ee482a702446d65aeb2a07cf..6f094b20f805664d965a6e9b0cd3e4c429c573ce 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -27,7 +27,7 @@ #undef TD_MSG_SEG_CODE_ #include "tmsgdef.h" -int tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) { +int32_t tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) { if (pMsg == NULL) { terrno = TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP; return -1; @@ -44,7 +44,7 @@ int tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) { return 0; } -int tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) { +int32_t tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) { if (pIter->len == 0) { pIter->len += sizeof(SSubmitMsg); } else { @@ -63,7 +63,7 @@ int tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) { return 0; } -int tInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) { +int32_t tInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) { if (pBlock->dataLen <= 0) return -1; pIter->totalLen = pBlock->dataLen; pIter->len = 0; @@ -85,14 +85,14 @@ SMemRow tGetSubmitBlkNext(SSubmitBlkIter *pIter) { } } -int tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) { - int tlen = 0; +int32_t tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) { + int32_t tlen = 0; tlen += taosEncodeSClientHbKey(buf, &pReq->connKey); int32_t kvNum = taosHashGetSize(pReq->info); tlen += taosEncodeFixedI32(buf, kvNum); - SKv *kv; - void* pIter = taosHashIterate(pReq->info, NULL); + SKv *kv; + void *pIter = taosHashIterate(pReq->info, NULL); while (pIter != NULL) { kv = pIter; tlen += taosEncodeSKv(buf, kv); @@ -111,7 +111,7 @@ void *tDeserializeSClientHbReq(void *buf, SClientHbReq *pReq) { if (pReq->info == NULL) { pReq->info = taosHashInit(kvNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); } - for(int i = 0; i < kvNum; i++) { + for (int32_t i = 0; i < kvNum; i++) { SKv kv; buf = taosDecodeSKv(buf, &kv); taosHashPut(pReq->info, &kv.key, sizeof(kv.key), &kv, sizeof(kv)); @@ -120,54 +120,55 @@ void *tDeserializeSClientHbReq(void *buf, SClientHbReq *pReq) { return buf; } -int tSerializeSClientHbRsp(void** buf, const SClientHbRsp* pRsp) { - int tlen = 0; +int32_t tSerializeSClientHbRsp(void **buf, const SClientHbRsp *pRsp) { + int32_t tlen = 0; int32_t kvNum = taosArrayGetSize(pRsp->info); tlen += taosEncodeSClientHbKey(buf, &pRsp->connKey); tlen += taosEncodeFixedI32(buf, pRsp->status); tlen += taosEncodeFixedI32(buf, kvNum); - for (int i = 0; i < kvNum; i++) { + for (int32_t i = 0; i < kvNum; i++) { SKv *kv = (SKv *)taosArrayGet(pRsp->info, i); tlen += taosEncodeSKv(buf, kv); } return tlen; } -void* tDeserializeSClientHbRsp(void* buf, SClientHbRsp* pRsp) { + +void *tDeserializeSClientHbRsp(void *buf, SClientHbRsp *pRsp) { int32_t kvNum = 0; buf = taosDecodeSClientHbKey(buf, &pRsp->connKey); buf = taosDecodeFixedI32(buf, &pRsp->status); buf = taosDecodeFixedI32(buf, &kvNum); pRsp->info = taosArrayInit(kvNum, sizeof(SKv)); - for (int i = 0; i < kvNum; i++) { + for (int32_t i = 0; i < kvNum; i++) { SKv kv = {0}; buf = taosDecodeSKv(buf, &kv); taosArrayPush(pRsp->info, &kv); } - + return buf; } -int tSerializeSClientHbBatchReq(void** buf, const SClientHbBatchReq* pBatchReq) { - int tlen = 0; +int32_t tSerializeSClientHbBatchReq(void **buf, const SClientHbBatchReq *pBatchReq) { + int32_t tlen = 0; tlen += taosEncodeFixedI64(buf, pBatchReq->reqId); int32_t reqNum = taosArrayGetSize(pBatchReq->reqs); - tlen += taosEncodeFixedI32(buf, reqNum); - for (int i = 0; i < reqNum; i++) { - SClientHbReq* pReq = taosArrayGet(pBatchReq->reqs, i); + tlen += taosEncodeFixedI32(buf, reqNum); + for (int32_t i = 0; i < reqNum; i++) { + SClientHbReq *pReq = taosArrayGet(pBatchReq->reqs, i); tlen += tSerializeSClientHbReq(buf, pReq); } return tlen; } -void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) { +void *tDeserializeSClientHbBatchReq(void *buf, SClientHbBatchReq *pBatchReq) { buf = taosDecodeFixedI64(buf, &pBatchReq->reqId); if (pBatchReq->reqs == NULL) { pBatchReq->reqs = taosArrayInit(0, sizeof(SClientHbReq)); } - + int32_t reqNum; buf = taosDecodeFixedI32(buf, &reqNum); - for (int i = 0; i < reqNum; i++) { + for (int32_t i = 0; i < reqNum; i++) { SClientHbReq req = {0}; buf = tDeserializeSClientHbReq(buf, &req); taosArrayPush(pBatchReq->reqs, &req); @@ -175,31 +176,31 @@ void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) { return buf; } -int tSerializeSClientHbBatchRsp(void** buf, const SClientHbBatchRsp* pBatchRsp) { - int tlen = 0; +int32_t tSerializeSClientHbBatchRsp(void **buf, const SClientHbBatchRsp *pBatchRsp) { + int32_t tlen = 0; int32_t sz = taosArrayGetSize(pBatchRsp->rsps); tlen += taosEncodeFixedI32(buf, sz); - for (int i = 0; i < sz; i++) { - SClientHbRsp* pRsp = taosArrayGet(pBatchRsp->rsps, i); + for (int32_t i = 0; i < sz; i++) { + SClientHbRsp *pRsp = taosArrayGet(pBatchRsp->rsps, i); tlen += tSerializeSClientHbRsp(buf, pRsp); } return tlen; } -void* tDeserializeSClientHbBatchRsp(void* buf, SClientHbBatchRsp* pBatchRsp) { +void *tDeserializeSClientHbBatchRsp(void *buf, SClientHbBatchRsp *pBatchRsp) { int32_t sz; buf = taosDecodeFixedI32(buf, &sz); pBatchRsp->rsps = taosArrayInit(sz, sizeof(SClientHbRsp)); - for (int i = 0; i < sz; i++) { + for (int32_t i = 0; i < sz; i++) { SClientHbRsp rsp = {0}; - buf = tDeserializeSClientHbRsp(buf, &rsp); + buf = tDeserializeSClientHbRsp(buf, &rsp); taosArrayPush(pBatchRsp->rsps, &rsp); } return buf; } -int tSerializeSVCreateTbReq(void **buf, SVCreateTbReq *pReq) { - int tlen = 0; +int32_t tSerializeSVCreateTbReq(void **buf, SVCreateTbReq *pReq) { + int32_t tlen = 0; tlen += taosEncodeFixedU64(buf, pReq->ver); tlen += taosEncodeString(buf, pReq->name); @@ -293,8 +294,8 @@ void *tDeserializeSVCreateTbReq(void *buf, SVCreateTbReq *pReq) { return buf; } -int tSerializeSVCreateTbBatchReq(void **buf, SVCreateTbBatchReq *pReq) { - int tlen = 0; +int32_t tSerializeSVCreateTbBatchReq(void **buf, SVCreateTbBatchReq *pReq) { + int32_t tlen = 0; tlen += taosEncodeFixedU64(buf, pReq->ver); tlen += taosEncodeFixedU32(buf, taosArrayGetSize(pReq->pArray)); @@ -322,7 +323,7 @@ void *tDeserializeSVCreateTbBatchReq(void *buf, SVCreateTbBatchReq *pReq) { } int32_t tSerializeSVDropTbReq(void **buf, SVDropTbReq *pReq) { - int tlen = 0; + int32_t tlen = 0; tlen += taosEncodeFixedU64(buf, pReq->ver); tlen += taosEncodeString(buf, pReq->name); tlen += taosEncodeFixedU8(buf, pReq->type); diff --git a/source/dnode/mgmt/impl/test/vnode/vnode.cpp b/source/dnode/mgmt/impl/test/vnode/vnode.cpp index 9451608653e1a05eb27ed0e6d2852fb1ebcd9de6..380a837c58312a5be3a10241bddb32bb4e9d18b0 100644 --- a/source/dnode/mgmt/impl/test/vnode/vnode.cpp +++ b/source/dnode/mgmt/impl/test/vnode/vnode.cpp @@ -108,7 +108,7 @@ TEST_F(DndTestVnode, 01_Create_Vnode) { } } -TEST_F(DndTestVnode, 02_ALTER_Vnode) { +TEST_F(DndTestVnode, 02_Alter_Vnode) { for (int i = 0; i < 3; ++i) { int32_t contLen = sizeof(SAlterVnodeReq); @@ -219,7 +219,7 @@ TEST_F(DndTestVnode, 03_Create_Stb) { } } -TEST_F(DndTestVnode, 04_ALTER_Stb) { +TEST_F(DndTestVnode, 04_Alter_Stb) { for (int i = 0; i < 1; ++i) { SVCreateTbReq req = {0}; req.ver = 0; @@ -310,7 +310,7 @@ TEST_F(DndTestVnode, 05_DROP_Stb) { } } -TEST_F(DndTestVnode, 06_DROP_Vnode) { +TEST_F(DndTestVnode, 06_Drop_Vnode) { for (int i = 0; i < 3; ++i) { int32_t contLen = sizeof(SDropVnodeReq); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 006abcd8e2e2282a6c7fe9fd7e58cb5a39059d78..7b21f33702889d96986f83a507d9c26c5b17ebe8 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -652,12 +652,12 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, const SSc for (int32_t i = 0; i < ntags; i++) { if (mndFindSuperTableColumnIndex(pOld, pSchemas[i].name) > 0) { - terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST; + terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST; return -1; } if (mndFindSuperTableTagIndex(pOld, pSchemas[i].name) > 0) { - terrno = TSDB_CODE_MND_COLUMN_ALREAY_EXIST; + terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST; return -1; } } @@ -692,6 +692,7 @@ static int32_t mndDropSuperTableTag(const SStbObj *pOld, SStbObj *pNew, const ch } memmove(pNew->pTags + tag, pNew->pTags + tag + 1, sizeof(SSchema) * (pNew->numOfTags - tag - 1)); + pNew->numOfTags--; pNew->version++; mDebug("stb:%s, start to drop tag %s", pNew->name, tagName); @@ -706,13 +707,12 @@ static int32_t mndAlterStbTagName(const SStbObj *pOld, SStbObj *pNew, const char } if (mndFindSuperTableTagIndex(pOld, newTagName) >= 0) { - terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST; + terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST; return -1; } - int32_t len = (int32_t)strlen(newTagName); - if (len >= TSDB_COL_NAME_LEN) { - terrno = TSDB_CODE_MND_INVALID_STB_OPTION; + if (mndFindSuperTableColumnIndex(pOld, newTagName) >= 0) { + terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST; return -1; } @@ -735,16 +735,17 @@ static int32_t mndAlterStbTagBytes(const SStbObj *pOld, SStbObj *pNew, const SSc return -1; } - if (!(pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR)) { - terrno = TSDB_CODE_MND_INVALID_STB_OPTION; + if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; } - if (mndAllocStbSchemas(pOld, pNew) != 0) { + SSchema *pTag = pNew->pTags + tag; + + if (!(pTag->type == TSDB_DATA_TYPE_BINARY || pTag->type == TSDB_DATA_TYPE_NCHAR)) { + terrno = TSDB_CODE_MND_INVALID_STB_OPTION; return -1; } - SSchema *pTag = pNew->pTags + tag; if (pSchema->bytes <= pTag->bytes) { terrno = TSDB_CODE_MND_INVALID_ROW_BYTES; return -1; @@ -765,12 +766,12 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, const for (int32_t i = 0; i < ncols; i++) { if (mndFindSuperTableColumnIndex(pOld, pSchemas[i].name) > 0) { - terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST; + terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST; return -1; } if (mndFindSuperTableTagIndex(pOld, pSchemas[i].name) > 0) { - terrno = TSDB_CODE_MND_COLUMN_ALREAY_EXIST; + terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST; return -1; } } @@ -795,16 +796,27 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, const static int32_t mndDropSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, const char *colName) { int32_t col = mndFindSuperTableColumnIndex(pOld, colName); - if (col <= 0) { + if (col < 0) { terrno = TSDB_CODE_MND_COLUMN_NOT_EXIST; return -1; } + if (col == 0) { + terrno = TSDB_CODE_MND_INVALID_STB_ALTER_OPTION; + return -1; + } + + if (pOld->numOfColumns == 2) { + terrno = TSDB_CODE_MND_INVALID_STB_ALTER_OPTION; + return -1; + } + if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; } memmove(pNew->pColumns + col, pNew->pColumns + col + 1, sizeof(SSchema) * (pNew->numOfColumns - col - 1)); + pNew->numOfColumns--; pNew->version++; mDebug("stb:%s, start to drop col %s", pNew->name, colName); @@ -818,11 +830,6 @@ static int32_t mndAlterStbColumnBytes(const SStbObj *pOld, SStbObj *pNew, const return -1; } - if (!(pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR)) { - terrno = TSDB_CODE_MND_INVALID_STB_OPTION; - return -1; - } - uint32_t nLen = 0; for (int32_t i = 0; i < pOld->numOfColumns; ++i) { nLen += (pOld->pColumns[i].colId == col) ? pSchema->bytes : pOld->pColumns[i].bytes; @@ -838,6 +845,11 @@ static int32_t mndAlterStbColumnBytes(const SStbObj *pOld, SStbObj *pNew, const } SSchema *pCol = pNew->pColumns + col; + if (!(pCol->type == TSDB_DATA_TYPE_BINARY || pCol->type == TSDB_DATA_TYPE_NCHAR)) { + terrno = TSDB_CODE_MND_INVALID_STB_OPTION; + return -1; + } + if (pSchema->bytes <= pCol->bytes) { terrno = TSDB_CODE_MND_INVALID_ROW_BYTES; return -1; @@ -914,12 +926,13 @@ static int32_t mndAlterStb(SMnode *pMnode, SMnodeMsg *pReq, const SMAltertbReq * taosRUnLockLatch(&pOld->lock); int32_t code = -1; + STrans *pTrans = NULL; switch (pAlter->alterType) { - case TSDB_ALTER_TABLE_ADD_TAG_COLUMN: + case TSDB_ALTER_TABLE_ADD_TAG: code = mndAddSuperTableTag(pOld, &stbObj, pAlter->pSchemas, 1); break; - case TSDB_ALTER_TABLE_DROP_TAG_COLUMN: + case TSDB_ALTER_TABLE_DROP_TAG: code = mndDropSuperTableTag(pOld, &stbObj, pAlter->pSchemas[0].name); break; case TSDB_ALTER_TABLE_UPDATE_TAG_NAME: @@ -945,7 +958,7 @@ static int32_t mndAlterStb(SMnode *pMnode, SMnodeMsg *pReq, const SMAltertbReq * if (code != 0) goto ALTER_STB_OVER; code = -1; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg); + pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg); if (pTrans == NULL) goto ALTER_STB_OVER; mDebug("trans:%d, used to alter stb:%s", pTrans->id, pAlter->name); @@ -975,17 +988,17 @@ static int32_t mndProcessMAlterStbReq(SMnodeMsg *pReq) { return -1; } - SStbObj *pStb = mndAcquireStb(pMnode, pAlter->name); - if (pStb == NULL) { - terrno = TSDB_CODE_MND_STB_NOT_EXIST; + SDbObj *pDb = mndAcquireDbByStb(pMnode, pAlter->name); + if (pDb == NULL) { + terrno = TSDB_CODE_MND_INVALID_DB; mError("stb:%s, failed to alter since %s", pAlter->name, terrstr()); return -1; } - SDbObj *pDb = mndAcquireDbByStb(pMnode, pAlter->name); - if (pDb == NULL) { - mndReleaseStb(pMnode, pStb); - terrno = TSDB_CODE_MND_DB_NOT_SELECTED; + SStbObj *pStb = mndAcquireStb(pMnode, pAlter->name); + if (pStb == NULL) { + mndReleaseDb(pMnode, pDb); + terrno = TSDB_CODE_MND_STB_NOT_EXIST; mError("stb:%s, failed to alter since %s", pAlter->name, terrstr()); return -1; } @@ -994,7 +1007,7 @@ static int32_t mndProcessMAlterStbReq(SMnodeMsg *pReq) { mndReleaseStb(pMnode, pStb); if (code != 0) { - mError("stb:%s, failed to alter since %s", pAlter->name, tstrerror(code)); + mError("stb:%s, failed to alter since %s", pAlter->name, terrstr()); return code; } @@ -1165,6 +1178,7 @@ static int32_t mndProcessStbMetaReq(SMnodeMsg *pReq) { strcpy(pMeta->dbFName, pStb->db); strcpy(pMeta->tbName, pInfo->tbName); strcpy(pMeta->stbName, pInfo->tbName); + pMeta->dbId = htobe64(pDb->uid); pMeta->numOfTags = htonl(pStb->numOfTags); pMeta->numOfColumns = htonl(pStb->numOfColumns); pMeta->precision = pDb->cfg.precision; diff --git a/source/dnode/mnode/impl/test/stb/stb.cpp b/source/dnode/mnode/impl/test/stb/stb.cpp index ed0beb50a47a7f7a199b8b9bccbfee63e12a8446..402795eb039bd8992545f690318ca9aabbf02acb 100644 --- a/source/dnode/mnode/impl/test/stb/stb.cpp +++ b/source/dnode/mnode/impl/test/stb/stb.cpp @@ -23,8 +23,18 @@ class MndTestStb : public ::testing::Test { void TearDown() override {} SCreateDbReq* BuildCreateDbReq(const char* dbname, int32_t* pContLen); + SDropDbReq* BuildDropDbReq(const char* dbname, int32_t* pContLen); SMCreateStbReq* BuildCreateStbReq(const char* stbname, int32_t* pContLen); - SMAltertbReq* BuildAlterStbAddTagReq(const char* stbname, int32_t* pContLen); + SMAltertbReq* BuildAlterStbAddTagReq(const char* stbname, const char* tagname, int32_t* pContLen); + SMAltertbReq* BuildAlterStbDropTagReq(const char* stbname, const char* tagname, int32_t* pContLen); + SMAltertbReq* BuildAlterStbUpdateTagNameReq(const char* stbname, const char* tagname, const char* newtagname, + int32_t* pContLen); + SMAltertbReq* BuildAlterStbUpdateTagBytesReq(const char* stbname, const char* tagname, int32_t bytes, + int32_t* pContLen); + SMAltertbReq* BuildAlterStbAddColumnReq(const char* stbname, const char* colname, int32_t* pContLen); + SMAltertbReq* BuildAlterStbDropColumnReq(const char* stbname, const char* colname, int32_t* pContLen); + SMAltertbReq* BuildAlterStbUpdateColumnBytesReq(const char* stbname, const char* colname, int32_t bytes, + int32_t* pContLen); }; Testbase MndTestStb::test; @@ -58,7 +68,17 @@ SCreateDbReq* MndTestStb::BuildCreateDbReq(const char* dbname, int32_t* pContLen return pReq; } -SMCreateStbReq* MndTestStb::BuildCreateStbReq(const char *stbname, int32_t* pContLen) { +SDropDbReq* MndTestStb::BuildDropDbReq(const char* dbname, int32_t* pContLen) { + int32_t contLen = sizeof(SDropDbReq); + + SDropDbReq* pReq = (SDropDbReq*)rpcMallocCont(contLen); + strcpy(pReq->db, dbname); + + *pContLen = contLen; + return pReq; +} + +SMCreateStbReq* MndTestStb::BuildCreateStbReq(const char* stbname, int32_t* pContLen) { int32_t cols = 2; int32_t tags = 3; int32_t contLen = (tags + cols) * sizeof(SSchema) + sizeof(SMCreateStbReq); @@ -77,8 +97,8 @@ SMCreateStbReq* MndTestStb::BuildCreateStbReq(const char *stbname, int32_t* pCon { SSchema* pSchema = &pReq->pSchemas[1]; - pSchema->bytes = htonl(4); - pSchema->type = TSDB_DATA_TYPE_INT; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; strcpy(pSchema->name, "col1"); } @@ -107,173 +127,129 @@ SMCreateStbReq* MndTestStb::BuildCreateStbReq(const char *stbname, int32_t* pCon return pReq; } -// TEST_F(MndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { -// const char *dbname = "1.d1"; -// const char *stbname = "1.d1.stb"; - -// { -// int32_t contLen = 0; -// SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// { -// int32_t contLen = 0; -// SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// { -// test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); -// CHECK_META("show stables", 4); -// CHECK_SCHEMA(0, TSDB_DATA_TYPE_BINARY, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE, "name"); -// CHECK_SCHEMA(1, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); -// CHECK_SCHEMA(2, TSDB_DATA_TYPE_INT, 4, "columns"); -// CHECK_SCHEMA(3, TSDB_DATA_TYPE_INT, 4, "tags"); - -// test.SendShowRetrieveReq(); -// EXPECT_EQ(test.GetShowRows(), 1); -// CheckBinary("stb", TSDB_TABLE_NAME_LEN); -// CheckTimestamp(); -// CheckInt32(2); -// CheckInt32(3); -// } - -// // ----- meta ------ -// { -// int32_t contLen = sizeof(STableInfoReq); -// STableInfoReq* pReq = (STableInfoReq*)rpcMallocCont(contLen); -// strcpy(pReq->dbFName, dbname); -// strcpy(pReq->tbName, "stb"); - -// SRpcMsg* pMsg = test.SendReq(TDMT_MND_STB_META, pReq, contLen); -// ASSERT_NE(pMsg, nullptr); -// ASSERT_EQ(pMsg->code, 0); - -// STableMetaRsp* pRsp = (STableMetaRsp*)pMsg->pCont; -// pRsp->numOfTags = htonl(pRsp->numOfTags); -// pRsp->numOfColumns = htonl(pRsp->numOfColumns); -// pRsp->sversion = htonl(pRsp->sversion); -// pRsp->tversion = htonl(pRsp->tversion); -// pRsp->suid = be64toh(pRsp->suid); -// pRsp->tuid = be64toh(pRsp->tuid); -// pRsp->vgId = be64toh(pRsp->vgId); -// for (int32_t i = 0; i < pRsp->numOfTags + pRsp->numOfColumns; ++i) { -// SSchema* pSchema = &pRsp->pSchema[i]; -// pSchema->colId = htonl(pSchema->colId); -// pSchema->bytes = htonl(pSchema->bytes); -// } - -// EXPECT_STREQ(pRsp->dbFName, dbname); -// EXPECT_STREQ(pRsp->tbName, "stb"); -// EXPECT_STREQ(pRsp->stbName, "stb"); -// EXPECT_EQ(pRsp->numOfColumns, 2); -// EXPECT_EQ(pRsp->numOfTags, 3); -// EXPECT_EQ(pRsp->precision, TSDB_TIME_PRECISION_MILLI); -// EXPECT_EQ(pRsp->tableType, TSDB_SUPER_TABLE); -// EXPECT_EQ(pRsp->update, 0); -// EXPECT_EQ(pRsp->sversion, 1); -// EXPECT_EQ(pRsp->tversion, 0); -// EXPECT_GT(pRsp->suid, 0); -// EXPECT_GT(pRsp->tuid, 0); -// EXPECT_EQ(pRsp->vgId, 0); - -// { -// SSchema* pSchema = &pRsp->pSchema[0]; -// EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_TIMESTAMP); -// EXPECT_EQ(pSchema->colId, 1); -// EXPECT_EQ(pSchema->bytes, 8); -// EXPECT_STREQ(pSchema->name, "ts"); -// } - -// { -// SSchema* pSchema = &pRsp->pSchema[1]; -// EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_INT); -// EXPECT_EQ(pSchema->colId, 2); -// EXPECT_EQ(pSchema->bytes, 4); -// EXPECT_STREQ(pSchema->name, "col1"); -// } - -// { -// SSchema* pSchema = &pRsp->pSchema[2]; -// EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_TINYINT); -// EXPECT_EQ(pSchema->colId, 3); -// EXPECT_EQ(pSchema->bytes, 2); -// EXPECT_STREQ(pSchema->name, "tag1"); -// } - -// { -// SSchema* pSchema = &pRsp->pSchema[3]; -// EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_BIGINT); -// EXPECT_EQ(pSchema->colId, 4); -// EXPECT_EQ(pSchema->bytes, 8); -// EXPECT_STREQ(pSchema->name, "tag2"); -// } - -// { -// SSchema* pSchema = &pRsp->pSchema[4]; -// EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_BINARY); -// EXPECT_EQ(pSchema->colId, 5); -// EXPECT_EQ(pSchema->bytes, 16); -// EXPECT_STREQ(pSchema->name, "tag3"); -// } -// } - -// // restart -// test.Restart(); - -// { -// test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); -// CHECK_META("show stables", 4); -// test.SendShowRetrieveReq(); -// EXPECT_EQ(test.GetShowRows(), 1); - -// CheckBinary("stb", TSDB_TABLE_NAME_LEN); -// CheckTimestamp(); -// CheckInt32(2); -// CheckInt32(3); -// } - -// { -// int32_t contLen = sizeof(SMDropStbReq); - -// SMDropStbReq* pReq = (SMDropStbReq*)rpcMallocCont(contLen); -// strcpy(pReq->name, stbname); - -// SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_STB, pReq, contLen); -// ASSERT_NE(pRsp, nullptr); -// ASSERT_EQ(pRsp->code, 0); -// } - -// test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); -// CHECK_META("show stables", 4); -// test.SendShowRetrieveReq(); -// EXPECT_EQ(test.GetShowRows(), 0); -// } - -SMAltertbReq* MndTestStb::BuildAlterStbAddTagReq(const char* stbname, int32_t* pContLen) { +SMAltertbReq* MndTestStb::BuildAlterStbAddTagReq(const char* stbname, const char* tagname, int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_ADD_TAG; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, tagname); + + *pContLen = contLen; + return pReq; +} + +SMAltertbReq* MndTestStb::BuildAlterStbDropTagReq(const char* stbname, const char* tagname, int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_DROP_TAG; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, tagname); + + *pContLen = contLen; + return pReq; +} + +SMAltertbReq* MndTestStb::BuildAlterStbUpdateTagNameReq(const char* stbname, const char* tagname, + const char* newtagname, int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + 2 * sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_UPDATE_TAG_NAME; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, tagname); + + pSchema = &pReq->pSchemas[1]; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, newtagname); + + *pContLen = contLen; + return pReq; +} + +SMAltertbReq* MndTestStb::BuildAlterStbUpdateTagBytesReq(const char* stbname, const char* tagname, int32_t bytes, + int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_UPDATE_TAG_BYTES; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(bytes); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, tagname); + + *pContLen = contLen; + return pReq; +} + +SMAltertbReq* MndTestStb::BuildAlterStbAddColumnReq(const char* stbname, const char* colname, int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_ADD_COLUMN; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, colname); + + *pContLen = contLen; + return pReq; +} + +SMAltertbReq* MndTestStb::BuildAlterStbDropColumnReq(const char* stbname, const char* colname, int32_t* pContLen) { int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); strcpy(pReq->name, stbname); pReq->numOfSchemas = htonl(1); - pReq->alterType = TSDB_ALTER_TABLE_ADD_TAG_COLUMN; + pReq->alterType = TSDB_ALTER_TABLE_DROP_COLUMN; SSchema* pSchema = &pReq->pSchemas[0]; - pSchema->bytes = htonl(4); - pSchema->type = TSDB_DATA_TYPE_INT; - strcpy(pSchema->name, "tag4"); + pSchema->bytes = htonl(12); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, colname); *pContLen = contLen; return pReq; } -TEST_F(MndTestStb, 01_Alter_Stb) { - const char *dbname = "1.d2"; - const char *stbname = "1.d2.stb"; +SMAltertbReq* MndTestStb::BuildAlterStbUpdateColumnBytesReq(const char* stbname, const char* colname, int32_t bytes, + int32_t* pContLen) { + int32_t contLen = sizeof(SMAltertbReq) + sizeof(SSchema); + SMAltertbReq* pReq = (SMAltertbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + pReq->numOfSchemas = htonl(1); + pReq->alterType = TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES; + + SSchema* pSchema = &pReq->pSchemas[0]; + pSchema->bytes = htonl(bytes); + pSchema->type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema->name, colname); + + *pContLen = contLen; + return pReq; +} + +TEST_F(MndTestStb, 01_Create_Show_Meta_Drop_Restart_Stb) { + const char* dbname = "1.d1"; + const char* stbname = "1.d1.stb"; { int32_t contLen = 0; @@ -292,8 +268,188 @@ TEST_F(MndTestStb, 01_Alter_Stb) { } { - int32_t contLen = 0; - SMAltertbReq* pReq = BuildAlterStbAddTagReq(stbname, &contLen); + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + CHECK_META("show stables", 4); + CHECK_SCHEMA(0, TSDB_DATA_TYPE_BINARY, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE, "name"); + CHECK_SCHEMA(1, TSDB_DATA_TYPE_TIMESTAMP, 8, "create_time"); + CHECK_SCHEMA(2, TSDB_DATA_TYPE_INT, 4, "columns"); + CHECK_SCHEMA(3, TSDB_DATA_TYPE_INT, 4, "tags"); + + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + // ----- meta ------ + { + int32_t contLen = sizeof(STableInfoReq); + STableInfoReq* pReq = (STableInfoReq*)rpcMallocCont(contLen); + strcpy(pReq->dbFName, dbname); + strcpy(pReq->tbName, "stb"); + + SRpcMsg* pMsg = test.SendReq(TDMT_MND_STB_META, pReq, contLen); + ASSERT_NE(pMsg, nullptr); + ASSERT_EQ(pMsg->code, 0); + + STableMetaRsp* pRsp = (STableMetaRsp*)pMsg->pCont; + pRsp->numOfTags = htonl(pRsp->numOfTags); + pRsp->numOfColumns = htonl(pRsp->numOfColumns); + pRsp->sversion = htonl(pRsp->sversion); + pRsp->tversion = htonl(pRsp->tversion); + pRsp->suid = be64toh(pRsp->suid); + pRsp->tuid = be64toh(pRsp->tuid); + pRsp->vgId = be64toh(pRsp->vgId); + for (int32_t i = 0; i < pRsp->numOfTags + pRsp->numOfColumns; ++i) { + SSchema* pSchema = &pRsp->pSchema[i]; + pSchema->colId = htonl(pSchema->colId); + pSchema->bytes = htonl(pSchema->bytes); + } + + EXPECT_STREQ(pRsp->dbFName, dbname); + EXPECT_STREQ(pRsp->tbName, "stb"); + EXPECT_STREQ(pRsp->stbName, "stb"); + EXPECT_EQ(pRsp->numOfColumns, 2); + EXPECT_EQ(pRsp->numOfTags, 3); + EXPECT_EQ(pRsp->precision, TSDB_TIME_PRECISION_MILLI); + EXPECT_EQ(pRsp->tableType, TSDB_SUPER_TABLE); + EXPECT_EQ(pRsp->update, 0); + EXPECT_EQ(pRsp->sversion, 1); + EXPECT_EQ(pRsp->tversion, 0); + EXPECT_GT(pRsp->suid, 0); + EXPECT_GT(pRsp->tuid, 0); + EXPECT_EQ(pRsp->vgId, 0); + + { + SSchema* pSchema = &pRsp->pSchema[0]; + EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_TIMESTAMP); + EXPECT_EQ(pSchema->colId, 1); + EXPECT_EQ(pSchema->bytes, 8); + EXPECT_STREQ(pSchema->name, "ts"); + } + + { + SSchema* pSchema = &pRsp->pSchema[1]; + EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_BINARY); + EXPECT_EQ(pSchema->colId, 2); + EXPECT_EQ(pSchema->bytes, 12); + EXPECT_STREQ(pSchema->name, "col1"); + } + + { + SSchema* pSchema = &pRsp->pSchema[2]; + EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_TINYINT); + EXPECT_EQ(pSchema->colId, 3); + EXPECT_EQ(pSchema->bytes, 2); + EXPECT_STREQ(pSchema->name, "tag1"); + } + + { + SSchema* pSchema = &pRsp->pSchema[3]; + EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_BIGINT); + EXPECT_EQ(pSchema->colId, 4); + EXPECT_EQ(pSchema->bytes, 8); + EXPECT_STREQ(pSchema->name, "tag2"); + } + + { + SSchema* pSchema = &pRsp->pSchema[4]; + EXPECT_EQ(pSchema->type, TSDB_DATA_TYPE_BINARY); + EXPECT_EQ(pSchema->colId, 5); + EXPECT_EQ(pSchema->bytes, 16); + EXPECT_STREQ(pSchema->name, "tag3"); + } + } + + // restart + test.Restart(); + + { + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + CHECK_META("show stables", 4); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + { + int32_t contLen = sizeof(SMDropStbReq); + + SMDropStbReq* pReq = (SMDropStbReq*)rpcMallocCont(contLen); + strcpy(pReq->name, stbname); + + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + CHECK_META("show stables", 4); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 0); + } + + { + int32_t contLen = 0; + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 02_Alter_Stb_AddTag) { + const char* dbname = "1.d2"; + const char* stbname = "1.d2.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddTagReq("1.d3.stb", "tag4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddTagReq("1.d2.stb3", "tag4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_STB_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddTagReq(stbname, "tag3", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddTagReq(stbname, "col1", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_COLUMN_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddTagReq(stbname, "tag4", &contLen); SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); ASSERT_NE(pRsp, nullptr); ASSERT_EQ(pRsp->code, 0); @@ -306,4 +462,377 @@ TEST_F(MndTestStb, 01_Alter_Stb) { CheckInt32(2); CheckInt32(4); } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 03_Alter_Stb_DropTag) { + const char* dbname = "1.d3"; + const char* stbname = "1.d3.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropTagReq(stbname, "tag5", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropTagReq(stbname, "tag3", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(2); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 04_Alter_Stb_AlterTagName) { + const char* dbname = "1.d4"; + const char* stbname = "1.d4.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "tag5", "tag6", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "col1", "tag6", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "tag3", "col1", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_COLUMN_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "tag3", "tag2", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_ALREADY_EXIST); + } + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "tag3", "tag2", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagNameReq(stbname, "tag3", "tag4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 05_Alter_Stb_AlterTagBytes) { + const char* dbname = "1.d5"; + const char* stbname = "1.d5.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagBytesReq(stbname, "tag5", 12, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagBytesReq(stbname, "tag1", 13, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_STB_OPTION); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagBytesReq(stbname, "tag3", 8, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_ROW_BYTES); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateTagBytesReq(stbname, "tag3", 20, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 06_Alter_Stb_AddColumn) { + const char* dbname = "1.d6"; + const char* stbname = "1.d6.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq("1.d7.stb", "tag4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_DB); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq("1.d6.stb3", "tag4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_STB_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq(stbname, "tag3", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_TAG_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq(stbname, "col1", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_COLUMN_ALREADY_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq(stbname, "col2", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(3); + CheckInt32(3); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 07_Alter_Stb_DropColumn) { + const char* dbname = "1.d7"; + const char* stbname = "1.d7.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropColumnReq(stbname, "col4", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_COLUMN_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropColumnReq(stbname, "col1", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_STB_ALTER_OPTION); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropColumnReq(stbname, "ts", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_STB_ALTER_OPTION); + } + + { + SMAltertbReq* pReq = BuildAlterStbAddColumnReq(stbname, "col2", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbDropColumnReq(stbname, "col1", &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } +} + +TEST_F(MndTestStb, 08_Alter_Stb_AlterTagBytes) { + const char* dbname = "1.d8"; + const char* stbname = "1.d8.stb"; + int32_t contLen = 0; + + { + SCreateDbReq* pReq = BuildCreateDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMCreateStbReq* pReq = BuildCreateStbReq(stbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateColumnBytesReq(stbname, "col5", 12, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_COLUMN_NOT_EXIST); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateColumnBytesReq(stbname, "ts", 8, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_STB_OPTION); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateColumnBytesReq(stbname, "col1", 8, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_ROW_BYTES); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateColumnBytesReq(stbname, "col1", TSDB_MAX_BYTES_PER_ROW, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, TSDB_CODE_MND_INVALID_ROW_BYTES); + } + + { + SMAltertbReq* pReq = BuildAlterStbUpdateColumnBytesReq(stbname, "col1", 20, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_ALTER_STB, pReq, contLen); + ASSERT_EQ(pRsp->code, 0); + + test.SendShowMetaReq(TSDB_MGMT_TABLE_STB, dbname); + test.SendShowRetrieveReq(); + EXPECT_EQ(test.GetShowRows(), 1); + CheckBinary("stb", TSDB_TABLE_NAME_LEN); + CheckTimestamp(); + CheckInt32(2); + CheckInt32(3); + } + + { + SDropDbReq* pReq = BuildDropDbReq(dbname, &contLen); + SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_DB, pReq, contLen); + ASSERT_NE(pRsp, nullptr); + ASSERT_EQ(pRsp->code, 0); + } } diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 0f2d711a79f467fee792e197cb5a0b55a912cdc2..be6c0860402bb7a01f2928aeeafec9efd45f1185 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -1327,7 +1327,7 @@ static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) { int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks; SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx; TSKEY keyLimit; - int16_t colId = 0; + int16_t colId = PRIMARYKEY_TIMESTAMP_COL_ID; SMergeInfo mInfo; SBlock subBlocks[TSDB_MAX_SUBBLOCKS]; SBlock block, supBlock; diff --git a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c index 3dcbb7888b767988a13bf023daf68b78050f379e..24c71fdc7e12f389811f1ab5612bdfbf090d1474 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReadImpl.c +++ b/source/dnode/vnode/src/tsdb/tsdbReadImpl.c @@ -472,7 +472,7 @@ static int tsdbLoadBlockDataImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDat continue; } - int16_t tcolId = 0; + int16_t tcolId = PRIMARYKEY_TIMESTAMP_COL_ID; uint32_t toffset = TSDB_KEY_COL_OFFSET; int32_t tlen = pBlock->keyLen; diff --git a/source/dnode/vnode/src/vnd/vnodeBufferPool.c b/source/dnode/vnode/src/vnd/vnodeBufferPool.c index 434498eef59bdbaa1fa95ec7d6c63f262c2057bc..f7a72353eb9d162faa5a67e07603142b986d66bf 100644 --- a/source/dnode/vnode/src/vnd/vnodeBufferPool.c +++ b/source/dnode/vnode/src/vnd/vnodeBufferPool.c @@ -185,6 +185,7 @@ static void vBufPoolDestroyMA(SMemAllocatorFactory *pMAF, SMemAllocator *pMA) { free(pMA); if (--pVMA->_ref.val == 0) { TD_DLIST_POP(&(pVnode->pBufPool->incycle), pVMA); + vmaReset(pVMA); TD_DLIST_APPEND(&(pVnode->pBufPool->free), pVMA); } } \ No newline at end of file diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index 9c041d76c7049710f55a46240c639e41c600bfea..8f243743872b11ea0f44cab953901b08bc516514 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ b/source/libs/catalog/inc/catalogInt.h @@ -48,18 +48,22 @@ enum { }; typedef struct SCtgDebug { - int32_t lockDebug; + bool lockDebug; + bool cacheDebug; + uint32_t showCachePeriodSec; } SCtgDebug; typedef struct SCtgTbMetaCache { SRWLatch stbLock; - SHashObj *cache; //key:tbname, value:STableMeta + SRWLatch metaLock; // RC between cache destroy and all other operations + SHashObj *metaCache; //key:tbname, value:STableMeta SHashObj *stbCache; //key:suid, value:STableMeta* } SCtgTbMetaCache; typedef struct SCtgDBCache { SRWLatch vgLock; + uint64_t dbId; int8_t deleted; SDBVgroupInfo *vgInfo; SCtgTbMetaCache tbCache; @@ -81,6 +85,7 @@ typedef struct SCtgRentMgmt { typedef struct SCatalog { uint64_t clusterId; + SRWLatch dbLock; SHashObj *dbCache; //key:dbname, value:SCtgDBCache SCtgRentMgmt dbRent; SCtgRentMgmt stbRent; @@ -105,6 +110,8 @@ typedef struct SCatalogStat { } SCatalogStat; typedef struct SCatalogMgmt { + bool exit; + SRWLatch lock; SHashObj *pCluster; //key: clusterId, value: SCatalog* SCatalogStat stat; SCatalogCfg cfg; @@ -132,11 +139,8 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t); #define ctgDebug(param, ...) qDebug("CTG:%p " param, pCatalog, __VA_ARGS__) #define ctgTrace(param, ...) qTrace("CTG:%p " param, pCatalog, __VA_ARGS__) -#define CTG_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0) -#define CTG_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) -#define CTG_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0) - #define CTG_LOCK_DEBUG(...) do { if (gCTGDebug.lockDebug) { qDebug(__VA_ARGS__); } } while (0) +#define CTG_CACHE_DEBUG(...) do { if (gCTGDebug.cacheDebug) { qDebug(__VA_ARGS__); } } while (0) #define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000 @@ -172,6 +176,15 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t); } \ } while (0) + +#define CTG_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0) +#define CTG_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) +#define CTG_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0) + +#define CTG_API_ENTER() do { CTG_LOCK(CTG_READ, &ctgMgmt.lock); if (atomic_load_8(&ctgMgmt.exit)) { CTG_RET(TSDB_CODE_CTG_OUT_OF_SERVICE); } } while (0) +#define CTG_API_LEAVE(c) do { int32_t __code = c; CTG_UNLOCK(CTG_READ, &ctgMgmt.lock); CTG_RET(__code); } while (0) + + #ifdef __cplusplus } diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index 02773fe533d5c00a85bceefc8ffc315ee642a4f7..6ecff87a895cfc61b4024eb62c412c61c678fe71 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -22,6 +22,164 @@ SCatalogMgmt ctgMgmt = {0}; SCtgDebug gCTGDebug = {0}; +int32_t ctgDbgGetTbMetaNum(SCtgDBCache *dbCache) { + return dbCache->tbCache.metaCache ? (int32_t)taosHashGetSize(dbCache->tbCache.metaCache) : 0; +} + +int32_t ctgDbgGetStbNum(SCtgDBCache *dbCache) { + return dbCache->tbCache.stbCache ? (int32_t)taosHashGetSize(dbCache->tbCache.stbCache) : 0; +} + +int32_t ctgDbgGetRentNum(SCtgRentMgmt *rent) { + int32_t num = 0; + for (uint16_t i = 0; i < rent->slotNum; ++i) { + SCtgRentSlot *slot = &rent->slots[i]; + if (NULL == slot->meta) { + continue; + } + + num += taosArrayGetSize(slot->meta); + } + + return num; +} + +int32_t ctgDbgGetClusterCacheNum(struct SCatalog* pCatalog, int32_t type) { + if (NULL == pCatalog || NULL == pCatalog->dbCache) { + return 0; + } + + switch (type) { + case CTG_DBG_DB_NUM: + return (int32_t)taosHashGetSize(pCatalog->dbCache); + case CTG_DBG_DB_RENT_NUM: + return ctgDbgGetRentNum(&pCatalog->dbRent); + case CTG_DBG_STB_RENT_NUM: + return ctgDbgGetRentNum(&pCatalog->stbRent); + default: + break; + } + + SCtgDBCache *dbCache = NULL; + int32_t num = 0; + void *pIter = taosHashIterate(pCatalog->dbCache, NULL); + while (pIter) { + dbCache = (SCtgDBCache *)pIter; + switch (type) { + case CTG_DBG_META_NUM: + num += ctgDbgGetTbMetaNum(dbCache); + break; + case CTG_DBG_STB_NUM: + num += ctgDbgGetStbNum(dbCache); + break; + default: + ctgError("invalid type:%d", type); + break; + } + pIter = taosHashIterate(pCatalog->dbCache, pIter); + } + + return num; +} + + +void ctgDbgShowDBCache(SHashObj *dbHash) { + if (NULL == dbHash) { + return; + } + + int32_t i = 0; + SCtgDBCache *dbCache = NULL; + void *pIter = taosHashIterate(dbHash, NULL); + while (pIter) { + char *dbFName = NULL; + size_t len = 0; + + dbCache = (SCtgDBCache *)pIter; + + taosHashGetKey(dbCache, (void **)&dbFName, &len); + + CTG_CACHE_DEBUG("** %dth db [%.*s][%"PRIx64"] **", i, (int32_t)len, dbFName, dbCache->dbId); + + pIter = taosHashIterate(dbHash, pIter); + } +} + + + + +void ctgDbgShowClusterCache(struct SCatalog* pCatalog) { + if (NULL == pCatalog) { + return; + } + + CTG_CACHE_DEBUG("## cluster %"PRIx64" %p cache Info ##", pCatalog->clusterId, pCatalog); + CTG_CACHE_DEBUG("db:%d meta:%d stb:%d dbRent:%d stbRent:%d", ctgDbgGetClusterCacheNum(pCatalog, CTG_DBG_DB_NUM), ctgDbgGetClusterCacheNum(pCatalog, CTG_DBG_META_NUM), + ctgDbgGetClusterCacheNum(pCatalog, CTG_DBG_STB_NUM), ctgDbgGetClusterCacheNum(pCatalog, CTG_DBG_DB_RENT_NUM), ctgDbgGetClusterCacheNum(pCatalog, CTG_DBG_STB_RENT_NUM)); + + ctgDbgShowDBCache(pCatalog->dbCache); +} + +int32_t ctgInitDBCache(struct SCatalog* pCatalog) { + if (NULL == pCatalog->dbCache) { + SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (NULL == cache) { + ctgError("taosHashInit %d failed", CTG_DEFAULT_CACHE_DB_NUMBER); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->dbCache, NULL, cache)) { + taosHashCleanup(cache); + } + } + + return TSDB_CODE_SUCCESS; +} + + +int32_t ctgInitTbMetaCache(struct SCatalog* pCatalog, SCtgDBCache *dbCache) { + if (NULL == dbCache->tbCache.metaCache) { + if (dbCache->deleted) { + ctgInfo("db is dropping, dbId:%"PRIx64, dbCache->dbId); + CTG_ERR_RET(TSDB_CODE_CTG_DB_DROPPED); + } + + SHashObj *metaCache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (NULL == metaCache) { + ctgError("taosHashInit failed, num:%d", ctgMgmt.cfg.maxTblCacheNum); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + if (NULL != atomic_val_compare_exchange_ptr(&dbCache->tbCache.metaCache, NULL, metaCache)) { + taosHashCleanup(metaCache); + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t ctgInitStbCache(struct SCatalog* pCatalog, SCtgDBCache *dbCache) { + if (NULL == dbCache->tbCache.stbCache) { + if (dbCache->deleted) { + ctgInfo("db is dropping, dbId:%"PRIx64, dbCache->dbId); + CTG_ERR_RET(TSDB_CODE_CTG_DB_DROPPED); + } + + SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); + if (NULL == cache) { + ctgError("taosHashInit failed, num:%d", ctgMgmt.cfg.maxTblCacheNum); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + + if (NULL != atomic_val_compare_exchange_ptr(&dbCache->tbCache.stbCache, NULL, cache)) { + taosHashCleanup(cache); + } + } + + return TSDB_CODE_SUCCESS; +} + + void ctgFreeMetaRent(SCtgRentMgmt *mgmt) { if (NULL == mgmt->slots) { @@ -40,18 +198,20 @@ void ctgFreeMetaRent(SCtgRentMgmt *mgmt) { } -void ctgFreeTableMetaCache(SCtgTbMetaCache *table) { - CTG_LOCK(CTG_WRITE, &table->stbLock); - if (table->stbCache) { - taosHashCleanup(table->stbCache); - table->stbCache = NULL; +void ctgFreeTableMetaCache(SCtgTbMetaCache *cache) { + CTG_LOCK(CTG_WRITE, &cache->stbLock); + if (cache->stbCache) { + taosHashCleanup(cache->stbCache); + cache->stbCache = NULL; } - CTG_UNLOCK(CTG_WRITE, &table->stbLock); + CTG_UNLOCK(CTG_WRITE, &cache->stbLock); - if (table->cache) { - taosHashCleanup(table->cache); - table->cache = NULL; + CTG_LOCK(CTG_WRITE, &cache->metaLock); + if (cache->metaCache) { + taosHashCleanup(cache->metaCache); + cache->metaCache = NULL; } + CTG_UNLOCK(CTG_WRITE, &cache->metaLock); } void ctgFreeDbCache(SCtgDBCache *dbCache) { @@ -61,9 +221,8 @@ void ctgFreeDbCache(SCtgDBCache *dbCache) { atomic_store_8(&dbCache->deleted, 1); - SDBVgroupInfo *dbInfo = NULL; + CTG_LOCK(CTG_WRITE, &dbCache->vgLock); if (dbCache->vgInfo) { - CTG_LOCK(CTG_WRITE, &dbCache->vgLock); if (dbCache->vgInfo->vgHash) { taosHashCleanup(dbCache->vgInfo->vgHash); @@ -71,8 +230,8 @@ void ctgFreeDbCache(SCtgDBCache *dbCache) { } tfree(dbCache->vgInfo); - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); } + CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); ctgFreeTableMetaCache(&dbCache->tbCache); } @@ -97,22 +256,21 @@ void ctgFreeHandle(struct SCatalog* pCatalog) { free(pCatalog); } - -int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, SCtgDBCache **dbCache, bool *inCache) { +int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbFName, SCtgDBCache **dbCache, bool *inCache) { if (NULL == pCatalog->dbCache) { *inCache = false; - ctgWarn("empty db cache, dbName:%s", dbName); + ctgWarn("empty db cache, dbFName:%s", dbFName); return TSDB_CODE_SUCCESS; } SCtgDBCache *cache = NULL; while (true) { - cache = taosHashAcquire(pCatalog->dbCache, dbName, strlen(dbName)); + cache = taosHashAcquire(pCatalog->dbCache, dbFName, strlen(dbFName)); if (NULL == cache) { *inCache = false; - ctgWarn("not in db vgroup cache, dbName:%s", dbName); + ctgWarn("not in db vgroup cache, dbFName:%s", dbFName); return TSDB_CODE_SUCCESS; } @@ -120,7 +278,7 @@ int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, S if (NULL == cache->vgInfo) { CTG_UNLOCK(CTG_READ, &cache->vgLock); taosHashRelease(pCatalog->dbCache, cache); - ctgWarn("db cache vgInfo is NULL, dbName:%s", dbName); + ctgWarn("db cache vgInfo is NULL, dbFName:%s", dbFName); continue; } @@ -131,7 +289,7 @@ int32_t ctgGetDBVgroupFromCache(struct SCatalog* pCatalog, const char *dbName, S *dbCache = cache; *inCache = true; - ctgDebug("Got db vgroup from cache, dbName:%s", dbName); + ctgDebug("Got db vgroup from cache, dbFName:%s", dbFName); return TSDB_CODE_SUCCESS; } @@ -189,7 +347,10 @@ int32_t ctgIsTableMetaExistInCache(struct SCatalog* pCatalog, char *dbFName, cha size_t sz = 0; - STableMeta *tbMeta = taosHashGet(dbCache->tbCache.cache, tbName, strlen(tbName)); + CTG_LOCK(CTG_READ, &dbCache->tbCache.metaLock); + STableMeta *tbMeta = taosHashGet(dbCache->tbCache.metaCache, tbName, strlen(tbName)); + CTG_UNLOCK(CTG_READ, &dbCache->tbCache.metaLock); + if (NULL == tbMeta) { taosHashRelease(pCatalog->dbCache, dbCache); @@ -227,15 +388,18 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN return TSDB_CODE_SUCCESS; } - if (NULL == dbCache->tbCache.cache) { + if (NULL == dbCache->tbCache.metaCache) { *exist = 0; taosHashRelease(pCatalog->dbCache, dbCache); ctgWarn("empty tbmeta cache, dbFName:%s, tbName:%s", db, pTableName->tname); return TSDB_CODE_SUCCESS; } - size_t sz = 0; - STableMeta *tbMeta = taosHashGetCloneExt(dbCache->tbCache.cache, pTableName->tname, strlen(pTableName->tname), NULL, (void **)pTableMeta, &sz); + size_t sz = 0; + CTG_LOCK(CTG_READ, &dbCache->tbCache.metaLock); + STableMeta *tbMeta = taosHashGetCloneExt(dbCache->tbCache.metaCache, pTableName->tname, strlen(pTableName->tname), NULL, (void **)pTableMeta, &sz); + CTG_UNLOCK(CTG_READ, &dbCache->tbCache.metaLock); + if (NULL == *pTableMeta) { *exist = 0; taosHashRelease(pCatalog->dbCache, dbCache); @@ -308,7 +472,10 @@ int32_t ctgGetTableTypeFromCache(struct SCatalog* pCatalog, const SName* pTableN return TSDB_CODE_SUCCESS; } - STableMeta *pTableMeta = (STableMeta *)taosHashAcquire(dbCache->tbCache.cache, pTableName->tname, strlen(pTableName->tname)); + CTG_LOCK(CTG_READ, &dbCache->tbCache.metaLock); + STableMeta *pTableMeta = (STableMeta *)taosHashAcquire(dbCache->tbCache.metaCache, pTableName->tname, strlen(pTableName->tname)); + CTG_UNLOCK(CTG_READ, &dbCache->tbCache.metaLock); + if (NULL == pTableMeta) { ctgWarn("tbmeta not in cache, dbFName:%s, tbName:%s", dbName, pTableName->tname); taosHashRelease(pCatalog->dbCache, dbCache); @@ -318,7 +485,7 @@ int32_t ctgGetTableTypeFromCache(struct SCatalog* pCatalog, const SName* pTableN *tbType = atomic_load_8(&pTableMeta->tableType); - taosHashRelease(dbCache->tbCache.cache, dbCache); + taosHashRelease(dbCache->tbCache.metaCache, dbCache); taosHashRelease(pCatalog->dbCache, dbCache); ctgDebug("Got tbtype from cache, dbFName:%s, tbName:%s, type:%d", dbName, pTableName->tname, *tbType); @@ -526,6 +693,7 @@ int32_t ctgGetVgInfoFromHashValue(struct SCatalog *pCatalog, SDBVgroupInfo *dbIn CTG_RET(code); } +#if 1 int32_t ctgSTableVersionCompare(const void* key1, const void* key2) { if (*(uint64_t *)key1 < ((SSTableMetaVersion*)key2)->suid) { return -1; @@ -545,7 +713,29 @@ int32_t ctgDbVgVersionCompare(const void* key1, const void* key2) { return 0; } } +#else +int32_t ctgSTableVersionCompare(const void* key1, const void* key2) { + if (((SSTableMetaVersion*)key1)->suid < ((SSTableMetaVersion*)key2)->suid) { + return -1; + } else if (((SSTableMetaVersion*)key1)->suid > ((SSTableMetaVersion*)key2)->suid) { + return 1; + } else { + return 0; + } +} + +int32_t ctgDbVgVersionCompare(const void* key1, const void* key2) { + if (((SDbVgVersion*)key1)->dbId < ((SDbVgVersion*)key2)->dbId) { + return -1; + } else if (((SDbVgVersion*)key1)->dbId > ((SDbVgVersion*)key2)->dbId) { + return 1; + } else { + return 0; + } +} + +#endif int32_t ctgMetaRentInit(SCtgRentMgmt *mgmt, uint32_t rentSec, int8_t type) { mgmt->slotRIdx = 0; @@ -609,14 +799,15 @@ int32_t ctgMetaRentUpdate(SCtgRentMgmt *mgmt, void *meta, int64_t id, int32_t si } if (slot->needSort) { + qDebug("meta slot before sorte, slot idx:%d, type:%d, size:%d", widx, mgmt->type, (int32_t)taosArrayGetSize(slot->meta)); taosArraySort(slot->meta, compare); slot->needSort = false; - qDebug("meta slot sorted, slot idx:%d, type:%d", widx, mgmt->type); + qDebug("meta slot sorted, slot idx:%d, type:%d, size:%d", widx, mgmt->type, (int32_t)taosArrayGetSize(slot->meta)); } void *orig = taosArraySearch(slot->meta, &id, compare, TD_EQ); if (NULL == orig) { - qError("meta not found in slot, id:%"PRIx64", slot idx:%d, type:%d", id, widx, mgmt->type); + qError("meta not found in slot, id:%"PRIx64", slot idx:%d, type:%d, size:%d", id, widx, mgmt->type, (int32_t)taosArrayGetSize(slot->meta)); CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR); } @@ -741,74 +932,241 @@ int32_t ctgMetaRentGet(SCtgRentMgmt *mgmt, void **res, uint32_t *num, int32_t si return TSDB_CODE_SUCCESS; } - - -int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output) { +int32_t ctgAddDBCache(struct SCatalog *pCatalog, const char *dbFName, SCtgDBCache *dbCache) { int32_t code = 0; - SCtgDBCache *dbCache = NULL; - if ((!CTG_IS_META_CTABLE(output->metaType)) && NULL == output->tbMeta) { - ctgError("no valid tbmeta got from meta rsp, dbFName:%s, tbName:%s", output->dbFName, output->tbName); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + code = taosHashPut(pCatalog->dbCache, dbFName, strlen(dbFName), dbCache, sizeof(SCtgDBCache)); + if (code) { + if (HASH_NODE_EXIST(code)) { + ctgDebug("db already in cache, dbFName:%s", dbFName); + return TSDB_CODE_SUCCESS; + } + + ctgError("taosHashPut db to cache failed, dbFName:%s", dbFName); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } + + SDbVgVersion vgVersion = {.dbId = dbCache->dbId, .vgVersion = dbCache->vgInfo ? dbCache->vgInfo->vgVersion : -1}; + strncpy(vgVersion.dbFName, dbFName, sizeof(vgVersion.dbFName)); - if (NULL == pCatalog->dbCache) { - SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (NULL == cache) { - ctgError("taosHashInit %d failed", CTG_DEFAULT_CACHE_DB_NUMBER); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + ctgDebug("dbCache added, dbFName:%s, vgVersion:%d, dbId:%"PRIx64, dbFName, vgVersion.vgVersion, dbCache->dbId); + + CTG_ERR_JRET(ctgMetaRentAdd(&pCatalog->dbRent, &vgVersion, vgVersion.dbId, sizeof(SDbVgVersion))); + + return TSDB_CODE_SUCCESS; + +_return: + + ctgFreeDbCache(dbCache); + + CTG_RET(code); +} + + +void ctgRemoveAndFreeTableMeta(struct SCatalog* pCatalog, SCtgTbMetaCache *cache) { + CTG_LOCK(CTG_WRITE, &cache->stbLock); + if (cache->stbCache) { + void *pIter = taosHashIterate(cache->stbCache, NULL); + while (pIter) { + uint64_t *suid = NULL; + taosHashGetKey(pIter, (void **)&suid, NULL); + + if (TSDB_CODE_SUCCESS == ctgMetaRentRemove(&pCatalog->stbRent, *suid, ctgSTableVersionCompare)) { + ctgDebug("stb removed from rent, suid:%"PRIx64, *suid); + } + + pIter = taosHashIterate(cache->stbCache, pIter); } + } + CTG_UNLOCK(CTG_WRITE, &cache->stbLock); - if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->dbCache, NULL, cache)) { - taosHashCleanup(cache); + ctgFreeTableMetaCache(cache); +} + + +int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, SCtgDBCache *dbCache, const char* dbFName) { + if (taosHashRemove(pCatalog->dbCache, dbFName, strlen(dbFName))) { + ctgInfo("taosHashRemove from dbCache failed, may be removed, dbFName:%s", dbFName); + CTG_ERR_RET(TSDB_CODE_CTG_DB_DROPPED); + } + + atomic_store_8(&dbCache->deleted, 1); + + CTG_LOCK(CTG_WRITE, &dbCache->vgLock); + if (dbCache->vgInfo) { + ctgInfo("cleanup db vgInfo, dbFName:%s, dbId:%"PRIx64, dbFName, dbCache->dbId); + + if (dbCache->vgInfo->vgHash) { + taosHashCleanup(dbCache->vgInfo->vgHash); } + + tfree(dbCache->vgInfo); } + CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); + + ctgRemoveAndFreeTableMeta(pCatalog, &dbCache->tbCache); + + ctgInfo("db removed from cache, dbFName:%s, uid:%"PRIx64, dbFName, dbCache->dbId); + CTG_ERR_RET(ctgMetaRentRemove(&pCatalog->dbRent, dbCache->dbId, ctgDbVgVersionCompare)); + + ctgDebug("db removed from rent, dbFName:%s, uid:%"PRIx64, dbFName, dbCache->dbId); + + return TSDB_CODE_SUCCESS; +} + + +int32_t ctgAcquireDBCache(struct SCatalog* pCatalog, const char *dbFName, uint64_t dbId, SCtgDBCache **pCache) { + int32_t code = 0; + SCtgDBCache *dbCache = NULL; + + CTG_LOCK(CTG_WRITE, &pCatalog->dbLock); + while (true) { - dbCache = (SCtgDBCache *)taosHashAcquire(pCatalog->dbCache, output->dbFName, strlen(output->dbFName)); + dbCache = (SCtgDBCache *)taosHashAcquire(pCatalog->dbCache, dbFName, strlen(dbFName)); if (dbCache) { - break; + // TODO OPEN IT +#if 0 + if (dbCache->dbId == dbId) { + *pCache = dbCache; + return TSDB_CODE_SUCCESS; + } +#else + if (0 == dbId) { + *pCache = dbCache; + return TSDB_CODE_SUCCESS; + } + + if (dbId && (dbCache->dbId == 0)) { + dbCache->dbId = dbId; + *pCache = dbCache; + return TSDB_CODE_SUCCESS; + } + + if (dbCache->dbId == dbId) { + *pCache = dbCache; + return TSDB_CODE_SUCCESS; + } +#endif + code = ctgValidateAndRemoveDb(pCatalog, dbCache, dbFName); + taosHashRelease(pCatalog->dbCache, dbCache); + dbCache = NULL; + if (code) { + if (TSDB_CODE_CTG_DB_DROPPED == code) { + continue; + } + + CTG_ERR_JRET(code); + } } + + SCtgDBCache newDBCache = {0}; + newDBCache.dbId = dbId; - SCtgDBCache newDbCache = {0}; + CTG_ERR_JRET(ctgAddDBCache(pCatalog, dbFName, &newDBCache)); + } - if (taosHashPut(pCatalog->dbCache, output->dbFName, strlen(output->dbFName), &newDbCache, sizeof(newDbCache))) { - ctgError("taosHashPut db to cache failed, db:%s", output->dbFName); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); - } +_return: + + if (dbCache) { + taosHashRelease(pCatalog->dbCache, dbCache); } - if (NULL == dbCache->tbCache.cache) { - SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (NULL == cache) { - ctgError("taosHashInit failed, num:%d", ctgMgmt.cfg.maxTblCacheNum); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } + CTG_UNLOCK(CTG_WRITE, &pCatalog->dbLock); + + CTG_RET(code); +} - if (NULL != atomic_val_compare_exchange_ptr(&dbCache->tbCache.cache, NULL, cache)) { - taosHashCleanup(cache); - } + + +int32_t ctgUpdateTbMetaImpl(struct SCatalog *pCatalog, SCtgTbMetaCache *tbCache, char *dbFName, char *tbName, STableMeta *meta, int32_t metaSize) { + CTG_LOCK(CTG_READ, &tbCache->metaLock); + if (taosHashPut(tbCache->metaCache, tbName, strlen(tbName), meta, metaSize) != 0) { + CTG_UNLOCK(CTG_READ, &tbCache->metaLock); + ctgError("taosHashPut tbmeta to cache failed, dbFName:%s, tbName:%s, tbType:%d", dbFName, tbName, meta->tableType); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); } + CTG_UNLOCK(CTG_READ, &tbCache->metaLock); + + ctgDebug("tbmeta updated to cache, dbFName:%s, tbName:%s, tbType:%d", dbFName, tbName, meta->tableType); - if (NULL == dbCache->tbCache.stbCache) { - SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK); - if (NULL == cache) { - ctgError("taosHashInit failed, num:%d", ctgMgmt.cfg.maxTblCacheNum); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } + return TSDB_CODE_SUCCESS; +} - if (NULL != atomic_val_compare_exchange_ptr(&dbCache->tbCache.stbCache, NULL, cache)) { - taosHashCleanup(cache); +int32_t ctgUpdateStbMetaImpl(struct SCatalog *pCatalog, SCtgTbMetaCache *tbCache, char *dbFName, char *tbName, STableMeta *meta, int32_t metaSize) { + bool newAdded = false; + int32_t code = 0; + SSTableMetaVersion metaRent = {.suid = meta->suid, .sversion = meta->sversion, .tversion = meta->tversion}; + strcpy(metaRent.dbFName, dbFName); + strcpy(metaRent.stbName, tbName); + + CTG_LOCK(CTG_WRITE, &tbCache->stbLock); + + CTG_LOCK(CTG_READ, &tbCache->metaLock); + STableMeta *orig = taosHashAcquire(tbCache->metaCache, tbName, strlen(tbName)); + if (orig) { + if (orig->suid != meta->suid) { + if (taosHashRemove(tbCache->stbCache, &orig->suid, sizeof(orig->suid))) { + ctgError("stb not exist in stbCache, db:%s, stb:%s, suid:%"PRIx64, dbFName, tbName, orig->suid); + } + + ctgMetaRentRemove(&pCatalog->stbRent, orig->suid, ctgSTableVersionCompare); } + + taosHashRelease(tbCache->metaCache, orig); } + CTG_UNLOCK(CTG_READ, &tbCache->metaLock); - if (CTG_IS_META_CTABLE(output->metaType) || CTG_IS_META_BOTH(output->metaType)) { - if (taosHashPut(dbCache->tbCache.cache, output->ctbName, strlen(output->ctbName), &output->ctbMeta, sizeof(output->ctbMeta)) != 0) { - ctgError("taosHashPut ctbmeta to cache failed, ctbName:%s", output->ctbName); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } + CTG_ERR_JRET(ctgUpdateTbMetaImpl(pCatalog, tbCache, dbFName, tbName, meta, metaSize)); + + CTG_LOCK(CTG_READ, &tbCache->metaLock); + STableMeta *tbMeta = taosHashGet(tbCache->metaCache, tbName, strlen(tbName)); + if (taosHashPutExt(tbCache->stbCache, &meta->suid, sizeof(meta->suid), &tbMeta, POINTER_BYTES, &newAdded) != 0) { + CTG_UNLOCK(CTG_READ, &tbCache->metaLock); + CTG_UNLOCK(CTG_WRITE, &tbCache->stbLock); + ctgError("taosHashPutExt stable to stable cache failed, suid:%"PRIx64, meta->suid); + CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + } + CTG_UNLOCK(CTG_READ, &tbCache->metaLock); + + CTG_UNLOCK(CTG_WRITE, &tbCache->stbLock); + + ctgDebug("update stable to cache, suid:%"PRIx64, meta->suid); + + if (newAdded) { + CTG_ERR_RET(ctgMetaRentAdd(&pCatalog->stbRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion))); + } else { + CTG_ERR_RET(ctgMetaRentUpdate(&pCatalog->stbRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion), ctgSTableVersionCompare)); + } + + return TSDB_CODE_SUCCESS; + +_return: + + CTG_UNLOCK(CTG_WRITE, &tbCache->stbLock); + + CTG_RET(code); +} + + +int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output) { + int32_t code = 0; + SCtgDBCache *dbCache = NULL; - ctgDebug("ctbmeta updated to cache, ctbName:%s", output->ctbName); + if ((!CTG_IS_META_CTABLE(output->metaType)) && NULL == output->tbMeta) { + ctgError("no valid tbmeta got from meta rsp, dbFName:%s, tbName:%s", output->dbFName, output->tbName); + CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); + } + + CTG_ERR_RET(ctgInitDBCache(pCatalog)); + + CTG_ERR_JRET(ctgAcquireDBCache(pCatalog, output->dbFName, output->dbId, &dbCache)); + + CTG_ERR_JRET(ctgInitTbMetaCache(pCatalog, dbCache)); + CTG_ERR_JRET(ctgInitStbCache(pCatalog, dbCache)); + + if (CTG_IS_META_CTABLE(output->metaType) || CTG_IS_META_BOTH(output->metaType)) { + CTG_ERR_JRET(ctgUpdateTbMetaImpl(pCatalog, &dbCache->tbCache, output->dbFName, output->ctbName, (STableMeta *)&output->ctbMeta, sizeof(output->ctbMeta))); } if (CTG_IS_META_CTABLE(output->metaType)) { @@ -823,75 +1181,45 @@ int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *out int32_t tbSize = sizeof(*output->tbMeta) + sizeof(SSchema) * (output->tbMeta->tableInfo.numOfColumns + output->tbMeta->tableInfo.numOfTags); if (TSDB_SUPER_TABLE == output->tbMeta->tableType) { - bool newAdded = false; - SSTableMetaVersion metaRent = {.suid = output->tbMeta->suid, .sversion = output->tbMeta->sversion, .tversion = output->tbMeta->tversion}; - strcpy(metaRent.dbFName, output->dbFName); - strcpy(metaRent.stbName, output->tbName); - - CTG_LOCK(CTG_WRITE, &dbCache->tbCache.stbLock); - if (taosHashPut(dbCache->tbCache.cache, output->tbName, strlen(output->tbName), output->tbMeta, tbSize) != 0) { - CTG_UNLOCK(CTG_WRITE, &dbCache->tbCache.stbLock); - ctgError("taosHashPut tablemeta to cache failed, dbFName:%s, tbName:%s", output->dbFName, output->tbName); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } - - STableMeta *tbMeta = taosHashGet(dbCache->tbCache.cache, output->tbName, strlen(output->tbName)); - if (taosHashPutExt(dbCache->tbCache.stbCache, &output->tbMeta->suid, sizeof(output->tbMeta->suid), &tbMeta, POINTER_BYTES, &newAdded) != 0) { - CTG_UNLOCK(CTG_WRITE, &dbCache->tbCache.stbLock); - ctgError("taosHashPutExt stable to stable cache failed, suid:%"PRIx64, output->tbMeta->suid); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } - CTG_UNLOCK(CTG_WRITE, &dbCache->tbCache.stbLock); - - ctgDebug("update stable to cache, suid:%"PRIx64, output->tbMeta->suid); - - if (newAdded) { - CTG_ERR_JRET(ctgMetaRentAdd(&pCatalog->stbRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion))); - } else { - CTG_ERR_JRET(ctgMetaRentUpdate(&pCatalog->stbRent, &metaRent, metaRent.suid, sizeof(SSTableMetaVersion), ctgSTableVersionCompare)); - } + CTG_ERR_JRET(ctgUpdateStbMetaImpl(pCatalog, &dbCache->tbCache, output->dbFName, output->tbName, output->tbMeta, tbSize)); } else { - if (taosHashPut(dbCache->tbCache.cache, output->tbName, strlen(output->tbName), output->tbMeta, tbSize) != 0) { - ctgError("taosHashPut tablemeta to cache failed, dbFName:%s, tbName:%s", output->dbFName, output->tbName); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } + CTG_ERR_JRET(ctgUpdateTbMetaImpl(pCatalog, &dbCache->tbCache, output->dbFName, output->tbName, output->tbMeta, tbSize)); } - ctgDebug("update tablemeta to cache, dbFName:%s, tbName:%s", output->dbFName, output->tbName); - _return: if (dbCache) { - taosHashRelease(pCatalog->dbCache, dbCache); + taosHashRelease(pCatalog->dbCache, dbCache); + CTG_UNLOCK(CTG_WRITE, &pCatalog->dbLock); } CTG_RET(code); } -int32_t ctgGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, bool forceUpdate, SCtgDBCache** dbCache) { +int32_t ctgGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbFName, bool forceUpdate, SCtgDBCache** dbCache) { bool inCache = false; if (!forceUpdate) { - CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbName, dbCache, &inCache)); + CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbFName, dbCache, &inCache)); if (inCache) { return TSDB_CODE_SUCCESS; } - ctgDebug("failed to get DB vgroupInfo from cache, dbName:%s, load it from mnode, update:%d", dbName, forceUpdate); + ctgDebug("failed to get DB vgroupInfo from cache, dbName:%s, load it from mnode, update:%d", dbFName, forceUpdate); } SUseDbOutput DbOut = {0}; SBuildUseDBInput input = {0}; - tstrncpy(input.db, dbName, tListLen(input.db)); + tstrncpy(input.db, dbFName, tListLen(input.db)); input.vgVersion = CTG_DEFAULT_INVALID_VERSION; while (true) { CTG_ERR_RET(ctgGetDBVgroupFromMnode(pCatalog, pRpc, pMgmtEps, &input, &DbOut)); - CTG_ERR_RET(catalogUpdateDBVgroup(pCatalog, dbName, DbOut.dbVgroup)); - CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbName, dbCache, &inCache)); + CTG_ERR_RET(catalogUpdateDBVgroup(pCatalog, dbFName, DbOut.dbId, DbOut.dbVgroup)); + CTG_ERR_RET(ctgGetDBVgroupFromCache(pCatalog, dbFName, dbCache, &inCache)); if (!inCache) { - ctgWarn("can't get db vgroup from cache, will retry, db:%s", dbName); + ctgWarn("can't get db vgroup from cache, will retry, db:%s", dbFName); continue; } @@ -901,58 +1229,6 @@ int32_t ctgGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgm return TSDB_CODE_SUCCESS; } - -int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId, bool *removed) { - *removed = false; - - SCtgDBCache *dbCache = (SCtgDBCache *)taosHashAcquire(pCatalog->dbCache, dbName, strlen(dbName)); - if (NULL == dbCache) { - ctgInfo("db not exist in dbCache, may be removed, db:%s", dbName); - return TSDB_CODE_SUCCESS; - } - - CTG_LOCK(CTG_WRITE, &dbCache->vgLock); - - if (NULL == dbCache->vgInfo) { - ctgInfo("db vgInfo not in dbCache, may be removed, db:%s, dbId:%"PRIx64, dbName, dbId); - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); - taosHashRelease(pCatalog->dbCache, dbCache); - return TSDB_CODE_SUCCESS; - } - - if (dbCache->vgInfo->dbId != dbId) { - ctgInfo("db id already updated, db:%s, dbId:%"PRIx64 ", targetId:%"PRIx64, dbName, dbCache->vgInfo->dbId, dbId); - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); - taosHashRelease(pCatalog->dbCache, dbCache); - return TSDB_CODE_SUCCESS; - } - - if (dbCache->vgInfo->vgHash) { - ctgInfo("cleanup db vgInfo, db:%s, dbId:%"PRIx64, dbName, dbId); - taosHashCleanup(dbCache->vgInfo->vgHash); - tfree(dbCache->vgInfo); - } - - if (taosHashRemove(pCatalog->dbCache, dbName, strlen(dbName))) { - ctgError("taosHashRemove from dbCache failed, db:%s", dbName); - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); - taosHashRelease(pCatalog->dbCache, dbCache); - CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } - - dbCache->deleted = true; - - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); - - ctgFreeTableMetaCache(&dbCache->tbCache); - - taosHashRelease(pCatalog->dbCache, dbCache); - - *removed = true; - - return TSDB_CODE_SUCCESS; -} - int32_t ctgValidateAndRemoveStbMeta(struct SCatalog* pCatalog, const char* dbName, const char* stbName, uint64_t suid, bool *removed) { *removed = false; @@ -970,12 +1246,16 @@ int32_t ctgValidateAndRemoveStbMeta(struct SCatalog* pCatalog, const char* dbNam return TSDB_CODE_SUCCESS; } - if (taosHashRemove(dbCache->tbCache.cache, stbName, strlen(stbName))) { + CTG_LOCK(CTG_READ, &dbCache->tbCache.metaLock); + if (taosHashRemove(dbCache->tbCache.metaCache, stbName, strlen(stbName))) { + CTG_UNLOCK(CTG_READ, &dbCache->tbCache.metaLock); CTG_UNLOCK(CTG_WRITE, &dbCache->tbCache.stbLock); taosHashRelease(pCatalog->dbCache, dbCache); ctgError("stb not exist in cache, db:%s, stb:%s, suid:%"PRIx64, dbName, stbName, suid); CTG_ERR_RET(TSDB_CODE_CTG_INTERNAL_ERROR); - } + } + CTG_UNLOCK(CTG_READ, &dbCache->tbCache.metaLock); + CTG_UNLOCK(CTG_WRITE, &dbCache->tbCache.stbLock); taosHashRelease(pCatalog->dbCache, dbCache); @@ -1104,6 +1384,8 @@ int32_t catalogInit(SCatalogCfg *cfg) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + atomic_store_8(&ctgMgmt.exit, false); + if (cfg) { memcpy(&ctgMgmt.cfg, cfg, sizeof(*cfg)); @@ -1222,17 +1504,19 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + if (NULL == pCatalog->dbCache) { *version = CTG_DEFAULT_INVALID_VERSION; ctgInfo("empty db cache, dbName:%s", dbName); - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } SCtgDBCache *db = taosHashAcquire(pCatalog->dbCache, dbName, strlen(dbName)); if (NULL == db) { *version = CTG_DEFAULT_INVALID_VERSION; ctgInfo("db not in cache, dbName:%s", dbName); - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } CTG_LOCK(CTG_READ, &db->vgLock); @@ -1242,7 +1526,7 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, *version = CTG_DEFAULT_INVALID_VERSION; ctgInfo("db not in cache, dbName:%s", dbName); - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } *version = db->vgInfo->vgVersion; @@ -1252,20 +1536,22 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName, ctgDebug("Got db vgVersion from cache, dbName:%s, vgVersion:%d", dbName, *version); - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } -int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbName, bool forceUpdate, SArray** vgroupList) { - if (NULL == pCatalog || NULL == dbName || NULL == pRpc || NULL == pMgmtEps || NULL == vgroupList) { +int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const char* dbFName, bool forceUpdate, SArray** vgroupList) { + if (NULL == pCatalog || NULL == dbFName || NULL == pRpc || NULL == pMgmtEps || NULL == vgroupList) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + SCtgDBCache* dbCache = NULL; SVgroupInfo *vgInfo = NULL; int32_t code = 0; SArray *vgList = NULL; - CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pRpc, pMgmtEps, dbName, forceUpdate, &dbCache)); + CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pRpc, pMgmtEps, dbFName, forceUpdate, &dbCache)); int32_t vgNum = (int32_t)taosHashGetSize(dbCache->vgInfo->vgHash); vgList = taosArrayInit(vgNum, sizeof(SVgroupInfo)); @@ -1303,130 +1589,119 @@ _return: vgList = NULL; } - CTG_RET(code); + CTG_API_LEAVE(code); } -int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDBVgroupInfo* dbInfo) { +int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbFName, uint64_t dbId, SDBVgroupInfo* dbInfo) { int32_t code = 0; + + CTG_API_ENTER(); - if (NULL == pCatalog || NULL == dbName || NULL == dbInfo) { + if (NULL == pCatalog || NULL == dbFName || NULL == dbInfo) { CTG_ERR_JRET(TSDB_CODE_CTG_INVALID_INPUT); } if (NULL == dbInfo->vgHash || dbInfo->vgVersion < 0 || taosHashGetSize(dbInfo->vgHash) <= 0) { - ctgError("invalid db vgInfo, dbName:%s, vgHash:%p, vgVersion:%d", dbName, dbInfo->vgHash, dbInfo->vgVersion); + ctgError("invalid db vgInfo, dbFName:%s, vgHash:%p, vgVersion:%d", dbFName, dbInfo->vgHash, dbInfo->vgVersion); CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } - if (NULL == pCatalog->dbCache) { - SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (NULL == cache) { - ctgError("taosHashInit %d failed", CTG_DEFAULT_CACHE_DB_NUMBER); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); - } - - if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->dbCache, NULL, cache)) { - taosHashCleanup(cache); - } - } + CTG_ERR_JRET(ctgInitDBCache(pCatalog)); bool newAdded = false; - SDbVgVersion vgVersion = {.dbId = dbInfo->dbId, .vgVersion = dbInfo->vgVersion}; - - SCtgDBCache *dbCache = (SCtgDBCache *)taosHashAcquire(pCatalog->dbCache, dbName, strlen(dbName)); - if (dbCache) { - CTG_LOCK(CTG_WRITE, &dbCache->vgLock); - - if (NULL == dbCache->vgInfo) { - newAdded = true; - - dbCache->vgInfo = dbInfo; - } else { - if (dbCache->vgInfo->dbId != dbInfo->dbId) { - ctgMetaRentRemove(&pCatalog->dbRent, dbCache->vgInfo->dbId, ctgDbVgVersionCompare); - newAdded = true; - } else if (dbInfo->vgVersion <= dbCache->vgInfo->vgVersion) { - ctgInfo("db vgVersion is old, db:%s, vgVersion:%d, current:%d", dbName, dbInfo->vgVersion, dbCache->vgInfo->vgVersion); - CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); - taosHashRelease(pCatalog->dbCache, dbCache); - - goto _return; - } - - if (dbCache->vgInfo->vgHash) { - ctgInfo("cleanup db vgHash, db:%s", dbName); - taosHashCleanup(dbCache->vgInfo->vgHash); - dbCache->vgInfo->vgHash = NULL; - } - - tfree(dbCache->vgInfo); - dbCache->vgInfo = dbInfo; - } + SDbVgVersion vgVersion = {.dbId = dbId, .vgVersion = dbInfo->vgVersion}; + SCtgDBCache *dbCache = NULL; + CTG_ERR_JRET(ctgAcquireDBCache(pCatalog, dbFName, dbId, &dbCache)); + + CTG_LOCK(CTG_WRITE, &dbCache->vgLock); + if (dbCache->deleted) { + ctgInfo("db is dropping, dbFName:%s, dbId:%"PRIx64, dbFName, dbId); CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); taosHashRelease(pCatalog->dbCache, dbCache); + CTG_ERR_JRET(TSDB_CODE_CTG_DB_DROPPED); + } + + if (NULL == dbCache->vgInfo) { + dbCache->vgInfo = dbInfo; } else { - SCtgDBCache newDBCache = {0}; - newDBCache.vgInfo = dbInfo; + if (dbInfo->vgVersion <= dbCache->vgInfo->vgVersion) { + ctgInfo("db vgVersion is old, dbFName:%s, vgVersion:%d, current:%d", dbFName, dbInfo->vgVersion, dbCache->vgInfo->vgVersion); + CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); + taosHashRelease(pCatalog->dbCache, dbCache); + + goto _return; + } - if (taosHashPut(pCatalog->dbCache, dbName, strlen(dbName), &newDBCache, sizeof(newDBCache)) != 0) { - ctgError("taosHashPut db & db vgroup to cache failed, db:%s", dbName); - CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); + if (dbCache->vgInfo->vgHash) { + ctgDebug("cleanup db vgHash, dbFName:%s", dbFName); + taosHashCleanup(dbCache->vgInfo->vgHash); + dbCache->vgInfo->vgHash = NULL; } - newAdded = true; + tfree(dbCache->vgInfo); + dbCache->vgInfo = dbInfo; } dbInfo = NULL; - strncpy(vgVersion.dbFName, dbName, sizeof(vgVersion.dbFName)); - - if (newAdded) { - CTG_ERR_JRET(ctgMetaRentAdd(&pCatalog->dbRent, &vgVersion, vgVersion.dbId, sizeof(SDbVgVersion))); - } else { - CTG_ERR_JRET(ctgMetaRentUpdate(&pCatalog->dbRent, &vgVersion, vgVersion.dbId, sizeof(SDbVgVersion), ctgDbVgVersionCompare)); - } - - ctgDebug("dbName:%s vgroup updated, vgVersion:%d", dbName, vgVersion.vgVersion); + CTG_UNLOCK(CTG_WRITE, &dbCache->vgLock); + taosHashRelease(pCatalog->dbCache, dbCache); + + strncpy(vgVersion.dbFName, dbFName, sizeof(vgVersion.dbFName)); + CTG_ERR_JRET(ctgMetaRentUpdate(&pCatalog->dbRent, &vgVersion, vgVersion.dbId, sizeof(SDbVgVersion), ctgDbVgVersionCompare)); + ctgDebug("dbCache updated, dbFName:%s, vgVersion:%d, dbId:%"PRIx64, dbFName, vgVersion.vgVersion, vgVersion.dbId); _return: + if (dbCache) { + CTG_UNLOCK(CTG_WRITE, &pCatalog->dbLock); + } + if (dbInfo) { taosHashCleanup(dbInfo->vgHash); dbInfo->vgHash = NULL; tfree(dbInfo); } - CTG_RET(code); + CTG_API_LEAVE(code); } -int32_t catalogRemoveDB(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId) { +int32_t catalogRemoveDB(struct SCatalog* pCatalog, const char* dbFName, uint64_t dbId) { int32_t code = 0; - bool removed = false; - if (NULL == pCatalog || NULL == dbName) { + if (NULL == pCatalog || NULL == dbFName) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + if (NULL == pCatalog->dbCache) { - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } - - CTG_ERR_RET(ctgValidateAndRemoveDb(pCatalog, dbName, dbId, &removed)); - if (!removed) { - return TSDB_CODE_SUCCESS; + + SCtgDBCache *dbCache = (SCtgDBCache *)taosHashAcquire(pCatalog->dbCache, dbFName, strlen(dbFName)); + if (NULL == dbCache) { + ctgInfo("db not exist in dbCache, may be removed, dbFName:%s", dbFName); + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } - - ctgInfo("db removed from cache, db:%s, uid:%"PRIx64, dbName, dbId); - CTG_ERR_RET(ctgMetaRentRemove(&pCatalog->dbRent, dbId, ctgDbVgVersionCompare)); + if (dbCache->dbId != dbId) { + ctgInfo("db id already updated, dbFName:%s, dbId:%"PRIx64 ", targetId:%"PRIx64, dbFName, dbCache->dbId, dbId); + taosHashRelease(pCatalog->dbCache, dbCache); + CTG_API_LEAVE(TSDB_CODE_SUCCESS); + } - ctgDebug("db removed from rent, db:%s, uid:%"PRIx64, dbName, dbId); + CTG_ERR_JRET(ctgValidateAndRemoveDb(pCatalog, dbCache, dbFName)); + +_return: - CTG_RET(code); + taosHashRelease(pCatalog->dbCache, dbCache); + + CTG_API_LEAVE(code); } int32_t catalogRemoveSTableMeta(struct SCatalog* pCatalog, const char* dbName, const char* stbName, uint64_t suid) { @@ -1437,49 +1712,86 @@ int32_t catalogRemoveSTableMeta(struct SCatalog* pCatalog, const char* dbName, c CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + if (NULL == pCatalog->dbCache) { - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } CTG_ERR_RET(ctgValidateAndRemoveStbMeta(pCatalog, dbName, stbName, suid, &removed)); if (!removed) { - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } ctgInfo("stb removed from cache, db:%s, stbName:%s, suid:%"PRIx64, dbName, stbName, suid); - CTG_ERR_RET(ctgMetaRentRemove(&pCatalog->stbRent, suid, ctgSTableVersionCompare)); + CTG_ERR_JRET(ctgMetaRentRemove(&pCatalog->stbRent, suid, ctgSTableVersionCompare)); ctgDebug("stb removed from rent, db:%s, stbName:%s, suid:%"PRIx64, dbName, stbName, suid); + +_return: - CTG_RET(code); + CTG_API_LEAVE(code); } int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) { - return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, -1); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, -1)); } int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) { - return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, 1); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, 1)); +} + +int32_t catalogUpdateSTableMeta(struct SCatalog* pCatalog, STableMetaRsp *rspMsg) { + STableMetaOutput output = {0}; + int32_t code = 0; + + CTG_API_ENTER(); + + strcpy(output.dbFName, rspMsg->dbFName); + strcpy(output.tbName, rspMsg->tbName); + + SET_META_TYPE_TABLE(output.metaType); + + CTG_ERR_JRET(queryCreateTableMetaFromMsg(rspMsg, true, &output.tbMeta)); + + CTG_ERR_JRET(ctgUpdateTableMetaCache(pCatalog, &output)); + +_return: + + tfree(output.tbMeta); + + CTG_API_LEAVE(code); } + int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable) { if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } - return ctgRenewTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, isSTable); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgRenewTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, isSTable)); } int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta, int32_t isSTable) { - return ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta, isSTable); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgGetTableMeta(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta, isSTable)); } int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SArray** pVgroupList) { if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == pVgroupList) { CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + + CTG_API_ENTER(); STableMeta *tbMeta = NULL; int32_t code = 0; @@ -1495,7 +1807,7 @@ int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const S tNameGetFullDbName(pTableName, db); CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pRpc, pMgmtEps, db, false, &dbCache)); - // REMOEV THIS .... + // TODO REMOEV THIS .... if (0 == tbMeta->vgId) { SVgroupInfo vgroup = {0}; @@ -1503,7 +1815,7 @@ int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const S tbMeta->vgId = vgroup.vgId; } - // REMOVE THIS .... + // TODO REMOVE THIS .... if (tbMeta->tableType == TSDB_SUPER_TABLE) { CTG_ERR_JRET(ctgGetVgInfoFromDB(pCatalog, pRpc, pMgmtEps, dbCache->vgInfo, pVgroupList)); @@ -1542,7 +1854,7 @@ _return: vgList = NULL; } - CTG_RET(code); + CTG_API_LEAVE(code); } @@ -1550,10 +1862,12 @@ int32_t catalogGetTableHashVgroup(struct SCatalog *pCatalog, void *pTransporter, SCtgDBCache* dbCache = NULL; int32_t code = 0; + CTG_API_ENTER(); + char db[TSDB_DB_FNAME_LEN] = {0}; tNameGetFullDbName(pTableName, db); - CTG_ERR_RET(ctgGetDBVgroup(pCatalog, pTransporter, pMgmtEps, db, false, &dbCache)); + CTG_ERR_JRET(ctgGetDBVgroup(pCatalog, pTransporter, pMgmtEps, db, false, &dbCache)); CTG_ERR_JRET(ctgGetVgInfoFromHashValue(pCatalog, dbCache->vgInfo, pTableName, pVgroup)); @@ -1564,7 +1878,7 @@ _return: taosHashRelease(pCatalog->dbCache, dbCache); } - CTG_RET(code); + CTG_API_LEAVE(code); } @@ -1573,19 +1887,22 @@ int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pTransporter, const S CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + int32_t code = 0; + pRsp->pTableMeta = NULL; if (pReq->pTableName) { int32_t tbNum = (int32_t)taosArrayGetSize(pReq->pTableName); if (tbNum <= 0) { ctgError("empty table name list, tbNum:%d", tbNum); - CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); + CTG_ERR_JRET(TSDB_CODE_CTG_INVALID_INPUT); } pRsp->pTableMeta = taosArrayInit(tbNum, POINTER_BYTES); if (NULL == pRsp->pTableMeta) { ctgError("taosArrayInit %d failed", tbNum); - CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR); + CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR); } for (int32_t i = 0; i < tbNum; ++i) { @@ -1602,7 +1919,7 @@ int32_t catalogGetAllMeta(struct SCatalog* pCatalog, void *pTransporter, const S } } - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); _return: @@ -1617,7 +1934,7 @@ _return: pRsp->pTableMeta = NULL; } - CTG_RET(code); + CTG_API_LEAVE(code); } int32_t catalogGetQnodeList(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, SArray* pQnodeList) { @@ -1625,9 +1942,11 @@ int32_t catalogGetQnodeList(struct SCatalog* pCatalog, void *pRpc, const SEpSet* CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } + CTG_API_ENTER(); + //TODO - return TSDB_CODE_SUCCESS; + CTG_API_LEAVE(TSDB_CODE_SUCCESS); } int32_t catalogGetExpiredSTables(struct SCatalog* pCatalog, SSTableMetaVersion **stables, uint32_t *num) { @@ -1635,7 +1954,9 @@ int32_t catalogGetExpiredSTables(struct SCatalog* pCatalog, SSTableMetaVersion * CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } - CTG_RET(ctgMetaRentGet(&pCatalog->stbRent, (void **)stables, num, sizeof(SSTableMetaVersion))); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgMetaRentGet(&pCatalog->stbRent, (void **)stables, num, sizeof(SSTableMetaVersion))); } int32_t catalogGetExpiredDBs(struct SCatalog* pCatalog, SDbVgVersion **dbs, uint32_t *num) { @@ -1643,15 +1964,21 @@ int32_t catalogGetExpiredDBs(struct SCatalog* pCatalog, SDbVgVersion **dbs, uint CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT); } - CTG_RET(ctgMetaRentGet(&pCatalog->dbRent, (void **)dbs, num, sizeof(SDbVgVersion))); + CTG_API_ENTER(); + + CTG_API_LEAVE(ctgMetaRentGet(&pCatalog->dbRent, (void **)dbs, num, sizeof(SDbVgVersion))); } void catalogDestroy(void) { - if (NULL == ctgMgmt.pCluster) { + if (NULL == ctgMgmt.pCluster || atomic_load_8(&ctgMgmt.exit)) { return; } + atomic_store_8(&ctgMgmt.exit, true); + + CTG_LOCK(CTG_WRITE, &ctgMgmt.lock); + SCatalog *pCatalog = NULL; void *pIter = taosHashIterate(ctgMgmt.pCluster, NULL); while (pIter) { @@ -1667,6 +1994,8 @@ void catalogDestroy(void) { taosHashCleanup(ctgMgmt.pCluster); ctgMgmt.pCluster = NULL; + CTG_UNLOCK(CTG_WRITE, &ctgMgmt.lock); + qInfo("catalog destroyed"); } diff --git a/source/libs/catalog/test/catalogTests.cpp b/source/libs/catalog/test/catalogTests.cpp index 751fa72347973ede04f3e72765f018dc01cfb2ae..d0f98e3c2ac5b87229ecfdc766fbe66854e5175a 100644 --- a/source/libs/catalog/test/catalogTests.cpp +++ b/source/libs/catalog/test/catalogTests.cpp @@ -39,6 +39,7 @@ namespace { extern "C" int32_t ctgGetTableMetaFromCache(struct SCatalog *pCatalog, const SName *pTableName, STableMeta **pTableMeta, int32_t *exist); extern "C" int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output); +extern "C" int32_t ctgDbgGetClusterCacheNum(struct SCatalog* pCatalog, int32_t type); void ctgTestSetPrepareTableMeta(); void ctgTestSetPrepareCTableMeta(); @@ -49,7 +50,7 @@ bool ctgTestStop = false; bool ctgTestEnableSleep = false; bool ctgTestDeadLoop = false; int32_t ctgTestPrintNum = 200000; -int32_t ctgTestMTRunSec = 30; +int32_t ctgTestMTRunSec = 5; int32_t ctgTestCurrentVgVersion = 0; int32_t ctgTestVgVersion = 1; @@ -107,6 +108,7 @@ void ctgTestInitLogFile() { const int32_t maxLogFileNum = 10; tsAsyncLog = 0; + qDebugFlag = 159; char temp[128] = {0}; sprintf(temp, "%s/%s", tsLogDir, defaultLogFileNamePrefix); @@ -185,7 +187,6 @@ void ctgTestBuildDBVgroup(SDBVgroupInfo **pdbVgroup) { ctgTestCurrentVgVersion = dbVgroup->vgVersion; dbVgroup->hashMethod = 0; - dbVgroup->dbId = ctgTestDbId; dbVgroup->vgHash = taosHashInit(ctgTestVgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); vgNum = ctgTestGetVgNumFromVgVersion(dbVgroup->vgVersion); @@ -209,6 +210,45 @@ void ctgTestBuildDBVgroup(SDBVgroupInfo **pdbVgroup) { *pdbVgroup = dbVgroup; } + +void ctgTestBuildSTableMetaRsp(STableMetaRsp *rspMsg) { + strcpy(rspMsg->dbFName, ctgTestDbname); + sprintf(rspMsg->tbName, "%s", ctgTestSTablename); + sprintf(rspMsg->stbName, "%s", ctgTestSTablename); + rspMsg->numOfTags = ctgTestTagNum; + rspMsg->numOfColumns = ctgTestColNum; + rspMsg->precision = 1 + 1; + rspMsg->tableType = TSDB_SUPER_TABLE; + rspMsg->update = 1 + 1; + rspMsg->sversion = ctgTestSVersion + 1; + rspMsg->tversion = ctgTestTVersion + 1; + rspMsg->suid = ctgTestSuid + 1; + rspMsg->tuid = ctgTestSuid + 1; + rspMsg->vgId = 1; + + SSchema *s = NULL; + s = &rspMsg->pSchema[0]; + s->type = TSDB_DATA_TYPE_TIMESTAMP; + s->colId = 1; + s->bytes = 8; + strcpy(s->name, "ts"); + + s = &rspMsg->pSchema[1]; + s->type = TSDB_DATA_TYPE_INT; + s->colId = 2; + s->bytes = 4; + strcpy(s->name, "col1s"); + + s = &rspMsg->pSchema[2]; + s->type = TSDB_DATA_TYPE_BINARY; + s->colId = 3; + s->bytes = 12 + 1; + strcpy(s->name, "tag1s"); + + return; +} + + void ctgTestPrepareDbVgroups(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp) { SUseDbRsp *rspMsg = NULL; // todo @@ -592,7 +632,32 @@ void *ctgTestGetDbVgroupThread(void *param) { return NULL; } -void *ctgTestSetDbVgroupThread(void *param) { +void *ctgTestSetSameDbVgroupThread(void *param) { + struct SCatalog *pCtg = (struct SCatalog *)param; + int32_t code = 0; + SDBVgroupInfo *dbVgroup = NULL; + int32_t n = 0; + + while (!ctgTestStop) { + ctgTestBuildDBVgroup(&dbVgroup); + code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId, dbVgroup); + if (code) { + assert(0); + } + + if (ctgTestEnableSleep) { + usleep(rand() % 5); + } + if (++n % ctgTestPrintNum == 0) { + printf("Set:%d\n", n); + } + } + + return NULL; +} + + +void *ctgTestSetDiffDbVgroupThread(void *param) { struct SCatalog *pCtg = (struct SCatalog *)param; int32_t code = 0; SDBVgroupInfo *dbVgroup = NULL; @@ -600,7 +665,7 @@ void *ctgTestSetDbVgroupThread(void *param) { while (!ctgTestStop) { ctgTestBuildDBVgroup(&dbVgroup); - code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, dbVgroup); + code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId++, dbVgroup); if (code) { assert(0); } @@ -616,6 +681,7 @@ void *ctgTestSetDbVgroupThread(void *param) { return NULL; } + void *ctgTestGetCtableMetaThread(void *param) { struct SCatalog *pCtg = (struct SCatalog *)param; int32_t code = 0; @@ -681,6 +747,8 @@ TEST(tableMeta, normalTable) { void *mockPointer = (void *)0x1; SVgroupInfo vgInfo = {0}; + ctgTestInitLogFile(); + ctgTestSetPrepareDbVgroups(); initQueryModuleMsgHandle(); @@ -771,6 +839,8 @@ TEST(tableMeta, childTableCase) { void *mockPointer = (void *)0x1; SVgroupInfo vgInfo = {0}; + ctgTestInitLogFile(); + ctgTestSetPrepareDbVgroupsAndChildMeta(); initQueryModuleMsgHandle(); @@ -964,6 +1034,124 @@ TEST(tableMeta, superTableCase) { catalogDestroy(); } +TEST(tableMeta, rmStbMeta) { + struct SCatalog *pCtg = NULL; + void *mockPointer = (void *)0x1; + SVgroupInfo vgInfo = {0}; + + ctgTestInitLogFile(); + + ctgTestSetPrepareDbVgroupsAndSuperMeta(); + + initQueryModuleMsgHandle(); + + int32_t code = catalogInit(NULL); + ASSERT_EQ(code, 0); + + // sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet); + code = catalogGetHandle(ctgTestClusterId, &pCtg); + ASSERT_EQ(code, 0); + + SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1}; + strcpy(n.dbname, "db1"); + strcpy(n.tname, ctgTestSTablename); + + STableMeta *tableMeta = NULL; + code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta); + ASSERT_EQ(code, 0); + ASSERT_EQ(tableMeta->vgId, 0); + ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE); + ASSERT_EQ(tableMeta->sversion, ctgTestSVersion); + ASSERT_EQ(tableMeta->tversion, ctgTestTVersion); + ASSERT_EQ(tableMeta->uid, ctgTestSuid); + ASSERT_EQ(tableMeta->suid, ctgTestSuid); + ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum); + ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum); + ASSERT_EQ(tableMeta->tableInfo.precision, 1); + ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + + code = catalogRemoveSTableMeta(pCtg, "1.db1", ctgTestSTablename, ctgTestSuid); + ASSERT_EQ(code, 0); + + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_META_NUM), 0); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_NUM), 0); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_RENT_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_RENT_NUM), 0); + + catalogDestroy(); +} + +TEST(tableMeta, updateStbMeta) { + struct SCatalog *pCtg = NULL; + void *mockPointer = (void *)0x1; + SVgroupInfo vgInfo = {0}; + + ctgTestInitLogFile(); + + ctgTestSetPrepareDbVgroupsAndSuperMeta(); + + initQueryModuleMsgHandle(); + + int32_t code = catalogInit(NULL); + ASSERT_EQ(code, 0); + + // sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet); + code = catalogGetHandle(ctgTestClusterId, &pCtg); + ASSERT_EQ(code, 0); + + SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1}; + strcpy(n.dbname, "db1"); + strcpy(n.tname, ctgTestSTablename); + + STableMeta *tableMeta = NULL; + code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta); + ASSERT_EQ(code, 0); + ASSERT_EQ(tableMeta->vgId, 0); + ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE); + ASSERT_EQ(tableMeta->sversion, ctgTestSVersion); + ASSERT_EQ(tableMeta->tversion, ctgTestTVersion); + ASSERT_EQ(tableMeta->uid, ctgTestSuid); + ASSERT_EQ(tableMeta->suid, ctgTestSuid); + ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum); + ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum); + ASSERT_EQ(tableMeta->tableInfo.precision, 1); + ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + + tfree(tableMeta); + + STableMetaRsp rsp = {0}; + ctgTestBuildSTableMetaRsp(&rsp); + + code = catalogUpdateSTableMeta(pCtg, &rsp); + ASSERT_EQ(code, 0); + + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_META_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_RENT_NUM), 1); + ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_RENT_NUM), 1); + + code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta); + ASSERT_EQ(code, 0); + ASSERT_EQ(tableMeta->vgId, 0); + ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE); + ASSERT_EQ(tableMeta->sversion, ctgTestSVersion + 1); + ASSERT_EQ(tableMeta->tversion, ctgTestTVersion + 1); + ASSERT_EQ(tableMeta->uid, ctgTestSuid + 1); + ASSERT_EQ(tableMeta->suid, ctgTestSuid + 1); + ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum); + ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum); + ASSERT_EQ(tableMeta->tableInfo.precision, 1 + 1); + ASSERT_EQ(tableMeta->tableInfo.rowSize, 12); + + tfree(tableMeta); + + catalogDestroy(); +} + + + TEST(tableDistVgroup, normalTable) { struct SCatalog *pCtg = NULL; void *mockPointer = (void *)0x1; @@ -1109,7 +1297,7 @@ TEST(dbVgroup, getSetDbVgroupCase) { taosArrayDestroy(vgList); ctgTestBuildDBVgroup(&dbVgroup); - code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, dbVgroup); + code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId, dbVgroup); ASSERT_EQ(code, 0); code = catalogGetTableHashVgroup(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &vgInfo); @@ -1128,7 +1316,7 @@ TEST(dbVgroup, getSetDbVgroupCase) { catalogDestroy(); } -TEST(multiThread, getSetDbVgroupCase) { +TEST(multiThread, getSetRmSameDbVgroup) { struct SCatalog *pCtg = NULL; void *mockPointer = (void *)0x1; SVgroupInfo vgInfo = {0}; @@ -1159,10 +1347,10 @@ TEST(multiThread, getSetDbVgroupCase) { pthread_attr_init(&thattr); pthread_t thread1, thread2; - pthread_create(&(thread1), &thattr, ctgTestSetDbVgroupThread, pCtg); + pthread_create(&(thread1), &thattr, ctgTestSetSameDbVgroupThread, pCtg); sleep(1); - pthread_create(&(thread1), &thattr, ctgTestGetDbVgroupThread, pCtg); + pthread_create(&(thread2), &thattr, ctgTestGetDbVgroupThread, pCtg); while (true) { if (ctgTestDeadLoop) { @@ -1179,6 +1367,58 @@ TEST(multiThread, getSetDbVgroupCase) { catalogDestroy(); } +TEST(multiThread, getSetRmDiffDbVgroup) { + struct SCatalog *pCtg = NULL; + void *mockPointer = (void *)0x1; + SVgroupInfo vgInfo = {0}; + SVgroupInfo *pvgInfo = NULL; + SDBVgroupInfo dbVgroup = {0}; + SArray *vgList = NULL; + ctgTestStop = false; + + ctgTestInitLogFile(); + + ctgTestSetPrepareDbVgroups(); + + initQueryModuleMsgHandle(); + + // sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet); + + int32_t code = catalogInit(NULL); + ASSERT_EQ(code, 0); + + code = catalogGetHandle(ctgTestClusterId, &pCtg); + ASSERT_EQ(code, 0); + + SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1}; + strcpy(n.dbname, "db1"); + strcpy(n.tname, ctgTestTablename); + + pthread_attr_t thattr; + pthread_attr_init(&thattr); + + pthread_t thread1, thread2; + pthread_create(&(thread1), &thattr, ctgTestSetDiffDbVgroupThread, pCtg); + + sleep(1); + pthread_create(&(thread2), &thattr, ctgTestGetDbVgroupThread, pCtg); + + while (true) { + if (ctgTestDeadLoop) { + sleep(1); + } else { + sleep(ctgTestMTRunSec); + break; + } + } + + ctgTestStop = true; + sleep(1); + + catalogDestroy(); +} + + TEST(multiThread, ctableMeta) { struct SCatalog *pCtg = NULL; diff --git a/source/libs/parser/inc/astCreateFuncs.h b/source/libs/parser/inc/astCreateFuncs.h index 7cd7e1932d84b6d1f30a2b17b707ee20fec4676a..82315a5ba481ddb6d4053241724bc62cf2d9308f 100644 --- a/source/libs/parser/inc/astCreateFuncs.h +++ b/source/libs/parser/inc/astCreateFuncs.h @@ -27,12 +27,10 @@ extern "C" { extern SToken nil_token; -typedef struct STargetExprNode { - ENodeType nodeType; - char* p; - uint32_t n; - SNode* pNode; -} STargetExprNode; +SNode* createRawExprNode(SAstCreateContext* pCxt, const SToken* pToken, SNode* pNode); +SNode* createRawExprNodeExt(SAstCreateContext* pCxt, const SToken* pStart, const SToken* pEnd, SNode* pNode); +SNode* releaseRawExprNode(SAstCreateContext* pCxt, SNode* pNode); +SToken getTokenFromRawExprNode(SAstCreateContext* pCxt, SNode* pNode); SNodeList* createNodeList(SAstCreateContext* pCxt, SNode* pNode); SNodeList* addNodeToList(SAstCreateContext* pCxt, SNodeList* pList, SNode* pNode); diff --git a/source/libs/parser/inc/new_sql.y b/source/libs/parser/inc/new_sql.y index 6616b80d179035e3d7f4fb23b73e17199a3355b1..d12e76000ac20ca47a9d6cf8e0357536ad1827f0 100644 --- a/source/libs/parser/inc/new_sql.y +++ b/source/libs/parser/inc/new_sql.y @@ -67,19 +67,19 @@ cmd ::= SHOW DATABASES. cmd ::= query_expression(A). { PARSER_TRACE; pCxt->pRootNode = A; } /************************************************ literal *************************************************************/ -literal(A) ::= NK_INTEGER(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &B); } -literal(A) ::= NK_FLOAT(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &B); } -literal(A) ::= NK_STRING(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &B); } -literal(A) ::= NK_BOOL(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &B); } -literal(A) ::= TIMESTAMP NK_STRING(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &B); } +literal(A) ::= NK_INTEGER(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &B)); } +literal(A) ::= NK_FLOAT(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &B)); } +literal(A) ::= NK_STRING(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &B)); } +literal(A) ::= NK_BOOL(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &B)); } +literal(A) ::= TIMESTAMP(B) NK_STRING(C). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &C, createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &C)); } literal(A) ::= duration_literal(B). { PARSER_TRACE; A = B; } -duration_literal(A) ::= NK_VARIABLE(B). { PARSER_TRACE; A = createDurationValueNode(pCxt, &B); } +duration_literal(A) ::= NK_VARIABLE(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createDurationValueNode(pCxt, &B)); } %type literal_list { SNodeList* } %destructor literal_list { PARSER_DESTRUCTOR_TRACE; nodesDestroyList($$); } -literal_list(A) ::= literal(B). { PARSER_TRACE; A = createNodeList(pCxt, B); } -literal_list(A) ::= literal_list(B) NK_COMMA literal(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); } +literal_list(A) ::= literal(B). { PARSER_TRACE; A = createNodeList(pCxt, releaseRawExprNode(pCxt, B)); } +literal_list(A) ::= literal_list(B) NK_COMMA literal(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, releaseRawExprNode(pCxt, C)); } /************************************************ names and identifiers ***********************************************/ %type db_name { SToken } @@ -111,37 +111,70 @@ expression(A) ::= literal(B). //expression(A) ::= NK_QUESTION(B). { PARSER_TRACE; A = B; } //expression(A) ::= pseudo_column(B). { PARSER_TRACE; A = B; } expression(A) ::= column_reference(B). { PARSER_TRACE; A = B; } -expression(A) ::= function_name(B) NK_LP expression_list(C) NK_RP. { PARSER_TRACE; A = createFunctionNode(pCxt, &B, C); } +expression(A) ::= function_name(B) NK_LP expression_list(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, createFunctionNode(pCxt, &B, C)); } //expression(A) ::= cast_expression(B). { PARSER_TRACE; A = B; } //expression(A) ::= case_expression(B). { PARSER_TRACE; A = B; } expression(A) ::= subquery(B). { PARSER_TRACE; A = B; } -expression(A) ::= NK_LP expression(B) NK_RP. { PARSER_TRACE; A = B; } -expression(A) ::= NK_PLUS expression(B). { PARSER_TRACE; A = B; } -expression(A) ::= NK_MINUS expression(B). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_SUB, B, NULL); } -expression(A) ::= expression(B) NK_PLUS expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_ADD, B, C); } -expression(A) ::= expression(B) NK_MINUS expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_SUB, B, C); } -expression(A) ::= expression(B) NK_STAR expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_MULTI, B, C); } -expression(A) ::= expression(B) NK_SLASH expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_DIV, B, C); } -expression(A) ::= expression(B) NK_REM expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_MOD, B, C); } +expression(A) ::= NK_LP(B) expression(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, releaseRawExprNode(pCxt, C)); } +expression(A) ::= NK_PLUS(B) expression(C). { + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &B, &t, releaseRawExprNode(pCxt, C)); + } +expression(A) ::= NK_MINUS(B) expression(C). { + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &B, &t, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, C), NULL)); + } +expression(A) ::= expression(B) NK_PLUS expression(C). { + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, B); + SToken e = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_ADD, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C))); + } +expression(A) ::= expression(B) NK_MINUS expression(C). { + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, B); + SToken e = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C))); + } +expression(A) ::= expression(B) NK_STAR expression(C). { + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, B); + SToken e = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MULTI, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C))); + } +expression(A) ::= expression(B) NK_SLASH expression(C). { + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, B); + SToken e = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_DIV, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C))); + } +expression(A) ::= expression(B) NK_REM expression(C). { + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, B); + SToken e = getTokenFromRawExprNode(pCxt, C); + A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MOD, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C))); + } %type expression_list { SNodeList* } %destructor expression_list { PARSER_DESTRUCTOR_TRACE; nodesDestroyList($$); } -expression_list(A) ::= expression(B). { PARSER_TRACE; A = createNodeList(pCxt, B); } -expression_list(A) ::= expression_list(B) NK_COMMA expression(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); } +expression_list(A) ::= expression(B). { PARSER_TRACE; A = createNodeList(pCxt, releaseRawExprNode(pCxt, B)); } +expression_list(A) ::= expression_list(B) NK_COMMA expression(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, releaseRawExprNode(pCxt, C)); } -column_reference(A) ::= column_name(B). { PARSER_TRACE; A = createColumnNode(pCxt, NULL, &B); } -column_reference(A) ::= table_name(B) NK_DOT column_name(C). { PARSER_TRACE; A = createColumnNode(pCxt, &B, &C); } +column_reference(A) ::= column_name(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createColumnNode(pCxt, NULL, &B)); } +column_reference(A) ::= table_name(B) NK_DOT column_name(C). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &C, createColumnNode(pCxt, &B, &C)); } //pseudo_column(A) ::= NK_NOW. { PARSER_TRACE; A = createFunctionNode(pCxt, NULL, NULL); } /************************************************ predicate ***********************************************************/ -predicate(A) ::= expression(B) compare_op(C) expression(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, B, D); } +predicate(A) ::= expression(B) compare_op(C) expression(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, D)); } //predicate(A) ::= expression(B) compare_op sub_type expression(B). -predicate(A) ::= expression(B) BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createBetweenAnd(pCxt, B, C, D); } -predicate(A) ::= expression(B) NOT BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createNotBetweenAnd(pCxt, C, B, D); } -predicate(A) ::= expression(B) IS NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, B, true); } -predicate(A) ::= expression(B) IS NOT NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, B, false); } -predicate(A) ::= expression(B) in_op(C) in_predicate_value(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, B, D); } +predicate(A) ::= expression(B) BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createBetweenAnd(pCxt, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C), releaseRawExprNode(pCxt, D)); } +predicate(A) ::= expression(B) NOT BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createNotBetweenAnd(pCxt, releaseRawExprNode(pCxt, C), releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, D)); } +predicate(A) ::= expression(B) IS NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, B), true); } +predicate(A) ::= expression(B) IS NOT NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, B), false); } +predicate(A) ::= expression(B) in_op(C) in_predicate_value(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, releaseRawExprNode(pCxt, B), D); } %type compare_op { EOperatorType } %destructor compare_op { PARSER_DESTRUCTOR_TRACE; } @@ -186,7 +219,7 @@ table_reference(A) ::= joined_table(B). table_primary(A) ::= table_name(B) alias_opt(C). { PARSER_TRACE; A = createRealTableNode(pCxt, NULL, &B, &C); } table_primary(A) ::= db_name(B) NK_DOT table_name(C) alias_opt(D). { PARSER_TRACE; A = createRealTableNode(pCxt, &B, &C, &D); } -table_primary(A) ::= subquery(B) alias_opt(C). { PARSER_TRACE; A = createTempTableNode(pCxt, B, &C); } +table_primary(A) ::= subquery(B) alias_opt(C). { PARSER_TRACE; A = createTempTableNode(pCxt, releaseRawExprNode(pCxt, B), &C); } table_primary(A) ::= parenthesized_joined_table(B). { PARSER_TRACE; A = B; } %type alias_opt { SToken } @@ -236,9 +269,13 @@ select_list(A) ::= select_sublist(B). select_sublist(A) ::= select_item(B). { PARSER_TRACE; A = createNodeList(pCxt, B); } select_sublist(A) ::= select_sublist(B) NK_COMMA select_item(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); } -select_item(A) ::= expression(B). { PARSER_TRACE; A = B; } -select_item(A) ::= expression(B) column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, B, &C); } -select_item(A) ::= expression(B) AS column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, B, &C); } +select_item(A) ::= expression(B). { + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, B); + A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &t); + } +select_item(A) ::= expression(B) column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &C); } +select_item(A) ::= expression(B) AS column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &C); } select_item(A) ::= table_name(B) NK_DOT NK_STAR(C). { PARSER_TRACE; A = createColumnNode(pCxt, &B, &C); } where_clause_opt(A) ::= . { PARSER_TRACE; A = NULL; } @@ -251,8 +288,8 @@ partition_by_clause_opt(A) ::= PARTITION BY expression_list(B). twindow_clause_opt(A) ::= . { PARSER_TRACE; A = NULL; } twindow_clause_opt(A) ::= - SESSION NK_LP column_reference(B) NK_COMMA NK_INTEGER(C) NK_RP. { PARSER_TRACE; A = createSessionWindowNode(pCxt, B, &C); } -twindow_clause_opt(A) ::= STATE_WINDOW NK_LP column_reference(B) NK_RP. { PARSER_TRACE; A = createStateWindowNode(pCxt, B); } + SESSION NK_LP column_reference(B) NK_COMMA NK_INTEGER(C) NK_RP. { PARSER_TRACE; A = createSessionWindowNode(pCxt, releaseRawExprNode(pCxt, B), &C); } +twindow_clause_opt(A) ::= STATE_WINDOW NK_LP column_reference(B) NK_RP. { PARSER_TRACE; A = createStateWindowNode(pCxt, releaseRawExprNode(pCxt, B)); } twindow_clause_opt(A) ::= INTERVAL NK_LP duration_literal(B) NK_RP sliding_opt(C) fill_opt(D). { PARSER_TRACE; A = createIntervalWindowNode(pCxt, B, NULL, C, D); } twindow_clause_opt(A) ::= @@ -317,7 +354,7 @@ limit_clause_opt(A) ::= LIMIT NK_INTEGER(B) OFFSET NK_INTEGER(C). limit_clause_opt(A) ::= LIMIT NK_INTEGER(C) NK_COMMA NK_INTEGER(B). { PARSER_TRACE; A = createLimitNode(pCxt, &B, &C); } /************************************************ subquery ************************************************************/ -subquery(A) ::= NK_LP query_expression(B) NK_RP. { PARSER_TRACE; A = B; } +subquery(A) ::= NK_LP(B) query_expression(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, C); } /************************************************ search_condition ****************************************************/ search_condition(A) ::= boolean_value_expression(B). { PARSER_TRACE; A = B; } @@ -330,7 +367,7 @@ sort_specification_list(A) ::= sort_specification_list(B) NK_COMMA sort_specification(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); } sort_specification(A) ::= - expression(B) ordering_specification_opt(C) null_ordering_opt(D). { PARSER_TRACE; A = createOrderByExprNode(pCxt, B, C, D); } + expression(B) ordering_specification_opt(C) null_ordering_opt(D). { PARSER_TRACE; A = createOrderByExprNode(pCxt, releaseRawExprNode(pCxt, B), C, D); } %type ordering_specification_opt EOrder %destructor ordering_specification_opt { PARSER_DESTRUCTOR_TRACE; } diff --git a/source/libs/parser/inc/sql.y b/source/libs/parser/inc/sql.y index 6d3e9e729f2754fa159e885eec80ea1b2a12b948..1222138b5e4c7f09b9ed4c9d21ed4bdf70122f0a 100644 --- a/source/libs/parser/inc/sql.y +++ b/source/libs/parser/inc/sql.y @@ -820,7 +820,7 @@ cmd ::= ALTER TABLE ids(X) cpxName(F) MODIFY COLUMN columnlist(A). { //////////////////////////////////ALTER TAGS statement///////////////////////////////////// cmd ::= ALTER TABLE ids(X) cpxName(Y) ADD TAG columnlist(A). { X.n += Y.n; - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, -1); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG, -1); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } cmd ::= ALTER TABLE ids(X) cpxName(Z) DROP TAG ids(Y). { @@ -829,7 +829,7 @@ cmd ::= ALTER TABLE ids(X) cpxName(Z) DROP TAG ids(Y). { toTSDBType(Y.type); SArray* A = tListItemAppendToken(NULL, &Y, -1); - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, -1); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, -1); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } @@ -889,7 +889,7 @@ cmd ::= ALTER STABLE ids(X) cpxName(F) MODIFY COLUMN columnlist(A). { //////////////////////////////////ALTER TAGS statement///////////////////////////////////// cmd ::= ALTER STABLE ids(X) cpxName(Y) ADD TAG columnlist(A). { X.n += Y.n; - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, TSDB_SUPER_TABLE); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG, TSDB_SUPER_TABLE); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } cmd ::= ALTER STABLE ids(X) cpxName(Z) DROP TAG ids(Y). { @@ -898,7 +898,7 @@ cmd ::= ALTER STABLE ids(X) cpxName(Z) DROP TAG ids(Y). { toTSDBType(Y.type); SArray* A = tListItemAppendToken(NULL, &Y, -1); - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, TSDB_SUPER_TABLE); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, TSDB_SUPER_TABLE); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } diff --git a/source/libs/parser/src/astCreateFuncs.c b/source/libs/parser/src/astCreateFuncs.c index e8b8b42f743f2909cd7480a927837b95f359fbc9..5aaa40e0e425397850eafab2e3c08492d54d0c90 100644 --- a/source/libs/parser/src/astCreateFuncs.c +++ b/source/libs/parser/src/astCreateFuncs.c @@ -24,6 +24,14 @@ } \ } while (0) +#define CHECK_RAW_EXPR_NODE(node) \ + do { \ + if (NULL == (node) || QUERY_NODE_RAW_EXPR != nodeType(node)) { \ + pCxt->valid = false; \ + return NULL; \ + } \ + } while (0) + SToken nil_token = { .type = TK_NIL, .n = 0, .z = NULL }; static bool checkDbName(SAstCreateContext* pCxt, const SToken* pDbName) { @@ -50,6 +58,37 @@ static bool checkColumnName(SAstCreateContext* pCxt, const SToken* pColumnName) return pCxt->valid; } +SNode* createRawExprNode(SAstCreateContext* pCxt, const SToken* pToken, SNode* pNode) { + SRawExprNode* target = (SRawExprNode*)nodesMakeNode(QUERY_NODE_RAW_EXPR); + CHECK_OUT_OF_MEM(target); + target->p = pToken->z; + target->n = pToken->n; + target->pNode = pNode; + return (SNode*)target; +} + +SNode* createRawExprNodeExt(SAstCreateContext* pCxt, const SToken* pStart, const SToken* pEnd, SNode* pNode) { + SRawExprNode* target = (SRawExprNode*)nodesMakeNode(QUERY_NODE_RAW_EXPR); + CHECK_OUT_OF_MEM(target); + target->p = pStart->z; + target->n = (pEnd->z + pEnd->n) - pStart->z; + target->pNode = pNode; + return (SNode*)target; +} + +SNode* releaseRawExprNode(SAstCreateContext* pCxt, SNode* pNode) { + CHECK_RAW_EXPR_NODE(pNode); + SNode* tmp = ((SRawExprNode*)pNode)->pNode; + tfree(pNode); + return tmp; +} + +SToken getTokenFromRawExprNode(SAstCreateContext* pCxt, SNode* pNode) { + SRawExprNode* target = (SRawExprNode*)pNode; + SToken t = { .type = 0, .z = target->p, .n = target->n}; + return t; +} + SNodeList* createNodeList(SAstCreateContext* pCxt, SNode* pNode) { SNodeList* list = nodesMakeList(); CHECK_OUT_OF_MEM(list); @@ -86,7 +125,7 @@ SNode* createValueNode(SAstCreateContext* pCxt, int32_t dataType, const SToken* SNode* createDurationValueNode(SAstCreateContext* pCxt, const SToken* pLiteral) { SValueNode* val = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE); CHECK_OUT_OF_MEM(val); - // todo + // todo : calc, for example, 10s return (SNode*)val; } diff --git a/source/libs/parser/src/astGenerator.c b/source/libs/parser/src/astGenerator.c index 9f0efbcf0e2f7d186ad15627d0b818d311378d63..5dce5120f98131eda4257fc459a34b1f209b7b1f 100644 --- a/source/libs/parser/src/astGenerator.c +++ b/source/libs/parser/src/astGenerator.c @@ -610,7 +610,7 @@ SAlterTableInfo *tSetAlterTableInfo(SToken *pTableName, SArray *pCols, SArray *p pAlterTable->type = type; pAlterTable->tableType = tableType; - if (type == TSDB_ALTER_TABLE_ADD_COLUMN || type == TSDB_ALTER_TABLE_ADD_TAG_COLUMN || type == TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES || type == TSDB_ALTER_TABLE_UPDATE_TAG_BYTES) { + if (type == TSDB_ALTER_TABLE_ADD_COLUMN || type == TSDB_ALTER_TABLE_ADD_TAG || type == TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES || type == TSDB_ALTER_TABLE_UPDATE_TAG_BYTES) { pAlterTable->pAddColumns = pCols; assert(pVals == NULL); } else { diff --git a/source/libs/parser/src/new_sql.c b/source/libs/parser/src/new_sql.c index 8ce36a6425d3cf7712a155274df431aced84d22b..cb2b277df8461634d32bb1e60509500dae6950dc 100644 --- a/source/libs/parser/src/new_sql.c +++ b/source/libs/parser/src/new_sql.c @@ -1517,23 +1517,24 @@ static YYACTIONTYPE yy_reduce( { PARSER_TRACE; pCxt->pRootNode = yymsp[0].minor.yy168; } break; case 2: /* literal ::= NK_INTEGER */ -{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &yymsp[0].minor.yy0)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 3: /* literal ::= NK_FLOAT */ -{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &yymsp[0].minor.yy0)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 4: /* literal ::= NK_STRING */ -{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &yymsp[0].minor.yy0)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 5: /* literal ::= NK_BOOL */ -{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &yymsp[0].minor.yy0)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 6: /* literal ::= TIMESTAMP NK_STRING */ -{ PARSER_TRACE; yymsp[-1].minor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &yymsp[0].minor.yy0)); } + yymsp[-1].minor.yy168 = yylhsminor.yy168; break; case 7: /* literal ::= duration_literal */ case 17: /* expression ::= literal */ yytestcase(yyruleno==17); @@ -1545,7 +1546,6 @@ static YYACTIONTYPE yy_reduce( case 61: /* table_reference ::= table_primary */ yytestcase(yyruleno==61); case 62: /* table_reference ::= joined_table */ yytestcase(yyruleno==62); case 66: /* table_primary ::= parenthesized_joined_table */ yytestcase(yyruleno==66); - case 82: /* select_item ::= expression */ yytestcase(yyruleno==82); case 110: /* query_expression_body ::= query_primary */ yytestcase(yyruleno==110); case 112: /* query_primary ::= query_specification */ yytestcase(yyruleno==112); case 124: /* search_condition ::= boolean_value_expression */ yytestcase(yyruleno==124); @@ -1553,21 +1553,17 @@ static YYACTIONTYPE yy_reduce( yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 8: /* duration_literal ::= NK_VARIABLE */ -{ PARSER_TRACE; yylhsminor.yy168 = createDurationValueNode(pCxt, &yymsp[0].minor.yy0); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createDurationValueNode(pCxt, &yymsp[0].minor.yy0)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 9: /* literal_list ::= literal */ case 29: /* expression_list ::= expression */ yytestcase(yyruleno==29); - case 80: /* select_sublist ::= select_item */ yytestcase(yyruleno==80); - case 125: /* sort_specification_list ::= sort_specification */ yytestcase(yyruleno==125); -{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, yymsp[0].minor.yy168); } +{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); } yymsp[0].minor.yy192 = yylhsminor.yy192; break; case 10: /* literal_list ::= literal_list NK_COMMA literal */ case 30: /* expression_list ::= expression_list NK_COMMA expression */ yytestcase(yyruleno==30); - case 81: /* select_sublist ::= select_sublist NK_COMMA select_item */ yytestcase(yyruleno==81); - case 126: /* sort_specification_list ::= sort_specification_list NK_COMMA sort_specification */ yytestcase(yyruleno==126); -{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, yymsp[0].minor.yy168); } +{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); } yymsp[-2].minor.yy192 = yylhsminor.yy192; break; case 11: /* db_name ::= NK_ID */ @@ -1580,74 +1576,106 @@ static YYACTIONTYPE yy_reduce( yymsp[0].minor.yy241 = yylhsminor.yy241; break; case 19: /* expression ::= function_name NK_LP expression_list NK_RP */ -{ PARSER_TRACE; yylhsminor.yy168 = createFunctionNode(pCxt, &yymsp[-3].minor.yy241, yymsp[-1].minor.yy192); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-3].minor.yy241, &yymsp[0].minor.yy0, createFunctionNode(pCxt, &yymsp[-3].minor.yy241, yymsp[-1].minor.yy192)); } yymsp[-3].minor.yy168 = yylhsminor.yy168; break; case 21: /* expression ::= NK_LP expression NK_RP */ - case 57: /* boolean_primary ::= NK_LP boolean_value_expression NK_RP */ yytestcase(yyruleno==57); - case 70: /* parenthesized_joined_table ::= NK_LP joined_table NK_RP */ yytestcase(yyruleno==70); - case 71: /* parenthesized_joined_table ::= NK_LP parenthesized_joined_table NK_RP */ yytestcase(yyruleno==71); - case 123: /* subquery ::= NK_LP query_expression NK_RP */ yytestcase(yyruleno==123); -{ PARSER_TRACE; yymsp[-2].minor.yy168 = yymsp[-1].minor.yy168; } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168)); } + yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 22: /* expression ::= NK_PLUS expression */ - case 58: /* from_clause ::= FROM table_reference_list */ yytestcase(yyruleno==58); - case 87: /* where_clause_opt ::= WHERE search_condition */ yytestcase(yyruleno==87); - case 108: /* having_clause_opt ::= HAVING search_condition */ yytestcase(yyruleno==108); -{ PARSER_TRACE; yymsp[-1].minor.yy168 = yymsp[0].minor.yy168; } +{ + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &t, releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); + } + yymsp[-1].minor.yy168 = yylhsminor.yy168; break; case 23: /* expression ::= NK_MINUS expression */ -{ PARSER_TRACE; yymsp[-1].minor.yy168 = createOperatorNode(pCxt, OP_TYPE_SUB, yymsp[0].minor.yy168, NULL); } +{ + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &t, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, yymsp[0].minor.yy168), NULL)); + } + yymsp[-1].minor.yy168 = yylhsminor.yy168; break; case 24: /* expression ::= expression NK_PLUS expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_ADD, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168); + SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_ADD, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168))); + } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 25: /* expression ::= expression NK_MINUS expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_SUB, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168); + SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168))); + } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 26: /* expression ::= expression NK_STAR expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_MULTI, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168); + SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MULTI, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168))); + } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 27: /* expression ::= expression NK_SLASH expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_DIV, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168); + SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_DIV, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168))); + } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 28: /* expression ::= expression NK_REM expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_MOD, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ + PARSER_TRACE; + SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168); + SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MOD, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168))); + } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 31: /* column_reference ::= column_name */ -{ PARSER_TRACE; yylhsminor.yy168 = createColumnNode(pCxt, NULL, &yymsp[0].minor.yy241); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy241, createColumnNode(pCxt, NULL, &yymsp[0].minor.yy241)); } yymsp[0].minor.yy168 = yylhsminor.yy168; break; case 32: /* column_reference ::= table_name NK_DOT column_name */ -{ PARSER_TRACE; yylhsminor.yy168 = createColumnNode(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241); } +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241, createColumnNode(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241)); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 33: /* predicate ::= expression compare_op expression */ - case 38: /* predicate ::= expression in_op in_predicate_value */ yytestcase(yyruleno==38); -{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 34: /* predicate ::= expression BETWEEN expression AND expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createBetweenAnd(pCxt, yymsp[-4].minor.yy168, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } +{ PARSER_TRACE; yylhsminor.yy168 = createBetweenAnd(pCxt, releaseRawExprNode(pCxt, yymsp[-4].minor.yy168), releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); } yymsp[-4].minor.yy168 = yylhsminor.yy168; break; case 35: /* predicate ::= expression NOT BETWEEN expression AND expression */ -{ PARSER_TRACE; yylhsminor.yy168 = createNotBetweenAnd(pCxt, yymsp[-2].minor.yy168, yymsp[-5].minor.yy168, yymsp[0].minor.yy168); } +{ PARSER_TRACE; yylhsminor.yy168 = createNotBetweenAnd(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[-5].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); } yymsp[-5].minor.yy168 = yylhsminor.yy168; break; case 36: /* predicate ::= expression IS NULL */ -{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, yymsp[-2].minor.yy168, true); } +{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), true); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 37: /* predicate ::= expression IS NOT NULL */ -{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, yymsp[-3].minor.yy168, false); } +{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, yymsp[-3].minor.yy168), false); } yymsp[-3].minor.yy168 = yylhsminor.yy168; break; + case 38: /* predicate ::= expression in_op in_predicate_value */ +{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), yymsp[0].minor.yy168); } + yymsp[-2].minor.yy168 = yylhsminor.yy168; + break; case 39: /* compare_op ::= NK_LT */ { PARSER_TRACE; yymsp[0].minor.yy228 = OP_TYPE_LOWER_THAN; } break; @@ -1698,6 +1726,16 @@ static YYACTIONTYPE yy_reduce( { PARSER_TRACE; yylhsminor.yy168 = createLogicConditionNode(pCxt, LOGIC_COND_TYPE_AND, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; + case 57: /* boolean_primary ::= NK_LP boolean_value_expression NK_RP */ + case 70: /* parenthesized_joined_table ::= NK_LP joined_table NK_RP */ yytestcase(yyruleno==70); + case 71: /* parenthesized_joined_table ::= NK_LP parenthesized_joined_table NK_RP */ yytestcase(yyruleno==71); +{ PARSER_TRACE; yymsp[-2].minor.yy168 = yymsp[-1].minor.yy168; } + break; + case 58: /* from_clause ::= FROM table_reference_list */ + case 87: /* where_clause_opt ::= WHERE search_condition */ yytestcase(yyruleno==87); + case 108: /* having_clause_opt ::= HAVING search_condition */ yytestcase(yyruleno==108); +{ PARSER_TRACE; yymsp[-1].minor.yy168 = yymsp[0].minor.yy168; } + break; case 60: /* table_reference_list ::= table_reference_list NK_COMMA table_reference */ { PARSER_TRACE; yylhsminor.yy168 = createJoinTableNode(pCxt, JOIN_TYPE_INNER, yymsp[-2].minor.yy168, yymsp[0].minor.yy168, NULL); } yymsp[-2].minor.yy168 = yylhsminor.yy168; @@ -1711,7 +1749,7 @@ static YYACTIONTYPE yy_reduce( yymsp[-3].minor.yy168 = yylhsminor.yy168; break; case 65: /* table_primary ::= subquery alias_opt */ -{ PARSER_TRACE; yylhsminor.yy168 = createTempTableNode(pCxt, yymsp[-1].minor.yy168, &yymsp[0].minor.yy241); } +{ PARSER_TRACE; yylhsminor.yy168 = createTempTableNode(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168), &yymsp[0].minor.yy241); } yymsp[-1].minor.yy168 = yylhsminor.yy168; break; case 67: /* alias_opt ::= */ @@ -1758,12 +1796,30 @@ static YYACTIONTYPE yy_reduce( { PARSER_TRACE; yylhsminor.yy192 = yymsp[0].minor.yy192; } yymsp[0].minor.yy192 = yylhsminor.yy192; break; + case 80: /* select_sublist ::= select_item */ + case 125: /* sort_specification_list ::= sort_specification */ yytestcase(yyruleno==125); +{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, yymsp[0].minor.yy168); } + yymsp[0].minor.yy192 = yylhsminor.yy192; + break; + case 81: /* select_sublist ::= select_sublist NK_COMMA select_item */ + case 126: /* sort_specification_list ::= sort_specification_list NK_COMMA sort_specification */ yytestcase(yyruleno==126); +{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, yymsp[0].minor.yy168); } + yymsp[-2].minor.yy192 = yylhsminor.yy192; + break; + case 82: /* select_item ::= expression */ +{ + PARSER_TRACE; + SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168); + yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[0].minor.yy168), &t); + } + yymsp[0].minor.yy168 = yylhsminor.yy168; + break; case 83: /* select_item ::= expression column_alias */ -{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, yymsp[-1].minor.yy168, &yymsp[0].minor.yy241); } +{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168), &yymsp[0].minor.yy241); } yymsp[-1].minor.yy168 = yylhsminor.yy168; break; case 84: /* select_item ::= expression AS column_alias */ -{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, yymsp[-2].minor.yy168, &yymsp[0].minor.yy241); } +{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), &yymsp[0].minor.yy241); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 85: /* select_item ::= table_name NK_DOT NK_STAR */ @@ -1790,10 +1846,10 @@ static YYACTIONTYPE yy_reduce( { PARSER_TRACE; yymsp[-2].minor.yy192 = yymsp[0].minor.yy192; } break; case 91: /* twindow_clause_opt ::= SESSION NK_LP column_reference NK_COMMA NK_INTEGER NK_RP */ -{ PARSER_TRACE; yymsp[-5].minor.yy168 = createSessionWindowNode(pCxt, yymsp[-3].minor.yy168, &yymsp[-1].minor.yy0); } +{ PARSER_TRACE; yymsp[-5].minor.yy168 = createSessionWindowNode(pCxt, releaseRawExprNode(pCxt, yymsp[-3].minor.yy168), &yymsp[-1].minor.yy0); } break; case 92: /* twindow_clause_opt ::= STATE_WINDOW NK_LP column_reference NK_RP */ -{ PARSER_TRACE; yymsp[-3].minor.yy168 = createStateWindowNode(pCxt, yymsp[-1].minor.yy168); } +{ PARSER_TRACE; yymsp[-3].minor.yy168 = createStateWindowNode(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168)); } break; case 93: /* twindow_clause_opt ::= INTERVAL NK_LP duration_literal NK_RP sliding_opt fill_opt */ { PARSER_TRACE; yymsp[-5].minor.yy168 = createIntervalWindowNode(pCxt, yymsp[-3].minor.yy168, NULL, yymsp[-1].minor.yy168, yymsp[0].minor.yy168); } @@ -1850,8 +1906,12 @@ static YYACTIONTYPE yy_reduce( case 122: /* limit_clause_opt ::= LIMIT NK_INTEGER NK_COMMA NK_INTEGER */ yytestcase(yyruleno==122); { PARSER_TRACE; yymsp[-3].minor.yy168 = createLimitNode(pCxt, &yymsp[0].minor.yy0, &yymsp[-2].minor.yy0); } break; + case 123: /* subquery ::= NK_LP query_expression NK_RP */ +{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, yymsp[-1].minor.yy168); } + yymsp[-2].minor.yy168 = yylhsminor.yy168; + break; case 127: /* sort_specification ::= expression ordering_specification_opt null_ordering_opt */ -{ PARSER_TRACE; yylhsminor.yy168 = createOrderByExprNode(pCxt, yymsp[-2].minor.yy168, yymsp[-1].minor.yy10, yymsp[0].minor.yy177); } +{ PARSER_TRACE; yylhsminor.yy168 = createOrderByExprNode(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), yymsp[-1].minor.yy10, yymsp[0].minor.yy177); } yymsp[-2].minor.yy168 = yylhsminor.yy168; break; case 128: /* ordering_specification_opt ::= */ diff --git a/source/libs/parser/src/sql.c b/source/libs/parser/src/sql.c index 2fae10d17ee62310caa32d424bae46a248169dc3..664f2a3ff2c124afea9ecec79ecdae9d6339e415 100644 --- a/source/libs/parser/src/sql.c +++ b/source/libs/parser/src/sql.c @@ -3206,7 +3206,7 @@ static void yy_reduce( case 286: /* cmd ::= ALTER TABLE ids cpxName ADD TAG columnlist */ { yymsp[-4].minor.yy0.n += yymsp[-3].minor.yy0.n; - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, -1); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG, -1); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } break; @@ -3217,7 +3217,7 @@ static void yy_reduce( toTSDBType(yymsp[0].minor.yy0.type); SArray* A = tListItemAppendToken(NULL, &yymsp[0].minor.yy0, -1); - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, -1); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, -1); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } break; @@ -3282,7 +3282,7 @@ static void yy_reduce( case 294: /* cmd ::= ALTER STABLE ids cpxName ADD TAG columnlist */ { yymsp[-4].minor.yy0.n += yymsp[-3].minor.yy0.n; - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, TSDB_SUPER_TABLE); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG, TSDB_SUPER_TABLE); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } break; @@ -3293,7 +3293,7 @@ static void yy_reduce( toTSDBType(yymsp[0].minor.yy0.type); SArray* A = tListItemAppendToken(NULL, &yymsp[0].minor.yy0, -1); - SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, TSDB_SUPER_TABLE); + SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, TSDB_SUPER_TABLE); setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE); } break; diff --git a/source/libs/qcom/src/querymsg.c b/source/libs/qcom/src/querymsg.c index e7b3d08bc5885f81bb1c62d7c157e85cf81489c6..3e14bfca094bd548fe8918cc0be9f6bab4d932ae 100644 --- a/source/libs/qcom/src/querymsg.c +++ b/source/libs/qcom/src/querymsg.c @@ -119,9 +119,9 @@ int32_t queryProcessUseDBRsp(void* output, char *msg, int32_t msgSize) { return TSDB_CODE_TSC_OUT_OF_MEMORY; } + pOut->dbId = pRsp->uid; pOut->dbVgroup->vgVersion = pRsp->vgVersion; pOut->dbVgroup->hashMethod = pRsp->hashMethod; - pOut->dbVgroup->dbId = pRsp->uid; pOut->dbVgroup->vgHash = taosHashInit(pRsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); if (NULL == pOut->dbVgroup->vgHash) { qError("taosHashInit %d failed", pRsp->vgNum); @@ -159,6 +159,7 @@ _return: } static int32_t queryConvertTableMetaMsg(STableMetaRsp* pMetaMsg) { + pMetaMsg->dbId = be64toh(pMetaMsg->dbId); pMetaMsg->numOfTags = ntohl(pMetaMsg->numOfTags); pMetaMsg->numOfColumns = ntohl(pMetaMsg->numOfColumns); pMetaMsg->sversion = ntohl(pMetaMsg->sversion); @@ -258,6 +259,8 @@ int32_t queryProcessTableMetaRsp(void* output, char *msg, int32_t msgSize) { } strcpy(pOut->dbFName, pMetaMsg->dbFName); + + pOut->dbId = pMetaMsg->dbId; if (pMetaMsg->tableType == TSDB_CHILD_TABLE) { SET_META_TYPE_BOTH_TABLE(pOut->metaType); diff --git a/source/libs/sync/CMakeLists.txt b/source/libs/sync/CMakeLists.txt index 37ee5194c81d337b68e92df096c42a4721ec93eb..784a864451e16e19d325746add3173253998b545 100644 --- a/source/libs/sync/CMakeLists.txt +++ b/source/libs/sync/CMakeLists.txt @@ -1,3 +1,4 @@ + aux_source_directory(src SYNC_SRC) add_library(sync ${SYNC_SRC}) diff --git a/source/libs/sync/inc/raft.h b/source/libs/sync/inc/raft.h deleted file mode 100644 index 129f0f4dbccaeeb52458b7bcd5560168edb14c12..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/raft.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_H -#define _TD_LIBS_SYNC_RAFT_H - -#include "sync.h" -#include "sync_type.h" -#include "thash.h" -#include "raft_message.h" -#include "sync_raft_impl.h" -#include "sync_raft_quorum.h" - -typedef struct RaftLeaderState { - -} RaftLeaderState; - -typedef struct RaftCandidateState { - /* true if in pre-vote phase */ - bool inPreVote; -} RaftCandidateState; - -typedef struct SSyncRaftIOMethods { - // send SSyncMessage to node - int (*send)(const SSyncMessage* pMsg, const SNodeInfo* pNode); -} SSyncRaftIOMethods; - -typedef int (*SyncRaftStepFp)(SSyncRaft* pRaft, const SSyncMessage* pMsg); -typedef void (*SyncRaftTickFp)(SSyncRaft* pRaft); - -struct SSyncRaft { - // owner sync node - SSyncNode* pNode; - - // hash map nodeId -> SNodeInfo* - SHashObj* nodeInfoMap; - - SyncNodeId selfId; - SyncGroupId selfGroupId; - - SSyncRaftIOMethods io; - - SSyncFSM fsm; - SSyncLogStore logStore; - SStateManager stateManager; - - union { - RaftLeaderState leaderState; - RaftCandidateState candidateState; - }; - - SyncTerm term; - SyncNodeId voteFor; - - SSyncRaftLog *log; - - uint64_t maxMsgSize; - uint64_t maxUncommittedSize; - SSyncRaftProgressTracker *tracker; - - ESyncState state; - - // isLearner is true if the local raft node is a learner. - bool isLearner; - - /** - * the leader id - **/ - SyncNodeId leaderId; - - /** - * leadTransferee is id of the leader transfer target when its value is not zero. - * Follow the procedure defined in raft thesis 3.10. - **/ - SyncNodeId leadTransferee; - - /** - * Only one conf change may be pending (in the log, but not yet - * applied) at a time. This is enforced via pendingConfIndex, which - * is set to a value >= the log index of the latest pending - * configuration change (if any). Config changes are only allowed to - * be proposed if the leader's applied index is greater than this - * value. - **/ - SyncIndex pendingConfigIndex; - - /** - * an estimate of the size of the uncommitted tail of the Raft log. Used to - * prevent unbounded log growth. Only maintained by the leader. Reset on - * term changes. - **/ - uint32_t uncommittedSize; - - /** - * number of ticks since it reached last electionTimeout when it is leader - * or candidate. - * number of ticks since it reached last electionTimeout or received a - * valid message from current leader when it is a follower. - **/ - uint16_t electionElapsed; - - /** - * number of ticks since it reached last heartbeatTimeout. - * only leader keeps heartbeatElapsed. - **/ - uint16_t heartbeatElapsed; - - bool preVote; - bool checkQuorum; - - int heartbeatTimeout; - int electionTimeout; - - /** - * randomizedElectionTimeout is a random number between - * [electiontimeout, 2 * electiontimeout - 1]. It gets reset - * when raft changes its state to follower or candidate. - **/ - int randomizedElectionTimeout; - bool disableProposalForwarding; - - // current tick count since start up - uint32_t currentTick; - - SyncRaftStepFp stepFp; - - SyncRaftTickFp tickFp; -}; - -int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo); -int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg); -int32_t syncRaftTick(SSyncRaft* pRaft); - -#endif /* _TD_LIBS_SYNC_RAFT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_log.h b/source/libs/sync/inc/raft_log.h deleted file mode 100644 index 117ed42c2cd4cd6655b7dedd64128763c9f05025..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/raft_log.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_LOG_H -#define _TD_LIBS_SYNC_RAFT_LOG_H - -#include "sync.h" -#include "sync_type.h" - -typedef enum ESyncRaftEntryType { - SYNC_ENTRY_TYPE_LOG = 1, -} ESyncRaftEntryType; - -struct SSyncRaftEntry { - SyncTerm term; - - SyncIndex index; - - ESyncRaftEntryType type; - - SSyncBuffer buffer; -}; - -struct SSyncRaftLog { - SyncIndex uncommittedConfigIndex; - - SyncIndex commitIndex; - - SyncIndex appliedIndex; -}; - -SSyncRaftLog* syncRaftLogOpen(); - -SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog); - -SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog); - -SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog); - -void syncRaftLogAppliedTo(SSyncRaftLog* pLog, SyncIndex appliedIndex); - -bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term); - -int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog); - -bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog); - -SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index); - -int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n); - -int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, - SSyncRaftEntry **ppEntries, int *n); - -void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index, - SSyncRaftEntry *pEntries, int n); - -bool syncRaftLogMatchTerm(); - -static FORCE_INLINE bool syncRaftLogIsCommitted(SSyncRaftLog* pLog, SyncIndex index) { - return pLog->commitIndex > index; -} - -#endif /* _TD_LIBS_SYNC_RAFT_LOG_H */ diff --git a/source/libs/sync/inc/raft_message.h b/source/libs/sync/inc/raft_message.h deleted file mode 100644 index 0d81511756d33d594fdd9e2cd7b37affc81afb10..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/raft_message.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_MESSAGE_H -#define _TD_LIBS_SYNC_RAFT_MESSAGE_H - -#include "sync.h" -#include "sync_type.h" - -/** - * below define message type which handled by Raft. - * - * internal message, which communicate between threads, start with RAFT_MSG_INTERNAL_*. - * internal message use pointer only and stack memory, need not to be decode/encode and free. - * - * outter message start with RAFT_MSG_*, which communicate between cluster peers, - * need to implement its decode/encode functions. - **/ -typedef enum ESyncRaftMessageType { - // client propose a cmd - RAFT_MSG_INTERNAL_PROP = 1, - - // node election timeout - RAFT_MSG_INTERNAL_ELECTION = 2, - - RAFT_MSG_VOTE = 3, - RAFT_MSG_VOTE_RESP = 4, - - RAFT_MSG_APPEND = 5, - RAFT_MSG_APPEND_RESP = 6, -} ESyncRaftMessageType; - -typedef struct RaftMsgInternal_Prop { - const SSyncBuffer *pBuf; - bool isWeak; - void* pData; -} RaftMsgInternal_Prop; - -typedef struct RaftMsgInternal_Election { - -} RaftMsgInternal_Election; - -typedef struct RaftMsg_Vote { - ESyncRaftElectionType cType; - SyncIndex lastIndex; - SyncTerm lastTerm; -} RaftMsg_Vote; - -typedef struct RaftMsg_VoteResp { - bool rejected; - ESyncRaftElectionType cType; -} RaftMsg_VoteResp; - -typedef struct RaftMsg_Append_Entries { - // index of log entry preceeding new ones - SyncIndex index; - - // term of entry at prevIndex - SyncTerm term; - - // leader's commit index. - SyncIndex commitIndex; - - // size of the log entries array - int nEntries; - - // log entries array - SSyncRaftEntry* entries; -} RaftMsg_Append_Entries; - -typedef struct RaftMsg_Append_Resp { - SyncIndex index; -} RaftMsg_Append_Resp; - -typedef struct SSyncMessage { - ESyncRaftMessageType msgType; - SyncTerm term; - SyncGroupId groupId; - SyncNodeId from; - - union { - RaftMsgInternal_Prop propose; - - RaftMsgInternal_Election election; - - RaftMsg_Vote vote; - RaftMsg_VoteResp voteResp; - - RaftMsg_Append_Entries appendEntries; - RaftMsg_Append_Resp appendResp; - }; -} SSyncMessage; - -static FORCE_INLINE SSyncMessage* syncInitPropMsg(SSyncMessage* pMsg, const SSyncBuffer* pBuf, void* pData, bool isWeak) { - *pMsg = (SSyncMessage) { - .msgType = RAFT_MSG_INTERNAL_PROP, - .term = 0, - .propose = (RaftMsgInternal_Prop) { - .isWeak = isWeak, - .pBuf = pBuf, - .pData = pData, - }, - }; - - return pMsg; -} - -static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNodeId from) { - *pMsg = (SSyncMessage) { - .msgType = RAFT_MSG_INTERNAL_ELECTION, - .term = 0, - .from = from, - .election = (RaftMsgInternal_Election) { - - }, - }; - - return pMsg; -} - -static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from, - SyncTerm term, ESyncRaftElectionType cType, - SyncIndex lastIndex, SyncTerm lastTerm) { - SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); - if (pMsg == NULL) { - return NULL; - } - *pMsg = (SSyncMessage) { - .groupId = groupId, - .from = from, - .term = term, - .msgType = RAFT_MSG_VOTE, - .vote = (RaftMsg_Vote) { - .cType = cType, - .lastIndex = lastIndex, - .lastTerm = lastTerm, - }, - }; - - return pMsg; -} - -static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNodeId from, - ESyncRaftElectionType cType, bool rejected) { - SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); - if (pMsg == NULL) { - return NULL; - } - *pMsg = (SSyncMessage) { - .groupId = groupId, - .from = from, - .msgType = RAFT_MSG_VOTE_RESP, - .voteResp = (RaftMsg_VoteResp) { - .cType = cType, - .rejected = rejected, - }, - }; - - return pMsg; -} - -static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNodeId from, - SyncTerm term, SyncIndex logIndex, SyncTerm logTerm, - SyncIndex commitIndex, int nEntries, SSyncRaftEntry* entries) { - SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); - if (pMsg == NULL) { - return NULL; - } - *pMsg = (SSyncMessage) { - .groupId = groupId, - .from = from, - .term = term, - .msgType = RAFT_MSG_APPEND, - .appendEntries = (RaftMsg_Append_Entries) { - .index = logIndex, - .term = logTerm, - .commitIndex = commitIndex, - .nEntries = nEntries, - .entries = entries, - }, - }; - - return pMsg; -} - -static FORCE_INLINE SSyncMessage* syncNewEmptyAppendRespMsg(SyncGroupId groupId, SyncNodeId from, SyncTerm term) { - SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(SSyncMessage)); - if (pMsg == NULL) { - return NULL; - } - *pMsg = (SSyncMessage) { - .groupId = groupId, - .from = from, - .term = term, - .msgType = RAFT_MSG_APPEND_RESP, - .appendResp = (RaftMsg_Append_Resp) { - - }, - }; - - return pMsg; -} - -static FORCE_INLINE bool syncIsInternalMsg(ESyncRaftMessageType msgType) { - return msgType == RAFT_MSG_INTERNAL_PROP || - msgType == RAFT_MSG_INTERNAL_ELECTION; -} - -static FORCE_INLINE bool syncIsPreVoteRespMsg(const SSyncMessage* pMsg) { - return pMsg->msgType == RAFT_MSG_VOTE_RESP && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; -} - -static FORCE_INLINE bool syncIsPreVoteMsg(const SSyncMessage* pMsg) { - return pMsg->msgType == RAFT_MSG_VOTE && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION; -} - -void syncFreeMessage(const SSyncMessage* pMsg); - -// message handlers -int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); -int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); -int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); -int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); - -#endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/raft_replication.h b/source/libs/sync/inc/raft_replication.h deleted file mode 100644 index 180a2db61f8a553d6807dc4a52cd3b397fbcb067..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/raft_replication.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_REPLICATION_H -#define TD_SYNC_RAFT_REPLICATION_H - -#include "sync.h" -#include "syncInt.h" -#include "sync_type.h" - -// syncRaftMaybeSendAppend sends an append RPC with new entries to the given peer, -// if necessary. Returns true if a message was sent. The sendIfEmpty -// argument controls whether messages with no entries will be sent -// ("empty" messages are useful to convey updated Commit indexes, but -// are undesirable when we're sending multiple messages in a batch). -bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty); - -#endif /* TD_SYNC_RAFT_REPLICATION_H */ diff --git a/source/libs/sync/inc/raft_unstable_log.h b/source/libs/sync/inc/raft_unstable_log.h deleted file mode 100644 index 0748a425a1701bc9686a5c7a0d9fa1956cfeefd5..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/raft_unstable_log.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_UNSTABLE_LOG_H -#define TD_SYNC_RAFT_UNSTABLE_LOG_H - -#include "sync_type.h" - -/* in-memory unstable raft log storage */ -struct SSyncRaftUnstableLog { -#if 0 - /* Circular buffer of log entries */ - RaftEntry *entries; - - /* size of Circular buffer */ - int size; - - /* Indexes of used slots [front, back) */ - int front, back; - - /* Index of first entry is offset + 1 */ - SyncIndex offset; - - /* meta data of snapshot */ - SSyncRaftUnstableLog snapshot; -#endif -}; - -/** - * return index of last in memory log, return 0 if log is empty - **/ -//SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog); - -#if 0 -void raftLogInit(RaftLog* pLog); - -void raftLogClose(RaftLog* pLog); - -/** - * When startup populating log entrues loaded from disk, - * init raft memory log with snapshot index,term and log start idnex. - **/ -/* -void raftLogStart(RaftLog* pLog, - RaftSnapshotMeta snapshot, - SyncIndex startIndex); -*/ -/** - * Get the number of entries the log. - **/ -int raftLogNumEntries(const RaftLog* pLog); - - - -/** - * return last term of in memory log, return 0 if log is empty - **/ -SyncTerm raftLogLastTerm(RaftLog* pLog); - -/** - * return term of log with the given index, return 0 if the term of index cannot be found - * , errCode will save the error code. - **/ -SyncTerm raftLogTermOf(RaftLog* pLog, SyncIndex index, RaftCode* errCode); - -/** - * Get the last index of the most recent snapshot. Return 0 if there are no * - * snapshots. - **/ -SyncIndex raftLogSnapshotIndex(RaftLog* pLog); - -/* Append a new entry to the log. */ -int raftLogAppend(RaftLog* pLog, - SyncTerm term, - const SSyncBuffer *buf); - -/** - * acquire log from given index onwards. - **/ -/* -int raftLogAcquire(RaftLog* pLog, - SyncIndex index, - RaftEntry **ppEntries, - int *n); - -void raftLogRelease(RaftLog* pLog, - SyncIndex index, - RaftEntry *pEntries, - int n); -*/ -/* Delete all entries from the given index (included) onwards. */ -void raftLogTruncate(RaftLog* pLog, SyncIndex index); - -/** - * when taking a new snapshot, the function will update the last snapshot information and delete - * all entries up last_index - trailing (included). If the log contains no entry - * a last_index - trailing, then no entry will be deleted. - **/ -void raftLogSnapshot(RaftLog* pLog, SyncIndex index, SyncIndex trailing); - -#endif - -#endif /* TD_SYNC_RAFT_UNSTABLE_LOG_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h deleted file mode 100644 index f99fb066aeb7e1acdf70c3a3284868cfe2dacb12..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/syncInt.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_INT_H -#define _TD_LIBS_SYNC_INT_H - -#include "thash.h" -#include "os.h" -#include "sync.h" -#include "sync_type.h" -#include "raft.h" -#include "tlog.h" - -#define TAOS_SYNC_MAX_WORKER 3 - -typedef struct SSyncWorker { - pthread_t thread; -} SSyncWorker; - -struct SSyncNode { - pthread_mutex_t mutex; - int32_t refCount; - SyncGroupId vgId; - SSyncRaft raft; - void* syncTimer; -}; - -typedef struct SSyncManager { - pthread_mutex_t mutex; - - // sync server rpc - void* serverRpc; - // rpc server hash table base on FQDN:port key - SHashObj* rpcServerTable; - - // sync client rpc - void* clientRpc; - - // worker threads - SSyncWorker worker[TAOS_SYNC_MAX_WORKER]; - - // vgroup hash table - SHashObj* vgroupTable; - - // timer manager - void* syncTimerManager; - -} SSyncManager; - -extern SSyncManager* gSyncManager; - -#define syncFatal(...) do { if (sDebugFlag & DEBUG_FATAL) { taosPrintLog("SYNC FATAL ", 255, __VA_ARGS__); }} while(0) -#define syncError(...) do { if (sDebugFlag & DEBUG_ERROR) { taosPrintLog("SYNC ERROR ", 255, __VA_ARGS__); }} while(0) -#define syncWarn(...) do { if (sDebugFlag & DEBUG_WARN) { taosPrintLog("SYNC WARN ", 255, __VA_ARGS__); }} while(0) -#define syncInfo(...) do { if (sDebugFlag & DEBUG_INFO) { taosPrintLog("SYNC ", 255, __VA_ARGS__); }} while(0) -#define syncDebug(...) do { if (sDebugFlag & DEBUG_DEBUG) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0) -#define syncTrace(...) do { if (sDebugFlag & DEBUG_TRACE) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0) - -#endif /* _TD_LIBS_SYNC_INT_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_const.h b/source/libs/sync/inc/sync_const.h deleted file mode 100644 index b49c17f82e33afe50e5d4175492d94c1628c8eee..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_const.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_CONST_H -#define _TD_LIBS_SYNC_CONST_H - -#include "sync.h" - -static int kSyncRaftMaxInflghtMsgs = 20; - -static SyncIndex kMaxCommitIndex = UINT64_MAX; - -#endif /* _TD_LIBS_SYNC_CONST_H */ diff --git a/source/libs/sync/inc/sync_raft_config_change.h b/source/libs/sync/inc/sync_raft_config_change.h deleted file mode 100644 index 75a29f35e8052858423e6bf3755f6a2ba0065ec6..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_config_change.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_CONFIG_CHANGE_H -#define TD_SYNC_RAFT_CONFIG_CHANGE_H - -#include "sync_type.h" -#include "sync_raft_proto.h" - -/** - * Changer facilitates configuration changes. It exposes methods to handle - * simple and joint consensus while performing the proper validation that allows - * refusing invalid configuration changes before they affect the active - * configuration. - **/ -struct SSyncRaftChanger { - SSyncRaftProgressTracker* tracker; - SyncIndex lastIndex; -}; - -typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); - -// Simple carries out a series of configuration changes that (in aggregate) -// mutates the incoming majority config Voters[0] by at most one. This method -// will return an error if that is not the case, if the resulting quorum is -// zero, or if the configuration is in a joint state (i.e. if there is an -// outgoing configuration). -int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); - -int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const SSyncConfChangeSingleArray* css, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); - -#endif /* TD_SYNC_RAFT_CONFIG_CHANGE_H */ diff --git a/source/libs/sync/inc/sync_raft_impl.h b/source/libs/sync/inc/sync_raft_impl.h deleted file mode 100644 index 1a6c13f65f477b0eff38c31e454217c4c4e3321f..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_impl.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_IMPL_H -#define _TD_LIBS_SYNC_RAFT_IMPL_H - -#include "sync.h" -#include "sync_type.h" -#include "raft_message.h" -#include "sync_raft_quorum.h" - -void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId); -void syncRaftBecomePreCandidate(SSyncRaft* pRaft); -void syncRaftBecomeCandidate(SSyncRaft* pRaft); -void syncRaftBecomeLeader(SSyncRaft* pRaft); - -void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType); - -void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType); - -void syncRaftTriggerHeartbeat(SSyncRaft* pRaft); - -void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft); -bool syncRaftIsPromotable(SSyncRaft* pRaft); -bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft); -int syncRaftQuorum(SSyncRaft* pRaft); - -bool syncRaftMaybeCommit(SSyncRaft* pRaft); - -ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id, - bool preVote, bool accept, - int* rejectNum, int *granted); - -static FORCE_INLINE bool syncRaftIsEmptyServerState(const SSyncServerState* serverState) { - return serverState->commitIndex == 0 && - serverState->term == SYNC_NON_TERM && - serverState->voteFor == SYNC_NON_NODE_ID; -} - -void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState); - -void syncRaftBroadcastAppend(SSyncRaft* pRaft); - -SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id); - -#endif /* _TD_LIBS_SYNC_RAFT_IMPL_H */ diff --git a/source/libs/sync/inc/sync_raft_inflights.h b/source/libs/sync/inc/sync_raft_inflights.h deleted file mode 100644 index 627bf9a26f28eaecd752b7afbc7e4c3f7508bdb2..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_inflights.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_INFLIGHTS_H -#define TD_SYNC_RAFT_INFLIGHTS_H - -#include "sync.h" - -// Inflights limits the number of MsgApp (represented by the largest index -// contained within) sent to followers but not yet acknowledged by them. Callers -// use Full() to check whether more messages can be sent, call Add() whenever -// they are sending a new append, and release "quota" via FreeLE() whenever an -// ack is received. -typedef struct SSyncRaftInflights { - // the starting index in the buffer - int start; - - // number of inflights in the buffer - int count; - - // the size of the buffer - int size; - - // buffer contains the index of the last entry - // inside one message. - SyncIndex* buffer; -} SSyncRaftInflights; - -SSyncRaftInflights* syncRaftOpenInflights(int size); -void syncRaftCloseInflights(SSyncRaftInflights*); - -// reset frees all inflights. -static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) { - inflights->count = 0; - inflights->start = 0; -} - -// Full returns true if no more messages can be sent at the moment. -static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) { - return inflights->count == inflights->size; -} - -// Add notifies the Inflights that a new message with the given index is being -// dispatched. Full() must be called prior to Add() to verify that there is room -// for one more message, and consecutive calls to add Add() must provide a -// monotonic sequence of indexes. -void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex); - -// FreeLE frees the inflights smaller or equal to the given `to` flight. -void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex); - -/** - * syncRaftInflightFreeFirstOne releases the first inflight. - * This is a no-op if nothing is inflight. - **/ -void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights); - -#endif /* TD_SYNC_RAFT_INFLIGHTS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_node_map.h b/source/libs/sync/inc/sync_raft_node_map.h deleted file mode 100644 index b4cf04056d2aaa76cc99b54ba903f8b0b86192ed..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_node_map.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_NODE_MAP_H -#define _TD_LIBS_SYNC_RAFT_NODE_MAP_H - -#include "thash.h" -#include "sync.h" -#include "sync_type.h" - -struct SSyncRaftNodeMap { - SHashObj* nodeIdMap; -}; - -void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap); -void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap); - -void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap); - -bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); - -void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to); - -void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to); - -void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); - -void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); - -int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap); - -// return true if reach the end -bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId); - -bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap); - -#endif /* _TD_LIBS_SYNC_RAFT_NODE_MAP_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_progress.h b/source/libs/sync/inc/sync_raft_progress.h deleted file mode 100644 index 32c21281cde4e1479c91b8ea5034aa3a81327970..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_progress.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_PROGRESS_H -#define TD_SYNC_RAFT_PROGRESS_H - -#include "sync_type.h" -#include "sync_raft_inflights.h" -#include "thash.h" - -/** - * State defines how the leader should interact with the follower. - * - * When in PROGRESS_STATE_PROBE, leader sends at most one replication message - * per heartbeat interval. It also probes actual progress of the follower. - * - * When in PROGRESS_STATE_REPLICATE, leader optimistically increases next - * to the latest entry sent after sending replication message. This is - * an optimized state for fast replicating log entries to the follower. - * - * When in PROGRESS_STATE_SNAPSHOT, leader should have sent out snapshot - * before and stops sending any replication message. - * - * PROGRESS_STATE_PROBE is the initial state. - **/ -typedef enum ESyncRaftProgressState { - /** - * StateProbe indicates a follower whose last index isn't known. Such a - * follower is "probed" (i.e. an append sent periodically) to narrow down - * its last index. In the ideal (and common) case, only one round of probing - * is necessary as the follower will react with a hint. Followers that are - * probed over extended periods of time are often offline. - **/ - PROGRESS_STATE_PROBE = 0, - - /** - * StateReplicate is the state steady in which a follower eagerly receives - * log entries to append to its log. - **/ - PROGRESS_STATE_REPLICATE, - - /** - * StateSnapshot indicates a follower that needs log entries not available - * from the leader's Raft log. Such a follower needs a full snapshot to - * return to StateReplicate. - **/ - PROGRESS_STATE_SNAPSHOT, -} ESyncRaftProgressState; - -static const char* kProgressStateString[] = { - "Probe", - "Replicate", - "Snapshot", -}; - -// Progress represents a follower’s progress in the view of the leader. Leader -// maintains progresses of all followers, and sends entries to the follower -// based on its progress. -// -// NB(tbg): Progress is basically a state machine whose transitions are mostly -// strewn around `*raft.raft`. Additionally, some fields are only used when in a -// certain State. All of this isn't ideal. -struct SSyncRaftProgress { - SyncGroupId groupId; - - SyncNodeId id; - - int16_t refCount; - - SyncIndex nextIndex; - - SyncIndex matchIndex; - - // State defines how the leader should interact with the follower. - // - // When in StateProbe, leader sends at most one replication message - // per heartbeat interval. It also probes actual progress of the follower. - // - // When in StateReplicate, leader optimistically increases next - // to the latest entry sent after sending replication message. This is - // an optimized state for fast replicating log entries to the follower. - // - // When in StateSnapshot, leader should have sent out snapshot - // before and stops sending any replication message. - ESyncRaftProgressState state; - - // PendingSnapshot is used in StateSnapshot. - // If there is a pending snapshot, the pendingSnapshot will be set to the - // index of the snapshot. If pendingSnapshot is set, the replication process of - // this Progress will be paused. raft will not resend snapshot until the pending one - // is reported to be failed. - SyncIndex pendingSnapshotIndex; - - // RecentActive is true if the progress is recently active. Receiving any messages - // from the corresponding follower indicates the progress is active. - // RecentActive can be reset to false after an election timeout. - // - // TODO(tbg): the leader should always have this set to true. - bool recentActive; - - // ProbeSent is used while this follower is in StateProbe. When ProbeSent is - // true, raft should pause sending replication message to this peer until - // ProbeSent is reset. See ProbeAcked() and IsPaused(). - bool probeSent; - - // Inflights is a sliding window for the inflight messages. - // Each inflight message contains one or more log entries. - // The max number of entries per message is defined in raft config as MaxSizePerMsg. - // Thus inflight effectively limits both the number of inflight messages - // and the bandwidth each Progress can use. - // When inflights is Full, no more message should be sent. - // When a leader sends out a message, the index of the last - // entry should be added to inflights. The index MUST be added - // into inflights in order. - // When a leader receives a reply, the previous inflights should - // be freed by calling inflights.FreeLE with the index of the last - // received entry. - SSyncRaftInflights* inflights; - - // IsLearner is true if this progress is tracked for a learner. - bool isLearner; -}; - -struct SSyncRaftProgressMap { - // map nodeId -> SSyncRaftProgress* - SHashObj* progressMap; -}; - -static FORCE_INLINE const char* syncRaftProgressStateString(const SSyncRaftProgress* progress) { - return kProgressStateString[progress->state]; -} - -void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress); - -// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or, -// optionally and if larger, the index of the pending snapshot. -void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress); - -// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1. -void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress); - -// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the -// index acked by it. The method returns false if the given n index comes from -// an outdated message. Otherwise it updates the progress and returns true. -bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex); - -// OptimisticUpdate signals that appends all the way up to and including index n -// are in-flight. As a result, Next is increased to n+1. -static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) { - progress->nextIndex = nextIndex + 1; -} - -// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The -// arguments are the index of the append message rejected by the follower, and -// the hint that we want to decrease to. -// -// Rejections can happen spuriously as messages are sent out of order or -// duplicated. In such cases, the rejection pertains to an index that the -// Progress already knows were previously acknowledged, and false is returned -// without changing the Progress. -// -// If the rejection is genuine, Next is lowered sensibly, and the Progress is -// cleared for sending log entries. -bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, - SyncIndex rejected, SyncIndex matchHint); - -// IsPaused returns whether sending log entries to this node has been throttled. -// This is done when a node has rejected recent MsgApps, is currently waiting -// for a snapshot, or has reached the MaxInflightMsgs limit. In normal -// operation, this is false. A throttled node will be contacted less frequently -// until it has reached a state in which it's able to accept a steady stream of -// log entries again. -bool syncRaftProgressIsPaused(SSyncRaftProgress* progress); - -static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) { - return progress->nextIndex; -} - -static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_STATE_REPLICATE; -} - -static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_STATE_SNAPSHOT; -} - -static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) { - return progress->state == PROGRESS_STATE_PROBE; -} - -static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) { - return progress->recentActive; -} - -void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap); -void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap); - -void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap); -void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to); - -SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id); - -int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress); - -void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id); - -bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id); - -/** - * return true if progress's log is up-todate - **/ -bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress); - -// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending -// snapshot index. -void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex); - -void syncRaftCopyProgress(const SSyncRaftProgress* from, SSyncRaftProgress* to); - -// return true if reach the end -bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress); - -bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg); - -#if 0 - -void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); - - - -SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i); - -void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i); - -void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i); - -bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i); - -void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i); - - - -void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i); - -#endif - -#endif /* TD_SYNC_RAFT_PROGRESS_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_progress_tracker.h b/source/libs/sync/inc/sync_raft_progress_tracker.h deleted file mode 100644 index 0a3c7dd6fc57ef0adcb22382910cc0e710cabb70..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_progress_tracker.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H -#define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H - -#include "sync_type.h" -#include "sync_raft_quorum.h" -#include "sync_raft_quorum_joint.h" -#include "sync_raft_progress.h" -#include "sync_raft_proto.h" -#include "thash.h" - -// Config reflects the configuration tracked in a ProgressTracker. -struct SSyncRaftProgressTrackerConfig { - SSyncRaftQuorumJointConfig voters; - - // autoLeave is true if the configuration is joint and a transition to the - // incoming configuration should be carried out automatically by Raft when - // this is possible. If false, the configuration will be joint until the - // application initiates the transition manually. - bool autoLeave; - - // Learners is a set of IDs corresponding to the learners active in the - // current configuration. - // - // Invariant: Learners and Voters does not intersect, i.e. if a peer is in - // either half of the joint config, it can't be a learner; if it is a - // learner it can't be in either half of the joint config. This invariant - // simplifies the implementation since it allows peers to have clarity about - // its current role without taking into account joint consensus. - SSyncRaftNodeMap learners; - - // When we turn a voter into a learner during a joint consensus transition, - // we cannot add the learner directly when entering the joint state. This is - // because this would violate the invariant that the intersection of - // voters and learners is empty. For example, assume a Voter is removed and - // immediately re-added as a learner (or in other words, it is demoted): - // - // Initially, the configuration will be - // - // voters: {1 2 3} - // learners: {} - // - // and we want to demote 3. Entering the joint configuration, we naively get - // - // voters: {1 2} & {1 2 3} - // learners: {3} - // - // but this violates the invariant (3 is both voter and learner). Instead, - // we get - // - // voters: {1 2} & {1 2 3} - // learners: {} - // next_learners: {3} - // - // Where 3 is now still purely a voter, but we are remembering the intention - // to make it a learner upon transitioning into the final configuration: - // - // voters: {1 2} - // learners: {3} - // next_learners: {} - // - // Note that next_learners is not used while adding a learner that is not - // also a voter in the joint config. In this case, the learner is added - // right away when entering the joint configuration, so that it is caught up - // as soon as possible. - SSyncRaftNodeMap learnersNext; -}; - -struct SSyncRaftProgressTracker { - SSyncRaftProgressTrackerConfig config; - - SSyncRaftProgressMap progressMap; - - // nodeid -> ESyncRaftVoteType map - SHashObj* votesMap; - - int maxInflightMsgs; - - SSyncRaft* pRaft; -}; - -SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft); - -void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config); -void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config); - -void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config); - -// ResetVotes prepares for a new round of vote counting via recordVote. -void syncRaftResetVotes(SSyncRaftProgressTracker*); - -void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg); - -// RecordVote records that the node with the given id voted for this Raft -// instance if v == true (and declined it otherwise). -void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant); - -void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to); - -int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); - -// TallyVotes returns the number of granted and rejected Votes, and whether the -// election outcome is known. -ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted); - -void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs); - -// Committed returns the largest log index known to be committed based on what -// the voting members of the group have acknowledged. -SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker); - -// QuorumActive returns true if the quorum is active from the view of the local -// raft state machine. Otherwise, it returns false. -bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker); - -bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId); - -#endif /* _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H */ diff --git a/source/libs/sync/inc/sync_raft_proto.h b/source/libs/sync/inc/sync_raft_proto.h deleted file mode 100644 index 29371e328dc7365108075f3a42e7722bbadad03c..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_proto.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_PROTO_H -#define TD_SYNC_RAFT_PROTO_H - -#include "sync_type.h" -#include "sync_raft_node_map.h" - -typedef enum ESyncRaftConfChangeType { - SYNC_RAFT_Conf_AddNode = 0, - SYNC_RAFT_Conf_RemoveNode = 1, - SYNC_RAFT_Conf_UpdateNode = 2, - SYNC_RAFT_Conf_AddLearnerNode = 3, -} ESyncRaftConfChangeType; - -// ConfChangeSingle is an individual configuration change operation. Multiple -// such operations can be carried out atomically via a ConfChangeV2. -typedef struct SSyncConfChangeSingle { - ESyncRaftConfChangeType type; - SyncNodeId nodeId; -} SSyncConfChangeSingle; - -typedef struct SSyncConfChangeSingleArray { - int n; - SSyncConfChangeSingle* changes; -} SSyncConfChangeSingleArray; - -typedef struct SSyncConfigState { - // The voters in the incoming config. (If the configuration is not joint, - // then the outgoing config is empty). - SSyncRaftNodeMap voters; - - // The learners in the incoming config. - SSyncRaftNodeMap learners; - - // The voters in the outgoing config. - SSyncRaftNodeMap votersOutgoing; - - // The nodes that will become learners when the outgoing config is removed. - // These nodes are necessarily currently in nodes_joint (or they would have - // been added to the incoming config right away). - SSyncRaftNodeMap learnersNext; - - // If set, the config is joint and Raft will automatically transition into - // the final config (i.e. remove the outgoing config) when this is safe. - bool autoLeave; -} SSyncConfigState; - -static FORCE_INLINE bool syncRaftConfArrayIsEmpty(const SSyncConfChangeSingleArray* ary) { - return ary->n == 0; -} - -static FORCE_INLINE void syncRaftInitConfArray(SSyncConfChangeSingleArray* ary) { - *ary = (SSyncConfChangeSingleArray) { - .changes = NULL, - .n = 0, - }; -} - -static FORCE_INLINE void syncRaftFreeConfArray(SSyncConfChangeSingleArray* ary) { - if (ary->changes != NULL) free(ary->changes); -} - -#endif /* TD_SYNC_RAFT_PROTO_H */ diff --git a/source/libs/sync/inc/sync_raft_quorum.h b/source/libs/sync/inc/sync_raft_quorum.h deleted file mode 100644 index 16ac1cd02934d0fbeeeb969d499727f166295252..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_quorum.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_QUORUM_H -#define TD_SYNC_RAFT_QUORUM_H - -/** - * ESyncRaftVoteResult indicates the outcome of a vote. - **/ -typedef enum { - /** - * SYNC_RAFT_VOTE_PENDING indicates that the decision of the vote depends on future - * votes, i.e. neither "yes" or "no" has reached quorum yet. - **/ - SYNC_RAFT_VOTE_PENDING = 1, - - /** - * SYNC_RAFT_VOTE_LOST indicates that the quorum has voted "no". - **/ - SYNC_RAFT_VOTE_LOST = 2, - - /** - * SYNC_RAFT_VOTE_WON indicates that the quorum has voted "yes". - **/ - SYNC_RAFT_VOTE_WON = 3, -} ESyncRaftVoteResult; - -#endif /* TD_SYNC_RAFT_QUORUM_H */ \ No newline at end of file diff --git a/source/libs/sync/inc/sync_raft_quorum_joint.h b/source/libs/sync/inc/sync_raft_quorum_joint.h deleted file mode 100644 index 9d5f10ab51c0b5fc3532899dccac1190e13b4101..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_quorum_joint.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H -#define _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H - -#include "taosdef.h" -#include "sync.h" -#include "sync_type.h" -#include "sync_raft_node_map.h" -#include "thash.h" - -// JointConfig is a configuration of two groups of (possibly overlapping) -// majority configurations. Decisions require the support of both majorities. -typedef struct SSyncRaftQuorumJointConfig { - SSyncRaftNodeMap outgoing; - SSyncRaftNodeMap incoming; -} SSyncRaftQuorumJointConfig; - -// IDs returns a newly initialized map representing the set of voters present -// in the joint configuration. -void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap); - -// CommittedIndex returns the largest committed index for the given joint -// quorum. An index is jointly committed if it is committed in both constituent -// majorities. -SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg); - -// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns -// a result indicating whether the vote is pending, lost, or won. A joint quorum -// requires both majority quorums to vote in favor. -ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap); - -void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config); - -static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { - return syncRaftIsInNodeMap(&config->outgoing, id); -} - -static FORCE_INLINE bool syncRaftJointConfigInIncoming(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { - return syncRaftIsInNodeMap(&config->incoming, id); -} - -void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id); - -void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id); - -static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigIncoming(const SSyncRaftQuorumJointConfig* config) { - return &config->incoming; -} - -static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SSyncRaftQuorumJointConfig* config) { - return &config->outgoing; -} - -static FORCE_INLINE void syncRaftJointConfigClearOutgoing(SSyncRaftQuorumJointConfig* config) { - syncRaftClearNodeMap(&config->outgoing); -} - -static FORCE_INLINE bool syncRaftJointConfigIsIncomingEmpty(const SSyncRaftQuorumJointConfig* config) { - return syncRaftNodeMapSize(&config->incoming) == 0; -} - -static FORCE_INLINE bool syncRaftJointConfigIsOutgoingEmpty(const SSyncRaftQuorumJointConfig* config) { - return syncRaftNodeMapSize(&config->outgoing) == 0; -} - -static FORCE_INLINE bool syncRaftJointConfigIsInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) { - return syncRaftIsInNodeMap(&config->outgoing, id); -} - -#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */ diff --git a/source/libs/sync/inc/sync_raft_quorum_majority.h b/source/libs/sync/inc/sync_raft_quorum_majority.h deleted file mode 100644 index 399bd71db8bd068b577d0890d1cab150ce524bc9..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_quorum_majority.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H -#define _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H - -#include "sync.h" -#include "sync_type.h" -#include "sync_raft_quorum.h" -#include "thash.h" - -/** - * syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns - * a result indicating whether the vote is pending (i.e. neither a quorum of - * yes/no has been reached), won (a quorum of yes has been reached), or lost (a - * quorum of no has been reached). - **/ -ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap); - -// CommittedIndex computes the committed index from those supplied via the -// provided AckedIndexer (for the active config). -SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg); - -#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */ diff --git a/source/libs/sync/inc/sync_raft_restore.h b/source/libs/sync/inc/sync_raft_restore.h deleted file mode 100644 index df4448cab8b4d9550541a5ea342d3464492be9ec..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_raft_restore.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TD_SYNC_RAFT_RESTORE_H -#define TD_SYNC_RAFT_RESTORE_H - -#include "sync_type.h" -#include "sync_raft_proto.h" - -// syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and -// runs a sequence of changes enacting the configuration described in the -// ConfState. -// -// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure -// the Changer only needs a ProgressMap (not a whole Tracker) at which point -// this can just take LastIndex and MaxInflight directly instead and cook up -// the results from that alone. -int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); - -#endif /* TD_SYNC_RAFT_RESTORE_H */ diff --git a/source/libs/sync/inc/sync_type.h b/source/libs/sync/inc/sync_type.h deleted file mode 100644 index c5c4cc3a76ed85bd030146c0a728524c9b49689c..0000000000000000000000000000000000000000 --- a/source/libs/sync/inc/sync_type.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef _TD_LIBS_SYNC_TYPE_H -#define _TD_LIBS_SYNC_TYPE_H - -#include -#include "sync.h" -#include "osMath.h" - -#define SYNC_NON_NODE_ID -1 -#define SYNC_NON_TERM 0 - -typedef int32_t SyncTime; -typedef uint32_t SyncTick; - -typedef struct SSyncRaft SSyncRaft; - -typedef struct SSyncRaftProgress SSyncRaftProgress; -typedef struct SSyncRaftProgressMap SSyncRaftProgressMap; -typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig; - -typedef struct SSyncRaftNodeMap SSyncRaftNodeMap; - -typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker; - -typedef struct SSyncRaftChanger SSyncRaftChanger; - -typedef struct SSyncRaftLog SSyncRaftLog; - -typedef struct SSyncRaftEntry SSyncRaftEntry; - -#if 0 -#ifndef TMIN -#define TMIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif - -#ifndef TMAX -#define TMAX(x, y) (((x) > (y)) ? (x) : (y)) -#endif -#endif - - -typedef struct SSyncServerState { - SyncNodeId voteFor; - SyncTerm term; - SyncIndex commitIndex; -} SSyncServerState; - -typedef struct SSyncClusterConfig { - // Log index number of current cluster config. - SyncIndex index; - - // Log index number of previous cluster config. - SyncIndex prevIndex; - - // current cluster - const SSyncCluster* cluster; -} SSyncClusterConfig; - -typedef enum { - SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0, - SYNC_RAFT_CAMPAIGN_ELECTION = 1, - SYNC_RAFT_CAMPAIGN_TRANSFER = 2, -} ESyncRaftElectionType; - -typedef enum { - // grant the vote request - SYNC_RAFT_VOTE_RESP_GRANT = 1, - - // reject the vote request - SYNC_RAFT_VOTE_RESP_REJECT = 2, -} ESyncRaftVoteType; - -typedef void (*visitProgressFp)(SSyncRaftProgress* progress, void* arg); - -typedef void (*matchAckIndexerFp)(SyncNodeId id, void* arg, SyncIndex* index); - -#endif /* _TD_LIBS_SYNC_TYPE_H */ diff --git a/source/libs/sync/src/raft.c b/source/libs/sync/src/raft.c deleted file mode 100644 index 72b0d268a8d9acec0c3b25650bc614256c235bd8..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "sync_raft_impl.h" -#include "raft_log.h" -#include "sync_raft_restore.h" -#include "raft_replication.h" -#include "sync_raft_config_change.h" -#include "sync_raft_progress_tracker.h" -#include "syncInt.h" - -#define RAFT_READ_LOG_MAX_NUM 100 - -static int deserializeServerStateFromBuffer(SSyncServerState* server, const char* buffer, int n); -static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const char* buffer, int n); - -static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfig* config, - const SSyncRaftProgressMap* progressMap, SSyncConfigState* cs); - -static void abortLeaderTransfer(SSyncRaft* pRaft); - -static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); -static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); -static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); - -int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) { - SSyncNode* pNode = pRaft->pNode; - SSyncServerState serverState; - SSyncConfigState confState; - SStateManager* stateManager; - SSyncLogStore* logStore; - SSyncFSM* fsm; - SSyncBuffer buffer[RAFT_READ_LOG_MAX_NUM]; - int nBuf, limit, i; - char* buf; - int n; - SSyncRaftChanger changer; - - memset(pRaft, 0, sizeof(SSyncRaft)); - - memcpy(&pRaft->fsm, &pInfo->fsm, sizeof(SSyncFSM)); - memcpy(&pRaft->logStore, &pInfo->logStore, sizeof(SSyncLogStore)); - memcpy(&pRaft->stateManager, &pInfo->stateManager, sizeof(SStateManager)); - - stateManager = &(pRaft->stateManager); - logStore = &(pRaft->logStore); - fsm = &(pRaft->fsm); - - pRaft->nodeInfoMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - if (pRaft->nodeInfoMap == NULL) { - return -1; - } - - // init progress tracker - pRaft->tracker = syncRaftOpenProgressTracker(pRaft); - if (pRaft->tracker == NULL) { - return -1; - } - - // open raft log - if ((pRaft->log = syncRaftLogOpen()) == NULL) { - return -1; - } - // read server state - if (stateManager->readServerState(stateManager, &buf, &n) != 0) { - syncError("readServerState for vgid %d fail", pInfo->vgId); - return -1; - } - if (deserializeServerStateFromBuffer(&serverState, buf, n) != 0) { - syncError("deserializeServerStateFromBuffer for vgid %d fail", pInfo->vgId); - return -1; - } - free(buf); - //assert(initIndex <= serverState.commitIndex); - - // read config state - if (stateManager->readClusterState(stateManager, &buf, &n) != 0) { - syncError("readClusterState for vgid %d fail", pInfo->vgId); - return -1; - } - if (deserializeClusterStateFromBuffer(&confState, buf, n) != 0) { - syncError("deserializeClusterStateFromBuffer for vgid %d fail", pInfo->vgId); - return -1; - } - free(buf); - - changer = (SSyncRaftChanger) { - .tracker = pRaft->tracker, - .lastIndex = syncRaftLogLastIndex(pRaft->log), - }; - SSyncRaftProgressTrackerConfig config; - SSyncRaftProgressMap progressMap; - - if (syncRaftRestoreConfig(&changer, &confState, &config, &progressMap) < 0) { - syncError("syncRaftRestoreConfig for vgid %d fail", pInfo->vgId); - return -1; - } - - // save restored config and progress map to tracker - syncRaftCopyProgressMap(&progressMap, &pRaft->tracker->progressMap); - syncRaftCopyTrackerConfig(&config, &pRaft->tracker->config); - - // free progress map and config - syncRaftFreeProgressMap(&progressMap); - syncRaftFreeTrackConfig(&config); - - if (!syncRaftIsEmptyServerState(&serverState)) { - syncRaftLoadState(pRaft, &serverState); - } - - if (pInfo->appliedIndex > 0) { - syncRaftLogAppliedTo(pRaft->log, pInfo->appliedIndex); - } - - syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); - - syncInfo("[%d:%d] restore vgid %d state: snapshot index success", - pRaft->selfGroupId, pRaft->selfId, pInfo->vgId); - return 0; -} - -int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - syncDebug("from %d, type:%d, term:%" PRId64 ", state:%d", - pMsg->from, pMsg->msgType, pMsg->term, pRaft->state); - - if (preHandleMessage(pRaft, pMsg)) { - syncFreeMessage(pMsg); - return 0; - } - - ESyncRaftMessageType msgType = pMsg->msgType; - if (msgType == RAFT_MSG_INTERNAL_ELECTION) { - syncRaftHandleElectionMessage(pRaft, pMsg); - } else if (msgType == RAFT_MSG_VOTE) { - syncRaftHandleVoteMessage(pRaft, pMsg); - } else { - pRaft->stepFp(pRaft, pMsg); - } - - syncFreeMessage(pMsg); - return 0; -} - -int32_t syncRaftTick(SSyncRaft* pRaft) { - pRaft->currentTick += 1; - pRaft->tickFp(pRaft); - return 0; -} - -static int deserializeServerStateFromBuffer(SSyncServerState* server, const char* buffer, int n) { - return 0; -} - -static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const char* buffer, int n) { - return 0; -} - -static void visitProgressMaybeSendAppend(SSyncRaftProgress* progress, void* arg) { - syncRaftMaybeSendAppend(arg, progress, false); -} - -// switchToConfig reconfigures this node to use the provided configuration. It -// updates the in-memory state and, when necessary, carries out additional -// actions such as reacting to the removal of nodes or changed quorum -// requirements. -// -// The inputs usually result from restoring a ConfState or applying a ConfChange. -static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfig* config, - const SSyncRaftProgressMap* progressMap, SSyncConfigState* cs) { - SyncNodeId selfId = pRaft->selfId; - int i; - bool exist; - SSyncRaftProgress* progress = NULL; - - syncRaftConfigState(pRaft->tracker, cs); - progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, selfId); - exist = (progress != NULL); - - // Update whether the node itself is a learner, resetting to false when the - // node is removed. - if (exist) { - pRaft->isLearner = progress->isLearner; - } else { - pRaft->isLearner = false; - } - - if ((!exist || pRaft->isLearner) && pRaft->state == TAOS_SYNC_STATE_LEADER) { - // This node is leader and was removed or demoted. We prevent demotions - // at the time writing but hypothetically we handle them the same way as - // removing the leader: stepping down into the next Term. - // - // TODO(tbg): step down (for sanity) and ask follower with largest Match - // to TimeoutNow (to avoid interruption). This might still drop some - // proposals but it's better than nothing. - // - // TODO(tbg): test this branch. It is untested at the time of writing. - return; - } - - // The remaining steps only make sense if this node is the leader and there - // are other nodes. - if (pRaft->state != TAOS_SYNC_STATE_LEADER || syncRaftNodeMapSize(&cs->voters) == 0) { - return; - } - - if (syncRaftMaybeCommit(pRaft)) { - // If the configuration change means that more entries are committed now, - // broadcast/append to everyone in the updated config. - syncRaftBroadcastAppend(pRaft); - } else { - // Otherwise, still probe the newly added replicas; there's no reason to - // let them wait out a heartbeat interval (or the next incoming - // proposal). - syncRaftProgressVisit(pRaft->tracker, visitProgressMaybeSendAppend, pRaft); - - // If the the leadTransferee was removed or demoted, abort the leadership transfer. - SyncNodeId leadTransferee = pRaft->leadTransferee; - if (leadTransferee != SYNC_NON_NODE_ID) { - if (!syncRaftIsInNodeMap(&pRaft->tracker->config.voters.incoming, leadTransferee) && - !syncRaftIsInNodeMap(&pRaft->tracker->config.voters.outgoing, leadTransferee)) { - abortLeaderTransfer(pRaft); - } - } - } -} - -static void abortLeaderTransfer(SSyncRaft* pRaft) { - pRaft->leadTransferee = SYNC_NON_NODE_ID; -} - -/** - * pre-handle message, return true means no need to continue - * Handle the message term, which may result in our stepping down to a follower. - **/ -static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - // local message? - if (pMsg->term == 0) { - return false; - } - - if (pMsg->term > pRaft->term) { - return preHandleNewTermMessage(pRaft, pMsg); - } else if (pMsg->term < pRaft->term) { - return preHandleOldTermMessage(pRaft, pMsg); - } - - return false; -} - -static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - SyncNodeId leaderId = pMsg->from; - ESyncRaftMessageType msgType = pMsg->msgType; - - if (msgType == RAFT_MSG_VOTE) { - // TODO - leaderId = SYNC_NON_NODE_ID; - } - - if (syncIsPreVoteMsg(pMsg)) { - // Never change our term in response to a PreVote - } else if (syncIsPreVoteRespMsg(pMsg) && !pMsg->voteResp.rejected) { - /** - * We send pre-vote requests with a term in our future. If the - * pre-vote is granted, we will increment our term when we get a - * quorum. If it is not, the term comes from the node that - * rejected our vote so we should become a follower at the new - * term. - **/ - } else { - syncInfo("[%d:%d] [term:%" PRId64 "] received a %d message with higher term from %d [term:%" PRId64 "]", - pRaft->selfGroupId, pRaft->selfId, pRaft->term, msgType, pMsg->from, pMsg->term); - syncRaftBecomeFollower(pRaft, pMsg->term, leaderId); - } - - return false; -} - -static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - if (pRaft->checkQuorum && pMsg->msgType == RAFT_MSG_APPEND) { - /** - * We have received messages from a leader at a lower term. It is possible - * that these messages were simply delayed in the network, but this could - * also mean that this node has advanced its term number during a network - * partition, and it is now unable to either win an election or to rejoin - * the majority on the old term. If checkQuorum is false, this will be - * handled by incrementing term numbers in response to MsgVote with a - * higher term, but if checkQuorum is true we may not advance the term on - * MsgVote and must generate other messages to advance the term. The net - * result of these two features is to minimize the disruption caused by - * nodes that have been removed from the cluster's configuration: a - * removed node will send MsgVotes (or MsgPreVotes) which will be ignored, - * but it will not receive MsgApp or MsgHeartbeat, so it will not create - * disruptive term increases - **/ - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from); - if (pNode == NULL) { - return true; - } - SSyncMessage* msg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term); - if (msg == NULL) { - return true; - } - - pRaft->io.send(msg, pNode); - } else { - // ignore other cases - syncInfo("[%d:%d] [term:%" PRId64 "] ignored a %d message with lower term from %d [term:%" PRId64 "]", - pRaft->selfGroupId, pRaft->selfId, pRaft->term, pMsg->msgType, pMsg->from, pMsg->term); - } - - return true; -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_append_entries_message.c b/source/libs/sync/src/raft_handle_append_entries_message.c deleted file mode 100644 index 92ebfe75f5ef3ebb26bb1fd5f00e850206db9936..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_handle_append_entries_message.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "raft.h" -#include "raft_log.h" -#include "sync_raft_impl.h" -#include "raft_message.h" - -int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - const RaftMsg_Append_Entries *appendEntries = &(pMsg->appendEntries); - - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from); - if (pNode == NULL) { - return 0; - } - - SSyncMessage* pRespMsg = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term); - if (pRespMsg == NULL) { - return 0; - } - - RaftMsg_Append_Resp *appendResp = &(pRespMsg->appendResp); - // ignore committed logs - if (syncRaftLogIsCommitted(pRaft->log, appendEntries->index)) { - appendResp->index = pRaft->log->commitIndex; - goto out; - } - - syncInfo("[%d:%d] recv append from %d index %" PRId64"", - pRaft->selfGroupId, pRaft->selfId, pMsg->from, appendEntries->index); - -out: - pRaft->io.send(pRespMsg, pNode); - return 0; -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_election_message.c b/source/libs/sync/src/raft_handle_election_message.c deleted file mode 100644 index a58c8ba5cfa1d663c1486a2c2ccbf7d7c9b28708..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_handle_election_message.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "raft.h" -#include "raft_log.h" -#include "raft_message.h" - -int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - if (pRaft->preVote) { - syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION); - } else { - syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); - } - - return 0; -} diff --git a/source/libs/sync/src/raft_handle_vote_message.c b/source/libs/sync/src/raft_handle_vote_message.c deleted file mode 100644 index 4d940732dcff7e619dd5021a7ed0026f7d40895b..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_handle_vote_message.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "raft.h" -#include "sync_raft_impl.h" -#include "raft_log.h" -#include "raft_message.h" - -static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg); - -int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - SSyncMessage* pRespMsg; - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from); - if (pNode == NULL) { - return 0; - } - - bool grant; - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); - - grant = canGrantVoteMessage(pRaft, pMsg); - pRespMsg = syncNewVoteRespMsg(pRaft->selfGroupId, pRaft->selfId, pMsg->vote.cType, !grant); - if (pRespMsg == NULL) { - return 0; - } - syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d" - "[logterm: %" PRId64 ", index: %" PRId64 "] at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, lastTerm, lastIndex, pRaft->voteFor, - grant ? "grant" : "reject", - pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term); - - pRaft->io.send(pRespMsg, pNode); - return 0; -} - -static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - bool canVote = - // We can vote if this is a repeat of a vote we've already cast... - pRaft->voteFor == pMsg->from || - // ...we haven't voted and we don't think there's a leader yet in this term... - (pRaft->voteFor == SYNC_NON_NODE_ID && pRaft->leaderId == SYNC_NON_NODE_ID) || - // ...or this is a PreVote for a future term... - (pMsg->vote.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION && pMsg->term > pRaft->term); - - // ...and we believe the candidate is up to date. - return canVote && syncRaftLogIsUptodate(pRaft->log, pMsg->vote.lastIndex, pMsg->vote.lastTerm); -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_handle_vote_resp_message.c b/source/libs/sync/src/raft_handle_vote_resp_message.c deleted file mode 100644 index 87a5cfcd159166c74f844e9331085ca464fe83a3..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_handle_vote_resp_message.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "raft.h" -#include "sync_raft_impl.h" -#include "raft_message.h" - -int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - int granted, rejected; - int quorum; - int voterIndex; - - assert(pRaft->state == TAOS_SYNC_STATE_CANDIDATE); - - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from); - if (pNode == NULL) { - syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from); - return 0; - } - - if (pRaft->state != TAOS_SYNC_STATE_CANDIDATE) { - syncError("[%d:%d] is not candidate, ignore vote resp", pRaft->selfGroupId, pRaft->selfId); - return 0; - } - - ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pMsg->from, - pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION, - !pMsg->voteResp.rejected, &rejected, &granted); - - syncInfo("[%d:%d] [quorum:%d] has received %d votes and %d vote rejections", - pRaft->selfGroupId, pRaft->selfId, quorum, granted, rejected); - - if (result == SYNC_RAFT_VOTE_WON) { - if (pRaft->candidateState.inPreVote) { - syncRaftCampaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); - } else { - syncRaftBecomeLeader(pRaft); - syncRaftBroadcastAppend(pRaft); - } - } else if (result == SYNC_RAFT_VOTE_LOST) { - // pb.MsgPreVoteResp contains future term of pre-candidate - // m.Term > r.Term; reuse r.Term - syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID); - } - - return 0; -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_log.c b/source/libs/sync/src/raft_log.c deleted file mode 100644 index b6e6d292e8c44e6212d1e5706d02bb43279b9625..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_log.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft_log.h" - -SSyncRaftLog* syncRaftLogOpen() { - return NULL; -} - -SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog) { - return 0; -} - -SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog) { - return 0; -} - -SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog) { - return 0; -} - -void syncRaftLogAppliedTo(SSyncRaftLog* pLog, SyncIndex appliedIndex) { - -} - -bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term) { - return true; -} - -int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog) { - return 0; -} - -bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog) { - return pLog->commitIndex > pLog->appliedIndex; -} - -SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index) { - return SYNC_NON_TERM; -} - -int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n) { - -} - -int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize, - SSyncRaftEntry **ppEntries, int *n) { - return 0; -} - -void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index, - SSyncRaftEntry *pEntries, int n) { - return; -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_message.c b/source/libs/sync/src/raft_message.c deleted file mode 100644 index e706127f2954ea4c9a5c22bcd9d271fa4b613ea0..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_message.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft_message.h" - -void syncFreeMessage(const SSyncMessage* pMsg) { - if (!syncIsInternalMsg(pMsg->msgType)) { - free((SSyncMessage*)pMsg); - } -} \ No newline at end of file diff --git a/source/libs/sync/src/raft_replication.c b/source/libs/sync/src/raft_replication.c deleted file mode 100644 index c8c2d2c3792f575782fc89f14b8655d87f534834..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_replication.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "raft_log.h" -#include "sync_raft_progress.h" -#include "syncInt.h" -#include "raft_replication.h" - -static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress); -static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress, - SyncIndex prevIndex, SyncTerm prevTerm, - SSyncRaftEntry *entries, int nEntry); - -// maybeSendAppend sends an append RPC with new entries to the given peer, -// if necessary. Returns true if a message was sent. The sendIfEmpty -// argument controls whether messages with no entries will be sent -// ("empty" messages are useful to convey updated Commit indexes, but -// are undesirable when we're sending multiple messages in a batch). -bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty) { - assert(pRaft->state == TAOS_SYNC_STATE_LEADER); - SyncNodeId nodeId = progress->id; - - if (syncRaftProgressIsPaused(progress)) { - syncInfo("node [%d:%d] paused", pRaft->selfGroupId, nodeId); - return false; - } - - SyncIndex nextIndex = syncRaftProgressNextIndex(progress); - SSyncRaftEntry *entries; - int nEntry; - SyncIndex prevIndex; - SyncTerm prevTerm; - - prevIndex = nextIndex - 1; - prevTerm = syncRaftLogTermOf(pRaft->log, prevIndex); - int ret = syncRaftLogAcquire(pRaft->log, nextIndex, pRaft->maxMsgSize, &entries, &nEntry); - - if (nEntry == 0 && !sendIfEmpty) { - return false; - } - - if (ret != 0 || prevTerm == SYNC_NON_TERM) { - return sendSnapshot(pRaft, progress); - } - - return sendAppendEntries(pRaft, progress, prevIndex, prevTerm, entries, nEntry); -} - -static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) { - if (!syncRaftProgressRecentActive(progress)) { - return false; - } - return true; -} - -static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress, - SyncIndex prevIndex, SyncTerm prevTerm, - SSyncRaftEntry *entries, int nEntry) { - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, progress->id); - if (pNode == NULL) { - return false; - } - SyncIndex lastIndex; - SyncTerm logTerm = prevTerm; - - SSyncMessage* msg = syncNewAppendMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term, - prevIndex, prevTerm, pRaft->log->commitIndex, - nEntry, entries); - - if (msg == NULL) { - goto err_release_log; - } - - if (nEntry != 0) { - switch (progress->state) { - // optimistically increase the next when in StateReplicate - case PROGRESS_STATE_REPLICATE: - lastIndex = entries[nEntry - 1].index; - syncRaftProgressOptimisticNextIndex(progress, lastIndex); - syncRaftInflightAdd(progress->inflights, lastIndex); - break; - case PROGRESS_STATE_PROBE: - progress->probeSent = true; - break; - default: - syncFatal("[%d:%d] is sending append in unhandled state %s", - pRaft->selfGroupId, pRaft->selfId, syncRaftProgressStateString(progress)); - break; - } - } - pRaft->io.send(msg, pNode); - return true; - -err_release_log: - syncRaftLogRelease(pRaft->log, prevIndex + 1, entries, nEntry); - return false; -} diff --git a/source/libs/sync/src/raft_unstable_log.c b/source/libs/sync/src/raft_unstable_log.c deleted file mode 100644 index e798e206626135c47a1344a5a7b04881fc947efa..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/raft_unstable_log.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync.h" -#include "raft_unstable_log.h" - -/* -SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog) { - return 0; -} -*/ \ No newline at end of file diff --git a/source/libs/sync/src/sync.c b/source/libs/sync/src/sync.c index 321b03d2ee463023c2ba570a287bea93fa75abd0..7ded53b6e610e07b6b70c5022bce10c60899a9ac 100644 --- a/source/libs/sync/src/sync.c +++ b/source/libs/sync/src/sync.c @@ -1,302 +1 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "trpc.h" -#include "ttimer.h" - -SSyncManager* gSyncManager = NULL; - -#define SYNC_TICK_TIMER 50 -#define SYNC_ACTIVITY_TIMER 5 -#define SYNC_SERVER_WORKER 2 - -static void syncProcessRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet); -static void syncProcessReqMsg(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet); - -static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg); -static int syncInitRpcClient(SSyncManager* syncManager); -static int syncOpenWorkerPool(SSyncManager* syncManager); -static int syncCloseWorkerPool(SSyncManager* syncManager); -static void *syncWorkerMain(void *argv); -static void syncNodeTick(void *param, void *tmrId); - -int32_t syncInit() { - if (gSyncManager != NULL) { - return 0; - } - - gSyncManager = (SSyncManager*)calloc(sizeof(SSyncManager), 0); - if (gSyncManager == NULL) { - syncError("malloc SSyncManager fail"); - return -1; - } - - pthread_mutex_init(&gSyncManager->mutex, NULL); - - // init client rpc - if (syncInitRpcClient(gSyncManager) != 0) { - syncCleanUp(); - return -1; - } - - // init sync timer manager - gSyncManager->syncTimerManager = taosTmrInit(1000, 50, 10000, "SYNC"); - if (gSyncManager->syncTimerManager == NULL) { - syncCleanUp(); - return -1; - } - - // init worker pool - if (syncOpenWorkerPool(gSyncManager) != 0) { - syncCleanUp(); - return -1; - } - - // init vgroup hash table - gSyncManager->vgroupTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - if (gSyncManager->vgroupTable == NULL) { - syncCleanUp(); - return -1; - } - return 0; -} - -void syncCleanUp() { - if (gSyncManager == NULL) { - return; - } - pthread_mutex_lock(&gSyncManager->mutex); - if (gSyncManager->vgroupTable) { - taosHashCleanup(gSyncManager->vgroupTable); - } - if (gSyncManager->clientRpc) { - rpcClose(gSyncManager->clientRpc); - syncInfo("sync inter-sync rpc client is closed"); - } - if (gSyncManager->syncTimerManager) { - taosTmrCleanUp(gSyncManager->syncTimerManager); - } - syncCloseWorkerPool(gSyncManager); - pthread_mutex_unlock(&gSyncManager->mutex); - pthread_mutex_destroy(&gSyncManager->mutex); - free(gSyncManager); - gSyncManager = NULL; -} - -SSyncNode* syncStart(const SSyncInfo* pInfo) { - pthread_mutex_lock(&gSyncManager->mutex); - - SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId*)); - if (ppNode != NULL) { - syncInfo("vgroup %d already exist", pInfo->vgId); - pthread_mutex_unlock(&gSyncManager->mutex); - return *ppNode; - } - - // init rpc server - if (syncInitRpcServer(gSyncManager, &pInfo->syncCfg) != 0) { - pthread_mutex_unlock(&gSyncManager->mutex); - return NULL; - } - - SSyncNode *pNode = (SSyncNode*)malloc(sizeof(SSyncNode)); - if (pNode == NULL) { - syncError("malloc vgroup %d node fail", pInfo->vgId); - pthread_mutex_unlock(&gSyncManager->mutex); - return NULL; - } - - pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)((int64_t)pInfo->vgId), gSyncManager->syncTimerManager); - - // start raft - pNode->raft.pNode = pNode; - if (syncRaftStart(&pNode->raft, pInfo) != 0) { - syncError("raft start at %d node fail", pInfo->vgId); - pthread_mutex_unlock(&gSyncManager->mutex); - return NULL; - } - - pthread_mutex_init(&pNode->mutex, NULL); - - taosHashPut(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId), &pNode, sizeof(SSyncNode *)); - - pthread_mutex_unlock(&gSyncManager->mutex); - return NULL; -} - -void syncStop(const SSyncNode* pNode) { - pthread_mutex_lock(&gSyncManager->mutex); - - SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId*)); - if (ppNode == NULL) { - syncInfo("vgroup %d not exist", pNode->vgId); - pthread_mutex_unlock(&gSyncManager->mutex); - return; - } - assert(*ppNode == pNode); - taosTmrStop(pNode->syncTimer); - - taosHashRemove(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId)); - pthread_mutex_unlock(&gSyncManager->mutex); - - pthread_mutex_destroy(&((*ppNode)->mutex)); - free(*ppNode); -} - -int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, bool isWeak) { - SSyncMessage msg; - - pthread_mutex_lock(&syncNode->mutex); - int32_t ret = syncRaftStep(&syncNode->raft, syncInitPropMsg(&msg, pBuf, pData, isWeak)); - pthread_mutex_unlock(&syncNode->mutex); - return ret; -} - -void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {} - -int32_t syncAddNode(SSyncNode syncNode, const SNodeInfo *pNode) { - return 0; -} - -int32_t syncRemoveNode(SSyncNode syncNode, const SNodeInfo *pNode) { - return 0; -} - -// process rpc rsp message from other sync server -static void syncProcessRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) { - -} - -// process rpc message from other sync server -static void syncProcessReqMsg(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) { - -} - -static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg) { - if (gSyncManager->rpcServerTable == NULL) { - gSyncManager->rpcServerTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); - if (gSyncManager->rpcServerTable == NULL) { - syncError("init sync rpc server hash table error"); - return -1; - } - } - assert(pSyncCfg->selfIndex < pSyncCfg->replica && pSyncCfg->selfIndex >= 0); - const SNodeInfo* pNode = &(pSyncCfg->nodeInfo[pSyncCfg->replica]); - char buffer[156] = {'\0'}; - snprintf(buffer, sizeof(buffer), "%s:%d", &(pNode->nodeFqdn[0]), pNode->nodePort); - size_t len = strlen(buffer); - void** ppRpcServer = taosHashGet(gSyncManager->rpcServerTable, buffer, len); - if (ppRpcServer != NULL) { - // already inited - syncInfo("sync rpc server for %s already exist", buffer); - return 0; - } - - SRpcInit rpcInit; - memset(&rpcInit, 0, sizeof(rpcInit)); - rpcInit.localPort = pNode->nodePort; - rpcInit.label = "sync-server"; - rpcInit.numOfThreads = SYNC_SERVER_WORKER; - rpcInit.cfp = syncProcessReqMsg; - rpcInit.sessions = TSDB_MAX_VNODES << 4; - rpcInit.connType = TAOS_CONN_SERVER; - rpcInit.idleTime = SYNC_ACTIVITY_TIMER * 1000; - - void* rpcServer = rpcOpen(&rpcInit); - if (rpcServer == NULL) { - syncInfo("rpcOpen for sync rpc server for %s fail", buffer); - return -1; - } - - taosHashPut(gSyncManager->rpcServerTable, buffer, strlen(buffer), rpcServer, len); - syncInfo("sync rpc server for %s init success", buffer); - - return 0; -} - -static int syncInitRpcClient(SSyncManager* syncManager) { - char secret[TSDB_PASSWORD_LEN] = "secret"; - SRpcInit rpcInit; - memset(&rpcInit, 0, sizeof(rpcInit)); - rpcInit.label = "sync-client"; - rpcInit.numOfThreads = 1; - rpcInit.cfp = syncProcessRsp; - rpcInit.sessions = TSDB_MAX_VNODES << 4; - rpcInit.connType = TAOS_CONN_CLIENT; - rpcInit.idleTime = SYNC_ACTIVITY_TIMER * 1000; - rpcInit.user = "t"; - rpcInit.ckey = "key"; - rpcInit.secret = secret; - - syncManager->clientRpc = rpcOpen(&rpcInit); - if (syncManager->clientRpc == NULL) { - syncError("failed to init sync rpc client"); - return -1; - } - - syncInfo("sync inter-sync rpc client is initialized"); - return 0; -} - -static int syncOpenWorkerPool(SSyncManager* syncManager) { - int i; - pthread_attr_t thattr; - - pthread_attr_init(&thattr); - pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE); - - for (i = 0; i < TAOS_SYNC_MAX_WORKER; ++i) { - SSyncWorker* pWorker = &(syncManager->worker[i]); - - if (pthread_create(&(pWorker->thread), &thattr, (void *)syncWorkerMain, pWorker) != 0) { - syncError("failed to create sync worker since %s", strerror(errno)); - - return -1; - } - } - - pthread_attr_destroy(&thattr); - - return 0; -} - -static int syncCloseWorkerPool(SSyncManager* syncManager) { - return 0; -} - -static void *syncWorkerMain(void *argv) { - SSyncWorker* pWorker = (SSyncWorker *)argv; - - taosBlockSIGPIPE(); - setThreadName("syncWorker"); - - return NULL; -} - -static void syncNodeTick(void *param, void *tmrId) { - SyncGroupId vgId = (SyncGroupId)((int64_t)param); - SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &vgId, sizeof(SyncGroupId*)); - if (ppNode == NULL) { - return; - } - SSyncNode *pNode = *ppNode; - - pthread_mutex_lock(&pNode->mutex); - syncRaftTick(&pNode->raft); - pthread_mutex_unlock(&pNode->mutex); - - pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)(int64_t)pNode->vgId, gSyncManager->syncTimerManager); -} \ No newline at end of file +#include "sync.h" \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_config_change.c b/source/libs/sync/src/sync_raft_config_change.c deleted file mode 100644 index de790b58767fb9e442a9bd5469e709e3e669cf02..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_config_change.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "syncInt.h" -#include "sync_raft_config_change.h" -#include "sync_raft_progress.h" -#include "sync_raft_progress_tracker.h" -#include "sync_raft_quorum_joint.h" - -static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); -static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); -static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); -static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap); -static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config); -static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css); - -static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r); - -static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner); - -static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id); -static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id); - -static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id); -static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id); -static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id); - -// EnterJoint verifies that the outgoing (=right) majority config of the joint -// config is empty and initializes it with a copy of the incoming (=left) -// majority config. That is, it transitions from -// -// (1 2 3)&&() -// to -// (1 2 3)&&(1 2 3). -// -// The supplied changes are then applied to the incoming majority config, -// resulting in a joint configuration that in terms of the Raft thesis[1] -// (Section 4.3) corresponds to `C_{new,old}`. -// -// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf -int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const SSyncConfChangeSingleArray* css, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - int ret; - - ret = checkAndCopy(changer, config, progressMap); - if (ret != 0) { - return ret; - } - - if (hasJointConfig(config)) { - syncError("config is already joint"); - return -1; - } - - if(syncRaftJointConfigIsIncomingEmpty(&config->voters) == 0) { - // We allow adding nodes to an empty config for convenience (testing and - // bootstrap), but you can't enter a joint state. - syncError("can't make a zero-voter config joint"); - return -1; - } - - // Clear the outgoing config. - syncRaftJointConfigClearOutgoing(&config->voters); - - // Copy incoming to outgoing. - syncRaftCopyNodeMap(&config->voters.incoming, &config->voters.outgoing); - - ret = applyConfig(changer, config, progressMap, css); - if (ret != 0) { - return ret; - } - - config->autoLeave = autoLeave; - return checkAndReturn(config, progressMap); -} - -// Simple carries out a series of configuration changes that (in aggregate) -// mutates the incoming majority config Voters[0] by at most one. This method -// will return an error if that is not the case, if the resulting quorum is -// zero, or if the configuration is in a joint state (i.e. if there is an -// outgoing configuration). -int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - int ret; - - ret = checkAndCopy(changer, config, progressMap); - if (ret != 0) { - return ret; - } - - if (hasJointConfig(config)) { - syncError("can't apply simple config change in joint config"); - return -1; - } - - ret = applyConfig(changer, config, progressMap, css); - if (ret != 0) { - return ret; - } - - int n = symDiff(syncRaftJointConfigIncoming(&changer->tracker->config.voters), - syncRaftJointConfigIncoming(&config->voters)); - if (n > 1) { - syncError("more than one voter changed without entering joint config"); - return -1; - } - - return checkAndReturn(config, progressMap); -} - -// apply a change to the configuration. By convention, changes to voters are -// always made to the incoming majority config Voters[0]. Voters[1] is either -// empty or preserves the outgoing majority configuration while in a joint state. -static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) { - int i; - - for (i = 0; i < css->n; ++i) { - const SSyncConfChangeSingle* cs = &(css->changes[i]); - if (cs->nodeId == SYNC_NON_NODE_ID) { - continue; - } - - ESyncRaftConfChangeType type = cs->type; - switch (type) { - case SYNC_RAFT_Conf_AddNode: - makeVoter(changer, config, progressMap, cs->nodeId); - break; - case SYNC_RAFT_Conf_AddLearnerNode: - makeLearner(changer, config, progressMap, cs->nodeId); - break; - case SYNC_RAFT_Conf_RemoveNode: - removeNodeId(changer, config, progressMap, cs->nodeId); - break; - case SYNC_RAFT_Conf_UpdateNode: - break; - } - } - - if (syncRaftJointConfigIsIncomingEmpty(&config->voters)) { - syncError("removed all voters"); - return -1; - } - - return 0; -} - - -// makeVoter adds or promotes the given ID to be a voter in the incoming -// majority config. -static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id) { - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id); - if (progress == NULL) { - initProgress(changer, config, progressMap, id, false); - return; - } - - progress->isLearner = false; - nilAwareDelete(&config->learners, id); - nilAwareDelete(&config->learnersNext, id); - syncRaftJointConfigAddToIncoming(&config->voters, id); -} - -// makeLearner makes the given ID a learner or stages it to be a learner once -// an active joint configuration is exited. -// -// The former happens when the peer is not a part of the outgoing config, in -// which case we either add a new learner or demote a voter in the incoming -// config. -// -// The latter case occurs when the configuration is joint and the peer is a -// voter in the outgoing config. In that case, we do not want to add the peer -// as a learner because then we'd have to track a peer as a voter and learner -// simultaneously. Instead, we add the learner to LearnersNext, so that it will -// be added to Learners the moment the outgoing config is removed by -// LeaveJoint(). -static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id) { - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id); - if (progress == NULL) { - initProgress(changer, config, progressMap, id, true); - return; - } - - if (progress->isLearner) { - return; - } - // Remove any existing voter in the incoming config... - removeNodeId(changer, config, progressMap, id); - - // ... but save the Progress. - syncRaftAddToProgressMap(progressMap, progress); - - // Use LearnersNext if we can't add the learner to Learners directly, i.e. - // if the peer is still tracked as a voter in the outgoing config. It will - // be turned into a learner in LeaveJoint(). - // - // Otherwise, add a regular learner right away. - bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id); - if (inInOutgoing) { - nilAwareAdd(&config->learnersNext, id); - } else { - nilAwareAdd(&config->learners, id); - progress->isLearner = true; - } -} - -// removeNodeId this peer as a voter or learner from the incoming config. -static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id) { - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id); - if (progress == NULL) { - return; - } - - syncRaftJointConfigRemoveFromIncoming(&config->voters, id); - nilAwareDelete(&config->learners, id); - nilAwareDelete(&config->learnersNext, id); - - // If the peer is still a voter in the outgoing config, keep the Progress. - bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id); - if (!inInOutgoing) { - syncRaftRemoveFromProgressMap(progressMap, id); - } -} - -// initProgress initializes a new progress for the given node or learner. -static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, - SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner) { - if (!isLearner) { - syncRaftJointConfigAddToIncoming(&config->voters, id); - } else { - nilAwareAdd(&config->learners, id); - } - - SSyncRaftProgress* pProgress = (SSyncRaftProgress*)malloc(sizeof(SSyncRaftProgress)); - assert (pProgress != NULL); - *pProgress = (SSyncRaftProgress) { - // Initializing the Progress with the last index means that the follower - // can be probed (with the last index). - // - // TODO(tbg): seems awfully optimistic. Using the first index would be - // better. The general expectation here is that the follower has no log - // at all (and will thus likely need a snapshot), though the app may - // have applied a snapshot out of band before adding the replica (thus - // making the first index the better choice). - .id = id, - .groupId = changer->tracker->pRaft->selfGroupId, - .nextIndex = changer->lastIndex, - .matchIndex = 0, - .state = PROGRESS_STATE_PROBE, - .pendingSnapshotIndex = 0, - .probeSent = false, - .inflights = syncRaftOpenInflights(changer->tracker->maxInflightMsgs), - .isLearner = isLearner, - // When a node is first added, we should mark it as recently active. - // Otherwise, CheckQuorum may cause us to step down if it is invoked - // before the added node has had a chance to communicate with us. - .recentActive = true, - .refCount = 0, - }; - - syncRaftAddToProgressMap(progressMap, pProgress); -} - -// checkInvariants makes sure that the config and progress are compatible with -// each other. This is used to check both what the Changer is initialized with, -// as well as what it returns. -static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - int ret = syncRaftCheckTrackerConfigInProgress(config, progressMap); - if (ret != 0) { - return ret; - } - - // Any staged learner was staged because it could not be directly added due - // to a conflicting voter in the outgoing config. - SyncNodeId* pNodeId = NULL; - while (!syncRaftIterateNodeMap(&config->learnersNext, pNodeId)) { - SyncNodeId nodeId = *pNodeId; - if (!syncRaftJointConfigInOutgoing(&config->voters, nodeId)) { - syncError("[%d] is in LearnersNext, but not outgoing", nodeId); - return -1; - } - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId); - assert(progress); - assert(progress->id == nodeId); - if (progress->isLearner) { - syncError("[%d:%d] is in LearnersNext, but is already marked as learner", progress->groupId, nodeId); - return -1; - } - } - - // Conversely Learners and Voters doesn't intersect at all. - pNodeId = NULL; - while (!syncRaftIterateNodeMap(&config->learners, pNodeId)) { - SyncNodeId nodeId = *pNodeId; - if (syncRaftJointConfigInOutgoing(&config->voters, nodeId)) { - syncError("%d is in Learners and outgoing", nodeId); - return -1; - } - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId); - assert(progress); - assert(progress->id == nodeId); - - if (!progress->isLearner) { - syncError("[%d:%d] is in Learners, but is not marked as learner", progress->groupId, nodeId); - return -1; - } - } - - if (!hasJointConfig(config)) { - // We enforce that empty maps are nil instead of zero. - if (syncRaftNodeMapSize(&config->learnersNext) > 0) { - syncError("cfg.LearnersNext must be nil when not joint"); - return -1; - } - if (config->autoLeave) { - syncError("AutoLeave must be false when not joint"); - return -1; - } - } - - return 0; -} - -// checkAndCopy copies the tracker's config and progress map (deeply enough for -// the purposes of the Changer) and returns those copies. It returns an error -// if checkInvariants does. -static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - syncRaftCopyTrackerConfig(&changer->tracker->config, config); - syncRaftClearProgressMap(progressMap); - - SSyncRaftProgress* pProgress = NULL; - while (!syncRaftIterateProgressMap(&changer->tracker->progressMap, pProgress)) { - syncRaftAddToProgressMap(progressMap, pProgress); - } - - return checkAndReturn(config, progressMap); -} - -// checkAndReturn calls checkInvariants on the input and returns either the -// resulting error or the input. -static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - if (checkInvariants(config, progressMap) != 0) { - return -1; - } - - return 0; -} - -static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) { - return !syncRaftJointConfigIsOutgoingEmpty(&config->voters); -} - -// symdiff returns the count of the symmetric difference between the sets of -// uint64s, i.e. len( (l - r) \union (r - l)). -static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r) { - int n; - int i; - int j0, j1; - const SSyncRaftNodeMap* pairs[2][2] = { - {l, r}, // count elems in l but not in r - {r, l}, // count elems in r but not in l - }; - - for (n = 0, i = 0; i < 2; ++i) { - const SSyncRaftNodeMap** pp = pairs[i]; - - const SSyncRaftNodeMap* p0 = pp[0]; - const SSyncRaftNodeMap* p1 = pp[1]; - SyncNodeId* pNodeId; - while (!syncRaftIterateNodeMap(p0, pNodeId)) { - if (!syncRaftIsInNodeMap(p1, *pNodeId)) { - n+=1; - } - } - } - - return n; -} - -// nilAwareDelete deletes from a map, nil'ing the map itself if it is empty after. -static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id) { - syncRaftRemoveFromNodeMap(nodeMap, id); -} - -// nilAwareAdd populates a map entry, creating the map if necessary. -static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id) { - syncRaftAddToNodeMap(nodeMap, id); -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_election.c b/source/libs/sync/src/sync_raft_election.c deleted file mode 100644 index fe2e0fd9d318f8b4788012bac6a2071174c1d1a3..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_election.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "syncInt.h" -#include "raft.h" -#include "raft_log.h" -#include "raft_message.h" -#include "sync_raft_progress_tracker.h" - -void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) { - if (pRaft->state == TAOS_SYNC_STATE_LEADER) { - syncDebug("[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfGroupId, pRaft->selfId); - return; - } - - if (!syncRaftIsPromotable(pRaft)) { - syncWarn("[%d:%d] is unpromotable and can not syncRaftCampaign", pRaft->selfGroupId, pRaft->selfId); - return; - } - - // if there is pending uncommitted config,cannot start election - if (syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log)) { - syncWarn("[%d:%d] cannot syncRaftStartElection at term %" PRId64 " since there are still pending configuration changes to apply", - pRaft->selfGroupId, pRaft->selfId, pRaft->term); - return; - } - - syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); - - syncRaftCampaign(pRaft, cType); -} - -// syncRaftCampaign transitions the raft instance to candidate state. This must only be -// called after verifying that this is a legitimate transition. -void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType) { - bool preVote; - SyncTerm term; - - if (syncRaftIsPromotable(pRaft)) { - syncDebug("[%d:%d] is unpromotable; syncRaftCampaign() should have been called", pRaft->selfGroupId, pRaft->selfId); - return; - } - - if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { - syncRaftBecomePreCandidate(pRaft); - preVote = true; - // PreVote RPCs are sent for the next term before we've incremented r.Term. - term = pRaft->term + 1; - } else { - syncRaftBecomeCandidate(pRaft); - term = pRaft->term; - preVote = false; - } - - int quorum = syncRaftQuorum(pRaft); - ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pRaft->selfId, preVote, true, NULL, NULL); - if (result == SYNC_RAFT_VOTE_WON) { - // We won the election after voting for ourselves (which must mean that - // this is a single-node cluster). Advance to the next state. - if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) { - syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION); - } else { - syncRaftBecomeLeader(pRaft); - } - return; - } - - // broadcast vote message to other peers - int i; - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log); - SSyncRaftNodeMap nodeMap; - syncRaftJointConfigIDs(&pRaft->tracker->config.voters, &nodeMap); - SyncNodeId *pNodeId = NULL; - while (!syncRaftIterateNodeMap(&nodeMap, pNodeId)) { - SyncNodeId nodeId = *pNodeId; - if (nodeId == SYNC_NON_NODE_ID) { - continue; - } - - if (nodeId == pRaft->selfId) { - continue; - } - - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, nodeId); - if (pNode == NULL) { - continue; - } - - SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId, - term, cType, lastIndex, lastTerm); - if (pMsg == NULL) { - continue; - } - - syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 "] sent vote request to %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, lastTerm, - lastIndex, nodeId, pRaft->term); - - pRaft->io.send(pMsg, pNode); - } -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_impl.c b/source/libs/sync/src/sync_raft_impl.c deleted file mode 100644 index 3050bb2c8aa9186df4c9f16db2c8a5a883476533..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_impl.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "sync_raft_impl.h" -#include "raft_log.h" -#include "raft_replication.h" -#include "sync_raft_progress_tracker.h" -#include "syncInt.h" - -static int convertClear(SSyncRaft* pRaft); -static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg); -static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg); -static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg); - -static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n); - -static int triggerAll(SSyncRaft* pRaft); - -static void tickElection(SSyncRaft* pRaft); -static void tickHeartbeat(SSyncRaft* pRaft); - -static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n); - -static void abortLeaderTransfer(SSyncRaft* pRaft); - -static void resetRaft(SSyncRaft* pRaft, SyncTerm term); - -void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId) { - convertClear(pRaft); - - pRaft->stepFp = stepFollower; - resetRaft(pRaft, term); - pRaft->tickFp = tickElection; - pRaft->leaderId = leaderId; - pRaft->state = TAOS_SYNC_STATE_FOLLOWER; - syncInfo("[%d:%d] became followe at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); -} - -void syncRaftBecomePreCandidate(SSyncRaft* pRaft) { - convertClear(pRaft); - - /** - * Becoming a pre-candidate changes our step functions and state, - * but doesn't change anything else. In particular it does not increase - * r.Term or change r.Vote. - **/ - pRaft->stepFp = stepCandidate; - pRaft->tickFp = tickElection; - pRaft->state = TAOS_SYNC_STATE_CANDIDATE; - pRaft->candidateState.inPreVote = true; - syncInfo("[%d:%d] became pre-candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); -} - -void syncRaftBecomeCandidate(SSyncRaft* pRaft) { - convertClear(pRaft); - - pRaft->candidateState.inPreVote = false; - pRaft->stepFp = stepCandidate; - // become candidate make term+1 - resetRaft(pRaft, pRaft->term + 1); - pRaft->tickFp = tickElection; - pRaft->voteFor = pRaft->selfId; - pRaft->state = TAOS_SYNC_STATE_CANDIDATE; - syncInfo("[%d:%d] became candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); -} - -void syncRaftBecomeLeader(SSyncRaft* pRaft) { - assert(pRaft->state != TAOS_SYNC_STATE_FOLLOWER); - - pRaft->stepFp = stepLeader; - resetRaft(pRaft, pRaft->term); - pRaft->leaderId = pRaft->leaderId; - pRaft->state = TAOS_SYNC_STATE_LEADER; - - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId); - assert(progress != NULL); - // Followers enter replicate mode when they've been successfully probed - // (perhaps after having received a snapshot as a result). The leader is - // trivially in this state. Note that r.reset() has initialized this - // progress with the last index already. - syncRaftProgressBecomeReplicate(progress); - - // Conservatively set the pendingConfIndex to the last index in the - // log. There may or may not be a pending config change, but it's - // safe to delay any future proposals until we commit all our - // pending log entries, and scanning the entire tail of the log - // could be expensive. - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - pRaft->pendingConfigIndex = lastIndex; - - // after become leader, send a no-op log - SSyncRaftEntry* entry = (SSyncRaftEntry*)malloc(sizeof(SSyncRaftEntry)); - if (entry == NULL) { - return; - } - *entry = (SSyncRaftEntry) { - .buffer = (SSyncBuffer) { - .data = NULL, - .len = 0, - } - }; - appendEntries(pRaft, entry, 1); - //syncRaftTriggerHeartbeat(pRaft); - syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term); -} - -void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) { - triggerAll(pRaft); -} - -void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) { - // electionTimeoutTick in [3,6] tick - pRaft->randomizedElectionTimeout = taosRand() % 4 + 3; -} - -bool syncRaftIsPromotable(SSyncRaft* pRaft) { - return pRaft->selfId != SYNC_NON_NODE_ID; -} - -bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) { - return pRaft->electionElapsed >= pRaft->randomizedElectionTimeout; -} - -int syncRaftQuorum(SSyncRaft* pRaft) { - return 0; - //return pRaft->cluster.replica / 2 + 1; -} - -ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id, - bool preVote, bool grant, - int* rejected, int *granted) { - SNodeInfo* pNode = syncRaftGetNodeById(pRaft, id); - if (pNode == NULL) { - return true; - } - - if (grant) { - syncInfo("[%d:%d] received grant (pre-vote %d) from %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); - } else { - syncInfo("[%d:%d] received rejection (pre-vote %d) from %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); - } - - syncRaftRecordVote(pRaft->tracker, pNode->nodeId, grant); - return syncRaftTallyVotes(pRaft->tracker, rejected, granted); -} -/* - if (accept) { - syncInfo("[%d:%d] received (pre-vote %d) from %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term); - } else { - syncInfo("[%d:%d] received rejection from %d at term %" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, id, pRaft->term); - } - - int voteIndex = syncRaftGetNodeById(pRaft, id); - assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0); - assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN); - - pRaft->candidateState.votes[voteIndex] = accept ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT; - int granted = 0, rejected = 0; - int i; - for (i = 0; i < pRaft->cluster.replica; ++i) { - if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_GRANT) granted++; - else if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_REJECT) rejected++; - } - - if (rejectNum) *rejectNum = rejected; - return granted; -*/ - -void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) { - SyncIndex commitIndex = serverState->commitIndex; - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - - if (commitIndex < pRaft->log->commitIndex || commitIndex > lastIndex) { - syncFatal("[%d:%d] state.commit %"PRId64" is out of range [%" PRId64 ",%" PRId64 "", - pRaft->selfGroupId, pRaft->selfId, commitIndex, pRaft->log->commitIndex, lastIndex); - return; - } - - pRaft->log->commitIndex = commitIndex; - pRaft->term = serverState->term; - pRaft->voteFor = serverState->voteFor; -} - -static void visitProgressSendAppend(SSyncRaftProgress* progress, void* arg) { - SSyncRaft* pRaft = (SSyncRaft*)arg; - if (pRaft->selfId == progress->id) { - return; - } - - syncRaftMaybeSendAppend(arg, progress, true); -} - -// bcastAppend sends RPC, with entries to all peers that are not up-to-date -// according to the progress recorded in r.prs. -void syncRaftBroadcastAppend(SSyncRaft* pRaft) { - syncRaftProgressVisit(pRaft->tracker, visitProgressSendAppend, pRaft); -} - -SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id) { - SNodeInfo **ppNode = taosHashGet(pRaft->nodeInfoMap, &id, sizeof(SyncNodeId*)); - if (ppNode != NULL) { - return *ppNode; - } - - return NULL; -} - -static int convertClear(SSyncRaft* pRaft) { - -} - -static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - - return 0; -} - -static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - /** - * Only handle vote responses corresponding to our candidacy (while in - * StateCandidate, we may get stale MsgPreVoteResp messages in this term from - * our pre-candidate state). - **/ - ESyncRaftMessageType msgType = pMsg->msgType; - - if (msgType == RAFT_MSG_INTERNAL_PROP) { - return 0; - } - - if (msgType == RAFT_MSG_VOTE_RESP) { - syncRaftHandleVoteRespMessage(pRaft, pMsg); - return 0; - } else if (msgType == RAFT_MSG_APPEND) { - syncRaftBecomeFollower(pRaft, pMsg->term, pMsg->from); - syncRaftHandleAppendEntriesMessage(pRaft, pMsg); - } - return 0; -} - -static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) { - convertClear(pRaft); - return 0; -} - -// tickElection is run by followers and candidates after r.electionTimeout. -static void tickElection(SSyncRaft* pRaft) { - pRaft->electionElapsed += 1; - - if (!syncRaftIsPromotable(pRaft)) { - return; - } - - if (!syncRaftIsPastElectionTimeout(pRaft)) { - return; - } - - // election timeout - pRaft->electionElapsed = 0; - SSyncMessage msg; - syncRaftStep(pRaft, syncInitElectionMsg(&msg, pRaft->selfId)); -} - -// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout. -static void tickHeartbeat(SSyncRaft* pRaft) { - -} - -// TODO -static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { - return false; -} - -static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) { - SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log); - SyncTerm term = pRaft->term; - int i; - - for (i = 0; i < n; ++i) { - entries[i].term = term; - entries[i].index = lastIndex + 1 + i; - } - - // Track the size of this uncommitted proposal. - if (!increaseUncommittedSize(pRaft, entries, n)) { - // Drop the proposal. - return; - } - - syncRaftLogAppend(pRaft->log, entries, n); - - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId); - assert(progress != NULL); - syncRaftProgressMaybeUpdate(progress, lastIndex); - // Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend. - syncRaftMaybeCommit(pRaft); -} - -// syncRaftMaybeCommit attempts to advance the commit index. Returns true if -// the commit index changed (in which case the caller should call -// r.bcastAppend). -bool syncRaftMaybeCommit(SSyncRaft* pRaft) { - - return true; -} - -/** - * trigger I/O requests for newly appended log entries or heartbeats. - **/ -static int triggerAll(SSyncRaft* pRaft) { - #if 0 - assert(pRaft->state == TAOS_SYNC_STATE_LEADER); - int i; - - for (i = 0; i < pRaft->cluster.replica; ++i) { - if (i == pRaft->cluster.selfIndex) { - continue; - } - - syncRaftMaybeSendAppend(pRaft, pRaft->tracker->progressMap.progress[i], true); - } - #endif - return 0; -} - -static void abortLeaderTransfer(SSyncRaft* pRaft) { - pRaft->leadTransferee = SYNC_NON_NODE_ID; -} - -static void resetProgress(SSyncRaftProgress* progress, void* arg) { - syncRaftResetProgress((SSyncRaft*)arg, progress); -} - -static void resetRaft(SSyncRaft* pRaft, SyncTerm term) { - if (pRaft->term != term) { - pRaft->term = term; - pRaft->voteFor = SYNC_NON_NODE_ID; - } - - pRaft->leaderId = SYNC_NON_NODE_ID; - - pRaft->electionElapsed = 0; - pRaft->heartbeatElapsed = 0; - - syncRaftRandomizedElectionTimeout(pRaft); - - abortLeaderTransfer(pRaft); - - syncRaftResetVotes(pRaft->tracker); - syncRaftProgressVisit(pRaft->tracker, resetProgress, pRaft); - - pRaft->pendingConfigIndex = 0; - pRaft->uncommittedSize = 0; -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_inflights.c b/source/libs/sync/src/sync_raft_inflights.c deleted file mode 100644 index 7b97aca014c41847492ab7507ddb3ce5c92fef8b..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_inflights.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync_raft_inflights.h" - -SSyncRaftInflights* syncRaftOpenInflights(int size) { - SSyncRaftInflights* inflights = (SSyncRaftInflights*)malloc(sizeof(SSyncRaftInflights)); - if (inflights == NULL) { - return NULL; - } - SyncIndex* buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * size); - if (buffer == NULL) { - free(inflights); - return NULL; - } - *inflights = (SSyncRaftInflights) { - .buffer = buffer, - .count = 0, - .size = 0, - .start = 0, - }; - - return inflights; -} - -void syncRaftCloseInflights(SSyncRaftInflights* inflights) { - free(inflights->buffer); - free(inflights); -} - -// Add notifies the Inflights that a new message with the given index is being -// dispatched. Full() must be called prior to Add() to verify that there is room -// for one more message, and consecutive calls to add Add() must provide a -// monotonic sequence of indexes. -void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) { - assert(!syncRaftInflightFull(inflights)); - - int next = inflights->start + inflights->count; - int size = inflights->size; - - if (next >= size) { - next -= size; - } - - inflights->buffer[next] = inflightIndex; - inflights->count++; -} - -// FreeLE frees the inflights smaller or equal to the given `to` flight. -void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) { - if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) { - // out of the left side of the window - return; - } - - int i, idx; - for (i = 0, idx = inflights->start; i < inflights->count; i++) { - if (toIndex < inflights->buffer[idx]) { // found the first large inflight - break; - } - - // increase index and maybe rotate - int size = inflights->size; - idx++; - if (idx >= size) { - idx -= size; - } - } - - // free i inflights and set new start index - inflights->count -= i; - inflights->start = idx; - assert(inflights->count >= 0); - if (inflights->count == 0) { - // inflights is empty, reset the start index so that we don't grow the - // buffer unnecessarily. - inflights->start = 0; - } -} - -// FreeFirstOne releases the first inflight. This is a no-op if nothing is -// inflight. -void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) { - syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]); -} diff --git a/source/libs/sync/src/sync_raft_node_map.c b/source/libs/sync/src/sync_raft_node_map.c deleted file mode 100644 index 642eebe65bbc858bf2ffc0e1ceb246d50fd728f7..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_node_map.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync_raft_node_map.h" -#include "sync_type.h" -#include "sync_raft_progress.h" - -void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap) { - nodeMap->nodeIdMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); -} - -void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap) { - taosHashCleanup(nodeMap->nodeIdMap); -} - -void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap) { - taosHashClear(nodeMap->nodeIdMap); -} - -bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) { - SyncNodeId** ppId = (SyncNodeId**)taosHashGet(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*)); - if (ppId == NULL) { - return false; - } - return true; -} - -void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to) { - SyncNodeId *pId = NULL; - while (!syncRaftIterateNodeMap(from, pId)) { - taosHashPut(to->nodeIdMap, &pId, sizeof(SyncNodeId*), &pId, sizeof(SyncNodeId*)); - } -} - -bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId) { - SyncNodeId **ppId = taosHashIterate(nodeMap->nodeIdMap, pId); - if (ppId == NULL) { - return true; - } - - *pId = *(*ppId); - return false; -} - -bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap) { - SyncNodeId *pId = NULL; - while (!syncRaftIterateNodeMap(nodeMap, pId)) { - if (!syncRaftIsInProgressMap(progressMap, *pId)) { - return false; - } - } - - return true; -} - -void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to) { - syncRaftCopyNodeMap(nodeMap, to); -} - -void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) { - taosHashPut(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*), &nodeId, sizeof(SyncNodeId*)); -} - -void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) { - taosHashRemove(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*)); -} - -int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap) { - return taosHashGetSize(nodeMap->nodeIdMap); -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_progress.c b/source/libs/sync/src/sync_raft_progress.c deleted file mode 100644 index 6577972b29251ee44209f6be0c41ead259e08a0e..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_progress.c +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "raft_log.h" -#include "sync_raft_progress.h" -#include "sync_raft_progress_tracker.h" -#include "sync.h" -#include "syncInt.h" - -static void copyProgress(SSyncRaftProgress* progress, void* arg); - -static void refProgress(SSyncRaftProgress* progress); -static void unrefProgress(SSyncRaftProgress* progress, void*); - -static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state); -static void probeAcked(SSyncRaftProgress* progress); - -static void resumeProgress(SSyncRaftProgress* progress); - -void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress) { - if (progress->inflights) { - syncRaftCloseInflights(progress->inflights); - } - SSyncRaftInflights* inflights = syncRaftOpenInflights(pRaft->tracker->maxInflightMsgs); - if (inflights == NULL) { - return; - } - *progress = (SSyncRaftProgress) { - .matchIndex = progress->id == pRaft->selfId ? syncRaftLogLastIndex(pRaft->log) : 0, - .nextIndex = syncRaftLogLastIndex(pRaft->log) + 1, - .inflights = inflights, - .isLearner = false, - .state = PROGRESS_STATE_PROBE, - }; -} - -// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the -// index acked by it. The method returns false if the given n index comes from -// an outdated message. Otherwise it updates the progress and returns true. -bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) { - bool updated = false; - - if (progress->matchIndex < lastIndex) { - progress->matchIndex = lastIndex; - updated = true; - probeAcked(progress); - } - - progress->nextIndex = TMAX(progress->nextIndex, lastIndex + 1); - - return updated; -} - -// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The -// arguments are the index of the append message rejected by the follower, and -// the hint that we want to decrease to. -// -// Rejections can happen spuriously as messages are sent out of order or -// duplicated. In such cases, the rejection pertains to an index that the -// Progress already knows were previously acknowledged, and false is returned -// without changing the Progress. -// -// If the rejection is genuine, Next is lowered sensibly, and the Progress is -// cleared for sending log entries. -bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress, - SyncIndex rejected, SyncIndex matchHint) { - if (progress->state == PROGRESS_STATE_REPLICATE) { - // The rejection must be stale if the progress has matched and "rejected" - // is smaller than "match". - if (rejected <= progress->matchIndex) { - syncDebug("match index is up to date,ignore"); - return false; - } - - // Directly decrease next to match + 1. - // - // TODO(tbg): why not use matchHint if it's larger? - progress->nextIndex = progress->matchIndex + 1; - return true; - } - - // The rejection must be stale if "rejected" does not match next - 1. This - // is because non-replicating followers are probed one entry at a time. - if (rejected != progress->nextIndex - 1) { - syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore" - , rejected, progress->nextIndex); - return false; - } - - progress->nextIndex = TMAX(TMIN(rejected, matchHint + 1), 1); - - progress->probeSent = false; - return true; -} - -// IsPaused returns whether sending log entries to this node has been throttled. -// This is done when a node has rejected recent MsgApps, is currently waiting -// for a snapshot, or has reached the MaxInflightMsgs limit. In normal -// operation, this is false. A throttled node will be contacted less frequently -// until it has reached a state in which it's able to accept a steady stream of -// log entries again. -bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) { - switch (progress->state) { - case PROGRESS_STATE_PROBE: - return progress->probeSent; - case PROGRESS_STATE_REPLICATE: - return syncRaftInflightFull(progress->inflights); - case PROGRESS_STATE_SNAPSHOT: - return true; - default: - syncFatal("error sync state:%d", progress->state); - } -} - -SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id) { - SSyncRaftProgress** ppProgress = (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*)); - if (ppProgress == NULL) { - return NULL; - } - - return *ppProgress; -} - -int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress) { - refProgress(progress); - taosHashPut(progressMap->progressMap, &progress->id, sizeof(SyncNodeId*), &progress, sizeof(SSyncRaftProgress*)); -} - -void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) { - SSyncRaftProgress** ppProgress = (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*)); - if (ppProgress == NULL) { - return; - } - unrefProgress(*ppProgress, NULL); - - taosHashRemove(progressMap->progressMap, &id, sizeof(SyncNodeId*)); -} - -bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) { - return taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*)) != NULL; -} - -bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) { - return syncRaftLogLastIndex(pRaft->log) + 1 == progress->nextIndex; -} - -// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or, -// optionally and if larger, the index of the pending snapshot. -void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) { - // If the original state is StateSnapshot, progress knows that - // the pending snapshot has been sent to this peer successfully, then - // probes from pendingSnapshot + 1. - if (progress->state == PROGRESS_STATE_SNAPSHOT) { - SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex; - resetProgressState(progress, PROGRESS_STATE_PROBE); - progress->nextIndex = TMAX(progress->matchIndex + 1, pendingSnapshotIndex + 1); - } else { - resetProgressState(progress, PROGRESS_STATE_PROBE); - progress->nextIndex = progress->matchIndex + 1; - } -} - -// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1. -void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) { - resetProgressState(progress, PROGRESS_STATE_REPLICATE); - progress->nextIndex = progress->matchIndex + 1; -} - -// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending -// snapshot index. -void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) { - resetProgressState(progress, PROGRESS_STATE_SNAPSHOT); - progress->pendingSnapshotIndex = snapshotIndex; -} - -void syncRaftCopyProgress(const SSyncRaftProgress* progress, SSyncRaftProgress* out) { - memcpy(out, progress, sizeof(SSyncRaftProgress)); -} - -void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap) { - progressMap->progressMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); -} - -void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap) { - syncRaftVisitProgressMap(progressMap, unrefProgress, NULL); - taosHashCleanup(progressMap->progressMap); -} - -void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap) { - taosHashClear(progressMap->progressMap); -} - -void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to) { - syncRaftVisitProgressMap(from, copyProgress, to); -} - -bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress) { - SSyncRaftProgress **ppProgress = taosHashIterate(progressMap->progressMap, pProgress); - if (ppProgress == NULL) { - return true; - } - - *pProgress = *(*ppProgress); - return false; -} - -bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg) { - SSyncRaftProgress *pProgress; - while (!syncRaftIterateProgressMap(progressMap, pProgress)) { - fp(pProgress, arg); - } -} - -static void copyProgress(SSyncRaftProgress* progress, void* arg) { - assert(progress->refCount > 0); - SSyncRaftProgressMap* to = (SSyncRaftProgressMap*)arg; - syncRaftAddToProgressMap(to, progress); -} - -static void refProgress(SSyncRaftProgress* progress) { - progress->refCount += 1; -} - -static void unrefProgress(SSyncRaftProgress* progress, void* arg) { - (void)arg; - progress->refCount -= 1; - assert(progress->refCount >= 0); - if (progress->refCount == 0) { - free(progress); - } -} - -// ResetState moves the Progress into the specified State, resetting ProbeSent, -// PendingSnapshot, and Inflights. -static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state) { - progress->probeSent = false; - progress->pendingSnapshotIndex = 0; - progress->state = state; - syncRaftInflightReset(progress->inflights); -} - -// ProbeAcked is called when this peer has accepted an append. It resets -// ProbeSent to signal that additional append messages should be sent without -// further delay. -static void probeAcked(SSyncRaftProgress* progress) { - progress->probeSent = false; -} diff --git a/source/libs/sync/src/sync_raft_progress_tracker.c b/source/libs/sync/src/sync_raft_progress_tracker.c deleted file mode 100644 index e0b4afae21221f50a2f17de3ef66c2e747a49707..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_progress_tracker.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "raft.h" -#include "sync_const.h" -#include "sync_raft_progress_tracker.h" -#include "sync_raft_proto.h" - -SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft) { - SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)malloc(sizeof(SSyncRaftProgressTracker)); - if (tracker == NULL) { - return NULL; - } - - tracker->votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - - syncRaftInitTrackConfig(&tracker->config); - tracker->pRaft = pRaft; - tracker->maxInflightMsgs = kSyncRaftMaxInflghtMsgs; - - return tracker; -} - -void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config) { - syncRaftInitNodeMap(&config->learners); - syncRaftInitNodeMap(&config->learnersNext); - syncRaftInitQuorumJointConfig(&config->voters); - config->autoLeave = false; -} - -void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config) { - syncRaftFreeNodeMap(&config->learners); - syncRaftFreeNodeMap(&config->learnersNext); - syncRaftFreeNodeMap(&config->voters.incoming); - syncRaftFreeNodeMap(&config->voters.outgoing); -} - -// ResetVotes prepares for a new round of vote counting via recordVote. -void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) { - taosHashClear(tracker->votesMap); -} - -void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) { - syncRaftVisitProgressMap(&tracker->progressMap, visit, arg); -} - -// RecordVote records that the node with the given id voted for this Raft -// instance if v == true (and declined it otherwise). -void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant) { - ESyncRaftVoteType* pType = taosHashGet(tracker->votesMap, &id, sizeof(SyncNodeId*)); - if (pType != NULL) { - return; - } - - taosHashPut(tracker->votesMap, &id, sizeof(SyncNodeId), &grant, sizeof(bool*)); -} - -void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) { - memcpy(to, from, sizeof(SSyncRaftProgressTrackerConfig)); -} - -int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - // NB: intentionally allow the empty config. In production we'll never see a - // non-empty config (we prevent it from being created) but we will need to - // be able to *create* an initial config, for example during bootstrap (or - // during tests). Instead of having to hand-code this, we allow - // transitioning from an empty config into any other legal and non-empty - // config. - if (!syncRaftIsAllNodeInProgressMap(&config->voters.incoming, progressMap)) return -1; - if (!syncRaftIsAllNodeInProgressMap(&config->voters.outgoing, progressMap)) return -1; - if (!syncRaftIsAllNodeInProgressMap(&config->learners, progressMap)) return -1; - if (!syncRaftIsAllNodeInProgressMap(&config->learnersNext, progressMap)) return -1; - return 0; -} - -// TallyVotes returns the number of granted and rejected Votes, and whether the -// election outcome is known. -ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted) { - SSyncRaftProgress* progress = NULL; - int r, g; - - // Make sure to populate granted/rejected correctly even if the Votes slice - // contains members no longer part of the configuration. This doesn't really - // matter in the way the numbers are used (they're informational), but might - // as well get it right. - while (!syncRaftIterateProgressMap(&tracker->progressMap, progress)) { - if (progress->id == SYNC_NON_NODE_ID) { - continue; - } - - bool* v = taosHashGet(tracker->votesMap, &progress->id, sizeof(SyncNodeId*)); - if (v == NULL) { - continue; - } - - if (*v) { - g++; - } else { - r++; - } - } - - if (rejected) *rejected = r; - if (granted) *granted = g; - return syncRaftVoteResult(&(tracker->config.voters), tracker->votesMap); -} - -void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs) { - syncRaftCopyNodeMap(&tracker->config.voters.incoming, &cs->voters); - syncRaftCopyNodeMap(&tracker->config.voters.outgoing, &cs->votersOutgoing); - syncRaftCopyNodeMap(&tracker->config.learners, &cs->learners); - syncRaftCopyNodeMap(&tracker->config.learnersNext, &cs->learnersNext); - cs->autoLeave = tracker->config.autoLeave; -} - -static void matchAckIndexer(SyncNodeId id, void* arg, SyncIndex* index) { - SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)arg; - SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&tracker->progressMap, id); - if (progress == NULL) { - *index = 0; - return; - } - *index = progress->matchIndex; -} - -// Committed returns the largest log index known to be committed based on what -// the voting members of the group have acknowledged. -SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker) { - return syncRaftJointConfigCommittedIndex(&tracker->config.voters, matchAckIndexer, tracker); -} - -static void visitProgressActive(SSyncRaftProgress* progress, void* arg) { - SHashObj* votesMap = (SHashObj*)arg; - taosHashPut(votesMap, &progress->id, sizeof(SyncNodeId), &progress->recentActive, sizeof(bool)); -} - -// QuorumActive returns true if the quorum is active from the view of the local -// raft state machine. Otherwise, it returns false. -bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker) { - SHashObj* votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); - syncRaftVisitProgressMap(&tracker->progressMap, visitProgressActive, votesMap); - - return syncRaftVoteResult(&tracker->config.voters, votesMap) == SYNC_RAFT_VOTE_WON; -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_quorum_joint.c b/source/libs/sync/src/sync_raft_quorum_joint.c deleted file mode 100644 index 70c078b6f586a4a655f977687306c63def07b316..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_quorum_joint.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync_raft_node_map.h" -#include "sync_raft_quorum_majority.h" -#include "sync_raft_quorum_joint.h" -#include "sync_raft_quorum.h" - -/** - * syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns - * a result indicating whether the vote is pending, lost, or won. A joint quorum - * requires both majority quorums to vote in favor. - **/ -ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap) { - ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votesMap); - ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votesMap); - - if (r1 == r2) { - // If they agree, return the agreed state. - return r1; - } - - if (r1 == SYNC_RAFT_VOTE_LOST || r2 == SYNC_RAFT_VOTE_LOST) { - // If either config has lost, loss is the only possible outcome. - return SYNC_RAFT_VOTE_LOST; - } - - // One side won, the other one is pending, so the whole outcome is. - return SYNC_RAFT_VOTE_PENDING; -} - -void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config) { - syncRaftInitNodeMap(&config->incoming); - syncRaftInitNodeMap(&config->outgoing); -} - -void syncRaftFreeQuorumJointConfig(SSyncRaftQuorumJointConfig* config) { - syncRaftFreeNodeMap(&config->incoming); - syncRaftFreeNodeMap(&config->outgoing); -} - -void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) { - syncRaftAddToNodeMap(&config->incoming, id); -} - -void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) { - syncRaftRemoveFromNodeMap(&config->incoming, id); -} - -void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap) { - syncRaftCopyNodeMap(&config->incoming, nodeMap); - - syncRaftUnionNodeMap(&config->outgoing, nodeMap); -} - -SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg) { - SyncIndex index0, index1; - - index0 = syncRaftMajorityConfigCommittedIndex(&config->incoming, indexer, arg); - index1 = syncRaftMajorityConfigCommittedIndex(&config->outgoing, indexer, arg); - - return index0 < index1 ? index0 : index1; -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_quorum_majority.c b/source/libs/sync/src/sync_raft_quorum_majority.c deleted file mode 100644 index 313f213cdac5a7cb99a5598991c8e9171161f3d6..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_quorum_majority.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync_const.h" -#include "sync_raft_quorum.h" -#include "sync_raft_quorum_majority.h" -#include "sync_raft_node_map.h" - -// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns -// a result indicating whether the vote is pending (i.e. neither a quorum of -// yes/no has been reached), won (a quorum of yes has been reached), or lost (a -// quorum of no has been reached). -ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap) { - int n = syncRaftNodeMapSize(config); - if (n == 0) { - // By convention, the elections on an empty config win. This comes in - // handy with joint quorums because it'll make a half-populated joint - // quorum behave like a majority quorum. - return SYNC_RAFT_VOTE_WON; - } - - int i, g, r, missing; - i = g = r = missing = 0; - SyncNodeId* pId = NULL; - while (!syncRaftIterateNodeMap(config, pId)) { - const bool* v = (const bool*)taosHashGet(votesMap, pId, sizeof(SyncNodeId*)); - if (v == NULL) { - missing += 1; - continue; - } - - if (*v) { - g +=1; - } else { - r += 1; - } - } - - int quorum = n / 2 + 1; - if (g >= quorum) { - return SYNC_RAFT_VOTE_WON; - } - if (g + missing >= quorum) { - return SYNC_RAFT_VOTE_PENDING; - } - - return SYNC_RAFT_VOTE_LOST; -} - -int compSyncIndex(const void * elem1, const void * elem2) { - SyncIndex index1 = *((SyncIndex*)elem1); - SyncIndex index2 = *((SyncIndex*)elem1); - if (index1 > index2) return 1; - if (index1 < index2) return -1; - return 0; -} - -SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg) { - int n = syncRaftNodeMapSize(config); - if (n == 0) { - // This plays well with joint quorums which, when one half is the zero - // MajorityConfig, should behave like the other half. - return kMaxCommitIndex; - } - - // Use an on-stack slice to collect the committed indexes when n <= 7 - // (otherwise we alloc). The alternative is to stash a slice on - // MajorityConfig, but this impairs usability (as is, MajorityConfig is just - // a map, and that's nice). The assumption is that running with a - // replication factor of >7 is rare, and in cases in which it happens - // performance is a lesser concern (additionally the performance - // implications of an allocation here are far from drastic). - SyncIndex* srt = NULL; - SyncIndex srk[TSDB_MAX_REPLICA]; - if (n > TSDB_MAX_REPLICA) { - srt = (SyncIndex*)malloc(sizeof(SyncIndex) * n); - if (srt == NULL) { - return kMaxCommitIndex; - } - } else { - srt = &srk[0]; - } - - // Fill the slice with the indexes observed. Any unused slots will be - // left as zero; these correspond to voters that may report in, but - // haven't yet. We fill from the right (since the zeroes will end up on - // the left after sorting below anyway). - SyncNodeId *pId = NULL; - int i = 0; - SyncIndex index; - while (!syncRaftIterateNodeMap(config, pId)) { - indexer(*pId, arg, &index); - srt[i++] = index; - } - - // Sort by index. Use a bespoke algorithm (copied from the stdlib's sort - // package) to keep srt on the stack. - qsort(srt, n, sizeof(SyncIndex), compSyncIndex); - - // The smallest index into the array for which the value is acked by a - // quorum. In other words, from the end of the slice, move n/2+1 to the - // left (accounting for zero-indexing). - index = srt[n - (n/2 + 1)]; - if (srt != &srk[0]) { - free(srt); - } - - return index; -} \ No newline at end of file diff --git a/source/libs/sync/src/sync_raft_restore.c b/source/libs/sync/src/sync_raft_restore.c deleted file mode 100644 index d1acd3e8e954a7dea26adba6f01ad6963c644008..0000000000000000000000000000000000000000 --- a/source/libs/sync/src/sync_raft_restore.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "sync_raft_config_change.h" -#include "sync_raft_restore.h" -#include "sync_raft_progress_tracker.h" - -static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t); -static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in); - -// syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and -// runs a sequence of changes enacting the configuration described in the -// ConfState. -// -// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure -// the Changer only needs a ProgressMap (not a whole Tracker) at which point -// this can just take LastIndex and MaxInflight directly instead and cook up -// the results from that alone. -int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs, - SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) { - SSyncConfChangeSingleArray outgoing; - SSyncConfChangeSingleArray incoming; - SSyncConfChangeSingleArray css; - SSyncRaftProgressTracker* tracker = changer->tracker; - int i, ret; - - syncRaftInitConfArray(&outgoing); - syncRaftInitConfArray(&incoming); - - syncRaftInitTrackConfig(config); - syncRaftInitProgressMap(progressMap); - - ret = toConfChangeSingle(cs, &outgoing, &incoming); - if (ret != 0) { - goto out; - } - - if (syncRaftConfArrayIsEmpty(&outgoing)) { - // No outgoing config, so just apply the incoming changes one by one. - for (i = 0; i < incoming.n; ++i) { - css = (SSyncConfChangeSingleArray) { - .n = 1, - .changes = &incoming.changes[i], - }; - ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap); - if (ret != 0) { - goto out; - } - - syncRaftCopyTrackerConfig(config, &changer->tracker->config); - syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap); - } - } else { - // The ConfState describes a joint configuration. - // - // First, apply all of the changes of the outgoing config one by one, so - // that it temporarily becomes the incoming active config. For example, - // if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&(). - for (i = 0; i < outgoing.n; ++i) { - css = (SSyncConfChangeSingleArray) { - .n = 1, - .changes = &outgoing.changes[i], - }; - ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap); - if (ret != 0) { - goto out; - } - syncRaftCopyTrackerConfig(config, &changer->tracker->config); - syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap); - } - - ret = syncRaftChangerEnterJoint(changer, cs->autoLeave, &incoming, config, progressMap); - if (ret != 0) { - goto out; - } - } - -out: - syncRaftFreeConfArray(&incoming); - syncRaftFreeConfArray(&outgoing); - - return ret; -} - -static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t) { - SyncNodeId* pId = NULL; - - while (!syncRaftIterateNodeMap(nodeMap, pId)) { - out->changes[*i] = (SSyncConfChangeSingle) { - .type = t, - .nodeId = *pId, - }; - *i += 1; - } -} - -// toConfChangeSingle translates a conf state into 1) a slice of operations creating -// first the config that will become the outgoing one, and then the incoming one, and -// b) another slice that, when applied to the config resulted from 1), represents the -// ConfState. -static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) { - int i; - - out->n = syncRaftNodeMapSize(&cs->votersOutgoing); - out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n); - if (out->changes == NULL) { - out->n = 0; - return -1; - } - in->n = syncRaftNodeMapSize(&cs->votersOutgoing) + - syncRaftNodeMapSize(&cs->voters) + - syncRaftNodeMapSize(&cs->learners) + - syncRaftNodeMapSize(&cs->learnersNext); - out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n); - if (in->changes == NULL) { - in->n = 0; - return -1; - } - - // Example to follow along this code: - // voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4) - // - // This means that before entering the joint config, the configuration - // had voters (1 2 4 6) and perhaps some learners that are already gone. - // The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6) - // are no longer voters; however 4 is poised to become a learner upon leaving - // the joint state. - // We can't tell whether 5 was a learner before entering the joint config, - // but it doesn't matter (we'll pretend that it wasn't). - // - // The code below will construct - // outgoing = add 1; add 2; add 4; add 6 - // incoming = remove 1; remove 2; remove 4; remove 6 - // add 1; add 2; add 3; - // add-learner 5; - // add-learner 4; - // - // So, when starting with an empty config, after applying 'outgoing' we have - // - // quorum=(1 2 4 6) - // - // From which we enter a joint state via 'incoming' - // - // quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4) - // - // as desired. - - // If there are outgoing voters, first add them one by one so that the - // (non-joint) config has them all. - i = 0; - addToConfChangeSingleArray(out, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_AddNode); - assert(i == out->n); - - // We're done constructing the outgoing slice, now on to the incoming one - // (which will apply on top of the config created by the outgoing slice). - i = 0; - - // First, we'll remove all of the outgoing voters. - addToConfChangeSingleArray(in, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_RemoveNode); - - // Then we'll add the incoming voters and learners. - addToConfChangeSingleArray(in, &i, &cs->voters, SYNC_RAFT_Conf_AddNode); - addToConfChangeSingleArray(in, &i, &cs->learners, SYNC_RAFT_Conf_AddLearnerNode); - addToConfChangeSingleArray(in, &i, &cs->learnersNext, SYNC_RAFT_Conf_AddLearnerNode); - assert(i == in->n); - - return 0; -} \ No newline at end of file diff --git a/source/libs/sync/test/raftTests.cpp b/source/libs/sync/test/raftTests.cpp deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/source/libs/transport/CMakeLists.txt b/source/libs/transport/CMakeLists.txt index 5c214b75a11fd9013f923bf67e7ecedb80d6afaa..a2e82201bfe416b731b3a1109c37687899c93cae 100644 --- a/source/libs/transport/CMakeLists.txt +++ b/source/libs/transport/CMakeLists.txt @@ -14,17 +14,18 @@ target_link_libraries( PUBLIC common ) if (${BUILD_WITH_UV_TRANS}) +if (${BUILD_WITH_UV}) target_include_directories( transport PUBLIC "${CMAKE_SOURCE_DIR}/contrib/libuv/include" ) - -#LINK_DIRECTORIES("${CMAKE_SOURCE_DIR}/debug/contrib/libuv") + target_link_libraries( transport PUBLIC uv_a ) add_definitions(-DUSE_UV) +endif(${BUILD_WITH_UV}) endif(${BUILD_WITH_UV_TRANS}) if (${BUILD_TEST}) diff --git a/source/nodes/src/nodesTraverseFuncs.c b/source/nodes/src/nodesTraverseFuncs.c index 0702254b5f664e6f4e2c401dd8921a95daae11db..2280d0b922fc4c8f0a38d772ed0419e0c2eb0480 100644 --- a/source/nodes/src/nodesTraverseFuncs.c +++ b/source/nodes/src/nodesTraverseFuncs.c @@ -75,6 +75,35 @@ static bool walkNode(SNode* pNode, ETraversalOrder order, FQueryNodeWalker walke case QUERY_NODE_ORDER_BY_EXPR: res = walkNode(((SOrderByExprNode*)pNode)->pExpr, order, walker, pContext); break; + case QUERY_NODE_STATE_WINDOW: + res = walkNode(((SStateWindowNode*)pNode)->pCol, order, walker, pContext); + break; + case QUERY_NODE_SESSION_WINDOW: + res = walkNode(((SSessionWindowNode*)pNode)->pCol, order, walker, pContext); + break; + case QUERY_NODE_INTERVAL_WINDOW: { + SIntervalWindowNode* pInterval = (SIntervalWindowNode*)pNode; + res = walkNode(pInterval->pInterval, order, walker, pContext); + if (res) { + res = walkNode(pInterval->pOffset, order, walker, pContext); + } + if (res) { + res = walkNode(pInterval->pSliding, order, walker, pContext); + } + if (res) { + res = walkNode(pInterval->pFill, order, walker, pContext); + } + break; + } + case QUERY_NODE_NODE_LIST: + res = walkList(((SNodeListNode*)pNode)->pNodeList, order, walker, pContext); + break; + case QUERY_NODE_FILL: + res = walkNode(((SFillNode*)pNode)->pValues, order, walker, pContext); + break; + case QUERY_NODE_RAW_EXPR: + res = walkNode(((SRawExprNode*)pNode)->pNode, order, walker, pContext); + break; default: break; } diff --git a/source/nodes/src/nodesUtilFuncs.c b/source/nodes/src/nodesUtilFuncs.c index af6cec755d3a5305dc8a684ab51ff7a61009275c..5acb9fdf7c858208836204013874c2461c1b68cf 100644 --- a/source/nodes/src/nodesUtilFuncs.c +++ b/source/nodes/src/nodesUtilFuncs.c @@ -58,6 +58,12 @@ SNode* nodesMakeNode(ENodeType type) { return makeNode(type, sizeof(SSessionWindowNode)); case QUERY_NODE_INTERVAL_WINDOW: return makeNode(type, sizeof(SIntervalWindowNode)); + case QUERY_NODE_NODE_LIST: + return makeNode(type, sizeof(SNodeListNode)); + case QUERY_NODE_FILL: + return makeNode(type, sizeof(SFillNode)); + case QUERY_NODE_RAW_EXPR: + return makeNode(type, sizeof(SRawExprNode)); case QUERY_NODE_SET_OPERATOR: return makeNode(type, sizeof(SSetOperator)); case QUERY_NODE_SELECT_STMT: @@ -74,7 +80,7 @@ static bool destroyNode(SNode* pNode, void* pContext) { switch (nodeType(pNode)) { case QUERY_NODE_VALUE: tfree(((SValueNode*)pNode)->literal); - break; + break; default: break; } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index ca0401113c2f8c84bd4dce3ac93e3fa53e171c89..a451b1e7080fb86c859feacda2a82109ffe01472 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -228,17 +228,19 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_NOT_EXIST, "VGroup does not exist // mnode-stable TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_ALREADY_EXIST, "Stable already exists") TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_NOT_EXIST, "Stable not exist") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC, "Stable confilct with topic") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOO_MANY_STBS, "Too many stables") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STB, "Invalid stable name") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STB_OPTION, "Invalid stable options") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_OPTION_UNCHNAGED, "Stable options not changed") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STB_ALTER_OPTION, "Invalid stable alter options") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_STB_OPTION_UNCHNAGED, "Stable option unchanged") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ROW_BYTES, "Invalid row bytes") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOO_MANY_TAGS, "Too many tags") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_TAG_ALREAY_EXIST, "Tag already exists") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TAG_ALREADY_EXIST, "Tag already exists") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TAG_NOT_EXIST, "Tag does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TOO_MANY_COLUMNS, "Too many columns") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_ALREAY_EXIST, "Column already exists") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_ALREADY_EXIST, "Column already exists") TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_NOT_EXIST, "Column does not exist") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ROW_BYTES, "Invalid row bytes") // mnode-func TAOS_DEFINE_ERROR(TSDB_CODE_MND_FUNC_ALREADY_EXIST, "Func already exists") @@ -418,6 +420,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_CTG_INVALID_INPUT, "invalid catalog input TAOS_DEFINE_ERROR(TSDB_CODE_CTG_NOT_READY, "catalog is not ready") TAOS_DEFINE_ERROR(TSDB_CODE_CTG_MEM_ERROR, "catalog memory error") TAOS_DEFINE_ERROR(TSDB_CODE_CTG_SYS_ERROR, "catalog system error") +TAOS_DEFINE_ERROR(TSDB_CODE_CTG_DB_DROPPED, "Database is dropped") +TAOS_DEFINE_ERROR(TSDB_CODE_CTG_OUT_OF_SERVICE, "catalog is out of service") //scheduler TAOS_DEFINE_ERROR(TSDB_CODE_SCH_STATUS_ERROR, "scheduler status error") diff --git a/src/connector/grafanaplugin b/src/connector/grafanaplugin deleted file mode 160000 index 4a4d79099b076b8ff12d5b4fdbcba54049a6866d..0000000000000000000000000000000000000000 --- a/src/connector/grafanaplugin +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4a4d79099b076b8ff12d5b4fdbcba54049a6866d