未验证 提交 32763aa9 编写于 作者: H Hongze Cheng 提交者: GitHub

Merge pull request #9240 from taosdata/feature/vnode

Merge branch '3.0' into feature/vnode
......@@ -19,5 +19,5 @@ target_link_libraries(
# test
if(${BUILD_TEST})
#add_subdirectory(test)
add_subdirectory(test)
endif(${BUILD_TEST})
\ No newline at end of file
......@@ -71,6 +71,7 @@ struct SVnode {
SWal* pWal;
SVnodeSync* pSync;
SVnodeFS* pFs;
tsem_t canCommit;
};
int vnodeScheduleTask(SVnodeTask* task);
......
......@@ -38,9 +38,10 @@ int vnodeCommit(void *arg) {
metaCommit(pVnode->pMeta);
tqCommit(pVnode->pTq);
tsdbCommit(pVnode->pTq);
tsdbCommit(pVnode->pTsdb);
vnodeBufPoolRecycle(pVnode);
tsem_post(&(pVnode->canCommit));
// TODO
return 0;
}
......
......@@ -74,11 +74,14 @@ static SVnode *vnodeNew(const char *path, const SVnodeCfg *pVnodeCfg) {
pVnode->path = strdup(path);
vnodeOptionsCopy(&(pVnode->config), pVnodeCfg);
tsem_init(&(pVnode->canCommit), 0, 1);
return pVnode;
}
static void vnodeFree(SVnode *pVnode) {
if (pVnode) {
tsem_destroy(&(pVnode->canCommit));
tfree(pVnode->path);
free(pVnode);
}
......
......@@ -27,14 +27,14 @@ int vnodeProcessNoWalWMsgs(SVnode *pVnode, SRpcMsg *pMsg) {
}
int vnodeProcessWMsgs(SVnode *pVnode, SArray *pMsgs) {
SRpcMsg *pMsg;
SRpcMsg * pMsg;
SVnodeReq *pVnodeReq;
for (int i = 0; i < taosArrayGetSize(pMsgs); i++) {
pMsg = *(SRpcMsg **)taosArrayGet(pMsgs, i);
// ser request version
void *pBuf = pMsg->pCont;
void * pBuf = pMsg->pCont;
int64_t ver = pVnode->state.processed++;
taosEncodeFixedU64(&pBuf, ver);
......@@ -45,14 +45,14 @@ int vnodeProcessWMsgs(SVnode *pVnode, SArray *pMsgs) {
walFsync(pVnode->pWal, false);
// Apply each request now
for (int i = 0; i < taosArrayGetSize(pMsgs); i++) {
pMsg = *(SRpcMsg **)taosArrayGet(pMsgs, i);
SVnodeReq vReq;
// TODO: Integrate RAFT module here
return 0;
}
// Apply the request
{
void *ptr = vnodeMalloc(pVnode, pMsg->contLen);
int vnodeApplyWMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
SVnodeReq vReq;
void * ptr = vnodeMalloc(pVnode, pMsg->contLen);
if (ptr == NULL) {
// TODO: handle error
}
......@@ -92,21 +92,14 @@ int vnodeProcessWMsgs(SVnode *pVnode, SArray *pMsgs) {
}
pVnode->state.applied = ver;
}
// Check if it needs to commit
if (vnodeShouldCommit(pVnode)) {
tsem_wait(&(pVnode->canCommit));
if (vnodeAsyncCommit(pVnode) < 0) {
// TODO: handle error
}
}
}
return 0;
}
int vnodeApplyWMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
// TODO
return 0;
}
......
......@@ -166,6 +166,21 @@ static void vtClearMsgBatch(SArray *pMsgArr) {
taosArrayClear(pMsgArr);
}
static void vtProcessAndApplyReqs(SVnode *pVnode, SArray *pMsgArr) {
int rcode;
SRpcMsg *pReq;
SRpcMsg *pRsp;
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
GTEST_ASSERT_EQ(rcode, 0);
for (size_t i = 0; i < taosArrayGetSize(pMsgArr); i++) {
pReq = *(SRpcMsg **)taosArrayGet(pMsgArr, i);
rcode = vnodeApplyWMsg(pVnode, pReq, NULL);
GTEST_ASSERT_EQ(rcode, 0);
}
}
TEST(vnodeApiTest, vnode_simple_create_table_test) {
tb_uid_t suid = 1638166374163;
SRpcMsg *pMsg;
......@@ -189,8 +204,7 @@ TEST(vnodeApiTest, vnode_simple_create_table_test) {
sprintf(tbname, "st");
vtBuildCreateStbReq(suid, tbname, &pMsg);
taosArrayPush(pMsgArr, &pMsg);
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
ASSERT_EQ(rcode, 0);
vtProcessAndApplyReqs(pVnode, pMsgArr);
vtClearMsgBatch(pMsgArr);
// CREATE A LOT OF CHILD TABLES
......@@ -203,8 +217,7 @@ TEST(vnodeApiTest, vnode_simple_create_table_test) {
}
// Process request batch
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
ASSERT_EQ(rcode, 0);
vtProcessAndApplyReqs(pVnode, pMsgArr);
// Clear request batch
vtClearMsgBatch(pMsgArr);
......@@ -242,16 +255,14 @@ TEST(vnodeApiTest, vnode_simple_insert_test) {
sprintf(tbname, "st");
vtBuildCreateStbReq(suid, tbname, &pMsg);
taosArrayPush(pMsgArr, &pMsg);
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
GTEST_ASSERT_EQ(rcode, 0);
vtProcessAndApplyReqs(pVnode, pMsgArr);
vtClearMsgBatch(pMsgArr);
// 2. CREATE A CHILD TABLE
sprintf(tbname, "t0");
vtBuildCreateCtbReq(suid, tbname, &pMsg);
taosArrayPush(pMsgArr, &pMsg);
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
GTEST_ASSERT_EQ(rcode, 0);
vtProcessAndApplyReqs(pVnode, pMsgArr);
vtClearMsgBatch(pMsgArr);
// 3. WRITE A LOT OF TIME-SERIES DATA
......@@ -260,8 +271,7 @@ TEST(vnodeApiTest, vnode_simple_insert_test) {
vtBuildSubmitReq(&pMsg);
taosArrayPush(pMsgArr, &pMsg);
}
rcode = vnodeProcessWMsgs(pVnode, pMsgArr);
GTEST_ASSERT_EQ(rcode, 0);
vtProcessAndApplyReqs(pVnode, pMsgArr);
vtClearMsgBatch(pMsgArr);
}
......
......@@ -44,11 +44,13 @@ STQ* tqOpen(const char* path, STqCfg* tqConfig, STqLogReader* tqLogReader, SMemA
pTq->path = strdup(path);
pTq->tqConfig = tqConfig;
pTq->tqLogReader = tqLogReader;
#if 0
pTq->tqMemRef.pAlloctorFactory = allocFac;
pTq->tqMemRef.pAllocator = allocFac->create(allocFac);
if (pTq->tqMemRef.pAllocator == NULL) {
// TODO: error code of buffer pool
}
#endif
pTq->tqMeta = tqStoreOpen(path, (FTqSerialize)tqSerializeGroup, (FTqDeserialize)tqDeserializeGroup, free, 0);
if (pTq->tqMeta == NULL) {
// TODO: free STQ
......
aux_source_directory(src TSDB_SRC)
add_library(tsdb ${TSDB_SRC})
if(0)
add_library(tsdb ${TSDB_SRC})
else(0)
add_library(tsdb "")
target_sources(tsdb
PRIVATE
"src/tsdbCommit.c"
"src/tsdbMain.c"
"src/tsdbMemTable.c"
"src/tsdbOptions.c"
"src/tsdbWrite.c"
)
endif(0)
target_include_directories(
tsdb
PUBLIC "${CMAKE_SOURCE_DIR}/include/dnode/vnode/tsdb"
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"
)
target_link_libraries(
tsdb
PUBLIC os
......
......@@ -16,6 +16,7 @@
#ifndef _TD_TSDB_COMMIT_H_
#define _TD_TSDB_COMMIT_H_
#if 0
typedef struct {
int minFid;
int midFid;
......@@ -53,5 +54,6 @@ static FORCE_INLINE int tsdbGetFidLevel(int fid, SRtn *pRtn) {
return -1;
}
}
#endif
#endif /* _TD_TSDB_COMMIT_H_ */
\ No newline at end of file
......@@ -19,8 +19,12 @@
extern "C" {
#endif
#if 0
void *tsdbCompactImpl(STsdbRepo *pRepo);
#endif
#ifdef __cplusplus
}
#endif
......
......@@ -16,6 +16,8 @@
#ifndef _TD_TSDB_FS_H_
#define _TD_TSDB_FS_H_
#if 0
#define TSDB_FS_VERSION 0
// ================== TSDB global config
......@@ -113,4 +115,6 @@ static FORCE_INLINE int tsdbUnLockFS(STsdbFS* pFs) {
return 0;
}
#endif
#endif /* _TD_TSDB_FS_H_ */
......@@ -16,6 +16,8 @@
#ifndef _TS_TSDB_FILE_H_
#define _TS_TSDB_FILE_H_
#if 0
#define TSDB_FILE_HEAD_SIZE 512
#define TSDB_FILE_DELIMITER 0xF00AFA0F
#define TSDB_FILE_INIT_MAGIC 0xFFFFFFFF
......@@ -364,4 +366,5 @@ static FORCE_INLINE bool tsdbFSetIsOk(SDFileSet* pSet) {
return true;
}
#endif
#endif /* _TS_TSDB_FILE_H_ */
\ No newline at end of file
......@@ -16,10 +16,14 @@
#ifndef _TD_TSDB_HEALTH_H_
#define _TD_TSDB_HEALTH_H_
#if 0
bool tsdbUrgeQueryFree(STsdbRepo* pRepo);
int32_t tsdbInsertNewBlock(STsdbRepo* pRepo);
bool tsdbIdleMemEnough();
bool tsdbAllowNewBlock(STsdbRepo* pRepo);
#endif
#endif /* _TD_TSDB_BUFFER_H_ */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_IDX_H_
#define _TD_TSDB_IDX_H_
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif /*_TD_TSDB_IDX_H_*/
\ No newline at end of file
......@@ -15,6 +15,7 @@
#ifndef _TD_TSDB_READ_IMPL_H_
#define _TD_TSDB_READ_IMPL_H_
#if 0
#include "tfs.h"
#include "tsdb.h"
......@@ -150,4 +151,6 @@ static FORCE_INLINE int tsdbMakeRoom(void **ppBuf, size_t size) {
return 0;
}
#endif
#endif /*_TD_TSDB_READ_IMPL_H_*/
......@@ -16,6 +16,8 @@
#ifndef TSDB_ROW_MERGE_BUF_H
#define TSDB_ROW_MERGE_BUF_H
#if 0
#ifdef __cplusplus
extern "C" {
#endif
......@@ -42,4 +44,6 @@ static FORCE_INLINE void tsdbFreeMergeBuf(SMergeBuf buf) {
}
#endif
#endif
#endif /* ifndef TSDB_ROW_MERGE_BUF_H */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_SMA_H_
#define _TD_TSDB_SMA_H_
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif /*_TD_TSDB_SMA_H_*/
\ No newline at end of file
......@@ -16,6 +16,7 @@
#ifndef _TD_TSDB_INT_H_
#define _TD_TSDB_INT_H_
#if 0
// // TODO: remove the include
// #include <errno.h>
// #include <fcntl.h>
......@@ -144,4 +145,5 @@ static FORCE_INLINE int tsdbGetNextMaxTables(int tid) {
}
#endif
#endif
#endif /* _TD_TSDB_INT_H_ */
......@@ -31,3 +31,1672 @@ int tsdbCommit(STsdb *pTsdb) {
// tsem_post(&(pTsdb->canCommit));
return 0;
}
#if 0
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
extern int32_t tsTsdbMetaCompactRatio;
#define TSDB_MAX_SUBBLOCKS 8
static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) {
if (key < 0) {
return (int)((key + 1) / tsTickPerDay[precision] / days - 1);
} else {
return (int)((key / tsTickPerDay[precision] / days));
}
}
typedef struct {
SRtn rtn; // retention snapshot
SFSIter fsIter; // tsdb file iterator
int niters; // memory iterators
SCommitIter *iters;
bool isRFileSet; // read and commit FSET
SReadH readh;
SDFileSet wSet;
bool isDFileSame;
bool isLFileSame;
TSKEY minKey;
TSKEY maxKey;
SArray * aBlkIdx; // SBlockIdx array
STable * pTable;
SArray * aSupBlk; // Table super-block array
SArray * aSubBlk; // table sub-block array
SDataCols * pDataCols;
} SCommitH;
#define TSDB_COMMIT_REPO(ch) TSDB_READ_REPO(&(ch->readh))
#define TSDB_COMMIT_REPO_ID(ch) REPO_ID(TSDB_READ_REPO(&(ch->readh)))
#define TSDB_COMMIT_WRITE_FSET(ch) (&((ch)->wSet))
#define TSDB_COMMIT_TABLE(ch) ((ch)->pTable)
#define TSDB_COMMIT_HEAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_HEAD)
#define TSDB_COMMIT_DATA_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_DATA)
#define TSDB_COMMIT_LAST_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_LAST)
#define TSDB_COMMIT_BUF(ch) TSDB_READ_BUF(&((ch)->readh))
#define TSDB_COMMIT_COMP_BUF(ch) TSDB_READ_COMP_BUF(&((ch)->readh))
#define TSDB_COMMIT_DEFAULT_ROWS(ch) TSDB_DEFAULT_BLOCK_ROWS(TSDB_COMMIT_REPO(ch)->config.maxRowsPerFileBlock)
#define TSDB_COMMIT_TXN_VERSION(ch) FS_TXN_VERSION(REPO_FS(TSDB_COMMIT_REPO(ch)))
static int tsdbCommitMeta(STsdbRepo *pRepo);
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact);
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid);
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile);
static int tsdbCommitTSData(STsdbRepo *pRepo);
static void tsdbStartCommit(STsdbRepo *pRepo);
static void tsdbEndCommit(STsdbRepo *pRepo, int eno);
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static int tsdbCreateCommitIters(SCommitH *pCommith);
static void tsdbDestroyCommitIters(SCommitH *pCommith);
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key);
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo);
static void tsdbDestroyCommitH(SCommitH *pCommith);
static int tsdbGetFidLevel(int fid, SRtn *pRtn);
static int tsdbNextCommitFid(SCommitH *pCommith);
static int tsdbCommitToTable(SCommitH *pCommith, int tid);
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable);
static int tsdbComparKeyBlock(const void *arg1, const void *arg2);
static int tsdbWriteBlockInfo(SCommitH *pCommih);
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData);
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx);
static int tsdbMoveBlock(SCommitH *pCommith, int bidx);
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks);
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock);
static void tsdbResetCommitFile(SCommitH *pCommith);
static void tsdbResetCommitTable(SCommitH *pCommith);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError);
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo);
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update);
void *tsdbCommitData(STsdbRepo *pRepo) {
if (pRepo->imem == NULL) {
return NULL;
}
tsdbStartCommit(pRepo);
// Commit to update meta file
if (tsdbCommitMeta(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
// Create the iterator to read from cache
if (tsdbCommitTSData(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
ASSERT(terrno != TSDB_CODE_SUCCESS);
pRepo->code = terrno;
tsdbEndCommit(pRepo, terrno);
return NULL;
}
int tsdbApplyRtnOnFSet(STsdbRepo *pRepo, SDFileSet *pSet, SRtn *pRtn) {
SDiskID did;
SDFileSet nSet;
STsdbFS * pfs = REPO_FS(pRepo);
int level;
ASSERT(pSet->fid >= pRtn->minFid);
level = tsdbGetFidLevel(pSet->fid, pRtn);
tfsAllocDisk(level, &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
if (did.level > TSDB_FSET_LEVEL(pSet)) {
// Need to move the FSET to higher level
tsdbInitDFileSet(&nSet, did, REPO_ID(pRepo), pSet->fid, FS_TXN_VERSION(pfs));
if (tsdbCopyDFileSet(pSet, &nSet) < 0) {
tsdbError("vgId:%d failed to copy FSET %d from level %d to level %d since %s", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), did.level, tstrerror(terrno));
return -1;
}
if (tsdbUpdateDFileSet(pfs, &nSet) < 0) {
return -1;
}
tsdbInfo("vgId:%d FSET %d is copied from level %d disk id %d to level %d disk id %d", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet), did.level, did.id);
} else {
// On a correct level
if (tsdbUpdateDFileSet(pfs, pSet) < 0) {
return -1;
}
}
return 0;
}
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf,
SBlockIdx *pIdx) {
size_t nSupBlocks;
size_t nSubBlocks;
uint32_t tlen;
SBlockInfo *pBlkInfo;
int64_t offset;
SBlock * pBlock;
memset(pIdx, 0, sizeof(*pIdx));
nSupBlocks = taosArrayGetSize(pSupA);
nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA);
if (nSupBlocks <= 0) {
// No data (data all deleted)
return 0;
}
tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM));
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
pBlkInfo = *ppBuf;
pBlkInfo->delimiter = TSDB_FILE_DELIMITER;
pBlkInfo->tid = TABLE_TID(pTable);
pBlkInfo->uid = TABLE_UID(pTable);
memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock));
if (nSubBlocks > 0) {
memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock));
for (int i = 0; i < nSupBlocks; i++) {
pBlock = pBlkInfo->blocks + i;
if (pBlock->numOfSubBlocks > 1) {
pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks);
}
}
}
taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen);
if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM)));
// Set pIdx
pBlock = taosArrayGetLast(pSupA);
pIdx->tid = TABLE_TID(pTable);
pIdx->uid = TABLE_UID(pTable);
pIdx->hasLast = pBlock->last ? 1 : 0;
pIdx->maxKey = pBlock->keyLast;
pIdx->numOfBlocks = (uint32_t)nSupBlocks;
pIdx->len = tlen;
pIdx->offset = (uint32_t)offset;
return 0;
}
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
SBlockIdx *pBlkIdx;
size_t nidx = taosArrayGetSize(pIdxA);
int tlen = 0, size;
int64_t offset;
if (nidx <= 0) {
// All data are deleted
pHeadf->info.offset = 0;
pHeadf->info.len = 0;
return 0;
}
for (size_t i = 0; i < nidx; i++) {
pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i);
size = tsdbEncodeSBlockIdx(NULL, pBlkIdx);
if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1;
void *ptr = POINTER_SHIFT(*ppBuf, tlen);
tsdbEncodeSBlockIdx(&ptr, pBlkIdx);
tlen += size;
}
tlen += sizeof(TSCKSUM);
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen);
if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM)));
pHeadf->info.offset = (uint32_t)offset;
pHeadf->info.len = tlen;
return 0;
}
// =================== Commit Meta Data
static int tsdbInitCommitMetaFile(STsdbRepo *pRepo, SMFile* pMf, bool open) {
STsdbFS * pfs = REPO_FS(pRepo);
SMFile * pOMFile = pfs->cstatus->pmf;
SDiskID did;
// Create/Open a meta file or open the existing file
if (pOMFile == NULL) {
// Create a new meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
if (open && tsdbCreateMFile(pMf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
} else {
tsdbInitMFileEx(pMf, pOMFile);
if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
}
static int tsdbCommitMeta(STsdbRepo *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
SMemTable *pMem = pRepo->imem;
SMFile * pOMFile = pfs->cstatus->pmf;
SMFile mf;
SActObj * pAct = NULL;
SActCont * pCont = NULL;
SListNode *pNode = NULL;
ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
if (listNEles(pMem->actList) <= 0) {
// no meta data to commit, just keep the old meta file
tsdbUpdateMFile(pfs, pOMFile);
if (tsTsdbMetaCompactRatio > 0) {
if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
return -1;
}
int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
if (ret < 0) tsdbError("compact meta file error");
return ret;
}
return 0;
} else {
if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
return -1;
}
}
// Loop to write
while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
pAct = (SActObj *)pNode->data;
if (pAct->act == TSDB_UPDATE_META) {
pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
(void)tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else if (pAct->act == TSDB_DROP_META) {
if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else {
ASSERT(false);
}
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbUpdateMFile(pfs, &mf);
if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
tsdbError("compact meta file error");
}
return 0;
}
int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
int tlen = 0;
tlen += taosEncodeFixedU64(buf, pRecord->uid);
tlen += taosEncodeFixedI64(buf, pRecord->offset);
tlen += taosEncodeFixedI64(buf, pRecord->size);
return tlen;
}
void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
buf = taosDecodeFixedU64(buf, &(pRecord->uid));
buf = taosDecodeFixedI64(buf, &(pRecord->offset));
buf = taosDecodeFixedI64(buf, &(pRecord->size));
return buf;
}
void tsdbGetRtnSnap(STsdbRepo *pRepo, SRtn *pRtn) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
TSKEY minKey, midKey, maxKey, now;
now = taosGetTimestamp(pCfg->precision);
minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision];
midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision];
maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision];
pRtn->minKey = minKey;
pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision));
pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision));
pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision));
tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey,
pRtn->minFid, pRtn->midFid, pRtn->maxFid);
}
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
char buf[64] = "\0";
void * pBuf = buf;
SKVRecord rInfo;
int64_t offset;
// Seek to end of meta file
offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
if (offset < 0) {
return -1;
}
rInfo.offset = offset;
rInfo.uid = uid;
rInfo.size = contLen;
int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
return -1;
}
if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
return -1;
}
tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
SHashObj* cache = compact ? pfs->metaCacheComp : pfs->metaCache;
pMFile->info.nRecords++;
SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
if (pRecord != NULL) {
pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
} else {
pMFile->info.nRecords++;
}
taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
return 0;
}
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
SKVRecord rInfo = {0};
char buf[128] = "\0";
SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
if (pRecord == NULL) {
tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
return -1;
}
rInfo.offset = -pRecord->offset;
rInfo.uid = pRecord->uid;
rInfo.size = pRecord->size;
void *pBuf = buf;
tsdbEncodeKVRecord(&pBuf, &rInfo);
if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
return -1;
}
pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
pMFile->info.nDels++;
pMFile->info.nRecords--;
pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
return 0;
}
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile) {
float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
float compactRatio = (float)(tsTsdbMetaCompactRatio)/100;
if (delPercent < compactRatio && tombPercent < compactRatio) {
return 0;
}
if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
tsdbError("open meta file %s compact fail", pMFile->f.rname);
return -1;
}
tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64 ",size:%" PRId64,
tsTsdbMetaCompactRatio, pMFile->info.nDels,pMFile->info.nRecords,pMFile->info.tombSize,pMFile->info.size);
SMFile mf;
SDiskID did;
// first create tmp meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
if (tsdbCreateMFile(&mf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// second iterator metaCache
int code = -1;
int64_t maxBufSize = 1024;
SKVRecord *pRecord;
void *pBuf = NULL;
pBuf = malloc((size_t)maxBufSize);
if (pBuf == NULL) {
goto _err;
}
// init Comp
assert(pfs->metaCacheComp == NULL);
pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
if (pfs->metaCacheComp == NULL) {
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, NULL);
while (pRecord) {
if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (pRecord->size > maxBufSize) {
maxBufSize = pRecord->size;
void* tmp = realloc(pBuf, (size_t)maxBufSize);
if (tmp == NULL) {
goto _err;
}
pBuf = tmp;
}
int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (nread < pRecord->size) {
tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
goto _err;
}
if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
tstrerror(terrno));
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, pRecord);
}
code = 0;
_err:
if (code == 0) TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbCloseMFile(pMFile);
if (code == 0) {
// rename meta.tmp -> meta
tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf), TSDB_FILE_FULL_NAME(pMFile));
taosRename(mf.f.aname,pMFile->f.aname);
tstrncpy(mf.f.aname, pMFile->f.aname, TSDB_FILENAME_LEN);
tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// update current meta file info
pfs->nstatus->pmf = NULL;
tsdbUpdateMFile(pfs, &mf);
taosHashCleanup(pfs->metaCache);
pfs->metaCache = pfs->metaCacheComp;
pfs->metaCacheComp = NULL;
} else {
// remove meta.tmp file
remove(mf.f.aname);
taosHashCleanup(pfs->metaCacheComp);
pfs->metaCacheComp = NULL;
}
tfree(pBuf);
ASSERT(mf.info.nDels == 0);
ASSERT(mf.info.tombSize == 0);
tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64,
code,mf.info.nRecords,mf.info.size);
return code;
}
// =================== Commit Time-Series Data
static int tsdbCommitTSData(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
SCommitH commith;
SDFileSet *pSet = NULL;
int fid;
memset(&commith, 0, sizeof(commith));
if (pMem->numOfRows <= 0) {
// No memory data, just apply retention on each file on disk
if (tsdbApplyRtn(pRepo) < 0) {
return -1;
}
return 0;
}
// Resource initialization
if (tsdbInitCommitH(&commith, pRepo) < 0) {
return -1;
}
// Skip expired memory data and expired FSET
tsdbSeekCommitIter(&commith, commith.rtn.minKey);
while ((pSet = tsdbFSIterNext(&(commith.fsIter)))) {
if (pSet->fid < commith.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
break;
}
}
// Loop to commit to each file
fid = tsdbNextCommitFid(&(commith));
while (true) {
// Loop over both on disk and memory
if (pSet == NULL && fid == TSDB_IVLD_FID) break;
if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) {
// Only has existing FSET but no memory data to commit in this
// existing FSET, only check if file in correct retention
if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
pSet = tsdbFSIterNext(&(commith.fsIter));
} else {
// Has memory data to commit
SDFileSet *pCSet;
int cfid;
if (pSet == NULL || pSet->fid > fid) {
// Commit to a new FSET with fid: fid
pCSet = NULL;
cfid = fid;
} else {
// Commit to an existing FSET
pCSet = pSet;
cfid = pSet->fid;
pSet = tsdbFSIterNext(&(commith.fsIter));
}
if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
fid = tsdbNextCommitFid(&commith);
}
}
tsdbDestroyCommitH(&commith);
return 0;
}
static void tsdbStartCommit(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
ASSERT(pMem->numOfRows > 0 || listNEles(pMem->actList) > 0);
tsdbInfo("vgId:%d start to commit! keyFirst %" PRId64 " keyLast %" PRId64 " numOfRows %" PRId64 " meta rows: %d",
REPO_ID(pRepo), pMem->keyFirst, pMem->keyLast, pMem->numOfRows, listNEles(pMem->actList));
tsdbStartFSTxn(pRepo, pMem->pointsAdd, pMem->storageAdd);
pRepo->code = TSDB_CODE_SUCCESS;
}
static void tsdbEndCommit(STsdbRepo *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
tsdbInfo("vgId:%d commit over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_OVER, eno);
SMemTable *pIMem = pRepo->imem;
(void)tsdbLockRepo(pRepo);
pRepo->imem = NULL;
(void)tsdbUnlockRepo(pRepo);
tsdbUnRefMemTable(pRepo, pIMem);
tsem_post(&(pRepo->readyToCommit));
}
#if 0
static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
for (int i = 0; i < nIters; i++) {
TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
}
return false;
}
#endif
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
ASSERT(pSet == NULL || pSet->fid == fid);
tsdbResetCommitFile(pCommith);
tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, fid, &(pCommith->minKey), &(pCommith->maxKey));
// Set and open files
if (tsdbSetAndOpenCommitFile(pCommith, pSet, fid) < 0) {
return -1;
}
// Loop to commit each table data
for (int tid = 1; tid < pCommith->niters; tid++) {
SCommitIter *pIter = pCommith->iters + tid;
if (pIter->pTable == NULL) continue;
if (tsdbCommitToTable(pCommith, tid) < 0) {
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
}
if (tsdbWriteBlockIdx(TSDB_COMMIT_HEAD_FILE(pCommith), pCommith->aBlkIdx, (void **)(&(TSDB_COMMIT_BUF(pCommith)))) <
0) {
tsdbError("vgId:%d failed to write SBlockIdx part to FSET %d since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
if (tsdbUpdateDFileSetHeader(&(pCommith->wSet)) < 0) {
tsdbError("vgId:%d failed to update FSET %d header since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
// Close commit file
tsdbCloseCommitFile(pCommith, false);
if (tsdbUpdateDFileSet(REPO_FS(pRepo), &(pCommith->wSet)) < 0) {
return -1;
}
return 0;
}
static int tsdbCreateCommitIters(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SMemTable *pMem = pRepo->imem;
STsdbMeta *pMeta = pRepo->tsdbMeta;
pCommith->niters = pMem->maxTables;
pCommith->iters = (SCommitIter *)calloc(pMem->maxTables, sizeof(SCommitIter));
if (pCommith->iters == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
// reference all tables
for (int i = 0; i < pMem->maxTables; i++) {
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
pCommith->iters[i].pTable = pMeta->tables[i];
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
for (int i = 0; i < pMem->maxTables; i++) {
if ((pCommith->iters[i].pTable != NULL) && (pMem->tData[i] != NULL) &&
(TABLE_UID(pCommith->iters[i].pTable) == pMem->tData[i]->uid)) {
if ((pCommith->iters[i].pIter = tSkipListCreateIter(pMem->tData[i]->pData)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tSkipListIterNext(pCommith->iters[i].pIter);
}
}
return 0;
}
static void tsdbDestroyCommitIters(SCommitH *pCommith) {
if (pCommith->iters == NULL) return;
for (int i = 1; i < pCommith->niters; i++) {
if (pCommith->iters[i].pTable != NULL) {
tsdbUnRefTable(pCommith->iters[i].pTable);
tSkipListDestroyIter(pCommith->iters[i].pIter);
}
}
free(pCommith->iters);
pCommith->iters = NULL;
pCommith->niters = 0;
}
// Skip all keys until key (not included)
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) {
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL);
}
}
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset(pCommith, 0, sizeof(*pCommith));
tsdbGetRtnSnap(pRepo, &(pCommith->rtn));
TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith));
// Init read handle
if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) {
return -1;
}
// Init file iterator
tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
if (tsdbCreateCommitIters(pCommith) < 0) {
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pCommith->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSubBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pCommith->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
return 0;
}
static void tsdbDestroyCommitH(SCommitH *pCommith) {
pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols);
pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk);
pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk);
pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx);
tsdbDestroyCommitIters(pCommith);
tsdbDestroyReadH(&(pCommith->readh));
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static int tsdbNextCommitFid(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int fid = TSDB_IVLD_FID;
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
if (nextKey == TSDB_DATA_TIMESTAMP_NULL) {
continue;
} else {
int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision));
if (fid == TSDB_IVLD_FID || fid > tfid) {
fid = tfid;
}
}
}
return fid;
}
static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
SCommitIter *pIter = pCommith->iters + tid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
tsdbResetCommitTable(pCommith);
TSDB_RLOCK_TABLE(pIter->pTable);
// Set commit table
if (tsdbSetCommitTable(pCommith, pIter->pTable) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
// No disk data and no memory data, just return
if (pCommith->readh.pBlkIdx == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return 0;
}
// Must has disk data or has memory data
int nBlocks;
int bidx = 0;
SBlock *pBlock;
if (pCommith->readh.pBlkIdx) {
if (tsdbLoadBlockInfo(&(pCommith->readh), NULL) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
} else {
nBlocks = 0;
}
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
while (true) {
if (pBlock == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) break;
if ((nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey) ||
(pBlock && (!pBlock->last) && tsdbComparKeyBlock((void *)(&nextKey), pBlock) > 0)) {
if (tsdbMoveBlock(pCommith, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
} else if (pBlock && (pBlock->last || tsdbComparKeyBlock((void *)(&nextKey), pBlock) == 0)) {
// merge pBlock data and memory data
if (tsdbMergeMemData(pCommith, pIter, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
nextKey = tsdbNextIterKey(pIter->pIter);
} else {
// Only commit memory data
if (pBlock == NULL) {
if (tsdbCommitMemData(pCommith, pIter, pCommith->maxKey, false) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
} else {
if (tsdbCommitMemData(pCommith, pIter, pBlock->keyFirst - 1, true) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
}
nextKey = tsdbNextIterKey(pIter->pIter);
}
}
TSDB_RUNLOCK_TABLE(pIter->pTable);
if (tsdbWriteBlockInfo(pCommith) < 0) {
tsdbError("vgId:%d failed to write SBlockInfo part into file %s since %s", TSDB_COMMIT_REPO_ID(pCommith),
TSDB_FILE_FULL_NAME(TSDB_COMMIT_HEAD_FILE(pCommith)), tstrerror(terrno));
return -1;
}
return 0;
}
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1);
pCommith->pTable = pTable;
if (tdInitDataCols(pCommith->pDataCols, pSchema) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pCommith->isRFileSet) {
if (tsdbSetReadTable(&(pCommith->readh), pTable) < 0) {
return -1;
}
} else {
pCommith->readh.pBlkIdx = NULL;
}
return 0;
}
static int tsdbComparKeyBlock(const void *arg1, const void *arg2) {
TSKEY key = *(TSKEY *)arg1;
SBlock *pBlock = (SBlock *)arg2;
if (key < pBlock->keyFirst) {
return -1;
} else if (key > pBlock->keyLast) {
return 1;
} else {
return 0;
}
}
int tsdbWriteBlockImpl(STsdbRepo *pRepo, STable *pTable, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock,
bool isLast, bool isSuper, void **ppBuf, void **ppCBuf) {
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlockData *pBlockData;
int64_t offset = 0;
int rowsToWrite = pDataCols->numOfRows;
ASSERT(rowsToWrite > 0 && rowsToWrite <= pCfg->maxRowsPerFileBlock);
ASSERT((!isLast) || rowsToWrite < pCfg->minRowsPerFileBlock);
// Make buffer space
if (tsdbMakeRoom(ppBuf, TSDB_BLOCK_STATIS_SIZE(pDataCols->numOfCols)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
// Get # of cols not all NULL(not including key column)
int nColsNotAllNull = 0;
for (int ncol = 1; ncol < pDataCols->numOfCols; ncol++) { // ncol from 1, we skip the timestamp column
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol *pBlockCol = pBlockData->cols + nColsNotAllNull;
if (isAllRowsNull(pDataCol)) { // all data to commit are NULL, just ignore it
continue;
}
memset(pBlockCol, 0, sizeof(*pBlockCol));
pBlockCol->colId = pDataCol->colId;
pBlockCol->type = pDataCol->type;
if (tDataTypes[pDataCol->type].statisFunc) {
(*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pBlockCol->min), &(pBlockCol->max),
&(pBlockCol->sum), &(pBlockCol->minIndex), &(pBlockCol->maxIndex),
&(pBlockCol->numOfNull));
}
nColsNotAllNull++;
}
ASSERT(nColsNotAllNull >= 0 && nColsNotAllNull <= pDataCols->numOfCols);
// Compress the data if neccessary
int tcol = 0; // counter of not all NULL and written columns
uint32_t toffset = 0;
int32_t tsize = TSDB_BLOCK_STATIS_SIZE(nColsNotAllNull);
int32_t lsize = tsize;
int32_t keyLen = 0;
for (int ncol = 0; ncol < pDataCols->numOfCols; ncol++) {
// All not NULL columns finish
if (ncol != 0 && tcol >= nColsNotAllNull) break;
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol *pBlockCol = pBlockData->cols + tcol;
if (ncol != 0 && (pDataCol->colId != pBlockCol->colId)) continue;
int32_t flen; // final length
int32_t tlen = dataColGetNEleLen(pDataCol, rowsToWrite);
void * tptr;
// Make room
if (tsdbMakeRoom(ppBuf, lsize + tlen + COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
pBlockCol = pBlockData->cols + tcol;
tptr = POINTER_SHIFT(pBlockData, lsize);
if (pCfg->compression == TWO_STAGE_COMP &&
tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) {
return -1;
}
// Compress or just copy
if (pCfg->compression) {
flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite, tptr,
tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf,
tlen + COMP_OVERFLOW_BYTES);
} else {
flen = tlen;
memcpy(tptr, pDataCol->pData, flen);
}
// Add checksum
ASSERT(flen > 0);
flen += sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)tptr, flen);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM)));
if (ncol != 0) {
tsdbSetBlockColOffset(pBlockCol, toffset);
pBlockCol->len = flen;
tcol++;
} else {
keyLen = flen;
}
toffset += flen;
lsize += flen;
}
pBlockData->delimiter = TSDB_FILE_DELIMITER;
pBlockData->uid = TABLE_UID(pTable);
pBlockData->numOfCols = nColsNotAllNull;
taosCalcChecksumAppend(0, (uint8_t *)pBlockData, tsize);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(pBlockData, tsize - sizeof(TSCKSUM)));
// Write the whole block to file
if (tsdbAppendDFile(pDFile, (void *)pBlockData, lsize, &offset) < lsize) {
return -1;
}
// Update pBlock membership vairables
pBlock->last = isLast;
pBlock->offset = offset;
pBlock->algorithm = pCfg->compression;
pBlock->numOfRows = rowsToWrite;
pBlock->len = lsize;
pBlock->keyLen = keyLen;
pBlock->numOfSubBlocks = isSuper ? 1 : 0;
pBlock->numOfCols = nColsNotAllNull;
pBlock->keyFirst = dataColsKeyFirst(pDataCols);
pBlock->keyLast = dataColsKeyLast(pDataCols);
tsdbDebug("vgId:%d tid:%d a block of data is written to file %s, offset %" PRId64
" numOfRows %d len %d numOfCols %" PRId16 " keyFirst %" PRId64 " keyLast %" PRId64,
REPO_ID(pRepo), TABLE_TID(pTable), TSDB_FILE_FULL_NAME(pDFile), offset, rowsToWrite, pBlock->len,
pBlock->numOfCols, pBlock->keyFirst, pBlock->keyLast);
return 0;
}
static int tsdbWriteBlock(SCommitH *pCommith, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock, bool isLast,
bool isSuper) {
return tsdbWriteBlockImpl(TSDB_COMMIT_REPO(pCommith), TSDB_COMMIT_TABLE(pCommith), pDFile, pDataCols, pBlock, isLast,
isSuper, (void **)(&(TSDB_COMMIT_BUF(pCommith))),
(void **)(&(TSDB_COMMIT_COMP_BUF(pCommith))));
}
static int tsdbWriteBlockInfo(SCommitH *pCommih) {
SDFile * pHeadf = TSDB_COMMIT_HEAD_FILE(pCommih);
SBlockIdx blkIdx;
STable * pTable = TSDB_COMMIT_TABLE(pCommih);
if (tsdbWriteBlockInfoImpl(pHeadf, pTable, pCommih->aSupBlk, pCommih->aSubBlk, (void **)(&(TSDB_COMMIT_BUF(pCommih))),
&blkIdx) < 0) {
return -1;
}
if (blkIdx.numOfBlocks == 0) {
return 0;
}
if (taosArrayPush(pCommih->aBlkIdx, (void *)(&blkIdx)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SMergeInfo mInfo;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
SDFile * pDFile;
bool isLast;
SBlock block;
while (true) {
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, defaultRows, pCommith->pDataCols, NULL, 0,
pCfg->update, &mInfo);
if (pCommith->pDataCols->numOfRows <= 0) break;
if (toData || pCommith->pDataCols->numOfRows >= pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
} else {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) {
return -1;
}
}
return 0;
}
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
TSKEY keyLimit;
int16_t colId = 0;
SMergeInfo mInfo;
SBlock subBlocks[TSDB_MAX_SUBBLOCKS];
SBlock block, supBlock;
SDFile * pDFile;
if (bidx == nBlocks - 1) {
keyLimit = pCommith->maxKey;
} else {
keyLimit = pBlock[1].keyFirst - 1;
}
SSkipListIterator titer = *(pIter->pIter);
if (tsdbLoadBlockDataCols(&(pCommith->readh), pBlock, NULL, &colId, 1) < 0) return -1;
tsdbLoadDataFromCache(pIter->pTable, &titer, keyLimit, INT32_MAX, NULL, pCommith->readh.pDCols[0]->cols[0].pData,
pCommith->readh.pDCols[0]->numOfRows, pCfg->update, &mInfo);
if (mInfo.nOperations == 0) {
// no new data to insert (all updates denied)
if (tsdbMoveBlock(pCommith, bidx) < 0) {
return -1;
}
*(pIter->pIter) = titer;
} else if (pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed == 0) {
// Ignore the block
ASSERT(0);
*(pIter->pIter) = titer;
} else if (tsdbCanAddSubBlock(pCommith, pBlock, &mInfo)) {
// Add a sub-block
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, INT32_MAX, pCommith->pDataCols,
pCommith->readh.pDCols[0]->cols[0].pData, pCommith->readh.pDCols[0]->numOfRows, pCfg->update,
&mInfo);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, pBlock->last, false) < 0) return -1;
if (pBlock->numOfSubBlocks == 1) {
subBlocks[0] = *pBlock;
subBlocks[0].numOfSubBlocks = 0;
} else {
memcpy(subBlocks, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
sizeof(SBlock) * pBlock->numOfSubBlocks);
}
subBlocks[pBlock->numOfSubBlocks] = block;
supBlock = *pBlock;
supBlock.keyFirst = mInfo.keyFirst;
supBlock.keyLast = mInfo.keyLast;
supBlock.numOfSubBlocks++;
supBlock.numOfRows = pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed;
supBlock.offset = taosArrayGetSize(pCommith->aSubBlk) * sizeof(SBlock);
if (tsdbCommitAddBlock(pCommith, &supBlock, subBlocks, supBlock.numOfSubBlocks) < 0) return -1;
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbMergeBlockData(pCommith, pIter, pCommith->readh.pDCols[0], keyLimit, bidx == (nBlocks - 1)) < 0) return -1;
}
return 0;
}
static int tsdbMoveBlock(SCommitH *pCommith, int bidx) {
SBlock *pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
SDFile *pDFile;
SBlock block;
bool isSameFile;
ASSERT(pBlock->numOfSubBlocks > 0);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isSameFile = pCommith->isLFileSame;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isSameFile = pCommith->isDFileSame;
}
if (isSameFile) {
if (pBlock->numOfSubBlocks == 1) {
if (tsdbCommitAddBlock(pCommith, pBlock, NULL, 0) < 0) {
return -1;
}
} else {
block = *pBlock;
block.offset = sizeof(SBlock) * taosArrayGetSize(pCommith->aSubBlk);
if (tsdbCommitAddBlock(pCommith, &block, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
pBlock->numOfSubBlocks) < 0) {
return -1;
}
}
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbWriteBlock(pCommith, pDFile, pCommith->readh.pDCols[0], &block, pBlock->last, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks) {
if (taosArrayPush(pCommith->aSupBlk, pSupBlock) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pSubBlocks && taosArrayAddBatch(pCommith->aSubBlk, pSubBlocks, nSubBlocks) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit, bool isLastOneBlock) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlock block;
SDFile * pDFile;
bool isLast;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
int biter = 0;
while (true) {
tsdbLoadAndMergeFromCache(pCommith->readh.pDCols[0], &biter, pIter, pCommith->pDataCols, keyLimit, defaultRows,
pCfg->update);
if (pCommith->pDataCols->numOfRows == 0) break;
if (isLastOneBlock) {
if (pCommith->pDataCols->numOfRows < pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update) {
TSKEY key1 = INT64_MAX;
TSKEY key2 = INT64_MAX;
STSchema *pSchema = NULL;
ASSERT(maxRows > 0 && dataColsKeyLast(pDataCols) <= maxKey);
tdResetDataCols(pTarget);
while (true) {
key1 = (*iter >= pDataCols->numOfRows) ? INT64_MAX : dataColsKeyAt(pDataCols, *iter);
SMemRow row = tsdbNextIterRow(pCommitIter->pIter);
if (row == NULL || memRowKey(row) > maxKey) {
key2 = INT64_MAX;
} else {
key2 = memRowKey(row);
}
if (key1 == INT64_MAX && key2 == INT64_MAX) break;
if (key1 < key2) {
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
pTarget->numOfRows++;
(*iter)++;
} else if (key1 > key2) {
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, true);
tSkipListIterNext(pCommitIter->pIter);
} else {
if (update != TD_ROW_OVERWRITE_UPDATE) {
//copy disk data
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
if(update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++;
}
if (update != TD_ROW_DISCARD_UPDATE) {
//copy mem data
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, update == TD_ROW_OVERWRITE_UPDATE);
}
(*iter)++;
tSkipListIterNext(pCommitIter->pIter);
}
if (pTarget->numOfRows >= maxRows) break;
}
}
static void tsdbResetCommitFile(SCommitH *pCommith) {
pCommith->isRFileSet = false;
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
taosArrayClear(pCommith->aBlkIdx);
}
static void tsdbResetCommitTable(SCommitH *pCommith) {
taosArrayClear(pCommith->aSubBlk);
taosArrayClear(pCommith->aSupBlk);
pCommith->pTable = NULL;
}
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
SDiskID did;
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith);
tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
// Open read FSET
if (pSet) {
if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) {
return -1;
}
pCommith->isRFileSet = true;
if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet),
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
pCommith->isRFileSet = false;
}
// Set and open commit FSET
if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) {
// Create a new FSET to write data
tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateDFileSet(pWSet, true) < 0) {
tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo),
TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
return -1;
}
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet),
TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet));
} else {
did.level = TSDB_FSET_LEVEL(pSet);
did.id = TSDB_FSET_ID(pSet);
pCommith->wSet.fid = fid;
pCommith->wSet.state = 0;
// TSDB_FILE_HEAD
SDFile *pWHeadf = TSDB_COMMIT_HEAD_FILE(pCommith);
tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD);
if (tsdbCreateDFile(pWHeadf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf),
tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
// TSDB_FILE_DATA
SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh));
SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith);
tsdbInitDFileEx(pWDataf, pRDataf);
if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
pCommith->isDFileSame = true;
// TSDB_FILE_LAST
SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh));
SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith);
if (pRLastf->info.size < 32 * 1024) {
tsdbInitDFileEx(pWLastf, pRLastf);
pCommith->isLFileSame = true;
if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST);
pCommith->isLFileSame = false;
if (tsdbCreateDFile(pWLastf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
}
return 0;
}
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) {
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
if (!hasError) {
TSDB_FSET_FSYNC(TSDB_COMMIT_WRITE_FSET(pCommith));
}
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int mergeRows = pBlock->numOfRows + pInfo->rowsInserted - pInfo->rowsDeleteSucceed;
ASSERT(mergeRows > 0);
if (pBlock->numOfSubBlocks < TSDB_MAX_SUBBLOCKS && pInfo->nOperations <= pCfg->maxRowsPerFileBlock) {
if (pBlock->last) {
if (pCommith->isLFileSame && mergeRows < pCfg->minRowsPerFileBlock) return true;
} else {
if (pCommith->isDFileSame && mergeRows <= pCfg->maxRowsPerFileBlock) return true;
}
}
return false;
}
int tsdbApplyRtn(STsdbRepo *pRepo) {
SRtn rtn;
SFSIter fsiter;
STsdbFS * pfs = REPO_FS(pRepo);
SDFileSet *pSet;
// Get retention snapshot
tsdbGetRtnSnap(pRepo, &rtn);
tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
while ((pSet = tsdbFSIterNext(&fsiter))) {
if (pSet->fid < rtn.minFid) {
tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) {
return -1;
}
}
return 0;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
\ No newline at end of file
......@@ -93,3 +93,1013 @@ static int tsdbOpenImpl(STsdb *pTsdb) {
static void tsdbCloseImpl(STsdb *pTsdb) {
// TODO
}
#if 0
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// no test file errors here
#include "taosdef.h"
#include "tsdbint.h"
#include "ttimer.h"
#include "tthread.h"
#define IS_VALID_PRECISION(precision) \
(((precision) >= TSDB_TIME_PRECISION_MILLI) && ((precision) <= TSDB_TIME_PRECISION_NANO))
#define TSDB_DEFAULT_COMPRESSION TWO_STAGE_COMP
#define IS_VALID_COMPRESSION(compression) (((compression) >= NO_COMPRESSION) && ((compression) <= TWO_STAGE_COMP))
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg);
static STsdbRepo *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH);
static void tsdbFreeRepo(STsdbRepo *pRepo);
static void tsdbStartStream(STsdbRepo *pRepo);
static void tsdbStopStream(STsdbRepo *pRepo);
static int tsdbRestoreLastColumns(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh);
static int tsdbRestoreLastRow(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh, SBlockIdx *pIdx);
// Function declaration
int32_t tsdbCreateRepo(int repoid) {
char tsdbDir[TSDB_FILENAME_LEN] = "\0";
char dataDir[TSDB_FILENAME_LEN] = "\0";
tsdbGetRootDir(repoid, tsdbDir);
if (tfsMkdir(tsdbDir) < 0) {
goto _err;
}
tsdbGetDataDir(repoid, dataDir);
if (tfsMkdir(dataDir) < 0) {
goto _err;
}
// TODO: need to create current file with nothing in
return 0;
_err:
tsdbError("vgId:%d failed to create TSDB repository since %s", repoid, tstrerror(terrno));
return -1;
}
int32_t tsdbDropRepo(int repoid) {
char tsdbDir[TSDB_FILENAME_LEN] = "\0";
tsdbGetRootDir(repoid, tsdbDir);
return tfsRmdir(tsdbDir);
}
STsdbRepo *tsdbOpenRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) {
STsdbRepo *pRepo;
STsdbCfg config = *pCfg;
terrno = TSDB_CODE_SUCCESS;
// Check and set default configurations
if (tsdbCheckAndSetDefaultCfg(&config) < 0) {
tsdbError("vgId:%d failed to open TSDB repository since %s", config.tsdbId, tstrerror(terrno));
return NULL;
}
// Create new TSDB object
if ((pRepo = tsdbNewRepo(&config, pAppH)) == NULL) {
tsdbError("vgId:%d failed to open TSDB repository while creating TSDB object since %s", config.tsdbId,
tstrerror(terrno));
return NULL;
}
// Open meta
if (tsdbOpenMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening Meta since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
if (tsdbOpenBufPool(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening buffer pool since %s", config.tsdbId,
tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
if (tsdbOpenFS(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening FS since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
// TODO: Restore information from data
if ((!(pRepo->state & TSDB_STATE_BAD_DATA)) && tsdbRestoreInfo(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while restore info since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
pRepo->mergeBuf = NULL;
tsdbStartStream(pRepo);
tsdbDebug("vgId:%d, TSDB repository opened", REPO_ID(pRepo));
return pRepo;
}
// Note: all working thread and query thread must stopped when calling this function
int tsdbCloseRepo(STsdbRepo *repo, int toCommit) {
if (repo == NULL) return 0;
STsdbRepo *pRepo = repo;
int vgId = REPO_ID(pRepo);
terrno = TSDB_CODE_SUCCESS;
tsdbStopStream(pRepo);
if(pRepo->pthread){
taosDestoryThread(pRepo->pthread);
pRepo->pthread = NULL;
}
if (toCommit) {
tsdbSyncCommit(repo);
}
tsem_wait(&(pRepo->readyToCommit));
tsdbUnRefMemTable(pRepo, pRepo->mem);
tsdbUnRefMemTable(pRepo, pRepo->imem);
pRepo->mem = NULL;
pRepo->imem = NULL;
tsdbCloseFS(pRepo);
tsdbCloseBufPool(pRepo);
tsdbCloseMeta(pRepo);
tsdbFreeRepo(pRepo);
tsdbDebug("vgId:%d repository is closed", vgId);
if (terrno != TSDB_CODE_SUCCESS) {
return -1;
} else {
return 0;
}
}
STsdbCfg *tsdbGetCfg(const STsdbRepo *repo) {
ASSERT(repo != NULL);
return &((STsdbRepo *)repo)->config;
}
int tsdbLockRepo(STsdbRepo *pRepo) {
int code = pthread_mutex_lock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
pRepo->repoLocked = true;
return 0;
}
int tsdbUnlockRepo(STsdbRepo *pRepo) {
ASSERT(IS_REPO_LOCKED(pRepo));
pRepo->repoLocked = false;
int code = pthread_mutex_unlock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to unlock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbCheckCommit(STsdbRepo *pRepo) {
ASSERT(pRepo->mem != NULL);
STsdbCfg *pCfg = &(pRepo->config);
STsdbBufBlock *pBufBlock = tsdbGetCurrBufBlock(pRepo);
ASSERT(pBufBlock != NULL);
if ((pRepo->mem->extraBuffList != NULL) ||
((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < TSDB_BUFFER_RESERVE))) {
// trigger commit
if (tsdbAsyncCommit(pRepo) < 0) return -1;
}
return 0;
}
STsdbMeta *tsdbGetMeta(STsdbRepo *pRepo) { return pRepo->tsdbMeta; }
STsdbRepoInfo *tsdbGetStatus(STsdbRepo *pRepo) { return NULL; }
int tsdbGetState(STsdbRepo *repo) { return repo->state; }
int8_t tsdbGetCompactState(STsdbRepo *repo) { return (int8_t)(repo->compactState); }
void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage) {
ASSERT(repo != NULL);
STsdbRepo *pRepo = repo;
*totalPoints = pRepo->stat.pointsWritten;
*totalStorage = pRepo->stat.totalStorage;
*compStorage = pRepo->stat.compStorage;
}
int32_t tsdbConfigRepo(STsdbRepo *repo, STsdbCfg *pCfg) {
// TODO: think about multithread cases
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
STsdbCfg * pRCfg = &repo->config;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
configChanged = true;
}
if (pRCfg->keep1 != pCfg->keep1) {
configChanged = true;
}
if (pRCfg->keep2 != pCfg->keep2) {
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
configChanged = true;
}
if (!configChanged) {
tsdbError("vgId:%d no config changed", REPO_ID(repo));
}
int code = pthread_mutex_lock(&repo->save_mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb save config mutex since %s", REPO_ID(repo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
STsdbCfg * pSaveCfg = &repo->save_config;
*pSaveCfg = repo->config;
pSaveCfg->compression = pCfg->compression;
pSaveCfg->keep = pCfg->keep;
pSaveCfg->keep1 = pCfg->keep1;
pSaveCfg->keep2 = pCfg->keep2;
pSaveCfg->cacheLastRow = pCfg->cacheLastRow;
pSaveCfg->totalBlocks = pCfg->totalBlocks;
tsdbInfo("vgId:%d old config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)",
REPO_ID(repo),
pRCfg->compression, pRCfg->keep, pRCfg->keep1,pRCfg->keep2,
pRCfg->cacheLastRow, pRCfg->totalBlocks);
tsdbInfo("vgId:%d new config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)",
REPO_ID(repo),
pSaveCfg->compression, pSaveCfg->keep,pSaveCfg->keep1, pSaveCfg->keep2,
pSaveCfg->cacheLastRow,pSaveCfg->totalBlocks);
repo->config_changed = true;
pthread_mutex_unlock(&repo->save_mutex);
// schedule a commit msg and wait for the new config applied
tsdbSyncCommitConfig(repo);
return 0;
#if 0
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbCfg config = pRepo->config;
STsdbCfg * pRCfg = &pRepo->config;
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
tsdbAlterCompression(pRepo, pCfg->compression);
config.compression = pCfg->compression;
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
if (tsdbAlterKeep(pRepo, pCfg->keep) < 0) {
tsdbError("vgId:%d failed to configure repo when alter keep since %s", REPO_ID(pRepo), tstrerror(terrno));
config.keep = pCfg->keep;
return -1;
}
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
tsdbAlterCacheTotalBlocks(pRepo, pCfg->totalBlocks);
config.totalBlocks = pCfg->totalBlocks;
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
config.cacheLastRow = pCfg->cacheLastRow;
configChanged = true;
}
if (configChanged) {
if (tsdbSaveConfig(pRepo->rootDir, &config) < 0) {
tsdbError("vgId:%d failed to configure repository while save config since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
#endif
}
uint32_t tsdbGetFileInfo(STsdbRepo *repo, char *name, uint32_t *index, uint32_t eindex, int64_t *size) {
// TODO
return 0;
#if 0
STsdbRepo *pRepo = (STsdbRepo *)repo;
// STsdbMeta *pMeta = pRepo->tsdbMeta;
STsdbFileH *pFileH = pRepo->tsdbFileH;
uint32_t magic = 0;
char * fname = NULL;
struct stat fState;
tsdbDebug("vgId:%d name:%s index:%d eindex:%d", pRepo->config.tsdbId, name, *index, eindex);
ASSERT(*index <= eindex);
if (name[0] == 0) { // get the file from index or after, but not larger than eindex
int fid = (*index) / TSDB_FILE_TYPE_MAX;
if (pFileH->nFGroups == 0 || fid > pFileH->pFGroup[pFileH->nFGroups - 1].fileId) {
if (*index <= TSDB_META_FILE_INDEX && TSDB_META_FILE_INDEX <= eindex) {
fname = tsdbGetMetaFileName(pRepo->rootDir);
*index = TSDB_META_FILE_INDEX;
magic = TSDB_META_FILE_MAGIC(pRepo->tsdbMeta);
sprintf(name, "tsdb/%s", TSDB_META_FILE_NAME);
} else {
return 0;
}
} else {
SFileGroup *pFGroup =
taosbsearch(&fid, pFileH->pFGroup, pFileH->nFGroups, sizeof(SFileGroup), keyFGroupCompFunc, TD_GE);
if (pFGroup->fileId == fid) {
SFile *pFile = &pFGroup->files[(*index) % TSDB_FILE_TYPE_MAX];
fname = strdup(TSDB_FILE_NAME(pFile));
magic = pFile->info.magic;
char *tfname = strdup(fname);
sprintf(name, "tsdb/%s/%s", TSDB_DATA_DIR_NAME, basename(tfname));
tfree(tfname);
} else {
if ((pFGroup->fileId + 1) * TSDB_FILE_TYPE_MAX - 1 < (int)eindex) {
SFile *pFile = &pFGroup->files[0];
fname = strdup(TSDB_FILE_NAME(pFile));
*index = pFGroup->fileId * TSDB_FILE_TYPE_MAX;
magic = pFile->info.magic;
char *tfname = strdup(fname);
sprintf(name, "tsdb/%s/%s", TSDB_DATA_DIR_NAME, basename(tfname));
tfree(tfname);
} else {
return 0;
}
}
}
} else { // get the named file at the specified index. If not there, return 0
fname = malloc(256);
sprintf(fname, "%s/vnode/vnode%d/%s", TFS_PRIMARY_PATH(), REPO_ID(pRepo), name);
if (access(fname, F_OK) != 0) {
tfree(fname);
return 0;
}
if (*index == TSDB_META_FILE_INDEX) { // get meta file
tsdbGetStoreInfo(fname, &magic, size);
} else {
char tfname[TSDB_FILENAME_LEN] = "\0";
sprintf(tfname, "vnode/vnode%d/tsdb/%s/%s", REPO_ID(pRepo), TSDB_DATA_DIR_NAME, basename(name));
tsdbGetFileInfoImpl(tfname, &magic, size);
}
tfree(fname);
return magic;
}
if (stat(fname, &fState) < 0) {
tfree(fname);
return 0;
}
*size = fState.st_size;
// magic = *size;
tfree(fname);
return magic;
#endif
}
void tsdbGetRootDir(int repoid, char dirName[]) {
snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb", repoid);
}
void tsdbGetDataDir(int repoid, char dirName[]) {
snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data", repoid);
}
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) {
// Check tsdbId
if (pCfg->tsdbId < 0) {
tsdbError("vgId:%d invalid vgroup ID", pCfg->tsdbId);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check precision
if (pCfg->precision == -1) {
pCfg->precision = TSDB_DEFAULT_PRECISION;
} else {
if (!IS_VALID_PRECISION(pCfg->precision)) {
tsdbError("vgId:%d invalid precision configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check compression
if (pCfg->compression == -1) {
pCfg->compression = TSDB_DEFAULT_COMPRESSION;
} else {
if (!IS_VALID_COMPRESSION(pCfg->compression)) {
tsdbError("vgId:%d invalid compression configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check daysPerFile
if (pCfg->daysPerFile == -1) {
pCfg->daysPerFile = TSDB_DEFAULT_DAYS_PER_FILE;
} else {
if (pCfg->daysPerFile < TSDB_MIN_DAYS_PER_FILE || pCfg->daysPerFile > TSDB_MAX_DAYS_PER_FILE) {
tsdbError(
"vgId:%d invalid daysPerFile configuration! daysPerFile %d TSDB_MIN_DAYS_PER_FILE %d TSDB_MAX_DAYS_PER_FILE "
"%d",
pCfg->tsdbId, pCfg->daysPerFile, TSDB_MIN_DAYS_PER_FILE, TSDB_MAX_DAYS_PER_FILE);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check minRowsPerFileBlock and maxRowsPerFileBlock
if (pCfg->minRowsPerFileBlock == -1) {
pCfg->minRowsPerFileBlock = TSDB_DEFAULT_MIN_ROW_FBLOCK;
} else {
if (pCfg->minRowsPerFileBlock < TSDB_MIN_MIN_ROW_FBLOCK || pCfg->minRowsPerFileBlock > TSDB_MAX_MIN_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid minRowsPerFileBlock configuration! minRowsPerFileBlock %d TSDB_MIN_MIN_ROW_FBLOCK %d "
"TSDB_MAX_MIN_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->minRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->maxRowsPerFileBlock == -1) {
pCfg->maxRowsPerFileBlock = TSDB_DEFAULT_MAX_ROW_FBLOCK;
} else {
if (pCfg->maxRowsPerFileBlock < TSDB_MIN_MAX_ROW_FBLOCK || pCfg->maxRowsPerFileBlock > TSDB_MAX_MAX_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid maxRowsPerFileBlock configuration! maxRowsPerFileBlock %d TSDB_MIN_MAX_ROW_FBLOCK %d "
"TSDB_MAX_MAX_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->maxRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->minRowsPerFileBlock > pCfg->maxRowsPerFileBlock) {
tsdbError("vgId:%d invalid configuration! minRowsPerFileBlock %d maxRowsPerFileBlock %d", pCfg->tsdbId,
pCfg->minRowsPerFileBlock, pCfg->maxRowsPerFileBlock);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check keep
if (pCfg->keep == -1) {
pCfg->keep = TSDB_DEFAULT_KEEP;
} else {
if (pCfg->keep < TSDB_MIN_KEEP || pCfg->keep > TSDB_MAX_KEEP) {
tsdbError(
"vgId:%d invalid keep configuration! keep %d TSDB_MIN_KEEP %d "
"TSDB_MAX_KEEP %d",
pCfg->tsdbId, pCfg->keep, TSDB_MIN_KEEP, TSDB_MAX_KEEP);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->keep1 == 0) {
pCfg->keep1 = pCfg->keep;
}
if (pCfg->keep2 == 0) {
pCfg->keep2 = pCfg->keep;
}
// update check
if (pCfg->update < TD_ROW_DISCARD_UPDATE || pCfg->update > TD_ROW_PARTIAL_UPDATE)
pCfg->update = TD_ROW_DISCARD_UPDATE;
// update cacheLastRow
if (pCfg->cacheLastRow != 0) {
if (pCfg->cacheLastRow > 3)
pCfg->cacheLastRow = 1;
}
return 0;
}
static STsdbRepo *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) {
STsdbRepo *pRepo = (STsdbRepo *)calloc(1, sizeof(*pRepo));
if (pRepo == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pRepo->state = TSDB_STATE_OK;
pRepo->code = TSDB_CODE_SUCCESS;
pRepo->compactState = 0;
pRepo->config = *pCfg;
if (pAppH) {
pRepo->appH = *pAppH;
}
pRepo->repoLocked = false;
pRepo->pthread = NULL;
int code = pthread_mutex_init(&(pRepo->mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
code = pthread_mutex_init(&(pRepo->save_mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->config_changed = false;
atomic_store_8(&pRepo->hasCachedLastColumn, 0);
code = tsem_init(&(pRepo->readyToCommit), 0, 1);
if (code != 0) {
code = errno;
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->tsdbMeta = tsdbNewMeta(pCfg);
if (pRepo->tsdbMeta == NULL) {
tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->pPool = tsdbNewBufPool(pCfg);
if (pRepo->pPool == NULL) {
tsdbError("vgId:%d failed to create buffer pool since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->fs = tsdbNewFS(pCfg);
if (pRepo->fs == NULL) {
tsdbError("vgId:%d failed to TSDB file system since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
return pRepo;
}
static void tsdbFreeRepo(STsdbRepo *pRepo) {
if (pRepo) {
tsdbFreeFS(pRepo->fs);
tsdbFreeBufPool(pRepo->pPool);
tsdbFreeMeta(pRepo->tsdbMeta);
tsdbFreeMergeBuf(pRepo->mergeBuf);
// tsdbFreeMemTable(pRepo->mem);
// tsdbFreeMemTable(pRepo->imem);
tsem_destroy(&(pRepo->readyToCommit));
pthread_mutex_destroy(&pRepo->mutex);
free(pRepo);
}
}
static void tsdbStartStream(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
pTable->cqhandle = (*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data, pTable->sql,
tsdbGetTableSchemaImpl(pTable, false, false, -1), 0);
}
}
}
static void tsdbStopStream(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
(*pRepo->appH.cqDropFunc)(pTable->cqhandle);
}
}
}
static int tsdbRestoreLastColumns(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh) {
//tsdbInfo("tsdbRestoreLastColumns of table %s", pTable->name->data);
STSchema *pSchema = tsdbGetTableLatestSchema(pTable);
if (pSchema == NULL) {
tsdbError("tsdbGetTableLatestSchema of table %s fail", pTable->name->data);
return 0;
}
SBlock* pBlock;
int numColumns;
int32_t blockIdx;
SDataStatis* pBlockStatis = NULL;
SMemRow row = NULL;
// restore last column data with last schema
int err = 0;
numColumns = schemaNCols(pSchema);
if (numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
return 0;
}
if (pTable->lastColSVersion != schemaVersion(pSchema)) {
if (tsdbInitColIdCacheWithSchema(pTable, pSchema) < 0) {
return -1;
}
}
row = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
if (row == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = -1;
goto out;
}
memRowSetType(row, SMEM_ROW_DATA);
tdInitDataRow(memRowDataBody(row), pSchema);
// first load block index info
if (tsdbLoadBlockInfo(pReadh, NULL) < 0) {
err = -1;
goto out;
}
pBlockStatis = calloc(numColumns, sizeof(SDataStatis));
if (pBlockStatis == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = -1;
goto out;
}
memset(pBlockStatis, 0, numColumns * sizeof(SDataStatis));
for(int32_t i = 0; i < numColumns; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
pBlockStatis[i].colId = pCol->colId;
}
// load block from backward
SBlockIdx *pIdx = pReadh->pBlkIdx;
blockIdx = (int32_t)(pIdx->numOfBlocks - 1);
while (numColumns > pTable->restoreColumnNum && blockIdx >= 0) {
bool loadStatisData = false;
pBlock = pReadh->pBlkInfo->blocks + blockIdx;
blockIdx -= 1;
// load block data
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
err = -1;
goto out;
}
// file block with sub-blocks has no statistics data
if (pBlock->numOfSubBlocks <= 1) {
tsdbLoadBlockStatis(pReadh, pBlock);
tsdbGetBlockStatis(pReadh, pBlockStatis, (int)numColumns);
loadStatisData = true;
}
for (int16_t i = 0; i < numColumns && numColumns > pTable->restoreColumnNum; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
// ignore loaded columns
if (pTable->lastCols[i].bytes != 0) {
continue;
}
// ignore block which has no not-null colId column
if (loadStatisData && pBlockStatis[i].numOfNull == pBlock->numOfRows) {
continue;
}
// OK,let's load row from backward to get not-null column
for (int32_t rowId = pBlock->numOfRows - 1; rowId >= 0; rowId--) {
SDataCol *pDataCol = pReadh->pDCols[0]->cols + i;
const void* pColData = tdGetColDataOfRow(pDataCol, rowId);
tdAppendColVal(memRowDataBody(row), pColData, pCol->type, pCol->offset);
//SDataCol *pDataCol = readh.pDCols[0]->cols + j;
void *value = tdGetRowDataOfCol(memRowDataBody(row), (int8_t)pCol->type, TD_DATA_ROW_HEAD_SIZE + pCol->offset);
if (isNull(value, pCol->type)) {
continue;
}
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId);
if (idx == -1) {
tsdbError("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d fail", REPO_ID(pRepo), pTable->name->data, pCol->colId);
continue;
}
// save not-null column
uint16_t bytes = IS_VAR_DATA_TYPE(pCol->type) ? varDataTLen(pColData) : pCol->bytes;
SDataCol *pLastCol = &(pTable->lastCols[idx]);
pLastCol->pData = malloc(bytes);
pLastCol->bytes = bytes;
pLastCol->colId = pCol->colId;
memcpy(pLastCol->pData, value, bytes);
// save row ts(in column 0)
pDataCol = pReadh->pDCols[0]->cols + 0;
pCol = schemaColAt(pSchema, 0);
tdAppendColVal(memRowDataBody(row), tdGetColDataOfRow(pDataCol, rowId), pCol->type, pCol->offset);
pLastCol->ts = memRowKey(row);
pTable->restoreColumnNum += 1;
tsdbDebug("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d, %" PRId64, REPO_ID(pRepo), pTable->name->data, pLastCol->colId, pLastCol->ts);
break;
}
}
}
out:
taosTZfree(row);
tfree(pBlockStatis);
if (err == 0 && numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
}
return err;
}
static int tsdbRestoreLastRow(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh, SBlockIdx *pIdx) {
ASSERT(pTable->lastRow == NULL);
if (tsdbLoadBlockInfo(pReadh, NULL) < 0) {
return -1;
}
SBlock* pBlock = pReadh->pBlkInfo->blocks + pIdx->numOfBlocks - 1;
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
return -1;
}
// Get the data in row
STSchema *pSchema = tsdbGetTableSchema(pTable);
pTable->lastRow = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
if (pTable->lastRow == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memRowSetType(pTable->lastRow, SMEM_ROW_DATA);
tdInitDataRow(memRowDataBody(pTable->lastRow), pSchema);
for (int icol = 0; icol < schemaNCols(pSchema); icol++) {
STColumn *pCol = schemaColAt(pSchema, icol);
SDataCol *pDataCol = pReadh->pDCols[0]->cols + icol;
tdAppendColVal(memRowDataBody(pTable->lastRow), tdGetColDataOfRow(pDataCol, pBlock->numOfRows - 1), pCol->type,
pCol->offset);
}
return 0;
}
int tsdbRestoreInfo(STsdbRepo *pRepo) {
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STsdbCfg * pCfg = REPO_CFG(pRepo);
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
//tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
TSKEY lastKey = tsdbGetTableLastKeyImpl(pTable);
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && lastKey < pIdx->maxKey) {
pTable->lastKey = pIdx->maxKey;
if (CACHE_LAST_ROW(pCfg) && tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
// restore NULL columns
if (pIdx && CACHE_LAST_NULL_COLUMN(pCfg) && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
}
}
tsdbDestroyReadH(&readh);
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
atomic_store_8(&pRepo->hasCachedLastColumn, 1);
}
return 0;
}
int tsdbCacheLastData(STsdbRepo *pRepo, STsdbCfg* oldCfg) {
bool cacheLastRow = false, cacheLastCol = false;
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
int tableNum = 0;
int maxTableIdx = 0;
int cacheLastRowTableNum = 0;
int cacheLastColTableNum = 0;
bool need_free_last_row = CACHE_LAST_ROW(oldCfg) && !CACHE_LAST_ROW(&(pRepo->config));
bool need_free_last_col = CACHE_LAST_NULL_COLUMN(oldCfg) && !CACHE_LAST_NULL_COLUMN(&(pRepo->config));
if (CACHE_LAST_ROW(&(pRepo->config)) || CACHE_LAST_NULL_COLUMN(&(pRepo->config))) {
tsdbInfo("tsdbCacheLastData cache last data since cacheLast option changed");
cacheLastRow = !CACHE_LAST_ROW(oldCfg) && CACHE_LAST_ROW(&(pRepo->config));
cacheLastCol = !CACHE_LAST_NULL_COLUMN(oldCfg) && CACHE_LAST_NULL_COLUMN(&(pRepo->config));
}
// calc max table idx and table num
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
tableNum += 1;
maxTableIdx = i;
if (cacheLastCol) {
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
// if close last option,need to free data
if (need_free_last_row || need_free_last_col) {
if (need_free_last_col) {
atomic_store_8(&pRepo->hasCachedLastColumn, 0);
}
tsdbInfo("free cache last data since cacheLast option changed");
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
if (need_free_last_row) {
taosTZfree(pTable->lastRow);
pTable->lastRow = NULL;
}
if (need_free_last_col) {
tsdbFreeLastColumns(pTable);
pTable->hasRestoreLastColumn = false;
}
}
}
if (!cacheLastRow && !cacheLastCol) {
return 0;
}
cacheLastRowTableNum = cacheLastRow ? tableNum : 0;
cacheLastColTableNum = cacheLastCol ? tableNum : 0;
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL && (cacheLastRowTableNum > 0 || cacheLastColTableNum > 0)) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
//tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && cacheLastRowTableNum > 0 && pTable->lastRow == NULL) {
pTable->lastKey = pIdx->maxKey;
if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
cacheLastRowTableNum -= 1;
}
// restore NULL columns
if (pIdx && cacheLastColTableNum > 0 && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (pTable->hasRestoreLastColumn) {
cacheLastColTableNum -= 1;
}
}
}
}
tsdbDestroyReadH(&readh);
if (cacheLastCol) {
atomic_store_8(&pRepo->hasCachedLastColumn, 1);
}
return 0;
}
#endif
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
\ No newline at end of file
CMAKE_MINIMUM_REQUIRED(VERSION 2.8...3.20)
PROJECT(TDengine)
INCLUDE_DIRECTORIES(inc)
AUX_SOURCE_DIRECTORY(src SRC)
ADD_LIBRARY(tsdb ${SRC})
TARGET_LINK_LIBRARIES(tsdb tfs common tutil)
IF (TD_TSDB_PLUGINS)
TARGET_LINK_LIBRARIES(tsdb tsdbPlugins)
ENDIF ()
IF (TD_LINUX)
# Someone has no gtest directory, so comment it
# ADD_SUBDIRECTORY(tests)
ENDIF ()
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_BUFFER_H_
#define _TD_TSDB_BUFFER_H_
typedef struct {
int64_t blockId;
int offset;
int remain;
char data[];
} STsdbBufBlock;
typedef struct {
pthread_cond_t poolNotEmpty;
int bufBlockSize;
int tBufBlocks;
int nBufBlocks;
int nRecycleBlocks;
int nElasticBlocks;
int64_t index;
SList* bufBlockList;
} STsdbBufPool;
#define TSDB_BUFFER_RESERVE 1024 // Reseve 1K as commit threshold
STsdbBufPool* tsdbNewBufPool();
void tsdbFreeBufPool(STsdbBufPool* pBufPool);
int tsdbOpenBufPool(STsdbRepo* pRepo);
void tsdbCloseBufPool(STsdbRepo* pRepo);
SListNode* tsdbAllocBufBlockFromPool(STsdbRepo* pRepo);
int tsdbExpandPool(STsdbRepo* pRepo, int32_t oldTotalBlocks);
void tsdbRecycleBufferBlock(STsdbBufPool* pPool, SListNode *pNode, bool bELastic);
// health cite
STsdbBufBlock *tsdbNewBufBlock(int bufBlockSize);
void tsdbFreeBufBlock(STsdbBufBlock *pBufBlock);
#endif /* _TD_TSDB_BUFFER_H_ */
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_LOG_H_
#define _TD_TSDB_LOG_H_
extern int32_t tsdbDebugFlag;
#define tsdbFatal(...) do { if (tsdbDebugFlag & DEBUG_FATAL) { taosPrintLog("TDB FATAL ", 255, __VA_ARGS__); }} while(0)
#define tsdbError(...) do { if (tsdbDebugFlag & DEBUG_ERROR) { taosPrintLog("TDB ERROR ", 255, __VA_ARGS__); }} while(0)
#define tsdbWarn(...) do { if (tsdbDebugFlag & DEBUG_WARN) { taosPrintLog("TDB WARN ", 255, __VA_ARGS__); }} while(0)
#define tsdbInfo(...) do { if (tsdbDebugFlag & DEBUG_INFO) { taosPrintLog("TDB ", 255, __VA_ARGS__); }} while(0)
#define tsdbDebug(...) do { if (tsdbDebugFlag & DEBUG_DEBUG) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0)
#define tsdbTrace(...) do { if (tsdbDebugFlag & DEBUG_TRACE) { taosPrintLog("TDB ", tsdbDebugFlag, __VA_ARGS__); }} while(0)
#endif /* _TD_TSDB_LOG_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_TSDB_META_H_
#define _TD_TSDB_META_H_
#define TSDB_MAX_TABLE_SCHEMAS 16
typedef struct STable {
STableId tableId;
ETableType type;
tstr* name; // NOTE: there a flexible string here
uint64_t suid;
struct STable* pSuper; // super table pointer
SArray* schema;
STSchema* tagSchema;
SKVRow tagVal;
SSkipList* pIndex; // For TSDB_SUPER_TABLE, it is the skiplist index
void* eventHandler; // TODO
void* streamHandler; // TODO
TSKEY lastKey;
SMemRow lastRow;
char* sql;
void* cqhandle;
SRWLatch latch; // TODO: implementa latch functions
SDataCol *lastCols;
int16_t maxColNum;
int16_t restoreColumnNum;
bool hasRestoreLastColumn;
int lastColSVersion;
T_REF_DECLARE()
} STable;
typedef struct {
pthread_rwlock_t rwLock;
int32_t nTables;
int32_t maxTables;
STable** tables;
SList* superList;
SHashObj* uidMap;
int maxRowBytes;
int maxCols;
} STsdbMeta;
#define TSDB_INIT_NTABLES 1024
#define TABLE_TYPE(t) (t)->type
#define TABLE_NAME(t) (t)->name
#define TABLE_CHAR_NAME(t) TABLE_NAME(t)->data
#define TABLE_UID(t) (t)->tableId.uid
#define TABLE_TID(t) (t)->tableId.tid
#define TABLE_SUID(t) (t)->suid
// #define TSDB_META_FILE_MAGIC(m) KVSTORE_MAGIC((m)->pStore)
#define TSDB_RLOCK_TABLE(t) taosRLockLatch(&((t)->latch))
#define TSDB_RUNLOCK_TABLE(t) taosRUnLockLatch(&((t)->latch))
#define TSDB_WLOCK_TABLE(t) taosWLockLatch(&((t)->latch))
#define TSDB_WUNLOCK_TABLE(t) taosWUnLockLatch(&((t)->latch))
STsdbMeta* tsdbNewMeta(STsdbCfg* pCfg);
void tsdbFreeMeta(STsdbMeta* pMeta);
int tsdbOpenMeta(STsdbRepo* pRepo);
int tsdbCloseMeta(STsdbRepo* pRepo);
STable* tsdbGetTableByUid(STsdbMeta* pMeta, uint64_t uid);
STSchema* tsdbGetTableSchemaByVersion(STable* pTable, int16_t _version);
int tsdbWLockRepoMeta(STsdbRepo* pRepo);
int tsdbRLockRepoMeta(STsdbRepo* pRepo);
int tsdbUnlockRepoMeta(STsdbRepo* pRepo);
void tsdbRefTable(STable* pTable);
void tsdbUnRefTable(STable* pTable);
void tsdbUpdateTableSchema(STsdbRepo* pRepo, STable* pTable, STSchema* pSchema, bool insertAct);
int tsdbRestoreTable(STsdbRepo* pRepo, void* cont, int contLen);
void tsdbOrgMeta(STsdbRepo* pRepo);
int tsdbInitColIdCacheWithSchema(STable* pTable, STSchema* pSchema);
int16_t tsdbGetLastColumnsIndexByColId(STable* pTable, int16_t colId);
int tsdbUpdateLastColSchema(STable *pTable, STSchema *pNewSchema);
STSchema* tsdbGetTableLatestSchema(STable *pTable);
void tsdbFreeLastColumns(STable* pTable);
static FORCE_INLINE int tsdbCompareSchemaVersion(const void *key1, const void *key2) {
if (*(int16_t *)key1 < schemaVersion(*(STSchema **)key2)) {
return -1;
} else if (*(int16_t *)key1 > schemaVersion(*(STSchema **)key2)) {
return 1;
} else {
return 0;
}
}
static FORCE_INLINE STSchema* tsdbGetTableSchemaImpl(STable* pTable, bool lock, bool copy, int16_t _version) {
STable* pDTable = (pTable->pSuper != NULL) ? pTable->pSuper : pTable; // for performance purpose
STSchema* pSchema = NULL;
STSchema* pTSchema = NULL;
if (lock) TSDB_RLOCK_TABLE(pDTable);
if (_version < 0) { // get the latest version of schema
pTSchema = *(STSchema **)taosArrayGetLast(pDTable->schema);
} else { // get the schema with version
void* ptr = taosArraySearch(pDTable->schema, &_version, tsdbCompareSchemaVersion, TD_EQ);
if (ptr == NULL) {
terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION;
goto _exit;
}
pTSchema = *(STSchema**)ptr;
}
ASSERT(pTSchema != NULL);
if (copy) {
if ((pSchema = tdDupSchema(pTSchema)) == NULL) terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
} else {
pSchema = pTSchema;
}
_exit:
if (lock) TSDB_RUNLOCK_TABLE(pDTable);
return pSchema;
}
static FORCE_INLINE STSchema* tsdbGetTableSchema(STable* pTable) {
return tsdbGetTableSchemaImpl(pTable, false, false, -1);
}
static FORCE_INLINE STSchema *tsdbGetTableTagSchema(STable *pTable) {
if (pTable->type == TSDB_CHILD_TABLE) { // check child table first
STable *pSuper = pTable->pSuper;
if (pSuper == NULL) return NULL;
return pSuper->tagSchema;
} else if (pTable->type == TSDB_SUPER_TABLE) {
return pTable->tagSchema;
} else {
return NULL;
}
}
static FORCE_INLINE TSKEY tsdbGetTableLastKeyImpl(STable* pTable) {
ASSERT((pTable->lastRow == NULL) || (pTable->lastKey == memRowKey(pTable->lastRow)));
return pTable->lastKey;
}
#endif /* _TD_TSDB_META_H_ */
\ No newline at end of file
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
extern int32_t tsTsdbMetaCompactRatio;
#define TSDB_MAX_SUBBLOCKS 8
static FORCE_INLINE int TSDB_KEY_FID(TSKEY key, int32_t days, int8_t precision) {
if (key < 0) {
return (int)((key + 1) / tsTickPerDay[precision] / days - 1);
} else {
return (int)((key / tsTickPerDay[precision] / days));
}
}
typedef struct {
SRtn rtn; // retention snapshot
SFSIter fsIter; // tsdb file iterator
int niters; // memory iterators
SCommitIter *iters;
bool isRFileSet; // read and commit FSET
SReadH readh;
SDFileSet wSet;
bool isDFileSame;
bool isLFileSame;
TSKEY minKey;
TSKEY maxKey;
SArray * aBlkIdx; // SBlockIdx array
STable * pTable;
SArray * aSupBlk; // Table super-block array
SArray * aSubBlk; // table sub-block array
SDataCols * pDataCols;
} SCommitH;
#define TSDB_COMMIT_REPO(ch) TSDB_READ_REPO(&(ch->readh))
#define TSDB_COMMIT_REPO_ID(ch) REPO_ID(TSDB_READ_REPO(&(ch->readh)))
#define TSDB_COMMIT_WRITE_FSET(ch) (&((ch)->wSet))
#define TSDB_COMMIT_TABLE(ch) ((ch)->pTable)
#define TSDB_COMMIT_HEAD_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_HEAD)
#define TSDB_COMMIT_DATA_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_DATA)
#define TSDB_COMMIT_LAST_FILE(ch) TSDB_DFILE_IN_SET(TSDB_COMMIT_WRITE_FSET(ch), TSDB_FILE_LAST)
#define TSDB_COMMIT_BUF(ch) TSDB_READ_BUF(&((ch)->readh))
#define TSDB_COMMIT_COMP_BUF(ch) TSDB_READ_COMP_BUF(&((ch)->readh))
#define TSDB_COMMIT_DEFAULT_ROWS(ch) TSDB_DEFAULT_BLOCK_ROWS(TSDB_COMMIT_REPO(ch)->config.maxRowsPerFileBlock)
#define TSDB_COMMIT_TXN_VERSION(ch) FS_TXN_VERSION(REPO_FS(TSDB_COMMIT_REPO(ch)))
static int tsdbCommitMeta(STsdbRepo *pRepo);
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact);
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid);
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile);
static int tsdbCommitTSData(STsdbRepo *pRepo);
static void tsdbStartCommit(STsdbRepo *pRepo);
static void tsdbEndCommit(STsdbRepo *pRepo, int eno);
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static int tsdbCreateCommitIters(SCommitH *pCommith);
static void tsdbDestroyCommitIters(SCommitH *pCommith);
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key);
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo);
static void tsdbDestroyCommitH(SCommitH *pCommith);
static int tsdbGetFidLevel(int fid, SRtn *pRtn);
static int tsdbNextCommitFid(SCommitH *pCommith);
static int tsdbCommitToTable(SCommitH *pCommith, int tid);
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable);
static int tsdbComparKeyBlock(const void *arg1, const void *arg2);
static int tsdbWriteBlockInfo(SCommitH *pCommih);
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData);
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx);
static int tsdbMoveBlock(SCommitH *pCommith, int bidx);
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks);
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit,
bool isLastOneBlock);
static void tsdbResetCommitFile(SCommitH *pCommith);
static void tsdbResetCommitTable(SCommitH *pCommith);
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid);
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError);
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo);
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update);
void *tsdbCommitData(STsdbRepo *pRepo) {
if (pRepo->imem == NULL) {
return NULL;
}
tsdbStartCommit(pRepo);
// Commit to update meta file
if (tsdbCommitMeta(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing META data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
// Create the iterator to read from cache
if (tsdbCommitTSData(pRepo) < 0) {
tsdbError("vgId:%d error occurs while committing TS data since %s", REPO_ID(pRepo), tstrerror(terrno));
goto _err;
}
tsdbEndCommit(pRepo, TSDB_CODE_SUCCESS);
return NULL;
_err:
ASSERT(terrno != TSDB_CODE_SUCCESS);
pRepo->code = terrno;
tsdbEndCommit(pRepo, terrno);
return NULL;
}
int tsdbApplyRtnOnFSet(STsdbRepo *pRepo, SDFileSet *pSet, SRtn *pRtn) {
SDiskID did;
SDFileSet nSet;
STsdbFS * pfs = REPO_FS(pRepo);
int level;
ASSERT(pSet->fid >= pRtn->minFid);
level = tsdbGetFidLevel(pSet->fid, pRtn);
tfsAllocDisk(level, &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
if (did.level > TSDB_FSET_LEVEL(pSet)) {
// Need to move the FSET to higher level
tsdbInitDFileSet(&nSet, did, REPO_ID(pRepo), pSet->fid, FS_TXN_VERSION(pfs));
if (tsdbCopyDFileSet(pSet, &nSet) < 0) {
tsdbError("vgId:%d failed to copy FSET %d from level %d to level %d since %s", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), did.level, tstrerror(terrno));
return -1;
}
if (tsdbUpdateDFileSet(pfs, &nSet) < 0) {
return -1;
}
tsdbInfo("vgId:%d FSET %d is copied from level %d disk id %d to level %d disk id %d", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet), did.level, did.id);
} else {
// On a correct level
if (tsdbUpdateDFileSet(pfs, pSet) < 0) {
return -1;
}
}
return 0;
}
int tsdbWriteBlockInfoImpl(SDFile *pHeadf, STable *pTable, SArray *pSupA, SArray *pSubA, void **ppBuf,
SBlockIdx *pIdx) {
size_t nSupBlocks;
size_t nSubBlocks;
uint32_t tlen;
SBlockInfo *pBlkInfo;
int64_t offset;
SBlock * pBlock;
memset(pIdx, 0, sizeof(*pIdx));
nSupBlocks = taosArrayGetSize(pSupA);
nSubBlocks = (pSubA == NULL) ? 0 : taosArrayGetSize(pSubA);
if (nSupBlocks <= 0) {
// No data (data all deleted)
return 0;
}
tlen = (uint32_t)(sizeof(SBlockInfo) + sizeof(SBlock) * (nSupBlocks + nSubBlocks) + sizeof(TSCKSUM));
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
pBlkInfo = *ppBuf;
pBlkInfo->delimiter = TSDB_FILE_DELIMITER;
pBlkInfo->tid = TABLE_TID(pTable);
pBlkInfo->uid = TABLE_UID(pTable);
memcpy((void *)(pBlkInfo->blocks), taosArrayGet(pSupA, 0), nSupBlocks * sizeof(SBlock));
if (nSubBlocks > 0) {
memcpy((void *)(pBlkInfo->blocks + nSupBlocks), taosArrayGet(pSubA, 0), nSubBlocks * sizeof(SBlock));
for (int i = 0; i < nSupBlocks; i++) {
pBlock = pBlkInfo->blocks + i;
if (pBlock->numOfSubBlocks > 1) {
pBlock->offset += (sizeof(SBlockInfo) + sizeof(SBlock) * nSupBlocks);
}
}
}
taosCalcChecksumAppend(0, (uint8_t *)pBlkInfo, tlen);
if (tsdbAppendDFile(pHeadf, (void *)pBlkInfo, tlen, &offset) < 0) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(pBlkInfo, tlen - sizeof(TSCKSUM)));
// Set pIdx
pBlock = taosArrayGetLast(pSupA);
pIdx->tid = TABLE_TID(pTable);
pIdx->uid = TABLE_UID(pTable);
pIdx->hasLast = pBlock->last ? 1 : 0;
pIdx->maxKey = pBlock->keyLast;
pIdx->numOfBlocks = (uint32_t)nSupBlocks;
pIdx->len = tlen;
pIdx->offset = (uint32_t)offset;
return 0;
}
int tsdbWriteBlockIdx(SDFile *pHeadf, SArray *pIdxA, void **ppBuf) {
SBlockIdx *pBlkIdx;
size_t nidx = taosArrayGetSize(pIdxA);
int tlen = 0, size;
int64_t offset;
if (nidx <= 0) {
// All data are deleted
pHeadf->info.offset = 0;
pHeadf->info.len = 0;
return 0;
}
for (size_t i = 0; i < nidx; i++) {
pBlkIdx = (SBlockIdx *)taosArrayGet(pIdxA, i);
size = tsdbEncodeSBlockIdx(NULL, pBlkIdx);
if (tsdbMakeRoom(ppBuf, tlen + size) < 0) return -1;
void *ptr = POINTER_SHIFT(*ppBuf, tlen);
tsdbEncodeSBlockIdx(&ptr, pBlkIdx);
tlen += size;
}
tlen += sizeof(TSCKSUM);
if (tsdbMakeRoom(ppBuf, tlen) < 0) return -1;
taosCalcChecksumAppend(0, (uint8_t *)(*ppBuf), tlen);
if (tsdbAppendDFile(pHeadf, *ppBuf, tlen, &offset) < tlen) {
return -1;
}
tsdbUpdateDFileMagic(pHeadf, POINTER_SHIFT(*ppBuf, tlen - sizeof(TSCKSUM)));
pHeadf->info.offset = (uint32_t)offset;
pHeadf->info.len = tlen;
return 0;
}
// =================== Commit Meta Data
static int tsdbInitCommitMetaFile(STsdbRepo *pRepo, SMFile* pMf, bool open) {
STsdbFS * pfs = REPO_FS(pRepo);
SMFile * pOMFile = pfs->cstatus->pmf;
SDiskID did;
// Create/Open a meta file or open the existing file
if (pOMFile == NULL) {
// Create a new meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(pMf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)));
if (open && tsdbCreateMFile(pMf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to commit", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMf));
} else {
tsdbInitMFileEx(pMf, pOMFile);
if (open && tsdbOpenMFile(pMf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
}
static int tsdbCommitMeta(STsdbRepo *pRepo) {
STsdbFS * pfs = REPO_FS(pRepo);
SMemTable *pMem = pRepo->imem;
SMFile * pOMFile = pfs->cstatus->pmf;
SMFile mf;
SActObj * pAct = NULL;
SActCont * pCont = NULL;
SListNode *pNode = NULL;
ASSERT(pOMFile != NULL || listNEles(pMem->actList) > 0);
if (listNEles(pMem->actList) <= 0) {
// no meta data to commit, just keep the old meta file
tsdbUpdateMFile(pfs, pOMFile);
if (tsTsdbMetaCompactRatio > 0) {
if (tsdbInitCommitMetaFile(pRepo, &mf, false) < 0) {
return -1;
}
int ret = tsdbCompactMetaFile(pRepo, pfs, &mf);
if (ret < 0) tsdbError("compact meta file error");
return ret;
}
return 0;
} else {
if (tsdbInitCommitMetaFile(pRepo, &mf, true) < 0) {
return -1;
}
}
// Loop to write
while ((pNode = tdListPopHead(pMem->actList)) != NULL) {
pAct = (SActObj *)pNode->data;
if (pAct->act == TSDB_UPDATE_META) {
pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(SActObj));
if (tsdbUpdateMetaRecord(pfs, &mf, pAct->uid, (void *)(pCont->cont), pCont->len, false) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
(void)tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else if (pAct->act == TSDB_DROP_META) {
if (tsdbDropMetaRecord(pfs, &mf, pAct->uid) < 0) {
tsdbError("vgId:%d failed to drop META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pAct->uid,
tstrerror(terrno));
tsdbCloseMFile(&mf);
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
} else {
ASSERT(false);
}
}
if (tsdbUpdateMFileHeader(&mf) < 0) {
tsdbError("vgId:%d failed to update META file header since %s, revert it", REPO_ID(pRepo), tstrerror(terrno));
tsdbApplyMFileChange(&mf, pOMFile);
// TODO: need to reload metaCache
return -1;
}
TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbUpdateMFile(pfs, &mf);
if (tsTsdbMetaCompactRatio > 0 && tsdbCompactMetaFile(pRepo, pfs, &mf) < 0) {
tsdbError("compact meta file error");
}
return 0;
}
int tsdbEncodeKVRecord(void **buf, SKVRecord *pRecord) {
int tlen = 0;
tlen += taosEncodeFixedU64(buf, pRecord->uid);
tlen += taosEncodeFixedI64(buf, pRecord->offset);
tlen += taosEncodeFixedI64(buf, pRecord->size);
return tlen;
}
void *tsdbDecodeKVRecord(void *buf, SKVRecord *pRecord) {
buf = taosDecodeFixedU64(buf, &(pRecord->uid));
buf = taosDecodeFixedI64(buf, &(pRecord->offset));
buf = taosDecodeFixedI64(buf, &(pRecord->size));
return buf;
}
void tsdbGetRtnSnap(STsdbRepo *pRepo, SRtn *pRtn) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
TSKEY minKey, midKey, maxKey, now;
now = taosGetTimestamp(pCfg->precision);
minKey = now - pCfg->keep * tsTickPerDay[pCfg->precision];
midKey = now - pCfg->keep2 * tsTickPerDay[pCfg->precision];
maxKey = now - pCfg->keep1 * tsTickPerDay[pCfg->precision];
pRtn->minKey = minKey;
pRtn->minFid = (int)(TSDB_KEY_FID(minKey, pCfg->daysPerFile, pCfg->precision));
pRtn->midFid = (int)(TSDB_KEY_FID(midKey, pCfg->daysPerFile, pCfg->precision));
pRtn->maxFid = (int)(TSDB_KEY_FID(maxKey, pCfg->daysPerFile, pCfg->precision));
tsdbDebug("vgId:%d now:%" PRId64 " minKey:%" PRId64 " minFid:%d, midFid:%d, maxFid:%d", REPO_ID(pRepo), now, minKey,
pRtn->minFid, pRtn->midFid, pRtn->maxFid);
}
static int tsdbUpdateMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid, void *cont, int contLen, bool compact) {
char buf[64] = "\0";
void * pBuf = buf;
SKVRecord rInfo;
int64_t offset;
// Seek to end of meta file
offset = tsdbSeekMFile(pMFile, 0, SEEK_END);
if (offset < 0) {
return -1;
}
rInfo.offset = offset;
rInfo.uid = uid;
rInfo.size = contLen;
int tlen = tsdbEncodeKVRecord((void **)(&pBuf), &rInfo);
if (tsdbAppendMFile(pMFile, buf, tlen, NULL) < tlen) {
return -1;
}
if (tsdbAppendMFile(pMFile, cont, contLen, NULL) < contLen) {
return -1;
}
tsdbUpdateMFileMagic(pMFile, POINTER_SHIFT(cont, contLen - sizeof(TSCKSUM)));
SHashObj* cache = compact ? pfs->metaCacheComp : pfs->metaCache;
pMFile->info.nRecords++;
SKVRecord *pRecord = taosHashGet(cache, (void *)&uid, sizeof(uid));
if (pRecord != NULL) {
pMFile->info.tombSize += (pRecord->size + sizeof(SKVRecord));
} else {
pMFile->info.nRecords++;
}
taosHashPut(cache, (void *)(&uid), sizeof(uid), (void *)(&rInfo), sizeof(rInfo));
return 0;
}
static int tsdbDropMetaRecord(STsdbFS *pfs, SMFile *pMFile, uint64_t uid) {
SKVRecord rInfo = {0};
char buf[128] = "\0";
SKVRecord *pRecord = taosHashGet(pfs->metaCache, (void *)(&uid), sizeof(uid));
if (pRecord == NULL) {
tsdbError("failed to drop META record with key %" PRIu64 " since not find", uid);
return -1;
}
rInfo.offset = -pRecord->offset;
rInfo.uid = pRecord->uid;
rInfo.size = pRecord->size;
void *pBuf = buf;
tsdbEncodeKVRecord(&pBuf, &rInfo);
if (tsdbAppendMFile(pMFile, buf, sizeof(SKVRecord), NULL) < 0) {
return -1;
}
pMFile->info.magic = taosCalcChecksum(pMFile->info.magic, (uint8_t *)buf, sizeof(SKVRecord));
pMFile->info.nDels++;
pMFile->info.nRecords--;
pMFile->info.tombSize += (rInfo.size + sizeof(SKVRecord) * 2);
taosHashRemove(pfs->metaCache, (void *)(&uid), sizeof(uid));
return 0;
}
static int tsdbCompactMetaFile(STsdbRepo *pRepo, STsdbFS *pfs, SMFile *pMFile) {
float delPercent = (float)(pMFile->info.nDels) / (float)(pMFile->info.nRecords);
float tombPercent = (float)(pMFile->info.tombSize) / (float)(pMFile->info.size);
float compactRatio = (float)(tsTsdbMetaCompactRatio)/100;
if (delPercent < compactRatio && tombPercent < compactRatio) {
return 0;
}
if (tsdbOpenMFile(pMFile, O_RDONLY) < 0) {
tsdbError("open meta file %s compact fail", pMFile->f.rname);
return -1;
}
tsdbInfo("begin compact tsdb meta file, ratio:%d, nDels:%" PRId64 ",nRecords:%" PRId64 ",tombSize:%" PRId64 ",size:%" PRId64,
tsTsdbMetaCompactRatio, pMFile->info.nDels,pMFile->info.nRecords,pMFile->info.tombSize,pMFile->info.size);
SMFile mf;
SDiskID did;
// first create tmp meta file
did.level = TFS_PRIMARY_LEVEL;
did.id = TFS_PRIMARY_ID;
tsdbInitMFile(&mf, did, REPO_ID(pRepo), FS_TXN_VERSION(REPO_FS(pRepo)) + 1);
if (tsdbCreateMFile(&mf, true) < 0) {
tsdbError("vgId:%d failed to create META file since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
tsdbInfo("vgId:%d meta file %s is created to compact meta data", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf));
// second iterator metaCache
int code = -1;
int64_t maxBufSize = 1024;
SKVRecord *pRecord;
void *pBuf = NULL;
pBuf = malloc((size_t)maxBufSize);
if (pBuf == NULL) {
goto _err;
}
// init Comp
assert(pfs->metaCacheComp == NULL);
pfs->metaCacheComp = taosHashInit(4096, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK);
if (pfs->metaCacheComp == NULL) {
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, NULL);
while (pRecord) {
if (tsdbSeekMFile(pMFile, pRecord->offset + sizeof(SKVRecord), SEEK_SET) < 0) {
tsdbError("vgId:%d failed to seek file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (pRecord->size > maxBufSize) {
maxBufSize = pRecord->size;
void* tmp = realloc(pBuf, (size_t)maxBufSize);
if (tmp == NULL) {
goto _err;
}
pBuf = tmp;
}
int nread = (int)tsdbReadMFile(pMFile, pBuf, pRecord->size);
if (nread < 0) {
tsdbError("vgId:%d failed to read file %s since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile),
tstrerror(terrno));
goto _err;
}
if (nread < pRecord->size) {
tsdbError("vgId:%d failed to read file %s since file corrupted, expected read:%" PRId64 " actual read:%d",
REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pMFile), pRecord->size, nread);
goto _err;
}
if (tsdbUpdateMetaRecord(pfs, &mf, pRecord->uid, pBuf, (int)pRecord->size, true) < 0) {
tsdbError("vgId:%d failed to update META record, uid %" PRIu64 " since %s", REPO_ID(pRepo), pRecord->uid,
tstrerror(terrno));
goto _err;
}
pRecord = taosHashIterate(pfs->metaCache, pRecord);
}
code = 0;
_err:
if (code == 0) TSDB_FILE_FSYNC(&mf);
tsdbCloseMFile(&mf);
tsdbCloseMFile(pMFile);
if (code == 0) {
// rename meta.tmp -> meta
tsdbInfo("vgId:%d meta file rename %s -> %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(&mf), TSDB_FILE_FULL_NAME(pMFile));
taosRename(mf.f.aname,pMFile->f.aname);
tstrncpy(mf.f.aname, pMFile->f.aname, TSDB_FILENAME_LEN);
tstrncpy(mf.f.rname, pMFile->f.rname, TSDB_FILENAME_LEN);
// update current meta file info
pfs->nstatus->pmf = NULL;
tsdbUpdateMFile(pfs, &mf);
taosHashCleanup(pfs->metaCache);
pfs->metaCache = pfs->metaCacheComp;
pfs->metaCacheComp = NULL;
} else {
// remove meta.tmp file
remove(mf.f.aname);
taosHashCleanup(pfs->metaCacheComp);
pfs->metaCacheComp = NULL;
}
tfree(pBuf);
ASSERT(mf.info.nDels == 0);
ASSERT(mf.info.tombSize == 0);
tsdbInfo("end compact tsdb meta file,code:%d,nRecords:%" PRId64 ",size:%" PRId64,
code,mf.info.nRecords,mf.info.size);
return code;
}
// =================== Commit Time-Series Data
static int tsdbCommitTSData(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
SCommitH commith;
SDFileSet *pSet = NULL;
int fid;
memset(&commith, 0, sizeof(commith));
if (pMem->numOfRows <= 0) {
// No memory data, just apply retention on each file on disk
if (tsdbApplyRtn(pRepo) < 0) {
return -1;
}
return 0;
}
// Resource initialization
if (tsdbInitCommitH(&commith, pRepo) < 0) {
return -1;
}
// Skip expired memory data and expired FSET
tsdbSeekCommitIter(&commith, commith.rtn.minKey);
while ((pSet = tsdbFSIterNext(&(commith.fsIter)))) {
if (pSet->fid < commith.rtn.minFid) {
tsdbInfo("vgId:%d FSET %d on level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
break;
}
}
// Loop to commit to each file
fid = tsdbNextCommitFid(&(commith));
while (true) {
// Loop over both on disk and memory
if (pSet == NULL && fid == TSDB_IVLD_FID) break;
if (pSet && (fid == TSDB_IVLD_FID || pSet->fid < fid)) {
// Only has existing FSET but no memory data to commit in this
// existing FSET, only check if file in correct retention
if (tsdbApplyRtnOnFSet(pRepo, pSet, &(commith.rtn)) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
pSet = tsdbFSIterNext(&(commith.fsIter));
} else {
// Has memory data to commit
SDFileSet *pCSet;
int cfid;
if (pSet == NULL || pSet->fid > fid) {
// Commit to a new FSET with fid: fid
pCSet = NULL;
cfid = fid;
} else {
// Commit to an existing FSET
pCSet = pSet;
cfid = pSet->fid;
pSet = tsdbFSIterNext(&(commith.fsIter));
}
if (tsdbCommitToFile(&commith, pCSet, cfid) < 0) {
tsdbDestroyCommitH(&commith);
return -1;
}
fid = tsdbNextCommitFid(&commith);
}
}
tsdbDestroyCommitH(&commith);
return 0;
}
static void tsdbStartCommit(STsdbRepo *pRepo) {
SMemTable *pMem = pRepo->imem;
ASSERT(pMem->numOfRows > 0 || listNEles(pMem->actList) > 0);
tsdbInfo("vgId:%d start to commit! keyFirst %" PRId64 " keyLast %" PRId64 " numOfRows %" PRId64 " meta rows: %d",
REPO_ID(pRepo), pMem->keyFirst, pMem->keyLast, pMem->numOfRows, listNEles(pMem->actList));
tsdbStartFSTxn(pRepo, pMem->pointsAdd, pMem->storageAdd);
pRepo->code = TSDB_CODE_SUCCESS;
}
static void tsdbEndCommit(STsdbRepo *pRepo, int eno) {
if (eno != TSDB_CODE_SUCCESS) {
tsdbEndFSTxnWithError(REPO_FS(pRepo));
} else {
tsdbEndFSTxn(pRepo);
}
tsdbInfo("vgId:%d commit over, %s", REPO_ID(pRepo), (eno == TSDB_CODE_SUCCESS) ? "succeed" : "failed");
if (pRepo->appH.notifyStatus) pRepo->appH.notifyStatus(pRepo->appH.appH, TSDB_STATUS_COMMIT_OVER, eno);
SMemTable *pIMem = pRepo->imem;
(void)tsdbLockRepo(pRepo);
pRepo->imem = NULL;
(void)tsdbUnlockRepo(pRepo);
tsdbUnRefMemTable(pRepo, pIMem);
tsem_post(&(pRepo->readyToCommit));
}
#if 0
static bool tsdbHasDataToCommit(SCommitIter *iters, int nIters, TSKEY minKey, TSKEY maxKey) {
for (int i = 0; i < nIters; i++) {
TSKEY nextKey = tsdbNextIterKey((iters + i)->pIter);
if (nextKey != TSDB_DATA_TIMESTAMP_NULL && (nextKey >= minKey && nextKey <= maxKey)) return true;
}
return false;
}
#endif
static int tsdbCommitToFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
ASSERT(pSet == NULL || pSet->fid == fid);
tsdbResetCommitFile(pCommith);
tsdbGetFidKeyRange(pCfg->daysPerFile, pCfg->precision, fid, &(pCommith->minKey), &(pCommith->maxKey));
// Set and open files
if (tsdbSetAndOpenCommitFile(pCommith, pSet, fid) < 0) {
return -1;
}
// Loop to commit each table data
for (int tid = 1; tid < pCommith->niters; tid++) {
SCommitIter *pIter = pCommith->iters + tid;
if (pIter->pTable == NULL) continue;
if (tsdbCommitToTable(pCommith, tid) < 0) {
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
}
if (tsdbWriteBlockIdx(TSDB_COMMIT_HEAD_FILE(pCommith), pCommith->aBlkIdx, (void **)(&(TSDB_COMMIT_BUF(pCommith)))) <
0) {
tsdbError("vgId:%d failed to write SBlockIdx part to FSET %d since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
if (tsdbUpdateDFileSetHeader(&(pCommith->wSet)) < 0) {
tsdbError("vgId:%d failed to update FSET %d header since %s", REPO_ID(pRepo), fid, tstrerror(terrno));
tsdbCloseCommitFile(pCommith, true);
// revert the file change
tsdbApplyDFileSetChange(TSDB_COMMIT_WRITE_FSET(pCommith), pSet);
return -1;
}
// Close commit file
tsdbCloseCommitFile(pCommith, false);
if (tsdbUpdateDFileSet(REPO_FS(pRepo), &(pCommith->wSet)) < 0) {
return -1;
}
return 0;
}
static int tsdbCreateCommitIters(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SMemTable *pMem = pRepo->imem;
STsdbMeta *pMeta = pRepo->tsdbMeta;
pCommith->niters = pMem->maxTables;
pCommith->iters = (SCommitIter *)calloc(pMem->maxTables, sizeof(SCommitIter));
if (pCommith->iters == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (tsdbRLockRepoMeta(pRepo) < 0) return -1;
// reference all tables
for (int i = 0; i < pMem->maxTables; i++) {
if (pMeta->tables[i] != NULL) {
tsdbRefTable(pMeta->tables[i]);
pCommith->iters[i].pTable = pMeta->tables[i];
}
}
if (tsdbUnlockRepoMeta(pRepo) < 0) return -1;
for (int i = 0; i < pMem->maxTables; i++) {
if ((pCommith->iters[i].pTable != NULL) && (pMem->tData[i] != NULL) &&
(TABLE_UID(pCommith->iters[i].pTable) == pMem->tData[i]->uid)) {
if ((pCommith->iters[i].pIter = tSkipListCreateIter(pMem->tData[i]->pData)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tSkipListIterNext(pCommith->iters[i].pIter);
}
}
return 0;
}
static void tsdbDestroyCommitIters(SCommitH *pCommith) {
if (pCommith->iters == NULL) return;
for (int i = 1; i < pCommith->niters; i++) {
if (pCommith->iters[i].pTable != NULL) {
tsdbUnRefTable(pCommith->iters[i].pTable);
tSkipListDestroyIter(pCommith->iters[i].pIter);
}
}
free(pCommith->iters);
pCommith->iters = NULL;
pCommith->niters = 0;
}
// Skip all keys until key (not included)
static void tsdbSeekCommitIter(SCommitH *pCommith, TSKEY key) {
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, key - 1, INT32_MAX, NULL, NULL, 0, true, NULL);
}
}
static int tsdbInitCommitH(SCommitH *pCommith, STsdbRepo *pRepo) {
STsdbCfg *pCfg = REPO_CFG(pRepo);
memset(pCommith, 0, sizeof(*pCommith));
tsdbGetRtnSnap(pRepo, &(pCommith->rtn));
TSDB_FSET_SET_CLOSED(TSDB_COMMIT_WRITE_FSET(pCommith));
// Init read handle
if (tsdbInitReadH(&(pCommith->readh), pRepo) < 0) {
return -1;
}
// Init file iterator
tsdbFSIterInit(&(pCommith->fsIter), REPO_FS(pRepo), TSDB_FS_ITER_FORWARD);
if (tsdbCreateCommitIters(pCommith) < 0) {
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aBlkIdx = taosArrayInit(1024, sizeof(SBlockIdx));
if (pCommith->aBlkIdx == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSupBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSupBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->aSubBlk = taosArrayInit(1024, sizeof(SBlock));
if (pCommith->aSubBlk == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
pCommith->pDataCols = tdNewDataCols(0, pCfg->maxRowsPerFileBlock);
if (pCommith->pDataCols == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbDestroyCommitH(pCommith);
return -1;
}
return 0;
}
static void tsdbDestroyCommitH(SCommitH *pCommith) {
pCommith->pDataCols = tdFreeDataCols(pCommith->pDataCols);
pCommith->aSubBlk = taosArrayDestroy(pCommith->aSubBlk);
pCommith->aSupBlk = taosArrayDestroy(pCommith->aSupBlk);
pCommith->aBlkIdx = taosArrayDestroy(pCommith->aBlkIdx);
tsdbDestroyCommitIters(pCommith);
tsdbDestroyReadH(&(pCommith->readh));
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static int tsdbNextCommitFid(SCommitH *pCommith) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int fid = TSDB_IVLD_FID;
for (int i = 0; i < pCommith->niters; i++) {
SCommitIter *pIter = pCommith->iters + i;
if (pIter->pTable == NULL || pIter->pIter == NULL) continue;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
if (nextKey == TSDB_DATA_TIMESTAMP_NULL) {
continue;
} else {
int tfid = (int)(TSDB_KEY_FID(nextKey, pCfg->daysPerFile, pCfg->precision));
if (fid == TSDB_IVLD_FID || fid > tfid) {
fid = tfid;
}
}
}
return fid;
}
static int tsdbCommitToTable(SCommitH *pCommith, int tid) {
SCommitIter *pIter = pCommith->iters + tid;
TSKEY nextKey = tsdbNextIterKey(pIter->pIter);
tsdbResetCommitTable(pCommith);
TSDB_RLOCK_TABLE(pIter->pTable);
// Set commit table
if (tsdbSetCommitTable(pCommith, pIter->pTable) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
// No disk data and no memory data, just return
if (pCommith->readh.pBlkIdx == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return 0;
}
// Must has disk data or has memory data
int nBlocks;
int bidx = 0;
SBlock *pBlock;
if (pCommith->readh.pBlkIdx) {
if (tsdbLoadBlockInfo(&(pCommith->readh), NULL) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
} else {
nBlocks = 0;
}
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
while (true) {
if (pBlock == NULL && (nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey)) break;
if ((nextKey == TSDB_DATA_TIMESTAMP_NULL || nextKey > pCommith->maxKey) ||
(pBlock && (!pBlock->last) && tsdbComparKeyBlock((void *)(&nextKey), pBlock) > 0)) {
if (tsdbMoveBlock(pCommith, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
} else if (pBlock && (pBlock->last || tsdbComparKeyBlock((void *)(&nextKey), pBlock) == 0)) {
// merge pBlock data and memory data
if (tsdbMergeMemData(pCommith, pIter, bidx) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
bidx++;
if (bidx < nBlocks) {
pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
} else {
pBlock = NULL;
}
nextKey = tsdbNextIterKey(pIter->pIter);
} else {
// Only commit memory data
if (pBlock == NULL) {
if (tsdbCommitMemData(pCommith, pIter, pCommith->maxKey, false) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
} else {
if (tsdbCommitMemData(pCommith, pIter, pBlock->keyFirst - 1, true) < 0) {
TSDB_RUNLOCK_TABLE(pIter->pTable);
return -1;
}
}
nextKey = tsdbNextIterKey(pIter->pIter);
}
}
TSDB_RUNLOCK_TABLE(pIter->pTable);
if (tsdbWriteBlockInfo(pCommith) < 0) {
tsdbError("vgId:%d failed to write SBlockInfo part into file %s since %s", TSDB_COMMIT_REPO_ID(pCommith),
TSDB_FILE_FULL_NAME(TSDB_COMMIT_HEAD_FILE(pCommith)), tstrerror(terrno));
return -1;
}
return 0;
}
static int tsdbSetCommitTable(SCommitH *pCommith, STable *pTable) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1);
pCommith->pTable = pTable;
if (tdInitDataCols(pCommith->pDataCols, pSchema) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pCommith->isRFileSet) {
if (tsdbSetReadTable(&(pCommith->readh), pTable) < 0) {
return -1;
}
} else {
pCommith->readh.pBlkIdx = NULL;
}
return 0;
}
static int tsdbComparKeyBlock(const void *arg1, const void *arg2) {
TSKEY key = *(TSKEY *)arg1;
SBlock *pBlock = (SBlock *)arg2;
if (key < pBlock->keyFirst) {
return -1;
} else if (key > pBlock->keyLast) {
return 1;
} else {
return 0;
}
}
int tsdbWriteBlockImpl(STsdbRepo *pRepo, STable *pTable, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock,
bool isLast, bool isSuper, void **ppBuf, void **ppCBuf) {
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlockData *pBlockData;
int64_t offset = 0;
int rowsToWrite = pDataCols->numOfRows;
ASSERT(rowsToWrite > 0 && rowsToWrite <= pCfg->maxRowsPerFileBlock);
ASSERT((!isLast) || rowsToWrite < pCfg->minRowsPerFileBlock);
// Make buffer space
if (tsdbMakeRoom(ppBuf, TSDB_BLOCK_STATIS_SIZE(pDataCols->numOfCols)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
// Get # of cols not all NULL(not including key column)
int nColsNotAllNull = 0;
for (int ncol = 1; ncol < pDataCols->numOfCols; ncol++) { // ncol from 1, we skip the timestamp column
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol *pBlockCol = pBlockData->cols + nColsNotAllNull;
if (isAllRowsNull(pDataCol)) { // all data to commit are NULL, just ignore it
continue;
}
memset(pBlockCol, 0, sizeof(*pBlockCol));
pBlockCol->colId = pDataCol->colId;
pBlockCol->type = pDataCol->type;
if (tDataTypes[pDataCol->type].statisFunc) {
(*tDataTypes[pDataCol->type].statisFunc)(pDataCol->pData, rowsToWrite, &(pBlockCol->min), &(pBlockCol->max),
&(pBlockCol->sum), &(pBlockCol->minIndex), &(pBlockCol->maxIndex),
&(pBlockCol->numOfNull));
}
nColsNotAllNull++;
}
ASSERT(nColsNotAllNull >= 0 && nColsNotAllNull <= pDataCols->numOfCols);
// Compress the data if neccessary
int tcol = 0; // counter of not all NULL and written columns
uint32_t toffset = 0;
int32_t tsize = TSDB_BLOCK_STATIS_SIZE(nColsNotAllNull);
int32_t lsize = tsize;
int32_t keyLen = 0;
for (int ncol = 0; ncol < pDataCols->numOfCols; ncol++) {
// All not NULL columns finish
if (ncol != 0 && tcol >= nColsNotAllNull) break;
SDataCol * pDataCol = pDataCols->cols + ncol;
SBlockCol *pBlockCol = pBlockData->cols + tcol;
if (ncol != 0 && (pDataCol->colId != pBlockCol->colId)) continue;
int32_t flen; // final length
int32_t tlen = dataColGetNEleLen(pDataCol, rowsToWrite);
void * tptr;
// Make room
if (tsdbMakeRoom(ppBuf, lsize + tlen + COMP_OVERFLOW_BYTES + sizeof(TSCKSUM)) < 0) {
return -1;
}
pBlockData = (SBlockData *)(*ppBuf);
pBlockCol = pBlockData->cols + tcol;
tptr = POINTER_SHIFT(pBlockData, lsize);
if (pCfg->compression == TWO_STAGE_COMP &&
tsdbMakeRoom(ppCBuf, tlen + COMP_OVERFLOW_BYTES) < 0) {
return -1;
}
// Compress or just copy
if (pCfg->compression) {
flen = (*(tDataTypes[pDataCol->type].compFunc))((char *)pDataCol->pData, tlen, rowsToWrite, tptr,
tlen + COMP_OVERFLOW_BYTES, pCfg->compression, *ppCBuf,
tlen + COMP_OVERFLOW_BYTES);
} else {
flen = tlen;
memcpy(tptr, pDataCol->pData, flen);
}
// Add checksum
ASSERT(flen > 0);
flen += sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)tptr, flen);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(tptr, flen - sizeof(TSCKSUM)));
if (ncol != 0) {
tsdbSetBlockColOffset(pBlockCol, toffset);
pBlockCol->len = flen;
tcol++;
} else {
keyLen = flen;
}
toffset += flen;
lsize += flen;
}
pBlockData->delimiter = TSDB_FILE_DELIMITER;
pBlockData->uid = TABLE_UID(pTable);
pBlockData->numOfCols = nColsNotAllNull;
taosCalcChecksumAppend(0, (uint8_t *)pBlockData, tsize);
tsdbUpdateDFileMagic(pDFile, POINTER_SHIFT(pBlockData, tsize - sizeof(TSCKSUM)));
// Write the whole block to file
if (tsdbAppendDFile(pDFile, (void *)pBlockData, lsize, &offset) < lsize) {
return -1;
}
// Update pBlock membership vairables
pBlock->last = isLast;
pBlock->offset = offset;
pBlock->algorithm = pCfg->compression;
pBlock->numOfRows = rowsToWrite;
pBlock->len = lsize;
pBlock->keyLen = keyLen;
pBlock->numOfSubBlocks = isSuper ? 1 : 0;
pBlock->numOfCols = nColsNotAllNull;
pBlock->keyFirst = dataColsKeyFirst(pDataCols);
pBlock->keyLast = dataColsKeyLast(pDataCols);
tsdbDebug("vgId:%d tid:%d a block of data is written to file %s, offset %" PRId64
" numOfRows %d len %d numOfCols %" PRId16 " keyFirst %" PRId64 " keyLast %" PRId64,
REPO_ID(pRepo), TABLE_TID(pTable), TSDB_FILE_FULL_NAME(pDFile), offset, rowsToWrite, pBlock->len,
pBlock->numOfCols, pBlock->keyFirst, pBlock->keyLast);
return 0;
}
static int tsdbWriteBlock(SCommitH *pCommith, SDFile *pDFile, SDataCols *pDataCols, SBlock *pBlock, bool isLast,
bool isSuper) {
return tsdbWriteBlockImpl(TSDB_COMMIT_REPO(pCommith), TSDB_COMMIT_TABLE(pCommith), pDFile, pDataCols, pBlock, isLast,
isSuper, (void **)(&(TSDB_COMMIT_BUF(pCommith))),
(void **)(&(TSDB_COMMIT_COMP_BUF(pCommith))));
}
static int tsdbWriteBlockInfo(SCommitH *pCommih) {
SDFile * pHeadf = TSDB_COMMIT_HEAD_FILE(pCommih);
SBlockIdx blkIdx;
STable * pTable = TSDB_COMMIT_TABLE(pCommih);
if (tsdbWriteBlockInfoImpl(pHeadf, pTable, pCommih->aSupBlk, pCommih->aSubBlk, (void **)(&(TSDB_COMMIT_BUF(pCommih))),
&blkIdx) < 0) {
return -1;
}
if (blkIdx.numOfBlocks == 0) {
return 0;
}
if (taosArrayPush(pCommih->aBlkIdx, (void *)(&blkIdx)) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbCommitMemData(SCommitH *pCommith, SCommitIter *pIter, TSKEY keyLimit, bool toData) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SMergeInfo mInfo;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
SDFile * pDFile;
bool isLast;
SBlock block;
while (true) {
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, defaultRows, pCommith->pDataCols, NULL, 0,
pCfg->update, &mInfo);
if (pCommith->pDataCols->numOfRows <= 0) break;
if (toData || pCommith->pDataCols->numOfRows >= pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
} else {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) {
return -1;
}
}
return 0;
}
static int tsdbMergeMemData(SCommitH *pCommith, SCommitIter *pIter, int bidx) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int nBlocks = pCommith->readh.pBlkIdx->numOfBlocks;
SBlock * pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
TSKEY keyLimit;
int16_t colId = 0;
SMergeInfo mInfo;
SBlock subBlocks[TSDB_MAX_SUBBLOCKS];
SBlock block, supBlock;
SDFile * pDFile;
if (bidx == nBlocks - 1) {
keyLimit = pCommith->maxKey;
} else {
keyLimit = pBlock[1].keyFirst - 1;
}
SSkipListIterator titer = *(pIter->pIter);
if (tsdbLoadBlockDataCols(&(pCommith->readh), pBlock, NULL, &colId, 1) < 0) return -1;
tsdbLoadDataFromCache(pIter->pTable, &titer, keyLimit, INT32_MAX, NULL, pCommith->readh.pDCols[0]->cols[0].pData,
pCommith->readh.pDCols[0]->numOfRows, pCfg->update, &mInfo);
if (mInfo.nOperations == 0) {
// no new data to insert (all updates denied)
if (tsdbMoveBlock(pCommith, bidx) < 0) {
return -1;
}
*(pIter->pIter) = titer;
} else if (pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed == 0) {
// Ignore the block
ASSERT(0);
*(pIter->pIter) = titer;
} else if (tsdbCanAddSubBlock(pCommith, pBlock, &mInfo)) {
// Add a sub-block
tsdbLoadDataFromCache(pIter->pTable, pIter->pIter, keyLimit, INT32_MAX, pCommith->pDataCols,
pCommith->readh.pDCols[0]->cols[0].pData, pCommith->readh.pDCols[0]->numOfRows, pCfg->update,
&mInfo);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, pBlock->last, false) < 0) return -1;
if (pBlock->numOfSubBlocks == 1) {
subBlocks[0] = *pBlock;
subBlocks[0].numOfSubBlocks = 0;
} else {
memcpy(subBlocks, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
sizeof(SBlock) * pBlock->numOfSubBlocks);
}
subBlocks[pBlock->numOfSubBlocks] = block;
supBlock = *pBlock;
supBlock.keyFirst = mInfo.keyFirst;
supBlock.keyLast = mInfo.keyLast;
supBlock.numOfSubBlocks++;
supBlock.numOfRows = pBlock->numOfRows + mInfo.rowsInserted - mInfo.rowsDeleteSucceed;
supBlock.offset = taosArrayGetSize(pCommith->aSubBlk) * sizeof(SBlock);
if (tsdbCommitAddBlock(pCommith, &supBlock, subBlocks, supBlock.numOfSubBlocks) < 0) return -1;
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbMergeBlockData(pCommith, pIter, pCommith->readh.pDCols[0], keyLimit, bidx == (nBlocks - 1)) < 0) return -1;
}
return 0;
}
static int tsdbMoveBlock(SCommitH *pCommith, int bidx) {
SBlock *pBlock = pCommith->readh.pBlkInfo->blocks + bidx;
SDFile *pDFile;
SBlock block;
bool isSameFile;
ASSERT(pBlock->numOfSubBlocks > 0);
if (pBlock->last) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isSameFile = pCommith->isLFileSame;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isSameFile = pCommith->isDFileSame;
}
if (isSameFile) {
if (pBlock->numOfSubBlocks == 1) {
if (tsdbCommitAddBlock(pCommith, pBlock, NULL, 0) < 0) {
return -1;
}
} else {
block = *pBlock;
block.offset = sizeof(SBlock) * taosArrayGetSize(pCommith->aSubBlk);
if (tsdbCommitAddBlock(pCommith, &block, POINTER_SHIFT(pCommith->readh.pBlkInfo, pBlock->offset),
pBlock->numOfSubBlocks) < 0) {
return -1;
}
}
} else {
if (tsdbLoadBlockData(&(pCommith->readh), pBlock, NULL) < 0) return -1;
if (tsdbWriteBlock(pCommith, pDFile, pCommith->readh.pDCols[0], &block, pBlock->last, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static int tsdbCommitAddBlock(SCommitH *pCommith, const SBlock *pSupBlock, const SBlock *pSubBlocks, int nSubBlocks) {
if (taosArrayPush(pCommith->aSupBlk, pSupBlock) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
if (pSubBlocks && taosArrayAddBatch(pCommith->aSubBlk, pSubBlocks, nSubBlocks) == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
return 0;
}
static int tsdbMergeBlockData(SCommitH *pCommith, SCommitIter *pIter, SDataCols *pDataCols, TSKEY keyLimit, bool isLastOneBlock) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
SBlock block;
SDFile * pDFile;
bool isLast;
int32_t defaultRows = TSDB_COMMIT_DEFAULT_ROWS(pCommith);
int biter = 0;
while (true) {
tsdbLoadAndMergeFromCache(pCommith->readh.pDCols[0], &biter, pIter, pCommith->pDataCols, keyLimit, defaultRows,
pCfg->update);
if (pCommith->pDataCols->numOfRows == 0) break;
if (isLastOneBlock) {
if (pCommith->pDataCols->numOfRows < pCfg->minRowsPerFileBlock) {
pDFile = TSDB_COMMIT_LAST_FILE(pCommith);
isLast = true;
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
} else {
pDFile = TSDB_COMMIT_DATA_FILE(pCommith);
isLast = false;
}
if (tsdbWriteBlock(pCommith, pDFile, pCommith->pDataCols, &block, isLast, true) < 0) return -1;
if (tsdbCommitAddBlock(pCommith, &block, NULL, 0) < 0) return -1;
}
return 0;
}
static void tsdbLoadAndMergeFromCache(SDataCols *pDataCols, int *iter, SCommitIter *pCommitIter, SDataCols *pTarget,
TSKEY maxKey, int maxRows, int8_t update) {
TSKEY key1 = INT64_MAX;
TSKEY key2 = INT64_MAX;
STSchema *pSchema = NULL;
ASSERT(maxRows > 0 && dataColsKeyLast(pDataCols) <= maxKey);
tdResetDataCols(pTarget);
while (true) {
key1 = (*iter >= pDataCols->numOfRows) ? INT64_MAX : dataColsKeyAt(pDataCols, *iter);
SMemRow row = tsdbNextIterRow(pCommitIter->pIter);
if (row == NULL || memRowKey(row) > maxKey) {
key2 = INT64_MAX;
} else {
key2 = memRowKey(row);
}
if (key1 == INT64_MAX && key2 == INT64_MAX) break;
if (key1 < key2) {
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
pTarget->numOfRows++;
(*iter)++;
} else if (key1 > key2) {
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, true);
tSkipListIterNext(pCommitIter->pIter);
} else {
if (update != TD_ROW_OVERWRITE_UPDATE) {
//copy disk data
for (int i = 0; i < pDataCols->numOfCols; i++) {
//TODO: dataColAppendVal may fail
dataColAppendVal(pTarget->cols + i, tdGetColDataOfRow(pDataCols->cols + i, *iter), pTarget->numOfRows,
pTarget->maxPoints);
}
if(update == TD_ROW_DISCARD_UPDATE) pTarget->numOfRows++;
}
if (update != TD_ROW_DISCARD_UPDATE) {
//copy mem data
if (pSchema == NULL || schemaVersion(pSchema) != memRowVersion(row)) {
pSchema = tsdbGetTableSchemaImpl(pCommitIter->pTable, false, false, memRowVersion(row));
ASSERT(pSchema != NULL);
}
tdAppendMemRowToDataCol(row, pSchema, pTarget, update == TD_ROW_OVERWRITE_UPDATE);
}
(*iter)++;
tSkipListIterNext(pCommitIter->pIter);
}
if (pTarget->numOfRows >= maxRows) break;
}
}
static void tsdbResetCommitFile(SCommitH *pCommith) {
pCommith->isRFileSet = false;
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
taosArrayClear(pCommith->aBlkIdx);
}
static void tsdbResetCommitTable(SCommitH *pCommith) {
taosArrayClear(pCommith->aSubBlk);
taosArrayClear(pCommith->aSupBlk);
pCommith->pTable = NULL;
}
static int tsdbSetAndOpenCommitFile(SCommitH *pCommith, SDFileSet *pSet, int fid) {
SDiskID did;
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
SDFileSet *pWSet = TSDB_COMMIT_WRITE_FSET(pCommith);
tfsAllocDisk(tsdbGetFidLevel(fid, &(pCommith->rtn)), &(did.level), &(did.id));
if (did.level == TFS_UNDECIDED_LEVEL) {
terrno = TSDB_CODE_TDB_NO_AVAIL_DISK;
return -1;
}
// Open read FSET
if (pSet) {
if (tsdbSetAndOpenReadFSet(&(pCommith->readh), pSet) < 0) {
return -1;
}
pCommith->isRFileSet = true;
if (tsdbLoadBlockIdx(&(pCommith->readh)) < 0) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is opened to read to commit", REPO_ID(pRepo), TSDB_FSET_FID(pSet),
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
} else {
pCommith->isRFileSet = false;
}
// Set and open commit FSET
if (pSet == NULL || did.level > TSDB_FSET_LEVEL(pSet)) {
// Create a new FSET to write data
tsdbInitDFileSet(pWSet, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)));
if (tsdbCreateDFileSet(pWSet, true) < 0) {
tsdbError("vgId:%d failed to create FSET %d at level %d disk id %d since %s", REPO_ID(pRepo),
TSDB_FSET_FID(pWSet), TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet), tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
return -1;
}
pCommith->isDFileSame = false;
pCommith->isLFileSame = false;
tsdbDebug("vgId:%d FSET %d at level %d disk id %d is created to commit", REPO_ID(pRepo), TSDB_FSET_FID(pWSet),
TSDB_FSET_LEVEL(pWSet), TSDB_FSET_ID(pWSet));
} else {
did.level = TSDB_FSET_LEVEL(pSet);
did.id = TSDB_FSET_ID(pSet);
pCommith->wSet.fid = fid;
pCommith->wSet.state = 0;
// TSDB_FILE_HEAD
SDFile *pWHeadf = TSDB_COMMIT_HEAD_FILE(pCommith);
tsdbInitDFile(pWHeadf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_HEAD);
if (tsdbCreateDFile(pWHeadf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWHeadf),
tstrerror(terrno));
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
// TSDB_FILE_DATA
SDFile *pRDataf = TSDB_READ_DATA_FILE(&(pCommith->readh));
SDFile *pWDataf = TSDB_COMMIT_DATA_FILE(pCommith);
tsdbInitDFileEx(pWDataf, pRDataf);
if (tsdbOpenDFile(pWDataf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWDataf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
pCommith->isDFileSame = true;
// TSDB_FILE_LAST
SDFile *pRLastf = TSDB_READ_LAST_FILE(&(pCommith->readh));
SDFile *pWLastf = TSDB_COMMIT_LAST_FILE(pCommith);
if (pRLastf->info.size < 32 * 1024) {
tsdbInitDFileEx(pWLastf, pRLastf);
pCommith->isLFileSame = true;
if (tsdbOpenDFile(pWLastf, O_WRONLY) < 0) {
tsdbError("vgId:%d failed to open file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
} else {
tsdbInitDFile(pWLastf, did, REPO_ID(pRepo), fid, FS_TXN_VERSION(REPO_FS(pRepo)), TSDB_FILE_LAST);
pCommith->isLFileSame = false;
if (tsdbCreateDFile(pWLastf, true) < 0) {
tsdbError("vgId:%d failed to create file %s to commit since %s", REPO_ID(pRepo), TSDB_FILE_FULL_NAME(pWLastf),
tstrerror(terrno));
tsdbCloseDFileSet(pWSet);
(void)tsdbRemoveDFile(pWHeadf);
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
return -1;
}
}
}
}
return 0;
}
static void tsdbCloseCommitFile(SCommitH *pCommith, bool hasError) {
if (pCommith->isRFileSet) {
tsdbCloseAndUnsetFSet(&(pCommith->readh));
}
if (!hasError) {
TSDB_FSET_FSYNC(TSDB_COMMIT_WRITE_FSET(pCommith));
}
tsdbCloseDFileSet(TSDB_COMMIT_WRITE_FSET(pCommith));
}
static bool tsdbCanAddSubBlock(SCommitH *pCommith, SBlock *pBlock, SMergeInfo *pInfo) {
STsdbRepo *pRepo = TSDB_COMMIT_REPO(pCommith);
STsdbCfg * pCfg = REPO_CFG(pRepo);
int mergeRows = pBlock->numOfRows + pInfo->rowsInserted - pInfo->rowsDeleteSucceed;
ASSERT(mergeRows > 0);
if (pBlock->numOfSubBlocks < TSDB_MAX_SUBBLOCKS && pInfo->nOperations <= pCfg->maxRowsPerFileBlock) {
if (pBlock->last) {
if (pCommith->isLFileSame && mergeRows < pCfg->minRowsPerFileBlock) return true;
} else {
if (pCommith->isDFileSame && mergeRows <= pCfg->maxRowsPerFileBlock) return true;
}
}
return false;
}
int tsdbApplyRtn(STsdbRepo *pRepo) {
SRtn rtn;
SFSIter fsiter;
STsdbFS * pfs = REPO_FS(pRepo);
SDFileSet *pSet;
// Get retention snapshot
tsdbGetRtnSnap(pRepo, &rtn);
tsdbFSIterInit(&fsiter, pfs, TSDB_FS_ITER_FORWARD);
while ((pSet = tsdbFSIterNext(&fsiter))) {
if (pSet->fid < rtn.minFid) {
tsdbInfo("vgId:%d FSET %d at level %d disk id %d expires, remove it", REPO_ID(pRepo), pSet->fid,
TSDB_FSET_LEVEL(pSet), TSDB_FSET_ID(pSet));
continue;
}
if (tsdbApplyRtnOnFSet(pRepo, pSet, &rtn) < 0) {
return -1;
}
}
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// no test file errors here
#include "taosdef.h"
#include "tsdbint.h"
#include "ttimer.h"
#include "tthread.h"
#define IS_VALID_PRECISION(precision) \
(((precision) >= TSDB_TIME_PRECISION_MILLI) && ((precision) <= TSDB_TIME_PRECISION_NANO))
#define TSDB_DEFAULT_COMPRESSION TWO_STAGE_COMP
#define IS_VALID_COMPRESSION(compression) (((compression) >= NO_COMPRESSION) && ((compression) <= TWO_STAGE_COMP))
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg);
static STsdbRepo *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH);
static void tsdbFreeRepo(STsdbRepo *pRepo);
static void tsdbStartStream(STsdbRepo *pRepo);
static void tsdbStopStream(STsdbRepo *pRepo);
static int tsdbRestoreLastColumns(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh);
static int tsdbRestoreLastRow(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh, SBlockIdx *pIdx);
// Function declaration
int32_t tsdbCreateRepo(int repoid) {
char tsdbDir[TSDB_FILENAME_LEN] = "\0";
char dataDir[TSDB_FILENAME_LEN] = "\0";
tsdbGetRootDir(repoid, tsdbDir);
if (tfsMkdir(tsdbDir) < 0) {
goto _err;
}
tsdbGetDataDir(repoid, dataDir);
if (tfsMkdir(dataDir) < 0) {
goto _err;
}
// TODO: need to create current file with nothing in
return 0;
_err:
tsdbError("vgId:%d failed to create TSDB repository since %s", repoid, tstrerror(terrno));
return -1;
}
int32_t tsdbDropRepo(int repoid) {
char tsdbDir[TSDB_FILENAME_LEN] = "\0";
tsdbGetRootDir(repoid, tsdbDir);
return tfsRmdir(tsdbDir);
}
STsdbRepo *tsdbOpenRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) {
STsdbRepo *pRepo;
STsdbCfg config = *pCfg;
terrno = TSDB_CODE_SUCCESS;
// Check and set default configurations
if (tsdbCheckAndSetDefaultCfg(&config) < 0) {
tsdbError("vgId:%d failed to open TSDB repository since %s", config.tsdbId, tstrerror(terrno));
return NULL;
}
// Create new TSDB object
if ((pRepo = tsdbNewRepo(&config, pAppH)) == NULL) {
tsdbError("vgId:%d failed to open TSDB repository while creating TSDB object since %s", config.tsdbId,
tstrerror(terrno));
return NULL;
}
// Open meta
if (tsdbOpenMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening Meta since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
if (tsdbOpenBufPool(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening buffer pool since %s", config.tsdbId,
tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
if (tsdbOpenFS(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while opening FS since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
// TODO: Restore information from data
if ((!(pRepo->state & TSDB_STATE_BAD_DATA)) && tsdbRestoreInfo(pRepo) < 0) {
tsdbError("vgId:%d failed to open TSDB repository while restore info since %s", config.tsdbId, tstrerror(terrno));
tsdbCloseRepo(pRepo, false);
return NULL;
}
pRepo->mergeBuf = NULL;
tsdbStartStream(pRepo);
tsdbDebug("vgId:%d, TSDB repository opened", REPO_ID(pRepo));
return pRepo;
}
// Note: all working thread and query thread must stopped when calling this function
int tsdbCloseRepo(STsdbRepo *repo, int toCommit) {
if (repo == NULL) return 0;
STsdbRepo *pRepo = repo;
int vgId = REPO_ID(pRepo);
terrno = TSDB_CODE_SUCCESS;
tsdbStopStream(pRepo);
if(pRepo->pthread){
taosDestoryThread(pRepo->pthread);
pRepo->pthread = NULL;
}
if (toCommit) {
tsdbSyncCommit(repo);
}
tsem_wait(&(pRepo->readyToCommit));
tsdbUnRefMemTable(pRepo, pRepo->mem);
tsdbUnRefMemTable(pRepo, pRepo->imem);
pRepo->mem = NULL;
pRepo->imem = NULL;
tsdbCloseFS(pRepo);
tsdbCloseBufPool(pRepo);
tsdbCloseMeta(pRepo);
tsdbFreeRepo(pRepo);
tsdbDebug("vgId:%d repository is closed", vgId);
if (terrno != TSDB_CODE_SUCCESS) {
return -1;
} else {
return 0;
}
}
STsdbCfg *tsdbGetCfg(const STsdbRepo *repo) {
ASSERT(repo != NULL);
return &((STsdbRepo *)repo)->config;
}
int tsdbLockRepo(STsdbRepo *pRepo) {
int code = pthread_mutex_lock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
pRepo->repoLocked = true;
return 0;
}
int tsdbUnlockRepo(STsdbRepo *pRepo) {
ASSERT(IS_REPO_LOCKED(pRepo));
pRepo->repoLocked = false;
int code = pthread_mutex_unlock(&pRepo->mutex);
if (code != 0) {
tsdbError("vgId:%d failed to unlock tsdb since %s", REPO_ID(pRepo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbCheckCommit(STsdbRepo *pRepo) {
ASSERT(pRepo->mem != NULL);
STsdbCfg *pCfg = &(pRepo->config);
STsdbBufBlock *pBufBlock = tsdbGetCurrBufBlock(pRepo);
ASSERT(pBufBlock != NULL);
if ((pRepo->mem->extraBuffList != NULL) ||
((listNEles(pRepo->mem->bufBlockList) >= pCfg->totalBlocks / 3) && (pBufBlock->remain < TSDB_BUFFER_RESERVE))) {
// trigger commit
if (tsdbAsyncCommit(pRepo) < 0) return -1;
}
return 0;
}
STsdbMeta *tsdbGetMeta(STsdbRepo *pRepo) { return pRepo->tsdbMeta; }
STsdbRepoInfo *tsdbGetStatus(STsdbRepo *pRepo) { return NULL; }
int tsdbGetState(STsdbRepo *repo) { return repo->state; }
int8_t tsdbGetCompactState(STsdbRepo *repo) { return (int8_t)(repo->compactState); }
void tsdbReportStat(void *repo, int64_t *totalPoints, int64_t *totalStorage, int64_t *compStorage) {
ASSERT(repo != NULL);
STsdbRepo *pRepo = repo;
*totalPoints = pRepo->stat.pointsWritten;
*totalStorage = pRepo->stat.totalStorage;
*compStorage = pRepo->stat.compStorage;
}
int32_t tsdbConfigRepo(STsdbRepo *repo, STsdbCfg *pCfg) {
// TODO: think about multithread cases
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
STsdbCfg * pRCfg = &repo->config;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
configChanged = true;
}
if (pRCfg->keep1 != pCfg->keep1) {
configChanged = true;
}
if (pRCfg->keep2 != pCfg->keep2) {
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
configChanged = true;
}
if (!configChanged) {
tsdbError("vgId:%d no config changed", REPO_ID(repo));
}
int code = pthread_mutex_lock(&repo->save_mutex);
if (code != 0) {
tsdbError("vgId:%d failed to lock tsdb save config mutex since %s", REPO_ID(repo), strerror(errno));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
STsdbCfg * pSaveCfg = &repo->save_config;
*pSaveCfg = repo->config;
pSaveCfg->compression = pCfg->compression;
pSaveCfg->keep = pCfg->keep;
pSaveCfg->keep1 = pCfg->keep1;
pSaveCfg->keep2 = pCfg->keep2;
pSaveCfg->cacheLastRow = pCfg->cacheLastRow;
pSaveCfg->totalBlocks = pCfg->totalBlocks;
tsdbInfo("vgId:%d old config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)",
REPO_ID(repo),
pRCfg->compression, pRCfg->keep, pRCfg->keep1,pRCfg->keep2,
pRCfg->cacheLastRow, pRCfg->totalBlocks);
tsdbInfo("vgId:%d new config: compression(%d), keep(%d,%d,%d), cacheLastRow(%d),totalBlocks(%d)",
REPO_ID(repo),
pSaveCfg->compression, pSaveCfg->keep,pSaveCfg->keep1, pSaveCfg->keep2,
pSaveCfg->cacheLastRow,pSaveCfg->totalBlocks);
repo->config_changed = true;
pthread_mutex_unlock(&repo->save_mutex);
// schedule a commit msg and wait for the new config applied
tsdbSyncCommitConfig(repo);
return 0;
#if 0
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbCfg config = pRepo->config;
STsdbCfg * pRCfg = &pRepo->config;
if (tsdbCheckAndSetDefaultCfg(pCfg) < 0) return -1;
ASSERT(pRCfg->tsdbId == pCfg->tsdbId);
ASSERT(pRCfg->cacheBlockSize == pCfg->cacheBlockSize);
ASSERT(pRCfg->daysPerFile == pCfg->daysPerFile);
ASSERT(pRCfg->minRowsPerFileBlock == pCfg->minRowsPerFileBlock);
ASSERT(pRCfg->maxRowsPerFileBlock == pCfg->maxRowsPerFileBlock);
ASSERT(pRCfg->precision == pCfg->precision);
bool configChanged = false;
if (pRCfg->compression != pCfg->compression) {
tsdbAlterCompression(pRepo, pCfg->compression);
config.compression = pCfg->compression;
configChanged = true;
}
if (pRCfg->keep != pCfg->keep) {
if (tsdbAlterKeep(pRepo, pCfg->keep) < 0) {
tsdbError("vgId:%d failed to configure repo when alter keep since %s", REPO_ID(pRepo), tstrerror(terrno));
config.keep = pCfg->keep;
return -1;
}
configChanged = true;
}
if (pRCfg->totalBlocks != pCfg->totalBlocks) {
tsdbAlterCacheTotalBlocks(pRepo, pCfg->totalBlocks);
config.totalBlocks = pCfg->totalBlocks;
configChanged = true;
}
if (pRCfg->cacheLastRow != pCfg->cacheLastRow) {
config.cacheLastRow = pCfg->cacheLastRow;
configChanged = true;
}
if (configChanged) {
if (tsdbSaveConfig(pRepo->rootDir, &config) < 0) {
tsdbError("vgId:%d failed to configure repository while save config since %s", REPO_ID(pRepo), tstrerror(terrno));
return -1;
}
}
return 0;
#endif
}
uint32_t tsdbGetFileInfo(STsdbRepo *repo, char *name, uint32_t *index, uint32_t eindex, int64_t *size) {
// TODO
return 0;
#if 0
STsdbRepo *pRepo = (STsdbRepo *)repo;
// STsdbMeta *pMeta = pRepo->tsdbMeta;
STsdbFileH *pFileH = pRepo->tsdbFileH;
uint32_t magic = 0;
char * fname = NULL;
struct stat fState;
tsdbDebug("vgId:%d name:%s index:%d eindex:%d", pRepo->config.tsdbId, name, *index, eindex);
ASSERT(*index <= eindex);
if (name[0] == 0) { // get the file from index or after, but not larger than eindex
int fid = (*index) / TSDB_FILE_TYPE_MAX;
if (pFileH->nFGroups == 0 || fid > pFileH->pFGroup[pFileH->nFGroups - 1].fileId) {
if (*index <= TSDB_META_FILE_INDEX && TSDB_META_FILE_INDEX <= eindex) {
fname = tsdbGetMetaFileName(pRepo->rootDir);
*index = TSDB_META_FILE_INDEX;
magic = TSDB_META_FILE_MAGIC(pRepo->tsdbMeta);
sprintf(name, "tsdb/%s", TSDB_META_FILE_NAME);
} else {
return 0;
}
} else {
SFileGroup *pFGroup =
taosbsearch(&fid, pFileH->pFGroup, pFileH->nFGroups, sizeof(SFileGroup), keyFGroupCompFunc, TD_GE);
if (pFGroup->fileId == fid) {
SFile *pFile = &pFGroup->files[(*index) % TSDB_FILE_TYPE_MAX];
fname = strdup(TSDB_FILE_NAME(pFile));
magic = pFile->info.magic;
char *tfname = strdup(fname);
sprintf(name, "tsdb/%s/%s", TSDB_DATA_DIR_NAME, basename(tfname));
tfree(tfname);
} else {
if ((pFGroup->fileId + 1) * TSDB_FILE_TYPE_MAX - 1 < (int)eindex) {
SFile *pFile = &pFGroup->files[0];
fname = strdup(TSDB_FILE_NAME(pFile));
*index = pFGroup->fileId * TSDB_FILE_TYPE_MAX;
magic = pFile->info.magic;
char *tfname = strdup(fname);
sprintf(name, "tsdb/%s/%s", TSDB_DATA_DIR_NAME, basename(tfname));
tfree(tfname);
} else {
return 0;
}
}
}
} else { // get the named file at the specified index. If not there, return 0
fname = malloc(256);
sprintf(fname, "%s/vnode/vnode%d/%s", TFS_PRIMARY_PATH(), REPO_ID(pRepo), name);
if (access(fname, F_OK) != 0) {
tfree(fname);
return 0;
}
if (*index == TSDB_META_FILE_INDEX) { // get meta file
tsdbGetStoreInfo(fname, &magic, size);
} else {
char tfname[TSDB_FILENAME_LEN] = "\0";
sprintf(tfname, "vnode/vnode%d/tsdb/%s/%s", REPO_ID(pRepo), TSDB_DATA_DIR_NAME, basename(name));
tsdbGetFileInfoImpl(tfname, &magic, size);
}
tfree(fname);
return magic;
}
if (stat(fname, &fState) < 0) {
tfree(fname);
return 0;
}
*size = fState.st_size;
// magic = *size;
tfree(fname);
return magic;
#endif
}
void tsdbGetRootDir(int repoid, char dirName[]) {
snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb", repoid);
}
void tsdbGetDataDir(int repoid, char dirName[]) {
snprintf(dirName, TSDB_FILENAME_LEN, "vnode/vnode%d/tsdb/data", repoid);
}
static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) {
// Check tsdbId
if (pCfg->tsdbId < 0) {
tsdbError("vgId:%d invalid vgroup ID", pCfg->tsdbId);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check precision
if (pCfg->precision == -1) {
pCfg->precision = TSDB_DEFAULT_PRECISION;
} else {
if (!IS_VALID_PRECISION(pCfg->precision)) {
tsdbError("vgId:%d invalid precision configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check compression
if (pCfg->compression == -1) {
pCfg->compression = TSDB_DEFAULT_COMPRESSION;
} else {
if (!IS_VALID_COMPRESSION(pCfg->compression)) {
tsdbError("vgId:%d invalid compression configuration %d", pCfg->tsdbId, pCfg->precision);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check daysPerFile
if (pCfg->daysPerFile == -1) {
pCfg->daysPerFile = TSDB_DEFAULT_DAYS_PER_FILE;
} else {
if (pCfg->daysPerFile < TSDB_MIN_DAYS_PER_FILE || pCfg->daysPerFile > TSDB_MAX_DAYS_PER_FILE) {
tsdbError(
"vgId:%d invalid daysPerFile configuration! daysPerFile %d TSDB_MIN_DAYS_PER_FILE %d TSDB_MAX_DAYS_PER_FILE "
"%d",
pCfg->tsdbId, pCfg->daysPerFile, TSDB_MIN_DAYS_PER_FILE, TSDB_MAX_DAYS_PER_FILE);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
// Check minRowsPerFileBlock and maxRowsPerFileBlock
if (pCfg->minRowsPerFileBlock == -1) {
pCfg->minRowsPerFileBlock = TSDB_DEFAULT_MIN_ROW_FBLOCK;
} else {
if (pCfg->minRowsPerFileBlock < TSDB_MIN_MIN_ROW_FBLOCK || pCfg->minRowsPerFileBlock > TSDB_MAX_MIN_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid minRowsPerFileBlock configuration! minRowsPerFileBlock %d TSDB_MIN_MIN_ROW_FBLOCK %d "
"TSDB_MAX_MIN_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->minRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->maxRowsPerFileBlock == -1) {
pCfg->maxRowsPerFileBlock = TSDB_DEFAULT_MAX_ROW_FBLOCK;
} else {
if (pCfg->maxRowsPerFileBlock < TSDB_MIN_MAX_ROW_FBLOCK || pCfg->maxRowsPerFileBlock > TSDB_MAX_MAX_ROW_FBLOCK) {
tsdbError(
"vgId:%d invalid maxRowsPerFileBlock configuration! maxRowsPerFileBlock %d TSDB_MIN_MAX_ROW_FBLOCK %d "
"TSDB_MAX_MAX_ROW_FBLOCK %d",
pCfg->tsdbId, pCfg->maxRowsPerFileBlock, TSDB_MIN_MIN_ROW_FBLOCK, TSDB_MAX_MIN_ROW_FBLOCK);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->minRowsPerFileBlock > pCfg->maxRowsPerFileBlock) {
tsdbError("vgId:%d invalid configuration! minRowsPerFileBlock %d maxRowsPerFileBlock %d", pCfg->tsdbId,
pCfg->minRowsPerFileBlock, pCfg->maxRowsPerFileBlock);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
// Check keep
if (pCfg->keep == -1) {
pCfg->keep = TSDB_DEFAULT_KEEP;
} else {
if (pCfg->keep < TSDB_MIN_KEEP || pCfg->keep > TSDB_MAX_KEEP) {
tsdbError(
"vgId:%d invalid keep configuration! keep %d TSDB_MIN_KEEP %d "
"TSDB_MAX_KEEP %d",
pCfg->tsdbId, pCfg->keep, TSDB_MIN_KEEP, TSDB_MAX_KEEP);
terrno = TSDB_CODE_TDB_INVALID_CONFIG;
return -1;
}
}
if (pCfg->keep1 == 0) {
pCfg->keep1 = pCfg->keep;
}
if (pCfg->keep2 == 0) {
pCfg->keep2 = pCfg->keep;
}
// update check
if (pCfg->update < TD_ROW_DISCARD_UPDATE || pCfg->update > TD_ROW_PARTIAL_UPDATE)
pCfg->update = TD_ROW_DISCARD_UPDATE;
// update cacheLastRow
if (pCfg->cacheLastRow != 0) {
if (pCfg->cacheLastRow > 3)
pCfg->cacheLastRow = 1;
}
return 0;
}
static STsdbRepo *tsdbNewRepo(STsdbCfg *pCfg, STsdbAppH *pAppH) {
STsdbRepo *pRepo = (STsdbRepo *)calloc(1, sizeof(*pRepo));
if (pRepo == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pRepo->state = TSDB_STATE_OK;
pRepo->code = TSDB_CODE_SUCCESS;
pRepo->compactState = 0;
pRepo->config = *pCfg;
if (pAppH) {
pRepo->appH = *pAppH;
}
pRepo->repoLocked = false;
pRepo->pthread = NULL;
int code = pthread_mutex_init(&(pRepo->mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
code = pthread_mutex_init(&(pRepo->save_mutex), NULL);
if (code != 0) {
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->config_changed = false;
atomic_store_8(&pRepo->hasCachedLastColumn, 0);
code = tsem_init(&(pRepo->readyToCommit), 0, 1);
if (code != 0) {
code = errno;
terrno = TAOS_SYSTEM_ERROR(code);
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->tsdbMeta = tsdbNewMeta(pCfg);
if (pRepo->tsdbMeta == NULL) {
tsdbError("vgId:%d failed to create meta since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->pPool = tsdbNewBufPool(pCfg);
if (pRepo->pPool == NULL) {
tsdbError("vgId:%d failed to create buffer pool since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
pRepo->fs = tsdbNewFS(pCfg);
if (pRepo->fs == NULL) {
tsdbError("vgId:%d failed to TSDB file system since %s", REPO_ID(pRepo), tstrerror(terrno));
tsdbFreeRepo(pRepo);
return NULL;
}
return pRepo;
}
static void tsdbFreeRepo(STsdbRepo *pRepo) {
if (pRepo) {
tsdbFreeFS(pRepo->fs);
tsdbFreeBufPool(pRepo->pPool);
tsdbFreeMeta(pRepo->tsdbMeta);
tsdbFreeMergeBuf(pRepo->mergeBuf);
// tsdbFreeMemTable(pRepo->mem);
// tsdbFreeMemTable(pRepo->imem);
tsem_destroy(&(pRepo->readyToCommit));
pthread_mutex_destroy(&pRepo->mutex);
free(pRepo);
}
}
static void tsdbStartStream(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
pTable->cqhandle = (*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data, pTable->sql,
tsdbGetTableSchemaImpl(pTable, false, false, -1), 0);
}
}
}
static void tsdbStopStream(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable && pTable->type == TSDB_STREAM_TABLE) {
(*pRepo->appH.cqDropFunc)(pTable->cqhandle);
}
}
}
static int tsdbRestoreLastColumns(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh) {
//tsdbInfo("tsdbRestoreLastColumns of table %s", pTable->name->data);
STSchema *pSchema = tsdbGetTableLatestSchema(pTable);
if (pSchema == NULL) {
tsdbError("tsdbGetTableLatestSchema of table %s fail", pTable->name->data);
return 0;
}
SBlock* pBlock;
int numColumns;
int32_t blockIdx;
SDataStatis* pBlockStatis = NULL;
SMemRow row = NULL;
// restore last column data with last schema
int err = 0;
numColumns = schemaNCols(pSchema);
if (numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
return 0;
}
if (pTable->lastColSVersion != schemaVersion(pSchema)) {
if (tsdbInitColIdCacheWithSchema(pTable, pSchema) < 0) {
return -1;
}
}
row = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
if (row == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = -1;
goto out;
}
memRowSetType(row, SMEM_ROW_DATA);
tdInitDataRow(memRowDataBody(row), pSchema);
// first load block index info
if (tsdbLoadBlockInfo(pReadh, NULL) < 0) {
err = -1;
goto out;
}
pBlockStatis = calloc(numColumns, sizeof(SDataStatis));
if (pBlockStatis == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
err = -1;
goto out;
}
memset(pBlockStatis, 0, numColumns * sizeof(SDataStatis));
for(int32_t i = 0; i < numColumns; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
pBlockStatis[i].colId = pCol->colId;
}
// load block from backward
SBlockIdx *pIdx = pReadh->pBlkIdx;
blockIdx = (int32_t)(pIdx->numOfBlocks - 1);
while (numColumns > pTable->restoreColumnNum && blockIdx >= 0) {
bool loadStatisData = false;
pBlock = pReadh->pBlkInfo->blocks + blockIdx;
blockIdx -= 1;
// load block data
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
err = -1;
goto out;
}
// file block with sub-blocks has no statistics data
if (pBlock->numOfSubBlocks <= 1) {
tsdbLoadBlockStatis(pReadh, pBlock);
tsdbGetBlockStatis(pReadh, pBlockStatis, (int)numColumns);
loadStatisData = true;
}
for (int16_t i = 0; i < numColumns && numColumns > pTable->restoreColumnNum; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
// ignore loaded columns
if (pTable->lastCols[i].bytes != 0) {
continue;
}
// ignore block which has no not-null colId column
if (loadStatisData && pBlockStatis[i].numOfNull == pBlock->numOfRows) {
continue;
}
// OK,let's load row from backward to get not-null column
for (int32_t rowId = pBlock->numOfRows - 1; rowId >= 0; rowId--) {
SDataCol *pDataCol = pReadh->pDCols[0]->cols + i;
const void* pColData = tdGetColDataOfRow(pDataCol, rowId);
tdAppendColVal(memRowDataBody(row), pColData, pCol->type, pCol->offset);
//SDataCol *pDataCol = readh.pDCols[0]->cols + j;
void *value = tdGetRowDataOfCol(memRowDataBody(row), (int8_t)pCol->type, TD_DATA_ROW_HEAD_SIZE + pCol->offset);
if (isNull(value, pCol->type)) {
continue;
}
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId);
if (idx == -1) {
tsdbError("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d fail", REPO_ID(pRepo), pTable->name->data, pCol->colId);
continue;
}
// save not-null column
uint16_t bytes = IS_VAR_DATA_TYPE(pCol->type) ? varDataTLen(pColData) : pCol->bytes;
SDataCol *pLastCol = &(pTable->lastCols[idx]);
pLastCol->pData = malloc(bytes);
pLastCol->bytes = bytes;
pLastCol->colId = pCol->colId;
memcpy(pLastCol->pData, value, bytes);
// save row ts(in column 0)
pDataCol = pReadh->pDCols[0]->cols + 0;
pCol = schemaColAt(pSchema, 0);
tdAppendColVal(memRowDataBody(row), tdGetColDataOfRow(pDataCol, rowId), pCol->type, pCol->offset);
pLastCol->ts = memRowKey(row);
pTable->restoreColumnNum += 1;
tsdbDebug("tsdbRestoreLastColumns restore vgId:%d,table:%s cache column %d, %" PRId64, REPO_ID(pRepo), pTable->name->data, pLastCol->colId, pLastCol->ts);
break;
}
}
}
out:
taosTZfree(row);
tfree(pBlockStatis);
if (err == 0 && numColumns <= pTable->restoreColumnNum) {
pTable->hasRestoreLastColumn = true;
}
return err;
}
static int tsdbRestoreLastRow(STsdbRepo *pRepo, STable *pTable, SReadH* pReadh, SBlockIdx *pIdx) {
ASSERT(pTable->lastRow == NULL);
if (tsdbLoadBlockInfo(pReadh, NULL) < 0) {
return -1;
}
SBlock* pBlock = pReadh->pBlkInfo->blocks + pIdx->numOfBlocks - 1;
if (tsdbLoadBlockData(pReadh, pBlock, NULL) < 0) {
return -1;
}
// Get the data in row
STSchema *pSchema = tsdbGetTableSchema(pTable);
pTable->lastRow = taosTMalloc(memRowMaxBytesFromSchema(pSchema));
if (pTable->lastRow == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memRowSetType(pTable->lastRow, SMEM_ROW_DATA);
tdInitDataRow(memRowDataBody(pTable->lastRow), pSchema);
for (int icol = 0; icol < schemaNCols(pSchema); icol++) {
STColumn *pCol = schemaColAt(pSchema, icol);
SDataCol *pDataCol = pReadh->pDCols[0]->cols + icol;
tdAppendColVal(memRowDataBody(pTable->lastRow), tdGetColDataOfRow(pDataCol, pBlock->numOfRows - 1), pCol->type,
pCol->offset);
}
return 0;
}
int tsdbRestoreInfo(STsdbRepo *pRepo) {
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STsdbCfg * pCfg = REPO_CFG(pRepo);
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
//tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
TSKEY lastKey = tsdbGetTableLastKeyImpl(pTable);
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && lastKey < pIdx->maxKey) {
pTable->lastKey = pIdx->maxKey;
if (CACHE_LAST_ROW(pCfg) && tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
// restore NULL columns
if (pIdx && CACHE_LAST_NULL_COLUMN(pCfg) && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
}
}
}
tsdbDestroyReadH(&readh);
if (CACHE_LAST_NULL_COLUMN(pCfg)) {
atomic_store_8(&pRepo->hasCachedLastColumn, 1);
}
return 0;
}
int tsdbCacheLastData(STsdbRepo *pRepo, STsdbCfg* oldCfg) {
bool cacheLastRow = false, cacheLastCol = false;
SFSIter fsiter;
SReadH readh;
SDFileSet *pSet;
STsdbMeta *pMeta = pRepo->tsdbMeta;
int tableNum = 0;
int maxTableIdx = 0;
int cacheLastRowTableNum = 0;
int cacheLastColTableNum = 0;
bool need_free_last_row = CACHE_LAST_ROW(oldCfg) && !CACHE_LAST_ROW(&(pRepo->config));
bool need_free_last_col = CACHE_LAST_NULL_COLUMN(oldCfg) && !CACHE_LAST_NULL_COLUMN(&(pRepo->config));
if (CACHE_LAST_ROW(&(pRepo->config)) || CACHE_LAST_NULL_COLUMN(&(pRepo->config))) {
tsdbInfo("tsdbCacheLastData cache last data since cacheLast option changed");
cacheLastRow = !CACHE_LAST_ROW(oldCfg) && CACHE_LAST_ROW(&(pRepo->config));
cacheLastCol = !CACHE_LAST_NULL_COLUMN(oldCfg) && CACHE_LAST_NULL_COLUMN(&(pRepo->config));
}
// calc max table idx and table num
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
tableNum += 1;
maxTableIdx = i;
if (cacheLastCol) {
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
}
// if close last option,need to free data
if (need_free_last_row || need_free_last_col) {
if (need_free_last_col) {
atomic_store_8(&pRepo->hasCachedLastColumn, 0);
}
tsdbInfo("free cache last data since cacheLast option changed");
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
if (need_free_last_row) {
taosTZfree(pTable->lastRow);
pTable->lastRow = NULL;
}
if (need_free_last_col) {
tsdbFreeLastColumns(pTable);
pTable->hasRestoreLastColumn = false;
}
}
}
if (!cacheLastRow && !cacheLastCol) {
return 0;
}
cacheLastRowTableNum = cacheLastRow ? tableNum : 0;
cacheLastColTableNum = cacheLastCol ? tableNum : 0;
if (tsdbInitReadH(&readh, pRepo) < 0) {
return -1;
}
tsdbFSIterInit(&fsiter, REPO_FS(pRepo), TSDB_FS_ITER_BACKWARD);
while ((pSet = tsdbFSIterNext(&fsiter)) != NULL && (cacheLastRowTableNum > 0 || cacheLastColTableNum > 0)) {
if (tsdbSetAndOpenReadFSet(&readh, pSet) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (tsdbLoadBlockIdx(&readh) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
for (int i = 1; i <= maxTableIdx; i++) {
STable *pTable = pMeta->tables[i];
if (pTable == NULL) continue;
//tsdbInfo("tsdbRestoreInfo restore vgId:%d,table:%s", REPO_ID(pRepo), pTable->name->data);
if (tsdbSetReadTable(&readh, pTable) < 0) {
tsdbDestroyReadH(&readh);
return -1;
}
SBlockIdx *pIdx = readh.pBlkIdx;
if (pIdx && cacheLastRowTableNum > 0 && pTable->lastRow == NULL) {
pTable->lastKey = pIdx->maxKey;
if (tsdbRestoreLastRow(pRepo, pTable, &readh, pIdx) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
cacheLastRowTableNum -= 1;
}
// restore NULL columns
if (pIdx && cacheLastColTableNum > 0 && !pTable->hasRestoreLastColumn) {
if (tsdbRestoreLastColumns(pRepo, pTable, &readh) != 0) {
tsdbDestroyReadH(&readh);
return -1;
}
if (pTable->hasRestoreLastColumn) {
cacheLastColTableNum -= 1;
}
}
}
}
tsdbDestroyReadH(&readh);
if (cacheLastCol) {
atomic_store_8(&pRepo->hasCachedLastColumn, 1);
}
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbint.h"
#define TSDB_SUPER_TABLE_SL_LEVEL 5
#define DEFAULT_TAG_INDEX_COLUMN 0
static char * getTagIndexKey(const void *pData);
static STable *tsdbNewTable();
static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable);
static void tsdbFreeTable(STable *pTable);
static int tsdbAddTableToMeta(STsdbRepo *pRepo, STable *pTable, bool addIdx, bool lock);
static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock);
static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper);
static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable);
static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid);
static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup);
static int tsdbTableSetName(STableCfg *config, char *name, bool dup);
static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup);
static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup);
static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid);
static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup);
static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup);
static int tsdbEncodeTableName(void **buf, tstr *name);
static void * tsdbDecodeTableName(void *buf, tstr **name);
static int tsdbEncodeTable(void **buf, STable *pTable);
static void * tsdbDecodeTable(void *buf, STable **pRTable);
static int tsdbGetTableEncodeSize(int8_t act, STable *pTable);
static void * tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable);
static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable);
static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable);
static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid);
static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema);
static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable* pTable);
static int tsdbAddSchema(STable *pTable, STSchema *pSchema);
static void tsdbFreeTableSchema(STable *pTable);
// ------------------ OUTER FUNCTIONS ------------------
int tsdbCreateTable(STsdbRepo *repo, STableCfg *pCfg) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STable * super = NULL;
STable * table = NULL;
bool newSuper = false;
bool superChanged = false;
int tid = pCfg->tableId.tid;
STable * pTable = NULL;
if (tid < 1 || tid > TSDB_MAX_TABLES) {
tsdbError("vgId:%d failed to create table since invalid tid %d", REPO_ID(pRepo), tid);
terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO;
goto _err;
}
if (tid < pMeta->maxTables && pMeta->tables[tid] != NULL) {
if (TABLE_UID(pMeta->tables[tid]) == pCfg->tableId.uid) {
tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo),
TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]), TABLE_UID(pMeta->tables[tid]));
return 0;
} else {
tsdbInfo("vgId:%d table %s at tid %d uid %" PRIu64
" exists, replace it with new table, this can be not reasonable",
REPO_ID(pRepo), TABLE_CHAR_NAME(pMeta->tables[tid]), TABLE_TID(pMeta->tables[tid]),
TABLE_UID(pMeta->tables[tid]));
tsdbDropTable(pRepo, pMeta->tables[tid]->tableId);
}
}
pTable = tsdbGetTableByUid(pMeta, pCfg->tableId.uid);
if (pTable != NULL) {
tsdbError("vgId:%d table %s already exists, tid %d uid %" PRId64, REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
terrno = TSDB_CODE_TDB_TABLE_ALREADY_EXIST;
goto _err;
}
if (pCfg->type == TSDB_CHILD_TABLE) {
super = tsdbGetTableByUid(pMeta, pCfg->superUid);
if (super == NULL) { // super table not exists, try to create it
newSuper = true;
super = tsdbCreateTableFromCfg(pCfg, true, NULL);
if (super == NULL) goto _err;
} else {
if (TABLE_TYPE(super) != TSDB_SUPER_TABLE || TABLE_UID(super) != pCfg->superUid) {
terrno = TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO;
goto _err;
}
if (schemaVersion(pCfg->tagSchema) > schemaVersion(super->tagSchema)) {
// tag schema out of date, need to update super table tag version
STSchema *pOldSchema = super->tagSchema;
TSDB_WLOCK_TABLE(super);
super->tagSchema = tdDupSchema(pCfg->tagSchema);
TSDB_WUNLOCK_TABLE(super);
tdFreeSchema(pOldSchema);
superChanged = true;
}
}
}
table = tsdbCreateTableFromCfg(pCfg, false, super);
if (table == NULL) goto _err;
// Register to meta
tsdbWLockRepoMeta(pRepo);
if (newSuper) {
if (tsdbAddTableToMeta(pRepo, super, true, false) < 0) {
tsdbUnlockRepoMeta(pRepo);
goto _err;
}
}
if (tsdbAddTableToMeta(pRepo, table, true, false) < 0) {
tsdbUnlockRepoMeta(pRepo);
goto _err;
}
tsdbUnlockRepoMeta(pRepo);
// Write to memtable action
if (newSuper || superChanged) {
// add insert new super table action
if (tsdbInsertNewTableAction(pRepo, super) != 0) {
goto _err;
}
}
// add insert new table action
if (tsdbInsertNewTableAction(pRepo, table) != 0) {
goto _err;
}
if (tsdbCheckCommit(pRepo) < 0) return -1;
return 0;
_err:
if (newSuper) {
tsdbFreeTable(super);
}
tsdbFreeTable(table);
return -1;
}
int tsdbDropTable(STsdbRepo *repo, STableId tableId) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
uint64_t uid = tableId.uid;
int tid = 0;
char * tbname = NULL;
STable *pTable = tsdbGetTableByUid(pMeta, uid);
if (pTable == NULL) {
tsdbError("vgId:%d failed to drop table since table not exists! tid:%d uid %" PRIu64, REPO_ID(pRepo), tableId.tid,
uid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
tsdbDebug("vgId:%d try to drop table %s type %d", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TYPE(pTable));
tid = TABLE_TID(pTable);
tbname = strdup(TABLE_CHAR_NAME(pTable));
if (tbname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
// Write to KV store first
if (tsdbRemoveTableFromStore(pRepo, pTable) < 0) {
tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno));
goto _err;
}
// Remove table from Meta
if (tsdbRmTableFromMeta(pRepo, pTable) < 0) {
tsdbError("vgId:%d failed to drop table %s since %s", REPO_ID(pRepo), tbname, tstrerror(terrno));
goto _err;
}
tsdbDebug("vgId:%d, table %s is dropped! tid:%d, uid:%" PRId64, pRepo->config.tsdbId, tbname, tid, uid);
free(tbname);
if (tsdbCheckCommit(pRepo) < 0) goto _err;
return 0;
_err:
tfree(tbname);
return -1;
}
void *tsdbGetTableTagVal(const void* pTable, int32_t colId, int16_t type, int16_t bytes) {
// TODO: this function should be changed also
STSchema *pSchema = tsdbGetTableTagSchema((STable*) pTable);
STColumn *pCol = tdGetColOfID(pSchema, colId);
if (pCol == NULL) {
return NULL; // No matched tag volumn
}
char *val = tdGetKVRowValOfCol(((STable*)pTable)->tagVal, colId);
assert(type == pCol->type && bytes >= pCol->bytes);
// if (val != NULL && IS_VAR_DATA_TYPE(type)) {
// assert(varDataLen(val) < pCol->bytes);
// }
return val;
}
char *tsdbGetTableName(void* pTable) {
// TODO: need to change as thread-safe
if (pTable == NULL) {
return NULL;
} else {
return (char*) (((STable *)pTable)->name);
}
}
STableCfg *tsdbCreateTableCfgFromMsg(SMDCreateTableMsg *pMsg) {
if (pMsg == NULL) return NULL;
SSchema *pSchema = (SSchema *)pMsg->data;
int16_t numOfCols = htons(pMsg->numOfColumns);
int16_t numOfTags = htons(pMsg->numOfTags);
STSchemaBuilder schemaBuilder = {0};
STableCfg *pCfg = (STableCfg *)calloc(1, sizeof(STableCfg));
if (pCfg == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
if (tsdbInitTableCfg(pCfg, pMsg->tableType, htobe64(pMsg->uid), htonl(pMsg->tid)) < 0) goto _err;
if (tdInitTSchemaBuilder(&schemaBuilder, htonl(pMsg->sversion)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
for (int i = 0; i < numOfCols; i++) {
if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
if (tsdbTableSetSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err;
if (tsdbTableSetName(pCfg, pMsg->tableFname, true) < 0) goto _err;
if (numOfTags > 0) {
// Decode tag schema
tdResetTSchemaBuilder(&schemaBuilder, htonl(pMsg->tversion));
for (int i = numOfCols; i < numOfCols + numOfTags; i++) {
if (tdAddColToSchema(&schemaBuilder, pSchema[i].type, htons(pSchema[i].colId), htons(pSchema[i].bytes)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
if (tsdbTableSetTagSchema(pCfg, tdGetSchemaFromBuilder(&schemaBuilder), false) < 0) goto _err;
if (tsdbTableSetSName(pCfg, pMsg->stableFname, true) < 0) goto _err;
if (tsdbTableSetSuperUid(pCfg, htobe64(pMsg->superTableUid)) < 0) goto _err;
int32_t tagDataLen = htonl(pMsg->tagDataLen);
if (tagDataLen) {
char *pTagData = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema);
tsdbTableSetTagValue(pCfg, pTagData, true);
}
}
if (pMsg->tableType == TSDB_STREAM_TABLE) {
char *sql = pMsg->data + (numOfCols + numOfTags) * sizeof(SSchema);
tsdbTableSetStreamSql(pCfg, sql, true);
}
tdDestroyTSchemaBuilder(&schemaBuilder);
return pCfg;
_err:
tdDestroyTSchemaBuilder(&schemaBuilder);
tsdbClearTableCfg(pCfg);
return NULL;
}
static UNUSED_FUNC int32_t colIdCompar(const void* left, const void* right) {
int16_t colId = *(int16_t*) left;
STColumn* p2 = (STColumn*) right;
if (colId == p2->colId) {
return 0;
}
return (colId < p2->colId)? -1:1;
}
int tsdbUpdateTableTagValue(STsdbRepo *repo, SUpdateTableTagValMsg *pMsg) {
STsdbRepo *pRepo = (STsdbRepo *)repo;
STsdbMeta *pMeta = pRepo->tsdbMeta;
STSchema * pNewSchema = NULL;
pMsg->uid = htobe64(pMsg->uid);
pMsg->tid = htonl(pMsg->tid);
pMsg->tversion = htons(pMsg->tversion);
pMsg->colId = htons(pMsg->colId);
pMsg->bytes = htons(pMsg->bytes);
pMsg->tagValLen = htonl(pMsg->tagValLen);
pMsg->numOfTags = htons(pMsg->numOfTags);
pMsg->schemaLen = htonl(pMsg->schemaLen);
for (int i = 0; i < pMsg->numOfTags; i++) {
STColumn *pTCol = (STColumn *)pMsg->data + i;
pTCol->bytes = htons(pTCol->bytes);
pTCol->colId = htons(pTCol->colId);
}
STable *pTable = tsdbGetTableByUid(pMeta, pMsg->uid);
if (pTable == NULL || TABLE_TID(pTable) != pMsg->tid) {
tsdbError("vgId:%d failed to update table tag value since invalid table id %d uid %" PRIu64, REPO_ID(pRepo),
pMsg->tid, pMsg->uid);
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
return -1;
}
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
tsdbError("vgId:%d try to update tag value of a non-child table, invalid action", REPO_ID(pRepo));
terrno = TSDB_CODE_TDB_INVALID_ACTION;
return -1;
}
if (schemaVersion(pTable->pSuper->tagSchema) > pMsg->tversion) {
tsdbError(
"vgId:%d failed to update tag value of table %s since version out of date, client tag version %d server tag "
"version %d",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), pMsg->tversion, schemaVersion(pTable->tagSchema));
terrno = TSDB_CODE_TDB_TAG_VER_OUT_OF_DATE;
return -1;
}
if (schemaVersion(pTable->pSuper->tagSchema) < pMsg->tversion) { // tag schema out of data,
tsdbDebug("vgId:%d need to update tag schema of table %s tid %d uid %" PRIu64
" since out of date, current version %d new version %d",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable),
schemaVersion(pTable->pSuper->tagSchema), pMsg->tversion);
STSchemaBuilder schemaBuilder = {0};
STColumn *pTCol = (STColumn *)pMsg->data;
ASSERT(pMsg->schemaLen % sizeof(STColumn) == 0 && pTCol[0].colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0)));
if (tdInitTSchemaBuilder(&schemaBuilder, pMsg->tversion) < 0) {
tsdbDebug("vgId:%d failed to update tag schema of table %s tid %d uid %" PRIu64 " since out of memory",
REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_TID(pTable), TABLE_UID(pTable));
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
for (int i = 0; i < (pMsg->schemaLen / sizeof(STColumn)); i++) {
if (tdAddColToSchema(&schemaBuilder, pTCol[i].type, pTCol[i].colId, pTCol[i].bytes) < 0) {
tdDestroyTSchemaBuilder(&schemaBuilder);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
}
pNewSchema = tdGetSchemaFromBuilder(&schemaBuilder);
if (pNewSchema == NULL) {
tdDestroyTSchemaBuilder(&schemaBuilder);
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tdDestroyTSchemaBuilder(&schemaBuilder);
}
// Change in memory
if (pNewSchema != NULL) { // change super table tag schema
TSDB_WLOCK_TABLE(pTable->pSuper);
STSchema *pOldSchema = pTable->pSuper->tagSchema;
pTable->pSuper->tagSchema = pNewSchema;
tdFreeSchema(pOldSchema);
TSDB_WUNLOCK_TABLE(pTable->pSuper);
}
bool isChangeIndexCol = (pMsg->colId == colColId(schemaColAt(pTable->pSuper->tagSchema, 0)));
// STColumn *pCol = bsearch(&(pMsg->colId), pMsg->data, pMsg->numOfTags, sizeof(STColumn), colIdCompar);
// ASSERT(pCol != NULL);
if (isChangeIndexCol) {
tsdbWLockRepoMeta(pRepo);
tsdbRemoveTableFromIndex(pMeta, pTable);
}
TSDB_WLOCK_TABLE(pTable);
tdSetKVRowDataOfCol(&(pTable->tagVal), pMsg->colId, pMsg->type, POINTER_SHIFT(pMsg->data, pMsg->schemaLen));
TSDB_WUNLOCK_TABLE(pTable);
if (isChangeIndexCol) {
tsdbAddTableIntoIndex(pMeta, pTable, false);
tsdbUnlockRepoMeta(pRepo);
}
// Update on file
int tlen1 = (pNewSchema) ? tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable->pSuper) : 0;
int tlen2 = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable);
void *buf = tsdbAllocBytes(pRepo, tlen1+tlen2);
ASSERT(buf != NULL);
if (pNewSchema) {
void *pBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable->pSuper);
ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen1);
buf = pBuf;
}
tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, buf, pTable);
if (tsdbCheckCommit(pRepo) < 0) return -1;
return 0;
}
// ------------------ INTERNAL FUNCTIONS ------------------
static int tsdbInsertNewTableAction(STsdbRepo *pRepo, STable* pTable) {
int tlen = 0;
void *pBuf = NULL;
tlen = tsdbGetTableEncodeSize(TSDB_UPDATE_META, pTable);
pBuf = tsdbAllocBytes(pRepo, tlen);
if (pBuf == NULL) {
return -1;
}
void *tBuf = tsdbInsertTableAct(pRepo, TSDB_UPDATE_META, pBuf, pTable);
ASSERT(POINTER_DISTANCE(tBuf, pBuf) == tlen);
return 0;
}
STsdbMeta *tsdbNewMeta(STsdbCfg *pCfg) {
STsdbMeta *pMeta = (STsdbMeta *)calloc(1, sizeof(*pMeta));
if (pMeta == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
int code = pthread_rwlock_init(&pMeta->rwLock, NULL);
if (code != 0) {
tsdbError("vgId:%d failed to init TSDB meta r/w lock since %s", pCfg->tsdbId, strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
goto _err;
}
pMeta->maxTables = TSDB_INIT_NTABLES + 1;
pMeta->tables = (STable **)calloc(pMeta->maxTables, sizeof(STable *));
if (pMeta->tables == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMeta->superList = tdListNew(sizeof(STable *));
if (pMeta->superList == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pMeta->uidMap = taosHashInit((size_t)(TSDB_INIT_NTABLES * 1.1), taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, false);
if (pMeta->uidMap == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
return pMeta;
_err:
tsdbFreeMeta(pMeta);
return NULL;
}
void tsdbFreeMeta(STsdbMeta *pMeta) {
if (pMeta) {
taosHashCleanup(pMeta->uidMap);
tdListFree(pMeta->superList);
tfree(pMeta->tables);
pthread_rwlock_destroy(&pMeta->rwLock);
free(pMeta);
}
}
int tsdbOpenMeta(STsdbRepo *pRepo) {
return 0;
#if 0
char * fname = NULL;
STsdbMeta *pMeta = pRepo->tsdbMeta;
ASSERT(pMeta != NULL);
fname = tsdbGetMetaFileName(pRepo->rootDir);
if (fname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
// pMeta->pStore = tdOpenKVStore(fname, tsdbRestoreTable, tsdbOrgMeta, (void *)pRepo);
// if (pMeta->pStore == NULL) {
// tsdbError("vgId:%d failed to open TSDB meta while open the kv store since %s", REPO_ID(pRepo), tstrerror(terrno));
// goto _err;
// }
tsdbDebug("vgId:%d open TSDB meta succeed", REPO_ID(pRepo));
tfree(fname);
return 0;
_err:
tfree(fname);
return -1;
#endif
}
int tsdbCloseMeta(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
SListNode *pNode = NULL;
STable * pTable = NULL;
if (pMeta == NULL) return 0;
// tdCloseKVStore(pMeta->pStore);
for (int i = 1; i < pMeta->maxTables; i++) {
tsdbFreeTable(pMeta->tables[i]);
}
while ((pNode = tdListPopHead(pMeta->superList)) != NULL) {
tdListNodeGetData(pMeta->superList, pNode, (void *)(&pTable));
tsdbFreeTable(pTable);
listNodeFree(pNode);
}
tsdbDebug("vgId:%d TSDB meta is closed", REPO_ID(pRepo));
return 0;
}
STable *tsdbGetTableByUid(STsdbMeta *pMeta, uint64_t uid) {
void *ptr = taosHashGet(pMeta->uidMap, (char *)(&uid), sizeof(uid));
if (ptr == NULL) return NULL;
return *(STable **)ptr;
}
STSchema *tsdbGetTableSchemaByVersion(STable *pTable, int16_t _version) {
return tsdbGetTableSchemaImpl(pTable, true, false, _version);
}
int tsdbWLockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_wrlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to write lock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbRLockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_rdlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to read lock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
int tsdbUnlockRepoMeta(STsdbRepo *pRepo) {
int code = pthread_rwlock_unlock(&(pRepo->tsdbMeta->rwLock));
if (code != 0) {
tsdbError("vgId:%d failed to unlock TSDB meta since %s", REPO_ID(pRepo), strerror(code));
terrno = TAOS_SYSTEM_ERROR(code);
return -1;
}
return 0;
}
void tsdbRefTable(STable *pTable) {
int32_t ref = T_REF_INC(pTable);
UNUSED(ref);
tsdbDebug("ref table %s uid %" PRIu64 " tid:%d, refCount:%d", TABLE_CHAR_NAME(pTable), TABLE_UID(pTable), TABLE_TID(pTable), ref);
}
void tsdbUnRefTable(STable *pTable) {
uint64_t uid = TABLE_UID(pTable);
int32_t tid = TABLE_TID(pTable);
int32_t ref = T_REF_DEC(pTable);
tsdbDebug("unref table, uid:%" PRIu64 " tid:%d, refCount:%d", uid, tid, ref);
if (ref == 0) {
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
tsdbUnRefTable(pTable->pSuper);
}
tsdbFreeTable(pTable);
}
}
void tsdbFreeLastColumns(STable* pTable) {
if (pTable->lastCols == NULL) {
return;
}
for (int i = 0; i < pTable->maxColNum; ++i) {
if (pTable->lastCols[i].bytes == 0) {
continue;
}
tfree(pTable->lastCols[i].pData);
pTable->lastCols[i].bytes = 0;
pTable->lastCols[i].pData = NULL;
}
tfree(pTable->lastCols);
pTable->lastCols = NULL;
pTable->maxColNum = 0;
pTable->lastColSVersion = -1;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
}
int16_t tsdbGetLastColumnsIndexByColId(STable* pTable, int16_t colId) {
if (pTable->lastCols == NULL) {
return -1;
}
// TODO: use binary search instead
for (int16_t i = 0; i < pTable->maxColNum; ++i) {
if (pTable->lastCols[i].colId == colId) {
return i;
}
}
return -1;
}
int tsdbInitColIdCacheWithSchema(STable* pTable, STSchema* pSchema) {
ASSERT(pTable->lastCols == NULL);
int16_t numOfColumn = pSchema->numOfCols;
pTable->lastCols = (SDataCol*)malloc(numOfColumn * sizeof(SDataCol));
if (pTable->lastCols == NULL) {
return -1;
}
for (int16_t i = 0; i < numOfColumn; ++i) {
STColumn *pCol = schemaColAt(pSchema, i);
SDataCol* pDataCol = &(pTable->lastCols[i]);
pDataCol->bytes = 0;
pDataCol->pData = NULL;
pDataCol->colId = pCol->colId;
}
pTable->lastColSVersion = schemaVersion(pSchema);
pTable->maxColNum = numOfColumn;
pTable->restoreColumnNum = 0;
pTable->hasRestoreLastColumn = false;
return 0;
}
STSchema* tsdbGetTableLatestSchema(STable *pTable) {
return tsdbGetTableSchemaByVersion(pTable, -1);
}
int tsdbUpdateLastColSchema(STable *pTable, STSchema *pNewSchema) {
if (pTable->lastColSVersion == schemaVersion(pNewSchema)) {
return 0;
}
tsdbDebug("tsdbUpdateLastColSchema:%s,%d->%d", pTable->name->data, pTable->lastColSVersion, schemaVersion(pNewSchema));
int16_t numOfCols = pNewSchema->numOfCols;
SDataCol *lastCols = (SDataCol*)malloc(numOfCols * sizeof(SDataCol));
if (lastCols == NULL) {
return -1;
}
TSDB_WLOCK_TABLE(pTable);
for (int16_t i = 0; i < numOfCols; ++i) {
STColumn *pCol = schemaColAt(pNewSchema, i);
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pCol->colId);
SDataCol* pDataCol = &(lastCols[i]);
if (idx != -1) {
// move col data to new last column array
SDataCol* pOldDataCol = &(pTable->lastCols[idx]);
memcpy(pDataCol, pOldDataCol, sizeof(SDataCol));
} else {
// init new colid data
pDataCol->colId = pCol->colId;
pDataCol->bytes = 0;
pDataCol->pData = NULL;
}
}
SDataCol *oldLastCols = pTable->lastCols;
int16_t oldLastColNum = pTable->maxColNum;
pTable->lastColSVersion = schemaVersion(pNewSchema);
pTable->lastCols = lastCols;
pTable->maxColNum = numOfCols;
if (oldLastCols == NULL) {
TSDB_WUNLOCK_TABLE(pTable);
return 0;
}
// free old schema last column datas
for (int16_t i = 0; i < oldLastColNum; ++i) {
SDataCol* pDataCol = &(oldLastCols[i]);
if (pDataCol->bytes == 0) {
continue;
}
int16_t idx = tsdbGetLastColumnsIndexByColId(pTable, pDataCol->colId);
if (idx != -1) {
continue;
}
// free not exist column data
tfree(pDataCol->pData);
}
TSDB_WUNLOCK_TABLE(pTable);
tfree(oldLastCols);
return 0;
}
void tsdbUpdateTableSchema(STsdbRepo *pRepo, STable *pTable, STSchema *pSchema, bool insertAct) {
ASSERT(TABLE_TYPE(pTable) != TSDB_STREAM_TABLE && TABLE_TYPE(pTable) != TSDB_SUPER_TABLE);
STsdbMeta *pMeta = pRepo->tsdbMeta;
STable *pCTable = (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) ? pTable->pSuper : pTable;
ASSERT(schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pCTable->schema)));
TSDB_WLOCK_TABLE(pCTable);
tsdbAddSchema(pCTable, pSchema);
if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema);
if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema);
TSDB_WUNLOCK_TABLE(pCTable);
if (insertAct) {
if (tsdbInsertNewTableAction(pRepo, pCTable) != 0) {
tsdbError("vgId:%d table %s tid %d uid %" PRIu64 " tsdbInsertNewTableAction fail", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
}
}
}
int tsdbRestoreTable(STsdbRepo *pRepo, void *cont, int contLen) {
STable *pTable = NULL;
if (!taosCheckChecksumWhole((uint8_t *)cont, contLen)) {
terrno = TSDB_CODE_TDB_FILE_CORRUPTED;
return -1;
}
tsdbDecodeTable(cont, &pTable);
if (tsdbAddTableToMeta(pRepo, pTable, false, false) < 0) {
tsdbFreeTable(pTable);
return -1;
}
tsdbTrace("vgId:%d table %s tid %d uid %" PRIu64 " is restored from file", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
return 0;
}
void tsdbOrgMeta(STsdbRepo *pRepo) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
for (int i = 1; i < pMeta->maxTables; i++) {
STable *pTable = pMeta->tables[i];
if (pTable != NULL && pTable->type == TSDB_CHILD_TABLE) {
tsdbAddTableIntoIndex(pMeta, pTable, true);
}
}
}
// ------------------ LOCAL FUNCTIONS ------------------
static char *getTagIndexKey(const void *pData) {
STable *pTable = (STable *)pData;
STSchema *pSchema = tsdbGetTableTagSchema(pTable);
STColumn *pCol = schemaColAt(pSchema, DEFAULT_TAG_INDEX_COLUMN);
void * res = tdGetKVRowValOfCol(pTable->tagVal, pCol->colId);
if (res == NULL) {
// treat the column as NULL if we cannot find it
res = (char*)getNullValue(pCol->type);
}
return res;
}
static STable *tsdbNewTable() {
STable *pTable = (STable *)calloc(1, sizeof(*pTable));
if (pTable == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
pTable->lastKey = TSKEY_INITIAL_VAL;
pTable->lastCols = NULL;
pTable->restoreColumnNum = 0;
pTable->maxColNum = 0;
pTable->hasRestoreLastColumn = false;
pTable->lastColSVersion = -1;
return pTable;
}
static STable *tsdbCreateTableFromCfg(STableCfg *pCfg, bool isSuper, STable *pSTable) {
STable *pTable = NULL;
size_t tsize = 0;
pTable = tsdbNewTable();
if (pTable == NULL) goto _err;
if (isSuper) {
pTable->type = TSDB_SUPER_TABLE;
tsize = strnlen(pCfg->sname, TSDB_TABLE_NAME_LEN - 1);
pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1);
if (pTable->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->sname, (VarDataLenT)tsize);
TABLE_UID(pTable) = pCfg->superUid;
TABLE_TID(pTable) = -1;
TABLE_SUID(pTable) = -1;
pTable->pSuper = NULL;
if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pTable->tagSchema = tdDupSchema(pCfg->tagSchema);
if (pTable->tagSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
pTable->tagVal = NULL;
STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN);
pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL,
SL_ALLOW_DUP_KEY, getTagIndexKey);
if (pTable->pIndex == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
} else {
pTable->type = pCfg->type;
tsize = strnlen(pCfg->name, TSDB_TABLE_NAME_LEN - 1);
pTable->name = calloc(1, tsize + VARSTR_HEADER_SIZE + 1);
if (pTable->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
STR_WITH_SIZE_TO_VARSTR(pTable->name, pCfg->name, (VarDataLenT)tsize);
TABLE_UID(pTable) = pCfg->tableId.uid;
TABLE_TID(pTable) = pCfg->tableId.tid;
if (pCfg->type == TSDB_CHILD_TABLE) {
TABLE_SUID(pTable) = pCfg->superUid;
if (tsdbCheckTableTagVal(pCfg->tagValues, pSTable->tagSchema) < 0) {
goto _err;
}
pTable->tagVal = tdKVRowDup(pCfg->tagValues);
if (pTable->tagVal == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
} else {
TABLE_SUID(pTable) = -1;
if (tsdbAddSchema(pTable, tdDupSchema(pCfg->schema)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
pTable->sql = strdup(pCfg->sql);
if (pTable->sql == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
goto _err;
}
}
}
}
T_REF_INC(pTable);
tsdbDebug("table %s tid %d uid %" PRIu64 " is created", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable),
TABLE_UID(pTable));
return pTable;
_err:
tsdbFreeTable(pTable);
return NULL;
}
static void tsdbFreeTable(STable *pTable) {
if (pTable) {
if (pTable->name != NULL)
tsdbTrace("table %s tid %d uid %" PRIu64 " is freed", TABLE_CHAR_NAME(pTable), TABLE_TID(pTable),
TABLE_UID(pTable));
tfree(TABLE_NAME(pTable));
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
tsdbFreeTableSchema(pTable);
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tdFreeSchema(pTable->tagSchema);
}
}
kvRowFree(pTable->tagVal);
tSkipListDestroy(pTable->pIndex);
taosTZfree(pTable->lastRow);
tfree(pTable->sql);
tsdbFreeLastColumns(pTable);
free(pTable);
}
}
static int tsdbAddTableToMeta(STsdbRepo *pRepo, STable *pTable, bool addIdx, bool lock) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
if (lock && tsdbWLockRepoMeta(pRepo) < 0) {
tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
return -1;
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
if (tdListAppend(pMeta->superList, (void *)(&pTable)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to add table %s to meta since %s", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
tstrerror(terrno));
goto _err;
}
} else {
if (TABLE_TID(pTable) >= pMeta->maxTables) {
if (tsdbAdjustMetaTables(pRepo, TABLE_TID(pTable)) < 0) goto _err;
}
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && addIdx) { // add STABLE to the index
if (tsdbAddTableIntoIndex(pMeta, pTable, true) < 0) {
tsdbDebug("vgId:%d failed to add table %s to meta while add table to index since %s", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), tstrerror(terrno));
goto _err;
}
}
ASSERT(TABLE_TID(pTable) < pMeta->maxTables);
pMeta->tables[TABLE_TID(pTable)] = pTable;
pMeta->nTables++;
}
if (taosHashPut(pMeta->uidMap, (char *)(&pTable->tableId.uid), sizeof(pTable->tableId.uid), (void *)(&pTable),
sizeof(pTable)) < 0) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbError("vgId:%d failed to add table %s to meta while put into uid map since %s", REPO_ID(pRepo),
TABLE_CHAR_NAME(pTable), tstrerror(terrno));
goto _err;
}
if (TABLE_TYPE(pTable) != TSDB_CHILD_TABLE) {
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1);
if (schemaNCols(pSchema) > pMeta->maxCols) pMeta->maxCols = schemaNCols(pSchema);
if (schemaTLen(pSchema) > pMeta->maxRowBytes) pMeta->maxRowBytes = schemaTLen(pSchema);
}
if (lock && tsdbUnlockRepoMeta(pRepo) < 0) return -1;
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE && addIdx) {
pTable->cqhandle = (*pRepo->appH.cqCreateFunc)(pRepo->appH.cqH, TABLE_UID(pTable), TABLE_TID(pTable), TABLE_NAME(pTable)->data, pTable->sql,
tsdbGetTableSchemaImpl(pTable, false, false, -1), 1);
}
tsdbDebug("vgId:%d table %s tid %d uid %" PRIu64 " is added to meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable),
TABLE_TID(pTable), TABLE_UID(pTable));
return 0;
_err:
tsdbRemoveTableFromMeta(pRepo, pTable, false, false);
if (lock) tsdbUnlockRepoMeta(pRepo);
return -1;
}
static void tsdbRemoveTableFromMeta(STsdbRepo *pRepo, STable *pTable, bool rmFromIdx, bool lock) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
SListIter lIter = {0};
SListNode *pNode = NULL;
STable * tTable = NULL;
STSchema *pSchema = tsdbGetTableSchemaImpl(pTable, false, false, -1);
int maxCols = schemaNCols(pSchema);
int maxRowBytes = schemaTLen(pSchema);
if (lock) tsdbWLockRepoMeta(pRepo);
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tdListInitIter(pMeta->superList, &lIter, TD_LIST_BACKWARD);
while ((pNode = tdListNext(&lIter)) != NULL) {
tdListNodeGetData(pMeta->superList, pNode, (void *)(&tTable));
if (pTable == tTable) {
tdListPopNode(pMeta->superList, pNode);
free(pNode);
break;
}
}
} else {
pMeta->tables[pTable->tableId.tid] = NULL;
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE && rmFromIdx) {
tsdbRemoveTableFromIndex(pMeta, pTable);
}
pMeta->nTables--;
}
taosHashRemove(pMeta->uidMap, (char *)(&(TABLE_UID(pTable))), sizeof(TABLE_UID(pTable)));
if (maxCols == pMeta->maxCols || maxRowBytes == pMeta->maxRowBytes) {
maxCols = 0;
maxRowBytes = 0;
for (int i = 0; i < pMeta->maxTables; i++) {
STable *_pTable = pMeta->tables[i];
if (_pTable != NULL) {
pSchema = tsdbGetTableSchemaImpl(_pTable, false, false, -1);
maxCols = MAX(maxCols, schemaNCols(pSchema));
maxRowBytes = MAX(maxRowBytes, schemaTLen(pSchema));
}
}
}
pMeta->maxCols = maxCols;
pMeta->maxRowBytes = maxRowBytes;
if (lock) tsdbUnlockRepoMeta(pRepo);
tsdbDebug("vgId:%d table %s uid %" PRIu64 " is removed from meta", REPO_ID(pRepo), TABLE_CHAR_NAME(pTable), TABLE_UID(pTable));
tsdbUnRefTable(pTable);
}
static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable, bool refSuper) {
ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL);
STable *pSTable = tsdbGetTableByUid(pMeta, TABLE_SUID(pTable));
ASSERT(pSTable != NULL);
pTable->pSuper = pSTable;
tSkipListPut(pSTable->pIndex, (void *)pTable);
if (refSuper) T_REF_INC(pSTable);
return 0;
}
static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable) {
ASSERT(pTable->type == TSDB_CHILD_TABLE && pTable != NULL);
STable *pSTable = pTable->pSuper;
ASSERT(pSTable != NULL);
char* key = getTagIndexKey(pTable);
SArray *res = tSkipListGet(pSTable->pIndex, key);
size_t size = taosArrayGetSize(res);
ASSERT(size > 0);
for (int32_t i = 0; i < size; ++i) {
SSkipListNode *pNode = taosArrayGetP(res, i);
// STableIndexElem* pElem = (STableIndexElem*) SL_GET_NODE_DATA(pNode);
if ((STable *)SL_GET_NODE_DATA(pNode) == pTable) { // this is the exact what we need
tSkipListRemoveNode(pSTable->pIndex, pNode);
}
}
taosArrayDestroy(res);
return 0;
}
static int tsdbInitTableCfg(STableCfg *config, ETableType type, uint64_t uid, int32_t tid) {
if (type != TSDB_CHILD_TABLE && type != TSDB_NORMAL_TABLE && type != TSDB_STREAM_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_TABLE_TYPE;
return -1;
}
memset((void *)config, 0, sizeof(*config));
config->type = type;
config->superUid = TSDB_INVALID_SUPER_TABLE_ID;
config->tableId.uid = uid;
config->tableId.tid = tid;
return 0;
}
static int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup) {
if (dup) {
config->schema = tdDupSchema(pSchema);
if (config->schema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->schema = pSchema;
}
return 0;
}
static int tsdbTableSetName(STableCfg *config, char *name, bool dup) {
if (dup) {
config->name = strdup(name);
if (config->name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->name = name;
}
return 0;
}
static int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->tagSchema = tdDupSchema(pSchema);
if (config->tagSchema == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->tagSchema = pSchema;
}
return 0;
}
static int tsdbTableSetSName(STableCfg *config, char *sname, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->sname = strdup(sname);
if (config->sname == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->sname = sname;
}
return 0;
}
static int tsdbTableSetSuperUid(STableCfg *config, uint64_t uid) {
if (config->type != TSDB_CHILD_TABLE || uid == TSDB_INVALID_SUPER_TABLE_ID) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
config->superUid = uid;
return 0;
}
static int tsdbTableSetTagValue(STableCfg *config, SKVRow row, bool dup) {
if (config->type != TSDB_CHILD_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->tagValues = tdKVRowDup(row);
if (config->tagValues == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->tagValues = row;
}
return 0;
}
static int tsdbTableSetStreamSql(STableCfg *config, char *sql, bool dup) {
if (config->type != TSDB_STREAM_TABLE) {
terrno = TSDB_CODE_TDB_INVALID_CREATE_TB_MSG;
return -1;
}
if (dup) {
config->sql = strdup(sql);
if (config->sql == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
} else {
config->sql = sql;
}
return 0;
}
void tsdbClearTableCfg(STableCfg *config) {
if (config) {
if (config->schema) tdFreeSchema(config->schema);
if (config->tagSchema) tdFreeSchema(config->tagSchema);
if (config->tagValues) kvRowFree(config->tagValues);
tfree(config->name);
tfree(config->sname);
tfree(config->sql);
free(config);
}
}
static int tsdbEncodeTableName(void **buf, tstr *name) {
int tlen = 0;
tlen += taosEncodeFixedI16(buf, name->len);
if (buf != NULL) {
memcpy(*buf, name->data, name->len);
*buf = POINTER_SHIFT(*buf, name->len);
}
tlen += name->len;
return tlen;
}
static void *tsdbDecodeTableName(void *buf, tstr **name) {
VarDataLenT len = 0;
buf = taosDecodeFixedI16(buf, &len);
*name = calloc(1, sizeof(tstr) + len + 1);
if (*name == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return NULL;
}
(*name)->len = len;
memcpy((*name)->data, buf, len);
buf = POINTER_SHIFT(buf, len);
return buf;
}
static int tsdbEncodeTable(void **buf, STable *pTable) {
ASSERT(pTable != NULL);
int tlen = 0;
tlen += taosEncodeFixedU8(buf, pTable->type);
tlen += tsdbEncodeTableName(buf, pTable->name);
tlen += taosEncodeFixedU64(buf, TABLE_UID(pTable));
tlen += taosEncodeFixedI32(buf, TABLE_TID(pTable));
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
tlen += taosEncodeFixedU64(buf, TABLE_SUID(pTable));
tlen += tdEncodeKVRow(buf, pTable->tagVal);
} else {
uint32_t arraySize = (uint32_t)taosArrayGetSize(pTable->schema);
if(arraySize > UINT8_MAX) {
tlen += taosEncodeFixedU8(buf, 0);
tlen += taosEncodeFixedU32(buf, arraySize);
} else {
tlen += taosEncodeFixedU8(buf, (uint8_t)arraySize);
}
for (uint32_t i = 0; i < arraySize; i++) {
STSchema *pSchema = taosArrayGetP(pTable->schema, i);
tlen += tdEncodeSchema(buf, pSchema);
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tlen += tdEncodeSchema(buf, pTable->tagSchema);
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
tlen += taosEncodeString(buf, pTable->sql);
}
}
return tlen;
}
static void *tsdbDecodeTable(void *buf, STable **pRTable) {
STable *pTable = tsdbNewTable();
if (pTable == NULL) return NULL;
uint8_t type = 0;
buf = taosDecodeFixedU8(buf, &type);
pTable->type = type;
buf = tsdbDecodeTableName(buf, &(pTable->name));
buf = taosDecodeFixedU64(buf, &TABLE_UID(pTable));
buf = taosDecodeFixedI32(buf, &TABLE_TID(pTable));
if (TABLE_TYPE(pTable) == TSDB_CHILD_TABLE) {
buf = taosDecodeFixedU64(buf, &TABLE_SUID(pTable));
buf = tdDecodeKVRow(buf, &(pTable->tagVal));
} else {
uint32_t nSchemas = 0;
buf = taosDecodeFixedU8(buf, (uint8_t *)&nSchemas);
if(nSchemas == 0) {
buf = taosDecodeFixedU32(buf, &nSchemas);
}
for (int i = 0; i < nSchemas; i++) {
STSchema *pSchema;
buf = tdDecodeSchema(buf, &pSchema);
tsdbAddSchema(pTable, pSchema);
}
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
buf = tdDecodeSchema(buf, &(pTable->tagSchema));
STColumn *pCol = schemaColAt(pTable->tagSchema, DEFAULT_TAG_INDEX_COLUMN);
pTable->pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, colType(pCol), (uint8_t)(colBytes(pCol)), NULL,
SL_ALLOW_DUP_KEY, getTagIndexKey);
if (pTable->pIndex == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
tsdbFreeTable(pTable);
return NULL;
}
}
if (TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) {
buf = taosDecodeString(buf, &(pTable->sql));
}
}
T_REF_INC(pTable);
*pRTable = pTable;
return buf;
}
static int tsdbGetTableEncodeSize(int8_t act, STable *pTable) {
int tlen = 0;
if (act == TSDB_UPDATE_META) {
tlen = sizeof(SListNode) + sizeof(SActObj) + sizeof(SActCont) + tsdbEncodeTable(NULL, pTable) + sizeof(TSCKSUM);
} else {
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
tlen = (int)((sizeof(SListNode) + sizeof(SActObj)) * (SL_SIZE(pTable->pIndex) + 1));
} else {
tlen = sizeof(SListNode) + sizeof(SActObj);
}
}
return tlen;
}
static void *tsdbInsertTableAct(STsdbRepo *pRepo, int8_t act, void *buf, STable *pTable) {
SListNode *pNode = (SListNode *)buf;
SActObj * pAct = (SActObj *)(pNode->data);
SActCont * pCont = (SActCont *)POINTER_SHIFT(pAct, sizeof(*pAct));
void * pBuf = (void *)pCont;
pNode->prev = pNode->next = NULL;
pAct->act = act;
pAct->uid = TABLE_UID(pTable);
if (act == TSDB_UPDATE_META) {
pBuf = (void *)(pCont->cont);
pCont->len = tsdbEncodeTable(&pBuf, pTable) + sizeof(TSCKSUM);
taosCalcChecksumAppend(0, (uint8_t *)pCont->cont, pCont->len);
pBuf = POINTER_SHIFT(pBuf, sizeof(TSCKSUM));
}
tdListAppendNode(pRepo->mem->actList, pNode);
return pBuf;
}
static int tsdbRemoveTableFromStore(STsdbRepo *pRepo, STable *pTable) {
int tlen = tsdbGetTableEncodeSize(TSDB_DROP_META, pTable);
void *buf = tsdbAllocBytes(pRepo, tlen);
if (buf == NULL) {
return -1;
}
void *pBuf = buf;
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex);
if (pIter == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
while (tSkipListIterNext(pIter)) {
STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter));
ASSERT(TABLE_TYPE(tTable) == TSDB_CHILD_TABLE);
pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, tTable);
}
tSkipListDestroyIter(pIter);
}
pBuf = tsdbInsertTableAct(pRepo, TSDB_DROP_META, pBuf, pTable);
ASSERT(POINTER_DISTANCE(pBuf, buf) == tlen);
return 0;
}
static int tsdbRmTableFromMeta(STsdbRepo *pRepo, STable *pTable) {
if (TABLE_TYPE(pTable) == TSDB_SUPER_TABLE) {
SSkipListIterator *pIter = tSkipListCreateIter(pTable->pIndex);
if (pIter == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
tsdbWLockRepoMeta(pRepo);
while (tSkipListIterNext(pIter)) {
STable *tTable = (STable *)SL_GET_NODE_DATA(tSkipListIterGet(pIter));
tsdbRemoveTableFromMeta(pRepo, tTable, false, false);
}
tsdbRemoveTableFromMeta(pRepo, pTable, false, false);
tsdbUnlockRepoMeta(pRepo);
tSkipListDestroyIter(pIter);
} else {
if ((TABLE_TYPE(pTable) == TSDB_STREAM_TABLE) && pTable->cqhandle) pRepo->appH.cqDropFunc(pTable->cqhandle);
tsdbRemoveTableFromMeta(pRepo, pTable, true, true);
}
return 0;
}
static int tsdbAdjustMetaTables(STsdbRepo *pRepo, int tid) {
STsdbMeta *pMeta = pRepo->tsdbMeta;
ASSERT(tid >= pMeta->maxTables);
int maxTables = tsdbGetNextMaxTables(tid);
STable **tables = (STable **)calloc(maxTables, sizeof(STable *));
if (tables == NULL) {
terrno = TSDB_CODE_TDB_OUT_OF_MEMORY;
return -1;
}
memcpy((void *)tables, (void *)pMeta->tables, sizeof(STable *) * pMeta->maxTables);
pMeta->maxTables = maxTables;
STable **tTables = pMeta->tables;
pMeta->tables = tables;
tfree(tTables);
tsdbDebug("vgId:%d tsdb meta maxTables is adjusted as %d", REPO_ID(pRepo), maxTables);
return 0;
}
static int tsdbCheckTableTagVal(SKVRow *pKVRow, STSchema *pSchema) {
for (size_t i = 0; i < kvRowNCols(pKVRow); i++) {
SColIdx * pColIdx = kvRowColIdxAt(pKVRow, i);
STColumn *pCol = tdGetColOfID(pSchema, pColIdx->colId);
if ((pCol == NULL) || (!IS_VAR_DATA_TYPE(pCol->type))) continue;
void *pValue = tdGetKVRowValOfCol(pKVRow, pCol->colId);
if (varDataTLen(pValue) > pCol->bytes) {
terrno = TSDB_CODE_TDB_IVLD_TAG_VAL;
return -1;
}
}
return 0;
}
static int tsdbAddSchema(STable *pTable, STSchema *pSchema) {
ASSERT(TABLE_TYPE(pTable) != TSDB_CHILD_TABLE);
if (pTable->schema == NULL) {
pTable->schema = taosArrayInit(TSDB_MAX_TABLE_SCHEMAS, sizeof(SSchema *));
if (pTable->schema == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
ASSERT(taosArrayGetSize(pTable->schema) == 0 ||
schemaVersion(pSchema) > schemaVersion(*(STSchema **)taosArrayGetLast(pTable->schema)));
if (taosArrayPush(pTable->schema, &pSchema) == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return 0;
}
static void tsdbFreeTableSchema(STable *pTable) {
ASSERT(pTable != NULL);
if (pTable->schema) {
for (size_t i = 0; i < taosArrayGetSize(pTable->schema); i++) {
STSchema *pSchema = taosArrayGetP(pTable->schema, i);
tdFreeSchema(pSchema);
}
taosArrayDestroy(pTable->schema);
}
}
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST)
add_executable(tsdbTests ${SOURCE_LIST})
target_link_libraries(tsdbTests gtest gtest_main pthread common tsdb tutil trpc)
add_test(NAME unit COMMAND ${CMAKE_CURRENT_BINARY_DIR}/tsdbTests)
\ No newline at end of file
#include <gtest/gtest.h>
#include <stdlib.h>
#include <sys/time.h>
#include "tsdb.h"
#include "tsdbMain.h"
static double getCurTime() {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec * 1E-6;
}
typedef struct {
STsdbRepo *pRepo;
bool isAscend;
int tid;
uint64_t uid;
int sversion;
TSKEY startTime;
TSKEY interval;
int totalRows;
int rowsPerSubmit;
STSchema * pSchema;
} SInsertInfo;
static int insertData(SInsertInfo *pInfo) {
SSubmitMsg *pMsg =
(SSubmitMsg *)malloc(sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + dataRowMaxBytesFromSchema(pInfo->pSchema) * pInfo->rowsPerSubmit);
if (pMsg == NULL) return -1;
TSKEY start_time = pInfo->startTime;
// Loop to write data
double stime = getCurTime();
for (int k = 0; k < pInfo->totalRows/pInfo->rowsPerSubmit; k++) {
memset((void *)pMsg, 0, sizeof(SSubmitMsg));
SSubmitBlk *pBlock = (SSubmitBlk *)pMsg->blocks;
pBlock->uid = pInfo->uid;
pBlock->tid = pInfo->tid;
pBlock->sversion = pInfo->sversion;
pBlock->dataLen = 0;
pBlock->schemaLen = 0;
pBlock->numOfRows = 0;
for (int i = 0; i < pInfo->rowsPerSubmit; i++) {
// start_time += 1000;
if (pInfo->isAscend) {
start_time += pInfo->interval;
} else {
start_time -= pInfo->interval;
}
SDataRow row = (SDataRow)(pBlock->data + pBlock->dataLen);
tdInitDataRow(row, pInfo->pSchema);
for (int j = 0; j < schemaNCols(pInfo->pSchema); j++) {
STColumn *pTCol = schemaColAt(pInfo->pSchema, j);
if (j == 0) { // Just for timestamp
tdAppendColVal(row, (void *)(&start_time), pTCol->type, pTCol->offset);
} else { // For int
int val = 10;
tdAppendColVal(row, (void *)(&val), pTCol->type, pTCol->offset);
}
}
pBlock->dataLen += dataRowLen(row);
pBlock->numOfRows++;
}
pMsg->length = sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + pBlock->dataLen;
pMsg->numOfBlocks = 1;
pBlock->dataLen = htonl(pBlock->dataLen);
pBlock->numOfRows = htonl(pBlock->numOfRows);
pBlock->schemaLen = htonl(pBlock->schemaLen);
pBlock->uid = htobe64(pBlock->uid);
pBlock->tid = htonl(pBlock->tid);
pBlock->sversion = htonl(pBlock->sversion);
pBlock->padding = htonl(pBlock->padding);
pMsg->length = htonl(pMsg->length);
pMsg->numOfBlocks = htonl(pMsg->numOfBlocks);
if (tsdbInsertData(pInfo->pRepo, pMsg, NULL) < 0) {
tfree(pMsg);
return -1;
}
}
double etime = getCurTime();
printf("Spent %f seconds to write %d records\n", etime - stime, pInfo->totalRows);
tfree(pMsg);
return 0;
}
static void tsdbSetCfg(STsdbCfg *pCfg, int32_t tsdbId, int32_t cacheBlockSize, int32_t totalBlocks, int32_t maxTables,
int32_t daysPerFile, int32_t keep, int32_t minRows, int32_t maxRows, int8_t precision,
int8_t compression) {
pCfg->tsdbId = tsdbId;
pCfg->cacheBlockSize = cacheBlockSize;
pCfg->totalBlocks = totalBlocks;
// pCfg->maxTables = maxTables;
pCfg->daysPerFile = daysPerFile;
pCfg->keep = keep;
pCfg->minRowsPerFileBlock = minRows;
pCfg->maxRowsPerFileBlock = maxRows;
pCfg->precision = precision;
pCfg->compression = compression;
}
static void tsdbSetTableCfg(STableCfg *pCfg) {
STSchemaBuilder schemaBuilder = {0};
pCfg->type = TSDB_NORMAL_TABLE;
pCfg->superUid = TSDB_INVALID_SUPER_TABLE_ID;
pCfg->tableId.tid = 1;
pCfg->tableId.uid = 5849583783847394;
tdInitTSchemaBuilder(&schemaBuilder, 0);
int colId = 0;
for (int i = 0; i < 5; i++) {
tdAddColToSchema(&schemaBuilder, (colId == 0) ? TSDB_DATA_TYPE_TIMESTAMP : TSDB_DATA_TYPE_INT, colId, 0);
colId++;
}
pCfg->schema = tdGetSchemaFromBuilder(&schemaBuilder);
pCfg->name = strdup("t1");
tdDestroyTSchemaBuilder(&schemaBuilder);
}
TEST(TsdbTest, testInsertSpeed) {
int vnode = 1;
int ret = 0;
STsdbCfg tsdbCfg;
STableCfg tableCfg;
std::string testDir = "./test";
char * rootDir = strdup((testDir + "/vnode" + std::to_string(vnode)).c_str());
tsdbDebugFlag = 131; //NOTE: you must set the flag
taosRemoveDir(rootDir);
// Create and open repository
tsdbSetCfg(&tsdbCfg, 1, 16, 4, -1, -1, -1, -1, -1, -1, -1);
tsdbCreateRepo(rootDir, &tsdbCfg);
STsdbRepo *repo = tsdbOpenRepo(rootDir, NULL);
ASSERT_NE(repo, nullptr);
// Create table
tsdbSetTableCfg(&tableCfg);
tsdbCreateTable(repo, &tableCfg);
// Insert data
SInsertInfo iInfo = {repo, true, 1, 5849583783847394, 0, 1590000000000, 10, 10000000, 100, tableCfg.schema};
insertData(&iInfo);
tsdbCloseRepo(repo, 1);
}
static char *getTKey(const void *data) {
return (char *)data;
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册