未验证 提交 677a27a0 编写于 作者: X Xiaoyu Wang 提交者: GitHub

Merge pull request #19690 from taosdata/fix/3.0_merge_main

Fix/3.0_merge_main
......@@ -146,9 +146,9 @@ extern void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max,
int32_t tColDataAddValueByBind(SColData *pColData, TAOS_MULTI_BIND *pBind);
void tColDataSortMerge(SArray *colDataArr);
//for raw block
int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes,
int32_t nRows, char* lengthOrbitmap, char *data);
// for raw block
int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes, int32_t nRows, char *lengthOrbitmap,
char *data);
// for encode/decode
int32_t tPutColData(uint8_t *pBuf, SColData *pColData);
int32_t tGetColData(uint8_t *pBuf, SColData *pColData);
......@@ -261,7 +261,13 @@ struct STag {
// STSchema ================================
STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version);
void tDestroyTSchema(STSchema *pTSchema);
// Releases the object referenced by `pTSchema` with taosMemoryFree and then resets the
// caller's pointer to NULL. A NULL pointer is skipped (nothing is freed).
// NOTE: this is a macro, not a function: `pTSchema` must be a modifiable lvalue and it is
// evaluated up to three times, so do not pass an expression with side effects.
#define tDestroyTSchema(pTSchema) \
do { \
if (pTSchema) { \
taosMemoryFree(pTSchema); \
pTSchema = NULL; \
} \
} while (0)
#endif
......
......@@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead);
int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead);
int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead);
SWalRef *walRefFirstVer(SWal *, SWalRef *);
SWalRef *walRefCommittedVer(SWal *);
SWalRef *walOpenRef(SWal *);
......
......@@ -1532,10 +1532,6 @@ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) {
return pTSchema;
}
void tDestroyTSchema(STSchema *pTSchema) {
if (pTSchema) taosMemoryFree(pTSchema);
}
// SColData ========================================
void tColDataDestroy(void *ph) {
SColData *pColData = (SColData *)ph;
......
......@@ -206,6 +206,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol
uint8_t **ppBuf);
int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData,
uint8_t **ppBuf);
int32_t tRowInfoCmprFn(const void *p1, const void *p2);
// tsdbMemTable ==============================================================================================
// SMemTable
int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable);
......
......@@ -252,7 +252,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader);
int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData);
// STsdbSnapWriter ========================================
int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter);
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData);
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr);
int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter);
int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback);
// STqSnapshotReader ==
......
......@@ -706,9 +706,8 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv
}
}
if (sver <= 0) {
metaError("meta/query: incorrect sver: %" PRId32 ".", sver);
code = TSDB_CODE_FAILED;
if (ASSERTS(sver > 0, __FILE__, __LINE__, "failed to get table schema version: %d", sver)) {
code = TSDB_CODE_NOT_FOUND;
goto _exit;
}
......
......@@ -446,10 +446,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData)
// rsma1/rsma2
if (pHdr->type == SNAP_DATA_RSMA1) {
pHdr->type = SNAP_DATA_TSDB;
code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData);
code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr);
} else if (pHdr->type == SNAP_DATA_RSMA2) {
pHdr->type = SNAP_DATA_TSDB;
code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData);
code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr);
} else if (pHdr->type == SNAP_DATA_QTASK) {
code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData);
} else {
......
......@@ -520,7 +520,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
tqOffsetResetToData(&fetchOffsetNew, 0, 0);
}
} else {
tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal));
pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
if (pHandle->pRef == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1);
}
} else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
......
......@@ -15,274 +15,628 @@
#include "tsdb.h"
// STsdbSnapReader ========================================
typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT;
extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData);
extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo);
extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg);
extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg);
// STsdbDataIter2 ========================================
// Type tags stored in STsdbDataIter2.type. The open/close/next dispatchers in this file
// branch on these values.
// Memtable iterator: the dispatchers visible in this file only ASSERT(0) for this tag.
#define TSDB_MEM_TABLE_DATA_ITER 0
// Iterator over a .data file (uses the `dIter` state).
#define TSDB_DATA_FILE_DATA_ITER 1
// Iterator over one .stt file (uses the `sIter` state).
#define TSDB_STT_FILE_DATA_ITER 2
// Iterator over the tombstone (delete) file (uses the `tIter` state).
#define TSDB_TOMB_FILE_DATA_ITER 3
typedef struct STsdbDataIter2 STsdbDataIter2;
typedef struct STsdbFilterInfo STsdbFilterInfo;
typedef struct {
SRBTreeNode n;
SRowInfo rInfo;
EFIterT type;
int64_t suid;
int64_t uid;
SDelData delData;
} SDelInfo;
struct STsdbDataIter2 {
STsdbDataIter2* next;
SRBTreeNode rbtn;
int32_t type;
SRowInfo rowInfo;
SDelInfo delInfo;
union {
// TSDB_MEM_TABLE_DATA_ITER
struct {
SArray* aBlockIdx;
int32_t iBlockIdx;
SBlockIdx* pBlockIdx;
SMapData mBlock;
int32_t iBlock;
}; // .data file
SMemTable* pMemTable;
} mIter;
// TSDB_DATA_FILE_DATA_ITER
struct {
int32_t iStt;
SArray* aSttBlk;
int32_t iSttBlk;
}; // .stt file
SDataFReader* pReader;
SArray* aBlockIdx; // SArray<SBlockIdx>
SMapData mDataBlk;
SBlockData bData;
int32_t iBlockIdx;
int32_t iDataBlk;
int32_t iRow;
} dIter;
// TSDB_STT_FILE_DATA_ITER
struct {
SDataFReader* pReader;
int32_t iStt;
SArray* aSttBlk;
SBlockData bData;
int32_t iSttBlk;
int32_t iRow;
} sIter;
// TSDB_TOMB_FILE_DATA_ITER
struct {
SDelFReader* pReader;
SArray* aDelIdx;
SArray* aDelData;
int32_t iDelIdx;
int32_t iDelData;
} tIter;
};
SBlockData bData;
int32_t iRow;
} SFDataIter;
};
struct STsdbSnapReader {
STsdb* pTsdb;
#define TSDB_FILTER_FLAG_BY_VERSION 0x1
struct STsdbFilterInfo {
int32_t flag;
int64_t sver;
int64_t ever;
STsdbFS fs;
int8_t type;
// for data file
int8_t dataDone;
int32_t fid;
SDataFReader* pDataFReader;
SFDataIter* pIter;
SRBTree rbt;
SFDataIter aFDataIter[TSDB_MAX_STT_TRIGGER + 1];
SBlockData bData;
SSkmInfo skmTable;
// for del file
int8_t delDone;
SDelFReader* pDelFReader;
SArray* aDelIdx; // SArray<SDelIdx>
int32_t iDelIdx;
SArray* aDelData; // SArray<SDelData>
uint8_t* aBuf[5];
};
extern int32_t tRowInfoCmprFn(const void* p1, const void* p2);
extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData);
extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo);
// Recovers the STsdbDataIter2 that embeds the given red-black tree node (its `rbtn` member)
// by subtracting the member offset from the node address.
// The argument is parenthesized so that an expression argument (e.g. `c ? a : b` or `p + 1`)
// is cast to char* as a whole instead of the cast binding only to its first operand.
#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)(pNode)) - offsetof(STsdbDataIter2, rbtn)))
static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) {
SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n));
SFDataIter* pIter2 = (SFDataIter*)(((uint8_t*)pNode2) - offsetof(SFDataIter, n));
/* open */
static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
return tRowInfoCmprFn(&pIter1->rInfo, &pIter2->rInfo);
// create handle
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pIter->type = TSDB_DATA_FILE_DATA_ITER;
pIter->dIter.pReader = pReader;
if ((pIter->dIter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iBlockIdx = 0;
pIter->dIter.iDataBlk = 0;
pIter->dIter.iRow = 0;
// read data
code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayGetSize(pIter->dIter.aBlockIdx) == 0) goto _clear;
_exit:
if (code) {
if (pIter) {
_clear:
tBlockDataDestroy(&pIter->dIter.bData);
taosArrayDestroy(pIter->dIter.aBlockIdx);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
static int32_t tsdbSnapReadOpenFile(STsdbSnapReader* pReader) {
static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
SDFileSet dFileSet = {.fid = pReader->fid};
SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT);
if (pSet == NULL) return code;
// create handle
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->fid = pSet->fid;
code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->type = TSDB_STT_FILE_DATA_ITER;
pIter->sIter.pReader = pReader;
pIter->sIter.iStt = iStt;
pIter->sIter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pIter->sIter.aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->pIter = NULL;
tRBTreeCreate(&pReader->rbt, tFDataIterCmprFn);
code = tBlockDataCreate(&pIter->sIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// .data file
SFDataIter* pIter = &pReader->aFDataIter[0];
pIter->type = SNAP_DATA_FILE_ITER;
pIter->sIter.iSttBlk = 0;
pIter->sIter.iRow = 0;
code = tsdbReadBlockIdx(pReader->pDataFReader, pIter->aBlockIdx);
// read data
code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
for (pIter->iBlockIdx = 0; pIter->iBlockIdx < taosArrayGetSize(pIter->aBlockIdx); pIter->iBlockIdx++) {
pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx);
if (taosArrayGetSize(pIter->sIter.aSttBlk) == 0) goto _clear;
code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock);
_exit:
if (code) {
if (pIter) {
_clear:
taosArrayDestroy(pIter->sIter.aSttBlk);
tBlockDataDestroy(&pIter->sIter.bData);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
static int32_t tsdbOpenTombFileDataIter(SDelFReader* pReader, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pIter->type = TSDB_TOMB_FILE_DATA_ITER;
for (pIter->iBlock = 0; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk);
pIter->tIter.pReader = pReader;
if ((pIter->tIter.aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
if ((pIter->tIter.aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue;
code = tsdbReadDelIdx(pReader, pIter->tIter.aDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayGetSize(pIter->tIter.aDelIdx) == 0) goto _clear;
ASSERT(pIter->pBlockIdx->suid == pIter->bData.suid);
ASSERT(pIter->pBlockIdx->uid == pIter->bData.uid);
pIter->tIter.iDelIdx = 0;
pIter->tIter.iDelData = 0;
for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
_exit:
if (code) {
if (pIter) {
_clear:
taosArrayDestroy(pIter->tIter.aDelIdx);
taosArrayDestroy(pIter->tIter.aDelData);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->pBlockIdx->suid;
pIter->rInfo.uid = pIter->pBlockIdx->uid;
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _add_iter_and_break;
/* close */
// Tears down an iterator of type TSDB_DATA_FILE_DATA_ITER: releases the block-data buffer,
// the data-block map and the block-index array held in `dIter`, then frees the iterator.
// `dIter.pReader` is not closed here; `pIter` must not be used after this call.
static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) {
tBlockDataDestroy(&pIter->dIter.bData);
tMapDataClear(&pIter->dIter.mDataBlk);
taosArrayDestroy(pIter->dIter.aBlockIdx);
taosMemoryFree(pIter);
}
// Tears down an iterator of type TSDB_STT_FILE_DATA_ITER: releases the block-data buffer and
// the stt-block array held in `sIter`, then frees the iterator itself.
// `sIter.pReader` is not closed here; `pIter` must not be used after this call.
static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) {
tBlockDataDestroy(&pIter->sIter.bData);
taosArrayDestroy(pIter->sIter.aSttBlk);
taosMemoryFree(pIter);
}
// Tears down an iterator of type TSDB_TOMB_FILE_DATA_ITER: destroys the delete-data and
// delete-index arrays held in `tIter`, then frees the iterator itself.
// `tIter.pReader` is not closed here; `pIter` must not be used after this call.
static void tsdbCloseTombFileDataIter(STsdbDataIter2* pIter) {
taosArrayDestroy(pIter->tIter.aDelData);
taosArrayDestroy(pIter->tIter.aDelIdx);
taosMemoryFree(pIter);
}
/*
 * Close an iterator of any supported kind by dispatching on its type tag.
 *
 * File-backed iterators (.data, .stt, tombstone) are handed to their dedicated
 * close routine, which also frees `pIter`. Memtable iterators and unknown tags
 * are not handled here and trip ASSERT(0).
 */
static void tsdbCloseDataIter2(STsdbDataIter2* pIter) {
  switch (pIter->type) {
    case TSDB_DATA_FILE_DATA_ITER:
      tsdbCloseDataFileDataIter(pIter);
      break;
    case TSDB_STT_FILE_DATA_ITER:
      tsdbCloseSttFileDataIter(pIter);
      break;
    case TSDB_TOMB_FILE_DATA_ITER:
      tsdbCloseTombFileDataIter(pIter);
      break;
    case TSDB_MEM_TABLE_DATA_ITER:
    default:
      ASSERT(0);
      break;
  }
}
/* cmpr */
static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) {
STsdbDataIter2* pIter1 = TSDB_RBTN_TO_DATA_ITER(pNode1);
STsdbDataIter2* pIter2 = TSDB_RBTN_TO_DATA_ITER(pNode2);
return tRowInfoCmprFn(&pIter1->rowInfo, &pIter2->rowInfo);
}
/* seek */
/* iter next */
static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
for (;;) {
while (pIter->dIter.iRow < pIter->dIter.bData.nRow) {
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pIter->dIter.bData.aVersion[pIter->dIter.iRow] < pFilterInfo->sver ||
pIter->dIter.bData.aVersion[pIter->dIter.iRow] > pFilterInfo->ever) {
pIter->dIter.iRow++;
continue;
}
}
}
pIter->rowInfo.suid = pIter->dIter.bData.suid;
pIter->rowInfo.uid = pIter->dIter.bData.uid;
pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow);
pIter->dIter.iRow++;
goto _exit;
}
continue;
for (;;) {
while (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk);
// filter
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > dataBlk.maxVer || pFilterInfo->ever < dataBlk.minVer) {
pIter->dIter.iDataBlk++;
continue;
}
}
}
_add_iter_and_break:
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter);
break;
}
code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// .stt file
pIter = &pReader->aFDataIter[1];
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
pIter->type = SNAP_STT_FILE_ITER;
pIter->iStt = iStt;
pIter->dIter.iDataBlk++;
pIter->dIter.iRow = 0;
code = tsdbReadSttBlk(pReader->pDataFReader, iStt, pIter->aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
break;
}
for (pIter->iSttBlk = 0; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) {
SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk);
if (pIter->dIter.iRow < pIter->dIter.bData.nRow) break;
if (pSttBlk->minVer > pReader->ever) continue;
if (pSttBlk->maxVer < pReader->sver) continue;
for (;;) {
if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) {
SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx);
code = tsdbReadSttBlockEx(pReader->pDataFReader, iStt, pSttBlk, &pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
pIter->dIter.iBlockIdx++;
pIter->dIter.iDataBlk = 0;
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->bData.suid;
pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow];
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _add_iter;
break;
} else {
pIter->rowInfo = (SRowInfo){0};
goto _exit;
}
}
}
continue;
_add_iter:
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter);
pIter++;
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbInfo("vgId:%d, %s done, path:%s, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path,
pReader->fid);
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) {
static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
if (pReader->pIter) {
SFDataIter* pIter = NULL;
while (true) {
_find_row:
pIter = pReader->pIter;
for (pIter->iRow++; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->bData.suid;
pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow];
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _out;
for (;;) {
while (pIter->sIter.iRow < pIter->sIter.bData.nRow) {
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pIter->sIter.bData.aVersion[pIter->sIter.iRow] ||
pFilterInfo->ever < pIter->sIter.bData.aVersion[pIter->sIter.iRow]) {
pIter->sIter.iRow++;
continue;
}
}
}
if (pIter->type == SNAP_DATA_FILE_ITER) {
while (true) {
for (pIter->iBlock++; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk);
if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue;
pIter->rowInfo.suid = pIter->sIter.bData.suid;
pIter->rowInfo.uid = pIter->sIter.bData.uid ? pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow];
pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow);
pIter->sIter.iRow++;
goto _exit;
}
code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData);
if (code) goto _err;
for (;;) {
if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) {
SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk);
pIter->iRow = -1;
goto _find_row;
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pSttBlk->maxVer || pFilterInfo->ever < pSttBlk->minVer) {
pIter->sIter.iSttBlk++;
continue;
}
}
pIter->iBlockIdx++;
if (pIter->iBlockIdx >= taosArrayGetSize(pIter->aBlockIdx)) break;
pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx);
code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock);
if (code) goto _err;
pIter->iBlock = -1;
}
pReader->pIter = NULL;
code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->sIter.iRow = 0;
pIter->sIter.iSttBlk++;
break;
} else if (pIter->type == SNAP_STT_FILE_ITER) {
for (pIter->iSttBlk++; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) {
SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk);
} else {
pIter->rowInfo = (SRowInfo){0};
goto _exit;
}
}
}
_exit:
if (code) {
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
if (pSttBlk->minVer > pReader->ever || pSttBlk->maxVer < pReader->sver) continue;
static int32_t tsdbTombFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbReadSttBlockEx(pReader->pDataFReader, pIter->iStt, pSttBlk, &pIter->bData);
if (code) goto _err;
for (;;) {
while (pIter->tIter.iDelData < taosArrayGetSize(pIter->tIter.aDelData)) {
SDelData* pDelData = taosArrayGet(pIter->tIter.aDelData, pIter->tIter.iDelData);
pIter->iRow = -1;
goto _find_row;
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pDelData->version || pFilterInfo->ever < pDelData->version) {
pIter->tIter.iDelData++;
continue;
}
}
}
pReader->pIter = NULL;
pIter->delInfo.delData = *pDelData;
pIter->tIter.iDelData++;
goto _exit;
}
for (;;) {
if (pIter->tIter.iDelIdx < taosArrayGetSize(pIter->tIter.aDelIdx)) {
SDelIdx* pDelIdx = taosArrayGet(pIter->tIter.aDelIdx, pIter->tIter.iDelIdx);
code = tsdbReadDelData(pIter->tIter.pReader, pDelIdx, pIter->tIter.aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->delInfo.suid = pDelIdx->suid;
pIter->delInfo.uid = pDelIdx->uid;
pIter->tIter.iDelData = 0;
pIter->tIter.iDelIdx++;
break;
} else {
ASSERT(0);
pIter->delInfo = (SDelInfo){0};
goto _exit;
}
}
}
_out:
pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt);
if (pReader->pIter && pIter) {
int32_t c = tRowInfoCmprFn(&pReader->pIter->rInfo, &pIter->rInfo);
if (c > 0) {
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pReader->pIter);
pReader->pIter = NULL;
} else {
ASSERT(c);
}
_exit:
if (code) {
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
/*
 * Advance `pIter` to its next entry, dispatching on the iterator's type tag.
 *
 * pFilterInfo is forwarded untouched to the type-specific routine.
 * Returns whatever the type-specific routine returns. For memtable iterators
 * and unknown tags nothing is advanced: ASSERT(0) fires and 0 is returned.
 */
static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
  switch (pIter->type) {
    case TSDB_DATA_FILE_DATA_ITER:
      return tsdbDataFileDataIterNext(pIter, pFilterInfo);
    case TSDB_STT_FILE_DATA_ITER:
      return tsdbSttFileDataIterNext(pIter, pFilterInfo);
    case TSDB_TOMB_FILE_DATA_ITER:
      return tsdbTombFileDataIterNext(pIter, pFilterInfo);
    case TSDB_MEM_TABLE_DATA_ITER:
    default:
      ASSERT(0);
      return 0;
  }
}
/* get */
// STsdbSnapReader ========================================
// State of one snapshot read over a tsdb: a version window plus cursors over the
// time-series file sets and over the tombstone file.
struct STsdbSnapReader {
// tsdb being read; also used for the vnode id in log messages
STsdb* pTsdb;
// version window handed to the iterators as an STsdbFilterInfo (TSDB_FILTER_FLAG_BY_VERSION)
int64_t sver;
int64_t ever;
// NOTE(review): not referenced in the code visible here -- confirm its meaning at the use site
int8_t type;
// NOTE(review): scratch buffers, presumably for compression; not referenced in the visible code
uint8_t* aBuf[5];
// file-system view being read: fs.aDFileSet is searched for the next file set,
// fs.pDelFile is the tombstone file (may be NULL)
STsdbFS fs;
// table whose tombstones are currently being collected/serialized
TABLEID tbid;
// table schema cache refreshed through tsdbUpdateTableSchema before rows are appended
SSkmInfo skmTable;
// timeseries data
// NOTE(review): presumably set once all file sets were read; not referenced in the visible code
int8_t dataDone;
// id of the file set being read; set to INT32_MAX when no further file set exists
int32_t fid;
// reader of the current file set; NULL means a new file set has to be started
SDataFReader* pDataFReader;
// singly linked list (via STsdbDataIter2.next) of the iterators that were added for the
// current file set; all of them are closed by tsdbSnapReadFileDataEnd
STsdbDataIter2* iterList;
// iterator currently supplying rows (taken out of `rbt`), or NULL
STsdbDataIter2* pIter;
// remaining iterators, ordered by their current row via tsdbDataIterCmprFn
SRBTree rbt;
// block of rows being assembled for the next snapshot message
SBlockData bData;
// tombstone data
// NOTE(review): presumably set once tombstones were fully read; not referenced in the visible code
int8_t delDone;
// reader of the tombstone file; NULL until first needed
SDelFReader* pDelFReader;
// iterator over the tombstone file, or NULL
STsdbDataIter2* pTIter;
// SDelData entries gathered for `tbid`, serialized by tsdbSnapCmprTombData
SArray* aDelData;
};
static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) {
int32_t code = 0;
int32_t lino = 0;
SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT);
if (pSet == NULL) {
pReader->fid = INT32_MAX;
goto _exit;
}
pReader->fid = pSet->fid;
tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn);
code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter);
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter) {
// iter to next with filter info (sver, ever)
code = tsdbDataIterNext2(pReader->pIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) {
// add to rbtree
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
// add to iterList
pReader->pIter->next = pReader->iterList;
pReader->iterList = pReader->pIter;
} else {
tsdbCloseDataIter2(pReader->pIter);
}
}
if (pReader->pIter == NULL) {
pReader->pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt);
for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) {
code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter);
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter) {
tRBTreeDrop(&pReader->rbt, (SRBTreeNode*)pReader->pIter);
// iter to valid row
code = tsdbDataIterNext2(pReader->pIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) {
// add to rbtree
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
// add to iterList
pReader->pIter->next = pReader->iterList;
pReader->iterList = pReader->pIter;
} else {
tsdbCloseDataIter2(pReader->pIter);
}
}
}
return code;
pReader->pIter = NULL;
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid);
}
return code;
}
static SRowInfo* tsdbSnapGetRow(STsdbSnapReader* pReader) {
/*
 * Finish reading the current file set.
 *
 * Pops every iterator off pReader->iterList, closing each one, and then
 * closes the data file reader.
 */
static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) {
  for (STsdbDataIter2* pHead = pReader->iterList; pHead != NULL; pHead = pReader->iterList) {
    // unlink first: closing frees the node, so `next` must be read beforehand
    pReader->iterList = pHead->next;
    tsdbCloseDataIter2(pHead);
  }

  tsdbDataFReaderClose(&pReader->pDataFReader);
}
static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) {
int32_t code = 0;
int32_t lino = 0;
if (pReader->pIter) {
return &pReader->pIter->rInfo;
} else {
tsdbSnapNextRow(pReader);
code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) {
pReader->pIter = NULL;
} else {
SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt);
if (pNode) {
int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode);
if (c > 0) {
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
pReader->pIter = NULL;
} else if (c == 0) {
ASSERT(0);
}
}
}
}
if (pReader->pIter == NULL) {
SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt);
if (pNode) {
tRBTreeDrop(&pReader->rbt, pNode);
pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode);
}
}
if (ppRowInfo) {
if (pReader->pIter) {
return &pReader->pIter->rInfo;
*ppRowInfo = &pReader->pIter->rowInfo;
} else {
return NULL;
*ppRowInfo = NULL;
}
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
/*
 * Fetch the row the reader is currently positioned on without consuming it.
 *
 * When an iterator is already current, *ppRowInfo is pointed at its rowInfo
 * and 0 is returned. Otherwise the work is delegated to tsdbSnapReadNextRow,
 * whose return code and *ppRowInfo result are passed through.
 */
static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) {
  STsdbDataIter2* pCurrent = pReader->pIter;

  if (pCurrent == NULL) {
    return tsdbSnapReadNextRow(pReader, ppRowInfo);
  }

  *ppRowInfo = &pCurrent->rowInfo;
  return 0;
}
static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, uint8_t** ppData) {
......@@ -318,155 +672,215 @@ _exit:
return code;
}
static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) {
static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
int32_t lino = 0;
STsdb* pTsdb = pReader->pTsdb;
while (true) {
tBlockDataReset(&pReader->bData);
for (;;) {
// start a new file read if need
if (pReader->pDataFReader == NULL) {
code = tsdbSnapReadOpenFile(pReader);
code = tsdbSnapReadFileDataStart(pReader);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pReader->pDataFReader == NULL) break;
SRowInfo* pRowInfo = tsdbSnapGetRow(pReader);
SRowInfo* pRowInfo;
code = tsdbSnapReadGetRow(pReader, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
if (pRowInfo == NULL) {
tsdbDataFReaderClose(&pReader->pDataFReader);
tsdbSnapReadFileDataEnd(pReader);
continue;
}
TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid};
SBlockData* pBlockData = &pReader->bData;
code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, id.suid, id.uid, &pReader->skmTable);
code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable);
TSDB_CHECK_CODE(code, lino, _exit);
code = tBlockDataInit(pBlockData, &id, pReader->skmTable.pTSchema, NULL, 0);
code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
while (pRowInfo->suid == id.suid && pRowInfo->uid == id.uid) {
code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pRowInfo->uid);
do {
if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break;
if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) {
code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1));
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) {
pReader->bData.aUid[iRow] = pReader->bData.uid;
}
pReader->bData.uid = 0;
}
code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapNextRow(pReader);
code = tsdbSnapReadNextRow(pReader, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
pRowInfo = tsdbSnapGetRow(pReader);
if (pRowInfo == NULL) {
tsdbDataFReaderClose(&pReader->pDataFReader);
break;
}
if (pReader->bData.nRow >= 4096) break;
} while (pRowInfo);
ASSERT(pReader->bData.nRow > 0);
break;
}
if (pReader->bData.nRow > 0) {
ASSERT(pReader->bData.suid || pReader->bData.uid);
code = tsdbSnapCmprData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
// Serializes the tombstones gathered in pReader->aDelData into one snapshot message.
// Buffer layout: SSnapDataHdr (type SNAP_DATA_DEL, size = payload bytes), followed by the
// TABLEID copied from pReader->tbid, followed by every SDelData written with tPutDelData.
// *ppData always receives the result: the newly allocated buffer, or NULL when the
// allocation failed (in which case `code` is TSDB_CODE_OUT_OF_MEMORY).
static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
int32_t lino = 0;
// pass 1: measure the payload; tPutDelData is called with a NULL buffer here, presumably
// returning the encoded length without writing -- confirm against its definition
int64_t size = sizeof(TABLEID);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) {
size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData));
}
uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size);
if (pData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
pHdr->type = SNAP_DATA_DEL;
pHdr->size = size;
TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr));
*pId = pReader->tbid;
// pass 2: `size` is reused as the write offset, starting right after header + TABLEID
size = sizeof(SSnapDataHdr) + sizeof(TABLEID);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) {
size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData));
}
// NOTE(review): `pBlockData` is not declared in this function and there is no enclosing loop;
// the next line looks like residue of the removed code in this diff rendering -- confirm
if (pBlockData->nRow >= 4096) break;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
if (pData) {
taosMemoryFree(pData);
pData = NULL;
}
}
*ppData = pData;
return code;
}
code = tsdbSnapCmprData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
/*
 * Report the tombstone entry the tombstone iterator is currently positioned on.
 *
 * *ppDelInfo is set to the iterator's delInfo when a tombstone iterator exists
 * and its delInfo carries a non-zero suid or uid; otherwise it is set to NULL
 * (no iterator, or the iterator's delInfo has both ids zero).
 */
static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) {
  STsdbDataIter2* pTombIter = pReader->pTIter;

  if (pTombIter != NULL && (pTombIter->delInfo.suid != 0 || pTombIter->delInfo.uid != 0)) {
    *ppDelInfo = &pTombIter->delInfo;
    return;
  }

  *ppDelInfo = NULL;
}
break;
// Advances the tombstone iterator (pReader->pTIter) to its next entry inside the reader's
// version window and, when ppDelInfo is non-NULL, reports the new current entry through it
// (NULL when there is none, see tsdbSnapReadGetTombData).
// pReader->pTIter is dereferenced by the iterator dispatch, so it must not be NULL here.
// Returns the code produced by tsdbDataIterNext2.
static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) {
int32_t code = 0;
int32_t lino = 0;
// filter by version: only tombstones with sver <= version <= ever are surfaced
code = tsdbDataIterNext2(
pReader->pTIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, .sver = pReader->sver, .ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (ppDelInfo) {
tsdbSnapReadGetTombData(pReader, ppDelInfo);
}
_exit:
if (code) {
// NOTE(review): `pTsdb` is not declared in this function; the next line looks like residue of
// the removed code in this diff rendering (the line after it is the current log call) -- confirm
tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path);
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
/*
 * Read the tombstone (delete) records of the next table and hand them to
 * tsdbSnapCmprTombData for encoding into *ppData.
 *
 * On the first call the del-file reader and the tomb iterator are opened
 * lazily; if the file system snapshot has no del file the function returns
 * immediately without touching *ppData. Records are collected into
 * pReader->aDelData for as long as the iterator stays on the table stored
 * in pReader->tbid.
 *
 * pReader  snapshot reader
 * ppData   output buffer pointer, only written through tsdbSnapCmprTombData
 *          when at least one record was collected
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) {
  int32_t code = 0;
  int32_t lino = 0;

  STsdb* pTsdb = pReader->pTsdb;

  // open tombstone data iter if need
  if (pReader->pDelFReader == NULL) {
    if (pReader->fs.pDelFile == NULL) goto _exit;

    // open
    code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb);
    TSDB_CHECK_CODE(code, lino, _exit);

    code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter);
    TSDB_CHECK_CODE(code, lino, _exit);

    // position the iterator on its first record
    if (pReader->pTIter) {
      code = tsdbSnapReadNextTombData(pReader, NULL);
      TSDB_CHECK_CODE(code, lino, _exit);
    }
  }

  // loop to get tombstone data
  SDelInfo* pDelInfo;
  tsdbSnapReadGetTombData(pReader, &pDelInfo);

  if (pDelInfo == NULL) goto _exit;

  // the cast mirrors the original code; it relies on SDelInfo starting with suid/uid
  pReader->tbid = *(TABLEID*)pDelInfo;

  if (pReader->aDelData) {
    taosArrayClear(pReader->aDelData);
  } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) {
    // taosArrayPush yields a pointer; a failed push is a NULL result
    if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      TSDB_CHECK_CODE(code, lino, _exit);
    }

    code = tsdbSnapReadNextTombData(pReader, &pDelInfo);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  // encode tombstone data
  if (taosArrayGetSize(pReader->aDelData) > 0) {
    code = tsdbSnapCmprTombData(pReader, ppData);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
  } else {
    tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
  }
  return code;
}
int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) {
int32_t code = 0;
int32_t lino = 0;
STsdbSnapReader* pReader = NULL;
int32_t code = 0;
int32_t lino = 0;
// alloc
pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader));
STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader));
if (pReader == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
......@@ -476,118 +890,80 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type
pReader->ever = ever;
pReader->type = type;
code = taosThreadRwlockRdlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockRdlock(&pTsdb->rwLock);
code = tsdbFSRef(pTsdb, &pReader->fs);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&pTsdb->rwLock);
code = taosThreadRwlockUnlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
// data
// init
pReader->fid = INT32_MIN;
for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) {
SFDataIter* pIter = &pReader->aFDataIter[iIter];
if (iIter == 0) {
pIter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pIter->aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pIter->aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
code = tBlockDataCreate(&pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pReader->bData);
TSDB_CHECK_CODE(code, lino, _exit);
// del
pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx));
if (pReader->aDelIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->aDelData = taosArrayInit(0, sizeof(SDelData));
if (pReader->aDelData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s, TSDB path: %s", TD_VID(pTsdb->pVnode), __func__, lino,
tstrerror(code), pTsdb->path);
*ppReader = NULL;
tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode),
__func__, lino, tstrerror(code), sver, ever, type);
if (pReader) {
taosArrayDestroy(pReader->aDelData);
taosArrayDestroy(pReader->aDelIdx);
tBlockDataDestroy(&pReader->bData);
tsdbFSDestroy(&pReader->fs);
tsdbFSUnref(pTsdb, &pReader->fs);
taosMemoryFree(pReader);
pReader = NULL;
}
} else {
*ppReader = pReader;
tsdbInfo("vgId:%d, vnode snapshot tsdb reader opened for %s", TD_VID(pTsdb->pVnode), pTsdb->path);
tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever,
type);
}
*ppReader = pReader;
return code;
}
/*
 * Release everything owned by a snapshot reader and free the reader itself.
 *
 * Tears down, in order: the tomb iterator, the del-file reader and the
 * collected del data; every iterator on iterList, the data-file reader and
 * the block buffer; then the cached table schema, the file-system reference,
 * the scratch buffers and the reader struct.
 *
 * ppReader  in/out; *ppReader must be a valid reader and is set to NULL
 *
 * Returns code, which is never modified here and therefore always 0.
 */
int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) {
  int32_t code = 0;
  int32_t lino = 0;

  STsdbSnapReader* pReader = *ppReader;
  STsdb*           pTsdb = pReader->pTsdb;  // kept for logging after pReader is freed

  // tombstone
  if (pReader->pTIter) {
    tsdbCloseDataIter2(pReader->pTIter);
    pReader->pTIter = NULL;
  }
  if (pReader->pDelFReader) {
    tsdbDelFReaderClose(&pReader->pDelFReader);
  }
  taosArrayDestroy(pReader->aDelData);

  // timeseries
  while (pReader->iterList) {
    STsdbDataIter2* pIter = pReader->iterList;
    pReader->iterList = pIter->next;  // unlink before closing the node
    tsdbCloseDataIter2(pIter);
  }
  if (pReader->pDataFReader) {
    tsdbDataFReaderClose(&pReader->pDataFReader);
  }
  tBlockDataDestroy(&pReader->bData);

  // other
  tDestroyTSchema(pReader->skmTable.pTSchema);
  tsdbFSUnref(pReader->pTsdb, &pReader->fs);
  for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) {
    tFree(pReader->aBuf[iBuf]);
  }
  taosMemoryFree(pReader);

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
  } else {
    tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
  }
  *ppReader = NULL;
  return code;
}
......@@ -600,7 +976,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read data file
if (!pReader->dataDone) {
code = tsdbSnapReadData(pReader, ppData);
code = tsdbSnapReadTimeSeriesData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
if (*ppData) {
goto _exit;
......@@ -611,7 +987,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read del file
if (!pReader->delDone) {
code = tsdbSnapReadDel(pReader, ppData);
code = tsdbSnapReadTombData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
if (*ppData) {
goto _exit;
......@@ -622,22 +998,18 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code),
pReader->pTsdb->path);
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d, %s done, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path);
tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__);
}
return code;
}
// STsdbSnapWriter ========================================
struct STsdbSnapWriter {
STsdb* pTsdb;
int64_t sver;
int64_t ever;
STsdbFS fs;
// config
STsdb* pTsdb;
int64_t sver;
int64_t ever;
int32_t minutes;
int8_t precision;
int32_t minRow;
......@@ -646,641 +1018,816 @@ struct STsdbSnapWriter {
int64_t commitID;
uint8_t* aBuf[5];
// for data file
SBlockData bData;
int32_t fid;
TABLEID id;
SSkmInfo skmTable;
struct {
SDataFReader* pReader;
SArray* aBlockIdx;
int32_t iBlockIdx;
SBlockIdx* pBlockIdx;
SMapData mDataBlk;
int32_t iDataBlk;
SBlockData bData;
int32_t iRow;
} dReader;
struct {
SDataFWriter* pWriter;
SArray* aBlockIdx;
SMapData mDataBlk;
SArray* aSttBlk;
SBlockData bData;
SBlockData sData;
} dWriter;
// for del file
SDelFReader* pDelFReader;
STsdbFS fs;
TABLEID tbid;
// time-series data
SBlockData inData;
int32_t fid;
SSkmInfo skmTable;
/* reader */
SDataFReader* pDataFReader;
STsdbDataIter2* iterList;
STsdbDataIter2* pDIter;
STsdbDataIter2* pSIter;
SRBTree rbt; // SRBTree<STsdbDataIter2>
/* writer */
SDataFWriter* pDataFWriter;
SArray* aBlockIdx;
SMapData mDataBlk; // SMapData<SDataBlk>
SArray* aSttBlk; // SArray<SSttBlk>
SBlockData bData;
SBlockData sData;
// tombstone data
/* reader */
SDelFReader* pDelFReader;
STsdbDataIter2* pTIter;
/* writer */
SDelFWriter* pDelFWriter;
int32_t iDelIdx;
SArray* aDelIdxR;
SArray* aDelIdx;
SArray* aDelData;
SArray* aDelIdxW;
};
// SNAP_DATA_TSDB
extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg);
extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg);
static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) {
int32_t code = 0;
int32_t lino = 0;
if (pId) {
pWriter->tbid = *pId;
} else {
pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX};
}
if (pWriter->pDIter) {
STsdbDataIter2* pIter = pWriter->pDIter;
// assert last table data end
ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow);
ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem);
for (;;) {
if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) {
pWriter->pDIter = NULL;
break;
}
SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx);
int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid);
if (c < 0) {
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1);
if (pNewBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pNewBlockIdx->suid = pBlockIdx->suid;
pNewBlockIdx->uid = pBlockIdx->uid;
code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iBlockIdx++;
} else if (c == 0) {
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iDataBlk = 0;
pIter->dIter.iBlockIdx++;
break;
} else {
pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem;
break;
}
}
}
if (pId) {
code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable);
TSDB_CHECK_CODE(code, lino, _exit);
tMapDataReset(&pWriter->mDataBlk);
ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow);
code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) {
pWriter->dReader.pBlockIdx = (SBlockIdx*)taosArrayGet(pWriter->dReader.aBlockIdx, pWriter->dReader.iBlockIdx);
if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) {
if ((pWriter->sData.nRow > 0)) {
code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbReadDataBlk(pWriter->dReader.pReader, pWriter->dReader.pBlockIdx, &pWriter->dReader.mDataBlk);
if (code) goto _exit;
if (pId) {
TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 0 : pWriter->tbid.uid};
code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
pWriter->dReader.iBlockIdx++;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
pWriter->dReader.pBlockIdx = NULL;
tMapDataReset(&pWriter->dReader.mDataBlk);
tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__,
pWriter->tbid.suid, pWriter->tbid.uid);
}
return code;
}
/*
 * Append one row to the current table's block buffer and flush the buffer
 * as a data block once it holds at least maxRow rows.
 *
 * pWriter  snapshot writer
 * pRow     row to append; must not be NULL
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) {
  int32_t code = 0;
  int32_t lino = 0;

  code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid);
  TSDB_CHECK_CODE(code, lino, _exit);

  if (pWriter->bData.nRow >= pWriter->maxRow) {
    code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
  }
  return code;
}
static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) {
static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) {
int32_t code = 0;
int32_t lino = 0;
while (true) {
if (pWriter->dReader.pBlockIdx == NULL) break;
if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break;
TSDBKEY inKey = pRow ? TSDBROW_KEY(pRow) : TSDBKEY_MAX;
if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow &&
pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) {
goto _write_row;
} else {
for (;;) {
while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) {
TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow);
int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row));
if (c < 0) {
goto _write_row;
} else if (c > 0) {
code = tsdbSnapWriteTableRowImpl(pWriter, &row);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->pDIter->dIter.iRow++;
} else {
ASSERT(0);
}
}
SBlockIdx blkIdx = *pWriter->dReader.pBlockIdx;
code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dReader.mDataBlk, &blkIdx);
if (code) goto _exit;
for (;;) {
if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row;
if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blkIdx) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
// FIXME: Here can be slow, use array instead
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk);
int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey});
if (c > 0) {
goto _write_row;
} else if (c < 0) {
if (pWriter->bData.nRow > 0) {
code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk);
pWriter->pDIter->dIter.iDataBlk++;
} else {
code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->pDIter->dIter.iRow = 0;
pWriter->pDIter->dIter.iDataBlk++;
break;
}
}
}
}
code = tsdbSnapNextTableData(pWriter);
if (code) goto _exit;
_write_row:
if (pRow) {
code = tsdbSnapWriteTableRowImpl(pWriter, pRow);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
/*
 * Finish writing the current table.
 *
 * Drains the table's remaining existing data, then disposes of the rows
 * still buffered in bData: fewer than minRow rows are moved into the stt
 * buffer (flushed whenever it reaches maxRow), otherwise they are written
 * as a regular data block. Finally, if the table produced any data blocks,
 * its block map is written and a block-index entry is appended.
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) {
  int32_t code = 0;
  int32_t lino = 0;

  // write a NULL row to end current table data write
  code = tsdbSnapWriteTableRow(pWriter, NULL);
  TSDB_CHECK_CODE(code, lino, _exit);

  if (pWriter->bData.nRow > 0) {
    if (pWriter->bData.nRow < pWriter->minRow) {
      ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid));
      for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) {
        code =
            tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid);
        TSDB_CHECK_CODE(code, lino, _exit);

        if (pWriter->sData.nRow >= pWriter->maxRow) {
          code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg);
          TSDB_CHECK_CODE(code, lino, _exit);
        }
      }

      tBlockDataClear(&pWriter->bData);
    } else {
      code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
      TSDB_CHECK_CODE(code, lino, _exit);
    }
  }

  if (pWriter->mDataBlk.nItem) {
    SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1);
    if (pBlockIdx == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      TSDB_CHECK_CODE(code, lino, _exit);
    }

    pBlockIdx->suid = pWriter->tbid.suid;
    pBlockIdx->uid = pWriter->tbid.uid;

    code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
  }
  return code;
}
/*
 * Start writing the data file set identified by fid.
 *
 * Requires that no data-file writer is open and that fid is greater than
 * the previously written one (asserted).
 *
 * Reader side: if a file set with this fid already exists, open it, create
 * an iterator over its data file and one per stt file. Every iterator is
 * linked onto iterList (for later cleanup); each stt iterator is advanced
 * once and inserted into the red-black tree used for merging.
 *
 * Writer side: open a writer on the existing set's disk (or a newly
 * allocated one) with a fresh head file and a single fresh stt file,
 * reusing the existing data/sma files when present, then reset the
 * per-file output state.
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) {
  int32_t code = 0;
  int32_t lino = 0;

  ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid);

  STsdb* pTsdb = pWriter->pTsdb;

  pWriter->fid = fid;
  pWriter->tbid = (TABLEID){0};
  SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);

  // open reader
  pWriter->pDataFReader = NULL;
  pWriter->iterList = NULL;
  pWriter->pDIter = NULL;
  pWriter->pSIter = NULL;
  tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn);
  if (pSet) {
    code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet);
    TSDB_CHECK_CODE(code, lino, _exit);

    code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter);
    TSDB_CHECK_CODE(code, lino, _exit);
    if (pWriter->pDIter) {
      pWriter->pDIter->next = pWriter->iterList;
      pWriter->iterList = pWriter->pDIter;
    }

    for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
      code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter);
      TSDB_CHECK_CODE(code, lino, _exit);

      if (pWriter->pSIter) {
        code = tsdbSttFileDataIterNext(pWriter->pSIter, NULL);
        TSDB_CHECK_CODE(code, lino, _exit);

        // add to tree
        tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn);

        // add to list
        pWriter->pSIter->next = pWriter->iterList;
        pWriter->iterList = pWriter->pSIter;
      }
    }

    // pSIter was only a scratch slot above; the merge starts with no current iterator
    pWriter->pSIter = NULL;
  }

  // open writer
  SDiskID diskId;
  if (pSet) {
    diskId = pSet->diskId;
  } else {
    // NOTE(review): the results of these two calls are not checked -- confirm they cannot fail here
    tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId);
    tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId);
  }
  SDFileSet wSet = {.diskId = diskId,
                    .fid = fid,
                    .pHeadF = &(SHeadFile){.commitID = pWriter->commitID},
                    .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID},
                    .pSmaF = (pSet) ? pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID},
                    .nSttF = 1,
                    .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}};

  code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet);
  TSDB_CHECK_CODE(code, lino, _exit);

  if (pWriter->aBlockIdx) {
    taosArrayClear(pWriter->aBlockIdx);
  } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  tMapDataReset(&pWriter->mDataBlk);

  if (pWriter->aSttBlk) {
    taosArrayClear(pWriter->aSttBlk);
  } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  tBlockDataReset(&pWriter->bData);
  tBlockDataReset(&pWriter->sData);

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code),
              fid);
  } else {
    tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid);
  }
  return code;
}
/*
 * Feed one row (with its table id) into the writer.
 *
 * When the row belongs to a different table than the one in progress, or
 * when pRowInfo is NULL, the current table (if any, i.e. tbid.uid != 0) is
 * finished and the next one is started. A NULL pRowInfo therefore ends the
 * table sequence and writes no row.
 *
 * pRowInfo is passed to tsdbSnapWriteTableDataStart as a TABLEID*, which
 * relies on SRowInfo beginning with suid/uid.
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) {
  int32_t code = 0;
  int32_t lino = 0;

  // switch to new table if need
  if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) {
    if (pWriter->tbid.uid) {
      code = tsdbSnapWriteTableDataEnd(pWriter);
      TSDB_CHECK_CODE(code, lino, _exit);
    }

    code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  if (pRowInfo == NULL) goto _exit;

  code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row);
  TSDB_CHECK_CODE(code, lino, _exit);

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
  }
  return code;
}
static int32_t tsdbSnapWriteOpenFile(STsdbSnapWriter* pWriter, int32_t fid) {
static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
ASSERT(pWriter->dWriter.pWriter == NULL);
int32_t lino = 0;
pWriter->fid = fid;
pWriter->id = (TABLEID){0};
SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);
if (pWriter->pSIter) {
code = tsdbDataIterNext2(pWriter->pSIter, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
// Reader
if (pSet) {
code = tsdbDataFReaderOpen(&pWriter->dReader.pReader, pWriter->pTsdb, pSet);
if (code) goto _err;
if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) {
pWriter->pSIter = NULL;
} else {
SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt);
if (pNode) {
int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode);
if (c > 0) {
tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn);
pWriter->pSIter = NULL;
} else if (c == 0) {
ASSERT(0);
}
}
}
}
code = tsdbReadBlockIdx(pWriter->dReader.pReader, pWriter->dReader.aBlockIdx);
if (code) goto _err;
} else {
ASSERT(pWriter->dReader.pReader == NULL);
taosArrayClear(pWriter->dReader.aBlockIdx);
}
pWriter->dReader.iBlockIdx = 0; // point to the next one
code = tsdbSnapNextTableData(pWriter);
if (code) goto _err;
// Writer
SHeadFile fHead = {.commitID = pWriter->commitID};
SDataFile fData = {.commitID = pWriter->commitID};
SSmaFile fSma = {.commitID = pWriter->commitID};
SSttFile fStt = {.commitID = pWriter->commitID};
SDFileSet wSet = {.fid = pWriter->fid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma};
if (pSet) {
wSet.diskId = pSet->diskId;
fData = *pSet->pDataF;
fSma = *pSet->pSmaF;
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
wSet.aSttF[iStt] = pSet->aSttF[iStt];
if (pWriter->pSIter == NULL) {
SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt);
if (pNode) {
tRBTreeDrop(&pWriter->rbt, pNode);
pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode);
}
wSet.nSttF = pSet->nSttF + 1; // TODO: fix pSet->nSttF == pTsdb->maxFile
} else {
SDiskID did = {0};
tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did);
tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did);
wSet.diskId = did;
wSet.nSttF = 1;
}
wSet.aSttF[wSet.nSttF - 1] = &fStt;
code = tsdbDataFWriterOpen(&pWriter->dWriter.pWriter, pWriter->pTsdb, &wSet);
if (code) goto _err;
taosArrayClear(pWriter->dWriter.aBlockIdx);
tMapDataReset(&pWriter->dWriter.mDataBlk);
taosArrayClear(pWriter->dWriter.aSttBlk);
tBlockDataReset(&pWriter->dWriter.bData);
tBlockDataReset(&pWriter->dWriter.sData);
}
return code;
if (ppRowInfo) {
if (pWriter->pSIter) {
*ppRowInfo = &pWriter->pSIter->rowInfo;
} else {
*ppRowInfo = NULL;
}
}
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteCloseFile(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) {
int32_t code = 0;
int32_t lino = 0;
ASSERT(pWriter->dWriter.pWriter);
code = tsdbSnapWriteTableDataEnd(pWriter);
if (code) goto _err;
// copy remain table data
TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX};
code = tsdbSnapWriteCopyData(pWriter, &id);
if (code) goto _err;
code =
tsdbWriteSttBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.sData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
// Indices
code = tsdbWriteBlockIdx(pWriter->dWriter.pWriter, pWriter->dWriter.aBlockIdx);
if (code) goto _err;
code = tsdbWriteSttBlk(pWriter->dWriter.pWriter, pWriter->dWriter.aSttBlk);
if (code) goto _err;
code = tsdbUpdateDFileSetHeader(pWriter->dWriter.pWriter);
if (code) goto _err;
code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->dWriter.pWriter->wSet);
if (code) goto _err;
code = tsdbDataFWriterClose(&pWriter->dWriter.pWriter, 1);
if (code) goto _err;
if (pWriter->dReader.pReader) {
code = tsdbDataFReaderClose(&pWriter->dReader.pReader);
if (code) goto _err;
if (pWriter->pSIter) {
*ppRowInfo = &pWriter->pSIter->rowInfo;
goto _exit;
}
_exit:
return code;
code = tsdbSnapWriteNextRow(pWriter, ppRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
/*
 * Finish the data file set currently being written.
 *
 * Requires an open data-file writer (asserted). All rows still pending in
 * the existing stt-file iterators are pushed through
 * tsdbSnapWriteTableData; the loop ends after a NULL row info has been
 * passed, which closes the last table. Then the stt block index, block
 * index and file-set header are written, the new file set is recorded in
 * the writer's file-system snapshot, and the writer, reader and all
 * iterators are closed.
 *
 * NOTE(review): on an early error exit the reader and iterators stay open;
 * confirm the caller's rollback path releases them.
 *
 * Returns 0 on success or an error code.
 */
static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) {
  int32_t code = 0;
  int32_t lino = 0;

  ASSERT(pWriter->pDataFWriter);

  // consume remain data and end with a NULL table row
  SRowInfo* pRowInfo;
  code = tsdbSnapWriteGetRow(pWriter, &pRowInfo);
  TSDB_CHECK_CODE(code, lino, _exit);
  for (;;) {
    code = tsdbSnapWriteTableData(pWriter, pRowInfo);
    TSDB_CHECK_CODE(code, lino, _exit);

    if (pRowInfo == NULL) break;

    code = tsdbSnapWriteNextRow(pWriter, &pRowInfo);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  // do file-level updates
  code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk);
  TSDB_CHECK_CODE(code, lino, _exit);

  code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx);
  TSDB_CHECK_CODE(code, lino, _exit);

  code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter);
  TSDB_CHECK_CODE(code, lino, _exit);

  code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet);
  TSDB_CHECK_CODE(code, lino, _exit);

  code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1);
  TSDB_CHECK_CODE(code, lino, _exit);

  if (pWriter->pDataFReader) {
    code = tsdbDataFReaderClose(&pWriter->pDataFReader);
    TSDB_CHECK_CODE(code, lino, _exit);
  }

  // clear sources
  while (pWriter->iterList) {
    STsdbDataIter2* pIter = pWriter->iterList;
    pWriter->iterList = pIter->next;  // unlink before closing the node
    tsdbCloseDataIter2(pIter);
  }

_exit:
  if (code) {
    tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
  } else {
    tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__);
  }
  return code;
}
static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) {
static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
TABLEID id = {.suid = pWriter->bData.suid,
.uid = pWriter->bData.uid ? pWriter->bData.uid : pWriter->bData.aUid[iRow]};
TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow);
SBlockData* pBData = &pWriter->dWriter.sData;
code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf);
TSDB_CHECK_CODE(code, lino, _exit);
if (pBData->suid || pBData->uid) {
if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) {
code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
ASSERT(pWriter->inData.nRow > 0);
pBData->suid = 0;
pBData->uid = 0;
// switch to new data file if need
int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision);
if (pWriter->fid != fid) {
if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
if (pBData->suid == 0 && pBData->uid == 0) {
code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable);
if (code) goto _err;
TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 0 : pWriter->id.uid};
code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0);
if (code) goto _err;
code = tsdbSnapWriteFileDataStart(pWriter, fid);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataAppendRow(pBData, &row, NULL, id.uid);
if (code) goto _err;
// loop write each row
SRowInfo* pRowInfo;
code = tsdbSnapWriteGetRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) {
SRowInfo rInfo = {.suid = pWriter->inData.suid,
.uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow],
.row = tsdbRowFromBlockData(&pWriter->inData, iRow)};
if (pBData->nRow >= pWriter->maxRow) {
code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
for (;;) {
if (pRowInfo == NULL) {
code = tsdbSnapWriteTableData(pWriter, &rInfo);
TSDB_CHECK_CODE(code, lino, _exit);
break;
} else {
int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo);
if (c < 0) {
code = tsdbSnapWriteTableData(pWriter, &rInfo);
TSDB_CHECK_CODE(code, lino, _exit);
break;
} else if (c > 0) {
code = tsdbSnapWriteTableData(pWriter, pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapWriteNextRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
ASSERT(0);
}
}
}
}
_exit:
return code;
_err:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__,
pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow);
}
return code;
}
static int32_t tsdbSnapWriteRowData(STsdbSnapWriter* pWriter, int32_t iRow) {
// SNAP_DATA_DEL
static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) {
int32_t code = 0;
int32_t lino = 0;
SBlockData* pBlockData = &pWriter->bData;
TABLEID id = {.suid = pBlockData->suid, .uid = pBlockData->uid ? pBlockData->uid : pBlockData->aUid[iRow]};
// End last table data write if need
if (tTABLEIDCmprFn(&pWriter->id, &id) != 0) {
code = tsdbSnapWriteTableDataEnd(pWriter);
if (code) goto _err;
}
// Start new table data write if need
if (pWriter->id.suid == 0 && pWriter->id.uid == 0) {
code = tsdbSnapWriteTableDataStart(pWriter, &id);
if (code) goto _err;
}
// Merge with .data file data
int8_t done = 0;
if (pWriter->dReader.pBlockIdx && tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &id) == 0) {
code = tsdbSnapWriteToDataFile(pWriter, iRow, &done);
if (code) goto _err;
}
// Append to the .stt data block (todo: check if need to set/reload sst block)
if (!done) {
code = tsdbSnapWriteToSttFile(pWriter, iRow);
if (code) goto _err;
if (pId) {
pWriter->tbid = *pId;
} else {
pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX};
}
_exit:
return code;
taosArrayClear(pWriter->aDelData);
_err:
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
return code;
}
if (pWriter->pTIter) {
while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) {
SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx);
static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
SBlockData* pBlockData = &pWriter->bData;
int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid);
if (c < 0) {
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
// Decode data
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
code = tDecmprBlockData(pHdr->data, pHdr->size, pBlockData, pWriter->aBuf);
if (code) goto _err;
SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1);
if (pDelIdxNew == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
ASSERT(pBlockData->nRow > 0);
pDelIdxNew->suid = pDelIdx->suid;
pDelIdxNew->uid = pDelIdx->uid;
// Loop to handle each row
for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) {
TSKEY ts = pBlockData->aTSKEY[iRow];
int32_t fid = tsdbKeyFid(ts, pWriter->minutes, pWriter->precision);
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew);
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->dWriter.pWriter == NULL || pWriter->fid != fid) {
if (pWriter->dWriter.pWriter) {
// ASSERT(fid > pWriter->fid);
pWriter->pTIter->tIter.iDelIdx++;
} else if (c == 0) {
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _err;
pWriter->pTIter->tIter.iDelIdx++;
break;
} else {
break;
}
code = tsdbSnapWriteOpenFile(pWriter, fid);
if (code) goto _err;
}
code = tsdbSnapWriteRowData(pWriter, iRow);
if (code) goto _err;
}
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write data for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, pId->suid,
pId->uid);
}
return code;
}
// SNAP_DATA_DEL
static int32_t tsdbSnapMoveWriteDelData(STsdbSnapWriter* pWriter, TABLEID* pId) {
static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
while (true) {
if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break;
SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx);
if (tTABLEIDCmprFn(pDelIdx, pId) >= 0) break;
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
if (code) goto _exit;
SDelIdx delIdx = *pDelIdx;
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx);
if (code) goto _exit;
if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) {
if (taosArrayGetSize(pWriter->aDelData) > 0) {
SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1);
if (pDelIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->iDelIdx++;
pDelIdx->suid = pWriter->tbid.suid;
pDelIdx->uid = pWriter->tbid.uid;
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
// Open del file if not opened yet
if (pWriter->pDelFWriter == NULL) {
SDelFile* pDelFile = pWriter->fs.pDelFile;
// reader
if (pDelFile) {
code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb);
if (code) goto _err;
int32_t lino = 0;
code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR);
if (code) goto _err;
} else {
taosArrayClear(pWriter->aDelIdxR);
if (pId == NULL || pId->uid != pWriter->tbid.uid) {
if (pWriter->tbid.uid) {
code = tsdbSnapWriteDelTableDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->iDelIdx = 0;
// writer
SDelFile delFile = {.commitID = pWriter->commitID};
code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &delFile, pTsdb);
if (code) goto _err;
taosArrayClear(pWriter->aDelIdxW);
code = tsdbSnapWriteDelTableDataStart(pWriter, pId);
TSDB_CHECK_CODE(code, lino, _exit);
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
TABLEID id = *(TABLEID*)pHdr->data;
if (pId == NULL) goto _exit;
ASSERT(pHdr->size + sizeof(SSnapDataHdr) == nData);
int64_t n = 0;
while (n < size) {
SDelData delData;
n += tGetDelData(pData + n, &delData);
// Move write data < id
code = tsdbSnapMoveWriteDelData(pWriter, &id);
if (code) goto _err;
if (taosArrayPush(pWriter->aDelData, &delData) < 0) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
ASSERT(n == size);
// Merge incoming data with current
if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) &&
tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) {
SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
if (code) goto _err;
static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
pWriter->iDelIdx++;
} else {
taosArrayClear(pWriter->aDelData);
}
STsdb* pTsdb = pWriter->pTsdb;
SDelFile* pDelFile = pWriter->fs.pDelFile;
int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID);
while (n < nData) {
SDelData delData;
pWriter->tbid = (TABLEID){0};
n += tGetDelData(pData + n, &delData);
// reader
if (pDelFile) {
code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayPush(pWriter->aDelData, &delData) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter);
TSDB_CHECK_CODE(code, lino, _exit);
}
SDelIdx delIdx = {.suid = id.suid, .uid = id.uid};
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx);
if (code) goto _err;
// writer
code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) {
if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write del for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
int32_t lino = 0;
if (pWriter->pDelFWriter == NULL) return code;
STsdb* pTsdb = pWriter->pTsdb;
TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX};
code = tsdbSnapMoveWriteDelData(pWriter, &id);
if (code) goto _err;
// end remaining table with NULL data
code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdxW);
if (code) goto _err;
// update file-level info
code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->pDelFReader) {
code = tsdbDelFReaderClose(&pWriter->pDelFReader);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->pTIter) {
tsdbCloseDataIter2(pWriter->pTIter);
pWriter->pTIter = NULL;
}
tsdbInfo("vgId:%d, vnode snapshot tsdb write del for %s end", TD_VID(pTsdb->pVnode), pTsdb->path);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
STsdb* pTsdb = pWriter->pTsdb;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write del end for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
// start to write del data if need
if (pWriter->pDelFWriter == NULL) {
code = tsdbSnapWriteDelDataStart(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// do write del data
code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID),
pHdr->size - sizeof(TABLEID));
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
// APIs
int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) {
int32_t code = 0;
int32_t lino = 0;
STsdbSnapWriter* pWriter = NULL;
int32_t code = 0;
int32_t lino = 0;
// alloc
pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
if (pWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
......@@ -1288,11 +1835,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter->pTsdb = pTsdb;
pWriter->sver = sver;
pWriter->ever = ever;
code = tsdbFSCopy(pTsdb, &pWriter->fs);
TSDB_CHECK_CODE(code, lino, _exit);
// config
pWriter->minutes = pTsdb->keepCfg.days;
pWriter->precision = pTsdb->keepCfg.precision;
pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows;
......@@ -1300,102 +1842,70 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression;
pWriter->commitID = pTsdb->pVnode->state.commitID;
code = tsdbFSCopy(pTsdb, &pWriter->fs);
TSDB_CHECK_CODE(code, lino, _exit);
// SNAP_DATA_TSDB
code = tBlockDataCreate(&pWriter->bData);
code = tBlockDataCreate(&pWriter->inData);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->fid = INT32_MIN;
pWriter->id = (TABLEID){0};
// Reader
pWriter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pWriter->dReader.aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pWriter->dReader.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// Writer
pWriter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pWriter->dWriter.aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pWriter->dWriter.aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pWriter->dWriter.bData);
code = tBlockDataCreate(&pWriter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tBlockDataCreate(&pWriter->dWriter.sData);
code = tBlockDataCreate(&pWriter->sData);
TSDB_CHECK_CODE(code, lino, _exit);
// SNAP_DATA_DEL
pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx));
if (pWriter->aDelIdxR == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->aDelData = taosArrayInit(0, sizeof(SDelData));
if (pWriter->aDelData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->aDelIdxW = taosArrayInit(0, sizeof(SDelIdx));
if (pWriter->aDelIdxW == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
*ppWriter = NULL;
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
if (pWriter) {
if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW);
if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData);
if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR);
tBlockDataDestroy(&pWriter->dWriter.sData);
tBlockDataDestroy(&pWriter->dWriter.bData);
if (pWriter->dWriter.aSttBlk) taosArrayDestroy(pWriter->dWriter.aSttBlk);
if (pWriter->dWriter.aBlockIdx) taosArrayDestroy(pWriter->dWriter.aBlockIdx);
tBlockDataDestroy(&pWriter->dReader.bData);
if (pWriter->dReader.aBlockIdx) taosArrayDestroy(pWriter->dReader.aBlockIdx);
tBlockDataDestroy(&pWriter->sData);
tBlockDataDestroy(&pWriter->bData);
tBlockDataDestroy(&pWriter->inData);
tsdbFSDestroy(&pWriter->fs);
taosMemoryFree(pWriter);
pWriter = NULL;
}
} else {
tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__);
*ppWriter = pWriter;
tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever);
}
*ppWriter = pWriter;
return code;
}
int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) {
int32_t code = 0;
if (pWriter->dWriter.pWriter) {
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _exit;
int32_t lino = 0;
if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbSnapWriteDelEnd(pWriter);
if (code) goto _exit;
if (pWriter->pDelFWriter) {
code = tsdbSnapWriteDelDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs);
if (code) goto _exit;
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
int32_t code = 0;
int32_t code = 0;
int32_t lino = 0;
STsdbSnapWriter* pWriter = *ppWriter;
STsdb* pTsdb = pWriter->pTsdb;
......@@ -1408,7 +1918,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
code = tsdbFSCommit(pWriter->pTsdb);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
// unlock
......@@ -1416,72 +1926,60 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
}
// SNAP_DATA_DEL
taosArrayDestroy(pWriter->aDelIdxW);
taosArrayDestroy(pWriter->aDelData);
taosArrayDestroy(pWriter->aDelIdxR);
taosArrayDestroy(pWriter->aDelIdx);
// SNAP_DATA_TSDB
// Writer
tBlockDataDestroy(&pWriter->dWriter.sData);
tBlockDataDestroy(&pWriter->dWriter.bData);
taosArrayDestroy(pWriter->dWriter.aSttBlk);
tMapDataClear(&pWriter->dWriter.mDataBlk);
taosArrayDestroy(pWriter->dWriter.aBlockIdx);
// Reader
tBlockDataDestroy(&pWriter->dReader.bData);
tMapDataClear(&pWriter->dReader.mDataBlk);
taosArrayDestroy(pWriter->dReader.aBlockIdx);
tBlockDataDestroy(&pWriter->sData);
tBlockDataDestroy(&pWriter->bData);
taosArrayDestroy(pWriter->aSttBlk);
tMapDataClear(&pWriter->mDataBlk);
taosArrayDestroy(pWriter->aBlockIdx);
tDestroyTSchema(pWriter->skmTable.pTSchema);
tBlockDataDestroy(&pWriter->inData);
for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) {
tFree(pWriter->aBuf[iBuf]);
}
tsdbInfo("vgId:%d, %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
tsdbFSDestroy(&pWriter->fs);
taosMemoryFree(pWriter);
*ppWriter = NULL;
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb writer close for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode),
pWriter->pTsdb->path, tstrerror(code));
taosMemoryFree(pWriter);
*ppWriter = NULL;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
// ts data
if (pHdr->type == SNAP_DATA_TSDB) {
code = tsdbSnapWriteData(pWriter, pData, nData);
if (code) goto _err;
code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr);
TSDB_CHECK_CODE(code, lino, _exit);
goto _exit;
} else {
if (pWriter->dWriter.pWriter) {
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _err;
}
} else if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// del data
if (pHdr->type == SNAP_DATA_DEL) {
code = tsdbSnapWriteDel(pWriter, pData, nData);
if (code) goto _err;
code = tsdbSnapWriteDelData(pWriter, pHdr);
TSDB_CHECK_CODE(code, lino, _exit);
goto _exit;
}
_exit:
tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path);
return code;
_err:
tsdbError("vgId:%d, tsdb snapshot write for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path,
tstrerror(code));
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64,
TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size);
} else {
tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__,
pHdr->type, pHdr->index, pHdr->size);
}
return code;
}
......@@ -758,7 +758,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema)
pTColVal->value.nData = pColVal->value.nData;
if (pTColVal->value.nData) {
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
}
pTColVal->flag = 0;
} else {
......@@ -1133,6 +1133,7 @@ _exit:
// Reset the bookkeeping fields of a block: the row count and the uid/suid pair
// are all set back to zero. Nothing else in the structure is touched here.
void tBlockDataReset(SBlockData *pBlockData) {
  pBlockData->nRow = 0;
  pBlockData->uid = 0;
  pBlockData->suid = 0;
}
void tBlockDataClear(SBlockData *pBlockData) {
......
......@@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
if (code) goto _err;
}
code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData);
code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr);
if (code) goto _err;
} break;
case SNAP_DATA_TQ_HANDLE: {
......
......@@ -89,45 +89,6 @@
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
/*
 * Deprecated follower-side commit (the body opens with ASSERT(false && "deprecated")).
 *
 * Moves ths->commitIndex forward to newCommitIndex when that index is both
 * ahead of the current commit index and not beyond the last index reported by
 * the log store. Before doing so, the commit index is first raised to the
 * snapshot's lastApplyIndex if the snapshot is ahead.
 *
 * Returns -1 when the node is not in follower state, 0 in every other case.
 */
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
  ASSERT(false && "deprecated");

  if (ths->state != TAOS_SYNC_STATE_FOLLOWER) {
    sNTrace(ths, "can not do follower commit");
    return -1;
  }

  // the leader's commit index is not ahead of ours: nothing to do
  if (newCommitIndex <= ths->commitIndex) {
    return 0;
  }

  // the index to commit is past the last index of the local log store: nothing to do
  if (newCommitIndex > ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
    return 0;
  }

  // advance the commit index to the snapshot first
  SSnapshot snap;
  ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snap);
  if (snap.lastApplyIndex >= 0 && snap.lastApplyIndex > ths->commitIndex) {
    SyncIndex fromIndex = ths->commitIndex;
    SyncIndex toIndex = snap.lastApplyIndex;
    ths->commitIndex = snap.lastApplyIndex;
    sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, fromIndex, toIndex);
  }

  // range handed to syncNodeDoCommit: (current commit index, newCommitIndex]
  SyncIndex firstIndex = ths->commitIndex + 1;
  SyncIndex lastIndex = newCommitIndex;

  // update commit index
  ths->commitIndex = newCommitIndex;

  // call back Wal
  int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex);
  ASSERT(code == 0);

  code = syncNodeDoCommit(ths, firstIndex, lastIndex, ths->state);
  ASSERT(code == 0);

  return 0;
}
SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) {
SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen);
if (pEntry == NULL) {
......@@ -232,256 +193,3 @@ _IGNORE:
rpcFreeCont(rpcRsp.pCont);
return 0;
}
/*
 * Legacy ("Old") handler for an incoming AppendEntries message.
 *
 * Flow, as implemented below:
 *   - no reply is sent (label _IGNORE) when the sender is not in the raft group,
 *     when the reply message cannot be built, or when a local log
 *     truncate/append/read fails;
 *   - a reply with success=false is sent when the sender's term is smaller than
 *     ours, when prevLogIndex is beyond the local last index, or when the term
 *     returned by syncNodeGetPreTerm(prevLogIndex + 1) is invalid or differs
 *     from prevLogTerm;
 *   - otherwise the message is accepted: if it carries data (dataLen > 0) the
 *     entry is reconciled with the local entry at prevLogIndex + 1, matchIndex
 *     is set, syncNodeFollowerCommit() is called and a reply with success=true
 *     is sent.
 *
 * Every exit path returns 0.
 */
int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
SyncAppendEntries* pMsg = pRpcMsg->pCont;
SRpcMsg rpcRsp = {0};
// if already drop replica, do not process
// (rpcRsp is still zero-initialized here, so _IGNORE hands a NULL pCont to rpcFreeCont;
//  NOTE(review): presumably rpcFreeCont tolerates NULL -- confirm)
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvAppendEntries(ths, pMsg, "not in my config");
goto _IGNORE;
}
// prepare response msg
int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId);
if (code != 0) {
syncLogRecvAppendEntries(ths, pMsg, "build rsp error");
goto _IGNORE;
}
// default reply: rejected, no match index; fields are overwritten below on acceptance
SyncAppendEntriesReply* pReply = rpcRsp.pCont;
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->raftStore.currentTerm;
pReply->success = false;
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
pReply->matchIndex = SYNC_INDEX_INVALID;
pReply->lastSendIndex = pMsg->prevLogIndex + 1;
pReply->startTime = ths->startTime;
// sender's term is behind ours: reject
if (pMsg->term < ths->raftStore.currentTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, small term");
goto _SEND_RESPONSE;
}
// sender's term is ahead of ours: report the sender's term in the reply
if (pMsg->term > ths->raftStore.currentTerm) {
pReply->term = pMsg->term;
}
syncNodeStepDown(ths, pMsg->term);
syncNodeResetElectTimer(ths);
SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore);
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
// the entry preceding the new one is not in the local log: reject
if (pMsg->prevLogIndex > lastIndex) {
syncLogRecvAppendEntries(ths, pMsg, "reject, index not match");
goto _SEND_RESPONSE;
}
// the term check is only done when prevLogIndex is not before the local begin index
if (pMsg->prevLogIndex >= startIndex) {
SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1);
// ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
if (myPreLogTerm == SYNC_TERM_INVALID) {
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid");
goto _SEND_RESPONSE;
}
if (myPreLogTerm != pMsg->prevLogTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match");
goto _SEND_RESPONSE;
}
}
// accept
pReply->success = true;
bool hasAppendEntries = pMsg->dataLen > 0;
if (hasAppendEntries) {
SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg);
ASSERT(pAppendEntry != NULL);
SyncIndex appendIndex = pMsg->prevLogIndex + 1;
// hLocal/hAppend are cache handles; each stays NULL unless the corresponding
// entry was obtained from (hLocal) or handed to (hAppend) the log store cache
LRUHandle* hLocal = NULL;
LRUHandle* hAppend = NULL;
// NOTE: this declaration shadows the outer `code` for the rest of this block
int32_t code = 0;
SSyncRaftEntry* pLocalEntry = NULL;
// look up the local entry at appendIndex: cache first, then the log store
SLRUCache* pCache = ths->pLogStore->pCache;
hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex));
if (hLocal) {
pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal);
code = 0;
ths->pLogStore->cacheHit++;
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry);
} else {
ths->pLogStore->cacheMiss++;
sNTrace(ths, "miss cache index:%" PRId64, appendIndex);
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
}
if (code == 0) {
// get local entry success
if (pLocalEntry->term == pAppendEntry->term) {
// do nothing
sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex);
} else {
// terms differ: drop the local log from appendIndex on, then append the incoming entry
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// release whichever way each entry is held; hAppend is still NULL on this path
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
ASSERT(pAppendEntry->index == appendIndex);
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// same cleanup as above; hAppend is still NULL on this path
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
// NOTE(review): presumably this stores pAppendEntry in the cache and returns its handle in hAppend -- confirm
syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
}
} else {
// failed to get the local entry (this branch is only reachable after a cache miss, so hLocal is NULL)
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// log not exist
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
// NOTE(review): pLocalEntry comes from a failed syncLogGetEntry here; presumably it is
// still NULL and syncEntryDestroy accepts NULL -- confirm
syncEntryDestroy(pLocalEntry);
syncEntryDestroy(pAppendEntry);
goto _IGNORE;
}
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
} else {
// failed to get the local entry for a reason other than "log not exist": give up on this message
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex,
terrno);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
}
// update match index
pReply->matchIndex = pAppendEntry->index;
// success path cleanup: release cache handles, destroy entries that are not held through a handle
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
} else {
// no append entries, do nothing
// maybe has extra entries, no harm
// update match index
pReply->matchIndex = pMsg->prevLogIndex;
}
// maybe update commit index, leader notice me
// (the return value of syncNodeFollowerCommit is not checked)
syncNodeFollowerCommit(ths, pMsg->commitIndex);
syncLogRecvAppendEntries(ths, pMsg, "accept");
goto _SEND_RESPONSE;
// drop the message: free the reply buffer (if any) without sending it
_IGNORE:
rpcFreeCont(rpcRsp.pCont);
return 0;
_SEND_RESPONSE:
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
// send response
// (the return value of syncNodeSendMsgById is not checked)
syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp);
return 0;
}
......@@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
}
return 0;
}
// Legacy handler for an append-entries reply.
//
// Replies from a node that is not in the raft group, or whose term is older than the local term,
// are logged and dropped (return 0). A leader that receives a reply with a newer term steps down
// and returns -1. Otherwise a leader updates the match/next index of the replying peer and, when
// the reply refers to the last index recorded in the peer state, replicates to that peer again.
int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
  SRaftId* pPeerId = &(pMsg->srcId);

  // if already drop replica, do not process
  if (!syncNodeInRaftGroup(ths, pPeerId)) {
    syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config");
    return 0;
  }

  // drop stale response
  if (pMsg->term < ths->raftStore.currentTerm) {
    syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
    return 0;
  }

  // only a leader acts on the reply
  if (ths->state != TAOS_SYNC_STATE_LEADER) {
    syncLogRecvAppendEntriesReply(ths, pMsg, "process");
    return 0;
  }

  if (pMsg->term > ths->raftStore.currentTerm) {
    syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
    syncNodeStepDown(ths, pMsg->term);
    return -1;
  }
  ASSERT(pMsg->term == ths->raftStore.currentTerm);

  if (!pMsg->success) {
    // rejected: move the next index one step back, but never below SYNC_INDEX_BEGIN
    SyncIndex retryIndex = syncIndexMgrGetIndex(ths->pNextIndex, pPeerId);
    if (retryIndex > SYNC_INDEX_BEGIN) {
      --retryIndex;
    }
    syncIndexMgrSetIndex(ths->pNextIndex, pPeerId, retryIndex);
  } else {
    SyncIndex prevMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, pPeerId);
    if (pMsg->matchIndex > prevMatchIndex) {
      syncIndexMgrSetIndex(ths->pMatchIndex, pPeerId, pMsg->matchIndex);
      syncMaybeAdvanceCommitIndex(ths);

      // maybe update minMatchIndex
      ths->minMatchIndex = syncMinMatchIndex(ths);
    }
    syncIndexMgrSetIndex(ths->pNextIndex, pPeerId, pMsg->matchIndex + 1);
  }

  // send next append entries
  SPeerState* pPeerState = syncNodeGetPeerState(ths, pPeerId);
  ASSERT(pPeerState != NULL);
  if (pMsg->lastSendIndex == pPeerState->lastSendIndex) {
    int64_t rtt = taosGetTimestampMs() - pPeerState->lastSendTime;
    sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, rtt, pPeerState->lastSendIndex);
    syncNodeReplicateOne(ths, pPeerId, true);
  }

  syncLogRecvAppendEntriesReply(ths, pMsg, "process");
  return 0;
}
......@@ -43,148 +43,6 @@
// IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
// Deprecated: advance the commit index of a single-replica leader.
//
// The commit index is first raised to the snapshot's lastApplyIndex (if larger), then to the last
// log index (if larger). Finally the log store is told about the new commit index when it is
// ahead of the commit version reported by logStoreWalCommitVer().
// Does nothing (besides logging an error) when pSyncNode is NULL, is not the leader, or has more
// than one replica.
void syncOneReplicaAdvance(SSyncNode* pSyncNode) {
  ASSERT(false && "deprecated");

  if (pSyncNode == NULL) {
    sError("pSyncNode is NULL");
    return;
  }

  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    sNError(pSyncNode, "not leader, can not advance commit index");
    return;
  }

  if (pSyncNode->replicaNum != 1) {
    sNError(pSyncNode, "not one replica, can not advance commit index");
    return;
  }

  // advance commit index to snapshot first
  // zero-initialized so lastApplyIndex is well defined even if the callback leaves it untouched
  SSnapshot snapshot = {0};
  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
  if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
    SyncIndex commitBegin = pSyncNode->commitIndex;
    SyncIndex commitEnd = snapshot.lastApplyIndex;
    pSyncNode->commitIndex = snapshot.lastApplyIndex;
    sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
  }

  // advance commit index as large as possible
  SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode);
  if (lastIndex > pSyncNode->commitIndex) {
    sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" PRId64, pSyncNode->commitIndex + 1, lastIndex);
    pSyncNode->commitIndex = lastIndex;
  }

  // call back Wal
  SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
  if (pSyncNode->commitIndex > walCommitVer) {
    pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
  }
}
// Deprecated: try to advance the leader's commit index.
//
// Steps:
//   1. raise the commit index to the snapshot's lastApplyIndex when that is larger;
//   2. walk from the last log index down to the commit index and pick the first index that
//      syncAgree() accepts and whose entry term is not greater than the current term;
//   3. never fall behind the commit version reported by logStoreWalCommitVer();
//   4. if the commit index moved, store it, notify the log store and run syncNodeDoCommit()
//      on the newly committed range.
// Returns early (after logging) when pSyncNode is NULL, is not the leader, or a log entry cannot
// be read.
void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
  ASSERTS(false, "deprecated");

  if (pSyncNode == NULL) {
    sError("pSyncNode is NULL");
    return;
  }

  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    sNError(pSyncNode, "not leader, can not advance commit index");
    return;
  }

  // advance commit index to snapshot first
  // zero-initialized so lastApplyIndex is well defined even if the callback leaves it untouched
  SSnapshot snapshot = {0};
  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
  if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
    SyncIndex commitBegin = pSyncNode->commitIndex;
    SyncIndex commitEnd = snapshot.lastApplyIndex;
    pSyncNode->commitIndex = snapshot.lastApplyIndex;
    sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
  }

  // update commit index
  SyncIndex newCommitIndex = pSyncNode->commitIndex;
  for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) {
    if (!syncAgree(pSyncNode, index)) {
      continue;
    }

    // fetch the entry (cache first, then the log store) to inspect its term
    SSyncRaftEntry* pEntry = NULL;
    SLRUCache*      pCache = pSyncNode->pLogStore->pCache;
    LRUHandle*      h = taosLRUCacheLookup(pCache, &index, sizeof(index));
    if (h) {
      pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);

      pSyncNode->pLogStore->cacheHit++;
      sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry);

    } else {
      pSyncNode->pLogStore->cacheMiss++;
      sNTrace(pSyncNode, "miss cache index:%" PRId64, index);

      int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
      if (code != 0) {
        sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index);
        return;
      }
    }

    // cannot commit, even if quorum agree. need check term!
    bool committable = (pEntry->term <= pSyncNode->raftStore.currentTerm);
    if (committable) {
      // update commit index
      newCommitIndex = index;
    } else {
      sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index,
              pEntry->term);
    }

    // the entry is either borrowed from the cache or owned by us
    if (h) {
      taosLRUCacheRelease(pCache, h, false);
    } else {
      syncEntryDestroy(pEntry);
    }

    if (committable) {
      break;
    }
  }

  // advance commit index as large as possible
  SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
  if (walCommitVer > newCommitIndex) {
    newCommitIndex = walCommitVer;
  }

  // maybe execute fsm
  if (newCommitIndex > pSyncNode->commitIndex) {
    SyncIndex beginIndex = pSyncNode->commitIndex + 1;
    SyncIndex endIndex = newCommitIndex;

    // update commit index
    pSyncNode->commitIndex = newCommitIndex;

    // call back Wal
    pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);

    // execute fsm
    if (pSyncNode->pFsm != NULL) {
      int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
      if (code != 0) {
        sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex,
                endIndex);
        return;
      }
    }
  }
}
bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) {
// I am leader, I agree
......@@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) {
return c;
}
// Returns the quorum size configured on the node.
// The former time/log-distance based computation was compiled out and sat after an unconditional
// return, so it has been removed.
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return pSyncNode->quorum; }
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) {
++agreeCount;
}
if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) {
return true;
}
}
return false;
}
*/
// Returns the quorum size stored on the sync node.
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
  return pSyncNode->quorum;
}
bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) {
int count = 0;
......
......@@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
for (int i = 0; i < pNode->peersNum; ++i) {
SRpcMsg rpcMsg = {0};
ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr());
continue;
}
SyncRequestVote* pMsg = rpcMsg.pCont;
pMsg->srcId = pNode->myRaftId;
......@@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
pMsg->term = pNode->raftStore.currentTerm;
ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr());
continue;
}
ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr);
continue;
}
}
return ret;
return 0;
}
int32_t syncNodeElect(SSyncNode* pSyncNode) {
......
......@@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
goto _DEL_WAL;
} else {
lastApplyIndex -= SYNC_VNODE_LOG_RETENTION;
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
......@@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
if (pSyncNode->replicaNum > 1) {
// multi replicas
lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1);
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);
......@@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) {
return state;
}
#if 0
// Fills pSnapshot with the snapshot info that corresponds to the log entry at `index`.
// Returns 0 on success, -1 when the index is below SYNC_INDEX_BEGIN, the node cannot be acquired,
// or the entry cannot be read from the log store.
int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) {
  if (index < SYNC_INDEX_BEGIN) {
    return -1;
  }

  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode == NULL) {
    return -1;
  }
  ASSERT(rid == pNode->rid);

  int32_t         result = -1;
  SSyncRaftEntry* pEntry = NULL;
  if (pNode->pLogStore->syncLogGetEntry(pNode->pLogStore, index, &pEntry) == 0) {
    ASSERT(pEntry != NULL);

    pSnapshot->data = NULL;
    pSnapshot->lastApplyIndex = index;
    pSnapshot->lastApplyTerm = pEntry->term;
    pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pNode, index);
    result = 0;
  }

  // the entry (if any) was handed to us by the log store, so destroy it on every path
  if (pEntry != NULL) {
    syncEntryDestroy(pEntry);
  }
  syncNodeRelease(pNode);
  return result;
}
// Copies the node's last config index (raftCfg.lastConfigIndex) into sMeta.
// Returns -1 when the node cannot be acquired, 0 otherwise.
int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode == NULL) {
    return -1;
  }
  ASSERT(rid == pNode->rid);

  sMeta->lastConfigIndex = pNode->raftCfg.lastConfigIndex;
  sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pNode->vgId, pNode->raftCfg.lastConfigIndex);

  syncNodeRelease(pNode);
  return 0;
}
// Stores in sMeta->lastConfigIndex the largest recorded config index that does not exceed
// snapshotIndex; the first recorded config index is the fallback when none qualifies.
// Returns -1 when the node cannot be acquired, 0 otherwise.
int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode == NULL) {
    return -1;
  }
  ASSERT(rid == pNode->rid);
  ASSERT(pNode->raftCfg.configIndexCount >= 1);

  SyncIndex best = (pNode->raftCfg.configIndexArr)[0];
  for (int32_t pos = 0; pos < pNode->raftCfg.configIndexCount; ++pos) {
    SyncIndex candidate = (pNode->raftCfg.configIndexArr)[pos];
    if (candidate <= best || candidate > snapshotIndex) {
      continue;
    }
    best = candidate;
  }

  sMeta->lastConfigIndex = best;
  sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pNode->vgId, snapshotIndex,
         sMeta->lastConfigIndex);

  syncNodeRelease(pNode);
  return 0;
}
#endif
SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) {
ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
......@@ -1042,9 +970,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->commitIndex = commitIndex;
sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
// restore log store on need
if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) {
sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
// timer ms init
pSyncNode->pingBaseLine = PING_TIMER_MS;
pSyncNode->electBaseLine = tsElectInterval;
......@@ -1107,10 +1038,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->changing = false;
// replication mgr
syncNodeLogReplMgrInit(pSyncNode);
if (syncNodeLogReplMgrInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
// peer state
syncNodePeerStateInit(pSyncNode);
if (syncNodePeerStateInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
//
// min match index
......@@ -1205,27 +1142,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) {
int32_t ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
return ret;
}
void syncNodeStartOld(SSyncNode* pSyncNode) {
// start raft
if (pSyncNode->replicaNum == 1) {
raftStoreNextTerm(pSyncNode);
syncNodeBecomeLeader(pSyncNode, "one replica start");
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
syncMaybeAdvanceCommitIndex(pSyncNode);
} else {
syncNodeBecomeFollower(pSyncNode, "first start");
if (ret != 0) {
sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
}
int32_t ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
return ret;
}
int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
......@@ -1236,11 +1156,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
// reset elect timer, long enough
int32_t electMS = TIMER_MAX_MS;
int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr());
return -1;
}
ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
return -1;
}
return ret;
}
......@@ -1829,12 +1754,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->leaderCache = pSyncNode->myRaftId;
for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) {
// maybe overwrite myself, no harm
// just do it!
// pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1;
// maybe wal is deleted
SyncIndex lastIndex;
SyncTerm lastTerm;
int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm);
......@@ -1896,7 +1815,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
bool granted = voteGrantedMajority(pSyncNode->pVotesGranted);
if (!granted) {
sError("vgId:%d, not granted by majority.", pSyncNode->vgId);
return;
}
syncNodeBecomeLeader(pSyncNode, "candidate to leader");
sNTrace(pSyncNode, "state change syncNodeCandidate2Leader");
......@@ -1912,20 +1835,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex);
}
// Legacy candidate -> leader transition.
// Asserts the node is a candidate holding a majority of votes, becomes leader, appends a noop
// entry, tries to advance the commit index and, with more than one replica, starts replication.
void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
  ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));

  syncNodeBecomeLeader(pSyncNode, "candidate to leader");

  // Raft 3.6.2 Committing entries from previous terms
  syncNodeAppendNoop(pSyncNode);
  syncMaybeAdvanceCommitIndex(pSyncNode);

  // a single replica has nobody to replicate to
  if (pSyncNode->replicaNum <= 1) {
    return;
  }
  syncNodeReplicate(pSyncNode);
}
// True when the node's vgId is 1.
bool syncNodeIsMnode(SSyncNode* pSyncNode) {
  return pSyncNode->vgId == 1;
}
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
......@@ -1971,7 +1880,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) {
// need assert
// Records a vote for pRaftId.
// Preconditions (asserted): `term` equals the node's current term and the node has not voted yet.
void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) {
  ASSERT(term == pSyncNode->raftStore.currentTerm);

  // query the vote state once; the call stays outside ASSERT so it does not depend on how the
  // assertion macro expands
  bool voted = raftStoreHasVoted(pSyncNode);
  ASSERT(!voted);

  raftStoreVote(pSyncNode, pRaftId);
}
......@@ -2649,24 +2559,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
return 0;
}
// Deprecated local-command dispatcher.
// Logs the received command, then either steps down to the term carried by the message or applies
// the carried commit index as a follower; any other command is logged as an error.
// Always returns 0.
int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  ASSERT(false && "deprecated");

  SyncLocalCmd* pCmd = pRpcMsg->pCont;
  syncLogRecvLocalCmd(ths, pCmd, "");

  switch (pCmd->cmd) {
    case SYNC_LOCAL_CMD_STEP_DOWN:
      syncNodeStepDown(ths, pCmd->currentTerm);
      break;

    case SYNC_LOCAL_CMD_FOLLOWER_CMT:
      syncNodeFollowerCommit(ths, pCmd->commitIndex);
      break;

    default:
      sError("error local cmd");
      break;
  }

  return 0;
}
// TLA+ Spec
// ClientRequest(i, v) ==
// /\ state[i] = Leader
......@@ -2711,96 +2603,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn
}
}
// Legacy client-request handler.
//
// Builds a raft entry for the request at the log store's write index and the current term. On a
// leader the entry is appended to the log store, cached, replicated (multi replica) or committed
// right away (single replica). When the append fails the function returns -1; with more than one
// replica the FSM commit callback is invoked with code -1 first.
// If pRetIndex is not NULL it receives the entry index, or SYNC_INDEX_INVALID when no entry exists.
int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) {
  sNTrace(ths, "on client request");

  int32_t   ret = 0;
  SyncIndex writeIndex = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
  SyncTerm  curTerm = ths->raftStore.currentTerm;

  SSyncRaftEntry* pEntry = NULL;
  if (pMsg->msgType != TDMT_SYNC_CLIENT_REQUEST) {
    pEntry = syncEntryBuildFromRpcMsg(pMsg, curTerm, writeIndex);
  } else {
    pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, curTerm, writeIndex);
  }

  // cache handle of the entry; stays NULL until the entry has been put into the cache
  LRUHandle* hCache = NULL;

  if (ths->state == TAOS_SYNC_STATE_LEADER) {
    // append entry
    int32_t appendCode = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false);
    if (appendCode != 0) {
      if (ths->replicaNum != 1) {
        // del resp mgr, call FpCommitCb
        SFsmCbMeta cbMeta = {
            .index = pEntry->index,
            .lastConfigIndex = SYNC_INDEX_INVALID,
            .isWeak = pEntry->isWeak,
            .code = -1,
            .state = ths->state,
            .seqNum = pEntry->seqNum,
            .term = pEntry->term,
            .currentTerm = ths->raftStore.currentTerm,
            .flag = 0,
        };
        ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta);
      }

      if (hCache) {
        taosLRUCacheRelease(ths->pLogStore->pCache, hCache, false);
      } else {
        syncEntryDestroy(pEntry);
      }
      return -1;
    }

    syncCacheEntry(ths->pLogStore, pEntry, &hCache);

    // if multi replica, start replicate right now
    if (ths->replicaNum > 1) {
      syncNodeReplicate(ths);
    }

    // if only myself, maybe commit right now
    if (ths->replicaNum == 1) {
      if (syncNodeIsMnode(ths)) {
        syncMaybeAdvanceCommitIndex(ths);
      } else {
        syncOneReplicaAdvance(ths);
      }
    }
  }

  if (pRetIndex != NULL) {
    *pRetIndex = (ret == 0 && pEntry != NULL) ? pEntry->index : SYNC_INDEX_INVALID;
  }

  // the entry is either borrowed from the cache or owned by us
  if (hCache) {
    taosLRUCacheRelease(ths->pLogStore->pCache, hCache, false);
  } else {
    syncEntryDestroy(pEntry);
  }

  return ret;
}
const char* syncStr(ESyncState state) {
switch (state) {
case TAOS_SYNC_STATE_FOLLOWER:
......@@ -2905,129 +2707,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
}
// Deprecated: apply the log entries in [beginIndex, endIndex] to the FSM.
//
// The range start is first moved past the snapshot's lastApplyIndex when the snapshot already
// covers it. For every remaining index the entry is taken from the cache or the log store,
// converted back to its original rpc message and, for user-commit message types, handed to
// FpCommitCb unless the node is a restored single replica whose vgId is not 1. When the entry is
// the last one in the log store and restore has not finished yet, FpRestoreFinishCb is invoked.
// An entry that cannot be read is logged and skipped.
// Returns 0, or -1 when ths is NULL (and the range is not empty).
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
  ASSERT(false);

  if (beginIndex > endIndex) {
    return 0;
  }
  if (ths == NULL) {
    return -1;
  }

  if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) {
    // advance commit index to snapshot first
    SSnapshot snapshot = {0};
    ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
    if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) {
      sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex);

      // update begin index
      beginIndex = snapshot.lastApplyIndex + 1;
    }
  }

  int32_t code = 0;
  sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex);

  // execute fsm
  if (ths->pFsm == NULL) {
    return 0;
  }

  for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
    if (i == SYNC_INDEX_INVALID) {
      continue;
    }

    SSyncRaftEntry* pEntry;
    SLRUCache*      pCache = ths->pLogStore->pCache;
    LRUHandle*      h = taosLRUCacheLookup(pCache, &i, sizeof(i));
    if (h) {
      pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);

      ths->pLogStore->cacheHit++;
      sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry);

    } else {
      ths->pLogStore->cacheMiss++;
      sNTrace(ths, "miss cache index:%" PRId64, i);

      code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
      // ASSERT(code == 0);
      // ASSERT(pEntry != NULL);
      if (code != 0 || pEntry == NULL) {
        sNError(ths, "get log entry error");
        sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr());
        continue;
      }
    }

    SRpcMsg rpcMsg = {0};
    syncEntry2OriginalRpc(pEntry, &rpcMsg);

    sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType));

    // user commit
    if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) {
      // a restored single replica whose vgId is not 1 does not run the callback here
      bool internalExecute = !((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1);

      sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute,
              TMSG_INFO(pEntry->msgType));

      // execute fsm in apply thread, or execute outside syncPropose
      if (internalExecute) {
        SFsmCbMeta cbMeta = {
            .index = pEntry->index,
            .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index),
            .isWeak = pEntry->isWeak,
            .code = 0,
            .state = ths->state,
            .seqNum = pEntry->seqNum,
            .term = pEntry->term,
            .currentTerm = ths->raftStore.currentTerm,
            .flag = flag,
        };

        syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info);
        ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta);
      }
    }

#if 0
    // execute in pre-commit
    // leader transfer
    if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) {
      code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry);
      ASSERT(code == 0);
    }
#endif

    // restore finish
    // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok
    if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore) && ths->restoreFinish == false) {
      if (ths->pFsm->FpRestoreFinishCb != NULL) {
        ths->pFsm->FpRestoreFinishCb(ths->pFsm);
      }
      ths->restoreFinish = true;

      int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime;
      sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay);
    }

    rpcFreeCont(rpcMsg.pCont);

    // the entry is either borrowed from the cache or owned by us
    if (h) {
      taosLRUCacheRelease(pCache, h, false);
    } else {
      syncEntryDestroy(pEntry);
    }
  }

  return 0;
}
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
for (int32_t i = 0; i < ths->replicaNum; ++i) {
if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) {
......
......@@ -945,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
ASSERT(pNode->logReplMgrs[i] == NULL);
pNode->logReplMgrs[i] = syncLogReplMgrCreate();
if (pNode->logReplMgrs[i] == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pNode->logReplMgrs[i]->peerId = i;
ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory.");
}
return 0;
}
......
......@@ -48,92 +48,6 @@
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg);
// Deprecated: send one append-entries message to pDestId.
//
// When `snapshot` is true and the peer's next index lies outside the range reported by the log
// store, a snapshot transfer is started first. The message carries the entry at the peer's next
// index (taken from the cache or the log store), or no data when the log store reports
// TSDB_CODE_WAL_LOG_NOT_EXIST. Any other read failure is logged and -1 is returned.
// Returns 0 once the message has been passed to syncNodeMaybeSendAppendEntries().
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) {
  ASSERT(false && "deprecated");

  // next index
  SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);

  if (snapshot) {
    // maybe start snapshot
    SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
    SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
    bool      outOfLog = (nextIndex < logStartIndex) || (nextIndex - 1 > logEndIndex);
    if (outOfLog) {
      sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex,
              logStartIndex, logEndIndex);
      // start snapshot
      (void)syncNodeStartSnapshot(pSyncNode, pDestId);
    }
  }

  // pre index, pre term
  SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
  SyncTerm  preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);

  // prepare entry
  SRpcMsg            rpcMsg = {0};
  SyncAppendEntries* pMsg = NULL;
  SSyncRaftEntry*    pEntry = NULL;
  SLRUCache*         pCache = pSyncNode->pLogStore->pCache;
  LRUHandle*         h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex));
  int32_t            code = 0;

  if (h) {
    pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
    pSyncNode->pLogStore->cacheHit++;
    sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
  } else {
    pSyncNode->pLogStore->cacheMiss++;
    sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
    code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
  }

  bool hasEntry = (code == 0);
  if (!hasEntry && terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) {
    sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex);
    return -1;
  }

  if (hasEntry) {
    ASSERT(pEntry != NULL);
    code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId);
  } else {
    // no entry in log
    code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId);
  }
  ASSERT(code == 0);

  pMsg = rpcMsg.pCont;
  if (hasEntry) {
    memcpy(pMsg->data, pEntry, pEntry->bytes);
  }

  // the entry is either borrowed from the cache or owned by us
  if (h) {
    taosLRUCacheRelease(pCache, h, false);
  } else {
    syncEntryDestroy(pEntry);
  }

  // prepare msg
  ASSERT(pMsg != NULL);
  pMsg->srcId = pSyncNode->myRaftId;
  pMsg->destId = *pDestId;
  pMsg->term = pSyncNode->raftStore.currentTerm;
  pMsg->prevLogIndex = preLogIndex;
  pMsg->prevLogTerm = preLogTerm;
  pMsg->commitIndex = pSyncNode->commitIndex;
  pMsg->privateTerm = 0;
  // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);

  // send msg
  syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg);
  return 0;
}
int32_t syncNodeReplicate(SSyncNode* pNode) {
SSyncLogBuffer* pBuf = pNode->pLogBuf;
taosThreadMutexLock(&pBuf->mutex);
......@@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
return 0;
}
// Legacy replication entry point: calls syncNodeReplicateOne() for every peer.
// Returns -1 when the node is not the leader; otherwise 0, even if replication to some peer
// failed (the failure is only logged).
int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) {
  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    return -1;
  }

  sNTrace(pSyncNode, "do replicate");

  int peerPos = 0;
  while (peerPos < pSyncNode->peersNum) {
    SRaftId* pPeerId = &(pSyncNode->peersId[peerPos]);
    int32_t  rc = syncNodeReplicateOne(pSyncNode, pPeerId, true);
    if (rc != 0) {
      sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pPeerId));
    }
    ++peerPos;
  }

  return 0;
}
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
SyncAppendEntries* pMsg = pRpcMsg->pCont;
pMsg->destId = *destRaftId;
......@@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI
return 0;
}
// Legacy sender for an append-entries message.
// Sends pRpcMsg to destRaftId and, when the message carries data, records the sent index and the
// send time in the peer state. A NULL message body or an unknown peer is logged and ignored.
// Always returns 0.
int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
  SyncAppendEntries* pMsg = pRpcMsg->pCont;
  if (pMsg == NULL) {
    sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId);
    return 0;
  }

  SPeerState* pPeerState = syncNodeGetPeerState(pSyncNode, destRaftId);
  if (pPeerState == NULL) {
    sError("vgId:%d, replica maybe dropped", pSyncNode->vgId);
    return 0;
  }

  // save index, otherwise pMsg will be free by rpc
  bool      carriesData = (pMsg->dataLen > 0);
  SyncIndex sentIndex = carriesData ? pMsg->prevLogIndex + 1 : pPeerState->lastSendIndex;

  syncLogSendAppendEntries(pSyncNode, pMsg, "");
  syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg);

  if (carriesData) {
    pPeerState->lastSendIndex = sentIndex;
    pPeerState->lastSendTime = taosGetTimestampMs();
  }

  return 0;
}
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
int32_t ret = 0;
SyncAppendEntries* pMsg = pRpcMsg->pCont;
......
......@@ -322,6 +322,35 @@ bool walLogEntriesComplete(const SWal* pWal) {
return complete;
}
// Truncates the idx file of the fileIdx-th WAL file so that it holds no more than one
// SWalIdxEntry per version in [firstVer, lastVer] of that file.
//
// Returns 0 when the file is already short enough or was truncated, -1 (with terrno set) when the
// file cannot be opened or truncated.
int walTrimIdxFile(SWal* pWal, int32_t fileIdx) {
  SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx);
  ASSERT(pFileInfo != NULL);

  char fnameStr[WAL_FILE_LEN];
  walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);

  // fileSize stays 0 if the stat call does not fill it in, in which case nothing is trimmed
  int64_t fileSize = 0;
  taosStatFile(fnameStr, &fileSize, NULL);

  int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1);
  int64_t lastEndOffset = records * sizeof(SWalIdxEntry);

  if (fileSize <= lastEndOffset) {
    return 0;
  }

  TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE);
  if (pFile == NULL) {
    terrno = TAOS_SYSTEM_ERROR(errno);
    return -1;
  }

  wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize,
        lastEndOffset);

  int code = 0;
  if (taosFtruncateFile(pFile, lastEndOffset) < 0) {
    // capture the error before closing the file, which may overwrite errno
    terrno = TAOS_SYSTEM_ERROR(errno);
    wError("vgId:%d, failed to truncate idx file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
    code = -1;
  }

  taosCloseFile(&pFile);
  return code;
}
int walCheckAndRepairMeta(SWal* pWal) {
// load log files, get first/snapshot/last version info
const char* logPattern = "^[0-9]+.log$";
......@@ -396,6 +425,8 @@ int walCheckAndRepairMeta(SWal* pWal) {
}
updateMeta = true;
(void)walTrimIdxFile(pWal, fileIdx);
int64_t lastVer = walScanLogGetLastVer(pWal, fileIdx);
if (lastVer < 0) {
if (terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) {
......@@ -558,6 +589,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto _err;
}
int64_t count = 0;
while (idxEntry.ver < pFileInfo->lastVer) {
/*A(idxEntry.ver == ckHead.head.version);*/
......@@ -569,11 +601,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
idxEntry.offset, fLogNameStr);
goto _err;
}
wWarn("vgId:%d, wal idx append new entry %" PRId64 " %" PRId64, pWal->cfg.vgId, idxEntry.ver, idxEntry.offset);
if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) {
wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err;
}
count++;
}
if (taosFsyncFile(pIdxFile) < 0) {
......@@ -581,6 +613,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto _err;
}
if (count > 0) {
wInfo("vgId:%d, rebuilt %" PRId64 " wal idx entries until lastVer: %" PRId64, pWal->cfg.vgId, count,
pFileInfo->lastVer);
}
(void)taosCloseFile(&pLogFile);
(void)taosCloseFile(&pIdxFile);
return 0;
......
......@@ -77,6 +77,31 @@ void walUnrefVer(SWalRef *pRef) {
}
#endif
// Points a WAL reference at the first version of the WAL.
// When pRef is NULL a new reference is opened with walOpenRef(); NULL is returned if that fails.
// Under the WAL mutex the reference's refVer is set to the first version and refFile to the
// firstVer of the file info found for that version.
// Returns the (possibly newly opened) reference.
SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) {
  if (pRef == NULL) {
    pRef = walOpenRef(pWal);
  }
  if (pRef == NULL) {
    return NULL;
  }

  taosThreadMutexLock(&pWal->mutex);

  int64_t firstVer = walGetFirstVer(pWal);
  wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, firstVer);
  pRef->refVer = firstVer;

  // bsearch in fileSet
  SWalFileInfo key;
  key.firstVer = firstVer;
  SWalFileInfo *pFound = taosArraySearch(pWal->fileInfoSet, &key, compareWalFileInfo, TD_LE);
  ASSERT(pFound != NULL);
  pRef->refFile = pFound->firstVer;

  taosThreadMutexUnlock(&pWal->mutex);

  return pRef;
}
SWalRef *walRefCommittedVer(SWal *pWal) {
SWalRef *pRef = walOpenRef(pWal);
if (pRef == NULL) {
......@@ -87,6 +112,8 @@ SWalRef *walRefCommittedVer(SWal *pWal) {
int64_t ver = walGetCommittedVer(pWal);
wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver);
pRef->refVer = ver;
// bsearch in fileSet
SWalFileInfo tmpInfo;
......
......@@ -15,8 +15,8 @@
#define ALLOW_FORBID_FUNC
#define _DEFAULT_SOURCE
#include "os.h"
#include <stdlib.h>
#include "os.h"
#ifdef WINDOWS
void swapStr(char* j, char* J, int width) {
......
......@@ -41,12 +41,6 @@ static void median(void *src, int64_t size, int64_t s, int64_t e, const void *pa
ASSERT(comparFn(elePtrAt(src, size, mid), elePtrAt(src, size, s), param) <= 0 &&
comparFn(elePtrAt(src, size, s), elePtrAt(src, size, e), param) <= 0);
#ifdef _DEBUG_VIEW
// tTagsPrints(src[s], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[mid], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[e], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
#endif
}
static void tInsertSort(void *src, int64_t size, int32_t s, int32_t e, const void *param, __ext_compar_fn_t comparFn,
......@@ -278,14 +272,4 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar,
}
taosMemoryFree(buf);
/*
char *buf = taosMemoryCalloc(1, size);
for (i = len - 1; i > 0; i--) {
doswap(elePtrAt(base, size, 0), elePtrAt(base, size, i));
taosheapadjust(base, size, 0, i - 1, parcompar, compar, parswap, swap, maxroot);
}
taosMemoryFreeClear(buf);
*/
}
......@@ -55,7 +55,7 @@ fi
date
docker run \
-v $REP_MOUNT_PARAM \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true;make -j || exit 1"
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=true;make -j || exit 1"
if [[ -d ${WORKDIR}/debugNoSan ]] ;then
echo "delete ${WORKDIR}/debugNoSan"
......@@ -70,7 +70,7 @@ mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugNoSan
date
docker run \
-v $REP_MOUNT_PARAM \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug;make -j || exit 1 "
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=true;make -j || exit 1 "
mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugSan
......
......@@ -206,7 +206,7 @@ class TDTestCase:
paraDict['rowsPerTbl'] = self.rowsPerTbl
consumerId = 1
if self.snapshot == 0:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2))
elif self.snapshot == 1:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1))
......
......@@ -213,9 +213,9 @@ class TDTestCase:
paraDict['rowsPerTbl'] = self.rowsPerTbl
consumerId = 1
if self.snapshot == 0:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2*2 + 1/2*1/2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2) * (1/2*3))
elif self.snapshot == 1:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1 + 1/2))
topicList = topicFromStb1
ifcheckdata = 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册