diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 30a6188db0057ebee0320589fdc4d45b22842453..04bf6bcc2b50ccf1a02ace316234acb78041d446 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -40,12 +40,9 @@ typedef struct SDelIdx SDelIdx; typedef struct STbData STbData; typedef struct SMemTable SMemTable; typedef struct STbDataIter STbDataIter; -typedef struct STable STable; typedef struct SMapData SMapData; typedef struct SBlockIdx SBlockIdx; typedef struct SBlock SBlock; -typedef struct SBlockStatis SBlockStatis; -typedef struct SAggrBlkCol SAggrBlkCol; typedef struct SColData SColData; typedef struct SBlockDataHdr SBlockDataHdr; typedef struct SBlockData SBlockData; @@ -62,8 +59,7 @@ typedef struct SDelFReader SDelFReader; typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; -typedef struct STsdbFSState STsdbFSState; -typedef struct STsdbSnapHdr STsdbSnapHdr; +typedef struct STsdbReadSnap STsdbReadSnap; #define TSDB_MAX_SUBBLOCKS 8 #define TSDB_FHDR_SIZE 512 @@ -176,8 +172,6 @@ void tsdbMemTableDestroy(SMemTable *pMemTable); void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData); void tsdbRefMemTable(SMemTable *pMemTable); void tsdbUnrefMemTable(SMemTable *pMemTable); -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem); -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem); // STbDataIter int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter); void *tsdbTbDataIterDestroy(STbDataIter *pIter); @@ -188,30 +182,39 @@ bool tsdbTbDataIterNext(STbDataIter *pIter); int32_t tsdbGetNRowsInTbData(STbData *pTbData); // tsdbFile.c ============================================================================================== typedef enum { TSDB_HEAD_FILE = 0, TSDB_DATA_FILE, TSDB_LAST_FILE, TSDB_SMA_FILE } EDataFileT; -void tsdbDataFileName(STsdb *pTsdb, SDFileSet *pDFileSet, EDataFileT ftype, char fname[]); -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype); + bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2); -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype); int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype); -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype); +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile); +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile); +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile); +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile); int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tGetDelFile(uint8_t *p, SDelFile *pDelFile); int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet); int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet); + +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]); +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]); +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]); +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]); // SDelFile void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]); // tsdbFS.c ============================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS); -int32_t tsdbFSClose(STsdbFS *pFS); -int32_t tsdbFSBegin(STsdbFS *pFS); -int32_t tsdbFSCommit(STsdbFS *pFS); +int32_t tsdbFSOpen(STsdb *pTsdb); +int32_t tsdbFSClose(STsdb *pTsdb); +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSDestroy(STsdbFS *pFS); +int32_t tDFileSetCmprFn(const void *p1, const void *p2); +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFS); +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS); +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS); + int32_t tsdbFSRollback(STsdbFS *pFS); -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile); -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet); -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid); -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag); +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet); +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); // tsdbReaderWriter.c ============================================================================================== // SDataFWriter int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet); @@ -222,8 +225,7 @@ int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, uint8_t **ppBu int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2, SBlockIdx *pBlockIdx, SBlock *pBlock, int8_t cmprAlg); -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter); -int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); +int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); // SDataFReader int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet); int32_t tsdbDataFReaderClose(SDataFReader **ppReader); @@ -245,6 +247,9 @@ int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb int32_t tsdbDelFReaderClose(SDelFReader **ppReader); int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, uint8_t **ppBuf); int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf); +// tsdbRead.c ============================================================================================== +int32_t tsdbTakeReadSnap(STsdb *pTsdb, STsdbReadSnap **ppSnap); +void tsdbUntakeReadSnap(STsdb *pTsdb, STsdbReadSnap *pSnap); #define TSDB_CACHE_NO(c) ((c).cacheLast == 0) #define TSDB_CACHE_LAST_ROW(c) (((c).cacheLast & 1) > 0) @@ -276,6 +281,11 @@ typedef struct { TSKEY minKey; } SRtn; +struct STsdbFS { + SDelFile *pDelFile; + SArray *aDFileSet; // SArray +}; + struct STsdb { char *path; SVnode *pVnode; @@ -283,7 +293,7 @@ struct STsdb { TdThreadRwlock rwLock; SMemTable *mem; SMemTable *imem; - STsdbFS *pFS; + STsdbFS fs; SLRUCache *lruCache; }; @@ -402,16 +412,6 @@ struct SBlock { SSubBlock aSubBlock[TSDB_MAX_SUBBLOCKS]; }; -struct SAggrBlkCol { - int16_t colId; - int16_t maxIndex; - int16_t minIndex; - int16_t numOfNull; - int64_t sum; - int64_t max; - int64_t min; -}; - struct SColData { int16_t cid; int8_t type; @@ -465,12 +465,6 @@ struct SDelIdx { int64_t size; }; -struct SDelFile { - int64_t commitID; - int64_t size; - int64_t offset; -}; - #pragma pack(push, 1) struct SBlockDataHdr { uint32_t delimiter; @@ -479,34 +473,50 @@ struct SBlockDataHdr { }; #pragma pack(pop) +struct SDelFile { + volatile int32_t nRef; + + int64_t commitID; + int64_t size; + int64_t offset; +}; + struct SHeadFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; int64_t offset; }; struct SDataFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SLastFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SSmaFile { + volatile int32_t nRef; + int64_t commitID; int64_t size; }; struct SDFileSet { - SDiskID diskId; - int32_t fid; - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; + SDiskID diskId; + int32_t fid; + SHeadFile *pHeadF; + SDataFile *pDataF; + SLastFile *pLastF; + SSmaFile *pSmaF; }; struct SRowIter { @@ -521,26 +531,33 @@ struct SRowMerger { SArray *pArray; // SArray }; -struct STsdbFSState { - SDelFile *pDelFile; - SArray *aDFileSet; // SArray - SDelFile delFile; -}; - -struct STsdbFS { - STsdb *pTsdb; - TdThreadRwlock lock; - int8_t inTxn; - STsdbFSState *cState; - STsdbFSState *nState; -}; - struct SDelFWriter { STsdb *pTsdb; SDelFile fDel; TdFilePtr pWriteH; }; +struct SDataFWriter { + STsdb *pTsdb; + SDFileSet wSet; + + TdFilePtr pHeadFD; + TdFilePtr pDataFD; + TdFilePtr pLastFD; + TdFilePtr pSmaFD; + + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; +}; + +struct STsdbReadSnap { + SMemTable *pMem; + SMemTable *pIMem; + STsdbFS fs; +}; + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index 484020e6e16d75b82b82318f09e23337acebfe4e..e259dde29c86a7559d8d4dd5f256a81dc137727b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -464,7 +464,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { switch (state->state) { case SFSNEXTROW_FS: - state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; + // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; state->nFileSet = taosArrayGetSize(state->aDFileSet); state->iFileSet = state->nFileSet; @@ -793,9 +793,10 @@ typedef struct { TSDBROW memRow, imemRow, fsRow; TsdbNextRowState input[3]; - SMemTable *pMemTable; - SMemTable *pIMemTable; - STsdb *pTsdb; + // SMemTable *pMemTable; + // SMemTable *pIMemTable; + STsdbReadSnap *pReadSnap; + STsdb *pTsdb; } CacheNextRowIter; static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) { @@ -803,16 +804,16 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - tsdbTakeMemSnapshot(pTsdb, &pIter->pMemTable, &pIter->pIMemTable); + tsdbTakeReadSnap(pTsdb, &pIter->pReadSnap); STbData *pMem = NULL; - if (pIter->pMemTable) { - tsdbGetTbDataFromMemTable(pIter->pMemTable, suid, uid, &pMem); + if (pIter->pReadSnap->pMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem); } STbData *pIMem = NULL; - if (pIter->pIMemTable) { - tsdbGetTbDataFromMemTable(pIter->pIMemTable, suid, uid, &pIMem); + if (pIter->pReadSnap->pIMem) { + tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid, &pIMem); } pIter->pTsdb = pTsdb; @@ -821,7 +822,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs SDelIdx delIdx; - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile *pDelFile = pIter->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader *pDelFReader; @@ -846,6 +847,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; + pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL}; @@ -885,7 +887,7 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } - tsdbUntakeMemSnapshot(pIter->pTsdb, pIter->pMemTable, pIter->pIMemTable); + tsdbUntakeReadSnap(pIter->pTsdb, pIter->pReadSnap); return code; _err: @@ -1172,480 +1174,480 @@ _err: return code; } -static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { - int32_t code = 0; - SArray *pSkyline = NULL; - - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } - - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } - - *ppRow = NULL; - - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - - SDelIdx delIdx; - - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; - - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; - - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; - - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; - - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } - - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - - SBlockIdx idx = {.suid = suid, .uid = uid}; - - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; - - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; - - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } - - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; - - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - if (input[i].pRow == NULL) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; - - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } - } - - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } - - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; - - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); - - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } - - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } - - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); - - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = i; - merge[nMerge++] = max[i]; - } - - input[iMax[i]].next = deleted; - } - - // merge if nMerge > 1 - if (nMerge > 0) { - *dup = false; - - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; - - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } - - } while (1); - - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - - return code; -_err: - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) { +// int32_t code = 0; +// SArray *pSkyline = NULL; + +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); + +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); + +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } + +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } + +// *ppRow = NULL; + +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); + +// SDelIdx delIdx; + +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; + +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; + +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; + +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; + +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } + +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; + +// SBlockIdx idx = {.suid = suid, .uid = uid}; + +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; + +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; + +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; + +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } + +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; + +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// if (input[i].pRow == NULL) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; + +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } +// } + +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } + +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; + +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); + +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } + +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } + +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); + +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = i; +// merge[nMerge++] = max[i]; +// } + +// input[iMax[i]].next = deleted; +// } + +// // merge if nMerge > 1 +// if (nMerge > 0) { +// *dup = false; + +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; + +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } + +// } while (1); + +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); + +// return code; +// _err: +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } // static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, STSRow **ppRow) { -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { - int32_t code = 0; - SArray *pSkyline = NULL; - STSRow *pRow = NULL; - STSRow **ppRow = &pRow; - - STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); - int16_t nCol = pTSchema->numOfCols; - // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); - SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); - - tb_uid_t suid = getTableSuidByUid(uid, pTsdb); - - STbData *pMem = NULL; - if (pTsdb->mem) { - tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); - } - - STbData *pIMem = NULL; - if (pTsdb->imem) { - tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); - } - - *ppLastArray = NULL; - - pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); - - SDelIdx delIdx; - - SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); - if (pDelFile) { - SDelFReader *pDelFReader; - - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); - if (code) goto _err; - - code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); - if (code) goto _err; - - code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); - if (code) goto _err; - - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); - if (code) goto _err; - } - - int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; - - SBlockIdx idx = {.suid = suid, .uid = uid}; - - SFSNextRowIter fsState = {0}; - fsState.state = SFSNEXTROW_FS; - fsState.pTsdb = pTsdb; - fsState.pBlockIdxExp = &idx; - - SMemNextRowIter memState = {0}; - SMemNextRowIter imemState = {0}; - TSDBROW memRow, imemRow, fsRow; - - TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, - {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, - {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; - - if (pMem) { - memState.pMem = pMem; - memState.state = SMEMNEXTROW_ENTER; - input[0].stop = false; - input[0].next = true; - } - if (pIMem) { - imemState.pMem = pIMem; - imemState.state = SMEMNEXTROW_ENTER; - input[1].stop = false; - input[1].next = true; - } - - int16_t nilColCount = nCol - 1; // count of null & none cols - int iCol = 0; // index of first nil col index from left to right - bool setICol = false; - - do { - for (int i = 0; i < 3; ++i) { - if (input[i].next && !input[i].stop) { - code = input[i].nextRowFn(input[i].iter, &input[i].pRow); - if (code) goto _err; - - if (input[i].pRow == NULL) { - input[i].stop = true; - input[i].next = false; - } - } - } - - if (input[0].stop && input[1].stop && input[2].stop) { - break; - } - - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; - int nMax = 0; - TSKEY maxKey = TSKEY_MIN; - - for (int i = 0; i < 3; ++i) { - if (!input[i].stop && input[i].pRow != NULL) { - TSDBKEY key = TSDBROW_KEY(input[i].pRow); - - // merging & deduplicating on client side - if (maxKey <= key.ts) { - if (maxKey < key.ts) { - nMax = 0; - maxKey = key.ts; - } - - iMax[nMax] = i; - max[nMax++] = input[i].pRow; - } - } - } - - // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; - int nMerge = 0; - for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); - - bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); - if (!deleted) { - iMerge[nMerge] = iMax[i]; - merge[nMerge++] = max[i]; - } - - input[iMax[i]].next = deleted; - } - - // merge if nMerge > 1 - if (nMerge > 0) { - if (nMerge == 1) { - code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); - if (code) goto _err; - } else { - // merge 2 or 3 rows - SRowMerger merger = {0}; - - tRowMergerInit(&merger, merge[0], pTSchema); - for (int i = 1; i < nMerge; ++i) { - tRowMerge(&merger, merge[i]); - } - tRowMergerGetRow(&merger, ppRow); - tRowMergerClear(&merger); - } - } else { - /* *ppRow = NULL; */ - /* return code; */ - continue; - } - - if (iCol == 0) { - STColumn *pTColumn = &pTSchema->columns[0]; - SColVal *pColVal = &(SColVal){0}; - - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); - - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - ++iCol; - - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); - tTSRowGetVal(*ppRow, pTSchema, i, pColVal); - // if (taosArrayPush(pColArray, pColVal) == NULL) { - if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - if (pColVal->isNull || pColVal->isNone) { - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - if (!setICol) { - iCol = i; - setICol = true; - } - } else { - --nilColCount; - } - } - - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - - continue; - } - - setICol = false; - for (int16_t i = iCol; i < nCol; ++i) { - SColVal colVal = {0}; - tTSRowGetVal(*ppRow, pTSchema, i, &colVal); - TSKEY rowTs = (*ppRow)->ts; - - // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); - SColVal *tColVal = &tTsVal->colVal; - - if (!colVal.isNone && !colVal.isNull) { - if (tColVal->isNull || tColVal->isNone) { - // taosArraySet(pColArray, i, &colVal); - taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); - --nilColCount; - } - } else { - if ((tColVal->isNull || tColVal->isNone) && !setICol) { - iCol = i; - setICol = true; - - for (int j = 0; j < nMerge; ++j) { - SColVal jColVal = {0}; - tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); - if (jColVal.isNull || jColVal.isNone) { - input[iMerge[j]].next = true; - } - } - } - } - } - - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - } while (nilColCount > 0); - - // if () new ts row from pColArray if non empty - /* if (taosArrayGetSize(pColArray) == nCol) { */ - /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ - /* if (code) goto _err; */ - /* } */ - /* taosArrayDestroy(pColArray); */ - if (taosArrayGetSize(pColArray) <= 0) { - *ppLastArray = NULL; - taosArrayDestroy(pColArray); - } else { - *ppLastArray = pColArray; - } - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - - return code; -_err: - taosArrayDestroy(pColArray); - if (*ppRow) { - taosMemoryFreeClear(*ppRow); - } - for (int i = 0; i < 3; ++i) { - if (input[i].nextRowClearFn) { - input[i].nextRowClearFn(input[i].iter); - } - } - if (pSkyline) { - taosArrayDestroy(pSkyline); - } - taosMemoryFreeClear(pTSchema); - tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} +// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { +// int32_t code = 0; +// SArray *pSkyline = NULL; +// STSRow *pRow = NULL; +// STSRow **ppRow = &pRow; + +// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1); +// int16_t nCol = pTSchema->numOfCols; +// // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal)); +// SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol)); + +// tb_uid_t suid = getTableSuidByUid(uid, pTsdb); + +// STbData *pMem = NULL; +// if (pTsdb->mem) { +// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem); +// } + +// STbData *pIMem = NULL; +// if (pTsdb->imem) { +// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem); +// } + +// *ppLastArray = NULL; + +// pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); + +// SDelIdx delIdx; + +// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); +// if (pDelFile) { +// SDelFReader *pDelFReader; + +// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); +// if (code) goto _err; + +// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); +// if (code) goto _err; + +// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline); +// if (code) goto _err; + +// tsdbDelFReaderClose(&pDelFReader); +// } else { +// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline); +// if (code) goto _err; +// } + +// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1; + +// SBlockIdx idx = {.suid = suid, .uid = uid}; + +// SFSNextRowIter fsState = {0}; +// fsState.state = SFSNEXTROW_FS; +// fsState.pTsdb = pTsdb; +// fsState.pBlockIdxExp = &idx; + +// SMemNextRowIter memState = {0}; +// SMemNextRowIter imemState = {0}; +// TSDBROW memRow, imemRow, fsRow; + +// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL}, +// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL}, +// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}}; + +// if (pMem) { +// memState.pMem = pMem; +// memState.state = SMEMNEXTROW_ENTER; +// input[0].stop = false; +// input[0].next = true; +// } +// if (pIMem) { +// imemState.pMem = pIMem; +// imemState.state = SMEMNEXTROW_ENTER; +// input[1].stop = false; +// input[1].next = true; +// } + +// int16_t nilColCount = nCol - 1; // count of null & none cols +// int iCol = 0; // index of first nil col index from left to right +// bool setICol = false; + +// do { +// for (int i = 0; i < 3; ++i) { +// if (input[i].next && !input[i].stop) { +// code = input[i].nextRowFn(input[i].iter, &input[i].pRow); +// if (code) goto _err; + +// if (input[i].pRow == NULL) { +// input[i].stop = true; +// input[i].next = false; +// } +// } +// } + +// if (input[0].stop && input[1].stop && input[2].stop) { +// break; +// } + +// // select maxpoint(s) from mem, imem, fs +// TSDBROW *max[3] = {0}; +// int iMax[3] = {-1, -1, -1}; +// int nMax = 0; +// TSKEY maxKey = TSKEY_MIN; + +// for (int i = 0; i < 3; ++i) { +// if (!input[i].stop && input[i].pRow != NULL) { +// TSDBKEY key = TSDBROW_KEY(input[i].pRow); + +// // merging & deduplicating on client side +// if (maxKey <= key.ts) { +// if (maxKey < key.ts) { +// nMax = 0; +// maxKey = key.ts; +// } + +// iMax[nMax] = i; +// max[nMax++] = input[i].pRow; +// } +// } +// } + +// // delete detection +// TSDBROW *merge[3] = {0}; +// int iMerge[3] = {-1, -1, -1}; +// int nMerge = 0; +// for (int i = 0; i < nMax; ++i) { +// TSDBKEY maxKey = TSDBROW_KEY(max[i]); + +// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline); +// if (!deleted) { +// iMerge[nMerge] = iMax[i]; +// merge[nMerge++] = max[i]; +// } + +// input[iMax[i]].next = deleted; +// } + +// // merge if nMerge > 1 +// if (nMerge > 0) { +// if (nMerge == 1) { +// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow); +// if (code) goto _err; +// } else { +// // merge 2 or 3 rows +// SRowMerger merger = {0}; + +// tRowMergerInit(&merger, merge[0], pTSchema); +// for (int i = 1; i < nMerge; ++i) { +// tRowMerge(&merger, merge[i]); +// } +// tRowMergerGetRow(&merger, ppRow); +// tRowMergerClear(&merger); +// } +// } else { +// /* *ppRow = NULL; */ +// /* return code; */ +// continue; +// } + +// if (iCol == 0) { +// STColumn *pTColumn = &pTSchema->columns[0]; +// SColVal *pColVal = &(SColVal){0}; + +// *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey}); + +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } + +// ++iCol; + +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal); +// tTSRowGetVal(*ppRow, pTSchema, i, pColVal); +// // if (taosArrayPush(pColArray, pColVal) == NULL) { +// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) { +// code = TSDB_CODE_OUT_OF_MEMORY; +// goto _err; +// } + +// if (pColVal->isNull || pColVal->isNone) { +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// if (!setICol) { +// iCol = i; +// setICol = true; +// } +// } else { +// --nilColCount; +// } +// } + +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } + +// continue; +// } + +// setICol = false; +// for (int16_t i = iCol; i < nCol; ++i) { +// SColVal colVal = {0}; +// tTSRowGetVal(*ppRow, pTSchema, i, &colVal); +// TSKEY rowTs = (*ppRow)->ts; + +// // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i); +// SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i); +// SColVal *tColVal = &tTsVal->colVal; + +// if (!colVal.isNone && !colVal.isNull) { +// if (tColVal->isNull || tColVal->isNone) { +// // taosArraySet(pColArray, i, &colVal); +// taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal}); +// --nilColCount; +// } +// } else { +// if ((tColVal->isNull || tColVal->isNone) && !setICol) { +// iCol = i; +// setICol = true; + +// for (int j = 0; j < nMerge; ++j) { +// SColVal jColVal = {0}; +// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal); +// if (jColVal.isNull || jColVal.isNone) { +// input[iMerge[j]].next = true; +// } +// } +// } +// } +// } + +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// } while (nilColCount > 0); + +// // if () new ts row from pColArray if non empty +// /* if (taosArrayGetSize(pColArray) == nCol) { */ +// /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */ +// /* if (code) goto _err; */ +// /* } */ +// /* taosArrayDestroy(pColArray); */ +// if (taosArrayGetSize(pColArray) <= 0) { +// *ppLastArray = NULL; +// taosArrayDestroy(pColArray); +// } else { +// *ppLastArray = pColArray; +// } +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } + +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); + +// return code; +// _err: +// taosArrayDestroy(pColArray); +// if (*ppRow) { +// taosMemoryFreeClear(*ppRow); +// } +// for (int i = 0; i < 3; ++i) { +// if (input[i].nextRowClearFn) { +// input[i].nextRowClearFn(input[i].iter); +// } +// } +// if (pSkyline) { +// taosArrayDestroy(pSkyline); +// } +// taosMemoryFreeClear(pTSchema); +// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); +// return code; +// } int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHandle **handle) { int32_t code = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 13f310ae2721edc7567a34ddb4943c462cde4f42..194bd2e924351e26c6ed526286f01f6b67619087 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -29,6 +29,7 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; + STsdbFS fs; // -------------- TSKEY nextKey; // reset by each table commit int32_t commitFid; @@ -119,9 +120,6 @@ int32_t tsdbCommit(STsdb *pTsdb) { code = tsdbCommitDel(&commith); if (code) goto _err; - code = tsdbCommitCache(&commith); - if (code) goto _err; - // end commit code = tsdbEndCommit(&commith, 0); if (code) goto _err; @@ -158,7 +156,7 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) { goto _err; } - SDelFile *pDelFileR = pTsdb->pFS->nState->pDelFile; + SDelFile *pDelFileR = pCommitter->fs.pDelFile; if (pDelFileR) { code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL); if (code) goto _err; @@ -247,7 +245,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) { code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pCommitter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pCommitter->fs, &pCommitter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pCommitter->pDelFWriter, 1); @@ -273,7 +271,6 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SDFileSet *pRSet = NULL; - SDFileSet wSet; // memory pCommitter->nextKey = TSKEY_MAX; @@ -282,7 +279,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { taosArrayClear(pCommitter->aBlockIdx); tMapDataReset(&pCommitter->oBlockMap); tBlockDataReset(&pCommitter->oBlockData); - pRSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, pCommitter->commitFid, TD_EQ); + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, + tDFileSetCmprFn, TD_EQ); if (pRSet) { code = tsdbDataFReaderOpen(&pCommitter->pReader, pTsdb, pRSet); if (code) goto _err; @@ -292,23 +290,29 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { } // new + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + taosArrayClear(pCommitter->aBlockIdxN); tMapDataReset(&pCommitter->nBlockMap); tBlockDataReset(&pCommitter->nBlockData); if (pRSet) { - wSet = (SDFileSet){.diskId = pRSet->diskId, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = pRSet->fData, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = pRSet->fSma}; + wSet.diskId = pRSet->diskId; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = *pRSet->pDataF; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = *pRSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = pCommitter->commitFid, - .fHead = {.commitID = pCommitter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pCommitter->commitID, .size = 0}, - .fLast = {.commitID = pCommitter->commitID, .size = 0}, - .fSma = {.commitID = pCommitter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pCommitter->pWriter, pTsdb, &wSet); if (code) goto _err; @@ -855,7 +859,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { if (code) goto _err; // upsert SDFileSet - code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pCommitter->pWriter)); + code = tsdbFSUpsertFSet(&pCommitter->fs, &pCommitter->pWriter->wSet); if (code) goto _err; // close and sync @@ -973,7 +977,7 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - code = tsdbFSBegin(pTsdb->pFS); + code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; return code; @@ -1142,28 +1146,33 @@ _err: return code; } -static int32_t tsdbCommitCache(SCommitter *pCommitter) { - int32_t code = 0; - // TODO - return code; -} - static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; SMemTable *pMemTable = pTsdb->imem; - if (eno == 0) { - code = tsdbFSCommit(pTsdb->pFS); - } else { - code = tsdbFSRollback(pTsdb->pFS); - } + ASSERT(eno == 0); + + code = tsdbFSCommit1(pTsdb, &pCommitter->fs); + if (code) goto _err; + // lock taosThreadRwlockWrlock(&pTsdb->rwLock); + + // commit or rollback + code = tsdbFSCommit2(pTsdb, &pCommitter->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + pTsdb->imem = NULL; + + // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); tsdbUnrefMemTable(pMemTable); + tsdbFSDestroy(&pCommitter->fs); tsdbInfo("vgId:%d tsdb end commit", TD_VID(pTsdb->pVnode)); return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 3bc79621e10205f588f88f870c2a2973ed165105..b17e30d7c7cb5ebc36733d1abefe3fd276987f54 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -16,67 +16,41 @@ #include "tsdb.h" // ================================================================================================= -static int32_t tPutFSState(uint8_t *p, STsdbFSState *pState) { +static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) { int32_t n = 0; - int8_t hasDel = pState->pDelFile ? 1 : 0; - uint32_t nDFileSet = taosArrayGetSize(pState->aDFileSet); + int8_t hasDel = pFS->pDelFile ? 1 : 0; + uint32_t nSet = taosArrayGetSize(pFS->aDFileSet); // SDelFile n += tPutI8(p ? p + n : p, hasDel); if (hasDel) { - n += tPutDelFile(p ? p + n : p, pState->pDelFile); + n += tPutDelFile(p ? p + n : p, pFS->pDelFile); } // SArray - n += tPutU32v(p ? p + n : p, nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pState->aDFileSet, iDFileSet)); + n += tPutU32v(p ? p + n : p, nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet)); } return n; } -static int32_t tGetFSState(uint8_t *p, STsdbFSState *pState) { - int32_t n = 0; - int8_t hasDel; - uint32_t nDFileSet; - SDFileSet *pSet = &(SDFileSet){0}; - - // SDelFile - n += tGetI8(p + n, &hasDel); - if (hasDel) { - pState->pDelFile = &pState->delFile; - n += tGetDelFile(p + n, pState->pDelFile); - } else { - pState->pDelFile = NULL; - } - - // SArray - taosArrayClear(pState->aDFileSet); - n += tGetU32v(p + n, &nDFileSet); - for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) { - n += tGetDFileSet(p + n, pSet); - taosArrayPush(pState->aDFileSet, pSet); - } - - return n; -} - -static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { +static int32_t tsdbGnrtCurrent(STsdb *pTsdb, STsdbFS *pFS, char *fname) { int32_t code = 0; int64_t n; int64_t size; - uint8_t *pData; + uint8_t *pData = NULL; TdFilePtr pFD = NULL; // to binary - size = tPutFSState(NULL, pState) + sizeof(TSCKSUM); + size = tsdbEncodeFS(NULL, pFS) + sizeof(TSCKSUM); pData = taosMemoryMalloc(size); if (pData == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - n = tPutFSState(pData, pState); + n = tsdbEncodeFS(pData, pFS); ASSERT(n + sizeof(TSCKSUM) == size); taosCalcChecksumAppend(0, pData, size); @@ -104,600 +78,1008 @@ static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) { return code; _err: - tsdbError("tsdb gnrt current failed since %s", tstrerror(code)); + tsdbError("vgId:%d tsdb gnrt current failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); if (pData) taosMemoryFree(pData); return code; } -static int32_t tsdbLoadCurrentState(STsdbFS *pFS, STsdbFSState *pState) { - int32_t code = 0; - int64_t size; - int64_t n; - char fname[TSDB_FILENAME_LEN]; - uint8_t *pData = NULL; - TdFilePtr pFD; +// static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; + +// if (pFrom && pTo) { +// bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id); + +// // head +// if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) { +// ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size); +// ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset); +// } else { +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); +// } + +// // data +// if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) { +// if (pFrom->pDataF->size > pTo->pDataF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); +// } + +// // last +// if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) { +// if (pFrom->pLastF->size > pTo->pLastF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); +// } + +// // sma +// if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) { +// if (pFrom->pSmaF->size > pTo->pSmaF->size) { +// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); +// if (code) goto _err; +// } +// } else { +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } +// } else if (pFrom) { +// // head +// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname); +// taosRemoveFile(fname); + +// // data +// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname); +// taosRemoveFile(fname); + +// // last +// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname); +// taosRemoveFile(fname); + +// // fsm +// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname); +// taosRemoveFile(fname); +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { +// int32_t code = 0; +// char fname[TSDB_FILENAME_LEN]; + +// if (pFrom && pTo) { +// if (!tsdbDelFileIsSame(pFrom, pTo)) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } +// } else if (pFrom) { +// tsdbDelFileName(pFS->pTsdb, pFrom, fname); +// if (taosRemoveFile(fname) < 0) { +// code = TAOS_SYSTEM_ERROR(errno); +// goto _err; +// } +// } else { +// // do nothing +// } + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +// static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { +// int32_t code = 0; +// int32_t iFrom = 0; +// int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); +// int32_t iTo = 0; +// int32_t nTo = taosArrayGetSize(pTo->aDFileSet); +// SDFileSet *pDFileSetFrom; +// SDFileSet *pDFileSetTo; + +// // SDelFile +// code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); +// if (code) goto _err; + +// // SDFileSet +// while (iFrom < nFrom && iTo < nTo) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); + +// if (pDFileSetFrom->fid == pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); +// if (code) goto _err; + +// iFrom++; +// iTo++; +// } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } else { +// iTo++; +// } +// } + +// while (iFrom < nFrom) { +// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); +// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); +// if (code) goto _err; + +// iFrom++; +// } + +// #if 0 +// // do noting +// while (iTo < nTo) { +// pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); +// code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); +// if (code) goto _err; + +// iTo++; +// } +// #endif + +// return code; + +// _err: +// tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); +// return code; +// } + +void tsdbFSDestroy(STsdbFS *pFS) { + if (pFS->pDelFile) { + taosMemoryFree(pFS->pDelFile); + } - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + } - if (!taosCheckExistFile(fname)) { - // create an empry CURRENT file if not exists - code = tsdbGnrtCurrent(fname, pState); - if (code) goto _err; - } else { - // open the file and load - pFD = taosOpenFile(fname, TD_FILE_READ); - if (pFD == NULL) { + taosArrayDestroy(pFS->aDFileSet); +} + +static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { + int32_t code = 0; + int64_t size; + char fname[TSDB_FILENAME_LEN]; + + // SDelFile + if (pTsdb->fs.pDelFile) { + tsdbDelFileName(pTsdb, pTsdb->fs.pDelFile, fname); + if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (taosFStatFile(pFD, &size, NULL) < 0) { - code = TAOS_SYSTEM_ERROR(errno); + if (size != pTsdb->fs.pDelFile->size) { + code = TSDB_CODE_FILE_CORRUPTED; goto _err; } + } - pData = taosMemoryMalloc(size); - if (pData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + // SArray + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + + // head ========= + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + if (taosStatFile(fname, &size, NULL)) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + if (size != pSet->pHeadF->size) { + code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - n = taosReadFile(pFD, pData, size); - if (n < 0) { + // data ========= + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + if (taosStatFile(fname, &size, NULL)) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } + if (size < pSet->pDataF->size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; + } else if (size > pSet->pDataF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); + if (code) goto _err; + } - if (!taosCheckChecksumWhole(pData, size)) { + // last =========== + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + if (taosStatFile(fname, &size, NULL)) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + if (size != pSet->pLastF->size) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - taosCloseFile(&pFD); + // sma ============= + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + if (taosStatFile(fname, &size, NULL)) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + if (size < pSet->pSmaF->size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; + } else if (size > pSet->pSmaF->size) { + code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); + if (code) goto _err; + } + } - // decode - tGetFSState(pData, pState); + { + // remove those invalid files (todo) } - if (pData) taosMemoryFree(pData); return code; _err: - tsdbError("vgId:%d tsdb load current state failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - if (pData) taosMemoryFree(pData); + tsdbError("vgId:%d tsdb scan and try fix fs failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) { - int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; +int32_t tDFileSetCmprFn(const void *p1, const void *p2) { + if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) { + return -1; + } else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) { + return 1; + } + + return 0; +} + +static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) { + int32_t code = 0; + int8_t hasDel; + uint32_t nSet; + int32_t n; + + // SDelFile + n = 0; + n += tGetI8(pData + n, &hasDel); + if (hasDel) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pTsdb->fs.pDelFile->nRef = 1; + n += tGetDelFile(pData + n, pTsdb->fs.pDelFile); + } else { + pTsdb->fs.pDelFile = NULL; + } + + // SArray + taosArrayClear(pTsdb->fs.aDFileSet); + n += tGetU32v(pData + n, &nSet); + for (uint32_t iSet = 0; iSet < nSet; iSet++) { + SDFileSet fSet; - if (pFrom && pTo) { // head - if (tsdbFileIsSame(pFrom, pTo, TSDB_HEAD_FILE)) { - ASSERT(pFrom->fHead.size == pTo->fHead.size); - ASSERT(pFrom->fHead.offset == pTo->fHead.offset); - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); - taosRemoveFile(fname); + fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } + fSet.pHeadF->nRef = 1; // data - if (tsdbFileIsSame(pFrom, pTo, TSDB_DATA_FILE)) { - if (pFrom->fData.size > pTo->fData.size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); - taosRemoveFile(fname); + fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } + fSet.pDataF->nRef = 1; // last - if (tsdbFileIsSame(pFrom, pTo, TSDB_LAST_FILE)) { - if (pFrom->fLast.size > pTo->fLast.size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); - taosRemoveFile(fname); + fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } + fSet.pLastF->nRef = 1; // sma - if (tsdbFileIsSame(pFrom, pTo, TSDB_SMA_FILE)) { - if (pFrom->fSma.size > pTo->fSma.size) { - code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE); - if (code) goto _err; - } - } else { - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); - taosRemoveFile(fname); + fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } - } else if (pFrom) { - // head - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_HEAD_FILE, fname); - taosRemoveFile(fname); - - // data - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_DATA_FILE, fname); - taosRemoveFile(fname); + fSet.pSmaF->nRef = 1; - // last - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_LAST_FILE, fname); - taosRemoveFile(fname); + n += tGetDFileSet(pData + n, &fSet); - // fsm - tsdbDataFileName(pFS->pTsdb, pFrom, TSDB_SMA_FILE, fname); - taosRemoveFile(fname); + if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } } + ASSERT(n + sizeof(TSCKSUM) == nData); return code; _err: - tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); return code; } -static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) { +// EXPOSED APIS ==================================================================================== +int32_t tsdbFSOpen(STsdb *pTsdb) { int32_t code = 0; - char fname[TSDB_FILENAME_LEN]; - if (pFrom && pTo) { - if (!tsdbDelFileIsSame(pFrom, pTo)) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } - } else if (pFrom) { - tsdbDelFileName(pFS->pTsdb, pFrom, fname); - if (taosRemoveFile(fname) < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - } else { - // do nothing + // open handle + pTsdb->fs.pDelFile = NULL; + pTsdb->fs.aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); + if (pTsdb->fs.aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } - return code; + // load fs or keep empty + char fname[TSDB_FILENAME_LEN]; -_err: - tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) { - int32_t code = 0; - int32_t iFrom = 0; - int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet); - int32_t iTo = 0; - int32_t nTo = taosArrayGetSize(pTo->aDFileSet); - SDFileSet *pDFileSetFrom; - SDFileSet *pDFileSetTo; - - // SDelFile - code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile); - if (code) goto _err; + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); - // SDFileSet - while (iFrom < nFrom && iTo < nTo) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo); + if (!taosCheckExistFile(fname)) { + // empty one + code = tsdbGnrtCurrent(pTsdb, &pTsdb->fs, fname); + if (code) goto _err; + } else { + // read + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_READ); + if (pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } - if (pDFileSetFrom->fid == pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo); - if (code) goto _err; + int64_t size; + if (taosFStatFile(pFD, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosCloseFile(&pFD); + goto _err; + } - iFrom++; - iTo++; - } else if (pDFileSetFrom->fid < pDFileSetTo->fid) { - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; + uint8_t *pData = taosMemoryMalloc(size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosCloseFile(&pFD); + goto _err; + } - iFrom++; - } else { - iTo++; + int64_t n = taosReadFile(pFD, pData, size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; } - } - while (iFrom < nFrom) { - pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom); - code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL); - if (code) goto _err; + if (!taosCheckChecksumWhole(pData, size)) { + code = TSDB_CODE_FILE_CORRUPTED; + taosMemoryFree(pData); + taosCloseFile(&pFD); + goto _err; + } - iFrom++; - } + taosCloseFile(&pFD); -#if 0 - // do noting - while (iTo < nTo) { - pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo); - code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo); - if (code) goto _err; + // recover fs + code = tsdbRecoverFS(pTsdb, pData, size); + if (code) { + taosMemoryFree(pData); + goto _err; + } - iTo++; + taosMemoryFree(pData); } -#endif + + // scan and fix FS + code = tsdbScanAndTryFixFS(pTsdb); + if (code) goto _err; return code; _err: - tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -static void tsdbFSDestroy(STsdbFS *pFS) { - if (pFS) { - if (pFS->nState) { - taosArrayDestroy(pFS->nState->aDFileSet); - taosMemoryFree(pFS->nState); - } - - if (pFS->cState) { - taosArrayDestroy(pFS->cState->aDFileSet); - taosMemoryFree(pFS->cState); - } +int32_t tsdbFSClose(STsdb *pTsdb) { + int32_t code = 0; - taosThreadRwlockDestroy(&pFS->lock); - taosMemoryFree(pFS); + if (pTsdb->fs.pDelFile) { + ASSERT(pTsdb->fs.pDelFile->nRef == 1); + taosMemoryFree(pTsdb->fs.pDelFile); } - // TODO -} -static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) { - int32_t code = 0; - STsdbFS *pFS = NULL; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); - pFS = (STsdbFS *)taosMemoryCalloc(1, sizeof(*pFS)); - if (pFS == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->pTsdb = pTsdb; - - code = taosThreadRwlockInit(&pFS->lock, NULL); - if (code) { - taosMemoryFree(pFS); - code = TAOS_SYSTEM_ERROR(code); - goto _err; - } + // head + ASSERT(pSet->pHeadF->nRef == 1); + taosMemoryFree(pSet->pHeadF); - pFS->inTxn = 0; + // data + ASSERT(pSet->pDataF->nRef == 1); + taosMemoryFree(pSet->pDataF); - pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->cState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->cState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->cState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } + // last + ASSERT(pSet->pLastF->nRef == 1); + taosMemoryFree(pSet->pLastF); - pFS->nState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState)); - if (pFS->nState == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - pFS->nState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet)); - if (pFS->nState->aDFileSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + // sma + ASSERT(pSet->pSmaF->nRef == 1); + taosMemoryFree(pSet->pSmaF); } - *ppFS = pFS; - return code; + taosArrayDestroy(pTsdb->fs.aDFileSet); -_err: - tsdbError("vgId:%d tsdb fs create failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSDestroy(pFS); - *ppFS = NULL; return code; } -static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) { - int32_t code = 0; - STsdb *pTsdb = pFS->pTsdb; - STfs *pTfs = pTsdb->pVnode->pTfs; - int64_t size; - char fname[TSDB_FILENAME_LEN]; - char pHdr[TSDB_FHDR_SIZE]; - TdFilePtr pFD; +int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { + int32_t code = 0; - // SDelFile - if (pFS->cState->pDelFile) { - tsdbDelFileName(pTsdb, pFS->cState->pDelFile, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + pFS->pDelFile = NULL; + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } - if (size != pFS->cState->pDelFile->size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; + if (pTsdb->fs.pDelFile) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } - if (deepScan) { - // TODO - } + *pFS->pDelFile = *pTsdb->fs.pDelFile; } - // SArray - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; - // head ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_HEAD_FILE, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + fSet.pHeadF->nRef = 0; + fSet.pHeadF->commitID = pSet->pHeadF->commitID; + fSet.pHeadF->size = pSet->pHeadF->size; + fSet.pHeadF->offset = pSet->pHeadF->offset; - if (deepScan) { - // TODO + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + fSet.pDataF->nRef = 0; + fSet.pDataF->commitID = pSet->pDataF->commitID; + fSet.pDataF->size = pSet->pDataF->size; - // data ========= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_DATA_FILE, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + fSet.pLastF->nRef = 0; + fSet.pLastF->commitID = pSet->pLastF->commitID; + fSet.pLastF->size = pSet->pLastF->size; - if (size < pDFileSet->fData.size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } else if (size > pDFileSet->fData.size) { - ASSERT(0); - // need to rollback the file + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + fSet.pSmaF->nRef = 0; + fSet.pSmaF->commitID = pSet->pSmaF->commitID; + fSet.pSmaF->size = pSet->pSmaF->size; - if (deepScan) { - // TODO + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + } - // last =========== - tsdbDataFileName(pTsdb, pDFileSet, TSDB_LAST_FILE, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } +_exit: + return code; +} - if (size < pDFileSet->fLast.size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } else if (size > pDFileSet->fLast.size) { - ASSERT(0); - // need to rollback the file - } +int32_t tsdbFSRollback(STsdbFS *pFS) { + int32_t code = 0; - if (deepScan) { - // TODO - } + ASSERT(0); - // sma ============= - tsdbDataFileName(pTsdb, pDFileSet, TSDB_SMA_FILE, fname); - if (taosStatFile(fname, &size, NULL)) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + return code; - if (size < pDFileSet->fSma.size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } else if (size > pDFileSet->fSma.size) { - ASSERT(0); - // need to rollback the file - } +_err: + return code; +} - if (deepScan) { - // TODO +int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) { + int32_t code = 0; + + if (pFS->pDelFile == NULL) { + pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pFS->pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } } + *pFS->pDelFile = *pDelFile; - // remove those invalid files (todo) -#if 0 - STfsDir *tdir; - const STfsFile *pf; +_exit: + return code; +} - tdir = tfsOpendir(pTfs, pTsdb->path); - if (tdir == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; +int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) { + int32_t code = 0; + int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); + + if (idx < 0) { + idx = taosArrayGetSize(pFS->aDFileSet); + } else { + SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx); + int32_t c = tDFileSetCmprFn(pSet, pDFileSet); + if (c == 0) { + *pDFileSet->pHeadF = *pSet->pHeadF; + *pDFileSet->pDataF = *pSet->pDataF; + *pDFileSet->pLastF = *pSet->pLastF; + *pDFileSet->pSmaF = *pSet->pSmaF; + + goto _exit; + } } - while ((pf = tfsReaddir(tdir))) { - tfsBasename(pf, fname); + SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid}; + + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } + *fSet.pHeadF = *pSet->pHeadF; - tfsClosedir(tdir); -#endif + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pDataF = *pSet->pDataF; - return code; + // data + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pLastF = *pSet->pLastF; -_err: - tsdbError("vgId:%d tsdb scan and try fix fs failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} + // last + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + *fSet.pSmaF = *pSet->pSmaF; -static int32_t tDFileSetCmprFn(const void *p1, const void *p2) { - if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) { - return -1; - } else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) { - return 1; + if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; } - return 0; +_exit: + return code; } -// EXPOSED APIS ==================================================================================== -int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS) { +int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFSNew) { int32_t code = 0; + char tfname[TSDB_FILENAME_LEN]; + char fname[TSDB_FILENAME_LEN]; - // create handle - code = tsdbFSCreate(pTsdb, ppFS); - if (code) goto _err; + snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP, + pTsdb->path, TD_DIRSEP); - // load current state - code = tsdbLoadCurrentState(*ppFS, (*ppFS)->cState); - if (code) { - tsdbFSDestroy(*ppFS); - goto _err; - } + // gnrt CURRENT.t + code = tsdbGnrtCurrent(pTsdb, pFSNew, tfname); + if (code) goto _err; - // scan and fix FS - code = tsdbScanAndTryFixFS(*ppFS, 0); + // rename + code = taosRenameFile(tfname, fname); if (code) { - tsdbFSDestroy(*ppFS); + code = TAOS_SYSTEM_ERROR(code); goto _err; } return code; _err: - *ppFS = NULL; - tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs commit phase 1 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSClose(STsdbFS *pFS) { +int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) { int32_t code = 0; - tsdbFSDestroy(pFS); - return code; -} + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; -int32_t tsdbFSBegin(STsdbFS *pFS) { - int32_t code = 0; + // del + if (pFSNew->pDelFile) { + SDelFile *pDelFile = pTsdb->fs.pDelFile; - ASSERT(!pFS->inTxn); + if (pDelFile == NULL || (pDelFile->commitID != pFSNew->pDelFile->commitID)) { + pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile)); + if (pTsdb->fs.pDelFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } - // SDelFile - pFS->nState->pDelFile = NULL; - if (pFS->cState->pDelFile) { - pFS->nState->delFile = pFS->cState->delFile; - pFS->nState->pDelFile = &pFS->nState->delFile; + *pTsdb->fs.pDelFile = *pFSNew->pDelFile; + pTsdb->fs.pDelFile->nRef = 1; + + if (pDelFile) { + nRef = atomic_sub_fetch_32(&pDelFile->nRef, 1); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pDelFile); + } + } + } + } else { + ASSERT(pTsdb->fs.pDelFile == NULL); } - // SArray - taosArrayClear(pFS->nState->aDFileSet); - for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet); + // data + int32_t iOld = 0; + int32_t iNew = 0; + while (true) { + int32_t nOld = taosArrayGetSize(pTsdb->fs.aDFileSet); + int32_t nNew = taosArrayGetSize(pFSNew->aDFileSet); + SDFileSet fSet; + int8_t sameDisk; + + if (iOld >= nOld && iNew >= nNew) break; + + SDFileSet *pSetOld = (iOld < nOld) ? taosArrayGet(pTsdb->fs.aDFileSet, iOld) : NULL; + SDFileSet *pSetNew = (iNew < nNew) ? taosArrayGet(pFSNew->aDFileSet, iNew) : NULL; + + if (pSetOld && pSetNew) { + if (pSetOld->fid == pSetNew->fid) { + goto _merge_old_and_new; + } else if (pSetOld->fid < pSetNew->fid) { + goto _remove_old; + } else { + goto _add_new; + } + } else if (pSetOld) { + goto _remove_old; + } else { + goto _add_new; + } - if (taosArrayPush(pFS->nState->aDFileSet, pDFileSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + _merge_old_and_new: + sameDisk = ((pSetOld->diskId.level == pSetNew->diskId.level) && (pSetOld->diskId.id == pSetNew->diskId.id)); + + // head + fSet.pHeadF = pSetOld->pHeadF; + if ((!sameDisk) || (pSetOld->pHeadF->commitID != pSetNew->pHeadF->commitID)) { + pSetOld->pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (pSetOld->pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pHeadF = *pSetNew->pHeadF; + pSetOld->pHeadF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pHeadF); + } + } else { + ASSERT(fSet.pHeadF->size == pSetNew->pHeadF->size); + ASSERT(fSet.pHeadF->offset == pSetNew->pHeadF->offset); } - } - pFS->inTxn = 1; - return code; + // data + fSet.pDataF = pSetOld->pDataF; + if ((!sameDisk) || (pSetOld->pDataF->commitID != pSetNew->pDataF->commitID)) { + pSetOld->pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (pSetOld->pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pDataF = *pSetNew->pDataF; + pSetOld->pDataF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pDataF); + } + } else { + ASSERT(pSetOld->pDataF->size <= pSetNew->pDataF->size); + pSetOld->pDataF->size = pSetNew->pDataF->size; + } -_err: - tsdbError("vgId:%d tsdb fs begin failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} + // last + fSet.pLastF = pSetOld->pLastF; + if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) { + pSetOld->pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (pSetOld->pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pLastF = *pSetNew->pLastF; + pSetOld->pLastF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pLastF); + } + } else { + ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size); + } -int32_t tsdbFSCommit(STsdbFS *pFS) { - int32_t code = 0; - STsdbFSState *pState = pFS->nState; - char tfname[TSDB_FILENAME_LEN]; - char fname[TSDB_FILENAME_LEN]; + // sma + fSet.pSmaF = pSetOld->pSmaF; + if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) { + pSetOld->pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (pSetOld->pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *pSetOld->pSmaF = *pSetNew->pSmaF; + pSetOld->pSmaF->nRef = 1; + + nRef = atomic_sub_fetch_32(&fSet.pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(fSet.pSmaF); + } + } else { + ASSERT(pSetOld->pSmaF->size <= pSetNew->pSmaF->size); + pSetOld->pSmaF->size = pSetNew->pSmaF->size; + } - // need lock (todo) - pFS->nState = pFS->cState; - pFS->cState = pState; + if (!sameDisk) { + pSetOld->diskId = pSetNew->diskId; + } - snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP, - pFS->pTsdb->path, TD_DIRSEP); + iOld++; + iNew++; + continue; - // gnrt CURRENT.t - code = tsdbGnrtCurrent(tfname, pFS->cState); - if (code) goto _err; + _remove_old: + nRef = atomic_sub_fetch_32(&pSetOld->pHeadF->nRef, 1); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pHeadF); + } - // rename - code = taosRenameFile(tfname, fname); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _err; - } + nRef = atomic_sub_fetch_32(&pSetOld->pDataF->nRef, 1); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pDataF); + } - // apply commit on disk - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; + nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pLastF); + } - pFS->inTxn = 0; + nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSetOld->pSmaF); + } - return code; + taosArrayRemove(pTsdb->fs.aDFileSet, iOld); + continue; -_err: - tsdbError("vgId:%d tsdb fs commit failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); - return code; -} + _add_new: + fSet.diskId = pSetNew->diskId; + fSet.fid = pSetNew->fid; -int32_t tsdbFSRollback(STsdbFS *pFS) { - int32_t code = 0; + // head + fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile)); + if (fSet.pHeadF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pHeadF = *pSetNew->pHeadF; + fSet.pHeadF->nRef = 1; - code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState); - if (code) goto _err; + // data + fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); + if (fSet.pDataF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pDataF = *pSetNew->pDataF; + fSet.pDataF->nRef = 1; + + // last + fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); + if (fSet.pLastF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pLastF = *pSetNew->pLastF; + fSet.pLastF->nRef = 1; - pFS->inTxn = 0; + // sma + fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); + if (fSet.pSmaF == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + *fSet.pSmaF = *pSetNew->pSmaF; + fSet.pSmaF->nRef = 1; + + if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + iOld++; + iNew++; + continue; + } return code; _err: - tsdbError("vgId:%d tsdb fs rollback failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb fs commit phase 2 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile) { +int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) { int32_t code = 0; - pState->delFile = *pDelFile; - pState->pDelFile = &pState->delFile; - return code; -} + int32_t nRef; -int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet) { - int32_t code = 0; - int32_t idx = taosArraySearchIdx(pState->aDFileSet, pSet, tDFileSetCmprFn, TD_GE); + pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet)); + if (pFS->aDFileSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } - if (idx < 0) { - if (taosArrayPush(pState->aDFileSet, pSet) == NULL) { + pFS->pDelFile = pTsdb->fs.pDelFile; + if (pFS->pDelFile) { + nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef > 0); + } + + SDFileSet fSet; + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + fSet = *pSet; + + nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef > 0); + + nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef > 0); + + if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - } else { - SDFileSet *tDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, idx); - int32_t c = tDFileSetCmprFn(pSet, tDFileSet); - if (c == 0) { - taosArraySet(pState->aDFileSet, idx, pSet); - } else { - if (taosArrayInsert(pState->aDFileSet, idx, pSet) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; - } - } } _exit: return code; } -void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid) { - int32_t idx; +void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) { + int32_t nRef; + char fname[TSDB_FILENAME_LEN]; - idx = taosArraySearchIdx(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); - ASSERT(idx >= 0); - taosArrayRemove(pState->aDFileSet, idx); -} + if (pFS->pDelFile) { + nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDelFileName(pTsdb, pFS->pDelFile, fname); + taosRemoveFile(fname); + taosMemoryFree(pFS->pDelFile); + } + } -SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState) { return pState->pDelFile; } + for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet); -SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag) { - return (SDFileSet *)taosArraySearch(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, flag); -} + // head + nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pHeadF); + } + + // data + nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pDataF); + } + + // last + nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pLastF); + } + + // sma + nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1); + ASSERT(nRef >= 0); + if (nRef == 0) { + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + taosRemoveFile(fname); + taosMemoryFree(pSet->pSmaF); + } + } + + taosArrayDestroy(pFS->aDFileSet); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index f15ad072e7d6e1f88328f4ea94d1d9cee6ac2a4b..135ee23d44b15776dcf091c0a2198d61bd57e9cb 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -15,7 +15,7 @@ #include "tsdb.h" -static int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { +int32_t tPutHeadFile(uint8_t *p, SHeadFile *pHeadFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pHeadFile->commitID); @@ -35,7 +35,7 @@ static int32_t tGetHeadFile(uint8_t *p, SHeadFile *pHeadFile) { return n; } -static int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { +int32_t tPutDataFile(uint8_t *p, SDataFile *pDataFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pDataFile->commitID); @@ -53,7 +53,7 @@ static int32_t tGetDataFile(uint8_t *p, SDataFile *pDataFile) { return n; } -static int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { +int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pLastFile->commitID); @@ -71,7 +71,7 @@ static int32_t tGetLastFile(uint8_t *p, SLastFile *pLastFile) { return n; } -static int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { +int32_t tPutSmaFile(uint8_t *p, SSmaFile *pSmaFile) { int32_t n = 0; n += tPutI64v(p ? p + n : p, pSmaFile->commitID); @@ -90,123 +90,79 @@ static int32_t tGetSmaFile(uint8_t *p, SSmaFile *pSmaFile) { } // EXPOSED APIS ================================================== -void tsdbDataFileName(STsdb *pTsdb, SDFileSet *pDFileSet, EDataFileT ftype, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; +void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pHeadF->commitID, ".head"); +} - switch (ftype) { - case TSDB_HEAD_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fHead.commitID, - ".head"); - break; - case TSDB_DATA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fData.commitID, - ".data"); - break; - case TSDB_LAST_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fLast.commitID, - ".last"); - break; - case TSDB_SMA_FILE: - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTfs, pDFileSet->diskId), - TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pDFileSet->fid, pDFileSet->fSma.commitID, - ".sma"); - break; - default: - ASSERT(0); - break; - } +void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pDataF->commitID, ".data"); } -bool tsdbFileIsSame(SDFileSet *pDFileSet1, SDFileSet *pDFileSet2, EDataFileT ftype) { - if (pDFileSet1->diskId.level != pDFileSet2->diskId.level || pDFileSet1->diskId.id != pDFileSet2->diskId.id) { - return false; - } +void tsdbLastFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SLastFile *pLastF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pLastF->commitID, ".last"); +} - switch (ftype) { - case TSDB_HEAD_FILE: - return pDFileSet1->fHead.commitID == pDFileSet2->fHead.commitID; - case TSDB_DATA_FILE: - return pDFileSet1->fData.commitID == pDFileSet2->fData.commitID; - case TSDB_LAST_FILE: - return pDFileSet1->fLast.commitID == pDFileSet2->fLast.commitID; - case TSDB_SMA_FILE: - return pDFileSet1->fSma.commitID == pDFileSet2->fSma.commitID; - default: - ASSERT(0); - break; - } +void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]) { + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pSmaF->commitID, ".sma"); } bool tsdbDelFileIsSame(SDelFile *pDelFile1, SDelFile *pDelFile2) { return pDelFile1->commitID == pDelFile2->commitID; } -int32_t tsdbUpdateDFileHdr(TdFilePtr pFD, SDFileSet *pSet, EDataFileT ftype) { - int32_t code = 0; - int64_t n; - char hdr[TSDB_FHDR_SIZE]; - - memset(hdr, 0, TSDB_FHDR_SIZE); - tPutDataFileHdr(hdr, pSet, ftype); - taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); - - n = taosLSeekFile(pFD, 0, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - - n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _exit; - } - -_exit: - return code; -} - int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { int32_t code = 0; int64_t size; + int64_t n; TdFilePtr pFD; char fname[TSDB_FILENAME_LEN]; - - tsdbDataFileName(pTsdb, pSet, ftype, fname); - - // open - pFD = taosOpenFile(fname, TD_FILE_WRITE); - if (pFD == NULL) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } + char hdr[TSDB_FHDR_SIZE] = {0}; // truncate switch (ftype) { - case TSDB_HEAD_FILE: - size = pSet->fHead.size; - break; case TSDB_DATA_FILE: - size = pSet->fData.size; - break; - case TSDB_LAST_FILE: - size = pSet->fLast.size; + size = pSet->pDataF->size; + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); + tPutDataFile(hdr, pSet->pDataF); break; case TSDB_SMA_FILE: - size = pSet->fSma.size; + size = pSet->pSmaF->size; + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); + tPutSmaFile(hdr, pSet->pSmaF); break; default: ASSERT(0); } + + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + // open + pFD = taosOpenFile(fname, TD_FILE_WRITE); + if (pFD == NULL) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + // ftruncate if (taosFtruncateFile(pFD, size) < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } // update header - code = tsdbUpdateDFileHdr(pFD, pSet, ftype); - if (code) goto _err; + n = taosLSeekFile(pFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sync if (taosFsyncFile(pFD) < 0) { @@ -220,42 +176,20 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { return code; _err: + tsdbError("vgId:%d tsdb rollback file failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tPutDataFileHdr(uint8_t *p, SDFileSet *pSet, EDataFileT ftype) { - int32_t n = 0; - - switch (ftype) { - case TSDB_HEAD_FILE: - n += tPutHeadFile(p ? p + n : p, &pSet->fHead); - break; - case TSDB_DATA_FILE: - n += tPutDataFile(p ? p + n : p, &pSet->fData); - break; - case TSDB_LAST_FILE: - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - break; - case TSDB_SMA_FILE: - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); - break; - default: - ASSERT(0); - } - - return n; -} - int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet) { int32_t n = 0; n += tPutI32v(p ? p + n : p, pSet->diskId.level); n += tPutI32v(p ? p + n : p, pSet->diskId.id); n += tPutI32v(p ? p + n : p, pSet->fid); - n += tPutHeadFile(p ? p + n : p, &pSet->fHead); - n += tPutDataFile(p ? p + n : p, &pSet->fData); - n += tPutLastFile(p ? p + n : p, &pSet->fLast); - n += tPutSmaFile(p ? p + n : p, &pSet->fSma); + n += tPutHeadFile(p ? p + n : p, pSet->pHeadF); + n += tPutDataFile(p ? p + n : p, pSet->pDataF); + n += tPutLastFile(p ? p + n : p, pSet->pLastF); + n += tPutSmaFile(p ? p + n : p, pSet->pSmaF); return n; } @@ -266,20 +200,18 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { n += tGetI32v(p + n, &pSet->diskId.level); n += tGetI32v(p + n, &pSet->diskId.id); n += tGetI32v(p + n, &pSet->fid); - n += tGetHeadFile(p + n, &pSet->fHead); - n += tGetDataFile(p + n, &pSet->fData); - n += tGetLastFile(p + n, &pSet->fLast); - n += tGetSmaFile(p + n, &pSet->fSma); + n += tGetHeadFile(p + n, pSet->pHeadF); + n += tGetDataFile(p + n, pSet->pDataF); + n += tGetLastFile(p + n, pSet->pLastF); + n += tGetSmaFile(p + n, pSet->pSmaF); return n; } // SDelFile =============================================== void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]) { - STfs *pTfs = pTsdb->pVnode->pTfs; - - snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, pTsdb->path, - TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); + snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), + TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del"); } int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile) { diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index ee8a23e76e0aedee73c4efa2b0b94eda743bc014..fa775bb8822eb13d1493604272d60cd0dc479545 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -93,7 +93,11 @@ static int32_t tbDataPCmprFn(const void *p1, const void *p2) { } void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) { STbData *pTbData = &(STbData){.suid = suid, .uid = uid}; - void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + + taosRLockLatch(&pMemTable->latch); + void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + taosRUnLockLatch(&pMemTable->latch); + *ppTbData = p ? *(STbData **)p : NULL; } @@ -363,10 +367,13 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid void *p; if (idx < 0) { - p = taosArrayPush(pMemTable->aTbData, &pTbData); - } else { - p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + idx = taosArrayGetSize(pMemTable->aTbData); } + + taosWLockLatch(&pMemTable->latch); + p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + taosWUnLockLatch(&pMemTable->latch); + if (p == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -605,46 +612,3 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { tsdbMemTableDestroy(pMemTable); } } - -int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) { - int32_t code = 0; - - // lock - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - - // take snapshot - *ppMem = pTsdb->mem; - *ppIMem = pTsdb->imem; - - if (*ppMem) { - tsdbRefMemTable(*ppMem); - } - - if (*ppIMem) { - tsdbRefMemTable(*ppIMem); - } - - // unlock - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - goto _exit; - } - -_exit: - return code; -} - -void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) { - if (pMem) { - tsdbUnrefMemTable(pMem); - } - - if (pIMem) { - tsdbUnrefMemTable(pIMem); - } -} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 064c7adf4bcfb0124ca82502c08a3b0108c39d44..0b355d91b461d121be0359f7973d3fbe13e838c7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -66,7 +66,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee tfsMkdir(pVnode->pTfs, pTsdb->path); // open tsdb - if (tsdbFSOpen(pTsdb, &pTsdb->pFS) < 0) { + if (tsdbFSOpen(pTsdb) < 0) { goto _err; } @@ -88,7 +88,7 @@ _err: int tsdbClose(STsdb **pTsdb) { if (*pTsdb) { taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose((*pTsdb)->pFS); + tsdbFSClose(*pTsdb); tsdbCloseCache((*pTsdb)->lruCache); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index ccca13e55c4544ebf9d38d94c17d0874356a66ac..26ced6cf6ace66690defcaf53c850020517cf286 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -118,8 +118,7 @@ struct STsdbReader { char* idStr; // query info handle, for debug purpose int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; - SMemTable* pMem; - SMemTable* pIMem; + STsdbReadSnap* pReadSnap; SIOCostSummary cost; STSchema* pSchema; @@ -275,20 +274,18 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) { - size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet); +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) { + size_t numOfFileset = taosArrayGetSize(aDFileSet); pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset; pIter->order = order; - pIter->pFileList = taosArrayDup(pFState->aDFileSet); + pIter->pFileList = aDFileSet; pIter->numOfFiles = numOfFileset; tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr); return TSDB_CODE_SUCCESS; } -static void cleanupFilesetIterator(SFilesetIter* pIter) { taosArrayDestroy(pIter->pFileList); } - static bool filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pIter->order); int32_t step = asc ? 1 : -1; @@ -1881,8 +1878,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea int32_t backward = (!ASCENDING_TRAVERSE(pReader->order)); STbData* d = NULL; - if (pReader->pMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pMem, pReader->suid, pBlockScanInfo->uid, &d); + if (pReader->pReadSnap->pMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); if (d != NULL) { code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1902,8 +1899,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea } STbData* di = NULL; - if (pReader->pIMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + if (pReader->pReadSnap->pIMem != NULL) { + tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); if (di != NULL) { code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1939,7 +1936,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SArray* pDelData = taosArrayInit(4, sizeof(SDelData)); - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState); + SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader* pDelFReader = NULL; code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); @@ -2836,8 +2833,10 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl SDataBlockIter* pBlockIter = &pReader->status.blockIter; - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + code = tsdbTakeReadSnap(pReader->pTsdb, &pReader->pReadSnap); + if (code) goto _err; + + initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); // no data in files, let's try buffer in memory @@ -2850,8 +2849,6 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl } } - tsdbTakeMemSnapshot(pReader->pTsdb, &pReader->pMem, &pReader->pIMem); - tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr); return code; @@ -2867,7 +2864,7 @@ void tsdbReaderClose(STsdbReader* pReader) { SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; - tsdbUntakeMemSnapshot(pReader->pTsdb, pReader->pMem, pReader->pIMem); + tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap); taosMemoryFreeClear(pSupInfo->plist); taosMemoryFree(pSupInfo->colIds); @@ -2880,7 +2877,6 @@ void tsdbReaderClose(STsdbReader* pReader) { } taosMemoryFree(pSupInfo->buildBuf); - cleanupFilesetIterator(&pReader->status.fileIter); cleanupDataBlockIterator(&pReader->status.blockIter); destroyBlockScanInfo(pReader->status.pTableMap); blockDataDestroy(pReader->pResBlock); @@ -3087,8 +3083,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbDataFReaderClose(&pReader->pFileReader); - STsdbFSState* pFState = pReader->pTsdb->pFS->cState; - initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); resetDataBlockIterator(&pReader->status.blockIter, pReader->order); resetDataBlockScanInfo(pReader->status.pTableMap); @@ -3250,3 +3245,68 @@ int32_t tsdbGetTableSchema(SVnode* pVnode, int64_t uid, STSchema** pSchema, int6 return TSDB_CODE_SUCCESS; } + +int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) { + int32_t code = 0; + + // alloc + *ppSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap)); + if (*ppSnap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + // lock + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + // take snapshot + (*ppSnap)->pMem = pTsdb->mem; + (*ppSnap)->pIMem = pTsdb->imem; + + if ((*ppSnap)->pMem) { + tsdbRefMemTable((*ppSnap)->pMem); + } + + if ((*ppSnap)->pIMem) { + tsdbRefMemTable((*ppSnap)->pIMem); + } + + // fs + code = tsdbFSRef(pTsdb, &(*ppSnap)->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } + + // unlock + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + tsdbTrace("vgId:%d take read snapshot", TD_VID(pTsdb->pVnode)); +_exit: + return code; +} + +void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) { + if (pSnap) { + if (pSnap->pMem) { + tsdbUnrefMemTable(pSnap->pMem); + } + + if (pSnap->pIMem) { + tsdbUnrefMemTable(pSnap->pIMem); + } + + tsdbFSUnref(pTsdb, &pSnap->fs); + taosMemoryFree(pSnap); + } + + tsdbTrace("vgId:%d untake read snapshot", TD_VID(pTsdb->pVnode)); +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 5e8157864f04fe315f9407ca537f35d068b71361..7365ac23b8ab7b4901804db7801448824dad286e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -459,7 +459,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS // open impl // head - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname); pReader->pHeadFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -467,7 +467,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // data - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); pReader->pDataFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -475,7 +475,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // last - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname); pReader->pLastFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -483,7 +483,7 @@ int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pS } // sma - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname); pReader->pSmaFD = taosOpenFile(fname, TD_FILE_READ); if (pReader->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -536,8 +536,8 @@ _err: int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - int64_t offset = pReader->pSet->fHead.offset; - int64_t size = pReader->pSet->fHead.size - offset; + int64_t offset = pReader->pSet->pHeadF->offset; + int64_t size = pReader->pSet->pHeadF->size - offset; uint8_t *pBuf = NULL; int64_t n; uint32_t delimiter; @@ -1211,17 +1211,6 @@ _err: } // SDataFWriter ==================================================== -struct SDataFWriter { - STsdb *pTsdb; - SDFileSet wSet; - TdFilePtr pHeadFD; - TdFilePtr pDataFD; - TdFilePtr pLastFD; - TdFilePtr pSmaFD; -}; - -SDFileSet *tsdbDataFWriterGetWSet(SDataFWriter *pWriter) { return &pWriter->wSet; } - int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet) { int32_t code = 0; int32_t flag; @@ -1237,12 +1226,20 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } pWriter->pTsdb = pTsdb; - pWriter->wSet = *pSet; - pSet = &pWriter->wSet; + pWriter->wSet = (SDFileSet){.diskId = pSet->diskId, + .fid = pSet->fid, + .pHeadF = &pWriter->fHead, + .pDataF = &pWriter->fData, + .pLastF = &pWriter->fLast, + .pSmaF = &pWriter->fSma}; + pWriter->fHead = *pSet->pHeadF; + pWriter->fData = *pSet->pDataF; + pWriter->fLast = *pSet->pLastF; + pWriter->fSma = *pSet->pSmaF; // head flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; - tsdbDataFileName(pTsdb, pSet, TSDB_HEAD_FILE, fname); + tsdbHeadFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fHead, fname); pWriter->pHeadFD = taosOpenFile(fname, flag); if (pWriter->pHeadFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1257,28 +1254,28 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS ASSERT(n == TSDB_FHDR_SIZE); - pSet->fHead.size += TSDB_FHDR_SIZE; + pWriter->fHead.size += TSDB_FHDR_SIZE; // data - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_DATA_FILE, fname); + tsdbDataFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fData, fname); pWriter->pDataFD = taosOpenFile(fname, flag); if (pWriter->pDataFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fData.size == 0) { + if (pWriter->fData.size == 0) { n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fData.size += TSDB_FHDR_SIZE; + pWriter->fData.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_END); if (n < 0) { @@ -1286,29 +1283,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fData.size); + ASSERT(n == pWriter->fData.size); } // last - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_LAST_FILE, fname); + tsdbLastFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fLast, fname); pWriter->pLastFD = taosOpenFile(fname, flag); if (pWriter->pLastFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fLast.size == 0) { + if (pWriter->fLast.size == 0) { n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fLast.size += TSDB_FHDR_SIZE; + pWriter->fLast.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_END); if (n < 0) { @@ -1316,29 +1313,29 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fLast.size); + ASSERT(n == pWriter->fLast.size); } // sma - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { flag = TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; } else { flag = TD_FILE_WRITE; } - tsdbDataFileName(pTsdb, pSet, TSDB_SMA_FILE, fname); + tsdbSmaFileName(pTsdb, pWriter->wSet.diskId, pWriter->wSet.fid, &pWriter->fSma, fname); pWriter->pSmaFD = taosOpenFile(fname, flag); if (pWriter->pSmaFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - if (pSet->fSma.size == 0) { + if (pWriter->fSma.size == 0) { n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; } - pSet->fSma.size += TSDB_FHDR_SIZE; + pWriter->fSma.size += TSDB_FHDR_SIZE; } else { n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_END); if (n < 0) { @@ -1346,7 +1343,7 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS goto _err; } - ASSERT(n == pSet->fSma.size); + ASSERT(n == pWriter->fSma.size); } *ppWriter = pWriter; @@ -1418,22 +1415,76 @@ _err: int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter) { int32_t code = 0; + int64_t n; + char hdr[TSDB_FHDR_SIZE]; // head ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_HEAD_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutHeadFile(hdr, &pWriter->fHead); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pHeadFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pHeadFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // data ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_DATA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutDataFile(hdr, &pWriter->fData); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pDataFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pDataFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // last ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_LAST_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutLastFile(hdr, &pWriter->fLast); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pLastFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pLastFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } // sma ============== - code = tsdbUpdateDFileHdr(pWriter->pHeadFD, &pWriter->wSet, TSDB_SMA_FILE); - if (code) goto _err; + memset(hdr, 0, TSDB_FHDR_SIZE); + tPutSmaFile(hdr, &pWriter->fSma); + taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); + + n = taosLSeekFile(pWriter->pSmaFD, 0, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pWriter->pSmaFD, hdr, TSDB_FHDR_SIZE); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } return code; @@ -1444,7 +1495,7 @@ _err: int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **ppBuf) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; uint8_t *pBuf = NULL; int64_t size; int64_t n; @@ -1494,7 +1545,7 @@ _err: int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, uint8_t **ppBuf, SBlockIdx *pBlockIdx) { int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->wSet.fHead; + SHeadFile *pHeadFile = &pWriter->fHead; SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; uint8_t *pBuf = NULL; int64_t size; @@ -1831,9 +1882,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->nRow = pBlockData->nRow; pSubBlock->cmprAlg = cmprAlg; if (pBlock->last) { - pSubBlock->offset = pWriter->wSet.fLast.size; + pSubBlock->offset = pWriter->fLast.size; } else { - pSubBlock->offset = pWriter->wSet.fData.size; + pSubBlock->offset = pWriter->fData.size; } // ======================= BLOCK DATA ======================= @@ -1881,9 +1932,9 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ pSubBlock->szBlock = pSubBlock->szBlockCol + sizeof(TSCKSUM) + nData; if (pBlock->last) { - pWriter->wSet.fLast.size += pSubBlock->szBlock; + pWriter->fLast.size += pSubBlock->szBlock; } else { - pWriter->wSet.fData.size += pSubBlock->szBlock; + pWriter->fData.size += pSubBlock->szBlock; } // ======================= BLOCK SMA ======================= @@ -1896,8 +1947,8 @@ int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_ if (code) goto _err; if (pSubBlock->nSma > 0) { - pSubBlock->sOffset = pWriter->wSet.fSma.size; - pWriter->wSet.fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); + pSubBlock->sOffset = pWriter->fSma.size; + pWriter->fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); } _exit: @@ -1924,8 +1975,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { char fNameTo[TSDB_FILENAME_LEN]; // head - tsdbDataFileName(pTsdb, pSetFrom, TSDB_HEAD_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_HEAD_FILE, fNameTo); + tsdbHeadFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pHeadF, fNameFrom); + tsdbHeadFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pHeadF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1939,7 +1990,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fHead.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pHeadF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1948,8 +1999,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // data - tsdbDataFileName(pTsdb, pSetFrom, TSDB_DATA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_DATA_FILE, fNameTo); + tsdbDataFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pDataF, fNameFrom); + tsdbDataFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pDataF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -1963,7 +2014,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fData.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pDataF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1972,8 +2023,9 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // last - tsdbDataFileName(pTsdb, pSetFrom, TSDB_LAST_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_LAST_FILE, fNameTo); + tsdbLastFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pLastF, fNameFrom); + tsdbLastFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pLastF, fNameTo); + pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); @@ -1986,7 +2038,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fLast.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pLastF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1995,8 +2047,8 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { taosCloseFile(&PInFD); // sma - tsdbDataFileName(pTsdb, pSetFrom, TSDB_SMA_FILE, fNameFrom); - tsdbDataFileName(pTsdb, pSetTo, TSDB_SMA_FILE, fNameTo); + tsdbSmaFileName(pTsdb, pSetFrom->diskId, pSetFrom->fid, pSetFrom->pSmaF, fNameFrom); + tsdbSmaFileName(pTsdb, pSetTo->diskId, pSetTo->fid, pSetTo->pSmaF, fNameTo); pOutFD = taosOpenFile(fNameTo, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); if (pOutFD == NULL) { @@ -2010,7 +2062,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { goto _err; } - n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->fSma.size); + n = taosFSendFile(pOutFD, PInFD, 0, pSetFrom->pSmaF->size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 137ef9a4a683ffe521623ff5c1a40e596fa909bf..5ba2ecb64b9ef01ec2915c279ea526aa35d22d7e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -15,90 +15,99 @@ #include "tsdb.h" -static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) { - int32_t code = 0; - STsdbFSState *pState; - - if (try) { - pState = pTsdb->pFS->cState; - *canDo = 0; - } else { - pState = pTsdb->pFS->nState; - } - - for (int32_t iSet = 0; iSet < taosArrayGetSize(pState->aDFileSet); iSet++) { - SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pDFileSet->fid, &pTsdb->keepCfg, now); +static bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) { + for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); SDiskID did; - // check - if (expLevel == pDFileSet->diskId.id) continue; + if (expLevel == pSet->diskId.level) continue; - // delete or move if (expLevel < 0) { - if (try) { - *canDo = 1; - } else { - tsdbFSStateDeleteDFileSet(pState, pDFileSet->fid); - iSet--; - } + return true; } else { - // alloc if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { - code = terrno; - goto _exit; + return false; } - if (did.level == pDFileSet->diskId.level) continue; + if (did.level == pSet->diskId.level) continue; - if (try) { - *canDo = 1; - } else { - // copy the file to new disk - - SDFileSet nDFileSet = *pDFileSet; - nDFileSet.diskId = did; - - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - - code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet); - if (code) goto _exit; - - code = tsdbFSStateUpsertDFileSet(pState, &nDFileSet); - if (code) goto _exit; - } + return true; } } -_exit: - return code; + return false; } int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { int32_t code = 0; - int8_t canDo; - - // try - tsdbDoRetentionImpl(pTsdb, now, 1, &canDo); - if (!canDo) goto _exit; - // begin - code = tsdbFSBegin(pTsdb->pFS); - if (code) goto _err; + if (!tsdbShouldDoRetention(pTsdb, now)) { + return code; + } // do retention - code = tsdbDoRetentionImpl(pTsdb, now, 0, NULL); + STsdbFS fs; + + code = tsdbFSCopy(pTsdb, &fs); if (code) goto _err; - // commit - code = tsdbFSCommit(pTsdb->pFS); + for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) { + SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); + int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); + SDiskID did; + + if (expLevel < 0) { + taosMemoryFree(pSet->pHeadF); + taosMemoryFree(pSet->pDataF); + taosMemoryFree(pSet->pLastF); + taosMemoryFree(pSet->pSmaF); + taosArrayRemove(fs.aDFileSet, iSet); + iSet--; + } else { + if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { + code = terrno; + goto _exit; + } + + if (did.level == pSet->diskId.level) continue; + + // copy file to new disk (todo) + SDFileSet fSet = *pSet; + fSet.diskId = did; + + code = tsdbDFileSetCopy(pTsdb, pSet, &fSet); + if (code) goto _err; + + code = tsdbFSUpsertFSet(&fs, &fSet); + if (code) goto _err; + } + + /* code */ + } + + // do change fs + code = tsdbFSCommit1(pTsdb, &fs); if (code) goto _err; + taosThreadRwlockWrlock(&pTsdb->rwLock); + + code = tsdbFSCommit2(pTsdb, &fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + taosThreadRwlockUnlock(&pTsdb->rwLock); + + tsdbFSDestroy(&fs); + _exit: return code; _err: tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - tsdbFSRollback(pTsdb->pFS); + ASSERT(0); + // tsdbFSRollback(pTsdb->pFS); return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index fea0254045c14471d59fe3f4d6397d594e26fabe..43537c9a8d2778014c63b6f66a714881f57fee0f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -20,6 +20,7 @@ struct STsdbSnapReader { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // for data file int8_t dataDone; int32_t fid; @@ -45,7 +46,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { while (true) { if (pReader->pDataFReader == NULL) { - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->cState, pReader->fid, TD_GT); + SDFileSet* pSet = + taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); if (pSet == NULL) goto _exit; @@ -159,7 +161,7 @@ _err: static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pTsdb->pFS->cState->pDelFile; + SDelFile* pDelFile = pReader->fs.pDelFile; if (pReader->pDelFReader == NULL) { if (pDelFile == NULL) { @@ -254,6 +256,24 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapRe pReader->sver = sver; pReader->ever = ever; + code = taosThreadRwlockRdlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + + code = tsdbFSRef(pTsdb, &pReader->fs); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _err; + } + + code = taosThreadRwlockUnlock(&pTsdb->rwLock); + if (code) { + code = TAOS_SYSTEM_ERROR(code); + goto _err; + } + pReader->fid = INT32_MIN; pReader->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pReader->aBlockIdx == NULL) { @@ -305,6 +325,8 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { taosArrayDestroy(pReader->aDelIdx); taosArrayDestroy(pReader->aDelData); + tsdbFSUnref(pReader->pTsdb, &pReader->fs); + tsdbInfo("vgId:%d vnode snapshot tsdb reader closed", TD_VID(pReader->pTsdb->pVnode)); taosMemoryFree(pReader); @@ -358,6 +380,7 @@ struct STsdbSnapWriter { STsdb* pTsdb; int64_t sver; int64_t ever; + STsdbFS fs; // config int32_t minutes; @@ -798,7 +821,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); if (code) goto _err; - code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, tsdbDataFWriterGetWSet(pWriter->pDataFWriter)); + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); if (code) goto _err; code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); @@ -843,7 +866,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 pWriter->fid = fid; // read - SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, fid, TD_EQ); + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); if (pSet) { code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); if (code) goto _err; @@ -863,22 +886,26 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 tBlockDataReset(&pWriter->bDataR); // write - SDFileSet wSet; + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; if (pSet) { - wSet = (SDFileSet){.diskId = pSet->diskId, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = pSet->fData, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = pSet->fSma}; + wSet.diskId = pSet->diskId; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = *pSet->pDataF; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = *pSet->pSmaF; } else { - wSet = (SDFileSet){.diskId = (SDiskID){.level = 0, .id = 0}, - .fid = fid, - .fHead = {.commitID = pWriter->commitID, .offset = 0, .size = 0}, - .fData = {.commitID = pWriter->commitID, .size = 0}, - .fLast = {.commitID = pWriter->commitID, .size = 0}, - .fSma = {.commitID = pWriter->commitID, .size = 0}}; + wSet.diskId = (SDiskID){.level = 0, .id = 0}; + wSet.fid = fid; + fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; + fData = (SDataFile){.commitID = pWriter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fSma = (SSmaFile){.commitID = pWriter->commitID, .size = 0}; } code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); @@ -907,7 +934,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 STsdb* pTsdb = pWriter->pTsdb; if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->nState); + SDelFile* pDelFile = pWriter->fs.pDelFile; // reader if (pDelFile) { @@ -1017,7 +1044,7 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); if (code) goto _err; - code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pWriter->pDelFWriter->fDel); + code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); if (code) goto _err; code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); @@ -1051,6 +1078,9 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->sver = sver; pWriter->ever = ever; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + if (code) goto _err; + // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; @@ -1096,9 +1126,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr goto _err; } - code = tsdbFSBegin(pTsdb->pFS); - if (code) goto _err; - *ppWriter = pWriter; return code; @@ -1113,8 +1140,9 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { STsdbSnapWriter* pWriter = *ppWriter; if (rollback) { - code = tsdbFSRollback(pWriter->pTsdb->pFS); - if (code) goto _err; + ASSERT(0); + // code = tsdbFSRollback(pWriter->pTsdb->pFS); + // if (code) goto _err; } else { code = tsdbSnapWriteDataEnd(pWriter); if (code) goto _err; @@ -1122,7 +1150,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbSnapWriteDelEnd(pWriter); if (code) goto _err; - code = tsdbFSCommit(pWriter->pTsdb->pFS); + code = tsdbFSCommit1(pWriter->pTsdb, &pWriter->fs); + if (code) goto _err; + + code = tsdbFSCommit2(pWriter->pTsdb, &pWriter->fs); if (code) goto _err; } diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index 27d8b3f1cc8113b4ec9f7139955ffb34f23f10f0..e920f5856008cf070de0d73d728b023daf16f209 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -315,6 +315,9 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { SStreamScanInfo* pInfo = pOperator->info; if (pOffset->type == TMQ_OFFSET__LOG) { + STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTSInfo->dataReader); + pTSInfo->dataReader = NULL; #if 0 if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.lastStatus) && pInfo->tqReader->pWalReader->curVersion != pOffset->version) { @@ -349,8 +352,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { #ifndef NDEBUG - qDebug("switch to next table %ld (cursor %d), %ld rows returned", uid, - pTableScanInfo->currentTable, pInfo->pTableScanOp->resultInfo.totalRows); + qDebug("switch to next table %ld (cursor %d), %ld rows returned", uid, pTableScanInfo->currentTable, + pInfo->pTableScanOp->resultInfo.totalRows); pInfo->pTableScanOp->resultInfo.totalRows = 0; #endif @@ -367,6 +370,14 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, const STqOffsetVal* pOffset) { // TODO after dropping table, table may be not found ASSERT(found); + if (pTableScanInfo->dataReader == NULL) { + if (tsdbReaderOpen(pTableScanInfo->readHandle.vnode, &pTableScanInfo->cond, + pTaskInfo->tableqinfoList.pTableList, &pTableScanInfo->dataReader, NULL) < 0 || + pTableScanInfo->dataReader == NULL) { + ASSERT(0); + } + } + tsdbSetTableId(pTableScanInfo->dataReader, uid); int64_t oldSkey = pTableScanInfo->cond.twindows.skey; pTableScanInfo->cond.twindows.skey = ts + 1; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index ab62905c3f14b369aac5da8eb308b344bae0c73f..fc29eed4559799c6bc81c6965c2ad0ab1de6ccc5 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -740,7 +740,7 @@ static SSDataBlock* doBlockInfoScan(SOperatorInfo* pOperator) { static void destroyBlockDistScanOperatorInfo(void* param, int32_t numOfOutput) { SBlockDistInfo* pDistInfo = (SBlockDistInfo*)param; blockDataDestroy(pDistInfo->pResBlock); - + tsdbReaderClose(pDistInfo->pHandle); taosMemoryFreeClear(param); } @@ -982,6 +982,9 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 if (!pResult) { blockDataCleanup(pSDB); *pRowIndex = 0; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; return NULL; } @@ -1003,6 +1006,9 @@ static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_ } if (!pResult) { pInfo->updateWin = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + tsdbReaderClose(pTableScanInfo->dataReader); + pTableScanInfo->dataReader = NULL; return NULL; } @@ -2047,8 +2053,8 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { uint64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; int32_t code = metaGetTableEntryByUid(&mr, suid); if (code != TSDB_CODE_SUCCESS) { - qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", - pInfo->pCur->mr.me.name, suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); + qError("failed to get super table meta, cname:%s, suid:0x%" PRIx64 ", code:%s, %s", pInfo->pCur->mr.me.name, + suid, tstrerror(terrno), GET_TASKID(pTaskInfo)); metaReaderClear(&mr); metaCloseTbCursor(pInfo->pCur); pInfo->pCur = NULL; @@ -2154,7 +2160,6 @@ static SSDataBlock* sysTableScanUserTables(SOperatorInfo* pOperator) { } } - static SSDataBlock* sysTableScanUserSTables(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SSysTableScanInfo* pInfo = pOperator->info; @@ -2180,12 +2185,13 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { getDBNameFromCondition(pInfo->pCondition, dbName); sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); } - + if (strncasecmp(name, TSDB_INS_TABLE_USER_TABLES, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTables(pOperator); } else if (strncasecmp(name, TSDB_INS_TABLE_USER_TAGS, TSDB_TABLE_FNAME_LEN) == 0) { return sysTableScanUserTags(pOperator); - } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && IS_SYS_DBNAME(pInfo->req.db)) { + } else if (strncasecmp(name, TSDB_INS_TABLE_USER_STABLES, TSDB_TABLE_FNAME_LEN) == 0 && + IS_SYS_DBNAME(pInfo->req.db)) { return sysTableScanUserSTables(pOperator); } else { // load the meta from mnode of the given epset if (pOperator->status == OP_EXEC_DONE) {