提交 684dd823 编写于 作者: H Hongze Cheng

fix read concurrency

上级 bb2e923f
......@@ -62,7 +62,6 @@ typedef struct SDelFReader SDelFReader;
typedef struct SRowIter SRowIter;
typedef struct STsdbFS STsdbFS;
typedef struct SRowMerger SRowMerger;
typedef struct STsdbFSState STsdbFSState;
typedef struct STsdbSnapHdr STsdbSnapHdr;
typedef struct STsdbReadSnap STsdbReadSnap;
......@@ -177,8 +176,6 @@ void tsdbMemTableDestroy(SMemTable *pMemTable);
void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData);
void tsdbRefMemTable(SMemTable *pMemTable);
void tsdbUnrefMemTable(SMemTable *pMemTable);
int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem);
void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem);
// STbDataIter
int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter);
void *tsdbTbDataIterDestroy(STbDataIter *pIter);
......@@ -208,17 +205,20 @@ void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, ch
// SDelFile
void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]);
// tsdbFS.c ==============================================================================================
int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS);
int32_t tsdbFSClose(STsdbFS *pFS);
int32_t tsdbFSBegin(STsdbFS *pFS);
int32_t tsdbFSCommit(STsdbFS *pFS);
int32_t tsdbFSOpen(STsdb *pTsdb);
int32_t tsdbFSClose(STsdb *pTsdb);
int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS);
void tsdbFSDestroy(STsdbFS *pFS);
int32_t tDFileSetCmprFn(const void *p1, const void *p2);
int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFS);
int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFS);
int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS);
void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS);
int32_t tsdbFSRollback(STsdbFS *pFS);
int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile);
int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet);
void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid);
SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState);
SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag);
int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet);
int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile);
// tsdbReaderWriter.c ==============================================================================================
// SDataFWriter
int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet);
......@@ -285,6 +285,11 @@ typedef struct {
TSKEY minKey;
} SRtn;
struct STsdbFS {
SDelFile *pDelFile;
SArray *aDFileSet; // SArray<SDFileSet>
};
struct STsdb {
char *path;
SVnode *pVnode;
......@@ -292,7 +297,7 @@ struct STsdb {
TdThreadRwlock rwLock;
SMemTable *mem;
SMemTable *imem;
STsdbFS *pFS;
STsdbFS fs;
SLRUCache *lruCache;
};
......@@ -540,22 +545,6 @@ struct SRowMerger {
SArray *pArray; // SArray<SColVal>
};
struct STsdbFSState {
SDelFile *pDelFile;
SArray *aDFileSet; // SArray<aDFileSet>
SDelFile delFile;
};
struct STsdbFS {
STsdb *pTsdb;
STsdbFSState *cState;
STsdbFSState *nState;
// new
SDelFile *pDelFile;
SArray aDFileSetP; // SArray<SDFileSet *>
};
struct SDelFWriter {
STsdb *pTsdb;
SDelFile fDel;
......
......@@ -464,7 +464,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) {
switch (state->state) {
case SFSNEXTROW_FS:
state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet;
// state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet;
state->nFileSet = taosArrayGetSize(state->aDFileSet);
state->iFileSet = state->nFileSet;
......@@ -793,9 +793,10 @@ typedef struct {
TSDBROW memRow, imemRow, fsRow;
TsdbNextRowState input[3];
SMemTable *pMemTable;
SMemTable *pIMemTable;
STsdb *pTsdb;
// SMemTable *pMemTable;
// SMemTable *pIMemTable;
STsdbReadSnap *pReadSnap;
STsdb *pTsdb;
} CacheNextRowIter;
static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) {
......@@ -803,16 +804,16 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs
tb_uid_t suid = getTableSuidByUid(uid, pTsdb);
tsdbTakeMemSnapshot(pTsdb, &pIter->pMemTable, &pIter->pIMemTable);
tsdbTakeReadSnap(pTsdb, &pIter->pReadSnap);
STbData *pMem = NULL;
if (pIter->pMemTable) {
tsdbGetTbDataFromMemTable(pIter->pMemTable, suid, uid, &pMem);
if (pIter->pReadSnap->pMem) {
tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem);
}
STbData *pIMem = NULL;
if (pIter->pIMemTable) {
tsdbGetTbDataFromMemTable(pIter->pIMemTable, suid, uid, &pIMem);
if (pIter->pReadSnap->pIMem) {
tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid, &pIMem);
}
pIter->pTsdb = pTsdb;
......@@ -821,7 +822,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs
SDelIdx delIdx;
SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
SDelFile *pDelFile = pIter->pReadSnap->fs.pDelFile;
if (pDelFile) {
SDelFReader *pDelFReader;
......@@ -846,6 +847,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs
pIter->fsState.state = SFSNEXTROW_FS;
pIter->fsState.pTsdb = pTsdb;
pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet;
pIter->fsState.pBlockIdxExp = &pIter->idx;
pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL};
......@@ -885,7 +887,7 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) {
taosArrayDestroy(pIter->pSkyline);
}
tsdbUntakeMemSnapshot(pIter->pTsdb, pIter->pMemTable, pIter->pIMemTable);
tsdbUntakeReadSnap(pIter->pTsdb, pIter->pReadSnap);
return code;
_err:
......@@ -1172,480 +1174,480 @@ _err:
return code;
}
static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) {
int32_t code = 0;
SArray *pSkyline = NULL;
STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1);
int16_t nCol = pTSchema->numOfCols;
SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal));
tb_uid_t suid = getTableSuidByUid(uid, pTsdb);
STbData *pMem = NULL;
if (pTsdb->mem) {
tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem);
}
STbData *pIMem = NULL;
if (pTsdb->imem) {
tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem);
}
*ppRow = NULL;
pSkyline = taosArrayInit(32, sizeof(TSDBKEY));
SDelIdx delIdx;
SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
if (pDelFile) {
SDelFReader *pDelFReader;
code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
if (code) goto _err;
code = getTableDelIdx(pDelFReader, suid, uid, &delIdx);
if (code) goto _err;
code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline);
if (code) goto _err;
tsdbDelFReaderClose(&pDelFReader);
} else {
code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline);
if (code) goto _err;
}
int64_t iSkyline = taosArrayGetSize(pSkyline) - 1;
SBlockIdx idx = {.suid = suid, .uid = uid};
SFSNextRowIter fsState = {0};
fsState.state = SFSNEXTROW_FS;
fsState.pTsdb = pTsdb;
fsState.pBlockIdxExp = &idx;
SMemNextRowIter memState = {0};
SMemNextRowIter imemState = {0};
TSDBROW memRow, imemRow, fsRow;
TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL},
{&imemRow, true, false, &imemState, getNextRowFromMem, NULL},
{&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}};
if (pMem) {
memState.pMem = pMem;
memState.state = SMEMNEXTROW_ENTER;
input[0].stop = false;
input[0].next = true;
}
if (pIMem) {
imemState.pMem = pIMem;
imemState.state = SMEMNEXTROW_ENTER;
input[1].stop = false;
input[1].next = true;
}
int16_t nilColCount = nCol - 1; // count of null & none cols
int iCol = 0; // index of first nil col index from left to right
bool setICol = false;
do {
for (int i = 0; i < 3; ++i) {
if (input[i].next && !input[i].stop) {
if (input[i].pRow == NULL) {
code = input[i].nextRowFn(input[i].iter, &input[i].pRow);
if (code) goto _err;
if (input[i].pRow == NULL) {
input[i].stop = true;
input[i].next = false;
}
}
}
}
if (input[0].stop && input[1].stop && input[2].stop) {
break;
}
// select maxpoint(s) from mem, imem, fs
TSDBROW *max[3] = {0};
int iMax[3] = {-1, -1, -1};
int nMax = 0;
TSKEY maxKey = TSKEY_MIN;
for (int i = 0; i < 3; ++i) {
if (!input[i].stop && input[i].pRow != NULL) {
TSDBKEY key = TSDBROW_KEY(input[i].pRow);
// merging & deduplicating on client side
if (maxKey <= key.ts) {
if (maxKey < key.ts) {
nMax = 0;
maxKey = key.ts;
}
iMax[nMax] = i;
max[nMax++] = input[i].pRow;
}
}
}
// delete detection
TSDBROW *merge[3] = {0};
int iMerge[3] = {-1, -1, -1};
int nMerge = 0;
for (int i = 0; i < nMax; ++i) {
TSDBKEY maxKey = TSDBROW_KEY(max[i]);
bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline);
if (!deleted) {
iMerge[nMerge] = i;
merge[nMerge++] = max[i];
}
input[iMax[i]].next = deleted;
}
// merge if nMerge > 1
if (nMerge > 0) {
*dup = false;
if (nMerge == 1) {
code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow);
if (code) goto _err;
} else {
// merge 2 or 3 rows
SRowMerger merger = {0};
tRowMergerInit(&merger, merge[0], pTSchema);
for (int i = 1; i < nMerge; ++i) {
tRowMerge(&merger, merge[i]);
}
tRowMergerGetRow(&merger, ppRow);
tRowMergerClear(&merger);
}
}
} while (1);
for (int i = 0; i < 3; ++i) {
if (input[i].nextRowClearFn) {
input[i].nextRowClearFn(input[i].iter);
}
}
if (pSkyline) {
taosArrayDestroy(pSkyline);
}
taosMemoryFreeClear(pTSchema);
return code;
_err:
for (int i = 0; i < 3; ++i) {
if (input[i].nextRowClearFn) {
input[i].nextRowClearFn(input[i].iter);
}
}
if (pSkyline) {
taosArrayDestroy(pSkyline);
}
taosMemoryFreeClear(pTSchema);
tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
// static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRow) {
// int32_t code = 0;
// SArray *pSkyline = NULL;
// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1);
// int16_t nCol = pTSchema->numOfCols;
// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal));
// tb_uid_t suid = getTableSuidByUid(uid, pTsdb);
// STbData *pMem = NULL;
// if (pTsdb->mem) {
// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem);
// }
// STbData *pIMem = NULL;
// if (pTsdb->imem) {
// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem);
// }
// *ppRow = NULL;
// pSkyline = taosArrayInit(32, sizeof(TSDBKEY));
// SDelIdx delIdx;
// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
// if (pDelFile) {
// SDelFReader *pDelFReader;
// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
// if (code) goto _err;
// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx);
// if (code) goto _err;
// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline);
// if (code) goto _err;
// tsdbDelFReaderClose(&pDelFReader);
// } else {
// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline);
// if (code) goto _err;
// }
// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1;
// SBlockIdx idx = {.suid = suid, .uid = uid};
// SFSNextRowIter fsState = {0};
// fsState.state = SFSNEXTROW_FS;
// fsState.pTsdb = pTsdb;
// fsState.pBlockIdxExp = &idx;
// SMemNextRowIter memState = {0};
// SMemNextRowIter imemState = {0};
// TSDBROW memRow, imemRow, fsRow;
// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL},
// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL},
// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}};
// if (pMem) {
// memState.pMem = pMem;
// memState.state = SMEMNEXTROW_ENTER;
// input[0].stop = false;
// input[0].next = true;
// }
// if (pIMem) {
// imemState.pMem = pIMem;
// imemState.state = SMEMNEXTROW_ENTER;
// input[1].stop = false;
// input[1].next = true;
// }
// int16_t nilColCount = nCol - 1; // count of null & none cols
// int iCol = 0; // index of first nil col index from left to right
// bool setICol = false;
// do {
// for (int i = 0; i < 3; ++i) {
// if (input[i].next && !input[i].stop) {
// if (input[i].pRow == NULL) {
// code = input[i].nextRowFn(input[i].iter, &input[i].pRow);
// if (code) goto _err;
// if (input[i].pRow == NULL) {
// input[i].stop = true;
// input[i].next = false;
// }
// }
// }
// }
// if (input[0].stop && input[1].stop && input[2].stop) {
// break;
// }
// // select maxpoint(s) from mem, imem, fs
// TSDBROW *max[3] = {0};
// int iMax[3] = {-1, -1, -1};
// int nMax = 0;
// TSKEY maxKey = TSKEY_MIN;
// for (int i = 0; i < 3; ++i) {
// if (!input[i].stop && input[i].pRow != NULL) {
// TSDBKEY key = TSDBROW_KEY(input[i].pRow);
// // merging & deduplicating on client side
// if (maxKey <= key.ts) {
// if (maxKey < key.ts) {
// nMax = 0;
// maxKey = key.ts;
// }
// iMax[nMax] = i;
// max[nMax++] = input[i].pRow;
// }
// }
// }
// // delete detection
// TSDBROW *merge[3] = {0};
// int iMerge[3] = {-1, -1, -1};
// int nMerge = 0;
// for (int i = 0; i < nMax; ++i) {
// TSDBKEY maxKey = TSDBROW_KEY(max[i]);
// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline);
// if (!deleted) {
// iMerge[nMerge] = i;
// merge[nMerge++] = max[i];
// }
// input[iMax[i]].next = deleted;
// }
// // merge if nMerge > 1
// if (nMerge > 0) {
// *dup = false;
// if (nMerge == 1) {
// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow);
// if (code) goto _err;
// } else {
// // merge 2 or 3 rows
// SRowMerger merger = {0};
// tRowMergerInit(&merger, merge[0], pTSchema);
// for (int i = 1; i < nMerge; ++i) {
// tRowMerge(&merger, merge[i]);
// }
// tRowMergerGetRow(&merger, ppRow);
// tRowMergerClear(&merger);
// }
// }
// } while (1);
// for (int i = 0; i < 3; ++i) {
// if (input[i].nextRowClearFn) {
// input[i].nextRowClearFn(input[i].iter);
// }
// }
// if (pSkyline) {
// taosArrayDestroy(pSkyline);
// }
// taosMemoryFreeClear(pTSchema);
// return code;
// _err:
// for (int i = 0; i < 3; ++i) {
// if (input[i].nextRowClearFn) {
// input[i].nextRowClearFn(input[i].iter);
// }
// }
// if (pSkyline) {
// taosArrayDestroy(pSkyline);
// }
// taosMemoryFreeClear(pTSchema);
// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
// return code;
// }
// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, STSRow **ppRow) {
static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) {
int32_t code = 0;
SArray *pSkyline = NULL;
STSRow *pRow = NULL;
STSRow **ppRow = &pRow;
STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1);
int16_t nCol = pTSchema->numOfCols;
// SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal));
SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol));
tb_uid_t suid = getTableSuidByUid(uid, pTsdb);
STbData *pMem = NULL;
if (pTsdb->mem) {
tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem);
}
STbData *pIMem = NULL;
if (pTsdb->imem) {
tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem);
}
*ppLastArray = NULL;
pSkyline = taosArrayInit(32, sizeof(TSDBKEY));
SDelIdx delIdx;
SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
if (pDelFile) {
SDelFReader *pDelFReader;
code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
if (code) goto _err;
code = getTableDelIdx(pDelFReader, suid, uid, &delIdx);
if (code) goto _err;
code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline);
if (code) goto _err;
tsdbDelFReaderClose(&pDelFReader);
} else {
code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline);
if (code) goto _err;
}
int64_t iSkyline = taosArrayGetSize(pSkyline) - 1;
SBlockIdx idx = {.suid = suid, .uid = uid};
SFSNextRowIter fsState = {0};
fsState.state = SFSNEXTROW_FS;
fsState.pTsdb = pTsdb;
fsState.pBlockIdxExp = &idx;
SMemNextRowIter memState = {0};
SMemNextRowIter imemState = {0};
TSDBROW memRow, imemRow, fsRow;
TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL},
{&imemRow, true, false, &imemState, getNextRowFromMem, NULL},
{&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}};
if (pMem) {
memState.pMem = pMem;
memState.state = SMEMNEXTROW_ENTER;
input[0].stop = false;
input[0].next = true;
}
if (pIMem) {
imemState.pMem = pIMem;
imemState.state = SMEMNEXTROW_ENTER;
input[1].stop = false;
input[1].next = true;
}
int16_t nilColCount = nCol - 1; // count of null & none cols
int iCol = 0; // index of first nil col index from left to right
bool setICol = false;
do {
for (int i = 0; i < 3; ++i) {
if (input[i].next && !input[i].stop) {
code = input[i].nextRowFn(input[i].iter, &input[i].pRow);
if (code) goto _err;
if (input[i].pRow == NULL) {
input[i].stop = true;
input[i].next = false;
}
}
}
if (input[0].stop && input[1].stop && input[2].stop) {
break;
}
// select maxpoint(s) from mem, imem, fs
TSDBROW *max[3] = {0};
int iMax[3] = {-1, -1, -1};
int nMax = 0;
TSKEY maxKey = TSKEY_MIN;
for (int i = 0; i < 3; ++i) {
if (!input[i].stop && input[i].pRow != NULL) {
TSDBKEY key = TSDBROW_KEY(input[i].pRow);
// merging & deduplicating on client side
if (maxKey <= key.ts) {
if (maxKey < key.ts) {
nMax = 0;
maxKey = key.ts;
}
iMax[nMax] = i;
max[nMax++] = input[i].pRow;
}
}
}
// delete detection
TSDBROW *merge[3] = {0};
int iMerge[3] = {-1, -1, -1};
int nMerge = 0;
for (int i = 0; i < nMax; ++i) {
TSDBKEY maxKey = TSDBROW_KEY(max[i]);
bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline);
if (!deleted) {
iMerge[nMerge] = iMax[i];
merge[nMerge++] = max[i];
}
input[iMax[i]].next = deleted;
}
// merge if nMerge > 1
if (nMerge > 0) {
if (nMerge == 1) {
code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow);
if (code) goto _err;
} else {
// merge 2 or 3 rows
SRowMerger merger = {0};
tRowMergerInit(&merger, merge[0], pTSchema);
for (int i = 1; i < nMerge; ++i) {
tRowMerge(&merger, merge[i]);
}
tRowMergerGetRow(&merger, ppRow);
tRowMergerClear(&merger);
}
} else {
/* *ppRow = NULL; */
/* return code; */
continue;
}
if (iCol == 0) {
STColumn *pTColumn = &pTSchema->columns[0];
SColVal *pColVal = &(SColVal){0};
*pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey});
// if (taosArrayPush(pColArray, pColVal) == NULL) {
if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
++iCol;
setICol = false;
for (int16_t i = iCol; i < nCol; ++i) {
// tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal);
tTSRowGetVal(*ppRow, pTSchema, i, pColVal);
// if (taosArrayPush(pColArray, pColVal) == NULL) {
if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
if (pColVal->isNull || pColVal->isNone) {
for (int j = 0; j < nMerge; ++j) {
SColVal jColVal = {0};
tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal);
if (jColVal.isNull || jColVal.isNone) {
input[iMerge[j]].next = true;
}
}
if (!setICol) {
iCol = i;
setICol = true;
}
} else {
--nilColCount;
}
}
if (*ppRow) {
taosMemoryFreeClear(*ppRow);
}
continue;
}
setICol = false;
for (int16_t i = iCol; i < nCol; ++i) {
SColVal colVal = {0};
tTSRowGetVal(*ppRow, pTSchema, i, &colVal);
TSKEY rowTs = (*ppRow)->ts;
// SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i);
SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i);
SColVal *tColVal = &tTsVal->colVal;
if (!colVal.isNone && !colVal.isNull) {
if (tColVal->isNull || tColVal->isNone) {
// taosArraySet(pColArray, i, &colVal);
taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal});
--nilColCount;
}
} else {
if ((tColVal->isNull || tColVal->isNone) && !setICol) {
iCol = i;
setICol = true;
for (int j = 0; j < nMerge; ++j) {
SColVal jColVal = {0};
tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal);
if (jColVal.isNull || jColVal.isNone) {
input[iMerge[j]].next = true;
}
}
}
}
}
if (*ppRow) {
taosMemoryFreeClear(*ppRow);
}
} while (nilColCount > 0);
// if () new ts row from pColArray if non empty
/* if (taosArrayGetSize(pColArray) == nCol) { */
/* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */
/* if (code) goto _err; */
/* } */
/* taosArrayDestroy(pColArray); */
if (taosArrayGetSize(pColArray) <= 0) {
*ppLastArray = NULL;
taosArrayDestroy(pColArray);
} else {
*ppLastArray = pColArray;
}
if (*ppRow) {
taosMemoryFreeClear(*ppRow);
}
for (int i = 0; i < 3; ++i) {
if (input[i].nextRowClearFn) {
input[i].nextRowClearFn(input[i].iter);
}
}
if (pSkyline) {
taosArrayDestroy(pSkyline);
}
taosMemoryFreeClear(pTSchema);
return code;
_err:
taosArrayDestroy(pColArray);
if (*ppRow) {
taosMemoryFreeClear(*ppRow);
}
for (int i = 0; i < 3; ++i) {
if (input[i].nextRowClearFn) {
input[i].nextRowClearFn(input[i].iter);
}
}
if (pSkyline) {
taosArrayDestroy(pSkyline);
}
taosMemoryFreeClear(pTSchema);
tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
// static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) {
// int32_t code = 0;
// SArray *pSkyline = NULL;
// STSRow *pRow = NULL;
// STSRow **ppRow = &pRow;
// STSchema *pTSchema = metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1);
// int16_t nCol = pTSchema->numOfCols;
// // SArray *pColArray = taosArrayInit(nCol, sizeof(SColVal));
// SArray *pColArray = taosArrayInit(nCol, sizeof(SLastCol));
// tb_uid_t suid = getTableSuidByUid(uid, pTsdb);
// STbData *pMem = NULL;
// if (pTsdb->mem) {
// tsdbGetTbDataFromMemTable(pTsdb->mem, suid, uid, &pMem);
// }
// STbData *pIMem = NULL;
// if (pTsdb->imem) {
// tsdbGetTbDataFromMemTable(pTsdb->imem, suid, uid, &pIMem);
// }
// *ppLastArray = NULL;
// pSkyline = taosArrayInit(32, sizeof(TSDBKEY));
// SDelIdx delIdx;
// SDelFile *pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
// if (pDelFile) {
// SDelFReader *pDelFReader;
// code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
// if (code) goto _err;
// code = getTableDelIdx(pDelFReader, suid, uid, &delIdx);
// if (code) goto _err;
// code = getTableDelSkyline(pMem, pIMem, pDelFReader, &delIdx, pSkyline);
// if (code) goto _err;
// tsdbDelFReaderClose(&pDelFReader);
// } else {
// code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pSkyline);
// if (code) goto _err;
// }
// int64_t iSkyline = taosArrayGetSize(pSkyline) - 1;
// SBlockIdx idx = {.suid = suid, .uid = uid};
// SFSNextRowIter fsState = {0};
// fsState.state = SFSNEXTROW_FS;
// fsState.pTsdb = pTsdb;
// fsState.pBlockIdxExp = &idx;
// SMemNextRowIter memState = {0};
// SMemNextRowIter imemState = {0};
// TSDBROW memRow, imemRow, fsRow;
// TsdbNextRowState input[3] = {{&memRow, true, false, &memState, getNextRowFromMem, NULL},
// {&imemRow, true, false, &imemState, getNextRowFromMem, NULL},
// {&fsRow, false, true, &fsState, getNextRowFromFS, clearNextRowFromFS}};
// if (pMem) {
// memState.pMem = pMem;
// memState.state = SMEMNEXTROW_ENTER;
// input[0].stop = false;
// input[0].next = true;
// }
// if (pIMem) {
// imemState.pMem = pIMem;
// imemState.state = SMEMNEXTROW_ENTER;
// input[1].stop = false;
// input[1].next = true;
// }
// int16_t nilColCount = nCol - 1; // count of null & none cols
// int iCol = 0; // index of first nil col index from left to right
// bool setICol = false;
// do {
// for (int i = 0; i < 3; ++i) {
// if (input[i].next && !input[i].stop) {
// code = input[i].nextRowFn(input[i].iter, &input[i].pRow);
// if (code) goto _err;
// if (input[i].pRow == NULL) {
// input[i].stop = true;
// input[i].next = false;
// }
// }
// }
// if (input[0].stop && input[1].stop && input[2].stop) {
// break;
// }
// // select maxpoint(s) from mem, imem, fs
// TSDBROW *max[3] = {0};
// int iMax[3] = {-1, -1, -1};
// int nMax = 0;
// TSKEY maxKey = TSKEY_MIN;
// for (int i = 0; i < 3; ++i) {
// if (!input[i].stop && input[i].pRow != NULL) {
// TSDBKEY key = TSDBROW_KEY(input[i].pRow);
// // merging & deduplicating on client side
// if (maxKey <= key.ts) {
// if (maxKey < key.ts) {
// nMax = 0;
// maxKey = key.ts;
// }
// iMax[nMax] = i;
// max[nMax++] = input[i].pRow;
// }
// }
// }
// // delete detection
// TSDBROW *merge[3] = {0};
// int iMerge[3] = {-1, -1, -1};
// int nMerge = 0;
// for (int i = 0; i < nMax; ++i) {
// TSDBKEY maxKey = TSDBROW_KEY(max[i]);
// bool deleted = tsdbKeyDeleted(&maxKey, pSkyline, &iSkyline);
// if (!deleted) {
// iMerge[nMerge] = iMax[i];
// merge[nMerge++] = max[i];
// }
// input[iMax[i]].next = deleted;
// }
// // merge if nMerge > 1
// if (nMerge > 0) {
// if (nMerge == 1) {
// code = tsRowFromTsdbRow(pTSchema, merge[nMerge - 1], ppRow);
// if (code) goto _err;
// } else {
// // merge 2 or 3 rows
// SRowMerger merger = {0};
// tRowMergerInit(&merger, merge[0], pTSchema);
// for (int i = 1; i < nMerge; ++i) {
// tRowMerge(&merger, merge[i]);
// }
// tRowMergerGetRow(&merger, ppRow);
// tRowMergerClear(&merger);
// }
// } else {
// /* *ppRow = NULL; */
// /* return code; */
// continue;
// }
// if (iCol == 0) {
// STColumn *pTColumn = &pTSchema->columns[0];
// SColVal *pColVal = &(SColVal){0};
// *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = maxKey});
// // if (taosArrayPush(pColArray, pColVal) == NULL) {
// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) {
// code = TSDB_CODE_OUT_OF_MEMORY;
// goto _err;
// }
// ++iCol;
// setICol = false;
// for (int16_t i = iCol; i < nCol; ++i) {
// // tsdbRowGetColVal(*ppRow, pTSchema, i, pColVal);
// tTSRowGetVal(*ppRow, pTSchema, i, pColVal);
// // if (taosArrayPush(pColArray, pColVal) == NULL) {
// if (taosArrayPush(pColArray, &(SLastCol){.ts = maxKey, .colVal = *pColVal}) == NULL) {
// code = TSDB_CODE_OUT_OF_MEMORY;
// goto _err;
// }
// if (pColVal->isNull || pColVal->isNone) {
// for (int j = 0; j < nMerge; ++j) {
// SColVal jColVal = {0};
// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal);
// if (jColVal.isNull || jColVal.isNone) {
// input[iMerge[j]].next = true;
// }
// }
// if (!setICol) {
// iCol = i;
// setICol = true;
// }
// } else {
// --nilColCount;
// }
// }
// if (*ppRow) {
// taosMemoryFreeClear(*ppRow);
// }
// continue;
// }
// setICol = false;
// for (int16_t i = iCol; i < nCol; ++i) {
// SColVal colVal = {0};
// tTSRowGetVal(*ppRow, pTSchema, i, &colVal);
// TSKEY rowTs = (*ppRow)->ts;
// // SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, i);
// SLastCol *tTsVal = (SLastCol *)taosArrayGet(pColArray, i);
// SColVal *tColVal = &tTsVal->colVal;
// if (!colVal.isNone && !colVal.isNull) {
// if (tColVal->isNull || tColVal->isNone) {
// // taosArraySet(pColArray, i, &colVal);
// taosArraySet(pColArray, i, &(SLastCol){.ts = rowTs, .colVal = colVal});
// --nilColCount;
// }
// } else {
// if ((tColVal->isNull || tColVal->isNone) && !setICol) {
// iCol = i;
// setICol = true;
// for (int j = 0; j < nMerge; ++j) {
// SColVal jColVal = {0};
// tsdbRowGetColVal(merge[j], pTSchema, i, &jColVal);
// if (jColVal.isNull || jColVal.isNone) {
// input[iMerge[j]].next = true;
// }
// }
// }
// }
// }
// if (*ppRow) {
// taosMemoryFreeClear(*ppRow);
// }
// } while (nilColCount > 0);
// // if () new ts row from pColArray if non empty
// /* if (taosArrayGetSize(pColArray) == nCol) { */
// /* code = tdSTSRowNew(pColArray, pTSchema, ppRow); */
// /* if (code) goto _err; */
// /* } */
// /* taosArrayDestroy(pColArray); */
// if (taosArrayGetSize(pColArray) <= 0) {
// *ppLastArray = NULL;
// taosArrayDestroy(pColArray);
// } else {
// *ppLastArray = pColArray;
// }
// if (*ppRow) {
// taosMemoryFreeClear(*ppRow);
// }
// for (int i = 0; i < 3; ++i) {
// if (input[i].nextRowClearFn) {
// input[i].nextRowClearFn(input[i].iter);
// }
// }
// if (pSkyline) {
// taosArrayDestroy(pSkyline);
// }
// taosMemoryFreeClear(pTSchema);
// return code;
// _err:
// taosArrayDestroy(pColArray);
// if (*ppRow) {
// taosMemoryFreeClear(*ppRow);
// }
// for (int i = 0; i < 3; ++i) {
// if (input[i].nextRowClearFn) {
// input[i].nextRowClearFn(input[i].iter);
// }
// }
// if (pSkyline) {
// taosArrayDestroy(pSkyline);
// }
// taosMemoryFreeClear(pTSchema);
// tsdbError("vgId:%d merge last_row failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
// return code;
// }
int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, STsdb *pTsdb, LRUHandle **handle) {
int32_t code = 0;
......
......@@ -29,6 +29,7 @@ typedef struct {
int32_t minRow;
int32_t maxRow;
int8_t cmprAlg;
STsdbFS fs;
// --------------
TSKEY nextKey; // reset by each table commit
int32_t commitFid;
......@@ -119,9 +120,6 @@ int32_t tsdbCommit(STsdb *pTsdb) {
code = tsdbCommitDel(&commith);
if (code) goto _err;
code = tsdbCommitCache(&commith);
if (code) goto _err;
// end commit
code = tsdbEndCommit(&commith, 0);
if (code) goto _err;
......@@ -158,7 +156,7 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) {
goto _err;
}
SDelFile *pDelFileR = pTsdb->pFS->nState->pDelFile;
SDelFile *pDelFileR = pCommitter->fs.pDelFile;
if (pDelFileR) {
code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL);
if (code) goto _err;
......@@ -247,7 +245,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) {
code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter);
if (code) goto _err;
code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pCommitter->pDelFWriter->fDel);
code = tsdbFSUpsertDelFile(&pCommitter->fs, &pCommitter->pDelFWriter->fDel);
if (code) goto _err;
code = tsdbDelFWriterClose(&pCommitter->pDelFWriter, 1);
......@@ -281,7 +279,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) {
taosArrayClear(pCommitter->aBlockIdx);
tMapDataReset(&pCommitter->oBlockMap);
tBlockDataReset(&pCommitter->oBlockData);
pRSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, pCommitter->commitFid, TD_EQ);
pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid},
tDFileSetCmprFn, TD_EQ);
if (pRSet) {
code = tsdbDataFReaderOpen(&pCommitter->pReader, pTsdb, pRSet);
if (code) goto _err;
......@@ -860,7 +859,7 @@ static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) {
if (code) goto _err;
// upsert SDFileSet
code = tsdbFSStateUpsertDFileSet(pCommitter->pTsdb->pFS->nState, &pCommitter->pWriter->wSet);
code = tsdbFSUpsertFSet(&pCommitter->fs, &pCommitter->pWriter->wSet);
if (code) goto _err;
// close and sync
......@@ -978,7 +977,7 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) {
pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows;
pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression;
code = tsdbFSBegin(pTsdb->pFS);
code = tsdbFSCopy(pTsdb, &pCommitter->fs);
if (code) goto _err;
return code;
......@@ -1147,28 +1146,33 @@ _err:
return code;
}
static int32_t tsdbCommitCache(SCommitter *pCommitter) {
int32_t code = 0;
// TODO
return code;
}
static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) {
int32_t code = 0;
STsdb *pTsdb = pCommitter->pTsdb;
SMemTable *pMemTable = pTsdb->imem;
if (eno == 0) {
code = tsdbFSCommit(pTsdb->pFS);
} else {
code = tsdbFSRollback(pTsdb->pFS);
}
ASSERT(eno);
code = tsdbFSCommit1(pTsdb, &pCommitter->fs);
if (code) goto _err;
// lock
taosThreadRwlockWrlock(&pTsdb->rwLock);
// commit or rollback
code = tsdbFSCommit2(pTsdb, &pCommitter->fs);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
goto _err;
}
pTsdb->imem = NULL;
// unlock
taosThreadRwlockUnlock(&pTsdb->rwLock);
tsdbUnrefMemTable(pMemTable);
tsdbFSDestroy(&pCommitter->fs);
tsdbInfo("vgId:%d tsdb end commit", TD_VID(pTsdb->pVnode));
return code;
......
......@@ -16,67 +16,41 @@
#include "tsdb.h"
// =================================================================================================
static int32_t tPutFSState(uint8_t *p, STsdbFSState *pState) {
static int32_t tsdbEncodeFS(uint8_t *p, STsdbFS *pFS) {
int32_t n = 0;
int8_t hasDel = pState->pDelFile ? 1 : 0;
uint32_t nDFileSet = taosArrayGetSize(pState->aDFileSet);
int8_t hasDel = pFS->pDelFile ? 1 : 0;
uint32_t nSet = taosArrayGetSize(pFS->aDFileSet);
// SDelFile
n += tPutI8(p ? p + n : p, hasDel);
if (hasDel) {
n += tPutDelFile(p ? p + n : p, pState->pDelFile);
n += tPutDelFile(p ? p + n : p, pFS->pDelFile);
}
// SArray<SDFileSet>
n += tPutU32v(p ? p + n : p, nDFileSet);
for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) {
n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pState->aDFileSet, iDFileSet));
n += tPutU32v(p ? p + n : p, nSet);
for (uint32_t iSet = 0; iSet < nSet; iSet++) {
n += tPutDFileSet(p ? p + n : p, (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet));
}
return n;
}
static int32_t tGetFSState(uint8_t *p, STsdbFSState *pState) {
int32_t n = 0;
int8_t hasDel;
uint32_t nDFileSet;
SDFileSet *pSet = &(SDFileSet){0};
// SDelFile
n += tGetI8(p + n, &hasDel);
if (hasDel) {
pState->pDelFile = &pState->delFile;
n += tGetDelFile(p + n, pState->pDelFile);
} else {
pState->pDelFile = NULL;
}
// SArray<SDFileSet>
taosArrayClear(pState->aDFileSet);
n += tGetU32v(p + n, &nDFileSet);
for (uint32_t iDFileSet = 0; iDFileSet < nDFileSet; iDFileSet++) {
n += tGetDFileSet(p + n, pSet);
taosArrayPush(pState->aDFileSet, pSet);
}
return n;
}
static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) {
static int32_t tsdbGnrtCurrent(STsdb *pTsdb, STsdbFS *pFS, char *fname) {
int32_t code = 0;
int64_t n;
int64_t size;
uint8_t *pData;
uint8_t *pData = NULL;
TdFilePtr pFD = NULL;
// to binary
size = tPutFSState(NULL, pState) + sizeof(TSCKSUM);
size = tsdbEncodeFS(NULL, pFS) + sizeof(TSCKSUM);
pData = taosMemoryMalloc(size);
if (pData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
n = tPutFSState(pData, pState);
n = tsdbEncodeFS(pData, pFS);
ASSERT(n + sizeof(TSCKSUM) == size);
taosCalcChecksumAppend(0, pData, size);
......@@ -104,585 +78,1008 @@ static int32_t tsdbGnrtCurrent(const char *fname, STsdbFSState *pState) {
return code;
_err:
tsdbError("tsdb gnrt current failed since %s", tstrerror(code));
tsdbError("vgId:%d tsdb gnrt current failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
if (pData) taosMemoryFree(pData);
return code;
}
static int32_t tsdbLoadCurrentState(STsdbFS *pFS, STsdbFSState *pState) {
int32_t code = 0;
int64_t size;
int64_t n;
char fname[TSDB_FILENAME_LEN];
uint8_t *pData = NULL;
TdFilePtr pFD;
// static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) {
// int32_t code = 0;
// char fname[TSDB_FILENAME_LEN];
// if (pFrom && pTo) {
// bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id);
// // head
// if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) {
// ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size);
// ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset);
// } else {
// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname);
// taosRemoveFile(fname);
// }
// // data
// if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) {
// if (pFrom->pDataF->size > pTo->pDataF->size) {
// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE);
// if (code) goto _err;
// }
// } else {
// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname);
// taosRemoveFile(fname);
// }
// // last
// if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) {
// if (pFrom->pLastF->size > pTo->pLastF->size) {
// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE);
// if (code) goto _err;
// }
// } else {
// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname);
// taosRemoveFile(fname);
// }
// // sma
// if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) {
// if (pFrom->pSmaF->size > pTo->pSmaF->size) {
// code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE);
// if (code) goto _err;
// }
// } else {
// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname);
// taosRemoveFile(fname);
// }
// } else if (pFrom) {
// // head
// tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname);
// taosRemoveFile(fname);
// // data
// tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname);
// taosRemoveFile(fname);
// // last
// tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname);
// taosRemoveFile(fname);
// // fsm
// tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname);
// taosRemoveFile(fname);
// }
// return code;
// _err:
// tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
// return code;
// }
// static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) {
// int32_t code = 0;
// char fname[TSDB_FILENAME_LEN];
// if (pFrom && pTo) {
// if (!tsdbDelFileIsSame(pFrom, pTo)) {
// tsdbDelFileName(pFS->pTsdb, pFrom, fname);
// if (taosRemoveFile(fname) < 0) {
// code = TAOS_SYSTEM_ERROR(errno);
// goto _err;
// }
// }
// } else if (pFrom) {
// tsdbDelFileName(pFS->pTsdb, pFrom, fname);
// if (taosRemoveFile(fname) < 0) {
// code = TAOS_SYSTEM_ERROR(errno);
// goto _err;
// }
// } else {
// // do nothing
// }
// return code;
// _err:
// tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
// return code;
// }
// static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) {
// int32_t code = 0;
// int32_t iFrom = 0;
// int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet);
// int32_t iTo = 0;
// int32_t nTo = taosArrayGetSize(pTo->aDFileSet);
// SDFileSet *pDFileSetFrom;
// SDFileSet *pDFileSetTo;
// // SDelFile
// code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile);
// if (code) goto _err;
// // SDFileSet
// while (iFrom < nFrom && iTo < nTo) {
// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom);
// pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo);
// if (pDFileSetFrom->fid == pDFileSetTo->fid) {
// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo);
// if (code) goto _err;
// iFrom++;
// iTo++;
// } else if (pDFileSetFrom->fid < pDFileSetTo->fid) {
// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL);
// if (code) goto _err;
// iFrom++;
// } else {
// iTo++;
// }
// }
// while (iFrom < nFrom) {
// pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom);
// code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL);
// if (code) goto _err;
// iFrom++;
// }
// #if 0
// // do noting
// while (iTo < nTo) {
// pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo);
// code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo);
// if (code) goto _err;
// iTo++;
// }
// #endif
// return code;
// _err:
// tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
// return code;
// }
void tsdbFSDestroy(STsdbFS *pFS) {
if (pFS->pDelFile) {
taosMemoryFree(pFS->pDelFile);
}
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP,
pFS->pTsdb->path, TD_DIRSEP);
for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet);
taosMemoryFree(pSet->pHeadF);
taosMemoryFree(pSet->pDataF);
taosMemoryFree(pSet->pLastF);
taosMemoryFree(pSet->pSmaF);
}
if (!taosCheckExistFile(fname)) {
// create an empry CURRENT file if not exists
code = tsdbGnrtCurrent(fname, pState);
if (code) goto _err;
} else {
// open the file and load
pFD = taosOpenFile(fname, TD_FILE_READ);
if (pFD == NULL) {
taosArrayDestroy(pFS->aDFileSet);
}
static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
int32_t code = 0;
int64_t size;
char fname[TSDB_FILENAME_LEN];
// SDelFile
if (pTsdb->fs.pDelFile) {
tsdbDelFileName(pTsdb, pTsdb->fs.pDelFile, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (taosFStatFile(pFD, &size, NULL) < 0) {
code = TAOS_SYSTEM_ERROR(errno);
if (size != pTsdb->fs.pDelFile->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
}
}
pData = taosMemoryMalloc(size);
if (pData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
// SArray<SDFileSet>
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
// head =========
tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (size != pSet->pHeadF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
}
n = taosReadFile(pFD, pData, size);
if (n < 0) {
// data =========
tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (size < pSet->pDataF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
} else if (size > pSet->pDataF->size) {
code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE);
if (code) goto _err;
}
if (!taosCheckChecksumWhole(pData, size)) {
// last ===========
tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (size != pSet->pLastF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
}
taosCloseFile(&pFD);
// sma =============
tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
if (size < pSet->pSmaF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
} else if (size > pSet->pSmaF->size) {
code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE);
if (code) goto _err;
}
}
// decode
tGetFSState(pData, pState);
{
// remove those invalid files (todo)
}
if (pData) taosMemoryFree(pData);
return code;
_err:
tsdbError("vgId:%d tsdb load current state failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
if (pData) taosMemoryFree(pData);
tsdbError("vgId:%d tsdb scan and try fix fs failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tsdbApplyDFileSetChange(STsdbFS *pFS, SDFileSet *pFrom, SDFileSet *pTo) {
int32_t code = 0;
char fname[TSDB_FILENAME_LEN];
int32_t tDFileSetCmprFn(const void *p1, const void *p2) {
if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) {
return -1;
} else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) {
return 1;
}
return 0;
}
if (pFrom && pTo) {
bool isSameDisk = (pFrom->diskId.level == pTo->diskId.level) && (pFrom->diskId.id == pTo->diskId.id);
static int32_t tsdbRecoverFS(STsdb *pTsdb, uint8_t *pData, int64_t nData) {
int32_t code = 0;
int8_t hasDel;
uint32_t nSet;
int32_t n;
// SDelFile
n = 0;
n += tGetI8(pData + n, &hasDel);
if (hasDel) {
pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pTsdb->fs.pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
pTsdb->fs.pDelFile->nRef = 1;
n += tGetDelFile(pData + n, pTsdb->fs.pDelFile);
} else {
pTsdb->fs.pDelFile = NULL;
}
// SArray<SDFileSet>
taosArrayClear(pTsdb->fs.aDFileSet);
n += tGetU32v(pData + n, &nSet);
for (uint32_t iSet = 0; iSet < nSet; iSet++) {
SDFileSet fSet;
// head
if (isSameDisk && pFrom->pHeadF->commitID == pTo->pHeadF->commitID) {
ASSERT(pFrom->pHeadF->size == pTo->pHeadF->size);
ASSERT(pFrom->pHeadF->offset == pTo->pHeadF->offset);
} else {
tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname);
taosRemoveFile(fname);
fSet.pHeadF = (SHeadFile *)taosMemoryCalloc(1, sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
fSet.pHeadF->nRef = 1;
// data
if (isSameDisk && pFrom->pDataF->commitID == pTo->pDataF->commitID) {
if (pFrom->pDataF->size > pTo->pDataF->size) {
code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_DATA_FILE);
if (code) goto _err;
}
} else {
tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname);
taosRemoveFile(fname);
fSet.pDataF = (SDataFile *)taosMemoryCalloc(1, sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
fSet.pDataF->nRef = 1;
// last
if (isSameDisk && pFrom->pLastF->commitID == pTo->pLastF->commitID) {
if (pFrom->pLastF->size > pTo->pLastF->size) {
code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_LAST_FILE);
if (code) goto _err;
}
} else {
tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname);
taosRemoveFile(fname);
fSet.pLastF = (SLastFile *)taosMemoryCalloc(1, sizeof(SLastFile));
if (fSet.pLastF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
fSet.pLastF->nRef = 1;
// sma
if (isSameDisk && pFrom->pSmaF->commitID == pTo->pSmaF->commitID) {
if (pFrom->pSmaF->size > pTo->pSmaF->size) {
code = tsdbDFileRollback(pFS->pTsdb, pTo, TSDB_SMA_FILE);
if (code) goto _err;
}
} else {
tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname);
taosRemoveFile(fname);
fSet.pSmaF = (SSmaFile *)taosMemoryCalloc(1, sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
} else if (pFrom) {
// head
tsdbHeadFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pHeadF, fname);
taosRemoveFile(fname);
// data
tsdbDataFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pDataF, fname);
taosRemoveFile(fname);
fSet.pSmaF->nRef = 1;
// last
tsdbLastFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pLastF, fname);
taosRemoveFile(fname);
n += tGetDFileSet(pData + n, &fSet);
// fsm
tsdbSmaFileName(pFS->pTsdb, pFrom->diskId, pFrom->fid, pFrom->pSmaF, fname);
taosRemoveFile(fname);
if (taosArrayPush(pTsdb->fs.aDFileSet, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
}
ASSERT(n + sizeof(TSCKSUM) == nData);
return code;
_err:
tsdbError("vgId:%d tsdb apply disk file set change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tsdbApplyDelFileChange(STsdbFS *pFS, SDelFile *pFrom, SDelFile *pTo) {
// EXPOSED APIS ====================================================================================
int32_t tsdbFSOpen(STsdb *pTsdb) {
int32_t code = 0;
char fname[TSDB_FILENAME_LEN];
if (pFrom && pTo) {
if (!tsdbDelFileIsSame(pFrom, pTo)) {
tsdbDelFileName(pFS->pTsdb, pFrom, fname);
if (taosRemoveFile(fname) < 0) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
// open handle
pTsdb->fs.pDelFile = NULL;
pTsdb->fs.aDFileSet = taosArrayInit(0, sizeof(SDFileSet));
if (pTsdb->fs.aDFileSet == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
// load fs or keep empty
char fname[TSDB_FILENAME_LEN];
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP,
pTsdb->path, TD_DIRSEP);
if (!taosCheckExistFile(fname)) {
// empty one
code = tsdbGnrtCurrent(pTsdb, &pTsdb->fs, fname);
if (code) goto _err;
} else {
// read
TdFilePtr pFD = taosOpenFile(fname, TD_FILE_READ);
if (pFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
} else if (pFrom) {
tsdbDelFileName(pFS->pTsdb, pFrom, fname);
if (taosRemoveFile(fname) < 0) {
int64_t size;
if (taosFStatFile(pFD, &size, NULL) < 0) {
code = TAOS_SYSTEM_ERROR(errno);
taosCloseFile(&pFD);
goto _err;
}
} else {
// do nothing
uint8_t *pData = taosMemoryMalloc(size);
if (pData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
taosCloseFile(&pFD);
goto _err;
}
int64_t n = taosReadFile(pFD, pData, size);
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
taosMemoryFree(pData);
taosCloseFile(&pFD);
goto _err;
}
if (!taosCheckChecksumWhole(pData, size)) {
code = TSDB_CODE_FILE_CORRUPTED;
taosMemoryFree(pData);
taosCloseFile(&pFD);
goto _err;
}
taosCloseFile(&pFD);
// recover fs
code = tsdbRecoverFS(pTsdb, pData, size);
if (code) {
taosMemoryFree(pData);
goto _err;
}
taosMemoryFree(pData);
}
// scan and fix FS
code = tsdbScanAndTryFixFS(pTsdb);
if (code) goto _err;
return code;
_err:
tsdbError("vgId:%d tsdb apply del file change failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tsdbFSApplyDiskChange(STsdbFS *pFS, STsdbFSState *pFrom, STsdbFSState *pTo) {
int32_t code = 0;
int32_t iFrom = 0;
int32_t nFrom = taosArrayGetSize(pFrom->aDFileSet);
int32_t iTo = 0;
int32_t nTo = taosArrayGetSize(pTo->aDFileSet);
SDFileSet *pDFileSetFrom;
SDFileSet *pDFileSetTo;
int32_t tsdbFSClose(STsdb *pTsdb) {
int32_t code = 0;
// SDelFile
code = tsdbApplyDelFileChange(pFS, pFrom->pDelFile, pTo->pDelFile);
if (code) goto _err;
if (pTsdb->fs.pDelFile) {
ASSERT(pTsdb->fs.pDelFile->nRef == 1);
taosMemoryFree(pTsdb->fs.pDelFile);
}
// SDFileSet
while (iFrom < nFrom && iTo < nTo) {
pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom);
pDFileSetTo = (SDFileSet *)taosArrayGet(pTo->aDFileSet, iTo);
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
if (pDFileSetFrom->fid == pDFileSetTo->fid) {
code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, pDFileSetTo);
if (code) goto _err;
// head
ASSERT(pSet->pHeadF->nRef == 1);
taosMemoryFree(pSet->pHeadF);
iFrom++;
iTo++;
} else if (pDFileSetFrom->fid < pDFileSetTo->fid) {
code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL);
if (code) goto _err;
// data
ASSERT(pSet->pDataF->nRef == 1);
taosMemoryFree(pSet->pDataF);
iFrom++;
} else {
iTo++;
}
// last
ASSERT(pSet->pLastF->nRef == 1);
taosMemoryFree(pSet->pLastF);
// sma
ASSERT(pSet->pSmaF->nRef == 1);
taosMemoryFree(pSet->pSmaF);
}
while (iFrom < nFrom) {
pDFileSetFrom = (SDFileSet *)taosArrayGet(pFrom->aDFileSet, iFrom);
code = tsdbApplyDFileSetChange(pFS, pDFileSetFrom, NULL);
if (code) goto _err;
taosArrayClear(pTsdb->fs.aDFileSet);
iFrom++;
return code;
}
int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) {
int32_t code = 0;
pFS->pDelFile = NULL;
pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet));
if (pFS->aDFileSet == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
#if 0
// do noting
while (iTo < nTo) {
pDFileSetTo = (SDFileSet *)taosArrayGetP(pTo->aDFileSet, iTo);
code = tsdbApplyDFileSetChange(pFS, NULL, pDFileSetTo);
if (code) goto _err;
if (pTsdb->fs.pDelFile) {
pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pFS->pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
iTo++;
*pFS->pDelFile = *pTsdb->fs.pDelFile;
}
#endif
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid};
// head
fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
fSet.pHeadF->nRef = 0;
fSet.pHeadF->commitID = pSet->pHeadF->commitID;
fSet.pHeadF->size = pSet->pHeadF->size;
fSet.pHeadF->offset = pSet->pHeadF->offset;
// data
fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
fSet.pDataF->nRef = 0;
fSet.pDataF->commitID = pSet->pDataF->commitID;
fSet.pDataF->size = pSet->pDataF->size;
// data
fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile));
if (fSet.pLastF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
fSet.pLastF->nRef = 0;
fSet.pLastF->commitID = pSet->pLastF->commitID;
fSet.pLastF->size = pSet->pLastF->size;
// last
fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
fSet.pSmaF->nRef = 0;
fSet.pSmaF->commitID = pSet->pSmaF->commitID;
fSet.pSmaF->size = pSet->pSmaF->size;
if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
}
_exit:
return code;
}
int32_t tsdbFSRollback(STsdbFS *pFS) {
int32_t code = 0;
ASSERT(0);
return code;
_err:
tsdbError("vgId:%d tsdb fs apply disk change failed sicne %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
return code;
}
static void tsdbFSDestroy(STsdbFS *pFS) {
if (pFS) {
if (pFS->nState) {
taosArrayDestroy(pFS->nState->aDFileSet);
taosMemoryFree(pFS->nState);
}
int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile) {
int32_t code = 0;
if (pFS->cState) {
taosArrayDestroy(pFS->cState->aDFileSet);
taosMemoryFree(pFS->cState);
if (pFS->pDelFile == NULL) {
pFS->pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pFS->pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
taosMemoryFree(pFS);
}
// TODO
*pFS->pDelFile = *pDelFile;
_exit:
return code;
}
static int32_t tsdbFSCreate(STsdb *pTsdb, STsdbFS **ppFS) {
int32_t code = 0;
STsdbFS *pFS = NULL;
int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet) {
int32_t code = 0;
int32_t idx = taosArraySearchIdx(pFS->aDFileSet, pSet, tDFileSetCmprFn, TD_GE);
if (idx < 0) {
idx = taosArrayGetSize(pFS->aDFileSet);
} else {
SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, idx);
int32_t c = tDFileSetCmprFn(pSet, pDFileSet);
if (c == 0) {
*pDFileSet->pHeadF = *pSet->pHeadF;
*pDFileSet->pDataF = *pSet->pDataF;
*pDFileSet->pLastF = *pSet->pLastF;
*pDFileSet->pSmaF = *pSet->pSmaF;
pFS = (STsdbFS *)taosMemoryCalloc(1, sizeof(*pFS));
if (pFS == NULL) {
goto _exit;
}
}
SDFileSet fSet = {.diskId = pSet->diskId, .fid = pSet->fid};
// head
fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
goto _exit;
}
pFS->pTsdb = pTsdb;
*fSet.pHeadF = *pSet->pHeadF;
pFS->cState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState));
if (pFS->cState == NULL) {
// data
fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
goto _exit;
}
pFS->cState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet));
if (pFS->cState->aDFileSet == NULL) {
*fSet.pDataF = *pSet->pDataF;
// data
fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile));
if (fSet.pLastF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
goto _exit;
}
*fSet.pLastF = *pSet->pLastF;
pFS->nState = (STsdbFSState *)taosMemoryCalloc(1, sizeof(STsdbFSState));
if (pFS->nState == NULL) {
// last
fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
goto _exit;
}
pFS->nState->aDFileSet = taosArrayInit(0, sizeof(SDFileSet));
if (pFS->nState->aDFileSet == NULL) {
*fSet.pSmaF = *pSet->pSmaF;
if (taosArrayInsert(pFS->aDFileSet, idx, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
_exit:
return code;
}
int32_t tsdbFSCommit1(STsdb *pTsdb, STsdbFS *pFSNew) {
int32_t code = 0;
char tfname[TSDB_FILENAME_LEN];
char fname[TSDB_FILENAME_LEN];
snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP,
pTsdb->path, TD_DIRSEP);
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pTsdb->pVnode->pTfs), TD_DIRSEP,
pTsdb->path, TD_DIRSEP);
// gnrt CURRENT.t
code = tsdbGnrtCurrent(pTsdb, pFSNew, tfname);
if (code) goto _err;
// rename
code = taosRenameFile(tfname, fname);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _err;
}
*ppFS = pFS;
return code;
_err:
tsdbError("vgId:%d tsdb fs create failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
tsdbFSDestroy(pFS);
*ppFS = NULL;
tsdbError("vgId:%d tsdb fs commit phase 1 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tsdbScanAndTryFixFS(STsdbFS *pFS, int8_t deepScan) {
int32_t code = 0;
STsdb *pTsdb = pFS->pTsdb;
STfs *pTfs = pTsdb->pVnode->pTfs;
int64_t size;
char fname[TSDB_FILENAME_LEN];
char pHdr[TSDB_FHDR_SIZE];
TdFilePtr pFD;
int32_t tsdbFSCommit2(STsdb *pTsdb, STsdbFS *pFSNew) {
int32_t code = 0;
int32_t nRef;
char fname[TSDB_FILENAME_LEN];
// SDelFile
if (pFS->cState->pDelFile) {
tsdbDelFileName(pTsdb, pFS->cState->pDelFile, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
// del
if (pFSNew->pDelFile) {
SDelFile *pDelFile = pTsdb->fs.pDelFile;
if (size != pFS->cState->pDelFile->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
}
if (pDelFile == NULL || (pDelFile->commitID != pFSNew->pDelFile->commitID)) {
pTsdb->fs.pDelFile = (SDelFile *)taosMemoryMalloc(sizeof(SDelFile));
if (pTsdb->fs.pDelFile == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
if (deepScan) {
// TODO
*pTsdb->fs.pDelFile = *pFSNew->pDelFile;
pTsdb->fs.pDelFile->nRef = 1;
if (pDelFile) {
nRef = atomic_sub_fetch_32(&pDelFile->nRef, 1);
if (nRef == 0) {
tsdbDelFileName(pTsdb, pDelFile, fname);
taosRemoveFile(fname);
taosMemoryFree(pDelFile);
}
}
}
} else {
ASSERT(pTsdb->fs.pDelFile == NULL);
}
// SArray<SDFileSet>
for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) {
SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet);
// data
int32_t iOld = 0;
int32_t iNew = 0;
while (true) {
int32_t nOld = taosArrayGetSize(pTsdb->fs.aDFileSet);
int32_t nNew = taosArrayGetSize(pFSNew->aDFileSet);
SDFileSet fSet;
int8_t sameDisk;
if (iOld >= nOld && iNew >= nNew) break;
SDFileSet *pSetOld = (iOld < nOld) ? taosArrayGet(pTsdb->fs.aDFileSet, iOld) : NULL;
SDFileSet *pSetNew = (iNew < nNew) ? taosArrayGet(pFSNew->aDFileSet, iNew) : NULL;
if (pSetOld && pSetNew) {
if (pSetOld->fid == pSetNew->fid) {
goto _merge_old_and_new;
} else if (pSetOld->fid < pSetNew->fid) {
goto _remove_old;
} else {
goto _add_new;
}
} else if (pSetOld) {
goto _remove_old;
} else {
goto _add_new;
}
// head =========
tsdbHeadFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pHeadF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
_merge_old_and_new:
sameDisk = ((pSetOld->diskId.level == pSetNew->diskId.level) && (pSetOld->diskId.id == pSetNew->diskId.id));
// head
fSet.pHeadF = pSetOld->pHeadF;
if ((!sameDisk) || (pSetOld->pHeadF->commitID != pSetNew->pHeadF->commitID)) {
pSetOld->pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (pSetOld->pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*pSetOld->pHeadF = *pSetNew->pHeadF;
pSetOld->pHeadF->nRef = 1;
nRef = atomic_sub_fetch_32(&fSet.pHeadF->nRef, 1);
if (nRef == 0) {
tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pHeadF, fname);
taosRemoveFile(fname);
taosMemoryFree(fSet.pHeadF);
}
} else {
ASSERT(fSet.pHeadF->size == pSetNew->pHeadF->size);
ASSERT(fSet.pHeadF->offset == pSetNew->pHeadF->offset);
}
if (deepScan) {
// TODO
// data
fSet.pDataF = pSetOld->pDataF;
if ((!sameDisk) || (pSetOld->pDataF->commitID != pSetNew->pDataF->commitID)) {
pSetOld->pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (pSetOld->pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*pSetOld->pDataF = *pSetNew->pDataF;
pSetOld->pDataF->nRef = 1;
nRef = atomic_sub_fetch_32(&fSet.pDataF->nRef, 1);
if (nRef == 0) {
tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pDataF, fname);
taosRemoveFile(fname);
taosMemoryFree(fSet.pDataF);
}
} else {
ASSERT(pSetOld->pDataF->size <= pSetNew->pDataF->size);
pSetOld->pDataF->size = pSetNew->pDataF->size;
}
// data =========
tsdbDataFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pDataF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
// last
fSet.pLastF = pSetOld->pLastF;
if ((!sameDisk) || (pSetOld->pLastF->commitID != pSetNew->pLastF->commitID)) {
pSetOld->pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile));
if (pSetOld->pLastF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*pSetOld->pLastF = *pSetNew->pLastF;
pSetOld->pLastF->nRef = 1;
nRef = atomic_sub_fetch_32(&fSet.pLastF->nRef, 1);
if (nRef == 0) {
tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pLastF, fname);
taosRemoveFile(fname);
taosMemoryFree(fSet.pLastF);
}
} else {
ASSERT(pSetOld->pLastF->size == pSetNew->pLastF->size);
}
if (size < pDFileSet->pDataF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
} else if (size > pDFileSet->pDataF->size) {
ASSERT(0);
// need to rollback the file
// sma
fSet.pSmaF = pSetOld->pSmaF;
if ((!sameDisk) || (pSetOld->pSmaF->commitID != pSetNew->pSmaF->commitID)) {
pSetOld->pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (pSetOld->pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*pSetOld->pSmaF = *pSetNew->pSmaF;
pSetOld->pSmaF->nRef = 1;
nRef = atomic_sub_fetch_32(&fSet.pSmaF->nRef, 1);
if (nRef == 0) {
tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, fSet.pSmaF, fname);
taosRemoveFile(fname);
taosMemoryFree(fSet.pSmaF);
}
} else {
ASSERT(pSetOld->pSmaF->size <= pSetNew->pSmaF->size);
pSetOld->pSmaF->size = pSetNew->pSmaF->size;
}
if (deepScan) {
// TODO
if (!sameDisk) {
pSetOld->diskId = pSetNew->diskId;
}
// last ===========
tsdbLastFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pLastF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
iOld++;
iNew++;
continue;
_remove_old:
nRef = atomic_sub_fetch_32(&pSetOld->pHeadF->nRef, 1);
if (nRef == 0) {
tsdbHeadFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pHeadF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSetOld->pHeadF);
}
if (size < pDFileSet->pLastF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _err;
} else if (size > pDFileSet->pLastF->size) {
ASSERT(0);
// need to rollback the file
nRef = atomic_sub_fetch_32(&pSetOld->pDataF->nRef, 1);
if (nRef == 0) {
tsdbDataFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pDataF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSetOld->pDataF);
}
if (deepScan) {
// TODO
nRef = atomic_sub_fetch_32(&pSetOld->pLastF->nRef, 1);
if (nRef == 0) {
tsdbLastFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pLastF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSetOld->pLastF);
}
// sma =============
tsdbSmaFileName(pTsdb, pDFileSet->diskId, pDFileSet->fid, pDFileSet->pSmaF, fname);
if (taosStatFile(fname, &size, NULL)) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
nRef = atomic_sub_fetch_32(&pSetOld->pSmaF->nRef, 1);
if (nRef == 0) {
tsdbSmaFileName(pTsdb, pSetOld->diskId, pSetOld->fid, pSetOld->pSmaF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSetOld->pSmaF);
}
if (size < pDFileSet->pSmaF->size) {
code = TSDB_CODE_FILE_CORRUPTED;
taosArrayRemove(pTsdb->fs.aDFileSet, iOld);
continue;
_add_new:
fSet.diskId = pSetNew->diskId;
fSet.fid = pSetNew->fid;
// head
fSet.pHeadF = (SHeadFile *)taosMemoryMalloc(sizeof(SHeadFile));
if (fSet.pHeadF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
} else if (size > pDFileSet->pSmaF->size) {
ASSERT(0);
// need to rollback the file
}
*fSet.pHeadF = *pSetNew->pHeadF;
fSet.pHeadF->nRef = 1;
if (deepScan) {
// TODO
// data
fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile));
if (fSet.pDataF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
}
*fSet.pDataF = *pSetNew->pDataF;
fSet.pDataF->nRef = 1;
// remove those invalid files (todo)
#if 0
STfsDir *tdir;
const STfsFile *pf;
// last
fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile));
if (fSet.pLastF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*fSet.pLastF = *pSetNew->pLastF;
fSet.pLastF->nRef = 1;
tdir = tfsOpendir(pTfs, pTsdb->path);
if (tdir == NULL) {
code = TAOS_SYSTEM_ERROR(errno);
goto _err;
}
// sma
fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile));
if (fSet.pSmaF == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
*fSet.pSmaF = *pSetNew->pSmaF;
fSet.pSmaF->nRef = 1;
while ((pf = tfsReaddir(tdir))) {
tfsBasename(pf, fname);
if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
iOld++;
iNew++;
continue;
}
tfsClosedir(tdir);
#endif
return code;
_err:
tsdbError("vgId:%d tsdb scan and try fix fs failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
tsdbError("vgId:%d tsdb fs commit phase 2 failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
static int32_t tDFileSetCmprFn(const void *p1, const void *p2) {
if (((SDFileSet *)p1)->fid < ((SDFileSet *)p2)->fid) {
return -1;
} else if (((SDFileSet *)p1)->fid > ((SDFileSet *)p2)->fid) {
return 1;
}
return 0;
}
// EXPOSED APIS ====================================================================================
int32_t tsdbFSOpen(STsdb *pTsdb, STsdbFS **ppFS) {
int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS) {
int32_t code = 0;
int32_t nRef;
// create handle
code = tsdbFSCreate(pTsdb, ppFS);
if (code) goto _err;
// load current state
code = tsdbLoadCurrentState(*ppFS, (*ppFS)->cState);
if (code) {
tsdbFSDestroy(*ppFS);
goto _err;
pFS->aDFileSet = taosArrayInit(taosArrayGetSize(pTsdb->fs.aDFileSet), sizeof(SDFileSet));
if (pFS->aDFileSet == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
// scan and fix FS
code = tsdbScanAndTryFixFS(*ppFS, 0);
if (code) {
tsdbFSDestroy(*ppFS);
goto _err;
pFS->pDelFile = pTsdb->fs.pDelFile;
if (pFS->pDelFile) {
nRef = atomic_fetch_add_32(&pFS->pDelFile->nRef, 1);
ASSERT(nRef > 0);
}
return code;
_err:
*ppFS = NULL;
tsdbError("vgId:%d tsdb fs open failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
SDFileSet fSet;
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
fSet = *pSet;
int32_t tsdbFSClose(STsdbFS *pFS) {
int32_t code = 0;
tsdbFSDestroy(pFS);
return code;
}
nRef = atomic_fetch_add_32(&pSet->pHeadF->nRef, 1);
ASSERT(nRef > 0);
int32_t tsdbFSBegin(STsdbFS *pFS) {
int32_t code = 0;
nRef = atomic_fetch_add_32(&pSet->pDataF->nRef, 1);
ASSERT(nRef > 0);
// SDelFile
pFS->nState->pDelFile = NULL;
if (pFS->cState->pDelFile) {
pFS->nState->delFile = pFS->cState->delFile;
pFS->nState->pDelFile = &pFS->nState->delFile;
}
nRef = atomic_fetch_add_32(&pSet->pLastF->nRef, 1);
ASSERT(nRef > 0);
// SArray<aDFileSet>
taosArrayClear(pFS->nState->aDFileSet);
for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->cState->aDFileSet); iSet++) {
SDFileSet *pDFileSet = (SDFileSet *)taosArrayGet(pFS->cState->aDFileSet, iSet);
nRef = atomic_fetch_add_32(&pSet->pSmaF->nRef, 1);
ASSERT(nRef > 0);
if (taosArrayPush(pFS->nState->aDFileSet, pDFileSet) == NULL) {
if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
goto _exit;
}
}
return code;
_err:
tsdbError("vgId:%d tsdb fs begin failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
_exit:
return code;
}
int32_t tsdbFSCommit(STsdbFS *pFS) {
int32_t code = 0;
STsdbFSState *pState = pFS->nState;
char tfname[TSDB_FILENAME_LEN];
char fname[TSDB_FILENAME_LEN];
// need lock (todo)
pFS->nState = pFS->cState;
pFS->cState = pState;
snprintf(tfname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT.t", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP,
pFS->pTsdb->path, TD_DIRSEP);
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sCURRENT", tfsGetPrimaryPath(pFS->pTsdb->pVnode->pTfs), TD_DIRSEP,
pFS->pTsdb->path, TD_DIRSEP);
// gnrt CURRENT.t
code = tsdbGnrtCurrent(tfname, pFS->cState);
if (code) goto _err;
void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS) {
int32_t nRef;
char fname[TSDB_FILENAME_LEN];
// rename
code = taosRenameFile(tfname, fname);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _err;
if (pFS->pDelFile) {
nRef = atomic_sub_fetch_32(&pFS->pDelFile->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbDelFileName(pTsdb, pFS->pDelFile, fname);
taosRemoveFile(fname);
taosMemoryFree(pFS->pDelFile);
}
}
// apply commit on disk
code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState);
if (code) goto _err;
for (int32_t iSet = 0; iSet < taosArrayGetSize(pFS->aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pFS->aDFileSet, iSet);
return code;
_err:
tsdbError("vgId:%d tsdb fs commit failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbFSRollback(STsdbFS *pFS) {
int32_t code = 0;
code = tsdbFSApplyDiskChange(pFS, pFS->nState, pFS->cState);
if (code) goto _err;
return code;
_err:
tsdbError("vgId:%d tsdb fs rollback failed since %s", TD_VID(pFS->pTsdb->pVnode), tstrerror(code));
return code;
}
int32_t tsdbFSStateUpsertDelFile(STsdbFSState *pState, SDelFile *pDelFile) {
int32_t code = 0;
pState->delFile = *pDelFile;
pState->pDelFile = &pState->delFile;
return code;
}
// head
nRef = atomic_sub_fetch_32(&pSet->pHeadF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSet->pHeadF);
}
int32_t tsdbFSStateUpsertDFileSet(STsdbFSState *pState, SDFileSet *pSet) {
int32_t code = 0;
int32_t idx = taosArraySearchIdx(pState->aDFileSet, pSet, tDFileSetCmprFn, TD_GE);
// data
nRef = atomic_sub_fetch_32(&pSet->pDataF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSet->pDataF);
}
if (idx < 0) {
if (taosArrayPush(pState->aDFileSet, pSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
// last
nRef = atomic_sub_fetch_32(&pSet->pLastF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSet->pLastF);
}
} else {
SDFileSet *tDFileSet = (SDFileSet *)taosArrayGet(pState->aDFileSet, idx);
int32_t c = tDFileSetCmprFn(pSet, tDFileSet);
if (c == 0) {
taosArraySet(pState->aDFileSet, idx, pSet);
} else {
if (taosArrayInsert(pState->aDFileSet, idx, pSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
// sma
nRef = atomic_sub_fetch_32(&pSet->pSmaF->nRef, 1);
ASSERT(nRef >= 0);
if (nRef == 0) {
tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
taosRemoveFile(fname);
taosMemoryFree(pSet->pSmaF);
}
}
_exit:
return code;
}
void tsdbFSStateDeleteDFileSet(STsdbFSState *pState, int32_t fid) {
int32_t idx;
idx = taosArraySearchIdx(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);
ASSERT(idx >= 0);
taosArrayRemove(pState->aDFileSet, idx);
}
SDelFile *tsdbFSStateGetDelFile(STsdbFSState *pState) { return pState->pDelFile; }
SDFileSet *tsdbFSStateGetDFileSet(STsdbFSState *pState, int32_t fid, int32_t flag) {
return (SDFileSet *)taosArraySearch(pState->aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, flag);
}
taosArrayDestroy(pFS->aDFileSet);
}
\ No newline at end of file
......@@ -122,21 +122,11 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) {
// truncate
switch (ftype) {
case TSDB_HEAD_FILE:
size = pSet->pHeadF->size;
tsdbHeadFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pHeadF, fname);
tPutHeadFile(hdr, pSet->pHeadF);
break;
case TSDB_DATA_FILE:
size = pSet->pDataF->size;
tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
tPutDataFile(hdr, pSet->pDataF);
break;
case TSDB_LAST_FILE:
size = pSet->pLastF->size;
tsdbLastFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pLastF, fname);
tPutLastFile(hdr, pSet->pLastF);
break;
case TSDB_SMA_FILE:
size = pSet->pSmaF->size;
tsdbSmaFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pSmaF, fname);
......@@ -186,6 +176,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) {
return code;
_err:
tsdbError("vgId:%d tsdb rollback file failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
return code;
}
......@@ -219,10 +210,8 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) {
// SDelFile ===============================================
void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]) {
STfs *pTfs = pTsdb->pVnode->pTfs;
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, pTsdb->path,
TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del");
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%dver%" PRId64 "%s", tfsGetPrimaryPath(pTsdb->pVnode->pTfs),
TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), pFile->commitID, ".del");
}
int32_t tPutDelFile(uint8_t *p, SDelFile *pDelFile) {
......
......@@ -605,48 +605,3 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) {
tsdbMemTableDestroy(pMemTable);
}
}
int32_t tsdbTakeMemSnapshot(STsdb *pTsdb, SMemTable **ppMem, SMemTable **ppIMem) {
ASSERT(0);
int32_t code = 0;
// lock
code = taosThreadRwlockRdlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
// take snapshot
*ppMem = pTsdb->mem;
*ppIMem = pTsdb->imem;
if (*ppMem) {
tsdbRefMemTable(*ppMem);
}
if (*ppIMem) {
tsdbRefMemTable(*ppIMem);
}
// unlock
code = taosThreadRwlockUnlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
_exit:
return code;
}
void tsdbUntakeMemSnapshot(STsdb *pTsdb, SMemTable *pMem, SMemTable *pIMem) {
ASSERT(0);
if (pMem) {
tsdbUnrefMemTable(pMem);
}
if (pIMem) {
tsdbUnrefMemTable(pIMem);
}
}
\ No newline at end of file
......@@ -66,7 +66,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee
tfsMkdir(pVnode->pTfs, pTsdb->path);
// open tsdb
if (tsdbFSOpen(pTsdb, &pTsdb->pFS) < 0) {
if (tsdbFSOpen(pTsdb) < 0) {
goto _err;
}
......@@ -88,7 +88,7 @@ _err:
int tsdbClose(STsdb **pTsdb) {
if (*pTsdb) {
taosThreadRwlockDestroy(&(*pTsdb)->rwLock);
tsdbFSClose((*pTsdb)->pFS);
tsdbFSClose(*pTsdb);
tsdbCloseCache((*pTsdb)->lruCache);
taosMemoryFreeClear(*pTsdb);
}
......
......@@ -118,8 +118,7 @@ struct STsdbReader {
char* idStr; // query info handle, for debug purpose
int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
SBlockLoadSuppInfo suppInfo;
SMemTable* pMem;
SMemTable* pIMem;
STsdbReadSnap* pReadSnap;
SIOCostSummary cost;
STSchema* pSchema;
......@@ -275,12 +274,12 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap
}
// init file iterator
static int32_t initFilesetIterator(SFilesetIter* pIter, const STsdbFSState* pFState, int32_t order, const char* idstr) {
size_t numOfFileset = taosArrayGetSize(pFState->aDFileSet);
static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) {
size_t numOfFileset = taosArrayGetSize(aDFileSet);
pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset;
pIter->order = order;
pIter->pFileList = taosArrayDup(pFState->aDFileSet);
pIter->pFileList = aDFileSet;
pIter->numOfFiles = numOfFileset;
tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr);
......@@ -1881,8 +1880,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea
int32_t backward = (!ASCENDING_TRAVERSE(pReader->order));
STbData* d = NULL;
if (pReader->pMem != NULL) {
tsdbGetTbDataFromMemTable(pReader->pMem, pReader->suid, pBlockScanInfo->uid, &d);
if (pReader->pReadSnap->pMem != NULL) {
tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d);
if (d != NULL) {
code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter);
if (code == TSDB_CODE_SUCCESS) {
......@@ -1902,8 +1901,8 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea
}
STbData* di = NULL;
if (pReader->pIMem != NULL) {
tsdbGetTbDataFromMemTable(pReader->pIMem, pReader->suid, pBlockScanInfo->uid, &di);
if (pReader->pReadSnap->pIMem != NULL) {
tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di);
if (di != NULL) {
code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter);
if (code == TSDB_CODE_SUCCESS) {
......@@ -1939,7 +1938,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader*
SArray* pDelData = taosArrayInit(4, sizeof(SDelData));
SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->cState);
SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile;
if (pDelFile) {
SDelFReader* pDelFReader = NULL;
code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL);
......@@ -2830,8 +2829,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl
SDataBlockIter* pBlockIter = &pReader->status.blockIter;
STsdbFSState* pFState = pReader->pTsdb->pFS->cState;
initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
initFilesetIterator(&pReader->status.fileIter, (*ppReader)->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
resetDataBlockIterator(&pReader->status.blockIter, pReader->order);
// no data in files, let's try buffer in memory
......@@ -2844,7 +2842,8 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl
}
}
tsdbTakeMemSnapshot(pReader->pTsdb, &pReader->pMem, &pReader->pIMem);
code = tsdbTakeReadSnap(pVnode->pTsdb, &pReader->pReadSnap);
if (code) goto _err;
tsdbDebug("%p total numOfTable:%d in this query %s", pReader, numOfTables, pReader->idStr);
return code;
......@@ -2861,7 +2860,7 @@ void tsdbReaderClose(STsdbReader* pReader) {
SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo;
tsdbUntakeMemSnapshot(pReader->pTsdb, pReader->pMem, pReader->pIMem);
tsdbUntakeReadSnap(pReader->pTsdb, pReader->pReadSnap);
taosMemoryFreeClear(pSupInfo->plist);
taosMemoryFree(pSupInfo->colIds);
......@@ -3081,8 +3080,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
tsdbDataFReaderClose(&pReader->pFileReader);
STsdbFSState* pFState = pReader->pTsdb->pFS->cState;
initFilesetIterator(&pReader->status.fileIter, pFState, pReader->order, pReader->idStr);
initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr);
resetDataBlockIterator(&pReader->status.blockIter, pReader->order);
resetDataBlockScanInfo(pReader->status.pTableMap);
......@@ -3275,6 +3273,11 @@ int32_t tsdbTakeReadSnap(STsdb* pTsdb, STsdbReadSnap** ppSnap) {
}
// fs (todo)
code = tsdbFSRef(pTsdb, &(*ppSnap)->fs);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
goto _exit;
}
// unlock
code = taosThreadRwlockUnlock(&pTsdb->rwLock);
......@@ -3297,6 +3300,6 @@ void tsdbUntakeReadSnap(STsdb* pTsdb, STsdbReadSnap* pSnap) {
tsdbUnrefMemTable(pSnap->pIMem);
}
// fs (todo)
tsdbFSUnref(pTsdb, &pSnap->fs);
}
}
......@@ -16,7 +16,8 @@
#include "tsdb.h"
static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t *canDo) {
int32_t code = 0;
int32_t code = 0;
#if 0
STsdbFSState *pState;
if (try) {
......@@ -64,18 +65,20 @@ static int32_t tsdbDoRetentionImpl(STsdb *pTsdb, int64_t now, int8_t try, int8_t
code = tsdbDFileSetCopy(pTsdb, pDFileSet, &nDFileSet);
if (code) goto _exit;
code = tsdbFSStateUpsertDFileSet(pState, &nDFileSet);
code = tsdbFSUpsertFSet(pState, &nDFileSet);
if (code) goto _exit;
}
}
}
#endif
_exit:
return code;
}
int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) {
int32_t code = 0;
#if 0
int8_t canDo;
// try
......@@ -100,5 +103,6 @@ _exit:
_err:
tsdbError("vgId:%d tsdb do retention failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code));
tsdbFSRollback(pTsdb->pFS);
#endif
return code;
}
\ No newline at end of file
......@@ -20,6 +20,7 @@ struct STsdbSnapReader {
STsdb* pTsdb;
int64_t sver;
int64_t ever;
STsdbFS fs;
// for data file
int8_t dataDone;
int32_t fid;
......@@ -45,7 +46,8 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) {
while (true) {
if (pReader->pDataFReader == NULL) {
SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->cState, pReader->fid, TD_GT);
SDFileSet* pSet =
taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT);
if (pSet == NULL) goto _exit;
......@@ -159,7 +161,7 @@ _err:
static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
STsdb* pTsdb = pReader->pTsdb;
SDelFile* pDelFile = pTsdb->pFS->cState->pDelFile;
SDelFile* pDelFile = pReader->fs.pDelFile;
if (pReader->pDelFReader == NULL) {
if (pDelFile == NULL) {
......@@ -254,6 +256,24 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapRe
pReader->sver = sver;
pReader->ever = ever;
code = taosThreadRwlockRdlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _err;
}
code = tsdbFSRef(pTsdb, &pReader->fs);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
goto _err;
}
code = taosThreadRwlockUnlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
goto _err;
}
pReader->fid = INT32_MIN;
pReader->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pReader->aBlockIdx == NULL) {
......@@ -305,6 +325,8 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) {
taosArrayDestroy(pReader->aDelIdx);
taosArrayDestroy(pReader->aDelData);
tsdbFSUnref(pReader->pTsdb, &pReader->fs);
tsdbInfo("vgId:%d vnode snapshot tsdb reader closed", TD_VID(pReader->pTsdb->pVnode));
taosMemoryFree(pReader);
......@@ -358,6 +380,7 @@ struct STsdbSnapWriter {
STsdb* pTsdb;
int64_t sver;
int64_t ever;
STsdbFS fs;
// config
int32_t minutes;
......@@ -798,7 +821,7 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) {
code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL);
if (code) goto _err;
code = tsdbFSStateUpsertDFileSet(pTsdb->pFS->nState, &pWriter->pDataFWriter->wSet);
code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet);
if (code) goto _err;
code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1);
......@@ -843,7 +866,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3
pWriter->fid = fid;
// read
SDFileSet* pSet = tsdbFSStateGetDFileSet(pTsdb->pFS->nState, fid, TD_EQ);
SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);
if (pSet) {
code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet);
if (code) goto _err;
......@@ -911,7 +934,7 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32
STsdb* pTsdb = pWriter->pTsdb;
if (pWriter->pDelFWriter == NULL) {
SDelFile* pDelFile = tsdbFSStateGetDelFile(pTsdb->pFS->nState);
SDelFile* pDelFile = pWriter->fs.pDelFile;
// reader
if (pDelFile) {
......@@ -1021,7 +1044,7 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) {
code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter);
if (code) goto _err;
code = tsdbFSStateUpsertDelFile(pTsdb->pFS->nState, &pWriter->pDelFWriter->fDel);
code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel);
if (code) goto _err;
code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1);
......@@ -1055,6 +1078,9 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter->sver = sver;
pWriter->ever = ever;
code = tsdbFSCopy(pTsdb, &pWriter->fs);
if (code) goto _err;
// config
pWriter->minutes = pTsdb->keepCfg.days;
pWriter->precision = pTsdb->keepCfg.precision;
......@@ -1100,9 +1126,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
goto _err;
}
code = tsdbFSBegin(pTsdb->pFS);
if (code) goto _err;
*ppWriter = pWriter;
return code;
......@@ -1117,8 +1140,9 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
STsdbSnapWriter* pWriter = *ppWriter;
if (rollback) {
code = tsdbFSRollback(pWriter->pTsdb->pFS);
if (code) goto _err;
ASSERT(0);
// code = tsdbFSRollback(pWriter->pTsdb->pFS);
// if (code) goto _err;
} else {
code = tsdbSnapWriteDataEnd(pWriter);
if (code) goto _err;
......@@ -1126,7 +1150,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
code = tsdbSnapWriteDelEnd(pWriter);
if (code) goto _err;
code = tsdbFSCommit(pWriter->pTsdb->pFS);
code = tsdbFSCommit1(pWriter->pTsdb, &pWriter->fs);
if (code) goto _err;
code = tsdbFSCommit2(pWriter->pTsdb, &pWriter->fs);
if (code) goto _err;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册