未验证 提交 c4500174 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #18674 from taosdata/feat/async_commit

feat: async commit
......@@ -35,7 +35,7 @@ typedef struct STdbState {
TTB* pFillStateDb; // todo refactor
TTB* pSessionStateDb;
TTB* pParNameDb;
TXN txn;
TXN* txn;
} STdbState;
// incremental state storage
......
......@@ -620,7 +620,7 @@ typedef struct SStreamMeta {
SHashObj* pTasks;
SHashObj* pRecoverStatus;
void* ahandle;
TXN txn;
TXN* txn;
FTaskExpand* expandFunc;
int32_t vgId;
SRWLatch lock;
......
......@@ -88,6 +88,7 @@ int32_t taosFsyncFile(TdFilePtr pFile);
int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count);
int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset);
int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count);
int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset);
void taosFprintfFile(TdFilePtr pFile, const char *format, ...);
int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf);
......
......@@ -70,7 +70,7 @@ int32_t metaCacheDrop(SMeta* pMeta, int64_t uid);
int32_t metaStatsCacheUpsert(SMeta* pMeta, SMetaStbStats* pInfo);
int32_t metaStatsCacheDrop(SMeta* pMeta, int64_t uid);
int32_t metaStatsCacheGet(SMeta* pMeta, int64_t uid, SMetaStbStats* pInfo);
void metaUpdateStbStats(SMeta *pMeta, int64_t uid, int64_t delta);
void metaUpdateStbStats(SMeta* pMeta, int64_t uid, int64_t delta);
int32_t metaUidFilterCacheGet(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, LRUHandle** pHandle);
struct SMeta {
......@@ -79,7 +79,7 @@ struct SMeta {
char* path;
SVnode* pVnode;
TDB* pEnv;
TXN txn;
TXN* txn;
TTB* pTbDb;
TTB* pSkmDb;
TTB* pUidIdx;
......
......@@ -77,7 +77,7 @@ void vnodeBufPoolReset(SVBufPool* pPool);
// vnodeQuery.c
int32_t vnodeQueryOpen(SVnode* pVnode);
void vnodeQueryPreClose(SVnode *pVnode);
void vnodeQueryPreClose(SVnode* pVnode);
void vnodeQueryClose(SVnode* pVnode);
int32_t vnodeGetTableMeta(SVnode* pVnode, SRpcMsg* pMsg, bool direct);
int vnodeGetTableCfg(SVnode* pVnode, SRpcMsg* pMsg, bool direct);
......@@ -86,7 +86,6 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg);
// vnodeCommit.c
int32_t vnodeBegin(SVnode* pVnode);
int32_t vnodeShouldCommit(SVnode* pVnode);
int32_t vnodeCommit(SVnode* pVnode);
void vnodeRollback(SVnode* pVnode);
int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg);
int32_t vnodeCommitInfo(const char* dir, const SVnodeInfo* pInfo);
......
......@@ -75,6 +75,7 @@ typedef struct SStreamStateWriter SStreamStateWriter;
typedef struct SRSmaSnapReader SRSmaSnapReader;
typedef struct SRSmaSnapWriter SRSmaSnapWriter;
typedef struct SSnapDataHdr SSnapDataHdr;
typedef struct SCommitInfo SCommitInfo;
#define VNODE_META_DIR "meta"
#define VNODE_TSDB_DIR "tsdb"
......@@ -100,8 +101,9 @@ typedef struct STbUidStore STbUidStore;
int metaOpen(SVnode* pVnode, SMeta** ppMeta, int8_t rollback);
int metaClose(SMeta* pMeta);
int metaBegin(SMeta* pMeta, int8_t fromSys);
int metaCommit(SMeta* pMeta);
int metaFinishCommit(SMeta* pMeta);
TXN* metaGetTxn(SMeta* pMeta);
int metaCommit(SMeta* pMeta, TXN* txn);
int metaFinishCommit(SMeta* pMeta, TXN* txn);
int metaPrepareAsyncCommit(SMeta* pMeta);
int metaCreateSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq);
int metaAlterSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq);
......@@ -146,7 +148,8 @@ int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pR
int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback);
int tsdbClose(STsdb** pTsdb);
int32_t tsdbBegin(STsdb* pTsdb);
int32_t tsdbCommit(STsdb* pTsdb);
int32_t tsdbPrepareCommit(STsdb* pTsdb);
int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo);
int32_t tsdbFinishCommit(STsdb* pTsdb);
int32_t tsdbRollbackCommit(STsdb* pTsdb);
int32_t tsdbDoRetention(STsdb* pTsdb, int64_t now);
......@@ -203,8 +206,8 @@ int32_t smaBegin(SSma* pSma);
int32_t smaSyncPreCommit(SSma* pSma);
int32_t smaSyncCommit(SSma* pSma);
int32_t smaSyncPostCommit(SSma* pSma);
int32_t smaPreCommit(SSma* pSma);
int32_t smaCommit(SSma* pSma);
int32_t smaPrepareAsyncCommit(SSma* pSma);
int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo);
int32_t smaFinishCommit(SSma* pSma);
int32_t smaPostCommit(SSma* pSma);
int32_t smaDoRetention(SSma* pSma, int64_t now);
......@@ -406,6 +409,12 @@ struct SSnapDataHdr {
uint8_t data[];
};
struct SCommitInfo {
SVnodeInfo info;
SVnode* pVnode;
TXN* txn;
};
#ifdef __cplusplus
}
#endif
......
......@@ -20,12 +20,19 @@ static FORCE_INLINE void metaFree(void *pPool, void *p) { vnodeBufPoolFree((SVB
// begin a meta txn
int metaBegin(SMeta *pMeta, int8_t fromSys) {
void *(*xMalloc)(void *, size_t);
void (*xFree)(void *, void *);
void *xArg = NULL;
if (fromSys) {
tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
xMalloc = tdbDefaultMalloc;
xFree = tdbDefaultFree;
} else {
tdbTxnOpen(&pMeta->txn, 0, metaMalloc, metaFree, pMeta->pVnode->inUse, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
xMalloc = metaMalloc;
xFree = metaFree;
xArg = pMeta->pVnode->inUse;
}
if (tdbBegin(pMeta->pEnv, &pMeta->txn) < 0) {
if (tdbBegin(pMeta->pEnv, &pMeta->txn, xMalloc, xFree, xArg, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
......@@ -33,9 +40,10 @@ int metaBegin(SMeta *pMeta, int8_t fromSys) {
}
// commit the meta txn
int metaCommit(SMeta *pMeta) { return tdbCommit(pMeta->pEnv, &pMeta->txn); }
int metaFinishCommit(SMeta *pMeta) { return tdbPostCommit(pMeta->pEnv, &pMeta->txn); }
int metaPrepareAsyncCommit(SMeta *pMeta) { return tdbPrepareAsyncCommit(pMeta->pEnv, &pMeta->txn); }
TXN *metaGetTxn(SMeta *pMeta) { return pMeta->txn; }
int metaCommit(SMeta *pMeta, TXN *txn) { return tdbCommit(pMeta->pEnv, txn); }
int metaFinishCommit(SMeta *pMeta, TXN *txn) { return tdbPostCommit(pMeta->pEnv, txn); }
int metaPrepareAsyncCommit(SMeta *pMeta) { return tdbPrepareAsyncCommit(pMeta->pEnv, pMeta->txn); }
// abort the meta txn
int metaAbort(SMeta *pMeta) { return tdbAbort(pMeta->pEnv, &pMeta->txn); }
int metaAbort(SMeta *pMeta) { return tdbAbort(pMeta->pEnv, pMeta->txn); }
......@@ -117,7 +117,7 @@ static int metaSaveSmaToDB(SMeta *pMeta, const SMetaEntry *pME) {
tEncoderClear(&coder);
// write to table.db
if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, &pMeta->txn) < 0) {
if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, pMeta->txn) < 0) {
goto _err;
}
......@@ -131,17 +131,17 @@ _err:
static int metaUpdateUidIdx(SMeta *pMeta, const SMetaEntry *pME) {
SUidIdxVal uidIdxVal = {.suid = pME->smaEntry.tsma->indexUid, .version = pME->version, .skmVer = 0};
return tdbTbInsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), &pMeta->txn);
return tdbTbInsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), pMeta->txn);
}
static int metaUpdateNameIdx(SMeta *pMeta, const SMetaEntry *pME) {
return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), &pMeta->txn);
return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), pMeta->txn);
}
static int metaUpdateSmaIdx(SMeta *pMeta, const SMetaEntry *pME) {
SSmaIdxKey smaIdxKey = {.uid = pME->smaEntry.tsma->tableUid, .smaUid = pME->smaEntry.tsma->indexUid};
return tdbTbInsert(pMeta->pSmaIdx, &smaIdxKey, sizeof(smaIdxKey), NULL, 0, &pMeta->txn);
return tdbTbInsert(pMeta->pSmaIdx, &smaIdxKey, sizeof(smaIdxKey), NULL, 0, pMeta->txn);
}
static int metaHandleSmaEntry(SMeta *pMeta, const SMetaEntry *pME) {
......
......@@ -163,9 +163,9 @@ int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback) {
if (rollback) {
ASSERT(0);
} else {
code = metaCommit(pWriter->pMeta);
code = metaCommit(pWriter->pMeta, pWriter->pMeta->txn);
if (code) goto _err;
code = metaFinishCommit(pWriter->pMeta);
code = metaFinishCommit(pWriter->pMeta, pWriter->pMeta->txn);
if (code) goto _err;
}
taosMemoryFree(pWriter);
......
......@@ -261,7 +261,7 @@ int metaDropSTable(SMeta *pMeta, int64_t verison, SVDropStbReq *pReq, SArray *tb
// drop all child tables
TBC *pCtbIdxc = NULL;
tdbTbcOpen(pMeta->pCtbIdx, &pCtbIdxc, &pMeta->txn);
tdbTbcOpen(pMeta->pCtbIdx, &pCtbIdxc, NULL);
rc = tdbTbcMoveTo(pCtbIdxc, &(SCtbIdxKey){.suid = pReq->suid, .uid = INT64_MIN}, sizeof(SCtbIdxKey), &c);
if (rc < 0) {
tdbTbcClose(pCtbIdxc);
......@@ -295,10 +295,10 @@ int metaDropSTable(SMeta *pMeta, int64_t verison, SVDropStbReq *pReq, SArray *tb
_drop_super_table:
tdbTbGet(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), &pData, &nData);
tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = ((SUidIdxVal *)pData)[0].version, .uid = pReq->suid},
sizeof(STbDbKey), &pMeta->txn);
tdbTbDelete(pMeta->pNameIdx, pReq->name, strlen(pReq->name) + 1, &pMeta->txn);
tdbTbDelete(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), &pMeta->txn);
tdbTbDelete(pMeta->pSuidIdx, &pReq->suid, sizeof(tb_uid_t), &pMeta->txn);
sizeof(STbDbKey), pMeta->txn);
tdbTbDelete(pMeta->pNameIdx, pReq->name, strlen(pReq->name) + 1, pMeta->txn);
tdbTbDelete(pMeta->pUidIdx, &pReq->suid, sizeof(tb_uid_t), pMeta->txn);
tdbTbDelete(pMeta->pSuidIdx, &pReq->suid, sizeof(tb_uid_t), pMeta->txn);
metaULock(pMeta);
......@@ -321,7 +321,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) {
int32_t ret;
int32_t c = -2;
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn);
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL);
ret = tdbTbcMoveTo(pUidIdxc, &pReq->suid, sizeof(tb_uid_t), &c);
if (ret < 0 || c) {
tdbTbcClose(pUidIdxc);
......@@ -340,7 +340,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) {
oversion = ((SUidIdxVal *)pData)[0].version;
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn);
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL);
ret = tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = pReq->suid, .version = oversion}), sizeof(STbDbKey), &c);
ASSERT(ret == 0 && c == 0);
......@@ -595,7 +595,7 @@ static int metaDeleteTtlIdx(SMeta *pMeta, const SMetaEntry *pME) {
STtlIdxKey ttlKey = {0};
metaBuildTtlIdxKey(&ttlKey, pME);
if (ttlKey.dtime == 0) return 0;
return tdbTbDelete(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), &pMeta->txn);
return tdbTbDelete(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), pMeta->txn);
}
static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
......@@ -657,7 +657,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
if (metaCreateTagIdxKey(e.ctbEntry.suid, pTagColumn->colId, pTagData, nTagData, pTagColumn->type, uid,
&pTagIdxKey, &nTagIdxKey) == 0) {
tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, &pMeta->txn);
tdbTbDelete(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, pMeta->txn);
}
metaDestroyTagIdxKey(pTagIdxKey);
}
......@@ -667,9 +667,9 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
}
}
tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = version, .uid = uid}, sizeof(STbDbKey), &pMeta->txn);
tdbTbDelete(pMeta->pNameIdx, e.name, strlen(e.name) + 1, &pMeta->txn);
tdbTbDelete(pMeta->pUidIdx, &uid, sizeof(uid), &pMeta->txn);
tdbTbDelete(pMeta->pTbDb, &(STbDbKey){.version = version, .uid = uid}, sizeof(STbDbKey), pMeta->txn);
tdbTbDelete(pMeta->pNameIdx, e.name, strlen(e.name) + 1, pMeta->txn);
tdbTbDelete(pMeta->pUidIdx, &uid, sizeof(uid), pMeta->txn);
if (e.type == TSDB_CHILD_TABLE || e.type == TSDB_NORMAL_TABLE) metaDeleteCtimeIdx(pMeta, &e);
if (e.type == TSDB_NORMAL_TABLE) metaDeleteNcolIdx(pMeta, &e);
......@@ -677,7 +677,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
if (e.type != TSDB_SUPER_TABLE) metaDeleteTtlIdx(pMeta, &e);
if (e.type == TSDB_CHILD_TABLE) {
tdbTbDelete(pMeta->pCtbIdx, &(SCtbIdxKey){.suid = e.ctbEntry.suid, .uid = uid}, sizeof(SCtbIdxKey), &pMeta->txn);
tdbTbDelete(pMeta->pCtbIdx, &(SCtbIdxKey){.suid = e.ctbEntry.suid, .uid = uid}, sizeof(SCtbIdxKey), pMeta->txn);
--pMeta->pVnode->config.vndStats.numOfCTables;
......@@ -689,7 +689,7 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type) {
--pMeta->pVnode->config.vndStats.numOfNTables;
pMeta->pVnode->config.vndStats.numOfNTimeSeries -= e.ntbEntry.schemaRow.nCols - 1;
} else if (e.type == TSDB_SUPER_TABLE) {
tdbTbDelete(pMeta->pSuidIdx, &e.uid, sizeof(tb_uid_t), &pMeta->txn);
tdbTbDelete(pMeta->pSuidIdx, &e.uid, sizeof(tb_uid_t), pMeta->txn);
// drop schema.db (todo)
metaStatsCacheDrop(pMeta, uid);
......@@ -710,7 +710,7 @@ int metaUpdateCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) {
if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) {
return 0;
}
return tdbTbInsert(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), NULL, 0, &pMeta->txn);
return tdbTbInsert(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), NULL, 0, pMeta->txn);
}
int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) {
......@@ -718,14 +718,14 @@ int metaDeleteCtimeIdx(SMeta *pMeta, const SMetaEntry *pME) {
if (metaBuildCtimeIdxKey(&ctimeKey, pME) < 0) {
return 0;
}
return tdbTbDelete(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), &pMeta->txn);
return tdbTbDelete(pMeta->pCtimeIdx, &ctimeKey, sizeof(ctimeKey), pMeta->txn);
}
int metaUpdateNcolIdx(SMeta *pMeta, const SMetaEntry *pME) {
SNcolIdxKey ncolKey = {0};
if (metaBuildNColIdxKey(&ncolKey, pME) < 0) {
return 0;
}
return tdbTbInsert(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), NULL, 0, &pMeta->txn);
return tdbTbInsert(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), NULL, 0, pMeta->txn);
}
int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME) {
......@@ -733,7 +733,7 @@ int metaDeleteNcolIdx(SMeta *pMeta, const SMetaEntry *pME) {
if (metaBuildNColIdxKey(&ncolKey, pME) < 0) {
return 0;
}
return tdbTbDelete(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), &pMeta->txn);
return tdbTbDelete(pMeta->pNcolIdx, &ncolKey, sizeof(ncolKey), pMeta->txn);
}
static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTbReq, STableMetaRsp *pMetaRsp) {
......@@ -768,7 +768,7 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl
// search uid index
TBC *pUidIdxc = NULL;
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn);
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL);
tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c);
ASSERT(c == 0);
......@@ -778,7 +778,7 @@ static int metaAlterTableColumn(SMeta *pMeta, int64_t version, SVAlterTbReq *pAl
// search table.db
TBC *pTbDbc = NULL;
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn);
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL);
tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c);
ASSERT(c == 0);
tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData);
......@@ -959,7 +959,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA
// search uid index
TBC *pUidIdxc = NULL;
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn);
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL);
tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c);
ASSERT(c == 0);
......@@ -972,7 +972,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA
SDecoder dc2 = {0};
/* get ctbEntry */
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn);
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL);
tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c);
ASSERT(c == 0);
tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData);
......@@ -1075,7 +1075,7 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA
ASSERT(ctbEntry.ctbEntry.pTags);
SCtbIdxKey ctbIdxKey = {.suid = ctbEntry.ctbEntry.suid, .uid = uid};
tdbTbUpsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), ctbEntry.ctbEntry.pTags,
((STag *)(ctbEntry.ctbEntry.pTags))->len, &pMeta->txn);
((STag *)(ctbEntry.ctbEntry.pTags))->len, pMeta->txn);
metaUidCacheClear(pMeta, ctbEntry.ctbEntry.suid);
......@@ -1125,7 +1125,7 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p
// search uid index
TBC *pUidIdxc = NULL;
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, &pMeta->txn);
tdbTbcOpen(pMeta->pUidIdx, &pUidIdxc, NULL);
tdbTbcMoveTo(pUidIdxc, &uid, sizeof(uid), &c);
ASSERT(c == 0);
......@@ -1135,7 +1135,7 @@ static int metaUpdateTableOptions(SMeta *pMeta, int64_t version, SVAlterTbReq *p
// search table.db
TBC *pTbDbc = NULL;
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, &pMeta->txn);
tdbTbcOpen(pMeta->pTbDb, &pTbDbc, NULL);
tdbTbcMoveTo(pTbDbc, &((STbDbKey){.uid = uid, .version = oversion}), sizeof(STbDbKey), &c);
ASSERT(c == 0);
tdbTbcGet(pTbDbc, NULL, NULL, &pData, &nData);
......@@ -1242,7 +1242,7 @@ static int metaSaveToTbDb(SMeta *pMeta, const SMetaEntry *pME) {
tEncoderClear(&coder);
// write to table.db
if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, &pMeta->txn) < 0) {
if (tdbTbInsert(pMeta->pTbDb, pKey, kLen, pVal, vLen, pMeta->txn) < 0) {
goto _err;
}
......@@ -1265,29 +1265,29 @@ static int metaUpdateUidIdx(SMeta *pMeta, const SMetaEntry *pME) {
SUidIdxVal uidIdxVal = {.suid = info.suid, .version = info.version, .skmVer = info.skmVer};
return tdbTbUpsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), &pMeta->txn);
return tdbTbUpsert(pMeta->pUidIdx, &pME->uid, sizeof(tb_uid_t), &uidIdxVal, sizeof(uidIdxVal), pMeta->txn);
}
static int metaUpdateSuidIdx(SMeta *pMeta, const SMetaEntry *pME) {
return tdbTbInsert(pMeta->pSuidIdx, &pME->uid, sizeof(tb_uid_t), NULL, 0, &pMeta->txn);
return tdbTbInsert(pMeta->pSuidIdx, &pME->uid, sizeof(tb_uid_t), NULL, 0, pMeta->txn);
}
static int metaUpdateNameIdx(SMeta *pMeta, const SMetaEntry *pME) {
return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), &pMeta->txn);
return tdbTbInsert(pMeta->pNameIdx, pME->name, strlen(pME->name) + 1, &pME->uid, sizeof(tb_uid_t), pMeta->txn);
}
static int metaUpdateTtlIdx(SMeta *pMeta, const SMetaEntry *pME) {
STtlIdxKey ttlKey = {0};
metaBuildTtlIdxKey(&ttlKey, pME);
if (ttlKey.dtime == 0) return 0;
return tdbTbInsert(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), NULL, 0, &pMeta->txn);
return tdbTbInsert(pMeta->pTtlIdx, &ttlKey, sizeof(ttlKey), NULL, 0, pMeta->txn);
}
static int metaUpdateCtbIdx(SMeta *pMeta, const SMetaEntry *pME) {
SCtbIdxKey ctbIdxKey = {.suid = pME->ctbEntry.suid, .uid = pME->uid};
return tdbTbInsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), pME->ctbEntry.pTags,
((STag *)(pME->ctbEntry.pTags))->len, &pMeta->txn);
((STag *)(pME->ctbEntry.pTags))->len, pMeta->txn);
}
int metaCreateTagIdxKey(tb_uid_t suid, int32_t cid, const void *pTagData, int32_t nTagData, int8_t type, tb_uid_t uid,
......@@ -1379,7 +1379,7 @@ static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry) {
ret = -1;
goto end;
}
tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, &pMeta->txn);
tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn);
}
end:
metaDestroyTagIdxKey(pTagIdxKey);
......@@ -1426,7 +1426,7 @@ static int metaSaveToSkmDb(SMeta *pMeta, const SMetaEntry *pME) {
tEncoderInit(&coder, pVal, vLen);
tEncodeSSchemaWrapper(&coder, pSW);
if (tdbTbInsert(pMeta->pSkmDb, &skmDbKey, sizeof(skmDbKey), pVal, vLen, &pMeta->txn) < 0) {
if (tdbTbInsert(pMeta->pSkmDb, &skmDbKey, sizeof(skmDbKey), pVal, vLen, pMeta->txn) < 0) {
rcode = -1;
goto _exit;
}
......
......@@ -23,7 +23,7 @@ static int32_t tdProcessRSmaSyncCommitImpl(SSma *pSma);
static int32_t tdProcessRSmaSyncPostCommitImpl(SSma *pSma);
#endif
static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma);
static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma);
static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo);
static int32_t tdProcessRSmaAsyncPostCommitImpl(SSma *pSma);
static int32_t tdUpdateQTaskInfoFiles(SSma *pSma, SRSmaStat *pRSmaStat);
......@@ -59,7 +59,7 @@ int32_t smaSyncPostCommit(SSma *pSma) { return tdProcessRSmaSyncPostCommitImpl(p
* @param pSma
* @return int32_t
*/
int32_t smaPreCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma); }
int32_t smaPrepareAsyncCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma); }
/**
* @brief async commit, only applicable to Rollup SMA
......@@ -67,7 +67,7 @@ int32_t smaPreCommit(SSma *pSma) { return tdProcessRSmaAsyncPreCommitImpl(pSma);
* @param pSma
* @return int32_t
*/
int32_t smaCommit(SSma *pSma) { return tdProcessRSmaAsyncCommitImpl(pSma); }
int32_t smaCommit(SSma *pSma, SCommitInfo *pInfo) { return tdProcessRSmaAsyncCommitImpl(pSma, pInfo); }
/**
* @brief async commit, only applicable to Rollup SMA
......@@ -378,6 +378,11 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) {
taosWUnLockLatch(SMA_ENV_LOCK(pEnv));
#endif
// all rsma results are written completely
STsdb *pTsdb = NULL;
if ((pTsdb = VND_RSMA1(pSma->pVnode))) tsdbPrepareCommit(pTsdb);
if ((pTsdb = VND_RSMA2(pSma->pVnode))) tsdbPrepareCommit(pTsdb);
return TSDB_CODE_SUCCESS;
}
......@@ -387,7 +392,7 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma) {
* @param pSma
* @return int32_t
*/
static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) {
static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) {
int32_t code = 0;
SVnode *pVnode = pSma->pVnode;
#if 0
......@@ -399,11 +404,11 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma) {
}
#endif
if ((code = tsdbCommit(VND_RSMA1(pVnode))) < 0) {
if ((code = tsdbCommit(VND_RSMA1(pVnode), pInfo)) < 0) {
smaError("vgId:%d, failed to commit tsdb rsma1 since %s", TD_VID(pVnode), tstrerror(code));
goto _exit;
}
if ((code = tsdbCommit(VND_RSMA2(pVnode))) < 0) {
if ((code = tsdbCommit(VND_RSMA2(pVnode), pInfo)) < 0) {
smaError("vgId:%d, failed to commit tsdb rsma2 since %s", TD_VID(pVnode), tstrerror(code));
goto _exit;
}
......
......@@ -108,24 +108,22 @@ int32_t tqMetaClose(STQ* pTq) {
}
int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_t vLen) {
TXN txn;
if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
TXN* txn;
if (tdbBegin(pTq->pMetaDB, &txn) < 0) {
if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
return -1;
}
if (tdbTbUpsert(pTq->pCheckStore, key, strlen(key), value, vLen, &txn) < 0) {
if (tdbTbUpsert(pTq->pCheckStore, key, strlen(key), value, vLen, txn) < 0) {
return -1;
}
if (tdbCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbCommit(pTq->pMetaDB, txn) < 0) {
return -1;
}
if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbPostCommit(pTq->pMetaDB, txn) < 0) {
return -1;
}
......@@ -133,25 +131,22 @@ int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_
}
int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key) {
TXN txn;
if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
ASSERT(0);
}
TXN* txn;
if (tdbBegin(pTq->pMetaDB, &txn) < 0) {
if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
ASSERT(0);
}
if (tdbTbDelete(pTq->pCheckStore, key, (int)strlen(key), &txn) < 0) {
if (tdbTbDelete(pTq->pCheckStore, key, (int)strlen(key), txn) < 0) {
/*ASSERT(0);*/
}
if (tdbCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbPostCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
......@@ -219,25 +214,22 @@ int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle) {
ASSERT(0);
}
TXN txn;
TXN* txn;
if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
ASSERT(0);
}
if (tdbBegin(pTq->pMetaDB, &txn) < 0) {
if (tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, txn) < 0) {
ASSERT(0);
}
if (tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, &txn) < 0) {
if (tdbCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
if (tdbCommit(pTq->pMetaDB, &txn) < 0) {
ASSERT(0);
}
if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbPostCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
......@@ -247,25 +239,22 @@ int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle) {
}
int32_t tqMetaDeleteHandle(STQ* pTq, const char* key) {
TXN txn;
if (tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
ASSERT(0);
}
TXN* txn;
if (tdbBegin(pTq->pMetaDB, &txn) < 0) {
if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
ASSERT(0);
}
if (tdbTbDelete(pTq->pExecStore, key, (int)strlen(key), &txn) < 0) {
if (tdbTbDelete(pTq->pExecStore, key, (int)strlen(key), txn) < 0) {
/*ASSERT(0);*/
}
if (tdbCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
if (tdbPostCommit(pTq->pMetaDB, &txn) < 0) {
if (tdbPostCommit(pTq->pMetaDB, txn) < 0) {
ASSERT(0);
}
......
......@@ -129,7 +129,7 @@ struct STqSnapWriter {
STQ* pTq;
int64_t sver;
int64_t ever;
TXN txn;
TXN* txn;
};
int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) {
......@@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p
pWriter->sver = sver;
pWriter->ever = ever;
if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
ASSERT(0);
if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
code = -1;
taosMemoryFree(pWriter);
goto _err;
}
*ppWriter = pWriter;
......@@ -165,11 +167,11 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) {
STQ* pTq = pWriter->pTq;
if (rollback) {
tdbAbort(pWriter->pTq->pMetaDB, &pWriter->txn);
tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn);
} else {
code = tdbCommit(pWriter->pTq->pMetaDB, &pWriter->txn);
code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
code = tdbPostCommit(pWriter->pTq->pMetaDB, &pWriter->txn);
code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
}
......
......@@ -129,7 +129,7 @@ struct STqSnapWriter {
STQ* pTq;
int64_t sver;
int64_t ever;
TXN txn;
TXN* txn;
};
int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) {
......@@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p
pWriter->sver = sver;
pWriter->ever = ever;
if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
ASSERT(0);
if (tdbBegin(pTq->pMetaDB, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
code = -1;
taosMemoryFree(pWriter);
goto _err;
}
*ppWriter = pWriter;
......@@ -165,11 +167,12 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) {
STQ* pTq = pWriter->pTq;
if (rollback) {
tdbAbort(pWriter->pTq->pMetaDB, pWriter->txn);
ASSERT(0);
} else {
code = tdbCommit(pWriter->pTq->pMetaDB, &pWriter->txn);
code = tdbCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
code = tdbPostCommit(pWriter->pTq->pMetaDB, &pWriter->txn);
code = tdbPostCommit(pWriter->pTq->pMetaDB, pWriter->txn);
if (code) goto _err;
}
......
......@@ -129,7 +129,7 @@ struct STqSnapWriter {
STQ* pTq;
int64_t sver;
int64_t ever;
TXN txn;
TXN* txn;
};
int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** ppWriter) {
......@@ -146,8 +146,10 @@ int32_t tqSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapWriter** p
pWriter->sver = sver;
pWriter->ever = ever;
if (tdbTxnOpen(&pWriter->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
ASSERT(0);
if (tdbBegin(pTq->pMetaStore, &pWriter->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
code = -1;
taosMemoryFree(pWriter);
goto _err;
}
*ppWriter = pWriter;
......@@ -165,11 +167,12 @@ int32_t tqSnapWriterClose(STqSnapWriter** ppWriter, int8_t rollback) {
STQ* pTq = pWriter->pTq;
if (rollback) {
tdbAbort(pWriter->pTq->pMetaStore, pWriter->txn);
ASSERT(0);
} else {
code = tdbCommit(pWriter->pTq->pMetaStore, &pWriter->txn);
code = tdbCommit(pWriter->pTq->pMetaStore, pWriter->txn);
if (code) goto _err;
code = tdbPostCommit(pWriter->pTq->pMetaStore, &pWriter->txn);
code = tdbPostCommit(pWriter->pTq->pMetaStore, pWriter->txn);
if (code) goto _err;
}
......
......@@ -93,7 +93,7 @@ typedef struct {
SArray *aDelData; // SArray<SDelData>
} SCommitter;
static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter);
static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter, SCommitInfo *pInfo);
static int32_t tsdbCommitData(SCommitter *pCommitter);
static int32_t tsdbCommitDel(SCommitter *pCommitter);
static int32_t tsdbCommitCache(SCommitter *pCommitter);
......@@ -150,18 +150,28 @@ _exit:
return code;
}
int32_t tsdbCommit(STsdb *pTsdb) {
int32_t tsdbPrepareCommit(STsdb *pTsdb) {
taosThreadRwlockWrlock(&pTsdb->rwLock);
ASSERT(pTsdb->imem == NULL);
pTsdb->imem = pTsdb->mem;
pTsdb->mem = NULL;
taosThreadRwlockUnlock(&pTsdb->rwLock);
return 0;
}
int32_t tsdbCommit(STsdb *pTsdb, SCommitInfo *pInfo) {
if (!pTsdb) return 0;
int32_t code = 0;
int32_t lino = 0;
SCommitter commith;
SMemTable *pMemTable = pTsdb->mem;
SMemTable *pMemTable = pTsdb->imem;
// check
if (pMemTable->nRow == 0 && pMemTable->nDel == 0) {
taosThreadRwlockWrlock(&pTsdb->rwLock);
pTsdb->mem = NULL;
pTsdb->imem = NULL;
taosThreadRwlockUnlock(&pTsdb->rwLock);
tsdbUnrefMemTable(pMemTable);
......@@ -169,7 +179,7 @@ int32_t tsdbCommit(STsdb *pTsdb) {
}
// start commit
code = tsdbStartCommit(pTsdb, &commith);
code = tsdbStartCommit(pTsdb, &commith, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
// commit impl
......@@ -806,26 +816,21 @@ _exit:
}
// ----------------------------------------------------------------------------
static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) {
static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter, SCommitInfo *pInfo) {
int32_t code = 0;
int32_t lino = 0;
memset(pCommitter, 0, sizeof(*pCommitter));
ASSERT(pTsdb->mem && pTsdb->imem == NULL && "last tsdb commit incomplete");
taosThreadRwlockWrlock(&pTsdb->rwLock);
pTsdb->imem = pTsdb->mem;
pTsdb->mem = NULL;
taosThreadRwlockUnlock(&pTsdb->rwLock);
ASSERT(pTsdb->imem && "last tsdb commit incomplete");
pCommitter->pTsdb = pTsdb;
pCommitter->commitID = pTsdb->pVnode->state.commitID;
pCommitter->commitID = pInfo->info.state.commitID;
pCommitter->minutes = pTsdb->keepCfg.days;
pCommitter->precision = pTsdb->keepCfg.precision;
pCommitter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows;
pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows;
pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression;
pCommitter->sttTrigger = pTsdb->pVnode->config.sttTrigger;
pCommitter->minRow = pInfo->info.config.tsdbCfg.minRows;
pCommitter->maxRow = pInfo->info.config.tsdbCfg.maxRows;
pCommitter->cmprAlg = pInfo->info.config.tsdbCfg.compression;
pCommitter->sttTrigger = pInfo->info.config.sttTrigger;
pCommitter->aTbDataP = tsdbMemTableGetTbDataArray(pTsdb->imem);
if (pCommitter->aTbDataP == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
......
......@@ -14,14 +14,14 @@
*/
#include "vnd.h"
#include "vnodeInt.h"
#define VND_INFO_FNAME "vnode.json"
#define VND_INFO_FNAME_TMP "vnode_tmp.json"
static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData);
static int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo);
static int vnodeCommitImpl(void *arg);
static void vnodeWaitCommit(SVnode *pVnode);
static int vnodeCommitImpl(SCommitInfo *pInfo);
int vnodeBegin(SVnode *pVnode) {
// alloc buffer pool
......@@ -107,7 +107,8 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) {
// free info binary
taosMemoryFree(data);
vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d", pInfo->config.vgId, fname, pInfo->config.syncCfg.replicaNum);
vInfo("vgId:%d, vnode info is saved, fname:%s replica:%d", pInfo->config.vgId, fname,
pInfo->config.syncCfg.replicaNum);
return 0;
......@@ -185,29 +186,78 @@ _err:
return -1;
}
static void vnodePrepareCommit(SVnode *pVnode) {
tsem_wait(&pVnode->canCommit);
tsdbPrepareCommit(pVnode->pTsdb);
metaPrepareAsyncCommit(pVnode->pMeta);
smaPrepareAsyncCommit(pVnode->pSma);
vnodeBufPoolUnRef(pVnode->inUse);
pVnode->inUse = NULL;
}
static int32_t vnodeCommitTask(void *arg) {
int32_t code = 0;
SCommitInfo *pInfo = (SCommitInfo *)arg;
// commit
code = vnodeCommitImpl(pInfo);
if (code) goto _exit;
// end commit
tsem_post(&pInfo->pVnode->canCommit);
_exit:
taosMemoryFree(pInfo);
return code;
}
int vnodeAsyncCommit(SVnode *pVnode) {
vnodeWaitCommit(pVnode);
int32_t code = 0;
// vnodeBufPoolSwitch(pVnode);
// tsdbPrepareCommit(pVnode->pTsdb);
// prepare to commit
vnodePrepareCommit(pVnode);
vnodeScheduleTask(vnodeCommitImpl, pVnode);
// schedule the task
pVnode->state.commitTerm = pVnode->state.applyTerm;
return 0;
SCommitInfo *pInfo = (SCommitInfo *)taosMemoryCalloc(1, sizeof(*pInfo));
if (NULL == pInfo) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
pInfo->info.config = pVnode->config;
pInfo->info.state.committed = pVnode->state.applied;
pInfo->info.state.commitTerm = pVnode->state.applyTerm;
pInfo->info.state.commitID = pVnode->state.commitID;
pInfo->pVnode = pVnode;
pInfo->txn = metaGetTxn(pVnode->pMeta);
vnodeScheduleTask(vnodeCommitTask, pInfo);
_exit:
if (code) {
vError("vgId:%d %s failed since %s, commit id:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code),
pVnode->state.commitID);
} else {
vDebug("vgId:%d %s done", TD_VID(pVnode), __func__);
}
return code;
}
int vnodeSyncCommit(SVnode *pVnode) {
vnodeAsyncCommit(pVnode);
vnodeWaitCommit(pVnode);
tsem_post(&(pVnode->canCommit));
tsem_wait(&pVnode->canCommit);
tsem_post(&pVnode->canCommit);
return 0;
}
int vnodeCommit(SVnode *pVnode) {
static int vnodeCommitImpl(SCommitInfo *pInfo) {
int32_t code = 0;
int32_t lino = 0;
SVnodeInfo info = {0};
char dir[TSDB_FILENAME_LEN];
char dir[TSDB_FILENAME_LEN] = {0};
SVnode *pVnode = pInfo->pVnode;
vInfo("vgId:%d, start to commit, commit ID:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode),
pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm);
......@@ -218,19 +268,13 @@ int vnodeCommit(SVnode *pVnode) {
return -1;
}
pVnode->state.commitTerm = pVnode->state.applyTerm;
// save info
info.config = pVnode->config;
info.state.committed = pVnode->state.applied;
info.state.commitTerm = pVnode->state.applyTerm;
info.state.commitID = pVnode->state.commitID;
if (pVnode->pTfs) {
snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path);
} else {
snprintf(dir, TSDB_FILENAME_LEN, "%s", pVnode->path);
}
if (vnodeSaveInfo(dir, &info) < 0) {
if (vnodeSaveInfo(dir, &pInfo->info) < 0) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
......@@ -238,23 +282,17 @@ int vnodeCommit(SVnode *pVnode) {
// walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
syncBeginSnapshot(pVnode->sync, pVnode->state.applied);
code = smaPreCommit(pVnode->pSma);
TSDB_CHECK_CODE(code, lino, _exit);
vnodeBufPoolUnRef(pVnode->inUse);
pVnode->inUse = NULL;
// commit each sub-system
if (metaCommit(pVnode->pMeta) < 0) {
if (metaCommit(pVnode->pMeta, pInfo->txn) < 0) {
code = TSDB_CODE_FAILED;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbCommit(pVnode->pTsdb);
code = tsdbCommit(pVnode->pTsdb, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
if (VND_IS_RSMA(pVnode)) {
code = smaCommit(pVnode->pSma);
code = smaCommit(pVnode->pSma, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
}
......@@ -264,7 +302,7 @@ int vnodeCommit(SVnode *pVnode) {
}
// commit info
if (vnodeCommitInfo(dir, &info) < 0) {
if (vnodeCommitInfo(dir, &pInfo->info) < 0) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
......@@ -277,19 +315,18 @@ int vnodeCommit(SVnode *pVnode) {
TSDB_CHECK_CODE(code, lino, _exit);
}
if (metaFinishCommit(pVnode->pMeta) < 0) {
if (metaFinishCommit(pVnode->pMeta, pInfo->txn) < 0) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
pVnode->state.committed = info.state.committed;
pVnode->state.committed = pInfo->info.state.committed;
if (smaPostCommit(pVnode->pSma) < 0) {
vError("vgId:%d, failed to post-commit sma since %s", TD_VID(pVnode), tstrerror(terrno));
return -1;
}
// apply the commit (TODO)
// walEndSnapshot(pVnode->pWal);
syncEndSnapshot(pVnode->sync);
......@@ -318,20 +355,6 @@ void vnodeRollback(SVnode *pVnode) {
(void)taosRemoveFile(tFName);
}
static int vnodeCommitImpl(void *arg) {
SVnode *pVnode = (SVnode *)arg;
// metaCommit(pVnode->pMeta);
tqCommit(pVnode->pTq);
// tsdbCommit(pVnode->pTsdb, );
// vnodeBufPoolRecycle(pVnode);
tsem_post(&(pVnode->canCommit));
return 0;
}
static FORCE_INLINE void vnodeWaitCommit(SVnode *pVnode) { tsem_wait(&pVnode->canCommit); }
static int vnodeEncodeState(const void *pObj, SJson *pJson) {
const SVState *pState = (SVState *)pObj;
......
......@@ -249,7 +249,7 @@ void vnodePreClose(SVnode *pVnode) {
void vnodeClose(SVnode *pVnode) {
if (pVnode) {
vnodeCommit(pVnode);
vnodeSyncCommit(pVnode);
vnodeSyncClose(pVnode);
vnodeQueryClose(pVnode);
walClose(pVnode->pWal);
......
......@@ -259,7 +259,7 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWr
pWriter->ever = ever;
// commit it
code = vnodeCommit(pVnode);
code = vnodeSyncCommit(pVnode);
if (code) {
taosMemoryFree(pWriter);
goto _err;
......
......@@ -292,7 +292,9 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp);
break;
case TDMT_VND_COMMIT:
goto _do_commit;
vnodeSyncCommit(pVnode);
vnodeBegin(pVnode);
goto _exit;
default:
vError("vgId:%d, unprocessed msg, %d", TD_VID(pVnode), pMsg->msgType);
return -1;
......@@ -310,13 +312,12 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
// commit if need
if (vnodeShouldCommit(pVnode)) {
_do_commit:
vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version);
// commit current change
if (vnodeCommit(pVnode) < 0) {
vError("vgId:%d, failed to commit vnode since %s.", TD_VID(pVnode), tstrerror(terrno));
goto _err;
}
#if 0
vnodeSyncCommit(pVnode);
#else
vnodeAsyncCommit(pVnode);
#endif
// start a new one
if (vnodeBegin(pVnode) < 0) {
......@@ -325,6 +326,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
}
}
_exit:
return 0;
_err:
......
......@@ -69,8 +69,8 @@ _err:
}
void streamMetaClose(SStreamMeta* pMeta) {
tdbCommit(pMeta->db, &pMeta->txn);
tdbPostCommit(pMeta->db, &pMeta->txn);
tdbCommit(pMeta->db, pMeta->txn);
tdbPostCommit(pMeta->db, pMeta->txn);
tdbTbClose(pMeta->pTaskDb);
tdbTbClose(pMeta->pCheckpointDb);
tdbClose(pMeta->db);
......@@ -115,7 +115,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg,
goto FAIL;
}
if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, &pMeta->txn) < 0) {
if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) {
taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t));
ASSERT(0);
goto FAIL;
......@@ -152,7 +152,7 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) {
tEncodeSStreamTask(&encoder, pTask);
tEncoderClear(&encoder);
if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, &pMeta->txn) < 0) {
if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) {
ASSERT(0);
return -1;
}
......@@ -220,42 +220,35 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
}
int32_t streamMetaBegin(SStreamMeta* pMeta) {
if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
return -1;
}
if (tdbBegin(pMeta->db, &pMeta->txn) < 0) {
if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
return 0;
}
int32_t streamMetaCommit(SStreamMeta* pMeta) {
if (tdbCommit(pMeta->db, &pMeta->txn) < 0) {
if (tdbCommit(pMeta->db, pMeta->txn) < 0) {
return -1;
}
memset(&pMeta->txn, 0, sizeof(TXN));
if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) {
return -1;
}
if (tdbBegin(pMeta->db, &pMeta->txn) < 0) {
if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
return 0;
}
int32_t streamMetaAbort(SStreamMeta* pMeta) {
if (tdbAbort(pMeta->db, &pMeta->txn) < 0) {
if (tdbAbort(pMeta->db, pMeta->txn) < 0) {
return -1;
}
memset(&pMeta->txn, 0, sizeof(TXN));
if (tdbTxnOpen(&pMeta->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) <
0) {
return -1;
}
if (tdbBegin(pMeta->db, &pMeta->txn) < 0) {
if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
return 0;
......
......@@ -179,8 +179,8 @@ _err:
}
void streamStateClose(SStreamState* pState) {
tdbCommit(pState->pTdbState->db, &pState->pTdbState->txn);
tdbPostCommit(pState->pTdbState->db, &pState->pTdbState->txn);
tdbCommit(pState->pTdbState->db, pState->pTdbState->txn);
tdbPostCommit(pState->pTdbState->db, pState->pTdbState->txn);
tdbTbClose(pState->pTdbState->pStateDb);
tdbTbClose(pState->pTdbState->pFuncStateDb);
tdbTbClose(pState->pTdbState->pFillStateDb);
......@@ -192,71 +192,61 @@ void streamStateClose(SStreamState* pState) {
}
int32_t streamStateBegin(SStreamState* pState) {
if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL,
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
tdbTxnClose(&pState->pTdbState->txn);
tdbAbort(pState->pTdbState->db, pState->pTdbState->txn);
return -1;
}
return 0;
}
int32_t streamStateCommit(SStreamState* pState) {
if (tdbCommit(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) {
return -1;
}
if (tdbPostCommit(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
if (tdbPostCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) {
return -1;
}
memset(&pState->pTdbState->txn, 0, sizeof(TXN));
if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL,
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
return -1;
}
return 0;
}
int32_t streamStateAbort(SStreamState* pState) {
if (tdbAbort(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
if (tdbAbort(pState->pTdbState->db, pState->pTdbState->txn) < 0) {
return -1;
}
memset(&pState->pTdbState->txn, 0, sizeof(TXN));
if (tdbTxnOpen(&pState->pTdbState->txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL,
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn, tdbDefaultMalloc, tdbDefaultFree, NULL,
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
if (tdbBegin(pState->pTdbState->db, &pState->pTdbState->txn) < 0) {
return -1;
}
return 0;
}
int32_t streamStateFuncPut(SStreamState* pState, const STupleKey* key, const void* value, int32_t vLen) {
return tdbTbUpsert(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), value, vLen, &pState->pTdbState->txn);
return tdbTbUpsert(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), value, vLen, pState->pTdbState->txn);
}
int32_t streamStateFuncGet(SStreamState* pState, const STupleKey* key, void** pVal, int32_t* pVLen) {
return tdbTbGet(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), pVal, pVLen);
}
int32_t streamStateFuncDel(SStreamState* pState, const STupleKey* key) {
return tdbTbDelete(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), &pState->pTdbState->txn);
return tdbTbDelete(pState->pTdbState->pFuncStateDb, key, sizeof(STupleKey), pState->pTdbState->txn);
}
// todo refactor
int32_t streamStatePut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen) {
SStateKey sKey = {.key = *key, .opNum = pState->number};
return tdbTbUpsert(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), value, vLen, &pState->pTdbState->txn);
return tdbTbUpsert(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), value, vLen, pState->pTdbState->txn);
}
// todo refactor
int32_t streamStateFillPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen) {
return tdbTbUpsert(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), value, vLen, &pState->pTdbState->txn);
return tdbTbUpsert(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), value, vLen, pState->pTdbState->txn);
}
// todo refactor
......@@ -273,7 +263,7 @@ int32_t streamStateFillGet(SStreamState* pState, const SWinKey* key, void** pVal
// todo refactor
int32_t streamStateDel(SStreamState* pState, const SWinKey* key) {
SStateKey sKey = {.key = *key, .opNum = pState->number};
return tdbTbDelete(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), &pState->pTdbState->txn);
return tdbTbDelete(pState->pTdbState->pStateDb, &sKey, sizeof(SStateKey), pState->pTdbState->txn);
}
int32_t streamStateClear(SStreamState* pState) {
......@@ -297,7 +287,7 @@ void streamStateSetNumber(SStreamState* pState, int32_t number) { pState->number
// todo refactor
int32_t streamStateFillDel(SStreamState* pState, const SWinKey* key) {
return tdbTbDelete(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), &pState->pTdbState->txn);
return tdbTbDelete(pState->pTdbState->pFillStateDb, key, sizeof(SWinKey), pState->pTdbState->txn);
}
int32_t streamStateAddIfNotExist(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen) {
......@@ -531,7 +521,7 @@ void streamFreeVal(void* val) { tdbFree(val); }
int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) {
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
return tdbTbUpsert(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), value, vLen,
&pState->pTdbState->txn);
pState->pTdbState->txn);
}
int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) {
......@@ -554,7 +544,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) {
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
return tdbTbDelete(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), &pState->pTdbState->txn);
return tdbTbDelete(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), pState->pTdbState->txn);
}
SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, const SSessionKey* key) {
......@@ -833,7 +823,7 @@ _end:
int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) {
tdbTbUpsert(pState->pTdbState->pParNameDb, &groupId, sizeof(int64_t), tbname, TSDB_TABLE_NAME_LEN,
&pState->pTdbState->txn);
pState->pTdbState->txn);
return 0;
}
......
......@@ -17,6 +17,7 @@
#define _TD_TDB_H_
#include "os.h"
#include "tdbOs.h"
#ifdef __cplusplus
extern "C" {
......@@ -33,7 +34,8 @@ typedef struct STxn TXN;
// TDB
int32_t tdbOpen(const char *dbname, int szPage, int pages, TDB **ppDb, int8_t rollback);
int32_t tdbClose(TDB *pDb);
int32_t tdbBegin(TDB *pDb, TXN *pTxn);
int32_t tdbBegin(TDB *pDb, TXN **pTxn, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg,
int flags);
int32_t tdbCommit(TDB *pDb, TXN *pTxn);
int32_t tdbPostCommit(TDB *pDb, TXN *pTxn);
int32_t tdbPrepareAsyncCommit(TDB *pDb, TXN *pTxn);
......@@ -77,12 +79,16 @@ int32_t tdbTxnClose(TXN *pTxn);
// other
void tdbFree(void *);
typedef struct hashset_st *hashset_t;
struct STxn {
int flags;
int64_t txnId;
void *(*xMalloc)(void *, size_t);
void (*xFree)(void *, void *);
void *xArg;
tdb_fd_t jfd;
hashset_t jPageSet;
};
// error code
......
......@@ -69,7 +69,7 @@ static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN *
static int tdbBtcMoveDownward(SBTC *pBtc);
static int tdbBtcMoveUpward(SBTC *pBtc);
int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr,
int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, TDB *pEnv,
SBTree **ppBt) {
SBTree *pBt;
int ret;
......@@ -106,22 +106,26 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg
if (pgno == 0) {
// fetch page & insert into main db
SPage *pPage;
TXN txn;
tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
TXN *txn;
pPager->inTran = 1;
ret = tdbBegin(pEnv, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
if (ret < 0) {
return -1;
}
SBtreeInitPageArg zArg;
zArg.flags = 0x1 | 0x2; // root leaf node;
zArg.pBt = pBt;
ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, &txn);
ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, txn);
if (ret < 0) {
tdbAbort(pEnv, txn);
return -1;
}
ret = tdbPagerWrite(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page since %s", terrstr());
tdbAbort(pEnv, txn);
return -1;
}
......@@ -130,18 +134,18 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg
pBt->info.nLevel = 1;
pBt->info.nData = 0;
pBt->tbname = (char *)tbname;
// ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pgno, sizeof(SPgno), &txn);
ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pBt->info, sizeof(pBt->info), &txn);
ret = tdbTbInsert(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pBt->info, sizeof(pBt->info), txn);
if (ret < 0) {
tdbAbort(pEnv, txn);
return -1;
}
}
// tdbUnrefPage(pPage);
tdbPCacheRelease(pPager->pCache, pPage, &txn);
tdbCommit(pPager->pEnv, &txn);
tdbPostCommit(pPager->pEnv, &txn);
tdbTxnClose(&txn);
tdbPCacheRelease(pPager->pCache, pPage, txn);
tdbCommit(pPager->pEnv, txn);
tdbPostCommit(pPager->pEnv, txn);
}
ASSERT(pgno != 0);
......@@ -1534,10 +1538,21 @@ int tdbBtcOpen(SBTC *pBtc, SBTree *pBt, TXN *pTxn) {
memset(&pBtc->coder, 0, sizeof(SCellDecoder));
if (pTxn == NULL) {
pBtc->pTxn = &pBtc->txn;
tdbTxnOpen(pBtc->pTxn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0);
TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn));
if (!pTxn) {
return -1;
}
if (tdbTxnOpen(pTxn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0) < 0) {
tdbOsFree(pTxn);
return -1;
}
pBtc->pTxn = pTxn;
pBtc->freeTxn = 1;
} else {
pBtc->pTxn = pTxn;
pBtc->freeTxn = 0;
}
return 0;
......@@ -2231,6 +2246,10 @@ int tdbBtcClose(SBTC *pBtc) {
tdbFree(pBtc->coder.pVal);
}
if (pBtc->freeTxn) {
tdbTxnClose(pBtc->pTxn);
}
return 0;
}
......
......@@ -99,19 +99,37 @@ int tdbClose(TDB *pDb) {
int32_t tdbAlter(TDB *pDb, int pages) { return tdbPCacheAlter(pDb->pCache, pages); }
int32_t tdbBegin(TDB *pDb, TXN *pTxn) {
int32_t tdbBegin(TDB *pDb, TXN **ppTxn, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg,
int flags) {
SPager *pPager;
int ret;
int64_t txnId = ++pDb->txnId;
if (txnId == INT64_MAX) {
pDb->txnId = 0;
}
TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn));
if (!pTxn) {
return -1;
}
if (tdbTxnOpen(pTxn, txnId, xMalloc, xFree, xArg, flags) < 0) {
tdbOsFree(pTxn);
return -1;
}
for (pPager = pDb->pgrList; pPager; pPager = pPager->pNext) {
ret = tdbPagerBegin(pPager, pTxn);
if (ret < 0) {
tdbError("failed to begin pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName,
pTxn->txnId);
tdbTxnClose(pTxn);
return -1;
}
}
*ppTxn = pTxn;
return 0;
}
......@@ -144,6 +162,8 @@ int32_t tdbPostCommit(TDB *pDb, TXN *pTxn) {
}
}
tdbTxnClose(pTxn);
return 0;
}
......@@ -176,6 +196,8 @@ int32_t tdbAbort(TDB *pDb, TXN *pTxn) {
}
}
tdbTxnClose(pTxn);
return 0;
}
......
......@@ -69,14 +69,15 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t)
*ppPage = pPage;
tdbDebug("page/create: %p %p", pPage, xMalloc);
tdbTrace("page/create: %p/%d %p", pPage, pPage->id, xMalloc);
return 0;
}
int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) {
u8 *ptr;
tdbDebug("page/destroy: %p %p", pPage, xFree);
tdbTrace("page/destroy: %p/%d %p", pPage, pPage->id, xFree);
ASSERT(!pPage->isDirty);
ASSERT(xFree);
for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) {
......
......@@ -142,7 +142,7 @@ int hashset_contains(hashset_t set, void *item) {
static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage *, void *, int), void *arg,
u8 loadPage);
static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage);
static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage);
static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage);
static FORCE_INLINE int32_t pageCmpFn(const SRBTreeNode *lhs, const SRBTreeNode *rhs) {
SPage *pPageL = (SPage *)(((uint8_t *)lhs) - offsetof(SPage, node));
......@@ -219,80 +219,22 @@ int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) {
int tdbPagerClose(SPager *pPager) {
if (pPager) {
/*
if (pPager->inTran) {
tdbOsClose(pPager->jfd);
}
*/
tdbOsClose(pPager->fd);
tdbOsFree(pPager);
}
return 0;
}
/*
int tdbPagerOpenDB(SPager *pPager, SPgno *ppgno, bool toCreate, SBTree *pBt) {
SPgno pgno;
SPage *pPage;
int ret;
if (pPager->dbOrigSize > 0) {
pgno = 1;
} else {
pgno = 0;
}
{
// TODO: try to search the main DB to get the page number
// pgno = 0;
}
if (pgno == 0 && toCreate) {
// allocate a new child page
TXN txn;
tdbTxnOpen(&txn, 0, tdbDefaultMalloc, tdbDefaultFree, NULL, 0);
pPager->inTran = 1;
SBtreeInitPageArg zArg;
zArg.flags = 0x1 | 0x2; // root leaf node;
zArg.pBt = pBt;
ret = tdbPagerFetchPage(pPager, &pgno, &pPage, tdbBtreeInitPage, &zArg, &txn);
if (ret < 0) {
return -1;
}
// ret = tdbPagerAllocPage(pPager, &pPage, &pgno);
// if (ret < 0) {
// return -1;
//}
// TODO: Need to zero the page
ret = tdbPagerWrite(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page since %s", terrstr());
return -1;
}
tdbTxnClose(&txn);
}
*ppgno = pgno;
return 0;
}
*/
int tdbPagerWrite(SPager *pPager, SPage *pPage) {
int ret;
SPage **ppPage;
ASSERT(pPager->inTran);
#if 0
if (pPager->inTran == 0) {
ret = tdbPagerBegin(pPager);
if (ret < 0) {
return -1;
}
}
#endif
// ASSERT(pPager->inTran);
if (pPage->isDirty) return 0;
// ref page one more time so the page will not be release
......@@ -322,15 +264,16 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) {
// Write page to journal if neccessary
if (TDB_PAGE_PGNO(pPage) <= pPager->dbOrigSize &&
(pPager->jPageSet == NULL || !hashset_contains(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) {
(pPager->pActiveTxn->jPageSet == NULL ||
!hashset_contains(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) {
ret = tdbPagerWritePageToJournal(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page to journal since %s", tstrerror(terrno));
return -1;
}
if (pPager->jPageSet) {
hashset_add(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
if (pPager->pActiveTxn->jPageSet) {
hashset_add(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
}
}
......@@ -338,23 +281,28 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) {
}
int tdbPagerBegin(SPager *pPager, TXN *pTxn) {
/*
if (pPager->inTran) {
return 0;
}
*/
// Open the journal
pPager->jfd = tdbOsOpen(pPager->jFileName, TDB_O_CREAT | TDB_O_RDWR, 0755);
if (TDB_FD_INVALID(pPager->jfd)) {
char jTxnFileName[TDB_FILENAME_LEN];
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
pTxn->jfd = tdbOsOpen(jTxnFileName, TDB_O_CREAT | TDB_O_RDWR, 0755);
if (TDB_FD_INVALID(pTxn->jfd)) {
tdbError("failed to open file due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
pPager->jPageSet = hashset_create();
// TODO: write the size of the file
pTxn->jPageSet = hashset_create();
pPager->pActiveTxn = pTxn;
// TODO: write the size of the file
/*
pPager->inTran = 1;
*/
return 0;
}
......@@ -363,9 +311,9 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
int ret;
// sync the journal file
ret = tdbOsFSync(pPager->jfd);
ret = tdbOsFSync(pTxn->jfd);
if (ret < 0) {
tdbError("failed to fsync jfd due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
tdbError("failed to fsync: %s. jFileName:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
......@@ -377,7 +325,7 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
pPage = (SPage *)pNode;
ASSERT(pPage->nOverflow == 0);
ret = tdbPagerWritePageToDB(pPager, pPage);
ret = tdbPagerPWritePageToDB(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page to db since %s", tstrerror(terrno));
return -1;
......@@ -395,8 +343,8 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
pPage->isDirty = 0;
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
if (pPager->jPageSet) {
hashset_remove(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
if (pTxn->jPageSet) {
hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
}
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
}
......@@ -415,35 +363,39 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) {
}
int tdbPagerPostCommit(SPager *pPager, TXN *pTxn) {
char jTxnFileName[TDB_FILENAME_LEN];
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
// remove the journal file
if (tdbOsClose(pPager->jfd) < 0) {
tdbError("failed to close jfd due to %s. file:%s", strerror(errno), pPager->jFileName);
if (tdbOsClose(pTxn->jfd) < 0) {
tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) {
tdbError("failed to remove file due to %s. file:%s", strerror(errno), pPager->jFileName);
if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) {
tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
if (pPager->jPageSet) {
hashset_destroy(pPager->jPageSet);
if (pTxn->jPageSet) {
hashset_destroy(pTxn->jPageSet);
}
pPager->inTran = 0;
// pPager->inTran = 0;
return 0;
}
int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) {
SPage *pPage;
SPgno maxPgno = pPager->dbOrigSize;
int ret;
// sync the journal file
ret = tdbOsFSync(pPager->jfd);
ret = tdbOsFSync(pTxn->jfd);
if (ret < 0) {
tdbError("failed to fsync jfd due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
......@@ -454,7 +406,12 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) {
while ((pNode = tRBTreeIterNext(&iter)) != NULL) {
pPage = (SPage *)pNode;
if (pPage->isLocal) continue;
ret = tdbPagerWritePageToDB(pPager, pPage);
SPgno pgno = TDB_PAGE_PGNO(pPage);
if (pgno > maxPgno) {
maxPgno = pgno;
}
ret = tdbPagerPWritePageToDB(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page to db since %s", tstrerror(terrno));
return -1;
......@@ -462,7 +419,8 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) {
}
tdbTrace("tdbttl commit:%p, %d/%d", pPager, pPager->dbOrigSize, pPager->dbFileSize);
pPager->dbOrigSize = pPager->dbFileSize;
pPager->dbOrigSize = maxPgno;
// pPager->dbOrigSize = pPager->dbFileSize;
// release the page
iter = tRBTreeIterCreate(&pPager->rbt, 1);
......@@ -474,6 +432,7 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) {
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
}
/*
tdbTrace("reset dirty tree: %p", &pPager->rbt);
tRBTreeCreate(&pPager->rbt, pageCmpFn);
......@@ -495,15 +454,15 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) {
SPgno journalSize = 0;
int ret;
// 0, sync the journal file
ret = tdbOsFSync(pPager->jfd);
// sync the journal file
ret = tdbOsFSync(pTxn->jfd);
if (ret < 0) {
tdbError("failed to fsync jfd due to %s. file:%s", strerror(errno), pPager->jFileName);
tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
tdb_fd_t jfd = pPager->jfd;
tdb_fd_t jfd = pTxn->jfd;
ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize);
if (ret < 0) {
......@@ -567,7 +526,7 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) {
pPage->isDirty = 0;
tRBTreeDrop(&pPager->rbt, (SRBTreeNode *)pPage);
hashset_remove(pPager->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
hashset_remove(pTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage)));
tdbPCacheRelease(pPager->pCache, pPage, pTxn);
}
......@@ -575,11 +534,24 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) {
tRBTreeCreate(&pPager->rbt, pageCmpFn);
// 4, remove the journal file
tdbOsClose(pPager->jfd);
(void)tdbOsRemove(pPager->jFileName);
hashset_destroy(pPager->jPageSet);
if (tdbOsClose(pTxn->jfd) < 0) {
tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
char jTxnFileName[TDB_FILENAME_LEN];
sprintf(jTxnFileName, "%s.%" PRId64, pPager->jFileName, pTxn->txnId);
if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) {
tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
pPager->inTran = 0;
hashset_destroy(pTxn->jPageSet);
// pPager->inTran = 0;
return 0;
}
......@@ -604,7 +576,7 @@ int tdbPagerFlushPage(SPager *pPager, TXN *pTxn) {
if (pgno > maxPgno) {
maxPgno = pgno;
}
ret = tdbPagerWritePageToDB(pPager, pPage);
ret = tdbPagerPWritePageToDB(pPager, pPage);
if (ret < 0) {
tdbError("failed to write page to db since %s", tstrerror(terrno));
return -1;
......@@ -802,17 +774,18 @@ static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) {
pgno = TDB_PAGE_PGNO(pPage);
ret = tdbOsWrite(pPager->jfd, &pgno, sizeof(pgno));
ret = tdbOsWrite(pPager->pActiveTxn->jfd, &pgno, sizeof(pgno));
if (ret < 0) {
tdbError("failed to write pgno due to %s. file:%s, pgno:%u", strerror(errno), pPager->jFileName, pgno);
tdbError("failed to write pgno due to %s. file:%s, pgno:%u, txnId:%" PRId64, strerror(errno), pPager->jFileName,
pgno, pPager->pActiveTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
ret = tdbOsWrite(pPager->jfd, pPage->pData, pPage->pageSize);
ret = tdbOsWrite(pPager->pActiveTxn->jfd, pPage->pData, pPage->pageSize);
if (ret < 0) {
tdbError("failed to write page data due to %s. file:%s, pageSize:%ld", strerror(errno), pPager->jFileName,
(long)pPage->pageSize);
tdbError("failed to write page data due to %s. file:%s, pageSize:%d, txnId:%" PRId64, strerror(errno),
pPager->jFileName, pPage->pageSize, pPager->pActiveTxn->txnId);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
......@@ -820,13 +793,6 @@ static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) {
return 0;
}
/*
struct TdFile {
TdThreadRwlock rwlock;
int refId;
int fd;
FILE *fp;
} TdFile;
*/
static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage) {
i64 offset;
int ret;
......@@ -846,16 +812,32 @@ static int tdbPagerWritePageToDB(SPager *pPager, SPage *pPage) {
return -1;
}
// pwrite(pPager->fd->fd, pPage->pData, pPage->pageSize, offset);
return 0;
}
*/
static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage) {
i64 offset;
int ret;
offset = (i64)pPage->pageSize * (TDB_PAGE_PGNO(pPage) - 1);
ret = tdbOsPWrite(pPager->fd, pPage->pData, pPage->pageSize, offset);
if (ret < 0) {
tdbError("failed to pwrite page data due to %s. file:%s, pageSize:%d", strerror(errno), pPager->dbFileName,
pPage->pageSize);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
return 0;
}
int tdbPagerRestore(SPager *pPager, SBTree *pBt) {
static int tdbPagerRestore(SPager *pPager, SBTree *pBt, const char *jFileName) {
int ret = 0;
SPgno journalSize = 0;
u8 *pageBuf = NULL;
tdb_fd_t jfd = tdbOsOpen(pPager->jFileName, TDB_O_RDWR, 0755);
tdb_fd_t jfd = tdbOsOpen(jFileName, TDB_O_RDWR, 0755);
if (jfd == NULL) {
return 0;
}
......@@ -928,12 +910,50 @@ int tdbPagerRestore(SPager *pPager, SBTree *pBt) {
return 0;
}
int tdbPagerRestoreJournals(SPager *pPager, SBTree *pBt) {
tdbDirEntryPtr pDirEntry;
tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName);
if (pDir == NULL) {
tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno));
return -1;
}
while ((pDirEntry = tdbReadDir(pDir)) != NULL) {
char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry));
if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) {
if (tdbPagerRestore(pPager, pBt, name) < 0) {
tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), name);
return -1;
}
}
}
tdbCloseDir(&pDir);
return 0;
}
int tdbPagerRollback(SPager *pPager) {
if (tdbOsRemove(pPager->jFileName) < 0 && errno != ENOENT) {
tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), pPager->jFileName);
tdbDirEntryPtr pDirEntry;
tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName);
if (pDir == NULL) {
tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno));
return -1;
}
while ((pDirEntry = tdbReadDir(pDir)) != NULL) {
char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry));
if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) {
if (tdbOsRemove(name) < 0 && errno != ENOENT) {
tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name);
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
}
}
tdbCloseDir(&pDir);
return 0;
}
......@@ -108,7 +108,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF
ASSERT(pPager != NULL);
// pTb->pBt
ret = tdbBtreeOpen(keyLen, valLen, pPager, tbname, pgno, keyCmprFn, &(pTb->pBt));
ret = tdbBtreeOpen(keyLen, valLen, pPager, tbname, pgno, keyCmprFn, pEnv, &(pTb->pBt));
if (ret < 0) {
tdbOsFree(pTb);
return -1;
......@@ -117,7 +117,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF
if (rollback) {
tdbPagerRollback(pPager);
} else {
ret = tdbPagerRestore(pPager, pTb->pBt);
ret = tdbPagerRestoreJournals(pPager, pTb->pBt);
if (ret < 0) {
tdbOsFree(pTb);
return -1;
......
......@@ -28,4 +28,10 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void
return 0;
}
int tdbTxnClose(TXN *pTxn) { return 0; }
\ No newline at end of file
int tdbTxnClose(TXN *pTxn) {
if (pTxn) {
tdbOsFree(pTxn);
}
return 0;
}
......@@ -147,11 +147,11 @@ struct SBTC {
SPage *pgStack[BTREE_MAX_DEPTH + 1];
SCellDecoder coder;
TXN *pTxn;
TXN txn;
i8 freeTxn;
};
// SBTree
int tdbBtreeOpen(int keyLen, int valLen, SPager *pFile, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr,
int tdbBtreeOpen(int keyLen, int valLen, SPager *pFile, char const *tbname, SPgno pgno, tdb_cmpr_fn_t kcmpr, TDB *pEnv,
SBTree **ppBt);
int tdbBtreeClose(SBTree *pBt);
int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, int vLen, TXN *pTxn);
......@@ -197,7 +197,7 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initP
TXN *pTxn);
void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn);
int tdbPagerAllocPage(SPager *pPager, SPgno *ppgno);
int tdbPagerRestore(SPager *pPager, SBTree *pBt);
int tdbPagerRestoreJournals(SPager *pPager, SBTree *pBt);
int tdbPagerRollback(SPager *pPager);
// tdbPCache.c ====================================
......@@ -382,24 +382,22 @@ struct STDB {
#ifdef USE_MAINDB
TTB *pMainDb;
#endif
int64_t txnId;
};
typedef struct hashset_st *hashset_t;
struct SPager {
char *dbFileName;
char *jFileName;
int pageSize;
uint8_t fid[TDB_FILE_ID_LEN];
tdb_fd_t fd;
tdb_fd_t jfd;
SPCache *pCache;
SPgno dbFileSize;
SPgno dbOrigSize;
//SPage *pDirty;
hashset_t jPageSet;
// SPage *pDirty;
SRBTree rbt;
u8 inTran;
// u8 inTran;
TXN *pActiveTxn;
SPager *pNext; // used by TDB
SPager *pHashNext; // used by TDB
#ifdef USE_MAINDB
......
......@@ -47,13 +47,19 @@ typedef TdFilePtr tdb_fd_t;
#define TDB_O_RDWR (TD_FILE_WRITE) | (TD_FILE_READ)
#define tdbOsOpen(PATH, OPTION, MODE) taosOpenFile((PATH), (OPTION))
#define tdbOsClose(FD) taosCloseFile(&(FD))
#define tdbOsRead taosReadFile
#define tdbOsPRead taosPReadFile
#define tdbOsWrite taosWriteFile
#define tdbOsPWrite taosPWriteFile
#define tdbOsFSync taosFsyncFile
#define tdbOsLSeek taosLSeekFile
#define tdbDirPtr TdDirPtr
#define tdbDirEntryPtr TdDirEntryPtr
#define tdbReadDir taosReadDir
#define tdbGetDirEntryName taosGetDirEntryName
#define tdbDirEntryBaseName taosDirEntryBaseName
#define tdbCloseDir taosCloseDir
#define tdbOsRemove remove
#define tdbOsFileSize(FD, PSIZE) taosFStatFile(FD, PSIZE, NULL)
......
......@@ -170,11 +170,9 @@ static void insertOfp(void) {
SPoolMem *pPool = openPool();
// start a transaction
TXN txn;
int64_t txnid = 0;
++txnid;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
TXN *txn = NULL;
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
// generate value payload
// char val[((4083 - 4 - 3 - 2) + 1) * 100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4)
......@@ -185,12 +183,12 @@ static void insertOfp(void) {
// insert the generated big data
// char const *key = "key1";
char const *key = "key123456789";
ret = tdbTbInsert(pDb, key, strlen(key), val, valLen, &txn);
ret = tdbTbInsert(pDb, key, strlen(key), val, valLen, txn);
GTEST_ASSERT_EQ(ret, 0);
// commit current transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
}
// TEST(TdbOVFLPagesTest, DISABLED_TbInsertTest) {
......@@ -258,11 +256,9 @@ TEST(TdbOVFLPagesTest, TbDeleteTest) {
SPoolMem *pPool = openPool();
// start a transaction
TXN txn;
int64_t txnid = 0;
++txnid;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
TXN *txn;
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
// generate value payload
// char val[((4083 - 4 - 3 - 2) + 1) * 100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4)
......@@ -271,7 +267,7 @@ TEST(TdbOVFLPagesTest, TbDeleteTest) {
generateBigVal(val, valLen);
{ // insert the generated big data
ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn);
ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -297,7 +293,7 @@ tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_REA
tdbBegin(pEnv, &txn);
*/
{ // upsert the data
ret = tdbTbUpsert(pDb, "key1", strlen("key1"), "value1", strlen("value1"), &txn);
ret = tdbTbUpsert(pDb, "key1", strlen("key1"), "value1", strlen("value1"), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -316,7 +312,7 @@ tdbBegin(pEnv, &txn);
}
{ // delete the data
ret = tdbTbDelete(pDb, "key1", strlen("key1"), &txn);
ret = tdbTbDelete(pDb, "key1", strlen("key1"), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -335,8 +331,8 @@ tdbBegin(pEnv, &txn);
}
// commit current transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
}
// TEST(tdb_test, DISABLED_simple_insert1) {
......@@ -346,7 +342,7 @@ TEST(tdb_test, simple_insert1) {
TTB *pDb;
tdb_cmpr_fn_t compFunc;
int nData = 1;
TXN txn;
TXN *txn;
int const pageSize = 4096;
taosRemoveDir("tdb");
......@@ -365,16 +361,13 @@ TEST(tdb_test, simple_insert1) {
// char val[(4083 - 4 - 3 - 2)]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4)
char val[(4083 - 4 - 3 - 2) + 1]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4)
int64_t poolLimit = 4096; // 1M pool limit
int64_t txnid = 0;
SPoolMem *pPool;
// open the pool
pPool = openPool();
// start a transaction
txnid++;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 1; iData <= nData; iData++) {
sprintf(key, "key0");
......@@ -393,26 +386,25 @@ TEST(tdb_test, simple_insert1) {
val[i] = c;
}
ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn);
ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, txn);
GTEST_ASSERT_EQ(ret, 0);
// if pool is full, commit the transaction and start a new one
if (pPool->size >= poolLimit) {
// commit current transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
// start a new transaction
clearPool(pPool);
txnid++;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
}
}
// commit the transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
{ // Query the data
void *pVal = NULL;
......
......@@ -125,7 +125,7 @@ TEST(tdb_test, DISABLED_simple_insert1) {
TTB *pDb;
tdb_cmpr_fn_t compFunc;
int nData = 1000000;
TXN txn;
TXN *txn;
taosRemoveDir("tdb");
......@@ -142,40 +142,35 @@ TEST(tdb_test, DISABLED_simple_insert1) {
char key[64];
char val[64];
int64_t poolLimit = 4096; // 1M pool limit
int64_t txnid = 0;
SPoolMem *pPool;
// open the pool
pPool = openPool();
// start a transaction
txnid++;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 1; iData <= nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(val, "value%d", iData);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn);
GTEST_ASSERT_EQ(ret, 0);
// if pool is full, commit the transaction and start a new one
if (pPool->size >= poolLimit) {
// commit current transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
// start a new transaction
clearPool(pPool);
txnid++;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
}
}
// commit the transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
{ // Query the data
void *pVal = NULL;
......@@ -245,7 +240,7 @@ TEST(tdb_test, DISABLED_simple_insert2) {
TTB *pDb;
tdb_cmpr_fn_t compFunc;
int nData = 1000000;
TXN txn;
TXN *txn;
taosRemoveDir("tdb");
......@@ -261,21 +256,18 @@ TEST(tdb_test, DISABLED_simple_insert2) {
{
char key[64];
char val[64];
int64_t txnid = 0;
SPoolMem *pPool;
// open the pool
pPool = openPool();
// start a transaction
txnid++;
tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 1; iData <= nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(val, "value%d", iData);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -312,8 +304,8 @@ TEST(tdb_test, DISABLED_simple_insert2) {
}
// commit the transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
ret = tdbTbDrop(pDb);
GTEST_ASSERT_EQ(ret, 0);
......@@ -331,7 +323,7 @@ TEST(tdb_test, DISABLED_simple_delete1) {
TTB *pDb;
char key[128];
char data[128];
TXN txn;
TXN *txn;
TDB *pEnv;
SPoolMem *pPool;
void *pKey = NULL;
......@@ -353,14 +345,13 @@ TEST(tdb_test, DISABLED_simple_delete1) {
ret = tdbTbOpen("db.db", -1, -1, tKeyCmpr, pEnv, &pDb, 0);
GTEST_ASSERT_EQ(ret, 0);
tdbTxnOpen(&txn, 0, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
// loop to insert batch data
for (int iData = 0; iData < nKV; iData++) {
sprintf(key, "key%d", iData);
sprintf(data, "data%d", iData);
ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), &txn);
ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -378,7 +369,7 @@ TEST(tdb_test, DISABLED_simple_delete1) {
for (int iData = nKV - 1; iData > 30; iData--) {
sprintf(key, "key%d", iData);
ret = tdbTbDelete(pDb, key, strlen(key), &txn);
ret = tdbTbDelete(pDb, key, strlen(key), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -413,7 +404,8 @@ TEST(tdb_test, DISABLED_simple_delete1) {
tdbTbcClose(pDbc);
tdbCommit(pEnv, &txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
closePool(pPool);
......@@ -430,7 +422,7 @@ TEST(tdb_test, DISABLED_simple_upsert1) {
char data[64];
void *pData = NULL;
SPoolMem *pPool;
TXN txn;
TXN *txn;
taosRemoveDir("tdb");
......@@ -444,13 +436,12 @@ TEST(tdb_test, DISABLED_simple_upsert1) {
pPool = openPool();
// insert some data
tdbTxnOpen(&txn, 0, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 0; iData < nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(data, "data%d", iData);
ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), &txn);
ret = tdbTbInsert(pDb, key, strlen(key), data, strlen(data), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -467,11 +458,12 @@ TEST(tdb_test, DISABLED_simple_upsert1) {
for (int iData = 0; iData < nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(data, "data%d-u", iData);
ret = tdbTbUpsert(pDb, key, strlen(key), data, strlen(data), &txn);
ret = tdbTbUpsert(pDb, key, strlen(key), data, strlen(data), txn);
GTEST_ASSERT_EQ(ret, 0);
}
tdbCommit(pEnv, &txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
// query the data
for (int iData = 0; iData < nData; iData++) {
......@@ -492,7 +484,7 @@ TEST(tdb_test, multi_thread_query) {
TTB *pDb;
tdb_cmpr_fn_t compFunc;
int nData = 1000000;
TXN txn;
TXN *txn;
taosRemoveDir("tdb");
......@@ -508,26 +500,18 @@ TEST(tdb_test, multi_thread_query) {
char key[64];
char val[64];
int64_t poolLimit = 4096 * 20; // 1M pool limit
int64_t txnid = 0;
SPoolMem *pPool;
// open the pool
pPool = openPool();
// start a transaction
txnid++;
txn.flags = TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED;
txn.txnId = -1;
txn.xMalloc = poolMalloc;
txn.xFree = poolFree;
txn.xArg = pPool;
// tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, );
tdbBegin(pEnv, &txn);
tdbBegin(pEnv, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 1; iData <= nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(val, "value%d", iData);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn);
ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), txn);
GTEST_ASSERT_EQ(ret, 0);
}
......@@ -578,7 +562,7 @@ TEST(tdb_test, multi_thread_query) {
std::vector<std::thread> threads;
for (int i = 0; i < nThreads; i++) {
if (i == 0) {
threads.push_back(std::thread(tdbCommit, pEnv, &txn));
threads.push_back(std::thread(tdbCommit, pEnv, txn));
} else {
threads.push_back(std::thread(f, pDb, nData));
}
......@@ -589,8 +573,8 @@ TEST(tdb_test, multi_thread_query) {
}
// commit the transaction
tdbCommit(pEnv, &txn);
tdbTxnClose(&txn);
tdbCommit(pEnv, txn);
tdbPostCommit(pEnv, txn);
// Close a database
tdbTbClose(pDb);
......@@ -621,17 +605,12 @@ TEST(tdb_test, DISABLED_multi_thread1) {
GTEST_ASSERT_EQ(ret, 0);
auto insert = [](TDB *pDb, TTB *pTb, int nData, int *stop, std::shared_timed_mutex *mu) {
TXN txn = {0};
TXN *txn = NULL;
char key[128];
char val[128];
SPoolMem *pPool = openPool();
txn.flags = TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED;
txn.txnId = -1;
txn.xMalloc = poolMalloc;
txn.xFree = poolFree;
txn.xArg = pPool;
tdbBegin(pDb, &txn);
tdbBegin(pDb, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
for (int iData = 1; iData <= nData; iData++) {
sprintf(key, "key%d", iData);
sprintf(val, "value%d", iData);
......@@ -644,14 +623,17 @@ TEST(tdb_test, DISABLED_multi_thread1) {
}
if (pPool->size > 1024 * 1024) {
tdbCommit(pDb, &txn);
tdbCommit(pDb, txn);
tdbPostCommit(pDb, txn);
clearPool(pPool);
tdbBegin(pDb, &txn);
tdbBegin(pDb, &txn, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED);
}
}
tdbCommit(pDb, &txn);
tdbCommit(pDb, txn);
tdbPostCommit(pDb, txn);
closePool(pPool);
*stop = 1;
......
......@@ -491,6 +491,28 @@ int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) {
return count;
}
int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset) {
if (pFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockWrlock(&(pFile->rwlock));
#endif
assert(pFile->fd >= 0); // Please check if you have closed the file.
#ifdef WINDOWS
size_t pos = _lseeki64(pFile->fd, 0, SEEK_CUR);
_lseeki64(pFile->fd, offset, SEEK_SET);
int64_t ret = _write(pFile->fd, buf, count);
_lseeki64(pFile->fd, pos, SEEK_SET);
#else
int64_t ret = pwrite(pFile->fd, buf, count, offset);
#endif
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return ret;
}
int64_t taosLSeekFile(TdFilePtr pFile, int64_t offset, int32_t whence) {
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册