From 76865258cf4b82c8ffcc04293b5aa3940ed55d5a Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 30 Sep 2022 15:49:23 +0800 Subject: [PATCH] enh: protect WAL meta by flushing to a tmpfile at first and then renaming --- source/libs/tdb/src/db/tdbDb.c | 4 +-- source/libs/wal/src/walMeta.c | 54 +++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/source/libs/tdb/src/db/tdbDb.c b/source/libs/tdb/src/db/tdbDb.c index a7fc50dc86..6c01348bc2 100644 --- a/source/libs/tdb/src/db/tdbDb.c +++ b/source/libs/tdb/src/db/tdbDb.c @@ -121,7 +121,7 @@ int32_t tdbCommit(TDB *pDb, TXN *pTxn) { for (pPager = pDb->pgrList; pPager; pPager = pPager->pNext) { ret = tdbPagerCommit(pPager, pTxn); if (ret < 0) { - tdbError("failed to commit pager. dbName:%s, txnId:%d", pDb->dbName, pTxn->txnId); + tdbError("failed to commit pager since %s. dbName:%s, txnId:%d", tstrerror(terrno), pDb->dbName, pTxn->txnId); return -1; } } @@ -136,7 +136,7 @@ int32_t tdbAbort(TDB *pDb, TXN *pTxn) { for (pPager = pDb->pgrList; pPager; pPager = pPager->pNext) { ret = tdbPagerAbort(pPager, pTxn); if (ret < 0) { - tdbError("failed to abort pager. dbName:%s, txnId:%d", pDb->dbName, pTxn->txnId); + tdbError("failed to abort pager since %s. dbName:%s, txnId:%d", tstrerror(terrno), pDb->dbName, pTxn->txnId); return -1; } } diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index de9c115067..597fd0000b 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -35,8 +35,12 @@ int64_t FORCE_INLINE walGetCommittedVer(SWal* pWal) { return pWal->vers.commitVe int64_t FORCE_INLINE walGetAppliedVer(SWal* pWal) { return pWal->vers.appliedVer; } -static FORCE_INLINE void walBuildMetaName(SWal* pWal, int metaVer, char* buf) { - sprintf(buf, "%s/meta-ver%d", pWal->path, metaVer); +static FORCE_INLINE int walBuildMetaName(SWal* pWal, int metaVer, char* buf) { + return sprintf(buf, "%s/meta-ver%d", pWal->path, metaVer); +} + +static FORCE_INLINE int walBuildTmpMetaName(SWal* pWal, char* buf) { + return sprintf(buf, "%s/meta-ver.tmp", pWal->path); } static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal) { @@ -578,22 +582,51 @@ static int walFindCurMetaVer(SWal* pWal) { int walSaveMeta(SWal* pWal) { int metaVer = walFindCurMetaVer(pWal); char fnameStr[WAL_FILE_LEN]; - walBuildMetaName(pWal, metaVer + 1, fnameStr); - TdFilePtr pMetaFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE); + char tmpFnameStr[WAL_FILE_LEN]; + int n; + + // flush to a tmpfile + n = walBuildTmpMetaName(pWal, tmpFnameStr); + ASSERT(n < sizeof(tmpFnameStr) && "Buffer overflow of file name"); + + TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pMetaFile == NULL) { + wError("failed to open file due to %s. file:%s", strerror(errno), tmpFnameStr); + terrno = TAOS_SYSTEM_ERROR(errno); return -1; } + char* serialized = walMetaSerialize(pWal); int len = strlen(serialized); if (len != taosWriteFile(pMetaFile, serialized, len)) { // TODO:clean file + wError("failed to write file due to %s. file:%s", strerror(errno), tmpFnameStr); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } - taosCloseFile(&pMetaFile); - taosRemoveFile(fnameStr); - return -1; + if (taosFsyncFile(pMetaFile) < 0) { + wError("failed to sync file due to %s. file:%s", strerror(errno), tmpFnameStr); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + if (taosCloseFile(&pMetaFile) < 0) { + wError("failed to close file due to %s. file:%s", strerror(errno), tmpFnameStr); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + // rename it + n = walBuildMetaName(pWal, metaVer + 1, fnameStr); + ASSERT(n < sizeof(fnameStr) && "Buffer overflow of file name"); + + if (taosRenameFile(tmpFnameStr, fnameStr) < 0) { + wError("failed to rename file due to %s. dest:%s", strerror(errno), fnameStr); + terrno = TAOS_SYSTEM_ERROR(errno); + goto _err; } - taosCloseFile(&pMetaFile); // delete old file if (metaVer > -1) { walBuildMetaName(pWal, metaVer, fnameStr); @@ -601,6 +634,11 @@ int walSaveMeta(SWal* pWal) { } taosMemoryFree(serialized); return 0; + +_err: + taosCloseFile(&pMetaFile); + taosMemoryFree(serialized); + return -1; } int walLoadMeta(SWal* pWal) { -- GitLab