diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 076826ebc2d0956aad14f8eeec27a574425788b1..5a7f149bc6e22f544ea5414be07f0829c264bb5f 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -51,6 +51,7 @@ static int32_t dmInitMonitor() { static bool dmCheckDiskSpace() { osUpdate(); + // sufficiency if (!osDataSpaceSufficient()) { dWarn("free data disk size: %f GB, not sufficient, expected %f GB at least", (double)tsDataSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsDataSpace.reserved / 1024.0 / 1024.0 / 1024.0); } @@ -60,7 +61,24 @@ static bool dmCheckDiskSpace() { if (!osTempSpaceSufficient()) { dWarn("free temp disk size: %f GB, not sufficient, expected %f GB at least", (double)tsTempSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsTempSpace.reserved / 1024.0 / 1024.0 / 1024.0); } - return true; + // availability + bool ret = true; + if (!osDataSpaceAvailable()) { + dError("data disk space unavailable, i.e. %s", tsDataDir); + terrno = TSDB_CODE_VND_NO_DISKSPACE; + ret = false; + } + if (!osLogSpaceAvailable()) { + dError("log disk space unavailable, i.e. %s", tsLogDir); + terrno = TSDB_CODE_VND_NO_DISKSPACE; + ret = false; + } + if (!osTempSpaceAvailable()) { + dError("temp disk space unavailable, i.e. %s", tsTempDir); + terrno = TSDB_CODE_VND_NO_DISKSPACE; + ret = false; + } + return ret; } static bool dmCheckDataDirVersion() { diff --git a/source/dnode/mgmt/test/sut/src/sut.cpp b/source/dnode/mgmt/test/sut/src/sut.cpp index 699203e8c1daf52727959c794dd22e1f9a031c9c..a4d2e4688179b7161c03837b5478b197a6576bca 100644 --- a/source/dnode/mgmt/test/sut/src/sut.cpp +++ b/source/dnode/mgmt/test/sut/src/sut.cpp @@ -43,9 +43,7 @@ void Testbase::InitLog(const char* path) { } void Testbase::Init(const char* path, int16_t port) { -#ifdef _TD_DARWIN_64 osDefaultInit(); -#endif tsServerPort = port; strcpy(tsLocalFqdn, "localhost"); snprintf(tsLocalEp, TSDB_EP_LEN, "%s:%u", tsLocalFqdn, tsServerPort); diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 279f4dc6562d45b11240628821a7b2ba3c8e6eab..fa22805df2490508346e3d8821a7956992d019e8 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -123,8 +123,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) { } SWalCkHead* logContent = (SWalCkHead*)candidate; if (walValidHeadCksum(logContent) != 0) { - wError("vgId:%d, failed to validate checksum of wal entry header. offset:% %" PRId64 ", file:%s", - ((char*)(logContent)-buf), fnameStr); + wWarn("vgId:%d, failed to validate checksum of wal entry header. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, + offset + ((char*)(logContent)-buf), fnameStr); haystack = candidate + 1; if (firstTrial) { break; @@ -162,8 +162,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) { } if (walValidBodyCksum(logContent) != 0) { terrno = TSDB_CODE_WAL_CHKSUM_MISMATCH; - wError("vgId:%d, failed to validate checksum of wal entry body. offset:% %" PRId64 ", file:%s", - ((char*)(logContent)-buf), fnameStr); + wWarn("vgId:%d, failed to validate checksum of wal entry body. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, + offset + ((char*)(logContent)-buf), fnameStr); haystack = candidate + 1; if (firstTrial) { break; @@ -481,6 +481,10 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { continue; } + if (offset != (idxEntry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry)) { + continue; + } + if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) { wWarn("vgId:%d, failed to read log file since %s. file:%s, offset:%" PRId64 ", idx entry ver:%" PRId64 "", pWal->cfg.vgId, terrstr(), fLogNameStr, idxEntry.offset, idxEntry.ver); @@ -493,6 +497,8 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { } offset += sizeof(SWalIdxEntry); + ASSERT(offset == (idxEntry.ver - pFileInfo->firstVer + 1) * sizeof(SWalIdxEntry)); + // ftruncate idx file if (offset < fileSize) { if (taosFtruncateFile(pIdxFile, offset) < 0) { diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 91fa49fce0777b4f382e58878bc6c20f8f233beb..0562bbad274a795048c2b91566a3040e43800541 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -410,25 +410,35 @@ END: static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) { SWalIdxEntry entry = {.ver = ver, .offset = offset}; - int64_t idxOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END); + SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal); + ASSERT(pFileInfo != NULL); + ASSERT(pFileInfo->firstVer >= 0); + int64_t idxOffset = (entry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry); wDebug("vgId:%d, write index, index:%" PRId64 ", offset:%" PRId64 ", at %" PRId64, pWal->cfg.vgId, ver, offset, idxOffset); + int64_t size = taosWriteFile(pWal->pIdxFile, &entry, sizeof(SWalIdxEntry)); if (size != sizeof(SWalIdxEntry)) { + wError("vgId:%d, failed to write idx entry due to %s. ver:%lld", pWal->cfg.vgId, strerror(errno), ver); terrno = TAOS_SYSTEM_ERROR(errno); - // TODO truncate return -1; } + + ASSERT(taosLSeekFile(pWal->pIdxFile, 0, SEEK_END) == idxOffset + sizeof(SWalIdxEntry) && "Offset of idx entries misaligned"); return 0; } -// TODO gurantee atomicity by truncate failed writing static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta, const void *body, int32_t bodyLen) { int64_t code = 0; int64_t offset = walGetCurFileOffset(pWal); + SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal); + ASSERT(pFileInfo != NULL); + if (pFileInfo->firstVer == -1) { + pFileInfo->firstVer = index; + } pWal->writeHead.head.version = index; pWal->writeHead.head.bodyLen = bodyLen; pWal->writeHead.head.msgType = msgType; @@ -439,11 +449,14 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy pWal->writeHead.cksumHead = walCalcHeadCksum(&pWal->writeHead); pWal->writeHead.cksumBody = walCalcBodyCksum(body, bodyLen); - wDebug("vgId:%d, wal write log %ld, msgType: %s", pWal->cfg.vgId, index, TMSG_INFO(msgType)); + code = walWriteIndex(pWal, index, offset); + if (code < 0) { + goto END; + } + if (taosWriteFile(pWal->pLogFile, &pWal->writeHead, sizeof(SWalCkHead)) != sizeof(SWalCkHead)) { - // TODO ftruncate terrno = TAOS_SYSTEM_ERROR(errno); wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal), strerror(errno)); @@ -452,7 +465,6 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy } if (taosWriteFile(pWal->pLogFile, (char *)body, bodyLen) != bodyLen) { - // TODO ftruncate terrno = TAOS_SYSTEM_ERROR(errno); wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal), strerror(errno)); @@ -460,24 +472,31 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy goto END; } - code = walWriteIndex(pWal, index, offset); - if (code < 0) { - // TODO ftruncate - goto END; - } - // set status if (pWal->vers.firstVer == -1) pWal->vers.firstVer = index; pWal->vers.lastVer = index; pWal->totSize += sizeof(SWalCkHead) + bodyLen; - if (walGetCurFileInfo(pWal)->firstVer == -1) { - walGetCurFileInfo(pWal)->firstVer = index; - } - walGetCurFileInfo(pWal)->lastVer = index; - walGetCurFileInfo(pWal)->fileSize += sizeof(SWalCkHead) + bodyLen; + pFileInfo->lastVer = index; + pFileInfo->fileSize += sizeof(SWalCkHead) + bodyLen; return 0; + END: + // recover in a reverse order + if (taosFtruncateFile(pWal->pLogFile, offset) < 0) { + wFatal("vgId:%d, failed to ftruncate logfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, offset, + strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(errno); + ASSERT(0 && "failed to recover from error"); + } + + int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry); + if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) { + wFatal("vgId:%d, failed to ftruncate idxfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, idxOffset, + strerror(errno)); + terrno = TAOS_SYSTEM_ERROR(errno); + ASSERT(0 && "failed to recover from error"); + } return -1; } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 2e2300ba147ce9bb1df2e7f401480c731cf0ce0b..9d97cf7ab2958b17c5245ec8b41a5e09ccdff67c 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -443,10 +443,13 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) { static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) { if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) { taosUpdateLogNums(level); - if (tsAsyncLog) { + if (tsAsyncLog && level != DEBUG_FATAL) { taosPushLogBuffer(tsLogObj.logHandle, buffer, len); } else { taosWriteFile(tsLogObj.logHandle->pFile, buffer, len); + if (level == DEBUG_FATAL) { + taosFsyncFile(tsLogObj.logHandle->pFile); + } } if (tsLogObj.maxLines > 0) {