未验证 提交 3cbfd2d1 编写于 作者: S Shengliang Guan 提交者: GitHub

Merge pull request #17392 from taosdata/FIX/TD-19183-3.0

enh: not allowed to start on disk space unavailable
......@@ -51,6 +51,7 @@ static int32_t dmInitMonitor() {
static bool dmCheckDiskSpace() {
osUpdate();
// sufficiency
if (!osDataSpaceSufficient()) {
dWarn("free data disk size: %f GB, not sufficient, expected %f GB at least", (double)tsDataSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsDataSpace.reserved / 1024.0 / 1024.0 / 1024.0);
}
......@@ -60,7 +61,24 @@ static bool dmCheckDiskSpace() {
if (!osTempSpaceSufficient()) {
dWarn("free temp disk size: %f GB, not sufficient, expected %f GB at least", (double)tsTempSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsTempSpace.reserved / 1024.0 / 1024.0 / 1024.0);
}
return true;
// availability
bool ret = true;
if (!osDataSpaceAvailable()) {
dError("data disk space unavailable, i.e. %s", tsDataDir);
terrno = TSDB_CODE_VND_NO_DISKSPACE;
ret = false;
}
if (!osLogSpaceAvailable()) {
dError("log disk space unavailable, i.e. %s", tsLogDir);
terrno = TSDB_CODE_VND_NO_DISKSPACE;
ret = false;
}
if (!osTempSpaceAvailable()) {
dError("temp disk space unavailable, i.e. %s", tsTempDir);
terrno = TSDB_CODE_VND_NO_DISKSPACE;
ret = false;
}
return ret;
}
static bool dmCheckDataDirVersion() {
......
......@@ -43,9 +43,7 @@ void Testbase::InitLog(const char* path) {
}
void Testbase::Init(const char* path, int16_t port) {
#ifdef _TD_DARWIN_64
osDefaultInit();
#endif
tsServerPort = port;
strcpy(tsLocalFqdn, "localhost");
snprintf(tsLocalEp, TSDB_EP_LEN, "%s:%u", tsLocalFqdn, tsServerPort);
......
......@@ -123,8 +123,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
}
SWalCkHead* logContent = (SWalCkHead*)candidate;
if (walValidHeadCksum(logContent) != 0) {
wError("vgId:%d, failed to validate checksum of wal entry header. offset:% %" PRId64 ", file:%s",
((char*)(logContent)-buf), fnameStr);
wWarn("vgId:%d, failed to validate checksum of wal entry header. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
offset + ((char*)(logContent)-buf), fnameStr);
haystack = candidate + 1;
if (firstTrial) {
break;
......@@ -162,8 +162,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
}
if (walValidBodyCksum(logContent) != 0) {
terrno = TSDB_CODE_WAL_CHKSUM_MISMATCH;
wError("vgId:%d, failed to validate checksum of wal entry body. offset:% %" PRId64 ", file:%s",
((char*)(logContent)-buf), fnameStr);
wWarn("vgId:%d, failed to validate checksum of wal entry body. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
offset + ((char*)(logContent)-buf), fnameStr);
haystack = candidate + 1;
if (firstTrial) {
break;
......@@ -481,6 +481,10 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
continue;
}
if (offset != (idxEntry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry)) {
continue;
}
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
wWarn("vgId:%d, failed to read log file since %s. file:%s, offset:%" PRId64 ", idx entry ver:%" PRId64 "",
pWal->cfg.vgId, terrstr(), fLogNameStr, idxEntry.offset, idxEntry.ver);
......@@ -493,6 +497,8 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
}
offset += sizeof(SWalIdxEntry);
ASSERT(offset == (idxEntry.ver - pFileInfo->firstVer + 1) * sizeof(SWalIdxEntry));
// ftruncate idx file
if (offset < fileSize) {
if (taosFtruncateFile(pIdxFile, offset) < 0) {
......
......@@ -410,25 +410,35 @@ END:
static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
SWalIdxEntry entry = {.ver = ver, .offset = offset};
int64_t idxOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END);
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
ASSERT(pFileInfo != NULL);
ASSERT(pFileInfo->firstVer >= 0);
int64_t idxOffset = (entry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
wDebug("vgId:%d, write index, index:%" PRId64 ", offset:%" PRId64 ", at %" PRId64, pWal->cfg.vgId, ver, offset,
idxOffset);
int64_t size = taosWriteFile(pWal->pIdxFile, &entry, sizeof(SWalIdxEntry));
if (size != sizeof(SWalIdxEntry)) {
wError("vgId:%d, failed to write idx entry due to %s. ver:%lld", pWal->cfg.vgId, strerror(errno), ver);
terrno = TAOS_SYSTEM_ERROR(errno);
// TODO truncate
return -1;
}
ASSERT(taosLSeekFile(pWal->pIdxFile, 0, SEEK_END) == idxOffset + sizeof(SWalIdxEntry) && "Offset of idx entries misaligned");
return 0;
}
// TODO gurantee atomicity by truncate failed writing
static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta,
const void *body, int32_t bodyLen) {
int64_t code = 0;
int64_t offset = walGetCurFileOffset(pWal);
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
ASSERT(pFileInfo != NULL);
if (pFileInfo->firstVer == -1) {
pFileInfo->firstVer = index;
}
pWal->writeHead.head.version = index;
pWal->writeHead.head.bodyLen = bodyLen;
pWal->writeHead.head.msgType = msgType;
......@@ -439,11 +449,14 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
pWal->writeHead.cksumHead = walCalcHeadCksum(&pWal->writeHead);
pWal->writeHead.cksumBody = walCalcBodyCksum(body, bodyLen);
wDebug("vgId:%d, wal write log %ld, msgType: %s", pWal->cfg.vgId, index, TMSG_INFO(msgType));
code = walWriteIndex(pWal, index, offset);
if (code < 0) {
goto END;
}
if (taosWriteFile(pWal->pLogFile, &pWal->writeHead, sizeof(SWalCkHead)) != sizeof(SWalCkHead)) {
// TODO ftruncate
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
strerror(errno));
......@@ -452,7 +465,6 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
}
if (taosWriteFile(pWal->pLogFile, (char *)body, bodyLen) != bodyLen) {
// TODO ftruncate
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
strerror(errno));
......@@ -460,24 +472,31 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
goto END;
}
code = walWriteIndex(pWal, index, offset);
if (code < 0) {
// TODO ftruncate
goto END;
}
// set status
if (pWal->vers.firstVer == -1) pWal->vers.firstVer = index;
pWal->vers.lastVer = index;
pWal->totSize += sizeof(SWalCkHead) + bodyLen;
if (walGetCurFileInfo(pWal)->firstVer == -1) {
walGetCurFileInfo(pWal)->firstVer = index;
}
walGetCurFileInfo(pWal)->lastVer = index;
walGetCurFileInfo(pWal)->fileSize += sizeof(SWalCkHead) + bodyLen;
pFileInfo->lastVer = index;
pFileInfo->fileSize += sizeof(SWalCkHead) + bodyLen;
return 0;
END:
// recover in a reverse order
if (taosFtruncateFile(pWal->pLogFile, offset) < 0) {
wFatal("vgId:%d, failed to ftruncate logfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, offset,
strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
ASSERT(0 && "failed to recover from error");
}
int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) {
wFatal("vgId:%d, failed to ftruncate idxfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, idxOffset,
strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
ASSERT(0 && "failed to recover from error");
}
return -1;
}
......
......@@ -443,10 +443,13 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) {
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
taosUpdateLogNums(level);
if (tsAsyncLog) {
if (tsAsyncLog && level != DEBUG_FATAL) {
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
} else {
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
if (level == DEBUG_FATAL) {
taosFsyncFile(tsLogObj.logHandle->pFile);
}
}
if (tsLogObj.maxLines > 0) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册