提交 9b477eed 编写于 作者: B Benguang Zhao

enh: terminate on failure to recover WAL from writing errors, or to commit vnode

上级 4c9a0c67
......@@ -360,7 +360,12 @@ static int32_t vnodeCommitTask(void *arg) {
// commit
code = vnodeCommitImpl(pInfo);
if (code) goto _exit;
if (code) {
vFatal("vgId:%d, failed to commit vnode since %s", TD_VID(pVnode), terrstr());
taosMsleep(100);
exit(EXIT_FAILURE);
goto _exit;
}
vnodeReturnBufPool(pVnode);
......
......@@ -602,18 +602,18 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
// ftruncate idx file
if (offset < fileSize) {
if (taosFtruncateFile(pIdxFile, offset) < 0) {
wError("vgId:%d, failed to ftruncate file due to %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
strerror(errno), offset, fnameStr);
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to ftruncate file since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(),
offset, fnameStr);
goto _err;
}
}
// rebuild idx file
if (taosLSeekFile(pIdxFile, 0, SEEK_END) < 0) {
wError("vgId:%d, failed to seek file due to %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, strerror(errno),
offset, fnameStr);
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to seek file since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(), offset,
fnameStr);
goto _err;
}
......@@ -625,11 +625,12 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
idxEntry.offset += sizeof(SWalCkHead) + ckHead.head.bodyLen;
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
wError("vgId:%d, failed to read wal log head since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(),
idxEntry.offset, fLogNameStr);
wError("vgId:%d, failed to read wal log head since %s. index:%" PRId64 ", offset:%" PRId64 ", file:%s",
pWal->cfg.vgId, terrstr(), idxEntry.ver, idxEntry.offset, fLogNameStr);
goto _err;
}
if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err;
}
......@@ -637,6 +638,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
}
if (taosFsyncFile(pIdxFile) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, faild to fsync file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err;
}
......
......@@ -473,7 +473,10 @@ static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
// check alignment of idx entries
int64_t endOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END);
if (endOffset < 0) {
wFatal("vgId:%d, failed to seek end of idxfile due to %s. ver:%" PRId64 "", pWal->cfg.vgId, strerror(errno), ver);
wFatal("vgId:%d, failed to seek end of WAL idxfile due to %s. ver:%" PRId64 "", pWal->cfg.vgId, strerror(errno),
ver);
taosMsleep(100);
exit(EXIT_FAILURE);
}
return 0;
}
......@@ -533,16 +536,20 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
END:
// recover in a reverse order
if (taosFtruncateFile(pWal->pLogFile, offset) < 0) {
wFatal("vgId:%d, failed to ftruncate logfile to offset:%" PRId64 " during recovery due to %s", pWal->cfg.vgId,
offset, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
wFatal("vgId:%d, failed to recover WAL logfile from write error since %s, offset:%" PRId64, pWal->cfg.vgId,
terrstr(), offset);
taosMsleep(100);
exit(EXIT_FAILURE);
}
int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) {
wFatal("vgId:%d, failed to ftruncate idxfile to offset:%" PRId64 "during recovery due to %s", pWal->cfg.vgId,
idxOffset, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno);
wFatal("vgId:%d, failed to recover WAL idxfile from write error since %s, offset:%" PRId64, pWal->cfg.vgId,
terrstr(), idxOffset);
taosMsleep(100);
exit(EXIT_FAILURE);
}
return -1;
}
......
......@@ -486,24 +486,11 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) {
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
taosUpdateLogNums(level);
#if 0
// DEBUG_FATAL and DEBUG_ERROR are duplicated
// fsync will cause thread blocking and may also generate log misalignment in case of asyncLog
if (tsAsyncLog && level != DEBUG_FATAL) {
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
} else {
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
if (level == DEBUG_FATAL) {
taosFsyncFile(tsLogObj.logHandle->pFile);
}
}
#else
if (tsAsyncLog) {
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
} else {
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
}
#endif
if (tsLogObj.maxLines > 0) {
atomic_add_fetch_32(&tsLogObj.lines, 1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册