提交 9b477eed 作者: Benguang Zhao

enh: terminate on failure to recover WAL from writing errors, or to commit vnode

上级 4c9a0c67
...@@ -360,7 +360,12 @@ static int32_t vnodeCommitTask(void *arg) { ...@@ -360,7 +360,12 @@ static int32_t vnodeCommitTask(void *arg) {
// commit // commit
code = vnodeCommitImpl(pInfo); code = vnodeCommitImpl(pInfo);
if (code) goto _exit; if (code) {
vFatal("vgId:%d, failed to commit vnode since %s", TD_VID(pVnode), terrstr());
taosMsleep(100);
exit(EXIT_FAILURE);
goto _exit;
}
vnodeReturnBufPool(pVnode); vnodeReturnBufPool(pVnode);
......
...@@ -602,18 +602,18 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { ...@@ -602,18 +602,18 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
// ftruncate idx file // ftruncate idx file
if (offset < fileSize) { if (offset < fileSize) {
if (taosFtruncateFile(pIdxFile, offset) < 0) { if (taosFtruncateFile(pIdxFile, offset) < 0) {
wError("vgId:%d, failed to ftruncate file due to %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
strerror(errno), offset, fnameStr);
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to ftruncate file since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(),
offset, fnameStr);
goto _err; goto _err;
} }
} }
// rebuild idx file // rebuild idx file
if (taosLSeekFile(pIdxFile, 0, SEEK_END) < 0) { if (taosLSeekFile(pIdxFile, 0, SEEK_END) < 0) {
wError("vgId:%d, failed to seek file due to %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, strerror(errno),
offset, fnameStr);
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to seek file since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(), offset,
fnameStr);
goto _err; goto _err;
} }
...@@ -625,11 +625,12 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { ...@@ -625,11 +625,12 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
idxEntry.offset += sizeof(SWalCkHead) + ckHead.head.bodyLen; idxEntry.offset += sizeof(SWalCkHead) + ckHead.head.bodyLen;
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) { if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
wError("vgId:%d, failed to read wal log head since %s. offset:%" PRId64 ", file:%s", pWal->cfg.vgId, terrstr(), wError("vgId:%d, failed to read wal log head since %s. index:%" PRId64 ", offset:%" PRId64 ", file:%s",
idxEntry.offset, fLogNameStr); pWal->cfg.vgId, terrstr(), idxEntry.ver, idxEntry.offset, fLogNameStr);
goto _err; goto _err;
} }
if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) { if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr); wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err; goto _err;
} }
...@@ -637,6 +638,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { ...@@ -637,6 +638,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
} }
if (taosFsyncFile(pIdxFile) < 0) { if (taosFsyncFile(pIdxFile) < 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
wError("vgId:%d, faild to fsync file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr); wError("vgId:%d, faild to fsync file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err; goto _err;
} }
......
...@@ -473,7 +473,10 @@ static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) { ...@@ -473,7 +473,10 @@ static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
// check alignment of idx entries // check alignment of idx entries
int64_t endOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END); int64_t endOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END);
if (endOffset < 0) { if (endOffset < 0) {
wFatal("vgId:%d, failed to seek end of idxfile due to %s. ver:%" PRId64 "", pWal->cfg.vgId, strerror(errno), ver); wFatal("vgId:%d, failed to seek end of WAL idxfile due to %s. ver:%" PRId64 "", pWal->cfg.vgId, strerror(errno),
ver);
taosMsleep(100);
exit(EXIT_FAILURE);
} }
return 0; return 0;
} }
...@@ -533,16 +536,20 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy ...@@ -533,16 +536,20 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
END: END:
// recover in a reverse order // recover in a reverse order
if (taosFtruncateFile(pWal->pLogFile, offset) < 0) { if (taosFtruncateFile(pWal->pLogFile, offset) < 0) {
wFatal("vgId:%d, failed to ftruncate logfile to offset:%" PRId64 " during recovery due to %s", pWal->cfg.vgId,
offset, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
wFatal("vgId:%d, failed to recover WAL logfile from write error since %s, offset:%" PRId64, pWal->cfg.vgId,
terrstr(), offset);
taosMsleep(100);
exit(EXIT_FAILURE);
} }
int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry); int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) { if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) {
wFatal("vgId:%d, failed to ftruncate idxfile to offset:%" PRId64 "during recovery due to %s", pWal->cfg.vgId,
idxOffset, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
wFatal("vgId:%d, failed to recover WAL idxfile from write error since %s, offset:%" PRId64, pWal->cfg.vgId,
terrstr(), idxOffset);
taosMsleep(100);
exit(EXIT_FAILURE);
} }
return -1; return -1;
} }
......
...@@ -486,24 +486,11 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) { ...@@ -486,24 +486,11 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) {
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) { static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) { if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
taosUpdateLogNums(level); taosUpdateLogNums(level);
#if 0
// DEBUG_FATAL and DEBUG_ERROR are duplicated
// fsync will cause thread blocking and may also generate log misalignment in case of asyncLog
if (tsAsyncLog && level != DEBUG_FATAL) {
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
} else {
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
if (level == DEBUG_FATAL) {
taosFsyncFile(tsLogObj.logHandle->pFile);
}
}
#else
if (tsAsyncLog) { if (tsAsyncLog) {
taosPushLogBuffer(tsLogObj.logHandle, buffer, len); taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
} else { } else {
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len); taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
} }
#endif
if (tsLogObj.maxLines > 0) { if (tsLogObj.maxLines > 0) {
atomic_add_fetch_32(&tsLogObj.lines, 1); atomic_add_fetch_32(&tsLogObj.lines, 1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册