提交 2f346de9 作者: Hongze Cheng

Fix TD-1635

上级 38c6df29
...@@ -246,6 +246,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_ENABLED, 0, 0x0901, "Sync modul ...@@ -246,6 +246,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_ENABLED, 0, 0x0901, "Sync modul
// wal // wal
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_APP_ERROR, 0, 0x1000, "Unexpected generic error in wal") TAOS_DEFINE_ERROR(TSDB_CODE_WAL_APP_ERROR, 0, 0x1000, "Unexpected generic error in wal")
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_FILE_CORRUPTED, 0, 0x1001, "WAL file is corrupted")
// http // http
TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_SERVER_OFFLINE, 0, 0x1100, "http server is not onlin") TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_SERVER_OFFLINE, 0, 0x1100, "http server is not onlin")
......
...@@ -185,7 +185,11 @@ static int32_t sdbInitWal() { ...@@ -185,7 +185,11 @@ static int32_t sdbInitWal() {
} }
sdbInfo("open sdb wal for restore"); sdbInfo("open sdb wal for restore");
walRestore(tsSdbObj.wal, NULL, sdbWrite); int code = walRestore(tsSdbObj.wal, NULL, sdbWrite);
if (code != TSDB_CODE_SUCCESS) {
sdbError("failed to open wal for restore, reason:%s", tstrerror(code));
return -1;
}
return 0; return 0;
} }
......
...@@ -347,9 +347,10 @@ static void walRelease(SWal *pWal) { ...@@ -347,9 +347,10 @@ static void walRelease(SWal *pWal) {
static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) { static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
char *name = pWal->name; char *name = pWal->name;
int size = 1024 * 1024; // default 1M buffer size
terrno = 0; terrno = 0;
char *buffer = malloc(1024000); // size for one record char *buffer = malloc(size);
if (buffer == NULL) { if (buffer == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return terrno; return terrno;
...@@ -357,7 +358,7 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) { ...@@ -357,7 +358,7 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
SWalHead *pHead = (SWalHead *)buffer; SWalHead *pHead = (SWalHead *)buffer;
int fd = open(name, O_RDONLY); int fd = open(name, O_RDWR);
if (fd < 0) { if (fd < 0) {
wError("wal:%s, failed to open for restore(%s)", name, strerror(errno)); wError("wal:%s, failed to open for restore(%s)", name, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
...@@ -367,29 +368,58 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) { ...@@ -367,29 +368,58 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
wDebug("wal:%s, start to restore", name); wDebug("wal:%s, start to restore", name);
size_t offset = 0;
while (1) { while (1) {
int ret = taosTRead(fd, pHead, sizeof(SWalHead)); int ret = taosTRead(fd, pHead, sizeof(SWalHead));
if ( ret == 0) break; if (ret == 0) break;
if (ret != sizeof(SWalHead)) { if (ret < 0) {
wWarn("wal:%s, failed to read head, skip, ret:%d(%s)", name, ret, strerror(errno)); wError("wal:%s, failed to read wal head part since %s", name, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
break; break;
} }
if (ret < sizeof(SWalHead)) {
wError("wal:%s, failed to read head, ret:%d, skip the rest of file", name, ret);
taosFtruncate(fd, offset);
fsync(fd);
break;
}
if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) { if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
wWarn("wal:%s, cksum is messed up, skip the rest of file", name); wWarn("wal:%s, cksum is messed up, skip the rest of file", name);
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TSDB_CODE_WAL_FILE_CORRUPTED;
ASSERT(false);
break; break;
} }
if (pHead->len > size - sizeof(SWalHead)) {
size = sizeof(SWalHead) + pHead->len;
buffer = realloc(buffer, size);
if (buffer == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
break;
}
pHead = (SWalHead *)buffer;
}
ret = taosTRead(fd, pHead->cont, pHead->len); ret = taosTRead(fd, pHead->cont, pHead->len);
if ( ret != pHead->len) { if (ret < 0) {
wWarn("wal:%s, failed to read body, skip, len:%d ret:%d", name, pHead->len, ret); wError("wal:%s failed to read wal body part since %s", name, strerror(errno));
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
break; break;
} }
if (ret < pHead->len) {
wError("wal:%s, failed to read body, len:%d ret:%d, skip the rest of file", name, pHead->len, ret);
taosFtruncate(fd, offset);
fsync(fd);
break;
}
offset = offset + sizeof(SWalHead) + pHead->len;
if (pWal->keep) pWal->version = pHead->version; if (pWal->keep) pWal->version = pHead->version;
(*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL); (*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册