提交 5df0c708 编写于 作者: H hiddenbomb 提交者: LINGuanRen

Check the disk warning state if io_getevents times out in the loop

上级 baa96708
...@@ -389,7 +389,7 @@ int ObLogFileStore::write(void* buf, int64_t count, int64_t offset) ...@@ -389,7 +389,7 @@ int ObLogFileStore::write(void* buf, int64_t count, int64_t offset)
} else if (OB_FAIL(prepare_write_info(buf, count, offset))) { } else if (OB_FAIL(prepare_write_info(buf, count, offset))) {
COMMON_LOG(ERROR, "prepare io info fail", K(ret)); COMMON_LOG(ERROR, "prepare io info fail", K(ret));
} else { } else {
const int64_t write_begin_ts = common::ObTimeUtility::fast_current_time(); int64_t write_begin_ts = common::ObTimeUtility::fast_current_time();
while (need_retry) { while (need_retry) {
ret = OB_SUCCESS; ret = OB_SUCCESS;
new_req_cnt = 0; new_req_cnt = 0;
...@@ -400,18 +400,17 @@ int ObLogFileStore::write(void* buf, int64_t count, int64_t offset) ...@@ -400,18 +400,17 @@ int ObLogFileStore::write(void* buf, int64_t count, int64_t offset)
} else if (OB_FAIL(process_io_getevents(submitted, io_ctx_, io_events_))) { } else if (OB_FAIL(process_io_getevents(submitted, io_ctx_, io_events_))) {
COMMON_LOG(ERROR, "process get events fail", K(ret), K(new_req_cnt), K(submitted), K(retry_cnt), K_(write_fd)); COMMON_LOG(ERROR, "process get events fail", K(ret), K(new_req_cnt), K(submitted), K(retry_cnt), K_(write_fd));
} }
const int64_t write_end_ts = common::ObTimeUtility::fast_current_time();
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (is_disk_warning()) { if (is_disk_warning()) {
set_disk_warning(false); set_disk_warning(false);
} }
} else if (!is_disk_warning()) { } else {
const int64_t write_finish_ts = common::ObTimeUtility::fast_current_time(); check_disk_warning(write_begin_ts, write_end_ts);
const int64_t log_write_timeout_us = GCONF.data_storage_warning_tolerance_time;
if (write_finish_ts - write_begin_ts > log_write_timeout_us) {
set_disk_warning(true);
}
} }
need_retry = process_retry(ret, retry_cnt); need_retry = process_retry(ret, retry_cnt);
write_begin_ts = write_end_ts;
} }
// whatever success or failure, reset write requests, check and mark bad disk // whatever success or failure, reset write requests, check and mark bad disk
...@@ -990,24 +989,23 @@ int ObLogFileStore::process_io_getevents(int64_t& submitted, io_context_t ctx, s ...@@ -990,24 +989,23 @@ int ObLogFileStore::process_io_getevents(int64_t& submitted, io_context_t ctx, s
int gotten = 0; int gotten = 0;
struct timespec timeout; struct timespec timeout;
const int64_t begin_ts = common::ObTimeUtility::fast_current_time();
while (submitted > 0 && OB_SUCC(ret) && !partial_write) { while (submitted > 0 && OB_SUCC(ret) && !partial_write) {
timeout.tv_sec = (OB_REDO_TYPE_CLOG == log_type_ ? CLOG_AIO_TIMEOUT_SECOND : AIO_TIMEOUT_SECOND); timeout.tv_sec = AIO_TIMEOUT_SECOND;
timeout.tv_nsec = 0; timeout.tv_nsec = 0;
if (0 >= (gotten = ob_io_getevents(ctx, 1, submitted, events, &timeout))) { gotten = ob_io_getevents(ctx, 1, submitted, events, &timeout);
// timeout or io error if (0 == gotten) {
if (0 == gotten) { COMMON_LOG(WARN,
COMMON_LOG(WARN, "io_getevents timeout",
"io_getevents timeout", K(ret),
K(ret), K(gotten),
K(gotten), K(submitted),
K(submitted), K(write_fd_.file_id_),
K(write_fd_.file_id_), K(timeout.tv_sec),
LITERAL_K(AIO_TIMEOUT_SECOND)); K(timeout.tv_nsec));
} else { } else if (gotten < 0) {
ret = OB_IO_ERROR; ret = OB_IO_ERROR;
COMMON_LOG( COMMON_LOG(ERROR, "io_getevents fail", K(ret), K(gotten), K(submitted), K(write_fd_.file_id_), K(errno), KERRMSG);
ERROR, "io_getevents fail", K(ret), K(gotten), K(submitted), K(write_fd_.file_id_), K(errno), KERRMSG);
}
} else { } else {
submitted -= gotten; submitted -= gotten;
for (int32_t i = 0; i < gotten; i++) { for (int32_t i = 0; i < gotten; i++) {
...@@ -1039,6 +1037,11 @@ int ObLogFileStore::process_io_getevents(int64_t& submitted, io_context_t ctx, s ...@@ -1039,6 +1037,11 @@ int ObLogFileStore::process_io_getevents(int64_t& submitted, io_context_t ctx, s
} }
} }
} }
if (OB_SUCC(ret)) {
const int64_t end_ts = common::ObTimeUtility::fast_current_time();
check_disk_warning(begin_ts, end_ts);
}
} }
return ret; return ret;
...@@ -1103,5 +1106,12 @@ int ObLogFileStore::process_failed_write() ...@@ -1103,5 +1106,12 @@ int ObLogFileStore::process_failed_write()
} }
return ret; return ret;
} }
// Sets the disk warning flag when the span between begin_ts and end_ts is
// longer than GCONF.data_storage_warning_tolerance_time. If the flag is
// already set, nothing is evaluated or changed; this function never clears it.
//
// @param begin_ts  timestamp taken when the measured operation started
// @param end_ts    timestamp taken when the measured operation finished
void ObLogFileStore::check_disk_warning(const int64_t begin_ts, const int64_t end_ts)
{
  if (is_disk_warning()) {
    // Already flagged: skip the config lookup, matching the original short-circuit.
    return;
  }
  const int64_t elapsed = end_ts - begin_ts;
  if (elapsed > GCONF.data_storage_warning_tolerance_time) {
    set_disk_warning(true);
  }
}
} // namespace common } // namespace common
} // namespace oceanbase } // namespace oceanbase
...@@ -182,7 +182,6 @@ public: ...@@ -182,7 +182,6 @@ public:
protected: protected:
static const int MAX_IO_COUNT = 1024; static const int MAX_IO_COUNT = 1024;
static const int64_t AIO_TIMEOUT_SECOND = 30; static const int64_t AIO_TIMEOUT_SECOND = 30;
static const int64_t CLOG_AIO_TIMEOUT_SECOND = 300;
static const int64_t AIO_RETRY_INTERVAL_US = 100 * 1000; // 100ms static const int64_t AIO_RETRY_INTERVAL_US = 100 * 1000; // 100ms
static const int64_t MAX_DISK_COUNT = ObLogDiskManager::MAX_DISK_COUNT; static const int64_t MAX_DISK_COUNT = ObLogDiskManager::MAX_DISK_COUNT;
static const int64_t MAX_IO_RETRY = LLONG_MAX; static const int64_t MAX_IO_RETRY = LLONG_MAX;
...@@ -283,6 +282,7 @@ private: ...@@ -283,6 +282,7 @@ private:
bool process_retry(const int result, int64_t& retry); bool process_retry(const int result, int64_t& retry);
int process_failed_write(); int process_failed_write();
int fstat(const int64_t file_id, struct stat* file_stat) const; int fstat(const int64_t file_id, struct stat* file_stat) const;
void check_disk_warning(const int64_t begin_ts, const int64_t end_ts);
private: private:
bool is_inited_; bool is_inited_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册