提交 7eb645fb 编写于 作者: O obdev 提交者: wangzelin.wzl

add parameters for detecting disk warning and error

上级 20c0cae3
......@@ -150,8 +150,8 @@ void ObIOConfig::set_default_value()
cpu_high_water_level_ = DEFAULT_CPU_HIGH_WATER_LEVEL;
write_failure_detect_interval_ = DEFAULT_WRITE_FAILURE_DETECT_INTERVAL;
read_failure_black_list_interval_ = DEFAULT_READ_FAILURE_IN_BLACK_LIST_INTERVAL;
retry_warn_limit_ = DEFAULT_RETRY_WARN_LIMIT;
retry_error_limit_ = DEFAULT_RETRY_ERROR_LIMIT;
data_storage_warning_tolerance_time_ = DEFAULT_WARNING_TOLERANCE_TIME;
data_storage_error_tolerance_time_ = DEFAULT_ERROR_TOLERANCE_TIME;
disk_io_thread_count_ = DEFAULT_DISK_IO_THREAD_COUNT;
callback_thread_count_ = DEFAULT_IO_CALLBACK_THREAD_COUNT;
large_query_io_percent_ = DEFAULT_LARGE_QUERY_IO_PERCENT;
......@@ -163,7 +163,8 @@ bool ObIOConfig::is_valid() const
return sys_io_low_percent_ >= 0 && sys_io_low_percent_ <= 100 && sys_io_high_percent_ > 0 &&
sys_io_high_percent_ <= 100 && sys_io_low_percent_ <= sys_io_high_percent_ && user_iort_up_percent_ >= 0 &&
cpu_high_water_level_ > 0 && write_failure_detect_interval_ > 0 && read_failure_black_list_interval_ > 0 &&
retry_warn_limit_ > 0 && retry_error_limit_ > retry_warn_limit_ && disk_io_thread_count_ > 0 &&
data_storage_warning_tolerance_time_ > 0 &&
data_storage_error_tolerance_time_ >= data_storage_warning_tolerance_time_ && disk_io_thread_count_ > 0 &&
disk_io_thread_count_ <= ObDisk::MAX_DISK_CHANNEL_CNT * 2 && disk_io_thread_count_ % 2 == 0 &&
callback_thread_count_ > 0 && large_query_io_percent_ >= 0 && large_query_io_percent_ <= 100 &&
data_storage_io_timeout_ms_ > 0;
......@@ -177,8 +178,8 @@ void ObIOConfig::reset()
cpu_high_water_level_ = 0;
write_failure_detect_interval_ = 0;
read_failure_black_list_interval_ = 0;
retry_warn_limit_ = 0;
retry_error_limit_ = 0;
data_storage_warning_tolerance_time_ = 0;
data_storage_error_tolerance_time_ = 0;
disk_io_thread_count_ = 0;
callback_thread_count_ = 0;
large_query_io_percent_ = 0;
......
......@@ -98,8 +98,8 @@ public:
static const int64_t DEFAULT_CPU_HIGH_WATER_LEVEL = 4800;
static const int64_t DEFAULT_WRITE_FAILURE_DETECT_INTERVAL = 60 * 1000 * 1000; // 1 min
static const int64_t DEFAULT_READ_FAILURE_IN_BLACK_LIST_INTERVAL = 300 * 1000 * 1000; // 5 min
static const int32_t DEFAULT_RETRY_WARN_LIMIT = 2;
static const int32_t DEFAULT_RETRY_ERROR_LIMIT = 5;
// Default tolerance windows used by set_default_value() for
// data_storage_warning_tolerance_time_ / data_storage_error_tolerance_time_.
// NOTE(review): declared int32_t although the initializers are long expressions and the
// members they are assigned to are int64_t; both values fit in int32_t, but the neighbouring
// interval defaults are int64_t - confirm whether int64_t was intended.
static const int32_t DEFAULT_WARNING_TOLERANCE_TIME = 30L * 1000L * 1000L; // 30s
static const int32_t DEFAULT_ERROR_TOLERANCE_TIME = 300L * 1000L * 1000L; // 300s
static const int64_t DEFAULT_DISK_IO_THREAD_COUNT = 8;
static const int64_t DEFAULT_IO_CALLBACK_THREAD_COUNT = 8;
static const int64_t DEFAULT_LARGE_QUERY_IO_PERCENT = 0; // 0 means unlimited
......@@ -113,19 +113,22 @@ public:
bool is_valid() const;
void reset();
TO_STRING_KV(K_(sys_io_low_percent), K_(sys_io_high_percent), K_(user_iort_up_percent), K_(cpu_high_water_level),
K_(write_failure_detect_interval), K_(read_failure_black_list_interval), K_(retry_warn_limit),
K_(retry_error_limit), K_(disk_io_thread_count), K_(callback_thread_count), K_(large_query_io_percent),
K_(data_storage_io_timeout_ms));
K_(write_failure_detect_interval), K_(read_failure_black_list_interval), K_(data_storage_warning_tolerance_time),
K_(data_storage_error_tolerance_time), K_(disk_io_thread_count), K_(callback_thread_count),
K_(large_query_io_percent), K_(data_storage_io_timeout_ms));
public:
// schedule related
int64_t sys_io_low_percent_;
int64_t sys_io_high_percent_;
int64_t user_iort_up_percent_;
int64_t cpu_high_water_level_;
// diagnose related
int64_t write_failure_detect_interval_;
int64_t read_failure_black_list_interval_;
int64_t retry_warn_limit_;
int64_t retry_error_limit_;
int64_t data_storage_warning_tolerance_time_;
int64_t data_storage_error_tolerance_time_;
// resource related
int64_t disk_io_thread_count_;
int64_t callback_thread_count_;
int64_t large_query_io_percent_;
......
......@@ -41,28 +41,23 @@ void ObDiskDiagnose::reset()
MEMSET(write_failure_event_ts_, 0, sizeof(write_failure_event_ts_));
}
void ObDiskDiagnose::record_read_fail(const int64_t retry_cnt)
void ObDiskDiagnose::record_read_fail(const int64_t diagnose_begin_ts)
{
const ObIOConfig io_config = OB_IO_MANAGER.get_io_config();
// in order to reduce misjudgement, here are the rules:
// watch the continuous read timeout with the exponential growth of timeout
// 1. for more than 3 times, record as disk warning,
// after that, this server is not allowed to be the paxos leader for a period,
// which is indicated by READ_FAILURE_IN_BLACK_LIST_INTERVAL, usually 300s.
//
// 2. for more than 6 times, record as disk error
// if the disk is confirmed normal, the administrator can reset the disk error by
// alter system set disk valid server [=] 'ip:port'
//
if (retry_cnt < io_config.retry_warn_limit_) {
// do nothing
} else if (retry_cnt < io_config.retry_error_limit_) {
last_read_failure_warn_ts_ = ObTimeUtility::current_time();
} else {
const int64_t current_ts = ObTimeUtility::current_time();
if (current_ts >= diagnose_begin_ts + io_config.data_storage_warning_tolerance_time_) {
// set disk warning and record warn_ts
// until warn_ts + READ_FAILURE_IN_BLACK_LIST_INTERVAL, this server is not allowed to be partition leader
last_read_failure_warn_ts_ = current_ts;
}
if (current_ts >= diagnose_begin_ts + io_config.data_storage_error_tolerance_time_) {
// set disk error and record error_ts
// if the disk is confirmed normal, the administrator can reset disk status by:
// alter system set disk valid server [=] 'ip:port'
if (!is_disk_error_) {
disk_error_begin_ts_ = ObTimeUtility::current_time();
disk_error_begin_ts_ = current_ts;
}
disk_error_last_ts_ = ObTimeUtility::current_time();
disk_error_last_ts_ = current_ts;
is_disk_error_ = true;
COMMON_LOG(ERROR, "set_disk_error: attention!!!");
}
......@@ -119,18 +114,6 @@ int64_t ObDiskDiagnose::get_last_io_failure_ts() const
return MAX(disk_error_last_ts_, last_read_failure_warn_ts_);
}
// Returns the retry_error_limit_ value of the IO manager's current configuration.
int64_t ObDiskDiagnose::get_max_retry_cnt() const
{
  return OB_IO_MANAGER.get_io_config().retry_error_limit_;
}
// Returns the retry_warn_limit_ value of the IO manager's current configuration.
int64_t ObDiskDiagnose::get_warn_retry_cnt() const
{
  return OB_IO_MANAGER.get_io_config().retry_warn_limit_;
}
/**
* ---------------------------------------------- ObDisk ---------------------------------------------
*/
......@@ -648,33 +631,41 @@ void ObIOFaultDetector::handle(void* t)
const ObIOInfo& info = task->info_;
ObIOHandle handle;
uint64_t timeout_ms = task->timeout_ms_;
int64_t retry_cnt = 0;
const int64_t MIN_IO_WAIT_TIME_MS = 30000; // 30s
for (retry_cnt = 0; retry_cnt < disk_diagnose.get_max_retry_cnt(); ++retry_cnt) {
// remain 1s to avoid race condition for retry_black_list_interval
const int64_t retry_black_list_interval_ms =
OB_IO_MANAGER.get_io_config().read_failure_black_list_interval_ / 1000L - 1000L;
// retry_io_timeout must be less than black_list_interval
const int64_t MIN_IO_RETRY_TIMEOUT_MS = min(10L * 1000L /* 10s */, retry_black_list_interval_ms);
const int64_t MAX_IO_RETRY_TIMEOUT_MS = min(180L * 1000L /* 180s*/, retry_black_list_interval_ms);
const int64_t diagnose_begin_ts = ObTimeUtility::current_time();
bool is_retry_succ = false;
while (OB_SUCC(ret) && !is_retry_succ && !disk_diagnose.is_disk_error()) {
handle.reset();
// timeout grows exponentially
if (retry_cnt >= disk_diagnose.get_warn_retry_cnt() - 1) {
timeout_ms = max(timeout_ms * 2, MIN_IO_WAIT_TIME_MS);
} else {
timeout_ms = timeout_ms * 2;
}
if (retry_cnt == disk_diagnose.get_warn_retry_cnt()) {
disk_diagnose.record_read_fail(retry_cnt);
const ObIOConfig io_conf = OB_IO_MANAGER.get_io_config();
const int64_t current_retry_ts = ObTimeUtility::current_time();
const int64_t warn_ts = diagnose_begin_ts + io_conf.data_storage_warning_tolerance_time_;
const int64_t error_ts = diagnose_begin_ts + io_conf.data_storage_error_tolerance_time_;
const int64_t left_timeout_ms =
!disk_diagnose.is_disk_warning() ? (warn_ts - current_retry_ts) / 1000 : (error_ts - current_retry_ts) / 1000;
// timeout of retry io increase exponentially
timeout_ms = min(left_timeout_ms, min(MAX_IO_RETRY_TIMEOUT_MS, max(timeout_ms * 2, MIN_IO_RETRY_TIMEOUT_MS)));
if (timeout_ms > 0) {
// do retry io
if (disk->get_admin_status() != DISK_USING) {
ret = OB_STATE_NOT_MATCH;
COMMON_LOG(WARN, "check_admin_status failed, disk is deleting", K(ret), "status", disk->get_admin_status());
break;
} else if (OB_FAIL(OB_IO_MANAGER.read(info, handle, timeout_ms))) {
COMMON_LOG(WARN, "ObIOManager::read failed", K(ret), K(info), K(timeout_ms));
ret = OB_SUCCESS;
} else {
is_retry_succ = true;
}
}
if (disk->get_admin_status() != DISK_USING) {
ret = OB_STATE_NOT_MATCH;
COMMON_LOG(WARN, "check_admin_status failed, disk is deleting", K(ret), "status", disk->get_admin_status());
break;
} else if (OB_FAIL(OB_IO_MANAGER.read(info, handle, timeout_ms))) {
COMMON_LOG(WARN, "ObIOManager::read failed", K(ret), K(info), K(timeout_ms));
} else {
break; // stop retry if success
if (OB_SUCC(ret) && !is_retry_succ) {
disk_diagnose.record_read_fail(diagnose_begin_ts);
}
}
disk_diagnose.record_read_fail(retry_cnt);
op_free(task);
task = NULL;
......
......@@ -56,13 +56,11 @@ class ObDiskDiagnose {
public:
ObDiskDiagnose();
virtual ~ObDiskDiagnose();
void record_read_fail(const int64_t retry_cnt);
void record_read_fail(const int64_t diagnose_begin_ts);
void record_write_fail();
bool is_disk_warning() const;
bool is_disk_error() const;
void reset_disk_health();
int64_t get_max_retry_cnt() const;
int64_t get_warn_retry_cnt() const;
int64_t get_disk_error_begin_ts() const
{
return disk_error_begin_ts_;
......
......@@ -111,6 +111,8 @@ int ObServerReloadConfig::operator()()
// In the 2.x version, reuse the sys_bkgd_io_timeout configuration item to indicate the data disk io timeout time
// After version 3.1, use the data_storage_io_timeout configuration item.
io_config.data_storage_io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L;
io_config.data_storage_warning_tolerance_time_ = GCONF.data_storage_warning_tolerance_time;
io_config.data_storage_error_tolerance_time_ = GCONF.data_storage_error_tolerance_time;
if (OB_FAIL(ObIOManager::get_instance().set_io_config(io_config))) {
real_ret = ret;
LOG_WARN("reload io manager config fail, ", K(ret));
......
......@@ -302,6 +302,17 @@ bool ObConfigPartitionBalanceStrategyFuncChecker::check(const ObConfigItem& t) c
return is_valid;
}
bool ObDataStorageErrorToleranceTimeChecker::check(const ObConfigItem& t) const
{
bool is_valid = false;
int64_t value = ObConfigTimeParser::get(t.str(), is_valid);
if (is_valid) {
const int64_t warning_value = GCONF.data_storage_warning_tolerance_time;
is_valid = value >= warning_value;
}
return is_valid;
}
int64_t ObConfigIntParser::get(const char* str, bool& valid)
{
char* p_end = NULL;
......
......@@ -394,6 +394,18 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObConfigPartitionBalanceStrategyFuncChecker);
};
// Config checker attached to the data_storage_error_tolerance_time parameter
// (through DEF_TIME_WITH_CHECKER). The check() implementation lives out of line.
class ObDataStorageErrorToleranceTimeChecker : public ObConfigChecker {
public:
ObDataStorageErrorToleranceTimeChecker()
{}
virtual ~ObDataStorageErrorToleranceTimeChecker()
{}
// Returns true when the candidate value carried by t is acceptable.
bool check(const ObConfigItem& t) const;
private:
// NOTE(review): the checker declared just above uses DISALLOW_COPY_AND_ASSIGN;
// confirm DISABLE_COPY_ASSIGN is the intended macro here.
DISABLE_COPY_ASSIGN(ObDataStorageErrorToleranceTimeChecker);
};
// config item container
class ObConfigStringKey {
public:
......
......@@ -881,6 +881,15 @@ DEF_TIME(_data_storage_io_timeout, OB_CLUSTER_PARAMETER, "120s", "[5s,600s]",
"io timeout for data storage, Range [5s,600s]. "
"The default value is 120s",
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
// How long disk read failures are tolerated before the disk status is set to warning.
// NOTE(review): this item is declared without a checker, while ObIOConfig::is_valid()
// requires error tolerance >= warning tolerance; raising this value above the current
// data_storage_error_tolerance_time is not rejected here - confirm this is acceptable.
DEF_TIME(data_storage_warning_tolerance_time, OB_CLUSTER_PARAMETER, "30s", "[10s,300s]",
"time to tolerate disk read failure, after that, the disk status will be set warning. Range [10s,300s]. The "
"default value is 30s",
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
// How long disk read failures are tolerated before the disk status is set to error.
// ObDataStorageErrorToleranceTimeChecker rejects values smaller than the current
// data_storage_warning_tolerance_time.
DEF_TIME_WITH_CHECKER(data_storage_error_tolerance_time, OB_CLUSTER_PARAMETER, "300s",
common::ObDataStorageErrorToleranceTimeChecker, "[10s,7200s]",
"time to tolerate disk read failure, after that, the disk status will be set error. Range [10s,7200s]. The default "
"value is 300s",
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
DEF_INT(data_disk_usage_limit_percentage, OB_CLUSTER_PARAMETER, "90", "[50,100]",
"the safe use percentage of data disk"
"Range: [50,100] in integer",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册