提交 5ea62a63 编写于 作者: H HaHaJeff 提交者: wangzelin.wzl

Rename functions and variables related to disk hang

上级 f8b75754
......@@ -3170,12 +3170,12 @@ int ObCLogMgr::get_election_group_priority(const uint64_t tenant_id, election::O
OBSERVER.get_gctx().rs_server_status_ == share::RSS_IS_WORKING;
int tmp_ret = OB_SUCCESS;
bool is_data_disk_error = false;
bool is_clog_disk_error = log_engine_.is_clog_disk_error();
bool is_clog_disk_hang = log_engine_.is_clog_disk_hang();
if (OB_SUCCESS != (tmp_ret = ObIOManager::get_instance().is_disk_error(is_data_disk_error))) {
CLOG_LOG(WARN, "is_data_disk_error failed", K(tmp_ret));
}
if (is_clog_disk_error) {
priority.set_system_clog_disk_error();
if (is_clog_disk_hang) {
priority.set_system_clog_disk_hang();
}
if (is_data_disk_error) {
priority.set_system_data_disk_error();
......
......@@ -37,6 +37,7 @@ namespace clog {
ObCLogWriter::ObCLogWriter()
: is_started_(false),
is_disk_error_(false),
is_disk_hang_(false),
file_mutex_(),
file_writer_(NULL),
type_(INVALID_WRITE_POOL),
......@@ -111,6 +112,7 @@ void ObCLogWriter::destroy()
info_getter_ = NULL;
tail_ = NULL;
is_disk_error_ = false;
is_disk_hang_ = false;
is_started_ = false;
}
......@@ -134,34 +136,31 @@ file_id_t ObCLogWriter::get_file_id() const
return (NULL == file_writer_) ? 0 : file_writer_->get_cur_file_id();
}
// Reports whether the clog disk has been flagged as hung.
// The is_disk_hang_ flag is read with ATOMIC_LOAD; it is stored by
// set_is_disk_hang() and reset_is_disk_hang().
// NOTE(review): the is_disk_hang() lines return only the flag, whereas the
// is_disk_error() lines also consulted file_writer_->is_write_hang() when the
// flag was false — confirm that dropping this check is intended.
bool ObCLogWriter::is_disk_error() const
bool ObCLogWriter::is_disk_hang() const
{
bool b_ret = ATOMIC_LOAD(&is_disk_error_);
if (!b_ret && nullptr != file_writer_) {
b_ret = file_writer_->is_write_hang();
}
return b_ret;
bool is_disk_hang = ATOMIC_LOAD(&is_disk_hang_);
return is_disk_hang;
}
// Flags the clog disk as hung.
// Returns OB_NOT_INIT when is_inited() is false; otherwise atomically stores
// true into the flag, emits a WARN log, and returns OB_SUCCESS.
int ObCLogWriter::set_is_disk_error()
int ObCLogWriter::set_is_disk_hang()
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited())) {
ret = OB_NOT_INIT;
} else {
ATOMIC_STORE(&is_disk_error_, true);
ATOMIC_STORE(&is_disk_hang_, true);
CLOG_LOG(WARN, "clog disk may be hang or something error has happen!");
}
return ret;
}
int ObCLogWriter::reset_is_disk_error()
int ObCLogWriter::reset_is_disk_hang()
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited())) {
ret = OB_NOT_INIT;
} else {
ATOMIC_STORE(&is_disk_error_, false);
ATOMIC_STORE(&is_disk_hang_, false);
CLOG_LOG(TRACE, "reset clog disk status to normal");
}
return ret;
......@@ -241,7 +240,7 @@ void ObCLogWriter::process_log_items(common::ObIBaseLogItem** items, const int64
do {
// TODO: flush log will not return OB_TIMEOUT, other IO error will be treated as bug
if (OB_FAIL(file_writer_->flush(info_getter_, log_cache_, tail_, flush_start_offset))) {
set_is_disk_error();
is_disk_error_ = true;
// flush log to disk until die when IO hang, other IO error will be treated as bug
if (OB_TIMEOUT == ret && REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
ret = OB_IO_ERROR;
......@@ -250,7 +249,7 @@ void ObCLogWriter::process_log_items(common::ObIBaseLogItem** items, const int64
CLOG_LOG(ERROR, "Fail to flush clog to disk, ", K(ret));
}
} else {
reset_is_disk_error();
is_disk_error_ = false;
}
} while (!has_stoped() && OB_TIMEOUT == ret);
}
......@@ -386,8 +385,8 @@ int ObCLogDiskErrorCB::callback()
storage::ObPartitionService& partition_service = storage::ObPartitionService::get_instance();
if (OB_ISNULL(host_)) {
ret = OB_ERR_UNEXPECTED;
} else if (OB_FAIL(host_->set_is_disk_error())) {
CLOG_LOG(ERROR, "ObCLogDiskErrorCB set_is_disk_error failed", K(ret));
} else if (OB_FAIL(host_->set_is_disk_hang())) {
CLOG_LOG(ERROR, "ObCLogDiskErrorCB set_is_disk_hang failed", K(ret));
} else if (OB_FAIL(partition_service.try_revoke_all_leader(ObElection::RevokeType::CLOG_DISK_HANG))) {
CLOG_LOG(ERROR, "ObCLogDiskErrorCB try_revoke_all_leader failed", K(ret));
}
......@@ -400,7 +399,7 @@ void ObCLogDiskErrorCB::destroy()
// If the disk has a real error, this may cause is_disk_error to be false;
// however, the observer will be killed.
if (OB_NOT_NULL(host_)) {
(void)host_->reset_is_disk_error();
(void)host_->reset_is_disk_hang();
}
}
......
......@@ -65,9 +65,13 @@ public:
virtual void destroy();
int switch_file();
file_id_t get_file_id() const;
bool is_disk_error() const;
int set_is_disk_error();
int reset_is_disk_error();
bool is_disk_hang() const;
// Returns the current value of the is_disk_error_ member.
// NOTE(review): this is a plain read, while is_disk_hang() uses ATOMIC_LOAD on
// its flag — confirm whether is_disk_error_ can be read and written from
// different threads.
inline bool is_disk_error() const
{
  return is_disk_error_;
}
int set_is_disk_hang();
int reset_is_disk_hang();
protected:
virtual void process_log_items(common::ObIBaseLogItem** items, const int64_t item_cnt, int64_t& finish_cnt);
......@@ -99,6 +103,7 @@ private:
int inner_switch_file();
bool is_started_;
bool is_disk_error_;
bool is_disk_hang_;
lib::ObMutex file_mutex_;
ObCLogBaseFileWriter* file_writer_;
ObLogWritePoolType type_;
......
......@@ -198,7 +198,7 @@ public:
virtual int check_is_clog_obsoleted(const common::ObPartitionKey& partition_key, const file_id_t file_id,
const offset_t offset, bool& is_obsoleted) const = 0;
virtual bool is_clog_disk_error() const = 0;
virtual bool is_clog_disk_hang() const = 0;
// ================== interface for ObIlogStorage end ====================
};
......
......@@ -2553,16 +2553,16 @@ int ObLogEngine::get_ilog_using_disk_space(int64_t& space) const
return ret;
}
// Returns the disk state reported by the clog writer of the current clog env.
// The result stays false when the engine is not initialized (IS_NOT_INIT) or
// when get_clog_env_() returns NULL.
bool ObLogEngine::is_clog_disk_error() const
bool ObLogEngine::is_clog_disk_hang() const
{
bool is_disk_error = false;
const ObCommitLogEnv* env = get_clog_env_();
bool is_disk_hang = false;
const ObCommitLogEnv *env = get_clog_env_();
if (IS_NOT_INIT) {
is_disk_error = false;
is_disk_hang = false;
} else if (OB_LIKELY(NULL != env)) {
is_disk_error = (env->get_writer()).is_disk_error();
is_disk_hang = (env->get_writer()).is_disk_hang();
}
return is_disk_error;
return is_disk_hang;
}
NetworkLimitManager::NetworkLimitManager() : is_inited_(false), addr_array_(), ethernet_speed_(0), hash_map_()
......
......@@ -531,10 +531,9 @@ public:
int check_is_clog_obsoleted(const common::ObPartitionKey& partition_key, const file_id_t file_id,
const offset_t offset, bool& is_obsoleted) const override;
// ================== interface for ObIlogStorage end ====================
int get_clog_using_disk_space(int64_t& space) const;
int get_ilog_using_disk_space(int64_t& space) const;
bool is_clog_disk_error() const override;
int get_clog_using_disk_space(int64_t &space) const;
int get_ilog_using_disk_space(int64_t &space) const;
bool is_clog_disk_hang() const;
private:
int fetch_log_from_server(
......
......@@ -3908,7 +3908,8 @@ int ObPartitionLogService::on_get_election_priority(election::ObElectionPriority
#endif
bool is_tenant_out_of_mem = is_tenant_out_of_memory_();
bool is_data_disk_error = false;
bool is_clog_disk_error = log_engine_->is_clog_disk_error();
bool is_disk_space_enough = log_engine_->is_disk_space_enough();
bool is_clog_disk_hang = log_engine_->is_clog_disk_hang();
const ObReplicaProperty replica_property = mm_.get_replica_property();
const uint64_t log_id = sw_.get_max_confirmed_log_id();
if (OB_SUCCESS != (tmp_ret = ObIOManager::get_instance().is_disk_error(is_data_disk_error))) {
......@@ -3933,8 +3934,11 @@ int ObPartitionLogService::on_get_election_priority(election::ObElectionPriority
if (OB_SUCCESS != (ret = priority.init(is_candidate, mm_.get_timestamp(), log_id, zone_priority))) {
CLOG_LOG(WARN, "priority init error", K_(partition_key), K(ret));
} else {
if (is_clog_disk_error) {
priority.set_system_clog_disk_error();
if (!is_disk_space_enough) {
priority.set_system_disk_full();
}
if (is_clog_disk_hang) {
priority.set_system_clog_disk_hang();
}
if (is_data_disk_error) {
priority.set_system_data_disk_error();
......@@ -6702,7 +6706,7 @@ int ObPartitionLogService::check_is_normal_partition(bool& is_normal_partition)
bool is_out_of_memory = false;
bool is_disk_not_enough = false;
bool is_disk_error = false;
bool is_clog_disk_error = false;
bool is_clog_disk_hang = false;
bool is_archive_restoring = false;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
......@@ -6712,13 +6716,13 @@ int ObPartitionLogService::check_is_normal_partition(bool& is_normal_partition)
} else if (OB_FAIL(ObIOManager::get_instance().is_disk_error(is_disk_error))) {
CLOG_LOG(ERROR, "is_disk_error failed", K(ret), K(partition_key_));
} else {
is_clog_disk_error = log_engine_->is_clog_disk_error();
is_clog_disk_hang = log_engine_->is_clog_disk_hang();
is_disk_not_enough = !log_engine_->is_disk_space_enough();
// physical restoring replica cannot participate in member change.
// because its election module has not been started, it cannot vote.
is_archive_restoring = restore_mgr_.is_archive_restoring();
is_normal_partition =
!(is_disk_not_enough || is_out_of_memory || is_disk_error || is_clog_disk_error || is_archive_restoring);
!(is_disk_not_enough || is_out_of_memory || is_disk_error || is_clog_disk_hang || is_archive_restoring);
}
return ret;
}
......
......@@ -64,9 +64,9 @@ int ObElectionGroupPriority::compare(const ObElectionGroupPriority& priority) co
return ret;
}
// Adds the clog-disk flag value, multiplied by 100, to system_score_.
// NOTE(review): the value is added rather than OR-ed, so a repeated call would
// add it twice — presumably called at most once per priority object; confirm.
void ObElectionGroupPriority::set_system_clog_disk_error()
void ObElectionGroupPriority::set_system_clog_disk_hang()
{
system_score_ += SYSTEM_SCORE_CLOG_DISK_ERROR * 100;
system_score_ += SYSTEM_SCORE_CLOG_DISK_HANG * 100;
}
void ObElectionGroupPriority::set_system_data_disk_error()
......
......@@ -48,7 +48,7 @@ public:
{
return system_score_;
}
void set_system_clog_disk_error();
void set_system_clog_disk_hang();
void set_system_data_disk_error();
void set_system_service_not_started();
......@@ -59,7 +59,7 @@ public:
DECLARE_TO_STRING_AND_YSON;
private:
const static int64_t SYSTEM_SCORE_CLOG_DISK_ERROR = (1 << 6);
const static int64_t SYSTEM_SCORE_CLOG_DISK_HANG = (1 << 6);
const static int64_t SYSTEM_SCORE_DATA_DISK_ERROR = (1 << 4);
const static int64_t SYSTEM_SCORE_SERVICE_NOT_STARTED = (1 << 1);
......
......@@ -197,9 +197,14 @@ bool ObElectionPriority::is_in_election_blacklist() const
return (system_score_ / 100) & SYSTEM_SCORE_IN_ELECTION_BLACKLIST;
}
// Adds a system-score flag value, multiplied by 100, to system_score_
// (SYSTEM_SCORE_DISK_FULL in the set_system_disk_full() lines).
// The factor of 100 matches is_in_election_blacklist(), which divides
// system_score_ by 100 before masking a flag bit.
void ObElectionPriority::set_system_clog_disk_error()
void ObElectionPriority::set_system_disk_full()
{
system_score_ += SYSTEM_SCORE_CLOG_DISK_ERROR * 100;
system_score_ += SYSTEM_SCORE_DISK_FULL * 100;
}
// Adds SYSTEM_SCORE_CLOG_DISK_HANG, multiplied by 100, to system_score_.
// The factor of 100 matches is_in_election_blacklist(), which divides
// system_score_ by 100 before masking a flag bit.
// NOTE(review): the value is added rather than OR-ed, so a repeated call would
// add it twice — presumably called at most once per priority object; confirm.
void ObElectionPriority::set_system_clog_disk_hang()
{
  system_score_ = system_score_ + SYSTEM_SCORE_CLOG_DISK_HANG * 100;
}
void ObElectionPriority::set_system_tenant_out_of_memory()
......
......@@ -76,7 +76,8 @@ public:
}
int64_t get_system_score_without_election_blacklist() const;
bool is_in_election_blacklist() const;
void set_system_clog_disk_error();
void set_system_disk_full();
void set_system_clog_disk_hang();
void set_system_tenant_out_of_memory();
void set_system_data_disk_error();
void set_system_need_rebuild();
......@@ -96,8 +97,10 @@ private:
int compare_(const ObElectionPriority& priority, const bool with_locality, const bool with_log_id) const;
private:
const static int64_t SYSTEM_SCORE_CLOG_DISK_ERROR = (1 << 6);
const static int64_t SYSTEM_SCORE_TENANT_OUT_OF_MEM = (1 << 5);
const static int64_t SYSTEM_SCORE_DISK_FULL = (1 << 8);
const static int64_t SYSTEM_SCORE_NON_FULL_REPLICA = (1 << 7);
const static int64_t SYSTEM_SCORE_CLOG_DISK_HANG = (1 << 6);
const static int64_t SYSTEM_SCORE_TENANT_OUT_OF_MEM = (1 << 5); // tenant memstore is full
const static int64_t SYSTEM_SCORE_DATA_DISK_ERROR = (1 << 4);
const static int64_t SYSTEM_SCORE_NEED_REBUILD = (1 << 3);
const static int64_t SYSTEM_SCORE_IN_ELECTION_BLACKLIST = (1 << 2);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册