提交 4b49ed92 编写于 作者: O obdev 提交者: ob-robot

Use NetKeepAlive for the PX server-alive check

上级 2f701118
......@@ -30,8 +30,8 @@
#include "sql/dtl/ob_dtl_interm_result_manager.h"
#include "sql/dtl/ob_dtl_channel_loop.h"
#include "sql/dtl/ob_dtl_channel_watcher.h"
#include "share/ob_server_blacklist.h"
#include "observer/omt/ob_th_worker.h"
#include "sql/engine/px/ob_px_util.h"
#include "sql/session/ob_sql_session_info.h"
using namespace oceanbase::common;
......@@ -850,9 +850,8 @@ int ObDtlBasicChannel::wait_unblocking()
LOG_WARN("worker interrupt", K(tmp_ret), K(ret));
break;
}
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(peer_, GCONF.cluster_id), true,
channel_loop_->get_process_query_time()))) {
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(peer_,
channel_loop_->get_process_query_time()))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(peer_),
K(static_cast<int64_t>(GCONF.cluster_id)));
......
......@@ -15,9 +15,9 @@
#include "share/interrupt/ob_global_interrupt_call.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "share/diagnosis/ob_sql_monitor_statname.h"
#include "share/ob_server_blacklist.h"
#include "observer/omt/ob_th_worker.h"
#include "share/ob_occam_time_guard.h"
#include "sql/engine/px/ob_px_util.h"
using namespace oceanbase::common;
......@@ -359,9 +359,7 @@ int ObDtlChannelLoop::process_channels(ObIDltChannelLoopPred *pred, int64_t &nth
LOG_WARN("unexpect next idx", K(next_idx_), K(chan_cnt), K(ret));
} else {
chan = chans_[next_idx_];
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(chan->get_peer(), GCONF.cluster_id), true,
get_process_query_time()))) {
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(chan->get_peer(), get_process_query_time()))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(chan->get_peer()),
K(static_cast<int64_t>(GCONF.cluster_id)));
......@@ -415,8 +413,7 @@ int ObDtlChannelLoop::process_channel(int64_t &nth_channel)
}
ObDtlChannel *ch = sentinel_node_.next_link_;
while (OB_EAGAIN == ret && ch != &sentinel_node_) {
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(ch->get_peer(), GCONF.cluster_id), true,
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(ch->get_peer(),
get_process_query_time()))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(ch->get_peer()),
......
......@@ -21,7 +21,6 @@
#include "sql/engine/px/ob_px_dtl_msg.h"
#include "sql/engine/px/ob_px_rpc_processor.h"
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
#include "share/ob_server_blacklist.h"
using namespace oceanbase::common;
using namespace oceanbase::share;
......@@ -489,9 +488,7 @@ int ObSerialDfoScheduler::dispatch_sqcs(ObExecContext &exec_ctx,
ObPxSqcMeta &sqc = *sqcs.at(idx);
const ObAddr &addr = sqc.get_exec_addr();
auto proxy = coord_info_.rpc_proxy_.to(addr);
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(addr, cluster_id), true /* add_server */,
session->get_process_query_time()))) {
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(addr, session->get_process_query_time()))) {
if (!ignore_vtable_error) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(sqc), K(cluster_id),
......
......@@ -14,7 +14,6 @@
#include "ob_px_data_ch_provider.h"
#include "sql/engine/px/ob_px_util.h"
#include "share/ob_server_blacklist.h"
using namespace oceanbase::common;
......@@ -426,9 +425,7 @@ int ObPxChProviderUtil::check_status(int64_t timeout_ts, const ObAddr &qc_addr,
} else if (timeout_ts <= ObTimeUtility::current_time()) {
ret = OB_TIMEOUT;
LOG_WARN("timeout and abort", K(timeout_ts), K(ret));
} else if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(qc_addr, GCONF.cluster_id), true,
query_start_time))) {
} else if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(qc_addr, query_start_time))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret), K(qc_addr),
K(static_cast<int64_t>(GCONF.cluster_id)));
......
......@@ -13,7 +13,7 @@
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/px/ob_px_sqc_async_proxy.h"
#include "share/ob_server_blacklist.h"
#include "sql/engine/px/ob_px_util.h"
namespace oceanbase {
using namespace common;
......@@ -68,10 +68,8 @@ int ObPxSqcAsyncProxy::launch_all_rpc_request() {
args.enable_serialize_cache();
}
ARRAY_FOREACH_X(sqcs_, idx, count, OB_SUCC(ret)) {
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(
share::ObCascadMember(sqcs_.at(idx)->get_exec_addr(), cluster_id),
true /* add_server */,
session_->get_process_query_time()))) {
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(sqcs_.at(idx)->get_exec_addr(),
session_->get_process_query_time()))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("peer no in communication, maybe crashed", K(ret),
KPC(sqcs_.at(idx)), K(cluster_id), K(session_->get_process_query_time()));
......
......@@ -28,9 +28,9 @@
#include "share/schema/ob_part_mgr_util.h"
#include "sql/engine/dml/ob_table_insert_op.h"
#include "sql/session/ob_sql_session_info.h"
#include "share/ob_server_blacklist.h"
#include "common/ob_smart_call.h"
#include "storage/ob_locality_manager.h"
#include "rpc/obrpc/ob_net_keepalive.h"
using namespace oceanbase::common;
using namespace oceanbase::sql;
......@@ -3480,14 +3480,12 @@ int ObExtraServerAliveCheck::do_check() const
if (OB_FAIL(dfo_mgr_->get_running_dfos(dfos))) {
LOG_WARN("fail find dfo", K(ret));
} else {
share::ObServerBlacklist &server_black_list = share::ObServerBlacklist::get_instance();
// need check all sqc because we set sqc need_report = false here and don't need wait sqc finish msg.
for (int64_t i = 0; i < dfos.count(); i++) {
ObIArray<ObPxSqcMeta> &sqcs = dfos.at(i)->get_sqcs();
for (int64_t j = 0; j < sqcs.count(); j++) {
if (sqcs.at(j).need_report()) {
if (OB_UNLIKELY(server_black_list.is_in_blacklist(
share::ObCascadMember(sqcs.at(j).get_exec_addr(), cluster_id_), true,
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(sqcs.at(j).get_exec_addr(),
query_start_time_))) {
sqcs.at(j).set_need_report(false);
sqcs.at(j).set_thread_finish(true);
......@@ -3503,8 +3501,7 @@ int ObExtraServerAliveCheck::do_check() const
}
}
} else if (OB_LIKELY(qc_addr_.is_valid())) {
if (OB_UNLIKELY(share::ObServerBlacklist::get_instance().is_in_blacklist(share::ObCascadMember(
qc_addr_, cluster_id_), true, query_start_time_))) {
if (OB_UNLIKELY(ObPxCheckAlive::is_in_blacklist(qc_addr_, query_start_time_))) {
ret = OB_RPC_CONNECT_ERROR;
LOG_WARN("qc not in communication, maybe crashed", K(ret), K(qc_addr_));
}
......@@ -3535,3 +3532,19 @@ bool ObVirtualTableErrorWhitelist::should_ignore_vtable_error(int error_code)
}
return should_ignore;
}
// Decide whether the peer at `addr` should be treated as blacklisted.
//
// The verdict starts from ObNetKeepAlive::in_black(). When that call succeeds,
// reports the peer as not blacklisted, and `server_start_time` is positive, the
// peer is still treated as blacklisted if its reported start_service_time_ is
// greater than or equal to `server_start_time`.
// NOTE(review): callers pass a query start time here, so this presumably
// detects a peer that restarted after the query began -- confirm.
//
// @param addr               address of the peer to check
// @param server_start_time  reference timestamp; values <= 0 skip the
//                           start_service_time_ comparison
// @return true when the peer is considered blacklisted. If in_black() fails,
//         a warning is logged and the flag is returned as in_black() left it
//         (it is initialized to false before the call).
bool ObPxCheckAlive::is_in_blacklist(const common::ObAddr &addr, int64_t server_start_time)
{
  int ret = OB_SUCCESS;
  bool blacklisted = false;
  obrpc::ObNetKeepAliveData alive_data;
  if (OB_FAIL(ObNetKeepAlive::get_instance().in_black(addr, blacklisted, &alive_data))) {
    // The failure is only logged; it is not propagated to the caller.
    LOG_WARN("check in black failed", K(ret));
  } else {
    // Only consult the keepalive data when the peer is not already flagged
    // and the caller supplied a usable reference timestamp.
    const bool need_start_time_check = !blacklisted && server_start_time > 0;
    if (need_start_time_check && alive_data.start_service_time_ >= server_start_time) {
      blacklisted = true;
    }
  }
  if (blacklisted) {
    LOG_WARN("server in blacklist", K(addr), K(server_start_time), K(alive_data.start_service_time_));
  }
  return blacklisted;
}
\ No newline at end of file
......@@ -548,6 +548,12 @@ public:
static bool should_ignore_vtable_error(int error_code);
};
// Static helper for checking whether a peer server is considered blacklisted.
class ObPxCheckAlive
{
public:
// Returns true when `addr` is considered blacklisted. The implementation
// consults ObNetKeepAlive and, when `server_start_time` is positive, also
// treats the peer as blacklisted if its reported start_service_time_ is
// greater than or equal to `server_start_time`.
static bool is_in_blacklist(const common::ObAddr &addr, int64_t server_start_time);
};
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册