From cf98047e853e54cba2dcd16a505ab67511b1ce4d Mon Sep 17 00:00:00 2001 From: obdev Date: Fri, 28 Oct 2022 02:35:52 +0000 Subject: [PATCH] not retry if init_sqc message may be sent out successfully --- src/share/ob_errno.cpp | 13 +++++++++++++ src/share/ob_errno.def | 1 + src/share/ob_errno.h | 3 +++ src/sql/engine/px/ob_px_rpc_processor.cpp | 12 ++++++++++-- src/sql/engine/px/ob_px_rpc_processor.h | 6 ++++-- src/sql/engine/px/ob_px_sqc_async_proxy.cpp | 9 ++++++++- 6 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/share/ob_errno.cpp b/src/share/ob_errno.cpp index 27118bfa3..0819e9a96 100644 --- a/src/share/ob_errno.cpp +++ b/src/share/ob_errno.cpp @@ -6357,6 +6357,18 @@ static const _error _error_OB_DELETE_SERVER_NOT_ALLOWED = { .oracle_str_error = "ORA-00600: internal error code, arguments: -4745, delete server not allowed", .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4745, %s" }; +static const _error _error_OB_PACKET_STATUS_UNKNOWN = { + .error_name = "OB_PACKET_STATUS_UNKNOWN", + .error_cause = "Internal Error", + .error_solution = "Contact OceanBase Support", + .mysql_errno = -1, + .sqlstate = "HY000", + .str_error = "Network error and packet status unknown. Abort auto retry.", + .str_user_error = "Network error and packet status unknown. Abort auto retry.", + .oracle_errno = 600, + .oracle_str_error = "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry.", + .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry." +}; static const _error _error_OB_ERR_PARSER_INIT = { .error_name = "OB_ERR_PARSER_INIT", .error_cause = "Internal Error", @@ -22480,6 +22492,7 @@ struct ObStrErrorInit _errors[-OB_FREEZE_SERVICE_EPOCH_MISMATCH] = &_error_OB_FREEZE_SERVICE_EPOCH_MISMATCH; _errors[-OB_FROZEN_INFO_ALREADY_EXIST] = &_error_OB_FROZEN_INFO_ALREADY_EXIST; _errors[-OB_DELETE_SERVER_NOT_ALLOWED] = &_error_OB_DELETE_SERVER_NOT_ALLOWED; + _errors[-OB_PACKET_STATUS_UNKNOWN] = &_error_OB_PACKET_STATUS_UNKNOWN; _errors[-OB_ERR_PARSER_INIT] = &_error_OB_ERR_PARSER_INIT; _errors[-OB_ERR_PARSE_SQL] = &_error_OB_ERR_PARSE_SQL; _errors[-OB_ERR_RESOLVE_SQL] = &_error_OB_ERR_RESOLVE_SQL; diff --git a/src/share/ob_errno.def b/src/share/ob_errno.def index 339abcc97..d178f08dc 100644 --- a/src/share/ob_errno.def +++ b/src/share/ob_errno.def @@ -623,6 +623,7 @@ DEFINE_ERROR(OB_WAIT_TABLET_READY_TIMEOUT, -4742, -1, "HY000", "log stream wait DEFINE_ERROR(OB_FREEZE_SERVICE_EPOCH_MISMATCH, -4743, -1, "HY000", "freeze service epoch is not expected"); DEFINE_ERROR_EXT_DEP(OB_FROZEN_INFO_ALREADY_EXIST, -4744, -1, "HY000", "already exist larger frozen_scn in __all_freeze_info", "%s"); DEFINE_ERROR_EXT(OB_DELETE_SERVER_NOT_ALLOWED, -4745, -1, "HY000", "delete server not allowed", "%s"); +DEFINE_ERROR(OB_PACKET_STATUS_UNKNOWN, -4746, -1, "HY000", "Network error and packet status unknown. Abort auto retry."); //////////////////////////////////////////////////////////////// // SQL & Schema specific error code, -5000 ~ -6000 diff --git a/src/share/ob_errno.h b/src/share/ob_errno.h index dc7e85a29..6ee34ee96 100644 --- a/src/share/ob_errno.h +++ b/src/share/ob_errno.h @@ -433,6 +433,7 @@ constexpr int OB_WAIT_REPLAY_TIMEOUT = -4741; constexpr int OB_WAIT_TABLET_READY_TIMEOUT = -4742; constexpr int OB_FREEZE_SERVICE_EPOCH_MISMATCH = -4743; constexpr int OB_DELETE_SERVER_NOT_ALLOWED = -4745; +constexpr int OB_PACKET_STATUS_UNKNOWN = -4746; constexpr int OB_ERR_PARSER_INIT = -5000; constexpr int OB_ERR_PARSE_SQL = -5001; constexpr int OB_ERR_RESOLVE_SQL = -5002; @@ -2126,6 +2127,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_FREEZE_SERVICE_EPOCH_MISMATCH__USER_ERROR_MSG "freeze service epoch is not expected" #define OB_FROZEN_INFO_ALREADY_EXIST__USER_ERROR_MSG "%s" #define OB_DELETE_SERVER_NOT_ALLOWED__USER_ERROR_MSG "%s" +#define OB_PACKET_STATUS_UNKNOWN__USER_ERROR_MSG "Network error and packet status unknown. Abort auto retry." #define OB_ERR_PARSER_INIT__USER_ERROR_MSG "Failed to init SQL parser" #define OB_ERR_PARSE_SQL__USER_ERROR_MSG "%s near \'%.*s\' at line %d" #define OB_ERR_RESOLVE_SQL__USER_ERROR_MSG "Resolve error" @@ -3956,6 +3958,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_FREEZE_SERVICE_EPOCH_MISMATCH__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4743, freeze service epoch is not expected" #define OB_FROZEN_INFO_ALREADY_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4744, %s" #define OB_DELETE_SERVER_NOT_ALLOWED__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4745, %s" +#define OB_PACKET_STATUS_UNKNOWN__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry." #define OB_ERR_PARSER_INIT__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -5000, Failed to init SQL parser" #define OB_ERR_PARSE_SQL__ORA_USER_ERROR_MSG "ORA-00900: %s near \'%.*s\' at line %d" #define OB_ERR_RESOLVE_SQL__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -5002, Resolve error" diff --git a/src/sql/engine/px/ob_px_rpc_processor.cpp b/src/sql/engine/px/ob_px_rpc_processor.cpp index 9fef9218f..0c751bb2c 100644 --- a/src/sql/engine/px/ob_px_rpc_processor.cpp +++ b/src/sql/engine/px/ob_px_rpc_processor.cpp @@ -476,7 +476,10 @@ int ObFastInitSqcCB::deal_with_rpc_timeout_err_safely() { int ret = OB_SUCCESS; - ObDealWithRpcTimeoutCall call(addr_, retry_info_, timeout_ts_, trace_id_); + // only if it's sure init_sqc msg is not sent to sqc successfully, we can retry the query. + bool init_sqc_not_send_out = (get_error() == EASY_TIMEOUT_NOT_SENT_OUT + || get_error() == EASY_DISCONNECT_NOT_SENT_OUT); + ObDealWithRpcTimeoutCall call(addr_, retry_info_, timeout_ts_, trace_id_, init_sqc_not_send_out); call.ret_ = OB_TIMEOUT; ObGlobalInterruptManager *manager = ObGlobalInterruptManager::getInstance(); if (OB_NOT_NULL(manager)) { @@ -529,7 +532,12 @@ void ObDealWithRpcTimeoutCall::deal_with_rpc_timeout_err() LOG_WARN("fail to add invalid server distinctly", K_(trace_id), K(a_ret), K_(addr)); } } - ret_ = OB_RPC_CONNECT_ERROR; + if (can_retry_) { + // return OB_RPC_CONNECT_ERROR to retry. + ret_ = OB_RPC_CONNECT_ERROR; + } else { + ret_ = OB_PACKET_STATUS_UNKNOWN; + } } else { LOG_DEBUG("rpc return OB_TIMEOUT, and it is actually timeout, " "do not change error code", K(ret_), diff --git a/src/sql/engine/px/ob_px_rpc_processor.h b/src/sql/engine/px/ob_px_rpc_processor.h index aedde6ce8..25d1df871 100644 --- a/src/sql/engine/px/ob_px_rpc_processor.h +++ b/src/sql/engine/px/ob_px_rpc_processor.h @@ -120,8 +120,9 @@ public: ObDealWithRpcTimeoutCall(common::ObAddr addr, ObQueryRetryInfo *retry_info, int64_t timeout_ts, - common::ObCurTraceId::TraceId &trace_id) : addr_(addr), retry_info_(retry_info), - timeout_ts_(timeout_ts), trace_id_(trace_id), ret_(common::OB_TIMEOUT) {} + common::ObCurTraceId::TraceId &trace_id, + bool retry) : addr_(addr), retry_info_(retry_info), + timeout_ts_(timeout_ts), trace_id_(trace_id), ret_(common::OB_TIMEOUT), can_retry_(retry) {} ~ObDealWithRpcTimeoutCall() = default; void operator() (hash::HashMapPair &entry); @@ -132,6 +133,7 @@ public: int64_t timeout_ts_; common::ObCurTraceId::TraceId trace_id_; int ret_; + bool can_retry_; }; class ObFastInitSqcCB diff --git a/src/sql/engine/px/ob_px_sqc_async_proxy.cpp b/src/sql/engine/px/ob_px_sqc_async_proxy.cpp index f9fd56687..64b5a2dcb 100644 --- a/src/sql/engine/px/ob_px_sqc_async_proxy.cpp +++ b/src/sql/engine/px/ob_px_sqc_async_proxy.cpp @@ -185,7 +185,14 @@ int ObPxSqcAsyncProxy::wait_all() { if (phy_plan_ctx_->get_timeout_timestamp() - ObTimeUtility::current_time() > 0) { error_index_ = idx; - ret = OB_RPC_CONNECT_ERROR; + bool init_sqc_not_send_out = (callback.get_error() == EASY_TIMEOUT_NOT_SENT_OUT + || callback.get_error() == EASY_DISCONNECT_NOT_SENT_OUT); + if (init_sqc_not_send_out) { + // only if it's sure init_sqc msg is not sent to sqc successfully, return OB_RPC_CONNECT_ERROR to retry. + ret = OB_RPC_CONNECT_ERROR; + } else { + ret = OB_PACKET_STATUS_UNKNOWN; + } } else { ret = OB_TIMEOUT; } -- GitLab