diff --git a/src/share/ob_errno.cpp b/src/share/ob_errno.cpp index 27118bfa3e9c55321ce3165df7d4099080047579..0819e9a964d3783a326804dd7010b079b0fb97ad 100644 --- a/src/share/ob_errno.cpp +++ b/src/share/ob_errno.cpp @@ -6357,6 +6357,18 @@ static const _error _error_OB_DELETE_SERVER_NOT_ALLOWED = { .oracle_str_error = "ORA-00600: internal error code, arguments: -4745, delete server not allowed", .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4745, %s" }; +static const _error _error_OB_PACKET_STATUS_UNKNOWN = { + .error_name = "OB_PACKET_STATUS_UNKNOWN", + .error_cause = "Internal Error", + .error_solution = "Contact OceanBase Support", + .mysql_errno = -1, + .sqlstate = "HY000", + .str_error = "Network error and packet status unknown. Abort auto retry.", + .str_user_error = "Network error and packet status unknown. Abort auto retry.", + .oracle_errno = 600, + .oracle_str_error = "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry.", + .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry." 
+}; static const _error _error_OB_ERR_PARSER_INIT = { .error_name = "OB_ERR_PARSER_INIT", .error_cause = "Internal Error", @@ -22480,6 +22492,7 @@ struct ObStrErrorInit _errors[-OB_FREEZE_SERVICE_EPOCH_MISMATCH] = &_error_OB_FREEZE_SERVICE_EPOCH_MISMATCH; _errors[-OB_FROZEN_INFO_ALREADY_EXIST] = &_error_OB_FROZEN_INFO_ALREADY_EXIST; _errors[-OB_DELETE_SERVER_NOT_ALLOWED] = &_error_OB_DELETE_SERVER_NOT_ALLOWED; + _errors[-OB_PACKET_STATUS_UNKNOWN] = &_error_OB_PACKET_STATUS_UNKNOWN; _errors[-OB_ERR_PARSER_INIT] = &_error_OB_ERR_PARSER_INIT; _errors[-OB_ERR_PARSE_SQL] = &_error_OB_ERR_PARSE_SQL; _errors[-OB_ERR_RESOLVE_SQL] = &_error_OB_ERR_RESOLVE_SQL; diff --git a/src/share/ob_errno.def b/src/share/ob_errno.def index 339abcc977ef701e49e72875db7bd0fd40d6e22c..d178f08dca3dddfdab9dc05934cd91d4e6e7d516 100644 --- a/src/share/ob_errno.def +++ b/src/share/ob_errno.def @@ -623,6 +623,7 @@ DEFINE_ERROR(OB_WAIT_TABLET_READY_TIMEOUT, -4742, -1, "HY000", "log stream wait DEFINE_ERROR(OB_FREEZE_SERVICE_EPOCH_MISMATCH, -4743, -1, "HY000", "freeze service epoch is not expected"); DEFINE_ERROR_EXT_DEP(OB_FROZEN_INFO_ALREADY_EXIST, -4744, -1, "HY000", "already exist larger frozen_scn in __all_freeze_info", "%s"); DEFINE_ERROR_EXT(OB_DELETE_SERVER_NOT_ALLOWED, -4745, -1, "HY000", "delete server not allowed", "%s"); +DEFINE_ERROR(OB_PACKET_STATUS_UNKNOWN, -4746, -1, "HY000", "Network error and packet status unknown. 
Abort auto retry."); //////////////////////////////////////////////////////////////// // SQL & Schema specific error code, -5000 ~ -6000 diff --git a/src/share/ob_errno.h b/src/share/ob_errno.h index dc7e85a29f49230989538f2033fbdb95a3121b28..6ee34ee96ee39c257755cd37708cd14b8143be07 100644 --- a/src/share/ob_errno.h +++ b/src/share/ob_errno.h @@ -433,6 +433,7 @@ constexpr int OB_WAIT_REPLAY_TIMEOUT = -4741; constexpr int OB_WAIT_TABLET_READY_TIMEOUT = -4742; constexpr int OB_FREEZE_SERVICE_EPOCH_MISMATCH = -4743; constexpr int OB_DELETE_SERVER_NOT_ALLOWED = -4745; +constexpr int OB_PACKET_STATUS_UNKNOWN = -4746; constexpr int OB_ERR_PARSER_INIT = -5000; constexpr int OB_ERR_PARSE_SQL = -5001; constexpr int OB_ERR_RESOLVE_SQL = -5002; @@ -2126,6 +2127,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_FREEZE_SERVICE_EPOCH_MISMATCH__USER_ERROR_MSG "freeze service epoch is not expected" #define OB_FROZEN_INFO_ALREADY_EXIST__USER_ERROR_MSG "%s" #define OB_DELETE_SERVER_NOT_ALLOWED__USER_ERROR_MSG "%s" +#define OB_PACKET_STATUS_UNKNOWN__USER_ERROR_MSG "Network error and packet status unknown. Abort auto retry." #define OB_ERR_PARSER_INIT__USER_ERROR_MSG "Failed to init SQL parser" #define OB_ERR_PARSE_SQL__USER_ERROR_MSG "%s near \'%.*s\' at line %d" #define OB_ERR_RESOLVE_SQL__USER_ERROR_MSG "Resolve error" @@ -3956,6 +3958,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_FREEZE_SERVICE_EPOCH_MISMATCH__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4743, freeze service epoch is not expected" #define OB_FROZEN_INFO_ALREADY_EXIST__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4744, %s" #define OB_DELETE_SERVER_NOT_ALLOWED__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4745, %s" +#define OB_PACKET_STATUS_UNKNOWN__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4746, Network error and packet status unknown. Abort auto retry." 
#define OB_ERR_PARSER_INIT__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -5000, Failed to init SQL parser" #define OB_ERR_PARSE_SQL__ORA_USER_ERROR_MSG "ORA-00900: %s near \'%.*s\' at line %d" #define OB_ERR_RESOLVE_SQL__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -5002, Resolve error" diff --git a/src/sql/engine/px/ob_px_rpc_processor.cpp b/src/sql/engine/px/ob_px_rpc_processor.cpp index 9fef9218f4bbebde52ed45234702c798e071c6f8..0c751bb2cf7e99e3b696a0036f90eae55123f7fa 100644 --- a/src/sql/engine/px/ob_px_rpc_processor.cpp +++ b/src/sql/engine/px/ob_px_rpc_processor.cpp @@ -476,7 +476,10 @@ int ObFastInitSqcCB::deal_with_rpc_timeout_err_safely() { int ret = OB_SUCCESS; - ObDealWithRpcTimeoutCall call(addr_, retry_info_, timeout_ts_, trace_id_); + // Retrying the query is allowed only when the init_sqc message was definitely never sent out to the sqc. + bool init_sqc_not_send_out = (get_error() == EASY_TIMEOUT_NOT_SENT_OUT + || get_error() == EASY_DISCONNECT_NOT_SENT_OUT); + ObDealWithRpcTimeoutCall call(addr_, retry_info_, timeout_ts_, trace_id_, init_sqc_not_send_out); call.ret_ = OB_TIMEOUT; ObGlobalInterruptManager *manager = ObGlobalInterruptManager::getInstance(); if (OB_NOT_NULL(manager)) { @@ -529,7 +532,12 @@ void ObDealWithRpcTimeoutCall::deal_with_rpc_timeout_err() LOG_WARN("fail to add invalid server distinctly", K_(trace_id), K(a_ret), K_(addr)); } } - ret_ = OB_RPC_CONNECT_ERROR; + if (can_retry_) { + // The caller marked this failure as retryable, so return OB_RPC_CONNECT_ERROR to trigger a retry. 
+ ret_ = OB_RPC_CONNECT_ERROR; + } else { + ret_ = OB_PACKET_STATUS_UNKNOWN; + } } else { LOG_DEBUG("rpc return OB_TIMEOUT, and it is actually timeout, " "do not change error code", K(ret_), diff --git a/src/sql/engine/px/ob_px_rpc_processor.h b/src/sql/engine/px/ob_px_rpc_processor.h index aedde6ce8adf45ea6a30f815bec44a49a7cf3a86..25d1df871326c6d041d166d74fa3d013afc15a83 100644 --- a/src/sql/engine/px/ob_px_rpc_processor.h +++ b/src/sql/engine/px/ob_px_rpc_processor.h @@ -120,8 +120,9 @@ public: ObDealWithRpcTimeoutCall(common::ObAddr addr, ObQueryRetryInfo *retry_info, int64_t timeout_ts, - common::ObCurTraceId::TraceId &trace_id) : addr_(addr), retry_info_(retry_info), - timeout_ts_(timeout_ts), trace_id_(trace_id), ret_(common::OB_TIMEOUT) {} + common::ObCurTraceId::TraceId &trace_id, + bool retry) : addr_(addr), retry_info_(retry_info), + timeout_ts_(timeout_ts), trace_id_(trace_id), ret_(common::OB_TIMEOUT), can_retry_(retry) {} ~ObDealWithRpcTimeoutCall() = default; void operator() (hash::HashMapPair &entry); @@ -132,6 +133,7 @@ public: int64_t timeout_ts_; common::ObCurTraceId::TraceId trace_id_; int ret_; + bool can_retry_; }; class ObFastInitSqcCB diff --git a/src/sql/engine/px/ob_px_sqc_async_proxy.cpp b/src/sql/engine/px/ob_px_sqc_async_proxy.cpp index f9fd5668710b08807f563f743f84370549a4b5b1..64b5a2dcb3b8d05296a561718ab4d322d188793e 100644 --- a/src/sql/engine/px/ob_px_sqc_async_proxy.cpp +++ b/src/sql/engine/px/ob_px_sqc_async_proxy.cpp @@ -185,7 +185,14 @@ int ObPxSqcAsyncProxy::wait_all() { if (phy_plan_ctx_->get_timeout_timestamp() - ObTimeUtility::current_time() > 0) { error_index_ = idx; - ret = OB_RPC_CONNECT_ERROR; + bool init_sqc_not_send_out = (callback.get_error() == EASY_TIMEOUT_NOT_SENT_OUT + || callback.get_error() == EASY_DISCONNECT_NOT_SENT_OUT); + if (init_sqc_not_send_out) { + // Return OB_RPC_CONNECT_ERROR (which triggers a retry) only when the init_sqc message was definitely never sent out to the sqc. 
+ ret = OB_RPC_CONNECT_ERROR; + } else { + ret = OB_PACKET_STATUS_UNKNOWN; + } } else { ret = OB_TIMEOUT; }