From b5be94cf157a95e2d4a4b1f29b629a4dad53c552 Mon Sep 17 00:00:00 2001 From: obdev Date: Wed, 26 Oct 2022 10:18:39 +0800 Subject: [PATCH] fix ddl retry error code --- src/rootserver/ddl_task/ob_constraint_task.cpp | 4 ++-- src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp | 2 +- src/rootserver/ddl_task/ob_ddl_retry_task.h | 4 ++++ src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp | 2 +- src/rootserver/ddl_task/ob_ddl_task.cpp | 3 +-- src/rootserver/ddl_task/ob_ddl_task.h | 6 ++++++ src/rootserver/ddl_task/ob_drop_index_task.h | 6 ++++++ src/rootserver/ddl_task/ob_index_build_task.cpp | 6 +++--- src/rootserver/ddl_task/ob_modify_autoinc_task.cpp | 2 +- src/rootserver/ddl_task/ob_table_redefinition_task.cpp | 2 +- src/share/ob_ddl_task_executor.h | 6 +++++- 11 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/rootserver/ddl_task/ob_constraint_task.cpp b/src/rootserver/ddl_task/ob_constraint_task.cpp index b74d61e254..237d3ebaff 100644 --- a/src/rootserver/ddl_task/ob_constraint_task.cpp +++ b/src/rootserver/ddl_task/ob_constraint_task.cpp @@ -777,7 +777,7 @@ int ObConstraintTask::check_replica_end(bool &is_end) ret_code_ = check_job_ret_code_; is_end = true; LOG_WARN("complete sstable job failed", K(ret_code_), K(object_id_), K(target_object_id_)); - if (ObIDDLTask::error_need_retry(ret_code_) || OB_REPLICA_NOT_READABLE == ret_code_ || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code_) { + if (ObIDDLTask::in_ddl_retry_white_list(ret_code_) || OB_REPLICA_NOT_READABLE == ret_code_ || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code_) { check_replica_request_time_ = 0; check_job_ret_code_ = INT64_MAX; ret_code_ = OB_SUCCESS; @@ -1745,7 +1745,7 @@ int ObConstraintTask::check_health() ret = OB_TABLE_NOT_EXIST; LOG_WARN("data table not exist", K(ret), K(is_source_table_exist)); } - if (OB_FAIL(ret) && !ObIDDLTask::error_need_retry(ret)) { + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { const ObDDLTaskStatus old_status = 
static_cast<ObDDLTaskStatus>(task_status_); const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; switch_status(new_status, ret); diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp index 689e9ea549..8aba35ef7f 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp @@ -1293,7 +1293,7 @@ int ObDDLRedefinitionTask::check_health() ret = OB_TABLE_NOT_EXIST; LOG_WARN("data table or dest table not exist", K(ret), K(is_source_table_exist), K(is_dest_table_exist)); } - if (OB_FAIL(ret) && !ObIDDLTask::error_need_retry(ret)) { + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { const ObDDLTaskStatus old_status = static_cast<ObDDLTaskStatus>(task_status_); const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; switch_status(new_status, ret); diff --git a/src/rootserver/ddl_task/ob_ddl_retry_task.h b/src/rootserver/ddl_task/ob_ddl_retry_task.h index 2fa586e4c5..804fcd1b20 100644 --- a/src/rootserver/ddl_task/ob_ddl_retry_task.h +++ b/src/rootserver/ddl_task/ob_ddl_retry_task.h @@ -56,6 +56,10 @@ private: int deep_copy_ddl_arg(common::ObIAllocator &allocator, const share::ObDDLType &ddl_type, const obrpc::ObDDLArg *source_arg); int init_compat_mode(const share::ObDDLType &ddl_type, const obrpc::ObDDLArg *source_arg); int get_forward_user_message(const obrpc::ObRpcResultCode &rcode); + virtual bool is_error_need_retry(const int ret_code) override + { + return common::OB_PARTITION_NOT_EXIST != ret_code && ObDDLTask::is_error_need_retry(ret_code); + } private: static const int64_t OB_DDL_RETRY_TASK_VERSION = 1L; obrpc::ObDDLArg *ddl_arg_; diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp index c6df7bad5e..26750a4b45 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp @@ -216,7 +216,7 @@ int 
ObDDLSingleReplicaExecutor::set_partition_task_status(const common::ObTablet if (OB_SUCCESS == ret_code) { build_infos.at(i).ret_code_ = OB_SUCCESS; build_infos.at(i).stat_ = ObPartitionBuildStat::BUILD_SUCCEED; - } else if (ObIDDLTask::error_need_retry(ret_code) || OB_REPLICA_NOT_READABLE == ret_code || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code) { + } else if (ObIDDLTask::in_ddl_retry_white_list(ret_code) || OB_REPLICA_NOT_READABLE == ret_code || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code) { build_infos.at(i).ret_code_ = OB_SUCCESS; build_infos.at(i).stat_ = ObPartitionBuildStat::BUILD_RETRY; } else { diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index ed6be5a3cb..90352df8b1 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -294,8 +294,7 @@ int ObDDLTask::switch_status(ObDDLTaskStatus new_status, const int ret_code) LOG_WARN("check task is canceled", K(tmp_ret), K(trace_id_)); } else if (is_cancel) { real_ret_code = OB_SUCCESS == ret_code ? 
OB_CANCELED : ret_code; - } else if (SUCCESS == old_status || (OB_SUCCESS != ret_code && (ObIDDLTask::error_need_retry(ret_code) - || (!ObIDDLTask::error_need_retry(ret_code) && MAX_ERR_TOLERANCE_CNT > ++err_code_occurence_cnt_)))) { + } else if (SUCCESS == old_status || (OB_SUCCESS != ret_code && is_error_need_retry(ret_code))) { LOG_INFO("error code found, but execute again", K(ret_code), K(ret_code_), K(old_status), K(new_status), K(err_code_occurence_cnt_)); ret_code_ = OB_SUCCESS; new_status = old_status; diff --git a/src/rootserver/ddl_task/ob_ddl_task.h b/src/rootserver/ddl_task/ob_ddl_task.h index ea8f92421e..55b7e505f1 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.h +++ b/src/rootserver/ddl_task/ob_ddl_task.h @@ -295,6 +295,12 @@ public: K_(ret_code), K_(task_id), K_(parent_task_id), K_(parent_task_key), K_(task_version), K_(parallelism), K_(ddl_stmt_str), K_(compat_mode), K_(sys_task_id), K_(err_code_occurence_cnt)); +protected: + virtual bool is_error_need_retry(const int ret_code) + { + return !share::ObIDDLTask::in_ddl_retry_black_list(ret_code) && (share::ObIDDLTask::in_ddl_retry_white_list(ret_code) + || MAX_ERR_TOLERANCE_CNT > ++err_code_occurence_cnt_); + } protected: static const int64_t MAX_ERR_TOLERANCE_CNT = 3L; // Max torlerance count for error code. 
bool is_inited_; diff --git a/src/rootserver/ddl_task/ob_drop_index_task.h b/src/rootserver/ddl_task/ob_drop_index_task.h index 9abc206f58..7ce15981c4 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.h +++ b/src/rootserver/ddl_task/ob_drop_index_task.h @@ -55,6 +55,12 @@ private: int deep_copy_index_arg(common::ObIAllocator &allocator, const obrpc::ObDropIndexArg &src_index_arg, obrpc::ObDropIndexArg &dst_index_arg); + virtual bool is_error_need_retry(const int ret_code) override + { + UNUSED(ret_code); + // we should always retry on drop index task + return task_status_ < share::ObDDLTaskStatus::DROP_SCHEMA; + } private: static const int64_t OB_DROP_INDEX_TASK_VERSION = 1; ObDDLWaitTransEndCtx wait_trans_ctx_; diff --git a/src/rootserver/ddl_task/ob_index_build_task.cpp b/src/rootserver/ddl_task/ob_index_build_task.cpp index ceaa17f4d5..f17997556b 100644 --- a/src/rootserver/ddl_task/ob_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_index_build_task.cpp @@ -423,7 +423,7 @@ int ObIndexBuildTask::check_health() ret = check_errsim_error(); } #endif - if (OB_FAIL(ret) && !ObIDDLTask::error_need_retry(ret)) { + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { const ObDDLTaskStatus old_status = static_cast<ObDDLTaskStatus>(task_status_); const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; switch_status(new_status, ret); @@ -622,7 +622,7 @@ int ObIndexBuildTask::check_build_single_replica(bool &is_end) } else if (OB_SUCCESS != complete_sstable_job_ret_code_) { ret = complete_sstable_job_ret_code_; LOG_WARN("sstable complete job has failed", K(ret), K(object_id_), K(index_table_id_)); - if (ObIDDLTask::error_need_retry(ret) || OB_REPLICA_NOT_READABLE == ret || OB_ERR_INSUFFICIENT_PX_WORKER == ret) { + if (ObIDDLTask::in_ddl_retry_white_list(ret) || OB_REPLICA_NOT_READABLE == ret || OB_ERR_INSUFFICIENT_PX_WORKER == ret) { // retry sql job by re-submit is_sstable_complete_task_submitted_ = false; complete_sstable_job_ret_code_ = INT64_MAX; @@ -919,7 
+919,7 @@ int ObIndexBuildTask::enable_index() } } DEBUG_SYNC(CREATE_INDEX_TAKE_EFFECT); - if (OB_FAIL(ret) && !ObIDDLTask::error_need_retry(ret)) { + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { state_finished = true; next_status = ObDDLTaskStatus::TAKE_EFFECT; } diff --git a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp index cd33d69740..4174d5f357 100644 --- a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp +++ b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp @@ -556,7 +556,7 @@ int ObModifyAutoincTask::check_health() ret = OB_TABLE_NOT_EXIST; LOG_WARN("data table not exist", K(ret), K(is_source_table_exist)); } - if (OB_FAIL(ret) && !ObIDDLTask::error_need_retry(ret)) { + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { const ObDDLTaskStatus old_status = static_cast<ObDDLTaskStatus>(task_status_); const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; switch_status(new_status, ret); diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp index f2dfe53fe1..42e2eef9d6 100644 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp @@ -199,7 +199,7 @@ int ObTableRedefinitionTask::check_build_replica_end(bool &is_end) ret_code_ = complete_sstable_job_ret_code_; is_end = true; LOG_WARN("complete sstable job failed", K(ret_code_), K(object_id_), K(target_object_id_)); - if (ObIDDLTask::error_need_retry(ret_code_) || OB_REPLICA_NOT_READABLE == ret_code_ || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code_) { + if (ObIDDLTask::in_ddl_retry_white_list(ret_code_) || OB_REPLICA_NOT_READABLE == ret_code_ || OB_ERR_INSUFFICIENT_PX_WORKER == ret_code_) { build_replica_request_time_ = 0; complete_sstable_job_ret_code_ = INT64_MAX; ret_code_ = OB_SUCCESS; diff --git a/src/share/ob_ddl_task_executor.h b/src/share/ob_ddl_task_executor.h index 768f9b6689..c709dd3461 
100644 --- a/src/share/ob_ddl_task_executor.h +++ b/src/share/ob_ddl_task_executor.h @@ -50,7 +50,7 @@ public: virtual ObIDDLTask *deep_copy(char *buf, const int64_t size) const = 0; virtual bool operator == (const ObIDDLTask &other) const = 0; ObIDDLTaskType get_type() const { return type_; } - static bool error_need_retry(const int ret_code) + static bool in_ddl_retry_white_list(const int ret_code) { return common::OB_TIMEOUT == ret_code || common::OB_STATE_NOT_MATCH == ret_code || common::OB_SERVER_IS_STOPPING == ret_code || common::OB_SERVER_IS_INIT == ret_code || common::OB_EAGAIN == ret_code || common::OB_NOT_MASTER == ret_code @@ -59,6 +59,10 @@ public: || common::OB_PARTITION_NOT_EXIST == ret_code || common::OB_PG_IS_REMOVED == ret_code || common::OB_TENANT_NOT_EXIST == ret_code || common::OB_RPC_SEND_ERROR == ret_code || common::OB_DDL_SCHEMA_VERSION_NOT_MATCH == ret_code; } + static bool in_ddl_retry_black_list(const int ret_code) + { + return common::OB_SERVER_OUTOF_DISK_SPACE == ret_code || common::OB_DISK_ERROR == ret_code; + } protected: typedef common::ObCurTraceId::TraceId TaskId; bool need_retry_; -- GitLab