From 2032dc84779f0456bbc94691d665b40ac8f1915c Mon Sep 17 00:00:00 2001 From: godyangfight Date: Fri, 25 Nov 2022 07:35:52 +0000 Subject: [PATCH] Fix rebuild offline and online failed cannot do backfill tx bug --- .../high_availability/ob_ls_migration.cpp | 5 ++++- .../ob_ls_prepare_migration.cpp | 9 +++++++++ .../ob_ls_rebuild_cb_impl.cpp | 19 +++++++++++++------ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/storage/high_availability/ob_ls_migration.cpp b/src/storage/high_availability/ob_ls_migration.cpp index 182f38f3c..d78312326 100644 --- a/src/storage/high_availability/ob_ls_migration.cpp +++ b/src/storage/high_availability/ob_ls_migration.cpp @@ -1215,9 +1215,12 @@ int ObStartMigrationTask::update_ls_() LOG_WARN("failed to advance base lsn for migration", K(ret), KPC(ctx_)); } else { ctx_->local_clog_checkpoint_ts_ = ctx_->src_ls_meta_package_.ls_meta_.get_clog_checkpoint_ts(); + } + + if (OB_SUCC(ret)) { + ctx_->local_rebuild_seq_ = ctx_->src_ls_meta_package_.ls_meta_.get_rebuild_seq(); LOG_INFO("update rebuild seq", "old_ls_rebuld_seq", ctx_->local_rebuild_seq_, "new_ls_rebuild_seq", ctx_->src_ls_meta_package_.ls_meta_.get_rebuild_seq(), K(lbt())); - ctx_->local_rebuild_seq_ = ctx_->src_ls_meta_package_.ls_meta_.get_rebuild_seq(); } } return ret; diff --git a/src/storage/high_availability/ob_ls_prepare_migration.cpp b/src/storage/high_availability/ob_ls_prepare_migration.cpp index b29dffc6c..c82a8280b 100644 --- a/src/storage/high_availability/ob_ls_prepare_migration.cpp +++ b/src/storage/high_availability/ob_ls_prepare_migration.cpp @@ -927,10 +927,19 @@ int ObStartPrepareMigrationTask::generate_prepare_migration_dags_() ObBackfillTXCtx *backfill_tx_ctx = nullptr; ObTabletID tablet_id; ObStartPrepareMigrationDag *start_prepare_migration_dag = nullptr; + ObLSHandle ls_handle; + ObLS *ls = nullptr; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("start prepare migration task do not init", K(ret)); + } else if 
(OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) { + LOG_WARN("failed to get ls", K(ret), KPC(ctx_)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", K(ret), KP(ls)); + } else if (ls->is_offline()) { + LOG_INFO("ls is in offline status, no need generate backfill dag", KPC(ls)); } else if (OB_ISNULL(start_prepare_migration_dag = static_cast(this->get_dag()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("start prepare migration dag should not be NULL", K(ret), KP(start_prepare_migration_dag)); diff --git a/src/storage/high_availability/ob_ls_rebuild_cb_impl.cpp b/src/storage/high_availability/ob_ls_rebuild_cb_impl.cpp index a5df1769e..2a93a90b9 100644 --- a/src/storage/high_availability/ob_ls_rebuild_cb_impl.cpp +++ b/src/storage/high_availability/ob_ls_rebuild_cb_impl.cpp @@ -15,6 +15,7 @@ #include "ob_storage_ha_service.h" #include "share/ls/ob_ls_table_operator.h" #include "observer/ob_server_event_history_table_operator.h" +#include "logservice/ob_log_service.h" namespace oceanbase { @@ -121,7 +122,6 @@ int ObLSRebuildCbImpl::check_need_rebuild_( { int ret = OB_SUCCESS; ObLSInfo ls_info; - share::ObLSTableOperator *lst_operator = GCTX.lst_operator_; int64_t cluster_id = GCONF.cluster_id; uint64_t tenant_id = MTL_ID(); ObAddr leader_addr; @@ -131,21 +131,28 @@ int ObLSRebuildCbImpl::check_need_rebuild_( obrpc::ObFetchLSMemberListInfo member_info; const bool force_renew = true; src_info.cluster_id_ = cluster_id; + ObRole role; + int64_t proposal_id = 0; + logservice::ObLogService *log_service = nullptr; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("ls rebuild cb impl do not init", K(ret)); - } else if (nullptr == lst_operator) { + } else if (OB_ISNULL(log_service = MTL(logservice::ObLogService*))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("lst_operator ptr is null", K(ret)); + LOG_WARN("log service should not be NULL", K(ret), KP(log_service)); + } else if 
(OB_FAIL(log_service->get_palf_role(ls_->get_ls_id(), role, proposal_id))) { + LOG_WARN("failed to get role", K(ret), KPC(ls_)); + } else if (is_strong_leader(role)) { + need_rebuild = false; + LOG_INFO("replica is leader, can not rebuild", KPC(ls_)); } else if (OB_ISNULL(location_service = GCTX.location_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("location service should not be NULL", K(ret), KP(location_service)); } else if (OB_FAIL(location_service->get_leader(src_info.cluster_id_, tenant_id, ls_->get_ls_id(), force_renew, src_info.src_addr_))) { LOG_WARN("fail to get ls leader server", K(ret), K(tenant_id), KPC(ls_)); - } else if (src_info.src_addr_ == GCONF.self_addr_) { - need_rebuild = false; - LOG_INFO("replica is leader, can not rebuild", KPC(ls_)); + //for rebuild without leader exist + ret = OB_SUCCESS; } else if (OB_FAIL(storage_rpc_->post_ls_member_list_request(tenant_id, src_info, ls_->get_ls_id(), member_info))) { LOG_WARN("failed to get ls member info", K(ret), KPC(ls_)); } else if (!member_info.member_list_.contains(GCONF.self_addr_)) { -- GitLab