From f935bcf90502f48183aafaf2397188dbe0374646 Mon Sep 17 00:00:00 2001 From: ym0 Date: Fri, 14 Jan 2022 10:42:48 +0800 Subject: [PATCH] [PHYSICAL RESTORE] Avoid coredump while create user partitions --- src/rootserver/ob_ddl_service.cpp | 3 ++ src/rootserver/ob_root_service.cpp | 28 ++++++++++++++++--- .../restore/ob_restore_scheduler.cpp | 1 + src/share/ob_debug_sync_point.h | 1 + 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index fb2cc3d56..9637a5ab1 100644 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -6475,6 +6475,9 @@ int ObDDLService::create_table_partitions_for_physical_restore(const obrpc::ObRe LOG_WARN("root_balancer_ is null", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(table_id, table_schema))) { LOG_WARN("fail to get table", K(ret), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist. 
table may be droppped concurrently in physical restore", KR(ret), K(table_id)); } else if (OB_FAIL(root_balancer_->alloc_partitions_for_create(*table_schema, create_mode, table_addr))) { LOG_WARN("alloc partition address failed", K(ret), KPC(table_schema)); } else if (OB_FAIL(create_partitions_for_physical_restore( diff --git a/src/rootserver/ob_root_service.cpp b/src/rootserver/ob_root_service.cpp index 9fd0cff5c..080b9c693 100644 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -8487,7 +8487,7 @@ int ObRootService::physical_restore_tenant(const obrpc::ObPhysicalRestoreTenantA int ret = OB_SUCCESS; int64_t gc_snapshot_ts = OB_INVALID_TIMESTAMP; int64_t current_timestamp = ObTimeUtility::current_time(); - const int64_t RESTORE_TIMESTAMP_DETA = 10 * 1000 * 1000L; // 防止恢复到未来的某个时间 + const int64_t RESTORE_TIMESTAMP_DETA = 10 * 1000 * 1000L; int64_t job_id = OB_INVALID_ID; ObSchemaGetterGuard schema_guard; if (!inited_) { @@ -11743,10 +11743,13 @@ int ObRootService::purge_recyclebin_objects(int64_t purge_each_time) // always passed int64_t expire_timeval = GCONF.recyclebin_object_expire_time; ObSEArray tenant_ids; + ObSchemaGetterGuard guard; if (OB_ISNULL(schema_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("schema_serviece_ is null", KR(ret)); - } else if (OB_FAIL(schema_service_->get_tenant_ids(tenant_ids))) { + } else if (OB_FAIL(schema_service_->get_tenant_schema_guard(OB_SYS_TENANT_ID, guard))) { + LOG_WARN("fail to get sys schema guard", KR(ret)); + } else if (OB_FAIL(guard.get_tenant_ids(tenant_ids))) { LOG_WARN("get all tenants failed", KR(ret)); } else { const int64_t current_time = ObTimeUtility::current_time(); @@ -11756,19 +11759,36 @@ int ObRootService::purge_recyclebin_objects(int64_t purge_each_time) obrpc::Int64 affected_rows = 0; obrpc::ObPurgeRecycleBinArg arg; int64_t purge_sum = purge_each_time; + const bool is_standby = PRIMARY_CLUSTER != ObClusterInfoGetter::get_cluster_type_v2(); + const ObSimpleTenantSchema 
*simple_tenant = NULL; // ignore ret for (int i = 0; i < tenant_ids.count() && in_service() && purge_sum > 0; ++i) { int64_t purge_time = GCONF._recyclebin_object_purge_frequency; + const uint64_t tenant_id = tenant_ids.at(i); if (purge_time <= 0) { break; } + if (OB_SYS_TENANT_ID != tenant_id && is_standby) { + // standby cluster won't purge recyclebin automatically. + LOG_TRACE("user tenant won't purge recyclebin automacially in standby cluster", K(tenant_id)); + continue; + } else if (OB_FAIL(guard.get_tenant_info(tenant_id, simple_tenant))) { + LOG_WARN("fail to get simple tenant schema", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(simple_tenant)) { + ret = OB_TENANT_NOT_EXIST; + LOG_WARN("simple tenant schema not exist", KR(ret), K(tenant_id)); + } else if (!simple_tenant->is_normal()) { + // only deal with normal tenant. + LOG_TRACE("tenant which isn't normal won't purge recyclebin automacially", K(tenant_id)); + continue; + } // ignore error code of different tenant ret = OB_SUCCESS; affected_rows = 0; - arg.tenant_id_ = tenant_ids.at(i); + arg.tenant_id_ = tenant_id; arg.expire_time_ = expire_time; arg.auto_purge_ = true; - arg.exec_tenant_id_ = tenant_ids.at(i); + arg.exec_tenant_id_ = tenant_id; LOG_INFO("start purge recycle objects of tenant", K(arg), K(purge_sum)); while (OB_SUCC(ret) && in_service() && purge_sum > 0) { int64_t start_time = ObTimeUtility::current_time(); diff --git a/src/rootserver/restore/ob_restore_scheduler.cpp b/src/rootserver/restore/ob_restore_scheduler.cpp index c203e7b7f..8466b3645 100644 --- a/src/rootserver/restore/ob_restore_scheduler.cpp +++ b/src/rootserver/restore/ob_restore_scheduler.cpp @@ -2610,6 +2610,7 @@ int ObRestoreScheduler::create_user_partitions(const ObPhysicalRestoreJob& job_i LOG_WARN("fail to get tenant tablegroup schemas", KR(ret), K(tenant_id)); } else { // Only non-delay-deleted PG/Standalone partitions should be created. 
+ DEBUG_SYNC(BEFORE_SEND_RESTORE_PARTITIONS_RPC); const int64_t DEFAULT_TIMEOUT = 10 * 1000 * 1000L; // 10s const int64_t TIMEOUT_PER_RPC = GCONF.rpc_timeout; // 2s const int64_t PARTITION_CNT_PER_RPC = 5; diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index fe4a62584..f315c8115 100644 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -293,6 +293,7 @@ class ObString; ACT(BACKUP_BACKUPPIECE_AFTER_SCHEDULE, ) \ ACT(FOLLOWER_BEFORE_UPDATE_RESTORE_FLAG_RESTORE_LOG, ) \ ACT(BEFORE_GLOBAL_INDEX_BUILDER_MOVE_TASK, ) \ + ACT(BEFORE_SEND_RESTORE_PARTITIONS_RPC, ) \ ACT(MAX_DEBUG_SYNC_POINT, ) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); -- GitLab