提交 8cd738a4 编写于 作者: H Hongqin-Li 提交者: ob-robot

Fix bugs in DDL reaping of old replica build tasks

上级 bd83fce1
......@@ -2238,7 +2238,7 @@ int ObSyncTabletAutoincSeqCtx::call_and_process_all_tablet_autoinc_seqs(P &proxy
return ret;
}
int ObDDLRedefinitionTask::try_reap_old_replica_build_task()
int ObDDLRedefinitionTask::reap_old_replica_build_task(bool &need_exec_new_inner_sql)
{
int ret = OB_SUCCESS;
ObSchemaGetterGuard schema_guard;
......@@ -2261,17 +2261,19 @@ int ObDDLRedefinitionTask::try_reap_old_replica_build_task()
const ObTabletID unused_tablet_id;
const ObDDLTaskInfo unused_addition_info;
const int old_ret_code = OB_SUCCESS;
bool need_exec_new_inner_sql = true;
ObAddr invalid_addr;
(void)ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(tenant_id_, dest_table_id,
if (old_execution_id < 0) {
need_exec_new_inner_sql = true;
} else if (OB_FAIL(ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(tenant_id_, dest_table_id,
task_id_, old_execution_id, invalid_addr, trace_id_,
table_schema->get_schema_version(), snapshot_version_, need_exec_new_inner_sql);
if (!need_exec_new_inner_sql) {
table_schema->get_schema_version(), snapshot_version_, need_exec_new_inner_sql))) {
if (OB_EAGAIN != ret) {
LOG_WARN("failed to check and wait old complement task", K(ret));
}
} else if (!need_exec_new_inner_sql) {
if (OB_FAIL(update_complete_sstable_job_status(unused_tablet_id, snapshot_version_, old_execution_id, old_ret_code, unused_addition_info))) {
LOG_INFO("succ to wait and complete old task finished!", K(ret));
LOG_WARN("failed to wait and complete old task finished!", K(ret));
}
} else {
ret = OB_ENTRY_NOT_EXIST;
}
}
return ret;
......
......@@ -126,7 +126,7 @@ public:
virtual void flt_set_task_span_tag() const = 0;
virtual void flt_set_status_span_tag() const = 0;
virtual int cleanup_impl() override;
int try_reap_old_replica_build_task();
int reap_old_replica_build_task(bool &need_exec_new_inner_sql);
INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask,
K(wait_trans_ctx_), K(sync_tablet_autoinc_seq_ctx_), K(build_replica_request_time_),
K(complete_sstable_job_ret_code_), K(snapshot_held_), K(has_synced_autoincrement_),
......
......@@ -720,7 +720,7 @@ int ObIndexBuildTask::release_snapshot(const int64_t snapshot)
return ret;
}
int ObIndexBuildTask::try_reap_old_replica_build_task()
int ObIndexBuildTask::reap_old_replica_build_task(bool &need_exec_new_inner_sql)
{
int ret = OB_SUCCESS;
ObSchemaGetterGuard schema_guard;
......@@ -743,17 +743,19 @@ int ObIndexBuildTask::try_reap_old_replica_build_task()
const ObTabletID unused_tablet_id;
const ObDDLTaskInfo unused_addition_info;
const int old_ret_code = OB_SUCCESS;
bool need_exec_new_inner_sql = true;
ObAddr invalid_addr;
(void)ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(tenant_id_, dest_table_id,
if (old_execution_id < 0) {
need_exec_new_inner_sql = true;
} else if (OB_FAIL(ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(tenant_id_, dest_table_id,
task_id_, old_execution_id, invalid_addr, trace_id_,
table_schema->get_schema_version(), snapshot_version_, need_exec_new_inner_sql);
if (!need_exec_new_inner_sql) {
table_schema->get_schema_version(), snapshot_version_, need_exec_new_inner_sql))) {
if (OB_EAGAIN != ret) {
LOG_WARN("failed to check and wait old complement task", K(ret));
}
} else if (!need_exec_new_inner_sql) {
if (OB_FAIL(update_complete_sstable_job_status(unused_tablet_id, snapshot_version_, old_execution_id, old_ret_code, unused_addition_info))) {
LOG_INFO("succ to wait and complete old task finished!", K(ret));
}
} else {
ret = OB_ENTRY_NOT_EXIST;
}
}
return ret;
......@@ -855,7 +857,14 @@ int ObIndexBuildTask::wait_data_complement()
// submit a job to complete sstable for the index table on snapshot_version
if (OB_SUCC(ret) && !state_finished && !is_sstable_complete_task_submitted_) {
if (OB_SUCCESS == try_reap_old_replica_build_task()) {
bool need_exec_new_inner_sql = false;
if (OB_FAIL(reap_old_replica_build_task(need_exec_new_inner_sql))) {
if (OB_EAGAIN == ret) {
ret = OB_SUCCESS; // retry
} else {
LOG_WARN("failed to reap old task", K(ret));
}
} else if (!need_exec_new_inner_sql) {
state_finished = true;
} else if (OB_FAIL(send_build_single_replica_request())) {
LOG_WARN("fail to send build single replica request", K(ret));
......
......@@ -127,7 +127,7 @@ private:
const share::schema::ObTableSchema &index_schema,
const share::schema::ObIndexStatus new_status);
int check_health();
int try_reap_old_replica_build_task();
int reap_old_replica_build_task(bool &need_exec_new_inner_sql);
int send_build_single_replica_request();
int check_build_single_replica(bool &is_end);
int check_need_verify_checksum(bool &need_verify);
......
......@@ -323,7 +323,14 @@ int ObTableRedefinitionTask::table_redefinition(const ObDDLTaskStatus next_task_
}
if (OB_SUCC(ret) && !is_build_replica_end && 0 == build_replica_request_time_) {
if (OB_SUCCESS == try_reap_old_replica_build_task()) {
bool need_exec_new_inner_sql = false;
if (OB_FAIL(reap_old_replica_build_task(need_exec_new_inner_sql))) {
if (OB_EAGAIN == ret) {
ret = OB_SUCCESS; // retry
} else {
LOG_WARN("failed to reap old task", K(ret));
}
} else if (!need_exec_new_inner_sql) {
is_build_replica_end = true;
} else if (OB_FAIL(send_build_replica_request())) {
LOG_WARN("fail to send build replica request", K(ret));
......
......@@ -1178,7 +1178,7 @@ int ObCheckTabletDataComplementOp::check_task_inner_sql_session_status(
if (OB_ISNULL(root_service = GCTX.root_service_)) {
ret = OB_ERR_SYS;
LOG_WARN("fail to get sql proxy, root service is null.!");
} else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || trace_id.is_invalid() || !inner_sql_exec_addr.is_valid())) {
} else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || trace_id.is_invalid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tenant_id), K(trace_id), K(inner_sql_exec_addr));
} else {
......@@ -1240,8 +1240,6 @@ int ObCheckTabletDataComplementOp::check_task_inner_sql_session_status(
while (OB_SUCC(ret)) {
if (OB_FAIL(result->next())) {
if (OB_ITER_END == ret) {
LOG_INFO("success to get result, and no inner sql task", K(ret), K(sql_string.ptr()),
K(ip_str), K(trace_id_str), K(tenant_id), K(sql_string));
ret = OB_SUCCESS;
break;
} else {
......@@ -1250,8 +1248,6 @@ int ObCheckTabletDataComplementOp::check_task_inner_sql_session_status(
} else {
is_old_task_session_exist = true;
EXTRACT_UINT_FIELD_MYSQL(*result, "session_id", session_id, uint64_t);
LOG_INFO("succ to match inner sql session in trace id", K(ret), K(sql_string.ptr()),
K(session_id), K(tenant_id), K(ip_str), K(trace_id_str), K(sql_string));
}
}
}
......@@ -1659,8 +1655,8 @@ int ObCheckTabletDataComplementOp::check_tablet_checksum_update_status(
if (report_checksum_cnt == tablet_count) {
is_checksums_all_report = true;
} else {
ret = OB_EAGAIN;
LOG_INFO("not all tablet has update checksum, will re-check",
is_checksums_all_report = false;
LOG_INFO("not all tablet has update checksum",
K(ret), K(tablet_idx), K(tablet_count), K(is_checksums_all_report));
}
}
......@@ -1728,7 +1724,7 @@ int ObCheckTabletDataComplementOp::check_finish_report_checksum(
} else if (OB_FAIL(check_tablet_checksum_update_status(tenant_id, index_table_id, ddl_task_id, execution_id, dest_tablet_ids, is_checksums_all_report))) {
LOG_WARN("fail to check tablet checksum update status, maybe EAGAIN", K(ret), K(tenant_id), K(dest_tablet_ids), K(execution_id));
} else if (!is_checksums_all_report) {
ret = OB_ERR_UNEXPECTED;
ret = OB_EAGAIN;
LOG_WARN("tablets checksum not all report!", K(is_checksums_all_report), K(ret));
}
return ret;
......@@ -1761,15 +1757,12 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
ret = OB_INVALID_ARGUMENT;
LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id));
} else {
LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id));
while (OB_SUCC(ret) && is_old_task_session_exist) {
if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) {
LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr));
} else if (!is_old_task_session_exist) {
LOG_WARN("old inner sql session is not exist.", K(ret));
} else {
usleep(10 * 1000); // sleep 10ms
}
if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) {
LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr));
} else if (is_old_task_session_exist) {
ret = OB_EAGAIN;
} else {
LOG_INFO("old inner sql session is not exist.", K(ret));
}
// After old session exits, the rule of retry is specified as follows
......@@ -1787,6 +1780,8 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
ObArray<ObTabletID> dest_tablet_ids;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, table_id, dest_tablet_ids))) {
LOG_WARN("fail to get tablets", K(ret), K(tenant_id), K(table_id));
} else if (OB_FAIL(check_tablet_checksum_update_status(tenant_id, table_id, ddl_task_id, execution_id, dest_tablet_ids, is_dst_checksums_all_report))) {
LOG_WARN("fail to check tablet checksum update status.", K(ret), K(tenant_id), K(dest_tablet_ids), K(execution_id));
} else if (is_dst_checksums_all_report) {
......@@ -1794,8 +1789,9 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
LOG_INFO("no need execute because all tablet sstable has build finished", K(need_exec_new_inner_sql));
}
}
LOG_INFO("end to check and wait complement task", K(ret),
K(table_id), K(is_old_task_session_exist), K(is_dst_checksums_all_report), K(need_exec_new_inner_sql));
if (OB_EAGAIN != ret) {
LOG_INFO("end to check and wait complement task", K(ret),
K(table_id), K(is_old_task_session_exist), K(is_dst_checksums_all_report), K(need_exec_new_inner_sql));
}
return ret;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册