/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX STORAGE #include "storage/ob_partition_storage.h" #include "share/ob_worker.h" #include "lib/time/ob_time_utility.h" #include "lib/utility/ob_tracepoint.h" #include "lib/utility/utility.h" #include "lib/container/ob_array_iterator.h" #include "lib/stat/ob_diagnose_info.h" #include "lib/word_segment/ob_word_segment.h" #include "lib/bloom_filter/ob_bloomfilter.h" #include "lib/file/file_directory_utils.h" #include "lib/objectpool/ob_resource_pool.h" #include "common/rowkey/ob_rowkey.h" #include "common/row/ob_row_store.h" #include "storage/ob_base_storage_info.h" #include "common/sql_mode/ob_sql_mode_utils.h" #include "share/ob_debug_sync.h" #include "share/ob_cluster_version.h" #include "share/ob_index_trans_status_reporter.h" #include "share/ob_index_task_table_operator.h" #include "share/ob_index_status_table_operator.h" #include "share/ob_index_checksum.h" #include "share/ob_index_build_stat.h" #include "share/ob_sstable_checksum_operator.h" #include "share/ob_unique_index_row_transformer.h" #include "share/allocator/ob_memstore_allocator_mgr.h" #include "common/ob_range.h" #include "share/ob_tenant_mgr.h" #include "storage/ob_table_scan_iterator.h" #include "storage/ob_value_row_iterator.h" #include "storage/ob_i_partition_component_factory.h" #include "storage/ob_i_partition_report.h" #include "storage/blocksstable/ob_store_file.h" #include "storage/blocksstable/slog/ob_base_storage_logger.h" #include "storage/compaction/ob_partition_merge_util.h" #include "storage/memtable/ob_memtable.h" #include "storage/ob_partition_log.h" #include "storage/ob_saved_storage_info.h" #include "storage/transaction/ob_trans_service.h" #include "storage/transaction/ob_trans_part_ctx.h" #include "storage/ob_single_merge.h" #include "storage/ob_multiple_get_merge.h" #include "storage/ob_multiple_scan_merge.h" #include "storage/ob_index_merge.h" #include "storage/ob_query_iterator_factory.h" #include "storage/ob_partition_merge_task.h" #include "storage/ob_partition_split_task.h" #include "storage/ob_interm_macro_mgr.h" #include "storage/ob_long_ops_monitor.h" #include "storage/ob_freeze_info_snapshot_mgr.h" #include "storage/ob_store_row_comparer.h" #include "share/schema/ob_schema_getter_guard.h" #include "ob_warm_up.h" #include "observer/ob_server_struct.h" #include "sql/ob_sql_utils.h" #include "sql/ob_sql_define.h" #include "sql/engine/ob_operator.h" #include "lib/stat/ob_diagnose_info.h" #include "share/partition_table/ob_partition_table_operator.h" #include "ob_partition_service.h" #include "storage/ob_sstable_merge_info_mgr.h" #include "share/ob_task_define.h" #include #include #include "ob_table_mgr.h" #include "storage/ob_store_row_filter.h" #include "storage/ob_partition_split.h" #include "observer/omt/ob_tenant_node_balancer.h" #include "lib/hash/ob_hashmap.h" #include "ob_partition_range_spliter.h" #include "storage/ob_sstable_dump_error_info.h" #include "storage/ob_pg_storage.h" namespace oceanbase { using namespace oceanbase::common; using namespace oceanbase::share; using namespace oceanbase::share::schema; using namespace oceanbase::blocksstable; using namespace oceanbase::compaction; using namespace oceanbase::memtable; namespace storage { // can only reset pointer of memtable, cannot reset other variables void ObStorageWriterGuard::reset() { if (NULL != memtable_) { memtable_->dec_write_ref(); memtable_ = NULL; } } bool ObStorageWriterGuard::need_to_refresh_table(ObTablesHandle& tables_handle) { int ret = OB_SUCCESS; bool bool_ret = false; ObITable* table = tables_handle.get_last_table(); if (NULL == table) { bool_ret = true; } else if (!table->is_memtable()) { bool_ret = false; } else if (tables_handle.check_store_expire()) { bool_ret = true; } else if (OB_FAIL(static_cast(table)->inc_write_ref())) { bool_ret = true; } else { bool_ret = false; memtable_ = static_cast(table); } if (bool_ret && check_if_need_log()) { LOG_ERROR("refresh table too much times", K(ret), KP(table), K(store_ctx_.cur_pkey_)); } return bool_ret; } int ObStorageWriterGuard::refresh_and_protect_table(ObRelativeTable& relative_table) { int ret = OB_SUCCESS; ObTablesHandle& tables_handle = relative_table.tables_handle_; // cannot reset store_ reset(); while (OB_SUCC(ret) && need_to_refresh_table(tables_handle)) { if (OB_FAIL(store_->get_read_tables(relative_table.get_table_id(), store_ctx_.mem_ctx_->get_read_snapshot(), tables_handle, relative_table.allow_not_ready()))) { STORAGE_LOG(WARN, "fail to get read tables", K(ret), "pkey", relative_table.get_table_id()); } } return ret; } bool ObStorageWriterGuard::check_if_need_log() { bool need_log = false; if ((++retry_count_ % GET_TS_INTERVAL) == 0) { const int64_t cur_ts = common::ObTimeUtility::current_time(); if (0 >= last_ts_) { last_ts_ = cur_ts; } else if (cur_ts - last_ts_ >= LOG_INTERVAL_US) { last_ts_ = cur_ts; need_log = true; } else { // do nothing } } return need_log; } // called by pg, cannot reset pg_memtable_mgr_ int ObStorageWriterGuard::refresh_and_protect_pg_memtable(ObPGStorage& pg_storage, ObTablesHandle& tables_handle) { int ret = OB_SUCCESS; reset(); if (OB_ISNULL(pg_memtable_mgr_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "pg memtable mgr is null, unexpected error", K(ret), KP_(pg_memtable_mgr)); } else { static const int64_t MAX_RETRY_CNT = 4000; static const int64_t FAST_RETRY_CNT = 1000; const bool reset_handle = true; const bool include_active_memtable = true; while (OB_SUCC(ret) && need_to_refresh_table(tables_handle)) { if (retry_count_ > MAX_RETRY_CNT) { ret = OB_EAGAIN; STORAGE_LOG(ERROR, "refresh memtable retry too many times", K(ret), K(store_ctx_.cur_pkey_)); } else if (retry_count_ > FAST_RETRY_CNT) { usleep(retry_count_); } if (OB_FAIL(ret)) { } else if (pg_storage.is_empty_pg()) { // special error code ret = OB_EMPTY_PG; STORAGE_LOG(WARN, "refresh and protect pg memtable error", K(ret), "pg_key", pg_storage.get_partition_key()); } else if (OB_FAIL(pg_memtable_mgr_->get_memtables( tables_handle, reset_handle, -1 /*read latest memtable*/, include_active_memtable))) { STORAGE_LOG(WARN, "fail to get memtables", K(ret)); } else { // do nothing } } } return ret; } ObStorageWriterGuard::~ObStorageWriterGuard() { uint32_t& interval = get_writing_throttling_sleep_interval(); if ((!is_replay_) && need_control_mem_ && interval > 0) { uint64_t timeout = 10000; // 10s common::ObWaitEventGuard wait_guard(common::ObWaitEventIds::MEMSTORE_MEM_PAGE_ALLOC_WAIT, timeout, 0, 0, interval); bool need_sleep = true; const int32_t SLEEP_INTERVAL_PER_TIME = 100 * 1000; // 100ms int64_t left_interval = interval; if (NULL != store_ctx_.mem_ctx_ && !is_replay_) { int64_t query_left_time = store_ctx_.mem_ctx_->get_abs_lock_wait_timeout() - ObTimeUtility::current_time(); left_interval = min(left_interval, query_left_time); } if (NULL != memtable_) { need_sleep = memtable_->is_active_memtable(); } reset(); int tmp_ret = OB_SUCCESS; // speed of clog replay is not controled by sleep, but by ObReplayEngine's control while ((left_interval > 0) && need_sleep) { // because left_interval and SLEEP_INTERVAL_PER_TIME both are greater than // zero, so it's safe to convert to uint32_t, be careful with comparation between int and uint uint32_t sleep_interval = static_cast(min(left_interval, SLEEP_INTERVAL_PER_TIME)); usleep(sleep_interval); left_interval -= sleep_interval; if (NULL != store_ctx_.mem_ctx_) { ObGMemstoreAllocator* memstore_allocator = NULL; if (OB_SUCCESS != (tmp_ret = ObMemstoreAllocatorMgr::get_instance().get_tenant_memstore_allocator( store_ctx_.mem_ctx_->get_tenant_id(), memstore_allocator))) { } else if (OB_ISNULL(memstore_allocator)) { OB_LOG(WARN, "get_tenant_mutil_allocator failed", K(store_ctx_.cur_pkey_), K(tmp_ret)); } else { need_sleep = memstore_allocator->need_do_writing_throttle(); } } } } reset(); if (!is_replay_) { interval = 0; } } bool ObPartitionPrefixAccessStat::AccessStat::is_valid() const { return bf_access_cnt_ > 0; } ObPartitionPrefixAccessStat& ObPartitionPrefixAccessStat::operator=(const ObPartitionPrefixAccessStat& other) { if (this != &other) { MEMCPY(this, &other, sizeof(ObPartitionPrefixAccessStat)); } return *this; } int ObPartitionPrefixAccessStat::add_stat(const ObTableAccessStat& stat) { int ret = OB_SUCCESS; const int64_t prefix = stat.rowkey_prefix_; if (OB_UNLIKELY(prefix > MAX_ROWKEY_PREFIX_NUM)) { } else if (OB_UNLIKELY(prefix < 0)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "rowkey prefix should be greater than 0"); } else { AccessStat& rowkey_prefix = rowkey_prefix_[prefix]; rowkey_prefix.bf_access_cnt_ += stat.bf_access_cnt_; rowkey_prefix.bf_filter_cnt_ += stat.bf_filter_cnt_; rowkey_prefix.empty_read_cnt_ += stat.empty_read_cnt_; } return ret; } int ObPartitionPrefixAccessStat::add_stat(const ObTableScanStatistic& stat) { int ret = OB_SUCCESS; const int64_t prefix = stat.rowkey_prefix_; if (OB_UNLIKELY(prefix > MAX_ROWKEY_PREFIX_NUM)) { } else if (OB_UNLIKELY(prefix < 0)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "rowkey prefix should be greater than 0"); } else { AccessStat& rowkey_prefix = rowkey_prefix_[prefix]; rowkey_prefix.bf_access_cnt_ += stat.bf_access_cnt_; rowkey_prefix.bf_filter_cnt_ += stat.bf_filter_cnt_; rowkey_prefix.empty_read_cnt_ += stat.empty_read_cnt_; } return ret; } int ObPartitionPrefixAccessStat::get_optimal_prefix(int64_t& prefix) { int ret = OB_SUCCESS; int64_t opt_prefix = 0; int64_t filter_cnt = 0; for (int i = 1; i <= MAX_ROWKEY_PREFIX_NUM; ++i) { if (filter_cnt < rowkey_prefix_[i].bf_filter_cnt_) { filter_cnt = rowkey_prefix_[i].bf_filter_cnt_; opt_prefix = i; } } if (opt_prefix > 0) { prefix = opt_prefix; } else if (0 == opt_prefix && prefix <= MAX_ROWKEY_PREFIX_NUM) { ret = OB_INVALID_ARGUMENT; prefix = 0; STORAGE_LOG(DEBUG, "invalid rowkey prefix", K(opt_prefix), K(prefix)); } return ret; } // json format, do not quote json names to make string more readable // example [{prefix:1,filter:0,access:1,empty:1},{prefix:2,filter:2,access:2,empty:0}] int64_t ObPartitionPrefixAccessStat::to_string(char* buf, const int64_t buf_len) const { int64_t pos = 0; int print_count = 0; J_ARRAY_START(); for (int i = 0; i <= MAX_ROWKEY_PREFIX_NUM; ++i) { const AccessStat& stat = rowkey_prefix_[i]; if (stat.is_valid()) { if (print_count > 0) { J_COMMA(); } J_OBJ_START(); J_KV("prefix", i, "filter", stat.bf_filter_cnt_, "access", stat.bf_access_cnt_, "empty", stat.empty_read_cnt_); J_OBJ_END(); print_count++; } } J_ARRAY_END(); return pos; } template int ObPartitionStorage::get_merge_opt(const int64_t merge_version, const int64_t storage_version, const int64_t work_version, const int64_t born_version, const T& old_val, const T& new_val, T& opt) { int ret = OB_SUCCESS; // TODO(lincang: after merge_version changed to trans_version, needs to check <= 0) if (merge_version < 0 || storage_version < 0 || work_version < 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(merge_version), K(storage_version), K(work_version)); } else { if (storage_version == born_version) { if (work_version > merge_version) { opt = old_val; } else { opt = new_val; } } else if (storage_version < born_version) { opt = old_val; } else if (storage_version > born_version) { opt = new_val; } } return ret; } ObPartitionStorage::ObPartitionStorage() : cluster_version_(CLUSTER_VERSION_140), pkey_(), schema_service_(NULL), cp_fty_(NULL), txs_(NULL), pg_memtable_mgr_(NULL), is_inited_(false), merge_successed_(true), merge_timestamp_(0), merge_failed_cnt_(0), store_(), replay_replica_type_(REPLICA_TYPE_FULL), prefix_access_stat_() {} ObPartitionStorage::~ObPartitionStorage() { destroy(); } int ObPartitionStorage::init(const common::ObPartitionKey& pkey, ObIPartitionComponentFactory* cp_fty, share::schema::ObMultiVersionSchemaService* schema_service, transaction::ObTransService* txs, ObPGMemtableMgr& pg_memtable_mgr) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; STORAGE_LOG(WARN, "The storage has been inited, ", K(ret)); } else if (!pkey.is_valid() || NULL == schema_service || NULL == cp_fty || NULL == txs) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument, ", K(schema_service), K(cp_fty), K(txs), K(ret)); } else { pkey_ = pkey; cp_fty_ = cp_fty; schema_service_ = schema_service; txs_ = txs; pg_memtable_mgr_ = &pg_memtable_mgr; is_inited_ = true; } return ret; } void ObPartitionStorage::destroy() { FLOG_INFO("destroy partition storage", K_(pkey), K(lbt())); if (OB_UNLIKELY(!is_inited_)) { store_.destroy(); pkey_.reset(); prefix_access_stat_.reset(); schema_service_ = NULL; cp_fty_ = NULL; pg_memtable_mgr_ = NULL; is_inited_ = false; } } bool ObPartitionStorage::is_inited() const { bool bool_ret = true; // no lock, caller should control concurrency if (!is_inited_) { bool_ret = false; } else if (!store_.is_inited()) { bool_ret = false; } else { // do nothing } return bool_ret; } int ObPartitionStorage::ObDMLRunningCtx::prepare_column_desc( const common::ObIArray& column_ids, const ObRelativeTable& table, ObColDescIArray& col_descs) { int ret = OB_SUCCESS; int64_t count = column_ids.count(); if (column_ids.count() <= 0 || !table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(column_ids), K(table)); } else { ObColDesc col_desc; for (int64_t i = 0; OB_SUCC(ret) && i < count; ++i) { if (OB_FAIL(table.get_col_desc(column_ids.at(i), col_desc))) { STORAGE_LOG(WARN, "fail to get column description", "column_id", column_ids.at(i)); } else if (OB_FAIL(col_descs.push_back(col_desc))) { STORAGE_LOG(WARN, "fail to add column description", K(col_desc)); } } } return ret; } int ObPartitionStorage::ObDMLRunningCtx::prepare_column_info(const ObIArray& column_ids) { int ret = OB_SUCCESS; const ObRelativeTable& table = relative_tables_.data_table_; if (column_ids.count() <= 0 || !table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(column_ids), K(table)); } else if (table.use_schema_param()) { col_descs_ = &(dml_param_.table_param_->get_col_descs()); if (col_descs_->count() <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "col desc is empty", K(ret), K(table)); } else if (relative_tables_.idx_cnt_ > 0 || T_DML_UPDATE == dml_type_) { col_map_ = &(dml_param_.table_param_->get_col_map()); } } else { // construct column description from schema void* ptr = nullptr; ObColDescArray* tmp_col = nullptr; if (OB_ISNULL(ptr = allocator_.alloc(static_cast(sizeof(ObColDescArray))))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "fail to alloc column description", K(ret)); } else if (FALSE_IT(tmp_col = new (ptr) ObColDescArray())) { // do nothing } else if (OB_FAIL(prepare_column_desc(column_ids, table, *tmp_col))) { STORAGE_LOG(WARN, "fail to prepare column description", K(ret)); allocator_.free(ptr); ptr = nullptr; tmp_col = nullptr; } else { col_descs_ = tmp_col; } // construct column map from schema ptr = nullptr; ColumnMap* tmp_map = nullptr; if (OB_SUCC(ret) && (relative_tables_.idx_cnt_ > 0 || T_DML_UPDATE == dml_type_)) { if (OB_ISNULL(ptr = allocator_.alloc(static_cast(sizeof(ColumnMap))))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "fail to alloc column map", K(ret)); } else if (FALSE_IT(tmp_map = new (ptr) ColumnMap(allocator_))) { // do nothing } else if (OB_FAIL(tmp_map->init(column_ids))) { STORAGE_LOG(WARN, "init columns map fail", K(ret)); allocator_.free(tmp_map); ptr = nullptr; tmp_map = nullptr; } else { col_map_ = tmp_map; } } } return ret; } int ObPartitionStorage::write_index_row( ObRelativeTable& relative_table, const ObStoreCtx& ctx, const ObColDescIArray& idx_columns, ObStoreRow& index_row) { int ret = OB_SUCCESS; DEBUG_SYNC(DELAY_INDEX_WRITE); if (OB_UNLIKELY(relative_table.is_domain_index())) { uint64_t domain_column_id = OB_INVALID_ID; if (OB_FAIL(relative_table.get_fulltext_column(domain_column_id))) { STORAGE_LOG(WARN, "failed to get domain column id", K(ret)); } else { int64_t pos = -1; for (int64_t i = 0; OB_SUCC(ret) && -1 == pos && i < idx_columns.count(); ++i) { if (domain_column_id == idx_columns.at(i).col_id_) { pos = i; } } if (OB_SUCC(ret)) { ObSEArray words; ObObj orig_obj = index_row.row_val_.cells_[pos]; ObString orig_string; if (OB_UNLIKELY(-1 == pos)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "domain index has no domain column", K(domain_column_id), K(idx_columns), K(ret)); } else if (orig_obj.is_null()) { // skip } else if (OB_FAIL(orig_obj.get_string(orig_string))) { STORAGE_LOG(WARN, "get string from original obj failed", K(ret)); } else if (OB_FAIL(split_on(orig_string, ',', words))) { STORAGE_LOG(WARN, "failed to split string", K(orig_string), K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) { index_row.row_val_.cells_[pos].set_string(index_row.row_val_.cells_[pos].get_type(), words.at(i)); if (OB_FAIL( write_row(relative_table, ctx, relative_table.get_rowkey_column_num(), idx_columns, index_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to write index row", K(relative_table.get_table_id()), K(relative_table.get_rowkey_column_num()), K(idx_columns), K(index_row)); } } } // recover original data index_row.row_val_.cells_[pos] = orig_obj; } } } } else { if (OB_FAIL(write_row(relative_table, ctx, relative_table.get_rowkey_column_num(), idx_columns, index_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to write index row", K(relative_table.get_table_id()), K(relative_table.get_rowkey_column_num()), K(idx_columns), K(index_row)); } } } return ret; } int ObPartitionStorage::ObDMLRunningCtx::prepare_index_row() { int ret = OB_SUCCESS; if (!relative_tables_.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid relative tables", K(ret)); } else { STORAGE_LOG(DEBUG, "index does not exist, no need to malloc index work member"); if (OB_FAIL(malloc_store_row(allocator_, relative_tables_.max_col_num_, idx_row_))) { STORAGE_LOG(WARN, "fail to malloc temp table row", K(ret)); } else { idx_row_->flag_ = T_DML_DELETE == dml_type_ ? ObActionFlag::OP_DEL_ROW : ObActionFlag::OP_ROW_EXIST; idx_row_->set_dml(dml_type_); } } if (OB_FAIL(ret)) { idx_row_ = nullptr; } return ret; } int ObPartitionStorage::reshape_delete_row( ObDMLRunningCtx& run_ctx, RowReshape*& row_reshape, ObStoreRow& tbl_row, ObStoreRow& new_tbl_row) { int ret = OB_SUCCESS; const ObColDescIArray& col_descs = *run_ctx.col_descs_; ObSQLMode sql_mode = run_ctx.dml_param_.sql_mode_; RowReshape* reshape_ptr = nullptr; bool need_reshape = false; if (OB_FAIL(need_reshape_table_row(tbl_row.row_val_, tbl_row.row_val_.get_count(), sql_mode, need_reshape))) { LOG_WARN("fail to check need reshape new", K(ret), K(tbl_row.row_val_), K(sql_mode)); } else if (need_reshape && nullptr == row_reshape) { if (OB_FAIL( malloc_rows_reshape(run_ctx.allocator_, col_descs, 1, run_ctx.relative_tables_.data_table_, reshape_ptr))) { LOG_WARN("fail to malloc reshape", K(ret), K(col_descs), K(sql_mode)); } else { row_reshape = reshape_ptr; } } if (OB_FAIL(ret)) { } else if (OB_FAIL(reshape_row( tbl_row.row_val_, tbl_row.row_val_.get_count(), row_reshape, new_tbl_row, need_reshape, sql_mode))) { LOG_WARN("fail to reshape new", K(ret), K(new_tbl_row.row_val_), K(need_reshape), K(sql_mode)); } return ret; } int ObPartitionStorage::delete_row(ObDMLRunningCtx& run_ctx, RowReshape*& row_reshape, const ObNewRow& row) { int ret = OB_SUCCESS; const ObDMLBaseParam& dml_param = run_ctx.dml_param_; const ObStoreCtx& ctx = run_ctx.store_ctx_; int64_t rowkey_size = run_ctx.relative_tables_.data_table_.get_rowkey_column_num(); ObStoreRow& tbl_row = run_ctx.tbl_row_; ObStoreRow new_tbl_row; ObStoreRow del_row; ObSEArray update_idx; // update_idx is a dummy param here tbl_row.flag_ = ObActionFlag::OP_DEL_ROW; tbl_row.set_dml(T_DML_DELETE); tbl_row.row_val_ = row; if (OB_FAIL(reshape_delete_row(run_ctx, row_reshape, tbl_row, tbl_row))) { LOG_WARN("failed to reshape row", K(ret), K(tbl_row)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_old_row_legitimacy(run_ctx, row))) { LOG_WARN("check old row legitimacy failed", K(row)); } else if (!dml_param.is_total_quantity_log_) { if (OB_FAIL(write_row(run_ctx.relative_tables_.data_table_, ctx, rowkey_size, *run_ctx.col_descs_, tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to set row", K_(pkey), K(rowkey_size), K(*run_ctx.col_descs_), K(tbl_row), K(ret)); } } } else if (dml_param.is_total_quantity_log_) { update_idx.reset(); // update_idx is a dummy param here new_tbl_row.reset(); new_tbl_row.flag_ = ObActionFlag::OP_DEL_ROW; new_tbl_row.set_dml(T_DML_DELETE); new_tbl_row.row_val_ = tbl_row.row_val_; tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; tbl_row.set_dml(T_DML_UNKNOWN); if (OB_FAIL(write_row(run_ctx.relative_tables_.data_table_, ctx, rowkey_size, *run_ctx.col_descs_, update_idx, tbl_row, new_tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to set row", K_(pkey), K(rowkey_size), K(*run_ctx.col_descs_), K(tbl_row), K(new_tbl_row), K(ret)); } } else { STORAGE_LOG(DEBUG, "Success to del main table row, ", K(tbl_row), K(new_tbl_row)); } } if (OB_SUCC(ret)) { bool null_idx_val = false; for (int64_t i = 0; OB_SUCC(ret) && i < run_ctx.relative_tables_.idx_cnt_; ++i) { ObRelativeTable& relative_table = run_ctx.relative_tables_.index_tables_[i]; int64_t size = relative_table.get_rowkey_column_num(); if (OB_ISNULL(run_ctx.idx_row_) || OB_ISNULL(run_ctx.col_map_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "index row or column map is NULL", KP(run_ctx.idx_row_), KP(run_ctx.col_map_), K(ret)); } else if (OB_FAIL(relative_table.build_index_row(tbl_row.row_val_, *run_ctx.col_map_, true, run_ctx.idx_row_->row_val_, null_idx_val, &run_ctx.idx_col_descs_))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_delete_index_legitimacy(run_ctx, relative_table, run_ctx.idx_row_->row_val_))) { LOG_WARN("check delete index legitimacy failed", K(ret)); } else if (OB_FAIL(write_index_row(relative_table, ctx, run_ctx.idx_col_descs_, *run_ctx.idx_row_))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to set index row", "index_id", relative_table.get_table_id(), K(size), K(run_ctx.idx_col_descs_), "index_row", to_cstring(*run_ctx.idx_row_), K(ret)); } } else { STORAGE_LOG(DEBUG, "Success to del index row, ", K(*run_ctx.idx_row_)); } } } return ret; } int ObPartitionStorage::delete_row( const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, const ObNewRow& row) { int ret = OB_SUCCESS; RowReshape* row_reshape = nullptr; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || !row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "input pointers can not be NULL", K(dml_param), K(column_ids), K(row), KP(ctx.mem_ctx_), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_DELETE); if (OB_FAIL(run_ctx.init(&column_ids, NULL, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(run_ctx.allocator_, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_DELETE, column_ids, NO_CHANGE, run_ctx.dml_param_.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } else if (OB_FAIL(delete_row(run_ctx, row_reshape, row))) { STORAGE_LOG(WARN, "fail to delete row", K(pkey_), K(row), K(ret)); } else { EVENT_ADD(STORAGE_DELETE_ROW_COUNT, 1); } free_row_reshape(run_ctx.allocator_, row_reshape, 1); } return ret; } int ObPartitionStorage::delete_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, ObNewRowIterator* row_iter, int64_t& affected_rows) { int ret = OB_SUCCESS; RowReshape* row_reshape = nullptr; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); int64_t afct_num = 0; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_ISNULL(row_iter) || !ctx.is_valid() || column_ids.count() <= 0 || OB_ISNULL(row_iter)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG( WARN, "input pointers can not be NULL", K(dml_param), K(column_ids), KP(row_iter), KP(ctx.mem_ctx_), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_DELETE); ObNewRow* row = NULL; if (OB_FAIL(run_ctx.init(&column_ids, NULL, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(run_ctx.allocator_, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_DELETE, column_ids, NO_CHANGE, run_ctx.dml_param_.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } // delete table row and index rows while (OB_SUCC(ret) && OB_SUCC(row_iter->get_next_row(row))) { if (ObTimeUtility::current_time() > run_ctx.dml_param_.timeout_) { ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(run_ctx.dml_param_), K(ret)); } else if (OB_ISNULL(row)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "get next row from iterator failed", KP(row), K(ret)); } else if (OB_FAIL(delete_row(run_ctx, row_reshape, *row))) { STORAGE_LOG(WARN, "fail to delete row", K(pkey_), K(row), K(ret)); } else { ++afct_num; } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } if (OB_SUCC(ret)) { affected_rows = afct_num; EVENT_ADD(STORAGE_DELETE_ROW_COUNT, afct_num); } free_row_reshape(run_ctx.allocator_, row_reshape, 1); } return ret; } int ObPartitionStorage::malloc_rows_reshape(common::ObIAllocator& work_allocator, const ObColDescIArray& col_descs, const int64_t row_count, const ObRelativeTable& table, RowReshape*& row_reshape_ins) { int ret = OB_SUCCESS; int64_t binary_buffer_len = 0; ObSEArray, common::OB_ROW_MAX_COLUMNS_COUNT> binary_len_array; if (col_descs.count() <= 0 || nullptr != row_reshape_ins || !table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(col_descs), K(row_reshape_ins), K(table)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < col_descs.count(); ++i) { const share::schema::ObColDesc& col_desc = col_descs.at(i); if (col_desc.col_type_.is_binary()) { int32_t data_length = 0; if (OB_FAIL(table.get_column_data_length(col_desc.col_id_, data_length))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "get column data length fail", K(ret), K(col_desc)); } else if (OB_FAIL(binary_len_array.push_back(std::make_pair(i, data_length)))) { STORAGE_LOG(WARN, "fail to push element into row_reshape_ins", K(ret), K(i), K(data_length)); } else { if (OB_UNLIKELY(data_length < 0)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "wrong data length", K(col_desc.col_id_), K(data_length), K(ret)); } else { binary_buffer_len += data_length; } STORAGE_LOG(INFO, "get binary len", K(data_length), K(col_desc), K(i)); } } } } if (OB_SUCC(ret)) { void* ptr = NULL; int64_t num = col_descs.count(); int64_t reshape_size = sizeof(RowReshape); int64_t cell_size = sizeof(common::ObObj) * num; int64_t total_size = reshape_size + cell_size + binary_buffer_len; if (NULL == (ptr = work_allocator.alloc(row_count * total_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "failed to malloc temp row cells", K(ret)); } else { row_reshape_ins = new (ptr) RowReshape[row_count]; int64_t reshape_sizes = row_count * reshape_size; int64_t remain_size = cell_size + binary_buffer_len; for (int64_t i = 0; OB_SUCC(ret) && i < row_count; i++) { int64_t offset = reshape_sizes + i * remain_size; void* cell_ptr = static_cast(ptr) + offset; row_reshape_ins[i].row_reshape_cells_ = new (cell_ptr) ObObj[num](); row_reshape_ins[i].binary_buffer_len_ = binary_buffer_len; if (binary_buffer_len > 0) { row_reshape_ins[i].binary_buffer_ptr_ = static_cast(cell_ptr) + cell_size; } if (binary_len_array.count() > 0) { if (OB_FAIL(row_reshape_ins[i].binary_len_array_.assign(binary_len_array))) { STORAGE_LOG(WARN, "failed to assign binary_len_array", K(ret), K(pkey_)); } } STORAGE_LOG(INFO, "binary_len_array", K(binary_len_array), K(col_descs)); } } } return ret; } int ObPartitionStorage::malloc_rows_reshape_if_need(ObIAllocator& work_allocator, const ObColDescIArray& col_descs, const int64_t row_count, const ObRelativeTable& table, const ObSQLMode sql_mode, RowReshape*& row_reshape_ins) { int ret = OB_SUCCESS; bool char_binary_exists = false; bool has_empty_string = false; bool char_only = true; int64_t binary_buffer_len = 0; ObSEArray, common::OB_ROW_MAX_COLUMNS_COUNT> binary_len_array; if (col_descs.count() <= 0 || NULL != row_reshape_ins || !table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(col_descs), K(row_reshape_ins), K(table)); } for (int64_t i = 0; OB_SUCC(ret) && i < col_descs.count(); ++i) { const share::schema::ObColDesc& col_desc = col_descs.at(i); if (col_desc.col_type_.is_fixed_len_char_type() || col_desc.col_type_.is_binary()) { char_binary_exists = true; if (col_desc.col_type_.is_binary()) { char_only = false; int32_t data_length = 0; if (OB_FAIL(table.get_column_data_length(col_desc.col_id_, data_length))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "get column data length fail", K(ret), K(col_desc)); } else if (OB_FAIL(binary_len_array.push_back(std::make_pair(i, data_length)))) { STORAGE_LOG(WARN, "fail to push element into row_reshape_ins", K(ret), K(i), K(data_length)); } else { if (OB_UNLIKELY(data_length < 0)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "wrong data length", K(col_desc.col_id_), K(data_length), K(ret)); } else { binary_buffer_len += data_length; } } } } else if (is_oracle_compatible(sql_mode) && col_desc.col_type_.is_character_type()) { has_empty_string = true; // STORAGE_LOG(WARN, "Pstr2", K(col_desc), K(lbt()), K(has_empty_string)); } } if (OB_SUCCESS == ret && (char_binary_exists || has_empty_string)) { void* ptr = NULL; int64_t num = col_descs.count(); int64_t reshape_size = sizeof(RowReshape); int64_t cell_size = sizeof(common::ObObj) * num; int64_t total_size = reshape_size + cell_size + binary_buffer_len; if (NULL == (ptr = work_allocator.alloc(row_count * total_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "failed to malloc temp row cells", K(ret)); } else { row_reshape_ins = new (ptr) RowReshape[row_count]; int64_t reshape_sizes = row_count * reshape_size; int64_t remain_size = cell_size + binary_buffer_len; for (int64_t i = 0; OB_SUCC(ret) && i < row_count; i++) { int64_t offset = reshape_sizes + i * remain_size; row_reshape_ins[i].char_only_ = char_only; void* cell_ptr = static_cast(ptr) + offset; row_reshape_ins[i].row_reshape_cells_ = new (cell_ptr) ObObj[num](); row_reshape_ins[i].binary_buffer_len_ = binary_buffer_len; if (binary_buffer_len > 0) { row_reshape_ins[i].binary_buffer_ptr_ = static_cast(cell_ptr) + cell_size; } if (binary_len_array.count() > 0) { if (OB_FAIL(row_reshape_ins[i].binary_len_array_.assign(binary_len_array))) { STORAGE_LOG(WARN, "failed to assign binary_len_array", K(ret), K(pkey_)); } } } } } return ret; } void ObPartitionStorage::free_row_reshape(ObIAllocator& work_allocator, RowReshape*& row_reshape_ins, int64_t row_count) { if (NULL != row_reshape_ins) { for (int64_t i = 0; i < row_count; i++) { row_reshape_ins[i].~RowReshape(); } work_allocator.free(row_reshape_ins); row_reshape_ins = NULL; } } int ObPartitionStorage::need_reshape_table_row(const ObNewRow& row, RowReshape* row_reshape_ins, int64_t row_reshape_cells_count, ObSQLMode sql_mode, bool& need_reshape) const { int ret = OB_SUCCESS; need_reshape = false; if (!row.is_valid() || row_reshape_cells_count != row.get_count()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(row), K(row.count_), K(row_reshape_cells_count), K(ret)); } else { if (NULL == row_reshape_ins) { // do not need reshape } else if (row_reshape_ins->char_only_) { ObString space_pattern; for (int64_t i = 0; !need_reshape && i < row.get_count(); ++i) { const ObObj& cell = row.get_cell(i); if (cell.is_fixed_len_char_type()) { space_pattern = ObCharsetUtils::get_const_str(cell.get_collation_type(), ' '); } if (cell.is_fixed_len_char_type() && cell.get_string_len() >= space_pattern.length() && 0 == MEMCMP(cell.get_string_ptr() + cell.get_string_len() - space_pattern.length(), space_pattern.ptr(), space_pattern.length())) { need_reshape = true; } else if (is_oracle_compatible(sql_mode) && cell.is_character_type() && cell.get_string_len() == 0) { // Oracle compatibility mode: '' as null need_reshape = true; STORAGE_LOG(DEBUG, "Pstor2", K(cell), K(cell.get_string()), K(need_reshape)); } } } else { need_reshape = true; // with binary, we do not check it } } return ret; } int ObPartitionStorage::need_reshape_table_row( const common::ObNewRow& row, const int64_t column_cnt, ObSQLMode sql_mode, bool& need_reshape) const { int ret = OB_SUCCESS; need_reshape = false; if (!row.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(row), K(row.count_)); } else { ObString space_pattern; for (int64_t i = 0; !need_reshape && i < column_cnt; ++i) { const ObObj& cell = row.get_cell(i); if (cell.is_fixed_len_char_type()) { space_pattern = ObCharsetUtils::get_const_str(cell.get_collation_type(), ' '); } if (cell.is_fixed_len_char_type() && cell.get_string_len() >= space_pattern.length() && 0 == MEMCMP(cell.get_string_ptr() + cell.get_string_len() - space_pattern.length(), space_pattern.ptr(), space_pattern.length())) { need_reshape = true; } else if (is_oracle_compatible(sql_mode) && cell.is_character_type() && cell.get_string_len() == 0) { // Oracle compatibility mode: '' as null need_reshape = true; STORAGE_LOG(DEBUG, "Pstor2", K(cell), K(cell.get_string()), K(need_reshape)); } else if (cell.is_binary()) { need_reshape = true; } } } return ret; } int ObPartitionStorage::reshape_row(const ObNewRow& row, const int64_t column_cnt, RowReshape* row_reshape_ins, ObStoreRow& tbl_row, bool need_reshape, ObSQLMode sql_mode) const { int ret = OB_SUCCESS; if (column_cnt > row.get_count()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("column cnt can not be larger than row column cnt", K(ret), K(column_cnt), K(row)); } else if (!need_reshape) { tbl_row.row_val_ = row; } else { int64_t binary_len_array_count = row_reshape_ins->binary_len_array_.count(); ObDataBuffer data_buffer(row_reshape_ins->binary_buffer_ptr_, row_reshape_ins->binary_buffer_len_); for (int64_t i = 0, j = 0; OB_SUCC(ret) && i < column_cnt; ++i) { const ObObj& cell = row.get_cell(i); if (cell.is_binary()) { for (; j < binary_len_array_count; j++) { if (row_reshape_ins->binary_len_array_.at(j).first == i) { break; } } if (OB_UNLIKELY(binary_len_array_count <= j)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(binary_len_array_count), K(ret)); } else { const char* str = cell.get_string_ptr(); int32_t len = cell.get_string_len(); char* dest_str = NULL; int32_t binary_len = row_reshape_ins->binary_len_array_.at(j++).second; if (binary_len > len) { if (NULL == (dest_str = (char*)(data_buffer.alloc(binary_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "fail to alloc mem to binary", K(ret), K(i), K(j), K(binary_len)); } else { char pad_char = '\0'; MEMCPY(dest_str, str, len); MEMSET(dest_str + len, pad_char, binary_len - len); } } else if (binary_len == len) { dest_str = const_cast(str); } else if (binary_len < len) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "binary_len should be greater than len", K(ret), K(binary_len), K(len)); } if (OB_SUCC(ret)) { // set_binary set both type_ and cs_type row_reshape_ins->row_reshape_cells_[i].set_binary(ObString(binary_len, dest_str)); } } } else if (is_oracle_compatible(sql_mode) && cell.is_character_type() && cell.get_string_len() == 0) { // Oracle compatibility mode: '' as null LOG_DEBUG("reshape empty string to null", K(cell)); row_reshape_ins->row_reshape_cells_[i].set_null(); } else if (cell.is_fixed_len_char_type()) { const char* str = cell.get_string_ptr(); int32_t len = cell.get_string_len(); ObString space_pattern = ObCharsetUtils::get_const_str(cell.get_collation_type(), ' '); for (; len >= space_pattern.length(); len -= space_pattern.length()) { if (0 != MEMCMP(str + len - space_pattern.length(), space_pattern.ptr(), space_pattern.length())) { break; } } // need to set collation type row_reshape_ins->row_reshape_cells_[i].set_string(cell.get_type(), ObString(len, str)); row_reshape_ins->row_reshape_cells_[i].set_collation_type(cell.get_collation_type()); } else { row_reshape_ins->row_reshape_cells_[i] = cell; } } tbl_row.row_val_.cells_ = row_reshape_ins->row_reshape_cells_; tbl_row.row_val_.count_ = column_cnt; } return ret; } int ObPartitionStorage::reshape_table_rows(const ObNewRow* rows, RowReshape* row_reshape_ins, int64_t row_reshape_cells_count, ObStoreRow* tbl_rows, int64_t row_count, ObSQLMode sql_mode) const { int ret = OB_SUCCESS; bool need_reshape = false; if (row_count <= 0) { ret = OB_ERR_WRONG_VALUE_COUNT_ON_ROW; STORAGE_LOG(WARN, "row count should be bigger than 0", K(row_count), K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < row_count; i++) { if (OB_FAIL(need_reshape_table_row(rows[i], row_reshape_ins, row_reshape_cells_count, sql_mode, need_reshape))) { STORAGE_LOG(WARN, "fail to check whether reshape is needed"); } else if (OB_FAIL(reshape_row( rows[i], rows[i].get_count(), &row_reshape_ins[i], tbl_rows[i], need_reshape, sql_mode))) { STORAGE_LOG(WARN, "fail to reshape row"); } } } return ret; } int ObPartitionStorage::put_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, ObNewRowIterator* row_iter, int64_t& affected_rows) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); int64_t afct_num = 0; int64_t dup_num = 0; transaction::ObTransSnapInfo snap_info; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || OB_ISNULL(row_iter)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ctx.mem_ctx_), K(dml_param), K(column_ids), KP(row_iter), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_UPDATE); ObIAllocator& work_allocator = run_ctx.allocator_; ObNewRow* row = NULL; ObStoreRow& tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; if (OB_FAIL(run_ctx.init(&column_ids, NULL, false, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (!run_ctx.relative_tables_.is_valid()) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "data table schema must not null", K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need(work_allocator, *run_ctx.col_descs_, 1, run_ctx.relative_tables_.data_table_, dml_param.sql_mode_, row_reshape_ins))) { STORAGE_LOG(WARN, "get row reshape instance failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_UPDATE, column_ids, NO_CHANGE, dml_param.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } else { tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; tbl_row.dml_ = T_DML_UPDATE; } while (OB_SUCC(ret) && OB_SUCC(row_iter->get_next_row(row))) { if (ObTimeUtility::current_time() > dml_param.timeout_) { ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(dml_param), K(ret)); } else if (OB_FAIL(reshape_table_rows( row, row_reshape_ins, run_ctx.col_descs_->count(), &tbl_row, 1, dml_param.sql_mode_))) { STORAGE_LOG(WARN, "fail to get reshape row", K(ret), K(run_ctx.col_descs_->count())); } else if (OB_FAIL(direct_insert_row_and_index(run_ctx, tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to write row", K(ret)); } } ++afct_num; } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } free_row_reshape(work_allocator, row_reshape_ins, 1); if (OB_SUCC(ret)) { affected_rows = afct_num; EVENT_ADD(STORAGE_INSERT_ROW_COUNT, afct_num); } } return ret; } int ObPartitionStorage::insert_rows_(ObDMLRunningCtx& run_ctx, const ObNewRow* const rows, const int64_t row_count, ObRowsInfo& rows_info, ObStoreRow* tbl_rows, RowReshape* row_reshape_ins, int64_t& afct_num, int64_t& dup_num) { int ret = OB_SUCCESS; const ObStoreCtx& ctx = run_ctx.store_ctx_; const ObDMLBaseParam& dml_param = run_ctx.dml_param_; const bool is_ignore = dml_param.is_ignore_; const ObRelativeTable& data_table = run_ctx.relative_tables_.data_table_; if (is_ignore) { if (OB_UNLIKELY(row_count > 1)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "ignore not support batch insert", K(row_count), K(ret)); } else { ctx.mem_ctx_->sub_stmt_begin(); } } if (OB_FAIL(ret)) { } else if (ObTimeUtility::current_time() > dml_param.timeout_) { ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(dml_param), K(ret)); } else if (OB_FAIL(reshape_table_rows( rows, row_reshape_ins, run_ctx.col_descs_->count(), tbl_rows, row_count, dml_param.sql_mode_))) { STORAGE_LOG(WARN, "fail to get reshape rows", K(ret)); } else if (OB_FAIL(rows_info.check_duplicate(tbl_rows, row_count, data_table))) { if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret) { char rowkey_buffer[OB_TMP_BUF_SIZE_256]; if (OB_SUCCESS != extract_rowkey(data_table, rows_info.get_duplicate_rowkey(), rowkey_buffer, OB_TMP_BUF_SIZE_256, run_ctx.dml_param_.tz_info_)) { STORAGE_LOG(WARN, "extract rowkey failed"); } else { ObString index_name = "PRIMARY"; if (data_table.is_index_table()) { data_table.get_index_name(index_name); } LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, index_name.length(), index_name.ptr()); } } else { STORAGE_LOG(WARN, "fail to check duplicate", K(ret)); } } else if (OB_FAIL( insert_table_rows(run_ctx, run_ctx.relative_tables_.data_table_, *run_ctx.col_descs_, rows_info))) { STORAGE_LOG(WARN, "failed to insert table rows", K(ret)); } else if (OB_FAIL(insert_index_rows(run_ctx, tbl_rows, row_count))) { STORAGE_LOG(WARN, "failed to insert index rows", K(ret)); } else { afct_num = afct_num + row_count; } if (is_ignore && OB_ERR_PRIMARY_KEY_DUPLICATE == ret) { ctx.mem_ctx_->sub_stmt_end(false); // abort row ++dup_num; ret = OB_SUCCESS; } return ret; } static inline int get_next_rows(ObNewRowIterator* row_iter, bool enable_bulk, ObNewRow*& rows, int64_t& row_count) { row_count = 1; return enable_bulk ? row_iter->get_next_rows(rows, row_count) : row_iter->get_next_row(rows); } int ObPartitionStorage::insert_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, ObNewRowIterator* row_iter, int64_t& affected_rows) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); int64_t afct_num = 0; int64_t dup_num = 0; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || OB_ISNULL(row_iter)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ctx.mem_ctx_), K(dml_param), K(column_ids), KP(row_iter), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_INSERT); ObIAllocator& work_allocator = run_ctx.allocator_; void* ptr = NULL; ObStoreRow* tbl_rows = NULL; RowReshape* row_reshape_ins = NULL; if (OB_FAIL(run_ctx.init(&column_ids, NULL, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_INSERT, column_ids, NO_CHANGE, dml_param.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } const bool is_ignore = dml_param.is_ignore_; int64_t row_count = 0; // index of row that exists int64_t row_count_first_bulk = 0; bool first_bulk = true; ObNewRow* rows = NULL; ObRowsInfo rows_info; const ObRelativeTable& data_table = run_ctx.relative_tables_.data_table_; DEBUG_SYNC(BEFORE_INSERT_ROWS); while (OB_SUCC(ret) && OB_SUCC(get_next_rows(row_iter, !is_ignore, rows, row_count))) { if (row_count <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "row_count should be greater than 0"); } else if (first_bulk) { first_bulk = false; row_count_first_bulk = row_count; if (OB_FAIL(rows_info.init(data_table, ctx, *run_ctx.col_descs_))) { STORAGE_LOG(WARN, "Failed to init rows info", K(ret), K(data_table)); } else if (OB_ISNULL(ptr = work_allocator.alloc(row_count * sizeof(ObStoreRow)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "fail to allocate memory", K(row_count), K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need(work_allocator, *run_ctx.col_descs_, row_count, run_ctx.relative_tables_.data_table_, dml_param.sql_mode_, row_reshape_ins))) { STORAGE_LOG(WARN, "get row reshape instance failed", K(ret)); } else { tbl_rows = new (ptr) ObStoreRow[row_count]; for (int64_t i = 0; i < row_count; i++) { tbl_rows[i].flag_ = ObActionFlag::OP_ROW_EXIST; tbl_rows[i].set_dml(T_DML_INSERT); } } } if (OB_SUCC(ret)) { ret = insert_rows_(run_ctx, rows, row_count, rows_info, tbl_rows, row_reshape_ins, afct_num, dup_num); } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } if (nullptr != ptr) { work_allocator.free(ptr); } free_row_reshape(work_allocator, row_reshape_ins, row_count_first_bulk); if (OB_SUCC(ret)) { affected_rows = afct_num; dml_param.duplicated_rows_ = dup_num; EVENT_ADD(STORAGE_INSERT_ROW_COUNT, afct_num); const ObTableAccessStat& access_stat = rows_info.exist_helper_.table_access_context_.access_stat_; if (access_stat.rowkey_prefix_ > 0 && access_stat.bf_access_cnt_ > 0) { prefix_access_stat_.add_stat(access_stat); } } } return ret; } int ObPartitionStorage::insert_row( const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, const ObNewRow& row) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 100 * 1000); int64_t afct_num = 0; int64_t dup_num = 0; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || !row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ctx.mem_ctx_), K(dml_param), K(column_ids), K(row), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_INSERT); run_ctx.relative_tables_.set_table_param(dml_param.table_param_); ObIAllocator& work_allocator = run_ctx.allocator_; ObStoreRow& tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; if (OB_FAIL(run_ctx.init(&column_ids, NULL, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_INSERT, column_ids, NO_CHANGE, dml_param.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } const bool is_ignore = dml_param.is_ignore_; ObRowsInfo rows_info; const ObRelativeTable& data_table = run_ctx.relative_tables_.data_table_; if (OB_FAIL(ret)) { } else if (OB_FAIL(rows_info.init(data_table, ctx, *run_ctx.col_descs_))) { STORAGE_LOG(WARN, "Failed to init rows info", K(ret), K(data_table)); } else if (OB_FAIL(malloc_rows_reshape_if_need(work_allocator, *run_ctx.col_descs_, 1, run_ctx.relative_tables_.data_table_, dml_param.sql_mode_, row_reshape_ins))) { STORAGE_LOG(WARN, "get row reshape instance failed", K(ret)); } else { tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; tbl_row.set_dml(T_DML_INSERT); } if (OB_SUCC(ret)) { ret = insert_rows_(run_ctx, &row, 1, rows_info, &tbl_row, row_reshape_ins, afct_num, dup_num); } free_row_reshape(work_allocator, row_reshape_ins, 1); if (OB_SUCC(ret)) { dml_param.duplicated_rows_ = dup_num; EVENT_ADD(STORAGE_INSERT_ROW_COUNT, afct_num); const ObTableAccessStat& access_stat = rows_info.exist_helper_.table_access_context_.access_stat_; if (access_stat.rowkey_prefix_ > 0 && access_stat.bf_access_cnt_ > 0) { prefix_access_stat_.add_stat(access_stat); } } } return ret; } int ObPartitionStorage::insert_row(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, const ObIArray& duplicated_column_ids, const ObNewRow& row, const ObInsertFlag flag, int64_t& affected_rows, ObNewRowIterator*& duplicated_rows) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || duplicated_column_ids.count() <= 0 || !row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(ctx.mem_ctx_), K(dml_param), K(column_ids), K(duplicated_column_ids), K(row), K(flag), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_INSERT); ObIAllocator& work_allocator = run_ctx.allocator_; duplicated_rows = NULL; ObStoreRow& tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; if (OB_FAIL(run_ctx.init(&column_ids, NULL, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need(work_allocator, *run_ctx.col_descs_, 1, run_ctx.relative_tables_.data_table_, dml_param.sql_mode_, row_reshape_ins))) { STORAGE_LOG(WARN, "get row reshape instance failed", K(ret)); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_INSERT, column_ids, NO_CHANGE, run_ctx.dml_param_.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } else { tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; tbl_row.set_dml(T_DML_INSERT); if (OB_FAIL(run_ctx.relative_tables_.data_table_.check_rowkey_in_column_ids(duplicated_column_ids, false))) { STORAGE_LOG(WARN, "is not rowkey columns", K(duplicated_column_ids), K(ret)); } else if (OB_FAIL(reshape_table_rows( &row, row_reshape_ins, run_ctx.col_descs_->count(), &tbl_row, 1, dml_param.sql_mode_))) { STORAGE_LOG(WARN, "fail to get reshape row", K(ret), K(run_ctx.col_descs_->count())); } else if (OB_FAIL(get_conflict_rows(run_ctx, flag, duplicated_column_ids, tbl_row.row_val_, duplicated_rows))) { STORAGE_LOG(WARN, "failed to get conflict row(s)", K(duplicated_column_ids), K(row), K(ret)); } else if (NULL == duplicated_rows) { // insert table row and index row(s) if (OB_FAIL(direct_insert_row_and_index(run_ctx, tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to write row", K(ret)); } } else { affected_rows = 1; EVENT_INC(STORAGE_INSERT_ROW_COUNT); } } else { ret = OB_ERR_PRIMARY_KEY_DUPLICATE; } } free_row_reshape(work_allocator, row_reshape_ins, 1); } return ret; } // deprecated it() int ObPartitionStorage::revert_insert_iter(common::ObNewRowIterator* iter) { int ret = OB_SUCCESS; if (iter) { ObQueryIteratorFactory::free_insert_dup_iter(iter); // iter->~ObNewRowIterator(); // ob_free(iter); } return ret; } /* arguments: change_type is only for update rows, * in other DML interface, just ignore this argument */ int ObPartitionStorage::check_other_columns_in_column_ids(const ObRelativeTables& relative_tables, const ColumnMap* column_ids_map, const common::ObIArray& column_ids, const ObRowDml dml_type, const ChangeType change_type, const bool is_total_quantity_log) { int ret = OB_SUCCESS; if (!relative_tables.is_valid() || OB_ISNULL(column_ids_map) || column_ids.count() <= 0 || !relative_tables.data_table_.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "relative tables is invalid", K(ret)); } else if (is_total_quantity_log || T_DML_INSERT == dml_type || (T_DML_UPDATE == dml_type && ROWKEY_CHANGE == change_type)) { if (column_ids.count() != relative_tables.data_table_.get_column_count()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid column_ids", K(ret), K(relative_tables.data_table_), "column_ids", column_ids, "column_ids length", column_ids.count(), "column count", relative_tables.data_table_.get_column_count()); } else if (OB_FAIL(relative_tables.data_table_.check_column_in_map(*column_ids_map))) { // check if column_ids contains all columns STORAGE_LOG(WARN, "check column in map fail", K(ret), K(relative_tables.data_table_)); } } else if (T_DML_UPDATE == dml_type || T_DML_DELETE == dml_type) { // check if column_ids contains all index columns in all related index tables const int64_t rowkey_len = relative_tables.data_table_.get_rowkey_column_num(); for (int64_t i = 0; OB_SUCC(ret) && i < relative_tables.idx_cnt_; ++i) { if (OB_FAIL(relative_tables.index_tables_[i].check_index_column_in_map(*column_ids_map, rowkey_len))) { STORAGE_LOG(WARN, "check index table column in map fail", K(ret), K(relative_tables.index_tables_[i])); } } } else { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "invalid dml type", K(dml_type), K(ret)); } return ret; } // allocate memory for col_map, remember to free the memory after using int ObPartitionStorage::construct_column_ids_map(const common::ObIArray& column_ids, const int64_t total_column_count, ObIAllocator& work_allocator, ColumnMap*& col_map) { int ret = OB_SUCCESS; void* ptr = NULL; if (column_ids.count() <= 0 || total_column_count <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(column_ids), K(total_column_count), K(ret)); } else if (NULL == (ptr = work_allocator.alloc(static_cast(sizeof(ColumnMap))))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "fail to allocate memory for column_ids map", K(ret)); } else { col_map = new (ptr) ColumnMap(work_allocator); if (OB_FAIL(col_map->init(column_ids))) { STORAGE_LOG(WARN, "Init ObColMap failed", K(ret)); } } if (OB_FAIL(ret) && NULL != col_map) { col_map->clear(); work_allocator.free(col_map); col_map = NULL; } return ret; } // free the memory for col_map after using void ObPartitionStorage::deconstruct_column_ids_map(ObIAllocator& work_allocator, ColumnMap*& col_map) { if (NULL != col_map) { col_map->clear(); work_allocator.free(col_map); col_map = NULL; } } /* arguments: upd_column_ids and change_type are only for * update rows, in other DML interfaces, just ignore these * two arguments */ int ObPartitionStorage::check_column_ids_valid(ObIAllocator& work_allocator, const ObRelativeTables& relative_tables, const ColumnMap* col_map, const common::ObIArray& column_ids, const ObRowDml dml_type, const common::ObIArray& upd_column_ids, const ChangeType change_type, const bool is_total_quantity_log) { int ret = OB_SUCCESS; ColumnMap* column_ids_map = nullptr; if (!relative_tables.is_valid() || (NULL != col_map && !col_map->is_inited()) || column_ids.count() <= 0 || upd_column_ids.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(col_map), K(column_ids), K(dml_type), K(upd_column_ids), K(change_type), K(is_total_quantity_log), K(ret)); } else if (nullptr != col_map) { column_ids_map = const_cast(col_map); } else if (OB_FAIL(construct_column_ids_map( column_ids, relative_tables.data_table_.get_column_count(), work_allocator, column_ids_map))) { STORAGE_LOG(WARN, "fail to construct column_ids map", K(ret), K(column_ids), K(column_ids_map)); } else if (NULL == column_ids_map) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "column_ids_map should not be NULL by now", K(ret), K(column_ids_map)); } else { } if (OB_SUCC(ret)) { if (OB_FAIL(relative_tables.data_table_.check_rowkey_in_column_ids(column_ids, true))) { STORAGE_LOG(WARN, "invalid rowkey in column_ids", K(column_ids), K(ret)); } else if (OB_FAIL(check_other_columns_in_column_ids( relative_tables, column_ids_map, column_ids, dml_type, change_type, is_total_quantity_log))) { STORAGE_LOG(WARN, "invalid index column in column_ids", K(column_ids), K(ret)); } else if (T_DML_UPDATE == dml_type) { int32_t idx = -1; for (int64_t i = 0; OB_SUCC(ret) && i < upd_column_ids.count(); ++i) { if (OB_FAIL(column_ids_map->get(upd_column_ids.at(i), idx)) || idx < 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "column_ids lack columns or invalid update column id", K(ret), K(idx), "column id", upd_column_ids.at(i), "upd_column_ids", upd_column_ids, "column_ids", column_ids); } } } else { } } // free the memory allocated by construct_column_ids_map() if (NULL == col_map && NULL != column_ids_map) { deconstruct_column_ids_map(work_allocator, column_ids_map); } return ret; } int ObPartitionStorage::fetch_conflict_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& in_column_ids, const ObIArray& out_column_ids, ObNewRowIterator& check_row_iter, ObIArray& dup_row_iters) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_INSERT); ObIAllocator& work_allocator = run_ctx.allocator_; ObStoreRow& tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; ObNewRow* check_row = NULL; if (OB_FAIL(run_ctx.init(&in_column_ids, NULL, false, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need(work_allocator, *run_ctx.col_descs_, 1, run_ctx.relative_tables_.data_table_, dml_param.sql_mode_, row_reshape_ins))) { STORAGE_LOG(WARN, "get row reshape instance failed", K(ret)); } while (OB_SUCC(ret) && OB_SUCC(check_row_iter.get_next_row(check_row))) { tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; tbl_row.set_dml(T_DML_INSERT); ObNewRowIterator* dup_row_iter = NULL; if (OB_FAIL(reshape_table_rows( check_row, row_reshape_ins, run_ctx.col_descs_->count(), &tbl_row, 1, dml_param.sql_mode_))) { STORAGE_LOG(WARN, "fail to get reshape row", K(ret), K(run_ctx.col_descs_->count())); } else if (OB_FAIL(get_conflict_rows( run_ctx, INSERT_RETURN_ALL_DUP, out_column_ids, tbl_row.row_val_, dup_row_iter))) { STORAGE_LOG(WARN, "failed to get conflict row(s)", K(out_column_ids), K(tbl_row), K(ret)); } else if (OB_FAIL(dup_row_iters.push_back(dup_row_iter))) { STORAGE_LOG(WARN, "store duplicate row iterator failed", K(out_column_ids), K(tbl_row), K(ret)); } if (OB_FAIL(ret) && dup_row_iter != NULL) { ObQueryIteratorFactory::free_insert_dup_iter(dup_row_iter); dup_row_iter = NULL; } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } free_row_reshape(work_allocator, row_reshape_ins, 1); } return ret; } int ObPartitionStorage::single_get_row(ObSingleRowGetter &row_getter, const ObStoreRowkey &rowkey, ObNewRowIterator *&duplicated_rows, int64_t data_table_rowkey_cnt) { int ret = OB_SUCCESS; ObNewRow *row = nullptr; if (OB_FAIL(row_getter.open(rowkey))) { LOG_WARN("init single row getter failed", K(ret)); } else if (OB_FAIL(row_getter.get_next_row(row))) { if (OB_ITER_END != ret) { LOG_WARN("get next row from single row getter failed", K(ret)); } else { ret = OB_SUCCESS; } } else if (NULL == duplicated_rows) { ObValueRowIterator *dup_iter = NULL; if (OB_ISNULL(dup_iter = ObQueryIteratorFactory::get_insert_dup_iter())) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "no memory to alloc ObValueRowIterator", K(ret)); } else { duplicated_rows = dup_iter; if (OB_FAIL(dup_iter->init(true, data_table_rowkey_cnt))) { STORAGE_LOG(WARN, "failed to initialize ObValueRowIterator", K(ret)); } } } if (OB_SUCC(ret) && row != nullptr) { ObValueRowIterator *dup_iter = static_cast(duplicated_rows); if (OB_FAIL(dup_iter->add_row(*row))) { STORAGE_LOG(WARN, "failed to store conflict row", K(*row)); } else { LOG_DEBUG("get conflict row", KPC(row)); } } return ret; } int ObPartitionStorage::get_index_conflict_row(ObDMLRunningCtx &run_ctx, const ObIArray &out_col_ids, ObRelativeTable &relative_table, bool need_index_back, const ObNewRow &row, ObNewRowIterator *&duplicated_rows) { int ret = OB_SUCCESS; ObStoreRow *idx_row = run_ctx.idx_row_; ObNewRowIterator *dup_rowkey_iter = nullptr; bool null_idx_val = false; ObSEArray pk_out_ids; ObArenaAllocator scan_allocator(ObModIds::OB_TABLE_SCAN_ITER); ObIAllocator *allocator = run_ctx.dml_param_.dml_allocator_ != nullptr ? run_ctx.dml_param_.dml_allocator_ : &scan_allocator; ObSingleRowGetter index_row_getter(*allocator, store_); if (OB_FAIL(run_ctx.relative_tables_.data_table_.get_rowkey_column_ids(pk_out_ids))) { LOG_WARN("get rowkey column ids failed", K(ret)); } else if (OB_FAIL(relative_table.build_index_row( row, *run_ctx.col_map_, false, idx_row->row_val_, null_idx_val, nullptr))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else if (OB_FAIL(index_row_getter.init_dml_access_ctx(run_ctx.store_ctx_, run_ctx.dml_param_))) { LOG_WARN("init dml access ctx failed", K(ret)); } else if (OB_FAIL(index_row_getter.init_dml_access_param(relative_table, run_ctx.dml_param_, pk_out_ids))) { LOG_WARN("init basic index param failed", K(ret)); } else { ObTableAccessContext &index_access_ctx = index_row_getter.get_access_ctx(); int64_t dt_rowkey_cnt = run_ctx.relative_tables_.data_table_.get_rowkey_column_num(); if (relative_table.is_storage_index_table()) { index_access_ctx.query_flag_.index_invalid_ = !relative_table.can_read_index(); } else { index_access_ctx.query_flag_.index_invalid_ = false; } ObStoreRowkey index_rowkey; index_rowkey.assign(idx_row->row_val_.cells_, relative_table.get_rowkey_column_num()); if (OB_LIKELY(!need_index_back)) { if (OB_FAIL(single_get_row(index_row_getter, index_rowkey, duplicated_rows, dt_rowkey_cnt))) { LOG_WARN("single get index row failed", K(ret)); } } else { ObStoreRowkey table_rowkey; ObSingleRowGetter data_row_getter(*allocator, store_); if (OB_FAIL(index_row_getter.open(index_rowkey))) { LOG_WARN("get index row failed", K(ret), K(index_rowkey)); } else if (OB_FAIL(convert_row_to_rowkey(index_row_getter, table_rowkey))) { if (OB_ITER_END != ret) { LOG_WARN("convert row to rowkey failed", K(ret)); } else { ret = OB_SUCCESS; } } else if (OB_FAIL(data_row_getter.init_dml_access_ctx(run_ctx.store_ctx_, run_ctx.dml_param_))) { LOG_WARN("init dml access ctx failed", K(ret)); } else if (OB_FAIL(data_row_getter.init_dml_access_param( run_ctx.relative_tables_.data_table_, run_ctx.dml_param_, out_col_ids))) { LOG_WARN("init data table access param failed", K(ret)); } else if (OB_FAIL(single_get_row(data_row_getter, table_rowkey, duplicated_rows, dt_rowkey_cnt))) { LOG_WARN("single get index row failed", K(ret), K(index_rowkey)); } } } LOG_DEBUG("get index conflict row", K(ret), K(need_index_back), K(out_col_ids), K(relative_table), K(row), KPC(duplicated_rows)); if (dup_rowkey_iter != NULL) { ObQueryIteratorFactory::free_insert_dup_iter(dup_rowkey_iter); dup_rowkey_iter = NULL; } return ret; } int ObPartitionStorage::get_conflict_row(ObDMLRunningCtx &run_ctx, const ObIArray &out_col_ids, ObRelativeTable &relative_table, const ObStoreRowkey &rowkey, ObNewRowIterator *&duplicated_rows) { int ret = OB_SUCCESS; ObArenaAllocator scan_allocator(ObModIds::OB_TABLE_SCAN_ITER); ObIAllocator *allocator = run_ctx.dml_param_.dml_allocator_ != nullptr ? run_ctx.dml_param_.dml_allocator_ : &scan_allocator; int64_t data_table_rowkey_cnt = run_ctx.relative_tables_.data_table_.get_rowkey_column_num(); ObSingleRowGetter single_row_getter(*allocator, store_); if (OB_FAIL(single_row_getter.init_dml_access_ctx(run_ctx.store_ctx_, run_ctx.dml_param_))) { LOG_WARN("init dml access ctx failed", K(ret)); } else if (OB_FAIL(single_row_getter.init_dml_access_param(relative_table, run_ctx.dml_param_, out_col_ids))) { LOG_WARN("init dml access param failed", K(ret)); } else if (OB_FAIL(single_get_row(single_row_getter, rowkey, duplicated_rows, data_table_rowkey_cnt))) { LOG_WARN("single get row failed", K(ret)); } return ret; } int ObPartitionStorage::convert_row_to_rowkey(ObSingleRowGetter &index_row_getter, ObStoreRowkey &rowkey) { int ret = OB_SUCCESS; ObNewRow *row = nullptr; if (OB_FAIL(index_row_getter.get_next_row(row))) { if (OB_ITER_END != ret) { LOG_WARN("get next row from index row getter failed", K(ret)); } } else { rowkey.assign(row->cells_, row->count_); } return ret; } int ObPartitionStorage::check_old_row_legitimacy(ObDMLRunningCtx &run_ctx, const ObNewRow &old_row) { int ret = OB_SUCCESS; ObRelativeTable &data_table = run_ctx.relative_tables_.data_table_; ObStoreRowkey rowkey; rowkey.assign(old_row.cells_, data_table.get_rowkey_column_num()); if (OB_UNLIKELY(rowkey.get_obj_cnt() > old_row.count_) || OB_ISNULL(run_ctx.column_ids_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("old row is invalid", K(ret), K(old_row), K(rowkey.get_obj_cnt()), KP(run_ctx.column_ids_)); } else if (data_table.is_index_table() && !data_table.can_read_index()) { // index can not be read during building index, so does not check old index row } else { // the vertical partition is no longer maintained, // and the defense check skips the vertical partition function ObDMLBaseParam &dml_param = const_cast(run_ctx.dml_param_); ObArenaAllocator scan_allocator(ObModIds::OB_TABLE_SCAN_ITER); ObIAllocator *allocator = run_ctx.dml_param_.dml_allocator_ != nullptr ? run_ctx.dml_param_.dml_allocator_ : &scan_allocator; ObSingleRowGetter old_row_getter(*allocator, store_); ObNewRow *storage_old_row = nullptr; const ObIArray &column_ids = *run_ctx.column_ids_; if (OB_FAIL(old_row_getter.init_dml_access_ctx(run_ctx.store_ctx_, dml_param))) { LOG_WARN("init dml access ctx failed", K(ret)); } else if (OB_FAIL(old_row_getter.init_dml_access_param(data_table, dml_param, column_ids))) { LOG_WARN("init dml access param failed", K(ret)); } else if (OB_FAIL(old_row_getter.open(rowkey, true))) { LOG_WARN("open old row getter failed", K(ret), K(rowkey)); } else if (OB_FAIL(old_row_getter.get_next_row(storage_old_row))) { if (OB_ITER_END == ret) { ret = OB_ERR_DEFENSIVE_CHECK; LOG_WARN("old row in storage is not exists", K(ret)); check_leader_changed_for_sql_recheck_(run_ctx, ret); } else { LOG_WARN("get next row from old_row_iter failed", K(ret), KPC(run_ctx.column_ids_), K(old_row)); } } else if (storage_old_row->get_count() != old_row.get_count()) { ret = OB_ERR_DEFENSIVE_CHECK; LOG_WARN("storage old row is not matched with sql old row", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < old_row.get_count(); ++i) { const ObObj &storage_val = storage_old_row->get_cell(i); const ObObj &sql_val = old_row.get_cell(i); int cmp = 0; if (OB_UNLIKELY(OB_HIDDEN_LOGICAL_ROWID_COLUMN_ID == column_ids.at(i))) { // skip check logical rowid, } else if (OB_UNLIKELY(ObLongTextType == storage_val.get_type() && sql_val.is_lob_locator())) { // skip check lob column type, } else if (OB_UNLIKELY(storage_val.is_nop_value())) { bool is_nop = false; if (OB_FAIL(data_table.is_nop_default_value(column_ids.at(i), is_nop))) { LOG_WARN("check column whether has nop default value failed", K(ret), K(column_ids.at(i))); } else if (!is_nop) { ret = OB_ERR_DEFENSIVE_CHECK; LOG_WARN("storage old row is not matched with sql old row", K(ret), K(i), K(column_ids.at(i)), K(storage_val), K(sql_val)); } } else if (OB_FAIL(storage_val.compare(sql_val, cmp)) || 0 != cmp) { LOG_WARN("storage_val is not equal with sql_val, maybe catch a bug", K(ret), K(storage_val), K(sql_val), K(cmp), K(column_ids.at(i))); ret = OB_ERR_DEFENSIVE_CHECK; } check_leader_changed_for_sql_recheck_(run_ctx, ret); } if (OB_ERR_DEFENSIVE_CHECK == ret) { ObString func_name = ObString::make_string("check_old_row_legitimacy"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), "column_id", column_ids, KPC(storage_old_row), "sql_old_row", old_row, "dml_param", run_ctx.dml_param_, "dml_type", run_ctx.dml_type_); } } return ret; } int ObPartitionStorage::check_delete_index_legitimacy( ObDMLRunningCtx &run_ctx, ObRelativeTable &index_table, const ObNewRow &old_row) { int ret = OB_SUCCESS; ObStoreRowkey rowkey(old_row.cells_, index_table.get_rowkey_column_num()); if (OB_UNLIKELY(rowkey.get_obj_cnt() > old_row.count_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("old row is invalid", K(ret), K(old_row), K(rowkey.get_obj_cnt())); } else if (index_table.is_index_table() && !index_table.can_read_index()) { // index can not be read during building index, so does not check old index row } else { // the vertical partition is no longer maintained, // and the defense check skips the vertical partition function ObDMLBaseParam &dml_param = const_cast(run_ctx.dml_param_); ObArenaAllocator scan_allocator(ObModIds::OB_TABLE_SCAN_ITER); ObIAllocator *allocator = run_ctx.dml_param_.dml_allocator_ != nullptr ? run_ctx.dml_param_.dml_allocator_ : &scan_allocator; ObFixedArray column_ids; ObSingleRowGetter old_row_getter(*allocator, store_); ObNewRow *storage_old_row = nullptr; column_ids.set_allocator(allocator); column_ids.set_capacity(index_table.get_rowkey_column_num()); if (OB_FAIL(old_row_getter.init_dml_access_ctx(run_ctx.store_ctx_, dml_param))) { LOG_WARN("init dml access ctx failed", K(ret)); } else if (OB_FAIL(index_table.get_rowkey_column_ids(column_ids))) { LOG_WARN("get rowkey column ids failed", K(ret)); } else if (OB_FAIL(old_row_getter.init_dml_access_param(index_table, dml_param, column_ids))) { LOG_WARN("init dml access param failed", K(ret)); } else if (OB_FAIL(old_row_getter.open(rowkey, true))) { LOG_WARN("open old row getter failed", K(ret), K(rowkey)); } else if (OB_FAIL(old_row_getter.get_next_row(storage_old_row))) { if (OB_ITER_END == ret) { ret = OB_ERR_DEFENSIVE_CHECK; LOG_WARN("old row in storage is not exists", K(ret)); } else { LOG_WARN("get next row from old_row_iter failed", K(ret), KPC(run_ctx.column_ids_), K(old_row)); } } if (OB_ERR_DEFENSIVE_CHECK == ret) { ObString func_name = ObString::make_string("check_delete_index_legitimacy"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), "column_id", column_ids, KPC(storage_old_row), "sql_old_row", old_row, "dml_param", run_ctx.dml_param_, "dml_type", run_ctx.dml_type_); } } return ret; } int ObPartitionStorage::check_new_row_legitimacy(ObDMLRunningCtx &run_ctx, const ObNewRow &new_row) { int ret = OB_SUCCESS; ObRelativeTable &data_table = run_ctx.relative_tables_.data_table_; if (OB_ISNULL(run_ctx.column_ids_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("column ids is nullptr", K(ret)); } else if (OB_FAIL(check_new_row_nullable_value(*run_ctx.column_ids_, data_table, new_row))) { LOG_WARN( "check new row nullable value failed", K(ret), "dml_param", run_ctx.dml_param_, "dml_type", run_ctx.dml_type_); } else if (OB_FAIL(check_new_row_shadow_pk(*run_ctx.column_ids_, data_table, new_row))) { LOG_WARN( "check new row nullable value failed", K(ret), "dml_param", run_ctx.dml_param_, "dml_type", run_ctx.dml_type_); } return ret; } int ObPartitionStorage::check_new_row_nullable_value( const ObIArray &column_ids, ObRelativeTable &data_table, const ObNewRow &new_row) { int ret = OB_SUCCESS; if (OB_UNLIKELY(column_ids.count() > new_row.get_count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("new row is invalid", K(ret), K(new_row.get_count()), K(column_ids.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); ++i) { uint64_t column_id = column_ids.at(i); bool is_nullable = false; if (OB_UNLIKELY(is_shadow_column(column_id))) { // the shadow pk is generated internally, // and the nullable attribute check for it is skipped } else if (OB_FAIL(data_table.is_column_nullable_for_write(column_id, is_nullable))) { LOG_WARN("check is_column_nullable_for_write failed", K(ret), K(column_id)); } else if (new_row.get_cell(i).is_null() && !is_nullable) { bool is_hidden = false; bool is_gen_col = false; bool is_nullable_for_read = false; if (OB_FAIL(data_table.is_column_nullable_for_read(column_id, is_nullable_for_read))) { LOG_WARN("check is nullable for read failed", K(ret)); } else if (is_nullable_for_read) { LOG_TRACE("Catch a defensive nullable error, but this column is not null novalidate", K(column_id), K(column_ids), K(new_row), K(data_table)); } else if (OB_FAIL(data_table.is_hidden_column(column_id, is_hidden))) { LOG_WARN("get is hidden column failed", K(ret), K(column_id)); } else if (OB_FAIL(data_table.is_gen_column(column_id, is_gen_col))) { LOG_WARN("get is gen column failed", K(ret), K(column_id)); } else if (is_hidden && !is_gen_col) { ret = OB_BAD_NULL_ERROR; LOG_WARN("Catch a defensive nullable error, " "maybe cause by add column not null default null ONLINE", K(ret), K(column_id), K(column_ids), K(new_row), K(data_table)); } else { ret = OB_ERR_DEFENSIVE_CHECK; ObString func_name = ObString::make_string("check_new_row_nullable_value"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR( "Fatal Error!!! Catch a defensive error!", K(ret), K(column_id), K(column_ids), K(new_row), K(data_table)); } } } return ret; } int ObPartitionStorage::check_new_row_nullable_value( const ObIArray &col_descs, ObRelativeTable &relative_table, const ObNewRow &new_row) { int ret = OB_SUCCESS; if (OB_UNLIKELY(col_descs.count() > new_row.get_count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("new row is invalid", K(ret), K(new_row.get_count()), K(col_descs.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < col_descs.count(); ++i) { uint64_t column_id = col_descs.at(i).col_id_; bool is_nullable = false; if (OB_UNLIKELY(is_shadow_column(column_id))) { // the shadow pk is generated internally, // and the nullable attribute check for it is skipped } else if (OB_FAIL(relative_table.is_column_nullable_for_write(column_id, is_nullable))) { LOG_WARN("check is_column_nullable_for_write failed", K(ret), K(column_id)); } else if (new_row.get_cell(i).is_null() && !is_nullable) { bool is_hidden = false; bool is_gen_col = false; bool is_nullable_for_read = false; if (OB_FAIL(relative_table.is_column_nullable_for_read(column_id, is_nullable_for_read))) { LOG_WARN("check is nullable for read failed", K(ret)); } else if (is_nullable_for_read) { // this column is not null novalidate, maybe the null column come from the old data // so output trace log and ignore it LOG_TRACE("Catch a defensive nullable error, but this column is not null novalidate", K(column_id), K(col_descs), K(new_row), K(relative_table)); } else if (OB_FAIL(relative_table.is_hidden_column(column_id, is_hidden))) { LOG_WARN("get is hidden column failed", K(ret), K(column_id)); } else if (OB_FAIL(relative_table.is_gen_column(column_id, is_gen_col))) { LOG_WARN("get is gen column failed", K(ret), K(column_id)); } else if (is_hidden && !is_gen_col) { ret = OB_BAD_NULL_ERROR; LOG_WARN("Catch a defensive nullable error, " "maybe cause by add column not null default null ONLINE", K(ret), K(column_id), K(col_descs), K(new_row), K(relative_table)); } else { ret = OB_ERR_DEFENSIVE_CHECK; ObString func_name = ObString::make_string("check_new_row_nullable_value"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), K(column_id), K(col_descs), K(new_row), K(relative_table)); } } } return ret; } int ObPartitionStorage::check_new_row_shadow_pk( const ObIArray &column_ids, ObRelativeTable &data_table, const ObNewRow &new_row) { int ret = OB_SUCCESS; if (data_table.get_shadow_rowkey_column_num() > 0) { // check shadow pk int64_t rowkey_cnt = data_table.get_rowkey_column_num(); int64_t spk_cnt = data_table.get_shadow_rowkey_column_num(); int64_t index_col_cnt = rowkey_cnt - spk_cnt; bool need_spk = false; if (OB_UNLIKELY(index_col_cnt <= 0) || OB_UNLIKELY(column_ids.count() < rowkey_cnt)) { ret = OB_ERR_UNEXPECTED; LOG_WARN( "index column count is invalid", K(ret), K(index_col_cnt), K(rowkey_cnt), K(spk_cnt), K(column_ids.count())); } else if (lib::is_mysql_mode()) { // mysql兼容:只要unique index key中有null列,则需要填充shadow列 bool rowkey_has_null = false; for (int64_t i = 0; !rowkey_has_null && i < index_col_cnt; i++) { rowkey_has_null = new_row.get_cell(i).is_null(); } need_spk = rowkey_has_null; } else { // oracle兼容:只有unique index key全为null列时,才需要填充shadow列 bool is_rowkey_all_null = true; for (int64_t i = 0; is_rowkey_all_null && i < index_col_cnt; i++) { is_rowkey_all_null = new_row.get_cell(i).is_null(); } need_spk = is_rowkey_all_null; } for (int64_t i = index_col_cnt; OB_SUCC(ret) && i < rowkey_cnt; ++i) { uint64_t spk_column_id = column_ids.at(i); uint64_t real_pk_id = spk_column_id - OB_MIN_SHADOW_COLUMN_ID; const ObObj &spk_value = new_row.get_cell(i); int64_t pk_idx = OB_INVALID_INDEX; int cmp = 0; if (OB_LIKELY(!need_spk)) { if (!spk_value.is_null()) { ret = OB_ERR_DEFENSIVE_CHECK; ObString func_name = ObString::make_string("check_new_row_shadow_pk"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), "column_id", column_ids, K(new_row), K(data_table), K(spk_value), K(i), K(spk_column_id), K(real_pk_id)); } } else if (OB_UNLIKELY(!has_exist_in_array(column_ids, real_pk_id, &pk_idx))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("real pk column not exists in column_ids", K(ret), K(column_ids), K(real_pk_id)); } else if (OB_FAIL(new_row.get_cell(pk_idx).compare(spk_value, cmp)) || 0 != cmp) { ret = OB_ERR_DEFENSIVE_CHECK; ObString func_name = ObString::make_string("check_new_row_shadow_pk"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), "column_id", column_ids, K(new_row), K(data_table), K(spk_value), "pk_value", new_row.get_cell(pk_idx), K(pk_idx), K(i), K(spk_column_id), K(real_pk_id)); } } } return ret; } // this function fetch these conflict rows of one row // and get_conflict_rows promise that the rowkey in the head of the duplicated row // so out_col_ids must make sure the rowkey in the head int ObPartitionStorage::get_conflict_rows(ObDMLRunningCtx& run_ctx, const ObInsertFlag flag, const common::ObIArray& out_col_ids, const common::ObNewRow& row, common::ObNewRowIterator*& duplicated_rows) { int ret = OB_SUCCESS; ObRelativeTables &relative_tables = run_ctx.relative_tables_; ObRelativeTable &data_table = relative_tables.data_table_; ObStoreRowkey rowkey; rowkey.assign(row.cells_, data_table.get_rowkey_column_num()); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(get_conflict_row(run_ctx, out_col_ids, data_table, rowkey, duplicated_rows))) { LOG_WARN("get conflict row failed", K(ret), K(rowkey)); } // check conflict row(s) of index table if (OB_SUCC(ret) && !run_ctx.dml_param_.only_data_table_) { // rowkey in the head of out_col_ids bool need_index_back = (out_col_ids.count() != data_table.get_rowkey_column_num()); for (int64_t i = 0; OB_SUCC(ret) && (INSERT_RETURN_ALL_DUP == flag || NULL == duplicated_rows) && i < relative_tables.idx_cnt_; ++i) { if (relative_tables.index_tables_[i].is_unique_index()) { OZ(get_index_conflict_row( run_ctx, out_col_ids, relative_tables.index_tables_[i], need_index_back, row, duplicated_rows)); } } } return ret; } int ObPartitionStorage::extract_rowkey(const ObRelativeTable& table, const common::ObStoreRowkey& rowkey, char* buffer, const int64_t buffer_len, const ObTimeZoneInfo* tz_info) { int ret = OB_SUCCESS; if (!table.is_valid() || !rowkey.is_valid() || OB_ISNULL(buffer) || buffer_len <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(table), K(rowkey), K(buffer), K(buffer_len), K(tz_info)); } else { const int64_t rowkey_size = table.get_rowkey_column_num(); int64_t pos = 0; int64_t valid_rowkey_size = 0; uint64_t column_id = OB_INVALID_ID; MEMSET(buffer, 0, buffer_len); for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_size; i++) { if (OB_FAIL(table.get_rowkey_col_id_by_idx(i, column_id))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Failed to get rowkey column description", K(i), K(ret)); } else if (column_id <= OB_MIN_SHADOW_COLUMN_ID) { valid_rowkey_size++; } } for (int64_t i = 0; OB_SUCC(ret) && i < valid_rowkey_size; ++i) { const ObObj& obj = rowkey.get_obj_ptr()[i]; if (OB_FAIL(obj.print_plain_str_literal(buffer, buffer_len - 1, pos, tz_info))) { STORAGE_LOG(WARN, "fail to print_plain_str_literal", K(ret)); } else if (i < valid_rowkey_size - 1) { if (OB_FAIL(databuff_printf(buffer, buffer_len - 1, pos, "-"))) { LOG_WARN("databuff print failed", K(ret)); } } } if (buffer != nullptr) { buffer[pos++] = '\0'; } } return ret; } int ObPartitionStorage::insert_table_row( ObDMLRunningCtx& run_ctx, ObRelativeTable& relative_table, const ObColDescIArray& col_descs, ObStoreRow& row) { int ret = OB_SUCCESS; const ObStoreCtx& ctx = run_ctx.store_ctx_; bool exists = false; ObColDescArray cols; if (!ctx.is_valid() || col_descs.count() <= 0 || !row.is_valid() || !relative_table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(ctx.mem_ctx_), K(col_descs), K(row), K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < relative_table.get_rowkey_column_num(); ++i) { ret = cols.push_back(col_descs.at(i)); } if (OB_FAIL(ret)) { STORAGE_LOG(WARN, "failed to get rowkey columns"); } else if (((!relative_table.is_storage_index_table()) || relative_table.is_unique_index()) && OB_FAIL(rowkey_exists(relative_table, ctx, cols, row.row_val_, exists))) { STORAGE_LOG(WARN, "failed to check whether row exists", K(row), K(ret)); } else { if (OB_UNLIKELY(exists)) { ret = OB_ERR_PRIMARY_KEY_DUPLICATE; STORAGE_LOG(WARN, "rowkey already exists", K(relative_table.get_table_id()), K(row), K(ret)); } else if (OB_UNLIKELY(relative_table.is_storage_index_table())) { if (OB_FAIL(write_index_row(relative_table, ctx, col_descs, row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to set row", K(row), K(ret)); } } } else if (OB_FAIL(write_row(relative_table, ctx, relative_table.get_rowkey_column_num(), col_descs, row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to set row", K(row), K(ret)); } } } } return ret; } int ObPartitionStorage::insert_table_rows( ObDMLRunningCtx& run_ctx, ObRelativeTable& relative_table, const ObColDescIArray& col_descs, ObRowsInfo& rows_info) { int ret = OB_SUCCESS; bool exists = false; const ObStoreCtx& ctx = run_ctx.store_ctx_; if (!ctx.is_valid() || col_descs.count() <= 0 || !relative_table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(ctx.mem_ctx_), K(col_descs), K(ret)); } else { if (OB_FAIL(ret)) { STORAGE_LOG(WARN, "failed to get rowkey columns"); } else if (((!relative_table.is_storage_index_table()) || relative_table.is_unique_index()) && OB_FAIL(rowkeys_exists(ctx, relative_table, rows_info, exists))) { STORAGE_LOG(WARN, "failed to check whether row exists", K(ret)); } else { if (OB_UNLIKELY(exists)) { ret = OB_ERR_PRIMARY_KEY_DUPLICATE; STORAGE_LOG(WARN, "rowkey already exists", K(relative_table.get_table_id()), K(ctx), K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < rows_info.row_count_; i++) { ObStoreRow &row = rows_info.rows_[i]; if (GCONF.enable_defensive_check() && OB_FAIL(check_new_row_legitimacy(run_ctx, row.row_val_))) { LOG_WARN("check new row legitimacy failed", K(ret), K(row.row_val_)); } else if (OB_LIKELY(!relative_table.is_storage_index_table())) { if (OB_FAIL(write_row(relative_table, ctx, relative_table.get_rowkey_column_num(), col_descs, row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to set row", K(row), K(ret)); } } } else if (OB_FAIL(write_index_row(relative_table, ctx, col_descs, row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to set row", K(row), K(ret)); } } } } if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret && !run_ctx.dml_param_.is_ignore_) { char rowkey_buffer[OB_TMP_BUF_SIZE_256]; if (OB_SUCCESS != extract_rowkey(relative_table, rows_info.get_duplicate_rowkey(), rowkey_buffer, OB_TMP_BUF_SIZE_256, run_ctx.dml_param_.tz_info_)) { STORAGE_LOG(WARN, "extract rowkey failed"); } ObString index_name = "PRIMARY"; if (relative_table.is_index_table()) { if (OB_SUCCESS != relative_table.get_index_name(index_name)) { STORAGE_LOG(WARN, "get_index_name failed"); } } else if (share::is_oracle_mode() && OB_SUCCESS != relative_table.get_primary_key_name(index_name)) { STORAGE_LOG(WARN, "get_pk_name failed"); } LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, index_name.length(), index_name.ptr()); } } return ret; } int ObPartitionStorage::insert_index_rows(ObDMLRunningCtx& run_ctx, ObStoreRow* rows, int64_t row_count) { int ret = OB_SUCCESS; DEBUG_SYNC(DELAY_INDEX_WRITE); ObString index_name; // insert index row(s) for (int64_t k = 0; OB_SUCC(ret) && k < row_count; k++) { ObStoreRow& tbl_row = rows[k]; bool null_idx_val = false; for (int64_t i = 0; OB_SUCC(ret) && i < run_ctx.relative_tables_.idx_cnt_; ++i) { ObRelativeTable& table = run_ctx.relative_tables_.index_tables_[i]; if (OB_FAIL(table.build_index_row(tbl_row.row_val_, *run_ctx.col_map_, false, run_ctx.idx_row_->row_val_, null_idx_val, &run_ctx.idx_col_descs_))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else if (OB_FAIL(insert_table_row(run_ctx, table, run_ctx.idx_col_descs_, *run_ctx.idx_row_))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to set row", K(ret), "index_row", to_cstring(*run_ctx.idx_row_)); } if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret && !run_ctx.dml_param_.is_ignore_) { ObStoreRowkey rowkey(run_ctx.idx_row_->row_val_.cells_, table.get_rowkey_column_num()); char rowkey_buffer[OB_TMP_BUF_SIZE_256]; if (OB_SUCCESS != extract_rowkey(table, rowkey, rowkey_buffer, OB_TMP_BUF_SIZE_256, run_ctx.dml_param_.tz_info_)) { STORAGE_LOG(WARN, "extract rowkey failed", K(rowkey)); } else if (OB_SUCCESS == table.get_index_name(index_name)) { LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, index_name.length(), to_cstring(index_name)); } else { LOG_USER_ERROR( OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, static_cast(sizeof("PRIMARY") - 1), "PRIMARY"); } } } } } return ret; } int ObPartitionStorage::direct_insert_row_and_index(ObDMLRunningCtx& run_ctx, const ObStoreRow& tbl_row) { int ret = OB_SUCCESS; const ObStoreCtx& store_ctx = run_ctx.store_ctx_; ObRelativeTables& relative_tables = run_ctx.relative_tables_; const ObColDescIArray& idx_col_descs = run_ctx.idx_col_descs_; const ColumnMap* col_map = run_ctx.col_map_; ObStoreRow* idx_row = run_ctx.idx_row_; if (!store_ctx.is_valid() || !relative_tables.is_valid() || nullptr == run_ctx.col_descs_ || run_ctx.col_descs_->count() <= 0 || !tbl_row.is_valid() || (relative_tables.idx_cnt_ > 0 && (NULL == col_map || NULL == idx_row))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(relative_tables.idx_cnt_), KP(run_ctx.col_descs_), K(tbl_row), KP(col_map), KP(idx_row), K(ret)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_new_row_legitimacy(run_ctx, tbl_row.row_val_))) { LOG_WARN("check new row legitimacy failed", K(ret), K(tbl_row.row_val_)); } else { const ObColDescIArray& col_descs = *run_ctx.col_descs_; if (OB_FAIL(write_row(relative_tables.data_table_, store_ctx, relative_tables.data_table_.get_rowkey_column_num(), col_descs, tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to write table row", K(relative_tables.data_table_.get_table_id()), K(relative_tables.data_table_.get_rowkey_column_num()), K(col_descs), K(tbl_row), K(ret)); } } bool null_idx_val = false; for (int64_t i = 0; OB_SUCC(ret) && i < relative_tables.idx_cnt_; ++i) { if (OB_FAIL(relative_tables.index_tables_[i].build_index_row( tbl_row.row_val_, *col_map, false, idx_row->row_val_, null_idx_val, &run_ctx.idx_col_descs_))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else if (OB_FAIL(write_index_row(relative_tables.index_tables_[i], store_ctx, idx_col_descs, *idx_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to write index row", K(i), K(relative_tables.index_tables_[i].get_table_id()), K(relative_tables.index_tables_[i].get_rowkey_column_num()), K(idx_col_descs), K(*idx_row)); } } } } return ret; } /* this func is an encapsulation of ObNewRowIterator->get_next_row. * 1. need_copy_cells is true, perform a cells copy, but not a deep copy. * memory for store_row.row_val.cells_ has already allocated before * this func is invoked, no need to alloc memory in this func. * 2. need_copy_cells is false, just perform an assignment, no any copy behavior, */ int ObPartitionStorage::get_next_row_from_iter( ObNewRowIterator* row_iter, ObStoreRow& store_row, const bool need_copy_cells) { int ret = OB_SUCCESS; ObNewRow* row = NULL; if (NULL == row_iter) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(row_iter), K(ret)); } else if (OB_FAIL(row_iter->get_next_row(row))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "fail to iterate a row", K(row), K(ret)); } } else { if (need_copy_cells) { // in this situation, store_row.row_val has already hold mem for cells_, // no need to alloc mem here, we copy cells only. store_row.row_val_.count_ = row->count_; for (int64_t i = 0; i < row->count_; ++i) { store_row.row_val_.cells_[i] = row->cells_[i]; } } else { store_row.row_val_ = *row; } } return ret; } // It is performance unfriendly modification // In theory, it's possible that update statement doesn't modify data // But it is not common in practice // This is the requirement of DRC, we only check primary key of main table // don't need to check primary key of index int ObPartitionStorage::check_rowkey_value_change(const common::ObNewRow& old_row, const common::ObNewRow& new_row, const int64_t rowkey_len, bool& rowkey_change) const { int ret = OB_SUCCESS; if (OB_UNLIKELY(rowkey_len <= 0) || OB_UNLIKELY(old_row.count_ < rowkey_len) || OB_UNLIKELY(new_row.count_ < rowkey_len)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(old_row), K(new_row), K(rowkey_len), K(ret)); } else { rowkey_change = false; for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_len; ++i) { if (old_row.cells_[i] != new_row.cells_[i]) { rowkey_change = true; break; } } } return ret; } int ObPartitionStorage::update_row(ObDMLRunningCtx& run_ctx, const ObIArray& changes, const ObIArray& update_idx, const bool delay_new, RowReshape*& old_row_reshape_ins, RowReshape*& row_reshape_ins, ObStoreRow& old_tbl_row, ObStoreRow& new_tbl_row, ObRowStore* row_store, bool& duplicate) { int ret = OB_SUCCESS; const ObDMLBaseParam& dml_param = run_ctx.dml_param_; const ObColDescIArray& col_descs = *run_ctx.col_descs_; bool data_tbl_rowkey_change = false; int64_t data_tbl_rowkey_len = run_ctx.relative_tables_.data_table_.get_rowkey_column_num(); bool reshape_new_row = false; bool reshape_old_row = false; RowReshape* reshape_ptr = nullptr; ObSQLMode sql_mode = dml_param.sql_mode_; duplicate = false; const bool is_ignore = dml_param.is_ignore_ && !share::is_oracle_mode(); if (is_ignore) { run_ctx.store_ctx_.mem_ctx_->sub_stmt_begin(); } if (col_descs.count() != old_tbl_row.row_val_.get_count() || col_descs.count() != new_tbl_row.row_val_.get_count()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG( WARN, "invalid argument", K(ret), K(col_descs.count()), K(old_tbl_row.row_val_), K(new_tbl_row.row_val_)); } else if (OB_FAIL(need_reshape_table_row( new_tbl_row.row_val_, new_tbl_row.row_val_.get_count(), sql_mode, reshape_new_row))) { LOG_WARN("fail to check need reshape new", K(ret), K(new_tbl_row.row_val_), K(sql_mode)); } else if (OB_FAIL(need_reshape_table_row( old_tbl_row.row_val_, old_tbl_row.row_val_.get_count(), sql_mode, reshape_old_row))) { LOG_WARN("fail to check need reshape old", K(ret), K(old_tbl_row.row_val_), K(sql_mode)); } else if ((nullptr == row_reshape_ins && nullptr != old_row_reshape_ins) || (nullptr != row_reshape_ins && nullptr == old_row_reshape_ins)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("new and old reshape mismatch", K(ret), KP(row_reshape_ins), KP(old_row_reshape_ins)); } else if ((reshape_new_row || reshape_old_row) && (nullptr == row_reshape_ins || nullptr == old_row_reshape_ins)) { if (OB_FAIL( malloc_rows_reshape(run_ctx.allocator_, col_descs, 2, run_ctx.relative_tables_.data_table_, reshape_ptr))) { LOG_WARN("fail to malloc reshape", K(ret), K(col_descs), K(sql_mode)); } else { row_reshape_ins = &(reshape_ptr[0]); old_row_reshape_ins = &(reshape_ptr[1]); } } if (OB_FAIL(ret)) { } else if (OB_FAIL(reshape_row(new_tbl_row.row_val_, new_tbl_row.row_val_.get_count(), row_reshape_ins, new_tbl_row, reshape_new_row, sql_mode))) { LOG_WARN("fail to reshape new", K(ret), K(new_tbl_row.row_val_), K(reshape_new_row), K(sql_mode)); } else if (OB_FAIL(reshape_row(old_tbl_row.row_val_, old_tbl_row.row_val_.get_count(), old_row_reshape_ins, old_tbl_row, reshape_old_row, sql_mode))) { LOG_WARN("fail to reshape new", K(ret), K(old_tbl_row.row_val_), K(reshape_old_row), K(sql_mode)); } else if (ROWKEY_CHANGE == changes.at(0) && OB_FAIL(check_rowkey_value_change( old_tbl_row.row_val_, new_tbl_row.row_val_, data_tbl_rowkey_len, data_tbl_rowkey_change))) { STORAGE_LOG( WARN, "check data table rowkey change failed", K(old_tbl_row), K(new_tbl_row), K(data_tbl_rowkey_len), K(ret)); } else if (OB_FAIL(process_old_row(run_ctx, data_tbl_rowkey_change, changes, old_tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG( WARN, "fail to process old row", K(*run_ctx.col_descs_), K(old_tbl_row), K(data_tbl_rowkey_change), K(ret)); } } else if (delay_new && share::is_oracle_mode()) { // if total quantity log is needed, we should cache both new row and old row, // and the sequence is new_row1, old_row1, new_row2, old_row2...., // if total quantity log isn't needed, just cache new row if (OB_ISNULL(row_store)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "row_store is NULL", K(ret)); } else if (OB_FAIL(row_store->add_row(new_tbl_row.row_val_))) { STORAGE_LOG(WARN, "failed to store new row", K(new_tbl_row), K(ret)); } else if (OB_FAIL(row_store->add_row(old_tbl_row.row_val_))) { STORAGE_LOG(WARN, "failed to store old row", K(old_tbl_row), K(ret)); } } else if (OB_FAIL(process_new_row(run_ctx, changes, update_idx, old_tbl_row, new_tbl_row, data_tbl_rowkey_change))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "fail to process new row", K(new_tbl_row), K(ret)); } } if (is_ignore && OB_ERR_PRIMARY_KEY_DUPLICATE == ret) { run_ctx.store_ctx_.mem_ctx_->sub_stmt_end(false); // abort row duplicate = true; ret = OB_SUCCESS; } return ret; } int ObPartitionStorage::update_row(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, const ObIArray& updated_column_ids, const ObNewRow& old_row, const ObNewRow& new_row) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || updated_column_ids.count() <= 0 || !old_row.is_valid() || !new_row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), KP(ctx.mem_ctx_), K(dml_param), K(column_ids), K(updated_column_ids), K(old_row), K(new_row)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_UPDATE); ObIAllocator& work_allocator = run_ctx.allocator_; ObStoreRow old_tbl_row; ObStoreRow& new_tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; RowReshape* old_row_reshape_ins = NULL; ObSEArray changes; ObSEArray update_idx; bool delay_new = false; // no use bool duplicate = false; if (OB_FAIL(run_ctx.init(&column_ids, &updated_column_ids, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (!run_ctx.relative_tables_.is_valid()) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "data table is not valid", K(ret)); } else if (OB_FAIL(construct_update_idx(run_ctx.col_map_, updated_column_ids, update_idx))) { STORAGE_LOG(WARN, "failed to construct update_idx", K(updated_column_ids), K(ret)); } else if (OB_FAIL(check_rowkey_change(updated_column_ids, run_ctx.relative_tables_, changes, delay_new))) { STORAGE_LOG(WARN, "failed to check rowkey changes", K(ret)); } else if (changes.count() <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "changes flag should contain at least one element", "changes flag count", changes.count()); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_UPDATE, updated_column_ids, changes.at(0), dml_param.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } else { old_tbl_row.row_val_ = old_row; old_tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; old_tbl_row.set_dml(T_DML_UPDATE); new_tbl_row.row_val_ = new_row; new_tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; new_tbl_row.set_dml(T_DML_UPDATE); } if (OB_SUCC(ret) && OB_FAIL(update_row(run_ctx, changes, update_idx, false, // process new row with no delay old_row_reshape_ins, row_reshape_ins, old_tbl_row, new_tbl_row, NULL, duplicate))) { STORAGE_LOG(WARN, "failed to update row", K(pkey_), K(ret), K(changes)); } free_row_reshape(work_allocator, row_reshape_ins, 2); row_reshape_ins = nullptr; old_row_reshape_ins = nullptr; if (OB_SUCC(ret)) { EVENT_ADD(STORAGE_UPDATE_ROW_COUNT, 1); } } return ret; } int ObPartitionStorage::update_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const ObIArray& column_ids, const ObIArray& updated_column_ids, ObNewRowIterator* row_iter, int64_t& affected_rows) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); int64_t afct_num = 0; int64_t dup_num = 0; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || column_ids.count() <= 0 || updated_column_ids.count() <= 0 || OB_ISNULL(row_iter)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), KP(ctx.mem_ctx_), K(dml_param), K(column_ids), K(updated_column_ids), KP(row_iter)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_UPDATE); ObIAllocator& work_allocator = run_ctx.allocator_; ObStoreRow old_tbl_row; void* old_row_cells = NULL; ObStoreRow& new_tbl_row = run_ctx.tbl_row_; RowReshape* row_reshape_ins = NULL; RowReshape* old_row_reshape_ins = NULL; ObSEArray changes; ObSEArray update_idx; ObRowStore row_store; bool delay_new = false; if (OB_FAIL(run_ctx.init(&column_ids, &updated_column_ids, true, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (!run_ctx.relative_tables_.is_valid()) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "data table is not prepared", K(ret)); } else if (OB_FAIL(construct_update_idx(run_ctx.col_map_, updated_column_ids, update_idx))) { STORAGE_LOG(WARN, "failed to construct update_idx", K(updated_column_ids), K(ret)); } else if (OB_FAIL(check_rowkey_change(updated_column_ids, run_ctx.relative_tables_, changes, delay_new))) { STORAGE_LOG(WARN, "failed to check rowkey changes", K(ret)); } else if (changes.count() <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "changes flag should contain at least one element", "changes flag count", changes.count()); #ifndef NDEBUG } else if (OB_FAIL(check_column_ids_valid(work_allocator, run_ctx.relative_tables_, run_ctx.col_map_, column_ids, T_DML_UPDATE, updated_column_ids, changes.at(0), dml_param.is_total_quantity_log_))) { STORAGE_LOG(WARN, "column_ids illegal", K(ret)); #endif } else { int64_t num = run_ctx.relative_tables_.data_table_.get_column_count(); if (NULL == (old_row_cells = work_allocator.alloc(static_cast(sizeof(common::ObObj) * num)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "failed to malloc temp row cells", K(ret)); } else { old_tbl_row.row_val_.cells_ = new (old_row_cells) ObObj[num](); old_tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; old_tbl_row.set_dml(T_DML_UPDATE); new_tbl_row.flag_ = ObActionFlag::OP_ROW_EXIST; new_tbl_row.set_dml(T_DML_UPDATE); } } int64_t got_row_count = 0; while (OB_SUCC(ret) && OB_SUCC(get_next_row_from_iter(row_iter, old_tbl_row, true)) && OB_SUCC(get_next_row_from_iter(row_iter, new_tbl_row, false))) { bool duplicate = false; ++got_row_count; if ((0 == (0x1FF & got_row_count)) && (ObTimeUtility::current_time() > dml_param.timeout_)) { // checking timeout cost too much, so check every 512 rows ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(dml_param), K(ret)); } else if (OB_FAIL(update_row(run_ctx, changes, update_idx, delay_new, old_row_reshape_ins, row_reshape_ins, old_tbl_row, new_tbl_row, &row_store, duplicate))) { STORAGE_LOG(WARN, "failed to update row", K(pkey_), K(ret)); } else if (duplicate) { dup_num++; } else { afct_num++; } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } if (OB_SUCCESS == ret && row_store.get_row_count() > 0) { void* ptr1 = NULL; int64_t num = run_ctx.relative_tables_.data_table_.get_column_count(); if (NULL == (ptr1 = work_allocator.alloc(sizeof(common::ObObj) * num))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "failed to malloc temp row cells", K(ret)); } else { new_tbl_row.row_val_.cells_ = new (ptr1) ObObj[num](); ObRowStore::Iterator row_iter2 = row_store.begin(); // when total_quantity_log is true, we should iterate new_tbl_row and old_tbl_row, and // dispose these two rows together, otherwise, when total_quantity_log is false, // row_iter2 doesn't contain old rows, and old_tbl_row is a dummy param in process_new_row while (OB_SUCC(ret) && OB_SUCC(row_iter2.get_next_row(new_tbl_row.row_val_))) { bool rowkey_change = (ROWKEY_CHANGE == changes.at(0)); int64_t data_tbl_rowkey_len = run_ctx.relative_tables_.data_table_.get_rowkey_column_num(); if (OB_FAIL(row_iter2.get_next_row(old_tbl_row.row_val_))) { STORAGE_LOG(WARN, "fail to get row from row stores", K(ret)); } else if (rowkey_change && OB_FAIL(check_rowkey_value_change( old_tbl_row.row_val_, new_tbl_row.row_val_, data_tbl_rowkey_len, rowkey_change))) { STORAGE_LOG(WARN, "check data table rowkey change failed", K(old_tbl_row), K(new_tbl_row), K(data_tbl_rowkey_len), K(ret)); } else if (OB_FAIL(process_new_row(run_ctx, changes, update_idx, old_tbl_row, new_tbl_row, rowkey_change))) { STORAGE_LOG(WARN, "fail to process new row", K(old_tbl_row), K(new_tbl_row), K(ret)); } } work_allocator.free(ptr1); ptr1 = NULL; new_tbl_row.row_val_.cells_ = NULL; } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } if (NULL != old_row_cells) { work_allocator.free(old_row_cells); } free_row_reshape(work_allocator, row_reshape_ins, 2); row_reshape_ins = nullptr; old_row_reshape_ins = nullptr; old_tbl_row.row_val_.cells_ = NULL; if (OB_SUCC(ret)) { affected_rows = afct_num; dml_param.duplicated_rows_ = dup_num; EVENT_ADD(STORAGE_UPDATE_ROW_COUNT, afct_num); } } return ret; } int ObPartitionStorage::get_change_type( const ObIArray& update_ids, const ObRelativeTable& table, ChangeType& change_type) { int ret = OB_SUCCESS; if (update_ids.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(update_ids), K(ret)); } else { const int64_t count = update_ids.count(); bool is_rowkey = false; change_type = NO_CHANGE; for (int64_t i = 0; OB_SUCC(ret) && i < count && ROWKEY_CHANGE != change_type; ++i) { if (OB_FAIL(table.is_rowkey_column_id(update_ids.at(i), is_rowkey))) { LOG_WARN("check is_rowkey fail", K(ret), K(update_ids.at(i))); } else { change_type = is_rowkey ? ROWKEY_CHANGE : OTHER_CHANGE; } } if (OB_SUCC(ret)) { if (table.is_unique_index() && ROWKEY_CHANGE != change_type) { uint64_t cid = OB_INVALID_ID; bool innullable = true; for (int64_t j = 0; OB_SUCC(ret) && j < table.get_rowkey_column_num(); ++j) { if (OB_FAIL(table.get_rowkey_col_id_by_idx(j, cid))) { LOG_WARN("get rowkey column id fail", K(ret), K(j)); } else if (cid > OB_MIN_SHADOW_COLUMN_ID) { if (innullable) { break; // other_change } else { cid -= OB_MIN_SHADOW_COLUMN_ID; for (int64_t k = 0; OB_SUCC(ret) && k < count; ++k) { if (cid == update_ids.at(k)) { change_type = ND_ROWKEY_CHANGE; break; } } if (ND_ROWKEY_CHANGE == change_type) { break; } } } else { bool is_nullable = false; if (OB_FAIL(table.is_column_nullable_for_write(cid, is_nullable))) { LOG_WARN("check nullable fail", K(ret), K(cid), K(table)); } else if (is_nullable) { innullable = false; } } } } } } return ret; } // first is table, others are indexes int ObPartitionStorage::check_rowkey_change(const ObIArray& update_ids, const ObRelativeTables& relative_tables, ObIArray& changes, bool& delay_new) { int ret = OB_SUCCESS; if (update_ids.count() <= 0 || !relative_tables.is_valid() || changes.count() > 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(update_ids), K(changes.count()), K(ret)); } else { changes.reset(); delay_new = false; const ObRelativeTable* relative_table = NULL; ChangeType type; for (int i = 0; OB_SUCC(ret) && i <= relative_tables.idx_cnt_; ++i) { if (0 == i) { relative_table = &relative_tables.data_table_; } else { relative_table = &relative_tables.index_tables_[i - 1]; } if (OB_FAIL(get_change_type(update_ids, *relative_table, type))) { STORAGE_LOG(WARN, "failed to get change type"); } else { if ((0 == i || relative_table->is_unique_index()) && (ROWKEY_CHANGE == type || ND_ROWKEY_CHANGE == type)) { delay_new = true; } if (OB_FAIL(changes.push_back(type))) { STORAGE_LOG(WARN, "failed to check rowkey change", K(ret)); } } } } return ret; } int ObPartitionStorage::construct_update_idx( const ColumnMap* col_map, const common::ObIArray& upd_col_ids, common::ObIArray& update_idx) { int ret = OB_SUCCESS; int err = OB_SUCCESS; if (OB_ISNULL(col_map) || upd_col_ids.count() <= 0 || update_idx.count() > 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(col_map), K(upd_col_ids), K(upd_col_ids.count()), K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < upd_col_ids.count(); ++i) { int32_t idx = -1; if (OB_SUCCESS != (err = col_map->get(upd_col_ids.at(i), idx)) || idx < 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "column id doesn't exist", "col_id", upd_col_ids.at(i), K(err), K(ret)); } else if (OB_FAIL(update_idx.push_back(idx))) { STORAGE_LOG(WARN, "fail to push idx into update_idx", K(idx), K(ret)); } } } return ret; } int ObPartitionStorage::process_old_row(ObDMLRunningCtx& run_ctx, const bool data_tbl_rowkey_change, const ObIArray& change_flags, const ObStoreRow& tbl_row) { int ret = OB_SUCCESS; const ObStoreCtx& store_ctx = run_ctx.store_ctx_; ObRelativeTables& relative_tables = run_ctx.relative_tables_; const ColumnMap* col_map = run_ctx.col_map_; ObColDescIArray& idx_col_descs = run_ctx.idx_col_descs_; ObStoreRow* idx_row = run_ctx.idx_row_; bool is_delete_total_quantity_log = run_ctx.dml_param_.is_total_quantity_log_; if (!store_ctx.is_valid() || !relative_tables.is_valid() || change_flags.count() <= 0 || nullptr == run_ctx.col_descs_ || run_ctx.col_descs_->count() <= 0 || !tbl_row.is_valid() || (relative_tables.idx_cnt_ > 0 && (NULL == col_map || NULL == idx_row))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(relative_tables.idx_cnt_), KP(col_map), K(change_flags), KP(run_ctx.col_descs_), K(tbl_row), KP(idx_row), K(is_delete_total_quantity_log), K(ret)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_old_row_legitimacy(run_ctx, tbl_row.row_val_))) { LOG_WARN("check old row legitimacy failed", K(tbl_row.row_val_)); } else { const ObColDescIArray& col_descs = *run_ctx.col_descs_; uint64_t table_id = relative_tables.data_table_.get_table_id(); int64_t rowkey_size = relative_tables.data_table_.get_rowkey_column_num(); const bool prelock = run_ctx.dml_param_.prelock_; bool locked = false; if (OB_UNLIKELY(prelock) && OB_FAIL(check_row_locked_by_myself(relative_tables.data_table_, store_ctx, col_descs, ObStoreRowkey(tbl_row.row_val_.cells_, rowkey_size), locked))) { STORAGE_LOG(WARN, "fail to check row locked", K(ret), K(pkey_), K(tbl_row)); } else if (OB_UNLIKELY(prelock) && !locked) { ret = OB_ERR_ROW_NOT_LOCKED; STORAGE_LOG(DEBUG, "row has not been locked", K(ret), K(pkey_), K(tbl_row)); } else if (data_tbl_rowkey_change) { ObStoreRow del_row = tbl_row; del_row.set_dml(T_DML_DELETE); if (!is_delete_total_quantity_log) { if (OB_FAIL(write_row(relative_tables.data_table_, store_ctx, rowkey_size, col_descs, del_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to delete row", K(del_row), K(ret)); } } } else { ObStoreRow new_tbl_row; new_tbl_row.flag_ = ObActionFlag::OP_DEL_ROW; new_tbl_row.set_dml(T_DML_DELETE); new_tbl_row.row_val_ = tbl_row.row_val_; del_row.flag_ = ObActionFlag::OP_ROW_EXIST; del_row.set_dml(T_DML_UNKNOWN); ObSEArray update_idx; if (OB_FAIL(write_row( relative_tables.data_table_, store_ctx, rowkey_size, col_descs, update_idx, del_row, new_tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to delete row", K(del_row), K(ret)); } } } } else { // need to lock main table rows that don't need to be deleted if (OB_FAIL(lock_row(relative_tables.data_table_, store_ctx, col_descs, ObStoreRowkey(tbl_row.row_val_.cells_, rowkey_size)))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "lock row failed", K(table_id), K(col_descs), K(tbl_row), K(rowkey_size)); } } } if (OB_SUCCESS == ret && relative_tables.idx_cnt_ > 0) { bool null_idx_val = false; idx_row->flag_ = ObActionFlag::OP_DEL_ROW; idx_row->set_dml(T_DML_DELETE); int64_t i = 0; while (OB_SUCCESS == ret && i < relative_tables.idx_cnt_) { ChangeType type = change_flags.at(i + 1); bool exists = true; if (ROWKEY_CHANGE == type || ND_ROWKEY_CHANGE == type) { if (OB_FAIL(relative_tables.index_tables_[i].build_index_row( tbl_row.row_val_, *col_map, true, idx_row->row_val_, null_idx_val, &idx_col_descs))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else if (GCONF.enable_defensive_check() && relative_tables.index_tables_[i].can_read_index() && relative_tables.index_tables_[i].is_storage_index_table() && OB_FAIL(rowkey_exists( relative_tables.index_tables_[i], store_ctx, idx_col_descs, idx_row->row_val_, exists))) { STORAGE_LOG(WARN, "failed to check rowkey existing", K(*idx_row), K(ret)); } else if (!exists) { ret = OB_ERR_DEFENSIVE_CHECK; check_leader_changed_for_sql_recheck_(run_ctx, ret); if (OB_FAIL(ret)) { ObString func_name = ObString::make_string("process_old_row"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); STORAGE_LOG(ERROR, "Unexpected old row, update or delete a non exist index row", K(ret), KPC(idx_row), K(relative_tables.data_table_.tables_handle_), K(relative_tables.index_tables_[i]), K(relative_tables.index_tables_[i].tables_handle_)); } } else { // STORAGE_LOG(INFO, "build index row", K(*idx_row), K(null_idx_val), K(type)); if (ND_ROWKEY_CHANGE == type && !null_idx_val) { // ignore delete } else if (OB_FAIL(write_index_row(relative_tables.index_tables_[i], store_ctx, idx_col_descs, *idx_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to set index row", "index_row", to_cstring(*idx_row), K(ret)); } } } } ++i; } } } return ret; } int ObPartitionStorage::process_row_of_data_table(ObDMLRunningCtx& run_ctx, const ObIArray& update_idx, const ObStoreRow& old_tbl_row, const ObStoreRow& new_tbl_row, const bool rowkey_change) { int ret = OB_SUCCESS; const ObStoreCtx& ctx = run_ctx.store_ctx_; ObRelativeTables& relative_tables = run_ctx.relative_tables_; bool is_update_total_quantity_log = run_ctx.dml_param_.is_total_quantity_log_; const common::ObTimeZoneInfo* tz_info = run_ctx.dml_param_.tz_info_; if (!ctx.is_valid() || !relative_tables.is_valid() || nullptr == run_ctx.col_descs_ || run_ctx.col_descs_->count() <= 0 || update_idx.count() <= 0 || (is_update_total_quantity_log && !old_tbl_row.is_valid()) || !new_tbl_row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(ctx.mem_ctx_), KP(run_ctx.col_descs_), K(update_idx), K(old_tbl_row), K(new_tbl_row), K(is_update_total_quantity_log), K(rowkey_change), K(ret)); } else { const ObColDescIArray& col_descs = *run_ctx.col_descs_; bool exists = false; int64_t rowkey_len = relative_tables.data_table_.get_rowkey_column_num(); int64_t table_id = relative_tables.data_table_.get_table_id(); if (rowkey_change && OB_FAIL(rowkey_exists(relative_tables.data_table_, ctx, col_descs, new_tbl_row.row_val_, exists))) { STORAGE_LOG(WARN, "failed to check rowkey exists", K(new_tbl_row), K(ret)); } else if (exists) { char buffer[OB_TMP_BUF_SIZE_256]; ObStoreRowkey rowkey(new_tbl_row.row_val_.cells_, relative_tables.data_table_.get_rowkey_column_num()); ret = OB_ERR_PRIMARY_KEY_DUPLICATE; if (OB_SUCCESS != extract_rowkey(relative_tables.data_table_, rowkey, buffer, OB_TMP_BUF_SIZE_256, tz_info)) { STORAGE_LOG(WARN, "extract rowkey failed", K(rowkey)); } else { ObString index_name = "PRIMARY"; if (relative_tables.data_table_.is_index_table()) { relative_tables.data_table_.get_index_name(index_name); } LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, buffer, index_name.length(), index_name.ptr()); } STORAGE_LOG(WARN, "rowkey already exists", K(new_tbl_row), K(ret)); } else { ObStoreRow new_row; new_row.flag_ = ObActionFlag::OP_ROW_EXIST; new_row.set_dml(rowkey_change ? T_DML_INSERT : T_DML_UPDATE); new_row.row_val_ = new_tbl_row.row_val_; if (is_update_total_quantity_log && !rowkey_change) { ObStoreRow old_row; old_row.flag_ = ObActionFlag::OP_ROW_EXIST; old_row.set_dml(T_DML_UPDATE); old_row.row_val_ = old_tbl_row.row_val_; if (OB_FAIL(write_row(relative_tables.data_table_, ctx, rowkey_len, col_descs, update_idx, old_row, new_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to update to row", K(table_id), K(old_row), K(new_row), K(ret)); } } } else { if (OB_FAIL(write_row(relative_tables.data_table_, ctx, rowkey_len, col_descs, new_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to update to row", K(table_id), K(new_row), K(ret)); } } } } } return ret; } int ObPartitionStorage::process_row_of_index_tables( ObDMLRunningCtx& run_ctx, const ObIArray& change_flags, const ObStoreRow& new_tbl_row) { // index table doesn't need full column clog, so don't need to call set function of memtable full column clog int ret = OB_SUCCESS; const ObStoreCtx& ctx = run_ctx.store_ctx_; ObRelativeTables& relative_tables = run_ctx.relative_tables_; const ColumnMap* col_map = run_ctx.col_map_; ObColDescIArray& idx_col_descs = run_ctx.idx_col_descs_; ObStoreRow* idx_row = run_ctx.idx_row_; const common::ObTimeZoneInfo* tz_info = run_ctx.dml_param_.tz_info_; if (!ctx.is_valid() || !relative_tables.is_valid() || OB_ISNULL(col_map) || change_flags.count() <= 0 || !new_tbl_row.is_valid() || OB_ISNULL(idx_row)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG( WARN, "invalid argument", KP(ctx.mem_ctx_), K(col_map), K(change_flags), K(new_tbl_row), KP(idx_row), K(ret)); } else { bool null_idx_val = false; int64_t i = 0; ObString idx_name; while (OB_SUCCESS == ret && i < relative_tables.idx_cnt_) { ObRelativeTable& relative_table = relative_tables.index_tables_[i]; if (NO_CHANGE != change_flags.at(i + 1)) { idx_row->flag_ = ObActionFlag::OP_ROW_EXIST; if (OB_FAIL(relative_table.build_index_row( new_tbl_row.row_val_, *col_map, false, idx_row->row_val_, null_idx_val, &idx_col_descs))) { STORAGE_LOG(WARN, "failed to generate index row", K(ret)); } else { if (ROWKEY_CHANGE == change_flags.at(i + 1) || (ND_ROWKEY_CHANGE == change_flags.at(i + 1) && null_idx_val)) { idx_row->set_dml(T_DML_INSERT); } else { idx_row->set_dml(T_DML_UPDATE); } if (T_DML_INSERT == idx_row->get_dml() && relative_table.is_unique_index()) { bool exists = false; if (OB_FAIL(rowkey_exists(relative_table, ctx, idx_col_descs, idx_row->row_val_, exists))) { STORAGE_LOG(WARN, "failed to check rowkey existing", K(*idx_row), K(ret)); } else if (exists) { ret = OB_ERR_PRIMARY_KEY_DUPLICATE; char rowkey_buffer[OB_TMP_BUF_SIZE_256]; ObStoreRowkey rowkey(idx_row->row_val_.cells_, relative_table.get_rowkey_column_num()); if (OB_SUCCESS != extract_rowkey(relative_table, rowkey, rowkey_buffer, OB_TMP_BUF_SIZE_256, tz_info)) { STORAGE_LOG(WARN, "extract rowkey failed", K(rowkey)); } if (OB_SUCCESS == relative_table.get_index_name(idx_name)) { LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, idx_name.length(), idx_name.ptr()); } else { LOG_USER_ERROR( OB_ERR_PRIMARY_KEY_DUPLICATE, rowkey_buffer, static_cast(sizeof("PRIMARY") - 1), "PRIMARY"); } STORAGE_LOG(WARN, "index duplicate", "row", to_cstring(idx_row->row_val_)); } else { } } if (OB_SUCC(ret)) { if (OB_FAIL(write_index_row(relative_table, ctx, idx_col_descs, *idx_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "failed to set index row", K(*idx_row), K(ret)); } } } } } ++i; } } return ret; } int ObPartitionStorage::process_new_row(ObDMLRunningCtx& run_ctx, const ObIArray& change_flags, const common::ObIArray& update_idx, const ObStoreRow& old_tbl_row, const ObStoreRow& new_tbl_row, const bool rowkey_change) { int ret = OB_SUCCESS; if (change_flags.count() <= 0 || update_idx.count() <= 0 || !new_tbl_row.is_valid() || change_flags.count() != run_ctx.relative_tables_.idx_cnt_ + 1 || (run_ctx.relative_tables_.idx_cnt_ > 0 && (NULL == run_ctx.col_map_ || NULL == run_ctx.idx_row_))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(run_ctx.relative_tables_.idx_cnt_), KP(run_ctx.col_map_), KP(run_ctx.idx_row_), K(change_flags), K(update_idx), K(new_tbl_row), K(rowkey_change), K(ret)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_new_row_legitimacy(run_ctx, new_tbl_row.row_val_))) { LOG_WARN("check new row legitimacy failed", K(ret), K(new_tbl_row.row_val_)); } else { // write full column clog needs to construct update_idx and pass to memtable if (OB_FAIL(process_row_of_data_table(run_ctx, update_idx, old_tbl_row, new_tbl_row, rowkey_change))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "fail to process_row_of_data_table", K(update_idx), K(old_tbl_row), K(new_tbl_row), K(rowkey_change), K(ret)); } } else if (run_ctx.relative_tables_.idx_cnt_ > 0 && OB_FAIL(process_row_of_index_tables(run_ctx, change_flags, new_tbl_row))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret && OB_TRANSACTION_SET_VIOLATION != ret) { STORAGE_LOG(WARN, "fail to process_row_of_index_tables", K(change_flags), K(new_tbl_row), K(ret)); } } else { } } return ret; } bool ObPartitionStorage::illegal_filter(const ObTableScanParam& param) const { bool bret = true; if (!param.scan_flag_.is_index_back() && param.filters_before_index_back_.count() > 0) { // index filters exist only when index back query bret = false; STORAGE_LOG(INFO, "index filter(s) exist(s) only when index back query!"); } return bret; } // weak consistency read, check schema version // requirement: schema version of read operation should be >= schema version of data // always read old data with new schema int ObPartitionStorage::check_schema_version_for_bounded_staleness_read_(const int64_t table_version_for_read, const int64_t data_max_schema_version, const uint64_t table_id /* table id of table, not PG */) { int ret = OB_SUCCESS; int64_t cur_table_version = OB_INVALID_VERSION; int64_t tenant_schema_version = OB_INVALID_VERSION; if (table_version_for_read >= data_max_schema_version) { // read schema version is biger than max schema version of data, pass } else { // read schema version is smaller than max schema version of data, two possible cases: // 1. max schema version of data is max schema version of table, return schema error, asking for schema refresh // // standalone pg is in this case // // 2. max schema version of data is max schema version of multiple table partitions // // It is the case when pg contains multiple partitions, it can only return max schema version of all partitions // // To differentiate the above two cases, check with the help of local schema version const uint64_t tenant_id = is_inner_table(table_id) ? OB_SYS_TENANT_ID : extract_tenant_id(table_id); ObSchemaGetterGuard schema_guard; // get schema version of this table in schema service if (OB_ISNULL(schema_service_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "invalid schema service", K(ret), K(schema_service_)); } else if (OB_FAIL(schema_service_->get_tenant_full_schema_guard(tenant_id, schema_guard))) { STORAGE_LOG(WARN, "schema service get tenant schema guard fail", K(ret), K(tenant_id), K(table_id)); } else if (OB_FAIL(schema_guard.get_table_schema_version(table_id, cur_table_version))) { STORAGE_LOG(WARN, "get table schema version fail", K(ret), K(table_id)); } // check whether input table version and schema version of this table in schema service same // if not same, refresh schema else if (OB_UNLIKELY(table_version_for_read != cur_table_version)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "schema version for read mismatch", K(ret), K(table_id), K(table_version_for_read), K(cur_table_version), K(data_max_schema_version), K(pkey_)); } // get max schema version of the tenant else if (OB_FAIL(schema_service_->get_tenant_refreshed_schema_version(tenant_id, tenant_schema_version))) { STORAGE_LOG(WARN, "get tenant refreshed schema version fail", K(ret), K(tenant_id)); } else if (tenant_schema_version >= data_max_schema_version) { // if max schema version of the tenant is bigger than data's schema version, // then schema of read operation is newer than data's } else { ret = OB_SCHEMA_NOT_UPTODATE; STORAGE_LOG(WARN, "schema is not up to date for read, need refresh", K(ret), K(table_version_for_read), K(cur_table_version), K(tenant_schema_version), K(data_max_schema_version), K(table_id), K(tenant_id), K(pkey_)); } } STORAGE_LOG(DEBUG, "check schema version for bounded staleness read", K(ret), K(data_max_schema_version), K(table_version_for_read), K(cur_table_version), K(tenant_schema_version), K(table_id), K(pkey_)); return ret; } int ObPartitionStorage::erase_stat_cache() { int ret = OB_SUCCESS; ObSSTable* sstable = nullptr; ObTableHandle table_handle; if (OB_FAIL(ObStatManager::get_instance().erase_table_stat(pkey_))) { STORAGE_LOG(WARN, "failed to erase table stat", K(ret), K(pkey_)); } else if (OB_FAIL(store_.get_last_major_sstable(pkey_.get_table_id(), table_handle))) { STORAGE_LOG(WARN, "failed to get_last_major_sstable", K(ret), K(pkey_)); } else if (OB_FAIL(table_handle.get_sstable(sstable))) { STORAGE_LOG(WARN, "failed to get last major sstable", K(ret), K(pkey_)); } else if (OB_ISNULL(sstable)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "sstable is null", K(ret), K(pkey_)); } else { const ObIArray& column_meta = sstable->get_meta().column_metas_; ObSSTableColumnMeta col_meta; for (int64_t i = 0; OB_SUCC(ret) && i < column_meta.count(); ++i) { if (OB_FAIL(column_meta.at(i, col_meta))) { STORAGE_LOG(WARN, "failed to get column meta", K(ret), K(i), K(pkey_)); } else { if (OB_FAIL(ObStatManager::get_instance().erase_column_stat(pkey_, col_meta.column_id_))) { STORAGE_LOG(WARN, "failed to erase column stat", K(ret), K(pkey_), K(col_meta)); } } } } return ret; } int ObPartitionStorage::table_scan( ObTableScanParam& param, const int64_t data_max_schema_version, common::ObNewRowIterator*& result) { int ret = OB_SUCCESS; ObTableScanIterator* iter = NULL; ObStoreCtx ctx; ctx.cur_pkey_ = pkey_; // STORAGE_LOG(DEBUG, "begin table scan", K(param)); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_UNLIKELY(!param.is_valid()) || OB_UNLIKELY(!illegal_filter(param))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument, ", K(ret), K(param)); } else if (param.trans_desc_->is_bounded_staleness_read() && OB_FAIL(check_schema_version_for_bounded_staleness_read_( param.schema_version_, data_max_schema_version, param.ref_table_id_))) { // check schema_version with ref_table_id, becuase schema_version of scan_param is from ref table STORAGE_LOG(WARN, "check schema version for bounded staleness read fail", K(ret), K(param)); } else if (OB_FAIL(txs_->get_store_ctx(*param.trans_desc_, param.pg_key_, ctx))) { STORAGE_LOG(WARN, "fail to get transaction context", K_(pkey), K(param)); } else { bool ctx_add = false; if (OB_SUCC(ret)) { if (OB_UNLIKELY(NULL == (iter = rp_alloc(ObTableScanIterator, ObTableScanIterator::LABEL)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "no memory"); } else if (OB_FAIL(iter->init(*txs_, ctx, param, store_))) { STORAGE_LOG(WARN, "Fail to init table scan iterator, ", K(ret)); } else { ctx_add = true; if (param.for_update_) { if (OB_FAIL(lock_scan_rows(ctx, param, *iter))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "lock_scan_rows failed", K(ret), K(param)); } } else if (OB_FAIL(iter->rescan(param))) { STORAGE_LOG(WARN, "Fail to rescan iterator, ", K(ret)); } } // save table's schema version in memtable context, for waiting transaction end in building index if (OB_SUCC(ret)) { ctx.mem_ctx_->set_table_version(param.schema_version_); } } } if (OB_SUCC(ret)) { result = iter; } else { if (!ctx_add) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = txs_->revert_store_ctx(*param.trans_desc_, param.pg_key_, ctx))) { STORAGE_LOG(WARN, "fail to revert transaction context", K(tmp_ret)); } } if (NULL != iter) { revert_scan_iter(iter); iter = NULL; } } } return ret; } int ObPartitionStorage::table_scan( ObTableScanParam& param, const int64_t data_max_schema_version, common::ObNewIterIterator*& result) { int ret = OB_SUCCESS; ObTableScanIterIterator* iter = NULL; ObStoreCtx ctx; ctx.cur_pkey_ = pkey_; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_UNLIKELY(!param.is_valid()) || OB_UNLIKELY(!illegal_filter(param))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument, ", K(ret), K(param)); } else if (param.trans_desc_->is_bounded_staleness_read() && OB_FAIL(check_schema_version_for_bounded_staleness_read_( param.schema_version_, data_max_schema_version, param.ref_table_id_))) { // check schema_version with ref_table_id, becuase schema_version of scan_param is from ref table STORAGE_LOG(WARN, "check schema version for bounded staleness read fail", K(ret), K(param)); // need to get store ctx of PG, cur_key_ saves the real partition } else if (OB_FAIL(txs_->get_store_ctx(*param.trans_desc_, param.pg_key_, ctx))) { STORAGE_LOG(WARN, "fail to get transaction context", K(ret), K(param)); } else { bool ctx_add = false; if (OB_SUCC(ret)) { if (OB_UNLIKELY(NULL == (iter = rp_alloc(ObTableScanIterIterator, ObTableScanIterIterator::LABEL)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "no memory"); } else if (OB_FAIL(iter->init(*txs_, ctx, param, store_))) { STORAGE_LOG(WARN, "Fail to init table scan iterator, ", K(ret)); } else { ctx_add = true; if (param.for_update_) { if (OB_FAIL(lock_scan_rows(ctx, param, *iter))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "lock_scan_rows failed", K(ret), K(param)); } } else if (OB_FAIL(iter->rescan(param))) { STORAGE_LOG(WARN, "Fail to rescan iterator, ", K(ret)); } } // save table's schema version in memtable context, for waiting transaction end in building index if (OB_SUCC(ret)) { ctx.mem_ctx_->set_table_version(param.schema_version_); } } } if (OB_SUCC(ret)) { result = iter; } else { if (!ctx_add) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = txs_->revert_store_ctx(*param.trans_desc_, param.pg_key_, ctx))) { STORAGE_LOG(WARN, "fail to revert transaction context", K(tmp_ret)); } } if (NULL != iter) { iter->reset(); rp_free(static_cast(iter), ObTableScanIterIterator::LABEL); iter = NULL; } } } return ret; } int ObPartitionStorage::join_mv_scan(ObTableScanParam& left_param, ObTableScanParam& right_param, const int64_t left_data_max_schema_version, const int64_t right_data_max_schema_version, ObIPartitionStorage& right_storage, common::ObNewRowIterator*& result) { int ret = OB_NOT_SUPPORTED; UNUSEDx(left_param, right_param, left_data_max_schema_version, right_data_max_schema_version, right_storage, result); return ret; } int ObPartitionStorage::prepare_lock_row(const ObTableScanParam& scan_param, const common::ObNewRow& row) { int ret = OB_SUCCESS; if (NULL != scan_param.output_exprs_ && NULL != scan_param.op_) { if (row.count_ < scan_param.output_exprs_->count()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("row cell count not enough", K(ret), K(row.count_), K(scan_param.output_exprs_->count())); } else { for (int64_t i = 0; OB_SUCC(ret) && i < scan_param.output_exprs_->count(); i++) { sql::ObExpr* e = scan_param.output_exprs_->at(i); ObDatum* datum = NULL; if (OB_FAIL(e->eval(scan_param.op_->get_eval_ctx(), datum))) { LOG_WARN("expr evaluate failed", K(ret), K(*e)); } else if (OB_FAIL(datum->to_obj(row.cells_[i], e->obj_meta_, e->obj_datum_map_))) { LOG_WARN("convert datum to obj failed", K(ret), K(*datum)); } } } } return ret; } int ObPartitionStorage::lock_scan_rows( const ObStoreCtx& ctx, const ObTableScanParam& scan_param, ObTableScanIterator& iter) { int ret = OB_SUCCESS; RowReshape* row_reshape = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_ISNULL(ctx.mem_ctx_) || OB_ISNULL(scan_param.table_param_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(ctx.mem_ctx_), KP(scan_param.table_param_)); } else { ObNewRow* row = nullptr; while (OB_SUCC(ret) && OB_SUCC(iter.get_next_row(row))) { if (NULL != scan_param.op_) { if (OB_FAIL(prepare_lock_row(scan_param, *row))) { LOG_WARN("prepare lock row failed", K(ret)); } scan_param.op_->clear_evaluated_flag(); } if (OB_SUCC(ret)) { if (OB_FAIL(lock_rows_(ctx, scan_param, *row, row_reshape))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to lock row", K(row), K(ret)); } } } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } else if (OB_TRY_LOCK_ROW_CONFLICT == ret) { // do nothing } else { STORAGE_LOG(WARN, "get_next_row error", K(ret)); } free_row_reshape(ctx.mem_ctx_->get_query_allocator(), row_reshape, 1); } return ret; } int ObPartitionStorage::lock_scan_rows( const ObStoreCtx& ctx, const ObTableScanParam& scan_param, ObTableScanIterIterator& iter) { int ret = OB_SUCCESS; RowReshape* row_reshape = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_ISNULL(ctx.mem_ctx_) || OB_ISNULL(scan_param.table_param_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(ctx.mem_ctx_), KP(scan_param.table_param_)); } else { ObNewRow* row = nullptr; ObTableScanNewRowIterator row_iter(iter); while (OB_SUCC(ret) && OB_SUCC(row_iter.get_next_row(row))) { if (NULL != scan_param.op_) { if (OB_FAIL(prepare_lock_row(scan_param, *row))) { LOG_WARN("prepare lock row failed", K(ret)); } scan_param.op_->clear_evaluated_flag(); } if (OB_SUCC(ret)) { if (OB_FAIL(lock_rows_(ctx, scan_param, *row, row_reshape))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "fail to lock row", K(ret), K(row)); } } } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } else if (OB_TRY_LOCK_ROW_CONFLICT == ret) { // do nothing } else { STORAGE_LOG(WARN, "fail to get next row", K(ret)); } free_row_reshape(ctx.mem_ctx_->get_query_allocator(), row_reshape, 1); } return ret; } int ObPartitionStorage::revert_scan_iter(ObNewRowIterator* iter) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (iter) { switch (iter->get_type()) { case ObNewRowIterator::ObTableScanIterator: iter->reset(); rp_free(static_cast(iter), ObTableScanIterator::LABEL); break; default: iter->~ObNewRowIterator(); break; } iter = NULL; } return ret; } int ObPartitionStorage::lock_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const int64_t abs_lock_timeout, common::ObNewRowIterator* row_iter, ObLockFlag lock_flag, int64_t& affected_rows) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); UNUSED(lock_flag); int64_t afct_num = 0; ObColDescArray col_desc; RowReshape* row_reshape_ins = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !dml_param.is_valid() || OB_ISNULL(row_iter)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ctx.mem_ctx_), K(dml_param), K(abs_lock_timeout), K(row_iter), K(lock_flag), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_LOCK); ObNewRow* row = NULL; if (OB_FAIL(run_ctx.init(NULL, NULL, false, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (OB_FAIL(run_ctx.relative_tables_.data_table_.get_rowkey_column_ids(col_desc))) { STORAGE_LOG(WARN, "Fail to get column desc", K(ret)); } else { ctx.mem_ctx_->set_abs_lock_wait_timeout(get_lock_wait_timeout(abs_lock_timeout, dml_param.timeout_)); while (OB_SUCCESS == ret && OB_SUCC(row_iter->get_next_row(row))) { if (ObTimeUtility::current_time() > dml_param.timeout_) { ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(dml_param), K(ret)); } else if (OB_FAIL(lock_row(run_ctx.relative_tables_.data_table_, ctx, col_desc, *row, dml_param.sql_mode_, run_ctx.allocator_, row_reshape_ins))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to lock row", K(*row), K(ret)); } } else { ++afct_num; } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; affected_rows = afct_num; } free_row_reshape(run_ctx.allocator_, row_reshape_ins, 1); } } return ret; } int ObPartitionStorage::lock_rows(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const int64_t abs_lock_timeout, const common::ObNewRow& row, ObLockFlag lock_flag) { int ret = OB_SUCCESS; RowReshape* row_reshape = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(lock_rows_(ctx, dml_param, abs_lock_timeout, row, lock_flag, row_reshape))) { LOG_WARN("failed to lock rows", K(ret), K(row), K(ctx)); } free_row_reshape(ctx.mem_ctx_->get_query_allocator(), row_reshape, 1); return ret; } int ObPartitionStorage::lock_rows_(const ObStoreCtx& ctx, const ObDMLBaseParam& dml_param, const int64_t abs_lock_timeout, const common::ObNewRow& row, ObLockFlag lock_flag, RowReshape*& row_reshape) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); UNUSED(lock_flag); ObColDescArray col_desc; if (!ctx.is_valid() || !dml_param.is_valid() || row.is_invalid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG( WARN, "invalid argument", K(ctx.mem_ctx_), K(dml_param), K(abs_lock_timeout), K(row), K(lock_flag), K(ret)); } else { ObDMLRunningCtx run_ctx(ctx, dml_param, ctx.mem_ctx_->get_query_allocator(), T_DML_LOCK); if (OB_FAIL(run_ctx.init(NULL, NULL, false, schema_service_, store_))) { STORAGE_LOG(WARN, "init dml running context failed", K(ret)); } else if (ObTimeUtility::current_time() > dml_param.timeout_) { ret = OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K(dml_param), K(ret)); } else if (OB_FAIL(run_ctx.relative_tables_.data_table_.get_rowkey_column_ids(col_desc))) { STORAGE_LOG(WARN, "failed to get column desc", K(ret)); } else { ctx.mem_ctx_->set_abs_lock_wait_timeout(get_lock_wait_timeout(abs_lock_timeout, dml_param.timeout_)); if (OB_FAIL(lock_row(run_ctx.relative_tables_.data_table_, ctx, col_desc, row, dml_param.sql_mode_, run_ctx.allocator_, row_reshape))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { STORAGE_LOG(WARN, "failed to lock row", K(row), K(ret)); } } } } return ret; } int ObPartitionStorage::lock_rows_( const ObStoreCtx& ctx, const ObTableScanParam& scan_param, const common::ObNewRow& row, RowReshape*& row_reshape) { int ret = OB_SUCCESS; ObTimeGuard timeguard(__func__, 3 * 1000 * 1000); bool lock_conflict = false; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!ctx.is_valid() || !scan_param.is_valid() || row.is_invalid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ctx.mem_ctx_), K(scan_param), K(row), K(ret)); } ObDMLBaseParam dml_param; dml_param.timeout_ = scan_param.timeout_; dml_param.schema_version_ = scan_param.schema_version_; dml_param.only_data_table_ = true; // only need row lock on main table dml_param.sql_mode_ = scan_param.sql_mode_; while (OB_SUCC(ret)) { if (scan_param.for_update_wait_timeout_ > 0 && ObTimeUtility::current_time() > scan_param.for_update_wait_timeout_) { ret = lock_conflict ? OB_ERR_EXCLUSIVE_LOCK_CONFLICT : OB_TIMEOUT; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "wait timeout", K(cur_time), K_(scan_param.for_update_wait_timeout), K(ret)); } else if (OB_SUCC(lock_rows_(ctx, dml_param, scan_param.for_update_wait_timeout_, row, LF_NONE, row_reshape))) { break; } else if (0 == scan_param.for_update_wait_timeout_ && OB_EAGAIN == ret) { ret = OB_ERR_EXCLUSIVE_LOCK_CONFLICT; } else { if (OB_TIMEOUT == ret) { ret = lock_conflict ? OB_ERR_EXCLUSIVE_LOCK_CONFLICT : ret; int64_t cur_time = ObTimeUtility::current_time(); STORAGE_LOG(WARN, "query timeout", K(cur_time), K_(scan_param.timeout), K(ret)); } else if (OB_EAGAIN == ret) { lock_conflict = true; ret = OB_SUCCESS; } else { STORAGE_LOG(WARN, "lock row failed", K(row), K(ret)); } } if (OB_SUCC(ret)) { if (OB_FAIL(THIS_WORKER.check_status())) { STORAGE_LOG(WARN, "query interrupt, ", K(ret)); } } } return ret; } int ObPartitionStorage::get_batch_rows(const ObTableScanParam& param, const ObBatch& batch, int64_t& logical_row_count, int64_t& physical_row_count, common::ObIArray& est_records) { int ret = OB_SUCCESS; ObPartitionEst batch_estimate; ObTablesHandle tables_handle; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_UNLIKELY(!param.is_estimate_valid() || !batch.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(param), K(batch), K(ret)); } else { if (-1 == param.frozen_version_) { if (OB_FAIL(store_.get_read_tables(param.index_id_, GET_BATCH_ROWS_READ_SNAPSHOT_VERSION, tables_handle))) { STORAGE_LOG(WARN, "failed to get read stores", K(ret), K(param)); } } else { if (OB_FAIL(store_.get_read_frozen_tables(param.index_id_, param.frozen_version_, tables_handle))) { STORAGE_LOG(WARN, "failed to get read stores", K(ret), K(param)); } } } if (OB_SUCC(ret)) { STORAGE_LOG(DEBUG, "got estimate cost tables", K(tables_handle), K(param)); if (OB_FAIL(estimate_row_count(param, batch, tables_handle.get_tables(), batch_estimate, est_records))) { STORAGE_LOG(WARN, "failed to estimate_cost on stores. ", K(param), K(batch), K(ret)); } else { logical_row_count = batch_estimate.logical_row_count_; physical_row_count = batch_estimate.physical_row_count_; } } return ret; } // TODO(): fix it for split partition later int ObPartitionStorage::query_range_to_macros(ObIAllocator& allocator, const ObIArray& ranges, const int64_t type, uint64_t* macros_count, const int64_t* total_task_count, ObIArray* splitted_ranges, ObIArray* split_index) { int ret = OB_SUCCESS; ObTableHandle table_handle; ObSSTable* last_base_table = NULL; STORAGE_LOG(DEBUG, "split_macros"); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (ranges.empty()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "ranges must not empty", K(ret)); } else if (OB_FAIL(store_.get_last_major_sstable(ranges.at(0).get_table_id(), table_handle))) { STORAGE_LOG(WARN, "failed to get store handle", K(ret)); } else if (OB_FAIL(table_handle.get_sstable(last_base_table))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "failed to get last base store", K(ret)); } else if (OB_ISNULL(last_base_table)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "invlaid last base store", K(ret)); } else if (!last_base_table->is_major_sstable()) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "invalid ssstore type", K(ret), "type", last_base_table->is_major_sstable()); } else if (OB_FAIL(last_base_table->query_range_to_macros( allocator, ranges, type, macros_count, total_task_count, splitted_ranges, split_index))) { LOG_WARN("failed to do query range to macros", K(ret)); } return ret; } int ObPartitionStorage::get_multi_ranges_cost(const ObIArray& ranges, int64_t& total_size) { int ret = OB_SUCCESS; ObTablesHandle tables_handle; if (IS_NOT_INIT) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "Partition storage is not initialized", K(ret)); } else if (OB_UNLIKELY(ranges.empty())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to get multi range cost", K(ret), K(ranges)); } else if (OB_FAIL(store_.get_read_tables(ranges.at(0).get_table_id(), ObVersionRange::MAX_VERSION, tables_handle))) { STORAGE_LOG(WARN, "Failed to get read tables", K(ret)); } else { ObPartitionMultiRangeSpliter spliter; if (OB_FAIL(spliter.get_multi_range_size(tables_handle, ranges, total_size))) { STORAGE_LOG(WARN, "Failed to get multi ranges cost", K(ret)); } } return ret; } int ObPartitionStorage::split_multi_ranges(const ObIArray& ranges, const int64_t expected_task_count, ObIAllocator& allocator, ObArrayArray& multi_range_split_array) { int ret = OB_SUCCESS; ObTablesHandle tables_handle; if (IS_NOT_INIT) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "Partition storage is not initialized", K(ret)); } else if (OB_UNLIKELY(ranges.empty())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to get multi range cost", K(ret), K(ranges)); } else if (OB_FAIL(store_.get_read_tables(ranges.at(0).get_table_id(), ObVersionRange::MAX_VERSION, tables_handle))) { STORAGE_LOG(WARN, "Failed to get read tables", K(ret)); } else { ObPartitionMultiRangeSpliter spliter; if (OB_FAIL(spliter.get_split_multi_ranges( tables_handle, ranges, expected_task_count, allocator, multi_range_split_array))) { STORAGE_LOG(WARN, "Failed to get multi ranges split array", K(ret)); } } return ret; } int ObPartitionStorage::estimate_row_count(const storage::ObTableScanParam& param, const storage::ObBatch& batch, const ObIArray& tables, ObPartitionEst& part_est, common::ObIArray& est_records) { int ret = OB_SUCCESS; if (!param.is_estimate_valid() || !batch.is_valid() || tables.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(param), K(batch), K(tables.count()), K(ret)); } else { switch (batch.type_) { case ObBatch::T_GET: if (OB_ISNULL(batch.rowkey_)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid batch", K(batch), K(ret)); } else { ret = ObSingleMerge::estimate_row_count(param.scan_flag_, param.index_id_, *batch.rowkey_, tables, part_est); } break; case ObBatch::T_MULTI_GET: if (OB_ISNULL(batch.rowkeys_)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid batch", K(batch), K(ret)); } else { ret = ObMultipleGetMerge::estimate_row_count( param.scan_flag_, param.index_id_, *batch.rowkeys_, tables, part_est); } break; case ObBatch::T_SCAN: if (OB_ISNULL(batch.range_)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid batch", K(batch), K(ret)); } else { ret = ObMultipleScanMerge::estimate_row_count( param.scan_flag_, param.index_id_, *batch.range_, tables, part_est, est_records); } break; case ObBatch::T_MULTI_SCAN: if (OB_ISNULL(batch.ranges_)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid batch", K(batch), K(ret)); } else { ret = ObMultipleMultiScanMerge::estimate_row_count( param.scan_flag_, param.index_id_, *batch.ranges_, tables, part_est, est_records); } break; default: ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "unknown range type", K(batch.type_)); break; } if (OB_FAIL(ret)) { STORAGE_LOG(WARN, "failed to estimate merge cost", K(batch.type_), K(ret)); } } return ret; } // TODO(): fix it for spilit partition later int ObPartitionStorage::get_table_stat(const uint64_t table_id, common::ObTableStat& stat) { int ret = OB_SUCCESS; ObTableHandle table_handle; ObSSTable* base_table = NULL; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(store_.get_last_major_sstable(table_id, table_handle))) { if (OB_ENTRY_NOT_EXIST != ret) { STORAGE_LOG(WARN, "Fail to get base ssstore, ", K(ret)); } } else if (OB_FAIL(table_handle.get_sstable(base_table))) { STORAGE_LOG(WARN, "failed to get_sstable", K(ret)); } else if (OB_FAIL(base_table->get_table_stat(stat))) { STORAGE_LOG(WARN, "fail to get table stat. on ssstore. ", K(ret)); } return ret; } // TODO(): merge two get_concurrent_cnt method later // the schema_version maybe tenant level, but the function // is only called by get_build_index_param currently, so no problem int ObPartitionStorage::get_concurrent_cnt(uint64_t table_id, int64_t schema_version, int64_t& concurrent_cnt) { int ret = OB_SUCCESS; ObSchemaGetterGuard schema_guard; const ObTableSchema* table_schema = NULL; ObTableHandle handle; ObSSTable* sstable = NULL; concurrent_cnt = 0; const uint64_t tenant_id = is_inner_table(table_id) ? OB_SYS_TENANT_ID : extract_tenant_id(table_id); const bool check_formal = extract_pure_id(table_id) > OB_MAX_CORE_TABLE_ID; // to avoid circular dependency if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "The partition storage has not been initialized, ", K(ret), K_(pkey)); } else if (OB_FAIL(store_.get_last_major_sstable(table_id, handle))) { STORAGE_LOG(WARN, "failed to get_last_base_sstable", K(ret), K(table_id)); } else if (OB_FAIL(handle.get_sstable(sstable))) { STORAGE_LOG(WARN, "failed to get sstable", K(ret), K(table_id)); } else if (OB_ISNULL(sstable)) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "sstable must not null", K(ret), K(table_id)); } else if (OB_FAIL(schema_service_->get_tenant_schema_guard( tenant_id, schema_guard, schema_version, OB_INVALID_VERSION /*sys_schema_version*/))) { STORAGE_LOG(WARN, "Fail to get schema, ", K(schema_version), K(ret)); } else if (check_formal && OB_FAIL(schema_guard.check_formal_guard())) { STORAGE_LOG(WARN, "schema_guard is not formal", K(ret), K(tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(table_id, table_schema))) { STORAGE_LOG(WARN, "Fail to get main table schema, ", K(table_id), K(ret)); } else if (NULL == table_schema) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(ERROR, "Fail to get main table schema, ", K_(pkey), K(ret)); } else if (OB_FAIL(sstable->get_concurrent_cnt(table_schema->get_tablet_size(), concurrent_cnt))) { STORAGE_LOG(WARN, "failed to get_concurrent_cnt", K(ret), K(table_id)); } return ret; } int ObPartitionStorage::build_merge_ctx(storage::ObSSTableMergeCtx& ctx) { int ret = OB_SUCCESS; ObGetMergeTablesParam get_merge_table_param; ObGetMergeTablesResult get_merge_table_result; ObVersion merge_version; get_merge_table_param.merge_type_ = ctx.param_.merge_type_; get_merge_table_param.index_id_ = ctx.param_.index_id_; get_merge_table_param.merge_version_ = ctx.param_.merge_version_; get_merge_table_param.trans_table_end_log_ts_ = ctx.trans_table_end_log_ts_; get_merge_table_param.trans_table_timestamp_ = ctx.trans_table_timestamp_; // only ctx.param_ is inited, fill other fields here if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("The partition storage has not been initialized, ", K(ret), K_(pkey)); } else if (!ctx.param_.is_valid() || OB_ISNULL(ctx.partition_guard_.get_pg_partition())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ctx)); } else if (ctx.param_.pkey_ != pkey_) { ret = OB_ERR_SYS; LOG_WARN("pkey not match", K(ret), K(pkey_), K(ctx.param_.pkey_)); } else if (OB_FAIL(store_.get_merge_tables(get_merge_table_param, get_merge_table_result))) { LOG_WARN("failed to get merge tables", K(ret), K(ctx), K(get_merge_table_result)); } else { if (OB_FAIL(ctx.tables_handle_.add_tables(get_merge_table_result.handle_))) { LOG_WARN("failed to add tables", K(ret)); } else if (NULL != get_merge_table_result.base_handle_.get_table() && OB_FAIL(ctx.base_table_handle_.set_table(get_merge_table_result.base_handle_.get_table()))) { LOG_WARN("failed to set base table", K(ret)); } else { ctx.sstable_version_range_ = get_merge_table_result.version_range_; ctx.read_base_version_ = is_major_merge(get_merge_table_param.merge_type_) ? get_merge_table_result.read_base_version_ : 0; ctx.log_ts_range_ = get_merge_table_result.log_ts_range_; ctx.param_.merge_version_ = get_merge_table_result.merge_version_; if (ctx.param_.merge_type_ != get_merge_table_result.suggest_merge_type_) { FLOG_INFO( "change merge type", "param", ctx.param_, "suggest_merge_type", get_merge_table_result.suggest_merge_type_); ctx.param_.merge_type_ = get_merge_table_result.suggest_merge_type_; } ctx.base_schema_version_ = get_merge_table_result.base_schema_version_; ctx.schema_version_ = get_merge_table_result.schema_version_; ctx.create_snapshot_version_ = get_merge_table_result.create_snapshot_version_; ctx.dump_memtable_timestamp_ = get_merge_table_result.dump_memtable_timestamp_; ctx.create_sstable_for_large_snapshot_ = get_merge_table_result.create_sstable_for_large_snapshot_; } } if (OB_FAIL(ret)) { } else if (OB_FAIL(get_schemas_to_merge(ctx))) { LOG_WARN("Fail to get schemas to merge, ", K(ret), K_(pkey), K(ctx)); } else if (OB_FAIL(check_useless_index_mini_merge(ctx))) { STORAGE_LOG(WARN, "Failed to check useless index mini merge", K(ret), K_(pkey), K(ctx)); } else { if (OB_SUCC(ret)) { if (ctx.param_.is_major_merge()) { if (OB_FAIL(cal_major_merge_param(ctx))) { LOG_WARN("Fail to cal major merge param, ", K(ret), K_(pkey), K(ctx)); } } else { if (OB_FAIL(cal_minor_merge_param(ctx))) { LOG_WARN("Fail to cal minor merge param, ", K(ret), K_(pkey), K(ctx)); } } } } if (OB_SUCC(ret)) { FLOG_INFO("succeed to build merge ctx", K(pkey_), K(ctx)); } return ret; } int ObPartitionStorage::check_useless_index_mini_merge(const storage::ObSSTableMergeCtx &ctx) { int ret = OB_SUCCESS; if (OB_ISNULL(ctx.table_schema_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null table schema", K(ret), K(ctx)); } else if (!ctx.param_.is_mini_merge()) { } else if (ctx.log_ts_range_.end_log_ts_ < ctx.pg_last_replay_log_ts_) { ObSEArray active_table_ids; const uint64_t index_id = ctx.param_.index_id_; ObMemtable *memtable = nullptr; ObITable *table = nullptr; for (int64_t i = 0; OB_SUCC(ret) && i < ctx.tables_handle_.get_count(); i++) { if (OB_ISNULL(table = ctx.tables_handle_.get_table(i))) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "Unexpected null table", K(ret), K(i), K(ctx.tables_handle_)); } else if (!table->is_memtable()) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "Unexpected situation, new create table/index should not has sstable", K(ret), K(ctx)); } else if (FALSE_IT(memtable = reinterpret_cast(table))) { } else if (OB_FAIL(memtable->get_active_table_ids(active_table_ids))) { STORAGE_LOG(WARN, "Failed to get active table ids of memtable", K(ret), KPC(memtable)); } else { for (int64_t j = 0; OB_SUCC(ret) && j < active_table_ids.count(); j++) { if (index_id == active_table_ids.at(j)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "new create effective index should not has data within old frozen memtable", K(ret), K(index_id), K(ctx.tables_handle_), K(ctx.pg_last_replay_log_ts_), K(ctx.log_ts_range_), K(j), KPC(memtable)); } } active_table_ids.reset(); } } if (OB_FAIL(ret)) { } else { ret = OB_NO_NEED_MERGE; STORAGE_LOG(WARN, "new create index should not mini for old memtable with lager last replay log ts", K(ret), K(index_id), K(ctx.tables_handle_), K(ctx.pg_last_replay_log_ts_), K(ctx.log_ts_range_)); } } return ret; } int ObPartitionStorage::check_need_update_estimator( const ObTableSchema& table_schema, int64_t data_version, int64_t& stat_sampling_ratio) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "The partition storage has not been initialized", K(ret), K_(pkey)); } else if (OB_UNLIKELY(!table_schema.is_valid()) || OB_UNLIKELY(data_version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(table_schema), K(data_version)); } else if (table_schema.is_index_table()) { // index no need update estimator stat_sampling_ratio = 0; } else if (FALSE_IT(stat_sampling_ratio = GCONF.merge_stat_sampling_ratio)) { } else if (stat_sampling_ratio < 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Invalid stat_sampling_ratio value", K(stat_sampling_ratio), K(ret)); } else if (0 == stat_sampling_ratio) { // ratio == 0 mean disable estimator } else { int tmp_ret = OB_SUCCESS; ObArenaAllocator allocator(ObModIds::OB_CS_MERGER); ObArray column_stats; ObArray column_ids(OB_MALLOC_NORMAL_BLOCK_SIZE, allocator); uint64_t table_id = table_schema.get_table_id(); void* ptr = NULL; ObColumnStat* stat_ptr = NULL; if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = table_schema.get_store_column_ids(column_ids)))) { LOG_WARN("Fail to get column ids. ", K(tmp_ret)); } for (int64_t i = 0; OB_SUCCESS == tmp_ret && i < column_ids.count(); ++i) { ptr = allocator.alloc(sizeof(ObColumnStat)); if (OB_ISNULL(ptr) || OB_ISNULL(stat_ptr = new (ptr) ObColumnStat(allocator)) || OB_UNLIKELY(!stat_ptr->is_writable())) { tmp_ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("fail to allocate memory for ObColumnStat object. ", K(tmp_ret)); } else if (OB_SUCCESS != (tmp_ret = column_stats.push_back(stat_ptr))) { LOG_WARN("fail to push back stat_ptr. ", K(tmp_ret)); } else { stat_ptr->set_table_id(table_id); stat_ptr->set_partition_id(pkey_.get_partition_id()); stat_ptr->set_column_id(column_ids.at(i).col_id_); ptr = NULL; stat_ptr = NULL; } } if (OB_SUCCESS == tmp_ret) { if (OB_SUCCESS != (tmp_ret = ObStatManager::get_instance().get_batch_stat( table_schema, pkey_.get_partition_id(), column_stats, allocator))) { LOG_WARN("fail to batch get column stat. ", K(tmp_ret), K(table_id), K(pkey_.get_partition_id())); } else { for (int64_t i = 0; OB_SUCCESS == tmp_ret && i < column_ids.count(); ++i) { stat_ptr = column_stats.at(i); if (OB_ISNULL(stat_ptr) || column_ids.at(i).col_id_ != stat_ptr->get_key().column_id_) { tmp_ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected error.", K(tmp_ret), KP(stat_ptr), K(table_id), K(pkey_.get_partition_id()), "expect_column_id", column_ids.at(i).col_id_, "actual_column_id", stat_ptr->get_key().column_id_); } else if (stat_ptr->get_version() >= data_version) { LOG_INFO("column stat has updated by another replica.", K(*stat_ptr)); // no need update in this time, put NULL; stat_sampling_ratio = 0; break; } } } } if (OB_SUCCESS != tmp_ret) { stat_sampling_ratio = 0; } } LOG_INFO("check_estimator_stat_sampling_ratio", K(data_version), K(stat_sampling_ratio)); return ret; } int ObPartitionStorage::update_estimator(const ObTableSchema* base_schema, const bool is_full, const ObIArray& column_stats, ObSSTable* sstable, const common::ObPartitionKey& pkey) { int ret = OB_SUCCESS; int64_t estimate_start_time = 0; int64_t estimate_end_time = 0; ObArenaAllocator allocator; ObSSTableMergeInfo& sstable_merge_info = sstable->get_sstable_merge_info(); estimate_start_time = ::oceanbase::common::ObTimeUtility::current_time(); int tmp_ret = OB_SUCCESS; bool need_report = true; if (!is_full) { ObArray base_column_stats; ObArray column_ids(OB_MALLOC_NORMAL_BLOCK_SIZE, allocator); ObColumnStat* stat_ptr = NULL; void* ptr = NULL; int64_t partition_id = pkey.get_partition_id(); uint64_t table_id = base_schema->get_table_id(); if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = base_schema->get_store_column_ids(column_ids)))) { LOG_WARN("Fail to get column ids. ", K(tmp_ret)); } for (int64_t i = 0; OB_SUCCESS == tmp_ret && i < column_ids.count(); ++i) { ptr = allocator.alloc(sizeof(common::ObColumnStat)); if (OB_ISNULL(ptr) || OB_ISNULL(stat_ptr = new (ptr) common::ObColumnStat(allocator)) || OB_UNLIKELY(!stat_ptr->is_writable())) { tmp_ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("fail to allocate memory for ObColumnStat object. ", K(tmp_ret)); } else if (OB_SUCCESS != (tmp_ret = base_column_stats.push_back(stat_ptr))) { LOG_WARN("fail to push back stat_ptr. ", K(tmp_ret)); } else { stat_ptr->set_table_id(table_id); stat_ptr->set_partition_id(partition_id); stat_ptr->set_column_id(column_ids.at(i).col_id_); ptr = NULL; stat_ptr = NULL; } } if (OB_SUCCESS == tmp_ret) { if (OB_SUCCESS != (tmp_ret = ObStatManager::get_instance().get_batch_stat( *base_schema, partition_id, base_column_stats, allocator))) { STORAGE_LOG(WARN, "fail to batch get column stat. ", K(tmp_ret), K(partition_id)); } else if (base_column_stats.count() == 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("column stat count should not be 0", K(tmp_ret)); } else if (base_column_stats.at(0)->get_version() >= sstable_merge_info.version_.version_) { // already has latest column stat, no need to update need_report = false; } else { for (int64_t i = 0; OB_SUCCESS == tmp_ret && i < column_stats.count(); ++i) { for (int64_t j = 0; OB_SUCCESS == tmp_ret && j < base_column_stats.count(); ++j) { if (NULL != base_column_stats.at(j) && NULL != column_stats.at(i) && base_column_stats.at(j)->get_column_id() == column_stats.at(i)->get_column_id()) { if (OB_SUCCESS != (tmp_ret = column_stats.at(i)->add(*base_column_stats.at(j)))) { STORAGE_LOG(WARN, "Fail to add other, ", K(tmp_ret)); } } } } } } } for (int64_t i = 0; OB_SUCCESS == tmp_ret && need_report && i < column_stats.count(); ++i) { if (NULL != column_stats.at(i)) { if (is_full || 0 == column_stats.at(i)->get_version()) { column_stats.at(i)->set_last_rebuild_version(sstable_merge_info.version_.version_); } column_stats.at(i)->set_version(sstable_merge_info.version_.version_); if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = column_stats.at(i)->finish()))) { STORAGE_LOG(WARN, "Fail to finish column state, ", K(tmp_ret), K(i)); } } } if (need_report && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = ObStatManager::get_instance().update_column_stats(column_stats)))) { STORAGE_LOG(WARN, "Fail to update column stats, ", K(tmp_ret)); } else { STORAGE_LOG(INFO, "finish update column stat completed.", K(need_report)); } estimate_end_time = ::oceanbase::common::ObTimeUtility::current_time(); sstable_merge_info.estimate_cost_time_ = (estimate_end_time - estimate_start_time); return ret; } bool ObPartitionStorage::has_memstore() { bool bret = false; ObTablesHandle tables_handle; if (OB_UNLIKELY(!is_inited_)) { STORAGE_LOG(ERROR, "partition storage is not initialized"); } else { bret = store_.has_memtable(); } return bret; } int ObPartitionStorage::get_replayed_table_version(int64_t& table_version) { int ret = OB_SUCCESS; ObTableHandle table_handle; ObMemtable* memtable = NULL; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(store_.get_active_memtable(table_handle))) { STORAGE_LOG(WARN, "Fail to get active memtable", K(ret)); } else if (OB_FAIL(table_handle.get_memtable(memtable))) { STORAGE_LOG(WARN, "failed to get memtable", K(ret)); } else if (OB_ISNULL(memtable)) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "memtable must not null", K(ret)); } else { table_version = memtable->get_max_schema_version(); } return ret; } int ObPartitionStorage::serialize(ObArenaAllocator& allocator, char*& new_buf, int64_t& serialize_size) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(store_.serialize(allocator, new_buf, serialize_size))) { STORAGE_LOG(WARN, "failed to serialize store", K(ret)); } return ret; } int ObPartitionStorage::deserialize(const ObReplicaType replica_type, const char* buf, const int64_t buf_len, ObIPartitionGroup* pg, int64_t& pos, bool& is_old_meta, ObPartitionStoreMeta& old_meta) { int ret = OB_SUCCESS; int64_t magic_num = 0; int64_t tmp_pos = pos; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_ISNULL(buf) || buf_len <= 0 || pos < 0 || pos >= buf_len || OB_ISNULL(pg)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(buf), K(buf_len), K(pos), K(ret), KP(pg)); } else if (OB_FAIL(serialization::decode_i64(buf, buf_len, tmp_pos, &magic_num))) { STORAGE_LOG(WARN, "deserialize magic_num failed.", K(ret)); } else if (MAGIC_NUM_1_4_61 != magic_num) { if (OB_FAIL(store_.deserialize(pg, pg_memtable_mgr_, replica_type, buf, buf_len, pos, is_old_meta, old_meta))) { LOG_WARN("failed to deserialize store_", K(ret), K(buf_len), K(pos), K(magic_num)); } else { pkey_ = store_.get_partition_key(); } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("MAGIC_NUM_1_4_61 should not be here", K(ret), K(magic_num)); } return ret; } int ObPartitionStorage::get_all_tables(ObTablesHandle& tables_handle) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(store_.get_all_tables(tables_handle))) { STORAGE_LOG(WARN, "failed to get_all_tables", K(ret)); } return ret; } int ObPartitionStorage::get_schemas_to_split(storage::ObSSTableSplitCtx& ctx) { int ret = OB_SUCCESS; const uint64_t index_id = ctx.param_.index_id_; int64_t& schema_version = ctx.schema_version_; ObSchemaGetterGuard& schema_guard = ctx.schema_guard_; const ObTableSchema*& table_schema = ctx.table_schema_; int64_t& split_schema_version = ctx.split_schema_version_; ObSchemaGetterGuard& split_schema_guard = ctx.split_schema_guard_; const ObTableSchema*& split_table_schema = ctx.split_table_schema_; ObITable* table = NULL; const uint64_t tenant_id = is_inner_table(index_id) ? OB_SYS_TENANT_ID : extract_tenant_id(index_id); const bool check_formal = extract_pure_id(index_id) > OB_MAX_CORE_TABLE_ID; // to avoid circular dependency if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!is_valid_id(index_id)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(index_id)); } else if (ctx.tables_handle_.get_tables().count() < 1) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "table count must not zero", K(ret), K(pkey_)); } else if (ctx.param_.is_major_split_) { if (OB_ISNULL(table = ctx.tables_handle_.get_table(0))) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "failed to get first table", K(ret), K(pkey_)); } else if (OB_FAIL(table->get_frozen_schema_version(schema_version))) { LOG_WARN("failed to get frozen schema version", K(ret), K_(pkey), KPC(table)); } } else if (!ctx.param_.is_major_split_) { int64_t tenant_schema_version = 0; if (OB_ISNULL(table = ctx.tables_handle_.get_last_table())) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "failed to get last table", K(ret)); } else if (OB_FAIL(table->get_frozen_schema_version(schema_version))) { LOG_WARN("failed to get frozen schema version", K(ret), K_(pkey), KPC(table)); } else if (OB_FAIL(schema_service_->get_tenant_refreshed_schema_version(tenant_id, tenant_schema_version))) { LOG_WARN("failed to get tenant schema version", K(ret), K(tenant_id), K_(pkey)); } else if (tenant_schema_version < schema_version) { ret = OB_SCHEMA_ERROR; LOG_WARN("local schema is too old", K(ret), K(tenant_schema_version), K(schema_version)); } else { schema_version = tenant_schema_version; } } if (OB_SUCC(ret)) { if (OB_ISNULL(ctx.partition_guard_.get_partition_group())) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "partition should not be NULL", K(ret)); } else { split_schema_version = ctx.partition_guard_.get_partition_group()->get_split_info().get_schema_version(); if (OB_FAIL(schema_service_->get_tenant_schema_guard( tenant_id, split_schema_guard, split_schema_version, OB_INVALID_VERSION))) { STORAGE_LOG(WARN, "Fail to get schema mgr by version, ", K(ret), K(ctx)); } else if (check_formal && OB_FAIL(split_schema_guard.check_formal_guard())) { STORAGE_LOG(WARN, "schema_guard is not formal", K(ret), K(tenant_id)); } else if (OB_FAIL(split_schema_guard.get_table_schema(ctx.param_.index_id_, split_table_schema))) { if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_TABLE_IS_DELETED; STORAGE_LOG(INFO, "table is deleted", K(index_id), K(schema_version)); } else { STORAGE_LOG(WARN, "Fail to get table schema, ", K(ret), K(index_id)); } } } } if (OB_SUCC(ret)) { if (OB_FAIL( schema_service_->get_tenant_schema_guard(tenant_id, schema_guard, schema_version, OB_INVALID_VERSION))) { STORAGE_LOG(WARN, "Fail to get schema, ", K(schema_version), K(ret)); } else if (check_formal && OB_FAIL(schema_guard.check_formal_guard())) { STORAGE_LOG(WARN, "schema_guard is not formal", K(ret), K(tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(index_id, table_schema))) { if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_TABLE_IS_DELETED; STORAGE_LOG(INFO, "table is deleted", K(index_id), K(schema_version)); } else { STORAGE_LOG(WARN, "Fail to get table schema, ", K(ret), K(index_id)); } } else if (NULL == table_schema) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "table_schema must not null", K(ret), K_(pkey), K(index_id), K(schema_version)); } else { STORAGE_LOG(INFO, "table_schema is, ", K(*table_schema)); } } return ret; } int ObPartitionStorage::get_schemas_to_merge(storage::ObSSTableMergeCtx& ctx) { int ret = OB_SUCCESS; const uint64_t index_id = ctx.param_.index_id_; int64_t& schema_version = ctx.schema_version_; ObSchemaGetterGuard& schema_guard = ctx.schema_guard_; const ObTableSchema*& data_table_schema = ctx.data_table_schema_; const ObTableSchema*& table_schema = ctx.table_schema_; const ObTableSchema*& dep_table_schema = ctx.mv_dep_table_schema_; int64_t save_schema_version = schema_version; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!is_valid_id(index_id)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(index_id)); } else if (OB_FAIL(schema_service_->retry_get_schema_guard( schema_version, index_id, schema_guard, save_schema_version))) { if (OB_TABLE_IS_DELETED != ret) { STORAGE_LOG(WARN, "Fail to get schema, ", K(schema_version), K(ret)); } else { STORAGE_LOG(WARN, "table is deleted", K(ret), K(index_id)); } } else if (save_schema_version < schema_version) { ret = OB_SCHEMA_ERROR; LOG_WARN( "can not use older schema version", K(ret), K(schema_version), K(save_schema_version), K_(pkey), K(index_id)); } else if (OB_FAIL(schema_guard.get_table_schema(index_id, table_schema))) { STORAGE_LOG(WARN, "Fail to get table schema, ", K(ret), K(index_id)); } else if (NULL == table_schema) { const uint64_t fetch_tenant_id = is_inner_table(index_id) ? OB_SYS_TENANT_ID : extract_tenant_id(index_id); if (OB_FAIL(schema_service_->get_tenant_full_schema_guard(fetch_tenant_id, schema_guard))) { STORAGE_LOG(WARN, "Fail to get schema, ", K(ret), K(fetch_tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(index_id, table_schema))) { STORAGE_LOG(WARN, "Fail to get table schema, ", K(ret), K(index_id)); } else if (NULL == table_schema) { ret = OB_TABLE_IS_DELETED; STORAGE_LOG(WARN, "table is deleted", K(ret), K_(pkey), K(index_id)); } } if (OB_SUCC(ret)) { const ObPrintableTableSchema* print_table_schema = reinterpret_cast(table_schema); FLOG_INFO("Get schema to merge", K(schema_version), K(save_schema_version), K(*print_table_schema)); schema_version = save_schema_version; } if (OB_SUCC(ret)) { if (pkey_.table_id_ != index_id) { if (OB_FAIL(schema_guard.get_table_schema(pkey_.table_id_, data_table_schema))) { STORAGE_LOG(WARN, "Fail to get data table schema, ", K(ret), K(schema_version), K(pkey_.table_id_)); } else if (NULL == data_table_schema) { ret = OB_SCHEMA_ERROR; LOG_ERROR("data_table_schema must not null", K(ret), K(pkey_), K(schema_version), K(pkey_.table_id_)); } else { STORAGE_LOG(INFO, "Get data table schema to merge", K(schema_version), K(*reinterpret_cast(data_table_schema))); } } else { data_table_schema = table_schema; } if (OB_SUCC(ret) && NULL != data_table_schema) { const bool with_global_index = false; if (OB_FAIL(schema_guard.get_all_aux_table_status( data_table_schema->get_table_id(), with_global_index, ctx.index_stats_))) { LOG_WARN("failed to get index statsu", K(ret), K(pkey_)); } else { LOG_INFO("succeed to get index status", "index_status", ctx.index_stats_); } } } // get dependent table schema, only major merge need merge mv if (OB_SUCC(ret) && ctx.param_.is_major_merge()) { if (OB_FAIL(get_depend_table_schema(table_schema, schema_guard, dep_table_schema))) { STORAGE_LOG(WARN, "fail to get depend table schema", K(ret)); } } if (OB_SUCC(ret) && NULL != table_schema && table_schema->is_use_bloomfilter()) { ctx.bf_rowkey_prefix_ = table_schema->get_rowkey_column_num(); if (OB_SUCCESS != get_prefix_access_stat().get_optimal_prefix(ctx.bf_rowkey_prefix_)) { ctx.bf_rowkey_prefix_ = 0; } else if (OB_UNLIKELY(ctx.bf_rowkey_prefix_ > table_schema->get_rowkey_column_num())) { STORAGE_LOG(WARN, "unexpected prefix rowkey,", K(ctx.bf_rowkey_prefix_)); ctx.bf_rowkey_prefix_ = 0; } else { STORAGE_LOG(INFO, "succeed to get optimal prefix,", K(ctx.bf_rowkey_prefix_)); } } return ret; } int ObPartitionStorage::get_depend_table_schema( const ObTableSchema* table_schema, ObSchemaGetterGuard& schema_guard, const ObTableSchema*& dep_table_schema) { int ret = OB_SUCCESS; dep_table_schema = NULL; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_ISNULL(table_schema)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), KP(table_schema)); } else { const int64_t dep_table_count = table_schema->get_depend_table_ids().count(); if (0 == dep_table_count) { // do nothing } else if (1 != dep_table_count) { ret = OB_ERR_SYS; LOG_ERROR("materialized view only support one depend table", K(ret), K(pkey_)); } else { const uint64_t dep_table_id = table_schema->get_depend_table_ids().at(0); if (OB_FAIL(schema_guard.get_table_schema(dep_table_id, dep_table_schema))) { LOG_WARN("failed to get table schema", K(ret), K(dep_table_id)); } else if (NULL == dep_table_schema) { ret = OB_SCHEMA_ERROR; LOG_ERROR("dep_table_schema must not null", K(ret), K(pkey_), K(dep_table_id)); } else { LOG_INFO("get dependent schema to merge", K(*dep_table_schema)); } } } return ret; } int ObPartitionStorage::check_is_schema_changed(const int64_t column_checksum_method, const ObTableSchema& base_table_schema, const ObTableSchema& main_table_schema, bool& is_schema_changed, bool& is_column_changed, bool& is_progressive_merge_num_changed) { int ret = OB_SUCCESS; const ObColumnSchemaV2* base_column = NULL; is_schema_changed = false; is_column_changed = false; is_progressive_merge_num_changed = base_table_schema.get_progressive_merge_num() != main_table_schema.get_progressive_merge_num(); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } for (ObTableSchema::const_column_iterator iter = main_table_schema.column_begin(); OB_SUCC(ret) && iter != main_table_schema.column_end() && !is_column_changed; ++iter) { if (OB_ISNULL(iter)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "wrong column iterator", K(ret)); } else if (NULL != (base_column = base_table_schema.get_column_schema((*iter)->get_column_id()))) { if (CCM_VALUE_ONLY == column_checksum_method && ob_is_integer_type((*iter)->get_meta_type().get_type()) && ob_is_integer_type(base_column->get_meta_type().get_type())) { is_column_changed = (*iter)->get_meta_type().get_type() < base_column->get_meta_type().get_type(); } else if (CCM_VALUE_ONLY == column_checksum_method && ob_is_string_type((*iter)->get_meta_type().get_type()) && (ob_is_string_type(base_column->get_meta_type().get_type())) && (*iter)->get_charset_type() == base_column->get_charset_type() && (*iter)->get_meta_type().get_collation_type() == base_column->get_collation_type()) { is_column_changed = (*iter)->get_accuracy().get_length() < base_column->get_accuracy().get_length(); } else if ((common::ObStringTC == (*iter)->get_meta_type().get_type_class() ? (*iter)->get_accuracy().get_length() < base_column->get_accuracy().get_length() : (*iter)->get_accuracy() != base_column->get_accuracy()) || (*iter)->get_charset_type() != base_column->get_charset_type() || (*iter)->get_meta_type() != base_column->get_meta_type() || (*iter)->get_orig_default_value() != base_column->get_orig_default_value()) { // column is modified is_column_changed = true; } } else { // add some column is_schema_changed = true; if ((*iter)->is_stored_generated_column()) { is_column_changed = true; } } } if (OB_SUCC(ret)) { if (base_table_schema.get_column_count() != main_table_schema.get_column_count() || 0 != strcmp(base_table_schema.get_compress_func_name(), main_table_schema.get_compress_func_name()) || base_table_schema.get_row_store_type() != main_table_schema.get_row_store_type() //|| base_table_schema.get_store_format() != main_table_schema.get_store_format() // enable it when store format represent more storage option || base_table_schema.get_pctfree() != main_table_schema.get_pctfree()) { is_schema_changed = true; } } STORAGE_LOG(INFO, "check schema is changed", K(is_schema_changed), K(is_column_changed), K(is_progressive_merge_num_changed), K(reinterpret_cast(base_table_schema)), K(reinterpret_cast(main_table_schema))); return ret; } int ObPartitionStorage::cal_major_merge_param(ObSSTableMergeCtx& ctx) { int ret = OB_SUCCESS; bool is_schema_changed = false; bool is_column_changed = false; bool is_progressive_merge_num_changed = false; ObSSTable* last_major_sstable = nullptr; int64_t merge_version = ctx.param_.merge_version_.major_; ctx.checksum_method_ = 0; // some input param check CK(OB_NOT_NULL(ctx.table_schema_)); // check schema change to determine whether use increment/progressive/full merge if (OB_SUCC(ret)) { if (OB_FAIL(ctx.base_table_handle_.get_sstable(last_major_sstable))) { LOG_WARN("failed to get sstable", K(ret)); } else if (OB_ISNULL(last_major_sstable)) { ret = OB_ERR_SYS; LOG_ERROR("last_major_sstable must not null", K(ret)); } } if (OB_SUCC(ret)) { const ObTableSchema* base_table_schema = nullptr; ObSchemaGetterGuard base_schema_guard; uint64_t tenant_id = is_inner_table(ctx.table_schema_->get_table_id()) ? OB_SYS_TENANT_ID : ctx.table_schema_->get_tenant_id(); const uint64_t table_id = pkey_.get_table_id(); const bool check_formal = extract_pure_id(table_id) > OB_MAX_CORE_TABLE_ID; // to avoid circular dependency OZ(schema_service_->get_tenant_schema_guard( tenant_id, base_schema_guard, ctx.base_schema_version_, OB_INVALID_VERSION)); if (check_formal) { OZ(base_schema_guard.check_formal_guard()); } OZ(base_schema_guard.get_table_schema(ctx.param_.index_id_, base_table_schema), tenant_id, ctx.base_schema_version_, ctx.table_schema_->get_table_id()); if (OB_SUCC(ret) && OB_NOT_NULL(base_table_schema)) { // determine whether use increment/progressive/full merge OZ(check_is_schema_changed(last_major_sstable->get_meta().checksum_method_, *base_table_schema, *ctx.table_schema_, is_schema_changed, is_column_changed, is_progressive_merge_num_changed)); } } if (OB_SUCC(ret)) { const int64_t schema_progressive_merge_round = ctx.data_table_schema_->get_progressive_merge_round(); ctx.is_full_merge_ = false; if (is_column_changed) { // column has been changed, must do full merge ctx.is_full_merge_ = true; } else if (1 == ctx.table_schema_->get_progressive_merge_num()) { ctx.is_full_merge_ = true; } if (0 == schema_progressive_merge_round) { ctx.use_new_progressive_ = false; if (ctx.is_full_merge_) { // full merge ctx.progressive_merge_num_ = 1; ctx.progressive_merge_start_version_ = 0; } else { const int64_t rewrite_merge_version = 0; int64_t progressive_merge_num = ctx.table_schema_->get_progressive_merge_num(); if (0 == progressive_merge_num) { if (GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_2100) { // force rewrite progressive_merge_num = 0 == rewrite_merge_version ? 0 : OB_AUTO_PROGRESSIVE_MERGE_NUM; } } if (is_schema_changed || (rewrite_merge_version == merge_version)) { // new progressive merge ctx.progressive_merge_num_ = progressive_merge_num; ctx.progressive_merge_start_version_ = merge_version; } else if (last_major_sstable->get_meta().progressive_merge_start_version_ < merge_version && last_major_sstable->get_meta().progressive_merge_end_version_ > merge_version) { // in progressive merge progress if (is_progressive_merge_num_changed) { ctx.progressive_merge_num_ = progressive_merge_num; ctx.progressive_merge_start_version_ = merge_version; } else { ctx.progressive_merge_num_ = last_major_sstable->get_meta().progressive_merge_end_version_ - last_major_sstable->get_meta().progressive_merge_start_version_; ctx.progressive_merge_start_version_ = last_major_sstable->get_meta().progressive_merge_start_version_; } } else if (rewrite_merge_version > 0 && merge_version >= rewrite_merge_version && merge_version < rewrite_merge_version + progressive_merge_num) { // in online index build, progressive_merge_start_version is not calculated in init, need extra check ctx.progressive_merge_start_version_ = is_progressive_merge_num_changed ? merge_version : rewrite_merge_version; ctx.progressive_merge_num_ = progressive_merge_num; STORAGE_LOG(INFO, "build index", K(ctx.progressive_merge_start_version_), K(ctx.progressive_merge_num_)); } else { // increment merge ctx.progressive_merge_num_ = 0; ctx.progressive_merge_start_version_ = 0; } } if (OB_SUCC(ret)) { if (ctx.table_schema_->is_index_table()) { // if this is the first round major merge after index created, do full merge // to avoid bug 15777811 // TODO: temporary solution, better way is to read tables using non-intersected version range if (last_major_sstable->get_snapshot_version() == ctx.create_snapshot_version_) { ctx.is_full_merge_ = true; // full merge in index build, progressive_merge_start_version needs to be calculated, keeping consistent // with main table ctx.backup_progressive_merge_num_ = ctx.progressive_merge_num_; ctx.backup_progressive_merge_start_version_ = ctx.progressive_merge_start_version_; ctx.progressive_merge_num_ = 1; ctx.progressive_merge_start_version_ = 0; ctx.is_created_index_first_merge_ = true; } } } STORAGE_LOG(INFO, "Calc progressive param, ", K(is_schema_changed), K(is_column_changed), K(is_progressive_merge_num_changed), K(ctx.progressive_merge_num_), K(ctx.progressive_merge_start_version_), K(merge_version), K(ctx.table_schema_->get_progressive_merge_num()), K(ctx.data_table_schema_->get_progressive_merge_num()), K(ctx.backup_progressive_merge_num_), K(ctx.backup_progressive_merge_start_version_)); } else { const int64_t meta_progressive_merge_round = last_major_sstable->get_meta().progressive_merge_round_; const int64_t storage_format_version = ctx.data_table_schema_->get_storage_format_version(); int64_t progressive_merge_num = 0; if (0 == ctx.data_table_schema_->get_progressive_merge_num()) { if (storage_format_version < OB_STORAGE_FORMAT_VERSION_V4) { progressive_merge_num = OB_AUTO_PROGRESSIVE_MERGE_NUM; } else { progressive_merge_num = 1 == schema_progressive_merge_round ? 0 : OB_AUTO_PROGRESSIVE_MERGE_NUM; } } else { progressive_merge_num = ctx.data_table_schema_->get_progressive_merge_num(); } ctx.progressive_merge_num_ = progressive_merge_num; if (ctx.is_full_merge_) { ctx.progressive_merge_round_ = schema_progressive_merge_round; ctx.progressive_merge_step_ = progressive_merge_num; } else { ctx.use_new_progressive_ = true; if (meta_progressive_merge_round < schema_progressive_merge_round) { ctx.progressive_merge_round_ = schema_progressive_merge_round; ctx.progressive_merge_step_ = 0; ctx.progressive_merge_start_version_ = merge_version; } else if (meta_progressive_merge_round == schema_progressive_merge_round) { ctx.progressive_merge_round_ = meta_progressive_merge_round; ctx.progressive_merge_step_ = last_major_sstable->get_meta().progressive_merge_step_; } } STORAGE_LOG(INFO, "Calc progressive param", K(is_schema_changed), K(is_column_changed), K(is_progressive_merge_num_changed), K(ctx.progressive_merge_num_), K(ctx.progressive_merge_round_), K(schema_progressive_merge_round), K(storage_format_version), K(ctx.progressive_merge_step_), K(ctx.is_full_merge_), K(progressive_merge_num), K(meta_progressive_merge_round)); } } // determine logical data version if (OB_SUCC(ret)) { if (last_major_sstable->get_logical_data_version() >= merge_version) { ctx.logical_data_version_ = last_major_sstable->get_logical_data_version() + 1; STORAGE_LOG(INFO, "Logical data version of sstable larger than frozen version", K(ctx.logical_data_version_), K(merge_version)); } else { ctx.logical_data_version_ = merge_version; } } // determine checksum method if (OB_SUCC(ret)) { const int64_t progressive_start_version = ctx.progressive_merge_start_version_; const int64_t progressive_end_version = ctx.progressive_merge_start_version_ + ctx.progressive_merge_num_; ctx.checksum_method_ = last_major_sstable->get_meta().checksum_method_; ctx.is_in_progressive_new_checksum_ = false; } // determine merge level and data format OZ(get_merge_level(ctx.param_.merge_version_, ctx, ctx.merge_level_)); OZ(get_store_column_checksum_in_micro(ctx.param_.merge_version_, ctx, ctx.store_column_checksum_in_micro_)); if (OB_SUCC(ret)) { if (ctx.is_full_merge_) { ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; } else { const int64_t storage_format_version = ctx.data_table_schema_->get_storage_format_version(); if (storage_format_version < OB_STORAGE_FORMAT_VERSION_V4) { if (ctx.param_.merge_version_ >= ctx.progressive_merge_start_version_ && ctx.param_.merge_version_ < ctx.progressive_merge_start_version_ + ctx.progressive_merge_num_) { // when doing progressive merge, should use macro block merge level ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; } } else { if (ctx.progressive_merge_step_ < ctx.progressive_merge_num_) { ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; } } if (ctx.merge_level_ != MACRO_BLOCK_MERGE_LEVEL) { if (is_column_changed || is_schema_changed) { ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; } } } } // determine estimator, parallize, mv and init final merge ctx if (OB_SUCC(ret)) { OZ(check_need_update_estimator(*ctx.table_schema_, ctx.param_.merge_version_, ctx.stat_sampling_ratio_)); } OZ(prepare_merge_mv_depend_sstable( ctx.param_.merge_version_, ctx.table_schema_, ctx.mv_dep_table_schema_, ctx.mv_dep_tables_handle_), pkey_, ctx.param_.merge_version_); OZ(init_merge_context(ctx), ctx); return ret; } int ObPartitionStorage::cal_minor_merge_param(ObSSTableMergeCtx& ctx) { int ret = OB_SUCCESS; const ObMultiVersionRowInfo* multi_version_row_info = NULL; ObMultiVersionColDescGenerate multi_version_col_desc_gen; ObSSTable* first_minor_sstable = nullptr; // some input param check CK(!ctx.tables_handle_.empty()); CK(OB_NOT_NULL(ctx.tables_handle_.get_table(0))); if (OB_SUCC(ret)) { ctx.progressive_merge_num_ = 0; // determine whether to use increment/full merge ctx.is_full_merge_ = false; ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; if (ctx.tables_handle_.get_table(0)->is_minor_sstable()) { // if base_version of minor merge is bigger than base_version of minor sstable in the merge, then run full merge // 1. to clean outdated data in the multi version sstable // 2. to avoid bug, because with incremental minor merge, if multi version rows with same rowkey // cross macro blocks, it cannot know the position of last_multi_version_row first_minor_sstable = reinterpret_cast(ctx.tables_handle_.get_table(0)); if (first_minor_sstable->is_multi_version_minor_sstable() && ctx.sstable_version_range_.base_version_ > first_minor_sstable->get_base_version()) { ctx.is_full_merge_ = true; } } // determine logical data version if (OB_SUCC(ret)) { if (OB_NOT_NULL(first_minor_sstable) && first_minor_sstable->get_logical_data_version() >= ctx.sstable_version_range_.snapshot_version_) { ctx.logical_data_version_ = first_minor_sstable->get_logical_data_version() + 1; STORAGE_LOG( INFO, "Logical data version of sstable larger than snapshot version", K(ctx), KPC(first_minor_sstable)); } else { ctx.logical_data_version_ = ctx.sstable_version_range_.snapshot_version_; } } OZ(multi_version_col_desc_gen.init(ctx.table_schema_)); OZ(multi_version_col_desc_gen.generate_multi_version_row_info(multi_version_row_info)); CK(OB_NOT_NULL(multi_version_row_info)); if (OB_SUCC(ret)) { ctx.multi_version_row_info_ = *multi_version_row_info; } OZ(init_merge_context(ctx), ctx); } return ret; } int ObPartitionStorage::init_merge_context(storage::ObSSTableMergeCtx& ctx) { int ret = OB_SUCCESS; bool has_lob = false; ObArray sstable_metas; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_ISNULL(ctx.table_schema_)) { ret = OB_ERR_SYS; LOG_WARN("table schema must not null", K(ret), K(ctx)); } else if (OB_FAIL(ctx.init_parallel_merge())) { LOG_WARN("Failed to init parallel merge in sstable merge ctx", K(ret)); } else if (OB_FAIL(ctx.table_schema_->has_lob_column(has_lob, true))) { LOG_WARN("Failed to check table has lob column", K(ret)); } else if (OB_FAIL(ctx.merge_context_.init(ctx.get_concurrent_cnt(), has_lob, &ctx.column_stats_, false))) { LOG_WARN("failed to init merge context", K(ret)); } else if (ctx.param_.is_major_merge()) { ObSSTable* old_version_sstable = NULL; for (int64_t i = 0; OB_SUCC(ret) && i < ctx.tables_handle_.get_count(); ++i) { ObITable* table = ctx.tables_handle_.get_table(i); if (OB_ISNULL(table)) { ret = OB_ERR_SYS; LOG_ERROR("table must not null", K(ret)); } else if (table->get_version() == ctx.param_.merge_version_ - 1 && table->is_major_sstable()) { old_version_sstable = static_cast(table); if (OB_FAIL(sstable_metas.push_back(&old_version_sstable->get_meta()))) { LOG_WARN("failed to add sstable meta", K(ret), K(ctx)); } } } if (OB_SUCC(ret) && ctx.need_incremental_checksum()) { if (OB_FAIL(ctx.column_checksum_.init(ctx.get_concurrent_cnt(), *ctx.table_schema_, !ctx.is_full_merge_, ctx.checksum_method_, sstable_metas))) { LOG_WARN("failed to init merge context checksum", K(ret), K(ctx)); } } } else if (ctx.param_.is_mini_merge()) { if (OB_FAIL(ctx.merge_context_for_complement_minor_sstable_.init( ctx.get_concurrent_cnt(), has_lob, &ctx.column_stats_, true))) { LOG_WARN("failed to init merge context", K(ret)); } } return ret; } int ObPartitionStorage::get_concurrent_cnt(ObTablesHandle& tables_handle, const share::schema::ObTableSchema& table_schema, const ObMergeType& merge_type, int64_t& concurrent_cnt) { int ret = OB_SUCCESS; ObIArray& tables = tables_handle.get_tables(); if (!table_schema.is_valid()) { ret = OB_ERR_SYS; LOG_ERROR("invalid args", K(ret), K(table_schema), K(tables.count())); } else if (MAJOR_MERGE != merge_type) { concurrent_cnt = 1; LOG_INFO("merge is only support one thread merge", K(merge_type), K(table_schema), K(tables_handle)); } else if (tables.count() < 1) { ret = OB_ERR_SYS; LOG_ERROR("invalid tables_handle", K(ret), K(tables_handle)); } else if (!tables.at(0)->is_sstable()) { ret = OB_ERR_SYS; LOG_ERROR("first table must be sstable", K(ret), K(tables)); } else { ObSSTable* first_sstable = static_cast(tables.at(0)); const int64_t tablet_size = table_schema.get_tablet_size(); if (OB_FAIL(first_sstable->get_concurrent_cnt(tablet_size, concurrent_cnt))) { LOG_WARN("failed to get_concurrent_cnt", K(ret), K(tables)); } } return ret; } void ObPartitionStorage::check_data_checksum(const common::ObReplicaType& replica_type, const ObPartitionKey& pkey, const ObTableSchema& schema, const ObSSTable& data_sstable, common::ObIArray& base_tables, ObIPartitionReport& report) { // check data_checksum between replicas.The check is used to hold the memstore if the data_checksum is not equal // between replicas Notes: if two replicas select the __all_meta_table at the same time, they may both can't get the // current version replica information. In this case, the check is not useful. int ret = OB_SUCCESS; ObArray items; if (OB_ISNULL(GCTX.sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, sql proxy must not be NULL", K(ret)); } else if (!data_sstable.is_major_sstable()) { // only check major sstable // TODO: open later } else if (OB_SUCCESS != (ret = ObSSTableDataChecksumOperator::get_checksum(pkey.get_table_id(), schema.get_table_id(), pkey.get_partition_id(), ObITable::MAJOR_SSTABLE, items, *GCTX.sql_proxy_))) { STORAGE_LOG(WARN, "fail to get partition checksum.", K(ret), K(pkey)); } else if (items.count() > 0) { const ObSSTableMeta& data_sstable_meta = data_sstable.get_meta(); if (REPLICA_TYPE_LOGONLY == replica_type) { if (0 == data_sstable_meta.data_checksum_) { } else { ret = OB_CHECKSUM_ERROR; STORAGE_LOG(ERROR, "data_checksum is not equal 0.", K(ret), K(replica_type), K(data_sstable_meta)); } } else { for (int64_t i = 0; OB_SUCC(ret) && i < items.count(); ++i) { // only compare the replica which data_version equal with current data_sstable if (data_sstable.get_snapshot_version() == items.at(i).snapshot_version_) { if (REPLICA_TYPE_LOGONLY == items.at(i).replica_type_) { if (0 == items.at(i).data_checksum_) { } else { ret = OB_CHECKSUM_ERROR; STORAGE_LOG(ERROR, "data_checksum between replicas is not equal.", K(ret), K(items.at(i))); } } else if (data_sstable_meta.data_checksum_ != items.at(i).data_checksum_) { ret = OB_CHECKSUM_ERROR; STORAGE_LOG( ERROR, "data_checksum between replicas is not equal.", K(ret), K(items.at(i)), K(data_sstable_meta)); } } } } #ifdef ERRSIM if (OB_SUCC(ret)) { ret = E(EventTable::EN_MERGE_CHECKSUM) OB_SUCCESS; } #endif if (OB_CHECKSUM_ERROR == ret) { int report_ret = OB_SUCCESS; if (OB_SUCCESS != (report_ret = report.report_merge_error(pkey, ret))) { STORAGE_LOG(ERROR, "Fail to submit checksum error task, ", K(report_ret)); } // OB_CHECKSUM_ERROR gold bless you, dump memtable and minor sstable, continue to refresh __all_meta_table STORAGE_LOG(INFO, "begin to dump, "); dump2text(schema, base_tables, pkey); // no need to check ret } } else { // first time insert into __all_meta_table, skip check } } void ObPartitionStorage::dump2text( const ObTableSchema& schema, common::ObIArray& base_tables, const ObPartitionKey& pkey) { int ret = OB_SUCCESS; int pret = 0; ObITable* table = NULL; ObSSTable* sstable = NULL; FILE* fd = NULL; int64_t file_size = 0; char real_dir[OB_MAX_FILE_NAME_LENGTH]; char dump_mem_dir[OB_MAX_FILE_NAME_LENGTH]; char dump_ss_dir[OB_MAX_FILE_NAME_LENGTH]; char dump_mem_name[OB_MAX_FILE_NAME_LENGTH]; char dump_ss_name[OB_MAX_FILE_NAME_LENGTH]; if (OB_FAIL(FileDirectoryUtils::get_file_size("/proc/sys/kernel/core_pattern", file_size)) || file_size > OB_MAX_FILE_NAME_LENGTH) { STORAGE_LOG(WARN, "fail to get size", K(ret)); } else if (NULL == (fd = fopen("/proc/sys/kernel/core_pattern", "r"))) { ret = OB_IO_ERROR; STORAGE_LOG(WARN, "open file fail:", K(ret)); } else if (fread(real_dir, sizeof(char), file_size, fd) != file_size) { ret = OB_IO_ERROR; STORAGE_LOG(WARN, "read file fail:", K(ret), K(errno), KERRNOMSG(errno)); } for (int64_t i = 0; OB_SUCC(ret) && i < base_tables.count(); i++) { if (NULL == (table = base_tables.at(i))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The store not a valid store.", K(ret), K(i)); } else { if (NULL != table && table->is_memtable()) { pret = snprintf(dump_mem_dir, OB_MAX_FILE_NAME_LENGTH, "%s/%s.%s.%ld.%s.%d.%s.%ld.%s.%d.%s.%d", dirname(real_dir), "dump_memtable", "part_id", pkey.get_partition_id(), "part_cnt", pkey.get_partition_cnt(), "table_id", pkey.get_table_id(), "major", table->get_version().major_, "minor", table->get_version().minor_); if (pret < 0 || pret >= OB_MAX_FILE_NAME_LENGTH) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "name too long", K(ret), K(dump_mem_dir)); } else if (0 != mkdir(dump_mem_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) { if (EEXIST == errno) { ret = OB_SUCCESS; } else { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "fail to mkdir.", K(ret), K(dump_ss_name), K(errno), KERRNOMSG(errno)); } } else { pret = snprintf(dump_mem_name, OB_MAX_FILE_NAME_LENGTH, "%s/%s", dump_mem_dir, "memtable"); if (pret < 0 || pret >= OB_MAX_FILE_NAME_LENGTH) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "name too long", K(ret), K(dump_mem_name)); } else { ((ObMemtable*)table)->dump2text(dump_mem_name); // no need to check ret } } } } } for (int64_t i = 1; OB_SUCC(ret) && i < base_tables.count(); i++) { // base sstore don't need to dump if (NULL == (table = base_tables.at(i))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The store not a valid store.", K(ret), K(i)); } else if (table->is_minor_sstable()) { sstable = static_cast(table); pret = snprintf(dump_ss_dir, OB_MAX_FILE_NAME_LENGTH, "%s/%s.%s.%ld.%s.%d.%s.%ld.%s.%d.%s.%d", dirname(real_dir), "dump_sstable", "part_id", pkey.get_partition_id(), "part_cnt", pkey.get_partition_cnt(), "table_id", schema.get_table_id(), "major", sstable->get_version().major_, "minor", sstable->get_version().minor_); if (pret < 0 || pret >= OB_MAX_FILE_NAME_LENGTH) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "name too long", K(ret), K(dump_ss_dir)); } else if (0 != mkdir(dump_ss_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) { if (EEXIST == errno) { ret = OB_SUCCESS; } else { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "fail to mkdir.", K(ret), K(dump_ss_dir), K(errno), KERRNOMSG(errno)); } } else { pret = snprintf(dump_ss_name, OB_MAX_FILE_NAME_LENGTH, "%s/%s", dump_ss_dir, "sstable"); if (pret < 0 || pret >= OB_MAX_FILE_NAME_LENGTH) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "name too long", K(ret), K(dump_ss_name)); } else { sstable->dump2text(dump_ss_dir, schema, dump_ss_name); // no need to check ret } } } } } int ObPartitionStorage::lock(const ObStoreCtx& ctx) { int ret = OB_SUCCESS; UNUSED(ctx); return ret; } int ObPartitionStorage::check_index_need_build(const ObTableSchema& index_schema, bool& need_build) { int ret = OB_SUCCESS; bool table_exist = false; need_build = false; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (!index_schema.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(index_schema)); } else if (OB_FAIL(schema_service_->check_table_exist(index_schema.get_table_id(), OB_INVALID_VERSION, /*latest local guard*/ table_exist))) { STORAGE_LOG( WARN, "Fail to check if table exist, ", K_(pkey), "schema_version", index_schema.get_schema_version(), K(ret)); } else if (OB_UNLIKELY(!table_exist)) { need_build = false; ObTaskController::get().allow_next_syslog(); LOG_INFO("The table does not exist, no need to create index, ", K(index_schema.get_table_id())); } else { need_build = index_schema.is_index_local_storage() && !index_schema.can_read_index() && INDEX_STATUS_UNAVAILABLE == index_schema.get_index_status(); if (need_build) { ObIndexStatusTableOperator::ObBuildIndexStatus status; if (OB_FAIL(ObIndexStatusTableOperator::get_build_index_status( index_schema.get_table_id(), pkey_.get_partition_id(), GCTX.self_addr_, *GCTX.sql_proxy_, status))) { if (OB_ENTRY_NOT_EXIST != ret) { LOG_WARN("fail to get build index status", K(ret), K(pkey_), "index_id", index_schema.get_table_id()); } else { ret = OB_SUCCESS; need_build = true; } } else { need_build = false; } } } return ret; } int ObPartitionStorage::get_build_index_stores( const ObTenantSchema& tenant_schema, compaction::ObBuildIndexParam& param) { int ret = OB_SUCCESS; ObIndexTransStatus status; ObSimpleFrozenStatus frozen_status; int64_t freeze_version = 0; ObFreezeInfoProxy freeze_info_proxy; const bool allow_not_ready = false; const bool need_safety_check = true; ObTablesHandle tables_handle; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_UNLIKELY(!tenant_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(tenant_schema)); } else if (OB_FAIL(ObIndexTransStatusReporter::get_wait_trans_status(param.index_schema_->get_table_id(), ObIndexTransStatusReporter::ROOT_SERVICE, -1, *GCTX.sql_proxy_, status))) { STORAGE_LOG( WARN, "fail to get build index version", K(ret), K(pkey_), "index_id", param.index_schema_->get_table_id()); } else if (OB_UNLIKELY(status.snapshot_version_ <= 0)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "error unexpected, snapshot version is invalid", K(ret), K(status.snapshot_version_)); } else if (OB_FAIL(store_.get_read_tables(param.index_schema_->get_data_table_id(), status.snapshot_version_, tables_handle, allow_not_ready, need_safety_check))) { if (OB_REPLICA_NOT_READABLE == ret) { ret = OB_EAGAIN; } else { STORAGE_LOG(WARN, "snapshot version has been discard", K(ret)); } } else if (OB_FAIL(freeze_info_proxy.get_frozen_info_less_than( *GCTX.sql_proxy_, status.snapshot_version_, frozen_status))) { if (OB_ENTRY_NOT_EXIST != ret) { STORAGE_LOG(WARN, "fail to get next major freeze", K(ret), K(pkey_)); } else { freeze_version = 1L; ret = OB_SUCCESS; } } else { freeze_version = frozen_status.frozen_version_; } // get local sort task ranges if (OB_SUCC(ret)) { ObSSTable* sstable = NULL; if (OB_FAIL(tables_handle.get_first_sstable(sstable))) { STORAGE_LOG(WARN, "fail to get last base store", K(ret)); } else if (OB_ISNULL(sstable)) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "sstable must not be NULL", K(ret)); } else { ObExtStoreRange range; for (int64_t i = 0; OB_SUCC(ret) && i < param.concurrent_cnt_; ++i) { if (OB_FAIL(sstable->get_range(i, param.concurrent_cnt_, param.allocator_, range))) { STORAGE_LOG(WARN, "fail to get range", K(ret)); } else if (OB_FAIL(param.local_sort_ranges_.push_back(range))) { STORAGE_LOG(WARN, "fail to push back local sort range", K(ret)); } } } } if (OB_SUCC(ret)) { DEBUG_SYNC(BEFORE_UPDATE_INDEX_BUILD_VERSION); const ObTableSchema* dep_table_schema = param.dep_table_schema_; param.version_ = freeze_version; param.snapshot_version_ = status.snapshot_version_; param.checksum_method_ = CCM_VALUE_ONLY; if (NULL == dep_table_schema) { STORAGE_LOG(INFO, "succ to get build index param", K(param), "table_schema", *param.table_schema_, "index_schema", *param.index_schema_); } else { STORAGE_LOG(INFO, "succ to get build index param", K(param), "table_schema", *param.table_schema_, "index_schema", *param.index_schema_, "dep_table_schema", *dep_table_schema); } } return ret; } int ObPartitionStorage::get_build_index_context(const ObBuildIndexParam& param, ObBuildIndexContext& context) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "invalid arguments", K(ret)); } else if (OB_UNLIKELY(!param.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(param)); } else if (OB_FAIL(context.preallocate_local_sorters(param.concurrent_cnt_))) { STORAGE_LOG(WARN, "fail to preallocate local sorters", K(ret)); } return ret; } int ObPartitionStorage::generate_index_output_param(const ObTableSchema& data_table_schema, const ObTableSchema& index_schema, ObArray& col_ids, ObArray& org_col_ids, ObArray& output_projector, int64_t& unique_key_cnt) { int ret = OB_SUCCESS; col_ids.reuse(); output_projector.reuse(); unique_key_cnt = 0; if (OB_UNLIKELY(!data_table_schema.is_valid() || !index_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(data_table_schema), K(index_schema)); } else { // add data table rowkey const ObRowkeyInfo& rowkey_info = data_table_schema.get_rowkey_info(); const ObRowkeyColumn* rowkey_column = NULL; ObColDesc col_desc; for (int32_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { if (OB_ISNULL(rowkey_column = rowkey_info.get_column(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("The rowkey column is NULL", K(ret), K(i), K(rowkey_info)); } else { col_desc.col_id_ = rowkey_column->column_id_; col_desc.col_type_ = rowkey_column->type_; col_desc.col_order_ = rowkey_column->order_; if (OB_FAIL(col_ids.push_back(col_desc))) { STORAGE_LOG(WARN, "fail to push back column desc", K(ret)); } } } // add index table other columns ObArray index_table_columns; if (OB_SUCC(ret)) { if (OB_FAIL(index_schema.get_column_ids(index_table_columns))) { STORAGE_LOG(WARN, "fail to get column ids", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < index_table_columns.count(); ++i) { const ObColDesc& index_col_desc = index_table_columns.at(i); int64_t j = 0; for (j = 0; OB_SUCC(ret) && j < col_ids.count(); ++j) { if (index_col_desc.col_id_ == col_ids.at(j).col_id_) { break; } } if (j == col_ids.count() && index_col_desc.col_id_ < OB_MIN_SHADOW_COLUMN_ID) { if (OB_FAIL(col_ids.push_back(index_col_desc))) { STORAGE_LOG(WARN, "fail to push back index col desc", K(ret)); } } } } for (int64_t k = 0; OB_SUCC(ret) && k < index_schema.get_rowkey_column_num(); ++k) { if (index_table_columns.at(k).col_id_ >= OB_MIN_SHADOW_COLUMN_ID) { index_table_columns.at(k).col_id_ = index_table_columns.at(k).col_id_ - OB_MIN_SHADOW_COLUMN_ID; } else { ++unique_key_cnt; } } // generate output projector for (int64_t i = 0; OB_SUCC(ret) && i < index_table_columns.count(); ++i) { int64_t j = 0; for (j = 0; OB_SUCC(ret) && j < col_ids.count(); ++j) { if (col_ids.at(j).col_id_ == index_table_columns.at(i).col_id_) { break; } } if (j == col_ids.count()) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "error unexpected, output col does not exist in index table columns", K(ret)); } else if (OB_FAIL(output_projector.push_back(static_cast(j)))) { STORAGE_LOG(WARN, "fail to push back output projector", K(ret)); } } if (OB_SUCC(ret)) { if (OB_FAIL(org_col_ids.assign(index_table_columns))) { STORAGE_LOG(WARN, "fail to assign col ids", K(ret)); } } STORAGE_LOG( INFO, "output index projector", K(output_projector), K(index_table_columns), K(col_ids), K(unique_key_cnt)); } return ret; } int ObPartitionStorage::local_sort_index_by_range( const int64_t idx, const ObBuildIndexParam& index_param, const ObBuildIndexContext& index_context) { int ret = OB_SUCCESS; int64_t start_time = ObTimeUtility::current_time(); ObExtStoreRange range; ObArray col_ids; ObArray org_col_ids; ObArray output_projector; ObArray projector; const ObStoreRow* row = NULL; ObArray sort_column_indexes; ObTablesHandle tables_handle; ObSSTable* sstable = NULL; ObArenaAllocator allocator(ObModIds::OB_PARTITION_STORAGE); uint64_t tenant_id = OB_INVALID_ID; ObColumnChecksumCalculator checksum; int64_t row_count = 0; ObQueryFlag query_flag(ObQueryFlag::Forward, true, /*is daily merge scan*/ true, /*is read multiple macro block*/ true, /*sys task scan, read one macro block in single io*/ false /*is full row scan?*/, false, false); int comp_ret = OB_SUCCESS; ObStoreRowComparer comparer(comp_ret, sort_column_indexes); const int64_t file_buf_size = ObExternalSortConstant::DEFAULT_FILE_READ_WRITE_BUFFER; const int64_t expire_timestamp = 0; // no time limited const ObTableSchema* table_schema = NULL; const ObTableSchema* index_schema = NULL; const ObTableSchema* dep_table = NULL; const int64_t concurrent_cnt = index_param.concurrent_cnt_; ObExternalSort* local_sort = NULL; int64_t unique_key_cnt = 0; int64_t macro_block_cnt = 0; ObCreateIndexKey key; ObCreateIndexScanTaskStat task_stat; int tmp_ret = OB_SUCCESS; int64_t index_table_size = 0; if (OB_UNLIKELY(!index_param.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(index_param)); } else { table_schema = index_param.table_schema_; index_schema = index_param.index_schema_; dep_table = index_param.dep_table_schema_; local_sort = index_context.sorters_.at(idx); tenant_id = extract_tenant_id(table_schema->get_table_id()); if (index_schema->is_materialized_view()) { query_flag.join_type_ = sql::LEFT_OUTER_JOIN; } } DEBUG_SYNC(BEFORE_BUILD_LOCAL_INDEX); if (OB_SUCC(ret)) { const bool allow_not_ready = false; const bool need_safety_check = true; if (OB_FAIL(store_.get_read_tables(index_param.index_schema_->get_data_table_id(), index_param.snapshot_version_, tables_handle, allow_not_ready, need_safety_check))) { if (OB_REPLICA_NOT_READABLE == ret) { ret = OB_EAGAIN; } else { STORAGE_LOG(WARN, "snapshot version has been discard", K(ret)); } } else if (OB_FAIL(tables_handle.get_first_sstable(sstable))) { STORAGE_LOG(WARN, "fail to get last base store", K(ret)); } else if (OB_ISNULL(sstable)) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "sstable must not be NULL", K(ret)); } else { if (OB_FAIL(index_param.local_sort_ranges_.at(idx, range))) { STORAGE_LOG(WARN, "fail to get range", K(ret)); } else { macro_block_cnt = sstable->get_macro_block_count() / concurrent_cnt; key.index_table_id_ = index_schema->get_table_id(); key.partition_id_ = pkey_.get_partition_id(); key.tenant_id_ = extract_tenant_id(key.index_table_id_); task_stat.type_ = ObILongOpsTaskStat::TaskType::SCAN; task_stat.task_id_ = idx; task_stat.state_ = ObILongOpsTaskStat::TaskState::RUNNING; task_stat.macro_count_ = macro_block_cnt; if (OB_SUCCESS != (tmp_ret = key.to_key_string())) { LOG_WARN("fail to key string", K(ret)); } else if (OB_SUCCESS != (tmp_ret = LONG_OPS_MONITOR_INSTANCE.update_task_stat(key, task_stat))) { STORAGE_LOG(WARN, "fail to update task stat", K(ret)); } } } } if (OB_SUCC(ret)) { col_ids.reuse(); sort_column_indexes.reuse(); for (int64_t i = 0; OB_SUCC(ret) && i < index_schema->get_rowkey_column_num(); ++i) { if (OB_FAIL(sort_column_indexes.push_back(i))) { STORAGE_LOG(WARN, "Fail to push sort column indexes, ", K(ret), K(i)); } } } STORAGE_LOG( INFO, "start to local_sort_index_by_range", "index_id", index_schema->get_table_id(), K(idx), K(concurrent_cnt)); if (OB_FAIL(ret)) { // do nothing } else if (OB_FAIL(SLOGGER.begin(OB_LOG_CS_DAILY_MERGE))) { STORAGE_LOG(WARN, "fail to begin transaction", K(ret)); } else if (OB_FAIL(local_sort->init( index_param.DEFAULT_INDEX_SORT_MEMORY_LIMIT, file_buf_size, expire_timestamp, tenant_id, &comparer))) { STORAGE_LOG(WARN, "Fail to init external sort, ", K(ret)); } else if (OB_FAIL(comp_ret)) { STORAGE_LOG(ERROR, "comp_ret must not fail", K(ret)); } else if (OB_FAIL(generate_index_output_param( *table_schema, *index_schema, col_ids, org_col_ids, projector, unique_key_cnt))) { STORAGE_LOG(WARN, "Fail to get column ids, ", K(ret)); } else { // extend col_ids for generated column ObArray extended_col_ids; ObArray org_extended_col_ids; ObArray dependent_exprs; ObArray gen_col_schemas; ObExprCtx expr_ctx; if (OB_SUCC(ret)) { ObArray index_table_columns; if (OB_FAIL(append(extended_col_ids, col_ids))) { STORAGE_LOG(ERROR, "failed to clone col_ids", K(ret), K(col_ids), K(extended_col_ids)); } else if (OB_FAIL(append(org_extended_col_ids, org_col_ids))) { STORAGE_LOG(WARN, "fail to append col array", K(ret)); } else if (OB_FAIL(append(output_projector, projector))) { STORAGE_LOG(WARN, "fail to clone output projector", K(ret)); } else { const ObTableSchema* data_table_schema = NULL; const ObColumnSchemaV2* column_schema = NULL; for (int64_t i = 0; OB_SUCC(ret) && i < org_col_ids.count(); ++i) { data_table_schema = NULL; column_schema = NULL; if (org_col_ids.at(i).col_id_ < OB_APP_MIN_COLUMN_ID) { // do nothing } else if (OB_ISNULL(column_schema = table_schema->get_column_schema(org_col_ids.at(i).col_id_))) { if (NULL == dep_table || !index_schema->is_depend_column(org_col_ids.at(i).col_id_) || OB_ISNULL( column_schema = dep_table->get_column_schema(org_col_ids.at(i).col_id_ - OB_MIN_MV_COLUMN_ID))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The column schema is NULL, ", K(ret), K(org_col_ids.at(i).col_id_)); } else { data_table_schema = dep_table; } } else { data_table_schema = table_schema; } if (OB_SUCC(ret)) { if (NULL != column_schema && column_schema->is_generated_column()) { ObSEArray ref_columns; if (OB_FAIL(column_schema->get_cascaded_column_ids(ref_columns))) { STORAGE_LOG(WARN, "Failed to get cascaded column ids", K(ret)); } else { ObColDesc col; const ObColumnSchemaV2* data_column_schema = NULL; for (int64_t j = 0; OB_SUCC(ret) && j < ref_columns.count(); ++j) { col.reset(); if (OB_ISNULL(data_column_schema = table_schema->get_column_schema(ref_columns.at(j)))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The column schema is NULL, ", K(ret)); } else { col.col_id_ = ref_columns.at(j); col.col_type_ = data_column_schema->get_meta_type(); col.col_order_ = data_column_schema->get_order_in_rowkey(); if (OB_FAIL(org_extended_col_ids.push_back(col))) { STORAGE_LOG(WARN, "fail to push back col id", K(ret)); } else { int64_t k = 0; for (k = 0; OB_SUCC(ret) && k < extended_col_ids.count(); ++k) { if (extended_col_ids.at(k).col_id_ == col.col_id_) { break; } } if (k == extended_col_ids.count()) { if (OB_FAIL(extended_col_ids.push_back(col))) { STORAGE_LOG(WARN, "push back error", K(ret)); } } if (OB_SUCC(ret)) { if (OB_FAIL(output_projector.push_back(static_cast(k)))) { STORAGE_LOG(WARN, "fail to push back output projector", K(ret)); } } } } } } if (OB_SUCC(ret)) { // make sql expression for generated column ObISqlExpression* expr = NULL; if (OB_FAIL(sql::ObSQLUtils::make_generated_expression_from_str( column_schema->get_orig_default_value().get_string(), *data_table_schema, *column_schema, org_extended_col_ids, allocator, expr))) { STORAGE_LOG(WARN, "failed to make sql expression", K(column_schema->get_orig_default_value().get_string()), K(*data_table_schema), K(*column_schema), K(extended_col_ids), K(ret)); } else if (OB_FAIL(dependent_exprs.push_back(expr))) { STORAGE_LOG(WARN, "push back error", K(ret)); } else if (OB_FAIL(gen_col_schemas.push_back(column_schema))) { STORAGE_LOG(WARN, "push back error", K(ret)); } } } else { if (OB_FAIL(dependent_exprs.push_back(NULL))) { STORAGE_LOG(WARN, "push back error", K(ret)); } else if (OB_FAIL(gen_col_schemas.push_back(NULL))) { STORAGE_LOG(WARN, "push back error", K(ret)); } } } } } } LOG_INFO("output projector", K(extended_col_ids), K(org_extended_col_ids), K(output_projector)); int64_t buf_cells_cnt = org_extended_col_ids.count() + org_col_ids.count(); ObObj *cells_buf = NULL; ObStoreRow default_row; ObStoreRow tmp_row; ObStoreRow new_row; if (OB_FAIL(ret)) { } else if (OB_ISNULL(cells_buf = reinterpret_cast(allocator.alloc(sizeof(ObObj) * buf_cells_cnt)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("failed to alloc cells buf", K(ret), K(org_extended_col_ids.count()), K(org_col_ids.count())); } else { cells_buf = new (cells_buf) ObObj[buf_cells_cnt]; default_row.flag_ = ObActionFlag::OP_ROW_EXIST; default_row.row_val_.cells_ = cells_buf; default_row.row_val_.count_ = org_extended_col_ids.count(); tmp_row.flag_ = ObActionFlag::OP_ROW_EXIST; tmp_row.row_val_.cells_ = cells_buf + org_extended_col_ids.count(); tmp_row.row_val_.count_ = org_col_ids.count(); new_row.flag_ = ObActionFlag::OP_ROW_EXIST; } // fill default row for (int64_t i = 0; OB_SUCC(ret) && i < org_extended_col_ids.count(); i++) { const uint64_t col_id = org_extended_col_ids.at(i).col_id_; const ObColumnSchemaV2 *col = index_schema->get_column_schema(col_id); if (NULL == col) { // generated column's depend column, get column schema from data table col = table_schema->get_column_schema(col_id); } if (OB_ISNULL(col)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get column schema", K(ret), K(col_id)); } else { default_row.row_val_.cells_[i] = col->get_orig_default_value(); } } ObArray col_params; for (int64_t i = 0; OB_SUCC(ret) && i < extended_col_ids.count(); i++) { const ObColumnSchemaV2* col = index_schema->get_column_schema(extended_col_ids.at(i).col_id_); if (NULL == col) { // generated column's depend column, get column schema from data table col = table_schema->get_column_schema(extended_col_ids.at(i).col_id_); } if (NULL == col) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get column schema failed", K(ret)); } else { void* buf = allocator.alloc(sizeof(ObColumnParam)); if (NULL == buf) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret)); } else { ObColumnParam* col_param = new (buf) ObColumnParam(allocator); if (OB_FAIL(ObTableParam::convert_column_schema_to_param(*col, *col_param))) { LOG_WARN("convert schema column to column parameter failed", K(ret)); } else if (OB_FAIL(col_params.push_back(col_param))) { LOG_WARN("push to array failed", K(ret)); } } } } ObTableAccessParam access_param; ObTableAccessContext access_ctx; ObBlockCacheWorkingSet block_cache_ws; ObStoreCtx ctx; ObStoreRow result_row; if (OB_SUCC(ret)) { if (OB_FAIL(access_param.out_col_desc_param_.init())) { LOG_WARN("init out cols fail", K(ret)); } else if (OB_FAIL(access_param.out_col_desc_param_.assign(extended_col_ids))) { LOG_WARN("assign out cols fail", K(ret)); } else { access_param.iter_param_.table_id_ = table_schema->get_table_id(); access_param.iter_param_.rowkey_cnt_ = table_schema->get_rowkey_column_num(); access_param.iter_param_.schema_version_ = table_schema->get_schema_version(); access_param.iter_param_.out_cols_project_ = &output_projector; access_param.iter_param_.out_cols_ = &access_param.out_col_desc_param_.get_col_descs(); access_param.out_cols_param_ = &col_params; access_param.reserve_cell_cnt_ = output_projector.count(); } } if (OB_SUCC(ret)) { const int64_t cell_cnt = org_extended_col_ids.count(); if (NULL == (cells_buf = reinterpret_cast(allocator.alloc(sizeof(ObObj) * cell_cnt)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "failed to alloc cells buf", K(ret), K(cell_cnt)); } else { result_row.flag_ = ObActionFlag::OP_ROW_EXIST; result_row.row_val_.cells_ = new (cells_buf) ObObj[cell_cnt]; result_row.row_val_.count_ = cell_cnt; } } if (OB_SUCC(ret)) { common::ObVersionRange trans_version_range; trans_version_range.snapshot_version_ = index_param.snapshot_version_; trans_version_range.multi_version_start_ = index_param.snapshot_version_; trans_version_range.base_version_ = 0; ObPartitionKey pg_key; if (OB_ISNULL(ctx.mem_ctx_ = txs_->get_mem_ctx_factory()->alloc())) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "fail to allocate transaction memory context", K(ret)); } else if (OB_FAIL(ctx.mem_ctx_->trans_begin())) { STORAGE_LOG(WARN, "fail to begin transaction", K(ret)); } else if (OB_FAIL( ctx.mem_ctx_->sub_trans_begin(index_param.snapshot_version_, BUILD_INDEX_READ_SNAPSHOT_VERSION))) { STORAGE_LOG(WARN, "fail to begin sub transaction", K(ret), K(index_param)); } else if (OB_FAIL(block_cache_ws.init(tenant_id))) { LOG_WARN("block_cache_ws init failed", K(ret), K(tenant_id)); } else if (OB_FAIL(checksum.init(org_col_ids.count()))) { STORAGE_LOG(WARN, "fail to init checksum calculator", K(ret)); } else if (OB_FAIL(ObPartitionService::get_instance().get_pg_key(pkey_, pg_key))) { LOG_WARN("failed to get_pg_key", K(ret), K(pkey_)); } else if (OB_FAIL(ctx.init_trans_ctx_mgr(pg_key))) { LOG_WARN("failed to init_trans_ctx_mgr", K(ret), K(pg_key)); } else if (OB_FAIL(access_ctx.init(query_flag, ctx, allocator, allocator, block_cache_ws, trans_version_range))) { LOG_WARN("failed to init accesss ctx", K(ret)); } } ObStoreCtx rctx; ObTablesHandle rtables_handle; ObTableAccessParam rta_param; ObTableAccessContext rta_ctx; ObGetTableParam get_table_param; ObIStoreRowIterator* row_iter = NULL; ObMultipleScanMerge* scan_merge = NULL; void* buf = NULL; ObTableParam rt_param(allocator); get_table_param.partition_store_ = &store_; STORAGE_LOG(INFO, "start do output_store.scan"); if (1 == idx && !index_param.table_schema_->is_sys_table()) { DEBUG_SYNC(BEFORE_BUILD_LOCAL_INDEX_SCAN); } if (OB_FAIL(ret)) { // do nothing } else if (OB_FAIL(range.to_collation_free_range_on_demand_and_cutoff_range(allocator))) { STORAGE_LOG(WARN, "fail to get to collation free range", K(ret)); } else if (OB_ISNULL(buf = allocator.alloc(sizeof(ObMultipleScanMerge)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "fail to alloc memory for ObMultipleScanMerge", K(ret)); } else if (OB_ISNULL(scan_merge = new (buf) ObMultipleScanMerge())) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "fail to do placement new", K(ret)); } else if (OB_FAIL(scan_merge->init(access_param, access_ctx, get_table_param))) { STORAGE_LOG(WARN, "fail to init scan merge", K(ret), K(access_param), K(access_ctx)); } else if (OB_FAIL(scan_merge->open(range))) { STORAGE_LOG(WARN, "fail to open scan merge", K(ret)); } else { scan_merge->disable_padding(); scan_merge->disable_fill_default(); scan_merge->disable_fill_virtual_column(); row_iter = scan_merge; } if (OB_SUCC(ret)) { ObArenaAllocator calc_buf(ObModIds::OB_SQL_EXPR_CALC); int64_t get_next_row_time = 0; int64_t trans_time = 0; int64_t add_item_time = 0; int64_t t1 = 0; int64_t t2 = 0; int64_t t3 = 0; int64_t t4 = 0; uint64_t tenant_id = extract_tenant_id(table_schema->get_table_id()); if (OB_FAIL(sql::ObSQLUtils::make_default_expr_context(tenant_id, allocator, expr_ctx))) { STORAGE_LOG(WARN, "failed to make default expr context ", K(ret)); } tables_handle.reset(); DEBUG_SYNC(BEFORE_BUILD_LOCAL_INDEX_REFRESH_TABLES); STORAGE_LOG(INFO, "start do next row", K(col_ids), K(default_row.row_val_)); // TODO(): replace this with global function ObWorker::CompatMode sql_mode; if (OB_SUCC(ret)) { sql_mode = THIS_WORKER.get_compatibility_mode(); } ObTableSchemaParam schema_param(allocator); ObRelativeTable relative_table; ObSQLMode sql_mode_for_ddl_reshape = SMO_TRADITIONAL; // Hack to prevent row reshaping from converting empty string to null. // // Supposing we have a row of type varchar with some spaces and an index on this column, // and then we convert this column to char. In this case, the DDL routine will first rebuild // the data table and then rebuilding the index table. The row may be reshaped as follows. // // - without hack: ' '(varchar) => ''(char) => null(char) // - with hack: ' '(varchar) => ''(char) => ''(char) ObFixedArray column_ids(allocator, org_col_ids.count()); RowReshape *row_reshape_ins = nullptr; for (int64_t i = 0; OB_SUCC(ret) && i < org_col_ids.count(); i++) { const ObColDesc &col_desc = org_col_ids.at(i); if (OB_FAIL(column_ids.push_back(col_desc.col_id_))) { LOG_WARN("failed to push back column id", K(ret)); } } if (OB_FAIL(ret)) { } else if (OB_FAIL(schema_param.convert(table_schema, column_ids))) { LOG_WARN("failed to convert schema param", K(ret)); } else if (OB_UNLIKELY(false == relative_table.set_schema_param(&schema_param))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to set schema param", K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need( allocator, org_col_ids, 1, relative_table, sql_mode_for_ddl_reshape, row_reshape_ins))) { STORAGE_LOG(WARN, "failed to malloc for row reshape", K(ret)); } else { // do nothing } while (OB_SUCC(ret)) { t1 = ObTimeUtility::current_time(); calc_buf.reuse(); dag_yield(); if (OB_FAIL(row_iter->get_next_row(row))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "Fail to get next row, ", K(ret)); } } else { t2 = ObTimeUtility::current_time(); get_next_row_time += t2 - t1; DEBUG_SYNC(BEFORE_BUILD_LOCAL_INDEX_REFRESH_TABLES_MID); // fill default value if column is not generated column for (int64_t k = 0; OB_SUCC(ret) && k < row->row_val_.count_; ++k) { if (row->row_val_.cells_[k].is_nop_value()) { if (k >= org_col_ids.count()) { // e.g. columns that some generated columns in org_col_ids depend on result_row.row_val_.cells_[k] = default_row.row_val_.cells_[k]; } else if (nullptr == dependent_exprs.at(k)) { result_row.row_val_.cells_[k] = default_row.row_val_.cells_[k]; } } else { result_row.row_val_.cells_[k] = row->row_val_.cells_[k]; } } for (int64_t k = 0; OB_SUCC(ret) && k < org_col_ids.count(); ++k) { if (row->row_val_.cells_[k].is_nop_value()) { if (NULL != dependent_exprs.at(k)) { // generated column if (OB_FAIL(sql::ObSQLUtils::calc_sql_expression(dependent_exprs.at(k), *table_schema, org_extended_col_ids, result_row.row_val_, calc_buf, expr_ctx, tmp_row.row_val_.cells_[k]))) { STORAGE_LOG(WARN, "failed to calc expr", K(result_row.row_val_), K(org_col_ids), K(row->row_val_), K(dependent_exprs.at(k)), K(extended_col_ids), K(org_extended_col_ids), K(ret)); } else if (OB_UNLIKELY(!tmp_row.row_val_.cells_[k].is_null() && !sql::ObSQLUtils::is_same_type_for_compare( gen_col_schemas.at(k)->get_meta_type(), tmp_row.row_val_.cells_[k].get_meta()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("result type is not consistent with schema, please check expr result", K(ret), "column schema type", gen_col_schemas.at(k)->get_meta_type(), "result", tmp_row.row_val_.cells_[k]); } } else { tmp_row.row_val_.cells_[k] = default_row.row_val_.cells_[k]; } } else { tmp_row.row_val_.cells_[k] = row->row_val_.cells_[k]; } } // process unique index shadow columns if (OB_SUCC(ret) && index_schema->is_unique_index()) { const int64_t shadow_column_cnt = index_schema->get_rowkey_column_num() - unique_key_cnt; if (OB_FAIL(ObUniqueIndexRowTransformer::convert_to_unique_index_row(tmp_row.row_val_, static_cast(sql_mode), unique_key_cnt, shadow_column_cnt, NULL, tmp_row.row_val_))) { LOG_WARN("fail to convert to unique index row", K(ret)); } } // process fulltext if (OB_SUCC(ret)) { if (index_schema->is_domain_index()) { int64_t t3_1 = 0; int64_t t3_2 = 0; ObSEArray words; ObObj orig_obj; uint64_t domain_column_id = OB_INVALID_ID; if (OB_FAIL(index_schema->get_index_info().get_fulltext_column(domain_column_id))) { STORAGE_LOG(WARN, "failed to get domain column id", K(index_schema->get_index_info()), K(ret)); } else { int64_t pos = -1; for (int64_t i = 0; OB_SUCC(ret) && -1 == pos && i < org_col_ids.count(); ++i) { if (domain_column_id == org_col_ids.at(i).col_id_) { pos = i; } } if (OB_SUCC(ret)) { ObSEArray words; ObString orig_string; if (OB_UNLIKELY(pos < 0 || tmp_row.row_val_.count_ <= pos)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "domain index has no domain column", K(domain_column_id), K(org_col_ids), K(ret)); } else if (OB_FALSE_IT(orig_string = tmp_row.row_val_.cells_[pos].get_string())) { } else if (OB_FAIL(split_on(orig_string, ',', words))) { STORAGE_LOG(WARN, "failed to split string", K(orig_string), K(ret)); } else { t3 = ObTimeUtility::current_time(); trans_time += t3 - t2; for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) { t3_1 = ObTimeUtility::current_time(); tmp_row.row_val_.cells_[pos].set_string(tmp_row.row_val_.cells_[pos].get_type(), words.at(i)); t3_2 = ObTimeUtility::current_time(); trans_time += t3_2 - t3_1; if (OB_FAIL(local_sort->add_item(tmp_row))) { STORAGE_LOG(WARN, "Fail to add item to sort, ", K(ret)); } ++row_count; index_table_size += tmp_row.get_serialize_size(); t4 = ObTimeUtility::current_time(); add_item_time += t4 - t3_2; } // recover original data tmp_row.row_val_.cells_[pos] = orig_obj; } } } } else if (OB_FAIL(reshape_table_rows(&tmp_row.row_val_, row_reshape_ins, org_col_ids.count(), &new_row, 1, sql_mode_for_ddl_reshape))) { STORAGE_LOG(WARN, "failed to reshape rows", K(ret)); } else { tmp_row = new_row; t3 = ObTimeUtility::current_time(); if (OB_FAIL(local_sort->add_item(tmp_row))) { STORAGE_LOG(WARN, "Fail to add item to sort, ", K(ret)); } ++row_count; t4 = ObTimeUtility::current_time(); trans_time += t3 - t2; add_item_time += t4 - t3; index_table_size += tmp_row.get_serialize_size(); } } // calculate main table checksum if (OB_SUCC(ret)) { if (OB_FAIL(checksum.calc_column_checksum(index_param.checksum_method_, &tmp_row, NULL, NULL))) { STORAGE_LOG(WARN, "fail to calc column checksum", K(ret)); } } } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } free_row_reshape(allocator, row_reshape_ins, 1); STORAGE_LOG(INFO, "print prepare build index cost time", K(get_next_row_time), K(trans_time), K(add_item_time), K(org_col_ids), K(ObArrayWrap(checksum.get_column_checksum(), org_col_ids.count()))); } task_stat.state_ = (OB_SUCCESS == ret) ? ObILongOpsTaskStat::TaskState::SUCCESS : ObILongOpsTaskStat::TaskState::FAIL; if (OB_SUCCESS != (tmp_ret = LONG_OPS_MONITOR_INSTANCE.update_task_stat(key, task_stat))) { STORAGE_LOG(WARN, "fail to update task stat", K(tmp_ret), K(key)); } if (NULL != scan_merge) { scan_merge->~ObMultipleScanMerge(); scan_merge = NULL; } if (NULL != rctx.mem_ctx_) { rctx.mem_ctx_->trans_end(true, 0); rctx.mem_ctx_->trans_clear(); txs_->get_mem_ctx_factory()->free(rctx.mem_ctx_); rctx.mem_ctx_ = NULL; } if (NULL != ctx.mem_ctx_) { ctx.mem_ctx_->trans_end(true, 0); ctx.mem_ctx_->trans_clear(); txs_->get_mem_ctx_factory()->free(ctx.mem_ctx_); ctx.mem_ctx_ = NULL; } sql::ObSQLUtils::destruct_default_expr_context(expr_ctx); if (OB_SUCC(ret)) { const bool is_final_merge = false; int64_t storage_log_seq_num = 0; const int64_t index_macro_cnt = index_table_size / OB_FILE_SYSTEM.get_macro_block_size(); ObCreateIndexSortTaskStat sort_task_stat; sort_task_stat.task_id_ = concurrent_cnt + idx; sort_task_stat.type_ = ObILongOpsTaskStat::TaskType::SORT; sort_task_stat.state_ = ObILongOpsTaskStat::TaskState::RUNNING; sort_task_stat.macro_count_ = index_macro_cnt; sort_task_stat.run_count_ = MAX(1L, static_cast(std::log(index_macro_cnt) / std::log(31))); const_cast(index_context).add_index_macro_cnt(index_macro_cnt); if (OB_SUCCESS != (tmp_ret = LONG_OPS_MONITOR_INSTANCE.update_task_stat(key, sort_task_stat))) { STORAGE_LOG(WARN, "fail to update task stat", K(ret)); } if (OB_FAIL(local_sort->do_sort(is_final_merge))) { STORAGE_LOG(WARN, "Fail to do sort, ", K(ret)); } else if (OB_FAIL( const_cast(index_context) .add_main_table_checksum(checksum.get_column_checksum(), row_count, org_col_ids.count()))) { STORAGE_LOG(WARN, "fail to add main table checksum", K(ret)); } else if (OB_FAIL(SLOGGER.commit(storage_log_seq_num))) { STORAGE_LOG(WARN, "fail to commit transaction", K(ret)); } sort_task_stat.state_ = (OB_SUCCESS == ret) ? ObILongOpsTaskStat::TaskState::SUCCESS : ObILongOpsTaskStat::TaskState::FAIL; if (OB_SUCCESS != (tmp_ret = LONG_OPS_MONITOR_INSTANCE.update_task_stat(key, sort_task_stat))) { STORAGE_LOG(WARN, "fail to update task stat", K(ret)); } } } if (OB_FAIL(ret)) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = SLOGGER.abort())) { STORAGE_LOG(WARN, "fail to abort transaction", K(tmp_ret)); } } int64_t cost_time = ObTimeUtility::current_time() - start_time; STORAGE_LOG(INFO, "finish local_sort_index_by_range", K(ret), "table_id", index_schema->get_table_id(), K(cost_time), K(idx), K(concurrent_cnt), K(row_count)); return ret; } int ObPartitionStorage::prepare_merge_mv_depend_sstable(const common::ObVersion& frozen_version, const share::schema::ObTableSchema* schema, const share::schema::ObTableSchema* dep_schema, ObTablesHandle& mv_dep_tables_handle) { int ret = OB_SUCCESS; bool table_exist = false; ObPartitionStorage* dep_storage = NULL; ObIPartitionGroupGuard guard; ObPGPartitionGuard pg_partition_guard; const int64_t snapshot_version = ObVersionRange::MAX_VERSION; // TODO: fill it if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (OB_ISNULL(schema)) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("table schema must not null", K(ret), K(pkey_)); } else if (!schema->is_materialized_view()) { // not mv, do nothing } else if (OB_ISNULL(dep_schema)) { ret = OB_ERR_SYS; LOG_ERROR("for mv merge, dep_schema must not null", K(ret), K(frozen_version), K(*schema)); } else if (dep_schema->get_partition_num() != 1) { ret = OB_NOT_SUPPORTED; LOG_ERROR("partitioned join mv depend table not supported", K(ret), K(*dep_schema)); } else if (OB_FAIL(schema_service_->check_table_exist(schema->get_table_id(), OB_INVALID_VERSION, /*latest local guard*/ table_exist))) { STORAGE_LOG(WARN, "failed to check table exist", K(ret), "table_id", schema->get_table_id(), "schema_version", schema->get_schema_version()); } else if (OB_UNLIKELY(!table_exist)) { LOG_INFO("skip merge not exist table", K(ret), K(pkey_), K(schema->get_table_id())); } else { const uint64_t dep_table_id = dep_schema->get_table_id(); ObPartitionKey rpkey(dep_table_id, 0, dep_schema->get_partition_cnt()); if (OB_FAIL(GCTX.par_ser_->get_partition(rpkey, guard)) || OB_ISNULL(guard.get_partition_group())) { LOG_WARN("get partition failed", K(ret), K(rpkey)); } else if (OB_FAIL(guard.get_partition_group()->get_pg_partition(rpkey, pg_partition_guard)) || OB_ISNULL(pg_partition_guard.get_pg_partition())) { LOG_WARN("get pg partition failed", K(ret), K(rpkey)); } else if (NULL == (dep_storage = static_cast(pg_partition_guard.get_pg_partition()->get_storage()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get partition storage failed", K(ret)); } else if (!ObReplicaTypeCheck::is_readable_replica(dep_storage->get_replica_type())) { ret = OB_NOT_SUPPORTED; LOG_WARN("dependent table of MV is not readable", K(ret), "table id", schema->get_table_id(), "dependent table id", dep_table_id, "dependent table type", dep_storage->get_replica_type()); } else if (OB_FAIL(dep_storage->get_partition_store().get_read_tables( dep_table_id, snapshot_version, mv_dep_tables_handle))) { LOG_WARN("failed to get read tables", K(ret), K(frozen_version), K(dep_table_id)); } else if (mv_dep_tables_handle.get_tables().count() < 1) { ret = OB_ERR_SYS; LOG_ERROR("dep_tables must not zero", K(ret), K(snapshot_version), K(dep_table_id)); } else { ObITable* end_table = mv_dep_tables_handle.get_tables().at(mv_dep_tables_handle.get_tables().count() - 1); if (OB_ISNULL(end_table)) { ret = OB_ERR_SYS; LOG_ERROR("end table must not null", K(ret), K(frozen_version), K(dep_table_id)); } else if (end_table->is_memtable() && static_cast(end_table)->is_active_memtable()) { // TODO: this needs to be modified after daily merge with global version. Including global version is enough. ret = OB_EAGAIN; LOG_WARN("dep table is not frozen, cannot merge", K(ret), K(frozen_version), K(dep_table_id), K(*end_table)); } } } return ret; } int ObPartitionStorage::join_mv_init_merge_param(const share::schema::ObTableSchema& ltable, const share::schema::ObTableSchema& rtable, const ObTableAccessParam& lta_param, const ObTableAccessContext& lta_ctx, ObTableParam& rt_param, ObTableAccessParam& rta_param, ObTableAccessContext& rta_ctx, ObStoreCtx& rctx) { int ret = OB_SUCCESS; if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else { rctx.mem_ctx_ = txs_->get_mem_ctx_factory()->alloc(); if (NULL == rctx.mem_ctx_) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret)); } else if (OB_FAIL(rctx.mem_ctx_->trans_begin())) { LOG_WARN("failed to begin transaction", K(ret)); } else if (OB_FAIL(rctx.mem_ctx_->sub_trans_begin( MV_RIGHT_MERGE_READ_SNAPSHOT_VERSION, MV_RIGHT_MERGE_READ_SNAPSHOT_VERSION))) { LOG_WARN("failed to begin sub transaction", K(ret)); } else { ObArray col_ids; for (int64_t i = 0; OB_SUCC(ret) && i < lta_param.out_col_desc_param_.count(); ++i) { if (i >= lta_param.iter_param_.out_cols_project_->count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN( "invalid out col projector idx", K(ret), K(i), "count", lta_param.iter_param_.out_cols_project_->count()); } else { const int64_t idx = lta_param.iter_param_.out_cols_project_->at(i); if (OB_FAIL(col_ids.push_back(lta_param.out_col_desc_param_.get_col_descs().at(idx).col_id_))) { LOG_WARN("array push back failed", K(ret)); } } } if (OB_FAIL(ret)) { } else if (OB_FAIL(rt_param.convert_join_mv_rparam(ltable, rtable, col_ids))) { LOG_WARN("convert to join mv param failed", K(ret)); } else { ObColDescArray tmp_rt_out_cols; ObColDesc desc; FOREACH_CNT_X(c, rt_param.get_columns(), OB_SUCC(ret)) { desc.col_id_ = (*c)->get_column_id(); desc.col_type_ = (*c)->get_meta_type(); desc.col_order_ = (*c)->get_column_order(); if (OB_FAIL(tmp_rt_out_cols.push_back(desc))) { LOG_WARN("array push back failed", K(ret)); } } // TODO hard code, should be passed as parameters ObVersionRange trans_version_range; trans_version_range.base_version_ = 0; trans_version_range.multi_version_start_ = MV_RIGHT_MERGE_READ_SNAPSHOT_VERSION; trans_version_range.snapshot_version_ = MV_RIGHT_MERGE_READ_SNAPSHOT_VERSION; ObPartitionKey pg_key; if (!trans_version_range.is_valid()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("trans version range is not valid", K(ret), K(trans_version_range)); } else if (OB_FAIL(ret)) { } else if (OB_ISNULL(lta_ctx.block_cache_ws_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null block cache working set", K(ret)); } else if (OB_FAIL(ObPartitionService::get_instance().get_pg_key(pkey_, pg_key))) { LOG_WARN("failed to get_pg_key", K(ret), K(pkey_)); } else if (OB_FAIL(rctx.init_trans_ctx_mgr(pg_key))) { LOG_WARN("failed to init_trans_ctx_mgr", K(ret), K(pg_key)); } else if (OB_FAIL(rta_ctx.init(lta_ctx.query_flag_, rctx, *lta_ctx.allocator_, *lta_ctx.stmt_allocator_, *lta_ctx.block_cache_ws_, trans_version_range))) { LOG_WARN("init right table access context failed", K(ret)); } else if (OB_FAIL(rta_param.init(rtable.get_table_id(), rtable.get_schema_version(), rtable.get_rowkey_column_num(), tmp_rt_out_cols, false, /*is_multi_version_minor_merge*/ &rt_param, true /* is_mv_right_table */))) { LOG_WARN("init right table access param failed", K(ret)); } } } } if (OB_FAIL(ret) && NULL != rctx.mem_ctx_) { rctx.mem_ctx_->trans_end(true, 0); rctx.mem_ctx_->trans_clear(); txs_->get_mem_ctx_factory()->free(rctx.mem_ctx_); rctx.mem_ctx_ = NULL; } return ret; } int ObPartitionStorage::retire_warmup_store(const bool is_disk_full) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K_(pkey), K(ret)); } else if (OB_FAIL(store_.retire_prewarm_store(is_disk_full))) { STORAGE_LOG(WARN, "fail to retire sorted store", K_(pkey), K(ret)); } return ret; } int ObPartitionStorage::halt_prewarm() { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K_(pkey), K(ret)); } else if (OB_FAIL(store_.halt_prewarm())) { STORAGE_LOG(WARN, "fail to halt prewarm", K_(pkey), K(ret)); } return ret; } int ObPartitionStorage::get_partition_ss_store_info(common::ObArray& partition_ss_store_info_list) { int ret = OB_SUCCESS; ObTablesHandle tables_handle; PartitionSSStoreInfo info; ObArray sstables; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(store_.get_all_tables(tables_handle))) { STORAGE_LOG(WARN, "Fail to get all ssstores", K(ret)); } else if (OB_FAIL(tables_handle.get_all_sstables(sstables))) { STORAGE_LOG(WARN, "failed to get all sstables", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < sstables.count(); ++i) { ObSSTable* sstable = sstables.at(i); info.reset(); if (OB_ISNULL(sstable)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The base ssstore is NULL", K(ret)); } else if (sstable->is_major_sstable()) { const ObSSTableMergeInfo& merge_info = sstable->get_sstable_merge_info(); info.version_ = sstable->get_version(); info.macro_block_count_ = sstable->get_meta().get_total_macro_block_count(); info.use_old_macro_block_count_ = sstable->get_meta().get_total_use_old_macro_block_count(); info.rewrite_macro_old_micro_block_count_ = merge_info.rewrite_macro_old_micro_block_count_; info.rewrite_macro_total_micro_block_count_ = merge_info.rewrite_macro_total_micro_block_count_; info.is_merged_ = true; info.is_modified_ = info.macro_block_count_ != info.use_old_macro_block_count_; if (OB_FAIL(partition_ss_store_info_list.push_back(info))) { STORAGE_LOG(WARN, "Fail to add info", K(ret)); } } } } return ret; } /* int ObPartitionStorage::get_sstore_min_version(common::ObVersion &min_version) { int ret = OB_SUCCESS; ObSSStore *store = NULL; ObTablesHandle tables_handle; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (OB_FAIL(get_ssstore_max_version(min_version))) { STORAGE_LOG(WARN, "Fail to get max ssstore version", K(ret)); } else if (OB_FAIL(stores_.get_all_ssstores(stores_handle))) { STORAGE_LOG(WARN, "Fail to get all ssstores", K(ret)); } else { const ObIArray &ssstores = stores_handle.get_stores(); for (int64_t i = 0; OB_SUCC(ret) && i < ssstores.count(); ++i) { if (is_major_ssstore(ssstores.at(i)->get_store_type())) { store = static_cast(ssstores.at(i)); } if (NULL == store) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "The base ssstore is NULL", K(ret)); } else if (min_version > store->get_version()){ min_version = store->get_version(); } else {} } } return ret; } */ int ObPartitionStorage::do_warm_up_request(const ObIWarmUpRequest* request) { int ret = OB_SUCCESS; ObTablesHandle tables_handle; const int64_t snapshot_version = WARM_UP_READ_SNAPSHOT_VERSION; EVENT_INC(WARM_UP_REQUEST_COUNT); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (NULL == request) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), KP(request)); } else if (!ObReplicaTypeCheck::is_replica_with_ssstore(store_.get_replica_type())) { // without ssstore, skip warm up } else if (OB_FAIL(store_.get_read_tables(request->get_index_id(), snapshot_version, tables_handle))) { STORAGE_LOG(WARN, "Fail to get read stores, ", K(ret)); } else if (OB_FAIL(request->warm_up(txs_->get_mem_ctx_factory(), tables_handle.get_tables()))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "Fail to warm up request, ", K_(pkey), K(ret)); } } else { if (REACH_TIME_INTERVAL(1000 * 1000)) { STORAGE_LOG(INFO, "Success to do warm up request, ", K_(pkey)); } } return ret; } int ObPartitionStorage::ObDMLRunningCtx::init(const ObIArray* column_ids, const ObIArray* upd_col_ids, const bool use_table_param, ObMultiVersionSchemaService* schema_service, ObPartitionStore& store) { int ret = OB_SUCCESS; if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); } else if (OB_ISNULL(schema_service) || OB_UNLIKELY(!store_ctx_.is_valid()) || OB_UNLIKELY(!dml_param_.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), KP(store_ctx_.mem_ctx_), K(dml_param_), KP(schema_service)); } else if (use_table_param) { relative_tables_.set_table_param(dml_param_.table_param_); } const ObPartitionKey& pkey = store_ctx_.cur_pkey_; if (OB_FAIL(ret)) { } else if (OB_FAIL(relative_tables_.prepare_tables(store_ctx_.cur_pkey_.get_table_id(), dml_param_.schema_version_, store_ctx_.mem_ctx_->get_read_snapshot(), dml_param_.tenant_schema_version_, upd_col_ids, schema_service, store))) { STORAGE_LOG(WARN, "failed to get relative table", K(ret)); } else if (OB_UNLIKELY(!pkey.is_pg() && (extract_pure_id(pkey.get_table_id()) == OB_ALL_TABLE_HISTORY_TID || extract_pure_id(pkey.get_table_id()) == OB_ALL_TABLE_V2_HISTORY_TID || extract_pure_id(pkey.get_table_id()) == OB_ALL_BACKUP_PIECE_FILES_TID) && relative_tables_.get_index_tables_buf_count() != 1)) { // __all_table_history and __all_backup_piece_files should always have one index ret = OB_SCHEMA_EAGAIN; STORAGE_LOG(WARN, "__all_table_history should have one index table", K(ret), "cnt", relative_tables_.get_index_tables_buf_count()); } else if ((relative_tables_.idx_cnt_ > 0 || T_DML_UPDATE == dml_type_) && OB_FAIL(prepare_index_row())) { STORAGE_LOG(WARN, "failed to malloc work members", K(ret)); } else if (NULL != column_ids && OB_FAIL(prepare_column_info(*column_ids))) { STORAGE_LOG(WARN, "fail to get column descriptions and column map", K(ret), K(*column_ids)); } else { store_ctx_.mem_ctx_->set_table_version(dml_param_.schema_version_); store_ctx_.mem_ctx_->set_abs_expired_time(dml_param_.timeout_); store_ctx_.mem_ctx_->set_abs_lock_wait_timeout(dml_param_.timeout_); column_ids_ = column_ids; is_inited_ = true; } if (IS_NOT_INIT) { // DO NOT change the order, relative_tables_ should reset after free_work_members free_work_members(); relative_tables_.reset(); } return ret; } void ObPartitionStorage::ObDMLRunningCtx::free_work_members() { if (nullptr != idx_row_) { allocator_.free(idx_row_); idx_row_ = nullptr; } if (nullptr != col_descs_ && !relative_tables_.data_table_.use_schema_param()) { ObColDescIArray* tmp_col = const_cast(col_descs_); tmp_col->reset(); allocator_.free(tmp_col); col_descs_ = nullptr; } if (nullptr != col_map_ && !relative_tables_.data_table_.use_schema_param()) { ColumnMap* tmp_map = const_cast(col_map_); tmp_map->clear(); allocator_.free(tmp_map); col_map_ = nullptr; } } int ObPartitionStorage::do_rowkey_exists(const ObStoreCtx& store_ctx, const ObIArray& read_tables, const int64_t table_id, const ObStoreRowkey& rowkey, const ObQueryFlag& query_flag, const ObColDescIArray& col_descs, bool& exists) { int ret = OB_SUCCESS; if (!store_ctx.is_valid() || read_tables.count() <= 0 || OB_INVALID_ID == table_id || !rowkey.is_valid() || col_descs.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(read_tables.count()), K(table_id), K(rowkey), K(query_flag), K(col_descs), K(ret)); } else { bool found = false; for (int64_t i = read_tables.count() - 1; (!found) && OB_SUCC(ret) && i >= 0; --i) { ObITable* table = read_tables.at(i); if (OB_ISNULL(table)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unknown store", K(table), K(ret)); } else if (OB_FAIL(table->exist(store_ctx, table_id, rowkey, col_descs, exists, found))) { STORAGE_LOG(WARN, "Fail to check if exist in store, ", K(table_id), K(i), K(ret)); } else { STORAGE_LOG(DEBUG, "rowkey_exists check::", K(i), K(*table), K(table_id), K(rowkey), K(exists), K(found)); } } if (OB_SUCCESS == ret && false == found) { exists = false; } } return ret; } int ObPartitionStorage::rowkey_exists(ObRelativeTable& relative_table, const ObStoreCtx& store_ctx, const ObColDescIArray& col_descs, const common::ObNewRow& row, bool& exists) { int ret = OB_SUCCESS; ObTablesHandle& tables_handle = relative_table.tables_handle_; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!store_ctx.is_valid() || col_descs.count() <= 0 || !row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(col_descs), K(row), K(ret)); } else { { ObStorageWriterGuard guard(store_, store_ctx, false); if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } } if (OB_SUCC(ret)) { ObStoreRowkey rowkey(row.cells_, relative_table.get_rowkey_column_num()); bool read_latest = true; ObQueryFlag flag; flag.read_latest_ = read_latest & ObQueryFlag::OBSF_MASK_READ_LATEST; if (relative_table.is_storage_index_table()) { flag.index_invalid_ = !relative_table.can_read_index(); } ret = do_rowkey_exists( store_ctx, tables_handle.get_tables(), relative_table.get_table_id(), rowkey, flag, col_descs, exists); } } return ret; } int ObPartitionStorage::do_rowkeys_prefix_exist( const ObIArray& read_stores, ObRowsInfo& rows_info, bool& may_exist) { int ret = OB_SUCCESS; if (!rows_info.is_valid() || read_stores.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(rows_info), K(read_stores.count()), K(ret)); } else { may_exist = false; for (int64_t i = read_stores.count() - 1; !may_exist && OB_SUCC(ret) && i >= 0; --i) { ObITable* store = read_stores.at(i); if (OB_ISNULL(store)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unknown store", K(store), K(ret)); } else if (OB_FAIL(store->prefix_exist(rows_info, may_exist))) { STORAGE_LOG(WARN, "Fail to check if exist in store, ", K(rows_info), K(i), K(ret)); } else { STORAGE_LOG(DEBUG, "rowkey prefix exists check", K(i), K(rows_info.ext_rowkeys_), K(may_exist), K(rows_info.get_max_prefix_length())); } } } return ret; } int ObPartitionStorage::do_rowkeys_exists(const ObIArray& read_stores, ObRowsInfo& rows_info, bool& exists) { int ret = OB_SUCCESS; if (!rows_info.is_valid() || read_stores.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(rows_info), K(read_stores.count()), K(ret)); } else { bool all_rows_found = false; for (int64_t i = read_stores.count() - 1; !exists && !all_rows_found && OB_SUCC(ret) && i >= 0; --i) { ObITable* store = read_stores.at(i); if (OB_ISNULL(store)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unknown store", K(store), K(ret)); } else if (OB_FAIL(store->exist(rows_info, exists, all_rows_found))) { STORAGE_LOG(WARN, "Fail to check if exist in store, ", K(rows_info), K(i), K(ret)); } else { STORAGE_LOG(DEBUG, "rowkey_exists check::", K(i), K(rows_info.ext_rowkeys_), K(exists), K(all_rows_found)); } } } return ret; } int ObPartitionStorage::rowkeys_exists( const ObStoreCtx& store_ctx, ObRelativeTable& relative_table, ObRowsInfo& rows_info, bool& exists) { int ret = OB_SUCCESS; ObTablesHandle& tables_handle = relative_table.tables_handle_; bool may_exist = true; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!rows_info.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(rows_info)); } else { { ObStorageWriterGuard guard(store_, store_ctx, false); if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } } if (OB_SUCC(ret) && rows_info.get_max_prefix_length() > 0) { if (OB_FAIL(do_rowkeys_prefix_exist(tables_handle.get_tables(), rows_info, may_exist))) { STORAGE_LOG(WARN, "Failed to do prefix rowkey exist", K(ret)); } } STORAGE_LOG(DEBUG, "rows info", K(rows_info), K(may_exist)); if (OB_SUCC(ret) && may_exist) { ret = do_rowkeys_exists(tables_handle.get_tables(), rows_info, exists); } } return ret; } int ObPartitionStorage::write_row(ObRelativeTable& relative_table, const ObStoreCtx& store_ctx, const int64_t rowkey_len, const common::ObIArray& col_descs, const storage::ObStoreRow& row) { int ret = OB_SUCCESS; { ObStorageWriterGuard guard(store_, store_ctx, true); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!store_ctx.is_valid() || col_descs.count() <= 0 || rowkey_len <= 0 || !row.is_valid() || !relative_table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(relative_table), K(rowkey_len), K(col_descs), K(row), K(ret)); } else if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } else { ObMemtable* write_memtable = NULL; const uint64_t table_id = relative_table.get_table_id(); store_ctx.tables_ = &relative_table.tables_handle_.get_tables(); if (OB_FAIL(relative_table.tables_handle_.get_last_memtable(write_memtable))) { STORAGE_LOG(WARN, "failed to get_last_memtable", K(ret)); } else if (OB_FAIL(write_memtable->set(store_ctx, table_id, rowkey_len, col_descs, row))) { STORAGE_LOG(WARN, "failed to set memtable", K(ret)); } } } if (OB_SUCC(ret)) { int tmp_ret = OB_SUCCESS; ObMemtableCtx* mt_ctx = static_cast(store_ctx.mem_ctx_); transaction::ObPartTransCtx* part_ctx = static_cast(mt_ctx->get_trans_ctx()); if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = part_ctx->submit_log_if_neccessary()))) { TRANS_LOG(INFO, "submit log if neccesary failed", K(tmp_ret), K(store_ctx), K(relative_table)); } } return ret; } int ObPartitionStorage::write_row(ObRelativeTable& relative_table, const storage::ObStoreCtx& store_ctx, const int64_t rowkey_len, const common::ObIArray& col_descs, const ObIArray& update_idx, const storage::ObStoreRow& old_row, const storage::ObStoreRow& new_row) { int ret = OB_SUCCESS; { ObStorageWriterGuard guard(store_, store_ctx, true); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!store_ctx.is_valid() || col_descs.count() <= 0 || rowkey_len <= 0 || !old_row.is_valid() || !new_row.is_valid() || !relative_table.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", KP(store_ctx.mem_ctx_), K(relative_table), K(rowkey_len), K(col_descs), K(update_idx), K(old_row), K(new_row), K(ret)); } else if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } else { ObMemtable* write_memtable = NULL; const uint64_t table_id = relative_table.get_table_id(); store_ctx.tables_ = &(relative_table.tables_handle_.get_tables()); if (OB_FAIL(relative_table.tables_handle_.get_last_memtable(write_memtable))) { STORAGE_LOG(WARN, "failed to get write memtable", K(ret)); } else if (OB_FAIL( write_memtable->set(store_ctx, table_id, rowkey_len, col_descs, update_idx, old_row, new_row))) { STORAGE_LOG(WARN, "failed to set write memtable", K(ret)); } } } if (OB_SUCC(ret)) { int tmp_ret = OB_SUCCESS; ObMemtableCtx* mt_ctx = static_cast(store_ctx.mem_ctx_); transaction::ObPartTransCtx* part_ctx = static_cast(mt_ctx->get_trans_ctx()); if (OB_UNLIKELY(OB_SUCCESS != (tmp_ret = part_ctx->submit_log_if_neccessary()))) { TRANS_LOG(INFO, "submit log if neccesary failed", K(tmp_ret), K(store_ctx), K(relative_table)); } } return ret; } int ObPartitionStorage::lock_row(ObRelativeTable& relative_table, const storage::ObStoreCtx& store_ctx, const common::ObIArray& col_descs, const common::ObNewRow& row, const ObSQLMode sql_mode, ObIAllocator& allocator, RowReshape*& row_reshape_ins) { int ret = OB_SUCCESS; ObStorageWriterGuard guard(store_, store_ctx, true); bool need_reshape_row; ObStoreRow lock_row; RowReshape* reshape_ptr = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!relative_table.is_valid() || !store_ctx.is_valid() || col_descs.count() <= 0 || !row.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(relative_table), KP(store_ctx.mem_ctx_), K(col_descs), K(row)); } else if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } else if (OB_FAIL(need_reshape_table_row(row, col_descs.count(), sql_mode, need_reshape_row))) { LOG_WARN("fail to check need reshape row", K(ret), K(row), K(sql_mode)); } else if (need_reshape_row && nullptr == row_reshape_ins) { if (OB_FAIL(malloc_rows_reshape(allocator, col_descs, 1, relative_table, reshape_ptr))) { LOG_WARN("fail to malloc reshape", K(ret), K(col_descs), K(sql_mode)); } else { row_reshape_ins = reshape_ptr; } } if (OB_SUCC(ret)) { if (OB_FAIL(reshape_row(row, col_descs.count(), row_reshape_ins, lock_row, need_reshape_row, sql_mode))) { LOG_WARN("failed to reshape row", K(ret), K(row), K(need_reshape_row), K(sql_mode)); } else if (GCONF.enable_defensive_check() && OB_FAIL(check_new_row_nullable_value(col_descs, relative_table, lock_row.row_val_))) { LOG_WARN("check lock row nullable failed", K(ret)); } else { memtable::ObMemtable* write_memtable = NULL; const uint64_t table_id = relative_table.get_table_id(); store_ctx.tables_ = &(relative_table.tables_handle_.get_tables()); if (OB_FAIL(relative_table.tables_handle_.get_last_memtable(write_memtable))) { STORAGE_LOG(WARN, "failed to get_last_memtable", K(ret)); } else if (OB_ISNULL(write_memtable)) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "write_memtable must not null", K(ret)); } else if (OB_FAIL(write_memtable->lock(store_ctx, table_id, col_descs, lock_row.row_val_))) { STORAGE_LOG(WARN, "failed to lock write_memtable", K(ret), K(col_descs), K(lock_row)); } } } return ret; } int ObPartitionStorage::lock_row(ObRelativeTable& relative_table, const storage::ObStoreCtx& store_ctx, const common::ObIArray& col_descs, const common::ObStoreRowkey& rowkey) { int ret = OB_SUCCESS; ObStorageWriterGuard guard(store_, store_ctx, true); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!relative_table.is_valid() || !store_ctx.is_valid() || col_descs.count() <= 0 || !rowkey.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(relative_table), KP(store_ctx.mem_ctx_), K(col_descs), K(rowkey)); } else if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } else { memtable::ObMemtable* write_memtable = NULL; const uint64_t table_id = relative_table.get_table_id(); store_ctx.tables_ = &(relative_table.tables_handle_.get_tables()); if (OB_FAIL(relative_table.tables_handle_.get_last_memtable(write_memtable))) { STORAGE_LOG(WARN, "failed to get_last_memtable", K(ret)); } else if (OB_ISNULL(write_memtable)) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "write_memtable must not null", K(ret)); } else if (OB_FAIL(write_memtable->lock(store_ctx, table_id, col_descs, rowkey))) { STORAGE_LOG(WARN, "failed to lock write memtable", K(ret), K(table_id), K(col_descs), K(rowkey)); } } return ret; } int ObPartitionStorage::check_row_locked_by_myself(ObRelativeTable& relative_table, const ObStoreCtx& store_ctx, const ObIArray& col_descs, const ObStoreRowkey& rowkey, bool& locked) { int ret = OB_SUCCESS; ObStorageWriterGuard guard(store_, store_ctx, true); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret)); } else if (!relative_table.is_valid() || !store_ctx.is_valid() || col_descs.count() <= 0 || !rowkey.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(relative_table), KP(store_ctx.mem_ctx_), K(col_descs), K(rowkey)); } else if (OB_FAIL(guard.refresh_and_protect_table(relative_table))) { STORAGE_LOG(WARN, "fail to protect table", K(ret), K(pkey_)); } else { memtable::ObMemtable* write_memtable = NULL; const uint64_t table_id = relative_table.get_table_id(); store_ctx.tables_ = &(relative_table.tables_handle_.get_tables()); if (OB_FAIL(relative_table.tables_handle_.get_last_memtable(write_memtable))) { STORAGE_LOG(WARN, "failed to get_last_memtable", K(ret)); } else if (OB_ISNULL(write_memtable)) { ret = OB_ERR_SYS; STORAGE_LOG(ERROR, "write_memtable must not null", K(ret)); } else if (OB_FAIL(write_memtable->check_row_locked_by_myself(store_ctx, table_id, col_descs, rowkey, locked))) { STORAGE_LOG(WARN, "failed to lock write memtable", K(ret), K(table_id), K(col_descs), K(rowkey)); } } return ret; } bool ObPartitionStorage::is_expired_version( const common::ObVersion& base_version, const common::ObVersion& version, const int64_t max_kept_number) { return version.major_ > 0 && version.major_ < (base_version.major_ - max(0, max_kept_number - 1)); } int ObPartitionStorage::get_merge_level( const int64_t merge_version, const ObSSTableMergeCtx& ctx, ObMergeLevel& merge_level) { int ret = OB_SUCCESS; int64_t storage_format_work_version = 0; int64_t storage_format_version = 0; if (OB_FAIL(ctx.get_storage_format_version(storage_format_version))) { LOG_WARN("fail to get storage format version", K(ret)); } else if (OB_FAIL(ctx.get_storage_format_work_version(storage_format_work_version))) { LOG_WARN("fail to get storage format work version", K(ret)); } else if (OB_FAIL(get_merge_opt(merge_version, storage_format_version, storage_format_work_version, OB_STORAGE_FORMAT_VERSION_V1, MACRO_BLOCK_MERGE_LEVEL, MICRO_BLOCK_MERGE_LEVEL, merge_level))) { LOG_WARN("fail to get merge option", K(ret)); } return ret; } int ObPartitionStorage::get_store_column_checksum_in_micro( const int64_t merge_version, const ObSSTableMergeCtx& ctx, bool& store_column_checksum) { int ret = OB_SUCCESS; int64_t storage_format_work_version = 0; int64_t storage_format_version = 0; if (OB_FAIL(ctx.get_storage_format_version(storage_format_version))) { LOG_WARN("fail to get storage format version", K(ret)); } else if (OB_FAIL(ctx.get_storage_format_work_version(storage_format_work_version))) { LOG_WARN("fail to get storage format work version", K(ret)); } else if (OB_FAIL(get_merge_opt(merge_version, storage_format_version, storage_format_work_version, OB_STORAGE_FORMAT_VERSION_V3, false, true, store_column_checksum))) { LOG_WARN("fail to get merge option", K(ret)); } return ret; } int ObPartitionStorage::deep_copy_build_index_schemas(const ObTableSchema* data_table_schema, const ObTableSchema* index_schema, const ObTableSchema* dep_table_schema, ObBuildIndexParam& param) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_ISNULL(data_table_schema) || OB_ISNULL(index_schema)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), KP(data_table_schema), KP(index_schema)); } else { ObIAllocator& allocator = param.allocator_; const int64_t table_schema_cnt = NULL == dep_table_schema ? 2 : 3; const int64_t alloc_size = table_schema_cnt * sizeof(ObTableSchema); char* buf = NULL; if (OB_ISNULL(buf = static_cast(allocator.alloc(alloc_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "fail to allocate memory for deep copy schema", K(ret)); } else { ObTableSchema* deep_copy_data_table_schema = NULL; ObTableSchema* deep_copy_index_schema = NULL; ObTableSchema* deep_copy_dep_table_schema = NULL; deep_copy_data_table_schema = new (buf) ObTableSchema(&allocator); buf += sizeof(ObTableSchema); deep_copy_index_schema = new (buf) ObTableSchema(&allocator); buf += sizeof(ObTableSchema); if (NULL != deep_copy_dep_table_schema) { deep_copy_dep_table_schema = new (buf) ObTableSchema(&allocator); } if (OB_FAIL(deep_copy_data_table_schema->assign(*data_table_schema))) { STORAGE_LOG(WARN, "fail to assign table schema", K(ret)); } else if (OB_FAIL(deep_copy_index_schema->assign(*index_schema))) { STORAGE_LOG(WARN, "fail to assign index schema", K(ret)); } else if (NULL != dep_table_schema && OB_FAIL(deep_copy_dep_table_schema->assign(*dep_table_schema))) { STORAGE_LOG(WARN, "fail to assign dep table schema", K(ret)); } else { param.table_schema_ = deep_copy_data_table_schema; param.index_schema_ = deep_copy_index_schema; param.dep_table_schema_ = deep_copy_dep_table_schema; } } } return ret; } // schema_version maybe tenant level // but it doesn't support index building of tenant level system table // (index of tenant level virtual table works immediatelly) int ObPartitionStorage::get_build_index_param( const uint64_t index_id, const int64_t schema_version, ObIPartitionReport* report, ObBuildIndexParam& param) { int ret = OB_SUCCESS; int64_t concurrent_cnt = 0; const uint64_t tenant_id = is_inner_table(index_id) ? OB_SYS_TENANT_ID : extract_tenant_id(index_id); const bool check_formal = extract_pure_id(index_id) > OB_MAX_CORE_TABLE_ID; // to avoid circular dependency if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_INVALID_ID == index_id || OB_INVALID_VERSION == schema_version || NULL == report) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(index_id), K(schema_version), KP(report)); } else { ObSchemaGetterGuard schema_guard; const ObTenantSchema* tenant_schema = NULL; const ObTableSchema* data_table_schema = NULL; const ObTableSchema* index_schema = NULL; const ObTableSchema* dep_table = NULL; if (OB_FAIL( schema_service_->get_tenant_schema_guard(tenant_id, schema_guard, schema_version, OB_INVALID_VERSION))) { STORAGE_LOG(WARN, "fail to get schema guard", K(ret), K(schema_version)); } else if (check_formal && OB_FAIL(schema_guard.check_formal_guard())) { LOG_WARN("schema_guard is not formal", K(ret), K(tenant_id)); } else if (OB_FAIL(schema_guard.get_tenant_info(extract_tenant_id(index_id), tenant_schema))) { STORAGE_LOG(WARN, "fail to get tenant info", K(ret), K(index_id)); } else if (OB_ISNULL(tenant_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "tenant not exist", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(index_id, index_schema))) { STORAGE_LOG(WARN, "fail to get table schema", K(ret), K(index_id)); } else if (OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "index schema not exist", K(ret), K(index_id)); } else if (OB_FAIL(schema_guard.get_table_schema(index_schema->get_data_table_id(), data_table_schema))) { STORAGE_LOG( WARN, "fail to get table schema", K(ret), K(index_id), "data_table_id", index_schema->get_data_table_id()); } else if (OB_ISNULL(data_table_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "data table not exist", K(ret), "data_table_id", index_schema->get_data_table_id()); } else if (OB_FAIL(get_depend_table_schema(index_schema, schema_guard, dep_table))) { STORAGE_LOG(WARN, "fail to get depend table schema", K(ret)); } else if (OB_FAIL(deep_copy_build_index_schemas(data_table_schema, index_schema, dep_table, param))) { STORAGE_LOG(WARN, "fail to deep copy build index schemas", K(ret)); } else if (OB_FAIL(get_concurrent_cnt(data_table_schema->get_table_id(), schema_version, concurrent_cnt))) { STORAGE_LOG(WARN, "fail to get concurrent cnt", K(ret)); } else { param.schema_cnt_ = data_table_schema->get_index_tid_count() + 1; param.schema_version_ = schema_version; param.report_ = report; // control currency level, to finish merge in the last round param.concurrent_cnt_ = MIN(concurrent_cnt, param.DEFAULT_INDEX_SORT_MEMORY_LIMIT / ObExternalSortConstant::DEFAULT_FILE_READ_WRITE_BUFFER / 2); param.report_ = report; if (OB_FAIL(get_build_index_stores(*tenant_schema, param))) { STORAGE_LOG(WARN, "fail to get build index stores", K(ret)); } } } return ret; } int ObPartitionStorage::check_table_continuity(const uint64_t table_id, const int64_t version, bool& is_continual) { int ret = OB_SUCCESS; ObTablesHandle tables_handle; bool tmp_continual = false; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not inited", K(ret)); } else if (OB_INVALID_ID == table_id || 0 >= version) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), K(table_id), K(version)); } else if (OB_FAIL(store_.get_read_tables(table_id, version, tables_handle))) { STORAGE_LOG(WARN, "failed to get read stores", K(ret), K(table_id), K(version)); } else if (0 == tables_handle.get_count()) { tmp_continual = false; } else { tmp_continual = true; int64_t last_version = version; for (int64_t i = tables_handle.get_count() - 1; tmp_continual && i >= 0; i--) { ObITable* table = tables_handle.get_table(i); if (table->get_snapshot_version() != last_version) { tmp_continual = false; } else { last_version = table->get_base_version(); } } } if (OB_SUCCESS == ret) { is_continual = tmp_continual; } return ret; } void ObPartitionStorage::set_merge_status(bool merge_success) { ATOMIC_STORE(&merge_successed_, merge_success); ATOMIC_STORE(&merge_timestamp_, ObTimeUtility::current_time()); if (merge_success) { ATOMIC_STORE(&merge_failed_cnt_, 0); } else { ATOMIC_AAF(&merge_failed_cnt_, 1); } } bool ObPartitionStorage::can_schedule_merge() { int bool_ret = true; if (!merge_successed_ && (ObTimeUtility::current_time() < (merge_timestamp_ + merge_failed_cnt_ * DELAY_SCHEDULE_TIME_UNIT))) { bool_ret = false; } return bool_ret; } int ObPartitionStorage::append_local_sort_data(const share::ObBuildIndexAppendLocalDataParam& param, const common::ObPGKey& pg_key, const blocksstable::ObStorageFileHandle& file_handle, ObNewRowIterator& iter) { int ret = OB_SUCCESS; ObFreezeInfoProxy freeze_info_proxy; int64_t snapshot_version = 0; ObSchemaGetterGuard schema_guard; const ObTenantSchema* tenant_schema = NULL; const ObTableSchema* index_schema = NULL; const ObTableSchema* data_table_schema = NULL; ObColumnChecksumCalculator checksum; ObDataStoreDesc data_desc; SMART_VAR(ObMacroBlockWriter, writer) { const uint64_t execution_id = param.execution_id_; const uint64_t task_id = param.task_id_; const uint64_t index_table_id = param.index_id_; const int64_t schema_version = param.schema_version_; const bool is_major_merge = true; ObTablesHandle table_handle; ObIntermMacroKey key; ObIntermMacroHandle handle; ObSimpleFrozenStatus frozen_status; bool need_append = false; bool has_major = false; int64_t checksum_method = 0; key.execution_id_ = execution_id; key.task_id_ = task_id; const uint64_t tenant_id = is_inner_table(index_table_id) ? OB_SYS_TENANT_ID : extract_tenant_id(index_table_id); const bool check_formal = extract_pure_id(index_table_id) > OB_MAX_CORE_TABLE_ID; // to avoid circular dependency if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_UNLIKELY(!param.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(param)); } else if (OB_FAIL(store_.has_major_sstable(index_table_id, has_major))) { STORAGE_LOG(WARN, "fail to check has major sstable", K(ret)); } else { need_append = !has_major; } if (OB_SUCC(ret) && need_append) { if (OB_FAIL(INTERM_MACRO_MGR.get(key, handle))) { if (OB_ENTRY_NOT_EXIST != ret) { STORAGE_LOG(WARN, "fail to get interm macro value", K(ret)); } else { ret = OB_SUCCESS; need_append = true; } } else { need_append = false; } } STORAGE_LOG(INFO, "begin append local sort data", K(ret), K(need_append), K(has_major)); if (OB_SUCC(ret) && need_append) { if (OB_FAIL( schema_service_->get_tenant_schema_guard(tenant_id, schema_guard, schema_version, OB_INVALID_VERSION))) { STORAGE_LOG(WARN, "fail to get schema guard", K(ret), K(pkey_), K(index_table_id), K(schema_version)); } else if (check_formal && OB_FAIL(schema_guard.check_formal_guard())) { LOG_WARN("schema_guard is not formal", K(ret), K(tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(index_table_id, index_schema))) { STORAGE_LOG(WARN, "fail to get table schema", K(ret), K(index_table_id)); } else if (OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "schema error, index schema not exist", K(ret), K(index_table_id), K(schema_version)); } else if (OB_FAIL(schema_guard.get_table_schema(index_schema->get_data_table_id(), data_table_schema))) { STORAGE_LOG(WARN, "fail to get table schema", K(ret), "data_table_id", index_schema->get_data_table_id()); } else if (OB_ISNULL(data_table_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG( WARN, "data table schema must not be NULL", K(ret), "data_table_id", index_schema->get_data_table_id()); } else if (OB_FAIL(ObIndexBuildStatOperator::get_snapshot_version( index_schema->get_data_table_id(), index_table_id, *GCTX.sql_proxy_, snapshot_version))) { STORAGE_LOG(WARN, "fail to get snapshot version", K(ret), K(index_table_id)); } else if (snapshot_version <= 0) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "error unexpected, snapshot version is invalid", K(ret), K(snapshot_version)); } else if (OB_FAIL( freeze_info_proxy.get_frozen_info_less_than(*GCTX.sql_proxy_, snapshot_version, frozen_status))) { if (OB_ENTRY_NOT_EXIST != ret) { STORAGE_LOG(WARN, "fail to get next major freeze", K(ret), K(pkey_)); } else { frozen_status.frozen_version_ = 1L; ret = OB_SUCCESS; } } else if (OB_FAIL(ObIndexChecksumOperator::get_checksum_method( param.execution_id_, index_schema->get_data_table_id(), checksum_method, *GCTX.sql_proxy_))) { STORAGE_LOG(WARN, "fail to get checksum method", K(ret)); } if (OB_FAIL(ret)) { } else if (OB_FAIL(schema_guard.get_tenant_info(pkey_.get_tenant_id(), tenant_schema))) { STORAGE_LOG(WARN, "fail to get tenant schema", K(ret), K(pkey_), "tenant_id", pkey_.get_tenant_id()); } else if (OB_ISNULL(tenant_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "tenant schema not exist", K(ret), K(pkey_), "tenant_id", pkey_.get_tenant_id()); } else if (OB_FAIL(checksum.init(index_schema->get_column_count()))) { STORAGE_LOG(WARN, "fail to init checksum", K(ret)); } else if (OB_FAIL(data_desc.init(*index_schema, frozen_status.frozen_version_, nullptr, pkey_.get_partition_id(), is_major_merge ? storage::MAJOR_MERGE : storage::MINOR_MERGE, blocksstable::CCM_VALUE_ONLY == checksum_method /*calc column checksum*/, true /*store column checksum in micro block*/, pg_key, file_handle))) { STORAGE_LOG(WARN, "Fail to init data store desc, ", K(ret)); } else if (OB_FAIL(writer.open(data_desc, ObMacroDataSeq((INT64_MAX / param.task_cnt_) * param.task_id_)))) { STORAGE_LOG(WARN, "Fail to open macro block writer, ", K(ret)); } else { ObStoreRow row; ObNewRow *row_val = NULL; ObArenaAllocator allocator(lib::ObLabel("PartStorageTmp")); ObColDescArray col_descs; ObTableSchemaParam schema_param(allocator); ObRelativeTable relative_table; ObSQLMode sql_mode_for_ddl_reshape = SMO_TRADITIONAL; // see local_sort_index_by_range ObFixedArray column_ids(allocator, data_desc.row_column_count_); RowReshape *row_reshape_ins = nullptr; row.flag_ = ObActionFlag::OP_ROW_EXIST; for (int64_t i = 0; OB_SUCC(ret) && i < data_desc.row_column_count_; i++) { ObColDesc col_desc; col_desc.col_id_ = data_desc.column_ids_[i]; col_desc.col_order_ = data_desc.column_orders_[i]; col_desc.col_type_ = data_desc.column_types_[i]; if (OB_FAIL(col_descs.push_back(col_desc))) { LOG_WARN("failed to push back col desc", K(ret)); } else if (OB_FAIL(column_ids.push_back(col_desc.col_id_))) { LOG_WARN("failed to push back column id", K(ret)); } } if (OB_FAIL(ret)) { } else if (OB_FAIL(schema_param.convert(index_schema, column_ids))) { LOG_WARN("failed to convert schema param", K(ret)); } else if (OB_UNLIKELY(false == relative_table.set_schema_param(&schema_param))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to set schema param", K(ret)); } else if (OB_FAIL(malloc_rows_reshape_if_need( allocator, col_descs, 1, relative_table, sql_mode_for_ddl_reshape, row_reshape_ins))) { STORAGE_LOG(WARN, "failed to malloc for row reshape", K(ret)); } else { // do nothing } while (OB_SUCC(ret)) { if (OB_FAIL(THIS_WORKER.check_status())) { STORAGE_LOG(WARN, "failed to check status", K(ret)); } else if (OB_FAIL(iter.get_next_row(row_val))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "fail to get next row", K(ret)); } else { ret = OB_SUCCESS; break; } } else { if (OB_FAIL(reshape_table_rows( row_val, row_reshape_ins, col_descs.count(), &row, 1, sql_mode_for_ddl_reshape))) { STORAGE_LOG(WARN, "failed to reshape rows", K(ret)); } else if (OB_FAIL(writer.append_row(row))) { if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret && index_schema->is_unique_index()) { LOG_USER_ERROR( OB_ERR_PRIMARY_KEY_DUPLICATE, "", static_cast(sizeof("UNIQUE IDX") - 1), "UNIQUE IDX"); } else { STORAGE_LOG(WARN, "fail to append row to sstable", K(ret)); } } else if (OB_FAIL(checksum.calc_column_checksum(checksum_method, &row, NULL, NULL))) { STORAGE_LOG(WARN, "fail to calc column checksum", K(ret)); } } } free_row_reshape(allocator, row_reshape_ins, 1); if (OB_SUCC(ret)) { if (OB_FAIL(writer.close())) { STORAGE_LOG(WARN, "fail to close writer", K(ret)); } else { bool is_added = false; if (OB_FAIL(INTERM_MACRO_MGR.put(key, writer.get_macro_block_write_ctx()))) { STORAGE_LOG(WARN, "fail to put interm result", K(ret)); } else { is_added = true; if (OB_FAIL(report_checksum( execution_id, task_id, *index_schema, checksum_method, checksum.get_column_checksum()))) { STORAGE_LOG(WARN, "fail to report checksum", K(ret), K_(pkey), K(index_table_id)); } } if (OB_FAIL(ret) && is_added) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = INTERM_MACRO_MGR.remove(key))) { STORAGE_LOG(WARN, "fail to remove interm macro list", K(tmp_ret), K(pkey_)); } } } } } } STORAGE_LOG(INFO, "finish append local sort data", K(ret), K_(pkey), K(index_table_id), K(snapshot_version), K(frozen_status)); } return ret; } int ObPartitionStorage::report_checksum(const uint64_t execution_id, const uint64_t task_id, const ObTableSchema& index_schema, const int64_t checksum_method, int64_t* column_checksum) { int ret = OB_SUCCESS; ObArray column_ids; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_UNLIKELY(!index_schema.is_valid()) || OB_ISNULL(column_checksum)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(index_schema), KP(column_checksum)); } else if (OB_FAIL(index_schema.get_column_ids(column_ids))) { STORAGE_LOG(WARN, "fail to get column ids", K(ret)); } else { ObArray checksum_items; for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); ++i) { ObIndexChecksumItem item; item.execution_id_ = execution_id; item.tenant_id_ = pkey_.get_tenant_id(); item.table_id_ = index_schema.get_table_id(); item.partition_id_ = pkey_.get_partition_id(); item.column_id_ = column_ids.at(i).col_id_; item.task_id_ = task_id; item.checksum_ = column_checksum[i]; item.checksum_method_ = checksum_method; if (item.column_id_ >= OB_MIN_SHADOW_COLUMN_ID) { continue; } else if (OB_FAIL(checksum_items.push_back(item))) { STORAGE_LOG(WARN, "fail to push back item", K(ret)); } } if (OB_SUCC(ret)) { if (OB_FAIL(ObIndexChecksumOperator::update_checksum(checksum_items, *GCTX.sql_proxy_))) { LOG_WARN("fail to update checksum", K(ret)); } } } return ret; } int ObPartitionStorage::create_partition_store(const ObReplicaType& replica_type, const int64_t multi_version_start, const uint64_t data_table_id, const int64_t create_schema_version, const int64_t create_timestamp, ObIPartitionGroup* pg, ObTablesHandle& sstables_handle) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not inited", K(ret)); } else if (OB_INVALID_ID == data_table_id || OB_ISNULL(pg)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), K(data_table_id), KP(pg)); } else { ObPGPartitionStoreMeta partition_meta; const bool write_slog = true; const bool need_create_sstable = sstables_handle.get_count() > 0; int64_t data_version = 0; int64_t snapshot_version = 0; if (need_create_sstable) { for (int64_t i = 0; i < sstables_handle.get_count() && OB_SUCC(ret); ++i) { const ObITable* table = sstables_handle.get_table(i); if (OB_ISNULL(table)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "table is NULL", K(ret), K(pkey_)); } else if (table->is_major_sstable()) { data_version = MAX(data_version, int64_t(table->get_version().major_)); snapshot_version = MAX(snapshot_version, table->get_snapshot_version()); } } } if (OB_FAIL(ret)) { } else if (OB_FAIL(init_partition_meta(replica_type, data_version, multi_version_start, data_table_id, create_schema_version, create_timestamp, snapshot_version, partition_meta))) { STORAGE_LOG(WARN, "failed to init partition meta", K(ret)); } else if (OB_FAIL(store_.create_partition_store( partition_meta, write_slog, pg, ObFreezeInfoMgrWrapper::get_instance(), pg_memtable_mgr_))) { STORAGE_LOG(WARN, "failed to init partition store", K(ret)); } } return ret; } int ObPartitionStorage::init_partition_meta(const ObReplicaType& replica_type, const int64_t data_version, const int64_t multi_version_start, const uint64_t data_table_id, const int64_t create_schema_version, const int64_t create_timestamp, const int64_t snapshot_version, ObPGPartitionStoreMeta& partition_meta) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not inited", K(ret)); } else if (!ObReplicaTypeCheck::is_replica_type_valid(replica_type) || multi_version_start <= 0 || OB_INVALID_ID == data_table_id || create_schema_version < 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), K(replica_type), K(multi_version_start), K(data_table_id), K(create_schema_version)); } else { partition_meta.pkey_ = pkey_; partition_meta.replica_type_ = replica_type; partition_meta.multi_version_start_ = multi_version_start; partition_meta.data_table_id_ = data_table_id; partition_meta.create_schema_version_ = create_schema_version; partition_meta.report_status_.reset(); partition_meta.report_status_.data_version_ = data_version; partition_meta.create_timestamp_ = create_timestamp; partition_meta.report_status_.snapshot_version_ = snapshot_version; } return ret; } int ObPartitionStorage::update_multi_version_start(const int64_t multi_version_start) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "partition storage is not initialized", K(ret), K(pkey_)); } else if (OB_FAIL(store_.update_multi_version_start(multi_version_start))) { STORAGE_LOG(WARN, "failed to update_multi_version_start", K(ret)); } return ret; } bool ObPartitionStorage::need_check_index_( const uint64_t index_id, const ObIArray& index_stats) { bool need_check = false; for (int64_t i = 0; i < index_stats.count(); ++i) { const share::schema::ObIndexTableStat& stat = index_stats.at(i); if (stat.index_id_ == index_id) { need_check = !is_final_invalid_index_status(stat.index_status_, stat.is_drop_schema_); break; } } return need_check; } int ObPartitionStorage::validate_sstables( const ObIArray& index_stats, bool& is_all_checked) { int ret = OB_SUCCESS; ObTableHandle main_table_handle; ObTablesHandle handle; ObSSTable* main_sstable = NULL; is_all_checked = true; bool is_physical_split_finish = true; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObPartitionStore has not been inited", K(ret)); } else if (OB_FAIL(store_.is_physical_split_finished(is_physical_split_finish))) { STORAGE_LOG(WARN, "failed to check physical split finish", K(ret)); } else if (!is_physical_split_finish) { is_all_checked = true; } else if (OB_FAIL(store_.get_last_major_sstable(pkey_.get_table_id(), main_table_handle))) { if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_SUCCESS; } else { LOG_WARN("fail to get last major sstable", K(ret)); } } else if (OB_FAIL(store_.get_last_all_major_sstable(handle))) { LOG_WARN("fail to get last all major sstable", K(ret)); } else if (OB_FAIL(main_table_handle.get_sstable(main_sstable))) { LOG_WARN("fail to get major sstable", K(ret)); } else { ObArray index_tables; if (OB_FAIL(index_tables.push_back(main_sstable))) { LOG_WARN("failed to add main sstable", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < handle.get_count(); ++i) { ObITable* table = handle.get_table(i); ObSSTable* sstable = NULL; if (OB_ISNULL(table)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, table must not be NULL", K(ret)); } else if (OB_UNLIKELY(!table->is_major_sstable())) { ret = OB_ERR_SYS; LOG_WARN("err sys, table must be sstable", K(ret), K(*table)); } else if (OB_ISNULL(sstable = static_cast(table))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, sstable must not be NULL", K(ret)); } else if (table->get_key().table_id_ == pkey_.get_table_id()) { // do nothing } else if (sstable->get_meta().create_snapshot_version_ == table->get_snapshot_version()) { // create index first time } else if (!need_check_index_(table->get_key().table_id_, index_stats)) { // skip index we do not care } else if (OB_FAIL(index_tables.push_back(sstable))) { LOG_WARN("fail to push back sstable", K(ret)); } } if (OB_SUCC(ret)) { ColumnChecksumMap column_checksum_map; const int64_t row_count = main_sstable->get_meta().row_count_; if (OB_FAIL(column_checksum_map.create(main_sstable->get_meta().column_cnt_, ObModIds::OB_PARTITION_STORAGE))) { LOG_WARN("failed to create column checksum map", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < index_tables.count(); ++i) { ObSSTable* index_table = index_tables.at(i); if (main_sstable->get_snapshot_version() != index_table->get_snapshot_version()) { is_all_checked = false; if (REACH_TIME_INTERVAL(5 * 1000 * 1000)) { FLOG_INFO("index table data version is smaller than main table, can not checked all", K(*main_sstable), K(*index_table)); } } else if (OB_FAIL(validate_sstable(row_count, index_table, column_checksum_map))) { if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret) { handle_error_index_table(*index_table, index_stats, ret); if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret) { dump_error_info(*main_sstable, *index_table); } } else { LOG_WARN("fail to validate index sstable", K(ret), "main_sstable_meta", main_sstable->get_meta()); } } } if (column_checksum_map.created()) { column_checksum_map.destroy(); } } if (is_all_checked) { is_all_checked = OB_SUCCESS == ret; } } return ret; } int ObPartitionStorage::dump_error_info(ObSSTable& main_sstable, ObSSTable& index_sstable) { int ret = OB_SUCCESS; ObSSTableDumpErrorInfo dumper; ObSchemaGetterGuard schema_guard; const ObTableSchema* main_table_schema = NULL; const ObTableSchema* index_table_schema = NULL; int64_t save_schema_version = 0; const int64_t schema_version = main_sstable.get_meta().schema_version_; if (OB_FAIL(schema_service_->retry_get_schema_guard( schema_version, main_sstable.get_table_id(), schema_guard, save_schema_version))) { if (OB_TABLE_IS_DELETED != ret) { STORAGE_LOG(WARN, "Fail to get schema, ", K(schema_version), K(ret)); } else { STORAGE_LOG(WARN, "table is deleted", K(ret), "table_id", main_sstable.get_table_id()); } } else if (save_schema_version < schema_version) { ret = OB_SCHEMA_ERROR; LOG_WARN("can not use older schema version", K(ret), K(schema_version), K(save_schema_version), K_(pkey), "table_id", main_sstable.get_table_id()); } else if (OB_FAIL(schema_guard.get_table_schema(main_sstable.get_table_id(), main_table_schema))) { LOG_WARN("fail to get table schema", K(ret), K(main_sstable)); } else if (OB_ISNULL(main_table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("main table schema is null", K(ret), K(main_sstable)); } else if (OB_FAIL(schema_guard.get_table_schema(index_sstable.get_table_id(), index_table_schema))) { LOG_WARN("fail to get table schema", K(ret), K(index_sstable)); } else if (OB_ISNULL(index_table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("index table schema is null", K(ret), K(index_sstable)); } else if (OB_FAIL(dumper.main_and_index_row_count_error( main_sstable, *main_table_schema, index_sstable, *index_table_schema))) { LOG_WARN("failed to find extras rows", K(ret), K(main_sstable), K(index_sstable)); } return ret; } int ObPartitionStorage::try_update_report_status(const common::ObVersion& version, bool& finished, bool& need_report) { int ret = OB_SUCCESS; bool major_merge_finished = false; ObSchemaGetterGuard schema_guard; const ObTableSchema* main_table_schema = NULL; ObArray index_stats; bool is_all_checked = false; finished = false; const uint64_t fetch_tenant_id = is_inner_table(pkey_.get_table_id()) ? OB_SYS_TENANT_ID : pkey_.get_tenant_id(); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (!version.is_valid()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(version)); } else if (OB_FAIL(schema_service_->get_tenant_full_schema_guard(fetch_tenant_id, schema_guard))) { STORAGE_LOG(WARN, "fail to get schema guard", K(ret), K(fetch_tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(pkey_.get_table_id(), main_table_schema))) { STORAGE_LOG(WARN, "fail to get table schema", K(ret), K(pkey_)); } else if (OB_ISNULL(main_table_schema)) { // do nothing finished = true; STORAGE_LOG(INFO, "main table schema has been deleted", K(ret), K(pkey_)); } else { ObSEArray simple_index_infos; const bool with_mv = true; const ObTableSchema* tmp_schema = NULL; if (OB_FAIL(main_table_schema->get_simple_index_infos_without_delay_deleted_tid(simple_index_infos, with_mv))) { STORAGE_LOG(WARN, "fail to get index tid array", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { if (OB_FAIL(schema_guard.get_table_schema(simple_index_infos.at(i).table_id_, tmp_schema))) { STORAGE_LOG(WARN, "fail to get table schema", K(ret)); } else if (OB_ISNULL(tmp_schema)) { ret = OB_SCHEMA_ERROR; STORAGE_LOG(WARN, "schema error, index schema must be exist", K(ret)); } else if (OB_FAIL(index_stats.push_back(ObIndexTableStat(simple_index_infos.at(i).table_id_, tmp_schema->get_index_status(), tmp_schema->is_dropped_schema())))) { STORAGE_LOG(WARN, "fail to push back index status", K(ret)); } } } } if (OB_FAIL(ret) || finished) { } else if (OB_FAIL(store_.check_need_report(version, need_report))) { STORAGE_LOG(WARN, "fail to check need report", K(ret)); } else if (need_report) { if (OB_FAIL(store_.check_major_merge_finished(version, major_merge_finished))) { STORAGE_LOG(WARN, "fail to check major merge finished", K(ret)); } else if (major_merge_finished) { if (OB_FAIL(validate_sstables(index_stats, is_all_checked))) { STORAGE_LOG(WARN, "fail to validate sstables", K(ret), K(pkey_)); } else { const int64_t data_table_id = main_table_schema->is_global_index_table() ? main_table_schema->get_data_table_id() : main_table_schema->get_table_id(); if (OB_FAIL(store_.try_update_report_status(data_table_id))) { STORAGE_LOG(WARN, "fail to advance report status", K(ret)); } else { finished = true; } } } } else { finished = true; } return ret; } int ObPartitionStorage::fill_checksum(const uint64_t index_id, const int sstable_type, ObSSTableChecksumItem& item) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObPartitionStorage has not been inited", K(ret)); } else if (OB_UNLIKELY(OB_INVALID_ID == index_id || sstable_type < 0)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(index_id), K(sstable_type)); } else if (OB_FAIL(store_.fill_checksum(index_id, sstable_type, item))) { if (OB_ENTRY_NOT_EXIST != ret) { STORAGE_LOG(WARN, "fail to fill checksum item", K(ret), K(index_id), K(sstable_type)); } } else { for (int64_t i = 0; OB_SUCC(ret) && i < item.column_checksum_.count(); ++i) { item.column_checksum_.at(i).server_ = GCTX.self_addr_; } } return ret; } int ObPartitionStorage::feedback_scan_access_stat(const ObTableScanParam& param) { int ret = OB_SUCCESS; const ObTableScanStatistic& main_scan_stat = param.main_table_scan_stat_; if (main_scan_stat.rowkey_prefix_ > 0 && main_scan_stat.bf_access_cnt_ > 0) { if (OB_FAIL(prefix_access_stat_.add_stat(main_scan_stat))) {} } return ret; } int ObPartitionStorage::validate_sstable(const int64_t row_count, const ObSSTable* sstable, ColumnChecksumMap& cc_map) { int ret = OB_SUCCESS; const ObSSTableMeta& meta = sstable->get_meta(); if (OB_ISNULL(sstable)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("sstable is null", K(ret)); } else if (INDEX_TYPE_DOMAIN_CTXCAT == meta.index_type_) { // skip checking domain_ctxcat index } else if (row_count != meta.row_count_) { ret = OB_ERR_PRIMARY_KEY_DUPLICATE; LOG_WARN( "sstable row count is not same to main table", K(ret), "main table row_count", row_count, "sstable meta", meta); } else { const int64_t table_id = sstable->get_table_id(); ObColumnChecksumEntry entry; for (int64_t i = 0; OB_SUCC(ret) && i < meta.column_cnt_; ++i) { const ObSSTableColumnMeta& col_meta = meta.column_metas_.at(i); if (col_meta.column_id_ > static_cast(OB_MIN_SHADOW_COLUMN_ID)) { // skip shadow column } else if (OB_FAIL(cc_map.get_refactored(col_meta.column_id_, entry))) { if (OB_LIKELY(OB_HASH_NOT_EXIST == ret)) { if (OB_FAIL(cc_map.set_refactored( col_meta.column_id_, ObColumnChecksumEntry(col_meta.column_checksum_, table_id)))) { LOG_WARN("failed to set column checksum entry", K(ret), K(col_meta), K(table_id)); } } else { LOG_WARN("failed to get column checksum", K(ret), K(col_meta), K(table_id)); } } else if (col_meta.column_checksum_ != entry.checksum_) { ret = OB_CHECKSUM_ERROR; LOG_ERROR("column checksum is not equal", K(ret), "column_id", col_meta.column_id_, "table_id", entry.table_id_, "other table_id", table_id, "checksum", entry.checksum_, "other checksum", col_meta.column_checksum_, K(meta)); } } } return ret; } void ObPartitionStorage::handle_error_index_table( const ObSSTable& index_table, const ObIArray& index_stats, int& result) { int ret = OB_SUCCESS; result = OB_ERR_PRIMARY_KEY_DUPLICATE; int64_t j = 0; for (j = 0; j < index_stats.count(); ++j) { if (index_stats.at(j).index_id_ == index_table.get_key().table_id_) { break; } } if (j == index_stats.count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, fail to find index stats", K(ret), K(index_table), K(index_stats)); } else { // index building, not stopping merge const share::schema::ObIndexTableStat& index_stat = index_stats.at(j); if (share::schema::INDEX_STATUS_UNAVAILABLE == index_stat.index_status_ || is_error_index_status(index_stat.index_status_, index_stat.is_drop_schema_)) { LOG_WARN("fail to build index, row count is not the same", K(ret), K(index_stats.at(j))); result = OB_SUCCESS; } else { LOG_ERROR("fail to merge index, row count is not the same", K(result), "index_id", index_table.get_key().table_id_, K(index_stats.at(j))); } } } void ObPartitionStorage::check_leader_changed_for_sql_recheck_(ObDMLRunningCtx &run_ctx, int &ret) { if (OB_ERR_DEFENSIVE_CHECK == ret) { if (run_ctx.store_ctx_.mem_ctx_ != NULL) { ObMemtableCtx *curr_mt_ctx = static_cast(run_ctx.store_ctx_.mem_ctx_); transaction::ObTransCtx *trans_ctx = curr_mt_ctx->get_trans_ctx(); if (NULL != trans_ctx) { if (!trans_ctx->is_bounded_staleness_read() && curr_mt_ctx->is_for_replay()) { TRANS_LOG(WARN, "strong consistent read follower when sql check", K(trans_ctx->get_trans_id())); ret = OB_NOT_MASTER; } } } } } } // namespace storage } // namespace oceanbase