提交 f0bad67c 编写于 作者: O obdev 提交者: wangzelin.wzl

Patch fixes for row count estimation to 3.1_opensource_release

上级 af7df013
......@@ -18,6 +18,7 @@ namespace oceanbase {
using namespace common;
using namespace blocksstable;
namespace storage {
// Macro block count above which ObStoreRowSingleScanEstimator::estimate_row_count()
// stops scanning macro blocks one by one and instead scales the SSTable's total row
// count by the fraction of macro blocks covered by the range.
const int64_t MACRO_BLOCK_COUNT_THRESHOLD = 1024;
// Builds an estimate context that does not yet reference an SSTable or a rowkey list;
// both pointers start out as NULL.
ObSSTableEstimateContext::ObSSTableEstimateContext()
    : sstable_(NULL),
      rowkeys_(NULL)
{
}
......@@ -118,50 +119,34 @@ ObStoreRowSingleScanEstimator::ObStoreRowSingleScanEstimator()
// Empty destructor body: no explicit cleanup is performed here.
ObStoreRowSingleScanEstimator::~ObStoreRowSingleScanEstimator()
{}
// Installs the estimation context and resolves the macro blocks for its range.
//
// Step 1 delegates to ObISSTableEstimator::set_context(). Step 2, run only when
// step 1 succeeded, asks the SSTable for the macro blocks matching
// *context_.range_ and stores them in context_.macro_blocks_.
//
// @param context  estimation context to install.
// @return OB_SUCCESS, or the error code of the first step that failed (each
//         failure is logged at WARN level).
int ObStoreRowSingleScanEstimator::set_context(ObSSTableEstimateContext& context)
{
  int ret = OB_SUCCESS;

  if (OB_FAIL(ObISSTableEstimator::set_context(context))) {
    STORAGE_LOG(WARN, "failed to set context", K(ret));
  }

  if (OB_SUCC(ret)) {
    if (OB_FAIL(context_.sstable_->find_macros(*context_.range_, context_.macro_blocks_))) {
      STORAGE_LOG(WARN, "fail to find macros", K(ret));
    }
  }

  return ret;
}
int ObStoreRowSingleScanEstimator::open()
int ObStoreRowSingleScanEstimator::check_bf(ObMacroBlockCtx& macro_block_ctx)
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
if (1 == context_.macro_blocks_.count()) {
// check bloom filter to identify if the scan is empty
const MacroBlockId& macro_block_id = context_.macro_blocks_.at(0).get_macro_block_id();
ObStoreRowkey rowkey;
ObStorageFileHandle file_handle;
ObStorageFile* file = nullptr;
if (OB_FAIL(file_handle.assign(context_.sstable_->get_storage_file_handle()))) {
STORAGE_LOG(WARN, "fail to get file handle", K(ret), K(context_.sstable_->get_storage_file_handle()));
} else if (OB_ISNULL(file = file_handle.get_storage_file())) {
ret = OB_ERR_UNEXPECTED;
STORAGE_LOG(WARN, "fail to get pg file", K(ret), K(file_handle));
} else if (OB_FAIL(get_common_rowkey(context_.range_->get_range(), rowkey))) {
STORAGE_LOG(WARN, "failed to get common rowkey", K(ret), K(context_.range_));
} else if (rowkey.get_obj_cnt() > 0) {
// allow check contain fail, should not overwrite ret
bool is_contain = false;
if (OB_SUCCESS !=
(tmp_ret = context_.cache_context_.bf_cache_->may_contain(
context_.sstable_->get_table_id(), macro_block_id, file->get_file_id(), rowkey, is_contain))) {
if (OB_ENTRY_NOT_EXIST != tmp_ret) {
STORAGE_LOG(WARN, "failed to check may contain", K(tmp_ret), K_(context), K(macro_block_id), K(rowkey));
}
} else if (!is_contain) {
is_empty_scan_ = true;
// check bloom filter to identify if the scan is empty
const MacroBlockId& macro_block_id = macro_block_ctx.get_macro_block_id();
ObStoreRowkey rowkey;
ObStorageFileHandle file_handle;
ObStorageFile* file = nullptr;
if (OB_FAIL(file_handle.assign(context_.sstable_->get_storage_file_handle()))) {
STORAGE_LOG(WARN, "fail to get file handle", K(ret), K(context_.sstable_->get_storage_file_handle()));
} else if (OB_ISNULL(file = file_handle.get_storage_file())) {
ret = OB_ERR_UNEXPECTED;
STORAGE_LOG(WARN, "fail to get pg file", K(ret), K(file_handle));
} else if (OB_FAIL(get_common_rowkey(context_.range_->get_range(), rowkey))) {
STORAGE_LOG(WARN, "failed to get common rowkey", K(ret), K(context_.range_));
} else if (rowkey.get_obj_cnt() > 0) {
// allow check contain fail, should not overwrite ret
bool is_contain = false;
if (OB_SUCCESS !=
(tmp_ret = context_.cache_context_.bf_cache_->may_contain(
context_.sstable_->get_table_id(), macro_block_id, file->get_file_id(), rowkey, is_contain))) {
if (OB_ENTRY_NOT_EXIST != tmp_ret) {
STORAGE_LOG(WARN, "failed to check may contain", K(tmp_ret), K_(context), K(macro_block_id), K(rowkey));
}
} else if (!is_contain) {
is_empty_scan_ = true;
}
} else if (0 == context_.macro_blocks_.count()) {
is_empty_scan_ = true;
}
return ret;
......@@ -177,31 +162,51 @@ void ObStoreRowSingleScanEstimator::reset()
int ObStoreRowSingleScanEstimator::estimate_row_count(ObPartitionEst& part_est)
{
int ret = OB_SUCCESS;
int64_t total_macro_block_count = 0;
ObMacroBlockIterator macro_iter;
if (OB_FAIL(open())) {
STORAGE_LOG(WARN, "failed to open single scan estimator", K(ret));
if (!context_.range_->get_range().is_valid()) {
} else if (context_.range_->get_range().is_whole_range()) {
part_est.logical_row_count_ = context_.sstable_->get_meta().row_count_;
part_est.physical_row_count_ = part_est.logical_row_count_;
} else if (OB_FAIL(macro_iter.open(*context_.sstable_, *context_.range_))) {
STORAGE_LOG(WARN, "fail to open macro iter,", K(ret));
} else if (OB_FAIL(macro_iter.get_macro_block_count(total_macro_block_count))) {
STORAGE_LOG(WARN, "fail to get macro block count,", K(ret));
} else if (0 == total_macro_block_count) {
} else if (total_macro_block_count > MACRO_BLOCK_COUNT_THRESHOLD) {
// there are too many block, do estimate in fast way
part_est.logical_row_count_ = (double)total_macro_block_count / context_.sstable_->get_meta().macro_block_count_ *
context_.sstable_->get_meta().row_count_;
part_est.physical_row_count_ = part_est.logical_row_count_;
} else {
// do calculate cost metrics by macro block scan.
int64_t total_macro_block_count = context_.macro_blocks_.count();
if (context_.range_->get_range().is_whole_range()) {
part_est.logical_row_count_ += context_.sstable_->get_meta().row_count_;
} else if (!is_empty_scan_) {
MacroBlockId macro_block_id;
for (int64_t i = 0; OB_SUCC(ret) && i < total_macro_block_count; ++i) {
const bool is_start_block = (0 == i);
const bool is_last_block = (total_macro_block_count - 1 == i);
const ObMacroBlockCtx& macro_block_ctx = context_.macro_blocks_.at(i);
if (OB_FAIL(estimate_macro_row_count(macro_block_ctx, is_start_block, is_last_block, part_est))) {
// do estimate by macro block scan
ObMacroBlockCtx macro_block_ctx;
int64_t idx = 0;
while (OB_SUCC(ret)) {
bool is_start_block = 0 == idx;
bool is_last_block = total_macro_block_count - 1 == idx;
if (OB_FAIL(macro_iter.get_next_macro_block(macro_block_ctx))) {
if (OB_ITER_END != ret) {
STORAGE_LOG(WARN, "fail to get next macro block, ", K(ret));
} else {
ret = OB_SUCCESS;
break;
}
} else {
if (1 == total_macro_block_count && OB_FAIL(check_bf(macro_block_ctx))) {
STORAGE_LOG(WARN, "failed to open single scan estimator", K(ret));
} else if (is_empty_scan_) {
} else if (OB_FAIL(estimate_macro_row_count(macro_block_ctx, is_start_block, is_last_block, part_est))) {
STORAGE_LOG(WARN,
"cannot estimate cost of macro block.",
K(ret),
K(macro_block_ctx),
K(i),
K(idx),
K(total_macro_block_count));
}
}
idx++;
}
part_est.physical_row_count_ = part_est.logical_row_count_;
}
......@@ -626,7 +631,7 @@ int ObMultiVersionSingleScanEstimator::estimate_macro_row_count(const blocksstab
int64_t logical_row_count = 0, physical_row_count = 0;
if (OB_FAIL(context_.cache_context_.block_index_cache_->get_micro_infos(context_.sstable_->get_table_id(),
macro_block_ctx,
context_.range_->get_range(),
context_.multi_version_range_.get_range(),
is_left_border,
is_right_border,
micro_infos))) {
......@@ -637,7 +642,7 @@ int ObMultiVersionSingleScanEstimator::estimate_macro_row_count(const blocksstab
}
} else if (1 != micro_infos.count()) {
ret = OB_ERR_UNEXPECTED;
STORAGE_LOG(WARN, "unexpected error, should only 1 micro block, ", K(ret));
STORAGE_LOG(WARN, "unexpected error, should only 1 micro block, ", K(ret), K(micro_infos.count()));
} else if (OB_FAIL(estimate_border_row_count(
micro_infos.at(0), macro_block_ctx, true, logical_row_count, physical_row_count))) {
STORAGE_LOG(WARN, "failed to estimate_border_row_count for multi version, ", K(ret));
......
......@@ -150,8 +150,7 @@ class ObStoreRowSingleScanEstimator : public ObISSTableEstimator {
public:
ObStoreRowSingleScanEstimator();
virtual ~ObStoreRowSingleScanEstimator();
int set_context(ObSSTableEstimateContext& context);
int open();
int check_bf(blocksstable::ObMacroBlockCtx& macro_block_ctx);
void reset();
virtual int estimate_row_count(ObPartitionEst& part_est);
......
......@@ -1987,6 +1987,59 @@ TEST_F(TestMicroBlockRowScanner, test_magic_row)
scanner_iter.reset();
}
// Row-count estimation over a rowkey-prefix range when the second micro block holds
// only a MAGIC row.
//
// The test prepares two micro blocks, builds the store range (1, "var1") .. (2, "var1"),
// runs estimate_scan_row_count() on the fixture SSTable and expects both the logical
// and the physical row count to be 0.
TEST_F(TestMicroBlockRowScanner, test_estimate_with_magic_row)
{
  GCONF._enable_sparse_row = false;
  const int64_t rowkey_cnt = 4;
  const int64_t micro_cnt = 2;
  const char* micro_data[micro_cnt];
  int index = 0;
  micro_data[index++] = "bigint var bigint bigint bigint bigint flag multi_version_row_flag trans_id\n"
                        "1 var1 -1 -1 9 NOP EXIST U trans_id_1\n";
  micro_data[index++] = "bigint var bigint bigint bigint bigint flag multi_version_row_flag trans_id\n"
                        "1 var1 MAGIC MAGIC NOP NOP EXIST LM trans_id_0\n";
  prepare_data(micro_data, index, rowkey_cnt, 9, "none", FLAT_ROW_STORE, 0);

  // minor
  ObVersionRange trans_version_range;
  trans_version_range.base_version_ = 0;
  trans_version_range.snapshot_version_ = 100;
  trans_version_range.multi_version_start_ = 1;
  prepare_query_param(trans_version_range, true, false);

  // Both range borders share the same varchar column value; derive its length from
  // the literal (excluding the terminating NUL) instead of hard-coding it.
  const char var1[] = "var1";
  const int32_t var1_len = static_cast<int32_t>(sizeof(var1) - 1);
  ObObj start_val[2];
  ObObj end_val[2];
  start_val[0].set_int(1);
  start_val[1].set_varchar(var1, var1_len);
  start_val[1].set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI);
  end_val[0].set_int(2);
  end_val[1].set_varchar(var1, var1_len);
  end_val[1].set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI);

  ObStoreRange range;
  ObStoreRowkey start_key(start_val, 2);
  ObStoreRowkey end_key(end_val, 2);
  range.table_id_ = combine_id(TENANT_ID, TABLE_ID);
  range.start_key_ = start_key;
  range.end_key_ = end_key;

  ObExtStoreRange ext_range;
  ext_range.reset();
  ext_range.get_range() = range;
  ASSERT_EQ(OB_SUCCESS, ext_range.to_collation_free_range_on_demand_and_cutoff_range(allocator_));

  ObPartitionEst cost_metrics;
  cost_metrics.reset();

  // Converted to a multi-version range, (1 : 2) becomes (1,-max : 2,max) and covers
  // only 1 micro block. Without that conversion the plain range (1 : 2) is treated the
  // same as (1,-max : 2,max) and would cover 2 micro blocks.
  int ret = sstable_.estimate_scan_row_count(context_.query_flag_, range.table_id_, ext_range, cost_metrics);
  ASSERT_EQ(OB_SUCCESS, ret);
  ASSERT_EQ(0, cost_metrics.logical_row_count_);
  ASSERT_EQ(0, cost_metrics.physical_row_count_);
}
TEST_F(TestMicroBlockRowScanner, minor_merge_lob_reuse_allocator)
{
const int64_t rowkey_cnt = 4;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册