Commit bbe2a656 authored by Vitaliy Lyudvichenko, committed by alexey-milovidov

Fixed preferred_block_size_bytes in case of PREWHERE with non-materialized column. [#METR-25237]

Parent d9306ab6
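What changed: MergeTreeBlockSizePredictor is now constructed from the requested column names and the table's sample block rather than from the per-part columns/pre_columns lists, and it skips any column that has no data files in the part. That is exactly the situation of a column added with ALTER TABLE ... ADD COLUMN ... DEFAULT that has not been materialized yet: such a column is read as a constant, and previously its presence in a PREWHERE broke the preferred_block_size_bytes estimate. The sketch below is a simplified, self-contained illustration of the new selection logic; PartStandIn and SizePredictorSketch are invented stand-ins, not the real ClickHouse classes.

```cpp
// Simplified sketch of the per-row size estimation introduced by this commit.
// Columns without data files in the part (e.g. added later via ALTER ... DEFAULT)
// are skipped, so a non-materialized PREWHERE column no longer distorts the
// estimate used for preferred_block_size_bytes.
#include <cstddef>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Stand-in for a data part: which columns actually have files, and how large they are.
struct PartStandIn
{
    std::unordered_set<std::string> columns_with_files;
    std::unordered_map<std::string, std::size_t> uncompressed_bytes;  // from checksums
    std::size_t rows = 1;

    bool hasColumnFiles(const std::string & name) const { return columns_with_files.count(name) != 0; }
};

struct SizePredictorSketch
{
    std::size_t fixed_bytes_per_row = 0;                        // fixed-width columns
    std::vector<std::pair<std::string, double>> dynamic_bytes;  // variable-width columns

    SizePredictorSketch(const PartStandIn & part,
                        const std::vector<std::string> & column_names,
                        const std::unordered_map<std::string, std::size_t> & fixed_field_sizes)
    {
        for (const auto & name : column_names)
        {
            // The core of the fix: no data files -> the column is read as a constant
            // and contributes nothing to the per-row size estimate.
            if (!part.hasColumnFiles(name))
                continue;

            auto fixed = fixed_field_sizes.find(name);
            if (fixed != fixed_field_sizes.end())
                fixed_bytes_per_row += fixed->second;
            else
                dynamic_bytes.emplace_back(name, double(part.uncompressed_bytes.at(name)) / double(part.rows));
        }
    }

    double bytesPerRow() const
    {
        double res = double(fixed_bytes_per_row);
        for (const auto & column : dynamic_bytes)
            res += column.second;
        return res;
    }
};
```

The predictor also switched from shared to unique ownership (see the MergeTreeReadPool::getTask hunk below), so each read task now owns an independent copy of the predictor.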
@@ -150,11 +150,11 @@ try
 std::reverse(remaining_mark_ranges.begin(), remaining_mark_ranges.end());
 auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
-: std::make_shared<MergeTreeBlockSizePredictor>(data_part, columns, pre_columns);
+: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());
 task = std::make_unique<MergeTreeReadTask>(data_part, remaining_mark_ranges, part_index_in_query, ordered_names,
 column_name_set, columns, pre_columns, remove_prewhere_column, should_reorder,
-size_predictor);
+std::move(size_predictor));
 if (!reader)
 {
......
@@ -67,30 +67,29 @@ MergeTreeReadTask::MergeTreeReadTask(
 const MergeTreeData::DataPartPtr & data_part, const MarkRanges & mark_ranges, const std::size_t part_index_in_query,
 const Names & ordered_names, const NameSet & column_name_set, const NamesAndTypesList & columns,
 const NamesAndTypesList & pre_columns, const bool remove_prewhere_column, const bool should_reorder,
-const MergeTreeBlockSizePredictorPtr & size_predictor)
+MergeTreeBlockSizePredictorPtr && size_predictor)
 : data_part{data_part}, mark_ranges{mark_ranges}, part_index_in_query{part_index_in_query},
 ordered_names{ordered_names}, column_name_set{column_name_set}, columns{columns}, pre_columns{pre_columns},
-remove_prewhere_column{remove_prewhere_column}, should_reorder{should_reorder}, size_predictor{size_predictor}
+remove_prewhere_column{remove_prewhere_column}, should_reorder{should_reorder}, size_predictor{std::move(size_predictor)}
 {}
 MergeTreeReadTask::~MergeTreeReadTask() = default;
 MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor(
-const MergeTreeData::DataPartPtr & data_part_,
-const NamesAndTypesList & columns,
-const NamesAndTypesList & pre_columns)
+const MergeTreeData::DataPartPtr & data_part_, const Names & columns, const Block & sample_block)
 : data_part(data_part_)
 {
-auto add_column = [&] (const NameAndTypePair & column)
+for (const String & column_name : columns)
 {
-ColumnPtr column_data = column.type->createColumn();
-const auto column_checksum = data_part->tryGetBinChecksum(column.name);
+const auto column_checksum = data_part->tryGetBinChecksum(column_name);
-/// There are no data files, column will be const
-if (!column_checksum)
+/// There are no column data files, column will be const
+if (!column_checksum || !data_part->hasColumnFiles(column_name))
 return;
+const ColumnPtr & column_data = sample_block.getByName(column_name).column;
 if (column_data->isFixed())
 {
 fixed_columns_bytes_per_row += column_data->sizeOfField();
@@ -98,19 +97,13 @@ MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor(
 else
 {
 ColumnInfo info;
-info.name = column.name;
+info.name = column_name;
 info.bytes_per_row_global = column_checksum->uncompressed_size;
 dynamic_columns_infos.emplace_back(info);
 }
-};
-for (const NameAndTypePair & column : pre_columns)
-add_column(column);
-for (const NameAndTypePair & column : columns)
-add_column(column);
+}
 size_t rows_approx = data_part->getExactSizeRows();
 bytes_per_row_global = fixed_columns_bytes_per_row;
@@ -133,7 +126,7 @@ void MergeTreeBlockSizePredictor::startBlock()
 }
-/// FIXME: add last_read_row_in_part parameter to take into account gaps between adjacent ranges
+/// TODO: add last_read_row_in_part parameter to take into account gaps between adjacent ranges
 void MergeTreeBlockSizePredictor::update(const Block & block, double decay)
 {
 size_t new_rows = block.rows();
......
@@ -10,7 +10,7 @@ struct MergeTreeReadTask;
 struct MergeTreeBlockSizePredictor;
 using MergeTreeReadTaskPtr = std::unique_ptr<MergeTreeReadTask>;
-using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredictor>;
+using MergeTreeBlockSizePredictorPtr = std::unique_ptr<MergeTreeBlockSizePredictor>;
 /** If some of the requested columns are not in the part,
@@ -50,7 +50,7 @@ struct MergeTreeReadTask
 const MergeTreeData::DataPartPtr & data_part, const MarkRanges & mark_ranges, const std::size_t part_index_in_query,
 const Names & ordered_names, const NameSet & column_name_set, const NamesAndTypesList & columns,
 const NamesAndTypesList & pre_columns, const bool remove_prewhere_column, const bool should_reorder,
-const MergeTreeBlockSizePredictorPtr & size_predictor);
+MergeTreeBlockSizePredictorPtr && size_predictor);
 virtual ~MergeTreeReadTask();
 };
@@ -58,10 +58,7 @@ struct MergeTreeReadTask
 struct MergeTreeBlockSizePredictor
 {
-MergeTreeBlockSizePredictor(
-const MergeTreeData::DataPartPtr & data_part_,
-const NamesAndTypesList & columns,
-const NamesAndTypesList & pre_columns);
+MergeTreeBlockSizePredictor(const MergeTreeData::DataPartPtr & data_part_, const Names & columns, const Block & sample_block);
 /// Reset some values for correct statistics calculating
 void startBlock();
......
@@ -108,12 +108,12 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const std::size_t min_marks_to_r
 }
 auto curr_task_size_predictor = !per_part_size_predictor[part_idx] ? nullptr
-: std::make_shared<MergeTreeBlockSizePredictor>(*per_part_size_predictor[part_idx]); /// make a copy
+: std::make_unique<MergeTreeBlockSizePredictor>(*per_part_size_predictor[part_idx]); /// make a copy
 return std::make_unique<MergeTreeReadTask>(
 part.data_part, ranges_to_get_from_part, part.part_index_in_query, column_names,
 per_part_column_name_set[part_idx], per_part_columns[part_idx], per_part_pre_columns[part_idx],
-per_part_remove_prewhere_column[part_idx], per_part_should_reorder[part_idx], curr_task_size_predictor);
+per_part_remove_prewhere_column[part_idx], per_part_should_reorder[part_idx], std::move(curr_task_size_predictor));
 }
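Ownership note: the pool keeps one predictor per part and, as the hunk above shows, each task now receives its own copy through a std::unique_ptr that is moved into the task, instead of sharing a std::shared_ptr. A minimal sketch of this copy-then-move pattern, with illustrative names rather than the real ClickHouse API:

```cpp
#include <memory>
#include <utility>
#include <vector>

// Illustrative stand-ins; not the real MergeTree classes.
struct PredictorSketch { double bytes_per_row = 0; };

struct TaskSketch
{
    std::unique_ptr<PredictorSketch> predictor;
    explicit TaskSketch(std::unique_ptr<PredictorSketch> && predictor_) : predictor(std::move(predictor_)) {}
};

int main()
{
    // The pool owns one "template" predictor per part.
    std::vector<std::unique_ptr<PredictorSketch>> per_part_predictor;
    per_part_predictor.push_back(std::make_unique<PredictorSketch>());
    per_part_predictor[0]->bytes_per_row = 100.0;

    // Each task gets an independent copy, so it can update its statistics
    // without affecting other tasks that read the same part.
    auto task_predictor = !per_part_predictor[0] ? nullptr
        : std::make_unique<PredictorSketch>(*per_part_predictor[0]);

    TaskSketch task(std::move(task_predictor));
    return task.predictor ? 0 : 1;
}
```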
@@ -163,6 +163,7 @@ std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo(
 const bool check_columns)
 {
 std::vector<std::size_t> per_part_sum_marks;
+Block sample_block = data.getSampleBlock();
 for (const auto i : ext::range(0, parts.size()))
 {
@@ -247,8 +248,8 @@ std::vector<std::size_t> MergeTreeReadPool::fillPerPartInfo(
 if (predict_block_size_bytes)
 {
-per_part_size_predictor.emplace_back(std::make_shared<MergeTreeBlockSizePredictor>(
-part.data_part, per_part_columns.back(), per_part_pre_columns.back()));
+per_part_size_predictor.emplace_back(std::make_unique<MergeTreeBlockSizePredictor>(
+part.data_part, column_names, sample_block));
 }
 else
 per_part_size_predictor.emplace_back(nullptr);
......
1 0
2 0
45 0
8999
8999
250500 250500 1000
PASSED
PASSED
250500 250500 1000
@@ -10,6 +10,25 @@ clickhouse-client --preferred_block_size_bytes=52 -q "SELECT DISTINCT blockSize(
 clickhouse-client --preferred_block_size_bytes=90 -q "SELECT DISTINCT blockSize(), ignore(p) FROM test.preferred_block_size_bytes"
 clickhouse-client -q "DROP TABLE IF EXISTS test.preferred_block_size_bytes"
+# PREWHERE using empty column
+clickhouse-client -q "DROP TABLE IF EXISTS test.pbs"
+clickhouse-client -q "CREATE TABLE test.pbs (p Date, i UInt64) ENGINE = MergeTree(p, p, 8192)"
+clickhouse-client -q "INSERT INTO test.pbs (i) SELECT number AS i FROM system.numbers LIMIT 9000"
+clickhouse-client -q "ALTER TABLE test.pbs ADD COLUMN s UInt8 DEFAULT 1"
+clickhouse-client --preferred_block_size_bytes=10000 -q "SELECT max(i) FROM test.pbs PREWHERE s = 1"
+clickhouse-client -q "INSERT INTO test.pbs (i, s) SELECT number AS i, 1 AS s FROM system.numbers LIMIT 9000"
+clickhouse-client --preferred_block_size_bytes=10000 -q "SELECT max(i) FROM test.pbs PREWHERE s = 1"
+clickhouse-client -q "DROP TABLE test.pbs"
+# Nullable PREWHERE
+clickhouse-client -q "DROP TABLE IF EXISTS test.nullable_prewhere"
+clickhouse-client -q "CREATE TABLE test.nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree(p, p, 8)"
+clickhouse-client -q "INSERT INTO test.nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
+clickhouse-client -q "SELECT sum(d), sum(f), max(d) FROM test.nullable_prewhere PREWHERE NOT isNull(f)"
+clickhouse-client -q "DROP TABLE IF EXISTS test.nullable_prewhere"
 # Depend on 00282_merging test
 pushd `dirname $0` > /dev/null
@@ -25,11 +44,3 @@ cat "$SCRIPTDIR"/00282_merging.sql | clickhouse-client --preferred_block_size_by
 cmp "$SCRIPTDIR"/00282_merging.reference preferred_block_size_bytes.stdout && echo PASSED || echo FAILED
 rm preferred_block_size_bytes.stdout
-# Nullable PREWHERE
-clickhouse-client -q "DROP TABLE IF EXISTS test.nullable_prewhere"
-clickhouse-client -q "CREATE TABLE test.nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree(p, p, 8)"
-clickhouse-client -q "INSERT INTO test.nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
-clickhouse-client -q "SELECT sum(d), sum(f), max(d) FROM test.nullable_prewhere PREWHERE NOT isNull(f)"
-clickhouse-client -q "DROP TABLE IF EXISTS test.nullable_prewhere"