From b1fa3fab9dc2789cd3472c846ecc0e63e0907de2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Oct 2018 11:58:19 +0300 Subject: [PATCH] Fix prewhere whith final. #2827 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 20 ++++++++++++------- dbms/src/Interpreters/ExpressionAnalyzer.h | 5 +++-- .../Interpreters/InterpreterSelectQuery.cpp | 15 +++++++------- dbms/src/Storages/IStorage.h | 3 +++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- .../MergeTree/MergeTreeRangeReader.cpp | 14 +++++++++++-- dbms/src/Storages/SelectQueryInfo.h | 4 ++-- dbms/src/Storages/StorageMergeTree.h | 2 ++ .../src/Storages/StorageReplicatedMergeTree.h | 2 ++ 9 files changed, 47 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 1924608b67..97ca33174f 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2450,16 +2450,23 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty return true; } -bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression) +bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types, + const ASTPtr & sampling_expression, const ASTPtr & primary_expression) { assertSelect(); if (!select_query->prewhere_expression) return false; - Names required_sample_columns; + Names additional_required_mergetree_columns; if (sampling_expression) - required_sample_columns = ExpressionAnalyzer(sampling_expression, context, storage).getRequiredSourceColumns(); + additional_required_mergetree_columns = ExpressionAnalyzer(sampling_expression, context, storage).getRequiredSourceColumns(); + if (primary_expression) + { + auto required_primary_columns = ExpressionAnalyzer(primary_expression, context, storage).getRequiredSourceColumns(); + additional_required_mergetree_columns.insert(additional_required_mergetree_columns.end(), + required_primary_columns.begin(), required_primary_columns.end()); + } initChain(chain, source_columns); auto & step = chain.getLastStep(); @@ -2476,10 +2483,9 @@ bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool onl auto required_columns = tmp_actions->getRequiredColumns(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); - /// Add required columns for sample expression to required output in order not to remove them after - /// prewhere execution because sampling is executed after prewhere. - /// TODO: add sampling execution to common chain. - for (const auto & column : required_sample_columns) + /// Add required columns to required output in order not to remove them after prewhere execution. + /// TODO: add sampling and final execution to common chain. + for (const auto & column : additional_required_mergetree_columns) { if (required_source_columns.count(column)) { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 4b2b0d818e..210011d9ca 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -142,8 +142,9 @@ public: bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types); bool appendJoin(ExpressionActionsChain & chain, bool only_types); /// remove_filter is set in ExpressionActionsChain::finalize(); - /// sampling_expression is needed if sampling is used in order to not remove columns are used in it. - bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression); + /// sampling_expression and primary_expression are needed in order to not remove columns are used in it. + bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, + const ASTPtr & sampling_expression, const ASTPtr & primary_expression); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 45edccf723..e0f5c7e56d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -308,21 +308,21 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression const ExpressionActionsChain::Step & step = chain.steps.at(0); res.prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0); - Names columns_to_remove_after_sampling; + Names columns_to_remove; for (size_t i = 1; i < step.required_output.size(); ++i) { if (step.can_remove_required_output[i]) - columns_to_remove_after_sampling.push_back(step.required_output[i]); + columns_to_remove.push_back(step.required_output[i]); } - if (!columns_to_remove_after_sampling.empty()) + if (!columns_to_remove.empty()) { auto columns = res.prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList(); ExpressionActionsPtr actions = std::make_shared(columns, context); - for (const auto & column : columns_to_remove_after_sampling) + for (const auto & column : columns_to_remove) actions->add(ExpressionAction::removeColumn(column)); - res.prewhere_info->after_sampling_actions = std::move(actions); + res.prewhere_info->remove_columns_actions = std::move(actions); } } if (has_where) @@ -336,8 +336,9 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression { ExpressionActionsChain chain(context); - ASTPtr sampling_expression = storage && query.sample_size() ? storage->getSamplingExpression() : nullptr; - if (query_analyzer->appendPrewhere(chain, !res.first_stage, sampling_expression)) + ASTPtr sampling_expression = (storage && query.sample_size()) ? storage->getSamplingExpression() : nullptr; + ASTPtr primary_expression = (storage && query.final()) ? storage->getPrimaryExpression() : nullptr; + if (query_analyzer->appendPrewhere(chain, !res.first_stage, sampling_expression, primary_expression)) { has_prewhere = true; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index d256a5628f..a0a6103558 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -346,6 +346,9 @@ public: /// Returns sampling expression for storage or nullptr if there is no. virtual ASTPtr getSamplingExpression() const { return nullptr; } + /// Returns primary expression for storage or nullptr if there is no. + virtual ASTPtr getPrimaryExpression() const { return nullptr; } + using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 3ae37c291d..438a660af6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -597,9 +597,9 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( stream = std::make_shared>( stream, std::make_shared(), used_sample_factor, "_sample_factor"); - if (query_info.prewhere_info && query_info.prewhere_info->after_sampling_actions) + if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) for (auto & stream : res) - stream = std::make_shared(stream, query_info.prewhere_info->after_sampling_actions); + stream = std::make_shared(stream, query_info.prewhere_info->remove_columns_actions); return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index e59124809d..f6fc0807c0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -430,10 +430,20 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar } else { + size_t num_rows = read_result.block.rows(); + if (!read_result.block) + { + if (auto * filter = read_result.getFilter()) + num_rows = countBytesInFilter(filter->getData()); /// All columns were removed and filter is not always true. + else if (read_result.totalRowsPerGranule()) + num_rows = read_result.numReadRows(); /// All columns were removed and filter is always true. + /// else filter is always false. + } + /// If block is empty, we still may need to add missing columns. /// In that case use number of rows in result block and don't filter block. - merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, - read_result.numReadRows()); + if (num_rows) + merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, num_rows); } for (auto i : ext::range(0, block.columns())) diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index d875e0cc7e..107a45b18c 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -25,8 +25,8 @@ struct PrewhereInfo ExpressionActionsPtr alias_actions; /// Actions which are executed on block in order to get filter column for prewhere step. ExpressionActionsPtr prewhere_actions; - /// Actions which are executed after sampling in order to remove unused columns. - ExpressionActionsPtr after_sampling_actions; + /// Actions which are executed after reading from storage in order to remove unused columns. + ExpressionActionsPtr remove_columns_actions; String prewhere_column_name; bool remove_prewhere_column = false; diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 007ef1de5e..0182f31dc8 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -96,6 +96,8 @@ public: ASTPtr getSamplingExpression() const override { return data.sampling_expression; } + ASTPtr getPrimaryExpression() const override { return data.primary_expr_ast; } + private: String path; String database_name; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 779e202fdf..491f30d93e 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -195,6 +195,8 @@ public: ASTPtr getSamplingExpression() const override { return data.sampling_expression; } + ASTPtr getPrimaryExpression() const override { return data.primary_expr_ast; } + private: /// Delete old parts from disk and from ZooKeeper. void clearOldPartsAndRemoveFromZK(); -- GitLab