diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp index 08477770c53adc3586be9ee5c29bd55731740354..a6fc329c8ebf4de4acfceafdbe02a47a3bfc4464 100644 --- a/src/Core/iostream_debug_helpers.cpp +++ b/src/Core/iostream_debug_helpers.cpp @@ -118,7 +118,7 @@ std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what) return stream; } -std::ostream & operator<<(std::ostream & stream, const SyntaxAnalyzerResult & what) +std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what) { stream << "SyntaxAnalyzerResult{"; stream << "storage=" << what.storage << "; "; diff --git a/src/Core/iostream_debug_helpers.h b/src/Core/iostream_debug_helpers.h index 8abffd4fe58233de956a78687f9b534d877939ef..ef195ed4abfd50d4e8b1b2283c33498c3fa62de8 100644 --- a/src/Core/iostream_debug_helpers.h +++ b/src/Core/iostream_debug_helpers.h @@ -46,8 +46,8 @@ std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what); class ExpressionActions; std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what); -struct SyntaxAnalyzerResult; -std::ostream & operator<<(std::ostream & stream, const SyntaxAnalyzerResult & what); +struct TreeRewriterResult; +std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what); } /// some operator<< should be declared before operator<<(... 
std::shared_ptr<>) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 65e01a73f950e2e3e978f8d0638a65aba90f025e..bc5270687d803cf3b6d756523edc4db248ea05a3 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -67,7 +67,7 @@ TTLBlockInputStream::TTLBlockInputStream( if (!default_expr_list->children.empty()) { - auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical()); + auto syntax_result = TreeRewriter(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical()); defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true); } diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h index 73f3f80f3a995a42631a60a6d3f7100b52d3fff9..5d445335045bd74a547ec1a0032fdd62a97071d5 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.h @@ -6,6 +6,8 @@ namespace DB { +class ASTFunction; + /// Extract constant arguments out of aggregate functions from child functions /// 'sum(a * 2)' -> 'sum(a) * 2' /// Rewrites: sum([multiply|divide]) -> [multiply|divide](sum) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index dd2e85eed964ad06ed5d8cfda79bd15473303e84..acbf6255fbaede2a49b125d3d97ebde7fcb22672 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -130,7 +130,7 @@ bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column) ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & query_, - const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const TreeRewriterResultPtr & 
syntax_analyzer_result_, const Context & context_, size_t subquery_depth_, bool do_global) @@ -523,7 +523,7 @@ static JoinPtr tryGetStorageJoin(std::shared_ptr analyzed_join) static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const TableJoin & analyzed_join) { ASTPtr expression_list = analyzed_join.rightKeysList(); - auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, analyzed_join.columnsFromJoinedTable()); + auto syntax_result = TreeRewriter(context).analyze(expression_list, analyzed_join.columnsFromJoinedTable()); return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index bd099693a91490e924a6d7e4b48f94c8428be7bc..a37235f2f77be8bdf1d21d52bb58dea9b3718485 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -82,7 +82,7 @@ public: /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); ExpressionAnalyzer( const ASTPtr & query_, - const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const TreeRewriterResultPtr & syntax_analyzer_result_, const Context & context_) : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false) {} @@ -112,7 +112,7 @@ public: protected: ExpressionAnalyzer( const ASTPtr & query_, - const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const TreeRewriterResultPtr & syntax_analyzer_result_, const Context & context_, size_t subquery_depth_, bool do_global_); @@ -122,7 +122,7 @@ protected: const ExtractedSettings settings; size_t subquery_depth; - SyntaxAnalyzerResultPtr syntax; + TreeRewriterResultPtr syntax; const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. 
const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } @@ -231,7 +231,7 @@ public: SelectQueryExpressionAnalyzer( const ASTPtr & query_, - const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const TreeRewriterResultPtr & syntax_analyzer_result_, const Context & context_, const StorageMetadataPtr & metadata_snapshot_, const NameSet & required_result_columns_ = {}, diff --git a/src/Interpreters/GroupByFunctionKeysVisitor.h b/src/Interpreters/GroupByFunctionKeysVisitor.h index afcf5a14118dda43925a097177f36cfafdc21c3d..04917b00c275c9e95ae632f0fa4ec302d59d86b1 100644 --- a/src/Interpreters/GroupByFunctionKeysVisitor.h +++ b/src/Interpreters/GroupByFunctionKeysVisitor.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 503807be0a7888da93493e01c7fe1418a6e5ceca..ebd6c1e8de430457edb24d1743c620b454f00f9e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index dad3f82fbc4cdd1ec29ad8e0554a5fee546a1e01..c13ed22b11145261d80992352edd19b847a8e65a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -131,7 +131,7 @@ String InterpreterSelectQuery::generateFilterActions( table_expr->children.push_back(table_expr->database_and_table_name); /// Using separate expression analyzer to prevent any possible alias injection - auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage, metadata_snapshot)); + auto syntax_result = TreeRewriter(*context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, metadata_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, 
*context, metadata_snapshot); actions = analyzer.simpleSelectActions(); @@ -311,9 +311,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); - syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect( + syntax_analyzer_result = TreeRewriter(*context).analyzeSelect( query_ptr, - SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage, metadata_snapshot), + TreeRewriterResult(source_header.getNamesAndTypesList(), storage, metadata_snapshot), options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); /// Save scalar sub queries's results in the query context @@ -1194,7 +1194,7 @@ void InterpreterSelectQuery::executeFetchColumns( = ext::map(required_columns_after_prewhere, [](const auto & it) { return it.name; }); } - auto syntax_result = SyntaxAnalyzer(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot); + auto syntax_result = TreeRewriter(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot); alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, *context).getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
@@ -1225,7 +1225,7 @@ void InterpreterSelectQuery::executeFetchColumns( prewhere_info->prewhere_actions = std::move(new_actions); auto analyzed_result - = SyntaxAnalyzer(*context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical()); + = TreeRewriter(*context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical()); prewhere_info->alias_actions = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, *context).getActions(true, false); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 3ee0fdb44e1de9737d760ea41ea0e674bd25b780..ccf202fd52922b95ae593061a485e5c1626fa0b2 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -26,8 +26,8 @@ class InterpreterSelectWithUnionQuery; class Context; class QueryPlan; -struct SyntaxAnalyzerResult; -using SyntaxAnalyzerResultPtr = std::shared_ptr; +struct TreeRewriterResult; +using TreeRewriterResultPtr = std::shared_ptr; /** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage. 
@@ -161,7 +161,7 @@ private: SelectQueryOptions options; ASTPtr query_ptr; std::shared_ptr context; - SyntaxAnalyzerResultPtr syntax_analyzer_result; + TreeRewriterResultPtr syntax_analyzer_result; std::unique_ptr query_analyzer; SelectQueryInfo query_info; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 0e1295a635e8f38491eaac2b8b47dcee4bcbe720..94740ae0bd4a0368d6b862afb1083fa382987e94 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -321,7 +321,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) if (column.default_desc.kind == ColumnDefaultKind::Materialized) { auto query = column.default_desc.expression->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); + auto syntax_result = TreeRewriter(context).analyze(query, all_columns); for (const String & dependency : syntax_result->requiredSourceColumns()) { if (updated_columns.count(dependency)) @@ -418,7 +418,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) throw Exception("Unknown index: " + command.index_name, ErrorCodes::BAD_ARGUMENTS); auto query = (*it).expression_list_ast->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); + auto syntax_result = TreeRewriter(context).analyze(query, all_columns); const auto required_columns = syntax_result->requiredSourceColumns(); for (const auto & column : required_columns) dependencies.emplace(column, ColumnDependency::SKIP_INDEX); @@ -584,7 +584,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const String & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); - auto syntax_result = SyntaxAnalyzer(context).analyze(all_asts, all_columns); + auto syntax_result = TreeRewriter(context).analyze(all_asts, all_columns); if 
(context.hasQueryContext()) for (const auto & it : syntax_result->getScalars()) context.getQueryContext().addScalar(it.first, it.second); diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index a589d1d42965a8e939e41da34cff13ad08df539d..6fb7e72d40c301774a6b028ab295c83a8d2ccadb 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -41,7 +41,7 @@ class TableJoin * It's possible to use name `expr(t2 columns)`. */ - friend class SyntaxAnalyzer; + friend class TreeRewriter; const SizeLimits size_limits; const size_t default_max_bytes = 0; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..97922aba3f0695fc8b4c1ba5b942eae867687d37 --- /dev/null +++ b/src/Interpreters/TreeOptimizer.cpp @@ -0,0 +1,561 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +const std::unordered_set possibly_injective_function_names +{ + "dictGet", + "dictGetString", + "dictGetUInt8", + "dictGetUInt16", + "dictGetUInt32", + "dictGetUInt64", + "dictGetInt8", + "dictGetInt16", + "dictGetInt32", + "dictGetInt64", + "dictGetFloat32", + "dictGetFloat64", + "dictGetDate", + "dictGetDateTime" +}; + +/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation. + * Instead, leave `GROUP BY const`. + * Next, see deleting the constants in the analyzeAggregation method. + */ +void appendUnusedGroupByColumn(ASTSelectQuery * select_query, const NameSet & source_columns) +{ + /// You must insert a constant that is not the name of the column in the table. 
Such a case is rare, but it happens. + UInt64 unused_column = 0; + String unused_column_name = toString(unused_column); + + while (source_columns.count(unused_column_name)) + { + ++unused_column; + unused_column_name = toString(unused_column); + } + + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared()); + select_query->groupBy()->children.emplace_back(std::make_shared(UInt64(unused_column))); +} + +/// Eliminates injective function calls and constant expressions from group by statement. +void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) +{ + const FunctionFactory & function_factory = FunctionFactory::instance(); + + if (!select_query->groupBy()) + { + // If there is a HAVING clause without GROUP BY, make sure we have some aggregation happen. + if (select_query->having()) + appendUnusedGroupByColumn(select_query, source_columns); + return; + } + + const auto is_literal = [] (const ASTPtr & ast) -> bool + { + return ast->as(); + }; + + auto & group_exprs = select_query->groupBy()->children; + + /// removes expression at index idx by making it last one and calling .pop_back() + const auto remove_expr_at_index = [&group_exprs] (const size_t idx) + { + if (idx < group_exprs.size() - 1) + std::swap(group_exprs[idx], group_exprs.back()); + + group_exprs.pop_back(); + }; + + /// iterate over each GROUP BY expression, eliminate injective function calls and literals + for (size_t i = 0; i < group_exprs.size();) + { + if (const auto * function = group_exprs[i]->as()) + { + /// assert function is injective + if (possibly_injective_function_names.count(function->name)) + { + /// do not handle semantic errors here + if (function->arguments->children.size() < 2) + { + ++i; + continue; + } + + const auto * dict_name_ast = function->arguments->children[0]->as(); + const auto * attr_name_ast = function->arguments->children[1]->as(); + if (!dict_name_ast || !attr_name_ast) + { + ++i; + 
continue; + } + + const auto & dict_name = dict_name_ast->value.safeGet(); + const auto & attr_name = attr_name_ast->value.safeGet(); + + const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(dict_name); + if (!dict_ptr->isInjective(attr_name)) + { + ++i; + continue; + } + } + else if (!function_factory.get(function->name, context)->isInjective(Block{})) + { + ++i; + continue; + } + + /// copy shared pointer to args in order to ensure lifetime + auto args_ast = function->arguments; + + /** remove function call and take a step back to ensure + * next iteration does not skip not yet processed data + */ + remove_expr_at_index(i); + + /// copy non-literal arguments + std::remove_copy_if( + std::begin(args_ast->children), std::end(args_ast->children), + std::back_inserter(group_exprs), is_literal + ); + } + else if (is_literal(group_exprs[i])) + { + remove_expr_at_index(i); + } + else + { + /// if neither a function nor literal - advance to next expression + ++i; + } + } + + if (group_exprs.empty()) + appendUnusedGroupByColumn(select_query, source_columns); +} + +struct GroupByKeysInfo +{ + std::unordered_set key_names; ///set of keys' short names + bool has_identifier = false; + bool has_function = false; + bool has_possible_collision = false; +}; + +GroupByKeysInfo getGroupByKeysInfo(ASTs & group_keys) +{ + GroupByKeysInfo data; + + ///filling set with short names of keys + for (auto & group_key : group_keys) + { + if (group_key->as()) + data.has_function = true; + + if (auto * group_key_ident = group_key->as()) + { + data.has_identifier = true; + if (data.key_names.count(group_key_ident->shortName())) + { + ///There may be a collision between different tables having similar variables. + ///Due to the fact that we can't track these conflicts yet, + ///it's better to disable some optimizations to avoid eliminating necessary keys. 
+ data.has_possible_collision = true; + } + + data.key_names.insert(group_key_ident->shortName()); + } + else if (auto * group_key_func = group_key->as()) + { + data.key_names.insert(group_key_func->getColumnName()); + } + else + { + data.key_names.insert(group_key->getColumnName()); + } + } + + return data; +} + +///eliminate functions of other GROUP BY keys +void optimizeGroupByFunctionKeys(ASTSelectQuery * select_query) +{ + if (!select_query->groupBy()) + return; + + auto grp_by = select_query->groupBy(); + auto & group_keys = grp_by->children; + + ASTs modified; ///result + + GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys); + + if (!group_by_keys_data.has_function || group_by_keys_data.has_possible_collision) + return; + + GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names}; + GroupByFunctionKeysVisitor(visitor_data).visit(grp_by); + + modified.reserve(group_keys.size()); + + ///filling the result + for (auto & group_key : group_keys) + { + if (auto * group_key_func = group_key->as()) + { + if (group_by_keys_data.key_names.count(group_key_func->getColumnName())) + modified.push_back(group_key); + + continue; + } + if (auto * group_key_ident = group_key->as()) + { + if (group_by_keys_data.key_names.count(group_key_ident->shortName())) + modified.push_back(group_key); + + continue; + } + else + { + if (group_by_keys_data.key_names.count(group_key->getColumnName())) + modified.push_back(group_key); + } + } + + ///modifying the input + grp_by->children = modified; +} + +/// Eliminates min/max/any-aggregators of functions of GROUP BY keys +void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query) +{ + if (!select_query->groupBy()) + return; + + auto grp_by = select_query->groupBy(); + auto & group_keys = grp_by->children; + + GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys); + + auto select = select_query->select(); + + SelectAggregateFunctionOfGroupByKeysVisitor::Data 
visitor_data{group_by_keys_data.key_names}; + SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(select); +} + +/// Remove duplicate items from ORDER BY. +void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) +{ + if (!select_query->orderBy()) + return; + + /// Make unique sorting conditions. + using NameAndLocale = std::pair; + std::set elems_set; + + ASTs & elems = select_query->orderBy()->children; + ASTs unique_elems; + unique_elems.reserve(elems.size()); + + for (const auto & elem : elems) + { + String name = elem->children.front()->getColumnName(); + const auto & order_by_elem = elem->as(); + + if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) + unique_elems.emplace_back(elem); + } + + if (unique_elems.size() < elems.size()) + elems = std::move(unique_elems); +} + +/// Optimize duplicate ORDER BY and DISTINCT +void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, const Context & context) +{ + DuplicateOrderByVisitor::Data order_by_data{context}; + DuplicateOrderByVisitor(order_by_data).visit(query); + DuplicateDistinctVisitor::Data distinct_data{}; + DuplicateDistinctVisitor(distinct_data).visit(query); +} + +/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, +/// have a single argument and are not aggregate functions. 
+void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, const Context & context, + const TablesWithColumns & tables_with_columns) +{ + auto order_by = select_query->orderBy(); + if (!order_by) + return; + + std::unordered_set group_by_hashes; + if (auto group_by = select_query->groupBy()) + { + for (auto & elem : group_by->children) + { + auto hash = elem->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + group_by_hashes.insert(key); + } + } + + for (auto & child : order_by->children) + { + auto * order_by_element = child->as(); + auto & ast_func = order_by_element->children[0]; + if (!ast_func->as()) + continue; + + MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes}; + MonotonicityCheckVisitor(data).visit(ast_func); + + if (!data.isRejected()) + { + ast_func = data.identifier->clone(); + ast_func->setAlias(""); + if (!data.monotonicity.is_positive) + order_by_element->direction *= -1; + } + } +} + +/// If ORDER BY has argument x followed by f(x), transforms it to ORDER BY x. +/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y +/// in case if f(), g(), h(), t() are deterministic (in scope of query). +/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x). +void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, const Context & context) +{ + const auto & order_by = select_query->orderBy(); + if (!order_by) + return; + + std::unordered_set prev_keys; + ASTs modified; + modified.reserve(order_by->children.size()); + + for (auto & order_by_element : order_by->children) + { + /// Order by contains ASTOrderByElement as children and the meaningful item only as a grandchild. 
+ ASTPtr & name_or_function = order_by_element->children[0]; + + if (name_or_function->as()) + { + if (!prev_keys.empty()) + { + RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context}; + RedundantFunctionsInOrderByVisitor(data).visit(name_or_function); + if (data.redundant) + continue; + } + } + + /// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy() + if (auto * identifier = name_or_function->as()) + prev_keys.emplace(getIdentifierName(identifier)); + + modified.push_back(order_by_element); + } + + if (modified.size() < order_by->children.size()) + order_by->children = std::move(modified); +} + +/// Remove duplicate items from LIMIT BY. +void optimizeLimitBy(const ASTSelectQuery * select_query) +{ + if (!select_query->limitBy()) + return; + + std::set elems_set; + + ASTs & elems = select_query->limitBy()->children; + ASTs unique_elems; + unique_elems.reserve(elems.size()); + + for (const auto & elem : elems) + { + if (elems_set.emplace(elem->getColumnName()).second) + unique_elems.emplace_back(elem); + } + + if (unique_elems.size() < elems.size()) + elems = std::move(unique_elems); +} + +/// Remove duplicated columns from USING(...). 
+void optimizeUsing(const ASTSelectQuery * select_query) +{ + if (!select_query->join()) + return; + + const auto * table_join = select_query->join()->table_join->as(); + if (!(table_join && table_join->using_expression_list)) + return; + + ASTs & expression_list = table_join->using_expression_list->children; + ASTs uniq_expressions_list; + + std::set expressions_names; + + for (const auto & expression : expression_list) + { + auto expression_name = expression->getAliasOrColumnName(); + if (expressions_names.find(expression_name) == expressions_names.end()) + { + uniq_expressions_list.push_back(expression); + expressions_names.insert(expression_name); + } + } + + if (uniq_expressions_list.size() < expression_list.size()) + expression_list = uniq_expressions_list; +} + +void optimizeAggregationFunctions(ASTPtr & query) +{ + /// Move arithmetic operations out of aggregation functions + ArithmeticOperationsInAgrFuncVisitor::Data data; + ArithmeticOperationsInAgrFuncVisitor(data).visit(query); +} + +void optimizeAnyInput(ASTPtr & query) +{ + /// Removing arithmetic operations from functions + AnyInputVisitor::Data data = {}; + AnyInputVisitor(data).visit(query); +} + +void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context) +{ + RemoveInjectiveFunctionsVisitor::Data data = {context}; + RemoveInjectiveFunctionsVisitor(data).visit(query); +} + +void transformIfStringsIntoEnum(ASTPtr & query) +{ + std::unordered_set function_names = {"if", "transform"}; + std::unordered_set used_as_argument; + + FindUsedFunctionsVisitor::Data used_data{function_names, used_as_argument}; + FindUsedFunctionsVisitor(used_data).visit(query); + + ConvertStringsToEnumVisitor::Data convert_data{used_as_argument}; + ConvertStringsToEnumVisitor(convert_data).visit(query); +} + +} + +void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif) +{ + /// Optimize if with constant condition after constants was substituted instead of scalar 
subqueries. + OptimizeIfWithConstantConditionVisitor(aliases).visit(query); + + if (if_chain_to_multiif) + OptimizeIfChainsVisitor().visit(query); +} + +void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, + const std::vector & tables_with_columns, + const Context & context, bool & rewrite_subqueries) +{ + const auto & settings = context.getSettingsRef(); + + auto * select_query = query->as(); + if (!select_query) + throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); + + optimizeIf(query, aliases, settings.optimize_if_chain_to_multiif); + + /// Move arithmetic operations out of aggregation functions + if (settings.optimize_arithmetic_operations_in_aggregate_functions) + optimizeAggregationFunctions(query); + + /// Push the predicate expression down to the subqueries. + rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query); + + /// GROUP BY injective function elimination. + optimizeGroupBy(select_query, source_columns_set, context); + + /// GROUP BY functions of other keys elimination. + if (settings.optimize_group_by_function_keys) + optimizeGroupByFunctionKeys(select_query); + + ///Move all operations out of any function + if (settings.optimize_move_functions_out_of_any) + optimizeAnyInput(query); + + /// Remove injective functions inside uniq + if (settings.optimize_injective_functions_inside_uniq) + optimizeInjectiveFunctionsInsideUniq(query, context); + + /// Eliminate min/max/any aggregators of functions of GROUP BY keys + if (settings.optimize_aggregators_of_group_by_keys) + optimizeAggregateFunctionsOfGroupByKeys(select_query); + + /// Remove duplicate items from ORDER BY. + optimizeDuplicatesInOrderBy(select_query); + + /// Remove duplicate ORDER BY and DISTINCT from subqueries. 
+ if (settings.optimize_duplicate_order_by_and_distinct) + optimizeDuplicateOrderByAndDistinct(query, context); + + /// Remove functions from ORDER BY if its argument is also in ORDER BY + if (settings.optimize_redundant_functions_in_order_by) + optimizeRedundantFunctionsInOrderBy(select_query, context); + + /// Replace monotonous functions with its argument + if (settings.optimize_monotonous_functions_in_order_by) + optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns); + + /// If function "if" has String-type arguments, transform them into enum + if (settings.optimize_if_transform_strings_to_enum) + transformIfStringsIntoEnum(query); + + /// Remove duplicated elements from LIMIT BY clause. + optimizeLimitBy(select_query); + + /// Remove duplicated columns from USING(...). + optimizeUsing(select_query); +} + +} diff --git a/src/Interpreters/TreeOptimizer.h b/src/Interpreters/TreeOptimizer.h new file mode 100644 index 0000000000000000000000000000000000000000..a81264184c167a23ab6a0ef18a7f950485495fbf --- /dev/null +++ b/src/Interpreters/TreeOptimizer.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class Context; + +/// Part of Tree Rewriter (SyntaxAnalyzer) that optimizes AST. +/// Query should be ready to execute either before or after it. But resulting query could be faster. 
+class TreeOptimizer +{ +public: + static void apply(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, + const std::vector & tables_with_columns, + const Context & context, bool & rewrite_subqueries); + + static void optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif); +}; + +} diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/TreeRewriter.cpp similarity index 56% rename from src/Interpreters/SyntaxAnalyzer.cpp rename to src/Interpreters/TreeRewriter.cpp index 79a9f2b820709f18097cc9dbda4dc19ff2b0c9a1..3c8238bc1237d29353ad95df95984711d45d9f3d 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -2,56 +2,35 @@ #include #include -#include +#include #include #include -#include #include #include #include #include #include #include -#include #include -#include -#include #include #include #include #include /// getSmallestColumn() #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include #include -#include #include #include -#include - #include #include #include #include -#include - namespace DB { @@ -250,461 +229,8 @@ void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t sub ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } -const std::unordered_set possibly_injective_function_names -{ - "dictGet", - "dictGetString", - "dictGetUInt8", - "dictGetUInt16", - "dictGetUInt32", - "dictGetUInt64", - "dictGetInt8", - "dictGetInt16", - "dictGetInt32", - "dictGetInt64", - "dictGetFloat32", - "dictGetFloat64", - "dictGetDate", - "dictGetDateTime" -}; - -/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation. - * Instead, leave `GROUP BY const`. - * Next, see deleting the constants in the analyzeAggregation method. 
- */ -void appendUnusedGroupByColumn(ASTSelectQuery * select_query, const NameSet & source_columns) -{ - /// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens. - UInt64 unused_column = 0; - String unused_column_name = toString(unused_column); - - while (source_columns.count(unused_column_name)) - { - ++unused_column; - unused_column_name = toString(unused_column); - } - - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared()); - select_query->groupBy()->children.emplace_back(std::make_shared(UInt64(unused_column))); -} - -/// Eliminates injective function calls and constant expressions from group by statement. -void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) -{ - const FunctionFactory & function_factory = FunctionFactory::instance(); - - if (!select_query->groupBy()) - { - // If there is a HAVING clause without GROUP BY, make sure we have some aggregation happen. 
- if (select_query->having()) - appendUnusedGroupByColumn(select_query, source_columns); - return; - } - - const auto is_literal = [] (const ASTPtr & ast) -> bool - { - return ast->as(); - }; - - auto & group_exprs = select_query->groupBy()->children; - - /// removes expression at index idx by making it last one and calling .pop_back() - const auto remove_expr_at_index = [&group_exprs] (const size_t idx) - { - if (idx < group_exprs.size() - 1) - std::swap(group_exprs[idx], group_exprs.back()); - - group_exprs.pop_back(); - }; - - /// iterate over each GROUP BY expression, eliminate injective function calls and literals - for (size_t i = 0; i < group_exprs.size();) - { - if (const auto * function = group_exprs[i]->as()) - { - /// assert function is injective - if (possibly_injective_function_names.count(function->name)) - { - /// do not handle semantic errors here - if (function->arguments->children.size() < 2) - { - ++i; - continue; - } - - const auto * dict_name_ast = function->arguments->children[0]->as(); - const auto * attr_name_ast = function->arguments->children[1]->as(); - if (!dict_name_ast || !attr_name_ast) - { - ++i; - continue; - } - - const auto & dict_name = dict_name_ast->value.safeGet(); - const auto & attr_name = attr_name_ast->value.safeGet(); - - const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(dict_name); - if (!dict_ptr->isInjective(attr_name)) - { - ++i; - continue; - } - } - else if (!function_factory.get(function->name, context)->isInjective(Block{})) - { - ++i; - continue; - } - - /// copy shared pointer to args in order to ensure lifetime - auto args_ast = function->arguments; - - /** remove function call and take a step back to ensure - * next iteration does not skip not yet processed data - */ - remove_expr_at_index(i); - - /// copy non-literal arguments - std::remove_copy_if( - std::begin(args_ast->children), std::end(args_ast->children), - std::back_inserter(group_exprs), is_literal - ); - } - else if 
(is_literal(group_exprs[i])) - { - remove_expr_at_index(i); - } - else - { - /// if neither a function nor literal - advance to next expression - ++i; - } - } - - if (group_exprs.empty()) - appendUnusedGroupByColumn(select_query, source_columns); -} - -struct GroupByKeysInfo -{ - std::unordered_set key_names; ///set of keys' short names - bool has_identifier = false; - bool has_function = false; - bool has_possible_collision = false; -}; - -GroupByKeysInfo getGroupByKeysInfo(ASTs & group_keys) -{ - GroupByKeysInfo data; - - ///filling set with short names of keys - for (auto & group_key : group_keys) - { - if (group_key->as()) - data.has_function = true; - - if (auto * group_key_ident = group_key->as()) - { - data.has_identifier = true; - if (data.key_names.count(group_key_ident->shortName())) - { - ///There may be a collision between different tables having similar variables. - ///Due to the fact that we can't track these conflicts yet, - ///it's better to disable some optimizations to avoid elimination necessary keys. 
- data.has_possible_collision = true; - } - - data.key_names.insert(group_key_ident->shortName()); - } - else if (auto * group_key_func = group_key->as()) - { - data.key_names.insert(group_key_func->getColumnName()); - } - else - { - data.key_names.insert(group_key->getColumnName()); - } - } - - return data; -} - -///eliminate functions of other GROUP BY keys -void optimizeGroupByFunctionKeys(ASTSelectQuery * select_query) -{ - if (!select_query->groupBy()) - return; - - auto grp_by = select_query->groupBy(); - auto & group_keys = grp_by->children; - - ASTs modified; ///result - - GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys); - - if (!group_by_keys_data.has_function || group_by_keys_data.has_possible_collision) - return; - - GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names}; - GroupByFunctionKeysVisitor(visitor_data).visit(grp_by); - - modified.reserve(group_keys.size()); - - ///filling the result - for (auto & group_key : group_keys) - { - if (auto * group_key_func = group_key->as()) - { - if (group_by_keys_data.key_names.count(group_key_func->getColumnName())) - modified.push_back(group_key); - - continue; - } - if (auto * group_key_ident = group_key->as()) - { - if (group_by_keys_data.key_names.count(group_key_ident->shortName())) - modified.push_back(group_key); - - continue; - } - else - { - if (group_by_keys_data.key_names.count(group_key->getColumnName())) - modified.push_back(group_key); - } - } - - ///modifying the input - grp_by->children = modified; -} - -/// Eliminates min/max/any-aggregators of functions of GROUP BY keys -void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query) -{ - if (!select_query->groupBy()) - return; - - auto grp_by = select_query->groupBy(); - auto & group_keys = grp_by->children; - - GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys); - - auto select = select_query->select(); - - SelectAggregateFunctionOfGroupByKeysVisitor::Data 
visitor_data{group_by_keys_data.key_names}; - SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(select); -} - -/// Remove duplicate items from ORDER BY. -void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) -{ - if (!select_query->orderBy()) - return; - - /// Make unique sorting conditions. - using NameAndLocale = std::pair; - std::set elems_set; - - ASTs & elems = select_query->orderBy()->children; - ASTs unique_elems; - unique_elems.reserve(elems.size()); - - for (const auto & elem : elems) - { - String name = elem->children.front()->getColumnName(); - const auto & order_by_elem = elem->as(); - - if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) - unique_elems.emplace_back(elem); - } - - if (unique_elems.size() < elems.size()) - elems = std::move(unique_elems); -} - -/// Optimize duplicate ORDER BY and DISTINCT -void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, const Context & context) -{ - DuplicateOrderByVisitor::Data order_by_data{context}; - DuplicateOrderByVisitor(order_by_data).visit(query); - DuplicateDistinctVisitor::Data distinct_data{}; - DuplicateDistinctVisitor(distinct_data).visit(query); -} - -/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, -/// has a single argument and not an aggregate functions. 
-void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, const Context & context, - const TablesWithColumns & tables_with_columns) -{ - auto order_by = select_query->orderBy(); - if (!order_by) - return; - - std::unordered_set group_by_hashes; - if (auto group_by = select_query->groupBy()) - { - for (auto & elem : group_by->children) - { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - group_by_hashes.insert(key); - } - } - - for (auto & child : order_by->children) - { - auto * order_by_element = child->as(); - auto & ast_func = order_by_element->children[0]; - if (!ast_func->as()) - continue; - - MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes}; - MonotonicityCheckVisitor(data).visit(ast_func); - - if (!data.isRejected()) - { - ast_func = data.identifier->clone(); - ast_func->setAlias(""); - if (!data.monotonicity.is_positive) - order_by_element->direction *= -1; - } - } -} - -/// If ORDER BY has argument x followed by f(x) transfroms it to ORDER BY x. -/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y -/// in case if f(), g(), h(), t() are deterministic (in scope of query). -/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x). -void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, const Context & context) -{ - const auto & order_by = select_query->orderBy(); - if (!order_by) - return; - - std::unordered_set prev_keys; - ASTs modified; - modified.reserve(order_by->children.size()); - - for (auto & order_by_element : order_by->children) - { - /// Order by contains ASTOrderByElement as children and meaning item only as a grand child. 
- ASTPtr & name_or_function = order_by_element->children[0]; - - if (name_or_function->as()) - { - if (!prev_keys.empty()) - { - RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context}; - RedundantFunctionsInOrderByVisitor(data).visit(name_or_function); - if (data.redundant) - continue; - } - } - - /// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy() - if (auto * identifier = name_or_function->as()) - prev_keys.emplace(getIdentifierName(identifier)); - - modified.push_back(order_by_element); - } - - if (modified.size() < order_by->children.size()) - order_by->children = std::move(modified); -} - -/// Remove duplicate items from LIMIT BY. -void optimizeLimitBy(const ASTSelectQuery * select_query) -{ - if (!select_query->limitBy()) - return; - - std::set elems_set; - - ASTs & elems = select_query->limitBy()->children; - ASTs unique_elems; - unique_elems.reserve(elems.size()); - for (const auto & elem : elems) - { - if (elems_set.emplace(elem->getColumnName()).second) - unique_elems.emplace_back(elem); - } - - if (unique_elems.size() < elems.size()) - elems = std::move(unique_elems); -} - -/// Remove duplicated columns from USING(...). 
-void optimizeUsing(const ASTSelectQuery * select_query) -{ - if (!select_query->join()) - return; - - const auto * table_join = select_query->join()->table_join->as(); - if (!(table_join && table_join->using_expression_list)) - return; - - ASTs & expression_list = table_join->using_expression_list->children; - ASTs uniq_expressions_list; - - std::set expressions_names; - - for (const auto & expression : expression_list) - { - auto expression_name = expression->getAliasOrColumnName(); - if (expressions_names.find(expression_name) == expressions_names.end()) - { - uniq_expressions_list.push_back(expression); - expressions_names.insert(expression_name); - } - } - - if (uniq_expressions_list.size() < expression_list.size()) - expression_list = uniq_expressions_list; -} - -void optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif) -{ - /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. - OptimizeIfWithConstantConditionVisitor(aliases).visit(query); - - if (if_chain_to_multiif) - OptimizeIfChainsVisitor().visit(query); -} - -void optimizeAggregationFunctions(ASTPtr & query) -{ - /// Move arithmetic operations out of aggregation functions - ArithmeticOperationsInAgrFuncVisitor::Data data; - ArithmeticOperationsInAgrFuncVisitor(data).visit(query); -} - -void optimizeAnyInput(ASTPtr & query) -{ - /// Removing arithmetic operations from functions - AnyInputVisitor::Data data = {}; - AnyInputVisitor(data).visit(query); -} - -void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context) -{ - RemoveInjectiveFunctionsVisitor::Data data = {context}; - RemoveInjectiveFunctionsVisitor(data).visit(query); -} - -void transformIfStringsIntoEnum(ASTPtr & query) -{ - std::unordered_set function_names = {"if", "transform"}; - std::unordered_set used_as_argument; - - FindUsedFunctionsVisitor::Data used_data{function_names, used_as_argument}; - FindUsedFunctionsVisitor(used_data).visit(query); - - 
ConvertStringsToEnumVisitor::Data convert_data{used_as_argument}; - ConvertStringsToEnumVisitor(convert_data).visit(query); -} - -void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, +void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const ASTSelectQuery * select_query, const NamesAndTypesList & source_columns, const NameSet & source_columns_set) { if (ASTPtr array_join_expression_list = select_query->arrayJoinExpressionList()) @@ -842,7 +368,7 @@ std::vector getAggregates(ASTPtr & query, const ASTSelectQu /// Add columns from storage to source_columns list. Deduplicate resulted list. /// Special columns are non physical columns, for example ALIAS -void SyntaxAnalyzerResult::collectSourceColumns(bool add_special) +void TreeRewriterResult::collectSourceColumns(bool add_special) { if (storage) { @@ -862,7 +388,7 @@ void SyntaxAnalyzerResult::collectSourceColumns(bool add_special) /// Calculate which columns are required to execute the expression. /// Then, delete all other columns from the list of available columns. /// After execution, columns will only contain the list of columns needed to read from the table. 
-void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, bool is_select) +void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select) { /// We calculate required_source_columns with source_columns modifications and swap them on exit required_source_columns = source_columns; @@ -1027,9 +553,9 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, bool is_sele } -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( +TreeRewriterResultPtr TreeRewriter::analyzeSelect( ASTPtr & query, - SyntaxAnalyzerResult && result, + TreeRewriterResult && result, const SelectQueryOptions & select_options, const std::vector & tables_with_columns, const Names & required_result_columns, @@ -1080,67 +606,14 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(query, context, subquery_depth, result.scalars, select_options.only_analyze); - { - optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); - - /// Move arithmetic operations out of aggregation functions - if (settings.optimize_arithmetic_operations_in_aggregate_functions) - optimizeAggregationFunctions(query); - - /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query); + TreeOptimizer::apply(query, result.aliases, source_columns_set, tables_with_columns, context, result.rewrite_subqueries); - /// GROUP BY injective function elimination. - optimizeGroupBy(select_query, source_columns_set, context); + /// array_join_alias_to_name, array_join_result_to_source. + getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); - /// GROUP BY functions of other keys elimination. 
- if (settings.optimize_group_by_function_keys) - optimizeGroupByFunctionKeys(select_query); - - ///Move all operations out of any function - if (settings.optimize_move_functions_out_of_any) - optimizeAnyInput(query); - - /// Remove injective functions inside uniq - if (settings.optimize_injective_functions_inside_uniq) - optimizeInjectiveFunctionsInsideUniq(query, context); - - /// Eliminate min/max/any aggregators of functions of GROUP BY keys - if (settings.optimize_aggregators_of_group_by_keys) - optimizeAggregateFunctionsOfGroupByKeys(select_query); - - /// Remove duplicate items from ORDER BY. - optimizeDuplicatesInOrderBy(select_query); - - /// Remove duplicate ORDER BY and DISTINCT from subqueries. - if (settings.optimize_duplicate_order_by_and_distinct) - optimizeDuplicateOrderByAndDistinct(query, context); - - /// Remove functions from ORDER BY if its argument is also in ORDER BY - if (settings.optimize_redundant_functions_in_order_by) - optimizeRedundantFunctionsInOrderBy(select_query, context); - - /// Replace monotonous functions with its argument - if (settings.optimize_monotonous_functions_in_order_by) - optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns); - - /// If function "if" has String-type arguments, transform them into enum - if (settings.optimize_if_transform_strings_to_enum) - transformIfStringsIntoEnum(query); - - /// Remove duplicated elements from LIMIT BY clause. - optimizeLimitBy(select_query); - - /// Remove duplicated columns from USING(...). - optimizeUsing(select_query); - - /// array_join_alias_to_name, array_join_result_to_source. 
- getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); - - setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, - result.analyzed_join->table_join); - collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); - } + setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, + result.analyzed_join->table_join); + collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); result.aggregates = getAggregates(query, *select_query); result.collectUsedColumns(query, true); @@ -1152,10 +625,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( !select_query->sampleSize() && !select_query->sampleOffset() && !select_query->final() && (tables_with_columns.size() < 2 || isLeft(result.analyzed_join->kind())); - return std::make_shared(result); + return std::make_shared(result); } -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( +TreeRewriterResultPtr TreeRewriter::analyze( ASTPtr & query, const NamesAndTypesList & source_columns, ConstStoragePtr storage, @@ -1167,14 +640,14 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( const auto & settings = context.getSettingsRef(); - SyntaxAnalyzerResult result(source_columns, storage, metadata_snapshot, false); + TreeRewriterResult result(source_columns, storage, metadata_snapshot, false); normalize(query, result.aliases, settings); /// Executing scalar subqueries. Column defaults could be a scalar subquery. 
executeScalarSubqueries(query, context, 0, result.scalars, false); - optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); + TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); if (allow_aggregations) { @@ -1191,10 +664,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( assertNoAggregates(query, "in wrong place"); result.collectUsedColumns(query, false); - return std::make_shared(result); + return std::make_shared(result); } -void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings) +void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings & settings) { CustomizeCountDistinctVisitor::Data data_count_distinct{settings.count_distinct_implementation}; CustomizeCountDistinctVisitor(data_count_distinct).visit(query); diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/TreeRewriter.h similarity index 87% rename from src/Interpreters/SyntaxAnalyzer.h rename to src/Interpreters/TreeRewriter.h index 2c86ce11c422f4562fc267d6d4b3e92829b239e9..83cfabe2ec41d251c41f18258e254ad5565576e3 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/TreeRewriter.h @@ -20,7 +20,7 @@ using Scalars = std::map; struct StorageInMemoryMetadata; using StorageMetadataPtr = std::shared_ptr; -struct SyntaxAnalyzerResult +struct TreeRewriterResult { ConstStoragePtr storage; StorageMetadataPtr metadata_snapshot; @@ -56,7 +56,7 @@ struct SyntaxAnalyzerResult /// Results of scalar sub queries Scalars scalars; - SyntaxAnalyzerResult( + TreeRewriterResult( const NamesAndTypesList & source_columns_, ConstStoragePtr storage_ = {}, const StorageMetadataPtr & metadata_snapshot_ = {}, @@ -74,29 +74,26 @@ struct SyntaxAnalyzerResult const Scalars & getScalars() const { return scalars; } }; -using SyntaxAnalyzerResultPtr = std::shared_ptr; +using TreeRewriterResultPtr = std::shared_ptr; -/// AST syntax analysis. 
-/// Optimises AST tree and collect information for further expression analysis. +/// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf +/// +/// Optimises AST tree and collect information for further expression analysis in ExpressionAnalyzer. /// Result AST has the following invariants: /// * all aliases are substituted /// * qualified names are translated /// * scalar subqueries are executed replaced with constants /// * unneeded columns are removed from SELECT clause /// * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...). -/// Motivation: -/// * group most of the AST-changing operations in single place -/// * avoid AST rewriting in ExpressionAnalyzer -/// * decompose ExpressionAnalyzer -class SyntaxAnalyzer +class TreeRewriter { public: - SyntaxAnalyzer(const Context & context_) + TreeRewriter(const Context & context_) : context(context_) {} /// Analyze and rewrite not select query - SyntaxAnalyzerResultPtr analyze( + TreeRewriterResultPtr analyze( ASTPtr & query, const NamesAndTypesList & source_columns_, ConstStoragePtr storage = {}, @@ -104,9 +101,9 @@ public: bool allow_aggregations = false) const; /// Analyze and rewrite select query - SyntaxAnalyzerResultPtr analyzeSelect( + TreeRewriterResultPtr analyzeSelect( ASTPtr & query, - SyntaxAnalyzerResult && result, + TreeRewriterResult && result, const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 417dccadb3d33f374600e5b927cece0c1f2dbddc..e1f53c72801f9d1ebcb15b86285670b197460b23 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -36,7 +36,7 @@ std::pair> 
evaluateConstantExpression(co ReplaceQueryParameterVisitor param_visitor(context.getQueryParameters()); param_visitor.visit(ast); String name = ast->getColumnName(); - auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); + auto syntax_result = TreeRewriter(context).analyze(ast, source_columns); ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions(); /// There must be at least one column in the block so that it knows the number of rows. diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 38f961306ff554943f403e52a51ec7e7355788ad..19495b0ffed882bbf3aafcf152753ca00fa9714b 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -98,7 +98,7 @@ void executeExpressionsOnBlock( if (!save_unneeded_columns) { - auto syntax_result = SyntaxAnalyzer(context).analyze(expr_list, block.getNamesAndTypesList()); + auto syntax_result = TreeRewriter(context).analyze(expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{expr_list, syntax_result, context}.getActions(true)->execute(block); return; } @@ -107,7 +107,7 @@ void executeExpressionsOnBlock( * we are going to operate on a copy instead of the original block */ Block copy_block{block}; - auto syntax_result = SyntaxAnalyzer(context).analyze(expr_list, block.getNamesAndTypesList()); + auto syntax_result = TreeRewriter(context).analyze(expr_list, block.getNamesAndTypesList()); auto expression_analyzer = ExpressionAnalyzer{expr_list, syntax_result, context}; auto required_source_columns = syntax_result->requiredSourceColumns(); auto rows_was = copy_block.rows(); diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index a175cd52ee4e45bc22ce57f0ef9ef36b9b3efa57..48be2fae1b9b8958d7f3734d2e806eef1e0499d7 100644 --- 
a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -134,7 +134,6 @@ SRCS( SortedBlocksWriter.cpp StorageID.cpp SubqueryForSet.cpp - SyntaxAnalyzer.cpp SystemLog.cpp TableJoin.cpp TablesStatus.cpp @@ -142,6 +141,8 @@ SRCS( ThreadStatusExt.cpp TraceLog.cpp TranslateQualifiedNamesVisitor.cpp + TreeOptimizer.cpp + TreeRewriter.cpp ) END() diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 1ae094f4ded374cf48c7876401f514394df11ac5..9514ca69e549d427dee2beaeaedd7a7a7f766a6f 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -303,7 +303,7 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & addNodesToCastResult(result_type, expression, null_as_default); - auto syntax_result = SyntaxAnalyzer(context).analyze(expression, literals.getNamesAndTypesList()); + auto syntax_result = TreeRewriter(context).analyze(expression, literals.getNamesAndTypesList()); result_column_name = expression->getColumnName(); actions_on_literals = ExpressionAnalyzer(expression, syntax_result, context).getActions(false); } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 8e0cd40f605675439c1a3088de21735d138904cf..03e8c8efd63d40e67dce8cf3e6a3832211a6c60b 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -853,7 +853,7 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con if (default_expression) { ASTPtr query = default_expression->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns.getAll()); + auto syntax_result = TreeRewriter(context).analyze(query, all_columns.getAll()); const auto 
actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true); const auto required_columns = actions->getRequiredColumns(); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index c1a5c1f77a024fb73c4c70e877a0779ca0a878a0..869cc5cdcdbcff587fec627d103a8c57d384f105 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include @@ -477,7 +477,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N try { - auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns); + auto syntax_analyzer_result = TreeRewriter(context).analyze(default_expr_list, all_columns); const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); for (const auto & action : actions->getActions()) if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN) diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 3d4f528302a7927972d9d6abb5c1c31444d9000d..cff0b975a1df2ef5710d96698037a8af7dea24a9 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -45,10 +45,10 @@ ConstraintsExpressions ConstraintsDescription::getExpressions(const DB::Context res.reserve(constraints.size()); for (const auto & constraint : constraints) { - // SyntaxAnalyzer::analyze has query as non-const argument so to avoid accidental query changes we clone it + // TreeRewriter::analyze has query as non-const argument so to avoid accidental query changes we clone it auto * constraint_ptr = constraint->as(); ASTPtr expr = constraint_ptr->expr->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(expr, source_columns_); + auto syntax_result = TreeRewriter(context).analyze(expr, source_columns_); 
res.push_back(ExpressionAnalyzer(constraint_ptr->expr->clone(), syntax_result, context).getActions(false)); } return res; diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 69b5d50dcb6447b34692c895f81697a64031ec47..143d97cdc15ea5e6d2ced22ea79ca3f7e0f15f7e 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include @@ -90,7 +90,7 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast ASTPtr expr_list = extractKeyExpressionList(index_definition->expr->clone()); result.expression_list_ast = expr_list->clone(); - auto syntax = SyntaxAnalyzer(context).analyze(expr_list, columns.getAllPhysical()); + auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical()); result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true); Block block_without_columns = result.expression->getSampleBlock(); @@ -166,7 +166,7 @@ ExpressionActionsPtr IndicesDescription::getSingleExpressionForIndices(const Col for (const auto & index_expr : index.expression_list_ast->children) combined_expr_list->children.push_back(index_expr->clone()); - auto syntax_result = SyntaxAnalyzer(context).analyze(combined_expr_list, columns.getAllPhysical()); + auto syntax_result = TreeRewriter(context).analyze(combined_expr_list, columns.getAllPhysical()); return ExpressionAnalyzer(combined_expr_list, syntax_result, context).getActions(false); } diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 7d5b0d56008fb60778b81c4f9308e793883731e1..a3a192820d014716ad617d0b15a31c8b7ebed5a2 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -107,7 +107,7 @@ KeyDescription KeyDescription::getSortingKeyFromAST( { auto expr = result.expression_list_ast->clone(); - auto syntax_result = 
SyntaxAnalyzer(context).analyze(expr, columns.getAllPhysical()); + auto syntax_result = TreeRewriter(context).analyze(expr, columns.getAllPhysical()); /// In expression we also need to store source columns result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(false); /// In sample block we use just key columns diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 79bbc0e7216c788936ff19cfdb46f2eb4efee1df..e1c5dcbfd8bf193cfd6c8aa419107be7d685bbec 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -348,7 +348,7 @@ inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisi * For index to work when something like "WHERE Date = toDate(now())" is written. */ Block KeyCondition::getBlockWithConstants( - const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context) + const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, const Context & context) { Block result { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index a37af2d677b5759e8a912d6f956af7c089c22879..219773b6d199d52ba9e076c997daeaf1505f63cf 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -302,7 +302,7 @@ public: const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type); static Block getBlockWithConstants( - const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context); + const ASTPtr & query, const TreeRewriterResultPtr & syntax_analyzer_result, const Context & context); static std::optional applyMonotonicFunctionsChainToRange( Range key_range, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 
49b010de3ba51bec2e0caad6d638a3191462d98a..c71172850ba558f04683323ecb0df261f2e02136 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -339,7 +339,7 @@ void MergeTreeData::checkProperties( if (!added_key_column_expr_list->children.empty()) { - auto syntax = SyntaxAnalyzer(global_context).analyze(added_key_column_expr_list, all_columns); + auto syntax = TreeRewriter(global_context).analyze(added_key_column_expr_list, all_columns); Names used_columns = syntax->requiredSourceColumns(); NamesAndTypesList deleted_columns; @@ -404,7 +404,7 @@ ExpressionActionsPtr getCombinedIndicesExpression( for (const auto & index_expr : index.expression_list_ast->children) combined_expr_list->children.push_back(index_expr->clone()); - auto syntax_result = SyntaxAnalyzer(context).analyze(combined_expr_list, columns.getAllPhysical()); + auto syntax_result = TreeRewriter(context).analyze(combined_expr_list, columns.getAllPhysical()); return ExpressionAnalyzer(combined_expr_list, syntax_result, context).getActions(false); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 46e1b2b9cb6ea05e73101e7a6d9495b6322d6064..b59070ca0707cbf83056a617be6f8f2515d395c8 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1562,7 +1562,7 @@ std::set MergeTreeDataMergerMutator::getIndicesToRecalculate( if (!indices_to_recalc.empty() && input_stream) { - auto indices_recalc_syntax = SyntaxAnalyzer(context).analyze(indices_recalc_expr_list, input_stream->getHeader().getNamesAndTypesList()); + auto indices_recalc_syntax = TreeRewriter(context).analyze(indices_recalc_expr_list, input_stream->getHeader().getNamesAndTypesList()); auto indices_recalc_expr = ExpressionAnalyzer( indices_recalc_expr_list, 
indices_recalc_syntax, context).getActions(false); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 306bcd9000afc30bfedcf6c9ce9e515c06a71248..8d084ed4c6c1251976f605ca34ea1611ad2dd1e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -518,7 +518,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( } ASTPtr query = filter_function; - auto syntax_result = SyntaxAnalyzer(context).analyze(query, available_real_columns); + auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns); filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); if (!select.final()) @@ -651,7 +651,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone(); order_key_prefix_ast->children.resize(prefix_size); - auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, metadata_snapshot->getColumns().getAllPhysical()); + auto syntax_result = TreeRewriter(context).analyze(order_key_prefix_ast, metadata_snapshot->getColumns().getAllPhysical()); auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActions(false); res = spreadMarkRangesAmongStreamsWithOrder( diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index e9fb0c335948453ee1ba73b7af8ce20612a00c9c..421d0e8b38c07ebe643f4b66cdedf48c3266b442 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 
c49ead0d82d8035777b76f683cee288b3ab6ecc3..50163d892e28a1d3deea5968700e60507d2b041c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 44973a984a3ed769b43c68c8169dee2d5c2a632c..de89a27ab460aeb5d277e6da240ab8b6cbd3e0de 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 221be222fd22f90c9c21d8e703cd81b7c60a04e0..3d0c0327da5a8958fffa34b1623940b98b93925e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -265,7 +265,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( /// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h). 
traverseAST(expression_ast); - auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze( + auto syntax_analyzer_result = TreeRewriter(context).analyze( expression_ast, index_sample_block.getNamesAndTypesList()); actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); } diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index bc220bc33ce7ac5bf4bb5d2b4cf00cc1076e5133..37f07ad18767a287097527601dcd38a5a115aca1 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes ReadInOrderOptimizer::ReadInOrderOptimizer( const ManyExpressionActions & elements_actions_, const SortDescription & required_sort_description_, - const SyntaxAnalyzerResultPtr & syntax_result) + const TreeRewriterResultPtr & syntax_result) : elements_actions(elements_actions_) , required_sort_description(required_sort_description_) { diff --git a/src/Storages/ReadInOrderOptimizer.h b/src/Storages/ReadInOrderOptimizer.h index f2a3e448f50f3bb7143338fd2d1f8e829df690dd..3a16a10f89b05bf72a1303dfd765c035aa59cac7 100644 --- a/src/Storages/ReadInOrderOptimizer.h +++ b/src/Storages/ReadInOrderOptimizer.h @@ -18,7 +18,7 @@ public: ReadInOrderOptimizer( const ManyExpressionActions & elements_actions, const SortDescription & required_sort_description, - const SyntaxAnalyzerResultPtr & syntax_result); + const TreeRewriterResultPtr & syntax_result); InputOrderInfoPtr getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 26b318f107bd105dfe6eb3edc858817f3d832576..9a5d0cc63384965814193a57c8b252ecec0bb449 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -57,8 +57,8 @@ using PrewhereInfoPtr = std::shared_ptr; using FilterInfoPtr = std::shared_ptr; using InputOrderInfoPtr = std::shared_ptr; -struct 
SyntaxAnalyzerResult; -using SyntaxAnalyzerResultPtr = std::shared_ptr; +struct TreeRewriterResult; +using TreeRewriterResultPtr = std::shared_ptr; class ReadInOrderOptimizer; using ReadInOrderOptimizerPtr = std::shared_ptr; @@ -73,7 +73,7 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query - SyntaxAnalyzerResultPtr syntax_analyzer_result; + TreeRewriterResultPtr syntax_analyzer_result; PrewhereInfoPtr prewhere_info; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index ab47eb6f8e0f61b1343f4df854d8bfb780b12c07..d340a834973cff01d939313907733be687615df6 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include @@ -185,7 +185,7 @@ std::string makeFormattedListOfShards(const ClusterPtr & cluster) ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, const Context & context, const NamesAndTypesList & columns, bool project) { ASTPtr query = sharding_key; - auto syntax_result = SyntaxAnalyzer(context).analyze(query, columns); + auto syntax_result = TreeRewriter(context).analyze(query, columns); return ExpressionAnalyzer(query, syntax_result, context).getActions(project); } @@ -235,7 +235,7 @@ void replaceConstantExpressions( ConstStoragePtr storage, const StorageMetadataPtr & metadata_snapshot) { - auto syntax_result = SyntaxAnalyzer(context).analyze(node, columns, storage, metadata_snapshot); + auto syntax_result = TreeRewriter(context).analyze(node, columns, storage, metadata_snapshot); Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context); InDepthNodeVisitor visitor(block_with_constants); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 773febe61df962b91f4753d63c7b0126cb9f72f3..06aef21542fc6bd5842574f0e4e3a8d6f3c0f559 100644 --- a/src/Storages/StorageMerge.cpp +++ 
b/src/Storages/StorageMerge.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -472,7 +472,7 @@ void StorageMerge::convertingSourceStream( NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList(); auto virtual_column = *getVirtuals().tryGetByName("_table"); source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type}); - auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); + auto syntax_result = TreeRewriter(context).analyze(where_expression, source_columns); ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false); Names required_columns = actions->getRequiredColumns(); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 2c29958250c6d269650f940d102366279ae9ce7d..4c9da0952780319474ee75567a9a1a578b11eb35 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -143,7 +143,7 @@ TTLDescription TTLDescription::getTTLFromAST( result.expression_ast = definition_ast->clone(); auto ttl_ast = result.expression_ast->clone(); - auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(ttl_ast, columns.getAllPhysical()); + auto syntax_analyzer_result = TreeRewriter(context).analyze(ttl_ast, columns.getAllPhysical()); result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); result.result_column = ttl_ast->getColumnName(); @@ -162,7 +162,7 @@ TTLDescription TTLDescription::getTTLFromAST( { if (ASTPtr where_expr_ast = ttl_element->where()) { - auto where_syntax_result = SyntaxAnalyzer(context).analyze(where_expr_ast, columns.getAllPhysical()); + auto where_syntax_result = TreeRewriter(context).analyze(where_expr_ast, columns.getAllPhysical()); result.where_expression = 
ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); result.where_result_column = where_expr_ast->getColumnName(); } @@ -220,7 +220,7 @@ TTLDescription TTLDescription::getTTLFromAST( if (value->as()) { - auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, {}, true); + auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true); auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false); for (const auto & column : expr_actions->getRequiredColumns()) { @@ -249,7 +249,7 @@ TTLDescription TTLDescription::getTTLFromAST( for (auto [name, value] : aggregations) { - auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, {}, true); + auto syntax_result = TreeRewriter(context).analyze(value, columns.getAllPhysical(), {}, {}, true); auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context); TTLAggregateDescription set_part; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index f0718a014b809d26df9e4fd2981733be430590dd..6b99dc25e377c453f3399b835f666c7957bc870c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include @@ -118,7 +118,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c return; /// Let's analyze and calculate the expression. 
- auto syntax_result = SyntaxAnalyzer(context).analyze(expression_ast, block.getNamesAndTypesList()); + auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); ExpressionActionsPtr actions = analyzer.getActions(false); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index bd7d7d5d1b8f6d5c11692e32747c8997eb31a0c1..1a63132ee0c9b6434ffe8c4d10b41d623247445b 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -56,7 +56,7 @@ static void check(const std::string & query, const std::string & expected, const ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, query, 1000, 1000); SelectQueryInfo query_info; - query_info.syntax_analyzer_result = SyntaxAnalyzer(context).analyzeSelect(ast, columns); + query_info.syntax_analyzer_result = TreeRewriter(context).analyzeSelect(ast, columns); query_info.query = ast; std::string transformed_query = transformQueryForExternalDatabase(query_info, columns, IdentifierQuotingStyle::DoubleQuotes, "test", "table", context); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 0d34f0b3068fb762c074412c9c3c62572b9024eb..2556cd1064873dd993bb5aa3b0945621f6ee4f0a 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -91,7 +91,7 @@ public: void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & all_columns) { - auto syntax_result = SyntaxAnalyzer(context).analyze(node, all_columns); + auto syntax_result = TreeRewriter(context).analyze(node, all_columns); Block 
block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context); InDepthNodeVisitor visitor(block_with_constants);