// Review notes for the ExpressionAnalyzer.cpp part of this patch.
// Purpose: every read or write of prepared_sets in the hunks below stops using the raw AST
// node pointer (x.get()) as the key and uses the range member of the node (x->range) instead.
// Touched functions, per the hunk headers: tryMakeSetFromSubquery, makeSetsForIndexImpl,
// makeSet (four sites), makeExplicitSet and getActionsImpl (two sites).
// NOTE(review): getActionsImpl still calls prepared_sets.count(child->range) and then
// prepared_sets[child->range], i.e. two lookups of the same key. The pre-patch code had the
// same shape, so this is not a regression, but a single find() would avoid the second lookup.
// NOTE(review): a range-based key presumably relies on each right-hand IN argument having a
// distinct, non-default range - confirm this holds for AST nodes that are not produced by the parser.
// NOTE(review): template arguments (for example on typeid_cast and std::make_shared) are not
// visible in this copy of the patch, so the text below should not be treated as compilable as-is.
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index c7810666da83a37630d86d37e680388202c9b726..ba9dca82822745085577663fae6b531883781172 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1486,7 +1486,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table return; } - prepared_sets[subquery_or_table_name.get()] = std::move(set); + prepared_sets[subquery_or_table_name->range] = std::move(set); } @@ -1515,7 +1515,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & { const ASTPtr & arg = args.children.at(1); - if (!prepared_sets.count(arg.get())) /// Not already prepared. + if (!prepared_sets.count(arg->range)) /// Not already prepared. { if (typeid_cast(arg.get()) || typeid_cast(arg.get())) { @@ -1550,7 +1550,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ const ASTPtr & arg = args.children.at(1); /// Already converted. - if (prepared_sets.count(arg.get())) + if (prepared_sets.count(arg->range)) return; /// If the subquery or table name for SELECT. @@ -1573,7 +1573,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ if (storage_set) { - prepared_sets[arg.get()] = storage_set->getSet(); + prepared_sets[arg->range] = storage_set->getSet(); return; } } @@ -1584,7 +1584,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ /// If you already created a Set with the same subquery / table. 
if (subquery_for_set.set) { - prepared_sets[arg.get()] = subquery_for_set.set; + prepared_sets[arg->range] = subquery_for_set.set; return; } @@ -1630,7 +1630,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ } subquery_for_set.set = set; - prepared_sets[arg.get()] = set; + prepared_sets[arg->range] = set; } else { @@ -1712,7 +1712,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode)); set->createFromAST(set_element_types, elements_ast, context, create_ordered_set); - prepared_sets[right_arg.get()] = std::move(set); + prepared_sets[right_arg->range] = std::move(set); } @@ -2102,12 +2102,12 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, /// Select the name in the next cycle. argument_names.emplace_back(); } - else if (prepared_sets.count(child.get()) && functionIsInOrGlobalInOperator(node->name) && arg == 1) + else if (prepared_sets.count(child->range) && functionIsInOrGlobalInOperator(node->name) && arg == 1) { ColumnWithTypeAndName column; column.type = std::make_shared(); - const SetPtr & set = prepared_sets[child.get()]; + const SetPtr & set = prepared_sets[child->range]; /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, /// so that sets with the same literal representation do not fuse together (they can have different types). 
// Review notes for the ExpressionAnalyzer.h and IAST.h parts of this patch.
// ExpressionAnalyzer.h: the two quoted includes (ExpressionActions.h, ProjectionManipulation.h)
// and the blank line after them are removed and three includes are added; the PreparedSets
// alias is redefined. The include targets and the template arguments of the alias are not
// visible in this copy of the patch, so the old and new alias lines look identical here.
// NOTE(review): presumably the new key type is StringRange with StringRangeHash as the hasher -
// confirm against the original patch.
// IAST.h: only the doc comment of clone() changes; it now states that a cloned object must
// have the same range.
// NOTE(review): that requirement is expressed only as a comment in the visible hunk; nothing
// shown here checks it.
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index d118e06334dadc1f60641b0c2ac311c7295eb45b..b8a0bd5a17b2e10a5d16068654832449fde12892 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -3,9 +3,9 @@ #include #include #include -#include "ExpressionActions.h" -#include "ProjectionManipulation.h" - +#include +#include +#include namespace DB { @@ -23,7 +23,7 @@ using ASTPtr = std::shared_ptr; class Set; using SetPtr = std::shared_ptr; -using PreparedSets = std::unordered_map; +using PreparedSets = std::unordered_map; class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 9c4f33ebf4912daffcd529244df8cbca3df7caec..1558ebb66816d0c8d039241335e2b3d73271aaec 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -65,7 +65,7 @@ public: ASTPtr ptr() { return shared_from_this(); } - /** Get a deep copy of the tree. */ + /** Get a deep copy of the tree. Cloned object must have the same range. */ virtual ASTPtr clone() const = 0; /** Get hash code, identifying this element and its subtree. 
// Review notes for the StringRange.h, KeyCondition.cpp, MergeTreeWhereOptimizer.cpp,
// SelectQueryInfo.h and StorageMerge.cpp parts of this patch.
// StringRange.h:
//   - the empty default constructor becomes = default and an explicitly defaulted copy constructor is added;
//   - the single-TokenIterator constructor becomes explicit;
//   - operator== compares the (first, second) pointer pair through std::tie;
//   - StringRangeHash feeds first and second to a SipHash and returns get64() as UInt64;
//   - one include is added (target not visible in this copy) and a blank line is added after the final closing brace.
//   NOTE(review): std::tie is declared in <tuple>; confirm that header is reachable from StringRange.h.
//   NOTE(review): hash.update() receives const char * values; presumably the pointer values themselves
//   are hashed, which would match operator== comparing pointers rather than text - verify against the
//   SipHash::update overloads.
//   NOTE(review): two default-constructed ranges (both pointers nullptr) compare equal, so they would
//   share one PreparedSets slot.
// KeyCondition.cpp and MergeTreeWhereOptimizer.cpp: prepared_sets lookups switch from args[1].get()
// to args[1]->range.
//   NOTE(review): in mayBeTrueInRangeImpl, in_func->arguments is dereferenced in the statement before
//   the if (in_func && ...) test, so that null check cannot guard the dereference. These are unchanged
//   context lines of the patch.
//   NOTE(review): atomFromAST uses count() followed by operator[] on the same key; same shape as before
//   the patch.
// SelectQueryInfo.h: a blank line is replaced by one added include and the PreparedSets alias is
// redefined (template arguments not visible in this copy).
// StorageMerge.cpp: the anonymous-namespace helpers relinkSetsImpl and relinkSets are deleted, together
// with the relinkSets call in StorageMerge::read.
//   NOTE(review): after the removal, nothing in the visible hunk assigns modified_query_info.sets;
//   confirm whether it should be given query_info.sets so that per-table reads still see the prepared sets.
diff --git a/dbms/src/Parsers/StringRange.h b/dbms/src/Parsers/StringRange.h index b919a899293317b47bb1d53422f0144b39048b46..3de66f2cd009f75c56806dead09ddec849756df1 100644 --- a/dbms/src/Parsers/StringRange.h +++ b/dbms/src/Parsers/StringRange.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -14,9 +15,10 @@ struct StringRange const char * first = nullptr; const char * second = nullptr; - StringRange() {} + StringRange() = default; + StringRange(const StringRange & other) = default; StringRange(const char * begin, const char * end) : first(begin), second(end) {} - StringRange(TokenIterator token) : first(token->begin), second(token->end) {} + explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {} StringRange(TokenIterator token_begin, TokenIterator token_end) { @@ -34,6 +36,8 @@ struct StringRange first = token_begin->begin; second = token_last->end; } + + bool operator==(const StringRange & rhs) const { return std::tie(first, second) == std::tie(rhs.first, rhs.second); } }; using StringPtr = std::shared_ptr; @@ -44,4 +48,16 @@ inline String toString(const StringRange & range) return range.first ? 
String(range.first, range.second) : String(); } +struct StringRangeHash +{ + UInt64 operator()(const StringRange & range) const + { + SipHash hash; + hash.update(range.first); + hash.update(range.second); + return hash.get64(); + } +}; + } + diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index ffed82ad773c1be4a8331e3f70663dd60a53b537..bc618f8d6550eaf0e763d31d489ae3d14a0cfe4d 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -641,8 +641,8 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo bool is_set_const = false; bool is_constant_transformed = false; - if (prepared_sets.count(args[1].get()) - && isTupleIndexable(args[0], context, out, prepared_sets[args[1].get()], key_column_num)) + if (prepared_sets.count(args[1]->range) + && isTupleIndexable(args[0], context, out, prepared_sets[args[1]->range], key_column_num)) { key_arg_pos = 0; is_set_const = true; @@ -1016,7 +1016,7 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, c { auto in_func = typeid_cast(element.in_function.get()); const ASTs & args = typeid_cast(*in_func->arguments).children; - PreparedSets::const_iterator it = prepared_sets.find(args[1].get()); + PreparedSets::const_iterator it = prepared_sets.find(args[1]->range); if (in_func && it != prepared_sets.end()) { rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types)); diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 3b35c127511eb7ca2a225196c64dcefc84cb1d3c..b26b4f9bdcd0801d505faac0d4a7c00f900b9d68 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -334,7 +334,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const if ((primary_key_columns.count(first_arg_name) 
&& isConstant(args[1])) || (primary_key_columns.count(second_arg_name) && isConstant(args[0])) || (primary_key_columns.count(first_arg_name) - && (prepared_sets.count(args[1].get()) || typeid_cast(args[1].get())))) + && (prepared_sets.count(args[1]->range) || typeid_cast(args[1].get())))) return true; } diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index 5443434fd40d26226b6eadff4db0e895318f8215..608ebde5301a3715d8110e804594ad710ca7510f 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { @@ -14,7 +14,7 @@ class Set; using SetPtr = std::shared_ptr; /// Information about calculated sets in right hand side of IN. -using PreparedSets = std::unordered_map; +using PreparedSets = std::unordered_map; /** Query along with some additional data, diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index c0f264e7f85e66aa3c7d8cbd39689a5e881765e8..93254187ec9002bdcfe9db2049d9dd4686b926b9 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -84,33 +84,6 @@ bool StorageMerge::isRemote() const } -namespace -{ - using NodeHashToSet = std::map; - - void relinkSetsImpl(const ASTPtr & query, const NodeHashToSet & node_hash_to_set, PreparedSets & new_sets) - { - auto hash = query->getTreeHash(); - auto it = node_hash_to_set.find(hash); - if (node_hash_to_set.end() != it) - new_sets[query.get()] = it->second; - - for (const auto & child : query->children) - relinkSetsImpl(child, node_hash_to_set, new_sets); - } - - /// Re-link prepared sets onto cloned and modified AST. 
- void relinkSets(const ASTPtr & query, const PreparedSets & old_sets, PreparedSets & new_sets) - { - NodeHashToSet node_hash_to_set; - for (const auto & node_set : old_sets) - node_hash_to_set.emplace(node_set.first->getTreeHash(), node_set.second); - - relinkSetsImpl(query, node_hash_to_set, new_sets); - } -} - - bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const { /// It's beneficial if it is true for at least one table. @@ -211,8 +184,6 @@ BlockInputStreams StorageMerge::read( SelectQueryInfo modified_query_info; modified_query_info.query = modified_query_ast; - relinkSets(modified_query_info.query, query_info.sets, modified_query_info.sets); - BlockInputStreams source_streams; if (curr_table_number < num_streams)