diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4fffb34e6f04fe835448bc63e6d0ca1a4abfa810..8053bf029870919c3761be6ab1a3d38323e0d887 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1542,7 +1542,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table return; } - set->makeOrderedSet(); + set->finalizeOrderedSet(); prepared_sets[subquery_or_table_name.get()] = std::move(set); } @@ -1563,7 +1563,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & { if (typeid_cast(arg.get()) || typeid_cast(arg.get())) { - if (settings.try_primary_key_for_in_with_subqueries && storage->mayBenefitFromIndexForIn(args.children.at(0))) + if (settings.use_index_for_in_with_subqueries && storage->mayBenefitFromIndexForIn(args.children.at(0))) tryMakeSetFromSubquery(arg); } else diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 319743a318791e19f2963ff52b314b3c8c08e244..1090d3a7fec961085a6330555a7a8e02b725091c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -313,9 +313,10 @@ private: */ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set); - /*** - * Create Set from a subuqery or a table expression in the query. - */ + /** + * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. + * The set will not be created if its size hits the limit. 
+ */ void tryMakeSetFromSubquery(const ASTPtr & subquery_or_table_name); void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block); diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 726cec6ebf90a3f9eabd74b198448e10a565a371..b3e136da7eea71cc6753added1b06510e7819bc5 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -209,7 +209,7 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set) } -void Set::makeOrderedSet() +void Set::finalizeOrderedSet() { if (!ordered_set_elements) { @@ -301,7 +301,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co insertFromBlock(block, create_ordered_set); if (create_ordered_set) - makeOrderedSet(); + finalizeOrderedSet(); } diff --git a/dbms/src/Interpreters/Set.h b/dbms/src/Interpreters/Set.h index fe1b003e713bfd894c6e152d3e1c3d5778393061..3c5811855feb1a0808560f9a603c8586d1e482a3 100644 --- a/dbms/src/Interpreters/Set.h +++ b/dbms/src/Interpreters/Set.h @@ -41,11 +41,11 @@ public: /** Returns false, if some limit was exceeded and no need to insert more data. * - * Note that if create_ordered_set = true, you must call the method makeOrderedSet() after all required blocks + * Note that if create_ordered_set = true, you must call the method finalizeOrderedSet() after all required blocks * have been inserted. */ bool insertFromBlock(const Block & block, bool create_ordered_set = false); - void makeOrderedSet(); + void finalizeOrderedSet(); /** For columns of 'block', check belonging of corresponding rows to the set. * Return UInt8 column with the result. 
diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 02dd12448f92e9802370345fb68ada2da75289b0..914f5b330b6697cb4b0197dcb72a9bc0e22b976d 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -306,8 +306,8 @@ struct Settings M(SettingSeconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout") \ M(SettingSeconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout") \ M(SettingBool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown") \ - /** Try using the primary key if there is a subquery or a table expression on the right side of the IN operator. */ \ - M(SettingBool, try_primary_key_for_in_with_subqueries, true, "Try using the primary key if there is a subquery or a table expression on the right side of the IN operator.") + /** Try using an index if there is a subquery or a table expression on the right side of the IN operator. */ \ + M(SettingBool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.") /// Possible limits for query execution. diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 54b9979b8d8f09204f6aeb3c67c2ff6bc3e4697a..14c48d489caaea1010c060deec49c48fec8958da 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -277,7 +277,7 @@ public: /// Does table support index for IN sections virtual bool supportsIndexForIn() const { return false; } - /// Provides a hint that the storage engine may evaluate the IN-condition by using the index. + /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. 
virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */) const { return false; } /// Checks validity of the data diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f6c06ff8fe45e1c01e784d4c0b070958d522b509..c3afacf499e77f7518b6b07e096a27778b6bd368 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2159,7 +2159,7 @@ void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_ } } -bool MergeTreeData::isIndexColumn(const ASTPtr & node) const +bool MergeTreeData::isPrimaryKeyColumn(const ASTPtr &node) const { String column_name = node->getColumnName(); @@ -2172,21 +2172,22 @@ bool MergeTreeData::isIndexColumn(const ASTPtr & node) const bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const { - /// Make sure that the left side of the IN operator is part of the primary key. If there is a tuple on the left side - /// of the IN operator, each item of the tuple must be part of the primary key. + /// Make sure that the left side of the IN operator is part of the primary key. + /// If there is a tuple on the left side of the IN operator, each item of the tuple must be part of the primary key. const ASTFunction * left_in_operand_tuple = typeid_cast(left_in_operand.get()); if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") { for (const auto & item : left_in_operand_tuple->arguments->children) - if (!isIndexColumn(item)) - return false; + if (!isPrimaryKeyColumn(item)) + /// The tuple itself may be part of the primary key, so check that as a last resort. + return isPrimaryKeyColumn(left_in_operand); /// tuple() is invalid but can still be found here since this method may be called before the arguments are validated. 
- return left_in_operand_tuple->arguments->children.size() != 0; + return !left_in_operand_tuple->arguments->children.empty(); } else { - return isIndexColumn(left_in_operand); + return isPrimaryKeyColumn(left_in_operand); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index c9eb12155406d11424f5792dbcd02ded0c57ad48..0a468f99f25810b63fa0f0f3fb595467c3bf41b7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -666,7 +666,7 @@ private: DataPartPtr getAnyPartInPartition(const String & partition_id, std::unique_lock & data_parts_lock); /// Checks whether the column is in the primary key. - bool isIndexColumn(const ASTPtr & node) const; + bool isPrimaryKeyColumn(const ASTPtr &node) const; }; }