From 52b73ad9b90e0fcbbef7a2633fe380407d8df187 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 23 Nov 2020 22:04:51 +0300 Subject: [PATCH] Backport #16993 to 20.12: Fix Merge(Distributed()) with JOIN --- src/Storages/StorageMerge.cpp | 12 ++++++++++++ .../01560_merge_distributed_join.reference | 0 .../01560_merge_distributed_join.sql | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 tests/queries/0_stateless/01560_merge_distributed_join.reference create mode 100644 tests/queries/0_stateless/01560_merge_distributed_join.sql diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 5779d822fa..f15ef8a578 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -135,6 +135,18 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { + ASTPtr modified_query = query_info.query->clone(); + auto & modified_select = modified_query->as<ASTSelectQuery &>(); + /// In case of JOIN the first stage (which includes JOIN) + /// should be done on the initiator always. + /// + /// Since in case of JOIN query on shards will receive query w/o JOIN (and their columns). + /// (see modifySelect()/removeJoin()) + /// + /// And for this we need to return FetchColumns. 
+ if (removeJoin(modified_select)) + return QueryProcessingStage::FetchColumns; + auto stage_in_source_tables = QueryProcessingStage::FetchColumns; DatabaseTablesIteratorPtr iterator = getDatabaseIterator(context); diff --git a/tests/queries/0_stateless/01560_merge_distributed_join.reference b/tests/queries/0_stateless/01560_merge_distributed_join.reference new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/queries/0_stateless/01560_merge_distributed_join.sql b/tests/queries/0_stateless/01560_merge_distributed_join.sql new file mode 100644 index 0000000000..991a0609dc --- /dev/null +++ b/tests/queries/0_stateless/01560_merge_distributed_join.sql @@ -0,0 +1,19 @@ +-- test from https://github.com/ClickHouse/ClickHouse/issues/11755#issuecomment-700850254 +DROP TABLE IF EXISTS cat_hist; +DROP TABLE IF EXISTS prod_hist; +DROP TABLE IF EXISTS products_l; +DROP TABLE IF EXISTS products; + +CREATE TABLE cat_hist (categoryId UUID, categoryName String) ENGINE Memory; +CREATE TABLE prod_hist (categoryId UUID, productId UUID) ENGINE = MergeTree ORDER BY productId; + +CREATE TABLE products_l AS prod_hist ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), prod_hist); +CREATE TABLE products as prod_hist ENGINE = Merge(currentDatabase(), '^products_'); + +SELECT * FROM products AS p LEFT JOIN cat_hist AS c USING (categoryId); +SELECT * FROM products AS p GLOBAL LEFT JOIN cat_hist AS c USING (categoryId); + +DROP TABLE cat_hist; +DROP TABLE prod_hist; +DROP TABLE products_l; +DROP TABLE products; -- GitLab