From bfa59adbfefae4c9a5328c27c8a0907581378757 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 1 Jun 2020 20:27:22 +0300 Subject: [PATCH] fix select from StorageJoin --- src/Storages/StorageJoin.cpp | 67 +++++++------------ ...140_select_from_storage_join_fix.reference | 8 +++ .../01140_select_from_storage_join_fix.sql | 42 ++++++++++++ 3 files changed, 76 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/01140_select_from_storage_join_fix.reference create mode 100644 tests/queries/0_stateless/01140_select_from_storage_join_fix.sql diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 9b17334e57..da251fa7d6 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -251,22 +251,26 @@ public: , max_block_size(max_block_size_) , sample_block(std::move(sample_block_)) { - columns.resize(sample_block.columns()); column_indices.resize(sample_block.columns()); - column_with_null.resize(sample_block.columns()); + + auto & saved_block = parent.getJoinedData()->sample_block; + for (size_t i = 0; i < sample_block.columns(); ++i) { auto & [_, type, name] = sample_block.getByPosition(i); if (parent.right_table_keys.has(name)) { key_pos = i; - column_with_null[i] = parent.right_table_keys.getByName(name).type->isNullable(); + auto & column = parent.right_table_keys.getByName(name); + restored_block.insert(column); } else { - auto pos = parent.sample_block_with_columns_to_add.getPositionByName(name); + size_t pos = saved_block.getPositionByName(name); column_indices[i] = pos; - column_with_null[i] = !parent.sample_block_with_columns_to_add.getByPosition(pos).type->equals(*type); + + auto & column = saved_block.getByPosition(pos); + restored_block.insert(column); } } } @@ -291,11 +295,10 @@ private: std::shared_lock lock; UInt64 max_block_size; Block sample_block; + Block restored_block; /// sample_block with parent column types ColumnNumbers column_indices; - std::vector column_with_null; std::optional key_pos; - MutableColumns columns; std::unique_ptr> position; /// type erasure @@ -303,23 +306,7 @@ private: template Chunk createChunk(const Maps & maps) { - for (size_t i = 0; i < sample_block.columns(); ++i) - { - const auto & src_col = sample_block.safeGetByPosition(i); - columns[i] = src_col.type->createColumn(); - if (column_with_null[i]) - { - if (key_pos == i) - { - // unwrap null key column - auto & nullable_col = assert_cast(*columns[i]); - columns[i] = nullable_col.getNestedColumnPtr()->assumeMutable(); - } - else - // wrap non key column with null - columns[i] = makeNullable(std::move(columns[i]))->assumeMutable(); - } - } + MutableColumns columns = restored_block.cloneEmpty().mutateColumns(); size_t rows_added = 0; @@ -327,7 +314,7 @@ private: { #define M(TYPE) \ case HashJoin::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE); \ + rows_added = fillColumns(*maps.TYPE, columns); \ break; APPLY_FOR_JOIN_VARIANTS_LIMITED(M) #undef M @@ -340,29 +327,27 @@ private: if (!rows_added) return {}; - Columns res_columns; - res_columns.reserve(columns.size()); - + /// Correct nullability for (size_t i = 0; i < columns.size(); ++i) - if (column_with_null[i]) + { + bool src_nullable = restored_block.getByPosition(i).type->isNullable(); + bool dst_nullable = sample_block.getByPosition(i).type->isNullable(); + + if (src_nullable && !dst_nullable) { - if (key_pos == i) - res_columns.emplace_back(makeNullable(std::move(columns[i]))); - else - { - const auto & nullable_col = assert_cast(*columns[i]); - res_columns.emplace_back(makeNullable(nullable_col.getNestedColumnPtr())); - } + auto & nullable_column = assert_cast(*columns[i]); + columns[i] = nullable_column.getNestedColumnPtr()->assumeMutable(); } - else - res_columns.emplace_back(std::move(columns[i])); + else if (!src_nullable && dst_nullable) + columns[i] = makeNullable(std::move(columns[i]))->assumeMutable(); + } - UInt64 num_rows = res_columns.at(0)->size(); - return Chunk(std::move(res_columns), num_rows); + UInt64 num_rows = columns.at(0)->size(); + return Chunk(std::move(columns), num_rows); } template - size_t fillColumns(const Map & map) + size_t fillColumns(const Map & map, MutableColumns & columns) { size_t rows_added = 0; diff --git a/tests/queries/0_stateless/01140_select_from_storage_join_fix.reference b/tests/queries/0_stateless/01140_select_from_storage_join_fix.reference new file mode 100644 index 0000000000..101a270ad3 --- /dev/null +++ b/tests/queries/0_stateless/01140_select_from_storage_join_fix.reference @@ -0,0 +1,8 @@ +1 s 1 String String +2 s 2 String String +3 s 3 Nullable(String) String +4 s 4 String Nullable(String) +1 s 1 String String +2 s 2 String String +3 s 3 Nullable(String) String +4 s 4 String Nullable(String) diff --git a/tests/queries/0_stateless/01140_select_from_storage_join_fix.sql b/tests/queries/0_stateless/01140_select_from_storage_join_fix.sql new file mode 100644 index 0000000000..4e64c90f56 --- /dev/null +++ b/tests/queries/0_stateless/01140_select_from_storage_join_fix.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +DROP TABLE IF EXISTS t4; + +CREATE TABLE t1 (id String, name String, value UInt32) +ENGINE = Join(ANY, LEFT, id) +SETTINGS join_use_nulls = 1; + +CREATE TABLE t2 (id String, name String, value UInt32) +ENGINE = Join(ANY, LEFT, id) +SETTINGS join_use_nulls = 0; + +CREATE TABLE t3 (id Nullable(String), name String, value UInt32) +ENGINE = Join(ANY, LEFT, id) +SETTINGS join_use_nulls = 1; + +CREATE TABLE t4 (id String, name Nullable(String), value UInt32) +ENGINE = Join(ANY, LEFT, id) +SETTINGS join_use_nulls = 0; + +insert into t1 values('1', 's', 1); +insert into t2 values('2', 's', 2); +insert into t3 values('3', 's', 3); +insert into t4 values('4', 's', 4); + +select *, toTypeName(id), toTypeName(name) from t1; +select *, toTypeName(id), toTypeName(name) from t2; +select *, toTypeName(id), toTypeName(name) from t3; +select *, toTypeName(id), toTypeName(name) from t4; + +SET join_use_nulls = 1; + +select *, toTypeName(id), toTypeName(name) from t1; +select *, toTypeName(id), toTypeName(name) from t2; +select *, toTypeName(id), toTypeName(name) from t3; +select *, toTypeName(id), toTypeName(name) from t4; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +DROP TABLE t4; -- GitLab