diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3336f175ebefd0591e3f8eea94647261322e2036..db7b9179a77ccf79b0d07dec808a2888d164458e 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,23 +32,54 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) -{ - NameSet required; +static std::unordered_map requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ NameSet right_keys; for (const auto & name : key_names) right_keys.insert(name); + std::unordered_map required; for (const auto & column : columns_added_by_join) - { if (right_keys.count(column.name)) - required.insert(column.name); - } + required.insert({column.name, column.type}); return required; } +static void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + if (column.type->isNullable()) + return; + + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} + +/// Converts column to nullable if needed. No backward convertion. +static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) +{ + if (nullable) + convertColumnToNullable(column); + return std::move(column); +} + +static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable, const ColumnUInt8 & negative_null_map) +{ + if (nullable) + { + convertColumnToNullable(column); + if (negative_null_map.size()) + { + MutableColumnPtr mutable_column = (*std::move(column.column)).mutate(); + static_cast(*mutable_column).applyNegatedNullMap(negative_null_map); + column.column = std::move(mutable_column); + } + } + return std::move(column); +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -215,15 +246,6 @@ size_t Join::getTotalByteCount() const return res; } - -static void convertColumnToNullable(ColumnWithTypeAndName & column) -{ - column.type = makeNullable(column.type); - if (column.column) - column.column = makeNullable(column.column); -} - - void Join::setSampleBlock(const Block & block) { std::unique_lock lock(rwlock); @@ -712,7 +734,7 @@ void Join::joinBlockImpl( std::unique_ptr offsets_to_replicate; - IColumn::Filter filter = switchJoinRightColumns( + IColumn::Filter row_filter = switchJoinRightColumns( type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); for (size_t i = 0; i < added.size(); ++i) @@ -720,10 +742,16 @@ void Join::joinBlockImpl( /// Filter & insert missing rows - NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { + /// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap() + auto null_map_filter_ptr = ColumnUInt8::create(); + ColumnUInt8 & null_map_filter = static_cast(*null_map_filter_ptr); + null_map_filter.getData().swap(row_filter); + const IColumn::Filter & filter = null_map_filter.getData(); + constexpr bool inner_or_right = static_in_v; if constexpr (inner_or_right) { @@ -737,10 +765,12 @@ void Join::joinBlockImpl( auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); - block.insert({col.column, col.type, right_name}); + bool is_nullable = it->second->isNullable(); + block.insert(correctNullability({col.column, col.type, right_name}, is_nullable)); } } } @@ -752,7 +782,8 @@ void Join::joinBlockImpl( auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); ColumnPtr column = col.column->convertToFullColumnIfConst(); @@ -766,13 +797,15 @@ void Join::joinBlockImpl( mut_column->insertDefault(); } - block.insert({std::move(mut_column), col.type, right_name}); + bool is_nullable = use_nulls || it->second->isNullable(); + block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable, null_map_filter)); } } } } else { + constexpr bool left_or_full = static_in_v; if (!offsets_to_replicate) throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); @@ -782,7 +815,8 @@ void Join::joinBlockImpl( auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); ColumnPtr column = col.column->convertToFullColumnIfConst(); @@ -793,7 +827,7 @@ void Join::joinBlockImpl( { if (size_t to_insert = (*offsets_to_replicate)[row] - last_offset) { - if (!filter[row]) + if (!row_filter[row]) mut_column->insertDefault(); else for (size_t dup = 0; dup < to_insert; ++dup) @@ -803,7 +837,9 @@ void Join::joinBlockImpl( last_offset = (*offsets_to_replicate)[row]; } - block.insert({std::move(mut_column), col.type, right_name}); + /// TODO: null_map_filter + bool is_nullable = (use_nulls && left_or_full) || it->second->isNullable(); + block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable)); } } @@ -997,11 +1033,8 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { - for (size_t j = 0; j < columns_left.size(); ++j) - columns_left[j]->insertDefault(); - for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); @@ -1012,13 +1045,10 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < columns_left.size(); ++j) - columns_left[j]->insertDefault(); - for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); @@ -1040,53 +1070,51 @@ public: * result_sample_block - keys, "left" columns, and "right" columns. */ - std::unordered_map key_renames; - makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); - - const Block & right_sample_block = parent.sample_block_with_columns_to_add; - - size_t num_keys = key_names_left.size(); - size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = right_sample_block.columns(); - - column_indices_left.reserve(num_columns_left); - column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_left_key(left_sample_block.columns(), false); + std::vector key_positions_left; + key_positions_left.reserve(key_names_left.size()); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); + key_positions_left.push_back(key_pos); is_left_key[key_pos] = true; + } + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + std::unordered_map left_to_right_key_map; + makeResultSampleBlock(left_sample_block, right_sample_block, columns_added_by_join, + key_positions_left, is_left_key, left_to_right_key_map); + + column_indices_left.reserve(left_sample_block.columns() - key_names_left.size()); + column_indices_keys_and_right.reserve(key_names_left.size() + right_sample_block.columns()); + + /// Use right key columns if present. @note left & right key columns could have different nullability. + for (size_t key_pos : key_positions_left) + { /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. - column_indices_keys_and_right.push_back(key_pos); - auto it = key_renames.find(key); - if (it != key_renames.end()) - key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); - } - - size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - - for (size_t i = 0; i < result_sample_block.columns(); ++i) - { - if (i < left_sample_block.columns()) + auto it = left_to_right_key_map.find(key_pos); + if (it != left_to_right_key_map.end()) { - if (!is_left_key[i]) - { - column_indices_left.emplace_back(i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) - convertColumnToNullable(result_sample_block.getByPosition(i)); - } + column_indices_keys_and_right.push_back(it->second); + column_indices_left.push_back(key_pos); } - else if (i < num_src_columns) - column_indices_keys_and_right.emplace_back(i); + else + column_indices_keys_and_right.push_back(key_pos); } + + for (size_t i = 0; i < left_sample_block.columns(); ++i) + if (!is_left_key[i]) + column_indices_left.emplace_back(i); + + size_t num_additional_keys = left_to_right_key_map.size(); + for (size_t i = left_sample_block.columns(); i < result_sample_block.columns() - num_additional_keys; ++i) + column_indices_keys_and_right.emplace_back(i); } String getName() const override { return "NonJoined"; } @@ -1118,18 +1146,25 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; - std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure - void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, - const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block, + const NamesAndTypesList & columns_added_by_join, + const std::vector & key_positions_left, const std::vector & is_left_key, + std::unordered_map & left_to_right_key_map) { - const Block & right_sample_block = parent.sample_block_with_columns_to_add; - result_sample_block = materializeBlock(left_sample_block); + /// Convert left columns to Nullable if allowed + if (parent.use_nulls) + { + for (size_t i = 0; i < result_sample_block.columns(); ++i) + if (!is_left_key[i]) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + /// Add columns from the right-side table to the block. for (size_t i = 0; i < right_sample_block.columns(); ++i) { @@ -1139,20 +1174,23 @@ private: } const auto & key_names_right = parent.key_names_right; - NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); /// Add join key columns from right block if they has different name. for (size_t i = 0; i < key_names_right.size(); ++i) { auto & right_name = key_names_right[i]; - auto & left_name = key_names_left[i]; + size_t left_key_pos = key_positions_left[i]; - if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !result_sample_block.has(right_name)) { - const auto & col = result_sample_block.getByName(left_name); - result_sample_block.insert({col.column, col.type, right_name}); + const auto & col = result_sample_block.getByPosition(left_key_pos); + bool is_nullable = (parent.use_nulls && isFull(parent.kind)) || it->second->isNullable(); + result_sample_block.insert(correctNullability({col.column, col.type, right_name}, is_nullable)); - key_renames[left_name] = right_name; + size_t right_key_pos = result_sample_block.getPositionByName(right_name); + left_to_right_key_map[left_key_pos] = right_key_pos; } } } @@ -1169,7 +1207,7 @@ private: { #define M(TYPE) \ case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \ + rows_added = fillColumns(*maps.TYPE, columns_keys_and_right); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1183,32 +1221,12 @@ private: Block res = result_sample_block.cloneEmpty(); + /// @note it's possible to make ColumnConst here and materialize it later for (size_t i = 0; i < columns_left.size(); ++i) - res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]); - - if (key_renames_indices.empty()) - { - for (size_t i = 0; i < columns_keys_and_right.size(); ++i) - res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); - } - else - { - for (size_t i = 0; i < columns_keys_and_right.size(); ++i) - { - size_t key_idx = column_indices_keys_and_right[i]; + res.getByPosition(column_indices_left[i]).column = columns_left[i]->cloneResized(rows_added); - auto it = key_renames_indices.find(key_idx); - if (it != key_renames_indices.end()) - { - auto & key_column = res.getByPosition(key_idx).column; - if (key_column->empty()) - key_column = key_column->cloneResized(columns_keys_and_right[i]->size()); - res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]); - } - else - res.getByPosition(key_idx).column = std::move(columns_keys_and_right[i]); - } - } + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); return res; } @@ -1230,7 +1248,7 @@ private: } template - size_t fillColumns(const Map & map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) + size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) { size_t rows_added = 0; @@ -1247,7 +1265,7 @@ private: if (it->getSecond().getUsed()) continue; - AdderNonJoined::add(it->getSecond(), rows_added, columns_left, columns_keys_and_right); + AdderNonJoined::add(it->getSecond(), rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { diff --git a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference new file mode 100644 index 0000000000000000000000000000000000000000..c2b37fba363abb250cc5664edd831840848600c2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference @@ -0,0 +1,44 @@ +on +l \N \N String Nullable(String) +l \N \N String Nullable(String) + r \N String Nullable(String) +\N r \N Nullable(String) Nullable(String) +l \N String Nullable(String) +l \N String Nullable(String) + r \N String Nullable(String) +\N r \N Nullable(String) Nullable(String) +\N \N +0 \N +using +l \N String Nullable(String) +l \N String Nullable(String) + \N String Nullable(String) +\N \N Nullable(String) Nullable(String) +l \N String Nullable(String) +l \N String Nullable(String) + \N String Nullable(String) +\N \N Nullable(String) Nullable(String) +\N \N +0 \N +on + join_use_nulls +l \N \N TODO Nullable(String) +l \N \N TODO Nullable(String) + r \N TODO Nullable(String) +\N r \N Nullable(String) Nullable(String) +l \N TODO Nullable(String) +l \N TODO Nullable(String) + r \N TODO Nullable(String) +\N r \N Nullable(String) Nullable(String) +\N \N +0 \N +using + join_use_nulls +l \N TODO Nullable(String) +l \N TODO Nullable(String) + \N TODO Nullable(String) +\N \N Nullable(String) Nullable(String) +l \N TODO Nullable(String) +l \N TODO Nullable(String) + \N TODO Nullable(String) +\N \N Nullable(String) Nullable(String) +\N \N +0 \N diff --git a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql new file mode 100644 index 0000000000000000000000000000000000000000..b38ac854bc7a6e47aeeb360d49712ca386ee9965 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql @@ -0,0 +1,69 @@ +USE test; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +CREATE TABLE t1 ( id String ) ENGINE = Memory; +CREATE TABLE t2 ( id Nullable(String) ) ENGINE = Memory; +CREATE TABLE t3 ( id Nullable(String), not_id Nullable(String) ) ENGINE = Memory; + +insert into t1 values ('l'); +insert into t3 (id) values ('r'); + +SELECT 'on'; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 ON t2.id = t3.id; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 ON t2.id = t3.id; + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 ON t1.id = t3.id; + +SELECT 'using'; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 USING(id); + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 LEFT JOIN t3 USING(id); +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 USING(id); + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 USING(id); + +SET join_use_nulls = 1; +-- TODO: toTypeName(t1.id) String -> Nullable(String) + +SELECT 'on + join_use_nulls'; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 ON t2.id = t3.id; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 ON t2.id = t3.id; + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 ON t1.id = t3.id; + +SELECT 'using + join_use_nulls'; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 USING(id); + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 LEFT JOIN t3 USING(id); +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 USING(id); + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 USING(id); + +DROP TABLE t1; +DROP TABLE t2; diff --git a/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference b/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference new file mode 100644 index 0000000000000000000000000000000000000000..b0d5371e4f7c407c5685002910566c5b20f0e14a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference @@ -0,0 +1,2 @@ +1 0 +\N 1 diff --git a/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql b/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql new file mode 100644 index 0000000000000000000000000000000000000000..b3f4b332a468604cc8a0ab141dfa55403c28206a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; +CREATE TABLE table1 ( id String ) ENGINE = Log; +CREATE TABLE table2 ( parent_id String ) ENGINE = Log; + +insert into table1 values ('1'); + +SELECT table2.parent_id = '', isNull(table2.parent_id) +FROM table1 ANY LEFT JOIN table2 ON table1.id = table2.parent_id; + +SET join_use_nulls = 1; + +SELECT table2.parent_id = '', isNull(table2.parent_id) +FROM table1 ANY LEFT JOIN table2 ON table1.id = table2.parent_id; + +DROP TABLE test.table1; +DROP TABLE test.table2;