未验证 提交 0dee8cf7 编写于 作者: A alexey-milovidov 提交者: GitHub

Merge pull request #4855 from 4ertus2/joins

Fix crash when joining nulluble vs not nullable
......@@ -1236,8 +1236,12 @@ public:
makeResultSampleBlock(left_sample_block, right_sample_block, columns_added_by_join,
key_positions_left, is_left_key, left_to_right_key_map);
auto nullability_changes = getNullabilityChanges(parent.sample_block_with_keys, result_sample_block,
key_positions_left, left_to_right_key_map);
column_indices_left.reserve(left_sample_block.columns() - key_names_left.size());
column_indices_keys_and_right.reserve(key_names_left.size() + right_sample_block.columns());
key_nullability_changes.reserve(key_positions_left.size());
/// Use right key columns if present. @note left & right key columns could have different nullability.
for (size_t key_pos : key_positions_left)
......@@ -1250,11 +1254,12 @@ public:
auto it = left_to_right_key_map.find(key_pos);
if (it != left_to_right_key_map.end())
{
column_indices_keys_and_right.push_back(it->second);
column_indices_left.push_back(key_pos);
key_pos = it->second;
}
else
column_indices_keys_and_right.push_back(key_pos);
column_indices_keys_and_right.push_back(key_pos);
key_nullability_changes.push_back(nullability_changes.count(key_pos));
}
for (size_t i = 0; i < left_sample_block.columns(); ++i)
......@@ -1281,7 +1286,7 @@ protected:
if (parent.dispatch([&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
;
else
throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR);
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
return block;
}
......@@ -1290,11 +1295,13 @@ private:
UInt64 max_block_size;
Block result_sample_block;
/// Indices of columns in result_sample_block that come from the left-side table (except key columns).
/// Indices of columns in result_sample_block that come from the left-side table (except shared right+left key columns).
ColumnNumbers column_indices_left;
/// Indices of key columns in result_sample_block or columns that come from the right-side table.
/// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks.
ColumnNumbers column_indices_keys_and_right;
/// Which key columns need change nullability (right is nullable and left is not or vice versa)
std::vector<bool> key_nullability_changes;
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
......@@ -1350,6 +1357,9 @@ private:
MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);
/// Temporary change destination key columns' nullability according to mapped block
changeNullability(columns_keys_and_right, key_nullability_changes);
size_t rows_added = 0;
switch (parent.type)
......@@ -1368,6 +1378,9 @@ private:
if (!rows_added)
return {};
/// Revert columns nullability
changeNullability(columns_keys_and_right, key_nullability_changes);
Block res = result_sample_block.cloneEmpty();
/// @note it's possible to make ColumnConst here and materialize it later
......@@ -1425,6 +1438,47 @@ private:
return rows_added;
}
static std::unordered_set<size_t> getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block,
const std::vector<size_t> & key_positions,
const std::unordered_map<size_t, size_t> & left_to_right_key_map)
{
std::unordered_set<size_t> nullability_changes;
for (size_t i = 0; i < key_positions.size(); ++i)
{
size_t key_pos = key_positions[i];
auto it = left_to_right_key_map.find(key_pos);
if (it != left_to_right_key_map.end())
key_pos = it->second;
const auto & dst = out_block.getByPosition(key_pos).column;
const auto & src = sample_block_with_keys.getByPosition(i).column;
if (dst->isColumnNullable() != src->isColumnNullable())
nullability_changes.insert(key_pos);
}
return nullability_changes;
}
static void changeNullability(MutableColumns & columns, const std::vector<bool> & changes_bitmap)
{
/// @note changes_bitmap.size() <= columns.size()
for (size_t i = 0; i < changes_bitmap.size(); ++i)
{
if (changes_bitmap[i])
{
ColumnPtr column = std::move(columns[i]);
if (column->isColumnNullable())
column = static_cast<const ColumnNullable &>(*column).getNestedColumnPtr();
else
column = makeNullable(column);
columns[i] = (*std::move(column)).mutate();
}
}
}
};
......
foo \N 2 0 Nullable(String) Nullable(String)
bar bar 1 2 Nullable(String) Nullable(String)
\N 0 1 Nullable(String) Nullable(String)
\N test 0 1 Nullable(String) Nullable(String)
foo \N 2 0 Nullable(String) Nullable(String)
bar bar 1 2 Nullable(String) Nullable(String)
\N 0 1 Nullable(String) Nullable(String)
\N test 0 1 Nullable(String) Nullable(String)
foo 2 0 String Nullable(String)
bar bar 1 2 String Nullable(String)
test 0 1 String Nullable(String)
bar bar 1 2 String Nullable(String)
test 0 1 String Nullable(String)
foo 2 0 String
bar 1 2 String
test 0 1 String
USE test;
DROP TABLE IF EXISTS table_a;
DROP TABLE IF EXISTS table_b;
CREATE TABLE table_a (
event_id UInt64,
something String,
other Nullable(String)
) ENGINE = MergeTree ORDER BY (event_id);
CREATE TABLE table_b (
event_id UInt64,
something Nullable(String),
other String
) ENGINE = MergeTree ORDER BY (event_id);
INSERT INTO table_a VALUES (1, 'foo', 'foo'), (2, 'foo', 'foo'), (3, 'bar', 'bar');
INSERT INTO table_b VALUES (1, 'bar', 'bar'), (2, 'bar', 'bar'), (3, 'test', 'test'), (4, NULL, '');
SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2.other) FROM
( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1
ALL FULL JOIN
( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2
ON s1.other = s2.other
ORDER BY count_a DESC;
SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2.other) FROM
( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1
ALL FULL JOIN
( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2
USING other
ORDER BY count_a DESC;
SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM
( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1
ALL FULL JOIN
( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2
ON s1.something = s2.something
ORDER BY count_a DESC;
SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM
( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1
ALL RIGHT JOIN
( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2
USING (something)
ORDER BY count_a DESC;
SELECT something, count_a, count_b, toTypeName(something) FROM
( SELECT something, count() AS count_a FROM table_a GROUP BY something )
ALL FULL JOIN
( SELECT something, count() AS count_b FROM table_b GROUP BY something )
USING (something)
ORDER BY count_a DESC;
DROP TABLE table_a;
DROP TABLE table_b;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册