From 0439ef5f7f141ecf0b2c35d4cc533b3f389fa94e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 13:54:45 +0300 Subject: [PATCH] dbms: implemented LEFT ARRAY JOIN [#METR-17474]. --- .../DB/Interpreters/ExpressionActions.h | 4 ++- dbms/include/DB/Parsers/ASTSelectQuery.h | 1 + dbms/src/Interpreters/ExpressionActions.cpp | 26 ++++++++++++++++--- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +-- dbms/src/Parsers/ParserSelectQuery.cpp | 21 ++++++++++++--- dbms/src/Parsers/formatAST.cpp | 4 ++- .../00207_left_array_join.reference | 23 ++++++++++++++++ .../0_stateless/00207_left_array_join.sql | 2 ++ 8 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00207_left_array_join.reference create mode 100644 dbms/tests/queries/0_stateless/00207_left_array_join.sql diff --git a/dbms/include/DB/Interpreters/ExpressionActions.h b/dbms/include/DB/Interpreters/ExpressionActions.h index b4dedade09..39aed32e3c 100644 --- a/dbms/include/DB/Interpreters/ExpressionActions.h +++ b/dbms/include/DB/Interpreters/ExpressionActions.h @@ -66,6 +66,7 @@ public: /// Для ARRAY_JOIN NameSet array_joined_columns; + bool array_join_is_left; /// Для JOIN const Join * join = nullptr; @@ -122,13 +123,14 @@ public: return a; } - static ExpressionAction arrayJoin(const NameSet & array_joined_columns) + static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left) { if (array_joined_columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); ExpressionAction a; a.type = ARRAY_JOIN; a.array_joined_columns = array_joined_columns; + a.array_join_is_left = array_join_is_left; return a; } diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index f6edf7ebfb..b941046c53 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -50,6 +50,7 @@ public: ASTPtr select_expression_list; ASTPtr database; ASTPtr table; /// Идентификатор, табличная функция или подзапрос (рекурсивно ASTSelectQuery) + bool array_join_is_left = false; /// LEFT ARRAY JOIN ASTPtr array_join_expression_list; /// ARRAY JOIN ASTPtr join; /// Обычный (не ARRAY) JOIN. bool final = false; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index a227bb1c71..caa612434c 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -268,6 +269,24 @@ void ExpressionAction::execute(Block & block) const if (!any_array) throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); + /// Если LEFT ARRAY JOIN, то создаём столбцы, в которых пустые массивы заменены на массивы с одним элементом - значением по-умолчанию. + std::map non_empty_array_columns; + if (array_join_is_left) + { + for (const auto & name : array_joined_columns) + { + auto src_col = block.getByName(name); + + Block tmp_block{src_col, {{}, src_col.type, {}}}; + + FunctionEmptyArrayToSingle().execute(tmp_block, {0}, 1); + non_empty_array_columns[name] = tmp_block.getByPosition(1).column; + } + + any_array_ptr = non_empty_array_columns.begin()->second; + any_array = typeid_cast(&*any_array_ptr); + } + size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) { @@ -278,7 +297,8 @@ void ExpressionAction::execute(Block & block) const if (!typeid_cast(&*current.type)) throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); - ColumnPtr array_ptr = current.column; + ColumnPtr array_ptr = array_join_is_left ? non_empty_array_columns[current.name] : current.column; + if (array_ptr->isConst()) array_ptr = dynamic_cast(*array_ptr).convertToFullColumn(); @@ -379,7 +399,7 @@ std::string ExpressionAction::toString() const break; case ARRAY_JOIN: - ss << "ARRAY JOIN "; + ss << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN "; for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) { if (it != array_joined_columns.begin()) @@ -761,7 +781,7 @@ std::string ExpressionActions::getID() const ss << actions[i].result_name; if (actions[i].type == ExpressionAction::ARRAY_JOIN) { - ss << "{"; + ss << (actions[i].array_join_is_left ? "LEFT ARRAY JOIN" : "ARRAY JOIN") << "{"; for (NameSet::const_iterator it = actions[i].array_joined_columns.begin(); it != actions[i].array_joined_columns.end(); ++it) { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index c5a99bf5eb..37bd9cc15a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1369,7 +1369,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl actions_stack.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name)); NameSet joined_columns; joined_columns.insert(result_name); - actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns)); + actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns, false)); } return; @@ -1666,7 +1666,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio result_columns.insert(result_source.first); } - actions->add(ExpressionAction::arrayJoin(result_columns)); + actions->add(ExpressionAction::arrayJoin(result_columns, select_query->array_join_is_left)); } bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index 97a171846d..ee0908f185 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -23,6 +23,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p ParserString s_select("SELECT", true, true); ParserString s_distinct("DISTINCT", true, true); ParserString s_from("FROM", true, true); + ParserString s_left("LEFT", true, true); ParserString s_array("ARRAY", true, true); ParserString s_join("JOIN", true, true); ParserString s_using("USING", true, true); @@ -166,8 +167,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p if (!parse_final_and_sample()) return false; - /// ARRAY JOIN expr list - if (s_array.ignore(pos, end, max_parsed_pos, expected)) + /// [LEFT] ARRAY JOIN expr list + Pos saved_pos = pos; + bool has_array_join = false; + if (s_left.ignore(pos, end, max_parsed_pos, expected) && ws.ignore(pos, end) && s_array.ignore(pos, end, max_parsed_pos, expected)) + { + select_query->array_join_is_left = true; + has_array_join = true; + } + else + { + pos = saved_pos; + if (s_array.ignore(pos, end, max_parsed_pos, expected)) + has_array_join = true; + } + + if (has_array_join) { ws.ignore(pos, end); @@ -182,7 +197,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p ws.ignore(pos, end); } - /// [GLOBAL] ANY|ALL INNER|LEFT JOIN (subquery) USING tuple + /// [GLOBAL] [ANY|ALL] INNER|LEFT|RIGHT|FULL|CROSS [OUTER] JOIN (subquery)|table_name USING tuple join.parse(pos, end, select_query->join, max_parsed_pos, expected); if (!parse_final_and_sample()) diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 68feeb8d51..5d0854827e 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -163,7 +163,9 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo if (ast.array_join_expression_list) { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : ""); + s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str + << (ast.array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (hilite ? hilite_none : ""); + one_line ? formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line) : formatExpressionListMultiline(typeid_cast(*ast.array_join_expression_list), s, indent, hilite); diff --git a/dbms/tests/queries/0_stateless/00207_left_array_join.reference b/dbms/tests/queries/0_stateless/00207_left_array_join.reference new file mode 100644 index 0000000000..10ec6a7a16 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00207_left_array_join.reference @@ -0,0 +1,23 @@ +0 +1 +2 +2 +3 +4 +5 +5 +6 +7 +0 [] 0 +1 [0] 0 +2 [0,1] 0 +2 [0,1] 1 +3 [] 0 +4 [0] 0 +5 [0,1] 0 +5 [0,1] 1 +6 [] 0 +7 [0] 0 +8 [0,1] 0 +8 [0,1] 1 +9 [] 0 diff --git a/dbms/tests/queries/0_stateless/00207_left_array_join.sql b/dbms/tests/queries/0_stateless/00207_left_array_join.sql new file mode 100644 index 0000000000..8186054c25 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00207_left_array_join.sql @@ -0,0 +1,2 @@ +SELECT number FROM system.numbers LEFT ARRAY JOIN range(number % 3) AS arr LIMIT 10; +SELECT number, arr, x FROM (SELECT number, range(number % 3) AS arr FROM system.numbers LIMIT 10) LEFT ARRAY JOIN arr AS x; -- GitLab