diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c3ea45bf81724fa43addc2446600e38d3df0da97..f249a451312e59a2ecc032641af8c2af06036c73 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -16,8 +16,7 @@ namespace DB ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table) const + const Context & context) const { if (!select_query_with_join) return nullptr; @@ -48,8 +47,14 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( ASTPtr query = expression_list; auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns); - ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns); - auto joined_block_actions = analyzer.getActions(false); + ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns_set); + return analyzer.getActions(false); +} + +NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const +{ + NameSet required_columns_from_joined_table; auto required_action_columns = joined_block_actions->getRequiredColumns(); required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end()); @@ -63,7 +68,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( if (!sample.has(column.name_and_type.name)) required_columns_from_joined_table.insert(column.name_and_type.name); - return joined_block_actions; + return required_columns_from_joined_table; } const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 4c215821755a8a47dfcf548d33377648c693e757..d8d8673ba15ada47879422c5b1a16ddf95a467d5 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -64,9 +64,11 @@ struct AnalyzedJoin ExpressionActionsPtr createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns. const ASTSelectQuery * select_query_with_join, - const Context & context, - NameSet & required_columns_from_joined_table /// Columns which will be used in query from joined table. - ) const; + const Context & context) const; + + /// Columns which will be used in query from joined table. + NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, + const ExpressionActionsPtr & joined_block_actions) const; const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns, const Context & context, diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0393e86ddf319859a9a9464370d73537efc51244..8883698c52bea0b5578e7f04bbe605b2b382d378 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -160,15 +160,13 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column ExpressionAction ExpressionAction::ordinaryJoin( std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, - const NameSet & columns_added_by_join_from_right_keys_) + const NamesAndTypesList & columns_added_by_join_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; - a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -463,7 +461,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const case JOIN: { - join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); + join->joinBlock(block, join_key_names_left, columns_added_by_join); break; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 2b6034ba8998eb5a70d4fc11bcb113d4ffecad4a..484cbf31d95490ce105032e755c0e1376d96ff2a 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -109,7 +109,6 @@ public: std::shared_ptr join; Names join_key_names_left; NamesAndTypesList columns_added_by_join; - NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -126,7 +125,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); + const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fd56c55e05f6b1562130c289ff2b08a26d4bd2de..c8cf0da68d9470aaa78e16054a59c57996972e42 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -83,7 +83,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns, - const Names & required_result_columns_, + const NameSet & required_result_columns_, size_t subquery_depth_, bool do_global_, const SubqueriesForSets & subqueries_for_sets_) @@ -504,13 +504,12 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list, columns_added_by_join_from_right_keys)); + columns_added_by_join_list)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -851,8 +850,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con for (size_t i = 0; i < asts.size(); ++i) { String result_name = asts[i]->getAliasOrColumnName(); - if (required_result_columns.empty() - || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end()) + if (required_result_columns.empty() || required_result_columns.count(result_name)) { result_columns.emplace_back(asts[i]->getColumnName(), result_name); step.required_output.push_back(result_columns.back().second); @@ -1003,10 +1001,6 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & name : source_columns) avaliable_columns.insert(name.name); - NameSet right_keys; - for (const auto & right_key_name : analyzed_join.key_names_right) - right_keys.insert(right_key_name); - /** You also need to ignore the identifiers of the columns that are obtained by JOIN. * (Do not assume that they are required for reading from the "left" table). */ @@ -1018,10 +1012,6 @@ void ExpressionAnalyzer::collectUsedColumns() { columns_added_by_join.push_back(joined_column); required.erase(name); - - /// Some columns from right join key may be used in query. This columns will be appended to block during join. - if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); } } @@ -1057,8 +1047,6 @@ void ExpressionAnalyzer::collectUsedColumns() if (cropped_name == name) { columns_added_by_join.push_back(joined_column); - if (right_keys.count(name)) - columns_added_by_join_from_right_keys.insert(name); collated = true; break; } @@ -1072,9 +1060,8 @@ void ExpressionAnalyzer::collectUsedColumns() required.swap(fixed_required); } - /// @note required_columns_from_joined_table is output - joined_block_actions = analyzed_join.createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); + joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); + required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions); } if (columns_context.has_array_join) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ae698f81282e5aa397af5b6318e3889ee8c2553c..d8872f1b8d18e04e067795b17e5b903f147a27dd 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -43,7 +43,7 @@ struct ExpressionAnalyzerData NamesAndTypesList source_columns; /// If non-empty, ignore all expressions in not from this list. - Names required_result_columns; + NameSet required_result_columns; SubqueriesForSets subqueries_for_sets; PreparedSets prepared_sets; @@ -73,13 +73,9 @@ struct ExpressionAnalyzerData /// Columns which will be used in query from joined table. Duplicate names are qualified. NameSet required_columns_from_joined_table; - /// Such columns will be copied from left join keys during join. - /// Example: select right from tab1 join tab2 on left + 1 = right - NameSet columns_added_by_join_from_right_keys; - protected: ExpressionAnalyzerData(const NamesAndTypesList & source_columns_, - const Names & required_result_columns_, + const NameSet & required_result_columns_, const SubqueriesForSets & subqueries_for_sets_) : source_columns(source_columns_), required_result_columns(required_result_columns_), @@ -136,7 +132,7 @@ public: const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, const NamesAndTypesList & additional_source_columns = {}, - const Names & required_result_columns_ = {}, + const NameSet & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, const SubqueriesForSets & subqueries_for_set_ = {}); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ed73e2d09aeb6c0f344541f2a7e4cd7901bf1a41..3b17a874bfaabcf92cd7a7ffc75629844504fbe7 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -195,7 +195,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( - query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); + query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), + NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze); if (!only_analyze) { diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9ddf4e0aa6a7baf2219ed8c43b50f889612dff57..e1215fea77d421301de6d4abb07571bd280108c2 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,6 +32,23 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ + NameSet required; + + NameSet right_keys; + for (const auto & name : key_names) + right_keys.insert(name); + + for (const auto & column : columns_added_by_join) + { + if (right_keys.count(column.name)) + required.insert(column.name); + } + + return required; +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -959,10 +976,12 @@ void Join::joinGet(Block & block, const String & column_name) const } -void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const +void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + std::shared_lock lock(rwlock); checkTypesOfKeys(block, key_names_left, sample_block_with_keys); diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 3a70f1d07ac18f0db245c505bdcdbad18998f312..233aca7d1d1937f63f8bcd4feed3034677230ab7 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -240,7 +240,7 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. */ - void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const; /// Infer the return type for joinGet function DataTypePtr joinGetReturnType(const String & column_name) const;