提交 a5a36d00 编写于 作者: N Nikolai Kochetov

Syntax analyzer (in progress).

上级 6ebfd2c4
......@@ -13,18 +13,20 @@
namespace DB
{
void AnalyzedJoin::createJoinedBlockActions(const NameSet & source_columns,
ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
const NameSet & source_columns,
const JoinedColumnsList & columns_added_by_join,
const ASTSelectQuery * select_query_with_join,
const Context & context)
const Context & context,
NameSet & required_columns_from_joined_table) const
{
if (!select_query_with_join)
return;
return nullptr;
const ASTTablesInSelectQueryElement * join = select_query_with_join->join();
if (!join)
return;
return nullptr;
const auto & join_params = static_cast<const ASTTableJoin &>(*join->table_join);
......@@ -41,13 +43,12 @@ void AnalyzedJoin::createJoinedBlockActions(const NameSet & source_columns,
required_columns_set.insert(joined_column.name_and_type.name);
Names required_columns(required_columns_set.begin(), required_columns_set.end());
const auto & columns_from_joined_table = getColumnsFromJoinedTable(source_columns, context, select_query_with_join);
NamesAndTypesList source_column_names;
for (auto & column : columns_from_joined_table)
source_column_names.emplace_back(column.name_and_type);
ExpressionAnalyzer analyzer(expression_list, context, nullptr, source_column_names, required_columns);
joined_block_actions = analyzer.getActions(false);
auto joined_block_actions = analyzer.getActions(false);
auto required_action_columns = joined_block_actions->getRequiredColumns();
required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end());
......@@ -60,6 +61,8 @@ void AnalyzedJoin::createJoinedBlockActions(const NameSet & source_columns,
for (auto & column : columns_added_by_join)
if (!sample.has(column.name_and_type.name))
required_columns_from_joined_table.insert(column.name_and_type.name);
return joined_block_actions;
}
const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable(
......
......@@ -57,20 +57,17 @@ struct AnalyzedJoin
/// All columns which can be read from joined table. Duplicating names are qualified.
JoinedColumnsList columns_from_joined_table;
/// Columns which will be used in query to the joined query. Duplicating names are qualified.
NameSet required_columns_from_joined_table;
/// Columns which will be added to block, possible including some columns from right join key.
/// Columns from joined table which may be added to block.
/// It's columns_from_joined_table without duplicate columns and possibly modified types.
JoinedColumnsList available_joined_columns;
/// Such columns will be copied from left join keys during join.
NameSet columns_added_by_join_from_right_keys;
/// Actions which need to be calculated on joined block.
ExpressionActionsPtr joined_block_actions;
void createJoinedBlockActions(const NameSet & source_columns,
const JoinedColumnsList & columns_added_by_join, // Subset of available_joined_columns
ExpressionActionsPtr createJoinedBlockActions(
const NameSet & source_columns,
const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns.
const ASTSelectQuery * select_query_with_join,
const Context & context);
const Context & context,
NameSet & required_columns_from_joined_table /// Columns which will be used in query from joined table.
) const;
const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns,
const Context & context,
......
......@@ -681,7 +681,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
Names original_columns;
for (const auto & column : analyzed_join.columns_from_joined_table)
if (analyzed_join.required_columns_from_joined_table.count(column.name_and_type.name))
if (required_columns_from_joined_table.count(column.name_and_type.name))
original_columns.emplace_back(column.original_name);
auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns);
......@@ -692,7 +692,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
/// Alias duplicating columns as qualified.
for (const auto & column : analyzed_join.columns_from_joined_table)
if (analyzed_join.required_columns_from_joined_table.count(column.name_and_type.name))
if (required_columns_from_joined_table.count(column.name_and_type.name))
subquery_for_set.joined_block_aliases.emplace_back(column.original_name, column.name_and_type.name);
auto sample_block = subquery_for_set.source->getHeader();
......@@ -708,12 +708,12 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
}
}
analyzed_join.joined_block_actions->execute(sample_block);
joined_block_actions->execute(sample_block);
/// TODO You do not need to set this up when JOIN is only needed on remote servers.
subquery_for_set.join = join;
subquery_for_set.join->setSampleBlock(sample_block);
subquery_for_set.joined_block_actions = analyzed_join.joined_block_actions;
subquery_for_set.joined_block_actions = joined_block_actions;
}
addJoinAction(step.actions, false);
......@@ -1117,7 +1117,8 @@ void ExpressionAnalyzer::collectUsedColumns()
NameSet source_columns_set;
for (const auto & type_name : source_columns)
source_columns_set.insert(type_name.name);
analyzed_join.createJoinedBlockActions(source_columns_set, columns_added_by_join, select_query, context);
joined_block_actions = analyzed_join.createJoinedBlockActions(
source_columns_set, columns_added_by_join, select_query, context, required_columns_from_joined_table);
/// Some columns from the right join key may be used in the query. These columns will be appended to the block during join.
for (const auto & right_key_name : analyzed_join.key_names_right)
......
......@@ -73,6 +73,19 @@ struct ExpressionAnalyzerData
/// Predicate optimizer overrides the sub queries
bool rewrite_subqueries = false;
/// Columns will be added to block by join.
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
/// Actions which need to be calculated on joined block.
ExpressionActionsPtr joined_block_actions;
/// Columns which will be used in query from joined table. Duplicate names are qualified.
NameSet required_columns_from_joined_table;
/// Such columns will be copied from left join keys during join.
/// Example: select right from tab1 join tab2 on left + 1 = right
NameSet columns_added_by_join_from_right_keys;
protected:
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
const Names & required_result_columns_,
......@@ -228,7 +241,6 @@ private:
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
AnalyzedJoin analyzed_join;
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
/** Remove all unnecessary columns from the list of all available columns of the table (`columns`).
* At the same time, form a set of unknown columns (`unknown_required_source_columns`),
......@@ -236,45 +248,9 @@ private:
*/
void collectUsedColumns();
/** Find the columns that are obtained by JOIN.
*/
void collectJoinedColumns(NameSet & joined_columns);
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr();
/** For star nodes(`*`), expand them to a list of all columns.
* For literal nodes, substitute aliases.
*/
void normalizeTree();
/// Eliminates injective function calls and constant expressions from group by statement
void optimizeGroupBy();
/// Remove duplicate items from ORDER BY.
void optimizeOrderBy();
void optimizeLimitBy();
/// Remove duplicated columns from USING(...).
void optimizeUsing();
/// remove Function_if AST if condition is constant
void optimizeIfWithConstantCondition();
void optimizeIfWithConstantConditionImpl(ASTPtr & current_ast);
bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const;
/// Replacing scalar subqueries with constant values.
void executeScalarSubqueries();
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables();
/** Initialize InterpreterSelectQuery for a subquery in the GLOBAL IN/JOIN section,
* create a temporary table of type Memory and store it in the external_tables dictionary.
*/
void addExternalStorage(ASTPtr & subquery_or_table_name);
void getArrayJoinedColumns();
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions) const;
void addJoinAction(ExpressionActionsPtr & actions, bool only_types) const;
......@@ -311,17 +287,6 @@ private:
void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block);
/** Translate qualified names such as db.table.column, table.column, table_alias.column
* to unqualified names. This is done in a poor transitional way:
* only one ("main") table is supported. Ambiguity is not detected or resolved.
*/
void translateQualifiedNames();
/** Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
* This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
*/
void removeUnneededColumnsFromSelectClause();
bool isRemoteStorage() const;
};
......
......@@ -75,6 +75,9 @@ private:
return false;
}
/** Initialize InterpreterSelectQuery for a subquery in the GLOBAL IN/JOIN section,
* create a temporary table of type Memory and store it in the external_tables dictionary.
*/
void addExternalStorage(ASTPtr & subquery_or_table_name_or_table_expression) const
{
/// With nondistributed queries, creating temporary tables does not make sense.
......
......@@ -48,6 +48,180 @@ namespace
using LogAST = DebugASTLog<false>; /// set to true to enable logs
using Aliases = std::unordered_map<String, ASTPtr>;
/// Add columns from storage to source_columns list.
NamesAndTypesList collectSourceColumns(NamesAndTypesList source_columns, ASTSelectQuery * select_query,
const Context & context, StoragePtr & storage);
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases.
void normalizeTree(
SyntaxAnalyzerResult & result,
const Names & source_columns,
const NameSet & source_columns_set,
const StoragePtr & storage,
const Context & context,
const ASTSelectQuery * select_query,
bool asterisk_left_columns_only);
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns);
/// Replacing scalar subqueries with constant values.
void executeScalarSubqueries(SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const Context & context, size_t subquery_depth);
/// Remove Function_if AST if condition is constant.
void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases);
/// Eliminates injective function calls and constant expressions from group by statement.
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context);
/// Remove duplicate items from ORDER BY.
void optimizeOrderBy(const ASTSelectQuery * select_query);
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query);
/// Remove duplicated columns from USING(...).
void optimizeUsing(const ASTSelectQuery * select_query);
/// Collects information about ARRAY JOIN columns into `result`.
/// NOTE(review): judging by the comment at the call site in SyntaxAnalyzer::analyze, this presumably
/// populates array_join_alias_to_name and array_join_result_to_source - confirm against the definition.
void getArrayJoinedColumns(SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const Names & source_columns, const NameSet & source_columns_set);
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
/// Find the columns that are obtained by JOIN.
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
}
/// Runs the syntax-level analysis and rewrite passes over `query` and returns everything collected
/// along the way (source columns, aliases, join information, etc.) as a SyntaxAnalyzerResult.
///
/// The passes below are executed in a fixed order; several of them document ordering requirements
/// relative to their neighbours, so do not reorder them without checking those comments.
///
/// `source_columns` is taken by value and moved into collectSourceColumns.
/// `storage` is copied into the result; collectSourceColumns receives result.storage by reference
/// and may replace it (see its definition).
SyntaxAnalyzerResult SyntaxAnalyzer::analyze(
const ASTPtr & query,
const Context & context,
const StoragePtr & storage,
NamesAndTypesList source_columns,
const Names & required_result_columns,
size_t subquery_depth) const
{
SyntaxAnalyzerResult result;
result.storage = storage;
/// NOTE: the pointer is copied, the AST is not cloned (the clone call is commented out),
/// so result.query and `query` refer to the same tree.
result.query = query; // ->clone();
/// NOTE(review): presumably nullptr when the query is not an ASTSelectQuery - confirm typeid_cast semantics.
auto * select_query = typeid_cast<ASTSelectQuery *>(result.query.get());
result.source_columns = collectSourceColumns(std::move(source_columns), select_query, context, result.storage);
const auto & settings = context.getSettingsRef();
/// Names of the source columns, kept both as a list and as a set; both forms are passed to the passes below.
Names source_columns_list;
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
source_columns_list.emplace_back(type_name.name);
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
/// Translate qualified names (db.table.column, table.column, table_alias.column) to unqualified names.
translateQualifiedNames(result.query, select_query, source_columns_set, context);
/// Depending on the user's profile, check the rights to execute
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
InJoinSubqueriesPreprocessor(context).process(select_query);
/// Optimizes logical expressions.
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
/// Creates a dictionary `aliases`: alias -> ASTPtr
{
LogAST log;
QueryAliasesVisitor query_aliases_visitor(result.aliases, log.stream());
query_aliases_visitor.visit(query);
}
/// Common subexpression elimination. Rewrite rules.
normalizeTree(result, source_columns_list, source_columns_set, storage,
context, select_query, settings.asterisk_left_columns_only != 0);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(result, select_query, context, subquery_depth);
/// Optimize `if` with a constant condition after constants were substituted instead of scalar subqueries.
optimizeIfWithConstantCondition(result.query, result.aliases);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
/// Remove duplicate items from ORDER BY.
optimizeOrderBy(select_query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(result, select_query, source_columns_list, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
/// Find the columns that are obtained by JOIN; the information is stored in result.analyzed_join.
collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context);
return result;
}
namespace
{
/// Drops every column whose name has already been seen earlier in the list.
/// The first occurrence of each name is kept and the relative order of the survivors is unchanged.
void removeDuplicateColumns(NamesAndTypesList & columns)
{
    std::set<String> seen_names;

    auto current = columns.begin();
    while (current != columns.end())
    {
        const bool is_first_occurrence = seen_names.emplace(current->name).second;

        if (is_first_occurrence)
            ++current;
        else
            current = columns.erase(current);   /// erase() yields the iterator following the removed element.
    }
}
/// Builds the list of source columns for the query.
///
/// If `storage` is empty and a select query is given, tries to obtain the table referenced by the
/// query's first table expression from `context` and stores it into `storage`.
/// When a storage is available, its physical columns are appended to `source_columns`
/// (or replace it if it was empty), followed by the storage's alias columns when a select query is given.
/// Finally, columns with duplicate names are removed, keeping the first occurrence.
NamesAndTypesList collectSourceColumns(NamesAndTypesList source_columns, ASTSelectQuery * select_query,
                                       const Context & context, StoragePtr & storage)
{
    const bool has_select = select_query != nullptr;

    if (has_select && !storage)
    {
        auto table_name = getDatabaseAndTable(*select_query, 0);
        if (table_name)
            storage = context.tryGetTable(table_name->database, table_name->table);
    }

    if (storage)
    {
        auto storage_physical = storage->getColumns().getAllPhysical();

        if (!source_columns.empty())
            source_columns.insert(source_columns.end(), storage_physical.begin(), storage_physical.end());
        else
            source_columns.swap(storage_physical);

        if (has_select)
        {
            const auto & alias_columns = storage->getColumns().aliases;
            source_columns.insert(source_columns.end(), alias_columns.begin(), alias_columns.end());
        }
    }

    removeDuplicateColumns(source_columns);
    return source_columns;
}
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
{
......@@ -790,122 +964,6 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s
}
}
/// Drops every column whose name has already been seen earlier in the list.
/// The first occurrence of each name is kept and the relative order of the survivors is unchanged.
void removeDuplicateColumns(NamesAndTypesList & columns)
{
    std::set<String> seen_names;

    auto current = columns.begin();
    while (current != columns.end())
    {
        const bool is_first_occurrence = seen_names.emplace(current->name).second;

        if (is_first_occurrence)
            ++current;
        else
            current = columns.erase(current);   /// erase() yields the iterator following the removed element.
    }
}
/// Builds the list of source columns for the query.
///
/// If `storage` is empty and a select query is given, tries to obtain the table referenced by the
/// query's first table expression from `context` and stores it into `storage`.
/// When a storage is available, its physical columns are appended to `source_columns`
/// (or replace it if it was empty), followed by the storage's alias columns when a select query is given.
/// Finally, columns with duplicate names are removed, keeping the first occurrence.
NamesAndTypesList collectSourceColumns(NamesAndTypesList source_columns, ASTSelectQuery * select_query,
                                       const Context & context, StoragePtr & storage)
{
    const bool has_select = select_query != nullptr;

    if (has_select && !storage)
    {
        auto table_name = getDatabaseAndTable(*select_query, 0);
        if (table_name)
            storage = context.tryGetTable(table_name->database, table_name->table);
    }

    if (storage)
    {
        auto storage_physical = storage->getColumns().getAllPhysical();

        if (!source_columns.empty())
            source_columns.insert(source_columns.end(), storage_physical.begin(), storage_physical.end());
        else
            source_columns.swap(storage_physical);

        if (has_select)
        {
            const auto & alias_columns = storage->getColumns().aliases;
            source_columns.insert(source_columns.end(), alias_columns.begin(), alias_columns.end());
        }
    }

    removeDuplicateColumns(source_columns);
    return source_columns;
}
}
/// Runs the syntax-level analysis and rewrite passes over `query` and returns everything collected
/// along the way (source columns, aliases, join information, etc.) as a SyntaxAnalyzerResult.
///
/// The passes below are executed in a fixed order; several of them document ordering requirements
/// relative to their neighbours, so do not reorder them without checking those comments.
///
/// `source_columns` is taken by value and moved into collectSourceColumns.
/// `storage` is copied into the result; collectSourceColumns receives result.storage by reference
/// and may replace it (see its definition).
SyntaxAnalyzerResult SyntaxAnalyzer::analyze(const ASTPtr & query,
const Context & context,
const StoragePtr & storage,
NamesAndTypesList source_columns,
const Names & required_result_columns,
size_t subquery_depth) const
{
SyntaxAnalyzerResult result;
result.storage = storage;
/// NOTE: the pointer is copied, the AST is not cloned (the clone call is commented out),
/// so result.query and `query` refer to the same tree.
result.query = query; // ->clone();
/// NOTE(review): presumably nullptr when the query is not an ASTSelectQuery - confirm typeid_cast semantics.
auto * select_query = typeid_cast<ASTSelectQuery *>(result.query.get());
result.source_columns = collectSourceColumns(std::move(source_columns), select_query, context, result.storage);
const auto & settings = context.getSettingsRef();
/// Names of the source columns, kept both as a list and as a set; both forms are passed to the passes below.
Names source_columns_list;
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
source_columns_list.emplace_back(type_name.name);
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
/// Translate qualified names (db.table.column, table.column, table_alias.column) to unqualified names.
translateQualifiedNames(result.query, select_query, source_columns_set, context);
/// Depending on the user's profile, check the rights to execute
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
InJoinSubqueriesPreprocessor(context).process(select_query);
/// Optimizes logical expressions.
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
/// Creates a dictionary `aliases`: alias -> ASTPtr
{
LogAST log;
QueryAliasesVisitor query_aliases_visitor(result.aliases, log.stream());
query_aliases_visitor.visit(query);
}
/// Common subexpression elimination. Rewrite rules.
normalizeTree(result, source_columns_list, source_columns_set, storage,
context, select_query, settings.asterisk_left_columns_only != 0);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(result, select_query, context, subquery_depth);
/// Optimize `if` with a constant condition after constants were substituted instead of scalar subqueries.
optimizeIfWithConstantCondition(result.query, result.aliases);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
/// Remove duplicate items from ORDER BY.
optimizeOrderBy(select_query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(result, select_query, source_columns_list, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
/// Find the columns that are obtained by JOIN; the information is stored in result.analyzed_join.
collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context);
return result;
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册