提交 46b74832 编写于 作者: M Michael Kolupaev

clickhouse: ExpressionAnalyzer: development [#CONV-7444].

上级 2fd47144
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
namespace DB namespace DB
{ {
typedef std::pair<std::string, std::string> NameWithAlias; typedef std::pair<std::string, std::string> NameWithAlias;
typedef std::vector<NameWithAlias> NamesWithAliases; typedef std::vector<NameWithAlias> NamesWithAliases;
...@@ -91,9 +91,6 @@ public: ...@@ -91,9 +91,6 @@ public:
/// - Не удаляет "неожиданные" столбцы (например, добавленные функциями). /// - Не удаляет "неожиданные" столбцы (например, добавленные функциями).
void finalize(const Names & output_columns); void finalize(const Names & output_columns);
/// Убирает лишние входные столбцы из последовательности наборов действий, каждый из которых принимает на вход результат предыдущего.
static void finalizeChain(std::vector<SharedPtr<ExpressionActions> > & chain, Names output_columns);
/// Получить список входных столбцов. /// Получить список входных столбцов.
Names getRequiredColumns() const Names getRequiredColumns() const
{ {
...@@ -123,7 +120,47 @@ private: ...@@ -123,7 +120,47 @@ private:
}; };
typedef SharedPtr<ExpressionActions> ExpressionActionsPtr; typedef SharedPtr<ExpressionActions> ExpressionActionsPtr;
typedef std::vector<ExpressionActionsPtr> ExpressionActionsChain;
struct ExpressionActionsChain
{
struct Step
{
ExpressionActionsPtr actions;
Names required_output;
Step(ExpressionActionsPtr actions_ = NULL, Names required_output_ = Names())
: actions(actions_), required_output(required_output_) {}
};
typedef std::vector<Step> Steps;
Settings settings;
Steps steps;
void addStep()
{
if (steps.empty())
throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
NamesAndTypesList columns = steps.back().actions->getSampleBlock().getColumnsList();
steps.push_back(Step(new ExpressionActions(columns, settings)));
}
void finalize()
{
for (int i = static_cast<int>(steps.size()) - 1; i >= 0; --i)
{
steps[i].actions->finalize(steps[i].required_output);
if (i > 0)
{
const NamesAndTypesList & columns = steps[i].actions->getRequiredColumnsWithTypes();
for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
steps[i-1].required_output.push_back(it->first);
}
}
}
};
} }
...@@ -38,25 +38,28 @@ public: ...@@ -38,25 +38,28 @@ public:
void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates); void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates);
/// Эти методы позволяют собрать цепочку преобразований над блоком, получающую значения в нужных секциях запроса. /** Эти методы позволяют собрать цепочку преобразований над блоком, получающую значения в нужных секциях запроса.
/// В конце нужно финализировать получившуюся цепочку. *
* Пример использования:
* ExpressionActionsChain chain;
* analyzer.appendWhere(chain);
* chain.addStep();
* analyzer.appendSelect(chain);
* analyzer.appendOrderBy(chain);
* chain.finalize();
*/
/// До агрегации: /// До агрегации:
bool appendWhere(ExpressionActionsChain & chain); bool appendWhere(ExpressionActionsChain & chain);
bool appendGroupBy(ExpressionActionsChain & chain); bool appendGroupBy(ExpressionActionsChain & chain);
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain);
/// Финализирует всю цепочку.
void appendProjectBeforeAggregation(ExpressionActionsChain & chain);
/// После агрегации: /// После агрегации:
bool appendHaving(ExpressionActionsChain & chain); bool appendHaving(ExpressionActionsChain & chain);
void appendSelect(ExpressionActionsChain & chain); void appendSelect(ExpressionActionsChain & chain);
bool appendOrderBy(ExpressionActionsChain & chain); bool appendOrderBy(ExpressionActionsChain & chain);
/// Действия, удаляющие из блока столбцы, кроме столбцов из указанных секций запроса. /// Удаляет все столбцы кроме выбираемых SELECT, упорядочивает оставшиеся столбцы и переименовывает их в алиасы.
/// Столбцы из секции SELECT также переупорядочиваются и переименовываются в алиасы. void appendProjectResult(ExpressionActionsChain & chain);
/// Финализирует всю цепочку.
void appendProject(ExpressionActionsChain & chain, bool select_section, bool order_by_section);
/// Если ast не запрос SELECT, просто получает все действия для вычисления выражения. /// Если ast не запрос SELECT, просто получает все действия для вычисления выражения.
ExpressionActionsPtr getActions(); ExpressionActionsPtr getActions();
...@@ -141,6 +144,8 @@ private: ...@@ -141,6 +144,8 @@ private:
/// Заменить avg(x) на sum(Sign * x) / sum(Sign) /// Заменить avg(x) на sum(Sign * x) / sum(Sign)
ASTPtr rewriteAvg(const ASTFunction * node); ASTPtr rewriteAvg(const ASTFunction * node);
void initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns);
void assertSelect(); void assertSelect();
void assertAggregation(); void assertAggregation();
}; };
......
...@@ -133,6 +133,18 @@ std::string ExpressionActions::Action::toString() const ...@@ -133,6 +133,18 @@ std::string ExpressionActions::Action::toString() const
} }
ss << " )"; ss << " )";
break; break;
case PROJECT:
ss << "{";
for (size_t i = 0; i < projection.size(); ++i)
{
if (i)
ss << ", ";
ss << projection[i].first;
if (projection[i].second != "" && projection[i].second != projection[i].first)
ss << "=>" << projection[i].second;
}
ss << "}";
break;
default: default:
throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR); throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR);
} }
...@@ -195,7 +207,8 @@ void ExpressionActions::finalize(const Names & output_columns) ...@@ -195,7 +207,8 @@ void ExpressionActions::finalize(const Names & output_columns)
{ {
const std::string name = output_columns[i]; const std::string name = output_columns[i];
if (!sample_block.has(name)) if (!sample_block.has(name))
throw Exception("Unknown column: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); throw Exception("Unknown column: " + name + ", there are only columns "
+ sample_block.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER);
final_columns.insert(name); final_columns.insert(name);
} }
...@@ -208,6 +221,10 @@ void ExpressionActions::finalize(const Names & output_columns) ...@@ -208,6 +221,10 @@ void ExpressionActions::finalize(const Names & output_columns)
{ {
used_columns.insert(actions[i].argument_names[j]); used_columns.insert(actions[i].argument_names[j]);
} }
for (size_t j = 0; j < actions[i].projection.size(); ++j)
{
used_columns.insert(actions[i].projection[j].first);
}
} }
for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();) for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();)
{ {
...@@ -215,7 +232,8 @@ void ExpressionActions::finalize(const Names & output_columns) ...@@ -215,7 +232,8 @@ void ExpressionActions::finalize(const Names & output_columns)
++it; ++it;
if (!used_columns.count(it0->first)) if (!used_columns.count(it0->first))
{ {
sample_block.erase(it0->first); if (sample_block.has(it0->first))
sample_block.erase(it0->first);
input_columns.erase(it0); input_columns.erase(it0);
} }
} }
...@@ -228,20 +246,6 @@ void ExpressionActions::finalize(const Names & output_columns) ...@@ -228,20 +246,6 @@ void ExpressionActions::finalize(const Names & output_columns)
} }
} }
/// Finalizes every element of the chain, starting from the last one.
/// `columns` names the columns required from the last element; after each element is
/// finalized, `columns` is replaced by the names of that element's input columns,
/// which become the required output of the element before it.
void ExpressionActions::finalizeChain(ExpressionActionsChain & chain, Names columns)
{
    for (size_t position = chain.size(); position-- > 0;)
    {
        ExpressionActions & link = *chain[position];
        link.finalize(columns);

        /// Whatever this link still reads is what the preceding link has to produce.
        columns.clear();
        NamesAndTypesList::const_iterator input = link.input_columns.begin();
        for (; input != link.input_columns.end(); ++input)
            columns.push_back(input->first);
    }
}
std::string ExpressionActions::dumpActions() const std::string ExpressionActions::dumpActions() const
{ {
std::stringstream ss; std::stringstream ss;
......
...@@ -653,6 +653,14 @@ void ExpressionAnalyzer::assertAggregation() ...@@ -653,6 +653,14 @@ void ExpressionAnalyzer::assertAggregation()
throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR); throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
} }
/// Makes sure the chain has a first step.
/// If the chain is empty, copies the analyzer's settings into it and adds a step
/// whose actions take `columns` as input. A non-empty chain is left untouched.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
    if (!chain.steps.empty())
        return;

    chain.settings = settings;
    ExpressionActionsPtr initial_actions = new ExpressionActions(columns, settings);
    chain.steps.push_back(ExpressionActionsChain::Step(initial_actions));
}
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain) bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{ {
...@@ -661,10 +669,11 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain) ...@@ -661,10 +669,11 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
if (!select_query->where_expression) if (!select_query->where_expression)
return false; return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->where_expression, false, false, *actions); step.required_output.push_back(select_query->where_expression->getColumnName());
getActionsImpl(select_query->where_expression, false, false, *step.actions);
return true; return true;
} }
...@@ -676,13 +685,14 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain) ...@@ -676,13 +685,14 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
if (!select_query->group_expression_list) if (!select_query->group_expression_list)
return false; return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
ASTs asts = select_query->group_expression_list->children; ASTs asts = select_query->group_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i) for (size_t i = 0; i < asts.size(); ++i)
{ {
getActionsImpl(asts[i], false, false, *actions); step.required_output.push_back(asts[i]->getColumnName());
getActionsImpl(asts[i], false, false, *step.actions);
} }
return true; return true;
...@@ -692,53 +702,16 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai ...@@ -692,53 +702,16 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai
{ {
assertAggregation(); assertAggregation();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
getActionsBeforeAggregationImpl(select_query->select_expression_list, &*actions, NULL); getActionsBeforeAggregationImpl(select_query->select_expression_list, &*step.actions, &step.required_output);
if (select_query->having_expression) if (select_query->having_expression)
getActionsBeforeAggregationImpl(select_query->having_expression, &*actions, NULL); getActionsBeforeAggregationImpl(select_query->having_expression, &*step.actions, &step.required_output);
if (select_query->order_expression_list) if (select_query->order_expression_list)
getActionsBeforeAggregationImpl(select_query->order_expression_list, &*actions, NULL); getActionsBeforeAggregationImpl(select_query->order_expression_list, &*step.actions, &step.required_output);
}
/// Финализирует всю цепочку.
void ExpressionAnalyzer::appendProjectBeforeAggregation(ExpressionActionsChain & chain)
{
assertAggregation();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
Names result_columns;
getActionsBeforeAggregationImpl(select_query->select_expression_list, NULL, &result_columns);
if (select_query->having_expression)
getActionsBeforeAggregationImpl(select_query->having_expression, NULL, &result_columns);
if (select_query->order_expression_list)
getActionsBeforeAggregationImpl(select_query->order_expression_list, NULL, &result_columns);
if (select_query->where_expression)
{
result_columns.push_back(select_query->where_expression->getColumnName());
}
if (select_query->group_expression_list)
{
ASTs asts = select_query->group_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
result_columns.push_back(asts[i]->getColumnName());
}
}
actions->add(ExpressionActions::Action(result_columns));
ExpressionActions::finalizeChain(chain, result_columns);
} }
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain) bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
...@@ -748,10 +721,11 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain) ...@@ -748,10 +721,11 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
if (!select_query->having_expression) if (!select_query->having_expression)
return false; return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, aggregated_columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->having_expression, false, false, *actions); step.required_output.push_back(select_query->having_expression->getColumnName());
getActionsImpl(select_query->having_expression, false, false, *step.actions);
return true; return true;
} }
...@@ -760,10 +734,16 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain) ...@@ -760,10 +734,16 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{ {
assertSelect(); assertSelect();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, aggregated_columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->select_expression_list, false, false, *step.actions);
getActionsImpl(select_query->select_expression_list, false, false, *actions); ASTs asts = select_query->select_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
step.required_output.push_back(asts[i]->getColumnName());
}
} }
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain) bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
...@@ -773,51 +753,41 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain) ...@@ -773,51 +753,41 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
if (!select_query->order_expression_list) if (!select_query->order_expression_list)
return false; return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, aggregated_columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->order_expression_list, false, false, *actions); getActionsImpl(select_query->order_expression_list, false, false, *step.actions);
ASTs asts = select_query->order_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
ASTOrderByElement * ast = dynamic_cast<ASTOrderByElement *>(&*asts[i]);
if (!ast || ast->children.size() != 1)
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children[0];
step.required_output.push_back(order_expression->getColumnName());
}
return true; return true;
} }
void ExpressionAnalyzer::appendProject(ExpressionActionsChain & chain, bool select_section, bool order_by_section) void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{ {
assertSelect(); assertSelect();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings); initChain(chain, aggregated_columns);
chain.push_back(actions); ExpressionActionsChain::Step & step = chain.steps.back();
NamesWithAliases result_columns; NamesWithAliases result_columns;
Names result_names;
if (select_section) ASTs asts = select_query->select_expression_list->children;
{ for (size_t i = 0; i < asts.size(); ++i)
ASTs asts = select_query->select_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias()));
result_names.push_back(result_columns.back().first);
}
}
if (order_by_section && select_query->order_expression_list)
{ {
ASTs asts = select_query->order_expression_list->children; result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias()));
for (size_t i = 0; i < asts.size(); ++i) step.required_output.push_back(result_columns.back().second);
{
ASTOrderByElement * ast = dynamic_cast<ASTOrderByElement *>(&*asts[i]);
if (!ast || ast->children.size() != 1)
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children[0];
result_columns.push_back(NameWithAlias(order_expression->getColumnName(), ""));
result_names.push_back(result_columns.back().first);
}
} }
actions->add(ExpressionActions::Action(result_columns)); step.actions->add(ExpressionActions::Action(result_columns));
ExpressionActions::finalizeChain(chain, result_names);
} }
...@@ -832,7 +802,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionA ...@@ -832,7 +802,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionA
{ {
if (result_columns) if (result_columns)
result_columns->push_back(arguments[i]->getColumnName()); result_columns->push_back(arguments[i]->getColumnName());
if (actions); if (actions)
getActionsImpl(arguments[i], false, false, *actions); getActionsImpl(arguments[i], false, false, *actions);
} }
} }
......
...@@ -81,29 +81,32 @@ int main(int argc, char ** argv) ...@@ -81,29 +81,32 @@ int main(int argc, char ** argv)
std::cout << "\n"; std::cout << "\n";
ExpressionActionsChain before; ExpressionActionsChain before;
analyzer.appendWhere(before); if (analyzer.appendWhere(before))
before.addStep();
analyzer.appendAggregateFunctionsArguments(before); analyzer.appendAggregateFunctionsArguments(before);
analyzer.appendGroupBy(before); analyzer.appendGroupBy(before);
analyzer.appendProjectBeforeAggregation(before); before.finalize();
ExpressionActionsChain after; ExpressionActionsChain after;
analyzer.appendHaving(after); if (analyzer.appendHaving(after))
after.addStep();
analyzer.appendSelect(after); analyzer.appendSelect(after);
analyzer.appendOrderBy(after); analyzer.appendOrderBy(after);
analyzer.appendProject(after, true, true); after.addStep();
analyzer.appendProject(after, true, false); analyzer.appendProjectResult(after);
after.finalize();
std::cout << "before aggregation:\n\n"; std::cout << "before aggregation:\n\n";
for (size_t i = 0; i < before.size(); ++i) for (size_t i = 0; i < before.steps.size(); ++i)
{ {
before[i]->dumpActions(); std::cout << before.steps[i].actions->dumpActions();
std::cout << std::endl; std::cout << std::endl;
} }
std::cout << "\nafter aggregation:\n\n"; std::cout << "\nafter aggregation:\n\n";
for (size_t i = 0; i < after.size(); ++i) for (size_t i = 0; i < after.steps.size(); ++i)
{ {
after[i]->dumpActions(); std::cout << after.steps[i].actions->dumpActions();
std::cout << std::endl; std::cout << std::endl;
} }
} }
...@@ -114,15 +117,17 @@ int main(int argc, char ** argv) ...@@ -114,15 +117,17 @@ int main(int argc, char ** argv)
if (dynamic_cast<ASTSelectQuery *>(&*root)) if (dynamic_cast<ASTSelectQuery *>(&*root))
{ {
ExpressionActionsChain chain; ExpressionActionsChain chain;
analyzer.appendWhere(chain); if (analyzer.appendWhere(chain))
chain.addStep();
analyzer.appendSelect(chain); analyzer.appendSelect(chain);
analyzer.appendOrderBy(chain); analyzer.appendOrderBy(chain);
analyzer.appendProject(chain, true, true); chain.addStep();
analyzer.appendProject(chain, true, false); analyzer.appendProjectResult(chain);
chain.finalize();
for (size_t i = 0; i < chain.size(); ++i) for (size_t i = 0; i < chain.steps.size(); ++i)
{ {
chain[i]->dumpActions(); std::cout << chain.steps[i].actions->dumpActions();
std::cout << std::endl; std::cout << std::endl;
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册