提交 46b74832 编写于 作者: M Michael Kolupaev

clickhouse: ExpressionAnalyzer: development [#CONV-7444].

上级 2fd47144
......@@ -6,7 +6,7 @@
namespace DB
{
/// A column name (first) paired with the alias it should be exposed under (second).
/// The alias may be an empty string, meaning the column has no alias.
typedef std::pair<std::string, std::string> NameWithAlias;
/// An ordered list of (column name, alias) pairs.
typedef std::vector<NameWithAlias> NamesWithAliases;
......@@ -91,9 +91,6 @@ public:
/// - Не удаляет "неожиданные" столбцы (например, добавленные функциями).
void finalize(const Names & output_columns);
/// Убирает лишние входные столбцы из последовательности наборов действий, каждый из которых принимает на вход результат предыдущего.
static void finalizeChain(std::vector<SharedPtr<ExpressionActions> > & chain, Names output_columns);
/// Получить список входных столбцов.
Names getRequiredColumns() const
{
......@@ -123,7 +120,47 @@ private:
};
typedef SharedPtr<ExpressionActions> ExpressionActionsPtr;
typedef std::vector<ExpressionActionsPtr> ExpressionActionsChain;
struct ExpressionActionsChain
{
struct Step
{
ExpressionActionsPtr actions;
Names required_output;
Step(ExpressionActionsPtr actions_ = NULL, Names required_output_ = Names())
: actions(actions_), required_output(required_output_) {}
};
typedef std::vector<Step> Steps;
Settings settings;
Steps steps;
void addStep()
{
if (steps.empty())
throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
NamesAndTypesList columns = steps.back().actions->getSampleBlock().getColumnsList();
steps.push_back(Step(new ExpressionActions(columns, settings)));
}
void finalize()
{
for (int i = static_cast<int>(steps.size()) - 1; i >= 0; --i)
{
steps[i].actions->finalize(steps[i].required_output);
if (i > 0)
{
const NamesAndTypesList & columns = steps[i].actions->getRequiredColumnsWithTypes();
for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
steps[i-1].required_output.push_back(it->first);
}
}
}
};
}
......@@ -38,25 +38,28 @@ public:
void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates);
/// Эти методы позволяют собрать цепочку преобразований над блоком, получающую значения в нужных секциях запроса.
/// В конце нужно финализировать получившуюся цепочку.
/** Эти методы позволяют собрать цепочку преобразований над блоком, получающую значения в нужных секциях запроса.
*
* Пример использования:
* ExpressionActionsChain chain;
* analyzer.appendWhere(chain);
* chain.addStep();
* analyzer.appendSelect(chain);
* analyzer.appendOrderBy(chain);
* chain.finalize();
*/
/// До агрегации:
bool appendWhere(ExpressionActionsChain & chain);
bool appendGroupBy(ExpressionActionsChain & chain);
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain);
/// Финализирует всю цепочку.
void appendProjectBeforeAggregation(ExpressionActionsChain & chain);
/// После агрегации:
bool appendHaving(ExpressionActionsChain & chain);
void appendSelect(ExpressionActionsChain & chain);
bool appendOrderBy(ExpressionActionsChain & chain);
/// Действия, удаляющие из блока столбцы, кроме столбцов из указанных секций запроса.
/// Столбцы из секции SELECT также переупорядочиваются и переименовываются в алиасы.
/// Финализирует всю цепочку.
void appendProject(ExpressionActionsChain & chain, bool select_section, bool order_by_section);
/// Удаляет все столбцы кроме выбираемых SELECT, упорядочивает оставшиеся столбцы и переименовывает их в алиасы.
void appendProjectResult(ExpressionActionsChain & chain);
/// Если ast не запрос SELECT, просто получает все действия для вычисления выражения.
ExpressionActionsPtr getActions();
......@@ -141,6 +144,8 @@ private:
/// Заменить avg(x) на sum(Sign * x) / sum(Sign)
ASTPtr rewriteAvg(const ASTFunction * node);
void initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns);
void assertSelect();
void assertAggregation();
};
......
......@@ -133,6 +133,18 @@ std::string ExpressionActions::Action::toString() const
}
ss << " )";
break;
case PROJECT:
ss << "{";
for (size_t i = 0; i < projection.size(); ++i)
{
if (i)
ss << ", ";
ss << projection[i].first;
if (projection[i].second != "" && projection[i].second != projection[i].first)
ss << "=>" << projection[i].second;
}
ss << "}";
break;
default:
throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR);
}
......@@ -195,7 +207,8 @@ void ExpressionActions::finalize(const Names & output_columns)
{
const std::string name = output_columns[i];
if (!sample_block.has(name))
throw Exception("Unknown column: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
throw Exception("Unknown column: " + name + ", there are only columns "
+ sample_block.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER);
final_columns.insert(name);
}
......@@ -208,6 +221,10 @@ void ExpressionActions::finalize(const Names & output_columns)
{
used_columns.insert(actions[i].argument_names[j]);
}
for (size_t j = 0; j < actions[i].projection.size(); ++j)
{
used_columns.insert(actions[i].projection[j].first);
}
}
for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();)
{
......@@ -215,7 +232,8 @@ void ExpressionActions::finalize(const Names & output_columns)
++it;
if (!used_columns.count(it0->first))
{
sample_block.erase(it0->first);
if (sample_block.has(it0->first))
sample_block.erase(it0->first);
input_columns.erase(it0);
}
}
......@@ -228,20 +246,6 @@ void ExpressionActions::finalize(const Names & output_columns)
}
}
/// Finalizes a sequence of action sets from the last element to the first.
/// Each element is finalized against `columns`; `columns` is then replaced by the names
/// of that element's input columns, which become the required output of the preceding element.
void ExpressionActions::finalizeChain(ExpressionActionsChain & chain, Names columns)
{
    for (size_t remaining = chain.size(); remaining > 0; --remaining)
    {
        const size_t idx = remaining - 1;
        chain[idx]->finalize(columns);

        /// What this element needs as input is what the previous one must output.
        columns.clear();
        const NamesAndTypesList & inputs = chain[idx]->input_columns;
        for (NamesAndTypesList::const_iterator input = inputs.begin(); input != inputs.end(); ++input)
            columns.push_back(input->first);
    }
}
std::string ExpressionActions::dumpActions() const
{
std::stringstream ss;
......
......@@ -653,6 +653,14 @@ void ExpressionAnalyzer::assertAggregation()
throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
}
/// Seeds an empty chain with its first step, built over `columns`, and copies the
/// analyzer's settings into the chain. Does nothing if the chain already has steps.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
    if (!chain.steps.empty())
        return;

    chain.settings = settings;

    ExpressionActionsChain::Step first_step(new ExpressionActions(columns, settings));
    chain.steps.push_back(first_step);
}
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{
......@@ -661,10 +669,11 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
if (!select_query->where_expression)
return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->where_expression, false, false, *actions);
step.required_output.push_back(select_query->where_expression->getColumnName());
getActionsImpl(select_query->where_expression, false, false, *step.actions);
return true;
}
......@@ -676,13 +685,14 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
if (!select_query->group_expression_list)
return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, columns);
ExpressionActionsChain::Step & step = chain.steps.back();
ASTs asts = select_query->group_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
getActionsImpl(asts[i], false, false, *actions);
step.required_output.push_back(asts[i]->getColumnName());
getActionsImpl(asts[i], false, false, *step.actions);
}
return true;
......@@ -692,53 +702,16 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai
{
assertAggregation();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getActionsBeforeAggregationImpl(select_query->select_expression_list, &*actions, NULL);
getActionsBeforeAggregationImpl(select_query->select_expression_list, &*step.actions, &step.required_output);
if (select_query->having_expression)
getActionsBeforeAggregationImpl(select_query->having_expression, &*actions, NULL);
getActionsBeforeAggregationImpl(select_query->having_expression, &*step.actions, &step.required_output);
if (select_query->order_expression_list)
getActionsBeforeAggregationImpl(select_query->order_expression_list, &*actions, NULL);
}
/// Финализирует всю цепочку.
void ExpressionAnalyzer::appendProjectBeforeAggregation(ExpressionActionsChain & chain)
{
assertAggregation();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
Names result_columns;
getActionsBeforeAggregationImpl(select_query->select_expression_list, NULL, &result_columns);
if (select_query->having_expression)
getActionsBeforeAggregationImpl(select_query->having_expression, NULL, &result_columns);
if (select_query->order_expression_list)
getActionsBeforeAggregationImpl(select_query->order_expression_list, NULL, &result_columns);
if (select_query->where_expression)
{
result_columns.push_back(select_query->where_expression->getColumnName());
}
if (select_query->group_expression_list)
{
ASTs asts = select_query->group_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
result_columns.push_back(asts[i]->getColumnName());
}
}
actions->add(ExpressionActions::Action(result_columns));
ExpressionActions::finalizeChain(chain, result_columns);
getActionsBeforeAggregationImpl(select_query->order_expression_list, &*step.actions, &step.required_output);
}
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
......@@ -748,10 +721,11 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
if (!select_query->having_expression)
return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->having_expression, false, false, *actions);
step.required_output.push_back(select_query->having_expression->getColumnName());
getActionsImpl(select_query->having_expression, false, false, *step.actions);
return true;
}
......@@ -760,10 +734,16 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{
assertSelect();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->select_expression_list, false, false, *step.actions);
getActionsImpl(select_query->select_expression_list, false, false, *actions);
ASTs asts = select_query->select_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
step.required_output.push_back(asts[i]->getColumnName());
}
}
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
......@@ -773,51 +753,41 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
if (!select_query->order_expression_list)
return false;
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getActionsImpl(select_query->order_expression_list, false, false, *actions);
getActionsImpl(select_query->order_expression_list, false, false, *step.actions);
ASTs asts = select_query->order_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
ASTOrderByElement * ast = dynamic_cast<ASTOrderByElement *>(&*asts[i]);
if (!ast || ast->children.size() != 1)
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children[0];
step.required_output.push_back(order_expression->getColumnName());
}
return true;
}
void ExpressionAnalyzer::appendProject(ExpressionActionsChain & chain, bool select_section, bool order_by_section)
void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{
assertSelect();
ExpressionActionsPtr actions = new ExpressionActions(chain.empty() ? aggregated_columns : chain.back()->getSampleBlock().getColumnsList(), settings);
chain.push_back(actions);
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
NamesWithAliases result_columns;
Names result_names;
if (select_section)
{
ASTs asts = select_query->select_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias()));
result_names.push_back(result_columns.back().first);
}
}
if (order_by_section && select_query->order_expression_list)
ASTs asts = select_query->select_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
ASTs asts = select_query->order_expression_list->children;
for (size_t i = 0; i < asts.size(); ++i)
{
ASTOrderByElement * ast = dynamic_cast<ASTOrderByElement *>(&*asts[i]);
if (!ast || ast->children.size() != 1)
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children[0];
result_columns.push_back(NameWithAlias(order_expression->getColumnName(), ""));
result_names.push_back(result_columns.back().first);
}
result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias()));
step.required_output.push_back(result_columns.back().second);
}
actions->add(ExpressionActions::Action(result_columns));
ExpressionActions::finalizeChain(chain, result_names);
step.actions->add(ExpressionActions::Action(result_columns));
}
......@@ -832,7 +802,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionA
{
if (result_columns)
result_columns->push_back(arguments[i]->getColumnName());
if (actions);
if (actions)
getActionsImpl(arguments[i], false, false, *actions);
}
}
......
......@@ -81,29 +81,32 @@ int main(int argc, char ** argv)
std::cout << "\n";
ExpressionActionsChain before;
analyzer.appendWhere(before);
if (analyzer.appendWhere(before))
before.addStep();
analyzer.appendAggregateFunctionsArguments(before);
analyzer.appendGroupBy(before);
analyzer.appendProjectBeforeAggregation(before);
before.finalize();
ExpressionActionsChain after;
analyzer.appendHaving(after);
if (analyzer.appendHaving(after))
after.addStep();
analyzer.appendSelect(after);
analyzer.appendOrderBy(after);
analyzer.appendProject(after, true, true);
analyzer.appendProject(after, true, false);
after.addStep();
analyzer.appendProjectResult(after);
after.finalize();
std::cout << "before aggregation:\n\n";
for (size_t i = 0; i < before.size(); ++i)
for (size_t i = 0; i < before.steps.size(); ++i)
{
before[i]->dumpActions();
std::cout << before.steps[i].actions->dumpActions();
std::cout << std::endl;
}
std::cout << "\nafter aggregation:\n\n";
for (size_t i = 0; i < after.size(); ++i)
for (size_t i = 0; i < after.steps.size(); ++i)
{
after[i]->dumpActions();
std::cout << after.steps[i].actions->dumpActions();
std::cout << std::endl;
}
}
......@@ -114,15 +117,17 @@ int main(int argc, char ** argv)
if (dynamic_cast<ASTSelectQuery *>(&*root))
{
ExpressionActionsChain chain;
analyzer.appendWhere(chain);
if (analyzer.appendWhere(chain))
chain.addStep();
analyzer.appendSelect(chain);
analyzer.appendOrderBy(chain);
analyzer.appendProject(chain, true, true);
analyzer.appendProject(chain, true, false);
chain.addStep();
analyzer.appendProjectResult(chain);
chain.finalize();
for (size_t i = 0; i < chain.size(); ++i)
for (size_t i = 0; i < chain.steps.size(); ++i)
{
chain[i]->dumpActions();
std::cout << chain.steps[i].actions->dumpActions();
std::cout << std::endl;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册