ExpressionAnalyzer.cpp 45.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
#include <DB/DataTypes/FieldToDataType.h>

#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTSet.h>
11
#include <DB/Parsers/ASTOrderByElement.h>
12 13 14 15

#include <DB/DataTypes/DataTypeSet.h>
#include <DB/DataTypes/DataTypeTuple.h>
#include <DB/DataTypes/DataTypeExpression.h>
16
#include <DB/DataTypes/DataTypeNested.h>
17 18 19 20 21 22 23 24 25 26 27 28 29 30
#include <DB/Columns/ColumnSet.h>
#include <DB/Columns/ColumnExpression.h>

#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>

#include <DB/Storages/StorageMergeTree.h>
#include <DB/Storages/StorageDistributed.h>


namespace DB
{


A
Alexey Milovidov 已提交
31
/** Returns a pointer to the alias field of the node when the node is a function,
  * an identifier or a literal. Returns NULL for every other node type.
  * The pointer refers to the field inside the node, so writing through it changes the node.
  */
static std::string * getAlias(ASTPtr & ast)
{
	if (ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast))
		return &func->alias;

	if (ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*ast))
		return &identifier->alias;

	if (ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&*ast))
		return &literal->alias;

	/// Nodes of other types carry no alias field that this helper knows about.
	return NULL;
}

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
/** Stores `alias` in the node when the node is a function, an identifier or a literal.
  * Throws UNKNOWN_TYPE_OF_AST_NODE for any other node type.
  */
static void setAlias(ASTPtr & ast, const std::string & alias)
{
	/// getAlias recognises exactly the node types that can hold an alias.
	std::string * target = getAlias(ast);

	if (!target)
		throw Exception("Can't set alias of " + ast->getColumnName(), ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

	*target = alias;
}

71 72 73 74 75

void ExpressionAnalyzer::init()
{
	select_query = dynamic_cast<ASTSelectQuery *>(&*ast);
	has_aggregation = false;
76

77 78
	createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true.
	normalizeTree();
79

80
	getArrayJoinedColumns();
81

82 83
	removeUnusedColumns();

84
	/// Найдем агрегатные функции.
85 86
	if (select_query && (select_query->group_expression_list || select_query->having_expression))
		has_aggregation = true;
87

88
	ExpressionActions temp_actions(columns, settings);
89 90 91 92 93 94 95 96 97 98 99 100 101

	if (select_query && select_query->array_join_expression_list)
	{
		const ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts.size(); ++i)
		{
			ASTPtr ast = array_join_asts[i];
			getRootActionsImpl(ast, true, false, temp_actions);
		}

		addMultipleArrayJoinAction(temp_actions);

		const Block & temp_sample = temp_actions.getSampleBlock();
102
		for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
103
		{
104
			columns_after_array_join.push_back(NameAndTypePair(it->first, temp_sample.getByName(it->first).type));
105 106
		}
	}
107 108 109 110 111
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (!array_join_result_to_source.count(it->first))
			columns_after_array_join.push_back(*it);
	}
112
	getAggregatesImpl(ast, temp_actions);
113

114 115
	if (has_aggregation)
	{
116
		assertSelect();
117

118 119 120
		/// Найдем ключи агрегации.
		if (select_query->group_expression_list)
		{
121
			NameSet unique_keys;
122 123 124
			const ASTs & group_asts = select_query->group_expression_list->children;
			for (size_t i = 0; i < group_asts.size(); ++i)
			{
125
				getRootActionsImpl(group_asts[i], true, false, temp_actions);
126 127
				NameAndTypePair key;
				key.first = group_asts[i]->getColumnName();
128
				key.second = temp_actions.getSampleBlock().getByName(key.first).type;
129
				aggregation_keys.push_back(key);
130

131 132 133 134 135
				if (!unique_keys.count(key.first))
				{
					aggregated_columns.push_back(key);
					unique_keys.insert(key.first);
				}
136 137
			}
		}
138

139 140 141 142 143 144 145 146
		for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
		{
			AggregateDescription & desc = aggregate_descriptions[i];
			aggregated_columns.push_back(NameAndTypePair(desc.column_name, desc.function->getReturnType()));
		}
	}
	else
	{
147
		aggregated_columns = columns_after_array_join;
148 149 150 151
	}
}


152
/** Linear search for a column by name.
  * Returns an iterator to the first element of `cols` whose name equals `name`,
  * or cols.end() when there is no such element.
  */
NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name, NamesAndTypesList & cols)
{
	NamesAndTypesList::iterator it;
	for (it = cols.begin(); it != cols.end(); ++it)
		if (it->first == name)
			break;
	return it;
}


162 163
/// ignore_levels - алиасы в скольки верхних уровнях поддерева нужно игнорировать.
/// Например, при ignore_levels=1 ast не может быть занесен в словарь, но его дети могут.
164
void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels)
165
{
166
	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);
167

168 169
	/// Обход снизу-вверх. Не опускаемся в подзапросы.
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
170 171
	{
		int new_ignore_levels = std::max(0, ignore_levels - 1);
172 173
		/// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем
		///  (пропустим сам expression list и его детей).
174 175
		if (select && *it == select->array_join_expression_list)
			new_ignore_levels = 2;
176
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
177 178 179 180 181 182
			createAliasesDict(*it, new_ignore_levels);
	}

	if (ignore_levels > 0)
		return;

A
Alexey Milovidov 已提交
183
	std::string * alias = getAlias(ast);
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
	if (alias && !alias->empty())
	{
		if (aliases.count(*alias) && ast->getTreeID() != aliases[*alias]->getTreeID())
		{
			throw Exception("Different expressions with the same alias " + *alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
		}
		else
		{
			aliases[*alias] = ast;
		}
	}
}


/** Looks up the table the SELECT query reads from.
  * Returns an empty StoragePtr when `ast` is not a SELECT query, when the query has no table,
  * or when the table position holds a nested SELECT query.
  * Otherwise returns whatever context.tryGetTable gives for the database and table names.
  */
StoragePtr ExpressionAnalyzer::getTable()
{
	if (const ASTSelectQuery * select = dynamic_cast<const ASTSelectQuery *>(&*ast))
	{
		if (select->table && !dynamic_cast<const ASTSelectQuery *>(&*select->table))
		{
			/// An empty database name is passed when the query does not specify one.
			String database = select->database ?
				dynamic_cast<const ASTIdentifier &>(*select->database).name :
				"";
			const String & table = dynamic_cast<const ASTIdentifier &>(*select->table).name;
			return context.tryGetTable(database, table);
		}
	}
	return StoragePtr();
}


bool ExpressionAnalyzer::needSignRewrite()
{
	if (settings.sign_rewrite && storage)
	{
		if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage))
			return merge_tree->getName() == "CollapsingMergeTree";
		if (const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage))
			return !distributed->getSignColumnName().empty();
	}
	return false;
}


/** Returns the sign column name reported by the storage when it is a StorageMergeTree
  * or a StorageDistributed, and an empty string for any other storage type.
  */
String ExpressionAnalyzer::getSignColumnName()
{
	const StorageMergeTree * as_merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage);
	if (as_merge_tree)
		return as_merge_tree->getSignColumnName();

	const StorageDistributed * as_distributed = dynamic_cast<const StorageDistributed *>(&*storage);
	if (as_distributed)
		return as_distributed->getSignColumnName();

	return "";
}


/** Creates a new identifier node that refers to the sign column.
  * The node takes its range from the root `ast`.
  */
ASTPtr ExpressionAnalyzer::createSignColumn()
{
	ASTIdentifier * identifier = new ASTIdentifier(ast->range, sign_column_name);

	/// Hand the node over to the owning pointer before touching it any further.
	ASTPtr result = identifier;

	identifier->name = sign_column_name;
	return result;
}


/** Builds the replacement for count(...): the expression sum(Sign).
  * The alias of the original node is moved onto the new node; the original arguments are not used.
  */
ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node)
{
	/// 'Sign'
	ASTExpressionList * p_exp_list = new ASTExpressionList;
	ASTExpressionList & exp_list = *p_exp_list;
	ASTPtr exp_list_node = p_exp_list;
	exp_list.children.push_back(createSignColumn());

	/// sum(Sign)
	ASTFunction * p_sum = new ASTFunction;
	ASTFunction & sum = *p_sum;
	ASTPtr sum_node = p_sum;
	sum.name = "sum";
	sum.alias = node->alias;
	sum.arguments = exp_list_node;
	sum.children.push_back(exp_list_node);

	return sum_node;
}


/** Builds the replacement for sum(x): the expression sum(Sign * x).
  * The alias of the original node is moved onto the new node.
  * Only the first argument of the original function is used.
  *
  * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the original function has no arguments.
  */
ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node)
{
	/// The first argument is read below; without this check an empty argument list is an out-of-bounds access.
	if (!node->arguments || node->arguments->children.empty())
		throw Exception("Function " + node->name + " requires at least one argument",
			ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

	/// 'Sign', 'x'
	ASTExpressionList * p_mult_exp_list = new ASTExpressionList;
	ASTExpressionList & mult_exp_list = *p_mult_exp_list;
	ASTPtr mult_exp_list_node = p_mult_exp_list;
	mult_exp_list.children.push_back(createSignColumn());
	mult_exp_list.children.push_back(node->arguments->children[0]);

	/// Sign * x
	ASTFunction * p_mult = new ASTFunction;
	ASTFunction & mult = *p_mult;
	ASTPtr mult_node = p_mult;
	mult.name = "multiply";
	mult.arguments = mult_exp_list_node;
	mult.children.push_back(mult_exp_list_node);

	/// 'Sign * x'
	ASTExpressionList * p_exp_list = new ASTExpressionList;
	ASTExpressionList & exp_list = *p_exp_list;
	ASTPtr exp_list_node = p_exp_list;
	exp_list.children.push_back(mult_node);

	/// sum(Sign * x)
	ASTFunction * p_sum = new ASTFunction;
	ASTFunction & sum = *p_sum;
	ASTPtr sum_node = p_sum;
	sum.name = "sum";
	sum.alias = node->alias;
	sum.arguments = exp_list_node;
	sum.children.push_back(exp_list_node);

	return sum_node;
}


/** Builds the replacement for avg(x): the expression sum(Sign * x) / sum(Sign).
  * The alias of the original node is put on the division only; the numerator and the
  * denominator are built from an alias-free clone of the node.
  */
ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node)
{
	/// A copy of the node without an alias, used to rewrite the numerator and the denominator.
	ASTPtr node_clone = node->clone();
	ASTFunction * node_clone_func = dynamic_cast<ASTFunction *>(&*node_clone);
	node_clone_func->alias = "";

	/// 'sum(Sign * x)', 'sum(Sign)'
	ASTExpressionList * p_div_exp_list = new ASTExpressionList;
	ASTExpressionList & div_exp_list = *p_div_exp_list;
	ASTPtr div_exp_list_node = p_div_exp_list;
	div_exp_list.children.push_back(rewriteSum(node_clone_func));
	div_exp_list.children.push_back(rewriteCount(node_clone_func));

	/// sum(Sign * x) / sum(Sign)
	ASTFunction * p_div = new ASTFunction;
	ASTFunction & div = *p_div;
	ASTPtr div_node = p_div;
	div.name = "divide";
	div.alias = node->alias;
	div.arguments = div_exp_list_node;
	div.children.push_back(div_exp_list_node);

	return div_node;
}


332
/** Replaces `ast` with its sign-aware form when it is a call of count, sum or avg.
  * Returns true when the node was replaced and false otherwise (in which case `ast` is untouched).
  */
bool ExpressionAnalyzer::considerSignRewrite(ASTPtr & ast)
{
	ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast);
	if (!node)
		return false;
	const String & name = node->name;
	if (name == "count")
		ast = rewriteCount(node);
	else if (name == "sum")
		ast = rewriteSum(node);
	else if (name == "avg")
		ast = rewriteAvg(node);
	else
		return false;
	return true;
}


void ExpressionAnalyzer::normalizeTree()
{
	SetOfASTs tmp_set;
	MapOfASTs tmp_map;
	if (needSignRewrite())
		sign_column_name = getSignColumnName();
M
Merge  
Michael Kolupaev 已提交
356
	normalizeTreeImpl(ast, tmp_map, tmp_set, "", false);
357 358 359 360 361
}


/// finished_asts - уже обработанные вершины (и на что они заменены)
/// current_asts - вершины в текущем стеке вызовов этого метода
362
/// current_alias - алиас, повешенный на предка ast (самого глубокого из предков с алиасами)
363
/// in_sign_rewritten - находимся ли мы в поддереве, полученном в результате sign rewrite
M
Merge  
Michael Kolupaev 已提交
364
void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, bool in_sign_rewritten)
365 366 367 368 369 370
{
	if (finished_asts.count(ast))
	{
		ast = finished_asts[ast];
		return;
	}
371

372 373
	ASTPtr initial_ast = ast;
	current_asts.insert(initial_ast);
374

M
Merge  
Michael Kolupaev 已提交
375 376
	std::string * my_alias = getAlias(ast);
	if (my_alias && !my_alias->empty())
377
		current_alias = *my_alias;
378

379
	/// rewrite правила, которые действуют при обходе сверху-вниз.
380

381 382
	if (!in_sign_rewritten && !sign_column_name.empty())
		in_sign_rewritten = considerSignRewrite(ast);
383

384 385
	bool replaced = false;

386 387 388 389 390 391 392 393 394 395 396 397
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		/** Нет ли в таблице столбца, название которого полностью совпадает с записью функции?
		 * Например, в таблице есть столбец "domain(URL)", и мы запросили domain(URL).
		 */
		String function_string = node->getColumnName();
		NamesAndTypesList::const_iterator it = findColumn(function_string);
		if (columns.end() != it)
		{
			ASTIdentifier * ast_id = new ASTIdentifier(node->range, std::string(node->range.first, node->range.second));
			ast = ast_id;
			current_asts.insert(ast);
398
			replaced = true;
399 400 401
		}
	}
	else if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
402
	{
403
		if (node->kind == ASTIdentifier::Column)
404
		{
M
Merge  
Michael Kolupaev 已提交
405
			/// Если это алиас, но не родительский алиас (чтобы работали конструкции вроде "SELECT column+1 AS column").
406
			Aliases::const_iterator jt = aliases.find(node->name);
M
Merge  
Michael Kolupaev 已提交
407
			if (jt != aliases.end() && current_alias != node->name)
408
			{
409
				/// Заменим его на соответствующий узел дерева.
410 411
				if (current_asts.count(jt->second))
					throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
412 413 414 415 416 417 418 419 420 421 422 423
				if (my_alias && !my_alias->empty() && *my_alias != jt->second->getAlias())
				{
					/// В конструкции вроде "a AS b", где a - алиас, нужно перевесить алиас b на результат подстановки алиаса a.
					ast = jt->second->clone();
					setAlias(ast, *my_alias);
				}
				else
				{
					ast = jt->second;
				}

				replaced = true;
424 425 426 427
			}
			else
			{
				/// Проверим имеет ли смысл sign-rewrite
428
				if (!in_sign_rewritten && sign_column_name != "" && node->name == sign_column_name)
429 430 431 432 433 434 435 436 437 438 439 440 441
					throw Exception("Requested Sign column while sign-rewrite is on.", ErrorCodes::QUERY_SECTION_DOESNT_MAKE_SENSE);
			}
		}
	}
	else if (ASTExpressionList * node = dynamic_cast<ASTExpressionList *>(&*ast))
	{
		/// Заменим * на список столбцов.
		ASTs & asts = node->children;
		for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
		{
			if (ASTAsterisk * asterisk = dynamic_cast<ASTAsterisk *>(&*asts[i]))
			{
				ASTs all_columns;
S
Merge  
Sergey Fedorov 已提交
442
				for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
443 444 445 446 447 448
					all_columns.push_back(new ASTIdentifier(asterisk->range, it->first));
				asts.erase(asts.begin() + i);
				asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end());
			}
		}
	}
449

450 451 452 453 454 455 456 457 458 459
	/// Если заменили корень поддерева вызовемся для нового корня снова - на случай, если алиас заменился на алиас.
	if (replaced)
	{
		normalizeTreeImpl(ast, finished_asts, current_asts, current_alias, in_sign_rewritten);
		current_asts.erase(initial_ast);
		current_asts.erase(ast);
		finished_asts[initial_ast] = ast;
		return;
	}

460
	/// Рекурсивные вызовы. Не опускаемся в подзапросы.
461

462 463
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
464
			normalizeTreeImpl(*it, finished_asts, current_asts, current_alias, in_sign_rewritten);
465

466 467 468
	/// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression.
	if (ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast))
	{
469 470
		if (select->prewhere_expression)
			normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
471
		if (select->where_expression)
472
			normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
473
		if (select->having_expression)
474
			normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
475
	}
476

M
Merge  
Michael Kolupaev 已提交
477 478
	/// Действия, выполняемые снизу вверх.

479
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
480 481 482
	{
		if (node->name == "lambda")
		{
483
			node->kind = ASTFunction::LAMBDA_EXPRESSION;
484 485 486
		}
		else if (context.getAggregateFunctionFactory().isAggregateFunctionName(node->name))
		{
487
			node->kind = ASTFunction::AGGREGATE_FUNCTION;
488
		}
489 490 491 492 493 494 495 496
		else if (node->name == "arrayJoin")
		{
			node->kind = ASTFunction::ARRAY_JOIN;
		}
		else
		{
			node->kind = ASTFunction::FUNCTION;
		}
497
	}
498

499 500 501 502 503 504
	current_asts.erase(initial_ast);
	current_asts.erase(ast);
	finished_asts[initial_ast] = ast;
}


505
/** Converts the right argument of the function `node` (IN / NOT IN) into a set and puts
  * an ASTSet node in its place. Does nothing when the argument is already an ASTSet.
  *
  * The right argument may be a value, an enumeration of values or a subquery.
  * An enumeration of values is parsed as the function tuple.
  *
  * sample_block - the block used to look up the types of the left argument.
  *
  * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the function does not have exactly two arguments,
  * and ILLEGAL_TYPE_OF_ARGUMENT when the right argument is neither a subquery, a tuple nor a literal.
  */
void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block)
{
	IAST & args = *node->arguments;

	/// Both arguments are indexed below; reject any other count instead of reading out of bounds.
	if (args.children.size() != 2)
		throw Exception("Function " + node->name + " requires exactly two arguments",
			ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

	ASTPtr & arg = args.children[1];

	if (dynamic_cast<ASTSet *>(&*arg))
		return;

	if (dynamic_cast<ASTSubquery *>(&*arg))
	{
		/// Execute the subquery, turn the result into a set, and put this set in place of the subquery.
		ASTSet * ast_set = new ASTSet(arg->getColumnName());
		InterpreterSelectQuery interpreter(arg->children[0], context, QueryProcessingStage::Complete, subquery_depth + 1);
		ast_set->set = new Set(settings.limits);
		ast_set->set->create(interpreter.execute());
		arg = ast_set;
	}
	else
	{
		/// The case of an explicit enumeration of values.

		DataTypes set_element_types;
		ASTPtr & left_arg = args.children[0];

		ASTFunction * left_arg_tuple = dynamic_cast<ASTFunction *>(&*left_arg);

		if (left_arg_tuple && left_arg_tuple->name == "tuple")
		{
			for (ASTs::const_iterator it = left_arg_tuple->arguments->children.begin();
				it != left_arg_tuple->arguments->children.end();
				++it)
				set_element_types.push_back(sample_block.getByName((*it)->getColumnName()).type);
		}
		else
		{
			DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type;
			/// For an array on the left, the set elements have the type of the array elements.
			if (DataTypeArray * array_type = dynamic_cast<DataTypeArray *>(&*left_type))
				set_element_types.push_back(array_type->getNestedType());
			else
				set_element_types.push_back(left_type);
		}

		/// Distinguish the case x in (1, 2) from the case x in 1 (which is the same as x in (1)).
		bool single_value = false;
		ASTPtr elements_ast = arg;

		if (ASTFunction * set_func = dynamic_cast<ASTFunction *>(&*arg))
		{
			if (set_func->name != "tuple")
				throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
								ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			/// Distinguish the case (x, y) in ((1, 2), (3, 4)) from the case (x, y) in (1, 2).
			/// A tuple without elements has no first element to inspect.
			const ASTs & set_elements = set_func->arguments->children;
			ASTFunction * any_element = set_elements.empty()
				? NULL
				: dynamic_cast<ASTFunction *>(&*set_elements[0]);
			if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple"))
				single_value = true;
			else
				elements_ast = set_func->arguments;
		}
		else if (dynamic_cast<ASTLiteral *>(&*arg))
		{
			single_value = true;
		}
		else
		{
			throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
							ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
		}

		/// A single value is wrapped into a one-element list so that both cases are handled uniformly.
		if (single_value)
		{
			ASTPtr exp_list = new ASTExpressionList;
			exp_list->children.push_back(elements_ast);
			elements_ast = exp_list;
		}

		ASTSet * ast_set = new ASTSet(arg->getColumnName());
		ast_set->set = new Set(settings.limits);
		ast_set->set->create(set_element_types, elements_ast);
		arg = ast_set;
	}
}


static std::string getUniqueName(const Block & block, const std::string & prefix)
{
	int i = 1;
596
	while (block.has(prefix + toString(i)))
597
		++i;
598
	return prefix + toString(i);
599 600 601
}


602 603 604
/** Appends the actions needed to compute `ast` to `actions`.
  * A scope stack is created on top of `actions`, filled by getActionsImpl,
  * and its popped level is assigned back to `actions`.
  */
void ExpressionAnalyzer::getRootActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ExpressionActions & actions)
{
	ScopeStack scopes(actions, settings);
	getActionsImpl(ast, no_subqueries, only_consts, scopes);
	actions = *scopes.popLevel();
}


610 611
void ExpressionAnalyzer::getArrayJoinedColumns()
{
612 613
	if (select_query && select_query->array_join_expression_list)
	{
614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
		ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts .size(); ++i)
		{
			ASTPtr ast = array_join_asts [i];

			String nested_table_name = ast->getColumnName();
			String nested_table_alias = ast->getAlias();
			if (nested_table_alias == nested_table_name && !dynamic_cast<ASTIdentifier *>(&*ast))
				throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);

			if (array_join_alias_to_name.count(nested_table_alias) || aliases.count(nested_table_alias))
				throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
			array_join_alias_to_name[nested_table_alias] = nested_table_name;
		}

		ASTs & query_asts = select_query->children;
		for (size_t i = 0; i < query_asts.size(); ++i)
		{
			ASTPtr ast = query_asts[i];
			if (select_query && ast == select_query->array_join_expression_list)
				continue;
			getArrayJoinedColumnsImpl(ast);
		}
637 638 639

		/// Если результат ARRAY JOIN не используется, придется все равно по-ARRAY-JOIN-ить какой-нибудь столбец,
		/// чтобы получить правильное количество строк.
640
		if (array_join_result_to_source.empty())
641 642 643 644 645 646 647
		{
			ASTPtr expr = select_query->array_join_expression_list->children[0];
			String source_name = expr->getColumnName();
			String result_name = expr->getAlias();
			/// Это массив.
			if (!dynamic_cast<ASTIdentifier *>(&*expr) || findColumn(source_name, columns) != columns.end())
			{
648
				array_join_result_to_source[result_name] = source_name;
649 650 651 652 653 654 655 656 657 658
			}
			else /// Это вложенная таблица.
			{
				bool found = false;
				for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
				{
					String table_name = DataTypeNested::extractNestedTableName(it->first);
					String column_name = DataTypeNested::extractNestedColumnName(it->first);
					if (table_name == source_name)
					{
659 660
						array_join_result_to_source[DataTypeNested::concatenateNestedName(result_name, column_name)]
							= it->first;
661 662 663 664
						found = true;
						break;
					}
				}
665 666
				if (!found)
					throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
667 668
			}
		}
669
	}
670 671 672 673
}


/** Walks the subtree of `ast` (without descending into SELECT subqueries) and records in
  * array_join_result_to_source every column identifier that refers to an ARRAY JOIN alias,
  * either directly or as a column of an aliased nested table.
  */
void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast)
{
	if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		if (node->kind == ASTIdentifier::Column)
		{
			String table_name = DataTypeNested::extractNestedTableName(node->name);
			if (array_join_alias_to_name.count(node->name))
				array_join_result_to_source[node->name] = array_join_alias_to_name[node->name];
			else if (array_join_alias_to_name.count(table_name))
			{
				/// The identifier names a column of an aliased nested table: map it to the same column of the source table.
				String nested_column = DataTypeNested::extractNestedColumnName(node->name);
				array_join_result_to_source[node->name]
					= DataTypeNested::concatenateNestedName(array_join_alias_to_name[table_name], nested_column);
			}
		}
	}
	else
	{
		for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
			if (!dynamic_cast<ASTSelectQuery *>(&**it))
				getArrayJoinedColumnsImpl(*it);
	}
}


/** Adds to `actions_stack` the actions needed to compute the expression `ast`.
  *
  * no_subqueries - do not execute subqueries and do not build sets for IN / NOT IN;
  *                 a placeholder column of the result type is added instead.
  * only_consts   - a function whose arguments are missing from the sample block is silently
  *                 skipped instead of causing an UNKNOWN_IDENTIFIER exception.
  *
  * Throws UNEXPECTED_EXPRESSION for a lambda outside of function arguments, TYPE_MISMATCH and
  * NUMBER_OF_ARGUMENTS_DOESNT_MATCH for malformed arrayJoin / lambda expressions,
  * and UNKNOWN_IDENTIFIER for an argument that is absent from the sample block.
  */
void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack)
{
	/// If the result of the calculation is already in the block.
	if ((dynamic_cast<ASTFunction *>(&*ast) || dynamic_cast<ASTLiteral *>(&*ast))
		&& actions_stack.getSampleBlock().has(ast->getColumnName()))
		return;

	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		if (node->kind == ASTFunction::LAMBDA_EXPRESSION)
			throw Exception("Unexpected expression", ErrorCodes::UNEXPECTED_EXPRESSION);

		if (node->kind == ASTFunction::ARRAY_JOIN)
		{
			if (node->arguments->children.size() != 1)
				throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
			ASTPtr arg = node->arguments->children[0];
			getActionsImpl(arg, no_subqueries, only_consts, actions_stack);
			if (!only_consts)
			{
				/// Copy the argument under the name of the result and array-join the copy.
				String result_name = node->getColumnName();
				actions_stack.addAction(ExpressionActions::Action::copyColumn(arg->getColumnName(), result_name));
				NameSet joined_columns;
				joined_columns.insert(result_name);
				actions_stack.addAction(ExpressionActions::Action::arrayJoin(joined_columns));
			}

			return;
		}

		if (node->kind == ASTFunction::FUNCTION)
		{
			if (node->name == "in" || node->name == "notIn")
			{
				if (!no_subqueries)
				{
					/// Find the type of the first argument (getActionsImpl will later be called for it again and will not affect anything).
					getActionsImpl(node->arguments->children[0], no_subqueries, only_consts, actions_stack);
					/// Turn the tuple or the subquery into a set.
					makeSet(node, actions_stack.getSampleBlock());
				}
				else
				{
					/// We are in the part of the tree that we are not going to compute. Only the types need to be determined.
					/// Do not execute subqueries and do not build sets. Insert an arbitrary column of the correct type.
					ColumnWithNameAndType fake_column;
					fake_column.name = node->getColumnName();
					fake_column.type = new DataTypeUInt8;
					fake_column.column = new ColumnConstUInt8(1, 0);
					actions_stack.addAction(ExpressionActions::Action::addColumn(fake_column));
					getActionsImpl(node->arguments, no_subqueries, only_consts, actions_stack);
					return;
				}
			}

			FunctionPtr function = context.getFunctionFactory().get(node->name, context);

			Names argument_names;
			DataTypes argument_types;
			bool arguments_present = true;

			/// If the function has a lambda expression argument, its type must be determined before the recursive call.
			bool has_lambda_arguments = false;

			for (size_t i = 0; i < node->arguments->children.size(); ++i)
			{
				ASTPtr child = node->arguments->children[i];

				ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
				ASTSet * set = dynamic_cast<ASTSet *>(&*child);
				if (lambda && lambda->name == "lambda")
				{
					/// If the argument is a lambda expression, only remember its approximate type.
					if (lambda->arguments->children.size() != 2)
						throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

					ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);

					if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
						throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

					has_lambda_arguments = true;
					argument_types.push_back(new DataTypeExpression(DataTypes(lambda_args_tuple->arguments->children.size())));
					/// The name will be chosen in the next loop.
					argument_names.push_back("");
				}
				else if (set)
				{
					/// If the argument is a set, give it a unique name,
					///  so that sets with the same notation are not merged (they may have different types).
					ColumnWithNameAndType column;
					column.column = new ColumnSet(1, set->set);
					column.type = new DataTypeSet;
					column.name = getUniqueName(actions_stack.getSampleBlock(), "__set");

					actions_stack.addAction(ExpressionActions::Action::addColumn(column));

					argument_types.push_back(column.type);
					argument_names.push_back(column.name);
				}
				else
				{
					/// If the argument is not a lambda expression, call ourselves recursively and find out its type.
					getActionsImpl(child, no_subqueries, only_consts, actions_stack);
					std::string name = child->getColumnName();
					if (actions_stack.getSampleBlock().has(name))
					{
						argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type);
						argument_names.push_back(name);
					}
					else
					{
						if (only_consts)
						{
							arguments_present = false;
						}
						else
						{
							throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
						}
					}
				}
			}

			if (only_consts && !arguments_present)
				return;

			Names additional_requirements;

			if (has_lambda_arguments && !only_consts)
			{
				function->getLambdaArgumentTypes(argument_types);

				/// Call ourselves recursively for the lambda expressions.
				for (size_t i = 0; i < node->arguments->children.size(); ++i)
				{
					ASTPtr child = node->arguments->children[i];

					ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
					if (lambda && lambda->name == "lambda")
					{
						DataTypeExpression * lambda_type = dynamic_cast<DataTypeExpression *>(&*argument_types[i]);
						ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);
						ASTs lambda_arg_asts = lambda_args_tuple->arguments->children;
						NamesAndTypesList lambda_arguments;

						for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
						{
							ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*lambda_arg_asts[j]);
							if (!identifier)
								throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

							String arg_name = identifier->name;
							NameAndTypePair arg(arg_name, lambda_type->getArgumentTypes()[j]);

							lambda_arguments.push_back(arg);
						}

						/// The body of the lambda is analyzed in its own scope, where the lambda arguments are available.
						actions_stack.pushLevel(lambda_arguments);
						getActionsImpl(lambda->arguments->children[1], no_subqueries, only_consts, actions_stack);
						ExpressionActionsPtr lambda_actions = actions_stack.popLevel();

						String result_name = lambda->arguments->children[1]->getColumnName();
						lambda_actions->finalize(Names(1, result_name));
						DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
						argument_types[i] = new DataTypeExpression(lambda_type->getArgumentTypes(), result_type);

						/// Required columns that are not lambda arguments are captured from the enclosing scope.
						Names captured = lambda_actions->getRequiredColumns();
						for (size_t j = 0; j < captured.size(); ++j)
						{
							if (findColumn(captured[j], lambda_arguments) == lambda_arguments.end())
								additional_requirements.push_back(captured[j]);
						}

						/// We cannot use getColumnName() as the name,
						///  because it does not uniquely identify the expression (the argument types may differ).
						argument_names[i] = getUniqueName(actions_stack.getSampleBlock(), "__lambda");

						ColumnWithNameAndType lambda_column;
						lambda_column.column = new ColumnExpression(1, lambda_actions, lambda_arguments, result_type, result_name);
						lambda_column.type = argument_types[i];
						lambda_column.name = argument_names[i];
						actions_stack.addAction(ExpressionActions::Action::addColumn(lambda_column));
					}
				}
			}

			if (only_consts)
			{
				for (size_t i = 0; i < argument_names.size(); ++i)
				{
					if (!actions_stack.getSampleBlock().has(argument_names[i]))
					{
						arguments_present = false;
						break;
					}
				}
			}

			if (arguments_present)
				actions_stack.addAction(ExpressionActions::Action::applyFunction(function, argument_names, node->getColumnName()),
										additional_requirements);
		}
	}
	else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
	{
		/// A literal becomes a constant column of the type derived from its value.
		DataTypePtr type = apply_visitor(FieldToDataType(), node->value);
		ColumnWithNameAndType column;
		column.column = type->createConstColumn(1, node->value);
		column.type = type;
		column.name = node->getColumnName();

		actions_stack.addAction(ExpressionActions::Action::addColumn(column));
	}
	else
	{
		for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
			getActionsImpl(*it, no_subqueries, only_consts, actions_stack);
	}
}


/** Recursively walks the AST and records every distinct aggregate function call
  * in aggregate_descriptions, setting has_aggregation when one is found.
  * Actions computing the aggregate arguments are appended to `actions`, and the
  * argument types are then read from its sample block.
  * Subqueries and nested SELECT queries are not descended into.
  * Throws if a parameter of an aggregate function is not a literal.
  */
void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);
	const bool is_aggregate = func && func->kind == ASTFunction::AGGREGATE_FUNCTION;

	if (!is_aggregate)
	{
		/// Not an aggregate call itself: look for aggregates among the children.
		for (size_t child_idx = 0; child_idx < ast->children.size(); ++child_idx)
		{
			ASTPtr child = ast->children[child_idx];

			if (dynamic_cast<ASTSubquery *>(&*child))
				continue;
			if (dynamic_cast<ASTSelectQuery *>(&*child))
				continue;

			getAggregatesImpl(child, actions);
		}

		return;
	}

	has_aggregation = true;

	AggregateDescription description;
	description.column_name = func->getColumnName();

	/// An aggregate with the same column name is already registered: nothing more to do.
	for (size_t k = 0; k < aggregate_descriptions.size(); ++k)
	{
		if (aggregate_descriptions[k].column_name == description.column_name)
			return;
	}

	ASTs & args = func->arguments->children;
	description.argument_names.resize(args.size());
	DataTypes arg_types(args.size());

	for (size_t arg_idx = 0; arg_idx < args.size(); ++arg_idx)
	{
		/// Make sure the argument is computed, then look up its type by column name.
		getRootActionsImpl(args[arg_idx], true, false, actions);

		const std::string & arg_name = args[arg_idx]->getColumnName();
		arg_types[arg_idx] = actions.getSampleBlock().getByName(arg_name).type;
		description.argument_names[arg_idx] = arg_name;
	}

	description.function = context.getAggregateFunctionFactory().get(func->name, arg_types);

	if (func->parameters)
	{
		ASTs & param_asts = dynamic_cast<ASTExpressionList &>(*func->parameters).children;
		Row param_values(param_asts.size());

		for (size_t param_idx = 0; param_idx < param_asts.size(); ++param_idx)
		{
			ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&*param_asts[param_idx]);
			if (!literal)
				throw Exception("Parameters to aggregate functions must be literals", ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);

			param_values[param_idx] = literal->value;
		}

		/// Parameters are passed to the function before its argument types.
		description.function->setParameters(param_values);
	}

	description.function->setArguments(arg_types);

	aggregate_descriptions.push_back(description);
}

980 981 982 983 984
/// Throws LOGICAL_ERROR unless select_query is set (i.e. the analyzed AST is a SELECT query).
void ExpressionAnalyzer::assertSelect()
{
	if (select_query)
		return;

	throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
}
985

986
void ExpressionAnalyzer::assertAggregation()
987 988 989
{
	if (!has_aggregation)
		throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
990
}
991

992 993 994 995 996 997 998 999
/** If the chain has no steps yet, copies the analyzer settings into it and creates
  * the first step with ExpressionActions built over `columns`.
  * A chain that already has steps is left untouched.
  */
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
	if (!chain.steps.empty())
		return;

	chain.settings = settings;
	chain.steps.push_back(ExpressionActionsChain::Step(new ExpressionActions(columns, settings)));
}
1000

1001 1002
/** Adds to `actions` a copyColumn action for every ARRAY JOIN result whose name differs
  * from its source column, followed by a single arrayJoin action over all result names.
  */
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions)
{
	NameSet joined_names;

	NameToNameMap::iterator entry = array_join_result_to_source.begin();
	while (entry != array_join_result_to_source.end())
	{
		/// first is the result name, second is the source column name.
		const bool needs_copy = entry->first != entry->second;
		if (needs_copy)
			actions.add(ExpressionActions::Action::copyColumn(entry->second, entry->first));

		joined_names.insert(entry->first);
		++entry;
	}

	actions.add(ExpressionActions::Action::arrayJoin(joined_names));
}


1015 1016 1017
/** Appends the ARRAY JOIN stage to the last step of the chain.
  * Returns false (and does nothing) if the query has no ARRAY JOIN expression list.
  * Otherwise computes the ARRAY JOIN expressions, adds the array join action,
  * marks every ARRAY JOIN result name as required output, and returns true.
  */
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain)
{
	assertSelect();

	if (select_query->array_join_expression_list)
	{
		initChain(chain, columns);
		ExpressionActionsChain::Step & last_step = chain.steps.back();

		getRootActionsImpl(select_query->array_join_expression_list, false, false, *last_step.actions);

		addMultipleArrayJoinAction(*last_step.actions);

		NameToNameMap::iterator entry = array_join_result_to_source.begin();
		while (entry != array_join_result_to_source.end())
		{
			last_step.required_output.push_back(entry->first);
			++entry;
		}

		return true;
	}

	return false;
}

1035 1036 1037
/** Appends the WHERE stage to the last step of the chain.
  * Returns false if the query has no WHERE expression. Otherwise marks the WHERE
  * column as required output, adds the actions computing it, and returns true.
  */
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{
	assertSelect();

	if (select_query->where_expression)
	{
		initChain(chain, columns_after_array_join);
		ExpressionActionsChain::Step & last_step = chain.steps.back();

		/// The output requirement is registered before the actions are generated.
		last_step.required_output.push_back(select_query->where_expression->getColumnName());
		getRootActionsImpl(select_query->where_expression, false, false, *last_step.actions);

		return true;
	}

	return false;
}

/** Appends the GROUP BY stage to the last step of the chain.
  * Returns false if the query has no GROUP BY expression list. Otherwise, for every
  * key expression, marks its column as required output and adds the actions computing it.
  * NOTE(review): an empty chain is initialised from `columns` here, while the WHERE and
  * aggregate-argument stages use `columns_after_array_join` - confirm this is intended.
  */
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (select_query->group_expression_list)
	{
		initChain(chain, columns);
		ExpressionActionsChain::Step & last_step = chain.steps.back();

		/// Work on a copy of the key list, as the original code does.
		ASTs keys = select_query->group_expression_list->children;

		size_t key_idx = 0;
		while (key_idx < keys.size())
		{
			last_step.required_output.push_back(keys[key_idx]->getColumnName());
			getRootActionsImpl(keys[key_idx], false, false, *last_step.actions);
			++key_idx;
		}

		return true;
	}

	return false;
}

/** Appends to the last step of the chain everything needed to compute aggregate function arguments.
  * Every argument name of every known aggregate is marked as required output, then the
  * pre-aggregation actions are collected from the SELECT list and, when present,
  * from the HAVING and ORDER BY sections.
  */
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain)
{
	assertAggregation();

	initChain(chain, columns_after_array_join);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	for (size_t agg_idx = 0; agg_idx < aggregate_descriptions.size(); ++agg_idx)
	{
		const AggregateDescription & description = aggregate_descriptions[agg_idx];

		for (size_t arg_idx = 0; arg_idx < description.argument_names.size(); ++arg_idx)
			last_step.required_output.push_back(description.argument_names[arg_idx]);
	}

	ExpressionActions & step_actions = *last_step.actions;

	getActionsBeforeAggregationImpl(select_query->select_expression_list, step_actions);

	if (select_query->having_expression)
		getActionsBeforeAggregationImpl(select_query->having_expression, step_actions);

	if (select_query->order_expression_list)
		getActionsBeforeAggregationImpl(select_query->order_expression_list, step_actions);
}

/** Appends the HAVING stage to the last step of the chain.
  * Returns false if the query has no HAVING expression. Otherwise marks the HAVING
  * column as required output, adds the actions computing it, and returns true.
  */
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (select_query->having_expression)
	{
		initChain(chain, aggregated_columns);
		ExpressionActionsChain::Step & last_step = chain.steps.back();

		/// The output requirement is registered before the actions are generated.
		last_step.required_output.push_back(select_query->having_expression->getColumnName());
		getRootActionsImpl(select_query->having_expression, false, false, *last_step.actions);

		return true;
	}

	return false;
}

1111 1112 1113
/** Appends the SELECT stage to the last step of the chain: adds the actions computing
  * the whole SELECT expression list and marks the column of each of its elements
  * as required output.
  */
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	getRootActionsImpl(select_query->select_expression_list, false, false, *last_step.actions);

	/// The element list is copied after the actions have been generated.
	ASTs select_items = select_query->select_expression_list->children;

	size_t item_idx = 0;
	while (item_idx < select_items.size())
	{
		last_step.required_output.push_back(select_items[item_idx]->getColumnName());
		++item_idx;
	}
}
1126

1127
/** Appends the ORDER BY stage to the last step of the chain.
  * Returns false if the query has no ORDER BY expression list. Otherwise adds the actions
  * computing the list and marks the column of every sort expression as required output.
  * Throws UNKNOWN_TYPE_OF_AST_NODE if a list element is not an ASTOrderByElement
  * with exactly one child.
  */
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
{
	assertSelect();

	if (select_query->order_expression_list)
	{
		initChain(chain, aggregated_columns);
		ExpressionActionsChain::Step & last_step = chain.steps.back();

		getRootActionsImpl(select_query->order_expression_list, false, false, *last_step.actions);

		ASTs order_items = select_query->order_expression_list->children;
		for (size_t item_idx = 0; item_idx < order_items.size(); ++item_idx)
		{
			ASTOrderByElement * element = dynamic_cast<ASTOrderByElement *>(&*order_items[item_idx]);

			const bool well_formed = element && element->children.size() == 1;
			if (!well_formed)
				throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

			/// The single child of an ORDER BY element is the expression to sort by.
			ASTPtr sort_expression = element->children[0];
			last_step.required_output.push_back(sort_expression->getColumnName());
		}

		return true;
	}

	return false;
}

1152
/** Appends a projection to the last step of the chain: every SELECT list element is
  * mapped from its column name to its alias, the aliases are marked as required output,
  * and a single project action over all the pairs is added.
  */
void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	NamesWithAliases projection;

	ASTs select_items = select_query->select_expression_list->children;

	size_t item_idx = 0;
	while (item_idx < select_items.size())
	{
		projection.push_back(NameWithAlias(select_items[item_idx]->getColumnName(), select_items[item_idx]->getAlias()));

		/// `second` of the pair just added is the alias.
		last_step.required_output.push_back(projection.back().second);
		++item_idx;
	}

	last_step.actions->add(ExpressionActions::Action::project(projection));
}


1172 1173 1174
/** Builds temporary actions over aggregated_columns that compute every SELECT list
  * element and project it to its alias, and returns the sample block of those actions.
  */
Block ExpressionAnalyzer::getSelectSampleBlock()
{
	assertSelect();

	ExpressionActions scratch_actions(aggregated_columns, settings);
	NamesWithAliases projection;

	ASTs select_items = select_query->select_expression_list->children;

	size_t item_idx = 0;
	while (item_idx < select_items.size())
	{
		projection.push_back(NameWithAlias(select_items[item_idx]->getColumnName(), select_items[item_idx]->getAlias()));
		getRootActionsImpl(select_items[item_idx], true, false, scratch_actions);
		++item_idx;
	}

	scratch_actions.add(ExpressionActions::Action::project(projection));

	return scratch_actions.getSampleBlock();
}

1191
/** Recursively walks the AST; for every aggregate function call adds to `actions`
  * the actions computing its arguments. Nodes that are not aggregate function calls
  * are only traversed (all of their children are visited).
  */
void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);

	if (!func || func->kind != ASTFunction::AGGREGATE_FUNCTION)
	{
		for (size_t child_idx = 0; child_idx < ast->children.size(); ++child_idx)
			getActionsBeforeAggregationImpl(ast->children[child_idx], actions);

		return;
	}

	/// Aggregate call: only its arguments have to be computed before aggregation.
	ASTs & args = func->arguments->children;
	for (size_t arg_idx = 0; arg_idx < args.size(); ++arg_idx)
		getRootActionsImpl(args[arg_idx], false, false, actions);
}


M
Merge  
Michael Kolupaev 已提交
1213
/** Builds the actions computing the analyzed expression (or every element of it,
  * when the AST is an expression list) over `columns`.
  * With project_result the results are projected to their aliases; otherwise results
  * keep their column names and all source columns are kept in the output as well.
  */
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result)
{
	ExpressionActionsPtr actions = new ExpressionActions(columns, settings);
	NamesWithAliases projection;
	Names output_names;

	/// A single expression is treated as a list of one element.
	ASTExpressionList * list_node = dynamic_cast<ASTExpressionList *>(&*ast);
	ASTs expressions = list_node ? list_node->children : ASTs(1, ast);

	for (size_t expr_idx = 0; expr_idx < expressions.size(); ++expr_idx)
	{
		std::string column_name = expressions[expr_idx]->getColumnName();

		std::string output_name;
		if (!project_result)
			output_name = column_name;
		else
			output_name = expressions[expr_idx]->getAlias();

		projection.push_back(NameWithAlias(column_name, output_name));
		output_names.push_back(output_name);
		getRootActionsImpl(expressions[expr_idx], false, false, *actions);
	}

	if (!project_result)
	{
		/// We will not remove the source columns.
		NamesAndTypesList::const_iterator source = columns.begin();
		while (source != columns.end())
		{
			output_names.push_back(source->first);
			++source;
		}
	}
	else
	{
		actions->add(ExpressionActions::Action::project(projection));
	}

	actions->finalize(output_names);

	return actions;
}


/** Builds actions over an empty list of input columns, running getRootActionsImpl
  * on the analyzed AST with both boolean flags set (the `only_consts` mode).
  */
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
	const NamesAndTypesList no_input_columns;
	ExpressionActionsPtr const_actions = new ExpressionActions(no_input_columns, settings);

	getRootActionsImpl(ast, true, true, *const_actions);

	return const_actions;
}

/** Appends the names of the aggregation keys to `key_names` (existing contents are kept)
  * and overwrites `aggregates` with the collected aggregate descriptions.
  */
void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates)
{
	NamesAndTypesList::iterator key = aggregation_keys.begin();
	while (key != aggregation_keys.end())
	{
		key_names.push_back(key->first);
		++key;
	}

	aggregates = aggregate_descriptions;
}

1272
void ExpressionAnalyzer::removeUnusedColumns()
1273 1274 1275
{
	NamesSet required;
	NamesSet ignored;
1276

1277 1278 1279 1280 1281 1282
	if (select_query && select_query->array_join_expression_list)
	{
		ASTs & expressions = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < expressions.size(); ++i)
		{
			/// Игнорируем идентификаторы верхнего уровня из секции ARRAY JOIN.
1283
			/// Их потом добавим отдельно.
1284
			if (dynamic_cast<ASTIdentifier *>(&*expressions[i]))
1285
			{
1286
				ignored.insert(expressions[i]->getColumnName());
1287 1288 1289 1290 1291 1292 1293
			}
			else
			{
				/// Для выражений в ARRAY JOIN ничего игнорировать не нужно.
				NamesSet empty;
				getRequiredColumnsImpl(expressions[i], required, empty);
			}
1294 1295

			ignored.insert(expressions[i]->getAlias());
1296 1297
		}
	}
1298

1299
	getRequiredColumnsImpl(ast, required, ignored);
1300

1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311
	NameSet array_join_sources;
	for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
	{
		array_join_sources.insert(it->second);
	}
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (array_join_sources.count(it->first))
			required.insert(it->first);
	}

1312 1313 1314
	/// Нужно прочитать хоть один столбец, чтобы узнать количество строк.
	if (required.empty())
		required.insert(ExpressionActions::getSmallestColumn(columns));
1315

1316
	unknown_required_columns = required;
1317

1318 1319 1320 1321
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end();)
	{
		NamesAndTypesList::iterator it0 = it;
		++it;
1322

1323
		unknown_required_columns.erase(it0->first);
1324

1325
		if (!required.count(it0->first))
1326 1327
		{
			required.erase(it0->first);
1328
			columns.erase(it0);
1329
		}
1330 1331 1332 1333 1334
	}
}

/** Returns the names of all entries of `columns`, in order.
  * Throws UNKNOWN_IDENTIFIER, naming the first entry of unknown_required_columns,
  * if that set is not empty.
  */
Names ExpressionAnalyzer::getRequiredColumns()
{
	const bool has_unknown = !unknown_required_columns.empty();
	if (has_unknown)
		throw Exception("Unknown identifier: " + *unknown_required_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER);

	Names column_names;

	NamesAndTypesList::iterator column = columns.begin();
	while (column != columns.end())
	{
		column_names.push_back(column->first);
		++column;
	}

	return column_names;
}

1344 1345


1346
/** Recursively collects into `required_columns` the names of column identifiers found in the AST.
  *
  * An identifier is skipped if its name, or the nested table name extracted from it,
  * is in `ignored_names`. Inside a lambda body the lambda's own parameters are temporarily
  * added to `ignored_names` and removed again afterwards. Subqueries, nested SELECT queries
  * and the ARRAY JOIN section of a SELECT are not descended into.
  *
  * Throws if a lambda does not have exactly two arguments, if its first argument is not
  * a `tuple` function, or if a declared lambda parameter is not an identifier.
  */
void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names)
{
	if (ASTIdentifier * identifier_node = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		/// The checks are nested to keep them lazy: each one runs only if the previous passed.
		if (identifier_node->kind == ASTIdentifier::Column)
		{
			if (!ignored_names.count(identifier_node->name))
			{
				if (!ignored_names.count(DataTypeNested::extractNestedTableName(identifier_node->name)))
					required_columns.insert(identifier_node->name);
			}
		}

		return;
	}

	ASTFunction * function_node = dynamic_cast<ASTFunction *>(&*ast);
	if (function_node && function_node->kind == ASTFunction::LAMBDA_EXPRESSION)
	{
		ASTs & lambda_parts = function_node->arguments->children;

		if (lambda_parts.size() != 2)
			throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

		ASTFunction * params_tuple = dynamic_cast<ASTFunction *>(&*lambda_parts[0]);

		if (!params_tuple || params_tuple->name != "tuple")
			throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

		/// No need to add lambda parameters to required_columns.
		/// Remember only the names added here, so that names ignored by the caller stay ignored.
		Names newly_ignored;
		ASTs & param_asts = params_tuple->arguments->children;
		for (size_t param_idx = 0; param_idx < param_asts.size(); ++param_idx)
		{
			ASTIdentifier * param = dynamic_cast<ASTIdentifier *>(&*param_asts[param_idx]);
			if (!param)
				throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

			const std::string & param_name = param->name;
			if (ignored_names.count(param_name))
				continue;

			ignored_names.insert(param_name);
			newly_ignored.push_back(param_name);
		}

		getRequiredColumnsImpl(lambda_parts[1], required_columns, ignored_names);

		/// Restore the ignored set to what the caller passed in.
		for (size_t k = 0; k < newly_ignored.size(); ++k)
			ignored_names.erase(newly_ignored[k]);

		return;
	}

	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);

	for (size_t child_idx = 0; child_idx < ast->children.size(); ++child_idx)
	{
		ASTPtr child = ast->children[child_idx];

		if (dynamic_cast<ASTSubquery *>(&*child))
			continue;
		if (dynamic_cast<ASTSelectQuery *>(&*child))
			continue;

		/// Do not descend into the ARRAY JOIN section, because there we need to look at
		/// the names of non-ARRAY-JOINed columns. removeUnusedColumns sends us there separately.
		if (select && child == select->array_join_expression_list)
			continue;

		getRequiredColumnsImpl(child, required_columns, ignored_names);
	}
}

1408
}