ExpressionAnalyzer.cpp 49.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
#include <DB/DataTypes/FieldToDataType.h>

#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTSet.h>
11
#include <DB/Parsers/ASTOrderByElement.h>
12
#include <DB/Parsers/ParserSelectQuery.h>
13 14 15 16

#include <DB/DataTypes/DataTypeSet.h>
#include <DB/DataTypes/DataTypeTuple.h>
#include <DB/DataTypes/DataTypeExpression.h>
17
#include <DB/DataTypes/DataTypeNested.h>
18 19 20 21 22 23 24 25 26 27 28 29 30 31
#include <DB/Columns/ColumnSet.h>
#include <DB/Columns/ColumnExpression.h>

#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>

#include <DB/Storages/StorageMergeTree.h>
#include <DB/Storages/StorageDistributed.h>


namespace DB
{


A
Alexey Milovidov 已提交
32
/** Returns a pointer to the `alias` member of the node held by `ast`.
  * Only ASTFunction, ASTIdentifier and ASTLiteral nodes are recognised;
  * for any other node type NULL is returned.
  * The pointer refers to the node itself, so writing through it changes the node's alias.
  */
static std::string * getAlias(ASTPtr & ast)
{
	if (ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast))
		return &func->alias;

	if (ASTIdentifier * ident = dynamic_cast<ASTIdentifier *>(&*ast))
		return &ident->alias;

	if (ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&*ast))
		return &literal->alias;

	return NULL;
}

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
/** Assigns `alias` to the node held by `ast`.
  * Supported node types are the ones getAlias() recognises (function, identifier, literal).
  * Throws Exception with UNKNOWN_TYPE_OF_AST_NODE for any other node type.
  */
static void setAlias(ASTPtr & ast, const std::string & alias)
{
	std::string * slot = getAlias(ast);
	if (!slot)
		throw Exception("Can't set alias of " + ast->getColumnName(), ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

	*slot = alias;
}

72 73 74 75 76

void ExpressionAnalyzer::init()
{
	select_query = dynamic_cast<ASTSelectQuery *>(&*ast);
	has_aggregation = false;
77

78 79
	createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true.
	normalizeTree();
80

81
	getArrayJoinedColumns();
82

83 84
	removeUnusedColumns();

85
	/// Найдем агрегатные функции.
86 87
	if (select_query && (select_query->group_expression_list || select_query->having_expression))
		has_aggregation = true;
88

89
	ExpressionActions temp_actions(columns, settings);
90 91 92 93 94 95 96 97 98 99 100 101 102

	if (select_query && select_query->array_join_expression_list)
	{
		const ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts.size(); ++i)
		{
			ASTPtr ast = array_join_asts[i];
			getRootActionsImpl(ast, true, false, temp_actions);
		}

		addMultipleArrayJoinAction(temp_actions);

		const Block & temp_sample = temp_actions.getSampleBlock();
103
		for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
104
		{
105
			columns_after_array_join.push_back(NameAndTypePair(it->first, temp_sample.getByName(it->first).type));
106 107
		}
	}
108 109 110 111 112
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (!array_join_result_to_source.count(it->first))
			columns_after_array_join.push_back(*it);
	}
113
	getAggregatesImpl(ast, temp_actions);
114

115 116
	if (has_aggregation)
	{
117
		assertSelect();
118

119 120 121
		/// Найдем ключи агрегации.
		if (select_query->group_expression_list)
		{
122
			NameSet unique_keys;
123 124 125
			const ASTs & group_asts = select_query->group_expression_list->children;
			for (size_t i = 0; i < group_asts.size(); ++i)
			{
126
				getRootActionsImpl(group_asts[i], true, false, temp_actions);
127 128
				NameAndTypePair key;
				key.first = group_asts[i]->getColumnName();
129
				key.second = temp_actions.getSampleBlock().getByName(key.first).type;
130
				aggregation_keys.push_back(key);
131

132 133 134 135 136
				if (!unique_keys.count(key.first))
				{
					aggregated_columns.push_back(key);
					unique_keys.insert(key.first);
				}
137 138
			}
		}
139

140 141 142 143 144 145 146 147
		for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
		{
			AggregateDescription & desc = aggregate_descriptions[i];
			aggregated_columns.push_back(NameAndTypePair(desc.column_name, desc.function->getReturnType()));
		}
	}
	else
	{
148
		aggregated_columns = columns_after_array_join;
149 150 151 152
	}
}


153
/// Linear search in `cols` for the first entry whose name (`first`) equals `name`.
/// Returns an iterator to that entry, or cols.end() when there is none.
NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name, NamesAndTypesList & cols)
{
	NamesAndTypesList::iterator pos = cols.begin();
	while (pos != cols.end() && !(pos->first == name))
		++pos;
	return pos;
}


163 164
/// ignore_levels - алиасы в скольки верхних уровнях поддерева нужно игнорировать.
/// Например, при ignore_levels=1 ast не может быть занесен в словарь, но его дети могут.
165
void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels)
166
{
167
	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);
168

169 170
	/// Обход снизу-вверх. Не опускаемся в подзапросы.
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
171 172
	{
		int new_ignore_levels = std::max(0, ignore_levels - 1);
173 174
		/// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем
		///  (пропустим сам expression list и его детей).
175 176
		if (select && *it == select->array_join_expression_list)
			new_ignore_levels = 2;
177
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
178 179 180 181 182 183
			createAliasesDict(*it, new_ignore_levels);
	}

	if (ignore_levels > 0)
		return;

A
Alexey Milovidov 已提交
184
	std::string * alias = getAlias(ast);
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
	if (alias && !alias->empty())
	{
		if (aliases.count(*alias) && ast->getTreeID() != aliases[*alias]->getTreeID())
		{
			throw Exception("Different expressions with the same alias " + *alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
		}
		else
		{
			aliases[*alias] = ast;
		}
	}
}


/** Looks up the storage the query reads from.
  * Returns an empty StoragePtr when `ast` is not a SELECT query, when it has no table,
  * or when the table position holds a nested SELECT or a function node.
  * Otherwise returns context.tryGetTable(database, table); the database name is ""
  * when the query does not specify one.
  */
StoragePtr ExpressionAnalyzer::getTable()
{
	const ASTSelectQuery * select = dynamic_cast<const ASTSelectQuery *>(&*ast);
	if (!select)
		return StoragePtr();

	const bool is_plain_table = select->table
		&& !dynamic_cast<const ASTSelectQuery *>(&*select->table)
		&& !dynamic_cast<const ASTFunction *>(&*select->table);
	if (!is_plain_table)
		return StoragePtr();

	String database = select->database ?
		dynamic_cast<const ASTIdentifier &>(*select->database).name :
		"";
	const String & table = dynamic_cast<const ASTIdentifier &>(*select->table).name;
	return context.tryGetTable(database, table);
}


bool ExpressionAnalyzer::needSignRewrite()
{
	if (settings.sign_rewrite && storage)
	{
		if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage))
			return merge_tree->getName() == "CollapsingMergeTree";
		if (const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage))
			return !distributed->getSignColumnName().empty();
	}
	return false;
}


/// Returns the sign column name reported by `storage` when it is a StorageMergeTree
/// or a StorageDistributed; returns an empty string for any other storage type.
/// Dereferences `storage` unconditionally, so the caller must ensure it is set.
String ExpressionAnalyzer::getSignColumnName()
{
	const StorageMergeTree * as_merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage);
	if (as_merge_tree)
		return as_merge_tree->getSignColumnName();

	const StorageDistributed * as_distributed = dynamic_cast<const StorageDistributed *>(&*storage);
	if (as_distributed)
		return as_distributed->getSignColumnName();

	return "";
}


/// Creates a new ASTIdentifier node named `sign_column_name`, using the range of the root `ast`.
ASTPtr ExpressionAnalyzer::createSignColumn()
{
	ASTIdentifier * identifier = new ASTIdentifier(ast->range, sign_column_name);
	/// Hand the node over to the smart pointer before touching it further.
	ASTPtr result = identifier;
	identifier->name = sign_column_name;
	return result;
}


/// Builds the replacement for a count(...) call: sum(Sign), keeping the alias of `node`.
/// The arguments of `node` are not used.
ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node)
{
	/// Argument list: (Sign)
	ASTExpressionList * sign_args = new ASTExpressionList;
	ASTPtr sign_args_node = sign_args;
	sign_args->children.push_back(createSignColumn());

	/// sum(Sign)
	ASTFunction * sum = new ASTFunction;
	ASTPtr sum_node = sum;
	sum->name = "sum";
	sum->alias = node->alias;
	sum->arguments = sign_args_node;
	sum->children.push_back(sign_args_node);

	return sum_node;
}


/** Builds the replacement for a sum(x) call: sum(multiply(Sign, x)), keeping the alias of `node`.
  * Only the first argument of `node` is used.
  * Throws TYPE_MISMATCH when `node` has no arguments (previously this read past an empty list).
  */
ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node)
{
	const bool has_argument = node->arguments && !node->arguments->children.empty();
	if (!has_argument)
		throw Exception("Aggregate function " + node->name + " requires an argument", ErrorCodes::TYPE_MISMATCH);

	/// Argument list: (Sign, x)
	ASTExpressionList * mult_args = new ASTExpressionList;
	ASTPtr mult_args_node = mult_args;
	mult_args->children.push_back(createSignColumn());
	mult_args->children.push_back(node->arguments->children[0]);

	/// multiply(Sign, x)
	ASTFunction * mult = new ASTFunction;
	ASTPtr mult_node = mult;
	mult->name = "multiply";
	mult->arguments = mult_args_node;
	mult->children.push_back(mult_args_node);

	/// Argument list: (multiply(Sign, x))
	ASTExpressionList * sum_args = new ASTExpressionList;
	ASTPtr sum_args_node = sum_args;
	sum_args->children.push_back(mult_node);

	/// sum(multiply(Sign, x))
	ASTFunction * sum = new ASTFunction;
	ASTPtr sum_node = sum;
	sum->name = "sum";
	sum->alias = node->alias;
	sum->arguments = sum_args_node;
	sum->children.push_back(sum_args_node);

	return sum_node;
}


/// Builds the replacement for an avg(x) call: divide(<rewritten sum>, <rewritten count>).
/// The alias of `node` is moved onto the resulting divide node.
ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node)
{
	/// A copy of the node without the alias, used to rewrite the numerator and the denominator
	/// (otherwise both of them would inherit the alias).
	ASTPtr node_clone = node->clone();
	ASTFunction * node_clone_func = dynamic_cast<ASTFunction *>(&*node_clone);
	node_clone_func->alias = "";

	/// Argument list: (rewriteSum(...), rewriteCount(...))
	ASTExpressionList * div_args = new ASTExpressionList;
	ASTPtr div_args_node = div_args;
	div_args->children.push_back(rewriteSum(node_clone_func));
	div_args->children.push_back(rewriteCount(node_clone_func));

	/// divide(rewriteSum(...), rewriteCount(...))
	ASTFunction * div = new ASTFunction;
	ASTPtr div_node = div;
	div->name = "divide";
	div->alias = node->alias;
	div->arguments = div_args_node;
	div->children.push_back(div_args_node);

	return div_node;
}


333
/** Applies the sign rewrite to `ast` when it is a call of count, sum or avg.
  * On success `ast` is replaced with the rewritten tree and true is returned.
  * Returns false, leaving `ast` untouched, for non-function nodes and other function names.
  */
bool ExpressionAnalyzer::considerSignRewrite(ASTPtr & ast)
{
	ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast);
	if (!node)
		return false;

	/// Build the replacement first: `name` refers into the old node, which may be
	/// released as soon as `ast` is overwritten.
	const String & name = node->name;
	ASTPtr rewritten;
	if (name == "count")
		rewritten = rewriteCount(node);
	else if (name == "sum")
		rewritten = rewriteSum(node);
	else if (name == "avg")
		rewritten = rewriteAvg(node);
	else
		return false;

	ast = rewritten;
	return true;
}


void ExpressionAnalyzer::normalizeTree()
{
	SetOfASTs tmp_set;
	MapOfASTs tmp_map;
	if (needSignRewrite())
		sign_column_name = getSignColumnName();
M
Merge  
Michael Kolupaev 已提交
357
	normalizeTreeImpl(ast, tmp_map, tmp_set, "", false);
358 359 360 361 362
}


/// finished_asts - уже обработанные вершины (и на что они заменены)
/// current_asts - вершины в текущем стеке вызовов этого метода
363
/// current_alias - алиас, повешенный на предка ast (самого глубокого из предков с алиасами)
364
/// in_sign_rewritten - находимся ли мы в поддереве, полученном в результате sign rewrite
M
Merge  
Michael Kolupaev 已提交
365
void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, bool in_sign_rewritten)
366 367 368 369 370 371
{
	if (finished_asts.count(ast))
	{
		ast = finished_asts[ast];
		return;
	}
372

373 374
	ASTPtr initial_ast = ast;
	current_asts.insert(initial_ast);
375

M
Merge  
Michael Kolupaev 已提交
376 377
	std::string * my_alias = getAlias(ast);
	if (my_alias && !my_alias->empty())
378
		current_alias = *my_alias;
379

380
	/// rewrite правила, которые действуют при обходе сверху-вниз.
381

382 383
	if (!in_sign_rewritten && !sign_column_name.empty())
		in_sign_rewritten = considerSignRewrite(ast);
384

385 386
	bool replaced = false;

387 388 389 390 391 392 393 394 395 396 397 398
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		/** Нет ли в таблице столбца, название которого полностью совпадает с записью функции?
		 * Например, в таблице есть столбец "domain(URL)", и мы запросили domain(URL).
		 */
		String function_string = node->getColumnName();
		NamesAndTypesList::const_iterator it = findColumn(function_string);
		if (columns.end() != it)
		{
			ASTIdentifier * ast_id = new ASTIdentifier(node->range, std::string(node->range.first, node->range.second));
			ast = ast_id;
			current_asts.insert(ast);
399
			replaced = true;
400
		}
401 402 403
		if (node->name == "in" || node->name == "notIn")
			if (ASTIdentifier * right = dynamic_cast<ASTIdentifier *>(&*node->arguments->children[1]))
				right->kind = ASTIdentifier::Table;
404 405
	}
	else if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
406
	{
407
		if (node->kind == ASTIdentifier::Column)
408
		{
M
Merge  
Michael Kolupaev 已提交
409
			/// Если это алиас, но не родительский алиас (чтобы работали конструкции вроде "SELECT column+1 AS column").
410
			Aliases::const_iterator jt = aliases.find(node->name);
M
Merge  
Michael Kolupaev 已提交
411
			if (jt != aliases.end() && current_alias != node->name)
412
			{
413
				/// Заменим его на соответствующий узел дерева.
414 415
				if (current_asts.count(jt->second))
					throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
416 417 418 419 420 421 422 423 424 425 426 427
				if (my_alias && !my_alias->empty() && *my_alias != jt->second->getAlias())
				{
					/// В конструкции вроде "a AS b", где a - алиас, нужно перевесить алиас b на результат подстановки алиаса a.
					ast = jt->second->clone();
					setAlias(ast, *my_alias);
				}
				else
				{
					ast = jt->second;
				}

				replaced = true;
428 429 430 431
			}
			else
			{
				/// Проверим имеет ли смысл sign-rewrite
432
				if (!in_sign_rewritten && sign_column_name != "" && node->name == sign_column_name)
433 434 435 436 437 438 439 440 441 442 443 444 445
					throw Exception("Requested Sign column while sign-rewrite is on.", ErrorCodes::QUERY_SECTION_DOESNT_MAKE_SENSE);
			}
		}
	}
	else if (ASTExpressionList * node = dynamic_cast<ASTExpressionList *>(&*ast))
	{
		/// Заменим * на список столбцов.
		ASTs & asts = node->children;
		for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
		{
			if (ASTAsterisk * asterisk = dynamic_cast<ASTAsterisk *>(&*asts[i]))
			{
				ASTs all_columns;
S
Merge  
Sergey Fedorov 已提交
446
				for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
447 448 449 450 451 452
					all_columns.push_back(new ASTIdentifier(asterisk->range, it->first));
				asts.erase(asts.begin() + i);
				asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end());
			}
		}
	}
453

454 455 456 457 458 459 460 461 462 463
	/// Если заменили корень поддерева вызовемся для нового корня снова - на случай, если алиас заменился на алиас.
	if (replaced)
	{
		normalizeTreeImpl(ast, finished_asts, current_asts, current_alias, in_sign_rewritten);
		current_asts.erase(initial_ast);
		current_asts.erase(ast);
		finished_asts[initial_ast] = ast;
		return;
	}

464
	/// Рекурсивные вызовы. Не опускаемся в подзапросы.
465

466 467
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
468
			normalizeTreeImpl(*it, finished_asts, current_asts, current_alias, in_sign_rewritten);
469

470 471 472
	/// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression.
	if (ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast))
	{
473 474
		if (select->prewhere_expression)
			normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
475
		if (select->where_expression)
476
			normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
477
		if (select->having_expression)
478
			normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
479
	}
480

M
Merge  
Michael Kolupaev 已提交
481 482
	/// Действия, выполняемые снизу вверх.

483
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
484
	{
485 486 487 488
		if (node->kind == ASTFunction::TABLE_FUNCTION)
		{
		}
		else if (node->name == "lambda")
489
		{
490
			node->kind = ASTFunction::LAMBDA_EXPRESSION;
491 492 493
		}
		else if (context.getAggregateFunctionFactory().isAggregateFunctionName(node->name))
		{
494
			node->kind = ASTFunction::AGGREGATE_FUNCTION;
495
		}
496 497 498 499 500 501 502 503
		else if (node->name == "arrayJoin")
		{
			node->kind = ASTFunction::ARRAY_JOIN;
		}
		else
		{
			node->kind = ASTFunction::FUNCTION;
		}
504
	}
505

506 507 508 509 510 511
	current_asts.erase(initial_ast);
	current_asts.erase(ast);
	finished_asts[initial_ast] = ast;
}


512
/** Replaces the second argument of `node` (an IN-like function) with an ASTSet node.
  * sample_block is used to look up the type(s) of the left argument when the right
  * argument is an explicit enumeration of values.
  * Does nothing when the second argument is already an ASTSet.
  * Throws ILLEGAL_TYPE_OF_ARGUMENT when the right argument is neither a subquery,
  * an identifier, a tuple call nor a literal.
  */
void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block)
{
	/** The right argument must be converted into a set.
	  * It can be a table name, a value, an enumeration of values or a subquery.
	  * An enumeration of values is parsed as the function tuple.
	  */
	IAST & args = *node->arguments;
	ASTPtr & arg = args.children[1];

	if (dynamic_cast<ASTSet *>(&*arg))
		return;

	/// A subquery, or a table name to select from.
	if (dynamic_cast<ASTSubquery *>(&*arg) || dynamic_cast<ASTIdentifier *>(&*arg))
	{
		/// Get the block stream for the subquery, hand it to the set, and put the set in place of the subquery.
		ASTSet * ast_set = new ASTSet(arg->getColumnName());
		ASTPtr ast_set_ptr = ast_set;

		/// Sets are cached by column name in sets_with_subqueries.
		if (sets_with_subqueries.count(ast_set->getColumnName()))
		{
			ast_set->set = sets_with_subqueries[ast_set->getColumnName()];
		}
		else
		{
			/** Limits on the maximum result size do not apply to a subquery in the IN section,
			  * because the result of this subquery is not yet the result of the whole query.
			  * The limits max_rows_in_set, max_bytes_in_set, set_overflow_mode work instead.
			  */
			Context subquery_context = context;
			Settings subquery_settings = context.getSettings();
			subquery_settings.limits.max_result_rows = 0;
			subquery_settings.limits.max_result_bytes = 0;
			/// Calculating extremes makes no sense and is not needed (if it were done, the extremes
			/// of the subquery could be taken as the extremes of the whole query).
			subquery_settings.extremes = 0;
			subquery_context.setSettings(subquery_settings);

			ASTPtr subquery;
			if (ASTIdentifier * table = dynamic_cast<ASTIdentifier *>(&*arg))
			{
				/// A bare table name is turned into "SELECT * FROM <name>" and parsed.
				ParserSelectQuery parser;

				String query = "SELECT * FROM " + table->name;
				const char * begin = query.data();
				const char * end = begin + query.size();
				const char * pos = begin;
				const char * expected = "";

				bool parse_res = parser.parse(pos, end, subquery, expected);
				if (!parse_res)
					throw Exception("Error in parsing select query while creating set for table " + table->name + ".",
									ErrorCodes::LOGICAL_ERROR);
			}
			else
				subquery = arg->children[0];

			InterpreterSelectQuery interpreter(subquery, subquery_context, QueryProcessingStage::Complete, subquery_depth + 1);
			ast_set->set = new Set(settings.limits);
			ast_set->set->setSource(interpreter.execute());
			sets_with_subqueries[ast_set->getColumnName()] = ast_set->set;
		}
		arg = ast_set_ptr;
	}
	else
	{
		/// The case of an explicit enumeration of values.

		DataTypes set_element_types;
		ASTPtr & left_arg = args.children[0];

		ASTFunction * left_arg_tuple = dynamic_cast<ASTFunction *>(&*left_arg);

		if (left_arg_tuple && left_arg_tuple->name == "tuple")
		{
			for (ASTs::const_iterator it = left_arg_tuple->arguments->children.begin();
				it != left_arg_tuple->arguments->children.end();
				++it)
				set_element_types.push_back(sample_block.getByName((*it)->getColumnName()).type);
		}
		else
		{
			/// For an array on the left, the set elements have the nested type of the array.
			DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type;
			if (DataTypeArray * array_type = dynamic_cast<DataTypeArray *>(&*left_type))
				set_element_types.push_back(array_type->getNestedType());
			else
				set_element_types.push_back(left_type);
		}

		/// Distinguish the case x in (1, 2) from the case x in 1 (same as x in (1)).
		bool single_value = false;
		ASTPtr elements_ast = arg;

		if (ASTFunction * set_func = dynamic_cast<ASTFunction *>(&*arg))
		{
			if (set_func->name != "tuple")
				throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
								ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			/// Distinguish the case (x, y) in ((1, 2), (3, 4)) from the case (x, y) in (1, 2).
			/// An empty tuple has no first element to inspect, so it is treated as "not a tuple of tuples".
			ASTs & set_elements = set_func->arguments->children;
			ASTFunction * any_element = set_elements.empty() ? NULL : dynamic_cast<ASTFunction *>(&*set_elements[0]);
			if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple"))
				single_value = true;
			else
				elements_ast = set_func->arguments;
		}
		else if (dynamic_cast<ASTLiteral *>(&*arg))
		{
			single_value = true;
		}
		else
		{
			throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
							ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
		}

		/// A single value is wrapped into a one-element expression list.
		if (single_value)
		{
			ASTPtr exp_list = new ASTExpressionList;
			exp_list->children.push_back(elements_ast);
			elements_ast = exp_list;
		}

		ASTSet * ast_set = new ASTSet(arg->getColumnName());
		ASTPtr ast_set_ptr = ast_set;
		ast_set->set = new Set(settings.limits);
		ast_set->set->createFromAST(set_element_types, elements_ast);
		arg = ast_set_ptr;
	}
}


static std::string getUniqueName(const Block & block, const std::string & prefix)
{
	int i = 1;
646
	while (block.has(prefix + toString(i)))
647
		++i;
648
	return prefix + toString(i);
649 650 651
}


652 653 654
/// Runs getActionsImpl for `ast` on a fresh ScopeStack seeded with `actions`,
/// then stores the level popped from that stack back into `actions`.
void ExpressionAnalyzer::getRootActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ExpressionActions & actions)
{
	ScopeStack scope_stack(actions, settings);
	getActionsImpl(ast, no_subqueries, only_consts, scope_stack);
	actions = *scope_stack.popLevel();
}


660 661
void ExpressionAnalyzer::getArrayJoinedColumns()
{
662 663
	if (select_query && select_query->array_join_expression_list)
	{
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
		ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts .size(); ++i)
		{
			ASTPtr ast = array_join_asts [i];

			String nested_table_name = ast->getColumnName();
			String nested_table_alias = ast->getAlias();
			if (nested_table_alias == nested_table_name && !dynamic_cast<ASTIdentifier *>(&*ast))
				throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);

			if (array_join_alias_to_name.count(nested_table_alias) || aliases.count(nested_table_alias))
				throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
			array_join_alias_to_name[nested_table_alias] = nested_table_name;
		}

		ASTs & query_asts = select_query->children;
		for (size_t i = 0; i < query_asts.size(); ++i)
		{
			ASTPtr ast = query_asts[i];
			if (select_query && ast == select_query->array_join_expression_list)
				continue;
			getArrayJoinedColumnsImpl(ast);
		}
687 688 689

		/// Если результат ARRAY JOIN не используется, придется все равно по-ARRAY-JOIN-ить какой-нибудь столбец,
		/// чтобы получить правильное количество строк.
690
		if (array_join_result_to_source.empty())
691 692 693 694 695 696 697
		{
			ASTPtr expr = select_query->array_join_expression_list->children[0];
			String source_name = expr->getColumnName();
			String result_name = expr->getAlias();
			/// Это массив.
			if (!dynamic_cast<ASTIdentifier *>(&*expr) || findColumn(source_name, columns) != columns.end())
			{
698
				array_join_result_to_source[result_name] = source_name;
699 700 701 702 703 704 705 706 707 708
			}
			else /// Это вложенная таблица.
			{
				bool found = false;
				for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
				{
					String table_name = DataTypeNested::extractNestedTableName(it->first);
					String column_name = DataTypeNested::extractNestedColumnName(it->first);
					if (table_name == source_name)
					{
709 710
						array_join_result_to_source[DataTypeNested::concatenateNestedName(result_name, column_name)]
							= it->first;
711 712 713 714
						found = true;
						break;
					}
				}
715 716
				if (!found)
					throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
717 718
			}
		}
719
	}
720 721 722 723
}


/** Walks the subtree of `ast` (without descending into nested SELECT queries) and, for every
  * column identifier that refers to an ARRAY JOIN alias - either directly or as
  * "<alias>.<nested column>" - records the mapping result name -> source name
  * in array_join_result_to_source.
  */
void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast)
{
	ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast);
	if (!node)
	{
		/// Not an identifier: recurse into the children.
		for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
			if (!dynamic_cast<ASTSelectQuery *>(&**it))
				getArrayJoinedColumnsImpl(*it);
		return;
	}

	if (node->kind != ASTIdentifier::Column)
		return;

	String table_name = DataTypeNested::extractNestedTableName(node->name);
	if (array_join_alias_to_name.count(node->name))
	{
		/// The identifier is the ARRAY JOIN alias itself.
		array_join_result_to_source[node->name] = array_join_alias_to_name[node->name];
	}
	else if (array_join_alias_to_name.count(table_name))
	{
		/// The identifier is a column of an aliased nested table.
		String nested_column = DataTypeNested::extractNestedColumnName(node->name);
		array_join_result_to_source[node->name]
			= DataTypeNested::concatenateNestedName(array_join_alias_to_name[table_name], nested_column);
	}
}


/** Recursively appends to actions_stack the actions needed to compute the expression `ast`.
  *
  * ast            - the expression (sub)tree to process.
  * no_subqueries  - if true, sets for IN / NOT IN are not built; a placeholder UInt8 column is added instead.
  * only_consts    - if true, missing identifiers are not an error and a function is applied
  *                  only when all of its arguments are already present in the sample block.
  * actions_stack  - stack of action scopes; a new level is pushed while processing a lambda body.
  *
  * Throws Exception on lambda expressions outside of function arguments, malformed arrayJoin / lambda,
  *  unknown identifiers and columns that are neither aggregated nor in GROUP BY.
  */
void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack)
{
	/// If the result of the expression is already present in the block, there is nothing to do.
	if ((dynamic_cast<ASTFunction *>(&*ast) || dynamic_cast<ASTLiteral *>(&*ast))
		&& actions_stack.getSampleBlock().has(ast->getColumnName()))
		return;

	if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		std::string name = node->getColumnName();
		if (!only_consts && !actions_stack.getSampleBlock().has(name))
		{
			/// The requested column is not in the block.
			/// If such a column exists before aggregation, the user has probably forgotten
			///  to wrap it into an aggregate function or to add it to GROUP BY.

			bool found = false;
			for (NamesAndTypesList::const_iterator it = columns_after_array_join.begin();
					it != columns_after_array_join.end(); ++it)
			{
				if (it->first == name)
				{
					found = true;
					break;
				}
			}

			if (found)
				throw Exception("Column " + name + " is not under aggregate function and not in GROUP BY.",
					ErrorCodes::NOT_AN_AGGREGATE);
		}
	}
	else if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		if (node->kind == ASTFunction::LAMBDA_EXPRESSION)
			throw Exception("Unexpected expression", ErrorCodes::UNEXPECTED_EXPRESSION);

		if (node->kind == ASTFunction::ARRAY_JOIN)
		{
			if (node->arguments->children.size() != 1)
				throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
			ASTPtr arg = node->arguments->children[0];
			getActionsImpl(arg, no_subqueries, only_consts, actions_stack);
			if (!only_consts)
			{
				/// Copy the argument under the name of the arrayJoin expression and array-join that copy.
				String result_name = node->getColumnName();
				actions_stack.addAction(ExpressionActions::Action::copyColumn(arg->getColumnName(), result_name));
				NameSet joined_columns;
				joined_columns.insert(result_name);
				actions_stack.addAction(ExpressionActions::Action::arrayJoin(joined_columns));
			}

			return;
		}

		if (node->kind == ASTFunction::FUNCTION)
		{
			if (node->name == "in" || node->name == "notIn")
			{
				/// Both branches below that do real work read the first argument;
				///  make sure it exists instead of indexing past the end of an empty argument list.
				const bool uses_first_argument = !no_subqueries || !only_consts;
				if (uses_first_argument && node->arguments->children.empty())
					throw Exception("Function " + node->name + " called without arguments",
						ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

				if (!no_subqueries)
				{
					/// Find out the type of the first argument
					///  (getActionsImpl will be called for it again later and that will not affect anything).
					getActionsImpl(node->arguments->children[0], no_subqueries, only_consts, actions_stack);
					/// Turn the tuple or the subquery into a set.
					makeSet(node, actions_stack.getSampleBlock());
				}
				else
				{
					if (!only_consts)
					{
						/// We are in a part of the tree that is not going to be evaluated. Only the types are needed.
						/// Do not execute subqueries and do not build sets. Insert an arbitrary column of the right type.
						ColumnWithNameAndType fake_column;
						fake_column.name = node->getColumnName();
						fake_column.type = new DataTypeUInt8;
						fake_column.column = new ColumnConstUInt8(1, 0);
						actions_stack.addAction(ExpressionActions::Action::addColumn(fake_column));
						getActionsImpl(node->arguments->children[0], no_subqueries, only_consts, actions_stack);
					}
					return;
				}
			}

			FunctionPtr function = context.getFunctionFactory().get(node->name, context);

			Names argument_names;
			DataTypes argument_types;
			bool arguments_present = true;

			/// If the function has a lambda expression argument, its type must be determined before the recursive call.
			bool has_lambda_arguments = false;

			for (size_t i = 0; i < node->arguments->children.size(); ++i)
			{
				ASTPtr child = node->arguments->children[i];

				ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
				ASTSet * set = dynamic_cast<ASTSet *>(&*child);
				if (lambda && lambda->name == "lambda")
				{
					/// If the argument is a lambda expression, only remember its approximate type.
					if (lambda->arguments->children.size() != 2)
						throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

					ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);

					if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
						throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

					has_lambda_arguments = true;
					argument_types.push_back(new DataTypeExpression(DataTypes(lambda_args_tuple->arguments->children.size())));
					/// The name will be chosen in the next loop.
					argument_names.push_back("");
				}
				else if (set)
				{
					ColumnWithNameAndType column;
					column.type = new DataTypeSet;

					/// If the argument is a set given by an enumeration of values, give it a unique name,
					///  so that sets written identically are not glued together (they may have different types).
					if (!set->set->getSource())
						column.name = getUniqueName(actions_stack.getSampleBlock(), "__set");
					else
						column.name = set->getColumnName();

					if (!actions_stack.getSampleBlock().has(column.name))
					{
						column.column = new ColumnSet(1, set->set);

						actions_stack.addAction(ExpressionActions::Action::addColumn(column));
					}

					argument_types.push_back(column.type);
					argument_names.push_back(column.name);
				}
				else
				{
					/// If the argument is not a lambda expression, recurse into it and find out its type.
					getActionsImpl(child, no_subqueries, only_consts, actions_stack);
					std::string name = child->getColumnName();
					if (actions_stack.getSampleBlock().has(name))
					{
						argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type);
						argument_names.push_back(name);
					}
					else
					{
						if (only_consts)
						{
							arguments_present = false;
						}
						else
						{
							throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
						}
					}
				}
			}

			if (only_consts && !arguments_present)
				return;

			Names additional_requirements;

			if (has_lambda_arguments && !only_consts)
			{
				function->getLambdaArgumentTypes(argument_types);

				/// Recurse into the lambda expressions.
				for (size_t i = 0; i < node->arguments->children.size(); ++i)
				{
					ASTPtr child = node->arguments->children[i];

					ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
					if (lambda && lambda->name == "lambda")
					{
						/// The function was expected to leave a DataTypeExpression in this slot; do not dereference blindly.
						DataTypeExpression * lambda_type = dynamic_cast<DataTypeExpression *>(&*argument_types[i]);
						if (!lambda_type)
							throw Exception("Type of lambda argument of function " + node->name + " is not an expression type",
								ErrorCodes::LOGICAL_ERROR);

						/// The shape of the lambda (two arguments, the first one is a tuple) was validated in the loop above.
						ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);
						ASTs lambda_arg_asts = lambda_args_tuple->arguments->children;

						/// Each declared lambda parameter needs a type; otherwise the indexing below would run out of bounds.
						if (lambda_type->getArgumentTypes().size() < lambda_arg_asts.size())
							throw Exception("Function " + node->name + " provided too few argument types for lambda",
								ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

						NamesAndTypesList lambda_arguments;

						for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
						{
							ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*lambda_arg_asts[j]);
							if (!identifier)
								throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

							String arg_name = identifier->name;
							NameAndTypePair arg(arg_name, lambda_type->getArgumentTypes()[j]);

							lambda_arguments.push_back(arg);
						}

						/// Compute the lambda body in its own scope, where the lambda parameters are available as columns.
						actions_stack.pushLevel(lambda_arguments);
						getActionsImpl(lambda->arguments->children[1], no_subqueries, only_consts, actions_stack);
						ExpressionActionsPtr lambda_actions = actions_stack.popLevel();

						String result_name = lambda->arguments->children[1]->getColumnName();
						lambda_actions->finalize(Names(1, result_name));
						DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
						/// NOTE: lambda_type points into the old value of argument_types[i] and must not be used after this line.
						argument_types[i] = new DataTypeExpression(lambda_type->getArgumentTypes(), result_type);

						/// Columns required by the lambda body that are not its own parameters are captured from the outer scope.
						Names captured = lambda_actions->getRequiredColumns();
						for (size_t j = 0; j < captured.size(); ++j)
						{
							if (findColumn(captured[j], lambda_arguments) == lambda_arguments.end())
								additional_requirements.push_back(captured[j]);
						}

						/// We cannot use getColumnName() as the name,
						///  because it does not uniquely identify the expression (argument types may differ).
						argument_names[i] = getUniqueName(actions_stack.getSampleBlock(), "__lambda");

						ColumnWithNameAndType lambda_column;
						lambda_column.column = new ColumnExpression(1, lambda_actions, lambda_arguments, result_type, result_name);
						lambda_column.type = argument_types[i];
						lambda_column.name = argument_names[i];
						actions_stack.addAction(ExpressionActions::Action::addColumn(lambda_column));
					}
				}
			}

			if (only_consts)
			{
				/// In constants-only mode the function is applied only if every argument is already in the block.
				for (size_t i = 0; i < argument_names.size(); ++i)
				{
					if (!actions_stack.getSampleBlock().has(argument_names[i]))
					{
						arguments_present = false;
						break;
					}
				}
			}

			if (arguments_present)
				actions_stack.addAction(ExpressionActions::Action::applyFunction(function, argument_names, node->getColumnName()),
										additional_requirements);
		}
	}
	else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
	{
		/// A literal becomes a constant column of the type deduced from its value.
		DataTypePtr type = apply_visitor(FieldToDataType(), node->value);
		ColumnWithNameAndType column;
		column.column = type->createConstColumn(1, node->value);
		column.type = type;
		column.name = node->getColumnName();

		actions_stack.addAction(ExpressionActions::Action::addColumn(column));
	}
	else
	{
		/// Any other node: just process all of its children.
		for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
			getActionsImpl(*it, no_subqueries, only_consts, actions_stack);
	}
}


/// Collects descriptions of aggregate functions found in `ast` into aggregate_descriptions
///  and appends to `actions` the actions that compute their arguments.
/// Sets has_aggregation when at least one aggregate function is found.
/// Does not descend into subqueries and nested SELECT queries.
void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);

	if (!func || func->kind != ASTFunction::AGGREGATE_FUNCTION)
	{
		/// Not an aggregate function: keep searching among the children.
		for (size_t i = 0; i < ast->children.size(); ++i)
		{
			ASTPtr child = ast->children[i];

			if (dynamic_cast<ASTSubquery *>(&*child))
				continue;
			if (dynamic_cast<ASTSelectQuery *>(&*child))
				continue;

			getAggregatesImpl(child, actions);
		}
		return;
	}

	has_aggregation = true;

	AggregateDescription description;
	description.column_name = func->getColumnName();

	/// The same aggregate may occur several times in the query; describe it only once.
	for (const auto & known : aggregate_descriptions)
		if (known.column_name == description.column_name)
			return;

	ASTs & argument_asts = func->arguments->children;
	DataTypes argument_types(argument_asts.size());
	description.argument_names.resize(argument_asts.size());

	for (size_t arg_idx = 0; arg_idx < argument_asts.size(); ++arg_idx)
	{
		/// Arguments are computed without executing subqueries.
		getRootActionsImpl(argument_asts[arg_idx], true, false, actions);

		const std::string argument_name = argument_asts[arg_idx]->getColumnName();
		argument_types[arg_idx] = actions.getSampleBlock().getByName(argument_name).type;
		description.argument_names[arg_idx] = argument_name;
	}

	description.function = context.getAggregateFunctionFactory().get(func->name, argument_types);

	if (func->parameters)
	{
		ASTs & parameter_asts = dynamic_cast<ASTExpressionList &>(*func->parameters).children;
		Array parameter_values(parameter_asts.size());

		for (size_t param_idx = 0; param_idx < parameter_asts.size(); ++param_idx)
		{
			ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&*parameter_asts[param_idx]);
			if (!literal)
				throw Exception("Parameters to aggregate functions must be literals", ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);

			parameter_values[param_idx] = literal->value;
		}

		description.parameters = parameter_values;
		description.function->setParameters(parameter_values);
	}

	/// Parameters (if any) are set before the argument types.
	description.function->setArguments(argument_types);

	aggregate_descriptions.push_back(description);
}

1061 1062 1063 1064 1065
/// Throws LOGICAL_ERROR unless the analyzed query is a SELECT query (select_query is set).
void ExpressionAnalyzer::assertSelect()
{
	const bool is_select = !!select_query;

	if (!is_select)
		throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
}
1066

1067
void ExpressionAnalyzer::assertAggregation()
1068 1069 1070
{
	if (!has_aggregation)
		throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
1071
}
1072

1073 1074 1075 1076 1077 1078 1079 1080
/// If the chain has no steps yet, stores the settings in it and creates the first step
///  with `columns` as its input. A chain that already has steps is left untouched.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
	if (!chain.steps.empty())
		return;

	chain.settings = settings;

	ExpressionActionsChain::Step first_step(new ExpressionActions(columns, settings));
	chain.steps.push_back(first_step);
}
1081

1082 1083
/// Adds to `actions` a single arrayJoin action over all result columns from array_join_result_to_source.
/// A source column whose result name differs from its own name is first copied under the result name.
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions)
{
	NameSet joined_names;

	for (const auto & result_and_source : array_join_result_to_source)
	{
		const auto & result_name = result_and_source.first;
		const auto & source_name = result_and_source.second;

		if (result_name != source_name)
			actions.add(ExpressionActions::Action::copyColumn(source_name, result_name));

		joined_names.insert(result_name);
	}

	actions.add(ExpressionActions::Action::arrayJoin(joined_names));
}


1096 1097 1098
/// Appends the ARRAY JOIN section to the last step of the chain (creating the first step if needed).
/// Returns false and does nothing if the query has no ARRAY JOIN section.
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->array_join_expression_list)
		return false;

	initChain(chain, columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// First compute the expressions being joined, then add the arrayJoin action itself.
	getRootActionsImpl(select_query->array_join_expression_list, false, false, *last_step.actions);
	addMultipleArrayJoinAction(*last_step.actions);

	/// Every result of ARRAY JOIN must be kept in the output of this step.
	for (const auto & result_and_source : array_join_result_to_source)
		last_step.required_output.push_back(result_and_source.first);

	return true;
}

1116 1117 1118
/// Appends computation of the WHERE expression to the last step of the chain (creating the first step if needed)
///  and marks its result column as a required output.
/// Returns false and does nothing if the query has no WHERE section.
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->where_expression)
		return false;

	initChain(chain, columns_after_array_join);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// The column name is taken before the actions are built, as in the rest of this file.
	last_step.required_output.push_back(select_query->where_expression->getColumnName());
	getRootActionsImpl(select_query->where_expression, false, false, *last_step.actions);

	return true;
}

/// Appends computation of GROUP BY keys to the last step of the chain (creating the first step if needed)
///  and marks every key as a required output.
/// Returns false and does nothing if the query has no GROUP BY section.
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (!select_query->group_expression_list)
		return false;

	initChain(chain, columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// Work on a copy of the list of key expressions.
	ASTs key_asts = select_query->group_expression_list->children;
	for (ASTs::iterator key = key_asts.begin(); key != key_asts.end(); ++key)
	{
		last_step.required_output.push_back((*key)->getColumnName());
		getRootActionsImpl(*key, false, false, *last_step.actions);
	}

	return true;
}

/// Appends computation of the arguments of all aggregate functions to the last step of the chain
///  (creating the first step if needed) and marks those arguments as required outputs.
/// Aggregate functions are looked for in the SELECT, HAVING and ORDER BY sections.
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain)
{
	assertAggregation();

	initChain(chain, columns_after_array_join);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	for (const auto & description : aggregate_descriptions)
		for (const auto & argument_name : description.argument_names)
			last_step.required_output.push_back(argument_name);

	ExpressionActions & step_actions = *last_step.actions;

	getActionsBeforeAggregationImpl(select_query->select_expression_list, step_actions);

	if (select_query->having_expression)
		getActionsBeforeAggregationImpl(select_query->having_expression, step_actions);

	if (select_query->order_expression_list)
		getActionsBeforeAggregationImpl(select_query->order_expression_list, step_actions);
}

/// Appends computation of the HAVING expression to the last step of the chain (creating the first step if needed)
///  and marks its result column as a required output.
/// Returns false and does nothing if the query has no HAVING section.
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (!select_query->having_expression)
		return false;

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	last_step.required_output.push_back(select_query->having_expression->getColumnName());
	getRootActionsImpl(select_query->having_expression, false, false, *last_step.actions);

	return true;
}

1192 1193 1194
/// Appends computation of the SELECT expressions to the last step of the chain (creating the first step if needed)
///  and marks every selected column as a required output.
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	getRootActionsImpl(select_query->select_expression_list, false, false, *last_step.actions);

	/// The names are collected from a copy of the list, taken after the actions have been built.
	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
		last_step.required_output.push_back((*expr)->getColumnName());
}
1207

1208
/// Appends computation of the ORDER BY expressions to the last step of the chain (creating the first step if needed)
///  and marks every sort expression as a required output.
/// Returns false and does nothing if the query has no ORDER BY section.
/// Throws UNKNOWN_TYPE_OF_AST_NODE if an element of the list is not an ASTOrderByElement with exactly one child.
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->order_expression_list)
		return false;

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	getRootActionsImpl(select_query->order_expression_list, false, false, *last_step.actions);

	ASTs order_asts = select_query->order_expression_list->children;
	for (ASTs::iterator it = order_asts.begin(); it != order_asts.end(); ++it)
	{
		ASTOrderByElement * element = dynamic_cast<ASTOrderByElement *>(&**it);

		const bool well_formed = element && element->children.size() == 1;
		if (!well_formed)
			throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

		/// The only child of the element is the expression to sort by.
		ASTPtr sort_expression = element->children[0];
		last_step.required_output.push_back(sort_expression->getColumnName());
	}

	return true;
}

1233
/// Appends to the last step of the chain (creating the first step if needed) a projection
///  that renames the SELECT columns to their aliases; the aliases become the required outputs.
void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	NamesWithAliases projection;

	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
	{
		NameWithAlias name_and_alias((*expr)->getColumnName(), (*expr)->getAlias());

		projection.push_back(name_and_alias);
		last_step.required_output.push_back(name_and_alias.second);
	}

	last_step.actions->add(ExpressionActions::Action::project(projection));
}


1253 1254 1255 1256 1257 1258 1259 1260 1261
/// Returns the mapped values of sets_with_subqueries, in the iteration order of that container.
Sets ExpressionAnalyzer::getSetsWithSubqueries()
{
	Sets collected;

	for (auto it = sets_with_subqueries.begin(); it != sets_with_subqueries.end(); ++it)
		collected.push_back(it->second);

	return collected;
}


1262 1263 1264
/// Builds, in a temporary ExpressionActions, the computation of the SELECT expressions followed by
///  the projection to their aliases, and returns the resulting sample block.
/// Subqueries are not executed (the actions are built with no_subqueries = true).
Block ExpressionAnalyzer::getSelectSampleBlock()
{
	assertSelect();

	ExpressionActions sample_actions(aggregated_columns, settings);
	NamesWithAliases projection;

	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
	{
		projection.push_back(NameWithAlias((*expr)->getColumnName(), (*expr)->getAlias()));
		getRootActionsImpl(*expr, true, false, sample_actions);
	}

	sample_actions.add(ExpressionActions::Action::project(projection));

	return sample_actions.getSampleBlock();
}

1281
/// Appends to `actions` the computation of the arguments of every aggregate function found in `ast`.
/// Arguments of an aggregate function are not searched for further aggregate functions.
void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);
	const bool is_aggregate = func && func->kind == ASTFunction::AGGREGATE_FUNCTION;

	if (!is_aggregate)
	{
		/// Keep searching among the children.
		for (size_t child_idx = 0; child_idx < ast->children.size(); ++child_idx)
			getActionsBeforeAggregationImpl(ast->children[child_idx], actions);
		return;
	}

	ASTs & argument_asts = func->arguments->children;
	for (size_t arg_idx = 0; arg_idx < argument_asts.size(); ++arg_idx)
		getRootActionsImpl(argument_asts[arg_idx], false, false, actions);
}


M
Merge  
Michael Kolupaev 已提交
1303
/// Builds the actions that compute the analyzed expression (or every element of the analyzed expression list).
/// If project_result is true, the result is projected: only the computed columns remain, renamed to their aliases.
/// Otherwise the computed columns keep their own names and the source columns are kept as well.
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result)
{
	ExpressionActionsPtr actions = new ExpressionActions(columns, settings);

	/// The root is either a list of expressions or a single expression.
	ASTs expressions;
	ASTExpressionList * expression_list = dynamic_cast<ASTExpressionList *>(&*ast);
	if (expression_list)
		expressions = expression_list->children;
	else
		expressions = ASTs(1, ast);

	NamesWithAliases projection;
	Names output_names;

	for (ASTs::iterator expr = expressions.begin(); expr != expressions.end(); ++expr)
	{
		std::string name = (*expr)->getColumnName();

		/// Without projection the column is output under its own name.
		std::string alias = name;
		if (project_result)
			alias = (*expr)->getAlias();

		projection.push_back(NameWithAlias(name, alias));
		output_names.push_back(alias);
		getRootActionsImpl(*expr, false, false, *actions);
	}

	if (!project_result)
	{
		/// Do not remove the source columns.
		for (const auto & source_column : columns)
			output_names.push_back(source_column.first);
	}
	else
	{
		actions->add(ExpressionActions::Action::project(projection));
	}

	actions->finalize(output_names);

	return actions;
}


/// Builds actions over an empty set of input columns, in constants-only mode and without subqueries
///  (getRootActionsImpl is called with no_subqueries = true, only_consts = true).
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
	const NamesAndTypesList no_input_columns;
	ExpressionActionsPtr const_actions = new ExpressionActions(no_input_columns, settings);

	getRootActionsImpl(ast, true, true, *const_actions);

	return const_actions;
}

/// Appends the names of the aggregation keys to key_names (existing contents are kept)
///  and overwrites `aggregates` with the collected aggregate function descriptions.
void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates)
{
	for (const auto & key : aggregation_keys)
		key_names.push_back(key.first);

	aggregates = aggregate_descriptions;
}

1362
void ExpressionAnalyzer::removeUnusedColumns()
1363 1364 1365
{
	NamesSet required;
	NamesSet ignored;
1366

1367 1368 1369 1370 1371 1372
	if (select_query && select_query->array_join_expression_list)
	{
		ASTs & expressions = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < expressions.size(); ++i)
		{
			/// Игнорируем идентификаторы верхнего уровня из секции ARRAY JOIN.
1373
			/// Их потом добавим отдельно.
1374
			if (dynamic_cast<ASTIdentifier *>(&*expressions[i]))
1375
			{
1376
				ignored.insert(expressions[i]->getColumnName());
1377 1378 1379 1380 1381 1382 1383
			}
			else
			{
				/// Для выражений в ARRAY JOIN ничего игнорировать не нужно.
				NamesSet empty;
				getRequiredColumnsImpl(expressions[i], required, empty);
			}
1384 1385

			ignored.insert(expressions[i]->getAlias());
1386 1387
		}
	}
1388

1389
	getRequiredColumnsImpl(ast, required, ignored);
1390

1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
	NameSet array_join_sources;
	for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
	{
		array_join_sources.insert(it->second);
	}
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (array_join_sources.count(it->first))
			required.insert(it->first);
	}

1402 1403 1404
	/// Нужно прочитать хоть один столбец, чтобы узнать количество строк.
	if (required.empty())
		required.insert(ExpressionActions::getSmallestColumn(columns));
1405

1406
	unknown_required_columns = required;
1407

1408 1409 1410 1411
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end();)
	{
		NamesAndTypesList::iterator it0 = it;
		++it;
1412

1413
		unknown_required_columns.erase(it0->first);
1414

1415
		if (!required.count(it0->first))
1416 1417
		{
			required.erase(it0->first);
1418
			columns.erase(it0);
1419
		}
1420
	}
S
Merge  
Sergey Fedorov 已提交
1421 1422 1423 1424 1425

	/// Возможно, среди неизвестных столбцов есть виртуальные. Удаляем их из списка неизвестных и добавляем
	/// в columns list, чтобы при дальнейшей обработке запроса они воспринимались как настоящие.
	for (NameSet::iterator it = unknown_required_columns.begin(); it != unknown_required_columns.end();)
	{
S
Merge  
Sergey Fedorov 已提交
1426
		if (storage && storage->hasColumn(*it))
S
Merge  
Sergey Fedorov 已提交
1427
		{
S
Merge  
Sergey Fedorov 已提交
1428 1429 1430 1431
			columns.push_back(storage->getColumn(*it));
			unknown_required_columns.erase(it++);
		} else
			++it;
S
Merge  
Sergey Fedorov 已提交
1432
	}
1433 1434 1435 1436
}

/// Returns the names of all columns currently in `columns`.
/// Throws UNKNOWN_IDENTIFIER (naming the first unknown column) if unknown_required_columns is not empty.
Names ExpressionAnalyzer::getRequiredColumns()
{
	if (!unknown_required_columns.empty())
		throw Exception("Unknown identifier: " + *unknown_required_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER);

	Names column_names;

	for (const auto & column : columns)
		column_names.push_back(column.first);

	return column_names;
}

1446
/** Collects into required_columns the names of column identifiers found in `ast`.
  *
  * Identifiers listed in ignored_names (by full name or by nested table name) are skipped.
  * Parameters of a lambda expression are added to ignored_names while its body is processed
  *  and removed again afterwards, so on normal return ignored_names is unchanged.
  * Subqueries, nested SELECT queries and the ARRAY JOIN section of a SELECT query are not visited.
  */
void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names)
{
	if (ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		if (identifier->kind != ASTIdentifier::Column)
			return;
		if (ignored_names.count(identifier->name))
			return;
		if (ignored_names.count(DataTypeNested::extractNestedTableName(identifier->name)))
			return;

		required_columns.insert(identifier->name);
		return;
	}

	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);
	if (func && func->kind == ASTFunction::LAMBDA_EXPRESSION)
	{
		ASTs & lambda_parts = func->arguments->children;

		if (lambda_parts.size() != 2)
			throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

		ASTFunction * params_tuple = dynamic_cast<ASTFunction *>(&*lambda_parts[0]);

		if (!params_tuple || params_tuple->name != "tuple")
			throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

		/// Lambda parameters must not get into required_columns.
		/// Remember only the names added here, so that names ignored by the caller stay ignored afterwards.
		Names newly_ignored;
		ASTs & params = params_tuple->arguments->children;
		for (size_t param_idx = 0; param_idx < params.size(); ++param_idx)
		{
			ASTIdentifier * param = dynamic_cast<ASTIdentifier *>(&*params[param_idx]);
			if (!param)
				throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

			if (ignored_names.insert(param->name).second)
				newly_ignored.push_back(param->name);
		}

		getRequiredColumnsImpl(lambda_parts[1], required_columns, ignored_names);

		for (size_t k = 0; k < newly_ignored.size(); ++k)
			ignored_names.erase(newly_ignored[k]);

		return;
	}

	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);

	for (size_t child_idx = 0; child_idx < ast->children.size(); ++child_idx)
	{
		ASTPtr child = ast->children[child_idx];

		if (dynamic_cast<ASTSubquery *>(&*child))
			continue;
		if (dynamic_cast<ASTSelectQuery *>(&*child))
			continue;

		/// Do not go into the ARRAY JOIN section, because there we have to look at the names of non-ARRAY-JOINed columns.
		/// removeUnusedColumns will send us there separately.
		if (select && child == select->array_join_expression_list)
			continue;

		getRequiredColumnsImpl(child, required_columns, ignored_names);
	}
}

1508
}