ExpressionAnalyzer.cpp 47.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
#include <DB/DataTypes/FieldToDataType.h>

#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTSet.h>
11
#include <DB/Parsers/ASTOrderByElement.h>
12 13 14 15

#include <DB/DataTypes/DataTypeSet.h>
#include <DB/DataTypes/DataTypeTuple.h>
#include <DB/DataTypes/DataTypeExpression.h>
16
#include <DB/DataTypes/DataTypeNested.h>
17 18 19 20 21 22 23 24 25 26 27 28 29 30
#include <DB/Columns/ColumnSet.h>
#include <DB/Columns/ColumnExpression.h>

#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>

#include <DB/Storages/StorageMergeTree.h>
#include <DB/Storages/StorageDistributed.h>


namespace DB
{


A
Alexey Milovidov 已提交
31
/** Returns a pointer to the alias field of the node behind `ast`.
  * Only function, identifier and literal nodes are handled; for any other node type NULL is returned.
  */
static std::string * getAlias(ASTPtr & ast)
{
	if (ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast))
		return &func->alias;

	if (ASTIdentifier * ident = dynamic_cast<ASTIdentifier *>(&*ast))
		return &ident->alias;

	if (ASTLiteral * lit = dynamic_cast<ASTLiteral *>(&*ast))
		return &lit->alias;

	return NULL;
}

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
/** Assigns `alias` to the node behind `ast`.
  * The set of node types that can carry an alias is defined in one place, getAlias();
  *  for any other node type an exception with code UNKNOWN_TYPE_OF_AST_NODE is thrown.
  */
static void setAlias(ASTPtr & ast, const std::string & alias)
{
	std::string * target = getAlias(ast);
	if (!target)
		throw Exception("Can't set alias of " + ast->getColumnName(), ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

	*target = alias;
}

71 72 73 74 75

void ExpressionAnalyzer::init()
{
	select_query = dynamic_cast<ASTSelectQuery *>(&*ast);
	has_aggregation = false;
76

77 78
	createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true.
	normalizeTree();
79

80
	getArrayJoinedColumns();
81

82 83
	removeUnusedColumns();

84
	/// Найдем агрегатные функции.
85 86
	if (select_query && (select_query->group_expression_list || select_query->having_expression))
		has_aggregation = true;
87

88
	ExpressionActions temp_actions(columns, settings);
89 90 91 92 93 94 95 96 97 98 99 100 101

	if (select_query && select_query->array_join_expression_list)
	{
		const ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts.size(); ++i)
		{
			ASTPtr ast = array_join_asts[i];
			getRootActionsImpl(ast, true, false, temp_actions);
		}

		addMultipleArrayJoinAction(temp_actions);

		const Block & temp_sample = temp_actions.getSampleBlock();
102
		for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
103
		{
104
			columns_after_array_join.push_back(NameAndTypePair(it->first, temp_sample.getByName(it->first).type));
105 106
		}
	}
107 108 109 110 111
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (!array_join_result_to_source.count(it->first))
			columns_after_array_join.push_back(*it);
	}
112
	getAggregatesImpl(ast, temp_actions);
113

114 115
	if (has_aggregation)
	{
116
		assertSelect();
117

118 119 120
		/// Найдем ключи агрегации.
		if (select_query->group_expression_list)
		{
121
			NameSet unique_keys;
122 123 124
			const ASTs & group_asts = select_query->group_expression_list->children;
			for (size_t i = 0; i < group_asts.size(); ++i)
			{
125
				getRootActionsImpl(group_asts[i], true, false, temp_actions);
126 127
				NameAndTypePair key;
				key.first = group_asts[i]->getColumnName();
128
				key.second = temp_actions.getSampleBlock().getByName(key.first).type;
129
				aggregation_keys.push_back(key);
130

131 132 133 134 135
				if (!unique_keys.count(key.first))
				{
					aggregated_columns.push_back(key);
					unique_keys.insert(key.first);
				}
136 137
			}
		}
138

139 140 141 142 143 144 145 146
		for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
		{
			AggregateDescription & desc = aggregate_descriptions[i];
			aggregated_columns.push_back(NameAndTypePair(desc.column_name, desc.function->getReturnType()));
		}
	}
	else
	{
147
		aggregated_columns = columns_after_array_join;
148 149 150 151
	}
}


152
/// Linear search for the first element of `cols` whose name (`first`) equals `name`.
/// Returns cols.end() if there is no such element.
NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name, NamesAndTypesList & cols)
{
	NamesAndTypesList::iterator pos = cols.begin();
	while (pos != cols.end() && !(pos->first == name))
		++pos;
	return pos;
}


162 163
/// ignore_levels - алиасы в скольки верхних уровнях поддерева нужно игнорировать.
/// Например, при ignore_levels=1 ast не может быть занесен в словарь, но его дети могут.
164
void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast, int ignore_levels)
165
{
166
	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);
167

168 169
	/// Обход снизу-вверх. Не опускаемся в подзапросы.
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
170 171
	{
		int new_ignore_levels = std::max(0, ignore_levels - 1);
172 173
		/// Алиасы верхнего уровня в секции ARRAY JOIN имеют особый смысл, их добавлять не будем
		///  (пропустим сам expression list и его детей).
174 175
		if (select && *it == select->array_join_expression_list)
			new_ignore_levels = 2;
176
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
177 178 179 180 181 182
			createAliasesDict(*it, new_ignore_levels);
	}

	if (ignore_levels > 0)
		return;

A
Alexey Milovidov 已提交
183
	std::string * alias = getAlias(ast);
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
	if (alias && !alias->empty())
	{
		if (aliases.count(*alias) && ast->getTreeID() != aliases[*alias]->getTreeID())
		{
			throw Exception("Different expressions with the same alias " + *alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
		}
		else
		{
			aliases[*alias] = ast;
		}
	}
}


/** Looks up the table the SELECT query reads from, via context.tryGetTable.
  * Returns an empty StoragePtr when the root is not a SELECT query, when no table is specified,
  *  or when the `table` node is itself a SELECT query or a function node.
  */
StoragePtr ExpressionAnalyzer::getTable()
{
	const ASTSelectQuery * select = dynamic_cast<const ASTSelectQuery *>(&*ast);

	const bool has_table = select && select->table;
	if (!has_table)
		return StoragePtr();

	const bool table_is_not_a_name =
		dynamic_cast<const ASTSelectQuery *>(&*select->table) || dynamic_cast<const ASTFunction *>(&*select->table);
	if (table_is_not_a_name)
		return StoragePtr();

	/// An empty database name is passed when the query does not specify one.
	String database;
	if (select->database)
		database = dynamic_cast<const ASTIdentifier &>(*select->database).name;
	else
		database = "";

	const String & table = dynamic_cast<const ASTIdentifier &>(*select->table).name;
	return context.tryGetTable(database, table);
}


bool ExpressionAnalyzer::needSignRewrite()
{
	if (settings.sign_rewrite && storage)
	{
		if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage))
			return merge_tree->getName() == "CollapsingMergeTree";
		if (const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage))
			return !distributed->getSignColumnName().empty();
	}
	return false;
}


/** Name of the sign column as reported by the storage (StorageMergeTree or StorageDistributed).
  * Returns an empty string for any other storage type.
  * NOTE(review): `storage` is dereferenced without a null check here; the visible caller only
  *  reaches this after needSignRewrite() returned true - confirm for other callers.
  */
String ExpressionAnalyzer::getSignColumnName()
{
	const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage);
	if (merge_tree)
		return merge_tree->getSignColumnName();

	const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage);
	if (distributed)
		return distributed->getSignColumnName();

	return "";
}


/// Creates a new identifier node named sign_column_name, using the range of the root `ast`.
ASTPtr ExpressionAnalyzer::createSignColumn()
{
	ASTIdentifier * sign_identifier = new ASTIdentifier(ast->range, sign_column_name);
	/// Hand the node over to the owning pointer before touching it further.
	ASTPtr result = sign_identifier;
	sign_identifier->name = sign_column_name;
	return result;
}


/** Builds the replacement for count(): sum(Sign).
  * The alias of the original node is carried over to the new node; its arguments are not used.
  */
ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node)
{
	/// Argument list: (Sign)
	ASTExpressionList * sign_args = new ASTExpressionList;
	ASTPtr sign_args_holder = sign_args;
	sign_args->children.push_back(createSignColumn());

	/// sum(Sign)
	ASTFunction * sum_func = new ASTFunction;
	ASTPtr sum_holder = sum_func;
	sum_func->name = "sum";
	sum_func->alias = node->alias;
	sum_func->arguments = sign_args_holder;
	sum_func->children.push_back(sign_args_holder);

	return sum_holder;
}


/** Builds the replacement for sum(x): sum(multiply(Sign, x)).
  * Only the first argument of the original node is used. The alias of the original node
  *  is carried over to the outer sum.
  * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the original node has no arguments
  *  (previously this indexed past the end of the argument list).
  */
ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node)
{
	const bool has_argument = node->arguments && !node->arguments->children.empty();
	if (!has_argument)
		throw Exception("Aggregate function " + node->name + " requires an argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

	/// 'Sign', 'x'
	ASTExpressionList * p_mult_exp_list = new ASTExpressionList;
	ASTExpressionList & mult_exp_list = *p_mult_exp_list;
	ASTPtr mult_exp_list_node = p_mult_exp_list;
	mult_exp_list.children.push_back(createSignColumn());
	mult_exp_list.children.push_back(node->arguments->children[0]);

	/// Sign * x
	ASTFunction * p_mult = new ASTFunction;
	ASTFunction & mult = *p_mult;
	ASTPtr mult_node = p_mult;
	mult.name = "multiply";
	mult.arguments = mult_exp_list_node;
	mult.children.push_back(mult_exp_list_node);

	/// 'Sign * x'
	ASTExpressionList * p_exp_list = new ASTExpressionList;
	ASTExpressionList & exp_list = *p_exp_list;
	ASTPtr exp_list_node = p_exp_list;
	exp_list.children.push_back(mult_node);

	/// sum(Sign * x)
	ASTFunction * p_sum = new ASTFunction;
	ASTFunction & sum = *p_sum;
	ASTPtr sum_node = p_sum;
	sum.name = "sum";
	sum.alias = node->alias;
	sum.arguments = exp_list_node;
	sum.children.push_back(exp_list_node);

	return sum_node;
}


/** Builds the replacement for avg(x): divide(sum(Sign * x), sum(Sign)).
  * The alias of the original node is put on the outer divide only.
  */
ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node)
{
	/// A copy of the node without the alias, used to build the numerator and the denominator,
	///  so that the inner sums do not inherit the alias.
	ASTPtr unaliased_holder = node->clone();
	ASTFunction * unaliased = dynamic_cast<ASTFunction *>(&*unaliased_holder);
	unaliased->alias = "";

	/// Operands: 'sum(Sign * x)', 'sum(Sign)'
	ASTExpressionList * operands = new ASTExpressionList;
	ASTPtr operands_holder = operands;
	operands->children.push_back(rewriteSum(unaliased));
	operands->children.push_back(rewriteCount(unaliased));

	/// sum(Sign * x) / sum(Sign)
	ASTFunction * quotient = new ASTFunction;
	ASTPtr quotient_holder = quotient;
	quotient->name = "divide";
	quotient->alias = node->alias;
	quotient->arguments = operands_holder;
	quotient->children.push_back(operands_holder);

	return quotient_holder;
}


332
/** If `ast` is a function node named count, sum or avg, replaces it in place with its
  *  sign-aware version and returns true. Otherwise leaves `ast` untouched and returns false.
  */
bool ExpressionAnalyzer::considerSignRewrite(ASTPtr & ast)
{
	ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);
	if (!func)
		return false;

	const String & name = func->name;

	if (name == "count")
	{
		ast = rewriteCount(func);
		return true;
	}

	if (name == "sum")
	{
		ast = rewriteSum(func);
		return true;
	}

	if (name == "avg")
	{
		ast = rewriteAvg(func);
		return true;
	}

	return false;
}


void ExpressionAnalyzer::normalizeTree()
{
	SetOfASTs tmp_set;
	MapOfASTs tmp_map;
	if (needSignRewrite())
		sign_column_name = getSignColumnName();
M
Merge  
Michael Kolupaev 已提交
356
	normalizeTreeImpl(ast, tmp_map, tmp_set, "", false);
357 358 359 360 361
}


/// finished_asts - уже обработанные вершины (и на что они заменены)
/// current_asts - вершины в текущем стеке вызовов этого метода
362
/// current_alias - алиас, повешенный на предка ast (самого глубокого из предков с алиасами)
363
/// in_sign_rewritten - находимся ли мы в поддереве, полученном в результате sign rewrite
M
Merge  
Michael Kolupaev 已提交
364
void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, bool in_sign_rewritten)
365 366 367 368 369 370
{
	if (finished_asts.count(ast))
	{
		ast = finished_asts[ast];
		return;
	}
371

372 373
	ASTPtr initial_ast = ast;
	current_asts.insert(initial_ast);
374

M
Merge  
Michael Kolupaev 已提交
375 376
	std::string * my_alias = getAlias(ast);
	if (my_alias && !my_alias->empty())
377
		current_alias = *my_alias;
378

379
	/// rewrite правила, которые действуют при обходе сверху-вниз.
380

381 382
	if (!in_sign_rewritten && !sign_column_name.empty())
		in_sign_rewritten = considerSignRewrite(ast);
383

384 385
	bool replaced = false;

386 387 388 389 390 391 392 393 394 395 396 397
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		/** Нет ли в таблице столбца, название которого полностью совпадает с записью функции?
		 * Например, в таблице есть столбец "domain(URL)", и мы запросили domain(URL).
		 */
		String function_string = node->getColumnName();
		NamesAndTypesList::const_iterator it = findColumn(function_string);
		if (columns.end() != it)
		{
			ASTIdentifier * ast_id = new ASTIdentifier(node->range, std::string(node->range.first, node->range.second));
			ast = ast_id;
			current_asts.insert(ast);
398
			replaced = true;
399 400 401
		}
	}
	else if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
402
	{
403
		if (node->kind == ASTIdentifier::Column)
404
		{
M
Merge  
Michael Kolupaev 已提交
405
			/// Если это алиас, но не родительский алиас (чтобы работали конструкции вроде "SELECT column+1 AS column").
406
			Aliases::const_iterator jt = aliases.find(node->name);
M
Merge  
Michael Kolupaev 已提交
407
			if (jt != aliases.end() && current_alias != node->name)
408
			{
409
				/// Заменим его на соответствующий узел дерева.
410 411
				if (current_asts.count(jt->second))
					throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
412 413 414 415 416 417 418 419 420 421 422 423
				if (my_alias && !my_alias->empty() && *my_alias != jt->second->getAlias())
				{
					/// В конструкции вроде "a AS b", где a - алиас, нужно перевесить алиас b на результат подстановки алиаса a.
					ast = jt->second->clone();
					setAlias(ast, *my_alias);
				}
				else
				{
					ast = jt->second;
				}

				replaced = true;
424 425 426 427
			}
			else
			{
				/// Проверим имеет ли смысл sign-rewrite
428
				if (!in_sign_rewritten && sign_column_name != "" && node->name == sign_column_name)
429 430 431 432 433 434 435 436 437 438 439 440 441
					throw Exception("Requested Sign column while sign-rewrite is on.", ErrorCodes::QUERY_SECTION_DOESNT_MAKE_SENSE);
			}
		}
	}
	else if (ASTExpressionList * node = dynamic_cast<ASTExpressionList *>(&*ast))
	{
		/// Заменим * на список столбцов.
		ASTs & asts = node->children;
		for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
		{
			if (ASTAsterisk * asterisk = dynamic_cast<ASTAsterisk *>(&*asts[i]))
			{
				ASTs all_columns;
S
Merge  
Sergey Fedorov 已提交
442
				for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
443 444 445 446 447 448
					all_columns.push_back(new ASTIdentifier(asterisk->range, it->first));
				asts.erase(asts.begin() + i);
				asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end());
			}
		}
	}
449

450 451 452 453 454 455 456 457 458 459
	/// Если заменили корень поддерева вызовемся для нового корня снова - на случай, если алиас заменился на алиас.
	if (replaced)
	{
		normalizeTreeImpl(ast, finished_asts, current_asts, current_alias, in_sign_rewritten);
		current_asts.erase(initial_ast);
		current_asts.erase(ast);
		finished_asts[initial_ast] = ast;
		return;
	}

460
	/// Рекурсивные вызовы. Не опускаемся в подзапросы.
461

462 463
	for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
		if (!dynamic_cast<ASTSelectQuery *>(&**it))
464
			normalizeTreeImpl(*it, finished_asts, current_asts, current_alias, in_sign_rewritten);
465

466 467 468
	/// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression.
	if (ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast))
	{
469 470
		if (select->prewhere_expression)
			normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
471
		if (select->where_expression)
472
			normalizeTreeImpl(select->where_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
473
		if (select->having_expression)
474
			normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, in_sign_rewritten);
475
	}
476

M
Merge  
Michael Kolupaev 已提交
477 478
	/// Действия, выполняемые снизу вверх.

479
	if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
480 481 482
	{
		if (node->name == "lambda")
		{
483
			node->kind = ASTFunction::LAMBDA_EXPRESSION;
484 485 486
		}
		else if (context.getAggregateFunctionFactory().isAggregateFunctionName(node->name))
		{
487
			node->kind = ASTFunction::AGGREGATE_FUNCTION;
488
		}
489 490 491 492 493 494 495 496
		else if (node->name == "arrayJoin")
		{
			node->kind = ASTFunction::ARRAY_JOIN;
		}
		else
		{
			node->kind = ASTFunction::FUNCTION;
		}
497
	}
498

499 500 501 502 503 504
	current_asts.erase(initial_ast);
	current_asts.erase(ast);
	finished_asts[initial_ast] = ast;
}


505
/** Converts the right argument of `node` (an IN-like function) into a set and puts an ASTSet in its place.
  * The right argument can be a single value, an enumeration of values or a subquery.
  * An enumeration of values is parsed as the function tuple.
  * Does nothing if the right argument is already an ASTSet.
  *
  * sample_block - block used to look up the types of the left argument.
  *
  * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the function does not have exactly two arguments
  *  (both are indexed below), and ILLEGAL_TYPE_OF_ARGUMENT if the right argument is neither
  *  a subquery, a tuple, nor a literal.
  */
void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block)
{
	IAST & args = *node->arguments;

	if (args.children.size() != 2)
		throw Exception("Function " + node->name + " requires exactly 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

	ASTPtr & arg = args.children[1];

	if (dynamic_cast<ASTSet *>(&*arg))
		return;

	if (dynamic_cast<ASTSubquery *>(&*arg))
	{
		/// Execute the subquery, turn the result into a set, and put this set in place of the subquery.
		ASTSet * ast_set = new ASTSet(arg->getColumnName());

		/** For a subquery in the IN section, the limits on the maximum result size do not apply,
		  *  because the result of this subquery is not yet the result of the whole query.
		  * Instead, the limits max_rows_in_set, max_bytes_in_set, set_overflow_mode are in effect.
		  */
		Context subquery_context = context;
		Settings subquery_settings = context.getSettings();
		subquery_settings.limits.max_result_rows = 0;
		subquery_settings.limits.max_result_bytes = 0;
		subquery_context.setSettings(subquery_settings);

		InterpreterSelectQuery interpreter(arg->children[0], subquery_context, QueryProcessingStage::Complete, subquery_depth + 1);
		ast_set->set = new Set(settings.limits);
		ast_set->set->create(interpreter.execute());
		arg = ast_set;
	}
	else
	{
		/// The case of an explicit enumeration of values.

		DataTypes set_element_types;
		ASTPtr & left_arg = args.children[0];

		ASTFunction * left_arg_tuple = dynamic_cast<ASTFunction *>(&*left_arg);

		if (left_arg_tuple && left_arg_tuple->name == "tuple")
		{
			for (ASTs::const_iterator it = left_arg_tuple->arguments->children.begin();
				it != left_arg_tuple->arguments->children.end();
				++it)
				set_element_types.push_back(sample_block.getByName((*it)->getColumnName()).type);
		}
		else
		{
			/// For an array on the left, the set elements have the type of the array elements.
			DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type;
			if (DataTypeArray * array_type = dynamic_cast<DataTypeArray *>(&*left_type))
				set_element_types.push_back(array_type->getNestedType());
			else
				set_element_types.push_back(left_type);
		}

		/// Distinguish the case x in (1, 2) from the case x in 1 (which is the same as x in (1)).
		bool single_value = false;
		ASTPtr elements_ast = arg;

		if (ASTFunction * set_func = dynamic_cast<ASTFunction *>(&*arg))
		{
			if (set_func->name != "tuple")
				throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
								ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			/// Distinguish the case (x, y) in ((1, 2), (3, 4)) from the case (x, y) in (1, 2).
			/// An empty tuple has no first element to inspect, so it is treated as "not a tuple of tuples".
			ASTs & set_elements = set_func->arguments->children;
			ASTFunction * any_element = set_elements.empty() ? NULL : dynamic_cast<ASTFunction *>(&*set_elements[0]);
			if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple"))
				single_value = true;
			else
				elements_ast = set_func->arguments;
		}
		else if (dynamic_cast<ASTLiteral *>(&*arg))
		{
			single_value = true;
		}
		else
		{
			throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
							ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
		}

		/// A single value is wrapped into an expression list of one element.
		if (single_value)
		{
			ASTPtr exp_list = new ASTExpressionList;
			exp_list->children.push_back(elements_ast);
			elements_ast = exp_list;
		}

		ASTSet * ast_set = new ASTSet(arg->getColumnName());
		ast_set->set = new Set(settings.limits);
		ast_set->set->create(set_element_types, elements_ast);
		arg = ast_set;
	}
}


static std::string getUniqueName(const Block & block, const std::string & prefix)
{
	int i = 1;
607
	while (block.has(prefix + toString(i)))
608
		++i;
609
	return prefix + toString(i);
610 611 612
}


613 614 615
/** Runs getActionsImpl for `ast` on a fresh scope stack built on top of `actions`,
  *  then writes the resulting level of the stack back into `actions`.
  */
void ExpressionAnalyzer::getRootActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ExpressionActions & actions)
{
	ScopeStack scope_stack(actions, settings);
	getActionsImpl(ast, no_subqueries, only_consts, scope_stack);

	ExpressionActionsPtr root_level = scope_stack.popLevel();
	actions = *root_level;
}


621 622
void ExpressionAnalyzer::getArrayJoinedColumns()
{
623 624
	if (select_query && select_query->array_join_expression_list)
	{
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647
		ASTs & array_join_asts = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < array_join_asts .size(); ++i)
		{
			ASTPtr ast = array_join_asts [i];

			String nested_table_name = ast->getColumnName();
			String nested_table_alias = ast->getAlias();
			if (nested_table_alias == nested_table_name && !dynamic_cast<ASTIdentifier *>(&*ast))
				throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED);

			if (array_join_alias_to_name.count(nested_table_alias) || aliases.count(nested_table_alias))
				throw Exception("Duplicate alias " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
			array_join_alias_to_name[nested_table_alias] = nested_table_name;
		}

		ASTs & query_asts = select_query->children;
		for (size_t i = 0; i < query_asts.size(); ++i)
		{
			ASTPtr ast = query_asts[i];
			if (select_query && ast == select_query->array_join_expression_list)
				continue;
			getArrayJoinedColumnsImpl(ast);
		}
648 649 650

		/// Если результат ARRAY JOIN не используется, придется все равно по-ARRAY-JOIN-ить какой-нибудь столбец,
		/// чтобы получить правильное количество строк.
651
		if (array_join_result_to_source.empty())
652 653 654 655 656 657 658
		{
			ASTPtr expr = select_query->array_join_expression_list->children[0];
			String source_name = expr->getColumnName();
			String result_name = expr->getAlias();
			/// Это массив.
			if (!dynamic_cast<ASTIdentifier *>(&*expr) || findColumn(source_name, columns) != columns.end())
			{
659
				array_join_result_to_source[result_name] = source_name;
660 661 662 663 664 665 666 667 668 669
			}
			else /// Это вложенная таблица.
			{
				bool found = false;
				for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
				{
					String table_name = DataTypeNested::extractNestedTableName(it->first);
					String column_name = DataTypeNested::extractNestedColumnName(it->first);
					if (table_name == source_name)
					{
670 671
						array_join_result_to_source[DataTypeNested::concatenateNestedName(result_name, column_name)]
							= it->first;
672 673 674 675
						found = true;
						break;
					}
				}
676 677
				if (!found)
					throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
678 679
			}
		}
680
	}
681 682 683 684
}


/** Walks the subtree (without descending into SELECT subqueries) and, for every column identifier
  *  that refers to an ARRAY JOIN alias - either directly or as the nested-table part of its name -
  *  records the mapping "name used in the query -> source name" in array_join_result_to_source.
  */
void ExpressionAnalyzer::getArrayJoinedColumnsImpl(ASTPtr ast)
{
	ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*ast);

	if (!identifier)
	{
		for (ASTs::iterator child = ast->children.begin(); child != ast->children.end(); ++child)
		{
			if (dynamic_cast<ASTSelectQuery *>(&**child))
				continue;
			getArrayJoinedColumnsImpl(*child);
		}
		return;
	}

	if (identifier->kind != ASTIdentifier::Column)
		return;

	String table_name = DataTypeNested::extractNestedTableName(identifier->name);

	if (array_join_alias_to_name.count(identifier->name))
	{
		/// The identifier is an ARRAY JOIN alias itself.
		array_join_result_to_source[identifier->name] = array_join_alias_to_name[identifier->name];
		return;
	}

	if (array_join_alias_to_name.count(table_name))
	{
		/// The identifier is a column of an aliased nested table: substitute the table part of the name.
		String nested_column = DataTypeNested::extractNestedColumnName(identifier->name);
		array_join_result_to_source[identifier->name]
			= DataTypeNested::concatenateNestedName(array_join_alias_to_name[table_name], nested_column);
	}
}


/** Adds to the top level of `actions_stack` the actions needed to compute the expression `ast`.
  *
  * no_subqueries - do not build sets for IN; only a placeholder column of the result type is added.
  * only_consts - missing identifiers are not an error: a function whose arguments are not all present
  *  in the block is silently skipped instead.
  *
  * Throws NOT_AN_AGGREGATE, UNEXPECTED_EXPRESSION, TYPE_MISMATCH, NUMBER_OF_ARGUMENTS_DOESNT_MATCH
  *  or UNKNOWN_IDENTIFIER (see the corresponding branches).
  */
void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack)
{
	/// If the result of the computation is already in the block.
	if ((dynamic_cast<ASTFunction *>(&*ast) || dynamic_cast<ASTLiteral *>(&*ast))
		&& actions_stack.getSampleBlock().has(ast->getColumnName()))
		return;

	if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		std::string name = node->getColumnName();
		if (!only_consts && !actions_stack.getSampleBlock().has(name))
		{
			/// The requested column is not in the block.
			/// If such a column exists before aggregation, the user probably forgot to wrap it in an aggregate function or to add it to GROUP BY.

			bool found = false;
			for (NamesAndTypesList::const_iterator it = columns_after_array_join.begin();
					it != columns_after_array_join.end(); ++it)
			{
				if (it->first == name)
				{
					found = true;
					break;
				}
			}

			if (found)
				throw Exception("Column " + name + " is not under aggregate function and not in GROUP BY.",
					ErrorCodes::NOT_AN_AGGREGATE);
		}
	}
	else if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
	{
		/// Lambdas are processed only as arguments of other functions (below).
		if (node->kind == ASTFunction::LAMBDA_EXPRESSION)
			throw Exception("Unexpected expression", ErrorCodes::UNEXPECTED_EXPRESSION);

		if (node->kind == ASTFunction::ARRAY_JOIN)
		{
			if (node->arguments->children.size() != 1)
				throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
			ASTPtr arg = node->arguments->children[0];
			getActionsImpl(arg, no_subqueries, only_consts, actions_stack);
			if (!only_consts)
			{
				/// Copy the argument under the name of the result and array-join the copy.
				String result_name = node->getColumnName();
				actions_stack.addAction(ExpressionActions::Action::copyColumn(arg->getColumnName(), result_name));
				NameSet joined_columns;
				joined_columns.insert(result_name);
				actions_stack.addAction(ExpressionActions::Action::arrayJoin(joined_columns));
			}

			return;
		}

		if (node->kind == ASTFunction::FUNCTION)
		{
			if (node->name == "in" || node->name == "notIn")
			{
				if (!no_subqueries)
				{
					/// Find the type of the first argument (getActionsImpl will be called for it again later and will not affect anything).
					getActionsImpl(node->arguments->children[0], no_subqueries, only_consts, actions_stack);
					/// Turn the tuple or the subquery into a set.
					makeSet(node, actions_stack.getSampleBlock());
				}
				else
				{
					/// We are in the part of the tree that is not going to be computed. Only the types have to be determined.
					/// Do not execute subqueries and do not build sets. Insert an arbitrary column of the correct type.
					ColumnWithNameAndType fake_column;
					fake_column.name = node->getColumnName();
					fake_column.type = new DataTypeUInt8;
					fake_column.column = new ColumnConstUInt8(1, 0);
					actions_stack.addAction(ExpressionActions::Action::addColumn(fake_column));
					getActionsImpl(node->arguments, no_subqueries, only_consts, actions_stack);
					return;
				}
			}

			FunctionPtr function = context.getFunctionFactory().get(node->name, context);

			Names argument_names;
			DataTypes argument_types;
			bool arguments_present = true;

			/// If the function has a lambda expression argument, its type has to be determined before the recursive call.
			bool has_lambda_arguments = false;

			for (size_t i = 0; i < node->arguments->children.size(); ++i)
			{
				ASTPtr child = node->arguments->children[i];

				ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
				ASTSet * set = dynamic_cast<ASTSet *>(&*child);
				if (lambda && lambda->name == "lambda")
				{
					/// If the argument is a lambda expression, only remember its approximate type.
					if (lambda->arguments->children.size() != 2)
						throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

					ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);

					if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
						throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

					has_lambda_arguments = true;
					argument_types.push_back(new DataTypeExpression(DataTypes(lambda_args_tuple->arguments->children.size())));
					/// The name will be chosen in the next loop.
					argument_names.push_back("");
				}
				else if (set)
				{
					/// If the argument is a set, give it a unique name,
					///  so that sets with the same textual form are not merged (they can have different types).
					ColumnWithNameAndType column;
					column.column = new ColumnSet(1, set->set);
					column.type = new DataTypeSet;
					column.name = getUniqueName(actions_stack.getSampleBlock(), "__set");

					actions_stack.addAction(ExpressionActions::Action::addColumn(column));

					argument_types.push_back(column.type);
					argument_names.push_back(column.name);
				}
				else
				{
					/// If the argument is not a lambda expression, call ourselves recursively and find out its type.
					getActionsImpl(child, no_subqueries, only_consts, actions_stack);
					std::string name = child->getColumnName();
					if (actions_stack.getSampleBlock().has(name))
					{
						argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type);
						argument_names.push_back(name);
					}
					else
					{
						if (only_consts)
						{
							arguments_present = false;
						}
						else
						{
							throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
						}
					}
				}
			}

			if (only_consts && !arguments_present)
				return;

			Names additional_requirements;

			if (has_lambda_arguments && !only_consts)
			{
				function->getLambdaArgumentTypes(argument_types);

				/// Call ourselves recursively for the lambda expressions.
				for (size_t i = 0; i < node->arguments->children.size(); ++i)
				{
					ASTPtr child = node->arguments->children[i];

					ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
					if (lambda && lambda->name == "lambda")
					{
						DataTypeExpression * lambda_type = dynamic_cast<DataTypeExpression *>(&*argument_types[i]);
						ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);
						ASTs lambda_arg_asts = lambda_args_tuple->arguments->children;
						NamesAndTypesList lambda_arguments;

						for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
						{
							ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*lambda_arg_asts[j]);
							if (!identifier)
								throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

							String arg_name = identifier->name;
							NameAndTypePair arg(arg_name, lambda_type->getArgumentTypes()[j]);

							lambda_arguments.push_back(arg);
						}

						/// The body of the lambda is analyzed in its own level of the stack, where its arguments are available as columns.
						actions_stack.pushLevel(lambda_arguments);
						getActionsImpl(lambda->arguments->children[1], no_subqueries, only_consts, actions_stack);
						ExpressionActionsPtr lambda_actions = actions_stack.popLevel();

						String result_name = lambda->arguments->children[1]->getColumnName();
						lambda_actions->finalize(Names(1, result_name));
						DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
						argument_types[i] = new DataTypeExpression(lambda_type->getArgumentTypes(), result_type);

						/// Columns required by the lambda that are not its own arguments are captured from the outer level.
						Names captured = lambda_actions->getRequiredColumns();
						for (size_t j = 0; j < captured.size(); ++j)
						{
							if (findColumn(captured[j], lambda_arguments) == lambda_arguments.end())
								additional_requirements.push_back(captured[j]);
						}

						/// We cannot use getColumnName() as the name,
						///  because it does not uniquely identify the expression (the argument types can be different).
						argument_names[i] = getUniqueName(actions_stack.getSampleBlock(), "__lambda");

						ColumnWithNameAndType lambda_column;
						lambda_column.column = new ColumnExpression(1, lambda_actions, lambda_arguments, result_type, result_name);
						lambda_column.type = argument_types[i];
						lambda_column.name = argument_names[i];
						actions_stack.addAction(ExpressionActions::Action::addColumn(lambda_column));
					}
				}
			}

			if (only_consts)
			{
				for (size_t i = 0; i < argument_names.size(); ++i)
				{
					if (!actions_stack.getSampleBlock().has(argument_names[i]))
					{
						arguments_present = false;
						break;
					}
				}
			}

			if (arguments_present)
				actions_stack.addAction(ExpressionActions::Action::applyFunction(function, argument_names, node->getColumnName()),
										additional_requirements);
		}
	}
	else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
	{
		/// A literal becomes a constant column of one row, with the type derived from its value.
		DataTypePtr type = apply_visitor(FieldToDataType(), node->value);
		ColumnWithNameAndType column;
		column.column = type->createConstColumn(1, node->value);
		column.type = type;
		column.name = node->getColumnName();

		actions_stack.addAction(ExpressionActions::Action::addColumn(column));
	}
	else
	{
		for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
			getActionsImpl(*it, no_subqueries, only_consts, actions_stack);
	}
}


/// Walks `ast` and registers every aggregate function call found in it.
///
/// For a node whose kind is AGGREGATE_FUNCTION this sets has_aggregation, adds the actions that
/// compute the function's arguments to `actions`, obtains the aggregate function from the context's
/// factory and appends an AggregateDescription to aggregate_descriptions. A function whose column
/// name is already registered is not added again. The arguments of an aggregate function are not
/// searched for further aggregates, and children that are subqueries or SELECT queries are skipped.
///
/// Throws PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS if a parameter of a parametric
/// aggregate function is not a literal.
void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * function_node = dynamic_cast<ASTFunction *>(&*ast);
	const bool is_aggregate = function_node && function_node->kind == ASTFunction::AGGREGATE_FUNCTION;

	if (!is_aggregate)
	{
		/// Not an aggregate call: keep looking in the children, but stay out of nested queries.
		for (size_t child_index = 0; child_index < ast->children.size(); ++child_index)
		{
			ASTPtr child = ast->children[child_index];
			const bool is_nested_query = dynamic_cast<ASTSubquery *>(&*child) || dynamic_cast<ASTSelectQuery *>(&*child);
			if (!is_nested_query)
				getAggregatesImpl(child, actions);
		}
		return;
	}

	has_aggregation = true;

	/// The same aggregate may occur several times in a query; describe it only once.
	const std::string column_name = function_node->getColumnName();
	for (size_t known = 0; known < aggregate_descriptions.size(); ++known)
	{
		if (aggregate_descriptions[known].column_name == column_name)
			return;
	}

	AggregateDescription description;
	description.column_name = column_name;

	ASTs & argument_asts = function_node->arguments->children;
	const size_t arguments_count = argument_asts.size();
	description.argument_names.resize(arguments_count);
	DataTypes argument_types(arguments_count);

	for (size_t arg = 0; arg < arguments_count; ++arg)
	{
		/// The argument has to be computable before aggregation; its type is then taken
		/// from the sample block of `actions`.
		getRootActionsImpl(argument_asts[arg], true, false, actions);
		const std::string argument_name = argument_asts[arg]->getColumnName();
		argument_types[arg] = actions.getSampleBlock().getByName(argument_name).type;
		description.argument_names[arg] = argument_name;
	}

	description.function = context.getAggregateFunctionFactory().get(function_node->name, argument_types);

	if (function_node->parameters)
	{
		ASTs & parameter_asts = dynamic_cast<ASTExpressionList &>(*function_node->parameters).children;
		Row parameter_values(parameter_asts.size());

		for (size_t param = 0; param < parameter_asts.size(); ++param)
		{
			ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&*parameter_asts[param]);
			if (!literal)
				throw Exception("Parameters to aggregate functions must be literals", ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);

			parameter_values[param] = literal->value;
		}

		description.function->setParameters(parameter_values);
	}

	/// Parameters (if any) are set before the argument types are passed to the function.
	description.function->setArguments(argument_types);

	aggregate_descriptions.push_back(description);
}

1010 1011 1012 1013 1014
/// Guard used by the methods that only make sense for a SELECT query.
/// Throws LOGICAL_ERROR when select_query is null.
void ExpressionAnalyzer::assertSelect()
{
	if (select_query)
		return;

	throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
}
1015

1016
void ExpressionAnalyzer::assertAggregation()
1017 1018 1019
{
	if (!has_aggregation)
		throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
1020
}
1021

1022 1023 1024 1025 1026 1027 1028 1029
/// Makes sure `chain` has at least one step.
/// If the chain is empty, copies the analyzer's settings into it and appends a first step whose
/// actions are created over `columns`. A chain that already has steps is left untouched.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
	if (!chain.steps.empty())
		return;

	chain.settings = settings;
	chain.steps.push_back(ExpressionActionsChain::Step(new ExpressionActions(columns, settings)));
}
1030

1031 1032
/// Adds to `actions` a single arrayJoin action over all result names in array_join_result_to_source.
/// When a result name differs from its source name, the source column is first copied
/// under the result name.
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActions & actions)
{
	NameSet joined_names;

	NameToNameMap::iterator mapping = array_join_result_to_source.begin();
	while (mapping != array_join_result_to_source.end())
	{
		/// first is the result name, second is the source name.
		const bool is_renamed = mapping->first != mapping->second;
		if (is_renamed)
			actions.add(ExpressionActions::Action::copyColumn(mapping->second, mapping->first));

		joined_names.insert(mapping->first);
		++mapping;
	}

	actions.add(ExpressionActions::Action::arrayJoin(joined_names));
}


1045 1046 1047
/// Appends the ARRAY JOIN stage to the last step of `chain` (creating the first step if needed).
/// Returns false and leaves the chain untouched if the query has no ARRAY JOIN section.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query.
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->array_join_expression_list)
		return false;

	initChain(chain, columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// First compute the ARRAY JOIN expressions, then add the array join action itself.
	getRootActionsImpl(select_query->array_join_expression_list, false, false, *last_step.actions);
	addMultipleArrayJoinAction(*last_step.actions);

	/// Every array-joined result column must be kept in the output of this step.
	NameToNameMap::iterator mapping = array_join_result_to_source.begin();
	while (mapping != array_join_result_to_source.end())
	{
		last_step.required_output.push_back(mapping->first);
		++mapping;
	}

	return true;
}

1065 1066 1067
/// Appends the computation of the WHERE expression to the last step of `chain`
/// (creating the first step over columns_after_array_join if the chain is empty).
/// The WHERE column is marked as required output of that step.
/// Returns false if the query has no WHERE section.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query.
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->where_expression)
		return false;

	initChain(chain, columns_after_array_join);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	const std::string filter_column_name = select_query->where_expression->getColumnName();
	last_step.required_output.push_back(filter_column_name);

	getRootActionsImpl(select_query->where_expression, false, false, *last_step.actions);

	return true;
}

/// Appends the computation of the GROUP BY keys to the last step of `chain`
/// (creating the first step over `columns` if the chain is empty).
/// Every key column is marked as required output of that step.
/// Returns false if the query has no GROUP BY section.
/// Throws LOGICAL_ERROR if no aggregation was detected.
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (!select_query->group_expression_list)
		return false;

	initChain(chain, columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// Work on a copy of the key list, as the original code did.
	ASTs key_asts = select_query->group_expression_list->children;
	for (ASTs::iterator key = key_asts.begin(); key != key_asts.end(); ++key)
	{
		const std::string key_name = (*key)->getColumnName();
		last_step.required_output.push_back(key_name);
		getRootActionsImpl(*key, false, false, *last_step.actions);
	}

	return true;
}

/// Appends to the last step of `chain` the computation of the arguments of all aggregate functions
/// (creating the first step over columns_after_array_join if the chain is empty).
/// The argument columns of every registered aggregate are marked as required output; the
/// aggregates themselves are looked up in the SELECT, HAVING and ORDER BY sections.
/// Throws LOGICAL_ERROR if no aggregation was detected.
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain)
{
	assertAggregation();

	initChain(chain, columns_after_array_join);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	/// Keep all aggregate arguments in the output of this step, in registration order.
	for (size_t index = 0; index < aggregate_descriptions.size(); ++index)
	{
		const AggregateDescription & description = aggregate_descriptions[index];
		last_step.required_output.insert(
			last_step.required_output.end(),
			description.argument_names.begin(),
			description.argument_names.end());
	}

	ExpressionActions & step_actions = *last_step.actions;

	getActionsBeforeAggregationImpl(select_query->select_expression_list, step_actions);

	if (select_query->having_expression)
		getActionsBeforeAggregationImpl(select_query->having_expression, step_actions);

	if (select_query->order_expression_list)
		getActionsBeforeAggregationImpl(select_query->order_expression_list, step_actions);
}

/// Appends the computation of the HAVING expression to the last step of `chain`
/// (creating the first step over aggregated_columns if the chain is empty).
/// The HAVING column is marked as required output of that step.
/// Returns false if the query has no HAVING section.
/// Throws LOGICAL_ERROR if no aggregation was detected.
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
{
	assertAggregation();

	if (!select_query->having_expression)
		return false;

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	const std::string filter_column_name = select_query->having_expression->getColumnName();
	last_step.required_output.push_back(filter_column_name);

	getRootActionsImpl(select_query->having_expression, false, false, *last_step.actions);

	return true;
}

1141 1142 1143
/// Appends the computation of the SELECT expressions to the last step of `chain`
/// (creating the first step over aggregated_columns if the chain is empty).
/// Every selected column is marked as required output of that step.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query.
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	getRootActionsImpl(select_query->select_expression_list, false, false, *last_step.actions);

	/// The names are collected only after the actions for the whole list have been built.
	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expression = selected.begin(); expression != selected.end(); ++expression)
		last_step.required_output.push_back((*expression)->getColumnName());
}
1156

1157
/// Appends the computation of the ORDER BY expressions to the last step of `chain`
/// (creating the first step over aggregated_columns if the chain is empty).
/// The column of every sort expression is marked as required output of that step.
/// Returns false if the query has no ORDER BY section.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query, and
/// UNKNOWN_TYPE_OF_AST_NODE if an element of the list is not an ASTOrderByElement with one child.
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
{
	assertSelect();

	if (!select_query->order_expression_list)
		return false;

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	getRootActionsImpl(select_query->order_expression_list, false, false, *last_step.actions);

	ASTs order_elements = select_query->order_expression_list->children;
	for (ASTs::iterator element = order_elements.begin(); element != order_elements.end(); ++element)
	{
		ASTOrderByElement * order_element = dynamic_cast<ASTOrderByElement *>(&**element);
		const bool is_well_formed = order_element && order_element->children.size() == 1;
		if (!is_well_formed)
			throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

		/// The only child of an ORDER BY element is the expression to sort by.
		ASTPtr sort_expression = order_element->children[0];
		last_step.required_output.push_back(sort_expression->getColumnName());
	}

	return true;
}

1182
/// Appends to the last step of `chain` a projection that renames every SELECT column
/// from its column name to its alias (creating the first step over aggregated_columns
/// if the chain is empty). The aliases are marked as required output of that step.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query.
void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{
	assertSelect();

	initChain(chain, aggregated_columns);
	ExpressionActionsChain::Step & last_step = chain.steps.back();

	NamesWithAliases projection;

	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expression = selected.begin(); expression != selected.end(); ++expression)
	{
		const std::string column_name = (*expression)->getColumnName();
		const std::string alias = (*expression)->getAlias();

		projection.push_back(NameWithAlias(column_name, alias));
		last_step.required_output.push_back(alias);
	}

	last_step.actions->add(ExpressionActions::Action::project(projection));
}


1202 1203 1204
/// Returns the sample block of a temporary ExpressionActions built over aggregated_columns
/// that computes every SELECT expression and then projects the results to their aliases.
/// Throws LOGICAL_ERROR if the analyzed AST is not a SELECT query.
Block ExpressionAnalyzer::getSelectSampleBlock()
{
	assertSelect();

	ExpressionActions sample_actions(aggregated_columns, settings);
	NamesWithAliases projection;

	ASTs selected = select_query->select_expression_list->children;
	for (ASTs::iterator expression = selected.begin(); expression != selected.end(); ++expression)
	{
		const std::string column_name = (*expression)->getColumnName();
		const std::string alias = (*expression)->getAlias();
		projection.push_back(NameWithAlias(column_name, alias));

		/// NOTE(review): the `true` flag differs from the `false` passed by the append* methods;
		/// presumably it is `no_subqueries` - confirm against getRootActionsImpl.
		getRootActionsImpl(*expression, true, false, sample_actions);
	}

	sample_actions.add(ExpressionActions::Action::project(projection));

	return sample_actions.getSampleBlock();
}

1221
/// Walks `ast` and, for every aggregate function found, adds to `actions` the actions that
/// compute its arguments. Nodes that are not aggregate functions are only traversed;
/// the arguments of an aggregate function are not searched for further aggregates.
void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionActions & actions)
{
	ASTFunction * function_node = dynamic_cast<ASTFunction *>(&*ast);
	const bool is_aggregate = function_node && function_node->kind == ASTFunction::AGGREGATE_FUNCTION;

	if (!is_aggregate)
	{
		for (size_t child_index = 0; child_index < ast->children.size(); ++child_index)
			getActionsBeforeAggregationImpl(ast->children[child_index], actions);
		return;
	}

	ASTs & argument_asts = function_node->arguments->children;
	for (size_t arg = 0; arg < argument_asts.size(); ++arg)
		getRootActionsImpl(argument_asts[arg], false, false, actions);
}


M
Merge  
Michael Kolupaev 已提交
1243
/// Builds the actions that compute the analyzed expression over `columns`.
///
/// If the AST is an expression list, each of its children is a separate result; otherwise the
/// whole AST is the single result.
///  - project_result == true: a projection is added that renames the results to their aliases,
///    and only the aliases are passed to finalize().
///  - project_result == false: results keep their column names, and the names of all source
///    columns are passed to finalize() as well, so the source columns are not removed.
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result)
{
	ExpressionActionsPtr actions = new ExpressionActions(columns, settings);

	ASTExpressionList * expression_list = dynamic_cast<ASTExpressionList *>(&*ast);
	ASTs expressions = expression_list ? expression_list->children : ASTs(1, ast);

	NamesWithAliases projection;
	Names output_names;

	for (ASTs::iterator expression = expressions.begin(); expression != expressions.end(); ++expression)
	{
		const std::string column_name = (*expression)->getColumnName();

		/// Without projection the result is exposed under its own column name.
		std::string output_name;
		if (project_result)
			output_name = (*expression)->getAlias();
		else
			output_name = column_name;

		projection.push_back(NameWithAlias(column_name, output_name));
		output_names.push_back(output_name);
		getRootActionsImpl(*expression, false, false, *actions);
	}

	if (project_result)
	{
		actions->add(ExpressionActions::Action::project(projection));
	}
	else
	{
		/// Do not remove the source columns.
		NamesAndTypesList::const_iterator source = columns.begin();
		while (source != columns.end())
		{
			output_names.push_back(source->first);
			++source;
		}
	}

	actions->finalize(output_names);

	return actions;
}


/// Builds actions for the analyzed expression over an empty list of source columns.
/// NOTE(review): both flags passed to getRootActionsImpl are `true`; presumably they are
/// `no_subqueries` and `only_consts` - confirm against getRootActionsImpl.
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
	const NamesAndTypesList no_source_columns = NamesAndTypesList();
	ExpressionActionsPtr const_actions = new ExpressionActions(no_source_columns, settings);

	getRootActionsImpl(ast, true, true, *const_actions);

	return const_actions;
}

/// Outputs the aggregation keys and the aggregate function descriptions.
/// The names of aggregation_keys are appended to `key_names` (it is not cleared first);
/// `aggregates` is overwritten with a copy of aggregate_descriptions.
void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates)
{
	NamesAndTypesList::iterator key = aggregation_keys.begin();
	while (key != aggregation_keys.end())
	{
		key_names.push_back(key->first);
		++key;
	}

	aggregates = aggregate_descriptions;
}

1302
void ExpressionAnalyzer::removeUnusedColumns()
1303 1304 1305
{
	NamesSet required;
	NamesSet ignored;
1306

1307 1308 1309 1310 1311 1312
	if (select_query && select_query->array_join_expression_list)
	{
		ASTs & expressions = select_query->array_join_expression_list->children;
		for (size_t i = 0; i < expressions.size(); ++i)
		{
			/// Игнорируем идентификаторы верхнего уровня из секции ARRAY JOIN.
1313
			/// Их потом добавим отдельно.
1314
			if (dynamic_cast<ASTIdentifier *>(&*expressions[i]))
1315
			{
1316
				ignored.insert(expressions[i]->getColumnName());
1317 1318 1319 1320 1321 1322 1323
			}
			else
			{
				/// Для выражений в ARRAY JOIN ничего игнорировать не нужно.
				NamesSet empty;
				getRequiredColumnsImpl(expressions[i], required, empty);
			}
1324 1325

			ignored.insert(expressions[i]->getAlias());
1326 1327
		}
	}
1328

1329
	getRequiredColumnsImpl(ast, required, ignored);
1330

1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
	NameSet array_join_sources;
	for (NameToNameMap::iterator it = array_join_result_to_source.begin(); it != array_join_result_to_source.end(); ++it)
	{
		array_join_sources.insert(it->second);
	}
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it)
	{
		if (array_join_sources.count(it->first))
			required.insert(it->first);
	}

1342 1343 1344
	/// Нужно прочитать хоть один столбец, чтобы узнать количество строк.
	if (required.empty())
		required.insert(ExpressionActions::getSmallestColumn(columns));
1345

1346
	unknown_required_columns = required;
1347

1348 1349 1350 1351
	for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end();)
	{
		NamesAndTypesList::iterator it0 = it;
		++it;
1352

1353
		unknown_required_columns.erase(it0->first);
1354

1355
		if (!required.count(it0->first))
1356 1357
		{
			required.erase(it0->first);
1358
			columns.erase(it0);
1359
		}
1360
	}
S
Merge  
Sergey Fedorov 已提交
1361 1362 1363 1364 1365

	/// Возможно, среди неизвестных столбцов есть виртуальные. Удаляем их из списка неизвестных и добавляем
	/// в columns list, чтобы при дальнейшей обработке запроса они воспринимались как настоящие.
	for (NameSet::iterator it = unknown_required_columns.begin(); it != unknown_required_columns.end();)
	{
S
Merge  
Sergey Fedorov 已提交
1366
		if (storage && storage->hasColumn(*it))
S
Merge  
Sergey Fedorov 已提交
1367
		{
S
Merge  
Sergey Fedorov 已提交
1368 1369 1370 1371
			columns.push_back(storage->getColumn(*it));
			unknown_required_columns.erase(it++);
		} else
			++it;
S
Merge  
Sergey Fedorov 已提交
1372
	}
1373 1374 1375 1376
}

/// Returns the names of all entries of `columns`, in list order.
/// Throws UNKNOWN_IDENTIFIER, naming the first entry of unknown_required_columns,
/// if that set is not empty.
Names ExpressionAnalyzer::getRequiredColumns()
{
	const bool has_unknown = !unknown_required_columns.empty();
	if (has_unknown)
		throw Exception("Unknown identifier: " + *unknown_required_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER);

	Names column_names;

	NamesAndTypesList::iterator column = columns.begin();
	while (column != columns.end())
	{
		column_names.push_back(column->first);
		++column;
	}

	return column_names;
}

1386
/// Collects into `required_columns` the names of the column identifiers used in `ast`.
///
/// An identifier is skipped if its name, or the nested table name extracted from it, is in
/// `ignored_names`. Lambda parameters are added to `ignored_names` for the duration of the lambda
/// body and removed afterwards (only those that were not already ignored). Subqueries, nested
/// SELECT queries and the ARRAY JOIN section of a SELECT are not visited.
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or TYPE_MISMATCH for a malformed lambda expression.
void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names)
{
	if (ASTIdentifier * identifier_node = dynamic_cast<ASTIdentifier *>(&*ast))
	{
		const bool is_required_column =
			identifier_node->kind == ASTIdentifier::Column
			&& !ignored_names.count(identifier_node->name)
			&& !ignored_names.count(DataTypeNested::extractNestedTableName(identifier_node->name));

		if (is_required_column)
			required_columns.insert(identifier_node->name);

		return;
	}

	ASTFunction * function_node = dynamic_cast<ASTFunction *>(&*ast);
	if (function_node && function_node->kind == ASTFunction::LAMBDA_EXPRESSION)
	{
		ASTs & lambda_parts = function_node->arguments->children;
		if (lambda_parts.size() != 2)
			throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

		ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda_parts[0]);
		if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
			throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

		/// Lambda parameters must not end up in required_columns.
		/// Remember which names are added here so that only those are removed afterwards.
		Names newly_ignored;
		ASTs & parameter_asts = lambda_args_tuple->arguments->children;
		for (size_t param = 0; param < parameter_asts.size(); ++param)
		{
			ASTIdentifier * parameter = dynamic_cast<ASTIdentifier *>(&*parameter_asts[param]);
			if (!parameter)
				throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

			const std::string parameter_name = parameter->name;
			if (ignored_names.count(parameter_name))
				continue;

			ignored_names.insert(parameter_name);
			newly_ignored.push_back(parameter_name);
		}

		/// The second part of the lambda is its body.
		getRequiredColumnsImpl(lambda_parts[1], required_columns, ignored_names);

		for (size_t index = 0; index < newly_ignored.size(); ++index)
			ignored_names.erase(newly_ignored[index]);

		return;
	}

	ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast);

	for (size_t child_index = 0; child_index < ast->children.size(); ++child_index)
	{
		ASTPtr child = ast->children[child_index];

		/// Nested queries are not visited.
		if (dynamic_cast<ASTSubquery *>(&*child))
			continue;
		if (dynamic_cast<ASTSelectQuery *>(&*child))
			continue;

		/// Do not enter the ARRAY JOIN section: the names of the non-array-joined columns have to
		/// be looked at there. removeUnusedColumns sends us into that section separately.
		if (select && child == select->array_join_expression_list)
			continue;

		getRequiredColumnsImpl(child, required_columns, ignored_names);
	}
}

1448
}