ExpressionAnalyzer.cpp 41.2 KB
Newer Older
1
#include <Poco/Util/Application.h>
A
Alexey Milovidov 已提交
2
#include <Poco/String.h>
3

4 5
#include <Core/Block.h>

6 7 8 9
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTAsterisk.h>
10
#include <Parsers/ASTQualifiedAsterisk.h>
11 12
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
13
#include <Parsers/ASTSelectWithUnionQuery.h>
14 15
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTOrderByElement.h>
16
#include <Parsers/formatAST.h>
17
#include <Parsers/DumpASTNode.h>
18

19
#include <DataTypes/DataTypeNullable.h>
20
#include <DataTypes/NestedUtils.h>
21
#include <DataTypes/DataTypesNumber.h>
V
Vadim 已提交
22

23
#include <Columns/IColumn.h>
24

25
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
26 27 28 29
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
#include <Interpreters/LogicalExpressionsOptimizer.h>
30
#include <Interpreters/PredicateExpressionsOptimizer.h>
31 32 33
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/Set.h>
#include <Interpreters/Join.h>
34

35
#include <AggregateFunctions/AggregateFunctionFactory.h>
36
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
37

38 39 40
#include <Storages/StorageDistributed.h>
#include <Storages/StorageMemory.h>
#include <Storages/StorageJoin.h>
41

42 43
#include <DataStreams/LazyBlockInputStream.h>
#include <DataStreams/copyData.h>
44

45
#include <Dictionaries/IDictionary.h>
46

47
#include <Common/typeid_cast.h>
48
#include <Common/StringUtils/StringUtils.h>
49

50
#include <Parsers/formatAST.h>
51

52
#include <ext/range.h>
53
#include <DataTypes/DataTypeFactory.h>
54
#include <Functions/FunctionsMiscellaneous.h>
55
#include <Parsers/queryToString.h>
56 57
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
N
Nikolai Kochetov 已提交
58
#include <Parsers/queryToString.h>
59
#include <Interpreters/interpretSubquery.h>
60
#include <Interpreters/DatabaseAndTableWithAlias.h>
61
#include <Interpreters/QueryNormalizer.h>
62

63
#include <Interpreters/ActionsVisitor.h>
64 65 66
#include <Interpreters/ExternalTablesVisitor.h>
#include <Interpreters/GlobalSubqueriesVisitor.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
67 68 69 70

namespace DB
{

C
chertus 已提交
71
using LogAST = DebugASTLog<false>; /// set to true to enable logs
72 73


74 75
/// Error codes referenced by this translation unit; the values themselves are defined elsewhere.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int ILLEGAL_AGGREGATION;
    extern const int EXPECTED_ALL_OR_ANY;
    /// Used by assertSelect() and assertAggregation(). Declared explicitly so that this file
    /// does not rely on an included header happening to declare it.
    extern const int LOGICAL_ERROR;
}

81 82
/// From SyntaxAnalyzer.cpp
extern void removeDuplicateColumns(NamesAndTypesList & columns);
83

84
/** Sets up the analyzer from an already computed syntax analysis result and immediately runs
  * the analysis passes (used columns, global subqueries / external tables, aggregation).
  *
  * `additional_source_columns`, when non-empty, are appended to `source_columns`
  * and the combined list is passed through removeDuplicateColumns().
  */
ExpressionAnalyzer::ExpressionAnalyzer(
    const ASTPtr & query_,
    const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
    const Context & context_,
    const NamesAndTypesList & additional_source_columns,
    const Names & required_result_columns_,
    size_t subquery_depth_,
    bool do_global_,
    const SubqueriesForSets & subqueries_for_sets_)
    : ExpressionAnalyzerData(syntax_analyzer_result_->source_columns, required_result_columns_, subqueries_for_sets_)
    , query(query_), context(context_), settings(context.getSettings())
    , subquery_depth(subquery_depth_), do_global(do_global_)
    , syntax(syntax_analyzer_result_)
{
    /// Null when the analyzed AST is a standalone expression rather than a SELECT query.
    select_query = typeid_cast<ASTSelectQuery *>(query.get());

    storage = syntax->storage;
    rewrite_subqueries = syntax->rewrite_subqueries;

    const bool has_additional_columns = !additional_source_columns.empty();
    if (has_additional_columns)
    {
        source_columns.insert(source_columns.end(), additional_source_columns.begin(), additional_source_columns.end());
        removeDuplicateColumns(source_columns);
    }

    /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`.
    /// Form `columns_added_by_join`.
    collectUsedColumns();

    /// Fills external_tables and subqueries_for_sets for global subqueries.
    /// Global subqueries are replaced with the generated names of temporary tables
    /// that will be sent to remote servers.
    initGlobalSubqueriesAndExternalTables();

    /// Fills has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
    /// This must run after global subqueries have been processed: if an aggregate function
    /// contained a global subquery, `analyzeAggregation` would store the parameters of that function
    /// (including the subquery) in `aggregate_descriptions`; a later call to
    /// `initGlobalSubqueriesAndExternalTables` would replace the subquery with a temporary table,
    /// leaving out-of-date information in `aggregate_descriptions` and causing an error at execution.
    analyzeAggregation();
}

C
chertus 已提交
126
bool ExpressionAnalyzer::isRemoteStorage() const
127
{
C
chertus 已提交
128
    return storage && storage->isRemote();
129 130 131
}


132 133
void ExpressionAnalyzer::analyzeAggregation()
{
F
f1yegor 已提交
134 135 136
    /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions),
     *  as well as a set of columns obtained after the aggregation, if any,
     *  or after all the actions that are usually performed before aggregation (aggregated_columns).
137
     *
F
f1yegor 已提交
138
     * Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output).
139 140 141 142 143
     */

    if (select_query && (select_query->group_expression_list || select_query->having_expression))
        has_aggregation = true;

144
    ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(source_columns, context);
145 146 147

    if (select_query && select_query->array_join_expression_list())
    {
148
        getRootActions(select_query->array_join_expression_list(), true, temp_actions);
149
        addMultipleArrayJoinAction(temp_actions);
150
        array_join_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
151 152 153 154 155 156 157
    }

    if (select_query)
    {
        const ASTTablesInSelectQueryElement * join = select_query->join();
        if (join)
        {
158 159
            const auto table_join = static_cast<const ASTTableJoin &>(*join->table_join);
            if (table_join.using_expression_list)
160
                getRootActions(table_join.using_expression_list, true, temp_actions);
161
            if (table_join.on_expression)
N
Nikolai Kochetov 已提交
162
                for (const auto & key_ast : analyzedJoin().key_asts_left)
163
                    getRootActions(key_ast, true, temp_actions);
164 165 166 167 168

            addJoinAction(temp_actions, true);
        }
    }

169
    getAggregates(query, temp_actions);
170 171 172 173 174 175 176 177 178 179 180 181 182

    if (has_aggregation)
    {
        assertSelect();

        /// Find out aggregation keys.
        if (select_query->group_expression_list)
        {
            NameSet unique_keys;
            ASTs & group_asts = select_query->group_expression_list->children;
            for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
            {
                ssize_t size = group_asts.size();
183
                getRootActions(group_asts[i], true, temp_actions);
184 185 186 187 188 189 190 191 192 193

                const auto & column_name = group_asts[i]->getColumnName();
                const auto & block = temp_actions->getSampleBlock();

                if (!block.has(column_name))
                    throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);

                const auto & col = block.getByName(column_name);

                /// Constant expressions have non-null column pointer at this stage.
194
                if (col.column && col.column->isColumnConst())
195 196 197 198 199 200 201 202 203 204 205 206 207 208
                {
                    /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work.
                    if (!aggregate_descriptions.empty() || size > 1)
                    {
                        if (i + 1 < static_cast<ssize_t>(size))
                            group_asts[i] = std::move(group_asts.back());

                        group_asts.pop_back();

                        --i;
                        continue;
                    }
                }

209
                NameAndTypePair key{column_name, col.type};
210 211 212 213 214

                /// Aggregation keys are uniqued.
                if (!unique_keys.count(key.name))
                {
                    unique_keys.insert(key.name);
215
                    aggregation_keys.push_back(key);
216 217

                    /// Key is no longer needed, therefore we can save a little by moving it.
218
                    aggregated_columns.push_back(std::move(key));
219 220 221 222 223 224
                }
            }

            if (group_asts.empty())
            {
                select_query->group_expression_list = nullptr;
225
                has_aggregation = select_query->having_expression || aggregate_descriptions.size();
226 227 228 229 230 231
            }
        }

        for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
        {
            AggregateDescription & desc = aggregate_descriptions[i];
232
            aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
233 234
        }
    }
235 236 237 238
    else
    {
        aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
    }
239 240 241
}


242 243
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables()
{
F
f1yegor 已提交
244
    /// Adds existing external tables (not subqueries) to the external_tables dictionary.
245 246
    ExternalTablesMatcher::Data tables_data{context, external_tables};
    ExternalTablesVisitor(tables_data).visit(query);
247

C
chertus 已提交
248 249
    if (do_global)
    {
C
chertus 已提交
250
        GlobalSubqueriesVisitor subqueries_visitor(context, subquery_depth, isRemoteStorage(),
C
chertus 已提交
251 252 253
                                                   external_tables, subqueries_for_sets, has_global_subqueries);
        subqueries_visitor.visit(query);
    }
254 255 256
}


257
void ExpressionAnalyzer::makeSetsForIndex()
P
Pavel Kartavyy 已提交
258
{
259 260 261 262 263 264 265
    if (storage && select_query && storage->supportsIndexForIn())
    {
        if (select_query->where_expression)
            makeSetsForIndexImpl(select_query->where_expression, storage->getSampleBlock());
        if (select_query->prewhere_expression)
            makeSetsForIndexImpl(select_query->prewhere_expression, storage->getSampleBlock());
    }
P
Pavel Kartavyy 已提交
266 267
}

268

269
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
270 271 272
{
    BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute();

273
    SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
274

275
    set->setHeader(res.in->getHeader());
276 277 278
    while (Block block = res.in->read())
    {
        /// If the limits have been exceeded, give up and let the default subquery processing actions take place.
A
Alexey Milovidov 已提交
279
        if (!set->insertFromBlock(block))
280 281 282
            return;
    }

283
    prepared_sets[subquery_or_table_name->range] = std::move(set);
284 285 286
}


287
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block)
P
Pavel Kartavyy 已提交
288
{
289
    for (auto & child : node->children)
290
    {
291
        /// Don't descent into subqueries.
292 293 294 295 296 297 298 299 300
        if (typeid_cast<ASTSubquery *>(child.get()))
            continue;

        /// Don't dive into lambda functions
        const ASTFunction * func = typeid_cast<const ASTFunction *>(child.get());
        if (func && func->name == "lambda")
            continue;

        makeSetsForIndexImpl(child, sample_block);
301
    }
302

303
    const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get());
304
    if (func && functionIsInOperator(func->name))
305
    {
306
        const IAST & args = *func->arguments;
307

308
        if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
309
        {
310 311
            const ASTPtr & arg = args.children.at(1);

312
            if (!prepared_sets.count(arg->range)) /// Not already prepared.
313
            {
314 315 316
                if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
                {
                    if (settings.use_index_for_in_with_subqueries)
317
                        tryMakeSetForIndexFromSubquery(arg);
318 319 320
                }
                else
                {
321
                    NamesAndTypesList temp_columns = source_columns;
322
                    temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
N
Nikolai Kochetov 已提交
323
                    for (const auto & joined_column : columns_added_by_join)
N
Nikolai Kochetov 已提交
324
                        temp_columns.push_back(joined_column.name_and_type);
325
                    ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
326
                    getRootActions(func->arguments->children.at(0), true, temp_actions);
327

328 329
                    Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
                    if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
330 331
                        makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
                                        settings.size_limits_for_set, prepared_sets);
332 333 334 335
                }
            }
        }
    }
336 337
}

T
Tsarkova Anastasia 已提交
338 339
bool ExpressionAnalyzer::isThereArrayJoin(const ASTPtr & ast)
{
T
Tsarkova Anastasia 已提交
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
    if (typeid_cast<ASTIdentifier *>(ast.get()))
    {
        return false;
    }
    else if (ASTFunction * node = typeid_cast<ASTFunction *>(ast.get()))
    {
        if (node->name == "arrayJoin")
        {
            return true;
        }
        if (functionIsInOrGlobalInOperator(node->name))
        {
            return isThereArrayJoin(node->arguments->children.at(0));
        }
        if (node->name == "indexHint")
        {
            return false;
        }
        if (AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
        {
            return false;
        }
        for (auto & child : node->arguments->children)
        {
T
Tsarkova Anastasia 已提交
364 365
            if (isThereArrayJoin(child))
            {
T
Tsarkova Anastasia 已提交
366 367 368 369 370 371 372 373 374 375 376 377 378
                return true;
            }
        }
        return false;
    }
    else if (typeid_cast<ASTLiteral *>(ast.get()))
    {
        return false;
    }
    else
    {
        for (auto & child : ast->children)
        {
T
Tsarkova Anastasia 已提交
379 380
            if (isThereArrayJoin(child))
            {
T
Tsarkova Anastasia 已提交
381 382 383 384 385 386
                return true;
            }
        }
        return false;
    }
}
387

388 389

void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
390
{
391
    LogAST log;
392
    ActionsVisitor actions_visitor(context, settings.size_limits_for_set, subquery_depth,
393
                                   source_columns, actions, prepared_sets, subqueries_for_sets,
C
chertus 已提交
394
                                   no_subqueries, only_consts, !isRemoteStorage(), log.stream());
395 396 397 398 399 400 401 402
    actions_visitor.visit(ast);
    actions = actions_visitor.popActionsLevel();
}


void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, ExpressionActionsPtr & actions)
{
    bool only_consts = false;
403

404
    LogAST log;
405
    ActionsVisitor actions_visitor(context, settings.size_limits_for_set, subquery_depth,
406
                                   source_columns, actions, prepared_sets, subqueries_for_sets,
C
chertus 已提交
407
                                   no_subqueries, only_consts, !isRemoteStorage(), log.stream());
408

409
    if (table_join.using_expression_list)
410
        actions_visitor.visit(table_join.using_expression_list);
411 412
    else if (table_join.on_expression)
    {
N
Nikolai Kochetov 已提交
413
        for (const auto & ast : analyzedJoin().key_asts_left)
414
            actions_visitor.visit(ast);
415 416
    }

417
    actions = actions_visitor.popActionsLevel();
418 419
}

420

421
void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions)
422
{
F
f1yegor 已提交
423
    /// There can not be aggregate functions inside the WHERE and PREWHERE.
424 425 426 427 428 429
    if (select_query && (ast.get() == select_query->where_expression.get() || ast.get() == select_query->prewhere_expression.get()))
    {
        assertNoAggregates(ast, "in WHERE or PREWHERE");
        return;
    }

F
f1yegor 已提交
430
    /// If we are not analyzing a SELECT query, but a separate expression, then there can not be aggregate functions in it.
431 432 433 434 435 436 437
    if (!select_query)
    {
        assertNoAggregates(ast, "in wrong place");
        return;
    }

    const ASTFunction * node = typeid_cast<const ASTFunction *>(ast.get());
438
    if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
439 440 441 442 443
    {
        has_aggregation = true;
        AggregateDescription aggregate;
        aggregate.column_name = node->getColumnName();

F
f1yegor 已提交
444
        /// Make unique aggregate functions.
445 446 447 448 449 450 451 452 453 454
        for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
            if (aggregate_descriptions[i].column_name == aggregate.column_name)
                return;

        const ASTs & arguments = node->arguments->children;
        aggregate.argument_names.resize(arguments.size());
        DataTypes types(arguments.size());

        for (size_t i = 0; i < arguments.size(); ++i)
        {
F
f1yegor 已提交
455
            /// There can not be other aggregate functions within the aggregate functions.
456 457
            assertNoAggregates(arguments[i], "inside another aggregate function");

458
            getRootActions(arguments[i], true, actions);
459 460 461 462 463
            const std::string & name = arguments[i]->getColumnName();
            types[i] = actions->getSampleBlock().getByName(name).type;
            aggregate.argument_names[i] = name;
        }

464 465
        aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array();
        aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters);
466 467 468 469 470 471 472 473 474 475

        aggregate_descriptions.push_back(aggregate);
    }
    else
    {
        for (const auto & child : ast->children)
            if (!typeid_cast<const ASTSubquery *>(child.get())
                && !typeid_cast<const ASTSelectQuery *>(child.get()))
                getAggregates(child, actions);
    }
476 477
}

478 479 480

void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * description)
{
481
    const ASTFunction * node = typeid_cast<const ASTFunction *>(ast.get());
482

483
    if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
484 485
        throw Exception("Aggregate function " + node->getColumnName()
            + " is found " + String(description) + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
486

487 488 489 490
    for (const auto & child : ast->children)
        if (!typeid_cast<const ASTSubquery *>(child.get())
            && !typeid_cast<const ASTSelectQuery *>(child.get()))
            assertNoAggregates(child, description);
491 492 493
}


494
void ExpressionAnalyzer::assertSelect() const
495
{
496 497
    if (!select_query)
        throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
498
}
499

500
void ExpressionAnalyzer::assertAggregation() const
501
{
502 503
    if (!has_aggregation)
        throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
504
}
505

506
/// Ensures the chain has at least one step; an empty chain gets a first step built over `columns`.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
{
    if (!chain.steps.empty())
        return;

    chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context));
}
513

514
/// "Big" ARRAY JOIN.
515
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions) const
516
{
517
    NameSet result_columns;
518
    for (const auto & result_source : syntax->array_join_result_to_source)
519 520 521 522
    {
        /// Assign new names to columns, if needed.
        if (result_source.first != result_source.second)
            actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first));
523

F
f1yegor 已提交
524
        /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
525 526
        result_columns.insert(result_source.first);
    }
527

528
    actions->add(ExpressionAction::arrayJoin(result_columns, select_query->array_join_is_left(), context));
529 530
}

531
/// Appends ARRAY JOIN actions to the last step of the chain.
/// Returns false (leaving the chain untouched) when the query has no ARRAY JOIN.
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    const bool has_array_join = static_cast<bool>(select_query->array_join_expression_list());
    if (!has_array_join)
        return false;

    initChain(chain, source_columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    getRootActions(select_query->array_join_expression_list(), only_types, last_step.actions);
    addMultipleArrayJoinAction(last_step.actions);

    return true;
}

548
/// Adds JOIN action(s) to `actions`. With `only_types` a single join action without a Join object is added;
/// otherwise one action is added for every entry of `subqueries_for_sets` that has a Join.
void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only_types) const
{
    NamesAndTypesList joined_columns;
    for (const auto & joined_column : columns_added_by_join)
        joined_columns.push_back(joined_column.name_and_type);

    if (only_types)
    {
        actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, joined_columns));
        return;
    }

    for (auto & subquery_for_set : subqueries_for_sets)
    {
        if (!subquery_for_set.second.join)
            continue;

        actions->add(ExpressionAction::ordinaryJoin(
            subquery_for_set.second.join, analyzedJoin().key_names_left, joined_columns));
    }
}

/// Appends JOIN actions to the last step of the chain, creating the Join object
/// (or reusing one from a Join-engine table) and its source stream when needed.
/// Returns false when the query has no JOIN.
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    if (!select_query->join())
        return false;

    initChain(chain, source_columns);
    ExpressionActionsChain::Step & step = chain.steps.back();

    const auto & join_element = static_cast<const ASTTablesInSelectQueryElement &>(*select_query->join());
    auto & join_params = static_cast<ASTTableJoin &>(*join_element.table_join);

    /// Fill in the strictness from the `join_default_strictness` setting when the query did not specify it.
    const bool strictness_unspecified = join_params.strictness == ASTTableJoin::Strictness::Unspecified;
    if (strictness_unspecified && join_params.kind != ASTTableJoin::Kind::Cross)
    {
        if (settings.join_default_strictness == "ANY")
            join_params.strictness = ASTTableJoin::Strictness::Any;
        else if (settings.join_default_strictness == "ALL")
            join_params.strictness = ASTTableJoin::Strictness::All;
        else
            throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty", DB::ErrorCodes::EXPECTED_ALL_OR_ANY);
    }

    const auto & table_to_join = static_cast<const ASTTableExpression &>(*join_element.table_expression);

    getActionsFromJoinKeys(join_params, only_types, step.actions);

    /// Two JOINs are not supported with the same subquery, but different USINGs.
    auto join_hash = join_element.getTreeHash();
    const String subquery_key = toString(join_hash.first) + "_" + toString(join_hash.second);

    SubqueryForSet & subquery_for_set = subqueries_for_sets[subquery_key];

    /// Special case - if table name is specified on the right of JOIN, then the table has the type Join
    /// (the previously prepared mapping).
    /// TODO This syntax does not support specifying a database name.
    if (table_to_join.database_and_table_name)
    {
        const auto & identifier = static_cast<const ASTIdentifier &>(*table_to_join.database_and_table_name);
        DatabaseAndTableWithAlias database_table(identifier);
        StoragePtr table = context.tryGetTable(database_table.database, database_table.table);

        /// dynamic_cast of a null pointer yields null, so a missing table simply skips this branch.
        if (StorageJoin * storage_join = dynamic_cast<StorageJoin *>(table.get()))
        {
            storage_join->assertCompatible(join_params.kind, join_params.strictness);
            /// TODO Check the set of keys.

            subquery_for_set.join = storage_join->getJoin();
        }
    }

    if (!subquery_for_set.join)
    {
        JoinPtr join = std::make_shared<Join>(
            analyzedJoin().key_names_left, analyzedJoin().key_names_right, columns_added_by_join_from_right_keys,
            settings.join_use_nulls, settings.size_limits_for_join,
            join_params.kind, join_params.strictness);

        /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
          * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
          *   in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
          * - this function shows the expression JOIN _data1.
          */
        if (!subquery_for_set.source)
        {
            ASTPtr table;

            if (table_to_join.subquery)
                table = table_to_join.subquery;
            else if (table_to_join.table_function)
                table = table_to_join.table_function;
            else if (table_to_join.database_and_table_name)
                table = table_to_join.database_and_table_name;

            /// Request only the columns of the joined table that are actually required.
            Names original_columns;
            for (const auto & joined_column : analyzedJoin().columns_from_joined_table)
            {
                if (!required_columns_from_joined_table.count(joined_column.name_and_type.name))
                    continue;

                original_columns.emplace_back(joined_column.original_name);
            }

            auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns);
            subquery_for_set.source = std::make_shared<LazyBlockInputStream>(
                interpreter->getSampleBlock(),
                [interpreter]() mutable { return interpreter->execute().in; });
        }

        /// Alias duplicating columns as qualified.
        for (const auto & joined_column : analyzedJoin().columns_from_joined_table)
        {
            if (!required_columns_from_joined_table.count(joined_column.name_and_type.name))
                continue;

            subquery_for_set.joined_block_aliases.emplace_back(joined_column.original_name, joined_column.name_and_type.name);
        }

        /// Apply the same renaming to the sample block: each aliased column is re-inserted under its alias.
        auto sample_block = subquery_for_set.source->getHeader();
        for (const auto & name_with_alias : subquery_for_set.joined_block_aliases)
        {
            if (!sample_block.has(name_with_alias.first))
                continue;

            auto position = sample_block.getPositionByName(name_with_alias.first);
            auto renamed_column = sample_block.getByPosition(position);
            sample_block.erase(position);
            renamed_column.name = name_with_alias.second;
            sample_block.insert(std::move(renamed_column));
        }

        joined_block_actions->execute(sample_block);

        /// TODO You do not need to set this up when JOIN is only needed on remote servers.
        subquery_for_set.join = join;
        subquery_for_set.join->setSampleBlock(sample_block);
        subquery_for_set.joined_block_actions = joined_block_actions;
    }

    addJoinAction(step.actions, false);

    return true;
}

683 684
/// Appends the PREWHERE step to the chain, followed by an empty step whose inputs are the
/// prewhere outputs plus the source columns that prewhere did not use.
/// Returns false when the query has no PREWHERE.
bool ExpressionAnalyzer::appendPrewhere(
    ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{
    assertSelect();

    if (!select_query->prewhere_expression)
        return false;

    initChain(chain, source_columns);
    auto & step = chain.getLastStep();
    getRootActions(select_query->prewhere_expression, only_types, step.actions);

    /// The filter column is the first required output of the prewhere step.
    String prewhere_column_name = select_query->prewhere_expression->getColumnName();
    step.required_output.push_back(prewhere_column_name);
    step.can_remove_required_output.push_back(true);

    {
        /// Remove unused source_columns from prewhere actions.
        auto probe_actions = std::make_shared<ExpressionActions>(source_columns, context);
        getRootActions(select_query->prewhere_expression, only_types, probe_actions);
        probe_actions->finalize({prewhere_column_name});

        auto probe_required = probe_actions->getRequiredColumns();
        NameSet required_source_columns(probe_required.begin(), probe_required.end());

        /// Add required columns to required output in order not to remove them after prewhere execution.
        /// TODO: add sampling and final execution to common chain.
        for (const auto & additional_column : additional_required_columns)
        {
            if (!required_source_columns.count(additional_column))
                continue;

            step.required_output.push_back(additional_column);
            step.can_remove_required_output.push_back(true);
        }

        /// Keep every column of the step's sample block except the source columns prewhere does not need.
        auto sample_names = step.actions->getSampleBlock().getNames();
        NameSet name_set(sample_names.begin(), sample_names.end());

        for (const auto & source_column : source_columns)
            if (required_source_columns.count(source_column.name) == 0)
                name_set.erase(source_column.name);

        Names required_output(name_set.begin(), name_set.end());
        step.actions->finalize(required_output);
    }

    {
        /// Add empty action with input = {prewhere actions output} + {unused source columns}
        /// Reasons:
        /// 1. Remove source columns which are used only in prewhere actions during prewhere actions execution.
        ///    Example: select A prewhere B > 0. B can be removed at prewhere step.
        /// 2. Store side columns which were calculated during prewhere actions execution if they are used.
        ///    Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
        /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
        ColumnsWithTypeAndName next_step_columns = step.actions->getSampleBlock().getColumnsWithTypeAndName();
        auto prewhere_required = step.actions->getRequiredColumns();
        NameSet prewhere_input_names(prewhere_required.begin(), prewhere_required.end());
        NameSet unused_source_columns;

        for (const auto & source_column : source_columns)
        {
            if (prewhere_input_names.count(source_column.name) != 0)
                continue;

            next_step_columns.emplace_back(source_column.type, source_column.name);
            unused_source_columns.emplace(source_column.name);
        }

        chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(next_step_columns), context));
        chain.steps.back().additional_input = std::move(unused_source_columns);
    }

    return true;
}
756

757
/// Appends a chain step that evaluates the WHERE expression.
/// Returns false, without initializing a step, when the query has no WHERE clause.
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    const auto & where_ast = select_query->where_expression;
    if (!where_ast)
        return false;

    initChain(chain, source_columns);
    auto & where_step = chain.steps.back();

    /// The WHERE result column is the step's required output; it is marked in can_remove_required_output.
    where_step.required_output.push_back(where_ast->getColumnName());
    where_step.can_remove_required_output = {true};

    getRootActions(where_ast, only_types, where_step.actions);

    return true;
}

775
/// Appends a chain step that evaluates every GROUP BY key expression.
/// Returns false when the query has no GROUP BY list.
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
{
    assertAggregation();

    if (!select_query->group_expression_list)
        return false;

    initChain(chain, source_columns);
    auto & group_by_step = chain.steps.back();

    /// Iterate over a copy of the key list, as the index-based loop did.
    ASTs group_keys = select_query->group_expression_list->children;
    for (const auto & group_key : group_keys)
    {
        group_by_step.required_output.push_back(group_key->getColumnName());
        getRootActions(group_key, only_types, group_by_step.actions);
    }

    return true;
}

795
/// Appends a chain step whose required output is every argument of every aggregate function,
/// and adds the actions needed before aggregation for SELECT, HAVING and ORDER BY.
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{
    assertAggregation();

    initChain(chain, source_columns);
    auto & arguments_step = chain.steps.back();

    for (const auto & description : aggregate_descriptions)
        for (const auto & argument_name : description.argument_names)
            arguments_step.required_output.push_back(argument_name);

    getActionsBeforeAggregation(select_query->select_expression_list, arguments_step.actions, only_types);

    /// HAVING and ORDER BY are optional clauses.
    if (select_query->having_expression)
        getActionsBeforeAggregation(select_query->having_expression, arguments_step.actions, only_types);

    if (select_query->order_expression_list)
        getActionsBeforeAggregation(select_query->order_expression_list, arguments_step.actions, only_types);
}

819
/// Appends a chain step (over the aggregated columns) that evaluates the HAVING expression.
/// Returns false when the query has no HAVING clause.
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
{
    assertAggregation();

    const auto & having_ast = select_query->having_expression;
    if (!having_ast)
        return false;

    initChain(chain, aggregated_columns);
    auto & having_step = chain.steps.back();

    having_step.required_output.push_back(having_ast->getColumnName());
    getRootActions(having_ast, only_types, having_step.actions);

    return true;
}

835
/// Appends a chain step that evaluates the SELECT list and requires each of its columns as output.
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    initChain(chain, aggregated_columns);
    auto & select_step = chain.steps.back();

    const auto & select_list = select_query->select_expression_list;
    getRootActions(select_list, only_types, select_step.actions);

    for (const auto & select_item : select_list->children)
        select_step.required_output.push_back(select_item->getColumnName());
}
847

848
/// Appends a chain step that evaluates the ORDER BY list.
/// Returns false when the query has no ORDER BY; throws UNKNOWN_TYPE_OF_AST_NODE if an element
/// of the list is not an ASTOrderByElement with at least one child.
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    if (!select_query->order_expression_list)
        return false;

    initChain(chain, aggregated_columns);
    auto & order_by_step = chain.steps.back();

    getRootActions(select_query->order_expression_list, only_types, order_by_step.actions);

    ASTs order_elements = select_query->order_expression_list->children;
    for (const auto & element_ast : order_elements)
    {
        auto * order_element = typeid_cast<ASTOrderByElement *>(element_ast.get());
        if (!order_element || order_element->children.empty())
            throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

        /// The first child of an ORDER BY element is the expression being sorted by.
        ASTPtr order_expression = order_element->children.front();
        order_by_step.required_output.push_back(order_expression->getColumnName());
    }

    return true;
}

873 874 875 876 877 878 879 880 881 882
/// Appends a chain step that evaluates the LIMIT BY list and requires each of its columns as output.
/// Returns false when the query has no LIMIT BY list.
bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{
    assertSelect();

    const auto & limit_by_list = select_query->limit_by_expression_list;
    if (!limit_by_list)
        return false;

    initChain(chain, aggregated_columns);
    auto & limit_by_step = chain.steps.back();

    getRootActions(limit_by_list, only_types, limit_by_step.actions);

    for (const auto & limit_by_item : limit_by_list->children)
        limit_by_step.required_output.push_back(limit_by_item->getColumnName());

    return true;
}

A
Alexey Milovidov 已提交
891
/// Appends a PROJECT action that renames SELECT columns to their aliases.
/// When required_result_columns is non-empty, only the columns named there are kept.
void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{
    assertSelect();

    initChain(chain, aggregated_columns);
    auto & project_step = chain.steps.back();

    NamesWithAliases result_columns;

    ASTs select_asts = select_query->select_expression_list->children;
    for (const auto & select_ast : select_asts)
    {
        const String result_name = select_ast->getAliasOrColumnName();

        const bool is_wanted = required_result_columns.empty()
            || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end();
        if (!is_wanted)
            continue;

        result_columns.emplace_back(select_ast->getColumnName(), result_name);
        project_step.required_output.push_back(result_name);
    }

    project_step.actions->add(ExpressionAction::project(result_columns));
}


916
/// Appends a chain step (over the source columns) that evaluates `expr` and requires its column as output.
void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types)
{
    initChain(chain, source_columns);
    auto & expression_step = chain.steps.back();

    getRootActions(expr, only_types, expression_step.actions);
    expression_step.required_output.push_back(expr->getColumnName());
}


925
void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries)
926
{
927
    ASTFunction * node = typeid_cast<ASTFunction *>(ast.get());
928

929
    if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
930
        for (auto & argument : node->arguments->children)
931
            getRootActions(argument, no_subqueries, actions);
932 933 934
    else
        for (auto & child : ast->children)
            getActionsBeforeAggregation(child, actions, no_subqueries);
935 936 937
}


938
/** Builds the actions that evaluate the analyzed expression (or each element of an expression list).
  * add_aliases - name results by alias instead of by column name.
  * project_result - with add_aliases, PROJECT the result so that only the aliased columns remain;
  *  otherwise the source columns are also kept in the output.
  */
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
    ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(source_columns, context);
    NamesWithAliases result_columns;
    Names result_names;

    /// A bare expression is treated as a list with a single element.
    const auto * expression_list = typeid_cast<const ASTExpressionList *>(query.get());
    ASTs asts = expression_list ? expression_list->children : ASTs(1, query);

    for (const auto & ast : asts)
    {
        const std::string name = ast->getColumnName();
        const std::string alias = add_aliases ? ast->getAliasOrColumnName() : name;

        result_columns.emplace_back(name, alias);
        result_names.push_back(alias);
        getRootActions(ast, false, actions);
    }

    if (add_aliases && project_result)
    {
        actions->add(ExpressionAction::project(result_columns));
    }
    else
    {
        if (add_aliases)
            actions->add(ExpressionAction::addAliases(result_columns));

        /// We will not delete the original columns.
        for (const auto & source_column : source_columns)
            result_names.push_back(source_column.name);
    }

    actions->finalize(result_names);

    return actions;
}


/// Builds actions for the analyzed expression starting from an empty set of input columns.
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
    ExpressionActionsPtr const_actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
    getRootActions(query, true, const_actions, true);

    return const_actions;
}

993
/// Appends the names of the aggregation keys to `key_names` and overwrites `aggregates`
/// with the collected aggregate descriptions.
void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const
{
    for (const auto & aggregation_key : aggregation_keys)
        key_names.emplace_back(aggregation_key.name);

    aggregates = aggregate_descriptions;
}

1001
void ExpressionAnalyzer::collectUsedColumns()
1002
{
F
f1yegor 已提交
1003 1004 1005
    /** Calculate which columns are required to execute the expression.
      * Then, delete all other columns from the list of available columns.
      * After execution, columns will only contain the list of columns needed to read from the table.
1006 1007 1008 1009 1010
      */

    NameSet required;
    NameSet ignored;

1011
    NameSet available_columns;
1012
    for (const auto & column : source_columns)
1013 1014
        available_columns.insert(column.name);

1015 1016 1017 1018 1019
    if (select_query && select_query->array_join_expression_list())
    {
        ASTs & expressions = select_query->array_join_expression_list()->children;
        for (size_t i = 0; i < expressions.size(); ++i)
        {
F
f1yegor 已提交
1020 1021
            /// Ignore the top-level identifiers from the ARRAY JOIN section.
            /// Then add them separately.
1022 1023 1024 1025 1026 1027
            if (typeid_cast<ASTIdentifier *>(expressions[i].get()))
            {
                ignored.insert(expressions[i]->getColumnName());
            }
            else
            {
F
f1yegor 已提交
1028
                /// Nothing needs to be ignored for expressions in ARRAY JOIN.
1029
                NameSet empty;
1030 1031
                RequiredSourceColumnsVisitor visitor(available_columns, required, empty, empty, empty);
                visitor.visit(expressions[i]);
1032 1033 1034 1035 1036 1037
            }

            ignored.insert(expressions[i]->getAliasOrColumnName());
        }
    }

F
f1yegor 已提交
1038 1039
    /** You also need to ignore the identifiers of the columns that are obtained by JOIN.
      * (Do not assume that they are required for reading from the "left" table).
1040 1041
      */
    NameSet available_joined_columns;
N
Nikolai Kochetov 已提交
1042
    for (const auto & joined_column : analyzedJoin().available_joined_columns)
N
Nikolai Kochetov 已提交
1043
        available_joined_columns.insert(joined_column.name_and_type.name);
1044 1045

    NameSet required_joined_columns;
1046

N
Nikolai Kochetov 已提交
1047
    for (const auto & left_key_ast : analyzedJoin().key_asts_left)
1048
    {
C
chertus 已提交
1049 1050
        NameSet empty;
        RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, empty, required_joined_columns);
1051 1052
        columns_visitor.visit(left_key_ast);
    }
1053

1054 1055
    RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, available_joined_columns, required_joined_columns);
    columns_visitor.visit(query);
1056

N
Nikolai Kochetov 已提交
1057
    columns_added_by_join = analyzedJoin().available_joined_columns;
N
Nikolai Kochetov 已提交
1058
    for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();)
1059
    {
N
Nikolai Kochetov 已提交
1060
        if (required_joined_columns.count(it->name_and_type.name))
1061 1062
            ++it;
        else
N
Nikolai Kochetov 已提交
1063
            columns_added_by_join.erase(it++);
1064
    }
1065

N
Nikolai Kochetov 已提交
1066
    joined_block_actions = analyzedJoin().createJoinedBlockActions(
1067
        columns_added_by_join, select_query, context, required_columns_from_joined_table);
N
Nikolai Kochetov 已提交
1068

1069
    /// Some columns from right join key may be used in query. This columns will be appended to block during join.
N
Nikolai Kochetov 已提交
1070
    for (const auto & right_key_name : analyzedJoin().key_names_right)
N
Nikolai Kochetov 已提交
1071
        if (required_joined_columns.count(right_key_name))
N
Nikolai Kochetov 已提交
1072
            columns_added_by_join_from_right_keys.insert(right_key_name);
N
Nikolai Kochetov 已提交
1073

F
f1yegor 已提交
1074
    /// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
1075
    NameSet array_join_sources;
1076
    for (const auto & result_source : syntax->array_join_result_to_source)
1077 1078
        array_join_sources.insert(result_source.second);

1079
    for (const auto & column_name_type : source_columns)
1080 1081 1082
        if (array_join_sources.count(column_name_type.name))
            required.insert(column_name_type.name);

F
f1yegor 已提交
1083
    /// You need to read at least one column to find the number of rows.
Z
zhang2014 已提交
1084
    if (select_query && required.empty())
1085
        required.insert(ExpressionActions::getSmallestColumn(source_columns));
1086

1087
    NameSet unknown_required_source_columns = required;
1088

1089
    for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();)
1090
    {
1091
        unknown_required_source_columns.erase(it->name);
1092 1093

        if (!required.count(it->name))
1094
            source_columns.erase(it++);
1095 1096
        else
            ++it;
1097 1098
    }

1099 1100
    /// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add
    /// in columns list, so that when further processing they are also considered.
1101 1102
    if (storage)
    {
1103
        for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
1104 1105 1106
        {
            if (storage->hasColumn(*it))
            {
1107 1108
                source_columns.push_back(storage->getColumn(*it));
                unknown_required_source_columns.erase(it++);
1109 1110 1111 1112 1113
            }
            else
                ++it;
        }
    }
1114 1115

    if (!unknown_required_source_columns.empty())
1116 1117 1118
        throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin()
            + (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : ""),
            ErrorCodes::UNKNOWN_IDENTIFIER);
1119 1120
}

1121

1122
/// Returns the names of the columns currently held in source_columns.
Names ExpressionAnalyzer::getRequiredSourceColumns() const
{
    Names required_names = source_columns.getNames();
    return required_names;
}

1127
}