/*-------------------------------------------------------------------------
 *
 * planner.c
 *	  The query optimizer external interface.
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc
 * Portions Copyright (c) 2012-Present Pivotal Software, Inc.
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.250 2009/01/01 17:23:44 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>

#include "catalog/pg_operator.h"
#include "executor/executor.h"
#include "executor/execHHashagg.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/orca.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/transform.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

#include "catalog/pg_proc.h"
#include "cdb/cdbllize.h"
#include "cdb/cdbmutate.h"		/* apply_shareinput */
#include "cdb/cdbpartition.h"
#include "cdb/cdbpath.h"		/* cdbpath_segments */
#include "cdb/cdbpathtoplan.h"	/* cdbpathtoplan_create_flow() */
#include "cdb/cdbpullup.h"
#include "cdb/cdbgroup.h"		/* grouping_planner extensions */
#include "cdb/cdbsetop.h"		/* motion utilities */
#include "cdb/cdbvars.h"


/* GUC parameter */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;

/* Hook for plugins to get control in planner() */
planner_hook_type planner_hook = NULL;


/* Expression kind codes for preprocess_expression */
#define EXPRKIND_QUAL		0
#define EXPRKIND_TARGET		1
#define EXPRKIND_RTFUNC		2
#define EXPRKIND_VALUES		3
#define EXPRKIND_LIMIT		4
#define EXPRKIND_APPINFO	5
#define EXPRKIND_WINDOW_BOUND 6


static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static Plan *inheritance_planner(PlannerInfo *root);
static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
static double preprocess_limit(PlannerInfo *root,
				 double tuple_fraction,
				 int64 *offset_est, int64 *count_est);
static void preprocess_groupclause(PlannerInfo *root);
static bool choose_hashed_distinct(PlannerInfo *root,
					   Plan *input_plan, List *input_pathkeys,
					   double tuple_fraction, double limit_tuples,
					   double dNumDistinctRows);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
					   AttrNumber **groupColIdx, Oid **groupOperators, bool *need_tlist_eval);
static void locate_grouping_columns(PlannerInfo *root,
						List *stlist,
						List *sub_tlist,
						AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static List *add_volatile_sort_exprs(List *window_tlist, List *tlist,
						List *activeWindows);
static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
						 List *tlist, bool canonicalize);
static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
						   List *tlist,
						   int numSortCols, AttrNumber *sortColIdx,
						   int *partNumCols,
						   AttrNumber **partColIdx,
						   Oid **partOperators,
						   int *ordNumCols,
						   AttrNumber **ordColIdx,
						   Oid **ordOperators);

static Bitmapset *canonicalize_colref_list(Node *node);
static List *canonicalize_gs_list(List *gsl, bool ordinary);
static List *rollup_gs_list(List *gsl);
static List *add_gs_combinations(List *list, int n, int i, Bitmapset **base, Bitmapset **work);
static List *cube_gs_list(List *gsl);
static CanonicalGroupingSets *make_canonical_groupingsets(List *groupClause);
static int	gs_compare(const void *a, const void *b);
static void sort_canonical_gs_list(List *gs, int *p_nsets, Bitmapset ***p_sets);

static Plan *pushdown_preliminary_limit(Plan *plan, Node *limitCount, int64 count_est, Node *limitOffset, int64 offset_est);
static bool is_dummy_plan(Plan *plan);

static bool isSimplyUpdatableQuery(Query *query);


/*****************************************************************************
 *
 *	   Query optimizer entry point
 *
 * To support loadable plugins that monitor or modify planner behavior,
 * we provide a hook variable that lets a plugin get control before and
 * after the standard planning process.  The plugin would normally call
 * standard_planner().
 *
 * Note to plugin authors: standard_planner() scribbles on its Query input,
 * so you'd better copy that data structure if you want to plan more than once.
 *
 *****************************************************************************/
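/*
 * Illustrative sketch (not part of the original file): a plugin would
 * typically install the hook like this, where "my_planner" is a
 * hypothetical extension function:
 *
 *		static PlannedStmt *
 *		my_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 *		{
 *			PlannedStmt *stmt;
 *
 *			stmt = standard_planner(parse, cursorOptions, boundParams);
 *			return stmt;
 *		}
 *
 *		void
 *		_PG_init(void)
 *		{
 *			planner_hook = my_planner;
 *		}
 */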
PlannedStmt *
planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result;
	instr_time	starttime, endtime;

	if (planner_hook)
	{
		if (gp_log_optimization_time)
			INSTR_TIME_SET_CURRENT(starttime);

		START_MEMORY_ACCOUNT(MemoryAccounting_CreateAccount(0, MEMORY_OWNER_TYPE_PlannerHook));
		{
			result = (*planner_hook) (parse, cursorOptions, boundParams);
		}
		END_MEMORY_ACCOUNT();

		if (gp_log_optimization_time)
		{
			INSTR_TIME_SET_CURRENT(endtime);
			INSTR_TIME_SUBTRACT(endtime, starttime);
			elog(LOG, "Planner Hook(s): %.3f ms", INSTR_TIME_GET_MILLISEC(endtime));
		}
	}
	else
		result = standard_planner(parse, cursorOptions, boundParams);

	return result;
}

PlannedStmt *
standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result;
	PlannerGlobal *glob;
	double		tuple_fraction;
	PlannerInfo *root;
	Plan	   *top_plan;
	ListCell   *lp,
			   *lr;
	PlannerConfig *config;
	instr_time		starttime;
	instr_time		endtime;

	/*
	 * If ORCA has been enabled, and we are in a state in which ORCA planning
	 * is supported, then go ahead.
	 */
	if (optimizer &&
		GP_ROLE_UTILITY != Gp_role && MASTER_CONTENT_ID == GpIdentity.segindex)
	{
		if (gp_log_optimization_time)
			INSTR_TIME_SET_CURRENT(starttime);

		START_MEMORY_ACCOUNT(MemoryAccounting_CreateAccount(0, MEMORY_OWNER_TYPE_Optimizer));
		{
			result = optimize_query(parse, boundParams);
		}
		END_MEMORY_ACCOUNT();

		if (gp_log_optimization_time)
		{
			INSTR_TIME_SET_CURRENT(endtime);
			INSTR_TIME_SUBTRACT(endtime, starttime);
			elog(LOG, "Optimizer Time: %.3f ms", INSTR_TIME_GET_MILLISEC(endtime));
		}

		if (result)
			return result;
	}

	/*
	 * Fall back to using the PostgreSQL planner in case Orca didn't run (in
	 * utility mode or on a segment) or if it didn't produce a plan.
	 */
	if (gp_log_optimization_time)
		INSTR_TIME_SET_CURRENT(starttime);

	/*
	 * Incorrectly indented on purpose to avoid re-indenting an entire upstream
	 * function
	 */
	START_MEMORY_ACCOUNT(MemoryAccounting_CreateAccount(0, MEMORY_OWNER_TYPE_Planner));
	{

	/* Cursor options may come from caller or from DECLARE CURSOR stmt */
	if (parse->utilityStmt &&
		IsA(parse->utilityStmt, DeclareCursorStmt))
	{
		cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;

		/* Also try to make any cursor declared with DECLARE CURSOR updatable. */
		cursorOptions |= CURSOR_OPT_UPDATABLE;
	}

	/*
	 * Set up global state for this planner invocation.  This data is needed
	 * across all levels of sub-Query that might exist in the given command,
	 * so we keep it in a separate struct that's linked to by each per-Query
	 * PlannerInfo.
	 */
	glob = makeNode(PlannerGlobal);

	glob->boundParams = boundParams;
	glob->paramlist = NIL;
	glob->subplans = NIL;
	glob->subrtables = NIL;
	glob->rewindPlanIDs = NULL;
	glob->finalrtable = NIL;
	glob->relationOids = NIL;
	glob->invalItems = NIL;
	glob->lastPHId = 0;
	glob->transientPlan = false;
	glob->oneoffPlan = false;
	/* ApplyShareInputContext initialization. */
	glob->share.producers = NULL;
	glob->share.producer_count = 0;
	glob->share.sliceMarks = NULL;
	glob->share.motStack = NIL;
	glob->share.qdShares = NIL;
	glob->share.qdSlices = NIL;
	glob->share.nextPlanId = 0;

	if ((cursorOptions & CURSOR_OPT_UPDATABLE) != 0)
		glob->simplyUpdatable = isSimplyUpdatableQuery(parse);
	else
		glob->simplyUpdatable = false;

	/* Determine what fraction of the plan is likely to be scanned */
	if (cursorOptions & CURSOR_OPT_FAST_PLAN)
	{
		/*
		 * We have no real idea how many tuples the user will ultimately FETCH
		 * from a cursor, but it is often the case that he doesn't want 'em
		 * all, or would prefer a fast-start plan anyway so that he can
		 * process some of the tuples sooner.  Use a GUC parameter to decide
		 * what fraction to optimize for.
		 */
		tuple_fraction = cursor_tuple_fraction;

		/*
		 * We document cursor_tuple_fraction as simply being a fraction,
		 * which means the edge cases 0 and 1 have to be treated specially
		 * here.  We convert 1 to 0 ("all the tuples") and 0 to a very small
		 * fraction.
		 */
		if (tuple_fraction >= 1.0)
			tuple_fraction = 0.0;
		else if (tuple_fraction <= 0.0)
			tuple_fraction = 1e-10;
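
		/*
		 * For example, with cursor_tuple_fraction at its default of 0.1,
		 * a cursor's query is costed as though only the first 10% of its
		 * result rows will ever be fetched.
		 */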
	}
	else
	{
		/* Default assumption is we need all the tuples */
		tuple_fraction = 0.0;
	}

	parse = normalize_query(parse);

	config = DefaultPlannerConfig();

	/* primary planning entry point (may recurse for subqueries) */
	top_plan = subquery_planner(glob, parse, NULL,
								false, tuple_fraction, &root,
								config);

	/*
	 * If creating a plan for a scrollable cursor, make sure it can run
	 * backwards on demand.  Add a Material node at the top at need.
	 */
	if (cursorOptions & CURSOR_OPT_SCROLL)
	{
		if (!ExecSupportsBackwardScan(top_plan))
			top_plan = materialize_finished_plan(root, top_plan);
	}


	/*
	 * Fix sharing id and shared id.
	 *
	 * This must be called before set_plan_references and cdbparallelize.
	 * The other mutators and tree walkers assume the input is a tree; if
	 * there is plan sharing, we have a DAG.
	 *
	 * apply_shareinput will fix shared_id, and change the DAG to a tree.
	 */
	forboth(lp, glob->subplans, lr, glob->subrtables)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);
		List	   *subrtable = (List *) lfirst(lr);

		lfirst(lp) = apply_shareinput_dag_to_tree(glob, subplan, subrtable);
	}
	top_plan = apply_shareinput_dag_to_tree(glob, top_plan, root->parse->rtable);

	/* final cleanup of the plan */
	Assert(glob->finalrtable == NIL);
	Assert(parse == root->parse);
	top_plan = set_plan_references(glob, top_plan, root->parse->rtable);
	/* ... and the subplans (both regular subplans and initplans) */
	Assert(list_length(glob->subplans) == list_length(glob->subrtables));
	forboth(lp, glob->subplans, lr, glob->subrtables)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);
		List	   *subrtable = (List *) lfirst(lr);

		lfirst(lp) = set_plan_references(glob, subplan, subrtable);
	}

	/* walk plan and remove unused initplans and their params */
	remove_unused_initplans(top_plan, root);

	/* walk subplans and fixup subplan node referring to same plan_id */
	SubPlanWalkerContext subplan_context;
	fixup_subplans(top_plan, root, &subplan_context);

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		top_plan = cdbparallelize(root, top_plan, parse,
								  cursorOptions,
								  boundParams);

		/*
		 * cdbparallelize() mutates all the nodes, so the producer nodes we
		 * memorized earlier are no longer valid. apply_shareinput_xslice()
		 * will re-populate it, but clear it for now, just to make sure that
		 * we don't access the obsolete copies of the nodes.
		 */
		if (glob->share.producer_count > 0)
			memset(glob->share.producers, 0, glob->share.producer_count * sizeof(ShareInputScan *));

		/*
		 * cdbparallelize may create additional slices that may affect share
		 * input.  We need to mark Material nodes that are split across
		 * multiple slices.
		 */
		top_plan = apply_shareinput_xslice(top_plan, glob);
	}

	/*
	 * Remove unused subplans.
	 *
	 * The executor initializes state even for subplans that are never used.
	 * If an unused subplan contains a Motion node, no motion ID gets
	 * assigned to it, which breaks a sanity check when the executor tries
	 * to initialize the subplan's state.
	 */
	remove_unused_subplans(root, &subplan_context);
	bms_free(subplan_context.bms_subplans);

	top_plan = zap_trivial_result(root, top_plan);

	/* fix ShareInputScans for EXPLAIN */
	foreach(lp, glob->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);

		lfirst(lp) = replace_shareinput_targetlists(glob, subplan, glob->finalrtable);
	}
	top_plan = replace_shareinput_targetlists(glob, top_plan, glob->finalrtable);

	/*
	 * To save on memory, and on the network bandwidth when the plan is
	 * dispatched to QEs, strip all subquery RTEs of the original Query
	 * objects.
	 */
	remove_subquery_in_RTEs((Node *) glob->finalrtable);

	/* build the PlannedStmt result */
	result = makeNode(PlannedStmt);

	result->commandType = parse->commandType;
	result->canSetTag = parse->canSetTag;
	result->transientPlan = glob->transientPlan;
	result->oneoffPlan = glob->oneoffPlan;
	result->planTree = top_plan;
	result->rtable = glob->finalrtable;
	result->resultRelations = root->resultRelations;
	result->utilityStmt = parse->utilityStmt;
	result->intoClause = parse->intoClause;
	result->subplans = glob->subplans;
	result->rewindPlanIDs = glob->rewindPlanIDs;
	result->returningLists = root->returningLists;
	result->result_partitions = root->result_partitions;
	result->result_aosegnos = root->result_aosegnos;
	result->rowMarks = parse->rowMarks;
	result->relationOids = glob->relationOids;
	result->invalItems = glob->invalItems;
	result->nParamExec = list_length(glob->paramlist);
	result->nMotionNodes = top_plan->nMotionNodes;
	result->nInitPlans = top_plan->nInitPlans;
	result->intoPolicy = GpPolicyCopy(CurrentMemoryContext, parse->intoPolicy);
	result->queryPartOids = NIL;
	result->queryPartsMetadata = NIL;
	result->numSelectorsPerScanId = NIL;

	result->simplyUpdatable = glob->simplyUpdatable;

	{
		ListCell *lc;

		foreach(lc, glob->relationOids)
		{
			Oid reloid = lfirst_oid(lc);

			if (rel_is_partitioned(reloid))
				result->queryPartOids = lappend_oid(result->queryPartOids, reloid);
		}
	}

	Assert(result->utilityStmt == NULL || IsA(result->utilityStmt, DeclareCursorStmt));

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		/*
		 * Generate a plan node id for each node. Used by gpmon. Note that
		 * this needs to be the last step of the planning when the structure
		 * of the plan is final.
		 */
		assign_plannode_id(result);
	}

	if (gp_log_optimization_time)
	{
		INSTR_TIME_SET_CURRENT(endtime);
		INSTR_TIME_SUBTRACT(endtime, starttime);
		elog(LOG, "Planner Time: %.3f ms", INSTR_TIME_GET_MILLISEC(endtime));
	}

	}
	END_MEMORY_ACCOUNT();

	return result;
}


/*--------------------
 * subquery_planner
 *	  Invokes the planner on a subquery.  We recurse to here for each
 *	  sub-SELECT found in the query tree.
 *
 * glob is the global state for the current planner run.
 * parse is the querytree produced by the parser & rewriter.
 * parent_root is the immediate parent Query's info (NULL at the top level).
 * hasRecursion is true if this is a recursive WITH query.
 * tuple_fraction is the fraction of tuples we expect will be retrieved.
 * tuple_fraction is interpreted as explained for grouping_planner, below.
 *
 * If subroot isn't NULL, we pass back the query's final PlannerInfo struct;
 * among other things this tells the output sort ordering of the plan.
 *
 * Basically, this routine does the stuff that should only be done once
 * per Query object.  It then calls grouping_planner.  At one time,
 * grouping_planner could be invoked recursively on the same Query object;
 * that's not currently true, but we keep the separation between the two
 * routines anyway, in case we need it again someday.
 *
 * subquery_planner will be called recursively to handle sub-Query nodes
 * found within the query's expressions and rangetable.
 *
 * Returns a query plan.
 *--------------------
 */
Plan *
subquery_planner(PlannerGlobal *glob, Query *parse,
				 PlannerInfo *parent_root,
				 bool hasRecursion, double tuple_fraction,
				 PlannerInfo **subroot,
				 PlannerConfig *config)
{
	int			num_old_subplans = list_length(glob->subplans);
	PlannerInfo *root;
	Plan	   *plan;
	List	   *newHaving;
	bool		hasOuterJoins;
	ListCell   *l;

	/* Create a PlannerInfo data structure for this subquery */
	root = makeNode(PlannerInfo);
	root->parse = parse;
	root->glob = glob;
	root->query_level = parent_root ? parent_root->query_level + 1 : 1;
	root->parent_root = parent_root;
	root->planner_cxt = CurrentMemoryContext;
	root->init_plans = NIL;
	root->cte_plan_ids = NIL;
	root->eq_classes = NIL;
	root->init_plans = NIL;

	root->list_cteplaninfo = NIL;
	if (parse->cteList != NIL)
	{
		root->list_cteplaninfo = init_list_cteplaninfo(list_length(parse->cteList));
	}

	root->append_rel_list = NIL;

	Assert(config);
	root->config = config;

	if (Gp_role == GP_ROLE_DISPATCH && gp_session_id > -1)
	{
		/* Choose a segdb to which our singleton gangs should be dispatched. */
		gp_singleton_segindex = gp_session_id % getgpsegmentCount();
	}

	root->hasRecursion = hasRecursion;
	if (hasRecursion)
		root->wt_param_id = SS_assign_worktable_param(root);
	else
		root->wt_param_id = -1;
	root->non_recursive_plan = NULL;

	/*
	 * If there is a WITH list, process each WITH query and build an
	 * initplan SubPlan structure for it.
	 *
	 * Unlike upstream, we do not use initplan + CteScan, so SS_process_ctes
	 * will generate unused initplans. Commenting out the following two
	 * lines.
	 */
#if 0
	if (parse->cteList)
		SS_process_ctes(root);
#endif

	/*
	 * Ensure that jointree has been normalized. See
	 * normalize_query_jointree_mutator()
	 */
	AssertImply(parse->jointree->fromlist, list_length(parse->jointree->fromlist) == 1);

	/* CDB: Stash current query level's relids before pulling up subqueries. */
	root->currlevel_relids = get_relids_in_jointree((Node *) parse->jointree, false);

	/*
	 * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
	 * to transform them into joins.  Note that this step does not descend
	 * into subqueries; if we pull up any subqueries below, their SubLinks are
	 * processed just before pulling them up.
	 */
	if (parse->hasSubLinks)
		pull_up_sublinks(root);

	/*
	 * Scan the rangetable for set-returning functions, and inline them
	 * if possible (producing subqueries that might get pulled up next).
	 * Recursion issues here are handled in the same way as for SubLinks.
	 */
	inline_set_returning_functions(root);

	/*
	 * Check to see if any subqueries in the rangetable can be merged into
	 * this query.
	 */
	parse->jointree = (FromExpr *)
		pull_up_subqueries(root, (Node *) parse->jointree, NULL, NULL);

	/*
	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
	 * outer joins --- if none, we can skip reduce_outer_joins().
	 * This must be done after we have done pull_up_subqueries, of course.
	 */
	root->hasJoinRTEs = false;
	hasOuterJoins = false;
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind == RTE_JOIN)
		{
			root->hasJoinRTEs = true;
			if (IS_OUTER_JOIN(rte->jointype))
			{
				hasOuterJoins = true;
				/* Can quit scanning once we find an outer join */
				break;
			}
		}
	}

	/*
	 * Expand any rangetable entries that are inheritance sets into "append
	 * relations".  This can add entries to the rangetable, but they must be
	 * plain base relations not joins, so it's OK (and marginally more
	 * efficient) to do it after checking for join RTEs.  We must do it after
	 * pulling up subqueries, else we'd fail to handle inherited tables in
	 * subqueries.
	 */
	expand_inherited_tables(root);

	/* CDB: If parent RTE belongs to current query level, children do too. */
	foreach(l, root->append_rel_list)
	{
		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);

		if (bms_is_member(appinfo->parent_relid, root->currlevel_relids))
			root->currlevel_relids = bms_add_member(root->currlevel_relids,
													appinfo->child_relid);
	}

	/*
	 * Set hasHavingQual to remember if HAVING clause is present.  Needed
	 * because preprocess_expression will reduce a constant-true condition to
	 * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
	 */
	root->hasHavingQual = (parse->havingQual != NULL);

	/* Clear this flag; might get set in distribute_qual_to_rels */
	root->hasPseudoConstantQuals = false;

	/*
	 * Do expression preprocessing on targetlist and quals.
	 */
	parse->targetList = (List *)
		preprocess_expression(root, (Node *) parse->targetList,
							  EXPRKIND_TARGET);

	parse->returningList = (List *)
		preprocess_expression(root, (Node *) parse->returningList,
							  EXPRKIND_TARGET);

	preprocess_qual_conditions(root, (Node *) parse->jointree);

	parse->havingQual = preprocess_expression(root, parse->havingQual,
											  EXPRKIND_QUAL);

	parse->scatterClause = (List *)
		preprocess_expression(root, (Node *) parse->scatterClause,
							  EXPRKIND_TARGET);

	/*
	 * Do expression preprocessing on other expressions.
	 */
	foreach(l, parse->windowClause)
	{
		WindowClause *wc = (WindowClause *) lfirst(l);

		/* partitionClause/orderClause are sort/group expressions */
		wc->startOffset = preprocess_expression(root, wc->startOffset,
												EXPRKIND_WINDOW_BOUND);
		wc->endOffset = preprocess_expression(root, wc->endOffset,
											  EXPRKIND_WINDOW_BOUND);
	}

	parse->limitOffset = preprocess_expression(root, parse->limitOffset,
											   EXPRKIND_LIMIT);
	parse->limitCount = preprocess_expression(root, parse->limitCount,
											  EXPRKIND_LIMIT);

	root->append_rel_list = (List *)
		preprocess_expression(root, (Node *) root->append_rel_list,
							  EXPRKIND_APPINFO);

	/* Also need to preprocess expressions for function and values RTEs */
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind == RTE_FUNCTION || rte->rtekind == RTE_TABLEFUNCTION)
			rte->funcexpr = preprocess_expression(root, rte->funcexpr,
												  EXPRKIND_RTFUNC);
		else if (rte->rtekind == RTE_VALUES)
			rte->values_lists = (List *)
				preprocess_expression(root, (Node *) rte->values_lists,
									  EXPRKIND_VALUES);
	}

	/*
	 * In some cases we may want to transfer a HAVING clause into WHERE. We
	 * cannot do so if the HAVING clause contains aggregates (obviously) or
	 * volatile functions (since a HAVING clause is supposed to be executed
	 * only once per group).  Also, it may be that the clause is so expensive
	 * to execute that we're better off doing it only once per group, despite
	 * the loss of selectivity.  This is hard to estimate short of doing the
	 * entire planning process twice, so we use a heuristic: clauses
	 * containing subplans are left in HAVING.  Otherwise, we move or copy the
	 * HAVING clause into WHERE, in hopes of eliminating tuples before
	 * aggregation instead of after.
	 *
	 * If the query has explicit grouping then we can simply move such a
	 * clause into WHERE; any group that fails the clause will not be in the
	 * output because none of its tuples will reach the grouping or
	 * aggregation stage.  Otherwise we must have a degenerate (variable-free)
	 * HAVING clause, which we put in WHERE so that query_planner() can use it
	 * in a gating Result node, but also keep in HAVING to ensure that we
	 * don't emit a bogus aggregated row. (This could be done better, but it
	 * seems not worth optimizing.)
	 *
	 * Note that both havingQual and parse->jointree->quals are in
	 * implicitly-ANDed-list form at this point, even though they are declared
	 * as Node *.
	 */
	newHaving = NIL;
	foreach(l, (List *) parse->havingQual)
	{
		Node	   *havingclause = (Node *) lfirst(l);

		if (contain_agg_clause(havingclause) ||
			contain_volatile_functions(havingclause) ||
			contain_subplans(havingclause))
		{
			/* keep it in HAVING */
			newHaving = lappend(newHaving, havingclause);
		}
		else if (parse->groupClause &&
				 !contain_extended_grouping(parse->groupClause))
		{
			/* move it to WHERE */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals, havingclause);
		}
		else
		{
			/* put a copy in WHERE, keep it in HAVING */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals,
						copyObject(havingclause));
			newHaving = lappend(newHaving, havingclause);
		}
	}
	parse->havingQual = (Node *) newHaving;
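
	/*
	 * Example of the above: in "SELECT a, sum(b) FROM t GROUP BY a
	 * HAVING a > 0", the HAVING clause contains no aggregates, volatile
	 * functions, or subplans, so it is moved to WHERE and filters rows
	 * before they ever reach the aggregation step.
	 */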

	/*
	 * If we have any outer joins, try to reduce them to plain inner joins.
	 * This step is most easily done after we've done expression
	 * preprocessing.
	 */
	if (hasOuterJoins)
		reduce_outer_joins(root);

	/*
	 * Do the main planning.  If we have an inherited target relation, that
	 * needs special processing, else go straight to grouping_planner.
	 */
	if (parse->resultRelation &&
		rt_fetch(parse->resultRelation, parse->rtable)->inh)
		plan = inheritance_planner(root);
	else
		plan = grouping_planner(root, tuple_fraction);

	/*
	 * If any subplans were generated, or if we're inside a subplan, build
	 * initPlan list and extParam/allParam sets for plan nodes, and attach the
	 * initPlans to the top plan node.
	 */
	if (list_length(glob->subplans) != num_old_subplans ||
		root->query_level > 1)
	{
		Assert(root->parse == parse); /* GPDB isn't always careful about this. */
		SS_finalize_plan(root, plan, true);
	}

	/* Return internal info if caller wants it */
	if (subroot)
		*subroot = root;

	return plan;
}

/*
 * preprocess_expression
 *		Do subquery_planner's preprocessing work for an expression,
 *		which can be a targetlist, a WHERE clause (including JOIN/ON
 *		conditions), or a HAVING clause.
 */
static Node *
preprocess_expression(PlannerInfo *root, Node *expr, int kind)
{
	/*
	 * Fall out quickly if expression is empty.  This occurs often enough to
	 * be worth checking.  Note that null->null is the correct conversion for
	 * implicit-AND result format, too.
	 */
	if (expr == NULL)
		return NULL;

	/*
	 * If the query has any join RTEs, replace join alias variables with
	 * base-relation variables. We must do this before sublink processing,
	 * else sublinks expanded out from join aliases wouldn't get processed. We
	 * can skip it in VALUES lists, however, since they can't contain any Vars
	 * at all.
	 */
	if (root->hasJoinRTEs && kind != EXPRKIND_VALUES)
		expr = flatten_join_alias_vars(root, expr);

	if (root->parse->hasFuncsWithExecRestrictions)
	{
		if (kind == EXPRKIND_RTFUNC)
		{
			/* allowed */
		}
		else if (kind == EXPRKIND_TARGET)
		{
			/*
			 * Allowed in simple cases with no range table. For example,
			 * "SELECT func()" is allowed, but "SELECT func() FROM foo" is not.
			 */
			if (root->parse->rtable &&
				check_execute_on_functions((Node *) root->parse->targetList) != PROEXECLOCATION_ANY)
			{
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("function with EXECUTE ON restrictions cannot be used in the SELECT list of a query with FROM")));
			}
		}
		else
		{
			if (check_execute_on_functions((Node *) root->parse->targetList) != PROEXECLOCATION_ANY)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("function with EXECUTE ON restrictions cannot be used here")));
		}
	}

	/*
	 * Simplify constant expressions.
	 *
	 * Note: one essential effect here is to insert the current actual values
	 * of any default arguments for functions.  To ensure that happens, we
	 * *must* process all expressions here.  Previous PG versions sometimes
	 * skipped const-simplification if it didn't seem worth the trouble, but
	 * we can't do that anymore.
	 *
	 * Note: this also flattens nested AND and OR expressions into N-argument
	 * form.  All processing of a qual expression after this point must be
	 * careful to maintain AND/OR flatness --- that is, do not generate a tree
	 * with AND directly under AND, nor OR directly under OR.
	 */
	expr = eval_const_expressions(root, expr);

	/*
	 * If it's a qual or havingQual, canonicalize it.
	 */
	if (kind == EXPRKIND_QUAL)
	{
		expr = (Node *) canonicalize_qual((Expr *) expr);

#ifdef OPTIMIZER_DEBUG
		printf("After canonicalize_qual()\n");
		pprint(expr);
#endif
	}

	/* Expand SubLinks to SubPlans */
	if (root->parse->hasSubLinks)
		expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));

	/*
	 * XXX do not insert anything here unless you have grokked the comments in
	 * SS_replace_correlation_vars ...
	 */

	/* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
	if (root->query_level > 1)
		expr = SS_replace_correlation_vars(root, expr);

	/*
	 * If it's a qual or havingQual, convert it to implicit-AND format. (We
	 * don't want to do this before eval_const_expressions, since the latter
	 * would be unable to simplify a top-level AND correctly. Also,
	 * SS_process_sublinks expects explicit-AND format.)
	 */
	if (kind == EXPRKIND_QUAL)
		expr = (Node *) make_ands_implicit((Expr *) expr);

	return expr;
}

/*
 * preprocess_qual_conditions
 *		Recursively scan the query's jointree and do subquery_planner's
 *		preprocessing work on each qual condition found therein.
 */
static void
preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
{
	if (jtnode == NULL)
		return;
	if (IsA(jtnode, RangeTblRef))
	{
		/* nothing to do here */
	}
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		foreach(l, f->fromlist)
			preprocess_qual_conditions(root, lfirst(l));

		f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
	}
	else if (IsA(jtnode, JoinExpr))
	{
		JoinExpr   *j = (JoinExpr *) jtnode;

		preprocess_qual_conditions(root, j->larg);
		preprocess_qual_conditions(root, j->rarg);

		j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
	}
	else
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(jtnode));
}

/*
 * inheritance_planner
 *	  Generate a plan in the case where the result relation is an
 *	  inheritance set.
 *
 * We have to handle this case differently from cases where a source relation
 * is an inheritance set. Source inheritance is expanded at the bottom of the
 * plan tree (see allpaths.c), but target inheritance has to be expanded at
 * the top.  The reason is that for UPDATE, each target relation needs a
 * different targetlist matching its own column set.  Also, for both UPDATE
 * and DELETE, the executor needs the Append plan node at the top, else it
 * can't keep track of which table is the current target table.  Fortunately,
 * the UPDATE/DELETE target can never be the nullable side of an outer join,
 * so it's OK to generate the plan this way.
 *
 * Returns a query plan.
 */
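/*
 * Illustrative example: for "UPDATE parent SET x = 1" where parent has
 * child tables c1 and c2, we plan one modified copy of the query per
 * child relation and Append the resulting sub-plans, rather than
 * expanding the inheritance set at the bottom of a single plan.
 */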
static Plan *
inheritance_planner(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	Index		parentRTindex = parse->resultRelation;
	List	   *subplans = NIL;
	List	   *resultRelations = NIL;
	List	   *returningLists = NIL;
	List	   *tlist = NIL;
	PlannerInfo subroot;
	ListCell   *l;

	/* MPP */
	Plan	   *plan;
	CdbLocusType append_locustype = CdbLocusType_Null;
	bool		locus_ok = TRUE;

	foreach(l, root->append_rel_list)
	{
		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
		Plan	   *subplan;

		/* append_rel_list contains all append rels; ignore others */
		if (appinfo->parent_relid != parentRTindex)
			continue;

		/*
		 * Generate modified query with this rel as target.
		 */
		memcpy(&subroot, root, sizeof(PlannerInfo));
		subroot.parse = (Query *)
			adjust_appendrel_attrs(&subroot, (Node *) parse,
								   appinfo);
		subroot.returningLists = NIL;
		subroot.init_plans = NIL;
		/* We needn't modify the child's append_rel_list */
		/* There shouldn't be any OJ info to translate, as yet */
		Assert(subroot.join_info_list == NIL);
		/* and we haven't created PlaceHolderInfos, either */
		Assert(subroot.placeholder_list == NIL);

		/* Generate plan */
		subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );

		/*
		 * If this child rel was excluded by constraint exclusion, exclude it
		 * from the plan.
		 *
		 * MPP-1544: perform this check before testing for loci compatibility
		 * we might have inserted a dummy table with incorrect locus
		 */
		if (is_dummy_plan(subplan))
			continue;

		/* MPP needs target loci to match. */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			CdbLocusType locustype = (subplan->flow == NULL) ?
			CdbLocusType_Null : subplan->flow->locustype;

			if (append_locustype == CdbLocusType_Null && locus_ok)
			{
				append_locustype = locustype;
			}
			else
			{
				switch (locustype)
				{
					case CdbLocusType_Entry:
						locus_ok = locus_ok && (locustype == append_locustype);
						break;
					case CdbLocusType_Hashed:
					case CdbLocusType_HashedOJ:
					case CdbLocusType_Strewn:
						/* MPP-2023: Among subplans, these loci are okay. */
						break;
					case CdbLocusType_Null:
					case CdbLocusType_SingleQE:
					case CdbLocusType_General:
					case CdbLocusType_Replicated:
						/* These loci are not valid on base relations */
						locus_ok = FALSE;
						break;
					default:
						/* We should not be hitting this */
						locus_ok = FALSE;
						Assert(0);
						break;
				}
			}
			if (!locus_ok)
			{
				ereport(ERROR, (
								errcode(ERRCODE_CDB_INTERNAL_ERROR),
					 errmsg("incompatible loci in target inheritance set")));
			}
		}

		/* Save tlist from first rel for use below */
		if (subplans == NIL)
		{
			tlist = subplan->targetlist;
		}

		/**
		 * The grouping planner scribbles on the rtable e.g. to add pseudo columns.
		 * We need to keep track of this.
		 */
		parse->rtable = subroot.parse->rtable;

		subplans = lappend(subplans, subplan);

		/* Make sure any initplans from this rel get into the outer list */
		root->init_plans = list_concat(root->init_plans, subroot.init_plans);

		/* Build target-relations list for the executor */
		resultRelations = lappend_int(resultRelations, appinfo->child_relid);

		/* Build list of per-relation RETURNING targetlists */
		if (parse->returningList)
		{
			Assert(list_length(subroot.returningLists) == 1);
			returningLists = list_concat(returningLists,
										 subroot.returningLists);
		}
	}

	/**
	 * If constraint exclusion has removed all of the result relations, we
	 * still need something upstream.
	 */
	if (resultRelations)
	{
		root->resultRelations = resultRelations;
	}
	else
	{
		root->resultRelations = list_make1_int(parse->resultRelation);
	}

	root->returningLists = returningLists;

	/* Mark result as unordered (probably unnecessary) */
	root->query_pathkeys = NIL;

	/*
	 * If we managed to exclude every child rel, return a dummy plan
	 */
	if (subplans == NIL)
	{
		root->resultRelations = list_make1_int(parentRTindex);
		/* although dummy, it must have a valid tlist for executor */
		tlist = preprocess_targetlist(root, parse->targetList);
		plan = (Plan *) make_result(root,
									tlist,
									(Node *) list_make1(makeBoolConst(false,
																	  false)),
									NULL);

		if (Gp_role == GP_ROLE_DISPATCH)
			mark_plan_general(plan);

		return plan;
	}

	/* Suppress Append if there's only one surviving child rel */
	if (list_length(subplans) == 1)
		plan = (Plan *) linitial(subplans);
	else
	{
		plan = (Plan *) make_append(subplans, true, tlist);

		/* MPP dispatch needs to know the kind of locus. */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			switch (append_locustype)
			{
				case CdbLocusType_Entry:
					mark_plan_entry(plan);
					break;

				case CdbLocusType_Hashed:
				case CdbLocusType_HashedOJ:
				case CdbLocusType_Strewn:
					/* Depend on caller to avoid incompatible hash keys. */

					/*
					 * For our purpose (UPD/DEL target), strewn is good
					 * enough.
					 */
					mark_plan_strewn(plan);
					break;

				default:
					ereport(ERROR,
							(errcode(ERRCODE_CDB_INTERNAL_ERROR),
							 errmsg("unexpected locus assigned to target inheritance set")));
			}
		}
	}

	return plan;
}

#ifdef USE_ASSERT_CHECKING

static void grouping_planner_output_asserts(PlannerInfo *root, Plan *plan);

/**
 * Ensure goodness of plans returned by grouping planner
 */
void
grouping_planner_output_asserts(PlannerInfo *root, Plan *plan)
{
	/*
	 * Ensure that plan refers to vars that have varlevelsup = 0 AND varno is
	 * in the rtable
	 */
	List	   *allVars = extract_nodes(root->glob, (Node *) plan, T_Var);
	ListCell   *lc = NULL;

	foreach(lc, allVars)
	{
		Var		   *var = (Var *) lfirst(lc);

		Assert(var->varlevelsup == 0 && "Plan contains vars that refer to outer plan.");
		Assert((var->varno == OUTER
		|| (var->varno > 0 && var->varno <= list_length(root->parse->rtable)))
			   && "Plan contains var that refer outside the rtable.");
		Assert(var->varno == var->varnoold && "Varno and varnoold do not agree!");

		/** If a pseudo column, there should be a corresponding entry in the relation */
		if (var->varattno <= FirstLowInvalidHeapAttributeNumber)
		{
			RangeTblEntry *rte = rt_fetch(var->varno, root->parse->rtable);

			Assert(rte);
			Assert(rte->pseudocols);
			Assert(list_length(rte->pseudocols) > var->varattno - FirstLowInvalidHeapAttributeNumber);
		}
	}
}
#endif

/*
 * getAnySubplan
 *	 Return one subplan for the given node.
 *
 * If the given node is an Append, the first subplan is returned.
 * If the given node is a SubqueryScan, its subplan is returned.
 * Otherwise, the lefttree of the given node is returned.
 */
static Plan *
getAnySubplan(Plan *node)
{
	Assert(is_plan_node((Node *) node));

	if (IsA(node, Append))
	{
		Append	   *append = (Append *) node;

		Assert(list_length(append->appendplans) > 0);
		return (Plan *) linitial(append->appendplans);
	}
	else if (IsA(node, SubqueryScan))
	{
		SubqueryScan *subqueryScan = (SubqueryScan *) node;

		return subqueryScan->subplan;
	}

	return node->lefttree;
}

/*--------------------
 * grouping_planner
 *	  Perform planning steps related to grouping, aggregation, etc.
 *	  This primarily means adding top-level processing to the basic
 *	  query plan produced by query_planner.
1245 1246 1247 1248
 *
 * tuple_fraction is the fraction of tuples we expect will be retrieved
 *
 * tuple_fraction is interpreted as follows:
1249
 *	  0: expect all tuples to be retrieved (normal case)
1250 1251 1252 1253 1254
 *	  0 < tuple_fraction < 1: expect the given fraction of tuples available
 *		from the plan to be retrieved
 *	  tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 *		expected to be retrieved (ie, a LIMIT specification)
 *
1255
 * Returns a query plan.  Also, root->query_pathkeys is returned as the
1256
 * actual output ordering of the plan (in pathkey format).
1257 1258
 *--------------------
 */
1259
static Plan *
1260
grouping_planner(PlannerInfo *root, double tuple_fraction)
1261
{
1262
	Query	   *parse = root->parse;
1263
	List	   *tlist = parse->targetList;
B
Bruce Momjian 已提交
1264 1265
	int64		offset_est = 0;
	int64		count_est = 0;
1266
	double		limit_tuples = -1.0;
1267
	Plan	   *result_plan;
1268 1269
	List	   *current_pathkeys = NIL;
	CdbPathLocus current_locus;
1270
	Path	   *best_path = NULL;
1271
	double		dNumGroups = 0;
1272 1273
	double		numDistinct = 1;
	List	   *distinctExprs = NIL;
1274
	bool		must_gather;
1275

1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
	double		motion_cost_per_row =
	(gp_motion_cost_per_row > 0.0) ?
	gp_motion_cost_per_row :
	2.0 * cpu_tuple_cost;

	CdbPathLocus_MakeNull(&current_locus);

	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
	if (parse->limitCount || parse->limitOffset)
	{
		tuple_fraction = preprocess_limit(root, tuple_fraction,
										  &offset_est, &count_est);
1288

1289
		/*
B
Bruce Momjian 已提交
1290 1291
		 * If we have a known LIMIT, and don't have an unknown OFFSET, we can
		 * estimate the effects of using a bounded sort.
1292 1293 1294 1295
		 */
		if (count_est > 0 && offset_est >= 0)
			limit_tuples = (double) count_est + (double) offset_est;
	}
1296

1297
	if (parse->setOperations)
B
Bruce Momjian 已提交
1298
	{
B
Bruce Momjian 已提交
1299
		List	   *set_sortclauses;
1300

1301
		/*
B
Bruce Momjian 已提交
1302
		 * If there's a top-level ORDER BY, assume we have to fetch all the
1303 1304 1305
		 * tuples.	This might be too simplistic given all the hackery below
		 * to possibly avoid the sort; but the odds of accurate estimates
		 * here are pretty low anyway.
1306 1307 1308 1309
		 */
		if (parse->sortClause)
			tuple_fraction = 0.0;

1310
		/*
B
Bruce Momjian 已提交
1311
		 * Construct the plan for set operations.  The result will not need
1312 1313 1314
		 * any work except perhaps a top-level sort and/or LIMIT.  Note that
		 * any special work for recursive unions is the responsibility of
		 * plan_set_operations.
1315
		 */
1316
		result_plan = plan_set_operations(root, tuple_fraction,
1317 1318 1319
										  &set_sortclauses);

		/*
B
Bruce Momjian 已提交
1320 1321 1322
		 * Calculate pathkeys representing the sort order (if any) of the set
		 * operation's result.  We have to do this before overwriting the sort
		 * key information...
1323
		 */
1324 1325
		current_pathkeys = make_pathkeys_for_sortclauses(root,
														 set_sortclauses,
B
Bruce Momjian 已提交
1326
													 result_plan->targetlist,
1327
														 true);
1328 1329

		/*
B
Bruce Momjian 已提交
1330 1331 1332 1333 1334
		 * We should not need to call preprocess_targetlist, since we must be
		 * in a SELECT query node.	Instead, use the targetlist returned by
		 * plan_set_operations (since this tells whether it returned any
		 * resjunk columns!), and transfer any sort key information from the
		 * original tlist.
1335 1336
		 */
		Assert(parse->commandType == CMD_SELECT);
1337

1338 1339
		tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
										tlist);
1340

1341
		/*
1342
		 * Can't handle FOR UPDATE/SHARE here (parser should have checked
B
Bruce Momjian 已提交
1343
		 * already, but let's make sure).
1344 1345
		 */
		if (parse->rowMarks)
1346 1347
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1348
					 errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
1349

1350
		/*
1351
		 * Calculate pathkeys that represent result ordering requirements
1352
		 */
1353
		Assert(parse->distinctClause == NIL);
1354 1355 1356 1357
		root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
															parse->sortClause,
															tlist,
															true);
B
Bruce Momjian 已提交
1358
	}
1359
	else
	{
		/* No set operations, do regular planning */
		List	   *sub_tlist;
		AttrNumber *groupColIdx = NULL;
		Oid		   *groupOperators = NULL;
		bool		need_tlist_eval = true;
		QualCost	tlist_cost;
		Path	   *cheapest_path;
		Path	   *sorted_path;
		long		numGroups = 0;
		AggClauseCounts agg_counts;
		int			numGroupCols;
		bool		use_hashed_grouping = false;
		WindowFuncLists *wflists = NULL;
		List	   *activeWindows = NIL;
		bool		grpext = false;
		CanonicalGroupingSets *canonical_grpsets;

		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));

		/* A recursive query should always have setOperations */
		Assert(!root->hasRecursion);

		/* Preprocess GROUP BY clause, if any */
		if (parse->groupClause)
			preprocess_groupclause(root);
		numGroupCols = list_length(parse->groupClause);

		/* Preprocess targetlist */
		tlist = preprocess_targetlist(root, tlist);

		/*
		 * Locate any window functions in the tlist.  (We don't need to look
		 * anywhere else, since expressions used in ORDER BY will be in there
		 * too.)  Note that they could all have been eliminated by constant
		 * folding, in which case we don't need to do any more work.
		 */
		if (parse->hasWindowFuncs)
		{
			wflists = find_window_functions((Node *) tlist,
											list_length(parse->windowClause));
			if (wflists->numWindowFuncs > 0)
				activeWindows = select_active_windows(root, wflists);
			else
				parse->hasWindowFuncs = false;
		}

		/* Obtain canonical grouping sets */
		canonical_grpsets = make_canonical_groupingsets(parse->groupClause);
		numGroupCols = canonical_grpsets->num_distcols;

		/*
		 * Clean up parse->groupClause if the grouping set is an empty
		 * set.
		 */
		if (numGroupCols == 0)
		{
			list_free(parse->groupClause);
			parse->groupClause = NIL;
		}

		grpext = is_grouping_extension(canonical_grpsets);

		/*
		 * Calculate pathkeys that represent grouping/ordering requirements.
		 * Stash them in PlannerInfo so that query_planner can canonicalize
		 * them after EquivalenceClasses have been formed.  The sortClause
		 * is certainly sort-able, but GROUP BY and DISTINCT might not be,
		 * in which case we just leave their pathkeys empty.
		 */
		if (parse->groupClause &&
			grouping_is_sortable(parse->groupClause))
			root->group_pathkeys =
			make_pathkeys_for_groupclause(root,
										  parse->groupClause,
										  tlist);
		else
			root->group_pathkeys = NIL;

		if (parse->distinctClause &&
			grouping_is_sortable(parse->distinctClause))
			root->distinct_pathkeys =
				make_pathkeys_for_sortclauses(root,
											  parse->distinctClause,
											  tlist,
											  false);
		else
			root->distinct_pathkeys = NIL;

		root->sort_pathkeys =
			make_pathkeys_for_sortclauses(root,
										  parse->sortClause,
										  tlist,
										  false);

		/*
		 * Will need actual number of aggregates for estimating costs.
		 *
		 * Note: we do not attempt to detect duplicate aggregates here; a
		 * somewhat-overestimated count is okay for our present purposes.
		 *
		 * Note: think not that we can turn off hasAggs if we find no aggs. It
		 * is possible for constant-expression simplification to remove all
		 * explicit references to aggs, but we still have to follow the
		 * aggregate semantics (eg, producing only one output row).
		 */
		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));

		if (parse->hasAggs)
		{
			count_agg_clauses((Node *) tlist, &agg_counts);
			count_agg_clauses(parse->havingQual, &agg_counts);
		}

		/*
		 * Generate appropriate target list for subplan; may be different from
		 * tlist if grouping or aggregation is needed.
		 */
		sub_tlist = make_subplanTargetList(root, tlist,
										   &groupColIdx, &groupOperators, &need_tlist_eval);

		/* We consider only the first (bottom) window in pathkeys logic */
		if (activeWindows != NIL)
		{
			WindowClause *wc = (WindowClause *) linitial(activeWindows);

			root->window_pathkeys = make_pathkeys_for_window(root,
															 wc,
															 tlist,
															 false);
		}
		else
			root->window_pathkeys = NIL;

		/*
		 * Figure out whether we want a sorted result from query_planner.
		 *
		 * If we have a sortable GROUP BY clause, then we want a result sorted
		 * properly for grouping.  Otherwise, if we have window functions to
		 * evaluate, we try to sort for the first window.  Otherwise, if
		 * there's a sortable DISTINCT clause that's more rigorous than the
		 * ORDER BY clause, we try to produce output that's sufficiently well
		 * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
		 * clause, we want to sort by the ORDER BY clause.
		 *
		 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
		 * superset of GROUP BY, it would be tempting to request sort by ORDER
		 * BY --- but that might just leave us failing to exploit an available
		 * sort order at all.  Needs more thought.  The choice for DISTINCT
		 * versus ORDER BY is much easier, since we know that the parser
		 * ensured that one is a superset of the other.
		 */
		if (root->group_pathkeys)
			root->query_pathkeys = root->group_pathkeys;
		else if (root->window_pathkeys)
			root->query_pathkeys = root->window_pathkeys;
		else if (list_length(root->distinct_pathkeys) >
				 list_length(root->sort_pathkeys))
			root->query_pathkeys = root->distinct_pathkeys;
		else if (root->sort_pathkeys)
			root->query_pathkeys = root->sort_pathkeys;
		else
			root->query_pathkeys = NIL;

		/*
		 * Generate the best unsorted and presorted paths for this Query (but
		 * note there may not be any presorted path).  query_planner will also
		 * estimate the number of groups in the query, and canonicalize all
		 * the pathkeys.
		 */
		query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
					  &cheapest_path, &sorted_path, &dNumGroups);

		/*
		 * If grouping, decide whether to use sorted or hashed grouping.
		 */
		if (parse->groupClause)
		{
			bool	can_hash;
			bool	can_sort;

			/*
			 * Executor doesn't support hashed aggregation with DISTINCT
			 * aggregates.  (Doing so would imply storing *all* the input
			 * values in the hash table, which seems like a certain loser.)
			 */
			can_hash = (agg_counts.numOrderedAggs == 0 &&
						grouping_is_hashable(parse->groupClause));
			can_sort = grouping_is_sortable(parse->groupClause);
			if (can_hash && can_sort)
			{
				/* we have a meaningful choice to make ... */
				use_hashed_grouping =
					choose_hashed_grouping(root,
										   tuple_fraction, limit_tuples,
										   cheapest_path, sorted_path,
										   numGroupCols,
										   dNumGroups, &agg_counts);
			}
			else if (can_hash)
				use_hashed_grouping = true;
			else if (can_sort)
				use_hashed_grouping = false;
			else
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("could not implement GROUP BY"),
						 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));

			/* Also convert # groups to long int --- but 'ware overflow! */
			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
		}

		/*
		 * Select the best path.  If we are doing hashed grouping, we will
		 * always read all the input tuples, so use the cheapest-total path.
		 * Otherwise, trust query_planner's decision about which to use.
		 */
		if (use_hashed_grouping || !sorted_path)
			best_path = cheapest_path;
		else
			best_path = sorted_path;

		/*
		 * CDB: For now, we either
		 *  - construct a general parallel plan,
		 *  - let the sequential planner handle the situation, or
		 *  - construct a sequential plan using the min-max index optimization.
		 *
		 * Eventually we should add a parallel version of the min-max
		 * optimization.  For now, it's either-or.
		 */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			bool		querynode_changed = false;
			bool		pass_subtlist = agg_counts.hasOrderedAggs;
			GroupContext group_context;

			group_context.best_path = best_path;
			group_context.cheapest_path = cheapest_path;
			group_context.subplan = NULL;
			group_context.sub_tlist = pass_subtlist ? sub_tlist : NIL;
			group_context.tlist = tlist;
			group_context.use_hashed_grouping = use_hashed_grouping;
			group_context.tuple_fraction = tuple_fraction;
			group_context.canonical_grpsets = canonical_grpsets;
			group_context.grouping = 0;
			group_context.numGroupCols = 0;
			group_context.groupColIdx = NULL;
			group_context.groupOperators = NULL;
			group_context.numDistinctCols = 0;
			group_context.distinctColIdx = NULL;
			group_context.p_dNumGroups = &dNumGroups;
			group_context.pcurrent_pathkeys = &current_pathkeys;
			group_context.querynode_changed = &querynode_changed;

			result_plan = cdb_grouping_planner(root,
											   &agg_counts,
											   &group_context);

			/* Add the Repeat node if needed. */
			if (result_plan != NULL &&
				canonical_grpsets != NULL &&
				canonical_grpsets->grpset_counts != NULL)
			{
				bool		need_repeat_node = false;
				int			grpset_no;
				int			repeat_count = 0;

				for (grpset_no = 0; grpset_no < canonical_grpsets->ngrpsets; grpset_no++)
				{
					if (canonical_grpsets->grpset_counts[grpset_no] > 1)
					{
						need_repeat_node = true;
						break;
					}
				}

				if (canonical_grpsets->ngrpsets == 1)
					repeat_count = canonical_grpsets->grpset_counts[0];

				if (need_repeat_node)
				{
					result_plan = add_repeat_node(result_plan, repeat_count, 0);
				}
			}
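
			/*
			 * Illustrative note (not in the original source): grpset_counts
			 * records how many times each canonical grouping set appears; for
			 * instance, a duplicated set as in
			 *     GROUP BY GROUPING SETS ((a), (a))
			 * would give a count of 2, and the Repeat node re-emits each
			 * input row that many times.
			 */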

			if (result_plan != NULL && querynode_changed)
			{
				/*
				 * We want to re-write sort_pathkeys here since the 2-stage
				 * aggregation subplan or grouping extension subplan may
				 * change the previous root->parse Query node, which makes the
				 * current sort_pathkeys invalid.
				 */
				if (parse->distinctClause)
					root->distinct_pathkeys =
						make_pathkeys_for_sortclauses(root,
													  parse->distinctClause,
													  result_plan->targetlist,
													  true);
				if (parse->sortClause)
					root->sort_pathkeys =
						make_pathkeys_for_sortclauses(root,
													  parse->sortClause,
													  result_plan->targetlist,
													  true);
			}
		}
		else	/* Not GP_ROLE_DISPATCH */
		{
			/*
			 * Check to see if it's possible to optimize MIN/MAX aggregates.
			 * If so, we will forget all the work we did so far to choose a
			 * "regular" path ... but we had to do it anyway to be able to
			 * tell which way is cheaper.
			 */
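			/*
			 * Illustrative example (not in the original source): given
			 *     SELECT MIN(x) FROM tab;
			 * optimize_minmax_aggregates() can produce the equivalent of
			 *     SELECT x FROM tab WHERE x IS NOT NULL ORDER BY x LIMIT 1;
			 * when an index on x makes that cheaper than a full scan.
			 */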
			result_plan = optimize_minmax_aggregates(root,
													 tlist,
													 best_path);
			if (result_plan != NULL)
			{
				/*
				 * optimize_minmax_aggregates generated the full plan, with
				 * the right tlist, and it has no sort order.
				 */
				current_pathkeys = NIL;
				mark_plan_entry(result_plan);
			}

		}

		if (result_plan == NULL)
		{
			/*
			 * Normal case --- create a plan according to query_planner's
			 * results.
			 */
			bool	need_sort_for_grouping = false;

			result_plan = create_plan(root, best_path);
			current_pathkeys = best_path->pathkeys;
			current_locus = best_path->locus;	/* just use keys, don't copy */

			/*
			 * The returned plan might be ordered by TLEs that we don't need
			 * in the final result, and will therefore not be present in the
			 * final target list. Also remove them from current_pathkeys, so
			 * that current_pathkeys only contains expressions that can be
			 * evaluated using the new target list. This is not required in
			 * PostgreSQL, because in PostgreSQL current_pathkeys is only
			 * compared against, and there's no need to re-evaluate it. But
			 * in GPDB, we might use current_pathkeys to maintain the order
			 * in a Motion node that we create, so we must be able to
			 * evaluate it.
			 */
			current_pathkeys =
				cdbpullup_truncatePathKeysForTargetList(current_pathkeys,
														sub_tlist);

			/* Detect if we'll need an explicit sort for grouping */
			if (parse->groupClause && !use_hashed_grouping &&
				!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
			{
				need_sort_for_grouping = true;

				/*
				 * Always override create_plan's tlist, so that we don't
				 * sort useless data from a "physical" tlist.
				 */
				need_tlist_eval = true;
			}

			/*
			 * create_plan returns a plan with just a "flat" tlist of
			 * required Vars.  Usually we need to insert the sub_tlist as the
			 * tlist of the top plan node.  However, we can skip that if we
			 * determined that whatever create_plan chose to return will be
			 * good enough.
			 */
			if (need_tlist_eval)
			{
				/*
				 * If the top-level plan node is one that cannot do expression
				 * evaluation, we must insert a Result node to project the
				 * desired tlist.
				 */
				result_plan = plan_pushdown_tlist(root, result_plan, sub_tlist);

				/*
				 * Also, account for the cost of evaluation of the sub_tlist.
				 *
				 * Up to now, we have only been dealing with "flat" tlists,
				 * containing just Vars.  So their evaluation cost is zero
				 * according to the model used by cost_qual_eval() (or if you
				 * prefer, the cost is factored into cpu_tuple_cost).  Thus we
				 * can avoid accounting for tlist cost throughout
				 * query_planner() and subroutines.  But now we've inserted a
				 * tlist that might contain actual operators, sub-selects, etc
				 * --- so we'd better account for its cost.
				 *
				 * Below this point, any tlist eval cost for added-on nodes
				 * should be accounted for as we create those nodes.
				 * Presently, of the node types we can add on, only Agg,
				 * WindowAgg, and Group project new tlists (the rest just copy
				 * their input tuples) --- so make_agg(), make_windowagg() and
				 * make_group() are responsible for computing the added cost.
				 */
				cost_qual_eval(&tlist_cost, sub_tlist, root);
				result_plan->startup_cost += tlist_cost.startup;
				result_plan->total_cost += tlist_cost.startup +
					tlist_cost.per_tuple * result_plan->plan_rows;
			}
			else
			{
				/*
				 * Since we're using create_plan's tlist and not the one
				 * make_subplanTargetList calculated, we have to refigure any
				 * grouping-column indexes make_subplanTargetList computed.
				 */
				locate_grouping_columns(root, tlist, result_plan->targetlist,
										groupColIdx);
			}

			Assert(result_plan->flow);

			/*
			 * Insert AGG or GROUP node if needed, plus an explicit sort step
			 * if necessary.
			 *
			 * HAVING clause, if any, becomes qual of the Agg or Group node.
			 */
			if (!grpext && use_hashed_grouping)
			{
				/* Hashed aggregate plan --- no sort needed */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												AGG_HASHED, false,
												numGroupCols,
												groupColIdx,
												groupOperators,
												numGroups,
												/* GPDB_84_MERGE_FIXME: What would be
												 * appropriate values for these extra
												 * arguments? */
												0, /* num_nullcols */
												0, /* input_grouping */
												0, /* grouping */
												0, /* rollup_gs_times */
												agg_counts.numAggs,
												agg_counts.transitionSpace,
												result_plan);

				if (canonical_grpsets != NULL &&
					canonical_grpsets->grpset_counts != NULL &&
					canonical_grpsets->grpset_counts[0] > 1)
				{
					result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
					result_plan = add_repeat_node(result_plan,
										 canonical_grpsets->grpset_counts[0],
												  0);
				}

				/* Hashed aggregation produces randomly-ordered results */
				current_pathkeys = NIL;
				CdbPathLocus_MakeNull(&current_locus);
			}
			else if (!grpext && (parse->hasAggs || parse->groupClause))
			{
				/* Plain aggregate plan --- sort if needed */
				AggStrategy aggstrategy;

				if (parse->groupClause)
				{
					if (need_sort_for_grouping)
					{
						result_plan = (Plan *)
							make_sort_from_groupcols(root,
													 parse->groupClause,
													 groupColIdx,
													 false,
													 result_plan);
						current_pathkeys = root->group_pathkeys;

						/* Decorate the Sort node with a Flow node. */
						mark_sort_locus(result_plan);
					}
					aggstrategy = AGG_SORTED;

					/*
					 * The AGG node will not change the sort ordering of its
					 * groups, so current_pathkeys describes the result too.
					 */
				}
				else
				{
					aggstrategy = AGG_PLAIN;
					/* Result will be only one row anyway; no sort order */
					current_pathkeys = NIL;
				}

				/*
				 * We make a single Agg node if this is not a grouping extension.
				 */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												aggstrategy, false,
												numGroupCols,
												groupColIdx,
												groupOperators,
												numGroups,
												0, /* num_nullcols */
												0, /* input_grouping */
												0, /* grouping */
												0, /* rollup_gs_times */
												agg_counts.numAggs,
												agg_counts.transitionSpace,
												result_plan);

				if (canonical_grpsets != NULL &&
					canonical_grpsets->grpset_counts != NULL &&
					canonical_grpsets->grpset_counts[0] > 1)
				{
					result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
					result_plan = add_repeat_node(result_plan,
										 canonical_grpsets->grpset_counts[0],
												  0);
				}

				CdbPathLocus_MakeNull(&current_locus);
			}
			else if (grpext && (parse->hasAggs || parse->groupClause))
			{
				/* Plan the grouping extension */
				ListCell   *lc;
				bool		querynode_changed = false;

				/*
				 * Make a copy of tlist. Really need to?
				 */
				List	   *new_tlist = copyObject(tlist);

				/* Make EXPLAIN output look nice */
				foreach(lc, result_plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry *) lfirst(lc);

					if (IsA(tle->expr, Var) && tle->resname == NULL)
					{
						TargetEntry *vartle = tlist_member((Node *) tle->expr, tlist);

						if (vartle != NULL && vartle->resname != NULL)
							tle->resname = pstrdup(vartle->resname);
					}
				}

				result_plan = plan_grouping_extension(root, best_path, tuple_fraction,
													  use_hashed_grouping,
													  &new_tlist, result_plan->targetlist,
													  true, false,
													  (List *) parse->havingQual,
													  &numGroupCols,
													  &groupColIdx,
													  &groupOperators,
													  &agg_counts,
													  canonical_grpsets,
													  &dNumGroups,
													  &querynode_changed,
													  &current_pathkeys,
													  result_plan);
				if (querynode_changed)
				{
					/*
					 * We want to re-write sort_pathkeys here since the
					 * 2-stage aggregation subplan or grouping extension
					 * subplan may change the previous root->parse Query node,
					 * which makes the current sort_pathkeys invalid.
					 */
					if (parse->distinctClause &&
						grouping_is_sortable(parse->distinctClause))
						root->distinct_pathkeys =
							make_pathkeys_for_sortclauses(root,
														  parse->distinctClause,
														  result_plan->targetlist,
														  true);
					if (parse->sortClause)
						root->sort_pathkeys =
							make_pathkeys_for_sortclauses(root,
														  parse->sortClause,
														  result_plan->targetlist,
														  true);
					CdbPathLocus_MakeNull(&current_locus);
				}
			}
			else if (root->hasHavingQual)
			{
				/*
				 * No aggregates, and no GROUP BY, but we have a HAVING qual.
				 * This is a degenerate case in which we are supposed to emit
				 * either 0 or 1 row depending on whether HAVING succeeds.
				 * Furthermore, there cannot be any variables in either HAVING
				 * or the targetlist, so we actually do not need the FROM
				 * table at all!  We can just throw away the plan-so-far and
				 * generate a Result node.  This is a sufficiently unusual
				 * corner case that it's not worth contorting the structure of
				 * this routine to avoid having to generate the plan in the
				 * first place.
				 */
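				/*
				 * Illustrative example (not in the original source):
				 *     SELECT 1 HAVING 1 = 0;
				 * emits zero rows, while
				 *     SELECT 1 HAVING 1 = 1;
				 * emits exactly one row; no table is scanned either way.
				 */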
				result_plan = (Plan *) make_result(root,
												   tlist,
												   parse->havingQual,
												   NULL);
				/* Result will be only one row anyway; no sort order */
				current_pathkeys = NIL;
				mark_plan_general(result_plan);
				CdbPathLocus_MakeNull(&current_locus);
			}
		}						/* end of non-minmax-aggregate case */

		/*
		 * Since each window function could require a different sort order, we
		 * stack up a WindowAgg node for each window, with sort steps between
		 * them as needed.
		 */
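		/*
		 * Illustrative example (not in the original source):
		 *     SELECT rank() OVER (ORDER BY a),
		 *            sum(x)  OVER (ORDER BY b)
		 *     FROM t;
		 * typically yields two WindowAgg nodes, with a Sort on b between
		 * them (the lower WindowAgg consuming input sorted on a).
		 */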
		if (activeWindows)
		{
			List	   *window_tlist;
			ListCell   *l;

			/*
			 * If the top-level plan node is one that cannot do expression
			 * evaluation, we must insert a Result node to project the desired
			 * tlist.  (In some cases this might not really be required, but
			 * it's not worth trying to avoid it.)  Note that on second and
			 * subsequent passes through the following loop, the top-level
			 * node will be a WindowAgg which we know can project; so we only
			 * need to check once.
			 */
			if (!is_projection_capable_plan(result_plan))
			{
				result_plan = (Plan *) make_result(root,
												   NIL,
												   NULL,
												   result_plan);

				result_plan->flow = pull_up_Flow(result_plan,
												 getAnySubplan(result_plan));
			}

			/*
			 * The "base" targetlist for all steps of the windowing process is
			 * a flat tlist of all Vars and Aggs needed in the result. (In
			 * some cases we wouldn't need to propagate all of these all the
			 * way to the top, since they might only be needed as inputs to
			 * WindowFuncs.  It's probably not worth trying to optimize that
			 * though.)  We also need any volatile sort expressions, because
			 * make_sort_from_pathkeys won't add those on its own, and anyway
			 * we want them evaluated only once at the bottom of the stack.
			 * As we climb up the stack, we add outputs for the WindowFuncs
			 * computed at each level.  Also, each input tlist has to present
			 * all the columns needed to sort the data for the next WindowAgg
			 * step.  That's handled internally by make_sort_from_pathkeys,
			 * but we need the copyObject steps here to ensure that each plan
			 * node has a separately modifiable tlist.
			 *
			 * Note: it's essential here to use PVC_INCLUDE_AGGREGATES so that
			 * Vars mentioned only in aggregate expressions aren't pulled out
			 * as separate targetlist entries.  Otherwise we could be putting
			 * ungrouped Vars directly into an Agg node's tlist, resulting in
			 * undefined behavior.
			 */
			window_tlist = flatten_tlist(tlist,
										 PVC_INCLUDE_AGGREGATES,
										 PVC_INCLUDE_PLACEHOLDERS);
			window_tlist = add_volatile_sort_exprs(window_tlist, tlist,
												   activeWindows);
			foreach(l, activeWindows)
			{
				WindowClause *wc = (WindowClause *) lfirst(l);
				List	   *extravars;

				extravars = pull_var_clause(wc->startOffset,
											PVC_REJECT_AGGREGATES,
											PVC_INCLUDE_PLACEHOLDERS);
				window_tlist = add_to_flat_tlist(window_tlist, extravars);

				extravars = pull_var_clause(wc->endOffset,
											PVC_REJECT_AGGREGATES,
											PVC_INCLUDE_PLACEHOLDERS);
				window_tlist = add_to_flat_tlist(window_tlist, extravars);
			}

			window_tlist = add_to_flat_tlist_junk(window_tlist,
												  result_plan->flow->hashExpr,
												  true /* resjunk */);
			result_plan->targetlist = (List *) copyObject(window_tlist);

			foreach(l, activeWindows)
			{
				WindowClause *wc = (WindowClause *) lfirst(l);
				List	   *window_pathkeys;
				int			partNumCols;
				AttrNumber *partColIdx;
				Oid		   *partOperators;
				int			ordNumCols;
				AttrNumber *ordColIdx;
				Oid		   *ordOperators;
				int			firstOrderCol = 0;
				Oid			firstOrderCmpOperator = InvalidOid;
				bool		firstOrderNullsFirst = false;

				/*
				 * Unless the PARTITION BY in the window happens to match the
				 * current distribution, we need a motion. Each partition
				 * needs to be handled in the same segment.
				 *
				 * If there is no PARTITION BY, then all rows form a single
				 * partition, so we need to gather all the tuples to a single
				 * node. But we'll do that after the Sort, so that the Sort
				 * is parallelized.
				 */
				if (wc->partitionClause && !CdbPathLocus_IsGeneral(current_locus))
				{
					List	   *dist_pathkeys;

					dist_pathkeys =
						make_pathkeys_for_sortclauses(root, wc->partitionClause,
													  tlist, false);

					if (!cdbpathlocus_collocates(root, current_locus, dist_pathkeys, false))
					{
						List	   *dist_exprs = NIL;
						ListCell   *lc;

						foreach (lc, wc->partitionClause)
						{
							SortGroupClause *sc = (SortGroupClause *) lfirst(lc);
							TargetEntry *tle = get_sortgroupclause_tle(sc, tlist);

							dist_exprs = lappend(dist_exprs, tle->expr);
						}

						result_plan = (Plan *) make_motion_hash(root, result_plan, dist_exprs);
						result_plan->total_cost += motion_cost_per_row * result_plan->plan_rows;
						current_pathkeys = NIL; /* no longer sorted */
						Assert(result_plan->flow);

						/*
						 * Change current_locus based on the new distribution
						 * pathkeys.
						 */
						CdbPathLocus_MakeHashed(&current_locus, dist_pathkeys);
					}
				}

				window_pathkeys = make_pathkeys_for_window(root,
														   wc,
														   tlist,
														   true);

				/*
				 * This is a bit tricky: we build a sort node even if we don't
				 * really have to sort.  Even when no explicit sort is needed,
				 * we need to have suitable resjunk items added to the input
				 * plan's tlist for any partitioning or ordering columns that
				 * aren't plain Vars.  Furthermore, this way we can use
				 * existing infrastructure to identify which input columns are
				 * the interesting ones.
				 */
				if (window_pathkeys)
				{
					Sort	   *sort_plan;

					sort_plan = make_sort_from_pathkeys(root,
														result_plan,
														window_pathkeys,
														-1.0,
														true);
					if (!pathkeys_contained_in(window_pathkeys,
											   current_pathkeys))
					{
						/* we do indeed need to sort */
						result_plan = (Plan *) sort_plan;
						current_pathkeys = window_pathkeys;
						mark_sort_locus(result_plan);

						if (!result_plan->flow)
							result_plan->flow = pull_up_Flow(result_plan,
															 getAnySubplan(result_plan));
					}
					/* In either case, extract the per-column information */
					get_column_info_for_window(root, wc, tlist,
											   sort_plan->numCols,
											   sort_plan->sortColIdx,
											   &partNumCols,
											   &partColIdx,
											   &partOperators,
											   &ordNumCols,
											   &ordColIdx,
											   &ordOperators);
				}
				else
				{
					/* empty window specification, nothing to sort */
					partNumCols = 0;
					partColIdx = NULL;
					partOperators = NULL;
					ordNumCols = 0;
					ordColIdx = NULL;
					ordOperators = NULL;
				}

				if (wc->orderClause)
				{
					SortGroupClause *sortcl = (SortGroupClause *) linitial(wc->orderClause);
					ListCell	*l_tle;

					firstOrderCol = 0;
					foreach(l_tle, window_tlist)
					{
						TargetEntry *tle = (TargetEntry *) lfirst(l_tle);

						firstOrderCol++;
						if (sortcl->tleSortGroupRef == tle->ressortgroupref)
							break;
					}
					if (!l_tle)
						elog(ERROR, "failed to locate ORDER BY column");

					firstOrderCmpOperator = sortcl->sortop;
					firstOrderNullsFirst = sortcl->nulls_first;
				}

				/*
				 * If there was no PARTITION BY, gather the result.
				 */
				if (!wc->partitionClause &&
					!CdbPathLocus_IsGeneral(current_locus) &&
					result_plan->flow->flotype != FLOW_SINGLETON)
				{
					result_plan =
						(Plan *) make_motion_gather_to_QE(root, result_plan, current_pathkeys);
				}

				if (lnext(l))
				{
					/* Add the current WindowFuncs to the running tlist */
					window_tlist = add_to_flat_tlist(window_tlist,
										   wflists->windowFuncs[wc->winref]);
				}
				else
				{
					/* Install the original tlist in the topmost WindowAgg */
					window_tlist = tlist;
				}

				/* ... and make the WindowAgg plan node */
				result_plan = (Plan *)
					make_windowagg(root,
								   (List *) copyObject(window_tlist),
								   wflists->windowFuncs[wc->winref],
								   wc->winref,
								   partNumCols,
								   partColIdx,
								   partOperators,
								   ordNumCols,
								   ordColIdx,
								   ordOperators,
								   firstOrderCol,
								   firstOrderCmpOperator,
								   firstOrderNullsFirst,
								   wc->frameOptions,
								   wc->startOffset,
								   wc->endOffset,
								   result_plan);
			}
		}

		/* free canonical_grpsets */
		free_canonical_groupingsets(canonical_grpsets);
	}							/* end of if (setOperations) */

	/*
	 * Decorate the top node with a Flow node if it doesn't have one yet. (In
	 * such cases we require the next-to-top node to have a Flow node from
	 * which we can obtain the distribution info.)
	 */
	if (!result_plan->flow)
		result_plan->flow = pull_up_Flow(result_plan, getAnySubplan(result_plan));

	/*
	 * An ORDER BY or DISTINCT doesn't make much sense unless we bring all
	 * the data to a single node. Otherwise it's just a partial order. (If
	 * there's a LIMIT or OFFSET clause, we'll take care of this below, after
	 * inserting the Limit node).
	 *
	 * In a subquery, though, a partial order is OK. In fact, we could
	 * probably not bother with the sort at all, unless there's a LIMIT or
	 * OFFSET, because it's not going to make any difference to the overall
	 * query's result. For example, in "WHERE x IN (SELECT ...  ORDER BY
	 * foo)", the ORDER BY in the subquery will make no difference. PostgreSQL
	 * honors the sort, though, and historically, GPDB has also done a partial
	 * sort, separately on each node. So keep that behavior for now.
	 *
	 * A SELECT INTO or CREATE TABLE AS is similar to a subquery: the order
	 * doesn't really matter, but let's keep the partial order anyway.
	 *
	 * In a TABLE function's input subquery, a partial order is the documented
	 * behavior, so in that case that's definitely what we want.
	 */
	if ((parse->distinctClause || parse->sortClause) &&
		(root->config->honor_order_by || !root->parent_root) &&
		!parse->intoClause &&
		/*
		 * GPDB_84_MERGE_FIXME: Does this do the right thing, if you have a
		 * SELECT DISTINCT query as argument to a table function?
		 */
		!parse->isTableValueSelect &&
		!parse->limitCount && !parse->limitOffset)
	{
		must_gather = true;
	}
	else
		must_gather = false;
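
	/*
	 * Illustrative example (not in the original source): for a top-level
	 *     SELECT * FROM t ORDER BY a;
	 * must_gather is true, so a later Motion brings the per-segment sorted
	 * streams to a single node; in a subquery such as
	 *     WHERE x IN (SELECT y FROM t ORDER BY y)
	 * the sort remains partial, per segment, as described above.
	 */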

	/*
	 * If there is a DISTINCT clause, add the necessary node(s).
	 */
	if (parse->distinctClause)
	{
		double	dNumDistinctRows;
		long	numDistinctRows;
		bool	use_hashed_distinct;
		bool	can_sort;
		bool	can_hash;

		/*
		 * If there was grouping or aggregation, use the current number of
		 * rows as the estimated number of DISTINCT rows (ie, assume the
		 * result was already mostly unique).  If not, use the number of
		 * distinct-groups calculated by query_planner.
		 */
		if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
			dNumDistinctRows = result_plan->plan_rows;
		else
			dNumDistinctRows = dNumGroups;

		/* Also convert to long int --- but 'ware overflow! */
		numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);

		/*
		 * If we have a sortable DISTINCT ON clause, we always use sorting.
		 * This enforces the expected behavior of DISTINCT ON.
		 */
		can_sort = grouping_is_sortable(parse->distinctClause);
		if (can_sort && parse->hasDistinctOn)
			use_hashed_distinct = false;
		else
		{
			can_hash = grouping_is_hashable(parse->distinctClause);

			/* GPDB_84_MERGE_FIXME: The hash Agg we build for DISTINCT currently
			 * loses the GROUP_ID() information, so don't use it if there's a
			 * GROUP_ID().
			 */
			if (can_hash && contain_group_id((Node *) result_plan->targetlist))
				can_hash = false;

			if (can_hash && can_sort)
			{
				/* we have a meaningful choice to make ... */
				use_hashed_distinct =
					choose_hashed_distinct(root,
										   result_plan, current_pathkeys,
										   tuple_fraction, limit_tuples,
										   dNumDistinctRows);
			}
			else if (can_hash)
				use_hashed_distinct = true;
			else if (can_sort)
				use_hashed_distinct = false;
			else
			{
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("could not implement DISTINCT"),
						 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
				use_hashed_distinct = false;	/* keep compiler quiet */
			}
		}

		/*
		 * MPP: If there's a DISTINCT clause and we're not collocated on the
		 * distinct key, we need to redistribute on that key.  In addition, we
		 * need to consider whether to "pre-unique" by doing a Sort-Unique
		 * operation on the data as currently distributed, redistributing on the
		 * distinct key, and doing the Sort-Unique again. This 2-phase approach
		 * will be a win if the cost of redistributing the entire input exceeds
		 * the cost of an extra Redistribute-Sort-Unique on the pre-uniqued
		 * (reduced) input.
		 */
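		/*
		 * Illustrative example (not in the original source): for
		 *     SELECT DISTINCT a FROM t;
		 * with t distributed on some other column, the 2-phase plan is
		 * roughly Sort+Unique per segment, Redistribute on a, then
		 * Sort+Unique again, moving only locally-deduplicated rows.
		 */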
		distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
												result_plan->targetlist);
		numDistinct = estimate_num_groups(root, distinctExprs,
										  result_plan->plan_rows);

		if (CdbPathLocus_IsNull(current_locus))
		{
			current_locus = cdbpathlocus_from_flow(result_plan->flow);
		}

		if (Gp_role == GP_ROLE_DISPATCH && CdbPathLocus_IsPartitioned(current_locus))
		{
			bool		needMotion = !cdbpathlocus_collocates(root, current_locus,
															  root->distinct_pathkeys, false /* exact_match */ );

			/* Apply the preunique optimization, if enabled and worthwhile. */
			/* GPDB_84_MERGE_FIXME: pre-unique for hash distinct not implemented. */
			if (root->config->gp_enable_preunique && needMotion && !use_hashed_distinct)
			{
				double		base_cost,
							alt_cost;
				Path		sort_path;	/* dummy for result of cost_sort */

				base_cost = motion_cost_per_row * result_plan->plan_rows;
				alt_cost = motion_cost_per_row * numDistinct;
				cost_sort(&sort_path, root, NIL, alt_cost,
						  numDistinct, result_plan->plan_rows, -1.0);
				alt_cost += sort_path.startup_cost;
				alt_cost += cpu_operator_cost * numDistinct
					* list_length(parse->distinctClause);

				if (alt_cost < base_cost || root->config->gp_eager_preunique)
				{
					/*
					 * Reduce the number of rows to move by adding a [Sort
					 * and] Unique prior to the redistribute Motion.
					 */
					if (root->sort_pathkeys)
					{
						if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
						{
							result_plan = (Plan *) make_sort_from_pathkeys(root,
																		   result_plan,
																		   root->sort_pathkeys,
																		   limit_tuples,
																		   true);
							((Sort *) result_plan)->noduplicates = gp_enable_sort_distinct;
							current_pathkeys = root->sort_pathkeys;
							mark_sort_locus(result_plan);
						}
					}

					result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);

					result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);

					result_plan->plan_rows = numDistinct;

					/*
					 * Our sort node (under the unique node), unfortunately
					 * can't guarantee uniqueness -- so we aren't allowed to
					 * push the limit into the sort; but we can avoid moving
					 * the entire sorted result-set by plunking a limit on the
					 * top of the unique-node.
					 */
					if (parse->limitCount)
					{
						/*
						 * Our extra limit operation is basically a third
						 * phase of a multi-phase limit (see the 2-phase
						 * limit below).
						 */
						result_plan = pushdown_preliminary_limit(result_plan, parse->limitCount, count_est, parse->limitOffset, offset_est);
					}
				}
			}

			if (needMotion)
			{
				result_plan = (Plan *) make_motion_hash(root, result_plan, distinctExprs);
				result_plan->total_cost += motion_cost_per_row * result_plan->plan_rows;
				current_pathkeys = NIL;		/* Any pre-existing order now lost. */
			}
		}
		else if (result_plan->flow->flotype == FLOW_SINGLETON)
			;					/* Already collocated. */
		else
		{
			ereport(ERROR, (errcode(ERRCODE_CDB_INTERNAL_ERROR),
							errmsg("unexpected input locus to distinct")));
		}

		if (use_hashed_distinct)
		{
			/* Hashed aggregate plan --- no sort needed */
			result_plan = (Plan *) make_agg(root,
											result_plan->targetlist,
											NIL,
											AGG_HASHED,
											false, /* streaming */
										  list_length(parse->distinctClause),
								 extract_grouping_cols(parse->distinctClause,
													result_plan->targetlist),
								 extract_grouping_ops(parse->distinctClause),
											numDistinctRows,
											0, /* num_nullcols */
											0, /* input_grouping */
											0, /* grouping */
											0, /* rollupGSTimes */
											0,
											0, /* transSpace */
											result_plan);
			/* Hashed aggregation produces randomly-ordered results */
			current_pathkeys = NIL;
		}
		else
		{
			/*
			 * Use a Unique node to implement DISTINCT.  Add an explicit sort
			 * if we couldn't make the path come out the way the Unique node
			 * needs it.  If we do have to sort, always sort by the more
			 * rigorous of DISTINCT and ORDER BY, to avoid a second sort
			 * below.  However, for regular DISTINCT, don't sort now if we
			 * don't have to --- sorting afterwards will likely be cheaper,
			 * and also has the possibility of optimizing via LIMIT.  But for
			 * DISTINCT ON, we *must* force the final sort now, else it won't
			 * have the desired behavior.
			 */
			List	   *needed_pathkeys;

			if (parse->hasDistinctOn &&
				list_length(root->distinct_pathkeys) <
				list_length(root->sort_pathkeys))
				needed_pathkeys = root->sort_pathkeys;
			else
				needed_pathkeys = root->distinct_pathkeys;

			if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
			{
				if (list_length(root->distinct_pathkeys) >=
					list_length(root->sort_pathkeys))
					current_pathkeys = root->distinct_pathkeys;
				else
				{
					current_pathkeys = root->sort_pathkeys;
					/* Assert checks that parser didn't mess up... */
					Assert(pathkeys_contained_in(root->distinct_pathkeys,
												 current_pathkeys));
				}

				result_plan = (Plan *) make_sort_from_pathkeys(root,
															   result_plan,
2532 2533 2534 2535 2536 2537
															   current_pathkeys,
															   -1.0,
															   true);
				mark_sort_locus(result_plan);
			}

			if (must_gather && result_plan->flow->flotype != FLOW_SINGLETON)
			{
				/*
				 * As an optimization, eliminate any duplicates within the
				 * segment, before the motion.
				 */
				if (IsA(result_plan, Sort) && gp_enable_sort_distinct)
					((Sort *) result_plan)->noduplicates = true;
				result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
				result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);

				result_plan = (Plan *) make_motion_gather(root, result_plan, -1,
														  current_pathkeys);
			}

			result_plan = (Plan *) make_unique(result_plan,
											   parse->distinctClause);
			result_plan->plan_rows = dNumDistinctRows;
			/* The Unique node won't change sort ordering */
		}
		result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
	}

	/*
	 * If ORDER BY was given and we were not able to make the plan come out in
	 * the right order, add an explicit sort step.
	 */
	if (parse->sortClause)
	{
		if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		{
			result_plan = (Plan *) make_sort_from_pathkeys(root,
														   result_plan,
														   root->sort_pathkeys,
														   limit_tuples,
														   true);
			mark_sort_locus(result_plan);
			current_pathkeys = root->sort_pathkeys;
			result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
		}

		if (must_gather && result_plan->flow->flotype != FLOW_SINGLETON)
		{
			/*
			 * current_pathkeys might contain unneeded columns that have been
			 * eliminated from the final target list, and we cannot maintain
			 * such an order in the Motion anymore. So use root->sort_pathkeys
			 * rather than current_pathkeys here. (See the similar case in the
			 * LIMIT handling below.)
			 */
			current_pathkeys = root->sort_pathkeys;
			result_plan = (Plan *) make_motion_gather(root, result_plan, -1,
													  current_pathkeys);
		}
	}

	/*
	 * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
	 */
	if (parse->limitCount || parse->limitOffset)
	{
		if (Gp_role == GP_ROLE_DISPATCH && result_plan->flow->flotype == FLOW_PARTITIONED)
		{
			/* pushdown the first phase of multi-phase limit (which takes offset into account) */
			result_plan = pushdown_preliminary_limit(result_plan, parse->limitCount, count_est, parse->limitOffset, offset_est);

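			/*
			 * Illustrative note (not in the original source): for
			 *     ... ORDER BY a LIMIT 10 OFFSET 5
			 * the preliminary phase lets each segment emit at most
			 * 10 + 5 = 15 rows; the final Limit node added below applies the
			 * exact OFFSET and LIMIT after the gather.
			 */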
			/*
			 * Focus on QE [merge to preserve order], prior to final LIMIT.
			 *
			 * If there is an ORDER BY, the input should be in the required
			 * order now, and we must preserve the order in the merge.
			 */
			if (parse->sortClause)
			{
				if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
					elog(ERROR, "invalid result order generated for ORDER BY + LIMIT");
				current_pathkeys = root->sort_pathkeys;
				result_plan = (Plan *) make_motion_gather_to_QE(root, result_plan,
																current_pathkeys);
			}
			else
				result_plan = (Plan *) make_motion_gather_to_QE(root, result_plan, NIL);
			result_plan->total_cost += motion_cost_per_row * result_plan->plan_rows;
		}

		if (current_pathkeys == NIL)
		{
			/* This used to be a WARNING.  If reinstated, it should be a NOTICE
			 * and steps taken to avoid issuing it at inopportune times, e.g.,
			 * from the query generated by psql tab-completion.
			 */
			ereport(DEBUG1, (errmsg("LIMIT/OFFSET applied to unordered result.")));
		}

		/* For multi-phase limit, this is the final limit */
		result_plan = (Plan *) make_limit(result_plan,
										  parse->limitOffset,
										  parse->limitCount,
										  offset_est,
										  count_est);
		result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
	}

	/*
	 * Deal with explicit redistribution requirements for TableValueExpr
	 * subplans with explicit distribution.
	 */
	if (parse->scatterClause)
	{
		bool		r;
		List	   *exprList;

		/* Deal with the special case of SCATTER RANDOMLY */
		if (list_length(parse->scatterClause) == 1 && linitial(parse->scatterClause) == NULL)
			exprList = NIL;
		else
			exprList = parse->scatterClause;

		/*
		 * Repartition the subquery plan based on our distribution
		 * requirements
		 */
		r = repartitionPlan(result_plan, false, false, exprList);
		if (!r)
		{
			/*
			 * This should not be possible, repartitionPlan should never fail
			 * when both stable and rescannable are false.
			 */
			elog(ERROR, "failure repartitioning plan");
		}
	}

	Insist(result_plan->flow);

	/*
	 * Deal with the RETURNING clause if any.  It's convenient to pass the
	 * returningList through setrefs.c now rather than at top level (if we
	 * waited, handling inherited UPDATE/DELETE would be much harder).
	 */
	if (parse->returningList)
	{
		List	   *rlist;

		Assert(parse->resultRelation);
		rlist = set_returning_clause_references(root->glob,
												parse->returningList,
												result_plan,
												parse->resultRelation);
		root->returningLists = list_make1(rlist);
	}
	else
		root->returningLists = NIL;

	/* Compute result-relations list if needed */
	if (parse->resultRelation)
		root->resultRelations = list_make1_int(parse->resultRelation);
	else
		root->resultRelations = NIL;

	/*
	 * Return the actual output ordering in query_pathkeys for possible use by
	 * an outer query level.
	 */
	root->query_pathkeys = current_pathkeys;

#ifdef USE_ASSERT_CHECKING
	grouping_planner_output_asserts(root, result_plan);
#endif

	return result_plan;
}

/*
 * Entry is through is_dummy_plan().
 *
 * Detect whether a plan node is a "dummy" plan created when a relation
 * is deemed not to need scanning due to constraint exclusion.
 *
 * At bottom, such dummy plans are Result nodes with constant FALSE
 * filter quals.  However, we also recognize simple plans that are
 * known to return no rows because they contain a dummy.
 *
 * BTW, the plan_tree_walker framework is overkill here, but it's good to
 * do things the standard way.
 */
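
/*
 * Illustrative example (not in the original source): with constraint
 * exclusion, a query such as
 *     SELECT * FROM sales WHERE region = 'EU';
 * where no partition of "sales" (a hypothetical partitioned table) can
 * satisfy the qual plans the excluded scan as a Result node whose
 * resconstantqual is a constant FALSE.
 */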
static bool
is_dummy_plan_walker(Node *node, bool *context)
{
	/*
	 * We are only interested in Plan nodes.
	 */
	if (node == NULL || !is_plan_node(node))
		return false;

	switch (nodeTag(node))
	{
		case T_Result:

			/*
			 * This tests the base case of a dummy plan which is a Result node
			 * with a constant FALSE filter qual.  (This is the case
			 * constructed as an empty Append path by set_plain_rel_pathlist
			 * in allpaths.c and made into a Result plan by create_append_plan
			 * in createplan.c.)
			 */
			{
				List	   *rcqual = (List *) ((Result *) node)->resconstantqual;

				if (list_length(rcqual) == 1)
				{
					Const	   *constqual = (Const *) linitial(rcqual);

					if (constqual && IsA(constqual, Const))
					{
						if (!constqual->constisnull &&
							!DatumGetBool(constqual->constvalue))
							*context = true;
						return true;
					}
				}
			}
			return false;

		case T_SubqueryScan:

			/*
			 * A SubqueryScan is dummy if its subplan is dummy.
			 */
			{
				SubqueryScan *subqueryscan = (SubqueryScan *) node;
				Plan	   *subplan = subqueryscan->subplan;

				if (is_dummy_plan(subplan))
				{
					*context = true;
					return true;
				}
			}
			return false;

		case T_NestLoop:
		case T_MergeJoin:
		case T_HashJoin:

			/*
			 * Joins with dummy inner and/or outer plans are dummy or not
			 * based on the type of join.
			 */
			{
				switch (((Join *) node)->jointype)
				{
					case JOIN_INNER:	/* either */
						*context = is_dummy_plan(innerPlan(node))
							|| is_dummy_plan(outerPlan(node));
						break;

					case JOIN_LEFT:
					case JOIN_FULL:
					case JOIN_RIGHT:	/* both */
						*context = is_dummy_plan(innerPlan(node))
							&& is_dummy_plan(outerPlan(node));
						break;

					case JOIN_SEMI:
					case JOIN_LASJ_NOTIN:
					case JOIN_ANTI:		/* outer */
						*context = is_dummy_plan(outerPlan(node));
						break;

					default:
						break;
				}

				return true;
			}

			/*
			 * It may seem that we should check for Append or SetOp nodes with
			 * all dummy branches, but that case should not occur.  It would
			 * cause big problems elsewhere in the code.
			 */

		case T_Hash:
		case T_Material:
		case T_Sort:
		case T_Unique:

			/*
			 * Some node types are dummy if their outer plan is dummy, so we
			 * just recur.
			 *
			 * We don't include "tricky" nodes like Motion that might affect
			 * plan topology, even though we know they will return no rows
			 * from a dummy.
			 */
			return plan_tree_walker(node, is_dummy_plan_walker, context);

		default:

			/*
			 * Other node types are "opaque" so we choose a conservative
			 * course and terminate the walk.
			 */
			return true;
	}
	/* not reached */
}


static bool
is_dummy_plan(Plan *plan)
{
	bool		is_dummy = false;

	is_dummy_plan_walker((Node *) plan, &is_dummy);

	return is_dummy;
}

/*
 * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
 *
 * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
 * results back in *count_est and *offset_est.  These variables are set to
 * 0 if the corresponding clause is not present, and -1 if it's present
 * but we couldn't estimate the value for it.  (The "0" convention is OK
 * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
 * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
 * usual practice of never estimating less than one row.)  These values will
 * be passed to make_limit, which see if you change this code.
 *
 * The return value is the suitably adjusted tuple_fraction to use for
 * planning the query.  This adjustment is not overridable, since it reflects
 * plan actions that grouping_planner() will certainly take, not assumptions
 * about context.
 */
static double
preprocess_limit(PlannerInfo *root, double tuple_fraction,
				 int64 *offset_est, int64 *count_est)
{
	Query	   *parse = root->parse;
	Node	   *est;
	double		limit_fraction;

	/* Should not be called unless LIMIT or OFFSET */
	Assert(parse->limitCount || parse->limitOffset);

	/*
	 * Try to obtain the clause values.  We use estimate_expression_value
	 * primarily because it can sometimes do something useful with Params.
	 */
	if (parse->limitCount)
	{
		est = estimate_expression_value(root, parse->limitCount);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* NULL indicates LIMIT ALL, ie, no limit */
				*count_est = 0; /* treat as not present */
			}
			else
			{
				if (((Const *) est)->consttype == INT4OID)
					*count_est = DatumGetInt32(((Const *) est)->constvalue);
				else
					*count_est = DatumGetInt64(((Const *) est)->constvalue);
				if (*count_est <= 0)
					*count_est = 1;		/* force to at least 1 */
			}
		}
		else
			*count_est = -1;	/* can't estimate */
	}
	else
		*count_est = 0;			/* not present */

	if (parse->limitOffset)
	{
		est = estimate_expression_value(root, parse->limitOffset);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* Treat NULL as no offset; the executor will too */
				*offset_est = 0;	/* treat as not present */
			}
			else
			{
				if (((Const *) est)->consttype == INT4OID)
					*offset_est = DatumGetInt32(((Const *) est)->constvalue);
				else
					*offset_est = DatumGetInt64(((Const *) est)->constvalue);

				if (*offset_est < 0)
					*offset_est = 0;	/* less than 0 is same as 0 */
			}
		}
		else
			*offset_est = -1;	/* can't estimate */
	}
	else
		*offset_est = 0;		/* not present */

	if (*count_est != 0)
	{
		/*
		 * A LIMIT clause limits the absolute number of tuples returned.
		 * However, if it's not a constant LIMIT then we have to guess; for
		 * lack of a better idea, assume 10% of the plan's result is wanted.
		 */
		if (*count_est < 0 || *offset_est < 0)
		{
			/* LIMIT or OFFSET is an expression ... punt ... */
			limit_fraction = 0.10;
		}
		else
		{
			/* LIMIT (plus OFFSET, if any) is max number of tuples needed */
			limit_fraction = (double) *count_est + (double) *offset_est;
		}
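
		/*
		 * Illustrative example (not in the original source): for constant
		 *     ... LIMIT 10 OFFSET 20
		 * limit_fraction is 10 + 20 = 30, an absolute tuple count.
		 */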

		/*
		 * If we have absolute limits from both caller and LIMIT, use the
		 * smaller value; likewise if they are both fractional.  If one is
		 * fractional and the other absolute, we can't easily determine which
		 * is smaller, but we use the heuristic that the absolute will usually
		 * be smaller.
		 */
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute */
				tuple_fraction = Min(tuple_fraction, limit_fraction);
			}
			else
			{
				/* caller absolute, limit fractional; use caller's value */
			}
		}
		else if (tuple_fraction > 0.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* caller fractional, limit absolute; use limit */
				tuple_fraction = limit_fraction;
			}
			else
			{
				/* both fractional */
				tuple_fraction = Min(tuple_fraction, limit_fraction);
			}
		}
		else
		{
			/* no info from caller, just use limit */
			tuple_fraction = limit_fraction;
		}
	}
	else if (*offset_est != 0 && tuple_fraction > 0.0)
	{
		/*
		 * We have an OFFSET but no LIMIT.  This acts entirely differently
		 * from the LIMIT case: here, we need to increase rather than decrease
		 * the caller's tuple_fraction, because the OFFSET acts to cause more
		 * tuples to be fetched instead of fewer.  This only matters if we got
		 * a tuple_fraction > 0, however.
		 *
		 * As above, use 10% if OFFSET is present but unestimatable.
		 */
		if (*offset_est < 0)
			limit_fraction = 0.10;
		else
			limit_fraction = (double) *offset_est;

		/*
		 * If we have absolute counts from both caller and OFFSET, add them
		 * together; likewise if they are both fractional.	If one is
		 * fractional and the other absolute, we want to take the larger, and
		 * we heuristically assume that's the fractional one.
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute, so add them together */
				tuple_fraction += limit_fraction;
			}
			else
			{
				/* caller absolute, limit fractional; use limit */
				tuple_fraction = limit_fraction;
			}
		}
		else
		{
			if (limit_fraction >= 1.0)
			{
				/* caller fractional, limit absolute; use caller's value */
			}
			else
			{
				/* both fractional, so add them together */
				tuple_fraction += limit_fraction;
				if (tuple_fraction >= 1.0)
					tuple_fraction = 0.0;		/* assume fetch all */
			}
		}
	}

	return tuple_fraction;
}


/*
 * preprocess_groupclause - do preparatory work on GROUP BY clause
 *
 * The idea here is to adjust the ordering of the GROUP BY elements
 * (which in itself is semantically insignificant) to match ORDER BY,
 * thereby allowing a single sort operation to both implement the ORDER BY
 * requirement and set up for a Unique step that implements GROUP BY.
 *
 * In principle it might be interesting to consider other orderings of the
 * GROUP BY elements, which could match the sort ordering of other
 * possible plans (eg an indexscan) and thereby reduce cost.  We don't
 * bother with that, though.  Hashed grouping will frequently win anyway.
 *
 * Note: we need no comparable processing of the distinctClause because
 * the parser already enforced that that matches ORDER BY.
 */
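
/*
 * Illustrative example (not in the original source): given
 *     SELECT a, b FROM t GROUP BY b, a ORDER BY a, b;
 * the GROUP BY list is rearranged to (a, b) so that a single sort can
 * serve the ORDER BY and a sort-based grouping implementation alike.
 */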
static void
preprocess_groupclause(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	List	   *new_groupclause;
	bool		partial_match;
	ListCell   *sl;
	ListCell   *gl;

	/* If no ORDER BY, nothing useful to do here */
	if (parse->sortClause == NIL)
		return;

	/*
	 * GPDB: The grouping clause might contain grouping sets, not just plain
	 * SortGroupClauses. Give up if we see any. (Yes, we could probably do
	 * better than that, but this will do for now.)
	 */
	foreach(gl, parse->groupClause)
	{
		Node *node = lfirst(gl);

		if (node == NULL || !IsA(node, SortGroupClause))
			return;
	}

	/*
	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
	 * items, but only as far as we can make a matching prefix.
	 *
	 * This code assumes that the sortClause contains no duplicate items.
	 */
	new_groupclause = NIL;
	foreach(sl, parse->sortClause)
	{
		SortGroupClause *sc = (SortGroupClause *) lfirst(sl);

		foreach(gl, parse->groupClause)
		{
			SortGroupClause *gc = (SortGroupClause *) lfirst(gl);

			if (equal(gc, sc))
			{
				new_groupclause = lappend(new_groupclause, gc);
				break;
			}
		}
		if (gl == NULL)
			break;				/* no match, so stop scanning */
	}

	/* Did we match all of the ORDER BY list, or just some of it? */
	partial_match = (sl != NULL);

	/* If no match at all, no point in reordering GROUP BY */
	if (new_groupclause == NIL)
		return;

3128 3129 3130 3131 3132
	/*
	 * Add any remaining GROUP BY items to the new list, but only if we
	 * were able to make a complete match.  In other words, we only
	 * rearrange the GROUP BY list if the result is that one list is a
	 * prefix of the other --- otherwise there's no possibility of a
3133 3134
	 * common sort.  Also, give up if there are any non-sortable GROUP BY
	 * items, since then there's no hope anyway.
3135 3136 3137 3138 3139 3140 3141 3142 3143
	 */
	foreach(gl, parse->groupClause)
	{
		SortGroupClause *gc = (SortGroupClause *) lfirst(gl);

		if (list_member_ptr(new_groupclause, gc))
			continue;			/* it matched an ORDER BY item */
		if (partial_match)
			return;				/* give up, no common sort possible */
3144 3145
		if (!OidIsValid(gc->sortop))
			return;				/* give up, GROUP BY can't be sorted */
3146 3147 3148 3149 3150 3151
		new_groupclause = lappend(new_groupclause, gc);
	}

	/* Success --- install the rearranged GROUP BY list */
	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
	parse->groupClause = new_groupclause;
3152 3153
}

/*
 * choose_hashed_grouping - should we use hashed grouping?
 *
 * Note: this is only applied when both alternatives are actually feasible.
 */
bool
choose_hashed_grouping(PlannerInfo *root,
					   double tuple_fraction, double limit_tuples,
					   Path *cheapest_path, Path *sorted_path,
					   int numGroupOps,
					   double dNumGroups, AggClauseCounts *agg_counts)
{
	int			numGroupCols;
	double		cheapest_path_rows;
	int			cheapest_path_width;
	double		hashentrysize;
	List	   *target_pathkeys;
	List	   *current_pathkeys;
	Path		hashed_p;
	Path		sorted_p;

	HashAggTableSizes hash_info;

	/* Prefer sorting when enable_hashagg is off */
	if (!root->config->enable_hashagg)
		return false;

	/*
	 * CDB: The preliminary function is used to merge transient values during
	 * hash reloading (see execHHashagg.c). So hash agg is not allowed if one
	 * of the aggregates doesn't have its preliminary function.
	 */
	if (agg_counts->missing_prelimfunc)
		return false;

	/*
	 * CDB: The parallel grouping planner cannot use hashed aggregation for
	 * ordered aggregates.
	 */
	if (agg_counts->numOrderedAggs != 0)
		return false;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 *
	 * Beware here of the possibility that cheapest_path->parent is NULL. This
	 * could happen if user does something silly like SELECT 'foo' GROUP BY 1;
	 */
	if (cheapest_path->parent)
	{
		cheapest_path_rows = cdbpath_rows(root, cheapest_path);
		cheapest_path_width = cheapest_path->parent->width;
	}
	else
	{
		cheapest_path_rows = 1; /* assume non-set result */
		cheapest_path_width = 100;		/* arbitrary */
	}

	/* Estimate per-hash-entry space at tuple width... */
	hashentrysize = agg_hash_entrywidth(agg_counts->numAggs,
							   sizeof(HeapTupleData) + sizeof(HeapTupleHeaderData) + cheapest_path_width,
							   agg_counts->transitionSpace);

	if (!calcHashAggTableSizes(global_work_mem(root),
							   dNumGroups,
							   hashentrysize,
							   false,
							   &hash_info))
	{
		return false;
	}
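	/*
	 * Illustrative arithmetic: with, say, a million groups and ~100-byte
	 * hash entries, the table wants roughly 100MB; calcHashAggTableSizes()
	 * judges whether that fits the available work_mem (possibly by planning
	 * spill batches), and we fall back to sorted grouping when it reports
	 * failure.
	 */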

	/*
	 * When we have both GROUP BY and DISTINCT, use the more-rigorous of
	 * DISTINCT and ORDER BY as the assumed required output sort order.
	 * This is an oversimplification because the DISTINCT might get
	 * implemented via hashing, but it's not clear that the case is common
	 * enough (or that our estimates are good enough) to justify trying to
	 * solve it exactly.
	 */
	if (list_length(root->distinct_pathkeys) >
		list_length(root->sort_pathkeys))
		target_pathkeys = root->distinct_pathkeys;
	else
		target_pathkeys = root->sort_pathkeys;

	/*
	 * See if the estimated cost is no more than doing it the other way. While
	 * avoiding the need for sorted input is usually a win, the fact that the
	 * output won't be sorted may be a loss; so we need to do an actual cost
	 * comparison.
	 *
	 * We need to consider cheapest_path + hashagg [+ final sort] versus
	 * either cheapest_path [+ sort] + group or agg [+ final sort] or
	 * presorted_path + group or agg [+ final sort] where brackets indicate a
	 * step that may not be needed. We assume query_planner() will have
	 * returned a presorted path only if it's a winner compared to
	 * cheapest_path for this purpose.
	 *
	 * These path variables are dummies that just hold cost fields; we don't
	 * make actual Paths for these steps.
	 */
	numGroupCols = num_distcols_in_grouplist(root->parse->groupClause);
	cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs,
			 numGroupCols, dNumGroups,
			 cheapest_path->startup_cost, cheapest_path->total_cost,
			 cheapest_path_rows, hash_info.workmem_per_entry,
			 hash_info.nbatches, hash_info.hashentry_width, false);
	/* Result of hashed agg is always unsorted */
	if (target_pathkeys)
		cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
				  dNumGroups, cheapest_path_width, limit_tuples);

	if (sorted_path)
	{
		sorted_p.startup_cost = sorted_path->startup_cost;
		sorted_p.total_cost = sorted_path->total_cost;
		current_pathkeys = sorted_path->pathkeys;
	}
	else
	{
		sorted_p.startup_cost = cheapest_path->startup_cost;
		sorted_p.total_cost = cheapest_path->total_cost;
		current_pathkeys = cheapest_path->pathkeys;
	}
	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
	{
		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
				  cheapest_path_rows, cheapest_path_width, -1.0);
		current_pathkeys = root->group_pathkeys;
	}

	if (root->parse->hasAggs)
		cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs,
				 numGroupCols, dNumGroups,
				 sorted_p.startup_cost, sorted_p.total_cost,
				 cheapest_path_rows, 0.0, 0.0, 0.0, false);
	else
		cost_group(&sorted_p, root, numGroupCols, dNumGroups,
				   sorted_p.startup_cost, sorted_p.total_cost,
				   cheapest_path_rows);
	/* The Agg or Group node will preserve ordering */
	if (target_pathkeys &&
		!pathkeys_contained_in(target_pathkeys, current_pathkeys))
		cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
				  dNumGroups, cheapest_path_width, limit_tuples);

	/*
	 * Now make the decision using the top-level tuple fraction.  First we
	 * have to convert an absolute count (LIMIT) into fractional form.
	 */
	if (tuple_fraction >= 1.0)
		tuple_fraction /= dNumGroups;
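	/*
	 * Illustrative: a LIMIT of 100 over an estimated 10000 groups becomes
	 * tuple_fraction = 0.01, so the comparison favors whichever plan can
	 * produce the first 1% of the groups more cheaply.
	 */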

	if (!root->config->enable_groupagg)
		return true;

	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
									  tuple_fraction) < 0)
	{
		/* Hashed is cheaper, so use it */
		return true;
	}
	return false;
}

/*
 * choose_hashed_distinct - should we use hashing for DISTINCT?
 *
 * This is fairly similar to choose_hashed_grouping, but there are enough
 * differences that it doesn't seem worth trying to unify the two functions.
 *
 * But note that making the two choices independently is a bit bogus in
 * itself.  If the two could be combined into a single choice operation
 * it'd probably be better, but that seems far too unwieldy to be practical,
 * especially considering that the combination of GROUP BY and DISTINCT
 * isn't very common in real queries.  By separating them, we are giving
 * extra preference to using a sorting implementation when a common sort key
 * is available ... and that's not necessarily wrong anyway.
 *
 * Note: this is only applied when both alternatives are actually feasible.
 */
static bool
choose_hashed_distinct(PlannerInfo *root,
					   Plan *input_plan, List *input_pathkeys,
					   double tuple_fraction, double limit_tuples,
					   double dNumDistinctRows)
{
	int			numDistinctCols = list_length(root->parse->distinctClause);
	Size		hashentrysize;
	List	   *current_pathkeys;
	List	   *needed_pathkeys;
	Path		hashed_p;
	Path		sorted_p;

	/* Prefer sorting when enable_hashagg is off */
	if (!enable_hashagg)
		return false;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 */
	hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));

	if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
		return false;
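	/* (work_mem is measured in kilobytes, hence the 1024L multiplier above.) */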

	/*
	 * See if the estimated cost is no more than doing it the other way. While
	 * avoiding the need for sorted input is usually a win, the fact that the
	 * output won't be sorted may be a loss; so we need to do an actual cost
	 * comparison.
	 *
	 * We need to consider input_plan + hashagg [+ final sort] versus
	 * input_plan [+ sort] + group [+ final sort] where brackets indicate
	 * a step that may not be needed.
	 *
	 * These path variables are dummies that just hold cost fields; we don't
	 * make actual Paths for these steps.
	 */
	cost_agg(&hashed_p, root, AGG_HASHED, 0,
			 numDistinctCols, dNumDistinctRows,
			 input_plan->startup_cost, input_plan->total_cost,
			 input_plan->plan_rows,
			 /* GPDB_84_MERGE_FIXME: What would be appropriate values for these extra
			  * arguments? */
			 0, /* input_width */
			 0, /* hash_batches */
			 0, /* hashentry_width */
			 false /* hash_streaming */);
	/*
	 * Result of hashed agg is always unsorted, so if ORDER BY is present
	 * we need to charge for the final sort.
	 */
	if (root->parse->sortClause)
		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
				  dNumDistinctRows, input_plan->plan_width, limit_tuples);

	/*
	 * Now for the GROUP case.  See comments in grouping_planner about the
	 * sorting choices here --- this code should match that code.
	 */
	sorted_p.startup_cost = input_plan->startup_cost;
	sorted_p.total_cost = input_plan->total_cost;
	current_pathkeys = input_pathkeys;
	if (root->parse->hasDistinctOn &&
		list_length(root->distinct_pathkeys) <
		list_length(root->sort_pathkeys))
		needed_pathkeys = root->sort_pathkeys;
	else
		needed_pathkeys = root->distinct_pathkeys;
	if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
	{
		if (list_length(root->distinct_pathkeys) >=
			list_length(root->sort_pathkeys))
			current_pathkeys = root->distinct_pathkeys;
		else
			current_pathkeys = root->sort_pathkeys;
		cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
				  input_plan->plan_rows, input_plan->plan_width, -1.0);
	}
	cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
			   sorted_p.startup_cost, sorted_p.total_cost,
			   input_plan->plan_rows);
	if (root->parse->sortClause &&
		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
				  dNumDistinctRows, input_plan->plan_width, limit_tuples);

	/*
	 * Now make the decision using the top-level tuple fraction.  First we
	 * have to convert an absolute count (LIMIT) into fractional form.
	 */
	if (tuple_fraction >= 1.0)
		tuple_fraction /= dNumDistinctRows;

	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
									  tuple_fraction) < 0)
	{
		/* Hashed is cheaper, so use it */
		return true;
	}
	return false;
}

/*---------------
 * make_subplanTargetList
 *	  Generate appropriate target list when grouping is required.
 *
 * When grouping_planner inserts grouping or aggregation plan nodes
 * above the scan/join plan constructed by query_planner+create_plan,
 * we typically want the scan/join plan to emit a different target list
 * than the outer plan nodes should have.  This routine generates the
 * correct target list for the scan/join subplan.
 *
 * The initial target list passed from the parser already contains entries
 * for all ORDER BY and GROUP BY expressions, but it will not have entries
 * for variables used only in HAVING clauses; so we need to add those
 * variables to the subplan target list.  Also, we flatten all expressions
 * except GROUP BY items into their component variables; the other expressions
 * will be computed by the inserted nodes rather than by the subplan.
 * For example, given a query like
 *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
 * we want to pass this targetlist to the subplan:
 *		a,b,c,d,a+b
 * where the a+b target will be used by the Sort/Group steps, and the
 * other targets will be used for computing the final results.	(In the
 * above example we could theoretically suppress the a and b targets and
 * pass down only c,d,a+b, but it's not really worth the trouble to
 * eliminate simple var references from the subplan.  We will avoid doing
 * the extra computation to recompute a+b at the outer level; see
 * fix_upper_expr() in setrefs.c.)
 *
 * If we are grouping or aggregating, *and* there are no non-Var grouping
 * expressions, then the returned tlist is effectively dummy; we do not
 * need to force it to be evaluated, because all the Vars it contains
 * should be present in the "flat" tlist generated by create_plan, though
 * possibly in a different order.  In that case we'll use create_plan's tlist,
 * and the tlist made here is only needed as input to query_planner to tell
 * it which Vars are needed in the output of the scan/join plan.
 *
 * 'tlist' is the query's target list.
 * 'groupColIdx' receives an array of column numbers for the GROUP BY
 *			expressions (if there are any) in the returned target list.
 * 'groupOperators' receives an array of equality operators corresponding to
 *			the GROUP BY expressions.
 * 'need_tlist_eval' is set true if we really need to evaluate the
 *			returned tlist as-is.
 *
 * The result is the targetlist to be passed to query_planner.
 *---------------
 */
static List *
make_subplanTargetList(PlannerInfo *root,
					   List *tlist,
					   AttrNumber **groupColIdx,
					   Oid **groupOperators,
					   bool *need_tlist_eval)
{
	Query	   *parse = root->parse;
	List	   *sub_tlist;
	List	   *extravars;
	int			numCols;

	*groupColIdx = NULL;

	/*
	 * If we're not grouping or aggregating, there's nothing to do here;
	 * query_planner should receive the unmodified target list.
	 */
	if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
		!parse->hasWindowFuncs)
	{
		*need_tlist_eval = true;
		return tlist;
	}

	/*
	 * Otherwise, start with a "flattened" tlist (having just the Vars
	 * mentioned in the targetlist and HAVING qual).  Note this includes Vars
	 * used in resjunk items, so we are covering the needs of ORDER BY and
	 * window specifications.  Vars used within Aggrefs will be pulled out
	 * here, too.
	 */
	sub_tlist = flatten_tlist(tlist,
							  PVC_RECURSE_AGGREGATES,
							  PVC_INCLUDE_PLACEHOLDERS);
	extravars = pull_var_clause(parse->havingQual,
								PVC_RECURSE_AGGREGATES,
								PVC_INCLUDE_PLACEHOLDERS);
	sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
	list_free(extravars);

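	/*
	 * GPDB: also add any Vars used in window frame bound expressions (e.g.
	 * the "x" in ROWS BETWEEN x PRECEDING AND CURRENT ROW) to the subplan
	 * tlist, so they are available where the frame bounds are evaluated.
	 */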
	{
		ListCell *lc;

		foreach(lc, root->parse->windowClause)
		{
			WindowClause *window = (WindowClause *) lfirst(lc);

			extravars = pull_var_clause(window->startOffset,
										PVC_REJECT_AGGREGATES,
										PVC_INCLUDE_PLACEHOLDERS);
			sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
			list_free(extravars);

			extravars = pull_var_clause(window->endOffset,
										PVC_REJECT_AGGREGATES,
										PVC_INCLUDE_PLACEHOLDERS);
			sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
			list_free(extravars);
		}
	}

	/*
	 * XXX Set need_tlist_eval to true for group queries.
	 *
	 * Reason: We are doing an aggregate on top.  No matter what we do, hash
	 * or sort, we may spill.  Every unnecessary column means useless I/O,
	 * and heap_form/deform_tuple.  It is almost always better to do the
	 * projection.
	 */
	if (parse->groupClause)
		*need_tlist_eval = true;
	else
		*need_tlist_eval = false;		/* only eval if not flat tlist */

	/*
	 * If grouping, create sub_tlist entries for all GROUP BY expressions
	 * (GROUP BY items that are simple Vars should be in the list already),
	 * and make an array showing where the group columns are in the sub_tlist.
	 */
	numCols = num_distcols_in_grouplist(parse->groupClause);

	if (numCols > 0)
	{
		int			keyno = 0;
		AttrNumber *grpColIdx;
		Oid		   *grpOperators;
		List	   *grouptles;
		List	   *sortops;
		List	   *eqops;
		ListCell   *lc_tle;
		ListCell   *lc_eqop;

		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
		grpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
		*groupColIdx = grpColIdx;
		*groupOperators = grpOperators;

		get_sortgroupclauses_tles(parse->groupClause, tlist,
								  &grouptles, &sortops, &eqops);
		Assert(numCols == list_length(grouptles) &&
			   numCols == list_length(sortops) &&
			   numCols == list_length(eqops));
		forboth(lc_tle, grouptles, lc_eqop, eqops)
		{
			Node	   *groupexpr;
			TargetEntry *tle;
			TargetEntry *sub_tle = NULL;
			ListCell   *sl = NULL;

			tle = (TargetEntry *) lfirst(lc_tle);
			groupexpr = (Node *) tle->expr;

			/*
			 * Find or make a matching sub_tlist entry.
			 */
			foreach(sl, sub_tlist)
			{
				sub_tle = (TargetEntry *) lfirst(sl);
				if (equal(groupexpr, sub_tle->expr)
					&& (sub_tle->ressortgroupref == 0))
					break;
			}
			if (!sl)
			{
				sub_tle = makeTargetEntry((Expr *) groupexpr,
										  list_length(sub_tlist) + 1,
										  NULL,
										  false);
				sub_tlist = lappend(sub_tlist, sub_tle);
				*need_tlist_eval = true;		/* it's not flat anymore */
			}

			/* Set its group reference and save its resno */
			sub_tle->ressortgroupref = tle->ressortgroupref;
			grpColIdx[keyno] = sub_tle->resno;
			grpOperators[keyno] = lfirst_oid(lc_eqop);
			if (!OidIsValid(grpOperators[keyno]))           /* shouldn't happen */
				elog(ERROR, "could not find equality operator for grouping column");
			keyno++;
		}
		Assert(keyno == numCols);
	}

	return sub_tlist;
}

/*
 * locate_grouping_columns
 *		Locate grouping columns in the tlist chosen by create_plan.
 *
 * This is only needed if we don't use the sub_tlist chosen by
 * make_subplanTargetList.	We have to forget the column indexes found
 * by that routine and re-locate the grouping vars in the real sub_tlist.
 */
static void
locate_grouping_columns(PlannerInfo *root,
						List *tlist,
						List *sub_tlist,
						AttrNumber *groupColIdx)
{
	int			keyno = 0;
	List	   *grouptles;
	List	   *sortops;
	List	   *eqops;
	ListCell   *ge;

	/*
	 * No work unless grouping.
	 */
	if (!root->parse->groupClause)
	{
		Assert(groupColIdx == NULL);
		return;
	}
	Assert(groupColIdx != NULL);

	get_sortgroupclauses_tles(root->parse->groupClause, tlist,
							  &grouptles, &sortops, &eqops);

	foreach (ge, grouptles)
	{
		TargetEntry *groupte = (TargetEntry *)lfirst(ge);
		Node	*groupexpr;

		TargetEntry *te = NULL;
		ListCell   *sl;

		groupexpr = (Node *) groupte->expr;

		foreach(sl, sub_tlist)
		{
			te = (TargetEntry *) lfirst(sl);
			if (equal(groupexpr, te->expr))
				break;
		}
		if (!sl)
			elog(ERROR, "failed to locate grouping columns");

		groupColIdx[keyno++] = te->resno;
	}
}

/*
 * postprocess_setop_tlist
 *	  Fix up targetlist returned by plan_set_operations().
 *
 * We need to transpose sort key info from the orig_tlist into new_tlist.
 * NOTE: this would not be good enough if we supported resjunk sort keys
 * for results of set operations --- then, we'd need to project a whole
 * new tlist to evaluate the resjunk columns.  For now, just ereport if we
 * find any resjunk columns in orig_tlist.
 */
static List *
postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
{
	ListCell   *l;
	ListCell   *orig_tlist_item = list_head(orig_tlist);

	/* empty orig has no effect on info in new (MPP-2655) */
	if (orig_tlist_item == NULL)
		return new_tlist;

	foreach(l, new_tlist)
	{
		TargetEntry *new_tle = (TargetEntry *) lfirst(l);
		TargetEntry *orig_tle;

		/* ignore resjunk columns in setop result */
		if (new_tle->resjunk)
			continue;

		Assert(orig_tlist_item != NULL);
		orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
		orig_tlist_item = lnext(orig_tlist_item);
		if (orig_tle->resjunk)	/* should not happen */
			elog(ERROR, "resjunk output columns are not implemented");
		Assert(new_tle->resno == orig_tle->resno);
		new_tle->ressortgroupref = orig_tle->ressortgroupref;
	}
	if (orig_tlist_item != NULL)
		elog(ERROR, "resjunk output columns are not implemented");
	return new_tlist;
}

/*
 * select_active_windows
 *		Create a list of the "active" window clauses (ie, those referenced
 *		by non-deleted WindowFuncs) in the order they are to be executed.
 */
static List *
select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
{
	List	   *result;
	List	   *actives;
	ListCell   *lc;

	/* First, make a list of the active windows */
	actives = NIL;
	foreach(lc, root->parse->windowClause)
	{
		WindowClause *wc = (WindowClause *) lfirst(lc);

		/* It's only active if wflists shows some related WindowFuncs */
		Assert(wc->winref <= wflists->maxWinRef);
		if (wflists->windowFuncs[wc->winref] != NIL)
			actives = lappend(actives, wc);
	}

	/*
	 * Now, ensure that windows with identical partitioning/ordering clauses
	 * are adjacent in the list.  This is required by the SQL standard, which
	 * says that only one sort is to be used for such windows, even if they
	 * are otherwise distinct (eg, different names or framing clauses).
	 *
	 * There is room to be much smarter here, for example detecting whether
	 * one window's sort keys are a prefix of another's (so that sorting for
	 * the latter would do for the former), or putting windows first that
	 * match a sort order available for the underlying query.  For the moment
	 * we are content with meeting the spec.
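	 *
	 * For example (illustrative): windows declared as
	 * OVER (PARTITION BY p ORDER BY o ROWS 1 PRECEDING) and
	 * OVER (PARTITION BY p ORDER BY o) compare equal here (framing is
	 * ignored), so they are kept adjacent and can share one sort on (p, o).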
	 */
	result = NIL;
	while (actives != NIL)
	{
		WindowClause *wc = (WindowClause *) linitial(actives);
		ListCell   *prev;
		ListCell   *next;

		/* Move wc from actives to result */
		actives = list_delete_first(actives);
		result = lappend(result, wc);

		/* Now move any matching windows from actives to result */
		prev = NULL;
		for (lc = list_head(actives); lc; lc = next)
		{
			WindowClause *wc2 = (WindowClause *) lfirst(lc);

			next = lnext(lc);
			/* framing options are NOT to be compared here! */
			if (equal(wc->partitionClause, wc2->partitionClause) &&
				equal(wc->orderClause, wc2->orderClause))
			{
				actives = list_delete_cell(actives, lc, prev);
				result = lappend(result, wc2);
			}
			else
				prev = lc;
		}
	}

	return result;
}

/*
 * add_volatile_sort_exprs
 *		Identify any volatile sort/group expressions used by the active
 *		windows, and add them to window_tlist if not already present.
 *		Return the modified window_tlist.
 */
static List *
add_volatile_sort_exprs(List *window_tlist, List *tlist, List *activeWindows)
{
	Bitmapset  *sgrefs = NULL;
	ListCell   *lc;
	Bitmapset  *firstOrderColRefs = NULL;

	/* First, collect the sortgrouprefs of the windows into a bitmapset */
	foreach(lc, activeWindows)
	{
		WindowClause *wc = (WindowClause *) lfirst(lc);
		ListCell   *lc2;
		bool		firstOrderCol = true;

		foreach(lc2, wc->partitionClause)
		{
			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);

			sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
		}
		foreach(lc2, wc->orderClause)
		{
			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);

			sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);

			if (firstOrderCol)
				firstOrderColRefs = bms_add_member(firstOrderColRefs, sortcl->tleSortGroupRef);
			firstOrderCol = false;
		}
	}

	/*
	 * Now scan the original tlist to find the referenced expressions. Any
	 * that are volatile must be added to window_tlist.
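	 * (For example, a window ORDER BY random() expression must be computed
	 * once below the window plan; recomputing it above could disagree with
	 * the order actually sorted on.)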
	 *
	 * Note: we know that the input window_tlist contains no items marked with
	 * ressortgrouprefs, so we don't have to worry about collisions of the
	 * reference numbers.
	 */
	foreach(lc, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		if (tle->ressortgroupref != 0 &&
			bms_is_member(tle->ressortgroupref, sgrefs) &&
			(bms_is_member(tle->ressortgroupref, firstOrderColRefs) ||
			 contain_volatile_functions((Node *) tle->expr)))
		{
			TargetEntry *newtle;

			newtle = makeTargetEntry(tle->expr,
									 list_length(window_tlist) + 1,
									 NULL,
									 false);
			newtle->ressortgroupref = tle->ressortgroupref;
			window_tlist = lappend(window_tlist, newtle);
		}
	}

	return window_tlist;
}

/*
 * make_pathkeys_for_window
 *		Create a pathkeys list describing the required input ordering
 *		for the given WindowClause.
 *
 * The required ordering is first the PARTITION keys, then the ORDER keys.
 * In the future we might try to implement windowing using hashing, in which
 * case the ordering could be relaxed, but for now we always sort.
 */
static List *
make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
						 List *tlist, bool canonicalize)
{
	List	   *window_pathkeys;
	List	   *window_sortclauses;

	/* Throw error if can't sort */
	if (!grouping_is_sortable(wc->partitionClause))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not implement window PARTITION BY"),
				 errdetail("Window partitioning columns must be of sortable datatypes.")));
	if (!grouping_is_sortable(wc->orderClause))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not implement window ORDER BY"),
		errdetail("Window ordering columns must be of sortable datatypes.")));

	/* Okay, make the combined pathkeys */
	window_sortclauses = list_concat(list_copy(wc->partitionClause),
									 list_copy(wc->orderClause));
	window_pathkeys = make_pathkeys_for_sortclauses(root,
													window_sortclauses,
													tlist,
													canonicalize);
	list_free(window_sortclauses);
	return window_pathkeys;
}

/*----------
 * get_column_info_for_window
 *		Get the partitioning/ordering column numbers and equality operators
 *		for a WindowAgg node.
 *
 * This depends on the behavior of make_pathkeys_for_window()!
 *
 * We are given the target WindowClause and an array of the input column
 * numbers associated with the resulting pathkeys.  In the easy case, there
 * are the same number of pathkey columns as partitioning + ordering columns
 * and we just have to copy some data around.  However, it's possible that
 * some of the original partitioning + ordering columns were eliminated as
 * redundant during the transformation to pathkeys.  (This can happen even
 * though the parser gets rid of obvious duplicates.  A typical scenario is a
 * window specification "PARTITION BY x ORDER BY y" coupled with a clause
 * "WHERE x = y" that causes the two sort columns to be recognized as
 * redundant.)	In that unusual case, we have to work a lot harder to
 * determine which keys are significant.
 *
 * The method used here is a bit brute-force: add the sort columns to a list
 * one at a time and note when the resulting pathkey list gets longer.  But
 * it's a sufficiently uncommon case that a faster way doesn't seem worth
 * the amount of code refactoring that'd be needed.
 *----------
 */
static void
get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
						   int numSortCols, AttrNumber *sortColIdx,
						   int *partNumCols,
						   AttrNumber **partColIdx,
						   Oid **partOperators,
						   int *ordNumCols,
						   AttrNumber **ordColIdx,
						   Oid **ordOperators)
{
	int			numPart = list_length(wc->partitionClause);
	int			numOrder = list_length(wc->orderClause);

	if (numSortCols == numPart + numOrder)
	{
		/* easy case */
		*partNumCols = numPart;
		*partColIdx = sortColIdx;
		*partOperators = extract_grouping_ops(wc->partitionClause);
		*ordNumCols = numOrder;
		*ordColIdx = sortColIdx + numPart;
		*ordOperators = extract_grouping_ops(wc->orderClause);
	}
	else
	{
		List	   *sortclauses;
		List	   *pathkeys;
		int			scidx;
		ListCell   *lc;

		/* first, allocate what's certainly enough space for the arrays */
		*partNumCols = 0;
		*partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
		*partOperators = (Oid *) palloc(numPart * sizeof(Oid));
		*ordNumCols = 0;
		*ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
		*ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
		sortclauses = NIL;
		pathkeys = NIL;
		scidx = 0;
		foreach(lc, wc->partitionClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
			List	   *new_pathkeys;

			sortclauses = lappend(sortclauses, sgc);
			new_pathkeys = make_pathkeys_for_sortclauses(root,
														 sortclauses,
														 tlist,
														 true);
			if (list_length(new_pathkeys) > list_length(pathkeys))
			{
				/* this sort clause is actually significant */
				(*partColIdx)[*partNumCols] = sortColIdx[scidx++];
				(*partOperators)[*partNumCols] = sgc->eqop;
				(*partNumCols)++;
				pathkeys = new_pathkeys;
			}
		}
		foreach(lc, wc->orderClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
			List	   *new_pathkeys;

			sortclauses = lappend(sortclauses, sgc);
			new_pathkeys = make_pathkeys_for_sortclauses(root,
														 sortclauses,
														 tlist,
														 true);
			if (list_length(new_pathkeys) > list_length(pathkeys))
			{
				/* this sort clause is actually significant */
				(*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];
				(*ordOperators)[*ordNumCols] = sgc->eqop;
				(*ordNumCols)++;
				pathkeys = new_pathkeys;
			}
		}
		/* complain if we didn't eat exactly the right number of sort cols */
		if (scidx != numSortCols)
			elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
	}
}


/*
 * Produce the canonical form of a GROUP BY clause given the parse
 * tree form.
 *
 * The result is a CanonicalGroupingSets, which contains a list of
 * Bitmapsets.  Each Bitmapset contains the sort-group reference
 * values of the attributes in one of the grouping sets specified in
 * the GROUP BY clause.  The number of list elements is the number of
 * grouping sets specified.
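 *
 * Illustrative example: GROUP BY ROLLUP(a, b), where a and b carry
 * sort-group refs 1 and 2, canonicalizes to three sets: {1,2}, {1}, {}
 * (stored in descending size order, with duplicate sets merged and
 * counted).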
 */
static CanonicalGroupingSets *
make_canonical_groupingsets(List *groupClause)
{
	CanonicalGroupingSets *canonical_grpsets = 
		(CanonicalGroupingSets *) palloc0(sizeof(CanonicalGroupingSets));
	ListCell *lc;
	List *ord_grping = NIL; /* the ordinary grouping */
	List *rollups = NIL;    /* the grouping sets from ROLLUP */
	List *grpingsets = NIL; /* the grouping sets from GROUPING SETS */
	List *cubes = NIL;      /* the grouping sets from CUBE */
	Bitmapset *bms = NULL;
	List *final_grpingsets = NIL;
	List *list_grpingsets = NIL;
	int setno;
	int prev_setno = 0;

	if (groupClause == NIL)
		return canonical_grpsets;

	foreach (lc, groupClause)
	{
		GroupingClause *gc;

		Node *node = lfirst(lc);

		if (node == NULL)
			continue;

		/* Note that the top-level empty sets have been removed
		 * in the parser.
		 */
		Assert(IsA(node, SortGroupClause) ||
			   IsA(node, GroupingClause) ||
			   IsA(node, List));

		if (IsA(node, SortGroupClause) ||
			IsA(node, List))
		{
			ord_grping = lappend(ord_grping,
								 canonicalize_colref_list(node));
			continue;
		}

		gc = (GroupingClause *)node;
		switch (gc->groupType)
		{
			case GROUPINGTYPE_ROLLUP:
				rollups = lappend(rollups,
								  rollup_gs_list(canonicalize_gs_list(gc->groupsets, true)));
				break;
			case GROUPINGTYPE_CUBE:
				cubes = lappend(cubes,
								cube_gs_list(canonicalize_gs_list(gc->groupsets, true)));
				break;
			case GROUPINGTYPE_GROUPING_SETS:
				grpingsets = lappend(grpingsets,
									 canonicalize_gs_list(gc->groupsets, false));
				break;
			default:
				elog(ERROR, "invalid grouping set");
		}
	}

	/* Obtain the cartesian product of grouping sets generated for ordinary
	 * grouping sets, rollups, cubes, and grouping sets.
	 *
	 * We apply a small optimization here. We always append grouping sets
	 * generated for rollups, cubes and grouping sets to grouping sets for
	 * ordinary sets. This makes it easier to tell if there is a partial
	 * rollup. Consider the example of GROUP BY rollup(i,j),k. There are
	 * three grouping sets for rollup(i,j): (i,j), (i), (). If we append
	 * k after each grouping set for rollups, we get three sets:
	 * (i,j,k), (i,k) and (k). We can not easily tell that this is a partial
	 * rollup. However, if we append each grouping set after k, we get
	 * these three sets: (k,i,j), (k,i), (k), which is obviously a partial
	 * rollup.
	 */

	/* First, we bring all columns in ordinary grouping sets together into
	 * one list.
	 */
	foreach (lc, ord_grping)
	{
	    Bitmapset *sub_bms = (Bitmapset *)lfirst(lc);
		bms = bms_add_members(bms, sub_bms);
	}

	final_grpingsets = lappend(final_grpingsets, bms);

	/* Make the list of grouping sets */
	if (rollups)
		list_grpingsets = list_concat(list_grpingsets, rollups);
	if (cubes)
		list_grpingsets = list_concat(list_grpingsets, cubes);
	if (grpingsets)
		list_grpingsets = list_concat(list_grpingsets, grpingsets);

	/* Obtain the cartesian product of grouping sets generated from ordinary
	 * grouping sets, rollups, cubes, and grouping sets.
	 */
	foreach (lc, list_grpingsets)
	{
		List *bms_list = (List *)lfirst(lc);
		ListCell *tmp_lc;
		List *tmp_list;

		tmp_list = final_grpingsets;
		final_grpingsets = NIL;

		foreach (tmp_lc, tmp_list)
		{
			Bitmapset *tmp_bms = (Bitmapset *)lfirst(tmp_lc);
			ListCell *bms_lc;

			foreach (bms_lc, bms_list)
			{
				bms = bms_copy(tmp_bms);
				bms = bms_add_members(bms, (Bitmapset *)lfirst(bms_lc));
				final_grpingsets = lappend(final_grpingsets, bms);
			}
		}
	}

	/* Sort final_grpingsets */
	sort_canonical_gs_list(final_grpingsets,
						   &(canonical_grpsets->ngrpsets),
						   &(canonical_grpsets->grpsets));

	/* Combine duplicate grouping sets and set the counts for
	 * each grouping set.
	 */
	canonical_grpsets->grpset_counts =
		(int *)palloc0(canonical_grpsets->ngrpsets * sizeof(int));
	prev_setno = 0;
	canonical_grpsets->grpset_counts[0] = 1;
	for (setno = 1; setno<canonical_grpsets->ngrpsets; setno++)
	{
		if (bms_equal(canonical_grpsets->grpsets[setno],
					  canonical_grpsets->grpsets[prev_setno]))
		{
			canonical_grpsets->grpset_counts[prev_setno]++;
			if (canonical_grpsets->grpsets[setno])
				pfree(canonical_grpsets->grpsets[setno]);
		}

		else
		{
			prev_setno++;
			canonical_grpsets->grpsets[prev_setno] =
				canonical_grpsets->grpsets[setno];
			canonical_grpsets->grpset_counts[prev_setno]++;
		}
	}
	/* Reset ngrpsets to eliminate duplicate grouping sets */
	canonical_grpsets->ngrpsets = prev_setno + 1;

	/* Obtain the number of distinct columns appearing in these
	 * grouping sets.
	 */
	{
		Bitmapset *distcols = NULL;
		for (setno =0; setno < canonical_grpsets->ngrpsets; setno++)
			distcols =
				bms_add_members(distcols, canonical_grpsets->grpsets[setno]);
		
		canonical_grpsets->num_distcols = bms_num_members(distcols);
		bms_free(distcols);
	}
	

	/* Release spaces */
	list_free_deep(ord_grping);
	list_free_deep(list_grpingsets);
	list_free(final_grpingsets);
	
	return canonical_grpsets;
}

/* Produce the canonical representation of a column reference list.
 *
 * A column reference list (in SQL) is a comma-delimited list of
 * column references which are represented by the parser as a
 * List of GroupClauses.  No nesting is allowed in column reference 
 * lists.
 *
 * As a convenience, this function also recognizes a bare column
 * reference.
 *
 * The result is a Bitmapset of the sort-group-ref values in the list.
 */
static Bitmapset* canonicalize_colref_list(Node * node)
{
	ListCell *lc;
	SortGroupClause *gc;
	Bitmapset* gs = NULL;
	
	if ( node == NULL )
		elog(ERROR,"invalid column reference list");
	
	if ( IsA(node, SortGroupClause) )
	{
		gc = (SortGroupClause *) node;
		return bms_make_singleton(gc->tleSortGroupRef);
	}
	
	if ( !IsA(node, List) )
		elog(ERROR,"invalid column reference list");
	
	foreach (lc, (List*)node)
	{
		Node *cr = lfirst(lc);
		
		if ( cr == NULL )
			continue;
			
		if ( !IsA(cr, SortGroupClause) )
			elog(ERROR,"invalid column reference list");

		gc = (SortGroupClause *) cr;
		gs = bms_add_member(gs, gc->tleSortGroupRef);	
	}
	return gs;
}

/* Produce the list of canonical grouping sets corresponding to a
 * grouping set list or an ordinary grouping set list.
 * 
 * An ordinary grouping set list (in SQL) is a comma-delimited list 
 * of ordinary grouping sets.  
 * 
 * Each ordinary grouping set is either a grouping column reference 
 * or a parenthesized list of grouping column references.  No nesting 
 * is allowed.  
 *
 * A grouping set list (in SQL) is a comma-delimited list of grouping 
 * sets.  
 *
 * Each grouping set is either an ordinary grouping set, a rollup list, 
 * a cube list, the empty grouping set, or (recursively) a grouping set 
 * list.
 *
 * The parse tree form of an ordinary grouping set is a list containing
 * SortGroupClauses and lists of SortGroupClauses (without nesting).  In the
 * case of a (general) grouping set, the parse tree list may also include
 * NULLs and GroupingClauses.
 *
 * The result is a list of bit map sets.
 */
static List *canonicalize_gs_list(List *gsl, bool ordinary)
{
	ListCell *lc;
	List *list = NIL;

	foreach (lc, gsl)
	{
		Node *node = lfirst(lc);

		if ( node == NULL )
		{
			if ( ordinary )
				elog(ERROR,"invalid ordinary grouping set");
			
			list = lappend(list, NIL); /* empty grouping set */
		}
		else if ( IsA(node, SortGroupClause) || IsA(node, List) )
		{
			/* ordinary grouping set */
			list = lappend(list, canonicalize_colref_list(node));
		}
		else if ( IsA(node, GroupingClause) )
		{	
			List *gs = NIL;
			GroupingClause *gc = (GroupingClause*)node;
			
			if ( ordinary )
				elog(ERROR,"invalid ordinary grouping set");
				
			switch ( gc->groupType )
			{
				case GROUPINGTYPE_ROLLUP:
					gs = rollup_gs_list(canonicalize_gs_list(gc->groupsets, true));
					break;
				case GROUPINGTYPE_CUBE:
					gs = cube_gs_list(canonicalize_gs_list(gc->groupsets, true));
					break;
				case GROUPINGTYPE_GROUPING_SETS:
					gs = canonicalize_gs_list(gc->groupsets, false);
					break;
				default:
					elog(ERROR,"invalid grouping set");
			}
			list = list_concat(list,gs);
		}
		else
		{
			elog(ERROR,"invalid grouping set list");
		}
	}
	return list;
}

/* Produce the list of N+1 canonical grouping sets corresponding
 * to the rollup of the given list of N canonical grouping sets.
 * These N+1 grouping sets are listed in the descending order
 * based on the number of columns.
 *
 * Argument and result are both a list of bit map sets.
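 *
 * For example (illustrative): given the singleton sets {a}, {b}, {c}
 * from ROLLUP(a,b,c), the result is {a,b,c}, {a,b}, {a}, {}.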
 */
static List *rollup_gs_list(List *gsl)
{
	ListCell *lc;
	Bitmapset **bms;
	int i, n = list_length(gsl);
	
	if ( n == 0 )
		elog(ERROR,"invalid ordinary grouping set list");
	
	if ( n > 1 )
	{
		/* Reverse the elements in gsl */
		List *new_gsl = NIL;
		foreach (lc, gsl)
		{
			new_gsl = lcons(lfirst(lc), new_gsl);
		}
		list_free(gsl);
		gsl = new_gsl;

		bms = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
		i = 0;
		foreach (lc, gsl)
		{
			bms[i++] = (Bitmapset*)lfirst(lc);
		}
		for ( i = n-2; i >= 0; i-- )
		{
			bms[i] = bms_add_members(bms[i], bms[i+1]);
		}
		pfree(bms);
	}

	return lappend(gsl, NULL);
}

/* Subroutine for cube_gs_list. */
static List *add_gs_combinations(List *list, int n, int i,
								 Bitmapset **base, Bitmapset **work)
{
	if ( i < n )
	{
		work[i] = base[i];
		list = add_gs_combinations(list, n, i+1, base, work);
		work[i] = NULL;
		list = add_gs_combinations(list, n, i+1, base, work);	
	}
	else
	{
		Bitmapset *gs = NULL;
		int j;
		for ( j = 0; j < n; j++ )
		{
			gs = bms_add_members(gs, work[j]);
		}
		list = lappend(list,gs);
	}
	return list;
}

/* Produce the list of 2^N canonical grouping sets corresponding
 * to the cube of the given list of N canonical grouping sets.
 *
 * We could do this more efficiently, but the number of grouping
 * sets should be small, so don't bother.
 *
 * Argument and result are both a list of bit map sets.
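 *
 * For example (illustrative): CUBE(a, b) expands to the four sets
 * {a,b}, {a}, {b}, {}.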
 */
static List *cube_gs_list(List *gsl)
{
	ListCell *lc;
	Bitmapset **bms_base;
	Bitmapset **bms_work;
	int i, n = list_length(gsl);
	
	if ( n == 0 )
		elog(ERROR,"invalid ordinary grouping set list");
	
	bms_base = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
	bms_work = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
	i = 0;
	foreach (lc, gsl)
	{
		bms_work[i] = NULL;
		bms_base[i++] = (Bitmapset*)lfirst(lc);
	}

	return add_gs_combinations(NIL, n, 0, bms_base, bms_work);
}

/* Subroutine for sort_canonical_gs_list. */
static int gs_compare(const void *a, const void*b)
{
	/* Put the larger grouping sets before smaller ones. */
	return (0-bms_compare(*(Bitmapset**)a, *(Bitmapset**)b));
}

/* Produce a sorted array of Bitmapsets from the given list of Bitmapsets in
 * descending order.
 */
static void sort_canonical_gs_list(List *gs, int *p_nsets, Bitmapset ***p_sets)
{
	ListCell *lc;
	int nsets = list_length(gs);
	Bitmapset **sets = palloc(nsets*sizeof(Bitmapset*));
	int i = 0;
	
	foreach (lc, gs)
	{
		sets[i++] =  (Bitmapset*)lfirst(lc);
	}
	
	qsort(sets, nsets, sizeof(Bitmapset*), gs_compare);
	
	Assert( p_nsets != NULL && p_sets != NULL );
	
	*p_nsets = nsets;
	*p_sets = sets;
}

/*
 * In any plan where we are doing multi-phase limit, the first phase needs
 * to take the offset into account.
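 *
 * For example (illustrative): with LIMIT 10 OFFSET 5, the preliminary
 * limit must let each input stream return up to 10 + 5 = 15 tuples, since
 * any of them might survive the final OFFSET/LIMIT applied afterwards.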
 */
static Plan *
pushdown_preliminary_limit(Plan *plan, Node *limitCount, int64 count_est, Node *limitOffset, int64 offset_est)
{
	Node *precount = copyObject(limitCount);
	int64 precount_est = count_est;
	Plan *result_plan = plan;

	/*
	 * If we've specified an offset *and* a limit, we need to collect
	 * tuples from 0 -> count + offset, so add the offset to each QE's
	 * requested contribution.
	 * (MPP-1370: Do it even if no ORDER BY was specified.)
	 */
	if (precount && limitOffset)
	{
		precount = (Node*)make_op(NULL,
								  list_make1(makeString(pstrdup("+"))),
								  copyObject(limitOffset),
								  precount,
								  -1);
		precount_est += offset_est;
	}
			
	if (precount != NULL)
	{
		/*
		 * Add a preliminary LIMIT on the partitioned results. This may
		 * reduce the amount of work done on the QEs.
		 */
		result_plan = (Plan *) make_limit(result_plan,
										  NULL,
										  precount,
										  0,
										  precount_est);

		result_plan->flow = pull_up_Flow(result_plan, result_plan->lefttree);
	}

	return result_plan;
}


/*
 * isSimplyUpdatableQuery -
 *  determine whether a query is a simply updatable scan of a relation
 *
 * A query is simply updatable if, and only if, it...
 * - has no window clauses
 * - has no sort clauses
 * - has no grouping, having, distinct clauses, or simple aggregates
 * - has no subqueries
 * - has no LIMIT/OFFSET
 * - references only one range table (i.e. no joins, self-joins)
 *   - this range table must itself be updatable
 */
static bool
isSimplyUpdatableQuery(Query *query)
{
	if (query->commandType == CMD_SELECT &&
		query->windowClause == NIL &&
		query->sortClause == NIL &&
		query->groupClause == NIL &&
		query->havingQual == NULL &&
		query->distinctClause == NIL &&
		!query->hasAggs &&
		!query->hasSubLinks &&
		query->limitCount == NULL &&
		query->limitOffset == NULL &&
		list_length(query->rtable) == 1)
	{
		RangeTblEntry *rte = (RangeTblEntry *) linitial(query->rtable);
		if (isSimplyUpdatableRelation(rte->relid, true))
			return true;
	}
	return false;
}