/*-------------------------------------------------------------------------
 *
 * planner.c
 *	  The query optimizer external interface.
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/plan/planner.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>
#include <math.h>

#include "access/htup_details.h"
#include "access/parallel.h"
#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/pg_constraint_fn.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "lib/bipartite_match.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/parse_agg.h"
#include "rewrite/rewriteManip.h"
#include "storage/dsm_impl.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


/* GUC parameters */
double		cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
int			force_parallel_mode = FORCE_PARALLEL_OFF;

/* Hook for plugins to get control in planner() */
planner_hook_type planner_hook = NULL;


/* Expression kind codes for preprocess_expression */
#define EXPRKIND_QUAL			0
#define EXPRKIND_TARGET			1
#define EXPRKIND_RTFUNC			2
#define EXPRKIND_RTFUNC_LATERAL 3
#define EXPRKIND_VALUES			4
#define EXPRKIND_VALUES_LATERAL 5
#define EXPRKIND_LIMIT			6
#define EXPRKIND_APPINFO		7
#define EXPRKIND_PHV			8
#define EXPRKIND_TABLESAMPLE	9

/* Passthrough data for standard_qp_callback */
typedef struct
{
	List	   *tlist;			/* preprocessed query targetlist */
	List	   *activeWindows;	/* active windows, if any */
	List	   *groupClause;	/* overrides parse->groupClause */
} standard_qp_extra;

/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static Plan *inheritance_planner(PlannerInfo *root);
static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
static void preprocess_rowmarks(PlannerInfo *root);
static double preprocess_limit(PlannerInfo *root,
				 double tuple_fraction,
				 int64 *offset_est, int64 *count_est);
static bool limit_needed(Query *parse);
static void remove_useless_groupby_columns(PlannerInfo *root);
static List *preprocess_groupclause(PlannerInfo *root, List *force);
static List *extract_rollup_sets(List *groupingSets);
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
static void standard_qp_callback(PlannerInfo *root, void *extra);
static bool choose_hashed_grouping(PlannerInfo *root,
					   double tuple_fraction, double limit_tuples,
					   double path_rows, int path_width,
					   Path *cheapest_path, Path *sorted_path,
					   double dNumGroups, AggClauseCosts *agg_costs);
static bool choose_hashed_distinct(PlannerInfo *root,
					   double tuple_fraction, double limit_tuples,
					   double path_rows, int path_width,
					   Cost cheapest_startup_cost, Cost cheapest_total_cost,
					   Cost sorted_startup_cost, Cost sorted_total_cost,
					   List *sorted_pathkeys,
					   double dNumDistinctRows);
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
					   AttrNumber **groupColIdx, bool *need_tlist_eval);
static int	get_grouping_column_index(Query *parse, TargetEntry *tle);
static void locate_grouping_columns(PlannerInfo *root,
						List *tlist,
						List *sub_tlist,
						AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static List *make_windowInputTargetList(PlannerInfo *root,
						   List *tlist, List *activeWindows);
static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
						 List *tlist);
static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
						   List *tlist,
						   int numSortCols, AttrNumber *sortColIdx,
						   int *partNumCols,
						   AttrNumber **partColIdx,
						   Oid **partOperators,
						   int *ordNumCols,
						   AttrNumber **ordColIdx,
						   Oid **ordOperators);
static Plan *build_grouping_chain(PlannerInfo *root,
					 Query *parse,
					 List *tlist,
					 bool need_sort_for_grouping,
					 List *rollup_groupclauses,
					 List *rollup_lists,
					 AttrNumber *groupColIdx,
					 AggClauseCosts *agg_costs,
					 long numGroups,
					 Plan *result_plan);

/*****************************************************************************
 *
 *	   Query optimizer entry point
 *
 * To support loadable plugins that monitor or modify planner behavior,
 * we provide a hook variable that lets a plugin get control before and
 * after the standard planning process.  The plugin would normally call
 * standard_planner().
 *
 * Note to plugin authors: standard_planner() scribbles on its Query input,
 * so you'd better copy that data structure if you want to plan more than once.
 *
 *****************************************************************************/
PlannedStmt *
planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result;

	if (planner_hook)
		result = (*planner_hook) (parse, cursorOptions, boundParams);
	else
		result = standard_planner(parse, cursorOptions, boundParams);
	return result;
}
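
/*
 * Illustrative sketch (not part of the original file): a loadable plugin
 * would typically install planner_hook from its _PG_init(), saving and
 * chaining to any previously installed hook.  The names "prev_planner"
 * and "my_planner" below are hypothetical.
 *
 *		static planner_hook_type prev_planner = NULL;
 *
 *		static PlannedStmt *
 *		my_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 *		{
 *			if (prev_planner)
 *				return prev_planner(parse, cursorOptions, boundParams);
 *			return standard_planner(parse, cursorOptions, boundParams);
 *		}
 *
 *		void
 *		_PG_init(void)
 *		{
 *			prev_planner = planner_hook;
 *			planner_hook = my_planner;
 *		}
 */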

PlannedStmt *
standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result;
	PlannerGlobal *glob;
	double		tuple_fraction;
	PlannerInfo *root;
	Plan	   *top_plan;
	ListCell   *lp,
			   *lr;

	/* Cursor options may come from caller or from DECLARE CURSOR stmt */
	if (parse->utilityStmt &&
		IsA(parse->utilityStmt, DeclareCursorStmt))
		cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;

	/*
	 * Set up global state for this planner invocation.  This data is needed
	 * across all levels of sub-Query that might exist in the given command,
	 * so we keep it in a separate struct that's linked to by each per-Query
	 * PlannerInfo.
	 */
	glob = makeNode(PlannerGlobal);

	glob->boundParams = boundParams;
	glob->subplans = NIL;
	glob->subroots = NIL;
	glob->rewindPlanIDs = NULL;
	glob->finalrtable = NIL;
	glob->finalrowmarks = NIL;
	glob->resultRelations = NIL;
	glob->relationOids = NIL;
	glob->invalItems = NIL;
	glob->nParamExec = 0;
	glob->lastPHId = 0;
	glob->lastRowMarkId = 0;
	glob->lastPlanNodeId = 0;
	glob->transientPlan = false;
	glob->hasRowSecurity = false;
	glob->hasForeignJoin = false;

	/*
	 * Assess whether it's feasible to use parallel mode for this query. We
	 * can't do this in a standalone backend, or if the command will try to
	 * modify any data, or if this is a cursor operation, or if GUCs are set
	 * to values that don't permit parallelism, or if parallel-unsafe
	 * functions are present in the query tree.
	 *
	 * For now, we don't try to use parallel mode if we're running inside a
	 * parallel worker.  We might eventually be able to relax this
	 * restriction, but for now it seems best not to have parallel workers
	 * trying to create their own parallel workers.
	 *
	 * We can't use parallelism in serializable mode because the predicate
	 * locking code is not parallel-aware.  It's not catastrophic if someone
	 * tries to run a parallel plan in serializable mode; it just won't get
	 * any workers and will run serially.  But it seems like a good heuristic
	 * to assume that the same serialization level will be in effect at plan
	 * time and execution time, so don't generate a parallel plan if we're in
	 * serializable mode.
	 */
	glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
		IsUnderPostmaster && dynamic_shared_memory_type != DSM_IMPL_NONE &&
		parse->commandType == CMD_SELECT && !parse->hasModifyingCTE &&
		parse->utilityStmt == NULL && max_parallel_degree > 0 &&
		!IsParallelWorker() && !IsolationIsSerializable() &&
		!has_parallel_hazard((Node *) parse, true);

	/*
	 * glob->parallelModeNeeded should tell us whether it's necessary to
	 * impose the parallel mode restrictions, but we don't actually want to
	 * impose them unless we choose a parallel plan, so that people who
	 * mislabel their functions but don't use parallelism anyway aren't
	 * harmed. But when force_parallel_mode is set, we enable the restrictions
	 * whenever possible for testing purposes.
	 *
	 * glob->wholePlanParallelSafe should tell us whether it's OK to stick a
	 * Gather node on top of the entire plan.  However, it only needs to be
	 * accurate when force_parallel_mode is 'on' or 'regress', so we don't
	 * bother doing the work otherwise.  The value we set here is just a
	 * preliminary guess; it may get changed from true to false later, but not
	 * vice versa.
	 */
	if (force_parallel_mode == FORCE_PARALLEL_OFF || !glob->parallelModeOK)
	{
		glob->parallelModeNeeded = false;
		glob->wholePlanParallelSafe = false;	/* either false or don't care */
	}
	else
	{
		glob->parallelModeNeeded = true;
		glob->wholePlanParallelSafe =
			!has_parallel_hazard((Node *) parse, false);
	}

	/* Determine what fraction of the plan is likely to be scanned */
	if (cursorOptions & CURSOR_OPT_FAST_PLAN)
	{
		/*
		 * We have no real idea how many tuples the user will ultimately FETCH
		 * from a cursor, but it is often the case that he doesn't want 'em
		 * all, or would prefer a fast-start plan anyway so that he can
		 * process some of the tuples sooner.  Use a GUC parameter to decide
		 * what fraction to optimize for.
		 */
		tuple_fraction = cursor_tuple_fraction;

		/*
		 * We document cursor_tuple_fraction as simply being a fraction, which
		 * means the edge cases 0 and 1 have to be treated specially here.  We
		 * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
		 */
		if (tuple_fraction >= 1.0)
			tuple_fraction = 0.0;
		else if (tuple_fraction <= 0.0)
			tuple_fraction = 1e-10;
	}
	else
	{
		/* Default assumption is we need all the tuples */
		tuple_fraction = 0.0;
	}
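
	/*
	 * Example (illustrative, not in the original): with the default
	 * cursor_tuple_fraction of 0.1, a DECLARE CURSOR plan is optimized on
	 * the assumption that only about 10% of the result rows will actually
	 * be fetched, which favors fast-start plans over lowest-total-cost ones.
	 */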

	/* primary planning entry point (may recurse for subqueries) */
	top_plan = subquery_planner(glob, parse, NULL,
								false, tuple_fraction, &root);

	/*
	 * If creating a plan for a scrollable cursor, make sure it can run
	 * backwards on demand.  Add a Material node at the top at need.
	 */
	if (cursorOptions & CURSOR_OPT_SCROLL)
	{
		if (!ExecSupportsBackwardScan(top_plan))
			top_plan = materialize_finished_plan(top_plan);
	}

	/*
	 * At present, we don't copy subplans to workers.  The presence of a
	 * subplan in one part of the plan doesn't preclude the use of parallelism
	 * in some other part of the plan, but it does preclude the possibility of
	 * regarding the entire plan parallel-safe.
	 */
	if (glob->subplans != NULL)
		glob->wholePlanParallelSafe = false;

	/*
	 * Optionally add a Gather node for testing purposes, provided this is
	 * actually a safe thing to do.
	 */
	if (glob->wholePlanParallelSafe &&
		force_parallel_mode != FORCE_PARALLEL_OFF)
	{
		Gather	   *gather = makeNode(Gather);

		gather->plan.targetlist = top_plan->targetlist;
		gather->plan.qual = NIL;
		gather->plan.lefttree = top_plan;
		gather->plan.righttree = NULL;
		gather->num_workers = 1;
		gather->single_copy = true;
		gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
		root->glob->parallelModeNeeded = true;
		top_plan = &gather->plan;
	}
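
	/*
	 * Illustration (not in the original file): "SET force_parallel_mode = on"
	 * makes this branch wrap any parallel-safe plan in a single-copy Gather
	 * with one worker, exercising the parallel-execution machinery even for
	 * plans that would not otherwise use it; with 'regress', the Gather is
	 * additionally hidden from EXPLAIN output.
	 */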

	/*
	 * If any Params were generated, run through the plan tree and compute
	 * each plan node's extParam/allParam sets.  Ideally we'd merge this into
	 * set_plan_references' tree traversal, but for now it has to be separate
	 * because we need to visit subplans before not after main plan.
	 */
	if (glob->nParamExec > 0)
	{
		Assert(list_length(glob->subplans) == list_length(glob->subroots));
		forboth(lp, glob->subplans, lr, glob->subroots)
		{
			Plan	   *subplan = (Plan *) lfirst(lp);
			PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);

			SS_finalize_plan(subroot, subplan);
		}
		SS_finalize_plan(root, top_plan);
	}

	/* final cleanup of the plan */
	Assert(glob->finalrtable == NIL);
	Assert(glob->finalrowmarks == NIL);
	Assert(glob->resultRelations == NIL);
	top_plan = set_plan_references(root, top_plan);
	/* ... and the subplans (both regular subplans and initplans) */
	Assert(list_length(glob->subplans) == list_length(glob->subroots));
	forboth(lp, glob->subplans, lr, glob->subroots)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);
		PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);

		lfirst(lp) = set_plan_references(subroot, subplan);
	}

	/* build the PlannedStmt result */
	result = makeNode(PlannedStmt);

	result->commandType = parse->commandType;
	result->queryId = parse->queryId;
	result->hasReturning = (parse->returningList != NIL);
	result->hasModifyingCTE = parse->hasModifyingCTE;
	result->canSetTag = parse->canSetTag;
	result->transientPlan = glob->transientPlan;
	result->planTree = top_plan;
	result->rtable = glob->finalrtable;
	result->resultRelations = glob->resultRelations;
	result->utilityStmt = parse->utilityStmt;
	result->subplans = glob->subplans;
	result->rewindPlanIDs = glob->rewindPlanIDs;
	result->rowMarks = glob->finalrowmarks;
	result->relationOids = glob->relationOids;
	result->invalItems = glob->invalItems;
	result->nParamExec = glob->nParamExec;
	result->hasRowSecurity = glob->hasRowSecurity;
	result->parallelModeNeeded = glob->parallelModeNeeded;
	result->hasForeignJoin = glob->hasForeignJoin;

	return result;
}


/*--------------------
 * subquery_planner
 *	  Invokes the planner on a subquery.  We recurse to here for each
 *	  sub-SELECT found in the query tree.
 *
 * glob is the global state for the current planner run.
 * parse is the querytree produced by the parser & rewriter.
 * parent_root is the immediate parent Query's info (NULL at the top level).
 * hasRecursion is true if this is a recursive WITH query.
 * tuple_fraction is the fraction of tuples we expect will be retrieved.
 * tuple_fraction is interpreted as explained for grouping_planner, below.
 *
 * If subroot isn't NULL, we pass back the query's final PlannerInfo struct;
 * among other things this tells the output sort ordering of the plan.
 *
 * Basically, this routine does the stuff that should only be done once
 * per Query object.  It then calls grouping_planner.  At one time,
 * grouping_planner could be invoked recursively on the same Query object;
 * that's not currently true, but we keep the separation between the two
 * routines anyway, in case we need it again someday.
 *
 * subquery_planner will be called recursively to handle sub-Query nodes
 * found within the query's expressions and rangetable.
 *
 * Returns a query plan.
 *--------------------
 */
Plan *
subquery_planner(PlannerGlobal *glob, Query *parse,
				 PlannerInfo *parent_root,
				 bool hasRecursion, double tuple_fraction,
				 PlannerInfo **subroot)
{
	PlannerInfo *root;
	Plan	   *plan;
	List	   *newWithCheckOptions;
	List	   *newHaving;
	bool		hasOuterJoins;
	ListCell   *l;

	/* Create a PlannerInfo data structure for this subquery */
	root = makeNode(PlannerInfo);
	root->parse = parse;
	root->glob = glob;
	root->query_level = parent_root ? parent_root->query_level + 1 : 1;
	root->parent_root = parent_root;
	root->plan_params = NIL;
	root->outer_params = NULL;
	root->planner_cxt = CurrentMemoryContext;
	root->init_plans = NIL;
	root->cte_plan_ids = NIL;
	root->multiexpr_params = NIL;
	root->eq_classes = NIL;
	root->append_rel_list = NIL;
	root->rowMarks = NIL;
	root->hasInheritedTarget = false;
	root->grouping_map = NULL;

	root->hasRecursion = hasRecursion;
	if (hasRecursion)
		root->wt_param_id = SS_assign_special_param(root);
	else
		root->wt_param_id = -1;
	root->non_recursive_plan = NULL;

	/*
	 * If there is a WITH list, process each WITH query and build an initplan
	 * SubPlan structure for it.
	 */
	if (parse->cteList)
		SS_process_ctes(root);

	/*
	 * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
	 * to transform them into joins.  Note that this step does not descend
	 * into subqueries; if we pull up any subqueries below, their SubLinks are
	 * processed just before pulling them up.
	 */
	if (parse->hasSubLinks)
		pull_up_sublinks(root);

	/*
	 * Scan the rangetable for set-returning functions, and inline them if
	 * possible (producing subqueries that might get pulled up next).
	 * Recursion issues here are handled in the same way as for SubLinks.
	 */
	inline_set_returning_functions(root);

	/*
	 * Check to see if any subqueries in the jointree can be merged into this
	 * query.
	 */
	pull_up_subqueries(root);

	/*
	 * If this is a simple UNION ALL query, flatten it into an appendrel. We
	 * do this now because it requires applying pull_up_subqueries to the leaf
	 * queries of the UNION ALL, which weren't touched above because they
	 * weren't referenced by the jointree (they will be after we do this).
	 */
	if (parse->setOperations)
		flatten_simple_union_all(root);

	/*
	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
	 * outer joins --- if none, we can skip reduce_outer_joins().  And check
	 * for LATERAL RTEs, too.  This must be done after we have done
	 * pull_up_subqueries(), of course.
	 */
	root->hasJoinRTEs = false;
	root->hasLateralRTEs = false;
	hasOuterJoins = false;
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind == RTE_JOIN)
		{
			root->hasJoinRTEs = true;
			if (IS_OUTER_JOIN(rte->jointype))
				hasOuterJoins = true;
		}
		if (rte->lateral)
			root->hasLateralRTEs = true;
	}

	/*
	 * Preprocess RowMark information.  We need to do this after subquery
	 * pullup (so that all non-inherited RTEs are present) and before
	 * inheritance expansion (so that the info is available for
	 * expand_inherited_tables to examine and modify).
	 */
	preprocess_rowmarks(root);

	/*
	 * Expand any rangetable entries that are inheritance sets into "append
	 * relations".  This can add entries to the rangetable, but they must be
	 * plain base relations not joins, so it's OK (and marginally more
	 * efficient) to do it after checking for join RTEs.  We must do it after
	 * pulling up subqueries, else we'd fail to handle inherited tables in
	 * subqueries.
	 */
	expand_inherited_tables(root);

	/*
	 * Set hasHavingQual to remember if HAVING clause is present.  Needed
	 * because preprocess_expression will reduce a constant-true condition to
	 * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
	 */
	root->hasHavingQual = (parse->havingQual != NULL);

	/* Clear this flag; might get set in distribute_qual_to_rels */
	root->hasPseudoConstantQuals = false;

	/*
	 * Do expression preprocessing on targetlist and quals, as well as other
	 * random expressions in the querytree.  Note that we do not need to
	 * handle sort/group expressions explicitly, because they are actually
	 * part of the targetlist.
	 */
	parse->targetList = (List *)
		preprocess_expression(root, (Node *) parse->targetList,
							  EXPRKIND_TARGET);

	newWithCheckOptions = NIL;
	foreach(l, parse->withCheckOptions)
	{
		WithCheckOption *wco = (WithCheckOption *) lfirst(l);

		wco->qual = preprocess_expression(root, wco->qual,
										  EXPRKIND_QUAL);
		if (wco->qual != NULL)
			newWithCheckOptions = lappend(newWithCheckOptions, wco);
	}
	parse->withCheckOptions = newWithCheckOptions;

	parse->returningList = (List *)
		preprocess_expression(root, (Node *) parse->returningList,
							  EXPRKIND_TARGET);

	preprocess_qual_conditions(root, (Node *) parse->jointree);

	parse->havingQual = preprocess_expression(root, parse->havingQual,
											  EXPRKIND_QUAL);

	foreach(l, parse->windowClause)
	{
		WindowClause *wc = (WindowClause *) lfirst(l);

		/* partitionClause/orderClause are sort/group expressions */
		wc->startOffset = preprocess_expression(root, wc->startOffset,
												EXPRKIND_LIMIT);
		wc->endOffset = preprocess_expression(root, wc->endOffset,
											  EXPRKIND_LIMIT);
	}

	parse->limitOffset = preprocess_expression(root, parse->limitOffset,
											   EXPRKIND_LIMIT);
	parse->limitCount = preprocess_expression(root, parse->limitCount,
											  EXPRKIND_LIMIT);

	if (parse->onConflict)
	{
		parse->onConflict->onConflictSet = (List *)
			preprocess_expression(root, (Node *) parse->onConflict->onConflictSet,
								  EXPRKIND_TARGET);

		parse->onConflict->onConflictWhere =
			preprocess_expression(root, (Node *) parse->onConflict->onConflictWhere,
								  EXPRKIND_QUAL);
	}

	root->append_rel_list = (List *)
		preprocess_expression(root, (Node *) root->append_rel_list,
							  EXPRKIND_APPINFO);

	/* Also need to preprocess expressions within RTEs */
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
		int			kind;

		if (rte->rtekind == RTE_RELATION)
		{
			if (rte->tablesample)
				rte->tablesample = (TableSampleClause *)
					preprocess_expression(root,
										  (Node *) rte->tablesample,
										  EXPRKIND_TABLESAMPLE);
		}
		else if (rte->rtekind == RTE_SUBQUERY)
		{
			/*
			 * We don't want to do all preprocessing yet on the subquery's
			 * expressions, since that will happen when we plan it.  But if it
			 * contains any join aliases of our level, those have to get
			 * expanded now, because planning of the subquery won't do it.
			 * That's only possible if the subquery is LATERAL.
			 */
			if (rte->lateral && root->hasJoinRTEs)
				rte->subquery = (Query *)
					flatten_join_alias_vars(root, (Node *) rte->subquery);
		}
		else if (rte->rtekind == RTE_FUNCTION)
		{
			/* Preprocess the function expression(s) fully */
			kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
			rte->functions = (List *)
				preprocess_expression(root, (Node *) rte->functions, kind);
		}
		else if (rte->rtekind == RTE_VALUES)
		{
			/* Preprocess the values lists fully */
			kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
			rte->values_lists = (List *)
				preprocess_expression(root, (Node *) rte->values_lists, kind);
		}
	}

	/*
	 * In some cases we may want to transfer a HAVING clause into WHERE. We
	 * cannot do so if the HAVING clause contains aggregates (obviously) or
	 * volatile functions (since a HAVING clause is supposed to be executed
	 * only once per group).  We also can't do this if there are any nonempty
	 * grouping sets; moving such a clause into WHERE would potentially change
	 * the results, if any referenced column isn't present in all the grouping
	 * sets.  (If there are only empty grouping sets, then the HAVING clause
	 * must be degenerate as discussed below.)
	 *
	 * Also, it may be that the clause is so expensive to execute that we're
	 * better off doing it only once per group, despite the loss of
	 * selectivity.  This is hard to estimate short of doing the entire
	 * planning process twice, so we use a heuristic: clauses containing
	 * subplans are left in HAVING.  Otherwise, we move or copy the HAVING
	 * clause into WHERE, in hopes of eliminating tuples before aggregation
	 * instead of after.
	 *
	 * If the query has explicit grouping then we can simply move such a
	 * clause into WHERE; any group that fails the clause will not be in the
	 * output because none of its tuples will reach the grouping or
	 * aggregation stage.  Otherwise we must have a degenerate (variable-free)
	 * HAVING clause, which we put in WHERE so that query_planner() can use it
	 * in a gating Result node, but also keep in HAVING to ensure that we
	 * don't emit a bogus aggregated row. (This could be done better, but it
	 * seems not worth optimizing.)
	 *
	 * Note that both havingQual and parse->jointree->quals are in
	 * implicitly-ANDed-list form at this point, even though they are declared
	 * as Node *.
	 */
	newHaving = NIL;
	foreach(l, (List *) parse->havingQual)
	{
		Node	   *havingclause = (Node *) lfirst(l);

		if ((parse->groupClause && parse->groupingSets) ||
			contain_agg_clause(havingclause) ||
			contain_volatile_functions(havingclause) ||
			contain_subplans(havingclause))
		{
			/* keep it in HAVING */
			newHaving = lappend(newHaving, havingclause);
		}
		else if (parse->groupClause && !parse->groupingSets)
		{
			/* move it to WHERE */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals, havingclause);
		}
		else
		{
			/* put a copy in WHERE, keep it in HAVING */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals,
						copyObject(havingclause));
			newHaving = lappend(newHaving, havingclause);
		}
	}
	parse->havingQual = (Node *) newHaving;

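	/*
	 * Example (illustrative, not in the original): in
	 *		SELECT a, count(*) FROM t GROUP BY a HAVING a > 0
	 * the HAVING qual contains no aggregates, volatile functions, or
	 * subplans, and there are no grouping sets, so it is moved to WHERE
	 * and can filter rows before they are grouped.
	 */
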
	/* Remove any redundant GROUP BY columns */
	remove_useless_groupby_columns(root);

	/*
	 * If we have any outer joins, try to reduce them to plain inner joins.
	 * This step is most easily done after we've done expression
	 * preprocessing.
	 */
	if (hasOuterJoins)
		reduce_outer_joins(root);

	/*
	 * Do the main planning.  If we have an inherited target relation, that
	 * needs special processing, else go straight to grouping_planner.
	 */
	if (parse->resultRelation &&
		rt_fetch(parse->resultRelation, parse->rtable)->inh)
		plan = inheritance_planner(root);
	else
	{
		plan = grouping_planner(root, tuple_fraction);
		/* If it's not SELECT, we need a ModifyTable node */
		if (parse->commandType != CMD_SELECT)
		{
			List	   *withCheckOptionLists;
			List	   *returningLists;
			List	   *rowMarks;

			/*
			 * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
			 * needed.
			 */
			if (parse->withCheckOptions)
				withCheckOptionLists = list_make1(parse->withCheckOptions);
			else
				withCheckOptionLists = NIL;

			if (parse->returningList)
				returningLists = list_make1(parse->returningList);
			else
				returningLists = NIL;

			/*
			 * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
			 * will have dealt with fetching non-locked marked rows, else we
			 * need to have ModifyTable do that.
			 */
			if (parse->rowMarks)
				rowMarks = NIL;
			else
				rowMarks = root->rowMarks;

			plan = (Plan *) make_modifytable(root,
											 parse->commandType,
											 parse->canSetTag,
											 parse->resultRelation,
									   list_make1_int(parse->resultRelation),
											 list_make1(plan),
											 withCheckOptionLists,
											 returningLists,
											 rowMarks,
											 parse->onConflict,
											 SS_assign_special_param(root));
		}
	}

	/*
	 * Capture the set of outer-level param IDs we have access to, for use in
	 * extParam/allParam calculations later.
	 */
	SS_identify_outer_params(root);

	/*
	 * If any initPlans were created in this query level, attach them to the
	 * topmost plan node for the level, and increment that node's cost to
	 * account for them.
	 */
	SS_attach_initplans(root, plan);

	/* Return internal info if caller wants it */
	if (subroot)
		*subroot = root;

	return plan;
}

/*
 * preprocess_expression
 *		Do subquery_planner's preprocessing work for an expression,
 *		which can be a targetlist, a WHERE clause (including JOIN/ON
 *		conditions), a HAVING clause, or a few other things.
 */
static Node *
preprocess_expression(PlannerInfo *root, Node *expr, int kind)
{
	/*
	 * Fall out quickly if expression is empty.  This occurs often enough to
	 * be worth checking.  Note that null->null is the correct conversion for
	 * implicit-AND result format, too.
	 */
	if (expr == NULL)
		return NULL;

	/*
	 * If the query has any join RTEs, replace join alias variables with
	 * base-relation variables.  We must do this before sublink processing,
	 * else sublinks expanded out from join aliases would not get processed.
	 * We can skip it in non-lateral RTE functions, VALUES lists, and
	 * TABLESAMPLE clauses, however, since they can't contain any Vars of the
	 * current query level.
	 */
	if (root->hasJoinRTEs &&
		!(kind == EXPRKIND_RTFUNC ||
		  kind == EXPRKIND_VALUES ||
		  kind == EXPRKIND_TABLESAMPLE))
		expr = flatten_join_alias_vars(root, expr);

	/*
	 * Simplify constant expressions.
	 *
	 * Note: an essential effect of this is to convert named-argument function
	 * calls to positional notation and insert the current actual values of
	 * any default arguments for functions.  To ensure that happens, we *must*
	 * process all expressions here.  Previous PG versions sometimes skipped
	 * const-simplification if it didn't seem worth the trouble, but we can't
	 * do that anymore.
	 *
	 * Note: this also flattens nested AND and OR expressions into N-argument
	 * form.  All processing of a qual expression after this point must be
	 * careful to maintain AND/OR flatness --- that is, do not generate a tree
	 * with AND directly under AND, nor OR directly under OR.
	 */
	expr = eval_const_expressions(root, expr);

	/*
	 * If it's a qual or havingQual, canonicalize it.
	 */
	if (kind == EXPRKIND_QUAL)
	{
		expr = (Node *) canonicalize_qual((Expr *) expr);

#ifdef OPTIMIZER_DEBUG
		printf("After canonicalize_qual()\n");
		pprint(expr);
#endif
	}

	/* Expand SubLinks to SubPlans */
	if (root->parse->hasSubLinks)
		expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));

	/*
	 * XXX do not insert anything here unless you have grokked the comments in
	 * SS_replace_correlation_vars ...
	 */

	/* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
	if (root->query_level > 1)
		expr = SS_replace_correlation_vars(root, expr);

	/*
	 * If it's a qual or havingQual, convert it to implicit-AND format. (We
	 * don't want to do this before eval_const_expressions, since the latter
	 * would be unable to simplify a top-level AND correctly. Also,
	 * SS_process_sublinks expects explicit-AND format.)
	 */
	if (kind == EXPRKIND_QUAL)
		expr = (Node *) make_ands_implicit((Expr *) expr);

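	/*
	 * Illustrative example (not in the original): after the step above, a
	 * qual such as "x = 1 AND y = 2 AND z" is represented as the
	 * implicit-AND list (x = 1, y = 2, z) rather than as nested AND
	 * expression nodes.
	 */
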
	return expr;
}

/*
 * preprocess_qual_conditions
 *		Recursively scan the query's jointree and do subquery_planner's
 *		preprocessing work on each qual condition found therein.
 */
static void
preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
{
	if (jtnode == NULL)
		return;
	if (IsA(jtnode, RangeTblRef))
	{
		/* nothing to do here */
	}
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		foreach(l, f->fromlist)
			preprocess_qual_conditions(root, lfirst(l));

		f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
	}
	else if (IsA(jtnode, JoinExpr))
	{
		JoinExpr   *j = (JoinExpr *) jtnode;

		preprocess_qual_conditions(root, j->larg);
		preprocess_qual_conditions(root, j->rarg);

		j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
	}
	else
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(jtnode));
}

/*
 * preprocess_phv_expression
 *	  Do preprocessing on a PlaceHolderVar expression that's been pulled up.
 *
 * If a LATERAL subquery references an output of another subquery, and that
 * output must be wrapped in a PlaceHolderVar because of an intermediate outer
 * join, then we'll push the PlaceHolderVar expression down into the subquery
 * and later pull it back up during find_lateral_references, which runs after
 * subquery_planner has preprocessed all the expressions that were in the
 * current query level to start with.  So we need to preprocess it then.
 */
Expr *
preprocess_phv_expression(PlannerInfo *root, Expr *expr)
{
	return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
}

/*
 * inheritance_planner
 *	  Generate a plan in the case where the result relation is an
 *	  inheritance set.
 *
 * We have to handle this case differently from cases where a source relation
 * is an inheritance set. Source inheritance is expanded at the bottom of the
 * plan tree (see allpaths.c), but target inheritance has to be expanded at
 * the top.  The reason is that for UPDATE, each target relation needs a
 * different targetlist matching its own column set.  Fortunately,
 * the UPDATE/DELETE target can never be the nullable side of an outer join,
 * so it's OK to generate the plan this way.
 *
 * Returns a query plan.
 */
static Plan *
inheritance_planner(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	int			parentRTindex = parse->resultRelation;
	Bitmapset  *resultRTindexes;
	Bitmapset  *subqueryRTindexes;
	Bitmapset  *modifiableARIindexes;
	int			nominalRelation = -1;
	List	   *final_rtable = NIL;
	int			save_rel_array_size = 0;
	RelOptInfo **save_rel_array = NULL;
	List	   *subplans = NIL;
	List	   *resultRelations = NIL;
	List	   *withCheckOptionLists = NIL;
	List	   *returningLists = NIL;
	List	   *rowMarks;
	ListCell   *lc;
	Index		rti;

	Assert(parse->commandType != CMD_INSERT);

	/*
	 * We generate a modified instance of the original Query for each target
	 * relation, plan that, and put all the plans into a list that will be
	 * controlled by a single ModifyTable node.  All the instances share the
	 * same rangetable, but each instance must have its own set of subquery
	 * RTEs within the finished rangetable because (1) they are likely to get
	 * scribbled on during planning, and (2) it's not inconceivable that
	 * subqueries could get planned differently in different cases.  We need
	 * not create duplicate copies of other RTE kinds, in particular not the
	 * target relations, because they don't have either of those issues.  Not
	 * having to duplicate the target relations is important because doing so
	 * (1) would result in a rangetable of length O(N^2) for N targets, with
	 * at least O(N^3) work expended here; and (2) would greatly complicate
	 * management of the rowMarks list.
	 *
	 * Note that any RTEs with security barrier quals will be turned into
	 * subqueries during planning, and so we must create copies of them too,
	 * except where they are target relations, which will each only be used in
	 * a single plan.
	 *
	 * To begin with, we'll need a bitmapset of the target relation relids.
	 */
	resultRTindexes = bms_make_singleton(parentRTindex);
	foreach(lc, root->append_rel_list)
	{
		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);

		if (appinfo->parent_relid == parentRTindex)
			resultRTindexes = bms_add_member(resultRTindexes,
											 appinfo->child_relid);
	}

	/*
	 * Now, generate a bitmapset of the relids of the subquery RTEs, including
	 * security-barrier RTEs that will become subqueries, as just explained.
	 */
	subqueryRTindexes = NULL;
	rti = 1;
	foreach(lc, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);

		if (rte->rtekind == RTE_SUBQUERY ||
			(rte->securityQuals != NIL &&
			 !bms_is_member(rti, resultRTindexes)))
			subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
		rti++;
	}

	/*
	 * Next, we want to identify which AppendRelInfo items contain references
	 * to any of the aforesaid subquery RTEs.  These items will need to be
	 * copied and modified to adjust their subquery references; whereas the
	 * other ones need not be touched.  It's worth being tense over this
	 * because we can usually avoid processing most of the AppendRelInfo
	 * items, thereby saving O(N^2) space and time when the target is a large
	 * inheritance tree.  We can identify AppendRelInfo items by their
	 * child_relid, since that should be unique within the list.
	 */
	modifiableARIindexes = NULL;
	if (subqueryRTindexes != NULL)
	{
		foreach(lc, root->append_rel_list)
		{
			AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);

			if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
				bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
				bms_overlap(pull_varnos((Node *) appinfo->translated_vars),
							subqueryRTindexes))
				modifiableARIindexes = bms_add_member(modifiableARIindexes,
													  appinfo->child_relid);
		}
	}

	/*
	 * And now we can get on with generating a plan for each child table.
	 */
	foreach(lc, root->append_rel_list)
	{
		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
		PlannerInfo subroot;
		Plan	   *subplan;

		/* append_rel_list contains all append rels; ignore others */
		if (appinfo->parent_relid != parentRTindex)
			continue;

		/*
		 * We need a working copy of the PlannerInfo so that we can control
		 * propagation of information back to the main copy.
		 */
		memcpy(&subroot, root, sizeof(PlannerInfo));

		/*
		 * Generate modified query with this rel as target.  We first apply
		 * adjust_appendrel_attrs, which copies the Query and changes
		 * references to the parent RTE to refer to the current child RTE,
		 * then fool around with subquery RTEs.
		 */
		subroot.parse = (Query *)
			adjust_appendrel_attrs(root,
								   (Node *) parse,
								   appinfo);

		/*
		 * The rowMarks list might contain references to subquery RTEs, so
		 * make a copy that we can apply ChangeVarNodes to.  (Fortunately, the
		 * executor doesn't need to see the modified copies --- we can just
		 * pass it the original rowMarks list.)
		 */
		subroot.rowMarks = (List *) copyObject(root->rowMarks);

		/*
		 * The append_rel_list likewise might contain references to subquery
		 * RTEs (if any subqueries were flattenable UNION ALLs).  So prepare
		 * to apply ChangeVarNodes to that, too.  As explained above, we only
		 * want to copy items that actually contain such references; the rest
		 * can just get linked into the subroot's append_rel_list.
		 *
		 * If we know there are no such references, we can just use the outer
		 * append_rel_list unmodified.
		 */
		if (modifiableARIindexes != NULL)
		{
			ListCell   *lc2;

			subroot.append_rel_list = NIL;
			foreach(lc2, root->append_rel_list)
			{
				AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);

				if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
					appinfo2 = (AppendRelInfo *) copyObject(appinfo2);

				subroot.append_rel_list = lappend(subroot.append_rel_list,
												  appinfo2);
			}
		}

		/*
		 * Add placeholders to the child Query's rangetable list to fill the
		 * RT indexes already reserved for subqueries in previous children.
		 * These won't be referenced, so there's no need to make them very
		 * valid-looking.
		 */
		while (list_length(subroot.parse->rtable) < list_length(final_rtable))
			subroot.parse->rtable = lappend(subroot.parse->rtable,
											makeNode(RangeTblEntry));

		/*
		 * If this isn't the first child Query, generate duplicates of all
		 * subquery (or subquery-to-be) RTEs, and adjust Var numbering to
		 * reference the duplicates.  To simplify the loop logic, we scan the
		 * original rtable not the copy just made by adjust_appendrel_attrs;
		 * that should be OK since subquery RTEs couldn't contain any
		 * references to the target rel.
		 */
		if (final_rtable != NIL && subqueryRTindexes != NULL)
		{
			ListCell   *lr;

			rti = 1;
			foreach(lr, parse->rtable)
			{
				RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);

				if (bms_is_member(rti, subqueryRTindexes))
				{
					Index		newrti;

					/*
					 * The RTE can't contain any references to its own RT
					 * index, except in the security barrier quals, so we can
					 * save a few cycles by applying ChangeVarNodes before we
					 * append the RTE to the rangetable.
					 */
					newrti = list_length(subroot.parse->rtable) + 1;
					ChangeVarNodes((Node *) subroot.parse, rti, newrti, 0);
					ChangeVarNodes((Node *) subroot.rowMarks, rti, newrti, 0);
					/* Skip processing unchanging parts of append_rel_list */
					if (modifiableARIindexes != NULL)
					{
						ListCell   *lc2;

						foreach(lc2, subroot.append_rel_list)
						{
							AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);

							if (bms_is_member(appinfo2->child_relid,
											  modifiableARIindexes))
								ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
						}
					}
					rte = copyObject(rte);
					ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
					subroot.parse->rtable = lappend(subroot.parse->rtable,
													rte);
				}
				rti++;
			}
		}

		/* There shouldn't be any OJ info to translate, as yet */
		Assert(subroot.join_info_list == NIL);
		/* and we haven't created PlaceHolderInfos, either */
		Assert(subroot.placeholder_list == NIL);
		/* hack to mark target relation as an inheritance partition */
		subroot.hasInheritedTarget = true;

		/* Generate plan */
		subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );

		/*
		 * Planning may have modified the query result relation (if there were
		 * security barrier quals on the result RTE).
		 */
		appinfo->child_relid = subroot.parse->resultRelation;

		/*
		 * We'll use the first child relation (even if it's excluded) as the
		 * nominal target relation of the ModifyTable node.  Because of the
		 * way expand_inherited_rtentry works, this should always be the RTE
		 * representing the parent table in its role as a simple member of the
		 * inheritance set.  (It would be logically cleaner to use the
		 * inheritance parent RTE as the nominal target; but since that RTE
		 * will not be otherwise referenced in the plan, doing so would give
		 * rise to confusing use of multiple aliases in EXPLAIN output for
		 * what the user will think is the "same" table.)
		 */
		if (nominalRelation < 0)
			nominalRelation = appinfo->child_relid;

		/*
		 * If this child rel was excluded by constraint exclusion, exclude it
		 * from the result plan.
		 */
		if (is_dummy_plan(subplan))
			continue;

		subplans = lappend(subplans, subplan);

		/*
		 * If this is the first non-excluded child, its post-planning rtable
		 * becomes the initial contents of final_rtable; otherwise, append
		 * just its modified subquery RTEs to final_rtable.
		 */
		if (final_rtable == NIL)
			final_rtable = subroot.parse->rtable;
		else
		{
			List	   *tmp_rtable = NIL;
			ListCell   *cell1,
					   *cell2;

			/*
			 * Check to see if any of the original RTEs were turned into
			 * subqueries during planning.  Currently, this should only ever
			 * happen due to securityQuals being involved which push a
			 * relation down under a subquery, to ensure that the security
			 * barrier quals are evaluated first.
			 *
			 * When this happens, we want to use the new subqueries in the
			 * final rtable.
			 */
			forboth(cell1, final_rtable, cell2, subroot.parse->rtable)
			{
				RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(cell1);
				RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(cell2);

				if (rte1->rtekind == RTE_RELATION &&
					rte2->rtekind == RTE_SUBQUERY)
				{
					/* Should only be when there are securityQuals today */
					Assert(rte1->securityQuals != NIL);
					tmp_rtable = lappend(tmp_rtable, rte2);
				}
				else
					tmp_rtable = lappend(tmp_rtable, rte1);
			}

			final_rtable = list_concat(tmp_rtable,
									   list_copy_tail(subroot.parse->rtable,
												 list_length(final_rtable)));
		}

		/*
		 * We need to collect all the RelOptInfos from all child plans into
		 * the main PlannerInfo, since setrefs.c will need them.  We use the
		 * last child's simple_rel_array (previous ones are too short), so we
		 * have to propagate forward the RelOptInfos that were already built
		 * in previous children.
		 */
		Assert(subroot.simple_rel_array_size >= save_rel_array_size);
		for (rti = 1; rti < save_rel_array_size; rti++)
		{
			RelOptInfo *brel = save_rel_array[rti];

			if (brel)
				subroot.simple_rel_array[rti] = brel;
		}
		save_rel_array_size = subroot.simple_rel_array_size;
		save_rel_array = subroot.simple_rel_array;

		/* Make sure any initplans from this rel get into the outer list */
		root->init_plans = subroot.init_plans;

		/* Build list of target-relation RT indexes */
		resultRelations = lappend_int(resultRelations, appinfo->child_relid);

		/* Build lists of per-relation WCO and RETURNING targetlists */
		if (parse->withCheckOptions)
			withCheckOptionLists = lappend(withCheckOptionLists,
										   subroot.parse->withCheckOptions);
		if (parse->returningList)
			returningLists = lappend(returningLists,
									 subroot.parse->returningList);

		Assert(!parse->onConflict);
	}

	/* Mark result as unordered (probably unnecessary) */
	root->query_pathkeys = NIL;

	/*
	 * If we managed to exclude every child rel, return a dummy plan; it
	 * doesn't even need a ModifyTable node.
	 */
	if (subplans == NIL)
	{
		/* although dummy, it must have a valid tlist for executor */
		List	   *tlist;

		tlist = preprocess_targetlist(root, parse->targetList);
		return (Plan *) make_result(root,
									tlist,
									(Node *) list_make1(makeBoolConst(false,
																	  false)),
									NULL);
	}

	/*
	 * Put back the final adjusted rtable into the master copy of the Query.
	 */
	parse->rtable = final_rtable;
	root->simple_rel_array_size = save_rel_array_size;
	root->simple_rel_array = save_rel_array;

	/*
	 * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
	 * have dealt with fetching non-locked marked rows, else we need to have
	 * ModifyTable do that.
	 */
	if (parse->rowMarks)
		rowMarks = NIL;
	else
		rowMarks = root->rowMarks;

	/* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */
	return (Plan *) make_modifytable(root,
									 parse->commandType,
									 parse->canSetTag,
									 nominalRelation,
									 resultRelations,
									 subplans,
									 withCheckOptionLists,
									 returningLists,
									 rowMarks,
									 NULL,
									 SS_assign_special_param(root));
}

/*--------------------
 * grouping_planner
 *	  Perform planning steps related to grouping, aggregation, etc.
 *	  This primarily means adding top-level processing to the basic
 *	  query plan produced by query_planner.
 *
 * tuple_fraction is the fraction of tuples we expect will be retrieved
 *
 * tuple_fraction is interpreted as follows:
 *	  0: expect all tuples to be retrieved (normal case)
 *	  0 < tuple_fraction < 1: expect the given fraction of tuples available
 *		from the plan to be retrieved
 *	  tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 *		expected to be retrieved (ie, a LIMIT specification)
 *
 * Returns a query plan.  Also, root->query_pathkeys is returned as the
 * actual output ordering of the plan (in pathkey format).
 *--------------------
 */
static Plan *
grouping_planner(PlannerInfo *root, double tuple_fraction)
{
	Query	   *parse = root->parse;
	List	   *tlist = parse->targetList;
	int64		offset_est = 0;
	int64		count_est = 0;
	double		limit_tuples = -1.0;
	Plan	   *result_plan;
	List	   *current_pathkeys;
	double		dNumGroups = 0;
	bool		use_hashed_distinct = false;
	bool		tested_hashed_distinct = false;

	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
	if (parse->limitCount || parse->limitOffset)
	{
		tuple_fraction = preprocess_limit(root, tuple_fraction,
										  &offset_est, &count_est);

		/*
		 * If we have a known LIMIT, and don't have an unknown OFFSET, we can
		 * estimate the effects of using a bounded sort.
		 */
		if (count_est > 0 && offset_est >= 0)
			limit_tuples = (double) count_est + (double) offset_est;
	}
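
	/*
	 * Illustrative example (not in the original): for "... LIMIT 50 OFFSET
	 * 10" with constant arguments, preprocess_limit sets count_est = 50 and
	 * offset_est = 10, so limit_tuples becomes 60 and tuple_fraction is
	 * likewise adjusted toward an absolute row count, as described in the
	 * header comment above.
	 */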

	if (parse->setOperations)
	{
		List	   *set_sortclauses;

		/*
		 * If there's a top-level ORDER BY, assume we have to fetch all the
		 * tuples.  This might be too simplistic given all the hackery below
		 * to possibly avoid the sort; but the odds of accurate estimates here
		 * are pretty low anyway.
		 */
		if (parse->sortClause)
			tuple_fraction = 0.0;

		/*
		 * Construct the plan for set operations.  The result will not need
		 * any work except perhaps a top-level sort and/or LIMIT.  Note that
		 * any special work for recursive unions is the responsibility of
		 * plan_set_operations.
		 */
		result_plan = plan_set_operations(root, tuple_fraction,
										  &set_sortclauses);

		/*
		 * Calculate pathkeys representing the sort order (if any) of the set
		 * operation's result.  We have to do this before overwriting the sort
		 * key information...
		 */
		current_pathkeys = make_pathkeys_for_sortclauses(root,
														 set_sortclauses,
													result_plan->targetlist);

		/*
		 * We should not need to call preprocess_targetlist, since we must be
		 * in a SELECT query node.  Instead, use the targetlist returned by
		 * plan_set_operations (since this tells whether it returned any
		 * resjunk columns!), and transfer any sort key information from the
		 * original tlist.
		 */
		Assert(parse->commandType == CMD_SELECT);

		tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
										tlist);

		/*
		 * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
		 * checked already, but let's make sure).
		 */
		if (parse->rowMarks)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			/*------
			  translator: %s is a SQL row locking clause such as FOR UPDATE */
					 errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
							LCS_asString(((RowMarkClause *)
									linitial(parse->rowMarks))->strength))));

		/*
		 * Calculate pathkeys that represent result ordering requirements
		 */
		Assert(parse->distinctClause == NIL);
		root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
															parse->sortClause,
															tlist);
	}
	else
	{
		/* No set operations, do regular planning */
		long		numGroups = 0;
		AggClauseCosts agg_costs;
		int			numGroupCols;
		double		path_rows;
		int			path_width;
		bool		use_hashed_grouping = false;
		WindowFuncLists *wflists = NULL;
		List	   *activeWindows = NIL;
		OnConflictExpr *onconfl;
		int			maxref = 0;
		List	   *rollup_lists = NIL;
		List	   *rollup_groupclauses = NIL;
		standard_qp_extra qp_extra;
		RelOptInfo *final_rel;
		Path	   *cheapest_path;
		Path	   *sorted_path;
		Path	   *best_path;

		MemSet(&agg_costs, 0, sizeof(AggClauseCosts));

		/* A recursive query should always have setOperations */
		Assert(!root->hasRecursion);

		/* Preprocess grouping sets, if any */
		if (parse->groupingSets)
		{
			int		   *tleref_to_colnum_map;
			List	   *sets;
			ListCell   *lc;
			ListCell   *lc2;
			ListCell   *lc_set;

			parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);

			/* Identify max SortGroupRef in groupClause, for array sizing */
			/* (note this value will be used again later) */
			foreach(lc, parse->groupClause)
			{
				SortGroupClause *gc = lfirst(lc);

				if (gc->tleSortGroupRef > maxref)
					maxref = gc->tleSortGroupRef;
			}

			/* Allocate workspace array for remapping */
			tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));

			/* Examine the rollup sets */
			sets = extract_rollup_sets(parse->groupingSets);

			foreach(lc_set, sets)
			{
				List	   *current_sets = (List *) lfirst(lc_set);
				List	   *groupclause;
				int			ref;

				/*
				 * Reorder the current list of grouping sets into correct
				 * prefix order.  If only one aggregation pass is needed, try
				 * to make the list match the ORDER BY clause; if more than
				 * one pass is needed, we don't bother with that.
				 */
				current_sets = reorder_grouping_sets(current_sets,
													 (list_length(sets) == 1
													  ? parse->sortClause
													  : NIL));

				/*
				 * Order the groupClause appropriately.  If the first grouping
				 * set is empty, this can match regular GROUP BY
				 * preprocessing, otherwise we have to force the groupClause
				 * to match that grouping set's order.
				 */
				groupclause = preprocess_groupclause(root,
													 linitial(current_sets));

				/*
				 * Now that we've pinned down an order for the groupClause for
				 * this list of grouping sets, we need to remap the entries in
				 * the grouping sets from sortgrouprefs to plain indices
				 * (0-based) into the groupClause for this collection of
				 * grouping sets.
				 */
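
				/*
				 * Illustrative example (added commentary): if groupclause
				 * lists clauses with sortgrouprefs (3, 1, 2), the loop below
				 * sets tleref_to_colnum_map[3] = 0, [1] = 1, [2] = 2, so a
				 * grouping set (1, 3) becomes (1, 0).
				 */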
				ref = 0;
				foreach(lc, groupclause)
				{
					SortGroupClause *gc = lfirst(lc);

					tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
				}

				foreach(lc, current_sets)
				{
					foreach(lc2, (List *) lfirst(lc))
					{
						lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
					}
				}

				/* Save the reordered sets and corresponding groupclauses */
				rollup_lists = lcons(current_sets, rollup_lists);
				rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
			}
		}
		else
		{
			/* Preprocess GROUP BY clause, if any */
			if (parse->groupClause)
				parse->groupClause = preprocess_groupclause(root, NIL);
			rollup_groupclauses = list_make1(parse->groupClause);
		}

		numGroupCols = list_length(parse->groupClause);

		/* Preprocess targetlist */
		tlist = preprocess_targetlist(root, tlist);

		onconfl = parse->onConflict;
		if (onconfl)
			onconfl->onConflictSet =
				preprocess_onconflict_targetlist(onconfl->onConflictSet,
												 parse->resultRelation,
												 parse->rtable);

		/*
		 * Expand any rangetable entries that have security barrier quals.
		 * This may add new security barrier subquery RTEs to the rangetable.
		 */
		expand_security_quals(root, tlist);
		if (parse->hasRowSecurity)
			root->glob->hasRowSecurity = true;

		/*
		 * Locate any window functions in the tlist.  (We don't need to look
		 * anywhere else, since expressions used in ORDER BY will be in there
		 * too.)  Note that they could all have been eliminated by constant
		 * folding, in which case we don't need to do any more work.
		 */
		if (parse->hasWindowFuncs)
		{
			wflists = find_window_functions((Node *) tlist,
											list_length(parse->windowClause));
			if (wflists->numWindowFuncs > 0)
				activeWindows = select_active_windows(root, wflists);
			else
				parse->hasWindowFuncs = false;
		}

		/*
		 * Do aggregate preprocessing, if the query has any aggs.
		 *
		 * Note: think not that we can turn off hasAggs if we find no aggs. It
		 * is possible for constant-expression simplification to remove all
		 * explicit references to aggs, but we still have to follow the
		 * aggregate semantics (eg, producing only one output row).
		 */
		if (parse->hasAggs)
		{
			/*
			 * Collect statistics about aggregates for estimating costs. Note:
			 * we do not attempt to detect duplicate aggregates here; a
			 * somewhat-overestimated cost is okay for our present purposes.
			 */
			count_agg_clauses(root, (Node *) tlist, &agg_costs);
			count_agg_clauses(root, parse->havingQual, &agg_costs);

			/*
			 * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
			 * adding logic between here and the optimize_minmax_aggregates
			 * call.  Anything that is needed in MIN/MAX-optimizable cases
			 * will have to be duplicated in planagg.c.
			 */
			preprocess_minmax_aggregates(root, tlist);
		}

		/* Make tuple_fraction accessible to lower-level routines */
		root->tuple_fraction = tuple_fraction;

		/*
		 * Figure out whether there's a hard limit on the number of rows that
		 * query_planner's result subplan needs to return.  Even if we know a
		 * hard limit overall, it doesn't apply if the query has any
		 * grouping/aggregation operations.
		 */
		if (parse->groupClause ||
			parse->groupingSets ||
			parse->distinctClause ||
			parse->hasAggs ||
			parse->hasWindowFuncs ||
			root->hasHavingQual)
			root->limit_tuples = -1.0;
		else
			root->limit_tuples = limit_tuples;

		/* Set up data needed by standard_qp_callback */
		qp_extra.tlist = tlist;
		qp_extra.activeWindows = activeWindows;
		qp_extra.groupClause = llast(rollup_groupclauses);

		/*
		 * Generate the best unsorted and presorted paths for this Query (but
		 * note there may not be any presorted paths).  We also generate (in
		 * standard_qp_callback) pathkey representations of the query's sort
		 * clause, distinct clause, etc.
		 */
		final_rel = query_planner(root, tlist,
								  standard_qp_callback, &qp_extra);

		/*
		 * Extract rowcount and width estimates for use below.  If final_rel
		 * has been proven dummy, its rows estimate will be zero; clamp it to
		 * one to avoid zero-divide in subsequent calculations.
		 */
		path_rows = clamp_row_est(final_rel->rows);
		path_width = final_rel->width;

		/*
		 * If there's grouping going on, estimate the number of result groups.
		 * We couldn't do this any earlier because it depends on relation size
		 * estimates that are created within query_planner().
		 *
		 * Then convert tuple_fraction to fractional form if it is absolute,
		 * and if grouping or aggregation is involved, adjust tuple_fraction
		 * to describe the fraction of the underlying un-aggregated tuples
		 * that will be fetched.
		 */
		dNumGroups = 1;			/* in case not grouping */

		if (parse->groupClause)
		{
			List	   *groupExprs;

			if (parse->groupingSets)
			{
				ListCell   *lc,
						   *lc2;

				dNumGroups = 0;

				forboth(lc, rollup_groupclauses, lc2, rollup_lists)
				{
					ListCell   *lc3;

					groupExprs = get_sortgrouplist_exprs(lfirst(lc),
														 parse->targetList);

					foreach(lc3, lfirst(lc2))
					{
						List	   *gset = lfirst(lc3);

						dNumGroups += estimate_num_groups(root,
														  groupExprs,
														  path_rows,
														  &gset);
					}
				}
			}
			else
			{
				groupExprs = get_sortgrouplist_exprs(parse->groupClause,
													 parse->targetList);

				dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
												 NULL);
			}

			/*
			 * In GROUP BY mode, an absolute LIMIT is relative to the number
			 * of groups not the number of tuples.  If the caller gave us a
			 * fraction, keep it as-is.  (In both cases, we are effectively
			 * assuming that all the groups are about the same size.)
			 */
			if (tuple_fraction >= 1.0)
				tuple_fraction /= dNumGroups;
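
			/*
			 * Worked example (illustrative): an absolute target of 50 rows
			 * against an estimated dNumGroups of 1000 yields tuple_fraction
			 * = 0.05, i.e. about 5% of the groups are expected to be needed.
			 */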

			/*
			 * If there's more than one grouping set, we'll have to sort the
			 * entire input.
			 */
			if (list_length(rollup_lists) > 1)
				tuple_fraction = 0.0;

			/*
			 * If both GROUP BY and ORDER BY are specified, we will need two
			 * levels of sort --- and, therefore, certainly need to read all
			 * the tuples --- unless ORDER BY is a subset of GROUP BY.
			 * Likewise if we have both DISTINCT and GROUP BY, or if we have a
			 * window specification not compatible with the GROUP BY.
			 */
			if (!pathkeys_contained_in(root->sort_pathkeys,
									   root->group_pathkeys) ||
				!pathkeys_contained_in(root->distinct_pathkeys,
									   root->group_pathkeys) ||
				!pathkeys_contained_in(root->window_pathkeys,
									   root->group_pathkeys))
				tuple_fraction = 0.0;
		}
		else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets)
		{
			/*
			 * Ungrouped aggregate will certainly want to read all the tuples,
			 * and it will deliver a single result row per grouping set (or 1
			 * if no grouping sets were explicitly given, in which case leave
			 * dNumGroups as-is)
			 */
			tuple_fraction = 0.0;
			if (parse->groupingSets)
				dNumGroups = list_length(parse->groupingSets);
		}
		else if (parse->distinctClause)
		{
			/*
			 * Since there was no grouping or aggregation, it's reasonable to
			 * assume the UNIQUE filter has effects comparable to GROUP BY.
			 * (If DISTINCT is used with grouping, we ignore its effects for
			 * rowcount estimation purposes; this amounts to assuming the
			 * grouped rows are distinct already.)
			 */
			List	   *distinctExprs;

			distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
													parse->targetList);
			dNumGroups = estimate_num_groups(root, distinctExprs, path_rows, NULL);

			/*
			 * Adjust tuple_fraction the same way as for GROUP BY, too.
			 */
			if (tuple_fraction >= 1.0)
				tuple_fraction /= dNumGroups;
		}
		else
		{
			/*
			 * Plain non-grouped, non-aggregated query: an absolute tuple
			 * fraction can be divided by the number of tuples.
			 */
			if (tuple_fraction >= 1.0)
				tuple_fraction /= path_rows;
		}

		/*
		 * Pick out the cheapest-total path as well as the cheapest presorted
		 * path for the requested pathkeys (if there is one).  We should take
		 * the tuple fraction into account when selecting the cheapest
		 * presorted path, but not when selecting the cheapest-total path,
		 * since if we have to sort then we'll have to fetch all the tuples.
		 * (But there's a special case: if query_pathkeys is NIL, meaning
		 * order doesn't matter, then the "cheapest presorted" path will be
		 * the cheapest overall for the tuple fraction.)
		 */
		cheapest_path = final_rel->cheapest_total_path;

		sorted_path =
			get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
													  root->query_pathkeys,
													  NULL,
													  tuple_fraction);

		/* Don't consider same path in both guises; just wastes effort */
		if (sorted_path == cheapest_path)
			sorted_path = NULL;

		/*
		 * Forget about the presorted path if it would be cheaper to sort the
		 * cheapest-total path.  Here we need consider only the behavior at
		 * the tuple_fraction point.  Also, limit_tuples is only relevant if
		 * not grouping/aggregating, so use root->limit_tuples in the
		 * cost_sort call.
		 */
		if (sorted_path)
		{
			Path		sort_path;		/* dummy for result of cost_sort */

			if (root->query_pathkeys == NIL ||
				pathkeys_contained_in(root->query_pathkeys,
									  cheapest_path->pathkeys))
			{
				/* No sort needed for cheapest path */
				sort_path.startup_cost = cheapest_path->startup_cost;
				sort_path.total_cost = cheapest_path->total_cost;
			}
			else
			{
				/* Figure cost for sorting */
				cost_sort(&sort_path, root, root->query_pathkeys,
						  cheapest_path->total_cost,
						  path_rows, path_width,
						  0.0, work_mem, root->limit_tuples);
			}

			if (compare_fractional_path_costs(sorted_path, &sort_path,
											  tuple_fraction) > 0)
			{
				/* Presorted path is a loser */
				sorted_path = NULL;
			}
		}
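
		/*
		 * Added note (a sketch of the comparison above; see the definition
		 * of compare_fractional_path_costs for the authoritative rule): it
		 * evaluates each path's cost at the point where tuple_fraction of
		 * its rows have been fetched, so a presorted path with a higher
		 * total cost can still survive here when its startup cost is low
		 * and only a small fraction of the output is needed.
		 */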

		/*
		 * Consider whether we want to use hashing instead of sorting.
		 */
		if (parse->groupClause)
		{
			/*
			 * If grouping, decide whether to use sorted or hashed grouping.
			 * If grouping sets are present, we can currently do only sorted
			 * grouping.
			 */

			if (parse->groupingSets)
			{
				use_hashed_grouping = false;
			}
			else
			{
				use_hashed_grouping =
					choose_hashed_grouping(root,
										   tuple_fraction, limit_tuples,
										   path_rows, path_width,
										   cheapest_path, sorted_path,
										   dNumGroups, &agg_costs);
			}

			/* Also convert # groups to long int --- but 'ware overflow! */
			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
		}
		else if (parse->distinctClause && sorted_path &&
				 !root->hasHavingQual && !parse->hasAggs && !activeWindows)
		{
			/*
			 * We'll reach the DISTINCT stage without any intermediate
			 * processing, so figure out whether we will want to hash or not
			 * so we can choose whether to use cheapest or sorted path.
			 */
			use_hashed_distinct =
				choose_hashed_distinct(root,
									   tuple_fraction, limit_tuples,
									   path_rows, path_width,
									   cheapest_path->startup_cost,
									   cheapest_path->total_cost,
									   sorted_path->startup_cost,
									   sorted_path->total_cost,
									   sorted_path->pathkeys,
									   dNumGroups);
			tested_hashed_distinct = true;
		}

		/*
		 * Select the best path.  If we are doing hashed grouping, we will
		 * always read all the input tuples, so use the cheapest-total path.
		 * Otherwise, the comparison above is correct.
		 */
		if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
			best_path = cheapest_path;
		else
			best_path = sorted_path;

		/*
		 * Check to see if it's possible to optimize MIN/MAX aggregates. If
		 * so, we will forget all the work we did so far to choose a "regular"
		 * path ... but we had to do it anyway to be able to tell which way is
		 * cheaper.
		 */
		result_plan = optimize_minmax_aggregates(root,
												 tlist,
												 &agg_costs,
												 best_path);
		if (result_plan != NULL)
		{
			/*
			 * optimize_minmax_aggregates generated the full plan, with the
			 * right tlist, and it has no sort order.
			 */
			current_pathkeys = NIL;
		}
		else
		{
			/*
			 * Normal case --- create a plan according to query_planner's
			 * results.
			 */
			List	   *sub_tlist;
			AttrNumber *groupColIdx = NULL;
			bool		need_tlist_eval = true;
			bool		need_sort_for_grouping = false;

			result_plan = create_plan(root, best_path);
			current_pathkeys = best_path->pathkeys;

			/* Detect if we'll need an explicit sort for grouping */
			if (parse->groupClause && !use_hashed_grouping &&
			  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
				need_sort_for_grouping = true;

			/*
			 * Generate appropriate target list for scan/join subplan; may be
			 * different from tlist if grouping or aggregation is needed.
			 */
			sub_tlist = make_subplanTargetList(root, tlist,
											   &groupColIdx,
											   &need_tlist_eval);

			/*
			 * create_plan returns a plan with just a "flat" tlist of required
			 * Vars.  Usually we need to insert the sub_tlist as the tlist of
			 * the top plan node.  However, we can skip that if we determined
			 * that whatever create_plan chose to return will be good enough.
			 *
			 * If we need_sort_for_grouping, always override create_plan's
			 * tlist, so that we don't sort useless data from a "physical"
			 * tlist.
			 */
			if (need_tlist_eval || need_sort_for_grouping)
			{
				/*
				 * If the top-level plan node is one that cannot do expression
				 * evaluation and its existing target list isn't already what
				 * we need, we must insert a Result node to project the
				 * desired tlist.
				 */
				if (!is_projection_capable_plan(result_plan) &&
					!tlist_same_exprs(sub_tlist, result_plan->targetlist))
				{
					result_plan = (Plan *) make_result(root,
													   sub_tlist,
													   NULL,
													   result_plan);
				}
				else
				{
					/*
					 * Otherwise, just replace the subplan's flat tlist with
					 * the desired tlist.
					 */
					result_plan->targetlist = sub_tlist;
				}

				/*
				 * Also, account for the cost of evaluation of the sub_tlist.
				 * See comments for add_tlist_costs_to_plan() for more info.
				 */
				add_tlist_costs_to_plan(root, result_plan, sub_tlist);
			}
			else
			{
				/*
				 * Since we're using create_plan's tlist and not the one
				 * make_subplanTargetList calculated, we have to refigure any
				 * grouping-column indexes make_subplanTargetList computed.
				 */
				locate_grouping_columns(root, tlist, result_plan->targetlist,
										groupColIdx);
			}

			/*
			 * groupColIdx is now cast in stone, so record a mapping from
			 * tleSortGroupRef to column index.  setrefs.c will need this to
			 * finalize GROUPING() operations.
			 */
			if (parse->groupingSets)
			{
				AttrNumber *grouping_map = palloc0(sizeof(AttrNumber) * (maxref + 1));
				ListCell   *lc;
				int			i = 0;

				foreach(lc, parse->groupClause)
				{
					SortGroupClause *gc = lfirst(lc);

					grouping_map[gc->tleSortGroupRef] = groupColIdx[i++];
				}

				root->grouping_map = grouping_map;
			}

			/*
			 * Insert AGG or GROUP node if needed, plus an explicit sort step
			 * if necessary.
			 *
			 * HAVING clause, if any, becomes qual of the Agg or Group node.
			 */
			if (use_hashed_grouping)
			{
				/* Hashed aggregate plan --- no sort needed */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												AGG_HASHED,
												&agg_costs,
												numGroupCols,
												groupColIdx,
									extract_grouping_ops(parse->groupClause),
												NIL,
												numGroups,
												false,
												true,
												result_plan);
				/* Hashed aggregation produces randomly-ordered results */
				current_pathkeys = NIL;
			}
			else if (parse->hasAggs ||
					 (parse->groupingSets && parse->groupClause))
			{
				/*
				 * Aggregation and/or non-degenerate grouping sets.
				 *
				 * Output is in sorted order by group_pathkeys if, and only
				 * if, there is a single rollup operation on a non-empty list
				 * of grouping expressions.
				 */
				if (list_length(rollup_groupclauses) == 1
					&& list_length(linitial(rollup_groupclauses)) > 0)
					current_pathkeys = root->group_pathkeys;
				else
					current_pathkeys = NIL;

				result_plan = build_grouping_chain(root,
												   parse,
												   tlist,
												   need_sort_for_grouping,
												   rollup_groupclauses,
												   rollup_lists,
												   groupColIdx,
												   &agg_costs,
												   numGroups,
												   result_plan);
			}
			else if (parse->groupClause)
			{
				/*
				 * GROUP BY without aggregation, so insert a group node (plus
				 * the appropriate sort node, if necessary).
				 *
				 * Add an explicit sort if we couldn't make the path come out
				 * the way the GROUP node needs it.
				 */
				if (need_sort_for_grouping)
				{
					result_plan = (Plan *)
						make_sort_from_groupcols(root,
												 parse->groupClause,
												 groupColIdx,
												 result_plan);
					current_pathkeys = root->group_pathkeys;
				}

				result_plan = (Plan *) make_group(root,
												  tlist,
												  (List *) parse->havingQual,
												  numGroupCols,
												  groupColIdx,
									extract_grouping_ops(parse->groupClause),
												  dNumGroups,
												  result_plan);
				/* The Group node won't change sort ordering */
			}
			else if (root->hasHavingQual || parse->groupingSets)
			{
				int			nrows = list_length(parse->groupingSets);

				/*
				 * No aggregates, and no GROUP BY, but we have a HAVING qual
				 * or grouping sets (which by elimination of cases above must
				 * consist solely of empty grouping sets, since otherwise
				 * groupClause will be non-empty).
				 *
				 * This is a degenerate case in which we are supposed to emit
				 * either 0 or 1 row for each grouping set depending on
				 * whether HAVING succeeds.  Furthermore, there cannot be any
				 * variables in either HAVING or the targetlist, so we
				 * actually do not need the FROM table at all!	We can just
				 * throw away the plan-so-far and generate a Result node. This
				 * is a sufficiently unusual corner case that it's not worth
				 * contorting the structure of this routine to avoid having to
				 * generate the plan in the first place.
				 */
				result_plan = (Plan *) make_result(root,
												   tlist,
												   parse->havingQual,
												   NULL);

				/*
				 * Doesn't seem worthwhile writing code to cons up a
				 * generate_series or a values scan to emit multiple rows.
				 * Instead just clone the result in an Append.
				 */
				if (nrows > 1)
				{
					List	   *plans = list_make1(result_plan);

					while (--nrows > 0)
						plans = lappend(plans, copyObject(result_plan));

					result_plan = (Plan *) make_append(plans, tlist);
				}
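
				/*
				 * Illustrative example (hypothetical query): "SELECT 1
				 * GROUP BY GROUPING SETS ((), ())" has no aggregates and an
				 * empty groupClause, so it reaches this branch and is
				 * planned as a Result cloned into a two-branch Append.
				 */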
			}
		}						/* end of non-minmax-aggregate case */

		/*
		 * Since each window function could require a different sort order, we
		 * stack up a WindowAgg node for each window, with sort steps between
		 * them as needed.
		 */
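
		/*
		 * Plan-shape sketch (illustrative, assuming two active windows w1
		 * and w2 that need different sort orders):
		 *
		 *		WindowAgg (w2)
		 *			-> Sort (w2 ordering)
		 *				-> WindowAgg (w1)
		 *					-> Sort (w1 ordering)
		 *						-> scan/join plan
		 */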
		if (activeWindows)
		{
			List	   *window_tlist;
			ListCell   *l;

			/*
			 * If the top-level plan node is one that cannot do expression
			 * evaluation, we must insert a Result node to project the desired
			 * tlist.  (In some cases this might not really be required, but
			 * it's not worth trying to avoid it.  In particular, think not to
			 * skip adding the Result if the initial window_tlist matches the
			 * top-level plan node's output, because we might change the tlist
			 * inside the following loop.)	Note that on second and subsequent
			 * passes through the following loop, the top-level node will be a
			 * WindowAgg which we know can project; so we only need to check
			 * once.
			 */
			if (!is_projection_capable_plan(result_plan))
			{
				result_plan = (Plan *) make_result(root,
												   NIL,
												   NULL,
												   result_plan);
			}

			/*
			 * The "base" targetlist for all steps of the windowing process is
			 * a flat tlist of all Vars and Aggs needed in the result.  (In
			 * some cases we wouldn't need to propagate all of these all the
			 * way to the top, since they might only be needed as inputs to
			 * WindowFuncs.  It's probably not worth trying to optimize that
			 * though.)  We also add window partitioning and sorting
			 * expressions to the base tlist, to ensure they're computed only
			 * once at the bottom of the stack (that's critical for volatile
			 * functions).  As we climb up the stack, we'll add outputs for
			 * the WindowFuncs computed at each level.
			 */
			window_tlist = make_windowInputTargetList(root,
													  tlist,
													  activeWindows);

			/*
			 * The copyObject steps here are needed to ensure that each plan
			 * node has a separately modifiable tlist.  (XXX wouldn't a
			 * shallow list copy do for that?)
			 */
			result_plan->targetlist = (List *) copyObject(window_tlist);

			foreach(l, activeWindows)
			{
				WindowClause *wc = (WindowClause *) lfirst(l);
				List	   *window_pathkeys;
				int			partNumCols;
				AttrNumber *partColIdx;
				Oid		   *partOperators;
				int			ordNumCols;
				AttrNumber *ordColIdx;
				Oid		   *ordOperators;

				window_pathkeys = make_pathkeys_for_window(root,
														   wc,
														   tlist);

				/*
				 * This is a bit tricky: we build a sort node even if we don't
				 * really have to sort.  Even when no explicit sort is needed,
				 * we need to have suitable resjunk items added to the input
				 * plan's tlist for any partitioning or ordering columns that
				 * aren't plain Vars.  (In theory, make_windowInputTargetList
				 * should have provided all such columns, but let's not assume
				 * that here.)	Furthermore, this way we can use existing
				 * infrastructure to identify which input columns are the
				 * interesting ones.
				 */
				if (window_pathkeys)
				{
					Sort	   *sort_plan;

					sort_plan = make_sort_from_pathkeys(root,
														result_plan,
														window_pathkeys,
														-1.0);
					if (!pathkeys_contained_in(window_pathkeys,
											   current_pathkeys))
					{
						/* we do indeed need to sort */
						result_plan = (Plan *) sort_plan;
						current_pathkeys = window_pathkeys;
					}
					/* In either case, extract the per-column information */
					get_column_info_for_window(root, wc, tlist,
											   sort_plan->numCols,
											   sort_plan->sortColIdx,
											   &partNumCols,
											   &partColIdx,
											   &partOperators,
											   &ordNumCols,
											   &ordColIdx,
											   &ordOperators);
				}
				else
				{
					/* empty window specification, nothing to sort */
					partNumCols = 0;
					partColIdx = NULL;
					partOperators = NULL;
					ordNumCols = 0;
					ordColIdx = NULL;
					ordOperators = NULL;
				}

				if (lnext(l))
				{
					/* Add the current WindowFuncs to the running tlist */
					window_tlist = add_to_flat_tlist(window_tlist,
										   wflists->windowFuncs[wc->winref]);
				}
				else
				{
					/* Install the original tlist in the topmost WindowAgg */
					window_tlist = tlist;
				}

				/* ... and make the WindowAgg plan node */
				result_plan = (Plan *)
					make_windowagg(root,
								   (List *) copyObject(window_tlist),
								   wflists->windowFuncs[wc->winref],
								   wc->winref,
								   partNumCols,
								   partColIdx,
								   partOperators,
								   ordNumCols,
								   ordColIdx,
								   ordOperators,
								   wc->frameOptions,
								   wc->startOffset,
								   wc->endOffset,
								   result_plan);
			}
		}
	}							/* end of if (setOperations) */

	/*
	 * If there is a DISTINCT clause, add the necessary node(s).
	 */
	if (parse->distinctClause)
	{
		double		dNumDistinctRows;
		long		numDistinctRows;

		/*
		 * If there was grouping or aggregation, use the current number of
		 * rows as the estimated number of DISTINCT rows (ie, assume the
		 * result was already mostly unique).  If not, use the number of
		 * distinct-groups calculated previously.
		 */
		if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs)
			dNumDistinctRows = result_plan->plan_rows;
		else
			dNumDistinctRows = dNumGroups;

		/* Also convert to long int --- but 'ware overflow! */
		numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);

		/* Choose implementation method if we didn't already */
		if (!tested_hashed_distinct)
		{
			/*
			 * At this point, either hashed or sorted grouping will have to
			 * work from result_plan, so we pass that as both "cheapest" and
			 * "sorted".
			 */
			use_hashed_distinct =
				choose_hashed_distinct(root,
									   tuple_fraction, limit_tuples,
									   result_plan->plan_rows,
									   result_plan->plan_width,
									   result_plan->startup_cost,
									   result_plan->total_cost,
									   result_plan->startup_cost,
									   result_plan->total_cost,
									   current_pathkeys,
									   dNumDistinctRows);
		}

		if (use_hashed_distinct)
		{
			/* Hashed aggregate plan --- no sort needed */
			result_plan = (Plan *) make_agg(root,
											result_plan->targetlist,
											NIL,
											AGG_HASHED,
											NULL,
										  list_length(parse->distinctClause),
								 extract_grouping_cols(parse->distinctClause,
													result_plan->targetlist),
								 extract_grouping_ops(parse->distinctClause),
											NIL,
											numDistinctRows,
											false,
											true,
											result_plan);
			/* Hashed aggregation produces randomly-ordered results */
			current_pathkeys = NIL;
		}
		else
		{
			/*
			 * Use a Unique node to implement DISTINCT.  Add an explicit sort
			 * if we couldn't make the path come out the way the Unique node
			 * needs it.  If we do have to sort, always sort by the more
			 * rigorous of DISTINCT and ORDER BY, to avoid a second sort
			 * below.  However, for regular DISTINCT, don't sort now if we
			 * don't have to --- sorting afterwards will likely be cheaper,
			 * and also has the possibility of optimizing via LIMIT.  But for
			 * DISTINCT ON, we *must* force the final sort now, else it won't
			 * have the desired behavior.
			 */
			List	   *needed_pathkeys;

			if (parse->hasDistinctOn &&
				list_length(root->distinct_pathkeys) <
				list_length(root->sort_pathkeys))
				needed_pathkeys = root->sort_pathkeys;
			else
				needed_pathkeys = root->distinct_pathkeys;

			if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
			{
				if (list_length(root->distinct_pathkeys) >=
					list_length(root->sort_pathkeys))
					current_pathkeys = root->distinct_pathkeys;
				else
				{
					current_pathkeys = root->sort_pathkeys;
					/* Assert checks that parser didn't mess up... */
					Assert(pathkeys_contained_in(root->distinct_pathkeys,
												 current_pathkeys));
				}

				result_plan = (Plan *) make_sort_from_pathkeys(root,
															   result_plan,
															current_pathkeys,
															   -1.0);
			}

			result_plan = (Plan *) make_unique(result_plan,
											   parse->distinctClause);
			result_plan->plan_rows = dNumDistinctRows;
			/* The Unique node won't change sort ordering */
		}
	}

	/*
	 * If ORDER BY was given and we were not able to make the plan come out in
	 * the right order, add an explicit sort step.
	 */
	if (parse->sortClause)
	{
		if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		{
			result_plan = (Plan *) make_sort_from_pathkeys(root,
														   result_plan,
														 root->sort_pathkeys,
														   limit_tuples);
			current_pathkeys = root->sort_pathkeys;
		}
	}

	/*
	 * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
	 * (Note: we intentionally test parse->rowMarks not root->rowMarks here.
	 * If there are only non-locking rowmarks, they should be handled by the
	 * ModifyTable node instead.)
	 */
	if (parse->rowMarks)
	{
		result_plan = (Plan *) make_lockrows(result_plan,
											 root->rowMarks,
											 SS_assign_special_param(root));

		/*
		 * The result can no longer be assumed sorted, since locking might
		 * cause the sort key columns to be replaced with new values.
		 */
		current_pathkeys = NIL;
	}

	/*
	 * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
	 */
	if (limit_needed(parse))
	{
		result_plan = (Plan *) make_limit(result_plan,
										  parse->limitOffset,
										  parse->limitCount,
										  offset_est,
										  count_est);
	}

	/*
	 * Return the actual output ordering in query_pathkeys for possible use by
	 * an outer query level.
	 */
	root->query_pathkeys = current_pathkeys;

	return result_plan;
}


/*
 * Given a groupclause for a collection of grouping sets, produce the
 * corresponding groupColIdx.
 *
 * root->grouping_map maps the tleSortGroupRef to the actual column position in
 * the input tuple. So we get the ref from the entries in the groupclause and
 * look them up there.
 */
static AttrNumber *
remap_groupColIdx(PlannerInfo *root, List *groupClause)
{
	AttrNumber *grouping_map = root->grouping_map;
	AttrNumber *new_grpColIdx;
	ListCell   *lc;
	int			i;

	Assert(grouping_map);

	new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause));

	i = 0;
	foreach(lc, groupClause)
	{
		SortGroupClause *clause = lfirst(lc);

		new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef];
	}

	return new_grpColIdx;
}
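
/*
 * Usage sketch (illustrative numbers): if groupClause holds clauses with
 * tleSortGroupRefs (2, 1), and root->grouping_map maps ref 2 -> column 3
 * and ref 1 -> column 4, the returned array is {3, 4}.
 */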

/*
 * Build Agg and Sort nodes to implement sorted grouping with one or more
 * grouping sets.  A plain GROUP BY or just the presence of aggregates counts
 * for this purpose as a single grouping set; the calling code is responsible
 * for providing a single-element rollup_groupclauses list for such cases,
 * though rollup_lists may be nil.
 *
 * The last entry in rollup_groupclauses (which is the one the input is sorted
 * on, if at all) is the one used for the returned Agg node. Any additional
 * rollups are attached, with corresponding sort info, to subsidiary Agg and
 * Sort nodes attached to the side of the real Agg node; these nodes don't
 * participate in the plan directly, but they are both a convenient way to
 * represent the required data and a convenient way to account for the costs
 * of execution.
 */
static Plan *
build_grouping_chain(PlannerInfo *root,
					 Query *parse,
					 List *tlist,
					 bool need_sort_for_grouping,
					 List *rollup_groupclauses,
					 List *rollup_lists,
					 AttrNumber *groupColIdx,
					 AggClauseCosts *agg_costs,
					 long numGroups,
					 Plan *result_plan)
{
	AttrNumber *top_grpColIdx = groupColIdx;
	List	   *chain = NIL;

	/*
	 * Prepare the grpColIdx for the real Agg node first, because we may need
	 * it for sorting
	 */
	if (parse->groupingSets)
		top_grpColIdx = remap_groupColIdx(root, llast(rollup_groupclauses));

	/*
	 * If we need a Sort operation on the input, generate that.
	 */
	if (need_sort_for_grouping)
	{
		result_plan = (Plan *)
			make_sort_from_groupcols(root,
									 llast(rollup_groupclauses),
									 top_grpColIdx,
									 result_plan);
	}

	/*
	 * Generate the side nodes that describe the other sort and group
	 * operations besides the top one.
	 */
	if (list_length(rollup_groupclauses) > 1)
	{
		ListCell   *lc,
				   *lc2;

		Assert(list_length(rollup_groupclauses) == list_length(rollup_lists));
		forboth(lc, rollup_groupclauses, lc2, rollup_lists)
		{
			List	   *groupClause = (List *) lfirst(lc);
			List	   *gsets = (List *) lfirst(lc2);
			AttrNumber *new_grpColIdx;
			Plan	   *sort_plan;
			Plan	   *agg_plan;

			/* We want to iterate over all but the last rollup list elements */
			if (lnext(lc) == NULL)
				break;

			new_grpColIdx = remap_groupColIdx(root, groupClause);

			sort_plan = (Plan *)
				make_sort_from_groupcols(root,
										 groupClause,
										 new_grpColIdx,
										 result_plan);

			/*
			 * sort_plan includes the cost of result_plan, which is not what
			 * we want (since we'll not actually run that plan again).  So
			 * correct the cost figures.
			 */
			sort_plan->startup_cost -= result_plan->total_cost;
			sort_plan->total_cost -= result_plan->total_cost;

			agg_plan = (Plan *) make_agg(root,
										 tlist,
										 (List *) parse->havingQual,
										 AGG_SORTED,
										 agg_costs,
										 list_length(linitial(gsets)),
										 new_grpColIdx,
										 extract_grouping_ops(groupClause),
										 gsets,
										 numGroups,
										 false,
										 true,
										 sort_plan);

			/*
			 * Nuke stuff we don't need to avoid bloating debug output.
			 */
			sort_plan->targetlist = NIL;
			sort_plan->lefttree = NULL;

			agg_plan->targetlist = NIL;
			agg_plan->qual = NIL;

			chain = lappend(chain, agg_plan);
		}
	}

	/*
	 * Now make the final Agg node
	 */
	{
		List	   *groupClause = (List *) llast(rollup_groupclauses);
		List	   *gsets = rollup_lists ? (List *) llast(rollup_lists) : NIL;
		int			numGroupCols;
		ListCell   *lc;

		if (gsets)
			numGroupCols = list_length(linitial(gsets));
		else
			numGroupCols = list_length(parse->groupClause);

		result_plan = (Plan *) make_agg(root,
										tlist,
										(List *) parse->havingQual,
								 (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
										agg_costs,
										numGroupCols,
										top_grpColIdx,
										extract_grouping_ops(groupClause),
										gsets,
										numGroups,
										false,
										true,
										result_plan);

		((Agg *) result_plan)->chain = chain;

		/*
		 * Add the additional costs. But only the total costs count, since the
		 * additional sorts aren't run on startup.
		 */
		foreach(lc, chain)
		{
			Plan	   *subplan = lfirst(lc);

			result_plan->total_cost += subplan->total_cost;
		}
	}

	return result_plan;
}
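
/*
 * Shape sketch (illustrative): for GROUP BY GROUPING SETS ((a), (b)) one
 * rollup drives the returned Agg node (with an input Sort if needed), and
 * the other becomes a side Agg/Sort pair stored in the Agg's "chain" list,
 * representing an extra sort-and-aggregate pass over the same input rather
 * than an ordinary child of the plan tree.
 */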

/*
 * add_tlist_costs_to_plan
 *
 * Estimate the execution costs associated with evaluating the targetlist
 * expressions, and add them to the cost estimates for the Plan node.
 *
 * If the tlist contains set-returning functions, also inflate the Plan's cost
 * and plan_rows estimates accordingly.  (Hence, this must be called *after*
 * any logic that uses plan_rows to, eg, estimate qual evaluation costs.)
 *
 * Note: during initial stages of planning, we mostly consider plan nodes with
 * "flat" tlists, containing just Vars.  So their evaluation cost is zero
 * according to the model used by cost_qual_eval() (or if you prefer, the cost
 * is factored into cpu_tuple_cost).  Thus we can avoid accounting for tlist
 * cost throughout query_planner() and subroutines.  But once we apply a
 * tlist that might contain actual operators, sub-selects, etc, we'd better
 * account for its cost.  Any set-returning functions in the tlist must also
 * affect the estimated rowcount.
 *
 * Once grouping_planner() has applied a general tlist to the topmost
 * scan/join plan node, any tlist eval cost for added-on nodes should be
 * accounted for as we create those nodes.  Presently, of the node types we
 * can add on later, only Agg, WindowAgg, and Group project new tlists (the
 * rest just copy their input tuples) --- so make_agg(), make_windowagg() and
 * make_group() are responsible for calling this function to account for their
 * tlist costs.
 */
void
add_tlist_costs_to_plan(PlannerInfo *root, Plan *plan, List *tlist)
{
	QualCost	tlist_cost;
	double		tlist_rows;

	cost_qual_eval(&tlist_cost, tlist, root);
	plan->startup_cost += tlist_cost.startup;
	plan->total_cost += tlist_cost.startup +
		tlist_cost.per_tuple * plan->plan_rows;

	tlist_rows = tlist_returns_set_rows(tlist);
	if (tlist_rows > 1)
	{
		/*
		 * We assume that execution costs of the tlist proper were all
		 * accounted for by cost_qual_eval.  However, it still seems
		 * appropriate to charge something more for the executor's general
		 * costs of processing the added tuples.  The cost is probably less
		 * than cpu_tuple_cost, though, so we arbitrarily use half of that.
		 */
		plan->total_cost += plan->plan_rows * (tlist_rows - 1) *
			cpu_tuple_cost / 2;

		plan->plan_rows *= tlist_rows;
	}
}
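
/*
 * Worked example (illustrative numbers): if the tlist contains a
 * set-returning function estimated to emit 10 rows per call and the plan
 * had plan_rows = 100, we add 100 * (10 - 1) * cpu_tuple_cost / 2 to
 * total_cost and inflate plan_rows to 1000.
 */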

/*
 * Detect whether a plan node is a "dummy" plan created when a relation
 * is deemed not to need scanning due to constraint exclusion.
 *
 * Currently, such dummy plans are Result nodes with constant FALSE
 * filter quals (see set_dummy_rel_pathlist and create_append_plan).
 *
 * XXX this probably ought to be somewhere else, but not clear where.
 */
bool
is_dummy_plan(Plan *plan)
{
	if (IsA(plan, Result))
	{
		List	   *rcqual = (List *) ((Result *) plan)->resconstantqual;

		if (list_length(rcqual) == 1)
		{
			Const	   *constqual = (Const *) linitial(rcqual);

			if (constqual && IsA(constqual, Const))
			{
				if (!constqual->constisnull &&
					!DatumGetBool(constqual->constvalue))
					return true;
			}
		}
	}
	return false;
}
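
/*
 * Illustrative example (hypothetical): a qual the planner reduces to
 * constant FALSE, such as "WHERE 1 = 2" on a plain table, yields a Result
 * node whose resconstantqual is a one-element list containing a false
 * Const; is_dummy_plan() returns true for exactly that shape.
 */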

/*
 * Create a bitmapset of the RT indexes of live base relations
 *
 * Helper for preprocess_rowmarks ... at this point in the proceedings,
 * the only good way to distinguish baserels from appendrel children
 * is to see what is in the join tree.
 */
static Bitmapset *
get_base_rel_indexes(Node *jtnode)
{
	Bitmapset  *result;

	if (jtnode == NULL)
		return NULL;
	if (IsA(jtnode, RangeTblRef))
	{
		int			varno = ((RangeTblRef *) jtnode)->rtindex;

		result = bms_make_singleton(varno);
	}
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		result = NULL;
		foreach(l, f->fromlist)
			result = bms_join(result,
							  get_base_rel_indexes(lfirst(l)));
	}
	else if (IsA(jtnode, JoinExpr))
	{
		JoinExpr   *j = (JoinExpr *) jtnode;

		result = bms_join(get_base_rel_indexes(j->larg),
						  get_base_rel_indexes(j->rarg));
	}
	else
	{
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(jtnode));
		result = NULL;			/* keep compiler quiet */
	}
	return result;
}

/*
 * preprocess_rowmarks - set up PlanRowMarks if needed
 */
static void
preprocess_rowmarks(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	Bitmapset  *rels;
	List	   *prowmarks;
	ListCell   *l;
	int			i;

	if (parse->rowMarks)
	{
		/*
		 * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
		 * grouping, since grouping renders a reference to individual tuple
		 * CTIDs invalid.  This is also checked at parse time, but that's
		 * insufficient because of rule substitution, query pullup, etc.
		 */
		CheckSelectLocking(parse, ((RowMarkClause *)
								   linitial(parse->rowMarks))->strength);
	}
	else
	{
		/*
		 * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
		 * UPDATE/SHARE.
		 */
		if (parse->commandType != CMD_UPDATE &&
			parse->commandType != CMD_DELETE)
			return;
	}

	/*
	 * We need to have rowmarks for all base relations except the target. We
	 * make a bitmapset of all base rels and then remove the items we don't
	 * need or have FOR [KEY] UPDATE/SHARE marks for.
	 */
	rels = get_base_rel_indexes((Node *) parse->jointree);
	if (parse->resultRelation)
		rels = bms_del_member(rels, parse->resultRelation);

	/*
	 * Convert RowMarkClauses to PlanRowMark representation.
	 */
	prowmarks = NIL;
	foreach(l, parse->rowMarks)
	{
		RowMarkClause *rc = (RowMarkClause *) lfirst(l);
		RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
		PlanRowMark *newrc;

		/*
		 * Currently, it is syntactically impossible to have FOR UPDATE et al
		 * applied to an update/delete target rel.  If that ever becomes
		 * possible, we should drop the target from the PlanRowMark list.
		 */
		Assert(rc->rti != parse->resultRelation);

		/*
		 * Ignore RowMarkClauses for subqueries; they aren't real tables and
		 * can't support true locking.  Subqueries that got flattened into the
		 * main query should be ignored completely.  Any that didn't will get
		 * ROW_MARK_COPY items in the next loop.
		 */
		if (rte->rtekind != RTE_RELATION)
			continue;

		rels = bms_del_member(rels, rc->rti);

		newrc = makeNode(PlanRowMark);
		newrc->rti = newrc->prti = rc->rti;
		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
		newrc->markType = select_rowmark_type(rte, rc->strength);
		newrc->allMarkTypes = (1 << newrc->markType);
		newrc->strength = rc->strength;
		newrc->waitPolicy = rc->waitPolicy;
		newrc->isParent = false;

		prowmarks = lappend(prowmarks, newrc);
	}

	/*
	 * Now, add rowmarks for any non-target, non-locked base relations.
	 */
	i = 0;
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
		PlanRowMark *newrc;

		i++;
		if (!bms_is_member(i, rels))
			continue;

		newrc = makeNode(PlanRowMark);
		newrc->rti = newrc->prti = i;
		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
		newrc->markType = select_rowmark_type(rte, LCS_NONE);
		newrc->allMarkTypes = (1 << newrc->markType);
		newrc->strength = LCS_NONE;
		newrc->waitPolicy = LockWaitBlock;		/* doesn't matter */
		newrc->isParent = false;

		prowmarks = lappend(prowmarks, newrc);
	}

	root->rowMarks = prowmarks;
}

/*
 * Select RowMarkType to use for a given table
 */
RowMarkType
select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
{
	if (rte->rtekind != RTE_RELATION)
	{
		/* If it's not a table at all, use ROW_MARK_COPY */
		return ROW_MARK_COPY;
	}
	else if (rte->relkind == RELKIND_FOREIGN_TABLE)
	{
		/* Let the FDW select the rowmark type, if it wants to */
		FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);

		if (fdwroutine->GetForeignRowMarkType != NULL)
			return fdwroutine->GetForeignRowMarkType(rte, strength);
		/* Otherwise, use ROW_MARK_COPY by default */
		return ROW_MARK_COPY;
	}
	else
	{
		/* Regular table, apply the appropriate lock type */
		switch (strength)
		{
			case LCS_NONE:

				/*
				 * We don't need a tuple lock, only the ability to re-fetch
				 * the row.  Regular tables support ROW_MARK_REFERENCE, but if
				 * this RTE has security barrier quals, it will be turned into
				 * a subquery during planning, so use ROW_MARK_COPY.
				 *
				 * This is only necessary for LCS_NONE, since real tuple locks
				 * on an RTE with security barrier quals are supported by
				 * pushing the lock down into the subquery --- see
				 * expand_security_qual.
				 */
				if (rte->securityQuals != NIL)
					return ROW_MARK_COPY;
				return ROW_MARK_REFERENCE;
				break;
			case LCS_FORKEYSHARE:
				return ROW_MARK_KEYSHARE;
				break;
			case LCS_FORSHARE:
				return ROW_MARK_SHARE;
				break;
			case LCS_FORNOKEYUPDATE:
				return ROW_MARK_NOKEYEXCLUSIVE;
				break;
			case LCS_FORUPDATE:
				return ROW_MARK_EXCLUSIVE;
				break;
		}
		elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
		return ROW_MARK_EXCLUSIVE;		/* keep compiler quiet */
	}
}
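
/*
 * Summary of the mapping above (added commentary): FOR KEY SHARE ->
 * ROW_MARK_KEYSHARE, FOR SHARE -> ROW_MARK_SHARE, FOR NO KEY UPDATE ->
 * ROW_MARK_NOKEYEXCLUSIVE, FOR UPDATE -> ROW_MARK_EXCLUSIVE; with no
 * locking clause (LCS_NONE), a regular table gets ROW_MARK_REFERENCE
 * unless security barrier quals force ROW_MARK_COPY.
 */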

/*
 * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
 *
 * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
 * results back in *count_est and *offset_est.  These variables are set to
 * 0 if the corresponding clause is not present, and -1 if it's present
 * but we couldn't estimate the value for it.  (The "0" convention is OK
 * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
 * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
 * usual practice of never estimating less than one row.)  These values will
 * be passed to make_limit, which see if you change this code.
 *
 * The return value is the suitably adjusted tuple_fraction to use for
 * planning the query.  This adjustment is not overridable, since it reflects
 * plan actions that grouping_planner() will certainly take, not assumptions
 * about context.
 */
static double
preprocess_limit(PlannerInfo *root, double tuple_fraction,
				 int64 *offset_est, int64 *count_est)
{
	Query	   *parse = root->parse;
	Node	   *est;
	double		limit_fraction;

	/* Should not be called unless LIMIT or OFFSET */
	Assert(parse->limitCount || parse->limitOffset);

	/*
	 * Try to obtain the clause values.  We use estimate_expression_value
	 * primarily because it can sometimes do something useful with Params.
	 */
	if (parse->limitCount)
	{
		est = estimate_expression_value(root, parse->limitCount);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* NULL indicates LIMIT ALL, ie, no limit */
				*count_est = 0; /* treat as not present */
			}
			else
			{
				*count_est = DatumGetInt64(((Const *) est)->constvalue);
				if (*count_est <= 0)
					*count_est = 1;		/* force to at least 1 */
			}
		}
		else
			*count_est = -1;	/* can't estimate */
	}
	else
		*count_est = 0;			/* not present */

	if (parse->limitOffset)
	{
		est = estimate_expression_value(root, parse->limitOffset);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* Treat NULL as no offset; the executor will too */
				*offset_est = 0;	/* treat as not present */
			}
			else
			{
				*offset_est = DatumGetInt64(((Const *) est)->constvalue);
				if (*offset_est < 0)
					*offset_est = 0;	/* treat as not present */
			}
		}
		else
			*offset_est = -1;	/* can't estimate */
	}
	else
		*offset_est = 0;		/* not present */

	if (*count_est != 0)
	{
		/*
		 * A LIMIT clause limits the absolute number of tuples returned.
		 * However, if it's not a constant LIMIT then we have to guess; for
		 * lack of a better idea, assume 10% of the plan's result is wanted.
		 */
		if (*count_est < 0 || *offset_est < 0)
		{
			/* LIMIT or OFFSET is an expression ... punt ... */
			limit_fraction = 0.10;
		}
		else
		{
			/* LIMIT (plus OFFSET, if any) is max number of tuples needed */
			limit_fraction = (double) *count_est + (double) *offset_est;
		}

3070 3071
		/*
		 * If we have absolute limits from both caller and LIMIT, use the
3072 3073 3074 3075
		 * smaller value; likewise if they are both fractional.  If one is
		 * fractional and the other absolute, we can't easily determine which
		 * is smaller, but we use the heuristic that the absolute will usually
		 * be smaller.
3076 3077 3078 3079 3080 3081 3082 3083 3084 3085
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute */
				tuple_fraction = Min(tuple_fraction, limit_fraction);
			}
			else
			{
3086
				/* caller absolute, limit fractional; use caller's value */
3087 3088 3089 3090 3091 3092
			}
		}
		else if (tuple_fraction > 0.0)
		{
			if (limit_fraction >= 1.0)
			{
3093 3094
				/* caller fractional, limit absolute; use limit */
				tuple_fraction = limit_fraction;
3095 3096 3097 3098
			}
			else
			{
				/* both fractional */
3099
				tuple_fraction = Min(tuple_fraction, limit_fraction);
3100 3101 3102 3103 3104 3105 3106 3107
			}
		}
		else
		{
			/* no info from caller, just use limit */
			tuple_fraction = limit_fraction;
		}
	}
3108 3109 3110
	else if (*offset_est != 0 && tuple_fraction > 0.0)
	{
		/*
B
Bruce Momjian 已提交
3111
		 * We have an OFFSET but no LIMIT.  This acts entirely differently
B
Bruce Momjian 已提交
3112 3113 3114 3115
		 * from the LIMIT case: here, we need to increase rather than decrease
		 * the caller's tuple_fraction, because the OFFSET acts to cause more
		 * tuples to be fetched instead of fewer.  This only matters if we got
		 * a tuple_fraction > 0, however.
3116 3117 3118 3119 3120 3121 3122 3123 3124 3125
		 *
		 * As above, use 10% if OFFSET is present but unestimatable.
		 */
		if (*offset_est < 0)
			limit_fraction = 0.10;
		else
			limit_fraction = (double) *offset_est;

		/*
		 * If we have absolute counts from both caller and OFFSET, add them
B
Bruce Momjian 已提交
3126
		 * together; likewise if they are both fractional.  If one is
B
Bruce Momjian 已提交
3127 3128
		 * fractional and the other absolute, we want to take the larger, and
		 * we heuristically assume that's the fractional one.
3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute, so add them together */
				tuple_fraction += limit_fraction;
			}
			else
			{
				/* caller absolute, limit fractional; use limit */
				tuple_fraction = limit_fraction;
			}
		}
		else
		{
			if (limit_fraction >= 1.0)
			{
				/* caller fractional, limit absolute; use caller's value */
			}
			else
			{
				/* both fractional, so add them together */
				tuple_fraction += limit_fraction;
				if (tuple_fraction >= 1.0)
B
Bruce Momjian 已提交
3154
					tuple_fraction = 0.0;		/* assume fetch all */
3155 3156 3157
			}
		}
	}
3158 3159 3160 3161

	return tuple_fraction;
}

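/*
 * A worked example of the conventions above (illustrative only): given
 *		SELECT * FROM tab LIMIT 10 OFFSET 20
 * with both clauses constant, we set *count_est = 10 and *offset_est = 20,
 * so limit_fraction = 30; a caller-supplied absolute tuple_fraction such as
 * 100 is then reduced to 30, since at most 30 tuples need be fetched.
 */
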
/*
 * limit_needed - do we actually need a Limit plan node?
 *
 * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
 * a Limit node.  This is worth checking for because "OFFSET 0" is a common
 * locution for an optimization fence.  (Because other places in the planner
 * merely check whether parse->limitOffset isn't NULL, it will still work as
 * an optimization fence --- we're just suppressing unnecessary run-time
 * overhead.)
 *
 * This might look like it could be merged into preprocess_limit, but there's
 * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
 * in preprocess_limit it's good enough to consider estimated values.
 */
static bool
limit_needed(Query *parse)
{
	Node	   *node;

	node = parse->limitCount;
	if (node)
	{
		if (IsA(node, Const))
		{
			/* NULL indicates LIMIT ALL, ie, no limit */
			if (!((Const *) node)->constisnull)
				return true;	/* LIMIT with a constant value */
		}
		else
			return true;		/* non-constant LIMIT */
	}

	node = parse->limitOffset;
	if (node)
	{
		if (IsA(node, Const))
		{
			/* Treat NULL as no offset; the executor would too */
			if (!((Const *) node)->constisnull)
			{
				int64		offset = DatumGetInt64(((Const *) node)->constvalue);

				if (offset != 0)
					return true;	/* OFFSET with a nonzero value */
			}
		}
		else
			return true;		/* non-constant OFFSET */
	}

	return false;				/* don't need a Limit plan node */
}

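/*
 * For illustration: in
 *		SELECT * FROM (SELECT * FROM tab WHERE x > 0 OFFSET 0) ss
 * the constant-zero OFFSET still acts as an optimization fence (the
 * subquery keeps its limitOffset node), but limit_needed() returns false,
 * so no Limit plan node is added for the subquery.
 */
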

/*
 * remove_useless_groupby_columns
 *		Remove any columns in the GROUP BY clause that are redundant due to
 *		being functionally dependent on other GROUP BY columns.
 *
 * Since some other DBMSes do not allow references to ungrouped columns, it's
 * not unusual to find all columns listed in GROUP BY even though listing the
 * primary-key columns would be sufficient.  Deleting such excess columns
 * avoids redundant sorting work, so it's worth doing.  When we do this, we
 * must mark the plan as dependent on the pkey constraint (compare the
 * parser's check_ungrouped_columns() and check_functional_grouping()).
 *
 * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
 * index as the determining columns.  But as with check_functional_grouping(),
 * there's currently no way to represent dependency on a NOT NULL constraint,
 * so we consider only the pkey for now.
 */
static void
remove_useless_groupby_columns(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	Bitmapset **groupbyattnos;
	Bitmapset **surplusvars;
	ListCell   *lc;
	int			relid;

	/* No chance to do anything if there are less than two GROUP BY items */
	if (list_length(parse->groupClause) < 2)
		return;

	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
	if (parse->groupingSets)
		return;

	/*
	 * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
	 * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
	 * that are GROUP BY items.
	 */
	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
										   (list_length(parse->rtable) + 1));
	foreach(lc, parse->groupClause)
	{
		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
		Var		   *var = (Var *) tle->expr;

		/*
		 * Ignore non-Vars and Vars from other query levels.
		 *
		 * XXX in principle, stable expressions containing Vars could also be
		 * removed, if all the Vars are functionally dependent on other GROUP
		 * BY items.  But it's not clear that such cases occur often enough to
		 * be worth troubling over.
		 */
		if (!IsA(var, Var) ||
			var->varlevelsup > 0)
			continue;

		/* OK, remember we have this Var */
		relid = var->varno;
		Assert(relid <= list_length(parse->rtable));
		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
						 var->varattno - FirstLowInvalidHeapAttributeNumber);
	}

	/*
	 * Consider each relation and see if it is possible to remove some of its
	 * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
	 * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
	 * of the column attnos of RTE k that are removable GROUP BY items.
	 */
	surplusvars = NULL;			/* don't allocate array unless required */
	relid = 0;
	foreach(lc, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
		Bitmapset  *relattnos;
		Bitmapset  *pkattnos;
		Oid			constraintOid;

		relid++;

		/* Only plain relations could have primary-key constraints */
		if (rte->rtekind != RTE_RELATION)
			continue;

		/* Nothing to do unless this rel has multiple Vars in GROUP BY */
		relattnos = groupbyattnos[relid];
		if (bms_membership(relattnos) != BMS_MULTIPLE)
			continue;

		/*
		 * Can't remove any columns for this rel if there is no suitable
		 * (i.e., nondeferrable) primary key constraint.
		 */
		pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
		if (pkattnos == NULL)
			continue;

		/*
		 * If the primary key is a proper subset of relattnos then we have
		 * some items in the GROUP BY that can be removed.
		 */
		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
		{
			/*
			 * To easily remember whether we've found anything to do, we don't
			 * allocate the surplusvars[] array until we find something.
			 */
			if (surplusvars == NULL)
				surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
										   (list_length(parse->rtable) + 1));

			/* Remember the attnos of the removable columns */
			surplusvars[relid] = bms_difference(relattnos, pkattnos);

			/* Also, mark the resulting plan as dependent on this constraint */
			parse->constraintDeps = lappend_oid(parse->constraintDeps,
												constraintOid);
		}
	}

	/*
	 * If we found any surplus Vars, build a new GROUP BY clause without them.
	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
	 * markings, but that's harmless.)
	 */
	if (surplusvars != NULL)
	{
		List	   *new_groupby = NIL;

		foreach(lc, parse->groupClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
			Var		   *var = (Var *) tle->expr;

			/*
			 * New list must include non-Vars, outer Vars, and anything not
			 * marked as surplus.
			 */
			if (!IsA(var, Var) ||
				var->varlevelsup > 0 ||
			!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
						   surplusvars[var->varno]))
				new_groupby = lappend(new_groupby, sgc);
		}

		parse->groupClause = new_groupby;
	}
}

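/*
 * For illustration: given a table created as
 *		CREATE TABLE t (id int PRIMARY KEY, name text);
 * the query
 *		SELECT id, name, count(*) FROM t GROUP BY id, name;
 * is reduced here to grouping by id alone, since name is functionally
 * dependent on the pkey column; the dependency is recorded in
 * constraintDeps so the plan is invalidated if the constraint goes away.
 */
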
/*
 * preprocess_groupclause - do preparatory work on GROUP BY clause
 *
 * The idea here is to adjust the ordering of the GROUP BY elements
 * (which in itself is semantically insignificant) to match ORDER BY,
 * thereby allowing a single sort operation to both implement the ORDER BY
 * requirement and set up for a Unique step that implements GROUP BY.
 *
 * In principle it might be interesting to consider other orderings of the
 * GROUP BY elements, which could match the sort ordering of other
 * possible plans (eg an indexscan) and thereby reduce cost.  We don't
 * bother with that, though.  Hashed grouping will frequently win anyway.
 *
 * Note: we need no comparable processing of the distinctClause because
 * the parser already enforced that that matches ORDER BY.
 *
 * For grouping sets, the order of items is instead forced to agree with that
 * of the grouping set (and items not in the grouping set are skipped). The
 * work of sorting the order of grouping set elements to match the ORDER BY if
 * possible is done elsewhere.
 */
static List *
preprocess_groupclause(PlannerInfo *root, List *force)
{
	Query	   *parse = root->parse;
	List	   *new_groupclause = NIL;
	bool		partial_match;
	ListCell   *sl;
	ListCell   *gl;

	/* For grouping sets, we need to force the ordering */
	if (force)
	{
		foreach(sl, force)
		{
			Index		ref = lfirst_int(sl);
			SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);

			new_groupclause = lappend(new_groupclause, cl);
		}

		return new_groupclause;
	}

	/* If no ORDER BY, nothing useful to do here */
	if (parse->sortClause == NIL)
		return parse->groupClause;

	/*
	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
	 * items, but only as far as we can make a matching prefix.
	 *
	 * This code assumes that the sortClause contains no duplicate items.
	 */
	foreach(sl, parse->sortClause)
	{
		SortGroupClause *sc = (SortGroupClause *) lfirst(sl);

		foreach(gl, parse->groupClause)
		{
			SortGroupClause *gc = (SortGroupClause *) lfirst(gl);

			if (equal(gc, sc))
			{
				new_groupclause = lappend(new_groupclause, gc);
				break;
			}
		}
		if (gl == NULL)
			break;				/* no match, so stop scanning */
	}

	/* Did we match all of the ORDER BY list, or just some of it? */
	partial_match = (sl != NULL);

	/* If no match at all, no point in reordering GROUP BY */
	if (new_groupclause == NIL)
		return parse->groupClause;

	/*
	 * Add any remaining GROUP BY items to the new list, but only if we were
	 * able to make a complete match.  In other words, we only rearrange the
	 * GROUP BY list if the result is that one list is a prefix of the other
	 * --- otherwise there's no possibility of a common sort.  Also, give up
	 * if there are any non-sortable GROUP BY items, since then there's no
	 * hope anyway.
	 */
	foreach(gl, parse->groupClause)
	{
		SortGroupClause *gc = (SortGroupClause *) lfirst(gl);

		if (list_member_ptr(new_groupclause, gc))
			continue;			/* it matched an ORDER BY item */
		if (partial_match)
			return parse->groupClause;	/* give up, no common sort possible */
		if (!OidIsValid(gc->sortop))
			return parse->groupClause;	/* give up, GROUP BY can't be sorted */
		new_groupclause = lappend(new_groupclause, gc);
	}

	/* Success --- install the rearranged GROUP BY list */
	Assert(list_length(parse->groupClause) == list_length(new_groupclause));
	return new_groupclause;
}

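/*
 * For illustration: "GROUP BY b, a ORDER BY a, b" is rearranged here to
 * group by (a, b), letting a single sort serve both clauses, while
 * "GROUP BY b, a ORDER BY a, c" matches only a partial prefix, so the
 * GROUP BY list is returned unchanged.
 */
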
/*
 * Extract lists of grouping sets that can be implemented using a single
 * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
 *
 * Input must be sorted with smallest sets first. Result has each sublist
 * sorted with smallest sets first.
 *
 * We want to produce the absolute minimum possible number of lists here to
 * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
 * of finding the minimal partition of a partially-ordered set into chains
 * (which is what we need, taking the list of grouping sets as a poset ordered
 * by set inclusion) can be mapped to the problem of finding the maximum
 * cardinality matching on a bipartite graph, which is solvable in polynomial
 * time with a worst case of no worse than O(n^2.5) and usually much
 * better. Since our N is at most 4096, we don't need to consider fallbacks to
 * heuristic or approximate methods.  (Planning time for a 12-d cube is under
 * half a second on my modest system even with optimization off and assertions
 * on.)
 */
static List *
extract_rollup_sets(List *groupingSets)
{
	int			num_sets_raw = list_length(groupingSets);
	int			num_empty = 0;
	int			num_sets = 0;	/* distinct sets */
	int			num_chains = 0;
	List	   *result = NIL;
	List	  **results;
	List	  **orig_sets;
	Bitmapset **set_masks;
	int		   *chains;
	short	  **adjacency;
	short	   *adjacency_buf;
	BipartiteMatchState *state;
	int			i;
	int			j;
	int			j_size;
	ListCell   *lc1 = list_head(groupingSets);
	ListCell   *lc;

	/*
	 * Start by stripping out empty sets.  The algorithm doesn't require this,
	 * but the planner currently needs all empty sets to be returned in the
	 * first list, so we strip them here and add them back after.
	 */
	while (lc1 && lfirst(lc1) == NIL)
	{
		++num_empty;
		lc1 = lnext(lc1);
	}

	/* bail out now if it turns out that all we had were empty sets. */
	if (!lc1)
		return list_make1(groupingSets);

	/*----------
	 * We don't strictly need to remove duplicate sets here, but if we don't,
	 * they tend to become scattered through the result, which is a bit
	 * confusing (and irritating if we ever decide to optimize them out).
	 * So we remove them here and add them back after.
	 *
	 * For each non-duplicate set, we fill in the following:
	 *
	 * orig_sets[i] = list of the original set lists
	 * set_masks[i] = bitmapset for testing inclusion
	 * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
	 *
	 * chains[i] will be the result group this set is assigned to.
	 *
	 * We index all of these from 1 rather than 0 because it is convenient
	 * to leave 0 free for the NIL node in the graph algorithm.
	 *----------
	 */
	orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
	set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
	adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
	adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));

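	/*
	 * i is the next index to assign; j and j_size track the first index and
	 * the length of the sets at the current size, so the duplicate check
	 * below need only compare against previous sets of equal length.
	 */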
	j_size = 0;
	j = 0;
	i = 1;

	for_each_cell(lc, lc1)
	{
		List	   *candidate = lfirst(lc);
		Bitmapset  *candidate_set = NULL;
		ListCell   *lc2;
		int			dup_of = 0;

		foreach(lc2, candidate)
		{
			candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
		}

		/* we can only be a dup if we're the same length as a previous set */
		if (j_size == list_length(candidate))
		{
			int			k;

			for (k = j; k < i; ++k)
			{
				if (bms_equal(set_masks[k], candidate_set))
				{
					dup_of = k;
					break;
				}
			}
		}
		else if (j_size < list_length(candidate))
		{
			j_size = list_length(candidate);
			j = i;
		}

		if (dup_of > 0)
		{
			orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
			bms_free(candidate_set);
		}
		else
		{
			int			k;
			int			n_adj = 0;

			orig_sets[i] = list_make1(candidate);
			set_masks[i] = candidate_set;

			/* fill in adjacency list; no need to compare equal-size sets */

			for (k = j - 1; k > 0; --k)
			{
				if (bms_is_subset(set_masks[k], candidate_set))
					adjacency_buf[++n_adj] = k;
			}

			if (n_adj > 0)
			{
				adjacency_buf[0] = n_adj;
				adjacency[i] = palloc((n_adj + 1) * sizeof(short));
				memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
			}
			else
				adjacency[i] = NULL;

			++i;
		}
	}

	num_sets = i - 1;

	/*
	 * Apply the graph matching algorithm to do the work.
	 */
	state = BipartiteMatch(num_sets, num_sets, adjacency);

	/*
	 * Now, the state->pair* fields have the info we need to assign sets to
	 * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
	 * pair_vu[v] = u (both will be true, but we check both so that we can do
	 * it in one pass)
	 */
	chains = palloc0((num_sets + 1) * sizeof(int));

	for (i = 1; i <= num_sets; ++i)
	{
		int			u = state->pair_vu[i];
		int			v = state->pair_uv[i];

		if (u > 0 && u < i)
			chains[i] = chains[u];
		else if (v > 0 && v < i)
			chains[i] = chains[v];
		else
			chains[i] = ++num_chains;
	}

	/* build result lists. */
	results = palloc0((num_chains + 1) * sizeof(List *));

	for (i = 1; i <= num_sets; ++i)
	{
		int			c = chains[i];

		Assert(c > 0);

		results[c] = list_concat(results[c], orig_sets[i]);
	}

	/* push any empty sets back on the first list. */
	while (num_empty-- > 0)
		results[1] = lcons(NIL, results[1]);

	/* make result list */
	for (i = 1; i <= num_chains; ++i)
		result = lappend(result, results[i]);

	/*
	 * Free all the things.
	 *
	 * (This is over-fussy for small sets but for large sets we could have
	 * tied up a nontrivial amount of memory.)
	 */
	BipartiteMatchFree(state);
	pfree(results);
	pfree(chains);
	for (i = 1; i <= num_sets; ++i)
		if (adjacency[i])
			pfree(adjacency[i]);
	pfree(adjacency);
	pfree(adjacency_buf);
	pfree(orig_sets);
	for (i = 1; i <= num_sets; ++i)
		bms_free(set_masks[i]);
	pfree(set_masks);

	return result;
}

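/*
 * For illustration: the sets (), (a), (b), (a,b) can be covered by two
 * chains, e.g. { (), (a), (a,b) } and { (b) }, so two rollup passes
 * suffice where a naive approach would sort and aggregate four times.
 */
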
/*
 * Reorder the elements of a list of grouping sets such that they have correct
 * prefix relationships.
 *
 * The input must be ordered with smallest sets first; the result is returned
 * with largest sets first.  Note that the result shares no list substructure
 * with the input, so it's safe for the caller to modify it later.
 *
 * If we're passed in a sortclause, we follow its order of columns to the
 * extent possible, to minimize the chance that we add unnecessary sorts.
 * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
 * gets implemented in one pass.)
 */
static List *
reorder_grouping_sets(List *groupingsets, List *sortclause)
{
	ListCell   *lc;
	ListCell   *lc2;
	List	   *previous = NIL;
	List	   *result = NIL;

	foreach(lc, groupingsets)
	{
		List	   *candidate = lfirst(lc);
		List	   *new_elems = list_difference_int(candidate, previous);

		if (list_length(new_elems) > 0)
		{
			while (list_length(sortclause) > list_length(previous) &&
				   list_length(new_elems) > 0)
			{
				SortGroupClause *sc = list_nth(sortclause, list_length(previous));
				int			ref = sc->tleSortGroupRef;

				if (list_member_int(new_elems, ref))
				{
					previous = lappend_int(previous, ref);
					new_elems = list_delete_int(new_elems, ref);
				}
				else
				{
					/* diverged from the sortclause; give up on it */
					sortclause = NIL;
					break;
				}
			}

			foreach(lc2, new_elems)
			{
				previous = lappend_int(previous, lfirst_int(lc2));
			}
		}

		result = lcons(list_copy(previous), result);
		list_free(new_elems);
	}

	list_free(previous);

	return result;
}

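/*
 * For illustration: the input (c), (a,b,c) with sortclause c,b,a is
 * returned as (c,b,a), (c), so one sort on c,b,a can implement both the
 * rollup and the ORDER BY (see the header comment's example).
 */
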
/*
 * Compute query_pathkeys and other pathkeys during plan generation
 */
static void
standard_qp_callback(PlannerInfo *root, void *extra)
{
	Query	   *parse = root->parse;
	standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
	List	   *tlist = qp_extra->tlist;
	List	   *activeWindows = qp_extra->activeWindows;

	/*
	 * Calculate pathkeys that represent grouping/ordering requirements.  The
	 * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
	 * be, in which case we just leave their pathkeys empty.
	 */
	if (qp_extra->groupClause &&
		grouping_is_sortable(qp_extra->groupClause))
		root->group_pathkeys =
			make_pathkeys_for_sortclauses(root,
										  qp_extra->groupClause,
										  tlist);
	else
		root->group_pathkeys = NIL;

	/* We consider only the first (bottom) window in pathkeys logic */
	if (activeWindows != NIL)
	{
		WindowClause *wc = (WindowClause *) linitial(activeWindows);

		root->window_pathkeys = make_pathkeys_for_window(root,
														 wc,
														 tlist);
	}
	else
		root->window_pathkeys = NIL;

	if (parse->distinctClause &&
		grouping_is_sortable(parse->distinctClause))
		root->distinct_pathkeys =
			make_pathkeys_for_sortclauses(root,
										  parse->distinctClause,
										  tlist);
	else
		root->distinct_pathkeys = NIL;

	root->sort_pathkeys =
		make_pathkeys_for_sortclauses(root,
									  parse->sortClause,
									  tlist);

	/*
	 * Figure out whether we want a sorted result from query_planner.
	 *
	 * If we have a sortable GROUP BY clause, then we want a result sorted
	 * properly for grouping.  Otherwise, if we have window functions to
	 * evaluate, we try to sort for the first window.  Otherwise, if there's a
	 * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
	 * we try to produce output that's sufficiently well sorted for the
	 * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
	 * by the ORDER BY clause.
	 *
	 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
	 * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
	 * that might just leave us failing to exploit an available sort order at
	 * all.  Needs more thought.  The choice for DISTINCT versus ORDER BY is
	 * much easier, since we know that the parser ensured that one is a
	 * superset of the other.
	 */
	if (root->group_pathkeys)
		root->query_pathkeys = root->group_pathkeys;
	else if (root->window_pathkeys)
		root->query_pathkeys = root->window_pathkeys;
	else if (list_length(root->distinct_pathkeys) >
			 list_length(root->sort_pathkeys))
		root->query_pathkeys = root->distinct_pathkeys;
	else if (root->sort_pathkeys)
		root->query_pathkeys = root->sort_pathkeys;
	else
		root->query_pathkeys = NIL;
}

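/*
 * For illustration: with "GROUP BY a ORDER BY a, b", query_pathkeys becomes
 * the GROUP BY pathkeys (a), per the precedence rules above; the longer
 * ORDER BY list is then satisfied by an explicit final sort if necessary.
 */
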
/*
 * choose_hashed_grouping - should we use hashed grouping?
 *
 * Returns TRUE to select hashing, FALSE to select sorting.
 */
static bool
choose_hashed_grouping(PlannerInfo *root,
					   double tuple_fraction, double limit_tuples,
					   double path_rows, int path_width,
					   Path *cheapest_path, Path *sorted_path,
					   double dNumGroups, AggClauseCosts *agg_costs)
{
	Query	   *parse = root->parse;
	int			numGroupCols = list_length(parse->groupClause);
	bool		can_hash;
	bool		can_sort;
	Size		hashentrysize;
	List	   *target_pathkeys;
	List	   *current_pathkeys;
	Path		hashed_p;
	Path		sorted_p;

	/*
	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
	 * aggregates.  (Doing so would imply storing *all* the input values in
	 * the hash table, and/or running many sorts in parallel, either of which
	 * seems like a certain loser.)  We similarly don't support ordered-set
	 * aggregates in hashed aggregation, but that case is included in the
	 * numOrderedAggs count.
	 */
	can_hash = (agg_costs->numOrderedAggs == 0 &&
				grouping_is_hashable(parse->groupClause));
	can_sort = grouping_is_sortable(parse->groupClause);

	/* Quick out if only one choice is workable */
	if (!(can_hash && can_sort))
	{
		if (can_hash)
			return true;
		else if (can_sort)
			return false;
		else
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("could not implement GROUP BY"),
					 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
	}

	/* Prefer sorting when enable_hashagg is off */
	if (!enable_hashagg)
		return false;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 */

	/* Estimate per-hash-entry space at tuple width... */
	hashentrysize = MAXALIGN(path_width) + MAXALIGN(SizeofMinimalTupleHeader);
	/* plus space for pass-by-ref transition values... */
	hashentrysize += agg_costs->transitionSpace;
	/* plus the per-hash-entry overhead */
	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);

	if (hashentrysize * dNumGroups > work_mem * 1024L)
		return false;

	/*
	 * When we have both GROUP BY and DISTINCT, use the more-rigorous of
	 * DISTINCT and ORDER BY as the assumed required output sort order. This
	 * is an oversimplification because the DISTINCT might get implemented via
	 * hashing, but it's not clear that the case is common enough (or that our
	 * estimates are good enough) to justify trying to solve it exactly.
	 */
	if (list_length(root->distinct_pathkeys) >
		list_length(root->sort_pathkeys))
		target_pathkeys = root->distinct_pathkeys;
	else
		target_pathkeys = root->sort_pathkeys;

	/*
	 * See if the estimated cost is no more than doing it the other way. While
	 * avoiding the need for sorted input is usually a win, the fact that the
	 * output won't be sorted may be a loss; so we need to do an actual cost
	 * comparison.
	 *
	 * We need to consider cheapest_path + hashagg [+ final sort] versus
	 * either cheapest_path [+ sort] + group or agg [+ final sort] or
	 * presorted_path + group or agg [+ final sort] where brackets indicate a
	 * step that may not be needed.  We assume grouping_planner() will have
	 * passed us a presorted path only if it's a winner compared to
	 * cheapest_path for this purpose.
	 *
	 * These path variables are dummies that just hold cost fields; we don't
	 * make actual Paths for these steps.
	 */
	cost_agg(&hashed_p, root, AGG_HASHED, agg_costs,
			 numGroupCols, dNumGroups,
			 cheapest_path->startup_cost, cheapest_path->total_cost,
			 path_rows);
	/* Result of hashed agg is always unsorted */
	if (target_pathkeys)
		cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
				  dNumGroups, path_width,
				  0.0, work_mem, limit_tuples);

	if (sorted_path)
	{
		sorted_p.startup_cost = sorted_path->startup_cost;
		sorted_p.total_cost = sorted_path->total_cost;
		current_pathkeys = sorted_path->pathkeys;
	}
	else
	{
		sorted_p.startup_cost = cheapest_path->startup_cost;
		sorted_p.total_cost = cheapest_path->total_cost;
		current_pathkeys = cheapest_path->pathkeys;
	}
	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
	{
		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
				  path_rows, path_width,
				  0.0, work_mem, -1.0);
		current_pathkeys = root->group_pathkeys;
	}

	if (parse->hasAggs)
		cost_agg(&sorted_p, root, AGG_SORTED, agg_costs,
				 numGroupCols, dNumGroups,
				 sorted_p.startup_cost, sorted_p.total_cost,
				 path_rows);
	else
		cost_group(&sorted_p, root, numGroupCols, dNumGroups,
				   sorted_p.startup_cost, sorted_p.total_cost,
				   path_rows);
	/* The Agg or Group node will preserve ordering */
	if (target_pathkeys &&
		!pathkeys_contained_in(target_pathkeys, current_pathkeys))
		cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
				  dNumGroups, path_width,
				  0.0, work_mem, limit_tuples);

	/*
	 * Now make the decision using the top-level tuple fraction.
	 */
	 */
	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
									  tuple_fraction) < 0)
	{
		/* Hashed is cheaper, so use it */
		return true;
	}
	return false;
}

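/*
 * A rough, illustrative sizing check (numbers hypothetical): with
 * path_width = 40, the per-entry estimate above comes to roughly a hundred
 * bytes once tuple header, transition space, and hash overhead are added,
 * so at dNumGroups = 1 million the hashtable needs on the order of 100MB
 * and hashing is rejected unless work_mem is at least that large.
 */
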
/*
 * choose_hashed_distinct - should we use hashing for DISTINCT?
 *
 * This is fairly similar to choose_hashed_grouping, but there are enough
 * differences that it doesn't seem worth trying to unify the two functions.
 * (One difference is that we sometimes apply this after forming a Plan,
 * so the input alternatives can't be represented as Paths --- instead we
 * pass in the costs as individual variables.)
 *
 * But note that making the two choices independently is a bit bogus in
 * itself.  If the two could be combined into a single choice operation
 * it'd probably be better, but that seems far too unwieldy to be practical,
 * especially considering that the combination of GROUP BY and DISTINCT
 * isn't very common in real queries.  By separating them, we are giving
 * extra preference to using a sorting implementation when a common sort key
 * is available ... and that's not necessarily wrong anyway.
 *
 * Returns TRUE to select hashing, FALSE to select sorting.
 */
static bool
choose_hashed_distinct(PlannerInfo *root,
					   double tuple_fraction, double limit_tuples,
					   double path_rows, int path_width,
					   Cost cheapest_startup_cost, Cost cheapest_total_cost,
					   Cost sorted_startup_cost, Cost sorted_total_cost,
					   List *sorted_pathkeys,
					   double dNumDistinctRows)
{
	Query	   *parse = root->parse;
	int			numDistinctCols = list_length(parse->distinctClause);
	bool		can_sort;
	bool		can_hash;
	Size		hashentrysize;
	List	   *current_pathkeys;
	List	   *needed_pathkeys;
	Path		hashed_p;
	Path		sorted_p;

	/*
	 * If we have a sortable DISTINCT ON clause, we always use sorting. This
	 * enforces the expected behavior of DISTINCT ON.
	 */
	can_sort = grouping_is_sortable(parse->distinctClause);
	if (can_sort && parse->hasDistinctOn)
		return false;

	can_hash = grouping_is_hashable(parse->distinctClause);

	/* Quick out if only one choice is workable */
	if (!(can_hash && can_sort))
	{
		if (can_hash)
			return true;
		else if (can_sort)
			return false;
		else
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("could not implement DISTINCT"),
					 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
	}

	/* Prefer sorting when enable_hashagg is off */
	if (!enable_hashagg)
		return false;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 */

	/* Estimate per-hash-entry space at tuple width... */
	hashentrysize = MAXALIGN(path_width) + MAXALIGN(SizeofMinimalTupleHeader);
	/* plus the per-hash-entry overhead */
	hashentrysize += hash_agg_entry_size(0);

	if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
		return false;

	/*
	 * See if the estimated cost is no more than doing it the other way. While
	 * avoiding the need for sorted input is usually a win, the fact that the
	 * output won't be sorted may be a loss; so we need to do an actual cost
	 * comparison.
	 *
	 * We need to consider cheapest_path + hashagg [+ final sort] versus
	 * sorted_path [+ sort] + group [+ final sort] where brackets indicate a
	 * step that may not be needed.
	 *
	 * These path variables are dummies that just hold cost fields; we don't
	 * make actual Paths for these steps.
	 */
	cost_agg(&hashed_p, root, AGG_HASHED, NULL,
			 numDistinctCols, dNumDistinctRows,
			 cheapest_startup_cost, cheapest_total_cost,
			 path_rows);

	/*
	 * Result of hashed agg is always unsorted, so if ORDER BY is present we
	 * need to charge for the final sort.
	 */
	if (parse->sortClause)
		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
				  dNumDistinctRows, path_width,
				  0.0, work_mem, limit_tuples);

	/*
	 * Now for the GROUP case.  See comments in grouping_planner about the
	 * sorting choices here --- this code should match that code.
	 */
	sorted_p.startup_cost = sorted_startup_cost;
	sorted_p.total_cost = sorted_total_cost;
	current_pathkeys = sorted_pathkeys;
	if (parse->hasDistinctOn &&
		list_length(root->distinct_pathkeys) <
		list_length(root->sort_pathkeys))
		needed_pathkeys = root->sort_pathkeys;
	else
		needed_pathkeys = root->distinct_pathkeys;
	if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
	{
		if (list_length(root->distinct_pathkeys) >=
			list_length(root->sort_pathkeys))
			current_pathkeys = root->distinct_pathkeys;
		else
			current_pathkeys = root->sort_pathkeys;
		cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
				  path_rows, path_width,
				  0.0, work_mem, -1.0);
	}
	cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
			   sorted_p.startup_cost, sorted_p.total_cost,
			   path_rows);
	if (parse->sortClause &&
		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
				  dNumDistinctRows, path_width,
				  0.0, work_mem, limit_tuples);

	/*
	 * Now make the decision using the top-level tuple fraction.
	 */
	 */
	if (compare_fractional_path_costs(&hashed_p, &sorted_p,
									  tuple_fraction) < 0)
	{
		/* Hashed is cheaper, so use it */
		return true;
	}
	return false;
}

/*
 * make_subplanTargetList
 *	  Generate appropriate target list when grouping is required.
 *
 * When grouping_planner inserts grouping or aggregation plan nodes
 * above the scan/join plan constructed by query_planner+create_plan,
 * we typically want the scan/join plan to emit a different target list
 * than the outer plan nodes should have.  This routine generates the
 * correct target list for the scan/join subplan.
 *
 * The initial target list passed from the parser already contains entries
 * for all ORDER BY and GROUP BY expressions, but it will not have entries
 * for variables used only in HAVING clauses; so we need to add those
 * variables to the subplan target list.  Also, we flatten all expressions
 * except GROUP BY items into their component variables; the other expressions
 * will be computed by the inserted nodes rather than by the subplan.
 * For example, given a query like
 *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
 * we want to pass this targetlist to the subplan:
 *		a+b,c,d
 * where the a+b target will be used by the Sort/Group steps, and the
 * other targets will be used for computing the final results.
 *
 * If we are grouping or aggregating, *and* there are no non-Var grouping
 * expressions, then the returned tlist is effectively dummy; we do not
 * need to force it to be evaluated, because all the Vars it contains
 * should be present in the "flat" tlist generated by create_plan, though
 * possibly in a different order.  In that case we'll use create_plan's tlist,
 * and the tlist made here is only needed as input to query_planner to tell
 * it which Vars are needed in the output of the scan/join plan.
 *
 * 'tlist' is the query's target list.
 * 'groupColIdx' receives an array of column numbers for the GROUP BY
 *			expressions (if there are any) in the returned target list.
 * 'need_tlist_eval' is set true if we really need to evaluate the
 *			returned tlist as-is.  (Note: locate_grouping_columns assumes
 *			that if this is FALSE, all grouping columns are simple Vars.)
 *
 * The result is the targetlist to be passed to query_planner.
 */
static List *
make_subplanTargetList(PlannerInfo *root,
					   List *tlist,
					   AttrNumber **groupColIdx,
					   bool *need_tlist_eval)
{
	Query	   *parse = root->parse;
	List	   *sub_tlist;
	List	   *non_group_cols;
	List	   *non_group_vars;
	int			numCols;

	*groupColIdx = NULL;

	/*
	 * If we're not grouping or aggregating, there's nothing to do here;
	 * query_planner should receive the unmodified target list.
	 */
	if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual &&
		!parse->hasWindowFuncs)
	{
		*need_tlist_eval = true;
		return tlist;
	}

	/*
	 * Otherwise, we must build a tlist containing all grouping columns, plus
	 * any other Vars mentioned in the targetlist and HAVING qual.
	 */
	sub_tlist = NIL;
	non_group_cols = NIL;
	*need_tlist_eval = false;	/* only eval if not flat tlist */

	numCols = list_length(parse->groupClause);
	if (numCols > 0)
	{
		/*
		 * If grouping, create sub_tlist entries for all GROUP BY columns, and
		 * make an array showing where the group columns are in the sub_tlist.
		 *
		 * Note: with this implementation, the array entries will always be
		 * 1..N, but we don't want callers to assume that.
		 */
		AttrNumber *grpColIdx;
		ListCell   *tl;

		grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
		*groupColIdx = grpColIdx;

		foreach(tl, tlist)
		{
			TargetEntry *tle = (TargetEntry *) lfirst(tl);
			int			colno;

			colno = get_grouping_column_index(parse, tle);
			if (colno >= 0)
			{
				/*
				 * It's a grouping column, so add it to the result tlist and
				 * remember its resno in grpColIdx[].
				 */
				TargetEntry *newtle;

				newtle = makeTargetEntry(tle->expr,
										 list_length(sub_tlist) + 1,
										 NULL,
										 false);
				sub_tlist = lappend(sub_tlist, newtle);

				Assert(grpColIdx[colno] == 0);	/* no dups expected */
				grpColIdx[colno] = newtle->resno;

				if (!(newtle->expr && IsA(newtle->expr, Var)))
					*need_tlist_eval = true;	/* tlist contains non Vars */
			}
			else
			{
				/*
				 * Non-grouping column, so just remember the expression for
				 * later call to pull_var_clause.  There's no need for
				 * pull_var_clause to examine the TargetEntry node itself.
				 */
				non_group_cols = lappend(non_group_cols, tle->expr);
			}
		}
	}
	else
	{
		/*
		 * With no grouping columns, just pass whole tlist to pull_var_clause.
		 * Need (shallow) copy to avoid damaging input tlist below.
		 */
		non_group_cols = list_copy(tlist);
	}

	/*
	 * If there's a HAVING clause, we'll need the Vars it uses, too.
	 */
	if (parse->havingQual)
		non_group_cols = lappend(non_group_cols, parse->havingQual);

	/*
	 * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
	 * add them to the result tlist if not already present.  (A Var used
	 * directly as a GROUP BY item will be present already.)  Note this
	 * includes Vars used in resjunk items, so we are covering the needs of
	 * ORDER BY and window specifications.  Vars used within Aggrefs will be
	 * pulled out here, too.
	 */
	non_group_vars = pull_var_clause((Node *) non_group_cols,
									 PVC_RECURSE_AGGREGATES,
									 PVC_INCLUDE_PLACEHOLDERS);
	sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);

	/* clean up cruft */
	list_free(non_group_vars);
	list_free(non_group_cols);

	return sub_tlist;
}

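/*
 * For illustration: for the query in the header comment above, the result
 * is (a+b, c, d) with *need_tlist_eval = true, since a+b is not a plain
 * Var; for "SELECT a, SUM(c+d) FROM table GROUP BY a" the result is
 * (a, c, d) with *need_tlist_eval = false, so create_plan's flat tlist
 * can be used instead.
 */
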
/*
 * get_grouping_column_index
 *		Get the GROUP BY column position, if any, of a targetlist entry.
 *
 * Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
 * if it's not a grouping column.  Note: the result is unique because the
 * parser won't make multiple groupClause entries for the same TLE.
 */
static int
get_grouping_column_index(Query *parse, TargetEntry *tle)
{
	int			colno = 0;
	Index		ressortgroupref = tle->ressortgroupref;
	ListCell   *gl;

	/* No need to search groupClause if TLE hasn't got a sortgroupref */
	if (ressortgroupref == 0)
		return -1;

	foreach(gl, parse->groupClause)
	{
		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);

		if (grpcl->tleSortGroupRef == ressortgroupref)
			return colno;
		colno++;
	}

	return -1;
}

/*
 * locate_grouping_columns
 *		Locate grouping columns in the tlist chosen by create_plan.
 *
 * This is only needed if we don't use the sub_tlist chosen by
 * make_subplanTargetList.  We have to forget the column indexes found
 * by that routine and re-locate the grouping exprs in the real sub_tlist.
 * We assume the grouping exprs are just Vars (see make_subplanTargetList).
 */
static void
locate_grouping_columns(PlannerInfo *root,
						List *tlist,
						List *sub_tlist,
						AttrNumber *groupColIdx)
{
	int			keyno = 0;
	ListCell   *gl;

	/*
	 * No work unless grouping.
	 */
	if (!root->parse->groupClause)
	{
		Assert(groupColIdx == NULL);
		return;
	}
	Assert(groupColIdx != NULL);

	foreach(gl, root->parse->groupClause)
	{
		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
		Var		   *groupexpr = (Var *) get_sortgroupclause_expr(grpcl, tlist);
		TargetEntry *te;

		/*
		 * The grouping column returned by create_plan might not have the same
		 * typmod as the original Var.  (This can happen in cases where a
		 * set-returning function has been inlined, so that we now have more
		 * knowledge about what it returns than we did when the original Var
		 * was created.)  So we can't use tlist_member() to search the tlist;
		 * instead use tlist_member_match_var.  For safety, still check that
		 * the vartype matches.
		 */
		if (!(groupexpr && IsA(groupexpr, Var)))
			elog(ERROR, "grouping column is not a Var as expected");
		te = tlist_member_match_var(groupexpr, sub_tlist);
		if (!te)
			elog(ERROR, "failed to locate grouping columns");
		Assert(((Var *) te->expr)->vartype == groupexpr->vartype);
		groupColIdx[keyno++] = te->resno;
	}
}

/*
 * postprocess_setop_tlist
 *	  Fix up targetlist returned by plan_set_operations().
 *
 * We need to transpose sort key info from the orig_tlist into new_tlist.
 * NOTE: this would not be good enough if we supported resjunk sort keys
 * for results of set operations --- then, we'd need to project a whole
 * new tlist to evaluate the resjunk columns.  For now, just ereport if we
 * find any resjunk columns in orig_tlist.
 */
static List *
postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
{
	ListCell   *l;
	ListCell   *orig_tlist_item = list_head(orig_tlist);

	foreach(l, new_tlist)
	{
		TargetEntry *new_tle = (TargetEntry *) lfirst(l);
		TargetEntry *orig_tle;

		/* ignore resjunk columns in setop result */
		if (new_tle->resjunk)
			continue;

		Assert(orig_tlist_item != NULL);
		orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
		orig_tlist_item = lnext(orig_tlist_item);
		if (orig_tle->resjunk)	/* should not happen */
			elog(ERROR, "resjunk output columns are not implemented");
		Assert(new_tle->resno == orig_tle->resno);
		new_tle->ressortgroupref = orig_tle->ressortgroupref;
	}
	if (orig_tlist_item != NULL)
		elog(ERROR, "resjunk output columns are not implemented");
	return new_tlist;
}

/*
 * select_active_windows
 *		Create a list of the "active" window clauses (ie, those referenced
 *		by non-deleted WindowFuncs) in the order they are to be executed.
 */
static List *
select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
{
	List	   *result;
	List	   *actives;
	ListCell   *lc;

	/* First, make a list of the active windows */
	actives = NIL;
	foreach(lc, root->parse->windowClause)
	{
		WindowClause *wc = (WindowClause *) lfirst(lc);

		/* It's only active if wflists shows some related WindowFuncs */
		Assert(wc->winref <= wflists->maxWinRef);
		if (wflists->windowFuncs[wc->winref] != NIL)
			actives = lappend(actives, wc);
	}

	/*
	 * Now, ensure that windows with identical partitioning/ordering clauses
	 * are adjacent in the list.  This is required by the SQL standard, which
	 * says that only one sort is to be used for such windows, even if they
	 * are otherwise distinct (eg, different names or framing clauses).
	 *
	 * There is room to be much smarter here, for example detecting whether
4454 4455 4456 4457
	 * one window's sort keys are a prefix of another's (so that sorting for
	 * the latter would do for the former), or putting windows first that
	 * match a sort order available for the underlying query.  For the moment
	 * we are content with meeting the spec.
T
	result = NIL;
	while (actives != NIL)
	{
		WindowClause *wc = (WindowClause *) linitial(actives);
		ListCell   *prev;
		ListCell   *next;

		/* Move wc from actives to result */
		actives = list_delete_first(actives);
		result = lappend(result, wc);

		/* Now move any matching windows from actives to result */
		prev = NULL;
		for (lc = list_head(actives); lc; lc = next)
		{
			WindowClause *wc2 = (WindowClause *) lfirst(lc);

			next = lnext(lc);
4477
			/* framing options are NOT to be compared here! */
T
				equal(wc->orderClause, wc2->orderClause))
			{
				actives = list_delete_cell(actives, lc, prev);
				result = lappend(result, wc2);
			}
			else
				prev = lc;
		}
	}

	return result;
}

4492
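/*
 * For illustration: OVER (PARTITION BY x ORDER BY y) and
 * OVER (PARTITION BY x ORDER BY y ROWS 2 PRECEDING) differ only in framing,
 * so the loop above places them adjacently and one sort feeds both windows.
 */
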
/*
4493 4494 4495 4496 4497
 * make_windowInputTargetList
 *	  Generate appropriate target list for initial input to WindowAgg nodes.
 *
 * When grouping_planner inserts one or more WindowAgg nodes into the plan,
 * this function computes the initial target list to be computed by the node
B
Bruce Momjian 已提交
4498
 * just below the first WindowAgg.  This list must contain all values needed
4499 4500 4501 4502 4503 4504 4505
 * to evaluate the window functions, compute the final target list, and
 * perform any required final sort step.  If multiple WindowAggs are needed,
 * each intermediate one adds its window function results onto this tlist;
 * only the topmost WindowAgg computes the actual desired target list.
 *
 * This function is much like make_subplanTargetList, though not quite enough
 * like it to share code.  As in that function, we flatten most expressions
B
Bruce Momjian 已提交
4506
 * into their component variables.  But we do not want to flatten window
4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523
 * PARTITION BY/ORDER BY clauses, since that might result in multiple
 * evaluations of them, which would be bad (possibly even resulting in
 * inconsistent answers, if they contain volatile functions).  Also, we must
 * not flatten GROUP BY clauses that were left unflattened by
 * make_subplanTargetList, because we may no longer have access to the
 * individual Vars in them.
 *
 * Another key difference from make_subplanTargetList is that we don't flatten
 * Aggref expressions, since those are to be computed below the window
 * functions and just referenced like Vars above that.
 *
 * 'tlist' is the query's final target list.
 * 'activeWindows' is the list of active windows previously identified by
 *			select_active_windows.
 *
 * The result is the targetlist to be computed by the plan node immediately
 * below the first WindowAgg node.
4524 4525
 */
static List *
4526 4527 4528
make_windowInputTargetList(PlannerInfo *root,
						   List *tlist,
						   List *activeWindows)
4529
{
4530 4531 4532 4533 4534
	Query	   *parse = root->parse;
	Bitmapset  *sgrefs;
	List	   *new_tlist;
	List	   *flattenable_cols;
	List	   *flattenable_vars;
4535 4536
	ListCell   *lc;

4537 4538 4539 4540 4541 4542 4543
	Assert(parse->hasWindowFuncs);

	/*
	 * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
	 * into a bitmapset for convenient reference below.
	 */
	sgrefs = NULL;
4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562
	foreach(lc, activeWindows)
	{
		WindowClause *wc = (WindowClause *) lfirst(lc);
		ListCell   *lc2;

		foreach(lc2, wc->partitionClause)
		{
			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);

			sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
		}
		foreach(lc2, wc->orderClause)
		{
			SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);

			sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
		}
	}

	/* Add in sortgroupref numbers of GROUP BY clauses, too */
	foreach(lc, parse->groupClause)
	{
		SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);

		sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
	}

4571
	/*
4572 4573
	 * Construct a tlist containing all the non-flattenable tlist items, and
	 * save aside the others for a moment.
4574
	 */
4575 4576 4577
	new_tlist = NIL;
	flattenable_cols = NIL;

4578 4579 4580 4581
	foreach(lc, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

4582 4583 4584 4585 4586
		/*
		 * Don't want to deconstruct window clauses or GROUP BY items.  (Note
		 * that such items can't contain window functions, so it's okay to
		 * compute them below the WindowAgg nodes.)
		 */
4587
		if (tle->ressortgroupref != 0 &&
4588
			bms_is_member(tle->ressortgroupref, sgrefs))
		{
			/* Don't want to deconstruct this value, so add to new_tlist */
			TargetEntry *newtle;

			newtle = makeTargetEntry(tle->expr,
									 list_length(new_tlist) + 1,
									 NULL,
									 false);
			/* Preserve its sortgroupref marking, in case it's volatile */
			newtle->ressortgroupref = tle->ressortgroupref;
			new_tlist = lappend(new_tlist, newtle);
		}
		else
		{
			/*
			 * Column is to be flattened, so just remember the expression for
			 * later call to pull_var_clause.  There's no need for
			 * pull_var_clause to examine the TargetEntry node itself.
			 */
			flattenable_cols = lappend(flattenable_cols, tle->expr);
		}
	}

	/*
	 * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
	 * add them to the result tlist if not already present.  (Some might be
	 * there already because they're used directly as window/group clauses.)
	 *
	 * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that the
	 * Aggrefs are placed in the Agg node's tlist and not left to be computed
	 * at higher levels.
	 */
	flattenable_vars = pull_var_clause((Node *) flattenable_cols,
									   PVC_INCLUDE_AGGREGATES,
									   PVC_INCLUDE_PLACEHOLDERS);
	new_tlist = add_to_flat_tlist(new_tlist, flattenable_vars);

	/* clean up cruft */
	list_free(flattenable_vars);
	list_free(flattenable_cols);

	return new_tlist;
}

/*
 * make_pathkeys_for_window
 *		Create a pathkeys list describing the required input ordering
 *		for the given WindowClause.
 *
 * The required ordering is first the PARTITION keys, then the ORDER keys.
 * In the future we might try to implement windowing using hashing, in which
 * case the ordering could be relaxed, but for now we always sort.
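 *
 * For example (illustrative), a window defined as
 *		OVER (PARTITION BY a, b ORDER BY c)
 * requires input ordering equivalent to ORDER BY a, b, c, except that
 * pathkeys found to be redundant are dropped from the list (see
 * get_column_info_for_window for the consequences of that).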
 */
static List *
make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
						 List *tlist)
{
	List	   *window_pathkeys;
	List	   *window_sortclauses;

	/* Throw error if can't sort */
	if (!grouping_is_sortable(wc->partitionClause))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not implement window PARTITION BY"),
				 errdetail("Window partitioning columns must be of sortable datatypes.")));
	if (!grouping_is_sortable(wc->orderClause))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not implement window ORDER BY"),
		errdetail("Window ordering columns must be of sortable datatypes.")));

	/* Okay, make the combined pathkeys */
	window_sortclauses = list_concat(list_copy(wc->partitionClause),
									 list_copy(wc->orderClause));
	window_pathkeys = make_pathkeys_for_sortclauses(root,
													window_sortclauses,
													tlist);
	list_free(window_sortclauses);
	return window_pathkeys;
}

/*----------
 * get_column_info_for_window
 *		Get the partitioning/ordering column numbers and equality operators
 *		for a WindowAgg node.
 *
 * This depends on the behavior of make_pathkeys_for_window()!
 *
 * We are given the target WindowClause and an array of the input column
 * numbers associated with the resulting pathkeys.  In the easy case, there
 * are the same number of pathkey columns as partitioning + ordering columns
 * and we just have to copy some data around.  However, it's possible that
 * some of the original partitioning + ordering columns were eliminated as
 * redundant during the transformation to pathkeys.  (This can happen even
 * though the parser gets rid of obvious duplicates.  A typical scenario is a
 * window specification "PARTITION BY x ORDER BY y" coupled with a clause
 * "WHERE x = y" that causes the two sort columns to be recognized as
 * redundant.)	In that unusual case, we have to work a lot harder to
 * determine which keys are significant.
 *
 * The method used here is a bit brute-force: add the sort columns to a list
 * one at a time and note when the resulting pathkey list gets longer.  But
 * it's a sufficiently uncommon case that a faster way doesn't seem worth
 * the amount of code refactoring that'd be needed.
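 *
 * To make that concrete (a worked instance of the scenario above): with
 * "PARTITION BY x ORDER BY y" and "WHERE x = y", the plan's sort uses a
 * single column.  Adding x grows the pathkey list from zero to one, so x
 * is significant and *partNumCols becomes 1; adding y leaves the length
 * at one, so y is recognized as redundant and *ordNumCols stays 0.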
 *----------
 */
static void
get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
						   int numSortCols, AttrNumber *sortColIdx,
						   int *partNumCols,
						   AttrNumber **partColIdx,
						   Oid **partOperators,
						   int *ordNumCols,
						   AttrNumber **ordColIdx,
						   Oid **ordOperators)
{
	int			numPart = list_length(wc->partitionClause);
	int			numOrder = list_length(wc->orderClause);

	if (numSortCols == numPart + numOrder)
	{
		/* easy case */
		*partNumCols = numPart;
		*partColIdx = sortColIdx;
		*partOperators = extract_grouping_ops(wc->partitionClause);
		*ordNumCols = numOrder;
		*ordColIdx = sortColIdx + numPart;
		*ordOperators = extract_grouping_ops(wc->orderClause);
	}
	else
	{
		List	   *sortclauses;
		List	   *pathkeys;
		int			scidx;
		ListCell   *lc;

		/* first, allocate what's certainly enough space for the arrays */
		*partNumCols = 0;
		*partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
		*partOperators = (Oid *) palloc(numPart * sizeof(Oid));
		*ordNumCols = 0;
		*ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
		*ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
		sortclauses = NIL;
		pathkeys = NIL;
		scidx = 0;
		foreach(lc, wc->partitionClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
			List	   *new_pathkeys;

			sortclauses = lappend(sortclauses, sgc);
			new_pathkeys = make_pathkeys_for_sortclauses(root,
														 sortclauses,
														 tlist);
			if (list_length(new_pathkeys) > list_length(pathkeys))
			{
				/* this sort clause is actually significant */
				(*partColIdx)[*partNumCols] = sortColIdx[scidx++];
				(*partOperators)[*partNumCols] = sgc->eqop;
				(*partNumCols)++;
				pathkeys = new_pathkeys;
			}
		}
		foreach(lc, wc->orderClause)
		{
			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
			List	   *new_pathkeys;

			sortclauses = lappend(sortclauses, sgc);
			new_pathkeys = make_pathkeys_for_sortclauses(root,
														 sortclauses,
														 tlist);
			if (list_length(new_pathkeys) > list_length(pathkeys))
			{
				/* this sort clause is actually significant */
				(*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];
				(*ordOperators)[*ordNumCols] = sgc->eqop;
				(*ordNumCols)++;
				pathkeys = new_pathkeys;
			}
		}
		/* complain if we didn't eat exactly the right number of sort cols */
		if (scidx != numSortCols)
			elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
	}
}


/*
 * expression_planner
 *		Perform planner's transformations on a standalone expression.
 *
 * Various utility commands need to evaluate expressions that are not part
 * of a plannable query.  They can do so using the executor's regular
 * expression-execution machinery, but first the expression has to be fed
 * through here to transform it from parser output to something executable.
 *
 * Currently, we disallow sublinks in standalone expressions, so there's no
 * real "planning" involved here.  (That might not always be true though.)
 * What we must do is run eval_const_expressions to ensure that any function
 * calls are converted to positional notation and function default arguments
 * get inserted.  The fact that constant subexpressions get simplified is a
 * side-effect that is useful when the expression will get evaluated more than
 * once.  Also, we must fix operator function IDs.
 *
 * Note: this must not make any damaging changes to the passed-in expression
 * tree.  (It would actually be okay to apply fix_opfuncids to it, but since
 * we first do an expression_tree_mutator-based walk, what is returned will
 * be a new node tree.)
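 *
 * A minimal usage sketch (illustrative; most callers actually go through
 * ExecPrepareExpr, which applies this function for them):
 *
 *		expr = expression_planner(expr);
 *		exprstate = ExecInitExpr(expr, NULL);
 *		value = ExecEvalExprSwitchContext(exprstate, econtext,
 *										  &isnull, NULL);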
 */
Expr *
expression_planner(Expr *expr)
{
	Node	   *result;

	/*
	 * Convert named-argument function calls, insert default arguments and
	 * simplify constant subexprs
	 */
	result = eval_const_expressions(NULL, (Node *) expr);

	/* Fill in opfuncid values if missing */
	fix_opfuncids(result);

	return (Expr *) result;
}


/*
 * plan_cluster_use_sort
 *		Use the planner to decide how CLUSTER should implement sorting
 *
 * tableOid is the OID of a table to be clustered on its index indexOid
 * (which is already known to be a btree index).  Decide whether it's
 * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
 * Return TRUE to use sorting, FALSE to use an indexscan.
 *
 * Note: caller had better already hold some type of lock on the table.
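 *
 * Expected call pattern (a sketch; the real caller is in cluster.c):
 *
 *		if (plan_cluster_use_sort(tableOid, indexOid))
 *			... seqscan the old heap, then sort by the index keys ...
 *		else
 *			... read the old heap in index order ...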
 */
bool
plan_cluster_use_sort(Oid tableOid, Oid indexOid)
{
	PlannerInfo *root;
	Query	   *query;
	PlannerGlobal *glob;
	RangeTblEntry *rte;
	RelOptInfo *rel;
	IndexOptInfo *indexInfo;
	QualCost	indexExprCost;
	Cost		comparisonCost;
	Path	   *seqScanPath;
	Path		seqScanAndSortPath;
	IndexPath  *indexScanPath;
	ListCell   *lc;

	/* Set up mostly-dummy planner state */
	query = makeNode(Query);
	query->commandType = CMD_SELECT;

	glob = makeNode(PlannerGlobal);

	root = makeNode(PlannerInfo);
	root->parse = query;
	root->glob = glob;
	root->query_level = 1;
	root->planner_cxt = CurrentMemoryContext;
	root->wt_param_id = -1;

	/* Build a minimal RTE for the rel */
	rte = makeNode(RangeTblEntry);
	rte->rtekind = RTE_RELATION;
	rte->relid = tableOid;
	rte->relkind = RELKIND_RELATION;	/* Don't be too picky. */
	rte->lateral = false;
	rte->inh = false;
	rte->inFromCl = true;
	query->rtable = list_make1(rte);

	/* Set up RTE/RelOptInfo arrays */
	setup_simple_rel_arrays(root);

	/* Build RelOptInfo */
	rel = build_simple_rel(root, 1, RELOPT_BASEREL);

	/* Locate IndexOptInfo for the target index */
	indexInfo = NULL;
	foreach(lc, rel->indexlist)
	{
		indexInfo = (IndexOptInfo *) lfirst(lc);
		if (indexInfo->indexoid == indexOid)
			break;
	}

	/*
	 * It's possible that get_relation_info did not generate an IndexOptInfo
	 * for the desired index; this could happen if it's not yet reached its
	 * indcheckxmin usability horizon, or if it's a system index and we're
	 * ignoring system indexes.  In such cases we should tell CLUSTER to not
	 * trust the index contents but use seqscan-and-sort.
	 */
	if (lc == NULL)				/* not in the list? */
		return true;			/* use sort */

	/*
	 * Rather than doing all the pushups that would be needed to use
	 * set_baserel_size_estimates, just do a quick hack for rows and width.
	 */
	rel->rows = rel->tuples;
	rel->width = get_relation_data_width(tableOid, NULL);

	root->total_table_pages = rel->pages;

	/*
	 * Determine eval cost of the index expressions, if any.  We need to
	 * charge twice that amount for each tuple comparison that happens during
	 * the sort, since tuplesort.c will have to re-evaluate the index
	 * expressions each time.  (XXX that's pretty inefficient...)
	 */
	cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
	comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);

	/* Estimate the cost of seq scan + sort */
	seqScanPath = create_seqscan_path(root, rel, NULL, 0);
	cost_sort(&seqScanAndSortPath, root, NIL,
			  seqScanPath->total_cost, rel->tuples, rel->width,
			  comparisonCost, maintenance_work_mem, -1.0);

	/* Estimate the cost of index scan */
	indexScanPath = create_index_path(root, indexInfo,
									  NIL, NIL, NIL, NIL, NIL,
									  ForwardScanDirection, false,
									  NULL, 1.0);

	return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}