/*-------------------------------------------------------------------------
 *
 * planner.c
 *	  The query optimizer external interface.
 *
 * Portions Copyright (c) 2005-2008, Greenplum inc
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.211 2007/01/10 18:06:03 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>

#include "catalog/pg_operator.h"
#include "executor/executor.h"
#include "executor/execHHashagg.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/planpartition.h"
#include "optimizer/transform.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
#endif
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

#include "cdb/cdbllize.h"
#include "cdb/cdbmutate.h"		/* apply_shareinput */
#include "cdb/cdbpath.h"		/* cdbpath_segments */
#include "cdb/cdbpathtoplan.h"	/* cdbpathtoplan_create_flow() */
#include "cdb/cdbpartition.h"	/* query_has_external_partition() */
#include "cdb/cdbgroup.h"		/* grouping_planner extensions */
#include "cdb/cdbsetop.h"		/* motion utilities */
#include "cdb/cdbsubselect.h"	/* cdbsubselect_flatten_sublinks() */

/* GUC parameter */
double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;

/* Hook for plugins to get control in planner() */
planner_hook_type planner_hook = NULL;

ParamListInfo PlannerBoundParamList = NULL;		/* current boundParams */

/* Expression kind codes for preprocess_expression */
#define EXPRKIND_QUAL			0
#define EXPRKIND_TARGET			1
#define EXPRKIND_RTFUNC			2
#define EXPRKIND_VALUES			3
#define EXPRKIND_LIMIT			4
#define EXPRKIND_ININFO			5
#define EXPRKIND_APPINFO		6
#define EXPRKIND_WINDOW_BOUND	7

static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
static Plan *inheritance_planner(PlannerInfo *root);
static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
static double preprocess_limit(PlannerInfo *root,
				 double tuple_fraction,
				 int64 *offset_est, int64 *count_est);
#ifdef NOT_USED
static Oid *extract_grouping_ops(List *groupClause, int *numGroupOps);
#endif
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
					   AttrNumber **groupColIdx, Oid **groupOperators, bool *need_tlist_eval);
static List *register_ordered_aggs(List *tlist, Node *havingqual, List *sub_tlist);

// GP optimizer entry point
#ifdef USE_ORCA
extern PlannedStmt *PplstmtOptimize(Query *parse, bool *pfUnexpectedFailure);
#endif

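/*
 * Context for register_ordered_aggs_mutator: the query targetlist and
 * HAVING qual being rewritten, the subplan targetlist being augmented,
 * and the last sort-group-ref index assigned so far.
 */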
typedef struct
{
	List *tlist;
	Node *havingqual;
	List *sub_tlist;
	Index last_sgr;
} register_ordered_aggs_context;


static Node *register_ordered_aggs_mutator(Node *node, 
										   register_ordered_aggs_context *context);
static void register_AggOrder(AggOrder *aggorder, 
							  register_ordered_aggs_context *context);
static void locate_grouping_columns(PlannerInfo *root,
						List *stlist,
						List *sub_tlist,
						AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);

static Bitmapset *canonicalize_colref_list(Node *node);
static List *canonicalize_gs_list(List *gsl, bool ordinary);
static List *rollup_gs_list(List *gsl);
static List *add_gs_combinations(List *list, int n, int i, Bitmapset **base, Bitmapset **work);
static List *cube_gs_list(List *gsl);
static CanonicalGroupingSets *make_canonical_groupingsets(List *groupClause);
static int gs_compare(const void *a, const void *b);
static void sort_canonical_gs_list(List *gs, int *p_nsets, Bitmapset ***p_sets);

static Plan *pushdown_preliminary_limit(Plan *plan, Node *limitCount, int64 count_est, Node *limitOffset, int64 offset_est);
static bool is_dummy_plan(Plan *plan);


#ifdef USE_ORCA
/**
 * Logging of optimization outcome
 */
static void log_optimizer(PlannedStmt *plan, bool fUnexpectedFailure)
{
	if (!optimizer_log)
	{
		//  optimizer logging is not enabled
		return;
	}

	if (NULL != plan)
	{
		elog(DEBUG1, "Optimizer produced plan");
		return;
	}

	// optimizer failed to produce a plan, log failure
	if (OPTIMIZER_ALL_FAIL == optimizer_log_failure)
	{
		elog(LOG, "Planner produced plan :%d", fUnexpectedFailure);
		return;
	}

	if (fUnexpectedFailure && OPTIMIZER_UNEXPECTED_FAIL == optimizer_log_failure)
	{
		// unexpected fall back
		elog(LOG, "Planner produced plan :%d", fUnexpectedFailure);
		return;
	}

	if (!fUnexpectedFailure && OPTIMIZER_EXPECTED_FAIL == optimizer_log_failure)
	{
		// expected fall back
		elog(LOG, "Planner produced plan :%d", fUnexpectedFailure);
	}
}
#endif

#ifdef USE_ORCA
/**
 * Postprocessing of optimizer's plan
 */
static void postprocess_plan(PlannedStmt *plan)
{
	Assert(plan);

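	/*
	 * ORCA plans bypass standard_planner, so build a scratch PlannerGlobal
	 * here just to let apply_shareinput_xslice() walk the plan tree and fix
	 * up cross-slice ShareInputScan references.
	 */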
	PlannerGlobal *globNew =  makeNode(PlannerGlobal);

	/* initialize */
	globNew->paramlist = NIL;
	globNew->subrtables = NIL;
	globNew->rewindPlanIDs = NULL;
	globNew->finalrtable = NIL;
	globNew->relationOids = NIL;
	globNew->invalItems = NIL;
	globNew->transientPlan = false;
	globNew->share.sharedNodes = NIL;
	globNew->share.sliceMarks = NIL;
	globNew->share.motStack = NIL;
	globNew->share.qdShares = NIL;
	globNew->share.qdSlices = NIL;
	globNew->share.planNodes = NIL;
	globNew->share.nextPlanId = 0;
	globNew->subplans = plan->subplans;
	(void) apply_shareinput_xslice(plan->planTree, globNew);
}
#endif

#ifdef USE_ORCA
/*
 * optimize query using the new optimizer
 */
static PlannedStmt *
optimize_query(Query *parse, ParamListInfo boundParams)
{
	/* flag to check if optimizer unexpectedly failed to produce a plan */
	bool fUnexpectedFailure = false;

	/* create a local copy and hand it to the optimizer */
	Query *pqueryCopy = (Query *) copyObject(parse);

	/* perform pre-processing of query tree before calling optimizer */
	pqueryCopy = preprocess_query_optimizer(pqueryCopy, boundParams);

	PlannedStmt *result = PplstmtOptimize(pqueryCopy, &fUnexpectedFailure);

	if (result)
	{
		postprocess_plan(result);
	}

	log_optimizer(result, fUnexpectedFailure);

	return result;
}
#endif

/*****************************************************************************
 *
 *	   Query optimizer entry point
 *
 * To support loadable plugins that monitor or modify planner behavior,
 * we provide a hook variable that lets a plugin get control before and
 * after the standard planning process.  The plugin would normally call
 * standard_planner().
 *
 * Note to plugin authors: standard_planner() scribbles on its Query input,
 * so you'd better copy that data structure if you want to plan more than once.
 *
 *****************************************************************************/

PlannedStmt *
planner(Query *parse, int cursorOptions,
		ParamListInfo boundParams)
{
	PlannedStmt *result = NULL;
	instr_time	starttime, endtime;

	/**
	 * If the new optimizer is enabled, try that first. If it does not return a plan,
	 * then fall back to the planner.
	 * TODO: caragg 11/08/2013: Enable ORCA when running in utility mode (MPP-21841)
	 * TODO: caragg 02/05/2014: Enable ORCA when running on the segments (MPP-22515)
	 */
#ifdef USE_ORCA
	if (optimizer
		&& (GP_ROLE_UTILITY != Gp_role)
		&& (MASTER_CONTENT_ID == GpIdentity.segindex)
		&& !query_has_external_partition(parse))
	{
		if (gp_log_optimization_time)
		{
			INSTR_TIME_SET_CURRENT(starttime);
		}
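		/* Run ORCA under its own memory account so its usage can be tracked. */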
		START_MEMORY_ACCOUNT(MemoryAccounting_CreateAccount(0, MEMORY_OWNER_TYPE_Optimizer));
		{
			result = optimize_query(parse, boundParams);
		}
		END_MEMORY_ACCOUNT();

		if (gp_log_optimization_time)
		{
			INSTR_TIME_SET_CURRENT(endtime);
			INSTR_TIME_SUBTRACT(endtime, starttime);
			elog(LOG, "Optimizer Time: %.3f ms", INSTR_TIME_GET_MILLISEC(endtime));
		}
	}
#endif /* USE_ORCA */

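	/* ORCA was disabled or did not produce a plan: fall back to the planner. */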
	if (!result)
	{
		if (gp_log_optimization_time)
		{
			INSTR_TIME_SET_CURRENT(starttime);
		}
		START_MEMORY_ACCOUNT(MemoryAccounting_CreateAccount(0, MEMORY_OWNER_TYPE_Planner));
		{
			if (NULL != planner_hook)
			{
				result = (*planner_hook) (parse, cursorOptions, boundParams);
			}
			else
			{
				result = standard_planner(parse, cursorOptions, boundParams);
			}

			if (gp_log_optimization_time)
			{
				INSTR_TIME_SET_CURRENT(endtime);
				INSTR_TIME_SUBTRACT(endtime, starttime);
				elog(LOG, "Planner Time: %.3f ms", INSTR_TIME_GET_MILLISEC(endtime));
			}
		}
		END_MEMORY_ACCOUNT();
	}

	return result;
}


PlannedStmt *
standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result;
	PlannerGlobal *glob;
	bool isCursor = false;
	double		tuple_fraction;
	PlannerInfo *root;
	Plan	   *top_plan;
	ListCell   *lp,
			   *lr;

	/* Cursor options may come from caller or from DECLARE CURSOR stmt. */
	if (parse->utilityStmt && IsA(parse->utilityStmt, DeclareCursorStmt))
	{
		isCursor = true;
		cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
	}

	/*
	 * The planner can be called recursively (an example is when
	 * eval_const_expressions tries to pre-evaluate an SQL function).
	 *
	 * Set up global state for this planner invocation.  This data is needed
	 * across all levels of sub-Query that might exist in the given command,
	 * so we keep it in a separate struct that's linked to by each per-Query
	 * PlannerInfo.
	 */
	glob = makeNode(PlannerGlobal);

	glob->boundParams = boundParams;
	glob->paramlist = NIL;
	glob->subplans = NIL;
	glob->subrtables = NIL;
	glob->rewindPlanIDs = NULL;
	glob->finalrtable = NIL;
	glob->relationOids = NIL;
	glob->invalItems = NIL;
	glob->transientPlan = false;
	/* ApplyShareInputContext initialization. */
	glob->share.sharedNodes = NIL;
	glob->share.sliceMarks = NIL;
	glob->share.motStack = NIL;
	glob->share.qdShares = NIL;
	glob->share.qdSlices = NIL;
	glob->share.planNodes = NIL;
	glob->share.nextPlanId = 0;

	/* Determine what fraction of the plan is likely to be scanned */
	if (isCursor)
	{
		/*
		 * We have no real idea how many tuples the user will ultimately FETCH
		 * from a cursor, but it is sometimes the case that he doesn't want 'em
		 * all, or would prefer a fast-start plan anyway so that he can
		 * process some of the tuples sooner.  Use a GUC parameter to decide
		 * what fraction to optimize for.
		 */
		tuple_fraction = cursor_tuple_fraction;

		/*
		 * We document cursor_tuple_fraction as simply being a fraction,
		 * which means the edge cases 0 and 1 have to be treated specially
		 * here.  We convert 1 to 0 ("all the tuples") and 0 to a very small
		 * fraction.
		 */
		if (tuple_fraction >= 1.0)
			tuple_fraction = 0.0;
		else if (tuple_fraction <= 0.0)
			tuple_fraction = 1e-10;
	}
	else
	{
		/* Default assumption is we need all the tuples */
		tuple_fraction = 0.0;
	}

	parse = normalize_query(parse);

	PlannerConfig *config = DefaultPlannerConfig();

	/* primary planning entry point (may recurse for subqueries) */
	top_plan = subquery_planner(glob, parse, NULL, tuple_fraction, &root, config);

	/*
	 * If creating a plan for a scrollable cursor, make sure it can run
	 * backwards on demand.  Add a Material node at the top at need.
	 */
	if (isCursor && (cursorOptions & CURSOR_OPT_SCROLL))
	{
		if (!ExecSupportsBackwardScan(top_plan))
			top_plan = materialize_finished_plan(root, top_plan);
	}


	/* Fix sharing ids and shared ids.
	 *
	 * This must be done before set_plan_references and cdbparallelize: those
	 * mutators and tree walkers assume the input is a tree, but with plan
	 * sharing we have a DAG.
	 *
	 * apply_shareinput_dag_to_tree fixes shared_id and turns the DAG back into a tree.
	 */

	foreach(lp, glob->subplans)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);
		lfirst(lp) = apply_shareinput_dag_to_tree(subplan, &glob->share);
	}
	top_plan = apply_shareinput_dag_to_tree(top_plan, &glob->share);

	/* final cleanup of the plan */
	Assert(glob->finalrtable == NIL);
	Assert(parse == root->parse);
	top_plan = set_plan_references(glob, top_plan, root->parse->rtable);
	/* ... and the subplans (both regular subplans and initplans) */
	Assert(list_length(glob->subplans) == list_length(glob->subrtables));
	forboth(lp, glob->subplans, lr, glob->subrtables)
	{
		Plan	   *subplan = (Plan *) lfirst(lp);
		List	   *subrtable = (List *) lfirst(lr);
		
		lfirst(lp) = set_plan_references(glob, subplan, subrtable);
	}

	/* walk plan and remove unused initplans and their params */
	remove_unused_initplans(top_plan, root);

	/* executor wants to know total number of Params used overall */
	top_plan->nParamExec = list_length(glob->paramlist);

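	/*
	 * On the dispatcher, cdbparallelize() turns the serial plan into a
	 * parallel one, adding Motion nodes and slice structure for MPP execution.
	 */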
	if ( Gp_role == GP_ROLE_DISPATCH )
	{
		XsliceInAppendContext append_context;
		
		top_plan = cdbparallelize(root, top_plan, parse,
									cursorOptions,
									boundParams);

		/* cdbparallelize may create additional slices that affect share input;
		 * we need to mark Material nodes that are split across multiple slices.
		 */
		top_plan = apply_shareinput_xslice(top_plan, glob);

		/*
		 * Walk the plan tree and set the hasXslice field in each Append node
		 * to true if the Append's subnodes contain a cross-slice shared node
		 * and one of those subnodes runs in the same slice as the Append.
		 */
		planner_init_plan_tree_base(&append_context.base, root);
		append_context.currentSliceNo = 0;
		append_context.slices = NULL;
		plan_tree_walker((Node *)top_plan, set_hasxslice_in_append_walker, &append_context);
		bms_free(append_context.slices);
	}

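	/* GPDB: remove a superfluous top-level Result node, if possible. */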
	top_plan = zap_trivial_result(root, top_plan);

	
	/* build the PlannedStmt result */
	result = makeNode(PlannedStmt);
	
	result->commandType = parse->commandType;
	result->canSetTag = parse->canSetTag;
	result->transientPlan = glob->transientPlan;
	result->planTree = top_plan;
	result->rtable = glob->finalrtable;
	result->resultRelations = root->resultRelations;
	result->utilityStmt = parse->utilityStmt;
	result->intoClause = parse->intoClause;
	result->subplans = glob->subplans;
	result->rewindPlanIDs = glob->rewindPlanIDs;
	result->returningLists = root->returningLists;
	result->result_partitions = root->result_partitions;
	result->result_aosegnos = root->result_aosegnos;
	result->rowMarks = parse->rowMarks;
	result->relationOids = glob->relationOids;
	result->invalItems = glob->invalItems;
	result->nCrossLevelParams = list_length(glob->paramlist);
	result->nMotionNodes = top_plan->nMotionNodes;
	result->nInitPlans = top_plan->nInitPlans;
	result->intoPolicy = GpPolicyCopy(CurrentMemoryContext, parse->intoPolicy);
	result->queryPartOids = NIL;
	result->queryPartsMetadata = NIL;
	result->numSelectorsPerScanId = NIL;
	
	Assert(result->utilityStmt == NULL || IsA(result->utilityStmt, DeclareCursorStmt));
	
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		/*
		 * Generate a plan node id for each node. Used by gpmon.
		 * Note that this needs to be the last step of the planning when
		 * the structure of the plan is final.
		 */
		assign_plannode_id(result);
	}

	return result;
}

/*--------------------
 * subquery_planner
 *	  Invokes the planner on a subquery.  We recurse to here for each
 *	  sub-SELECT found in the query tree.
 *
 * glob is the global state for the current planner run.
 * parse is the querytree produced by the parser & rewriter.
 * parent_root is the immediate parent Query's info (NULL at the top level).
 * config is the PlannerConfig to use for planning this subquery.
 * tuple_fraction is the fraction of tuples we expect will be retrieved.
 * tuple_fraction is interpreted as explained for grouping_planner, below.
 *
 * If subroot isn't NULL, we pass back the query's final PlannerInfo struct;
 * among other things this tells the output sort ordering of the plan.
 *
 * Basically, this routine does the stuff that should only be done once
 * per Query object.  It then calls grouping_planner.  At one time,
 * grouping_planner could be invoked recursively on the same Query object;
 * that's not currently true, but we keep the separation between the two
 * routines anyway, in case we need it again someday.
 *
 * subquery_planner will be called recursively to handle sub-Query nodes
 * found within the query's expressions and rangetable.
 *
 * Returns a query plan.
 *--------------------
 */
Plan *
subquery_planner(PlannerGlobal *glob,
				 Query *parse,
				 PlannerInfo *parent_root,
				 double tuple_fraction,
				 PlannerInfo **subroot,
				 PlannerConfig *config)
{
	int			num_old_subplans = list_length(glob->subplans);
	PlannerInfo *root;
	Plan	   *plan;
	List	   *newHaving;
	ListCell   *l;

	/* Create a PlannerInfo data structure for this subquery */
	root = makeNode(PlannerInfo);
	root->parse = parse;
	root->glob = glob;
	root->query_level = parent_root ? parent_root->query_level + 1 : 1;
	root->parent_root = parent_root;
	root->planner_cxt = CurrentMemoryContext;
	root->init_plans = NIL;

	root->list_cteplaninfo = NIL;
	if (parse->cteList != NIL)
	{
		root->list_cteplaninfo = init_list_cteplaninfo(list_length(parse->cteList));
	}

	root->in_info_list = NIL;
	root->append_rel_list = NIL;

	Assert(config);
	root->config = config;

	if (Gp_role != GP_ROLE_DISPATCH && root->config->cdbpath_segments > 0)
	{
		/* Choose a segdb to which our singleton gangs should be dispatched. */
		gp_singleton_segindex = gp_session_id % root->config->cdbpath_segments;
    }

    /* Ensure that jointree has been normalized. See normalize_query_jointree_mutator() */
    AssertImply(parse->jointree->fromlist, list_length(parse->jointree->fromlist) == 1);

    /* CDB: Stash current query level's relids before pulling up subqueries. */
    root->currlevel_relids = get_relids_in_jointree((Node *)parse->jointree);

    /*
	 * Look for IN clauses at the top level of WHERE, and transform them into
	 * joins.  Note that this step only handles IN clauses originally at top
	 * level of WHERE; if we pull up any subqueries in the next step, their
	 * INs are processed just before pulling them up.
	 */
	if (parse->hasSubLinks)
        cdbsubselect_flatten_sublinks(root, (Node *)parse);

	/*
	 * Check to see if any subqueries in the rangetable can be merged into
	 * this query.
	 */
	parse->jointree = (FromExpr *)
		pull_up_subqueries(root, (Node *) parse->jointree, false, false);

	/*
	 * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
	 * avoid the expense of doing flatten_join_alias_vars().  Also check for
	 * outer joins --- if none, we can skip reduce_outer_joins() and some
	 * other processing.  This must be done after we have done
	 * pull_up_subqueries, of course.
	 *
	 * Note: if reduce_outer_joins manages to eliminate all outer joins,
	 * root->hasOuterJoins is not reset currently.	This is OK since its
	 * purpose is merely to suppress unnecessary processing in simple cases.
	 */
	root->hasJoinRTEs = false;
	root->hasOuterJoins = false;
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind == RTE_JOIN)
		{
			root->hasJoinRTEs = true;
			if (IS_OUTER_JOIN(rte->jointype))
			{
				root->hasOuterJoins = true;
				/* Can quit scanning once we find an outer join */
				break;
			}
		}
	}

	/*
	 * Expand any rangetable entries that are inheritance sets into "append
	 * relations".  This can add entries to the rangetable, but they must be
	 * plain base relations not joins, so it's OK (and marginally more
	 * efficient) to do it after checking for join RTEs.  We must do it after
	 * pulling up subqueries, else we'd fail to handle inherited tables in
	 * subqueries.
	 */
	expand_inherited_tables(root);

    /* CDB: If parent RTE belongs to current query level, children do too. */
    foreach (l, root->append_rel_list)
    {
        AppendRelInfo  *appinfo = (AppendRelInfo *)lfirst(l);

        if (bms_is_member(appinfo->parent_relid, root->currlevel_relids))
            root->currlevel_relids = bms_add_member(root->currlevel_relids,
                                                    appinfo->child_relid);
    }

	/*
	 * Set hasHavingQual to remember if HAVING clause is present.  Needed
	 * because preprocess_expression will reduce a constant-true condition to
	 * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
	 */
	root->hasHavingQual = (parse->havingQual != NULL);

	/* Clear this flag; might get set in distribute_qual_to_rels */
	root->hasPseudoConstantQuals = false;

	/*
	 * Do expression preprocessing on targetlist and quals.
	 */
	parse->targetList = (List *)
		preprocess_expression(root, (Node *) parse->targetList,
							  EXPRKIND_TARGET);

	parse->returningList = (List *)
		preprocess_expression(root, (Node *) parse->returningList,
							  EXPRKIND_TARGET);

	preprocess_qual_conditions(root, (Node *) parse->jointree);

	parse->havingQual = preprocess_expression(root, parse->havingQual,
											  EXPRKIND_QUAL);

	parse->scatterClause = (List *)
		preprocess_expression(root, (Node *) parse->scatterClause,
							  EXPRKIND_TARGET);
	/* 
	 * Do expression preprocessing on other expressions.
	 */	 
	foreach (l, parse->windowClause)
	{
		WindowSpec *w = (WindowSpec *) lfirst(l);
		
		if ( w != NULL && w->frame != NULL )
		{
			WindowFrame *f = w->frame;
						
			if (f->trail != NULL )
				f->trail->val = preprocess_expression(root, f->trail->val, EXPRKIND_WINDOW_BOUND);
			if ( f->lead != NULL )
				f->lead->val = preprocess_expression(root, f->lead->val, EXPRKIND_WINDOW_BOUND);
		}
	}

	parse->limitOffset = preprocess_expression(root, parse->limitOffset,
											   EXPRKIND_LIMIT);
	parse->limitCount = preprocess_expression(root, parse->limitCount,
											  EXPRKIND_LIMIT);

	root->in_info_list = (List *)
		preprocess_expression(root, (Node *) root->in_info_list,
							  EXPRKIND_ININFO);
	root->append_rel_list = (List *)
		preprocess_expression(root, (Node *) root->append_rel_list,
							  EXPRKIND_APPINFO);

	/* Also need to preprocess expressions for function and values RTEs */
	foreach(l, parse->rtable)
	{
		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);

		if (rte->rtekind == RTE_FUNCTION || rte->rtekind == RTE_TABLEFUNCTION)
			rte->funcexpr = preprocess_expression(root, rte->funcexpr,
												  EXPRKIND_RTFUNC);
		else if (rte->rtekind == RTE_VALUES)
			rte->values_lists = (List *)
				preprocess_expression(root, (Node *) rte->values_lists,
									  EXPRKIND_VALUES);
	}

	/*
	 * In some cases we may want to transfer a HAVING clause into WHERE. We
	 * cannot do so if the HAVING clause contains aggregates (obviously) or
	 * volatile functions (since a HAVING clause is supposed to be executed
	 * only once per group).  Also, it may be that the clause is so expensive
	 * to execute that we're better off doing it only once per group, despite
	 * the loss of selectivity.  This is hard to estimate short of doing the
	 * entire planning process twice, so we use a heuristic: clauses
	 * containing subplans are left in HAVING.	Otherwise, we move or copy the
	 * HAVING clause into WHERE, in hopes of eliminating tuples before
	 * aggregation instead of after.
	 *
	 * If the query has explicit grouping then we can simply move such a
	 * clause into WHERE; any group that fails the clause will not be in the
	 * output because none of its tuples will reach the grouping or
	 * aggregation stage.  Otherwise we must have a degenerate (variable-free)
	 * HAVING clause, which we put in WHERE so that query_planner() can use it
	 * in a gating Result node, but also keep in HAVING to ensure that we
	 * don't emit a bogus aggregated row. (This could be done better, but it
	 * seems not worth optimizing.)
	 *
	 * Note that both havingQual and parse->jointree->quals are in
	 * implicitly-ANDed-list form at this point, even though they are declared
	 * as Node *.
	 */
	newHaving = NIL;
	foreach(l, (List *) parse->havingQual)
	{
		Node	   *havingclause = (Node *) lfirst(l);

		if (contain_agg_clause(havingclause) ||
			contain_volatile_functions(havingclause) ||
			contain_subplans(havingclause))
		{
			/* keep it in HAVING */
			newHaving = lappend(newHaving, havingclause);
		}
		else if (parse->groupClause &&
				 !contain_extended_grouping(parse->groupClause))
		{
			/* move it to WHERE */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals, havingclause);
		}
		else
		{
			/* put a copy in WHERE, keep it in HAVING */
			parse->jointree->quals = (Node *)
				lappend((List *) parse->jointree->quals,
						copyObject(havingclause));
			newHaving = lappend(newHaving, havingclause);
		}
	}
	parse->havingQual = (Node *) newHaving;

	/*
	 * If we have any outer joins, try to reduce them to plain inner joins.
	 * This step is most easily done after we've done expression
	 * preprocessing.
	 */
	if (root->hasOuterJoins)
		reduce_outer_joins(root);

	/*
	 * Do the main planning.  If we have an inherited target relation, that
	 * needs special processing, else go straight to grouping_planner.
	 */
	if (parse->resultRelation &&
		rt_fetch(parse->resultRelation, parse->rtable)->inh)
		plan = inheritance_planner(root);
	else
		plan = grouping_planner(root, tuple_fraction);

	if (Gp_role == GP_ROLE_DISPATCH
			&& root->config->gp_dynamic_partition_pruning)
	{
		/**
		 * Apply transformation for dynamic partition elimination.
		 */
		plan = apply_dyn_partition_transforms(root, plan);
	}

	/*
	 * Deal with explicit redistribution requirements for TableValueExpr
	 * subplans with an explicit distribution (SCATTER BY).
	 */
	if (parse->scatterClause)
	{
		bool	 r;
		List    *exprList;
		

		/* Deal with the special case of SCATTER RANDOMLY */
		if (list_length(parse->scatterClause) == 1 && linitial(parse->scatterClause) == NULL)
			exprList = NIL;
		else
			exprList = parse->scatterClause;

		/* Repartition the subquery plan based on our distribution requirements */
		r = repartitionPlan(plan, false, false, exprList);
		if (!r)
		{
			/* 
			 * This should not be possible, repartitionPlan should never
			 * fail when both stable and rescannable are false.
			 */
			elog(ERROR, "failure repartitioning plan");
		}
	}

	/*
	 * If any subplans were generated, or if we're inside a subplan, build
	 * initPlan list and extParam/allParam sets for plan nodes, and attach the
	 * initPlans to the top plan node.
	 */
	if (list_length(glob->subplans) != num_old_subplans || 
		root->query_level > 1)
		
	{
		Assert(root->parse == parse); /* GPDB isn't always careful about this. */
		SS_finalize_plan(root, root->parse->rtable, plan, true);
	}

	/* Return internal info if caller wants it */
	if (subroot)
		*subroot = root;

	return plan;
}

/*
 * preprocess_expression
 *		Do subquery_planner's preprocessing work for an expression,
 *		which can be a targetlist, a WHERE clause (including JOIN/ON
 *		conditions), or a HAVING clause.
 */
static Node *
preprocess_expression(PlannerInfo *root, Node *expr, int kind)
{
	/*
	 * Fall out quickly if expression is empty.  This occurs often enough to
	 * be worth checking.  Note that null->null is the correct conversion for
	 * implicit-AND result format, too.
	 */
	if (expr == NULL)
		return NULL;

	/*
	 * If the query has any join RTEs, replace join alias variables with
	 * base-relation variables. We must do this before sublink processing,
	 * else sublinks expanded out from join aliases wouldn't get processed. We
	 * can skip it in VALUES lists, however, since they can't contain any Vars
	 * at all.
	 */
	if (root->hasJoinRTEs && kind != EXPRKIND_VALUES)
		expr = flatten_join_alias_vars(root, expr);

	/*
	 * Simplify constant expressions.
	 *
	 * Note: this also flattens nested AND and OR expressions into N-argument
	 * form.  All processing of a qual expression after this point must be
	 * careful to maintain AND/OR flatness --- that is, do not generate a tree
	 * with AND directly under AND, nor OR directly under OR.
	 *
	 * Because this is a relatively expensive process, we skip it when the
	 * query is trivial, such as "SELECT 2+2;".  The expression will only be
	 * evaluated once anyway, so no point in pre-simplifying; we can't execute
	 * it any faster than the executor can, and we will waste cycles copying
	 * the tree.  Notice however that we still must do it for quals (to get
	 * AND/OR flatness); and if we are in a subquery we should not assume it
	 * will be done only once.
	 *
	 * However, if targeted dispatch is enabled then we WILL optimize VALUES
	 * lists, because targeted dispatch benefits from it: it IS more expensive
	 * to evaluate the list in the executor without first simplifying it in
	 * the planner, since the planner may determine that only a single
	 * segment is required!
	 */
	if (kind != EXPRKIND_VALUES && (
			root->parse->jointree->fromlist != NIL ||
			kind == EXPRKIND_QUAL ||

			/* note that we could optimize the direct dispatch case
			 *   by only doing the simplification for the distribution
			 *   key columns.
			 */
			(root->config->gp_enable_direct_dispatch && kind == EXPRKIND_TARGET ) ||
			root->query_level > 1))
	{
		expr = eval_const_expressions(root, expr);
	}

	/*
	 * If it's a qual or havingQual, canonicalize it.
	 */
	if (kind == EXPRKIND_QUAL)
	{
		expr = (Node *) canonicalize_qual((Expr *) expr);

#ifdef OPTIMIZER_DEBUG
		printf("After canonicalize_qual()\n");
		pprint(expr);
#endif
	}

	/* Expand SubLinks to SubPlans */
	if (root->parse->hasSubLinks)
		expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));

	/*
	 * XXX do not insert anything here unless you have grokked the comments in
	 * SS_replace_correlation_vars ...
	 */

	/* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
	if (root->query_level > 1)
		expr = SS_replace_correlation_vars(root, expr);

	/*
	 * If it's a qual or havingQual, convert it to implicit-AND format. (We
	 * don't want to do this before eval_const_expressions, since the latter
	 * would be unable to simplify a top-level AND correctly. Also,
	 * SS_process_sublinks expects explicit-AND format.)
	 */
	if (kind == EXPRKIND_QUAL)
		expr = (Node *) make_ands_implicit((Expr *) expr);

	return expr;
}

/*
 * preprocess_qual_conditions
 *		Recursively scan the query's jointree and do subquery_planner's
 *		preprocessing work on each qual condition found therein.
 */
static void
preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
{
	if (jtnode == NULL)
		return;
	if (IsA(jtnode, RangeTblRef))
	{
		/* nothing to do here */
	}
	else if (IsA(jtnode, FromExpr))
	{
		FromExpr   *f = (FromExpr *) jtnode;
		ListCell   *l;

		foreach(l, f->fromlist)
			preprocess_qual_conditions(root, lfirst(l));

		f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
	}
	else if (IsA(jtnode, JoinExpr))
	{
		JoinExpr   *j = (JoinExpr *) jtnode;
		ListCell   *l;

		preprocess_qual_conditions(root, j->larg);
		preprocess_qual_conditions(root, j->rarg);

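        /* CDB: also recurse into subqueries that were pulled up into this join. */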
        foreach(l, j->subqfromlist)
		    preprocess_qual_conditions(root, lfirst(l));

		j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
	}
	else
		elog(ERROR, "unrecognized node type: %d",
			 (int) nodeTag(jtnode));
}

/*
 * inheritance_planner
 *	  Generate a plan in the case where the result relation is an
 *	  inheritance set.
 *
 * We have to handle this case differently from cases where a source relation
 * is an inheritance set. Source inheritance is expanded at the bottom of the
 * plan tree (see allpaths.c), but target inheritance has to be expanded at
 * the top.  The reason is that for UPDATE, each target relation needs a
 * different targetlist matching its own column set.  Also, for both UPDATE
 * and DELETE, the executor needs the Append plan node at the top, else it
 * can't keep track of which table is the current target table.  Fortunately,
 * the UPDATE/DELETE target can never be the nullable side of an outer join,
 * so it's OK to generate the plan this way.
 *
 * Returns a query plan.
 */
static Plan *
inheritance_planner(PlannerInfo *root)
{
	Query	   *parse = root->parse;
	Index       parentRTindex = parse->resultRelation;
	List	   *subplans = NIL;
	List	   *resultRelations = NIL;
	List	   *returningLists = NIL;
	List	   *tlist = NIL;
	PlannerInfo subroot;
	ListCell   *l;

	/* MPP */
	Plan *plan;
	CdbLocusType append_locustype = CdbLocusType_Null;
	bool locus_ok = TRUE;

	foreach(l, root->append_rel_list)
	{
		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
		Plan	   *subplan;

		/* append_rel_list contains all append rels; ignore others */
		if (appinfo->parent_relid != parentRTindex)
			continue;

		/*
		 * Generate modified query with this rel as target.  We have to be
		 * prepared to translate varnos in in_info_list as well as in the
		 * Query proper.
		 */
		memcpy(&subroot, root, sizeof(PlannerInfo));
		subroot.parse = (Query *)
			adjust_appendrel_attrs(&subroot, (Node *) parse,
								   appinfo);
		subroot.returningLists = NIL;
		subroot.init_plans = NIL;
		subroot.in_info_list = (List *)
			adjust_appendrel_attrs(&subroot, (Node *) root->in_info_list,
								   appinfo);
		/* There shouldn't be any OJ info to translate, as yet */
		Assert(subroot.oj_info_list == NIL);

		/* Generate plan */
		subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );

		/*
		 * If this child rel was excluded by constraint exclusion, exclude it
		 * from the plan.
		 *
		 * MPP-1544: perform this check before testing for loci compatibility;
		 * we might have inserted a dummy table with incorrect locus
		 */
		if (is_dummy_plan(subplan))
			continue;
		
		/* MPP needs target loci to match. */
		if ( Gp_role == GP_ROLE_DISPATCH )
		{
			CdbLocusType locustype = ( subplan->flow == NULL ) ?
				CdbLocusType_Null : subplan->flow->locustype;
				
			if ( append_locustype == CdbLocusType_Null && locus_ok )
			{
				append_locustype = locustype;
			}
			else
			{
				switch ( locustype )
				{
				case CdbLocusType_Entry:
					locus_ok = locus_ok && (locustype == append_locustype);
					break;
				case CdbLocusType_Hashed:
				case CdbLocusType_HashedOJ:
				case CdbLocusType_Strewn:
					/* MPP-2023: Among subplans, these loci are okay. */
					break;
				case CdbLocusType_Null:
				case CdbLocusType_SingleQE:
				case CdbLocusType_General:
				case CdbLocusType_Replicated:
					/* These loci are not valid on base relations */
					locus_ok = FALSE;
					break;
				default:
					/* We should not be hitting this */
					locus_ok = FALSE;
					Assert (0); 
					break;
				}
			}
			if ( ! locus_ok )
			{
				ereport(ERROR, (
					errcode(ERRCODE_CDB_INTERNAL_ERROR),
					errmsg("incompatible loci in target inheritance set") ));
			}
		}

		/* Save tlist from first rel for use below */
		if (subplans == NIL)
		{
			tlist = subplan->targetlist;
		}

		/**
		 * The grouping planner scribbles on the rtable e.g. to add pseudo columns.
		 * We need to keep track of this.
		 */
		parse->rtable = subroot.parse->rtable;

		subplans = lappend(subplans, subplan);

		/* Build target-relations list for the executor */
		resultRelations = lappend_int(resultRelations, appinfo->child_relid);

		/* Build list of per-relation RETURNING targetlists */
		if (parse->returningList)
		{
			Assert(list_length(subroot.returningLists) == 1);
			returningLists = list_concat(returningLists,
										 subroot.returningLists);
		}
	}

	/*
	 * If constraint exclusion removed all the result relations, fall back to
	 * the original result relation so the executor still has a target.
	 */
	if (resultRelations)
	{
		root->resultRelations = resultRelations;
	}
	else
	{
		root->resultRelations = list_make1_int(parse->resultRelation);
	}
	
	root->returningLists = returningLists;
	/* Mark result as unordered (probably unnecessary) */
	root->query_pathkeys = NIL;

	/*
	 * If we managed to exclude every child rel, return a dummy plan
	 */
	if (subplans == NIL)
		return (Plan *) make_result(tlist,
									(Node *) list_make1(makeBoolConst(false,
																	  false)),
									NULL);

	plan = (Plan *) make_append(subplans, true, tlist);
	
	/* MPP dispatch needs to know the kind of locus. */
	if ( Gp_role == GP_ROLE_DISPATCH )
	{
		switch ( append_locustype )
		{
		case CdbLocusType_Entry:
			mark_plan_entry(plan);
			break;
			
		case CdbLocusType_Hashed:
		case CdbLocusType_HashedOJ:
		case CdbLocusType_Strewn:
			/* Depend on caller to avoid incompatible hash keys. */
			/* For our purpose (UPD/DEL target), strewn is good enough. */
			mark_plan_strewn(plan);
			break;
			
		default:
			ereport(ERROR, (
				errcode(ERRCODE_CDB_INTERNAL_ERROR),
				errmsg("unexpected locus assigned to target inheritance set") ));
		}
	}
	
	/* Suppress Append if there's only one surviving child rel */
	if (list_length(subplans) == 1)
		return (Plan *) linitial(subplans);

	return plan;
}

#ifdef USE_ASSERT_CHECKING

static void grouping_planner_output_asserts(PlannerInfo *root, Plan *plan);
/**
 * Ensure goodness of plans returned by grouping planner
 */
void grouping_planner_output_asserts(PlannerInfo *root, Plan *plan)
{
	Assert(plan);

	/* Ensure that plan refers to vars that have varlevelsup = 0 AND varno is in the rtable */
	List *allVars = extract_nodes(root->glob, (Node *) plan, T_Var);
	ListCell *lc = NULL;

	foreach (lc, allVars)
	{
		Var *var = (Var *) lfirst(lc);
		Assert(var->varlevelsup == 0 && "Plan contains vars that refer to outer plan.");
		Assert((var->varno == OUTER
				|| (var->varno > 0 && var->varno <= list_length(root->parse->rtable)))
				&& "Plan contains var that refer outside the rtable.");
		Assert(var->varno == var->varnoold && "Varno and varnoold do not agree!");

		/** If a pseudo column, there should be a corresponding entry in the relation */
		if (var->varattno <= FirstLowInvalidHeapAttributeNumber)
		{
			RangeTblEntry *rte = rt_fetch(var->varno, root->parse->rtable);
			Assert(rte);
			Assert(rte->pseudocols);
			Assert(list_length(rte->pseudocols) > var->varattno - FirstLowInvalidHeapAttributeNumber);
		}
	}
}
#endif

/*
 * getAnySubplan
 *   Return one subplan for the given node.
 *
 * If the given node is an Append, the first subplan is returned.
 * If the given node is a SubqueryScan, its subplan is returned.
 * Otherwise, the lefttree of the given node is returned.
 */
static Plan *
getAnySubplan(Plan *node)
{
	Assert(is_plan_node((Node *)node));
	
	if (IsA(node, Append))
	{
		Append *append = (Append*)node;
		Assert(list_length(append->appendplans) > 0);
		return (Plan *)linitial(append->appendplans);
	}
	else if (IsA(node, SubqueryScan))
	{
		SubqueryScan *subqueryScan = (SubqueryScan *)node;
		return subqueryScan->subplan;
	}
	
	return node->lefttree;
}

/*--------------------
 * grouping_planner
 *	  Perform planning steps related to grouping, aggregation, etc.
 *	  This primarily means adding top-level processing to the basic
 *	  query plan produced by query_planner.
 *
 * tuple_fraction is the fraction of tuples we expect will be retrieved
 *
 * tuple_fraction is interpreted as follows:
 *	  0: expect all tuples to be retrieved (normal case)
 *	  0 < tuple_fraction < 1: expect the given fraction of tuples available
 *		from the plan to be retrieved
 *	  tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 *		expected to be retrieved (ie, a LIMIT specification)
 *
 * Returns a query plan.  Also, root->query_pathkeys is returned as the
 * actual output ordering of the plan (in pathkey format).
 *--------------------
 */
static Plan *
grouping_planner(PlannerInfo *root, double tuple_fraction)
{
	Query	   *parse = root->parse;
	List	   *tlist = parse->targetList;
	int64		offset_est = 0;
	int64		count_est = 0;
	Plan	   *result_plan;
	List	   *current_pathkeys = NIL;
	CdbPathLocus current_locus;
	List	   *sort_pathkeys;
	Path	   *best_path = NULL;
	double		dNumGroups = 0;
	double		numDistinct = 1;
	List	   *distinctExprs = NIL;

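	/* Cost per row of a Motion: use the GUC if set, else 2 * cpu_tuple_cost. */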
	double		motion_cost_per_row  =
						(gp_motion_cost_per_row > 0.0) ?
						gp_motion_cost_per_row :
						2.0 * cpu_tuple_cost;
	
	CdbPathLocus_MakeNull(&current_locus); 
	
	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
	if (parse->limitCount || parse->limitOffset)
		tuple_fraction = preprocess_limit(root, tuple_fraction,
										  &offset_est, &count_est);

	if (parse->setOperations)
	{
		List	   *set_sortclauses;

		/*
		 * If there's a top-level ORDER BY, assume we have to fetch all the
		 * tuples.	This might seem too simplistic given all the hackery below
		 * to possibly avoid the sort ... but a nonzero tuple_fraction is only
		 * of use to plan_set_operations() when the setop is UNION ALL, and
		 * the result of UNION ALL is always unsorted.
		 */
		if (parse->sortClause)
			tuple_fraction = 0.0;

		/*
		 * Construct the plan for set operations.  The result will not need
		 * any work except perhaps a top-level sort and/or LIMIT.
		 */
		result_plan = plan_set_operations(root, tuple_fraction,
										  &set_sortclauses);

		/*
		 * Calculate pathkeys representing the sort order (if any) of the set
		 * operation's result.  We have to do this before overwriting the sort
		 * key information...
		 */
		current_pathkeys = make_pathkeys_for_sortclauses(set_sortclauses,
													result_plan->targetlist);
		current_pathkeys = canonicalize_pathkeys(root, current_pathkeys);

		/*
		 * We should not need to call preprocess_targetlist, since we must be
		 * in a SELECT query node.	Instead, use the targetlist returned by
		 * plan_set_operations (since this tells whether it returned any
		 * resjunk columns!), and transfer any sort key information from the
		 * original tlist.
		 */
		Assert(parse->commandType == CMD_SELECT);

		tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);

		/*
		 * Can't handle FOR UPDATE/SHARE here (parser should have checked
		 * already, but let's make sure).
		 */
		if (parse->rowMarks)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));

		/*
		 * Calculate pathkeys that represent result ordering requirements
		 */
		sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
													  tlist);
		sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
	}
	else if ( parse->windowClause && parse->targetList &&
			  contain_windowref((Node *)parse->targetList, NULL) )
	{
		if (extract_nodes(NULL, (Node *) tlist, T_PercentileExpr) != NIL)
		{
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("window function with WITHIN GROUP aggregate is not supported")));
		}
		/*
		 * Calculate pathkeys that represent ordering requirements. Stash
		 * them in PlannerInfo so that query_planner can canonicalize them.
		 */
		root->group_pathkeys = NIL;
		root->sort_pathkeys =
			make_pathkeys_for_sortclauses(parse->sortClause, tlist);

		
		result_plan = window_planner(root, tuple_fraction, &current_pathkeys);
		
		/* Recover sort pathkeys for use later.  These may or may not
		 * match the current_pathkeys resulting from the window plan.  
		 */
		sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
													  result_plan->targetlist);
		sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
	}
	else
	{
		/* No set operations, do regular planning */
		List	   *sub_tlist;
		List	   *group_pathkeys;
		AttrNumber *groupColIdx = NULL;
		Oid		   *groupOperators = NULL;
		bool		need_tlist_eval = true;
		QualCost	tlist_cost;
		Path	   *cheapest_path;
		Path	   *sorted_path;
		long		numGroups = 0;
		AggClauseCounts agg_counts;
		int			numGroupCols;
		bool		use_hashed_grouping = false;
		bool        grpext = false;
		bool		has_within = false;
		CanonicalGroupingSets *canonical_grpsets;

		/* Preprocess targetlist */
		tlist = preprocess_targetlist(root, tlist);

		/* Obtain canonical grouping sets */
		canonical_grpsets = make_canonical_groupingsets(parse->groupClause);
		numGroupCols = canonical_grpsets->num_distcols;

		/*
		 * Clean up parse->groupClause if the grouping set is an empty
		 * set.
		 */
		if (numGroupCols == 0)
		{
			list_free(parse->groupClause);
			parse->groupClause = NIL;
		}

		grpext = is_grouping_extension(canonical_grpsets);
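		/* Check for WITHIN GROUP (percentile) aggregates in tlist or HAVING. */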
		has_within = extract_nodes(NULL, (Node *) tlist, T_PercentileExpr) != NIL;
		has_within |= extract_nodes(NULL, parse->havingQual, T_PercentileExpr) != NIL;

		if (grpext && has_within)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("WITHIN GROUP aggregate cannot be used in GROUPING SETS query")));
1438

1439 1440
		/*
		 * Will need actual number of aggregates for estimating costs.
1441
		 *
B
Bruce Momjian 已提交
1442 1443
		 * Note: we do not attempt to detect duplicate aggregates here; a
		 * somewhat-overestimated count is okay for our present purposes.
1444
		 *
1445 1446
		 * Note: think not that we can turn off hasAggs if we find no aggs. It
		 * is possible for constant-expression simplification to remove all
B
Bruce Momjian 已提交
1447 1448
		 * explicit references to aggs, but we still have to follow the
		 * aggregate semantics (eg, producing only one output row).
1449
		 */
1450 1451
		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));

1452
		if (parse->hasAggs)
1453 1454 1455 1456
		{
			count_agg_clauses((Node *) tlist, &agg_counts);
			count_agg_clauses(parse->havingQual, &agg_counts);
		}
1457

1458 1459 1460 1461 1462
		/*
		 * Generate appropriate target list for subplan; may be different from
		 * tlist if grouping or aggregation is needed.
		 */
		sub_tlist = make_subplanTargetList(root, tlist,
1463 1464
										   &groupColIdx, &groupOperators,
										   &need_tlist_eval);
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484
		
		/* Augment the subplan target list to include targets for ordered
		 * aggregates.  As a side effect, this may scribble updated sort
		 * group ref values into AggOrder nodes within Aggref nodes of the
		 * query.  A pity, but it would harder to do this earlier.
		 */
		sub_tlist = register_ordered_aggs(tlist, 
										  root->parse->havingQual, 
										  sub_tlist);

		/*
		 * Calculate pathkeys that represent grouping/ordering requirements.
		 * Stash them in PlannerInfo so that query_planner can canonicalize
		 * them.
		 */
		root->group_pathkeys =
			make_pathkeys_for_groupclause(parse->groupClause, tlist);
		root->sort_pathkeys =
			make_pathkeys_for_sortclauses(parse->sortClause, tlist);

1485 1486 1487
		/*
		 * Figure out whether we need a sorted result from query_planner.
		 *
B
		 * for grouping.  Otherwise, if there is an ORDER BY clause, we want
		 * to sort by the ORDER BY clause.	(Note: if we have both, and ORDER
		 * BY is a superset of GROUP BY, it would be tempting to request sort
		 * by ORDER BY --- but that might just leave us failing to exploit an
		 * available sort order at all. Needs more thought...)
1494 1495
		 */
		if (parse->groupClause)
1496
			root->query_pathkeys = root->group_pathkeys;
1497
		else if (parse->sortClause)
1498
			root->query_pathkeys = root->sort_pathkeys;
1499
		else
1500
			root->query_pathkeys = NIL;
1501

1502
		/*
B
		 * note there may not be any presorted path).  query_planner will also
		 * estimate the number of groups in the query, and canonicalize all
		 * the pathkeys.
1507
		 */
1508 1509
		query_planner(root, sub_tlist, tuple_fraction,
					  &cheapest_path, &sorted_path, &dNumGroups);
1510

1511 1512
		group_pathkeys = root->group_pathkeys;
		sort_pathkeys = root->sort_pathkeys;
1513

1514
		/*
1515 1516
		 * If grouping, extract the grouping operators and decide whether we
		 * want to use hashed grouping.
1517
		 */
1518
		if (parse->groupClause)
1519
		{
1520
			use_hashed_grouping =
1521
				choose_hashed_grouping(root, tuple_fraction,
1522
									   cheapest_path, sorted_path,
1523
									   groupOperators, numGroupCols, dNumGroups,
1524
									   &agg_counts);
1525 1526 1527

			/* Also convert # groups to long int --- but 'ware overflow! */
			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
1528 1529
		}

B
1531
		 * Select the best path.  If we are doing hashed grouping, we will
B
		 * Otherwise, trust query_planner's decision about which to use.
1534
		 */
1535
		if (use_hashed_grouping || !sorted_path)
1536
			best_path = cheapest_path;
1537
		else
1538 1539
			best_path = sorted_path;

1540 1541 1542 1543 1544 1545 1546
		/* CDB:  For now, we either
		 * - construct a general parallel plan,
		 * - let the sequential planner handle the situation, or
		 * - construct a sequential plan using the mix-max index optimization.
		 *
		 * Eventually we should add a parallel version of the min-max
		 * optimization.  For now, it's either-or.
1547
		 */
1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565
		if ( Gp_role == GP_ROLE_DISPATCH )
		{
			bool querynode_changed = false;
			bool pass_subtlist = false;
			GroupContext group_context;

			pass_subtlist = (agg_counts.aggOrder != NIL || has_within);
			group_context.best_path = best_path;
			group_context.cheapest_path = cheapest_path;
			group_context.subplan = NULL;
			group_context.sub_tlist = pass_subtlist ? sub_tlist : NIL;
			group_context.tlist = tlist;
			group_context.use_hashed_grouping = use_hashed_grouping;
			group_context.tuple_fraction = tuple_fraction;
			group_context.canonical_grpsets = canonical_grpsets;
			group_context.grouping = 0;
			group_context.numGroupCols = 0;
			group_context.groupColIdx = NULL;
			group_context.groupOperators = NULL;
			group_context.numDistinctCols = 0;
			group_context.distinctColIdx = NULL;
			group_context.p_dNumGroups = &dNumGroups;
			group_context.pcurrent_pathkeys = &current_pathkeys;
			group_context.querynode_changed = &querynode_changed;

			/* within_agg_planner calls cdb_grouping_planner */
			if (has_within)
				result_plan = within_agg_planner(root,
												 &agg_counts,
												 &group_context);
			else
				result_plan = cdb_grouping_planner(root,
												   &agg_counts,
												   &group_context);

			/* Add the Repeat node if needed. */
			if (result_plan != NULL &&
				canonical_grpsets != NULL &&
				canonical_grpsets->grpset_counts != NULL)
			{
				bool need_repeat_node = false;
				int grpset_no;
				int repeat_count = 0;
				
				for (grpset_no = 0; grpset_no < canonical_grpsets->ngrpsets; grpset_no++)
				{
					if (canonical_grpsets->grpset_counts[grpset_no] > 1)
					{
						need_repeat_node = true;
						break;
					}
				}
				
				if (canonical_grpsets->ngrpsets == 1)
					repeat_count = canonical_grpsets->grpset_counts[0];
				
				if (need_repeat_node)
				{
					result_plan = add_repeat_node(result_plan, repeat_count, 0);
				}
			}

			if ( result_plan != NULL && result_plan->flow->numSortCols == 0 )
			{
				/*
				 * cdb_grouping_planner generated the full plan, with the
				 * right tlist, and it has no sort order.
				 */
				current_pathkeys = NIL;
			}

			if (result_plan != NULL && querynode_changed)
			{
				/* We want to re-write sort_pathkeys here since the 2-stage
				 * aggregation subplan or grouping extension subplan may change
				 * the previous root->parse Query node, which makes the current
				 * sort_pathkeys invalid.
				 */
				sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
															  result_plan->targetlist);
				sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
			}
		}
		else /* Not GP_ROLE_DISPATCH */
		{
			/*
			 * Check to see if it's possible to optimize MIN/MAX aggregates.
			 * If so, we will forget all the work we did so far to choose a
			 * "regular" path ... but we had to do it anyway to be able to
			 * tell which way is cheaper.
			 */
			result_plan = optimize_minmax_aggregates(root,
													 tlist,
													 best_path);
			if (result_plan != NULL)
			{
				/*
				 * optimize_minmax_aggregates generated the full plan, with the
				 * right tlist, and it has no sort order.
				 */
				current_pathkeys = NIL;
                mark_plan_entry(result_plan);
			}

		}

		if (result_plan == NULL)
		{
			/*
			 * Normal case --- create a plan according to query_planner's
			 * results.
			 */
			result_plan = create_plan(root, best_path);
			current_pathkeys = best_path->pathkeys;
			current_locus = best_path->locus; /* just use keys, don't copy */

			/*
			 * create_plan() returns a plan with just a "flat" tlist of
			 * required Vars.  Usually we need to insert the sub_tlist as the
			 * tlist of the top plan node.	However, we can skip that if we
			 * determined that whatever query_planner chose to return will be
			 * good enough.
			 */
			if (need_tlist_eval)
			{
				/*
				 * If the top-level plan node is one that cannot do expression
				 * evaluation, we must insert a Result node to project the
				 * desired tlist.
				 */
				result_plan = plan_pushdown_tlist(result_plan, sub_tlist);

				/*
				 * Also, account for the cost of evaluation of the sub_tlist.
				 *
				 * Up to now, we have only been dealing with "flat" tlists,
				 * containing just Vars.  So their evaluation cost is zero
				 * according to the model used by cost_qual_eval() (or if you
				 * prefer, the cost is factored into cpu_tuple_cost).  Thus we
				 * can avoid accounting for tlist cost throughout
				 * query_planner() and subroutines.  But now we've inserted a
				 * tlist that might contain actual operators, sub-selects, etc
				 * --- so we'd better account for its cost.
				 *
				 * Below this point, any tlist eval cost for added-on nodes
				 * should be accounted for as we create those nodes.
				 * Presently, of the node types we can add on, only Agg and
				 * Group project new tlists (the rest just copy their input
				 * tuples) --- so make_agg() and make_group() are responsible
				 * for computing the added cost.
				 */
				cost_qual_eval(&tlist_cost, sub_tlist, root);
				result_plan->startup_cost += tlist_cost.startup;
				result_plan->total_cost += tlist_cost.startup +
					tlist_cost.per_tuple * result_plan->plan_rows;
			}
			else
			{
				/*
				 * Since we're using query_planner's tlist and not the one
				 * make_subplanTargetList calculated, we have to refigure any
				 * grouping-column indexes make_subplanTargetList computed.
				 */
				locate_grouping_columns(root, tlist, result_plan->targetlist,
										groupColIdx);
			}

            Assert(result_plan->flow);

			/*
			 * Insert AGG or GROUP node if needed, plus an explicit sort step
			 * if necessary.
			 *
			 * HAVING clause, if any, becomes qual of the Agg or Group node.
			 */
			if (!grpext && use_hashed_grouping)
			{
				/* Hashed aggregate plan --- no sort needed */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												AGG_HASHED, false,
												numGroupCols,
												groupColIdx,
												groupOperators,
												numGroups,
												0, /* num_nullcols */
												0, /* input_grouping */
												0, /* grouping */
												0, /* rollup_gs_times */
												agg_counts.numAggs,
												agg_counts.transitionSpace,
												result_plan);

				if (canonical_grpsets != NULL &&
					canonical_grpsets->grpset_counts != NULL &&
					canonical_grpsets->grpset_counts[0] > 1)
				{
					result_plan->flow = pull_up_Flow(result_plan,
													 result_plan->lefttree,
													 (current_pathkeys != NIL));
					result_plan = add_repeat_node(result_plan,
												  canonical_grpsets->grpset_counts[0],
												  0);
				}

				/* Hashed aggregation produces randomly-ordered results */
				current_pathkeys = NIL;
				CdbPathLocus_MakeNull(&current_locus);
			}
			else if (!grpext && (parse->hasAggs || parse->groupClause))
			{
				/* Plain aggregate plan --- sort if needed */
				AggStrategy aggstrategy;

				if (parse->groupClause)
				{
					if (!pathkeys_contained_in(group_pathkeys,
											   current_pathkeys))
					{
						result_plan = (Plan *)
							make_sort_from_groupcols(root,
													 parse->groupClause,
													 groupColIdx,
													 false,
													 result_plan);
						current_pathkeys = group_pathkeys;

						/* Decorate the Sort node with a Flow node. */
						mark_sort_locus(result_plan);
					}
					aggstrategy = AGG_SORTED;

					/*
					 * The AGG node will not change the sort ordering of its
					 * groups, so current_pathkeys describes the result too.
					 */
				}
				else
				{
					aggstrategy = AGG_PLAIN;
					/* Result will be only one row anyway; no sort order */
					current_pathkeys = NIL;
				}

				/*
				 * We make a single Agg node if this is not a grouping extension.
				 */
				result_plan = (Plan *) make_agg(root,
												tlist,
												(List *) parse->havingQual,
												aggstrategy, false,
												numGroupCols,
												groupColIdx,
												groupOperators,
												numGroups,
												0, /* num_nullcols */
												0, /* input_grouping */
												0, /* grouping */
												0, /* rollup_gs_times */
												agg_counts.numAggs,
												agg_counts.transitionSpace,
												result_plan);
				if (canonical_grpsets != NULL &&
					canonical_grpsets->grpset_counts != NULL &&
					canonical_grpsets->grpset_counts[0] > 1)
				{
					result_plan->flow = pull_up_Flow(result_plan,
													 result_plan->lefttree,
													 (current_pathkeys != NIL));
					result_plan = add_repeat_node(result_plan,
												  canonical_grpsets->grpset_counts[0],
												  0);
				}
				CdbPathLocus_MakeNull(&current_locus);
			}
			else if (grpext && (parse->hasAggs || parse->groupClause))
			{
				/* Plan the grouping extension */
				ListCell *lc;
				bool querynode_changed = false;
				/*
				 * Make a copy of tlist.  (Do we really need to?)
				 */
				List *new_tlist = copyObject(tlist);
				/* Make EXPLAIN output look nice */
				foreach(lc, result_plan->targetlist)
				{
					TargetEntry *tle = (TargetEntry*)lfirst(lc);

					if ( IsA(tle->expr, Var) && tle->resname == NULL )
					{
						TargetEntry *vartle = tlist_member((Node*)tle->expr, tlist);

						if ( vartle != NULL && vartle->resname != NULL )
							tle->resname = pstrdup(vartle->resname);
					}
				}

				result_plan = plan_grouping_extension(root, best_path, tuple_fraction,
													  use_hashed_grouping,
													  &new_tlist, result_plan->targetlist,
													  true, false,
													  (List *) parse->havingQual,
													  &numGroupCols,
													  &groupColIdx,
													  &groupOperators,
													  &agg_counts,
													  canonical_grpsets,
													  &dNumGroups,
													  &querynode_changed,
													  &current_pathkeys,
													  result_plan);
				if (querynode_changed)
				{
					/* We want to re-write sort_pathkeys here since the 2-stage
					 * aggregation subplan or grouping extension subplan may change
					 * the previous root->parse Query node, which makes the current
					 * sort_pathkeys invalid.
					 */
					sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
																  result_plan->targetlist);
					sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
					CdbPathLocus_MakeNull(&current_locus);
				}
			}
			else if (root->hasHavingQual)
			{
				/*
				 * No aggregates, and no GROUP BY, but we have a HAVING qual.
				 * This is a degenerate case in which we are supposed to emit
				 * either 0 or 1 row depending on whether HAVING succeeds.
				 * Furthermore, there cannot be any variables in either HAVING
				 * or the targetlist, so we actually do not need the FROM
				 * table at all!  We can just throw away the plan-so-far and
				 * generate a Result node.	This is a sufficiently unusual
				 * corner case that it's not worth contorting the structure of
				 * this routine to avoid having to generate the plan in the
				 * first place.
				 */
				result_plan = (Plan *) make_result(tlist,
												   parse->havingQual,
												   NULL);
				/* Result will be only one row anyway; no sort order */
				current_pathkeys = NIL;
				mark_plan_general(result_plan);
				CdbPathLocus_MakeNull(&current_locus);
			}
		}						/* end of non-minmax-aggregate case */

		/* free canonical_grpsets */
		free_canonical_groupingsets(canonical_grpsets);
	}							/* end of if (setOperations) */

    /*
     * Decorate the top node with a Flow node if it doesn't have one yet.
     * (In such cases we require the next-to-top node to have a Flow node
     * from which we can obtain the distribution info.)
     */
    if (!result_plan->flow)
        result_plan->flow = pull_up_Flow(result_plan,
                                         getAnySubplan(result_plan),
                                         (current_pathkeys != NIL));

	/*
	 * MPP: If there's a DISTINCT clause and we're not collocated on the
	 * distinct key, we need to redistribute on that key.  In addition,
	 * we need to consider whether to "pre-unique" by doing a Sort-Unique
	 * operation on the data as currently distributed, redistributing on
	 * the distinct key, and doing the Sort-Unique again.  This 2-phase
	 * approach will be a win if the cost of redistributing the entire
	 * input exceeds the cost of an extra Redistribute-Sort-Unique on the
	 * pre-uniqued (reduced) input.
	 */
	if ( parse->distinctClause != NULL)
	{
		distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, 
												result_plan->targetlist);
		numDistinct = estimate_num_groups(root, distinctExprs, 
										  result_plan->plan_rows);
		
		if ( CdbPathLocus_IsNull(current_locus) )
		{
			current_locus = cdbpathlocus_from_flow(result_plan->flow);
		}
		
		if ( Gp_role == GP_ROLE_DISPATCH && CdbPathLocus_IsPartitioned(current_locus) )
		{
			List *distinct_pathkeys = make_pathkeys_for_sortclauses(parse->distinctClause,
																	result_plan->targetlist);
			bool needMotion = !cdbpathlocus_collocates(current_locus, distinct_pathkeys, false /*exact_match*/);
			
			/* Apply the preunique optimization, if enabled and worthwhile. */
			if ( root->config->gp_enable_preunique && needMotion )
			{
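				/*
				 * Illustrative cost sketch only: base_cost charges the
				 * Motion for every input row, while alt_cost charges a
				 * local Sort/Unique first and then the Motion for only
				 * the ~numDistinct surviving rows, plus the comparisons
				 * spent de-duplicating them.
				 */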
				double base_cost, alt_cost;
				Path sort_path;	 /* dummy for result of cost_sort */
				
				base_cost = motion_cost_per_row * result_plan->plan_rows;
				alt_cost = motion_cost_per_row * numDistinct;
				cost_sort(&sort_path, root, NIL, alt_cost, 
						  numDistinct, result_plan->plan_rows);
				alt_cost += sort_path.startup_cost;
				alt_cost += cpu_operator_cost * numDistinct 
							* list_length(parse->distinctClause);
					  
				if ( alt_cost < base_cost || root->config->gp_eager_preunique )
				{
					/* Reduce the number of rows to move by adding a [Sort and]
					 * Unique prior to the redistribute Motion. */
					if (parse->sortClause)
					{
						if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
						{
							result_plan = (Plan *)
							make_sort_from_sortclauses(root,
													   parse->sortClause,
													   result_plan);
							((Sort*)result_plan)->noduplicates = gp_enable_sort_distinct;
							current_pathkeys = sort_pathkeys;
							mark_sort_locus(result_plan);
						}
					}

					result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);

					result_plan->flow = pull_up_Flow(result_plan,
													 result_plan->lefttree,
													 true);

					result_plan->plan_rows = numDistinct;

					/*
					 * Our sort node (under the unique node),
					 * unfortunately can't guarantee uniqueness -- so
					 * we aren't allowed to push the limit into the
					 * sort; but we can avoid moving the entire sorted
					 * result-set by plunking a limit on the top of
					 * the unique-node.
					 */
					if (parse->limitCount)
					{
						/*
						 * Our extra limit operation is basically a
						 * third-phase on multi-phase limit (see
						 * 2-phase limit below)
						 */
						result_plan = pushdown_preliminary_limit(result_plan, parse->limitCount, count_est, parse->limitOffset, offset_est);
					}
				}
			}

			if ( needMotion )
			{
				result_plan = (Plan*)make_motion_hash(root, result_plan, distinctExprs);
				result_plan->total_cost += motion_cost_per_row * result_plan->plan_rows;
				current_pathkeys = NULL; /* Any pre-existing order now lost. */
			}
		}
		else if ( result_plan->flow->flotype == FLOW_SINGLETON )
			; /* Already collocated. */
		else
		{
			ereport(ERROR, (errcode(ERRCODE_CDB_INTERNAL_ERROR),
							errmsg("unexpected input locus to distinct")));
		}
	}

	/*
	 * If we were not able to make the plan come out in the right order, add
	 * an explicit sort step.  Note that, if we are going to add a Unique
	 * node, the sort_pathkeys will have the distinct keys as a prefix.
	 */
	if (parse->sortClause)
	{
		if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
		{
			result_plan = (Plan *)
				make_sort_from_sortclauses(root,
										   parse->sortClause,
										   result_plan);
			current_pathkeys = sort_pathkeys;
			mark_sort_locus(result_plan);
		}
	}

	/*
	 * If there is a DISTINCT clause, add the UNIQUE node.
	 */
	if (parse->distinctClause)
	{
		if ( IsA(result_plan, Sort) && gp_enable_sort_distinct )
			((Sort*)result_plan)->noduplicates = true;
		result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
        result_plan->flow = pull_up_Flow(result_plan,
                                         result_plan->lefttree,
                                         true);

		/*
		 * If there was grouping or aggregation, leave plan_rows as-is (ie,
		 * assume the result was already mostly unique).  If not, use the
		 * number of distinct-groups calculated by query_planner.
		 */
		if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
			result_plan->plan_rows = dNumGroups;
	}

	/*
	 * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
	 */
	if (parse->limitCount || parse->limitOffset)
	{
		if (Gp_role == GP_ROLE_DISPATCH && result_plan->flow->flotype == FLOW_PARTITIONED)
		{
			/* pushdown the first phase of multi-phase limit (which takes offset into account) */
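			/*
			 * Sketch of the multi-phase scheme (illustrative numbers): for
			 * LIMIT 10 OFFSET 20, each segment can safely prune to its first
			 * count + offset = 30 rows, since a row beyond the first 30 of
			 * any segment cannot be among the first 30 overall; the real
			 * offset is applied only by the final Limit node added below.
			 */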
			result_plan = pushdown_preliminary_limit(result_plan, parse->limitCount, count_est, parse->limitOffset, offset_est);
			
			/* Focus on QE [merge to preserve order], prior to final LIMIT. */
			result_plan = (Plan*)make_motion_gather_to_QE(result_plan, current_pathkeys != NIL);
			result_plan->total_cost += motion_cost_per_row * result_plan->plan_rows;
		}
			
		if (current_pathkeys == NIL)
		{
			/* This used to be a WARNING.  If reinstated, it should be a NOTICE
			 * and steps taken to avoid issuing it at inopportune times, e.g.,
			 * from the query generated by psql tab-completion.
			 */
			ereport(DEBUG1, (errmsg("LIMIT/OFFSET applied to unordered result.") ));
		}

		/* For multi-phase limit, this is the final limit */			
		result_plan = (Plan *) make_limit(result_plan,
										  parse->limitOffset,
										  parse->limitCount,
										  offset_est,
										  count_est);
        result_plan->flow = pull_up_Flow(result_plan,
                                         result_plan->lefttree,
                                         true);
	}

    Insist(result_plan->flow);

	/*
	 * Deal with the RETURNING clause if any.  It's convenient to pass the
	 * returningList through setrefs.c now rather than at top level (if we
	 * waited, handling inherited UPDATE/DELETE would be much harder).
	 */
	if (parse->returningList)
	{
		List	   *rlist;

		Assert(parse->resultRelation);
		rlist = set_returning_clause_references(root->glob,
												parse->returningList,
												result_plan,
												parse->resultRelation);
		root->returningLists = list_make1(rlist);
	}
	else
		root->returningLists = NIL;

	/* Compute result-relations list if needed */
	if (parse->resultRelation)
		root->resultRelations = list_make1_int(parse->resultRelation);
	else
		root->resultRelations = NIL;

	/*
	 * Return the actual output ordering in query_pathkeys for possible use by
	 * an outer query level.
	 */
	root->query_pathkeys = current_pathkeys;

#ifdef USE_ASSERT_CHECKING
	grouping_planner_output_asserts(root, result_plan);
#endif

	return result_plan;
}

/*
 * Entry is through is_dummy_plan().
 *
 * Detect whether a plan node is a "dummy" plan created when a relation
 * is deemed not to need scanning due to constraint exclusion.
 *
 * At bottom, such dummy plans are Result nodes with constant FALSE
 * filter quals.  However, we also recognize simple plans that are
 * known to return no rows because they contain a dummy.
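 *
 * For instance (illustrative), a scan of a partitioned table whose
 * predicate contradicts every partition's CHECK constraint plans as an
 * empty Append and reaches us as such a Result node.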
 *
 * BTW The plan_tree_walker framework is overkill here, but it's good to 
 *     do things the standard way.
 */
static bool
is_dummy_plan_walker(Node *node, bool *context)
{
	/*
	 * We are only interested in Plan nodes.
	 */
	if ( node == NULL || !is_plan_node(node) )
		return false;
	
	switch (nodeTag(node))
	{
		case T_Result:
			/* 
			 * This tests the base case of a dummy plan, which is a Result
			 * node with a constant FALSE filter qual.  (This is the case
			 * constructed as an empty Append path by set_plain_rel_pathlist
			 * in allpaths.c and made into a Result plan by create_append_plan
			 * in createplan.c.)
			 */
		{
			List *rcqual = (List *) ((Result *) node)->resconstantqual;
			
			if (list_length(rcqual) == 1)
			{
				Const	   *constqual = (Const *) linitial(rcqual);
				
				if (constqual && IsA(constqual, Const))
				{
					if (!constqual->constisnull &&
						!DatumGetBool(constqual->constvalue))
						*context = true;
					return true;
				}
			}
		}
			return false;
			
		case T_SubqueryScan:
			/* 
			 * A SubqueryScan is dummy if its subplan is dummy.
			 */
		{
			SubqueryScan *subqueryscan = (SubqueryScan *) node;
			Plan	   *subplan = subqueryscan->subplan;
			
			if ( is_dummy_plan(subplan) )
			{
				*context = true;
				return true;
			}				
		}
			return false;
			
		case T_NestLoop:
		case T_MergeJoin:
		case T_HashJoin:
			/*
			 * Joins with dummy inner and/or outer plans are dummy or not 
			 * based on the type of join.
			 */
		{
			switch ( ((Join*)node)->jointype )
			{
				case JOIN_INNER: /* either */
					*context = is_dummy_plan(innerPlan(node))
					|| is_dummy_plan(outerPlan(node));
					break;
					
				case JOIN_LEFT:
				case JOIN_FULL:
				case JOIN_RIGHT: /* both */
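					/*
					 * Outer joins preserve one side: e.g. a LEFT join with a
					 * dummy inner still emits its outer rows (NULL-extended),
					 * so both inputs must be dummy for the join to be dummy.
					 */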
					*context = is_dummy_plan(innerPlan(node))
					&& is_dummy_plan(outerPlan(node));
					break;
					
				case JOIN_IN:
				case JOIN_LASJ_NOTIN:
				case JOIN_LASJ: /* outer */
					*context = is_dummy_plan(outerPlan(node));
					break;
					
				default:
					break;
			}

			return true;
		}
			
		/* It may seem that we should check for Append or SetOp nodes with all
		 * dummy branches, but that case should not occur.  It would cause big
		 * problems elsewhere in the code.
		 */
			
		case T_Hash:
		case T_Material:
		case T_Sort:
		case T_Unique:
		    /* 
		     * Some node types are dummy if their outer plan is dummy,
		     * so we just recur.
		     *
		     * We don't include "tricky" nodes like Motion that might
		     * affect plan topology, even though we know they will return
		     * no rows from a dummy.
		     */
		    return plan_tree_walker(node, is_dummy_plan_walker, context);
		    
		default:
		    /* 
		     * Other node types are "opaque" so we choose a conservative
		     * course and terminate the walk.
		     */
			return true;
	}
    /* not reached */
}


static bool
is_dummy_plan(Plan *plan)
{
    bool is_dummy = false;
    
    is_dummy_plan_walker((Node*)plan, &is_dummy);
    
    return is_dummy;
}

/*
 * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
 *
 * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
 * results back in *count_est and *offset_est.  These variables are set to
 * 0 if the corresponding clause is not present, and -1 if it's present
 * but we couldn't estimate the value for it.  (The "0" convention is OK
 * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
 * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
 * usual practice of never estimating less than one row.)  These values will
 * be passed to make_limit, which see if you change this code.
 *
 * The return value is the suitably adjusted tuple_fraction to use for
 * planning the query.  This adjustment is not overridable, since it reflects
 * plan actions that grouping_planner() will certainly take, not assumptions
 * about context.
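 *
 * Worked example (illustrative numbers): for LIMIT 10 OFFSET 20 we set
 * *count_est = 10 and *offset_est = 20; at most count + offset = 30 tuples
 * can be needed, so a caller tuple_fraction of 0 (no preference) comes back
 * as an absolute demand of 30.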
 */
static double
preprocess_limit(PlannerInfo *root, double tuple_fraction,
				 int64 *offset_est, int64 *count_est)
{
	Query	   *parse = root->parse;
	Node	   *est;
	double		limit_fraction;

	/* Should not be called unless LIMIT or OFFSET */
	Assert(parse->limitCount || parse->limitOffset);

	/*
	 * Try to obtain the clause values.  We use estimate_expression_value
	 * primarily because it can sometimes do something useful with Params.
	 */
	if (parse->limitCount)
	{
		est = estimate_expression_value(root, parse->limitCount);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* NULL indicates LIMIT ALL, ie, no limit */
				*count_est = 0; /* treat as not present */
			}
			else
			{
				if (((Const *)est)->consttype == INT4OID)
					*count_est = DatumGetInt32(((Const *) est)->constvalue);
				else
					*count_est = DatumGetInt64(((Const *) est)->constvalue);
				if (*count_est <= 0)
					*count_est = 1;		/* force to at least 1 */
			}
		}
		else
			*count_est = -1;	/* can't estimate */
	}
	else
		*count_est = 0;			/* not present */

	if (parse->limitOffset)
	{
		est = estimate_expression_value(root, parse->limitOffset);
		if (est && IsA(est, Const))
		{
			if (((Const *) est)->constisnull)
			{
				/* Treat NULL as no offset; the executor will too */
				*offset_est = 0;	/* treat as not present */
			}
			else
			{
				if (((Const *)est)->consttype == INT4OID)
					*offset_est = DatumGetInt32(((Const *) est)->constvalue);
				else
					*offset_est = DatumGetInt64(((Const *) est)->constvalue);

				if (*offset_est < 0)
					*offset_est = 0;	/* less than 0 is same as 0 */
			}
		}
		else
			*offset_est = -1;	/* can't estimate */
	}
	else
		*offset_est = 0;		/* not present */

	if (*count_est != 0)
	{
		/*
		 * A LIMIT clause limits the absolute number of tuples returned.
		 * However, if it's not a constant LIMIT then we have to guess; for
		 * lack of a better idea, assume 10% of the plan's result is wanted.
		 */
		if (*count_est < 0 || *offset_est < 0)
		{
			/* LIMIT or OFFSET is an expression ... punt ... */
			limit_fraction = 0.10;
		}
		else
		{
			/* LIMIT (plus OFFSET, if any) is max number of tuples needed */
			limit_fraction = (double) *count_est + (double) *offset_est;
		}

2378 2379
		/*
		 * If we have absolute limits from both caller and LIMIT, use the
		 * smaller value; likewise if they are both fractional.  If one is
		 * fractional and the other absolute, we can't easily determine which
		 * is smaller, but we use the heuristic that the absolute will usually
		 * be smaller.
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute */
				tuple_fraction = Min(tuple_fraction, limit_fraction);
			}
			else
			{
				/* caller absolute, limit fractional; use caller's value */
			}
		}
		else if (tuple_fraction > 0.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* caller fractional, limit absolute; use limit */
				tuple_fraction = limit_fraction;
			}
			else
			{
				/* both fractional */
				tuple_fraction = Min(tuple_fraction, limit_fraction);
			}
		}
		else
		{
			/* no info from caller, just use limit */
			tuple_fraction = limit_fraction;
		}
	}
	else if (*offset_est != 0 && tuple_fraction > 0.0)
	{
		/*
		 * We have an OFFSET but no LIMIT.	This acts entirely differently
		 * from the LIMIT case: here, we need to increase rather than decrease
		 * the caller's tuple_fraction, because the OFFSET acts to cause more
		 * tuples to be fetched instead of fewer.  This only matters if we got
		 * a tuple_fraction > 0, however.
		 *
		 * As above, use 10% if OFFSET is present but unestimatable.
		 */
		if (*offset_est < 0)
			limit_fraction = 0.10;
		else
			limit_fraction = (double) *offset_est;

		/*
		 * If we have absolute counts from both caller and OFFSET, add them
		 * together; likewise if they are both fractional.	If one is
		 * fractional and the other absolute, we want to take the larger, and
		 * we heuristically assume that's the fractional one.
		 */
		if (tuple_fraction >= 1.0)
		{
			if (limit_fraction >= 1.0)
			{
				/* both absolute, so add them together */
				tuple_fraction += limit_fraction;
			}
			else
			{
				/* caller absolute, limit fractional; use limit */
				tuple_fraction = limit_fraction;
			}
		}
		else
		{
			if (limit_fraction >= 1.0)
			{
				/* caller fractional, limit absolute; use caller's value */
			}
			else
			{
				/* both fractional, so add them together */
				tuple_fraction += limit_fraction;
				if (tuple_fraction >= 1.0)
					tuple_fraction = 0.0;		/* assume fetch all */
			}
		}
	}

	return tuple_fraction;
}

/*
 * extract_grouping_ops - make an array of the equality operator OIDs
 *		for the GROUP BY clause
 *
 * In PostgreSQL, the returned array's size is always list_length(groupClause),
 * but in GPDB's GROUPING SETS implementation that's not true.  The size of the
 * returned array is returned in *numGroupOps.
 */
#ifdef NOT_USED
static Oid *
extract_grouping_ops(List *groupClause, int *numGroupOps)
{
	int			maxCols = list_length(groupClause);
	int			colno = 0;
	Oid		   *groupOperators;
	ListCell   *glitem;

	groupOperators = (Oid *) palloc(maxCols * sizeof(Oid));

	foreach(glitem, groupClause)
	{
		Node *node = lfirst(glitem);

		if (node == NULL)
			continue;

		if (IsA(node, GroupClause) || IsA(node, SortClause))
		{
			GroupClause *groupcl = (GroupClause *) lfirst(glitem);

			if (colno == maxCols)
			{
				maxCols *= 2;
				groupOperators = (Oid *) repalloc(groupOperators,
												  maxCols * sizeof(Oid));
			}

			groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);
			if (!OidIsValid(groupOperators[colno]))		/* shouldn't happen */
				elog(ERROR, "could not find equality operator for ordering operator %u",
					 groupcl->sortop);
			colno++;
		}
		else if (IsA(node, GroupingClause))
		{
			List	   *groupsets = ((GroupingClause *) node)->groupsets;
			Oid		   *subops;
			int			nsubops;

			subops = extract_grouping_ops(groupsets, &nsubops);
			while (colno + nsubops > maxCols)
			{
				maxCols *= 2;
				groupOperators = (Oid *) repalloc(groupOperators,
												  maxCols * sizeof(Oid));
			}
			memcpy(&groupOperators[colno], subops, nsubops * sizeof(Oid));
			colno += nsubops;
		}
	}

	*numGroupOps = colno;

	return groupOperators;
}
#endif

/*
 * choose_hashed_grouping - should we use hashed grouping?
 */
bool
choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
					   Path *cheapest_path, Path *sorted_path,
					   Oid *groupOperators, int numGroupOps, double dNumGroups,
					   AggClauseCounts *agg_counts)
{
	int			numGroupCols;
	double		cheapest_path_rows;
	int			cheapest_path_width;
	Size		hashentrysize;
	List	   *current_pathkeys;
	Path		hashed_p;
	Path		sorted_p;
	int			i;

	HashAggTableSizes   hash_info;
	bool		has_dqa = false;
	bool		hash_cheaper = false;

	/*
	 * Check can't-do-it conditions, including whether the grouping operators
	 * are hashjoinable.  (We assume hashing is OK if they are marked
	 * oprcanhash.  If there isn't actually a supporting hash function,
	 * the executor will complain at runtime.)
	 *
	 * Executor doesn't support hashed aggregation with DISTINCT aggregates.
	 * (Doing so would imply storing *all* the input values in the hash table,
	 * which seems like a certain loser.)
	 *
	 * CDB: The parallel grouping planner can use hashed aggregation
	 * with DISTINCT-qualified aggregates in some cases, so in case we
	 * don't choose hashed grouping here, we make note in agg_counts to 
	 * indicate whether DQAs are the only reason.
	 */
	if (!root->config->enable_hashagg)
		goto hash_not_ok;
	has_dqa = agg_counts->numDistinctAggs != 0;
	for (i = 0; i < numGroupOps; i++)
	{
		if (!op_hashjoinable(groupOperators[i]))
			goto hash_not_ok;
	}

	/*
	 * CDB: The preliminary function is used to merge transient values
	 * during hash reloading (see execHHashagg.c). So hash agg is not
	 * allowed if one of the aggregates doesn't have its preliminary function.
	 */
	if (agg_counts->missing_prelimfunc)
		goto hash_not_ok;

	/*
	 * CDB: The parallel grouping planner cannot use hashed aggregation
	 * for ordered aggregates.
	 */
	if (agg_counts->aggOrder != NIL)
		goto hash_not_ok;

	/*
	 * Don't do it if it doesn't look like the hashtable will fit into
	 * work_mem.
	 *
	 * Beware here of the possibility that cheapest_path->parent is NULL. This
	 * could happen if user does something silly like SELECT 'foo' GROUP BY 1;
	 */
	if (cheapest_path->parent)
	{
		cheapest_path_rows = cdbpath_rows(root, cheapest_path);
		cheapest_path_width = cheapest_path->parent->width;
	}
	else
	{
		cheapest_path_rows = 1; /* assume non-set result */
		cheapest_path_width = 100;		/* arbitrary */
	}

	/* Estimate per-hash-entry space at tuple width... */
	/* (Should improve this estimate since not all
	 *  attributes are saved in a hash table entry's
	 *  grouping key tuple.)
	 */
	hashentrysize = MAXALIGN(cheapest_path_width) + MAXALIGN(sizeof(MemTupleData));
	/* plus space for pass-by-ref transition values... */
	hashentrysize += agg_counts->transitionSpace;
	/* plus the per-hash-entry overhead */
	hashentrysize += hash_agg_entry_size(agg_counts->numAggs);
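	/*
	 * Illustrative arithmetic (example figures only): with a 36-byte
	 * cheapest_path_width and 8-byte MAXALIGN, the base charge is
	 * MAXALIGN(36) = 40 bytes plus the MemTupleData header, and the
	 * transition space and per-aggregate overhead are then added on top.
	 */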

	if (!calcHashAggTableSizes(global_work_mem(root),
							   dNumGroups,
							   agg_counts->numAggs,
							   /* The following estimate is very rough but good enough for planning. */
							   sizeof(HeapTupleData) + sizeof(HeapTupleHeaderData) + cheapest_path_width,
							   agg_counts->transitionSpace,
							   false,
							   &hash_info))
	{
		goto hash_not_ok;
	}

	/*
	 * See if the estimated cost is no more than doing it the other way. While
	 * avoiding the need for sorted input is usually a win, the fact that the
	 * output won't be sorted may be a loss; so we need to do an actual cost
	 * comparison.
	 *
	 * We need to consider cheapest_path + hashagg [+ final sort] versus
	 * either cheapest_path [+ sort] + group or agg [+ final sort] or
	 * presorted_path + group or agg [+ final sort] where brackets indicate a
	 * step that may not be needed. We assume query_planner() will have
	 * returned a presorted path only if it's a winner compared to
	 * cheapest_path for this purpose.
	 *
	 * These path variables are dummies that just hold cost fields; we don't
	 * make actual Paths for these steps.
	 */
	numGroupCols = num_distcols_in_grouplist(root->parse->groupClause);
	cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs,
			 numGroupCols, dNumGroups,
			 cheapest_path->startup_cost, cheapest_path->total_cost,
			 cheapest_path_rows, hash_info.workmem_per_entry,
			 hash_info.nbatches, hash_info.hashentry_width, false);
	/* Result of hashed agg is always unsorted */
	if (root->sort_pathkeys)
		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
				  dNumGroups, cheapest_path_width);

	if (sorted_path)
	{
		sorted_p.startup_cost = sorted_path->startup_cost;
		sorted_p.total_cost = sorted_path->total_cost;
		current_pathkeys = sorted_path->pathkeys;
	}
	else
	{
		sorted_p.startup_cost = cheapest_path->startup_cost;
		sorted_p.total_cost = cheapest_path->total_cost;
		current_pathkeys = cheapest_path->pathkeys;
	}
	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
	{
		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
				  cheapest_path_rows, cheapest_path_width);
		current_pathkeys = root->group_pathkeys;
	}

	if (root->parse->hasAggs)
		cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs,
				 numGroupCols, dNumGroups,
				 sorted_p.startup_cost, sorted_p.total_cost,
				 cheapest_path_rows, 0.0, 0.0, 0.0, false);
	else
		cost_group(&sorted_p, root, numGroupCols, dNumGroups,
				   sorted_p.startup_cost, sorted_p.total_cost,
				   cheapest_path_rows);
	/* The Agg or Group node will preserve ordering */
	if (root->sort_pathkeys &&
		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
				  dNumGroups, cheapest_path_width);

	/*
	 * Now make the decision using the top-level tuple fraction.  First we
	 * have to convert an absolute count (LIMIT) into fractional form.
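	 *
	 * (Illustrative: an absolute demand for 50 tuples over an estimated
	 * 1000 groups becomes a fraction of 0.05.)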
	 */
	if (tuple_fraction >= 1.0)
		tuple_fraction /= dNumGroups;

	if (!root->config->enable_groupagg)
		hash_cheaper = true;
	else
		hash_cheaper = 0 > compare_fractional_path_costs(&hashed_p, 
														 &sorted_p, 
														 tuple_fraction);

	agg_counts->canHashAgg = true; /* costing is wrong if there are DQAs */
	return !has_dqa && hash_cheaper;

hash_not_ok:
	agg_counts->canHashAgg = false;
	return false;
}

2722 2723
/*---------------
 * make_subplanTargetList
 *	  Generate appropriate target list when grouping is required.
 *
 * When grouping_planner inserts Aggregate, Group, or Result plan nodes
 * above the result of query_planner, we typically want to pass a different
 * target list to query_planner than the outer plan nodes should have.
 * This routine generates the correct target list for the subplan.
 *
 * The initial target list passed from the parser already contains entries
 * for all ORDER BY and GROUP BY expressions, but it will not have entries
 * for variables used only in HAVING clauses; so we need to add those
 * variables to the subplan target list.  Also, we flatten all expressions
 * except GROUP BY items into their component variables; the other expressions
 * will be computed by the inserted nodes rather than by the subplan.
 * For example, given a query like
 *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
 * we want to pass this targetlist to the subplan:
 *		a,b,c,d,a+b
 * where the a+b target will be used by the Sort/Group steps, and the
 * other targets will be used for computing the final results.	(In the
 * above example we could theoretically suppress the a and b targets and
 * pass down only c,d,a+b, but it's not really worth the trouble to
 * eliminate simple var references from the subplan.  We will avoid doing
 * the extra computation to recompute a+b at the outer level; see
 * replace_vars_with_subplan_refs() in setrefs.c.)
 *
 * If we are grouping or aggregating, *and* there are no non-Var grouping
 * expressions, then the returned tlist is effectively dummy; we do not
 * need to force it to be evaluated, because all the Vars it contains
 * should be present in the output of query_planner anyway.
 *
 * 'tlist' is the query's target list.
 * 'groupColIdx' receives an array of column numbers for the GROUP BY
 *			expressions (if there are any) in the subplan's target list.
 * 'groupOperators' receives an array of equality operators corresponding
 *			to the GROUP BY expressions.
 * 'need_tlist_eval' is set true if we really need to evaluate the
 *			result tlist.
 *
 * The result is the targetlist to be passed to the subplan.
 *---------------
 */
static List *
make_subplanTargetList(PlannerInfo *root,
					   List *tlist,
					   AttrNumber **groupColIdx,
					   Oid **groupOperators,
					   bool *need_tlist_eval)
{
	Query	   *parse = root->parse;
	List	   *sub_tlist;
	List	   *extravars;
	int			numCols;

	*groupColIdx = NULL;

	/*
	 * If we're not grouping or aggregating, there's nothing to do here;
	 * query_planner should receive the unmodified target list.
	 */
	if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual)
	{
		*need_tlist_eval = true;
		return tlist;
	}

	/*
	 * Otherwise, start with a "flattened" tlist (having just the vars
	 * mentioned in the targetlist and HAVING qual --- but not upper-level
	 * Vars; they will be replaced by Params later on).
	 */
	sub_tlist = flatten_tlist(tlist);
	extravars = pull_var_clause(parse->havingQual, false);
	sub_tlist = add_to_flat_tlist(sub_tlist, extravars, false /* resjunk */);
	list_free(extravars);

	/* XXX
	 * Set need_tlist_eval to true for group queries.
	 *
	 * Reason: We are doing an aggregate on top.  No matter what we do,
	 * hash or sort, we may spill.  Every unnecessary column means useless
	 * I/O and heap_form/deform_tuple work.  It is almost always better to
	 * do the projection.
	 */
	if (parse->groupClause)
		*need_tlist_eval = true;
	else
		*need_tlist_eval = false;	/* only eval if not flat tlist */

	/*
	 * If grouping, create sub_tlist entries for all GROUP BY expressions
	 * (GROUP BY items that are simple Vars should be in the list already),
	 * and make an array showing where the group columns are in the sub_tlist.
	 */
	numCols = num_distcols_in_grouplist(parse->groupClause);

	if (numCols > 0)
	{
		int			keyno = 0;
		AttrNumber *grpColIdx;
		Oid		   *grpOperators;
		List       *grouptles;
		List	   *groupops;
		ListCell   *lc_tle;
		ListCell   *lc_op;

		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
		grpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
		*groupColIdx = grpColIdx;
		*groupOperators = grpOperators;

		get_sortgroupclauses_tles(parse->groupClause, tlist,
								  &grouptles, &groupops);
		Assert(numCols == list_length(grouptles) &&
			   numCols == list_length(groupops));
		forboth (lc_tle, grouptles, lc_op, groupops)
		{
			Node *groupexpr;
			TargetEntry	   *tle;
			TargetEntry *sub_tle = NULL;
			ListCell   *sl = NULL;

			tle = (TargetEntry *)lfirst(lc_tle);
			groupexpr = (Node*) tle->expr;

			/* Find or make a matching sub_tlist entry */
			foreach(sl, sub_tlist)
			{
				sub_tle = (TargetEntry *) lfirst(sl);
				if (equal(groupexpr, sub_tle->expr) 
						&& (sub_tle->ressortgroupref == 0))
					break;
			}
			if (!sl)
			{
				sub_tle = makeTargetEntry((Expr *) groupexpr,
									 list_length(sub_tlist) + 1,
									 NULL,
									 false);
				sub_tlist = lappend(sub_tlist, sub_tle);
				*need_tlist_eval = true;		/* it's not flat anymore */
			}

			/* Set its group reference and save its resno */
			sub_tle->ressortgroupref = tle->ressortgroupref;
			grpColIdx[keyno] = sub_tle->resno;

			grpOperators[keyno] = get_equality_op_for_ordering_op(lfirst_oid(lc_op));
			if (!OidIsValid(grpOperators[keyno]))		/* shouldn't happen */
				elog(ERROR, "could not find equality operator for ordering operator %u",
					 lfirst_oid(lc_op));
			keyno++;
		}
		Assert(keyno == numCols);
	}

	return sub_tlist;
}


/*
 * Function: register_ordered_aggs 
 *
 * Update the AggOrder nodes found in Aggref nodes of the given Query
 * node for a grouping/aggregating query to refer to targets in the
 * indirectly given subplan target list.  As a side-effect, new targets
 * may be added to the subplan target list.
 *
 * The idea is that Aggref nodes from the input Query node specify 
 * ordering expressions corresponding to sort specifications that must
 * refer (via sortgroupref values as usual) to the target list of the 
 * node below them in the plan.  Initially they may not, so we must find
 * or add them to the indirectly given subplan targetlist and adjust the
 * AggOrder node to match.
 *
 * This may scribble on the Query!  (This isn't too bad since only the
 * tleSortGroupRef fields of SortClause nodes and the corresponding 
 * ressortgroupref fields of TargetEntry nodes in the AggOrder node in
 * an Aggref change, and the interpretation of the list is the same
 * afterward.)
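 *
 * For example (illustrative, hypothetical aggregate name): in
 * SELECT myagg(x ORDER BY y) FROM t, the aggregate's internal ORDER BY y
 * must resolve, via sortgroupref, to a "y" entry in the subplan's target
 * list; if no such entry exists, we add one and give it a fresh
 * sortgroupref value.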
 */
List *register_ordered_aggs(List *tlist, Node *havingqual, List *sub_tlist)
{
	ListCell *lc;
	register_ordered_aggs_context ctx;
	
	ctx.tlist = tlist; /* aggregating target list */
	ctx.havingqual = havingqual; /* aggregating HAVING qual */
	ctx.sub_tlist = sub_tlist; /* input target list */
	ctx.last_sgr = 0; /* 0 = unassigned */
	
	/* There may be Aggrefs in the query's target list. */
	foreach (lc, ctx.tlist)
	{
		TargetEntry *tle = (TargetEntry *)lfirst(lc);
		tle->expr = (Expr*)register_ordered_aggs_mutator((Node*)tle->expr, &ctx);
	}
	
	/* There may be Aggrefs in the query's having clause */
	ctx.havingqual = register_ordered_aggs_mutator(ctx.havingqual, &ctx);
	
	return ctx.sub_tlist;
}

/*
 * Function: register_ordered_aggs_mutator
 *
 * Update the AggOrder nodes found in Aggref nodes of the given expression
 * to refer to targets in the context's subplan target list.  New targets 
 * may be added to the subplan target list as a side effect.
 */
Node *register_ordered_aggs_mutator(Node *node, 
									register_ordered_aggs_context *context)
{
	if ( node == NULL )
		return NULL;
	if ( IsA(node, Aggref) )
	{
		Aggref *aggref = (Aggref*)node;
		
		if ( aggref->aggorder )
		{
			register_AggOrder( aggref->aggorder, context );
		}
	}
	return expression_tree_mutator(node, 
								   register_ordered_aggs_mutator, 
								   (void *)context );
}


/*
 * Function register_AggOrder 
 *
 * Find or add the sort targets in the given AggOrder node to the
 * indirectly given subplan target list.  If we add a target, give
 * it a distinct sortgroupref value.  
 * 
 * Then update the AggOrder node to refer to the subplan target list.  
 * We need to update the target node too, so the sort specification 
 * continues to refer to its target in the AggOrder.  Note, however,
 * that we need to defer these updates to the end so that we don't
 * mess up the correspondence in the AggOrder before we're done
 * using it.
 */
typedef struct agg_order_update_spec
{
	SortClause *sort;
	TargetEntry *entry;
	Index sortgroupref;
}
agg_order_update_spec;

void register_AggOrder(AggOrder *aggorder, 
					   register_ordered_aggs_context *context)
{	
	ListCell *lc;
	List *updates = NIL;
	agg_order_update_spec *update;
	
	/* In the first release, targets and orders are 1:1.  This may
	 * change, but for now ... */
	Assert( list_length(aggorder->sortTargets) == 
		    list_length(aggorder->sortClause) );
	
	foreach (lc, aggorder->sortClause)
	{
		SortClause *sort;
		TargetEntry *sort_tle;
		TargetEntry *sub_tle;
		
		sort = (SortClause *)lfirst(lc);
		Assert(IsA(sort, SortClause));
		Assert( sort->tleSortGroupRef != 0 );
		sort_tle = get_sortgroupclause_tle(sort, aggorder->sortTargets);
		
		/* Find sort expression in the given target list, ... */
		sub_tle = tlist_member((Node*)sort_tle->expr, context->sub_tlist);
		
		/* ... or add it. */
		if ( !sub_tle )
		{
			sub_tle = makeTargetEntry(copyObject(sort_tle->expr),
									  list_length(context->sub_tlist) + 1,
									  NULL,
									  false);
			/* We fill in the sortgroupref below. */
			context->sub_tlist = lappend( context->sub_tlist, sub_tle );
		}
		
		if ( sub_tle->ressortgroupref == 0 )
		{
			/* Lazy initialize next sortgroupref value. */
			if ( context->last_sgr == 0 )
			{
				ListCell *c;
				/* Targets in sub_tlist and main tlist must not conflict. */
				foreach( c, context->tlist )
				{
					TargetEntry *tle = (TargetEntry*)lfirst(c);
					if ( context->last_sgr < tle->ressortgroupref )
						context->last_sgr = tle->ressortgroupref;
				}
				
				/* Might there be non-zero SGRs in sub_tlist? Don't see
				 * how, but be safe.
				 */
				foreach( c, context->sub_tlist )
				{
					TargetEntry *tle = (TargetEntry*)lfirst(c);
					if ( context->last_sgr < tle->ressortgroupref )
						context->last_sgr = tle->ressortgroupref;
				}
			}

			sub_tle->ressortgroupref = ++context->last_sgr;
		}
		
		/* Update AggOrder to agree with the tle in the target list. */
		update = (agg_order_update_spec*)palloc(sizeof(agg_order_update_spec));
		update->sort = sort;
		update->entry = sort_tle;
		update->sortgroupref = sub_tle->ressortgroupref;
		updates = lappend(updates, update);
	}
	
	foreach (lc, updates)
	{
		update = (agg_order_update_spec*)lfirst(lc);
		
		update->sort->tleSortGroupRef = update->sortgroupref;
		update->entry->ressortgroupref = update->sortgroupref;
	}
	list_free(updates);
}


/*
 * locate_grouping_columns
 *		Locate grouping columns in the tlist chosen by query_planner.
 *
 * This is only needed if we don't use the sub_tlist chosen by
 * make_subplanTargetList.	We have to forget the column indexes found
 * by that routine and re-locate the grouping vars in the real sub_tlist.
 */
static void
locate_grouping_columns(PlannerInfo *root,
						List *tlist,
						List *sub_tlist,
						AttrNumber *groupColIdx)
{
	int			keyno = 0;
	List	   *grouptles;
	List	   *groupops;
	ListCell   *ge;

	/*
	 * No work unless grouping.
	 */
	if (!root->parse->groupClause)
	{
		Assert(groupColIdx == NULL);
		return;
	}
	Assert(groupColIdx != NULL);

	get_sortgroupclauses_tles(root->parse->groupClause, tlist,
							  &grouptles, &groupops);

	foreach (ge, grouptles)
	{
		TargetEntry *groupte = (TargetEntry *)lfirst(ge);
		Node	*groupexpr;

		TargetEntry *te = NULL;
		ListCell   *sl;

		groupexpr = (Node *) groupte->expr;

		foreach(sl, sub_tlist)
		{
			te = (TargetEntry *) lfirst(sl);
			if (equal(groupexpr, te->expr))
				break;
		}
		if (!sl)
			elog(ERROR, "failed to locate grouping columns");

		groupColIdx[keyno++] = te->resno;
	}
}

/*
 * postprocess_setop_tlist
 *	  Fix up targetlist returned by plan_set_operations().
 *
 * We need to transpose sort key info from the orig_tlist into new_tlist.
 * NOTE: this would not be good enough if we supported resjunk sort keys
 * for results of set operations --- then, we'd need to project a whole
 * new tlist to evaluate the resjunk columns.  For now, just ereport if we
 * find any resjunk columns in orig_tlist.
 */
static List *
postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
{
	ListCell   *l;
	ListCell   *orig_tlist_item = list_head(orig_tlist);
3130

3131 3132 3133 3134
 	/* empty orig has no effect on info in new (MPP-2655) */
 	if ( orig_tlist_item == NULL )
 		return new_tlist;
 	
3135 3136 3137 3138 3139 3140
	foreach(l, new_tlist)
	{
		TargetEntry *new_tle = (TargetEntry *) lfirst(l);
		TargetEntry *orig_tle;

		/* ignore resjunk columns in setop result */
3141
		if (new_tle->resjunk)
3142 3143
			continue;

3144 3145 3146
		Assert(orig_tlist_item != NULL);
		orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
		orig_tlist_item = lnext(orig_tlist_item);
B
Bruce Momjian 已提交
3147
		if (orig_tle->resjunk)	/* should not happen */
3148
			elog(ERROR, "resjunk output columns are not implemented");
3149 3150
		Assert(new_tle->resno == orig_tle->resno);
		new_tle->ressortgroupref = orig_tle->ressortgroupref;
3151
	}
3152
	if (orig_tlist_item != NULL)
3153
		elog(ERROR, "resjunk output columns are not implemented");
3154 3155
	return new_tlist;
}
3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641

/*
 * Produce the canonical form of a GROUP BY clause given the parse
 * tree form.
 *
 * The result is a CanonicalGroupingSets, which contains a list of
 * Bitmapsets.  Each Bitmapset contains the sort-group reference
 * values of the attributes in one of the grouping sets specified in
 * the GROUP BY clause.  The number of list elements is the number of
 * grouping sets specified.
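 *
 * For example (an illustrative sketch; assume sortgroupref values
 * i=1, j=2, k=3): GROUP BY ROLLUP(i,j), k canonicalizes to the three
 * grouping sets (k,i,j), (k,i), (k), represented as the Bitmapsets
 * {1,2,3}, {1,3}, {3}.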
 */
static CanonicalGroupingSets *
make_canonical_groupingsets(List *groupClause)
{
	CanonicalGroupingSets *canonical_grpsets = 
		(CanonicalGroupingSets *) palloc0(sizeof(CanonicalGroupingSets));
	ListCell *lc;
	List *ord_grping = NIL; /* the ordinary grouping */
	List *rollups = NIL;    /* the grouping sets from ROLLUP */
	List *grpingsets = NIL; /* the grouping sets from GROUPING SETS */
	List *cubes = NIL;      /* the grouping sets from CUBE */
	Bitmapset *bms = NULL;
	List *final_grpingsets = NIL;
	List *list_grpingsets = NIL;
	int setno;
	int prev_setno = 0;

	if (groupClause == NIL)
		return canonical_grpsets;

	foreach (lc, groupClause)
	{
		GroupingClause *gc;

		Node *node = lfirst(lc);

		if (node == NULL)
			continue;

		/* Note that the top-level empty sets have been removed
		 * in the parser.
		 */
		Assert(IsA(node, GroupClause) ||
			   IsA(node, GroupingClause) ||
			   IsA(node, List));

		if (IsA(node, GroupClause) ||
			IsA(node, List))
		{
			ord_grping = lappend(ord_grping,
								 canonicalize_colref_list(node));
			continue;
		}

		gc = (GroupingClause *)node;
		switch (gc->groupType)
		{
			case GROUPINGTYPE_ROLLUP:
				rollups = lappend(rollups,
								  rollup_gs_list(canonicalize_gs_list(gc->groupsets, true)));
				break;
			case GROUPINGTYPE_CUBE:
				cubes = lappend(cubes,
								cube_gs_list(canonicalize_gs_list(gc->groupsets, true)));
				break;
			case GROUPINGTYPE_GROUPING_SETS:
				grpingsets = lappend(grpingsets,
									 canonicalize_gs_list(gc->groupsets, false));
				break;
			default:
				elog(ERROR, "invalid grouping set");
		}
	}

	/* Obtain the cartesian product of grouping sets generated for ordinary
	 * grouping sets, rollups, cubes, and grouping sets.
	 *
	 * We apply a small optimization here. We always append grouping sets
	 * generated for rollups, cubes and grouping sets to grouping sets for
	 * ordinary sets. This makes it easier to tell if there is a partial
	 * rollup. Consider the example of GROUP BY rollup(i,j),k. There are
	 * three grouping sets for rollup(i,j): (i,j), (i), (). If we append
	 * k after each grouping set for rollups, we get three sets:
	 * (i,j,k), (i,k) and (k). We cannot easily tell that this is a partial
	 * rollup. However, if we append each grouping set after k, we get
	 * these three sets: (k,i,j), (k,i), (k), which is obviously a partial
	 * rollup.
	 */

	/* First, bring all the columns in the ordinary grouping sets together
	 * into a single grouping set (Bitmapset).
	 */
	foreach (lc, ord_grping)
	{
		Bitmapset *sub_bms = (Bitmapset *)lfirst(lc);
		bms = bms_add_members(bms, sub_bms);
	}

	final_grpingsets = lappend(final_grpingsets, bms);

	/* Make the list of grouping sets */
	if (rollups)
		list_grpingsets = list_concat(list_grpingsets, rollups);
	if (cubes)
		list_grpingsets = list_concat(list_grpingsets, cubes);
	if (grpingsets)
		list_grpingsets = list_concat(list_grpingsets, grpingsets);

	/* Obtain the cartesian product of grouping sets generated from ordinary
	 * grouping sets, rollups, cubes, and grouping sets.
	 */
	foreach (lc, list_grpingsets)
	{
		List *bms_list = (List *)lfirst(lc);
		ListCell *tmp_lc;
		List *tmp_list;

		tmp_list = final_grpingsets;
		final_grpingsets = NIL;

		foreach (tmp_lc, tmp_list)
		{
			Bitmapset *tmp_bms = (Bitmapset *)lfirst(tmp_lc);
			ListCell *bms_lc;

			foreach (bms_lc, bms_list)
			{
				bms = bms_copy(tmp_bms);
				bms = bms_add_members(bms, (Bitmapset *)lfirst(bms_lc));
				final_grpingsets = lappend(final_grpingsets, bms);
			}
		}
	}

	/* Sort final_grpingsets */
	sort_canonical_gs_list(final_grpingsets,
						   &(canonical_grpsets->ngrpsets),
						   &(canonical_grpsets->grpsets));

	/* Combine duplicate grouping sets and set the counts for
	 * each grouping set.
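	 *
	 * For example (an illustrative sketch; assume a has sortgroupref 1):
	 * GROUPING SETS (a, a) arrives here as the sorted sets [{1}, {1}],
	 * which combine into the single set {1} with a count of 2.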
	 */
	canonical_grpsets->grpset_counts =
		(int *)palloc0(canonical_grpsets->ngrpsets * sizeof(int));
	prev_setno = 0;
	canonical_grpsets->grpset_counts[0] = 1;
	for (setno = 1; setno < canonical_grpsets->ngrpsets; setno++)
	{
		if (bms_equal(canonical_grpsets->grpsets[setno],
					  canonical_grpsets->grpsets[prev_setno]))
		{
			canonical_grpsets->grpset_counts[prev_setno]++;
			if (canonical_grpsets->grpsets[setno])
				pfree(canonical_grpsets->grpsets[setno]);
		}
		else
		{
			prev_setno++;
			canonical_grpsets->grpsets[prev_setno] =
				canonical_grpsets->grpsets[setno];
			canonical_grpsets->grpset_counts[prev_setno]++;
		}
	}
	/* Reset ngrpsets to eliminate duplicate grouping sets */
	canonical_grpsets->ngrpsets = prev_setno + 1;

	/* Obtain the number of distinct columns that appear in these
	 * grouping sets.
	 */
	{
		Bitmapset *distcols = NULL;
		for (setno = 0; setno < canonical_grpsets->ngrpsets; setno++)
			distcols =
				bms_add_members(distcols, canonical_grpsets->grpsets[setno]);
		
		canonical_grpsets->num_distcols = bms_num_members(distcols);
		bms_free(distcols);
	}
	

	/* Release working storage */
	list_free_deep(ord_grping);
	list_free_deep(list_grpingsets);
	list_free(final_grpingsets);
	
	return canonical_grpsets;
}

/* Produce the canonical representation of a column reference list.
 *
 * A column reference list (in SQL) is a comma-delimited list of
 * column references which are represented by the parser as a
 * List of GroupClauses.  No nesting is allowed in column reference 
 * lists.
 *
 * As a convenience, this function also recognizes a bare column
 * reference.
 *
 * The result is a Bitmapset of the sort-group-ref values in the list.
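 *
 * For example (an illustrative sketch; assume sortgroupref values
 * a=1, b=2, c=3): the list (a, b, c) yields the Bitmapset {1,2,3},
 * and a bare reference b yields the singleton {2}.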
 */
static Bitmapset *canonicalize_colref_list(Node *node)
{
	ListCell *lc;
	GroupClause *gc;
	Bitmapset* gs = NULL;
	
	if ( node == NULL )
		elog(ERROR,"invalid column reference list");
	
	if ( IsA(node, GroupClause) )
	{
		gc = (GroupClause*)node;
		return bms_make_singleton(gc->tleSortGroupRef);
	}
	
	if ( !IsA(node, List) )
		elog(ERROR,"invalid column reference list");
	
	foreach (lc, (List*)node)
	{
		Node *cr = lfirst(lc);
		
		if ( cr == NULL )
			continue;
			
		if ( !IsA(cr, GroupClause) )
			elog(ERROR,"invalid column reference list");

		gc = (GroupClause*)cr;
		gs = bms_add_member(gs, gc->tleSortGroupRef);	
	}
	return gs;
}

/* Produce the list of canonical grouping sets corresponding to a
 * grouping set list or an ordinary grouping set list.
 * 
 * An ordinary grouping set list (in SQL) is a comma-delimited list 
 * of ordinary grouping sets.  
 * 
 * Each ordinary grouping set is either a grouping column reference 
 * or a parenthesized list of grouping column references.  No nesting 
 * is allowed.  
 *
 * A grouping set list (in SQL) is a comma-delimited list of grouping 
 * sets.  
 *
 * Each grouping set is either an ordinary grouping set, a rollup list, 
 * a cube list, the empty grouping set, or (recursively) a grouping set 
 * list.
 *
 * The parse tree form of an ordinary grouping set is a list containing
 * GroupClauses and lists of GroupClauses (without nesting).  In the case
 * of a (general) grouping set, the parse tree list may also include
 * NULLs and GroupingClauses.
 *
 * The result is a list of bit map sets.
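 *
 * For example (an illustrative sketch; assume sortgroupref values
 * a=1, b=2, c=3): GROUPING SETS ((a,b), c, ()) canonicalizes to the
 * list [{1,2}, {3}, NULL], where NULL denotes the empty grouping set.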
 */
static List *canonicalize_gs_list(List *gsl, bool ordinary)
{
	ListCell *lc;
	List *list = NIL;

	foreach (lc, gsl)
	{
		Node *node = lfirst(lc);

		if ( node == NULL )
		{
			if ( ordinary )
				elog(ERROR,"invalid ordinary grouping set");
			
			list = lappend(list, NIL); /* empty grouping set */
		}
		else if ( IsA(node, GroupClause) || IsA(node, List) )
		{
			/* ordinary grouping set */
			list = lappend(list, canonicalize_colref_list(node));
		}
		else if ( IsA(node, GroupingClause) )
		{	
			List *gs = NIL;
			GroupingClause *gc = (GroupingClause*)node;
			
			if ( ordinary )
				elog(ERROR,"invalid ordinary grouping set");
				
			switch ( gc->groupType )
			{
				case GROUPINGTYPE_ROLLUP:
					gs = rollup_gs_list(canonicalize_gs_list(gc->groupsets, true));
					break;
				case GROUPINGTYPE_CUBE:
					gs = cube_gs_list(canonicalize_gs_list(gc->groupsets, true));
					break;
				case GROUPINGTYPE_GROUPING_SETS:
					gs = canonicalize_gs_list(gc->groupsets, false);
					break;
				default:
					elog(ERROR,"invalid grouping set");
			}
			list = list_concat(list,gs);
		}
		else
		{
			elog(ERROR,"invalid grouping set list");
		}
	}
	return list;
}

/* Produce the list of N+1 canonical grouping sets corresponding
 * to the rollup of the given list of N canonical grouping sets.
 * These N+1 grouping sets are listed in the descending order
 * based on the number of columns.
 *
 * Argument and result are both a list of bit map sets.
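 *
 * For example (an illustrative sketch): the input [{1,2}, {3}]
 * rolls up to [{1,2,3}, {1,2}, NULL], where the trailing NULL is
 * the empty grouping set.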
 */
static List *rollup_gs_list(List *gsl)
{
	ListCell *lc;
	Bitmapset **bms;
	int i, n = list_length(gsl);
	
	if ( n == 0 )
		elog(ERROR,"invalid grouping ordinary grouping set list");
	
	if ( n > 1 )
	{
		/* Reverse the elements in gsl */
		List *new_gsl = NIL;
		foreach (lc, gsl)
		{
			new_gsl = lcons(lfirst(lc), new_gsl);
		}
		list_free(gsl);
		gsl = new_gsl;

		bms = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
		i = 0;
		foreach (lc, gsl)
		{
			bms[i++] = (Bitmapset*)lfirst(lc);
		}
		for ( i = n-2; i >= 0; i-- )
		{
			bms[i] = bms_add_members(bms[i], bms[i+1]);
		}
		/* bms_add_members may recycle its first argument, so write the
		 * (possibly moved) results back into the list cells.
		 */
		i = 0;
		foreach (lc, gsl)
		{
			lfirst(lc) = bms[i++];
		}
		pfree(bms);
	}

	return lappend(gsl, NULL);
}

/* Subroutine for cube_gs_list. */
static List *add_gs_combinations(List *list, int n, int i,
								 Bitmapset **base, Bitmapset **work)
{
	if ( i < n )
	{
		work[i] = base[i];
		list = add_gs_combinations(list, n, i+1, base, work);
		work[i] = NULL;
		list = add_gs_combinations(list, n, i+1, base, work);	
	}
	else
	{
		Bitmapset *gs = NULL;
		int j;
		for ( j = 0; j < n; j++ )
		{
			gs = bms_add_members(gs, work[j]);
		}
		list = lappend(list,gs);
	}
	return list;
}

/* Produce the list of 2^N canonical grouping sets corresponding
 * to the cube of the given list of N canonical grouping sets.
 *
 * We could do this more efficiently, but the number of grouping
 * sets should be small, so don't bother.
 *
 * Argument and result are both a list of bit map sets.
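 *
 * For example (an illustrative sketch): the input [{1}, {2}]
 * expands to the 2^2 = 4 grouping sets [{1,2}, {1}, {2}, NULL].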
 */
static List *cube_gs_list(List *gsl)
{
	ListCell *lc;
	Bitmapset **bms_base;
	Bitmapset **bms_work;
	int i, n = list_length(gsl);
	
	if ( n == 0 )
		elog(ERROR,"invalid grouping ordinary grouping set list");
	
	bms_base = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
	bms_work = (Bitmapset**)palloc(n*sizeof(Bitmapset*));
	i = 0;
	foreach (lc, gsl)
	{
		bms_work[i] = NULL;
		bms_base[i++] = (Bitmapset*)lfirst(lc);
	}

	return add_gs_combinations(NIL, n, 0, bms_base, bms_work);
}

/* Subroutine for sort_canonical_gs_list. */
static int gs_compare(const void *a, const void*b)
{
	/* Put the larger grouping sets before smaller ones. */
	return (0-bms_compare(*(Bitmapset**)a, *(Bitmapset**)b));
}

/* Produce a sorted array of Bitmapsets from the given list of Bitmapsets in
 * descending order.
 */
static void sort_canonical_gs_list(List *gs, int *p_nsets, Bitmapset ***p_sets)
{
	ListCell *lc;
	int nsets = list_length(gs);
	Bitmapset **sets = palloc(nsets*sizeof(Bitmapset*));
	int i = 0;
	
	foreach (lc, gs)
	{
		sets[i++] =  (Bitmapset*)lfirst(lc);
	}
	
	qsort(sets, nsets, sizeof(Bitmapset*), gs_compare);
	
	Assert( p_nsets != NULL && p_sets != NULL );
	
	*p_nsets = nsets;
	*p_sets = sets;
}

/*
 * In any plan where we are doing multi-phase limit, the first phase needs
 * to take the offset into account.
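 *
 * For example (an illustrative sketch): given LIMIT 10 OFFSET 5, each
 * QE need only return its first 15 tuples; a later, top-level Limit
 * node applies the true offset and count to the merged result.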
 */
static Plan *
pushdown_preliminary_limit(Plan *plan, Node *limitCount, int64 count_est, Node *limitOffset, int64 offset_est)
{
	Node *precount = copyObject(limitCount);
	int64 precount_est = count_est;
	Plan *result_plan = plan;

	/*
	 * If we've specified an offset *and* a limit, we need to collect
	 * tuples from 0 -> count + offset, so add the offset to each QE's
	 * requested contribution.
	 * (MPP-1370: Do it even if no ORDER BY was specified.)
	 */
	if (precount && limitOffset)
	{
		precount = (Node*)make_op(NULL,
								  list_make1(makeString(pstrdup("+"))),
								  copyObject(limitOffset),
								  precount,
								  -1);
		precount_est += offset_est;
	}
			
	if (precount != NULL)
	{
		/*
		 * Add a preliminary LIMIT on the partitioned results. This may
		 * reduce the amount of work done on the QEs.
		 */
		result_plan = (Plan *) make_limit(result_plan,
										  NULL,
										  precount,
										  0,
										  precount_est);

		result_plan->flow = pull_up_Flow(result_plan,
										 result_plan->lefttree,
										 true);
	}

	return result_plan;
}