subselect.c 34.1 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * subselect.c
4 5
 *	  Planning routines for subselects and parameters.
 *
P
 
PostgreSQL Daemon 已提交
6
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
7
 * Portions Copyright (c) 1994, Regents of the University of California
8 9
 *
 * IDENTIFICATION
10
 *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $
11 12 13 14 15
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

16
#include "catalog/pg_operator.h"
17
#include "catalog/pg_type.h"
18
#include "miscadmin.h"
19
#include "nodes/makefuncs.h"
20
#include "nodes/params.h"
21
#include "optimizer/clauses.h"
22 23
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
B
Bruce Momjian 已提交
24 25
#include "optimizer/planner.h"
#include "optimizer/subselect.h"
26
#include "optimizer/var.h"
27
#include "parser/parsetree.h"
28 29
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
30
#include "parser/parse_relation.h"
31
#include "rewrite/rewriteManip.h"
32
#include "utils/builtins.h"
33
#include "utils/lsyscache.h"
34
#include "utils/syscache.h"
35

36

37
Index		PlannerQueryLevel;	/* level of current query */
38
List	   *PlannerInitPlan;	/* init subplans for current query */
39
List	   *PlannerParamList;	/* to keep track of cross-level Params */
40 41

int			PlannerPlanId = 0;	/* to assign unique ID to subquery plans */
42

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
/*
 * PlannerParamList keeps track of the PARAM_EXEC slots that we have decided
 * we need for the query.  At runtime these slots are used to pass values
 * either down into subqueries (for outer references in subqueries) or up out
 * of subqueries (for the results of a subplan).  The n'th entry in the list
 * (n counts from 0) corresponds to Param->paramid = n.
 *
 * Each ParamList item shows the absolute query level it is associated with,
 * where the outermost query is level 1 and nested subqueries have higher
 * numbers.  The item the parameter slot represents can be one of three kinds:
 *
 * A Var: the slot represents a variable of that level that must be passed
 * down because subqueries have outer references to it.  The varlevelsup
 * value in the Var will always be zero.
 *
 * An Aggref (with an expression tree representing its argument): the slot
 * represents an aggregate expression that is an outer reference for some
 * subquery.  The Aggref itself has agglevelsup = 0, and its argument tree
 * is adjusted to match in level.
62
 *
63 64 65 66 67 68
 * A Param: the slot holds the result of a subplan (it is a setParam item
 * for that subplan).  The absolute level shown for such items corresponds
 * to the parent query of the subplan.
 *
 * Note: we detect duplicate Var parameters and coalesce them into one slot,
 * but we do not do this for Aggref or Param slots.
69
 */
70 71 72 73
typedef struct PlannerParamItem
{
	Node	   *item;			/* the Var, Aggref, or Param */
	Index		abslevel;		/* its absolute query level */
74
} PlannerParamItem;
75 76


77
typedef struct finalize_primnode_context
78
{
B
Bruce Momjian 已提交
79 80
	Bitmapset  *paramids;		/* Set of PARAM_EXEC paramids found */
	Bitmapset  *outer_params;	/* Set of accessible outer paramids */
81
} finalize_primnode_context;
82 83


84
static List *convert_sublink_opers(List *lefthand, List *operOids,
B
Bruce Momjian 已提交
85 86
					  List *targetlist, int rtindex,
					  List **righthandIds);
87
static bool subplan_is_hashable(SubLink *slink, SubPlan *node);
88
static Node *replace_correlation_vars_mutator(Node *node, void *context);
89
static Node *process_sublinks_mutator(Node *node, bool *isTopQual);
90
static Bitmapset *finalize_plan(Plan *plan, List *rtable,
91 92 93
			  Bitmapset *outer_params,
			  Bitmapset *valid_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
94 95


96 97 98 99
/*
 * Generate a Param node to replace the given Var,
 * which is expected to have varlevelsup > 0 (ie, it is not local).
 */
100
static Param *
101
replace_outer_var(Var *var)
102
{
103
	Param	   *retval;
104
	ListCell   *ppl;
105 106
	PlannerParamItem *pitem;
	Index		abslevel;
107 108
	int			i;

109
	Assert(var->varlevelsup > 0 && var->varlevelsup < PlannerQueryLevel);
110
	abslevel = PlannerQueryLevel - var->varlevelsup;
111

112
	/*
113
	 * If there's already a PlannerParamList entry for this same Var, just
B
Bruce Momjian 已提交
114
	 * use it.	NOTE: in sufficiently complex querytrees, it is possible
115
	 * for the same varno/abslevel to refer to different RTEs in different
B
Bruce Momjian 已提交
116 117 118 119
	 * parts of the parsetree, so that different fields might end up
	 * sharing the same Param number.  As long as we check the vartype as
	 * well, I believe that this sort of aliasing will cause no trouble.
	 * The correct field should get stored into the Param slot at
120
	 * execution in each part of the tree.
121
	 *
B
Bruce Momjian 已提交
122 123 124 125
	 * We also need to demand a match on vartypmod.  This does not matter for
	 * the Param itself, since those are not typmod-dependent, but it does
	 * matter when make_subplan() instantiates a modified copy of the Var
	 * for a subplan's args list.
126 127
	 */
	i = 0;
128
	foreach(ppl, PlannerParamList)
129
	{
130 131 132
		pitem = (PlannerParamItem *) lfirst(ppl);
		if (pitem->abslevel == abslevel && IsA(pitem->item, Var))
		{
B
Bruce Momjian 已提交
133
			Var		   *pvar = (Var *) pitem->item;
134

135 136
			if (pvar->varno == var->varno &&
				pvar->varattno == var->varattno &&
137 138
				pvar->vartype == var->vartype &&
				pvar->vartypmod == var->vartypmod)
139 140
				break;
		}
141
		i++;
142
	}
143

144
	if (!ppl)
145 146
	{
		/* Nope, so make a new one */
147 148 149 150 151 152 153 154 155
		var = (Var *) copyObject(var);
		var->varlevelsup = 0;

		pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
		pitem->item = (Node *) var;
		pitem->abslevel = abslevel;

		PlannerParamList = lappend(PlannerParamList, pitem);
		/* i is already the correct index for the new item */
156
	}
157

158 159 160 161
	retval = makeNode(Param);
	retval->paramkind = PARAM_EXEC;
	retval->paramid = (AttrNumber) i;
	retval->paramtype = var->vartype;
162

163
	return retval;
164 165
}

166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
/*
 * Generate a Param node to replace the given Aggref
 * which is expected to have agglevelsup > 0 (ie, it is not local).
 */
static Param *
replace_outer_agg(Aggref *agg)
{
	Param	   *retval;
	PlannerParamItem *pitem;
	Index		abslevel;
	int			i;

	Assert(agg->agglevelsup > 0 && agg->agglevelsup < PlannerQueryLevel);
	abslevel = PlannerQueryLevel - agg->agglevelsup;

	/*
	 * It does not seem worthwhile to try to match duplicate outer aggs.
	 * Just make a new slot every time.
	 */
	agg = (Aggref *) copyObject(agg);
B
Bruce Momjian 已提交
186
	IncrementVarSublevelsUp((Node *) agg, -((int) agg->agglevelsup), 0);
187 188 189 190 191 192 193
	Assert(agg->agglevelsup == 0);

	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
	pitem->item = (Node *) agg;
	pitem->abslevel = abslevel;

	PlannerParamList = lappend(PlannerParamList, pitem);
194
	i = list_length(PlannerParamList) - 1;
195 196 197 198 199 200 201 202 203

	retval = makeNode(Param);
	retval->paramkind = PARAM_EXEC;
	retval->paramid = (AttrNumber) i;
	retval->paramtype = agg->aggtype;

	return retval;
}

204 205
/*
 * Generate a new Param node that will not conflict with any other.
206 207 208 209
 *
 * This is used to allocate PARAM_EXEC slots for subplan outputs.
 *
 * paramtypmod is currently unused but might be wanted someday.
210 211 212 213
 */
static Param *
generate_new_param(Oid paramtype, int32 paramtypmod)
{
214 215
	Param	   *retval;
	PlannerParamItem *pitem;
216

217
	retval = makeNode(Param);
218
	retval->paramkind = PARAM_EXEC;
219
	retval->paramid = (AttrNumber) list_length(PlannerParamList);
220 221
	retval->paramtype = paramtype;

222 223 224 225 226 227
	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
	pitem->item = (Node *) retval;
	pitem->abslevel = PlannerQueryLevel;

	PlannerParamList = lappend(PlannerParamList, pitem);

228 229 230
	return retval;
}

231
/*
232 233 234
 * Convert a bare SubLink (as created by the parser) into a SubPlan.
 *
 * We are given the raw SubLink and the already-processed lefthand argument
235 236
 * list (use this instead of the SubLink's own field).  We are also told if
 * this expression appears at top level of a WHERE/HAVING qual.
237 238 239 240 241 242
 *
 * The result is whatever we need to substitute in place of the SubLink
 * node in the executable expression.  This will be either the SubPlan
 * node (if we have to do the subplan as a subplan), or a Param node
 * representing the result of an InitPlan, or possibly an AND or OR tree
 * containing InitPlan Param nodes.
243
 */
244
static Node *
245
make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
246
{
B
Bruce Momjian 已提交
247
	SubPlan    *node = makeNode(SubPlan);
248
	Query	   *subquery = (Query *) (slink->subselect);
249
	double		tuple_fraction;
250
	Plan	   *plan;
251 252
	Bitmapset  *tmpset;
	int			paramid;
253
	Node	   *result;
254

255
	/*
B
Bruce Momjian 已提交
256 257 258 259
	 * Copy the source Query node.	This is a quick and dirty kluge to
	 * resolve the fact that the parser can generate trees with multiple
	 * links to the same sub-Query node, but the planner wants to scribble
	 * on the Query. Try to clean this up when we do querytree redesign...
260 261 262
	 */
	subquery = (Query *) copyObject(subquery);

263
	/*
264 265 266 267 268 269 270
	 * For an EXISTS subplan, tell lower-level planner to expect that only
	 * the first tuple will be retrieved.  For ALL and ANY subplans, we
	 * will be able to stop evaluating if the test condition fails, so
	 * very often not all the tuples will be retrieved; for lack of a
	 * better idea, specify 50% retrieval.	For EXPR and MULTIEXPR
	 * subplans, use default behavior (we're only expecting one row out,
	 * anyway).
271
	 *
272 273 274
	 * NOTE: if you change these numbers, also change cost_qual_eval_walker()
	 * in path/costsize.c.
	 *
275
	 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
B
Bruce Momjian 已提交
276 277
	 * materialize its result below.  In that case it would've been better
	 * to specify full retrieval.  At present, however, we can only detect
278 279 280 281 282
	 * correlation or lack of it after we've made the subplan :-(. Perhaps
	 * detection of correlation should be done as a separate step.
	 * Meanwhile, we don't want to be too optimistic about the percentage
	 * of tuples retrieved, for fear of selecting a plan that's bad for
	 * the materialization case.
283 284 285
	 */
	if (slink->subLinkType == EXISTS_SUBLINK)
		tuple_fraction = 1.0;	/* just like a LIMIT 1 */
286 287
	else if (slink->subLinkType == ALL_SUBLINK ||
			 slink->subLinkType == ANY_SUBLINK)
288
		tuple_fraction = 0.5;	/* 50% */
289
	else
290
		tuple_fraction = 0.0;	/* default behavior */
291

292
	/*
293
	 * Generate the plan for the subquery.
294
	 */
295
	node->plan = plan = subquery_planner(subquery, tuple_fraction);
296

B
Bruce Momjian 已提交
297
	node->plan_id = PlannerPlanId++;	/* Assign unique ID to this
298
										 * SubPlan */
299

300
	node->rtable = subquery->rtable;
301

302
	/*
303
	 * Initialize other fields of the SubPlan node.
304 305
	 */
	node->subLinkType = slink->subLinkType;
306
	node->useOr = slink->useOr;
307 308 309
	node->exprs = NIL;
	node->paramIds = NIL;
	node->useHashTable = false;
310 311
	/* At top level of a qual, can treat UNKNOWN the same as FALSE */
	node->unknownEqFalse = isTopQual;
312 313 314
	node->setParam = NIL;
	node->parParam = NIL;
	node->args = NIL;
315

316
	/*
B
Bruce Momjian 已提交
317 318
	 * Make parParam list of params that current query level will pass to
	 * this child plan.
319
	 */
320 321
	tmpset = bms_copy(plan->extParam);
	while ((paramid = bms_first_member(tmpset)) >= 0)
322
	{
323
		PlannerParamItem *pitem = list_nth(PlannerParamList, paramid);
324

325
		if (pitem->abslevel == PlannerQueryLevel)
326
			node->parParam = lappend_int(node->parParam, paramid);
327
	}
328
	bms_free(tmpset);
329 330

	/*
B
Bruce Momjian 已提交
331 332 333 334 335 336 337
	 * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY,
	 * or MULTIEXPR types can be used as initPlans.  For EXISTS, EXPR, or
	 * ARRAY, we just produce a Param referring to the result of
	 * evaluating the initPlan.  For MULTIEXPR, we must build an AND or
	 * OR-clause of the individual comparison operators, using the
	 * appropriate lefthand side expressions and Params for the initPlan's
	 * target items.
338
	 */
339 340
	if (node->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK)
	{
341
		Param	   *prm;
342

343
		prm = generate_new_param(BOOLOID, -1);
344
		node->setParam = list_make1_int(prm->paramid);
345 346 347 348 349
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
	else if (node->parParam == NIL && slink->subLinkType == EXPR_SUBLINK)
	{
350
		TargetEntry *te = linitial(plan->targetlist);
351
		Param	   *prm;
352

353 354 355
		Assert(!te->resjunk);
		prm = generate_new_param(exprType((Node *) te->expr),
								 exprTypmod((Node *) te->expr));
356
		node->setParam = list_make1_int(prm->paramid);
357 358 359
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
360 361
	else if (node->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK)
	{
362
		TargetEntry *te = linitial(plan->targetlist);
363 364 365
		Oid			arraytype;
		Param	   *prm;

366 367
		Assert(!te->resjunk);
		arraytype = get_array_type(exprType((Node *) te->expr));
368
		if (!OidIsValid(arraytype))
369
			elog(ERROR, "could not find array type for datatype %s",
370
				 format_type_be(exprType((Node *) te->expr)));
371
		prm = generate_new_param(arraytype, -1);
372
		node->setParam = list_make1_int(prm->paramid);
373 374 375
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
376
	else if (node->parParam == NIL && slink->subLinkType == MULTIEXPR_SUBLINK)
377
	{
B
Bruce Momjian 已提交
378
		List	   *exprs;
379 380 381 382 383

		/* Convert the lefthand exprs and oper OIDs into executable exprs */
		exprs = convert_sublink_opers(lefthand,
									  slink->operOids,
									  plan->targetlist,
B
Bruce Momjian 已提交
384
									  0,
385
									  &node->paramIds);
386
		node->setParam = list_copy(node->paramIds);
387
		PlannerInitPlan = lappend(PlannerInitPlan, node);
B
Bruce Momjian 已提交
388

389 390 391 392 393
		/*
		 * The executable expressions are returned to become part of the
		 * outer plan's expression tree; they are not kept in the initplan
		 * node.
		 */
394
		if (list_length(exprs) > 1)
395 396
			result = (Node *) (node->useOr ? make_orclause(exprs) :
							   make_andclause(exprs));
397
		else
398
			result = (Node *) linitial(exprs);
399
	}
400
	else
401
	{
402
		List	   *args;
403
		ListCell   *l;
404

405
		/*
406 407 408
		 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types
		 * to initPlans, even when they are uncorrelated or undirect
		 * correlated, because we need to scan the output of the subplan
409 410 411 412 413
		 * for each outer tuple.  But if it's an IN (= ANY) test, we might
		 * be able to use a hashtable to avoid comparing all the tuples.
		 */
		if (subplan_is_hashable(slink, node))
			node->useHashTable = true;
B
Bruce Momjian 已提交
414

415
		/*
B
Bruce Momjian 已提交
416 417 418 419 420 421 422 423
		 * Otherwise, we have the option to tack a MATERIAL node onto the
		 * top of the subplan, to reduce the cost of reading it
		 * repeatedly.	This is pointless for a direct-correlated subplan,
		 * since we'd have to recompute its results each time anyway.  For
		 * uncorrelated/undirect correlated subplans, we add MATERIAL if
		 * the subplan's top plan node is anything more complicated than a
		 * plain sequential scan, and we do it even for seqscan if the
		 * qual appears selective enough to eliminate many tuples.
424
		 */
425
		else if (node->parParam == NIL)
426 427 428 429 430 431
		{
			bool		use_material;

			switch (nodeTag(plan))
			{
				case T_SeqScan:
432
					if (plan->initPlan)
433 434 435 436 437 438 439
						use_material = true;
					else
					{
						Selectivity qualsel;

						qualsel = clauselist_selectivity(subquery,
														 plan->qual,
440
														 0, JOIN_INNER);
441 442 443
						/* Is 10% selectivity a good threshold?? */
						use_material = qualsel < 0.10;
					}
444 445
					break;
				case T_Material:
446
				case T_FunctionScan:
447
				case T_Sort:
448 449 450

					/*
					 * Don't add another Material node if there's one
451 452
					 * already, nor if the top node is any other type that
					 * materializes its output anyway.
453 454 455 456 457 458 459 460
					 */
					use_material = false;
					break;
				default:
					use_material = true;
					break;
			}
			if (use_material)
461
				node->plan = plan = materialize_finished_plan(plan);
462 463
		}

464 465 466 467
		/* Convert the lefthand exprs and oper OIDs into executable exprs */
		node->exprs = convert_sublink_opers(lefthand,
											slink->operOids,
											plan->targetlist,
B
Bruce Momjian 已提交
468
											0,
469
											&node->paramIds);
470

471
		/*
472
		 * Make node->args from parParam.
473
		 */
474
		args = NIL;
475
		foreach(l, node->parParam)
476
		{
477
			PlannerParamItem *pitem = list_nth(PlannerParamList, lfirst_int(l));
478 479

			/*
480
			 * The Var or Aggref has already been adjusted to have the
B
Bruce Momjian 已提交
481
			 * correct varlevelsup or agglevelsup.	We probably don't even
482
			 * need to copy it again, but be safe.
483
			 */
484
			args = lappend(args, copyObject(pitem->item));
485
		}
486
		node->args = args;
487

488
		result = (Node *) node;
489 490 491 492 493 494
	}

	return result;
}

/*
495
 * convert_sublink_opers: given a lefthand-expressions list and a list of
B
Bruce Momjian 已提交
496
 * operator OIDs, build a list of actually executable expressions.	The
497 498
 * righthand sides of the expressions are Params or Vars representing the
 * results of the sub-select.
499
 *
500 501 502
 * If rtindex is 0, we build Params to represent the sub-select outputs.
 * The paramids of the Params created are returned in the *righthandIds list.
 *
503 504
 * If rtindex is not 0, we build Vars using that rtindex as varno.	Copies
 * of the Var nodes are returned in *righthandIds (this is a bit of a type
505
 * cheat, but we can get away with it).
506
 */
507
static List *
508
convert_sublink_opers(List *lefthand, List *operOids,
509
					  List *targetlist, int rtindex,
B
Bruce Momjian 已提交
510
					  List **righthandIds)
511
{
512
	List	   *result = NIL;
B
Bruce Momjian 已提交
513 514 515
	ListCell   *l,
			   *lefthand_item,
			   *tlist_item;
516

517
	*righthandIds = NIL;
518 519
	lefthand_item = list_head(lefthand);
	tlist_item = list_head(targetlist);
520

521
	foreach(l, operOids)
522
	{
523
		Oid			opid = lfirst_oid(l);
524 525
		Node	   *leftop = (Node *) lfirst(lefthand_item);
		TargetEntry *te = (TargetEntry *) lfirst(tlist_item);
526
		Node	   *rightop;
527 528
		Operator	tup;

529
		Assert(!te->resjunk);
530

531 532 533 534
		if (rtindex)
		{
			/* Make the Var node representing the subplan's result */
			rightop = (Node *) makeVar(rtindex,
535 536 537
									   te->resno,
									   exprType((Node *) te->expr),
									   exprTypmod((Node *) te->expr),
538
									   0);
B
Bruce Momjian 已提交
539

540
			/*
B
Bruce Momjian 已提交
541
			 * Copy it for caller.	NB: we need a copy to avoid having
542 543 544
			 * doubly-linked substructure in the modified parse tree.
			 */
			*righthandIds = lappend(*righthandIds, copyObject(rightop));
545 546 547 548 549 550
		}
		else
		{
			/* Make the Param node representing the subplan's result */
			Param	   *prm;

551 552
			prm = generate_new_param(exprType((Node *) te->expr),
									 exprTypmod((Node *) te->expr));
553
			/* Record its ID */
554
			*righthandIds = lappend_int(*righthandIds, prm->paramid);
555 556
			rightop = (Node *) prm;
		}
557

558
		/* Look up the operator to pass to make_op_expr */
559
		tup = SearchSysCache(OPEROID,
560
							 ObjectIdGetDatum(opid),
561 562
							 0, 0, 0);
		if (!HeapTupleIsValid(tup))
563
			elog(ERROR, "cache lookup failed for operator %u", opid);
564

565
		/*
566 567
		 * Make the expression node.
		 *
B
Bruce Momjian 已提交
568 569 570 571
		 * Note: we use make_op_expr in case runtime type conversion function
		 * calls must be inserted for this operator!  (But we are not
		 * expecting to have to resolve unknown Params, so it's okay to
		 * pass a null pstate.)
572
		 */
B
Bruce Momjian 已提交
573 574 575 576 577 578
		result = lappend(result,
						 make_op_expr(NULL,
									  tup,
									  leftop,
									  rightop,
									  exprType(leftop),
579
									  exprType((Node *) te->expr)));
580

581 582
		ReleaseSysCache(tup);

583 584
		lefthand_item = lnext(lefthand_item);
		tlist_item = lnext(tlist_item);
585
	}
586

587 588 589 590 591 592 593 594 595 596 597 598 599
	return result;
}

/*
 * subplan_is_hashable: decide whether we can implement a subplan by hashing
 *
 * Caution: the SubPlan node is not completely filled in yet.  We can rely
 * on its plan and parParam fields, however.
 */
static bool
subplan_is_hashable(SubLink *slink, SubPlan *node)
{
	double		subquery_size;
600
	ListCell   *l;
601 602

	/*
B
Bruce Momjian 已提交
603 604 605
	 * The sublink type must be "= ANY" --- that is, an IN operator. (We
	 * require the operator name to be unqualified, which may be overly
	 * paranoid, or may not be.)  XXX since we also check that the
606 607 608 609
	 * operators are hashable, the test on operator name may be redundant?
	 */
	if (slink->subLinkType != ANY_SUBLINK)
		return false;
610
	if (list_length(slink->operName) != 1 ||
611
		strcmp(strVal(linitial(slink->operName)), "=") != 0)
612
		return false;
B
Bruce Momjian 已提交
613

614 615
	/*
	 * The subplan must not have any direct correlation vars --- else we'd
B
Bruce Momjian 已提交
616 617
	 * have to recompute its output each time, so that the hashtable
	 * wouldn't gain anything.
618 619 620
	 */
	if (node->parParam != NIL)
		return false;
B
Bruce Momjian 已提交
621

622
	/*
B
Bruce Momjian 已提交
623 624
	 * The estimated size of the subquery result must fit in work_mem.
	 * (XXX what about hashtable overhead?)
625 626 627
	 */
	subquery_size = node->plan->plan_rows *
		(MAXALIGN(node->plan->plan_width) + MAXALIGN(sizeof(HeapTupleData)));
628
	if (subquery_size > work_mem * 1024L)
629
		return false;
B
Bruce Momjian 已提交
630

631
	/*
B
Bruce Momjian 已提交
632 633 634 635 636 637 638 639 640 641 642 643
	 * The combining operators must be hashable, strict, and
	 * self-commutative. The need for hashability is obvious, since we
	 * want to use hashing. Without strictness, behavior in the presence
	 * of nulls is too unpredictable.  (We actually must assume even more
	 * than plain strictness, see nodeSubplan.c for details.)  And
	 * commutativity ensures that the left and right datatypes are the
	 * same; this allows us to assume that the combining operators are
	 * equality for the righthand datatype, so that they can be used to
	 * compare righthand tuples as well as comparing lefthand to righthand
	 * tuples.	(This last restriction could be relaxed by using two
	 * different sets of operators with the hash table, but there is no
	 * obvious usefulness to that at present.)
644
	 */
645
	foreach(l, slink->operOids)
646
	{
647
		Oid			opid = lfirst_oid(l);
648 649 650 651 652 653 654 655 656
		HeapTuple	tup;
		Form_pg_operator optup;

		tup = SearchSysCache(OPEROID,
							 ObjectIdGetDatum(opid),
							 0, 0, 0);
		if (!HeapTupleIsValid(tup))
			elog(ERROR, "cache lookup failed for operator %u", opid);
		optup = (Form_pg_operator) GETSTRUCT(tup);
657 658
		if (!optup->oprcanhash || optup->oprcom != opid ||
			!func_strict(optup->oprcode))
659 660 661 662 663 664 665
		{
			ReleaseSysCache(tup);
			return false;
		}
		ReleaseSysCache(tup);
	}
	return true;
666 667
}

668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683
/*
 * convert_IN_to_join: can we convert an IN SubLink to join style?
 *
 * The caller has found a SubLink at the top level of WHERE, but has not
 * checked the properties of the SubLink at all.  Decide whether it is
 * appropriate to process this SubLink in join style.  If not, return NULL.
 * If so, build the qual clause(s) to replace the SubLink, and return them.
 *
 * Side effects of a successful conversion include adding the SubLink's
 * subselect to the query's rangetable and adding an InClauseInfo node to
 * its in_info_list.
 */
Node *
convert_IN_to_join(Query *parse, SubLink *sublink)
{
	Query	   *subselect = (Query *) sublink->subselect;
684
	Relids		left_varnos;
685 686 687
	int			rtindex;
	RangeTblEntry *rte;
	RangeTblRef *rtr;
B
Bruce Momjian 已提交
688
	InClauseInfo *ininfo;
689 690 691
	List	   *exprs;

	/*
B
Bruce Momjian 已提交
692 693 694
	 * The sublink type must be "= ANY" --- that is, an IN operator. (We
	 * require the operator name to be unqualified, which may be overly
	 * paranoid, or may not be.)
695 696 697
	 */
	if (sublink->subLinkType != ANY_SUBLINK)
		return NULL;
698
	if (list_length(sublink->operName) != 1 ||
699
		strcmp(strVal(linitial(sublink->operName)), "=") != 0)
700
		return NULL;
B
Bruce Momjian 已提交
701

702 703 704 705 706 707
	/*
	 * The sub-select must not refer to any Vars of the parent query.
	 * (Vars of higher levels should be okay, though.)
	 */
	if (contain_vars_of_level((Node *) subselect, 1))
		return NULL;
B
Bruce Momjian 已提交
708

709 710 711 712 713
	/*
	 * The left-hand expressions must contain some Vars of the current
	 * query, else it's not gonna be a join.
	 */
	left_varnos = pull_varnos((Node *) sublink->lefthand);
714
	if (bms_is_empty(left_varnos))
715
		return NULL;
B
Bruce Momjian 已提交
716

717 718 719 720 721 722 723
	/*
	 * The left-hand expressions mustn't be volatile.  (Perhaps we should
	 * test the combining operators, too?  We'd only need to point the
	 * function directly at the sublink ...)
	 */
	if (contain_volatile_functions((Node *) sublink->lefthand))
		return NULL;
B
Bruce Momjian 已提交
724

725 726 727 728 729
	/*
	 * Okay, pull up the sub-select into top range table and jointree.
	 *
	 * We rely here on the assumption that the outer query has no references
	 * to the inner (necessarily true, other than the Vars that we build
B
Bruce Momjian 已提交
730 731
	 * below).	Therefore this is a lot easier than what
	 * pull_up_subqueries has to go through.
732 733 734 735 736 737
	 */
	rte = addRangeTableEntryForSubquery(NULL,
										subselect,
										makeAlias("IN_subquery", NIL),
										false);
	parse->rtable = lappend(parse->rtable, rte);
738
	rtindex = list_length(parse->rtable);
739 740 741
	rtr = makeNode(RangeTblRef);
	rtr->rtindex = rtindex;
	parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
B
Bruce Momjian 已提交
742

743 744 745 746 747
	/*
	 * Now build the InClauseInfo node.
	 */
	ininfo = makeNode(InClauseInfo);
	ininfo->lefthand = left_varnos;
748
	ininfo->righthand = bms_make_singleton(rtindex);
749
	parse->in_info_list = lcons(ininfo, parse->in_info_list);
B
Bruce Momjian 已提交
750

751 752
	/*
	 * Build the result qual expressions.  As a side effect,
B
Bruce Momjian 已提交
753 754
	 * ininfo->sub_targetlist is filled with a list of Vars representing
	 * the subselect outputs.
755 756 757 758
	 */
	exprs = convert_sublink_opers(sublink->lefthand,
								  sublink->operOids,
								  subselect->targetList,
B
Bruce Momjian 已提交
759
								  rtindex,
760 761 762 763
								  &ininfo->sub_targetlist);
	return (Node *) make_ands_explicit(exprs);
}

764 765
/*
 * Replace correlation vars (uplevel vars) with Params.
766 767 768 769 770 771 772
 *
 * Uplevel aggregates are replaced, too.
 *
 * Note: it is critical that this runs immediately after SS_process_sublinks.
 * Since we do not recurse into the arguments of uplevel aggregates, they will
 * get copied to the appropriate subplan args list in the parent query with
 * uplevel vars not replaced by Params, but only adjusted in level (see
B
Bruce Momjian 已提交
773
 * replace_outer_agg).	That's exactly what we want for the vars of the parent
774
 * level --- but if an aggregate's argument contains any further-up variables,
B
Bruce Momjian 已提交
775
 * they have to be replaced with Params in their turn.	That will happen when
776 777 778 779
 * the parent level runs SS_replace_correlation_vars.  Therefore it must do
 * so after expanding its sublinks to subplans.  And we don't want any steps
 * in between, else those steps would never get applied to the aggregate
 * argument expressions, either in the parent or the child level.
780
 */
781
Node *
782
SS_replace_correlation_vars(Node *expr)
783
{
784 785 786
	/* No setup needed for tree walk, so away we go */
	return replace_correlation_vars_mutator(expr, NULL);
}
787

788 789 790 791 792 793
static Node *
replace_correlation_vars_mutator(Node *node, void *context)
{
	if (node == NULL)
		return NULL;
	if (IsA(node, Var))
794
	{
795
		if (((Var *) node)->varlevelsup > 0)
796 797 798 799 800 801
			return (Node *) replace_outer_var((Var *) node);
	}
	if (IsA(node, Aggref))
	{
		if (((Aggref *) node)->agglevelsup > 0)
			return (Node *) replace_outer_agg((Aggref *) node);
802
	}
803 804 805
	return expression_tree_mutator(node,
								   replace_correlation_vars_mutator,
								   context);
806 807
}

808 809
/*
 * Expand SubLinks to SubPlans in the given expression.
810 811 812 813
 *
 * The isQual argument tells whether or not this expression is a WHERE/HAVING
 * qualifier expression.  If it is, any sublinks appearing at top level need
 * not distinguish FALSE from UNKNOWN return values.
814
 */
815
Node *
816
SS_process_sublinks(Node *expr, bool isQual)
817
{
818 819
	/* The only context needed is the initial are-we-in-a-qual flag */
	return process_sublinks_mutator(expr, &isQual);
820 821 822
}

static Node *
823
process_sublinks_mutator(Node *node, bool *isTopQual)
824
{
B
Bruce Momjian 已提交
825
	bool		locTopQual;
826

827
	if (node == NULL)
828
		return NULL;
829
	if (IsA(node, SubLink))
830
	{
831
		SubLink    *sublink = (SubLink *) node;
832
		List	   *lefthand;
833

834
		/*
B
Bruce Momjian 已提交
835 836
		 * First, recursively process the lefthand-side expressions, if
		 * any.
837
		 */
838
		locTopQual = false;
839
		lefthand = (List *)
840
			process_sublinks_mutator((Node *) sublink->lefthand, &locTopQual);
B
Bruce Momjian 已提交
841

842 843 844
		/*
		 * Now build the SubPlan node and make the expr to return.
		 */
845
		return make_subplan(sublink, lefthand, *isTopQual);
846
	}
847

848
	/*
B
Bruce Momjian 已提交
849 850 851
	 * We should never see a SubPlan expression in the input (since this
	 * is the very routine that creates 'em to begin with).  We shouldn't
	 * find ourselves invoked directly on a Query, either.
852
	 */
853
	Assert(!is_subplan(node));
854
	Assert(!IsA(node, Query));
855

856
	/*
857
	 * Because make_subplan() could return an AND or OR clause, we have to
B
Bruce Momjian 已提交
858 859
	 * take steps to preserve AND/OR flatness of a qual.  We assume the
	 * input has been AND/OR flattened and so we need no recursion here.
860
	 *
B
Bruce Momjian 已提交
861 862 863 864
	 * If we recurse down through anything other than an AND node, we are
	 * definitely not at top qual level anymore.  (Due to the coding here,
	 * we will not get called on the List subnodes of an AND, so no check
	 * is needed for List.)
865
	 */
866 867
	if (and_clause(node))
	{
B
Bruce Momjian 已提交
868 869
		List	   *newargs = NIL;
		ListCell   *l;
870 871

		/* Still at qual top-level */
872
		locTopQual = *isTopQual;
873 874 875

		foreach(l, ((BoolExpr *) node)->args)
		{
B
Bruce Momjian 已提交
876
			Node	   *newarg;
877 878 879 880

			newarg = process_sublinks_mutator(lfirst(l),
											  (void *) &locTopQual);
			if (and_clause(newarg))
881
				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
882 883 884 885 886 887 888 889 890 891 892
			else
				newargs = lappend(newargs, newarg);
		}
		return (Node *) make_andclause(newargs);
	}

	/* otherwise not at qual top-level */
	locTopQual = false;

	if (or_clause(node))
	{
B
Bruce Momjian 已提交
893 894
		List	   *newargs = NIL;
		ListCell   *l;
895 896 897

		foreach(l, ((BoolExpr *) node)->args)
		{
B
Bruce Momjian 已提交
898
			Node	   *newarg;
899 900 901 902

			newarg = process_sublinks_mutator(lfirst(l),
											  (void *) &locTopQual);
			if (or_clause(newarg))
903
				newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
904 905 906 907 908
			else
				newargs = lappend(newargs, newarg);
		}
		return (Node *) make_orclause(newargs);
	}
909

910 911
	return expression_tree_mutator(node,
								   process_sublinks_mutator,
912
								   (void *) &locTopQual);
913 914
}

915 916 917
/*
 * SS_finalize_plan - do final sublink processing for a completed Plan.
 *
918 919
 * This recursively computes the extParam and allParam sets
 * for every Plan node in the given plan tree.
920
 */
921
void
922
SS_finalize_plan(Plan *plan, List *rtable)
923
{
924 925 926
	Bitmapset  *outer_params = NULL;
	Bitmapset  *valid_params = NULL;
	int			paramid;
927
	ListCell   *l;
928 929

	/*
B
Bruce Momjian 已提交
930 931 932
	 * First, scan the param list to discover the sets of params that are
	 * available from outer query levels and my own query level. We do
	 * this once to save time in the per-plan recursion steps.
933 934
	 */
	paramid = 0;
935
	foreach(l, PlannerParamList)
936
	{
937
		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
938

939
		if (pitem->abslevel < PlannerQueryLevel)
940 941 942 943 944
		{
			/* valid outer-level parameter */
			outer_params = bms_add_member(outer_params, paramid);
			valid_params = bms_add_member(valid_params, paramid);
		}
945 946
		else if (pitem->abslevel == PlannerQueryLevel &&
				 IsA(pitem->item, Param))
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971
		{
			/* valid local parameter (i.e., a setParam of my child) */
			valid_params = bms_add_member(valid_params, paramid);
		}

		paramid++;
	}

	/*
	 * Now recurse through plan tree.
	 */
	(void) finalize_plan(plan, rtable, outer_params, valid_params);

	bms_free(outer_params);
	bms_free(valid_params);
}

/*
 * Recursive processing of all nodes in the plan tree
 *
 * The return value is the computed allParam set for the given Plan node.
 * This is just an internal notational convenience.
 */
static Bitmapset *
finalize_plan(Plan *plan, List *rtable,
972
			  Bitmapset *outer_params, Bitmapset *valid_params)
973 974
{
	finalize_primnode_context context;
975 976

	if (plan == NULL)
977
		return NULL;
978

979 980
	context.paramids = NULL;	/* initialize set to empty */
	context.outer_params = outer_params;
981

982
	/*
983
	 * When we call finalize_primnode, context.paramids sets are
984
	 * automatically merged together.  But when recursing to self, we have
985
	 * to do it the hard way.  We want the paramids set to include params
986
	 * in subplans as well as at this level.
987 988
	 */

989
	/* Find params in targetlist and qual */
990 991
	finalize_primnode((Node *) plan->targetlist, &context);
	finalize_primnode((Node *) plan->qual, &context);
992

993
	/* Check additional node-type-specific fields */
994 995 996
	switch (nodeTag(plan))
	{
		case T_Result:
997
			finalize_primnode(((Result *) plan)->resconstantqual,
998
							  &context);
999 1000
			break;

1001 1002
		case T_IndexScan:
			finalize_primnode((Node *) ((IndexScan *) plan)->indxqual,
1003
							  &context);
1004 1005 1006

			/*
			 * we need not look at indxqualorig, since it will have the
1007
			 * same param references as indxqual.
1008 1009 1010 1011 1012
			 */
			break;

		case T_TidScan:
			finalize_primnode((Node *) ((TidScan *) plan)->tideval,
1013
							  &context);
1014
			break;
1015

1016
		case T_SubqueryScan:
B
Bruce Momjian 已提交
1017

1018
			/*
B
Bruce Momjian 已提交
1019 1020 1021 1022 1023
			 * In a SubqueryScan, SS_finalize_plan has already been run on
			 * the subplan by the inner invocation of subquery_planner, so
			 * there's no need to do it again.  Instead, just pull out the
			 * subplan's extParams list, which represents the params it
			 * needs from my level and higher levels.
1024
			 */
1025
			context.paramids = bms_add_members(context.paramids,
B
Bruce Momjian 已提交
1026
							 ((SubqueryScan *) plan)->subplan->extParam);
1027 1028
			break;

1029 1030 1031
		case T_FunctionScan:
			{
				RangeTblEntry *rte;
1032

1033 1034 1035
				rte = rt_fetch(((FunctionScan *) plan)->scan.scanrelid,
							   rtable);
				Assert(rte->rtekind == RTE_FUNCTION);
1036
				finalize_primnode(rte->funcexpr, &context);
1037 1038 1039 1040
			}
			break;

		case T_Append:
1041
			{
B
Bruce Momjian 已提交
1042
				ListCell   *l;
1043 1044 1045 1046 1047 1048 1049 1050 1051 1052

				foreach(l, ((Append *) plan)->appendplans)
				{
					context.paramids =
						bms_add_members(context.paramids,
										finalize_plan((Plan *) lfirst(l),
													  rtable,
													  outer_params,
													  valid_params));
				}
1053
			}
1054 1055
			break;

1056 1057
		case T_NestLoop:
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
1058
							  &context);
1059 1060
			break;

1061
		case T_MergeJoin:
1062
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
1063
							  &context);
1064
			finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
1065
							  &context);
1066 1067 1068
			break;

		case T_HashJoin:
1069
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
1070
							  &context);
1071
			finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
1072
							  &context);
1073
			break;
1074

1075 1076 1077 1078 1079 1080 1081
		case T_Limit:
			finalize_primnode(((Limit *) plan)->limitOffset,
							  &context);
			finalize_primnode(((Limit *) plan)->limitCount,
							  &context);
			break;

1082 1083 1084 1085 1086 1087
		case T_Hash:
		case T_Agg:
		case T_SeqScan:
		case T_Material:
		case T_Sort:
		case T_Unique:
1088
		case T_SetOp:
1089 1090
		case T_Group:
			break;
1091

1092
		default:
1093 1094
			elog(ERROR, "unrecognized node type: %d",
				 (int) nodeTag(plan));
1095
	}
1096

1097
	/* Process left and right child plans, if any */
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108
	context.paramids = bms_add_members(context.paramids,
									   finalize_plan(plan->lefttree,
													 rtable,
													 outer_params,
													 valid_params));

	context.paramids = bms_add_members(context.paramids,
									   finalize_plan(plan->righttree,
													 rtable,
													 outer_params,
													 valid_params));
1109

1110
	/* Now we have all the paramids */
1111

1112
	if (!bms_is_subset(context.paramids, valid_params))
1113
		elog(ERROR, "plan should not reference subplan's variable");
1114

1115 1116
	plan->extParam = bms_intersect(context.paramids, outer_params);
	plan->allParam = context.paramids;
1117

1118
	/*
B
Bruce Momjian 已提交
1119 1120
	 * For speed at execution time, make sure extParam/allParam are
	 * actually NULL if they are empty sets.
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131
	 */
	if (bms_is_empty(plan->extParam))
	{
		bms_free(plan->extParam);
		plan->extParam = NULL;
	}
	if (bms_is_empty(plan->allParam))
	{
		bms_free(plan->allParam);
		plan->allParam = NULL;
	}
1132

1133
	return plan->allParam;
1134
}
1135 1136

/*
1137 1138
 * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
 * expression tree to the result set.
1139 1140
 */
static bool
1141
finalize_primnode(Node *node, finalize_primnode_context *context)
1142 1143 1144 1145 1146 1147 1148 1149 1150
{
	if (node == NULL)
		return false;
	if (IsA(node, Param))
	{
		if (((Param *) node)->paramkind == PARAM_EXEC)
		{
			int			paramid = (int) ((Param *) node)->paramid;

1151
			context->paramids = bms_add_member(context->paramids, paramid);
1152 1153 1154 1155 1156
		}
		return false;			/* no more to do here */
	}
	if (is_subplan(node))
	{
B
Bruce Momjian 已提交
1157
		SubPlan    *subplan = (SubPlan *) node;
1158

1159 1160
		/* Add outer-level params needed by the subplan to paramids */
		context->paramids = bms_join(context->paramids,
B
Bruce Momjian 已提交
1161 1162
								   bms_intersect(subplan->plan->extParam,
												 context->outer_params));
1163 1164 1165
		/* fall through to recurse into subplan args */
	}
	return expression_tree_walker(node, finalize_primnode,
1166
								  (void *) context);
1167
}