subselect.c 32.2 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * subselect.c
4 5
 *	  Planning routines for subselects and parameters.
 *
B
Bruce Momjian 已提交
6
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
7
 * Portions Copyright (c) 1994, Regents of the University of California
8 9
 *
 * IDENTIFICATION
B
Bruce Momjian 已提交
10
 *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.78 2003/06/25 21:30:30 momjian Exp $
11 12 13 14 15
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

16
#include "catalog/pg_operator.h"
17
#include "catalog/pg_type.h"
18
#include "miscadmin.h"
19
#include "nodes/makefuncs.h"
20
#include "nodes/params.h"
21
#include "optimizer/clauses.h"
22 23
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
B
Bruce Momjian 已提交
24 25
#include "optimizer/planner.h"
#include "optimizer/subselect.h"
26
#include "optimizer/var.h"
27
#include "parser/parsetree.h"
28 29
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
30
#include "parser/parse_relation.h"
31
#include "rewrite/rewriteManip.h"
32
#include "utils/builtins.h"
33
#include "utils/lsyscache.h"
34
#include "utils/syscache.h"
35

36

37
Index		PlannerQueryLevel;	/* level of current query */
38
List	   *PlannerInitPlan;	/* init subplans for current query */
39
List	   *PlannerParamList;	/* to keep track of cross-level Params */
40 41

int			PlannerPlanId = 0;	/* to assign unique ID to subquery plans */
42

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
/*
 * PlannerParamList keeps track of the PARAM_EXEC slots that we have decided
 * we need for the query.  At runtime these slots are used to pass values
 * either down into subqueries (for outer references in subqueries) or up out
 * of subqueries (for the results of a subplan).  The n'th entry in the list
 * (n counts from 0) corresponds to Param->paramid = n.
 *
 * Each ParamList item shows the absolute query level it is associated with,
 * where the outermost query is level 1 and nested subqueries have higher
 * numbers.  The item the parameter slot represents can be one of three kinds:
 *
 * A Var: the slot represents a variable of that level that must be passed
 * down because subqueries have outer references to it.  The varlevelsup
 * value in the Var will always be zero.
 *
 * An Aggref (with an expression tree representing its argument): the slot
 * represents an aggregate expression that is an outer reference for some
 * subquery.  The Aggref itself has agglevelsup = 0, and its argument tree
 * is adjusted to match in level.
62
 *
63 64 65 66 67 68
 * A Param: the slot holds the result of a subplan (it is a setParam item
 * for that subplan).  The absolute level shown for such items corresponds
 * to the parent query of the subplan.
 *
 * Note: we detect duplicate Var parameters and coalesce them into one slot,
 * but we do not do this for Aggref or Param slots.
69
 */
70 71 72 73 74
typedef struct PlannerParamItem
{
	Node	   *item;			/* the Var, Aggref, or Param */
	Index		abslevel;		/* its absolute query level */
} PlannerParamItem;
75 76


77
typedef struct finalize_primnode_context
78
{
79 80 81
	Bitmapset   *paramids;		/* Set of PARAM_EXEC paramids found */
	Bitmapset   *outer_params;	/* Set of accessible outer paramids */
} finalize_primnode_context;
82 83


84
static List *convert_sublink_opers(List *lefthand, List *operOids,
85
								   List *targetlist, int rtindex,
B
Bruce Momjian 已提交
86
								   List **righthandIds);
87
static bool subplan_is_hashable(SubLink *slink, SubPlan *node);
88
static Node *replace_correlation_vars_mutator(Node *node, void *context);
89
static Node *process_sublinks_mutator(Node *node, bool *isTopQual);
90 91 92 93
static Bitmapset *finalize_plan(Plan *plan, List *rtable,
								Bitmapset *outer_params,
								Bitmapset *valid_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
94 95


96 97 98 99
/*
 * Generate a Param node to replace the given Var,
 * which is expected to have varlevelsup > 0 (ie, it is not local).
 */
100
static Param *
101
replace_outer_var(Var *var)
102
{
103
	Param	   *retval;
104 105 106
	List	   *ppl;
	PlannerParamItem *pitem;
	Index		abslevel;
107 108
	int			i;

109
	Assert(var->varlevelsup > 0 && var->varlevelsup < PlannerQueryLevel);
110
	abslevel = PlannerQueryLevel - var->varlevelsup;
111

112
	/*
113
	 * If there's already a PlannerParamList entry for this same Var, just
B
Bruce Momjian 已提交
114
	 * use it.	NOTE: in sufficiently complex querytrees, it is possible
115
	 * for the same varno/abslevel to refer to different RTEs in different
B
Bruce Momjian 已提交
116 117 118 119
	 * parts of the parsetree, so that different fields might end up
	 * sharing the same Param number.  As long as we check the vartype as
	 * well, I believe that this sort of aliasing will cause no trouble.
	 * The correct field should get stored into the Param slot at
120
	 * execution in each part of the tree.
121 122
	 */
	i = 0;
123
	foreach(ppl, PlannerParamList)
124
	{
125 126 127 128
		pitem = (PlannerParamItem *) lfirst(ppl);
		if (pitem->abslevel == abslevel && IsA(pitem->item, Var))
		{
			Var	   *pvar = (Var *) pitem->item;
129

130 131 132 133 134
			if (pvar->varno == var->varno &&
				pvar->varattno == var->varattno &&
				pvar->vartype == var->vartype)
				break;
		}
135
		i++;
136
	}
137

138
	if (!ppl)
139 140
	{
		/* Nope, so make a new one */
141 142 143 144 145 146 147 148 149
		var = (Var *) copyObject(var);
		var->varlevelsup = 0;

		pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
		pitem->item = (Node *) var;
		pitem->abslevel = abslevel;

		PlannerParamList = lappend(PlannerParamList, pitem);
		/* i is already the correct index for the new item */
150
	}
151

152 153 154 155
	retval = makeNode(Param);
	retval->paramkind = PARAM_EXEC;
	retval->paramid = (AttrNumber) i;
	retval->paramtype = var->vartype;
156

157
	return retval;
158 159
}

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
/*
 * Generate a Param node to replace the given Aggref
 * which is expected to have agglevelsup > 0 (ie, it is not local).
 */
static Param *
replace_outer_agg(Aggref *agg)
{
	Param	   *retval;
	PlannerParamItem *pitem;
	Index		abslevel;
	int			i;

	Assert(agg->agglevelsup > 0 && agg->agglevelsup < PlannerQueryLevel);
	abslevel = PlannerQueryLevel - agg->agglevelsup;

	/*
	 * It does not seem worthwhile to try to match duplicate outer aggs.
	 * Just make a new slot every time.
	 */
	agg = (Aggref *) copyObject(agg);
	IncrementVarSublevelsUp((Node *) agg, - ((int) agg->agglevelsup), 0);
	Assert(agg->agglevelsup == 0);

	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
	pitem->item = (Node *) agg;
	pitem->abslevel = abslevel;

	PlannerParamList = lappend(PlannerParamList, pitem);
	i = length(PlannerParamList) - 1;

	retval = makeNode(Param);
	retval->paramkind = PARAM_EXEC;
	retval->paramid = (AttrNumber) i;
	retval->paramtype = agg->aggtype;

	return retval;
}

198 199
/*
 * Generate a new Param node that will not conflict with any other.
200 201 202 203
 *
 * This is used to allocate PARAM_EXEC slots for subplan outputs.
 *
 * paramtypmod is currently unused but might be wanted someday.
204 205 206 207
 */
static Param *
generate_new_param(Oid paramtype, int32 paramtypmod)
{
208 209
	Param	   *retval;
	PlannerParamItem *pitem;
210

211
	retval = makeNode(Param);
212
	retval->paramkind = PARAM_EXEC;
213
	retval->paramid = (AttrNumber) length(PlannerParamList);
214 215
	retval->paramtype = paramtype;

216 217 218 219 220 221
	pitem = (PlannerParamItem *) palloc(sizeof(PlannerParamItem));
	pitem->item = (Node *) retval;
	pitem->abslevel = PlannerQueryLevel;

	PlannerParamList = lappend(PlannerParamList, pitem);

222 223 224
	return retval;
}

225
/*
226 227 228
 * Convert a bare SubLink (as created by the parser) into a SubPlan.
 *
 * We are given the raw SubLink and the already-processed lefthand argument
229 230
 * list (use this instead of the SubLink's own field).  We are also told if
 * this expression appears at top level of a WHERE/HAVING qual.
231 232 233 234 235 236
 *
 * The result is whatever we need to substitute in place of the SubLink
 * node in the executable expression.  This will be either the SubPlan
 * node (if we have to do the subplan as a subplan), or a Param node
 * representing the result of an InitPlan, or possibly an AND or OR tree
 * containing InitPlan Param nodes.
237
 */
238
static Node *
239
make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
240
{
241
	SubPlan	   *node = makeNode(SubPlan);
242
	Query	   *subquery = (Query *) (slink->subselect);
243
	double		tuple_fraction;
244
	Plan	   *plan;
245 246
	Bitmapset  *tmpset;
	int			paramid;
247 248
	List	   *lst;
	Node	   *result;
249

250
	/*
B
Bruce Momjian 已提交
251 252 253 254
	 * Copy the source Query node.	This is a quick and dirty kluge to
	 * resolve the fact that the parser can generate trees with multiple
	 * links to the same sub-Query node, but the planner wants to scribble
	 * on the Query. Try to clean this up when we do querytree redesign...
255 256 257
	 */
	subquery = (Query *) copyObject(subquery);

258
	/*
259 260 261 262 263 264 265
	 * For an EXISTS subplan, tell lower-level planner to expect that only
	 * the first tuple will be retrieved.  For ALL and ANY subplans, we
	 * will be able to stop evaluating if the test condition fails, so
	 * very often not all the tuples will be retrieved; for lack of a
	 * better idea, specify 50% retrieval.	For EXPR and MULTIEXPR
	 * subplans, use default behavior (we're only expecting one row out,
	 * anyway).
266
	 *
267 268 269
	 * NOTE: if you change these numbers, also change cost_qual_eval_walker()
	 * in path/costsize.c.
	 *
270 271 272
	 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
	 * materialize its result below.  In that case it would've been better to
	 * specify full retrieval.  At present, however, we can only detect
273 274 275 276 277
	 * correlation or lack of it after we've made the subplan :-(. Perhaps
	 * detection of correlation should be done as a separate step.
	 * Meanwhile, we don't want to be too optimistic about the percentage
	 * of tuples retrieved, for fear of selecting a plan that's bad for
	 * the materialization case.
278 279 280
	 */
	if (slink->subLinkType == EXISTS_SUBLINK)
		tuple_fraction = 1.0;	/* just like a LIMIT 1 */
281 282
	else if (slink->subLinkType == ALL_SUBLINK ||
			 slink->subLinkType == ANY_SUBLINK)
283
		tuple_fraction = 0.5;	/* 50% */
284
	else
285
		tuple_fraction = 0.0;	/* default behavior */
286

287
	/*
288
	 * Generate the plan for the subquery.
289
	 */
290
	node->plan = plan = subquery_planner(subquery, tuple_fraction);
291

B
Bruce Momjian 已提交
292
	node->plan_id = PlannerPlanId++;	/* Assign unique ID to this
293
										 * SubPlan */
294

295
	node->rtable = subquery->rtable;
296

297
	/*
298
	 * Initialize other fields of the SubPlan node.
299 300
	 */
	node->subLinkType = slink->subLinkType;
301
	node->useOr = slink->useOr;
302 303 304
	node->exprs = NIL;
	node->paramIds = NIL;
	node->useHashTable = false;
305 306
	/* At top level of a qual, can treat UNKNOWN the same as FALSE */
	node->unknownEqFalse = isTopQual;
307 308 309
	node->setParam = NIL;
	node->parParam = NIL;
	node->args = NIL;
310

311
	/*
B
Bruce Momjian 已提交
312 313
	 * Make parParam list of params that current query level will pass to
	 * this child plan.
314
	 */
315 316
	tmpset = bms_copy(plan->extParam);
	while ((paramid = bms_first_member(tmpset)) >= 0)
317
	{
318
		PlannerParamItem *pitem = nth(paramid, PlannerParamList);
319

320
		if (pitem->abslevel == PlannerQueryLevel)
321
			node->parParam = lappendi(node->parParam, paramid);
322
	}
323
	bms_free(tmpset);
324 325

	/*
326 327 328
	 * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, or
	 * MULTIEXPR types can be used as initPlans.  For EXISTS, EXPR, or ARRAY,
	 * we just produce a Param referring to the result of evaluating the
329 330 331
	 * initPlan.  For MULTIEXPR, we must build an AND or OR-clause of the
	 * individual comparison operators, using the appropriate lefthand
	 * side expressions and Params for the initPlan's target items.
332
	 */
333 334
	if (node->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK)
	{
335
		Param	   *prm;
336

337
		prm = generate_new_param(BOOLOID, -1);
338
		node->setParam = makeListi1(prm->paramid);
339 340 341 342 343 344
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
	else if (node->parParam == NIL && slink->subLinkType == EXPR_SUBLINK)
	{
		TargetEntry *te = lfirst(plan->targetlist);
345
		Param	   *prm;
346

347
		Assert(!te->resdom->resjunk);
348
		prm = generate_new_param(te->resdom->restype, te->resdom->restypmod);
349
		node->setParam = makeListi1(prm->paramid);
350 351 352
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
	else if (node->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK)
	{
		TargetEntry *te = lfirst(plan->targetlist);
		Oid			arraytype;
		Param	   *prm;

		Assert(!te->resdom->resjunk);
		arraytype = get_array_type(te->resdom->restype);
		if (!OidIsValid(arraytype))
			elog(ERROR, "Cannot find array type for datatype %s",
				 format_type_be(te->resdom->restype));
		prm = generate_new_param(arraytype, -1);
		node->setParam = makeListi1(prm->paramid);
		PlannerInitPlan = lappend(PlannerInitPlan, node);
		result = (Node *) prm;
	}
369
	else if (node->parParam == NIL && slink->subLinkType == MULTIEXPR_SUBLINK)
370
	{
371 372 373 374 375 376
		List   *exprs;

		/* Convert the lefthand exprs and oper OIDs into executable exprs */
		exprs = convert_sublink_opers(lefthand,
									  slink->operOids,
									  plan->targetlist,
B
Bruce Momjian 已提交
377
									  0,
378
									  &node->paramIds);
379
		node->setParam = listCopy(node->paramIds);
380
		PlannerInitPlan = lappend(PlannerInitPlan, node);
381 382 383 384 385 386 387 388
		/*
		 * The executable expressions are returned to become part of the
		 * outer plan's expression tree; they are not kept in the initplan
		 * node.
		 */
		if (length(exprs) > 1)
			result = (Node *) (node->useOr ? make_orclause(exprs) :
							   make_andclause(exprs));
389
		else
390
			result = (Node *) lfirst(exprs);
391
	}
392
	else
393
	{
394
		List	   *args;
395

396
		/*
397 398 399
		 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types
		 * to initPlans, even when they are uncorrelated or undirect
		 * correlated, because we need to scan the output of the subplan
400 401 402 403 404 405 406 407 408 409 410 411 412 413
		 * for each outer tuple.  But if it's an IN (= ANY) test, we might
		 * be able to use a hashtable to avoid comparing all the tuples.
		 */
		if (subplan_is_hashable(slink, node))
			node->useHashTable = true;
		/*
		 * Otherwise, we have the option to tack a MATERIAL node onto the top
		 * of the subplan, to reduce the cost of reading it repeatedly.  This
		 * is pointless for a direct-correlated subplan, since we'd have to
		 * recompute its results each time anyway.  For uncorrelated/undirect
		 * correlated subplans, we add MATERIAL if the subplan's top plan node
		 * is anything more complicated than a plain sequential scan, and we
		 * do it even for seqscan if the qual appears selective enough to
		 * eliminate many tuples.
414
		 */
415
		else if (node->parParam == NIL)
416 417 418 419 420 421
		{
			bool		use_material;

			switch (nodeTag(plan))
			{
				case T_SeqScan:
422
					if (plan->initPlan)
423 424 425 426 427 428 429
						use_material = true;
					else
					{
						Selectivity qualsel;

						qualsel = clauselist_selectivity(subquery,
														 plan->qual,
430
														 0, JOIN_INNER);
431 432 433
						/* Is 10% selectivity a good threshold?? */
						use_material = qualsel < 0.10;
					}
434 435
					break;
				case T_Material:
436
				case T_FunctionScan:
437
				case T_Sort:
438 439 440

					/*
					 * Don't add another Material node if there's one
441 442
					 * already, nor if the top node is any other type that
					 * materializes its output anyway.
443 444 445 446 447 448 449 450 451
					 */
					use_material = false;
					break;
				default:
					use_material = true;
					break;
			}
			if (use_material)
			{
452
				node->plan = plan = materialize_finished_plan(plan);
453 454 455
			}
		}

456 457 458 459
		/* Convert the lefthand exprs and oper OIDs into executable exprs */
		node->exprs = convert_sublink_opers(lefthand,
											slink->operOids,
											plan->targetlist,
B
Bruce Momjian 已提交
460
											0,
461
											&node->paramIds);
462

463
		/*
464
		 * Make node->args from parParam.
465
		 */
466
		args = NIL;
467
		foreach(lst, node->parParam)
468
		{
469
			PlannerParamItem *pitem = nth(lfirsti(lst), PlannerParamList);
470 471

			/*
472 473 474
			 * The Var or Aggref has already been adjusted to have the
			 * correct varlevelsup or agglevelsup.  We probably don't even
			 * need to copy it again, but be safe.
475
			 */
476
			args = lappend(args, copyObject(pitem->item));
477
		}
478
		node->args = args;
479

480
		result = (Node *) node;
481 482 483 484 485 486
	}

	return result;
}

/*
487 488
 * convert_sublink_opers: given a lefthand-expressions list and a list of
 * operator OIDs, build a list of actually executable expressions.  The
489 490
 * righthand sides of the expressions are Params or Vars representing the
 * results of the sub-select.
491
 *
492 493 494 495 496 497
 * If rtindex is 0, we build Params to represent the sub-select outputs.
 * The paramids of the Params created are returned in the *righthandIds list.
 *
 * If rtindex is not 0, we build Vars using that rtindex as varno.  The
 * Vars themselves are returned in *righthandIds (this is a bit of a type
 * cheat, but we can get away with it).
498
 */
499
static List *
500
convert_sublink_opers(List *lefthand, List *operOids,
501
					  List *targetlist, int rtindex,
B
Bruce Momjian 已提交
502
					  List **righthandIds)
503
{
504
	List	   *result = NIL;
505 506
	List	   *lst;

507
	*righthandIds = NIL;
508 509

	foreach(lst, operOids)
510
	{
511
		Oid			opid = lfirsto(lst);
512
		Node	   *leftop = lfirst(lefthand);
513
		TargetEntry *te = lfirst(targetlist);
514
		Node	   *rightop;
515 516
		Operator	tup;

517 518
		Assert(!te->resdom->resjunk);

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
		if (rtindex)
		{
			/* Make the Var node representing the subplan's result */
			rightop = (Node *) makeVar(rtindex,
									   te->resdom->resno,
									   te->resdom->restype,
									   te->resdom->restypmod,
									   0);
			/* Record it for caller */
			*righthandIds = lappend(*righthandIds, rightop);
		}
		else
		{
			/* Make the Param node representing the subplan's result */
			Param	   *prm;

			prm = generate_new_param(te->resdom->restype,
									 te->resdom->restypmod);
			/* Record its ID */
			*righthandIds = lappendi(*righthandIds, prm->paramid);
			rightop = (Node *) prm;
		}
541

542
		/* Look up the operator to pass to make_op_expr */
543
		tup = SearchSysCache(OPEROID,
544
							 ObjectIdGetDatum(opid),
545 546
							 0, 0, 0);
		if (!HeapTupleIsValid(tup))
547
			elog(ERROR, "cache lookup failed for operator %u", opid);
548

549
		/*
550 551
		 * Make the expression node.
		 *
552
		 * Note: we use make_op_expr in case runtime type conversion
553 554 555
		 * function calls must be inserted for this operator!  (But we
		 * are not expecting to have to resolve unknown Params, so
		 * it's okay to pass a null pstate.)
556
		 */
B
Bruce Momjian 已提交
557 558 559 560 561 562 563
		result = lappend(result,
						 make_op_expr(NULL,
									  tup,
									  leftop,
									  rightop,
									  exprType(leftop),
									  te->resdom->restype));
564

565 566
		ReleaseSysCache(tup);

567
		lefthand = lnext(lefthand);
568
		targetlist = lnext(targetlist);
569
	}
570

571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
	return result;
}

/*
 * subplan_is_hashable: decide whether we can implement a subplan by hashing
 *
 * Caution: the SubPlan node is not completely filled in yet.  We can rely
 * on its plan and parParam fields, however.
 */
static bool
subplan_is_hashable(SubLink *slink, SubPlan *node)
{
	double		subquery_size;
	List	   *opids;

	/*
	 * The sublink type must be "= ANY" --- that is, an IN operator.
	 * (We require the operator name to be unqualified, which may be
	 * overly paranoid, or may not be.)  XXX since we also check that the
	 * operators are hashable, the test on operator name may be redundant?
	 */
	if (slink->subLinkType != ANY_SUBLINK)
		return false;
	if (length(slink->operName) != 1 ||
		strcmp(strVal(lfirst(slink->operName)), "=") != 0)
		return false;
	/*
	 * The subplan must not have any direct correlation vars --- else we'd
	 * have to recompute its output each time, so that the hashtable wouldn't
	 * gain anything.
	 */
	if (node->parParam != NIL)
		return false;
	/*
	 * The estimated size of the subquery result must fit in SortMem.
	 * (XXX what about hashtable overhead?)
	 */
	subquery_size = node->plan->plan_rows *
		(MAXALIGN(node->plan->plan_width) + MAXALIGN(sizeof(HeapTupleData)));
	if (subquery_size > SortMem * 1024L)
		return false;
	/*
613 614 615 616 617 618 619 620 621 622 623
	 * The combining operators must be hashable, strict, and self-commutative.
	 * The need for hashability is obvious, since we want to use hashing.
	 * Without strictness, behavior in the presence of nulls is too
	 * unpredictable.  (We actually must assume even more than plain
	 * strictness, see nodeSubplan.c for details.)  And commutativity ensures
	 * that the left and right datatypes are the same; this allows us to
	 * assume that the combining operators are equality for the righthand
	 * datatype, so that they can be used to compare righthand tuples as
	 * well as comparing lefthand to righthand tuples.  (This last restriction
	 * could be relaxed by using two different sets of operators with the
	 * hash table, but there is no obvious usefulness to that at present.)
624 625 626
	 */
	foreach(opids, slink->operOids)
	{
627
		Oid			opid = lfirsto(opids);
628 629 630 631 632 633 634 635 636
		HeapTuple	tup;
		Form_pg_operator optup;

		tup = SearchSysCache(OPEROID,
							 ObjectIdGetDatum(opid),
							 0, 0, 0);
		if (!HeapTupleIsValid(tup))
			elog(ERROR, "cache lookup failed for operator %u", opid);
		optup = (Form_pg_operator) GETSTRUCT(tup);
637 638
		if (!optup->oprcanhash || optup->oprcom != opid ||
			!func_strict(optup->oprcode))
639 640 641 642 643 644 645
		{
			ReleaseSysCache(tup);
			return false;
		}
		ReleaseSysCache(tup);
	}
	return true;
646 647
}

648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663
/*
 * convert_IN_to_join: can we convert an IN SubLink to join style?
 *
 * The caller has found a SubLink at the top level of WHERE, but has not
 * checked the properties of the SubLink at all.  Decide whether it is
 * appropriate to process this SubLink in join style.  If not, return NULL.
 * If so, build the qual clause(s) to replace the SubLink, and return them.
 *
 * Side effects of a successful conversion include adding the SubLink's
 * subselect to the query's rangetable and adding an InClauseInfo node to
 * its in_info_list.
 */
Node *
convert_IN_to_join(Query *parse, SubLink *sublink)
{
	Query	   *subselect = (Query *) sublink->subselect;
664
	Relids		left_varnos;
665 666 667 668 669 670 671 672 673
	int			rtindex;
	RangeTblEntry *rte;
	RangeTblRef *rtr;
	InClauseInfo  *ininfo;
	List	   *exprs;

	/*
	 * The sublink type must be "= ANY" --- that is, an IN operator.
	 * (We require the operator name to be unqualified, which may be
B
Bruce Momjian 已提交
674
	 * overly paranoid, or may not be.)
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
	 */
	if (sublink->subLinkType != ANY_SUBLINK)
		return NULL;
	if (length(sublink->operName) != 1 ||
		strcmp(strVal(lfirst(sublink->operName)), "=") != 0)
		return NULL;
	/*
	 * The sub-select must not refer to any Vars of the parent query.
	 * (Vars of higher levels should be okay, though.)
	 */
	if (contain_vars_of_level((Node *) subselect, 1))
		return NULL;
	/*
	 * The left-hand expressions must contain some Vars of the current
	 * query, else it's not gonna be a join.
	 */
	left_varnos = pull_varnos((Node *) sublink->lefthand);
692
	if (bms_is_empty(left_varnos))
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
		return NULL;
	/*
	 * The left-hand expressions mustn't be volatile.  (Perhaps we should
	 * test the combining operators, too?  We'd only need to point the
	 * function directly at the sublink ...)
	 */
	if (contain_volatile_functions((Node *) sublink->lefthand))
		return NULL;
	/*
	 * Okay, pull up the sub-select into top range table and jointree.
	 *
	 * We rely here on the assumption that the outer query has no references
	 * to the inner (necessarily true, other than the Vars that we build
	 * below).  Therefore this is a lot easier than what pull_up_subqueries
	 * has to go through.
	 */
	rte = addRangeTableEntryForSubquery(NULL,
										subselect,
										makeAlias("IN_subquery", NIL),
										false);
	parse->rtable = lappend(parse->rtable, rte);
	rtindex = length(parse->rtable);
	rtr = makeNode(RangeTblRef);
	rtr->rtindex = rtindex;
	parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
	/*
	 * Now build the InClauseInfo node.
	 */
	ininfo = makeNode(InClauseInfo);
	ininfo->lefthand = left_varnos;
723
	ininfo->righthand = bms_make_singleton(rtindex);
724 725 726 727 728 729 730 731 732
	parse->in_info_list = lcons(ininfo, parse->in_info_list);
	/*
	 * Build the result qual expressions.  As a side effect,
	 * ininfo->sub_targetlist is filled with a list of the Vars
	 * representing the subselect outputs.
	 */
	exprs = convert_sublink_opers(sublink->lefthand,
								  sublink->operOids,
								  subselect->targetList,
B
Bruce Momjian 已提交
733
								  rtindex,
734 735 736 737
								  &ininfo->sub_targetlist);
	return (Node *) make_ands_explicit(exprs);
}

738 739
/*
 * Replace correlation vars (uplevel vars) with Params.
740 741 742 743 744 745 746 747 748 749 750 751 752 753
 *
 * Uplevel aggregates are replaced, too.
 *
 * Note: it is critical that this runs immediately after SS_process_sublinks.
 * Since we do not recurse into the arguments of uplevel aggregates, they will
 * get copied to the appropriate subplan args list in the parent query with
 * uplevel vars not replaced by Params, but only adjusted in level (see
 * replace_outer_agg).  That's exactly what we want for the vars of the parent
 * level --- but if an aggregate's argument contains any further-up variables,
 * they have to be replaced with Params in their turn.  That will happen when
 * the parent level runs SS_replace_correlation_vars.  Therefore it must do
 * so after expanding its sublinks to subplans.  And we don't want any steps
 * in between, else those steps would never get applied to the aggregate
 * argument expressions, either in the parent or the child level.
754
 */
755
Node *
756
SS_replace_correlation_vars(Node *expr)
757
{
758 759 760
	/* No setup needed for tree walk, so away we go */
	return replace_correlation_vars_mutator(expr, NULL);
}
761

762 763 764 765 766 767
static Node *
replace_correlation_vars_mutator(Node *node, void *context)
{
	if (node == NULL)
		return NULL;
	if (IsA(node, Var))
768
	{
769
		if (((Var *) node)->varlevelsup > 0)
770 771 772 773 774 775
			return (Node *) replace_outer_var((Var *) node);
	}
	if (IsA(node, Aggref))
	{
		if (((Aggref *) node)->agglevelsup > 0)
			return (Node *) replace_outer_agg((Aggref *) node);
776
	}
777 778 779
	return expression_tree_mutator(node,
								   replace_correlation_vars_mutator,
								   context);
780 781
}

782 783
/*
 * Expand SubLinks to SubPlans in the given expression.
784 785 786 787
 *
 * The isQual argument tells whether or not this expression is a WHERE/HAVING
 * qualifier expression.  If it is, any sublinks appearing at top level need
 * not distinguish FALSE from UNKNOWN return values.
788
 */
789
Node *
790
SS_process_sublinks(Node *expr, bool isQual)
791
{
792 793
	/* The only context needed is the initial are-we-in-a-qual flag */
	return process_sublinks_mutator(expr, &isQual);
794 795 796
}

static Node *
797
process_sublinks_mutator(Node *node, bool *isTopQual)
798
{
799 800
	bool	locTopQual;

801
	if (node == NULL)
802
		return NULL;
803
	if (IsA(node, SubLink))
804
	{
805
		SubLink    *sublink = (SubLink *) node;
806
		List	   *lefthand;
807

808
		/*
809
		 * First, recursively process the lefthand-side expressions, if any.
810
		 */
811
		locTopQual = false;
812
		lefthand = (List *)
813
			process_sublinks_mutator((Node *) sublink->lefthand, &locTopQual);
814 815 816
		/*
		 * Now build the SubPlan node and make the expr to return.
		 */
817
		return make_subplan(sublink, lefthand, *isTopQual);
818
	}
819

820
	/*
821 822 823
	 * We should never see a SubPlan expression in the input (since this is
	 * the very routine that creates 'em to begin with).  We shouldn't find
	 * ourselves invoked directly on a Query, either.
824
	 */
825
	Assert(!is_subplan(node));
826
	Assert(!IsA(node, Query));
827

828 829 830 831 832 833 834 835 836
	/*
	 * If we recurse down through anything other than a List node, we are
	 * definitely not at top qual level anymore.
	 */
	if (IsA(node, List))
		locTopQual = *isTopQual;
	else
		locTopQual = false;

837 838
	return expression_tree_mutator(node,
								   process_sublinks_mutator,
839
								   (void *) &locTopQual);
840 841
}

842 843 844
/*
 * SS_finalize_plan - do final sublink processing for a completed Plan.
 *
845 846
 * This recursively computes the extParam and allParam sets
 * for every Plan node in the given plan tree.
847
 */
848
void
849
SS_finalize_plan(Plan *plan, List *rtable)
850
{
851 852 853 854 855 856 857 858 859 860 861
	Bitmapset  *outer_params = NULL;
	Bitmapset  *valid_params = NULL;
	int			paramid;
	List	   *lst;

	/*
	 * First, scan the param list to discover the sets of params that
	 * are available from outer query levels and my own query level.
	 * We do this once to save time in the per-plan recursion steps.
	 */
	paramid = 0;
862
	foreach(lst, PlannerParamList)
863
	{
864
		PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lst);
865

866
		if (pitem->abslevel < PlannerQueryLevel)
867 868 869 870 871
		{
			/* valid outer-level parameter */
			outer_params = bms_add_member(outer_params, paramid);
			valid_params = bms_add_member(valid_params, paramid);
		}
872 873
		else if (pitem->abslevel == PlannerQueryLevel &&
				 IsA(pitem->item, Param))
874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901
		{
			/* valid local parameter (i.e., a setParam of my child) */
			valid_params = bms_add_member(valid_params, paramid);
		}

		paramid++;
	}

	/*
	 * Now recurse through plan tree.
	 */
	(void) finalize_plan(plan, rtable, outer_params, valid_params);

	bms_free(outer_params);
	bms_free(valid_params);
}

/*
 * Recursive processing of all nodes in the plan tree
 *
 * The return value is the computed allParam set for the given Plan node.
 * This is just an internal notational convenience.
 */
static Bitmapset *
finalize_plan(Plan *plan, List *rtable,
			  Bitmapset *outer_params, Bitmapset *valid_params)
{
	finalize_primnode_context context;
902 903 904
	List	   *lst;

	if (plan == NULL)
905
		return NULL;
906

907 908
	context.paramids = NULL;	/* initialize set to empty */
	context.outer_params = outer_params;
909

910
	/*
911
	 * When we call finalize_primnode, context.paramids sets are
912
	 * automatically merged together.  But when recursing to self, we have
913
	 * to do it the hard way.  We want the paramids set to include params
914
	 * in subplans as well as at this level.
915 916
	 */

917
	/* Find params in targetlist and qual */
918 919
	finalize_primnode((Node *) plan->targetlist, &context);
	finalize_primnode((Node *) plan->qual, &context);
920

921
	/* Check additional node-type-specific fields */
922 923 924
	switch (nodeTag(plan))
	{
		case T_Result:
925
			finalize_primnode(((Result *) plan)->resconstantqual,
926
							  &context);
927 928
			break;

929 930
		case T_IndexScan:
			finalize_primnode((Node *) ((IndexScan *) plan)->indxqual,
931
							  &context);
932 933 934

			/*
			 * we need not look at indxqualorig, since it will have the
935
			 * same param references as indxqual.
936 937 938 939 940
			 */
			break;

		case T_TidScan:
			finalize_primnode((Node *) ((TidScan *) plan)->tideval,
941
							  &context);
942
			break;
943

944
		case T_SubqueryScan:
B
Bruce Momjian 已提交
945

946
			/*
B
Bruce Momjian 已提交
947 948 949 950 951
			 * In a SubqueryScan, SS_finalize_plan has already been run on
			 * the subplan by the inner invocation of subquery_planner, so
			 * there's no need to do it again.  Instead, just pull out the
			 * subplan's extParams list, which represents the params it
			 * needs from my level and higher levels.
952
			 */
953
			context.paramids = bms_add_members(context.paramids,
B
Bruce Momjian 已提交
954
							 ((SubqueryScan *) plan)->subplan->extParam);
955 956
			break;

957 958 959
		case T_FunctionScan:
			{
				RangeTblEntry *rte;
960

961 962 963
				rte = rt_fetch(((FunctionScan *) plan)->scan.scanrelid,
							   rtable);
				Assert(rte->rtekind == RTE_FUNCTION);
964
				finalize_primnode(rte->funcexpr, &context);
965 966 967 968 969
			}
			break;

		case T_Append:
			foreach(lst, ((Append *) plan)->appendplans)
970 971 972 973 974 975 976 977
			{
				context.paramids =
					bms_add_members(context.paramids,
									finalize_plan((Plan *) lfirst(lst),
												  rtable,
												  outer_params,
												  valid_params));
			}
978 979
			break;

980 981
		case T_NestLoop:
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
982
							  &context);
983 984
			break;

985
		case T_MergeJoin:
986
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
987
							  &context);
988
			finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
989
							  &context);
990 991 992
			break;

		case T_HashJoin:
993
			finalize_primnode((Node *) ((Join *) plan)->joinqual,
994
							  &context);
995
			finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
996
							  &context);
997
			break;
998

999
		case T_Hash:
1000
			finalize_primnode((Node *) ((Hash *) plan)->hashkeys,
1001
							  &context);
1002 1003 1004 1005 1006 1007 1008
			break;

		case T_Agg:
		case T_SeqScan:
		case T_Material:
		case T_Sort:
		case T_Unique:
1009
		case T_SetOp:
1010
		case T_Limit:
1011 1012
		case T_Group:
			break;
1013

1014
		default:
1015
			elog(ERROR, "finalize_plan: node %d unsupported",
1016
				 nodeTag(plan));
1017
	}
1018

1019
	/* Process left and right child plans, if any */
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
	context.paramids = bms_add_members(context.paramids,
									   finalize_plan(plan->lefttree,
													 rtable,
													 outer_params,
													 valid_params));

	context.paramids = bms_add_members(context.paramids,
									   finalize_plan(plan->righttree,
													 rtable,
													 outer_params,
													 valid_params));
1031

1032
	/* Now we have all the paramids */
1033

1034 1035
	if (!bms_is_subset(context.paramids, valid_params))
		elog(ERROR, "finalize_plan: plan shouldn't reference subplan's variable");
1036

1037 1038
	plan->extParam = bms_intersect(context.paramids, outer_params);
	plan->allParam = context.paramids;
1039

1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
	/*
	 * For speed at execution time, make sure extParam/allParam are actually
	 * NULL if they are empty sets.
	 */
	if (bms_is_empty(plan->extParam))
	{
		bms_free(plan->extParam);
		plan->extParam = NULL;
	}
	if (bms_is_empty(plan->allParam))
	{
		bms_free(plan->allParam);
		plan->allParam = NULL;
	}
1054

1055
	return plan->allParam;
1056
}
1057 1058

/*
1059 1060
 * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
 * expression tree to the result set.
1061 1062
 */
static bool
1063
finalize_primnode(Node *node, finalize_primnode_context *context)
1064 1065 1066 1067 1068 1069 1070 1071 1072
{
	if (node == NULL)
		return false;
	if (IsA(node, Param))
	{
		if (((Param *) node)->paramkind == PARAM_EXEC)
		{
			int			paramid = (int) ((Param *) node)->paramid;

1073
			context->paramids = bms_add_member(context->paramids, paramid);
1074 1075 1076 1077 1078 1079 1080
		}
		return false;			/* no more to do here */
	}
	if (is_subplan(node))
	{
		SubPlan	   *subplan = (SubPlan *) node;

1081 1082 1083 1084
		/* Add outer-level params needed by the subplan to paramids */
		context->paramids = bms_join(context->paramids,
									 bms_intersect(subplan->plan->extParam,
												   context->outer_params));
1085 1086 1087
		/* fall through to recurse into subplan args */
	}
	return expression_tree_walker(node, finalize_primnode,
1088
								  (void *) context);
1089
}