equivclass.c 73.5 KB
Newer Older
1 2 3 4 5 6 7 8
/*-------------------------------------------------------------------------
 *
 * equivclass.c
 *	  Routines for managing EquivalenceClasses
 *
 * See src/backend/optimizer/README for discussion of EquivalenceClasses.
 *
 *
9
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
10 11 12
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
13
 *	  src/backend/optimizer/path/equivclass.c
14 15 16 17 18 19
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/skey.h"
20 21
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
22
#include "nodes/nodeFuncs.h"
23
#include "optimizer/clauses.h"
24
#include "optimizer/pathnode.h"
25 26 27 28 29 30 31
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "utils/lsyscache.h"


32
static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
33
			  Expr *expr, Relids relids, Relids nullable_relids,
B
Bruce Momjian 已提交
34
			  bool is_child, Oid datatype);
35
static void generate_base_implied_equalities_const(PlannerInfo *root,
36
									   EquivalenceClass *ec);
37
static void generate_base_implied_equalities_no_const(PlannerInfo *root,
38
										  EquivalenceClass *ec);
39
static void generate_base_implied_equalities_broken(PlannerInfo *root,
40
										EquivalenceClass *ec);
41
static List *generate_join_implied_equalities_normal(PlannerInfo *root,
42
										EquivalenceClass *ec,
43 44 45
										Relids join_relids,
										Relids outer_relids,
										Relids inner_relids);
46
static List *generate_join_implied_equalities_broken(PlannerInfo *root,
47
										EquivalenceClass *ec,
48 49 50 51
										Relids nominal_join_relids,
										Relids outer_relids,
										Relids nominal_inner_relids,
										AppendRelInfo *inner_appinfo);
52
static Oid select_equality_operator(EquivalenceClass *ec,
B
Bruce Momjian 已提交
53
						 Oid lefttype, Oid righttype);
54
static RestrictInfo *create_join_clause(PlannerInfo *root,
55 56 57 58
				   EquivalenceClass *ec, Oid opno,
				   EquivalenceMember *leftem,
				   EquivalenceMember *rightem,
				   EquivalenceClass *parent_ec);
59
static bool reconsider_outer_join_clause(PlannerInfo *root,
B
Bruce Momjian 已提交
60 61
							 RestrictInfo *rinfo,
							 bool outer_on_left);
62
static bool reconsider_full_join_clause(PlannerInfo *root,
B
Bruce Momjian 已提交
63
							RestrictInfo *rinfo);
64 65 66 67 68 69 70 71 72 73 74 75 76


/*
 * process_equivalence
 *	  The given clause has a mergejoinable operator and can be applied without
 *	  any delay by an outer join, so its two sides can be considered equal
 *	  anywhere they are both computable; moreover that equality can be
 *	  extended transitively.  Record this knowledge in the EquivalenceClass
 *	  data structure.  Returns TRUE if successful, FALSE if not (in which
 *	  case caller should treat the clause as ordinary, not an equivalence).
 *
 * If below_outer_join is true, then the clause was found below the nullable
 * side of an outer join, so its sides might validly be both NULL rather than
B
Bruce Momjian 已提交
77
 * strictly equal.	We can still deduce equalities in such cases, but we take
78 79 80 81 82 83
 * care to mark an EquivalenceClass if it came from any such clauses.  Also,
 * we have to check that both sides are either pseudo-constants or strict
 * functions of Vars, else they might not both go to NULL above the outer
 * join.  (This is the reason why we need a failure return.  It's more
 * convenient to check this case here than at the call sites...)
 *
84 85 86 87
 * On success return, we have also initialized the clause's left_ec/right_ec
 * fields to point to the EquivalenceClass representing it.  This saves lookup
 * effort later.
 *
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
 * Note: constructing merged EquivalenceClasses is a standard UNION-FIND
 * problem, for which there exist better data structures than simple lists.
 * If this code ever proves to be a bottleneck then it could be sped up ---
 * but for now, simple is beautiful.
 *
 * Note: this is only called during planner startup, not during GEQO
 * exploration, so we need not worry about whether we're in the right
 * memory context.
 */
bool
process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
					bool below_outer_join)
{
	Expr	   *clause = restrictinfo->clause;
	Oid			opno,
103
				collation,
104 105 106 107 108
				item1_type,
				item2_type;
	Expr	   *item1;
	Expr	   *item2;
	Relids		item1_relids,
109 110 111
				item2_relids,
				item1_nullable_relids,
				item2_nullable_relids;
112 113 114
	List	   *opfamilies;
	EquivalenceClass *ec1,
			   *ec2;
115 116
	EquivalenceMember *em1,
			   *em2;
117 118
	ListCell   *lc1;

119 120 121 122
	/* Should not already be marked as having generated an eclass */
	Assert(restrictinfo->left_ec == NULL);
	Assert(restrictinfo->right_ec == NULL);

123 124 125
	/* Extract info from given clause */
	Assert(is_opclause(clause));
	opno = ((OpExpr *) clause)->opno;
126
	collation = ((OpExpr *) clause)->inputcollid;
127 128 129 130 131
	item1 = (Expr *) get_leftop(clause);
	item2 = (Expr *) get_rightop(clause);
	item1_relids = restrictinfo->left_relids;
	item2_relids = restrictinfo->right_relids;

132 133 134 135 136 137 138 139 140 141 142
	/*
	 * Ensure both input expressions expose the desired collation (their types
	 * should be OK already); see comments for canonicalize_ec_expression.
	 */
	item1 = canonicalize_ec_expression(item1,
									   exprType((Node *) item1),
									   collation);
	item2 = canonicalize_ec_expression(item2,
									   exprType((Node *) item2),
									   collation);

143
	/*
B
Bruce Momjian 已提交
144
	 * Reject clauses of the form X=X.	These are not as redundant as they
145
	 * might seem at first glance: assuming the operator is strict, this is
B
Bruce Momjian 已提交
146 147 148 149 150
	 * really an expensive way to write X IS NOT NULL.	So we must not risk
	 * just losing the clause, which would be possible if there is already a
	 * single-element EquivalenceClass containing X.  The case is not common
	 * enough to be worth contorting the EC machinery for, so just reject the
	 * clause and let it be processed as a normal restriction clause.
151 152 153 154
	 */
	if (equal(item1, item2))
		return false;			/* X=X is not a useful equivalence */

155 156 157 158 159 160 161 162 163 164 165 166 167
	/*
	 * If below outer join, check for strictness, else reject.
	 */
	if (below_outer_join)
	{
		if (!bms_is_empty(item1_relids) &&
			contain_nonstrict_functions((Node *) item1))
			return false;		/* LHS is non-strict but not constant */
		if (!bms_is_empty(item2_relids) &&
			contain_nonstrict_functions((Node *) item2))
			return false;		/* RHS is non-strict but not constant */
	}

168 169 170 171 172 173
	/* Calculate nullable-relid sets for each side of the clause */
	item1_nullable_relids = bms_intersect(item1_relids,
										  restrictinfo->nullable_relids);
	item2_nullable_relids = bms_intersect(item2_relids,
										  restrictinfo->nullable_relids);

174
	/*
B
Bruce Momjian 已提交
175 176 177 178 179 180
	 * We use the declared input types of the operator, not exprType() of the
	 * inputs, as the nominal datatypes for opfamily lookup.  This presumes
	 * that btree operators are always registered with amoplefttype and
	 * amoprighttype equal to their declared input types.  We will need this
	 * info anyway to build EquivalenceMember nodes, and by extracting it now
	 * we can use type comparisons to short-circuit some equal() tests.
181 182 183 184 185 186
	 */
	op_input_types(opno, &item1_type, &item2_type);

	opfamilies = restrictinfo->mergeopfamilies;

	/*
B
Bruce Momjian 已提交
187 188
	 * Sweep through the existing EquivalenceClasses looking for matches to
	 * item1 and item2.  These are the possible outcomes:
189
	 *
B
Bruce Momjian 已提交
190 191
	 * 1. We find both in the same EC.	The equivalence is already known, so
	 * there's nothing to do.
192 193 194 195 196
	 *
	 * 2. We find both in different ECs.  Merge the two ECs together.
	 *
	 * 3. We find just one.  Add the other to its EC.
	 *
B
Bruce Momjian 已提交
197
	 * 4. We find neither.	Make a new, two-entry EC.
198
	 *
199 200 201 202
	 * Note: since all ECs are built through this process or the similar
	 * search in get_eclass_for_sort_expr(), it's impossible that we'd match
	 * an item in more than one existing nonvolatile EC.  So it's okay to stop
	 * at the first match.
203 204
	 */
	ec1 = ec2 = NULL;
205
	em1 = em2 = NULL;
206 207 208 209 210 211 212 213 214
	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		ListCell   *lc2;

		/* Never match to a volatile EC */
		if (cur_ec->ec_has_volatile)
			continue;

215 216 217 218 219 220 221
		/*
		 * The collation has to match; check this first since it's cheaper
		 * than the opfamily comparison.
		 */
		if (collation != cur_ec->ec_collation)
			continue;

222 223 224 225 226 227 228 229 230 231 232 233
		/*
		 * A "match" requires matching sets of btree opfamilies.  Use of
		 * equal() for this test has implications discussed in the comments
		 * for get_mergejoin_opfamilies().
		 */
		if (!equal(opfamilies, cur_ec->ec_opfamilies))
			continue;

		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

B
Bruce Momjian 已提交
234
			Assert(!cur_em->em_is_child);		/* no children yet */
235 236

			/*
B
Bruce Momjian 已提交
237 238
			 * If below an outer join, don't match constants: they're not as
			 * constant as they look.
239 240 241 242 243 244 245 246 247 248
			 */
			if ((below_outer_join || cur_ec->ec_below_outer_join) &&
				cur_em->em_is_const)
				continue;

			if (!ec1 &&
				item1_type == cur_em->em_datatype &&
				equal(item1, cur_em->em_expr))
			{
				ec1 = cur_ec;
249
				em1 = cur_em;
250 251 252 253 254 255 256 257 258
				if (ec2)
					break;
			}

			if (!ec2 &&
				item2_type == cur_em->em_datatype &&
				equal(item2, cur_em->em_expr))
			{
				ec2 = cur_ec;
259
				em2 = cur_em;
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
				if (ec1)
					break;
			}
		}

		if (ec1 && ec2)
			break;
	}

	/* Sweep finished, what did we find? */

	if (ec1 && ec2)
	{
		/* If case 1, nothing to do, except add to sources */
		if (ec1 == ec2)
		{
			ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
			ec1->ec_below_outer_join |= below_outer_join;
278 279 280
			/* mark the RI as associated with this eclass */
			restrictinfo->left_ec = ec1;
			restrictinfo->right_ec = ec1;
281 282 283
			/* mark the RI as usable with this pair of EMs */
			restrictinfo->left_em = em1;
			restrictinfo->right_em = em2;
284 285 286 287
			return true;
		}

		/*
B
Bruce Momjian 已提交
288 289 290 291 292
		 * Case 2: need to merge ec1 and ec2.  We add ec2's items to ec1, then
		 * set ec2's ec_merged link to point to ec1 and remove ec2 from the
		 * eq_classes list.  We cannot simply delete ec2 because that could
		 * leave dangling pointers in existing PathKeys.  We leave it behind
		 * with a link so that the merged EC can be found.
293 294 295
		 */
		ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
		ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
296
		ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives);
297 298 299 300 301 302 303 304 305
		ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids);
		ec1->ec_has_const |= ec2->ec_has_const;
		/* can't need to set has_volatile */
		ec1->ec_below_outer_join |= ec2->ec_below_outer_join;
		ec2->ec_merged = ec1;
		root->eq_classes = list_delete_ptr(root->eq_classes, ec2);
		/* just to avoid debugging confusion w/ dangling pointers: */
		ec2->ec_members = NIL;
		ec2->ec_sources = NIL;
306
		ec2->ec_derives = NIL;
307 308 309
		ec2->ec_relids = NULL;
		ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
		ec1->ec_below_outer_join |= below_outer_join;
310 311 312
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec1;
		restrictinfo->right_ec = ec1;
313 314 315
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
316 317 318 319
	}
	else if (ec1)
	{
		/* Case 3: add item2 to ec1 */
320 321
		em2 = add_eq_member(ec1, item2, item2_relids, item2_nullable_relids,
							false, item2_type);
322 323
		ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
		ec1->ec_below_outer_join |= below_outer_join;
324 325 326
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec1;
		restrictinfo->right_ec = ec1;
327 328 329
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
330 331 332 333
	}
	else if (ec2)
	{
		/* Case 3: add item1 to ec2 */
334 335
		em1 = add_eq_member(ec2, item1, item1_relids, item1_nullable_relids,
							false, item1_type);
336 337
		ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
		ec2->ec_below_outer_join |= below_outer_join;
338 339 340
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec2;
		restrictinfo->right_ec = ec2;
341 342 343
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
344 345 346 347 348 349 350
	}
	else
	{
		/* Case 4: make a new, two-entry EC */
		EquivalenceClass *ec = makeNode(EquivalenceClass);

		ec->ec_opfamilies = opfamilies;
351
		ec->ec_collation = collation;
352 353
		ec->ec_members = NIL;
		ec->ec_sources = list_make1(restrictinfo);
354
		ec->ec_derives = NIL;
355 356 357 358 359
		ec->ec_relids = NULL;
		ec->ec_has_const = false;
		ec->ec_has_volatile = false;
		ec->ec_below_outer_join = below_outer_join;
		ec->ec_broken = false;
360
		ec->ec_sortref = 0;
361
		ec->ec_merged = NULL;
362 363 364 365
		em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids,
							false, item1_type);
		em2 = add_eq_member(ec, item2, item2_relids, item2_nullable_relids,
							false, item2_type);
366 367

		root->eq_classes = lappend(root->eq_classes, ec);
368

369 370 371
		/* mark the RI as associated with this eclass */
		restrictinfo->left_ec = ec;
		restrictinfo->right_ec = ec;
372 373 374
		/* mark the RI as usable with this pair of EMs */
		restrictinfo->left_em = em1;
		restrictinfo->right_em = em2;
375 376 377 378 379
	}

	return true;
}

380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
/*
 * canonicalize_ec_expression
 *
 * This function ensures that the expression exposes the expected type and
 * collation, so that it will be equal() to other equivalence-class expressions
 * that it ought to be equal() to.
 *
 * The rule for datatypes is that the exposed type should match what it would
 * be for an input to an operator of the EC's opfamilies; which is usually
 * the declared input type of the operator, but in the case of polymorphic
 * operators no relabeling is wanted (compare the behavior of parse_coerce.c).
 * Expressions coming in from quals will generally have the right type
 * already, but expressions coming from indexkeys may not (because they are
 * represented without any explicit relabel in pg_index), and the same problem
 * occurs for sort expressions (because the parser is likewise cavalier about
 * putting relabels on them).  Such cases will be binary-compatible with the
 * real operators, so adding a RelabelType is sufficient.
 *
 * Also, the expression's exposed collation must match the EC's collation.
 * This is important because in comparisons like "foo < bar COLLATE baz",
 * only one of the expressions has the correct exposed collation as we receive
401
 * it from the parser.	Forcing both of them to have it ensures that all
402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
 * variant spellings of such a construct behave the same.  Again, we can
 * stick on a RelabelType to force the right exposed collation.  (It might
 * work to not label the collation at all in EC members, but this is risky
 * since some parts of the system expect exprCollation() to deliver the
 * right answer for a sort key.)
 *
 * Note this code assumes that the expression has already been through
 * eval_const_expressions, so there are no CollateExprs and no redundant
 * RelabelTypes.
 */
Expr *
canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
{
	Oid			expr_type = exprType((Node *) expr);

	/*
	 * For a polymorphic-input-type opclass, just keep the same exposed type.
	 */
	if (IsPolymorphicType(req_type))
		req_type = expr_type;

	/*
	 * No work if the expression exposes the right type/collation already.
	 */
	if (expr_type != req_type ||
		exprCollation((Node *) expr) != req_collation)
	{
		/*
430 431
		 * Strip any existing RelabelType, then add a new one if needed. This
		 * is to preserve the invariant of no redundant RelabelTypes.
432 433 434
		 *
		 * If we have to change the exposed type of the stripped expression,
		 * set typmod to -1 (since the new type may not have the same typmod
435 436
		 * interpretation).  If we only have to change collation, preserve the
		 * exposed typmod.
437 438 439 440 441 442 443 444 445
		 */
		while (expr && IsA(expr, RelabelType))
			expr = (Expr *) ((RelabelType *) expr)->arg;

		if (exprType((Node *) expr) != req_type)
			expr = (Expr *) makeRelabelType(expr,
											req_type,
											-1,
											req_collation,
T
Tom Lane 已提交
446
											COERCE_IMPLICIT_CAST);
447 448 449 450 451
		else if (exprCollation((Node *) expr) != req_collation)
			expr = (Expr *) makeRelabelType(expr,
											req_type,
											exprTypmod((Node *) expr),
											req_collation,
T
Tom Lane 已提交
452
											COERCE_IMPLICIT_CAST);
453 454 455 456 457
	}

	return expr;
}

458 459 460
/*
 * add_eq_member - build a new EquivalenceMember and add it to an EC
 */
461
static EquivalenceMember *
462
add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
463
			  Relids nullable_relids, bool is_child, Oid datatype)
464 465 466 467 468
{
	EquivalenceMember *em = makeNode(EquivalenceMember);

	em->em_expr = expr;
	em->em_relids = relids;
469
	em->em_nullable_relids = nullable_relids;
470 471 472 473 474 475 476
	em->em_is_const = false;
	em->em_is_child = is_child;
	em->em_datatype = datatype;

	if (bms_is_empty(relids))
	{
		/*
B
Bruce Momjian 已提交
477 478 479 480
		 * No Vars, assume it's a pseudoconstant.  This is correct for entries
		 * generated from process_equivalence(), because a WHERE clause can't
		 * contain aggregates or SRFs, and non-volatility was checked before
		 * process_equivalence() ever got called.  But
481 482 483 484 485 486 487 488 489 490 491 492 493
		 * get_eclass_for_sort_expr() has to work harder.  We put the tests
		 * there not here to save cycles in the equivalence case.
		 */
		Assert(!is_child);
		em->em_is_const = true;
		ec->ec_has_const = true;
		/* it can't affect ec_relids */
	}
	else if (!is_child)			/* child members don't add to ec_relids */
	{
		ec->ec_relids = bms_add_members(ec->ec_relids, relids);
	}
	ec->ec_members = lappend(ec->ec_members, em);
494 495

	return em;
496 497 498 499 500
}


/*
 * get_eclass_for_sort_expr
501 502 503
 *	  Given an expression and opfamily/collation info, find an existing
 *	  equivalence class it is a member of; if none, optionally build a new
 *	  single-member EquivalenceClass for it.
504
 *
505
 * sortref is the SortGroupRef of the originating SortGroupClause, if any,
B
Bruce Momjian 已提交
506
 * or zero if not.	(It should never be zero if the expression is volatile!)
507
 *
508 509
 * If rel is not NULL, it identifies a specific relation we're considering
 * a path for, and indicates that child EC members for that relation can be
510
 * considered.	Otherwise child members are ignored.  (Note: since child EC
511 512 513
 * members aren't guaranteed unique, a non-NULL value means that there could
 * be more than one EC that matches the expression; if so it's order-dependent
 * which one you get.  This is annoying but it only happens in corner cases,
514
 * so for now we live with just reporting the first match.	See also
515 516
 * generate_implied_equalities_for_indexcol and match_pathkeys_to_index.)
 *
517 518 519
 * If create_it is TRUE, we'll build a new EquivalenceClass when there is no
 * match.  If create_it is FALSE, we just return NULL when no match.
 *
520 521
 * This can be used safely both before and after EquivalenceClass merging;
 * since it never causes merging it does not invalidate any existing ECs
522 523
 * or PathKeys.  However, ECs added after path generation has begun are
 * of limited usefulness, so usually it's best to create them beforehand.
524 525 526 527 528 529 530 531 532
 *
 * Note: opfamilies must be chosen consistently with the way
 * process_equivalence() would do; that is, generated from a mergejoinable
 * equality operator.  Else we might fail to detect valid equivalences,
 * generating poor (but not incorrect) plans.
 */
EquivalenceClass *
get_eclass_for_sort_expr(PlannerInfo *root,
						 Expr *expr,
533
						 List *opfamilies,
534 535
						 Oid opcintype,
						 Oid collation,
536
						 Index sortref,
537
						 Relids rel,
538
						 bool create_it)
539 540
{
	EquivalenceClass *newec;
541
	EquivalenceMember *newem;
542 543 544
	ListCell   *lc1;
	MemoryContext oldcontext;

545 546 547 548 549
	/*
	 * Ensure the expression exposes the correct type and collation.
	 */
	expr = canonicalize_ec_expression(expr, opcintype, collation);

550 551 552 553 554 555 556 557
	/*
	 * Scan through the existing EquivalenceClasses for a match
	 */
	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		ListCell   *lc2;

558 559 560 561 562 563
		/*
		 * Never match to a volatile EC, except when we are looking at another
		 * reference to the same volatile SortGroupClause.
		 */
		if (cur_ec->ec_has_volatile &&
			(sortref == 0 || sortref != cur_ec->ec_sortref))
564
			continue;
565

566 567
		if (collation != cur_ec->ec_collation)
			continue;
568 569 570 571 572 573 574
		if (!equal(opfamilies, cur_ec->ec_opfamilies))
			continue;

		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

575 576 577 578 579 580 581
			/*
			 * Ignore child members unless they match the request.
			 */
			if (cur_em->em_is_child &&
				!bms_equal(cur_em->em_relids, rel))
				continue;

582
			/*
B
Bruce Momjian 已提交
583 584
			 * If below an outer join, don't match constants: they're not as
			 * constant as they look.
585 586 587 588 589
			 */
			if (cur_ec->ec_below_outer_join &&
				cur_em->em_is_const)
				continue;

590
			if (opcintype == cur_em->em_datatype &&
591
				equal(expr, cur_em->em_expr))
B
Bruce Momjian 已提交
592
				return cur_ec;	/* Match! */
593 594 595
		}
	}

596 597 598 599
	/* No match; does caller want a NULL result? */
	if (!create_it)
		return NULL;

600
	/*
601
	 * OK, build a new single-member EC
602
	 *
603
	 * Here, we must be sure that we construct the EC in the right context.
604 605 606 607 608
	 */
	oldcontext = MemoryContextSwitchTo(root->planner_cxt);

	newec = makeNode(EquivalenceClass);
	newec->ec_opfamilies = list_copy(opfamilies);
609
	newec->ec_collation = collation;
610 611
	newec->ec_members = NIL;
	newec->ec_sources = NIL;
612
	newec->ec_derives = NIL;
613 614 615 616 617
	newec->ec_relids = NULL;
	newec->ec_has_const = false;
	newec->ec_has_volatile = contain_volatile_functions((Node *) expr);
	newec->ec_below_outer_join = false;
	newec->ec_broken = false;
618
	newec->ec_sortref = sortref;
619
	newec->ec_merged = NULL;
620

B
Bruce Momjian 已提交
621
	if (newec->ec_has_volatile && sortref == 0) /* should not happen */
622 623
		elog(ERROR, "volatile EquivalenceClass has no sortref");

624
	newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr),
625
						  NULL, false, opcintype);
626 627

	/*
628
	 * add_eq_member doesn't check for volatile functions, set-returning
629 630 631
	 * functions, aggregates, or window functions, but such could appear in
	 * sort expressions; so we have to check whether its const-marking was
	 * correct.
632 633 634
	 */
	if (newec->ec_has_const)
	{
635 636
		if (newec->ec_has_volatile ||
			expression_returns_set((Node *) expr) ||
T
Tom Lane 已提交
637 638
			contain_agg_clause((Node *) expr) ||
			contain_window_function((Node *) expr))
639 640
		{
			newec->ec_has_const = false;
641
			newem->em_is_const = false;
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
		}
	}

	root->eq_classes = lappend(root->eq_classes, newec);

	MemoryContextSwitchTo(oldcontext);

	return newec;
}


/*
 * generate_base_implied_equalities
 *	  Generate any restriction clauses that we can deduce from equivalence
 *	  classes.
 *
 * When an EC contains pseudoconstants, our strategy is to generate
 * "member = const1" clauses where const1 is the first constant member, for
B
Bruce Momjian 已提交
660
 * every other member (including other constants).	If we are able to do this
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
 * then we don't need any "var = var" comparisons because we've successfully
 * constrained all the vars at their points of creation.  If we fail to
 * generate any of these clauses due to lack of cross-type operators, we fall
 * back to the "ec_broken" strategy described below.  (XXX if there are
 * multiple constants of different types, it's possible that we might succeed
 * in forming all the required clauses if we started from a different const
 * member; but this seems a sufficiently hokey corner case to not be worth
 * spending lots of cycles on.)
 *
 * For ECs that contain no pseudoconstants, we generate derived clauses
 * "member1 = member2" for each pair of members belonging to the same base
 * relation (actually, if there are more than two for the same base relation,
 * we only need enough clauses to link each to each other).  This provides
 * the base case for the recursion: each row emitted by a base relation scan
 * will constrain all computable members of the EC to be equal.  As each
 * join path is formed, we'll add additional derived clauses on-the-fly
 * to maintain this invariant (see generate_join_implied_equalities).
 *
 * If the opfamilies used by the EC do not provide complete sets of cross-type
 * equality operators, it is possible that we will fail to generate a clause
 * that must be generated to maintain the invariant.  (An example: given
 * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot
 * generate "a.x = a.z" as a restriction clause for A.)  In this case we mark
 * the EC "ec_broken" and fall back to regurgitating its original source
B
Bruce Momjian 已提交
685
 * RestrictInfos at appropriate times.	We do not try to retract any derived
686 687 688 689 690 691 692 693
 * clauses already generated from the broken EC, so the resulting plan could
 * be poor due to bad selectivity estimates caused by redundant clauses.  But
 * the correct solution to that is to fix the opfamilies ...
 *
 * Equality clauses derived by this function are passed off to
 * process_implied_equality (in plan/initsplan.c) to be inserted into the
 * restrictinfo datastructures.  Note that this must be called after initial
 * scanning of the quals and before Path construction begins.
694 695 696 697 698 699
 *
 * We make no attempt to avoid generating duplicate RestrictInfos here: we
 * don't search ec_sources for matches, nor put the created RestrictInfos
 * into ec_derives.  Doing so would require some slightly ugly changes in
 * initsplan.c's API, and there's no real advantage, because the clauses
 * generated here can't duplicate anything we will generate for joins anyway.
700 701 702 703 704 705 706 707 708 709 710
 */
void
generate_base_implied_equalities(PlannerInfo *root)
{
	ListCell   *lc;
	Index		rti;

	foreach(lc, root->eq_classes)
	{
		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);

B
Bruce Momjian 已提交
711 712
		Assert(ec->ec_merged == NULL);	/* else shouldn't be in list */
		Assert(!ec->ec_broken); /* not yet anyway... */
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728

		/* Single-member ECs won't generate any deductions */
		if (list_length(ec->ec_members) <= 1)
			continue;

		if (ec->ec_has_const)
			generate_base_implied_equalities_const(root, ec);
		else
			generate_base_implied_equalities_no_const(root, ec);

		/* Recover if we failed to generate required derived clauses */
		if (ec->ec_broken)
			generate_base_implied_equalities_broken(root, ec);
	}

	/*
B
Bruce Momjian 已提交
729 730
	 * This is also a handy place to mark base rels (which should all exist by
	 * now) with flags showing whether they have pending eclass joins.
731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
	 */
	for (rti = 1; rti < root->simple_rel_array_size; rti++)
	{
		RelOptInfo *brel = root->simple_rel_array[rti];

		if (brel == NULL)
			continue;

		brel->has_eclass_joins = has_relevant_eclass_joinclause(root, brel);
	}
}

/*
 * generate_base_implied_equalities when EC contains pseudoconstant(s)
 */
static void
generate_base_implied_equalities_const(PlannerInfo *root,
748
									   EquivalenceClass *ec)
749 750 751 752
{
	EquivalenceMember *const_em = NULL;
	ListCell   *lc;

753
	/*
754 755 756 757 758
	 * In the trivial case where we just had one "var = const" clause, push
	 * the original clause back into the main planner machinery.  There is
	 * nothing to be gained by doing it differently, and we save the effort to
	 * re-build and re-analyze an equality clause that will be exactly
	 * equivalent to the old one.
759 760 761 762 763 764 765 766 767 768 769 770 771
	 */
	if (list_length(ec->ec_members) == 2 &&
		list_length(ec->ec_sources) == 1)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) linitial(ec->ec_sources);

		if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
		{
			distribute_restrictinfo_to_rels(root, restrictinfo);
			return;
		}
	}

772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
	/* Find the constant member to use */
	foreach(lc, ec->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);

		if (cur_em->em_is_const)
		{
			const_em = cur_em;
			break;
		}
	}
	Assert(const_em != NULL);

	/* Generate a derived equality against each other member */
	foreach(lc, ec->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
		Oid			eq_op;

B
Bruce Momjian 已提交
791
		Assert(!cur_em->em_is_child);	/* no children yet */
792 793 794 795 796 797 798 799 800 801 802
		if (cur_em == const_em)
			continue;
		eq_op = select_equality_operator(ec,
										 cur_em->em_datatype,
										 const_em->em_datatype);
		if (!OidIsValid(eq_op))
		{
			/* failed... */
			ec->ec_broken = true;
			break;
		}
803
		process_implied_equality(root, eq_op, ec->ec_collation,
804
								 cur_em->em_expr, const_em->em_expr,
805 806 807
								 bms_copy(ec->ec_relids),
								 bms_union(cur_em->em_nullable_relids,
										   const_em->em_nullable_relids),
808 809 810 811 812 813 814 815 816 817
								 ec->ec_below_outer_join,
								 cur_em->em_is_const);
	}
}

/*
 * generate_base_implied_equalities when EC contains no pseudoconstants
 */
static void
generate_base_implied_equalities_no_const(PlannerInfo *root,
818
										  EquivalenceClass *ec)
819 820 821 822 823 824 825 826
{
	EquivalenceMember **prev_ems;
	ListCell   *lc;

	/*
	 * We scan the EC members once and track the last-seen member for each
	 * base relation.  When we see another member of the same base relation,
	 * we generate "prev_mem = cur_mem".  This results in the minimum number
B
Bruce Momjian 已提交
827 828 829 830
	 * of derived clauses, but it's possible that it will fail when a
	 * different ordering would succeed.  XXX FIXME: use a UNION-FIND
	 * algorithm similar to the way we build merged ECs.  (Use a list-of-lists
	 * for each rel.)
831 832 833 834 835 836 837 838 839
	 */
	prev_ems = (EquivalenceMember **)
		palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *));

	foreach(lc, ec->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
		int			relid;

B
Bruce Momjian 已提交
840
		Assert(!cur_em->em_is_child);	/* no children yet */
841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859
		if (bms_membership(cur_em->em_relids) != BMS_SINGLETON)
			continue;
		relid = bms_singleton_member(cur_em->em_relids);
		Assert(relid < root->simple_rel_array_size);

		if (prev_ems[relid] != NULL)
		{
			EquivalenceMember *prev_em = prev_ems[relid];
			Oid			eq_op;

			eq_op = select_equality_operator(ec,
											 prev_em->em_datatype,
											 cur_em->em_datatype);
			if (!OidIsValid(eq_op))
			{
				/* failed... */
				ec->ec_broken = true;
				break;
			}
860
			process_implied_equality(root, eq_op, ec->ec_collation,
861
									 prev_em->em_expr, cur_em->em_expr,
862 863 864
									 bms_copy(ec->ec_relids),
									 bms_union(prev_em->em_nullable_relids,
											   cur_em->em_nullable_relids),
865 866 867 868 869 870 871 872 873
									 ec->ec_below_outer_join,
									 false);
		}
		prev_ems[relid] = cur_em;
	}

	pfree(prev_ems);

	/*
B
Bruce Momjian 已提交
874 875 876 877 878 879
	 * We also have to make sure that all the Vars used in the member clauses
	 * will be available at any join node we might try to reference them at.
	 * For the moment we force all the Vars to be available at all join nodes
	 * for this eclass.  Perhaps this could be improved by doing some
	 * pre-analysis of which members we prefer to join, but it's no worse than
	 * what happened in the pre-8.3 code.
880 881 882 883
	 */
	foreach(lc, ec->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
884
		List	   *vars = pull_var_clause((Node *) cur_em->em_expr,
885
										   PVC_RECURSE_AGGREGATES,
886
										   PVC_INCLUDE_PLACEHOLDERS);
887

888
		add_vars_to_targetlist(root, vars, ec->ec_relids, false);
889 890 891 892 893 894 895 896 897 898 899
		list_free(vars);
	}
}

/*
 * generate_base_implied_equalities cleanup after failure
 *
 * What we must do here is push any zero- or one-relation source RestrictInfos
 * of the EC back into the main restrictinfo datastructures.  Multi-relation
 * clauses will be regurgitated later by generate_join_implied_equalities().
 * (We do it this way to maintain continuity with the case that ec_broken
900 901 902 903 904 905
 * becomes set only after we've gone up a join level or two.)  However, for
 * an EC that contains constants, we can adopt a simpler strategy and just
 * throw back all the source RestrictInfos immediately; that works because
 * we know that such an EC can't become broken later.  (This rule justifies
 * ignoring ec_has_const ECs in generate_join_implied_equalities, even when
 * they are broken.)
906 907 908
 */
static void
generate_base_implied_equalities_broken(PlannerInfo *root,
909
										EquivalenceClass *ec)
910 911 912 913 914 915 916
{
	ListCell   *lc;

	foreach(lc, ec->ec_sources)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);

917 918
		if (ec->ec_has_const ||
			bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE)
919 920 921 922 923 924 925 926 927 928 929 930 931
			distribute_restrictinfo_to_rels(root, restrictinfo);
	}
}


/*
 * generate_join_implied_equalities
 *	  Generate any join clauses that we can deduce from equivalence classes.
 *
 * At a join node, we must enforce restriction clauses sufficient to ensure
 * that all equivalence-class members computable at that node are equal.
 * Since the set of clauses to enforce can vary depending on which subset
 * relations are the inputs, we have to compute this afresh for each join
932 933 934 935 936 937 938 939 940 941
 * relation pair.  Hence a fresh List of RestrictInfo nodes is built and
 * passed back on each call.
 *
 * In addition to its use at join nodes, this can be applied to generate
 * eclass-based join clauses for use in a parameterized scan of a base rel.
 * The reason for the asymmetry of specifying the inner rel as a RelOptInfo
 * and the outer rel by Relids is that this usage occurs before we have
 * built any join RelOptInfos.
 *
 * An annoying special case for parameterized scans is that the inner rel can
942 943
 * be an appendrel child (an "other rel").	In this case we must generate
 * appropriate clauses using child EC members.	add_child_rel_equivalences
944
 * must already have been done for the child rel.
945 946 947
 *
 * The results are sufficient for use in merge, hash, and plain nestloop join
 * methods.  We do not worry here about selecting clauses that are optimal
948 949 950 951
 * for use in a parameterized indexscan.  indxpath.c makes its own selections
 * of clauses to use, and if the ones we pick here are redundant with those,
 * the extras will be eliminated at createplan time, using the parent_ec
 * markers that we provide (see is_redundant_derived_clause()).
952 953 954 955 956
 *
 * Because the same join clauses are likely to be needed multiple times as
 * we consider different join paths, we avoid generating multiple copies:
 * whenever we select a particular pair of EquivalenceMembers to join,
 * we check to see if the pair matches any original clause (in ec_sources)
B
Bruce Momjian 已提交
957
 * or previously-built clause (in ec_derives).	This saves memory and allows
958
 * re-use of information cached in RestrictInfos.
959 960 961 962
 *
 * join_relids should always equal bms_union(outer_relids, inner_rel->relids).
 * We could simplify this function's API by computing it internally, but in
 * all current uses, the caller has the value at hand anyway.
963 964 965
 */
List *
generate_join_implied_equalities(PlannerInfo *root,
966 967
								 Relids join_relids,
								 Relids outer_relids,
968 969 970
								 RelOptInfo *inner_rel)
{
	List	   *result = NIL;
971 972 973 974
	Relids		inner_relids = inner_rel->relids;
	Relids		nominal_inner_relids;
	Relids		nominal_join_relids;
	AppendRelInfo *inner_appinfo;
975 976
	ListCell   *lc;

977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
	/* If inner rel is a child, extra setup work is needed */
	if (inner_rel->reloptkind == RELOPT_OTHER_MEMBER_REL)
	{
		/* Lookup parent->child translation data */
		inner_appinfo = find_childrel_appendrelinfo(root, inner_rel);
		/* Construct relids for the parent rel */
		nominal_inner_relids = bms_make_singleton(inner_appinfo->parent_relid);
		/* ECs will be marked with the parent's relid, not the child's */
		nominal_join_relids = bms_union(outer_relids, nominal_inner_relids);
	}
	else
	{
		inner_appinfo = NULL;
		nominal_inner_relids = inner_relids;
		nominal_join_relids = join_relids;
	}

994 995 996
	foreach(lc, root->eq_classes)
	{
		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc);
B
Bruce Momjian 已提交
997
		List	   *sublist = NIL;
998 999 1000 1001 1002 1003 1004 1005 1006 1007

		/* ECs containing consts do not need any further enforcement */
		if (ec->ec_has_const)
			continue;

		/* Single-member ECs won't generate any deductions */
		if (list_length(ec->ec_members) <= 1)
			continue;

		/* We can quickly ignore any that don't overlap the join, too */
1008
		if (!bms_overlap(ec->ec_relids, nominal_join_relids))
1009 1010 1011 1012 1013
			continue;

		if (!ec->ec_broken)
			sublist = generate_join_implied_equalities_normal(root,
															  ec,
1014 1015 1016
															  join_relids,
															  outer_relids,
															  inner_relids);
1017 1018 1019 1020 1021

		/* Recover if we failed to generate required derived clauses */
		if (ec->ec_broken)
			sublist = generate_join_implied_equalities_broken(root,
															  ec,
1022
														 nominal_join_relids,
1023
															  outer_relids,
1024
														nominal_inner_relids,
1025
															  inner_appinfo);
1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037

		result = list_concat(result, sublist);
	}

	return result;
}

/*
 * generate_join_implied_equalities for a still-valid EC
 */
static List *
generate_join_implied_equalities_normal(PlannerInfo *root,
1038
										EquivalenceClass *ec,
1039 1040 1041
										Relids join_relids,
										Relids outer_relids,
										Relids inner_relids)
1042 1043 1044 1045 1046 1047 1048 1049
{
	List	   *result = NIL;
	List	   *new_members = NIL;
	List	   *outer_members = NIL;
	List	   *inner_members = NIL;
	ListCell   *lc1;

	/*
B
Bruce Momjian 已提交
1050 1051 1052 1053 1054 1055 1056
	 * First, scan the EC to identify member values that are computable at the
	 * outer rel, at the inner rel, or at this relation but not in either
	 * input rel.  The outer-rel members should already be enforced equal,
	 * likewise for the inner-rel members.	We'll need to create clauses to
	 * enforce that any newly computable members are all equal to each other
	 * as well as to at least one input member, plus enforce at least one
	 * outer-rel member equal to at least one inner-rel member.
1057 1058 1059 1060 1061
	 */
	foreach(lc1, ec->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);

1062 1063 1064 1065 1066 1067 1068
		/*
		 * We don't need to check explicitly for child EC members.  This test
		 * against join_relids will cause them to be ignored except when
		 * considering a child inner rel, which is what we want.
		 */
		if (!bms_is_subset(cur_em->em_relids, join_relids))
			continue;			/* not computable yet, or wrong child */
1069

1070
		if (bms_is_subset(cur_em->em_relids, outer_relids))
1071
			outer_members = lappend(outer_members, cur_em);
1072
		else if (bms_is_subset(cur_em->em_relids, inner_relids))
1073 1074 1075 1076 1077 1078
			inner_members = lappend(inner_members, cur_em);
		else
			new_members = lappend(new_members, cur_em);
	}

	/*
B
Bruce Momjian 已提交
1079
	 * First, select the joinclause if needed.	We can equate any one outer
1080
	 * member to any one inner member, but we have to find a datatype
B
Bruce Momjian 已提交
1081 1082 1083
	 * combination for which an opfamily member operator exists.  If we have
	 * choices, we prefer simple Var members (possibly with RelabelType) since
	 * these are (a) cheapest to compute at runtime and (b) most likely to
1084 1085
	 * have useful statistics. Also, prefer operators that are also
	 * hashjoinable.
1086 1087 1088 1089 1090
	 */
	if (outer_members && inner_members)
	{
		EquivalenceMember *best_outer_em = NULL;
		EquivalenceMember *best_inner_em = NULL;
B
Bruce Momjian 已提交
1091 1092
		Oid			best_eq_op = InvalidOid;
		int			best_score = -1;
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102
		RestrictInfo *rinfo;

		foreach(lc1, outer_members)
		{
			EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1);
			ListCell   *lc2;

			foreach(lc2, inner_members)
			{
				EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2);
B
Bruce Momjian 已提交
1103 1104
				Oid			eq_op;
				int			score;
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119

				eq_op = select_equality_operator(ec,
												 outer_em->em_datatype,
												 inner_em->em_datatype);
				if (!OidIsValid(eq_op))
					continue;
				score = 0;
				if (IsA(outer_em->em_expr, Var) ||
					(IsA(outer_em->em_expr, RelabelType) &&
					 IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
					score++;
				if (IsA(inner_em->em_expr, Var) ||
					(IsA(inner_em->em_expr, RelabelType) &&
					 IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
					score++;
1120
				if (op_hashjoinable(eq_op,
T
Tom Lane 已提交
1121
									exprType((Node *) outer_em->em_expr)))
1122 1123 1124 1125 1126 1127 1128 1129
					score++;
				if (score > best_score)
				{
					best_outer_em = outer_em;
					best_inner_em = inner_em;
					best_eq_op = eq_op;
					best_score = score;
					if (best_score == 3)
B
Bruce Momjian 已提交
1130
						break;	/* no need to look further */
1131 1132 1133
				}
			}
			if (best_score == 3)
B
Bruce Momjian 已提交
1134
				break;			/* no need to look further */
1135 1136 1137 1138 1139 1140 1141 1142
		}
		if (best_score < 0)
		{
			/* failed... */
			ec->ec_broken = true;
			return NIL;
		}

1143 1144 1145 1146 1147 1148 1149
		/*
		 * Create clause, setting parent_ec to mark it as redundant with other
		 * joinclauses
		 */
		rinfo = create_join_clause(root, ec, best_eq_op,
								   best_outer_em, best_inner_em,
								   ec);
1150 1151 1152 1153 1154 1155 1156 1157 1158

		result = lappend(result, rinfo);
	}

	/*
	 * Now deal with building restrictions for any expressions that involve
	 * Vars from both sides of the join.  We have to equate all of these to
	 * each other as well as to at least one old member (if any).
	 *
B
Bruce Momjian 已提交
1159 1160
	 * XXX as in generate_base_implied_equalities_no_const, we could be a lot
	 * smarter here to avoid unnecessary failures in cross-type situations.
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190
	 * For now, use the same left-to-right method used there.
	 */
	if (new_members)
	{
		List	   *old_members = list_concat(outer_members, inner_members);
		EquivalenceMember *prev_em = NULL;
		RestrictInfo *rinfo;

		/* For now, arbitrarily take the first old_member as the one to use */
		if (old_members)
			new_members = lappend(new_members, linitial(old_members));

		foreach(lc1, new_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1);

			if (prev_em != NULL)
			{
				Oid			eq_op;

				eq_op = select_equality_operator(ec,
												 prev_em->em_datatype,
												 cur_em->em_datatype);
				if (!OidIsValid(eq_op))
				{
					/* failed... */
					ec->ec_broken = true;
					return NIL;
				}
				/* do NOT set parent_ec, this qual is not redundant! */
1191 1192 1193
				rinfo = create_join_clause(root, ec, eq_op,
										   prev_em, cur_em,
										   NULL);
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207

				result = lappend(result, rinfo);
			}
			prev_em = cur_em;
		}
	}

	return result;
}

/*
 * generate_join_implied_equalities cleanup after failure
 *
 * Return any original RestrictInfos that are enforceable at this join.
1208 1209 1210
 *
 * In the case of a child inner relation, we have to translate the
 * original RestrictInfos from parent to child Vars.
1211 1212 1213
 */
static List *
generate_join_implied_equalities_broken(PlannerInfo *root,
1214
										EquivalenceClass *ec,
1215 1216 1217 1218
										Relids nominal_join_relids,
										Relids outer_relids,
										Relids nominal_inner_relids,
										AppendRelInfo *inner_appinfo)
1219 1220 1221 1222 1223 1224 1225
{
	List	   *result = NIL;
	ListCell   *lc;

	foreach(lc, ec->ec_sources)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1226
		Relids		clause_relids = restrictinfo->required_relids;
1227

1228 1229 1230
		if (bms_is_subset(clause_relids, nominal_join_relids) &&
			!bms_is_subset(clause_relids, outer_relids) &&
			!bms_is_subset(clause_relids, nominal_inner_relids))
1231 1232 1233
			result = lappend(result, restrictinfo);
	}

1234 1235 1236
	/*
	 * If we have to translate, just brute-force apply adjust_appendrel_attrs
	 * to all the RestrictInfos at once.  This will result in returning
1237 1238 1239
	 * RestrictInfos that are not listed in ec_derives, but there shouldn't be
	 * any duplication, and it's a sufficiently narrow corner case that we
	 * shouldn't sweat too much over it anyway.
1240 1241 1242 1243 1244
	 */
	if (inner_appinfo)
		result = (List *) adjust_appendrel_attrs(root, (Node *) result,
												 inner_appinfo);

1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255
	return result;
}


/*
 * select_equality_operator
 *	  Select a suitable equality operator for comparing two EC members
 *
 * Returns InvalidOid if no operator can be found for this datatype combination
 */
static Oid
1256
select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype)
1257 1258 1259 1260 1261
{
	ListCell   *lc;

	foreach(lc, ec->ec_opfamilies)
	{
B
Bruce Momjian 已提交
1262 1263
		Oid			opfamily = lfirst_oid(lc);
		Oid			opno;
1264 1265 1266 1267 1268 1269 1270 1271 1272 1273

		opno = get_opfamily_member(opfamily, lefttype, righttype,
								   BTEqualStrategyNumber);
		if (OidIsValid(opno))
			return opno;
	}
	return InvalidOid;
}


1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
/*
 * create_join_clause
 *	  Find or make a RestrictInfo comparing the two given EC members
 *	  with the given operator.
 *
 * parent_ec is either equal to ec (if the clause is a potentially-redundant
 * join clause) or NULL (if not).  We have to treat this as part of the
 * match requirements --- it's possible that a clause comparing the same two
 * EMs is a join clause in one join path and a restriction clause in another.
 */
static RestrictInfo *
create_join_clause(PlannerInfo *root,
1286 1287 1288 1289
				   EquivalenceClass *ec, Oid opno,
				   EquivalenceMember *leftem,
				   EquivalenceMember *rightem,
				   EquivalenceClass *parent_ec)
1290 1291 1292 1293 1294 1295 1296
{
	RestrictInfo *rinfo;
	ListCell   *lc;
	MemoryContext oldcontext;

	/*
	 * Search to see if we already built a RestrictInfo for this pair of
B
Bruce Momjian 已提交
1297 1298
	 * EquivalenceMembers.	We can use either original source clauses or
	 * previously-derived clauses.	The check on opno is probably redundant,
1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321
	 * but be safe ...
	 */
	foreach(lc, ec->ec_sources)
	{
		rinfo = (RestrictInfo *) lfirst(lc);
		if (rinfo->left_em == leftem &&
			rinfo->right_em == rightem &&
			rinfo->parent_ec == parent_ec &&
			opno == ((OpExpr *) rinfo->clause)->opno)
			return rinfo;
	}

	foreach(lc, ec->ec_derives)
	{
		rinfo = (RestrictInfo *) lfirst(lc);
		if (rinfo->left_em == leftem &&
			rinfo->right_em == rightem &&
			rinfo->parent_ec == parent_ec &&
			opno == ((OpExpr *) rinfo->clause)->opno)
			return rinfo;
	}

	/*
B
Bruce Momjian 已提交
1322 1323
	 * Not there, so build it, in planner context so we can re-use it. (Not
	 * important in normal planning, but definitely so in GEQO.)
1324 1325 1326 1327
	 */
	oldcontext = MemoryContextSwitchTo(root->planner_cxt);

	rinfo = build_implied_join_equality(opno,
1328
										ec->ec_collation,
1329 1330
										leftem->em_expr,
										rightem->em_expr,
1331
										bms_union(leftem->em_relids,
1332 1333 1334
												  rightem->em_relids),
										bms_union(leftem->em_nullable_relids,
											   rightem->em_nullable_relids));
1335 1336 1337 1338 1339

	/* Mark the clause as redundant, or not */
	rinfo->parent_ec = parent_ec;

	/*
1340 1341
	 * We know the correct values for left_ec/right_ec, ie this particular EC,
	 * so we can just set them directly instead of forcing another lookup.
1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357
	 */
	rinfo->left_ec = ec;
	rinfo->right_ec = ec;

	/* Mark it as usable with these EMs */
	rinfo->left_em = leftem;
	rinfo->right_em = rightem;
	/* and save it for possible re-use */
	ec->ec_derives = lappend(ec->ec_derives, rinfo);

	MemoryContextSwitchTo(oldcontext);

	return rinfo;
}


1358 1359 1360
/*
 * reconsider_outer_join_clauses
 *	  Re-examine any outer-join clauses that were set aside by
1361 1362 1363
 *	  distribute_qual_to_rels(), and see if we can derive any
 *	  EquivalenceClasses from them.  Then, if they were not made
 *	  redundant, push them out into the regular join-clause lists.
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379
 *
 * When we have mergejoinable clauses A = B that are outer-join clauses,
 * we can't blindly combine them with other clauses A = C to deduce B = C,
 * since in fact the "equality" A = B won't necessarily hold above the
 * outer join (one of the variables might be NULL instead).  Nonetheless
 * there are cases where we can add qual clauses using transitivity.
 *
 * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR
 * for which there is also an equivalence clause OUTERVAR = CONSTANT.
 * It is safe and useful to push a clause INNERVAR = CONSTANT into the
 * evaluation of the inner (nullable) relation, because any inner rows not
 * meeting this condition will not contribute to the outer-join result anyway.
 * (Any outer rows they could join to will be eliminated by the pushed-down
 * equivalence clause.)
 *
 * Note that the above rule does not work for full outer joins; nor is it
1380 1381
 * very interesting to consider cases where the generated equivalence clause
 * would involve relations outside the outer join, since such clauses couldn't
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408
 * be pushed into the inner side's scan anyway.  So the restriction to
 * outervar = pseudoconstant is not really giving up anything.
 *
 * For full-join cases, we can only do something useful if it's a FULL JOIN
 * USING and a merged column has an equivalence MERGEDVAR = CONSTANT.
 * By the time it gets here, the merged column will look like
 *		COALESCE(LEFTVAR, RIGHTVAR)
 * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match
 * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT
 * and RIGHTVAR = CONSTANT into the input relations, since any rows not
 * meeting these conditions cannot contribute to the join result.
 *
 * Again, there isn't any traction to be gained by trying to deal with
 * clauses comparing a mergedvar to a non-pseudoconstant.  So we can make
 * use of the EquivalenceClasses to search for matching variables that were
 * equivalenced to constants.  The interesting outer-join clauses were
 * accumulated for us by distribute_qual_to_rels.
 *
 * When we find one of these cases, we implement the changes we want by
 * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc)
 * and pushing it into the EquivalenceClass structures.  This is because we
 * may already know that INNERVAR is equivalenced to some other var(s), and
 * we'd like the constant to propagate to them too.  Note that it would be
 * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC ---
 * that could result in propagating constant restrictions from
 * INNERVAR to OUTERVAR, which would be very wrong.
 *
1409 1410 1411 1412 1413 1414
 * It's possible that the INNERVAR is also an OUTERVAR for some other
 * outer-join clause, in which case the process can be repeated.  So we repeat
 * looping over the lists of clauses until no further deductions can be made.
 * Whenever we do make a deduction, we remove the generating clause from the
 * lists, since we don't want to make the same deduction twice.
 *
1415 1416
 * If we don't find any match for a set-aside outer join clause, we must
 * throw it back into the regular joinclause processing by passing it to
1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
 * distribute_restrictinfo_to_rels().  If we do generate a derived clause,
 * however, the outer-join clause is redundant.  We still throw it back,
 * because otherwise the join will be seen as a clauseless join and avoided
 * during join order searching; but we mark it as redundant to keep from
 * messing up the joinrel's size estimate.  (This behavior means that the
 * API for this routine is uselessly complex: we could have just put all
 * the clauses into the regular processing initially.  We keep it because
 * someday we might want to do something else, such as inserting "dummy"
 * joinclauses instead of real ones.)
 *
 * Outer join clauses that are marked outerjoin_delayed are special: this
 * condition means that one or both VARs might go to null due to a lower
1429
 * outer join.	We can still push a constant through the clause, but only
1430 1431 1432 1433 1434
 * if its operator is strict; and we *have to* throw the clause back into
 * regular joinclause processing.  By keeping the strict join clause,
 * we ensure that any null-extended rows that are mistakenly generated due
 * to suppressing rows not matching the constant will be rejected at the
 * upper outer join.  (This doesn't work for full-join clauses.)
1435 1436 1437 1438
 */
void
reconsider_outer_join_clauses(PlannerInfo *root)
{
1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
	bool		found;
	ListCell   *cell;
	ListCell   *prev;
	ListCell   *next;

	/* Outer loop repeats until we find no more deductions */
	do
	{
		found = false;

		/* Process the LEFT JOIN clauses */
		prev = NULL;
		for (cell = list_head(root->left_join_clauses); cell; cell = next)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

			next = lnext(cell);
			if (reconsider_outer_join_clause(root, rinfo, true))
			{
				found = true;
				/* remove it from the list */
				root->left_join_clauses =
					list_delete_cell(root->left_join_clauses, cell, prev);
				/* we throw it back anyway (see notes above) */
				/* but the thrown-back clause has no extra selectivity */
1464 1465
				rinfo->norm_selec = 2.0;
				rinfo->outer_selec = 1.0;
1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
				distribute_restrictinfo_to_rels(root, rinfo);
			}
			else
				prev = cell;
		}

		/* Process the RIGHT JOIN clauses */
		prev = NULL;
		for (cell = list_head(root->right_join_clauses); cell; cell = next)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

			next = lnext(cell);
			if (reconsider_outer_join_clause(root, rinfo, false))
			{
				found = true;
				/* remove it from the list */
				root->right_join_clauses =
					list_delete_cell(root->right_join_clauses, cell, prev);
				/* we throw it back anyway (see notes above) */
				/* but the thrown-back clause has no extra selectivity */
1487 1488
				rinfo->norm_selec = 2.0;
				rinfo->outer_selec = 1.0;
1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509
				distribute_restrictinfo_to_rels(root, rinfo);
			}
			else
				prev = cell;
		}

		/* Process the FULL JOIN clauses */
		prev = NULL;
		for (cell = list_head(root->full_join_clauses); cell; cell = next)
		{
			RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

			next = lnext(cell);
			if (reconsider_full_join_clause(root, rinfo))
			{
				found = true;
				/* remove it from the list */
				root->full_join_clauses =
					list_delete_cell(root->full_join_clauses, cell, prev);
				/* we throw it back anyway (see notes above) */
				/* but the thrown-back clause has no extra selectivity */
1510 1511
				rinfo->norm_selec = 2.0;
				rinfo->outer_selec = 1.0;
1512 1513 1514 1515 1516 1517
				distribute_restrictinfo_to_rels(root, rinfo);
			}
			else
				prev = cell;
		}
	} while (found);
1518

1519 1520
	/* Now, any remaining clauses have to be thrown back */
	foreach(cell, root->left_join_clauses)
1521
	{
1522
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
1523

1524
		distribute_restrictinfo_to_rels(root, rinfo);
1525
	}
1526
	foreach(cell, root->right_join_clauses)
1527
	{
1528
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
1529

1530
		distribute_restrictinfo_to_rels(root, rinfo);
1531
	}
1532
	foreach(cell, root->full_join_clauses)
1533
	{
1534
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
1535

1536
		distribute_restrictinfo_to_rels(root, rinfo);
1537 1538 1539 1540 1541
	}
}

/*
 * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause
1542 1543
 *
 * Returns TRUE if we were able to propagate a constant through the clause.
1544
 */
1545
static bool
1546 1547 1548 1549 1550
reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
							 bool outer_on_left)
{
	Expr	   *outervar,
			   *innervar;
1551
	Oid			opno,
1552
				collation,
1553
				left_type,
1554 1555
				right_type,
				inner_datatype;
1556 1557
	Relids		inner_relids,
				inner_nullable_relids;
1558 1559 1560
	ListCell   *lc1;

	Assert(is_opclause(rinfo->clause));
1561
	opno = ((OpExpr *) rinfo->clause)->opno;
1562
	collation = ((OpExpr *) rinfo->clause)->inputcollid;
1563 1564 1565 1566 1567 1568 1569

	/* If clause is outerjoin_delayed, operator must be strict */
	if (rinfo->outerjoin_delayed && !op_strict(opno))
		return false;

	/* Extract needed info from the clause */
	op_input_types(opno, &left_type, &right_type);
1570 1571 1572 1573 1574
	if (outer_on_left)
	{
		outervar = (Expr *) get_leftop(rinfo->clause);
		innervar = (Expr *) get_rightop(rinfo->clause);
		inner_datatype = right_type;
1575
		inner_relids = rinfo->right_relids;
1576 1577 1578 1579 1580 1581
	}
	else
	{
		outervar = (Expr *) get_rightop(rinfo->clause);
		innervar = (Expr *) get_leftop(rinfo->clause);
		inner_datatype = left_type;
1582
		inner_relids = rinfo->left_relids;
1583
	}
1584 1585
	inner_nullable_relids = bms_intersect(inner_relids,
										  rinfo->nullable_relids);
1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599

	/* Scan EquivalenceClasses for a match to outervar */
	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		bool		match;
		ListCell   *lc2;

		/* Ignore EC unless it contains pseudoconstants */
		if (!cur_ec->ec_has_const)
			continue;
		/* Never match to a volatile EC */
		if (cur_ec->ec_has_volatile)
			continue;
1600 1601 1602
		/* It has to match the outer-join clause as to semantics, too */
		if (collation != cur_ec->ec_collation)
			continue;
1603 1604 1605 1606 1607 1608 1609 1610
		if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
			continue;
		/* Does it contain a match to outervar? */
		match = false;
		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

1611
			Assert(!cur_em->em_is_child);		/* no children yet */
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621
			if (equal(outervar, cur_em->em_expr))
			{
				match = true;
				break;
			}
		}
		if (!match)
			continue;			/* no match, so ignore this EC */

		/*
B
Bruce Momjian 已提交
1622 1623 1624
		 * Yes it does!  Try to generate a clause INNERVAR = CONSTANT for each
		 * CONSTANT in the EC.	Note that we must succeed with at least one
		 * constant before we can decide to throw away the outer-join clause.
1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640
		 */
		match = false;
		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
			Oid			eq_op;
			RestrictInfo *newrinfo;

			if (!cur_em->em_is_const)
				continue;		/* ignore non-const members */
			eq_op = select_equality_operator(cur_ec,
											 inner_datatype,
											 cur_em->em_datatype);
			if (!OidIsValid(eq_op))
				continue;		/* can't generate equality */
			newrinfo = build_implied_join_equality(eq_op,
1641
												   cur_ec->ec_collation,
1642 1643
												   innervar,
												   cur_em->em_expr,
1644 1645
												   bms_copy(inner_relids),
											bms_copy(inner_nullable_relids));
1646 1647 1648 1649 1650
			if (process_equivalence(root, newrinfo, true))
				match = true;
		}

		/*
1651 1652 1653
		 * If we were able to equate INNERVAR to any constant, report success.
		 * Otherwise, fall out of the search loop, since we know the OUTERVAR
		 * appears in at most one EC.
1654 1655
		 */
		if (match)
1656
			return true;
1657 1658 1659 1660
		else
			break;
	}

1661
	return false;				/* failed to make any deduction */
1662 1663 1664 1665
}

/*
 * reconsider_outer_join_clauses for a single FULL JOIN clause
1666 1667
 *
 * Returns TRUE if we were able to propagate a constant through the clause.
1668
 */
1669
static bool
1670 1671 1672 1673
reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
{
	Expr	   *leftvar;
	Expr	   *rightvar;
1674
	Oid			opno,
1675
				collation,
1676
				left_type,
1677
				right_type;
1678
	Relids		left_relids,
1679 1680 1681
				right_relids,
				left_nullable_relids,
				right_nullable_relids;
1682 1683
	ListCell   *lc1;

1684 1685 1686 1687
	/* Can't use an outerjoin_delayed clause here */
	if (rinfo->outerjoin_delayed)
		return false;

1688 1689
	/* Extract needed info from the clause */
	Assert(is_opclause(rinfo->clause));
1690
	opno = ((OpExpr *) rinfo->clause)->opno;
1691
	collation = ((OpExpr *) rinfo->clause)->inputcollid;
1692
	op_input_types(opno, &left_type, &right_type);
1693 1694
	leftvar = (Expr *) get_leftop(rinfo->clause);
	rightvar = (Expr *) get_rightop(rinfo->clause);
1695 1696
	left_relids = rinfo->left_relids;
	right_relids = rinfo->right_relids;
1697 1698 1699 1700
	left_nullable_relids = bms_intersect(left_relids,
										 rinfo->nullable_relids);
	right_nullable_relids = bms_intersect(right_relids,
										  rinfo->nullable_relids);
1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		EquivalenceMember *coal_em = NULL;
		bool		match;
		bool		matchleft;
		bool		matchright;
		ListCell   *lc2;

		/* Ignore EC unless it contains pseudoconstants */
		if (!cur_ec->ec_has_const)
			continue;
		/* Never match to a volatile EC */
		if (cur_ec->ec_has_volatile)
			continue;
1717 1718 1719
		/* It has to match the outer-join clause as to semantics, too */
		if (collation != cur_ec->ec_collation)
			continue;
1720 1721 1722 1723 1724 1725
		if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies))
			continue;

		/*
		 * Does it contain a COALESCE(leftvar, rightvar) construct?
		 *
B
Bruce Momjian 已提交
1726 1727 1728
		 * We can assume the COALESCE() inputs are in the same order as the
		 * join clause, since both were automatically generated in the cases
		 * we care about.
1729
		 *
B
Bruce Momjian 已提交
1730 1731 1732 1733 1734
		 * XXX currently this may fail to match in cross-type cases because
		 * the COALESCE will contain typecast operations while the join clause
		 * may not (if there is a cross-type mergejoin operator available for
		 * the two column types). Is it OK to strip implicit coercions from
		 * the COALESCE arguments?
1735 1736 1737 1738 1739
		 */
		match = false;
		foreach(lc2, cur_ec->ec_members)
		{
			coal_em = (EquivalenceMember *) lfirst(lc2);
1740
			Assert(!coal_em->em_is_child);		/* no children yet */
1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763
			if (IsA(coal_em->em_expr, CoalesceExpr))
			{
				CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr;
				Node	   *cfirst;
				Node	   *csecond;

				if (list_length(cexpr->args) != 2)
					continue;
				cfirst = (Node *) linitial(cexpr->args);
				csecond = (Node *) lsecond(cexpr->args);

				if (equal(leftvar, cfirst) && equal(rightvar, csecond))
				{
					match = true;
					break;
				}
			}
		}
		if (!match)
			continue;			/* no match, so ignore this EC */

		/*
		 * Yes it does!  Try to generate clauses LEFTVAR = CONSTANT and
B
Bruce Momjian 已提交
1764 1765 1766
		 * RIGHTVAR = CONSTANT for each CONSTANT in the EC.  Note that we must
		 * succeed with at least one constant for each var before we can
		 * decide to throw away the outer-join clause.
1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782
		 */
		matchleft = matchright = false;
		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
			Oid			eq_op;
			RestrictInfo *newrinfo;

			if (!cur_em->em_is_const)
				continue;		/* ignore non-const members */
			eq_op = select_equality_operator(cur_ec,
											 left_type,
											 cur_em->em_datatype);
			if (OidIsValid(eq_op))
			{
				newrinfo = build_implied_join_equality(eq_op,
1783
													   cur_ec->ec_collation,
1784 1785
													   leftvar,
													   cur_em->em_expr,
1786 1787
													   bms_copy(left_relids),
											 bms_copy(left_nullable_relids));
1788 1789 1790 1791 1792 1793 1794 1795 1796
				if (process_equivalence(root, newrinfo, true))
					matchleft = true;
			}
			eq_op = select_equality_operator(cur_ec,
											 right_type,
											 cur_em->em_datatype);
			if (OidIsValid(eq_op))
			{
				newrinfo = build_implied_join_equality(eq_op,
1797
													   cur_ec->ec_collation,
1798 1799
													   rightvar,
													   cur_em->em_expr,
1800 1801
													   bms_copy(right_relids),
											bms_copy(right_nullable_relids));
1802 1803 1804 1805 1806 1807 1808
				if (process_equivalence(root, newrinfo, true))
					matchright = true;
			}
		}

		/*
		 * If we were able to equate both vars to constants, we're done, and
B
Bruce Momjian 已提交
1809 1810 1811 1812
		 * we can throw away the full-join clause as redundant.  Moreover, we
		 * can remove the COALESCE entry from the EC, since the added
		 * restrictions ensure it will always have the expected value. (We
		 * don't bother trying to update ec_relids or ec_sources.)
1813 1814 1815 1816
		 */
		if (matchleft && matchright)
		{
			cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em);
1817
			return true;
1818
		}
B
Bruce Momjian 已提交
1819

1820 1821 1822 1823 1824 1825 1826 1827
		/*
		 * Otherwise, fall out of the search loop, since we know the COALESCE
		 * appears in at most one EC (XXX might stop being true if we allow
		 * stripping of coercions above?)
		 */
		break;
	}

1828
	return false;				/* failed to make any deduction */
1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863
}


/*
 * exprs_known_equal
 *	  Detect whether two expressions are known equal due to equivalence
 *	  relationships.
 *
 * Actually, this only shows that the expressions are equal according
 * to some opfamily's notion of equality --- but we only use it for
 * selectivity estimation, so a fuzzy idea of equality is OK.
 *
 * Note: does not bother to check for "equal(item1, item2)"; caller must
 * check that case if it's possible to pass identical items.
 */
bool
exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
{
	ListCell   *lc1;

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
		bool		item1member = false;
		bool		item2member = false;
		ListCell   *lc2;

		/* Never match to a volatile EC */
		if (ec->ec_has_volatile)
			continue;

		foreach(lc2, ec->ec_members)
		{
			EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);

1864 1865
			if (em->em_is_child)
				continue;		/* ignore children here */
1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880
			if (equal(item1, em->em_expr))
				item1member = true;
			else if (equal(item2, em->em_expr))
				item2member = true;
			/* Exit as soon as equality is proven */
			if (item1member && item2member)
				return true;
		}
	}
	return false;
}


/*
 * add_child_rel_equivalences
1881
 *	  Search for EC members that reference the parent_rel, and
1882 1883
 *	  add transformed members referencing the child_rel.
 *
1884 1885
 * Note that this function won't be called at all unless we have at least some
 * reason to believe that the EC members it generates will be useful.
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903
 *
 * parent_rel and child_rel could be derived from appinfo, but since the
 * caller has already computed them, we might as well just pass them in.
 */
void
add_child_rel_equivalences(PlannerInfo *root,
						   AppendRelInfo *appinfo,
						   RelOptInfo *parent_rel,
						   RelOptInfo *child_rel)
{
	ListCell   *lc1;

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		ListCell   *lc2;

		/*
1904
		 * If this EC contains a volatile expression, then generating child
1905 1906
		 * EMs would be downright dangerous, so skip it.  We rely on a
		 * volatile EC having only one EM.
1907
		 */
1908
		if (cur_ec->ec_has_volatile)
1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
			continue;

		/* No point in searching if parent rel not mentioned in eclass */
		if (!bms_is_subset(parent_rel->relids, cur_ec->ec_relids))
			continue;

		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

1919 1920
			if (cur_em->em_is_const || cur_em->em_is_child)
				continue;		/* ignore consts and children here */
1921

1922 1923
			/* Does it reference parent_rel? */
			if (bms_overlap(cur_em->em_relids, parent_rel->relids))
1924 1925
			{
				/* Yes, generate transformed child version */
B
Bruce Momjian 已提交
1926
				Expr	   *child_expr;
1927
				Relids		new_relids;
1928
				Relids		new_nullable_relids;
B
Bruce Momjian 已提交
1929

1930
				child_expr = (Expr *)
1931 1932
					adjust_appendrel_attrs(root,
										   (Node *) cur_em->em_expr,
1933
										   appinfo);
1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944

				/*
				 * Transform em_relids to match.  Note we do *not* do
				 * pull_varnos(child_expr) here, as for example the
				 * transformation might have substituted a constant, but we
				 * don't want the child member to be marked as constant.
				 */
				new_relids = bms_difference(cur_em->em_relids,
											parent_rel->relids);
				new_relids = bms_add_members(new_relids, child_rel->relids);

1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959
				/*
				 * And likewise for nullable_relids.  Note this code assumes
				 * parent and child relids are singletons.
				 */
				new_nullable_relids = cur_em->em_nullable_relids;
				if (bms_overlap(new_nullable_relids, parent_rel->relids))
				{
					new_nullable_relids = bms_difference(new_nullable_relids,
														 parent_rel->relids);
					new_nullable_relids = bms_add_members(new_nullable_relids,
														  child_rel->relids);
				}

				(void) add_eq_member(cur_ec, child_expr,
									 new_relids, new_nullable_relids,
1960
									 true, cur_em->em_datatype);
1961 1962 1963 1964 1965 1966
			}
		}
	}
}


1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
/*
 * mutate_eclass_expressions
 *	  Apply an expression tree mutator to all expressions stored in
 *	  equivalence classes.
 *
 * This is a bit of a hack ... it's currently needed only by planagg.c,
 * which needs to do a global search-and-replace of MIN/MAX Aggrefs
 * after eclasses are already set up.  Without changing the eclasses too,
 * subsequent matching of ORDER BY clauses would fail.
 *
 * Note that we assume the mutation won't affect relation membership or any
 * other properties we keep track of (which is a bit bogus, but by the time
 * planagg.c runs, it no longer matters).  Also we must be called in the
 * main planner memory context.
 */
void
mutate_eclass_expressions(PlannerInfo *root,
						  Node *(*mutator) (),
						  void *context)
{
	ListCell   *lc1;

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
		ListCell   *lc2;

		foreach(lc2, cur_ec->ec_members)
		{
			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);

			cur_em->em_expr = (Expr *)
				mutator((Node *) cur_em->em_expr, context);
		}
	}
}


2005
/*
2006 2007 2008 2009 2010 2011 2012 2013 2014
 * generate_implied_equalities_for_indexcol
 *	  Create EC-derived joinclauses usable with a specific index column.
 *
 * We assume that any given index column could appear in only one EC.
 * (This should be true in all but the most pathological cases, and if it
 * isn't, we stop on the first match anyway.)  Therefore, what we return
 * is a redundant list of clauses equating the index column to each of
 * the other-relation values it is known to be equal to.  Any one of
 * these clauses can be used to create a parameterized indexscan, and there
2015
 * is no value in using more than one.	(But it *is* worthwhile to create
2016 2017
 * a separate parameterized path for each one, since that leads to different
 * join orders.)
2018 2019 2020
 *
 * The caller can pass a Relids set of rels we aren't interested in joining
 * to, so as to save the work of creating useless clauses.
2021 2022
 */
List *
2023 2024
generate_implied_equalities_for_indexcol(PlannerInfo *root,
										 IndexOptInfo *index,
2025 2026
										 int indexcol,
										 Relids prohibited_rels)
2027 2028
{
	List	   *result = NIL;
2029
	RelOptInfo *rel = index->rel;
2030
	bool		is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
2031
	Index		parent_relid;
2032 2033
	ListCell   *lc1;

2034 2035
	/* If it's a child rel, we'll need to know what its parent is */
	if (is_child_rel)
2036
		parent_relid = find_childrel_appendrelinfo(root, rel)->parent_relid;
2037 2038 2039
	else
		parent_relid = 0;		/* not used, but keep compiler quiet */

2040 2041 2042
	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
2043
		EquivalenceMember *cur_em;
2044 2045 2046 2047 2048 2049 2050 2051 2052 2053
		ListCell   *lc2;

		/*
		 * Won't generate joinclauses if const or single-member (the latter
		 * test covers the volatile case too)
		 */
		if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
			continue;

		/*
B
Bruce Momjian 已提交
2054 2055
		 * No point in searching if rel not mentioned in eclass (but we can't
		 * tell that for a child rel).
2056 2057 2058 2059
		 */
		if (!is_child_rel &&
			!bms_is_subset(rel->relids, cur_ec->ec_relids))
			continue;
2060

2061 2062 2063 2064 2065 2066
		/*
		 * Scan members, looking for a match to the indexable column.  Note
		 * that child EC members are considered, but only when they belong to
		 * the target relation.  (Unlike regular members, the same expression
		 * could be a child member of more than one EC.  Therefore, it's
		 * potentially order-dependent which EC a child relation's index
2067
		 * column gets matched to.	This is annoying but it only happens in
2068 2069 2070
		 * corner cases, so for now we live with just reporting the first
		 * match.  See also get_eclass_for_sort_expr.)
		 */
2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082
		cur_em = NULL;
		foreach(lc2, cur_ec->ec_members)
		{
			cur_em = (EquivalenceMember *) lfirst(lc2);
			if (bms_equal(cur_em->em_relids, rel->relids) &&
				eclass_member_matches_indexcol(cur_ec, cur_em,
											   index, indexcol))
				break;
			cur_em = NULL;
		}

		if (!cur_em)
2083 2084
			continue;

2085 2086 2087 2088
		/*
		 * Found our match.  Scan the other EC members and attempt to generate
		 * joinclauses.
		 */
2089 2090
		foreach(lc2, cur_ec->ec_members)
		{
2091 2092 2093
			EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
			Oid			eq_op;
			RestrictInfo *rinfo;
2094

2095 2096 2097
			if (other_em->em_is_child)
				continue;		/* ignore children here */

2098 2099 2100
			/* Make sure it'll be a join to a different rel */
			if (other_em == cur_em ||
				bms_overlap(other_em->em_relids, rel->relids))
2101 2102
				continue;

2103 2104 2105 2106
			/* Forget it if caller doesn't want joins to this rel */
			if (bms_overlap(other_em->em_relids, prohibited_rels))
				continue;

2107
			/*
2108 2109
			 * Also, if this is a child rel, avoid generating a useless join
			 * to its parent rel.
2110
			 */
2111 2112 2113
			if (is_child_rel &&
				bms_is_member(parent_relid, other_em->em_relids))
				continue;
2114

2115 2116 2117 2118 2119
			eq_op = select_equality_operator(cur_ec,
											 cur_em->em_datatype,
											 other_em->em_datatype);
			if (!OidIsValid(eq_op))
				continue;
2120

2121 2122 2123 2124
			/* set parent_ec to mark as redundant with other joinclauses */
			rinfo = create_join_clause(root, cur_ec, eq_op,
									   cur_em, other_em,
									   cur_ec);
2125

2126
			result = lappend(result, rinfo);
2127
		}
2128 2129 2130 2131 2132 2133 2134 2135

		/*
		 * If somehow we failed to create any join clauses, we might as well
		 * keep scanning the ECs for another match.  But if we did make any,
		 * we're done, because we don't want to return non-redundant clauses.
		 */
		if (result)
			break;
2136 2137 2138 2139 2140 2141 2142 2143
	}

	return result;
}

/*
 * have_relevant_eclass_joinclause
 *		Detect whether there is an EquivalenceClass that could produce
2144
 *		a joinclause involving the two given relations.
2145 2146
 *
 * This is essentially a very cut-down version of
B
Bruce Momjian 已提交
2147
 * generate_join_implied_equalities().	Note it's OK to occasionally say "yes"
2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161
 * incorrectly.  Hence we don't bother with details like whether the lack of a
 * cross-type operator might prevent the clause from actually being generated.
 */
bool
have_relevant_eclass_joinclause(PlannerInfo *root,
								RelOptInfo *rel1, RelOptInfo *rel2)
{
	ListCell   *lc1;

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);

		/*
2162 2163
		 * Won't generate joinclauses if single-member (this test covers the
		 * volatile case too)
2164
		 */
2165
		if (list_length(ec->ec_members) <= 1)
2166 2167 2168
			continue;

		/*
2169 2170 2171 2172 2173 2174 2175 2176 2177
		 * We do not need to examine the individual members of the EC, because
		 * all that we care about is whether each rel overlaps the relids of
		 * at least one member, and a test on ec_relids is sufficient to prove
		 * that.  (As with have_relevant_joinclause(), it is not necessary
		 * that the EC be able to form a joinclause relating exactly the two
		 * given rels, only that it be able to form a joinclause mentioning
		 * both, and this will surely be true if both of them overlap
		 * ec_relids.)
		 *
2178
		 * Note we don't test ec_broken; if we did, we'd need a separate code
2179 2180
		 * path to look through ec_sources.  Checking the membership anyway is
		 * OK as a possibly-overoptimistic heuristic.
2181
		 *
2182 2183 2184 2185 2186
		 * We don't test ec_has_const either, even though a const eclass won't
		 * generate real join clauses.	This is because if we had "WHERE a.x =
		 * b.y and a.x = 42", it is worth considering a join between a and b,
		 * since the join result is likely to be small even though it'll end
		 * up being an unqualified nestloop.
2187
		 */
2188 2189
		if (bms_overlap(rel1->relids, ec->ec_relids) &&
			bms_overlap(rel2->relids, ec->ec_relids))
2190 2191 2192 2193 2194 2195 2196 2197 2198 2199
			return true;
	}

	return false;
}


/*
 * has_relevant_eclass_joinclause
 *		Detect whether there is an EquivalenceClass that could produce
2200
 *		a joinclause involving the given relation and anything else.
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214
 *
 * This is the same as have_relevant_eclass_joinclause with the other rel
 * implicitly defined as "everything else in the query".
 */
bool
has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
{
	ListCell   *lc1;

	foreach(lc1, root->eq_classes)
	{
		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);

		/*
2215 2216
		 * Won't generate joinclauses if single-member (this test covers the
		 * volatile case too)
2217
		 */
2218
		if (list_length(ec->ec_members) <= 1)
2219 2220 2221
			continue;

		/*
2222 2223
		 * Per the comment in have_relevant_eclass_joinclause, it's sufficient
		 * to find an EC that mentions both this rel and some other rel.
2224
		 */
2225 2226
		if (bms_overlap(rel1->relids, ec->ec_relids) &&
			!bms_is_subset(ec->ec_relids, rel1->relids))
2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239
			return true;
	}

	return false;
}


/*
 * eclass_useful_for_merging
 *	  Detect whether the EC could produce any mergejoinable join clauses
 *	  against the specified relation.
 *
 * This is just a heuristic test and doesn't have to be exact; it's better
B
Bruce Momjian 已提交
2240
 * to say "yes" incorrectly than "no".	Hence we don't bother with details
2241 2242 2243 2244
 * like whether the lack of a cross-type operator might prevent the clause
 * from actually being generated.
 */
bool
2245
eclass_useful_for_merging(EquivalenceClass *eclass,
2246 2247 2248 2249 2250 2251 2252
						  RelOptInfo *rel)
{
	ListCell   *lc;

	Assert(!eclass->ec_merged);

	/*
B
Bruce Momjian 已提交
2253 2254
	 * Won't generate joinclauses if const or single-member (the latter test
	 * covers the volatile case too)
2255 2256 2257 2258 2259
	 */
	if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1)
		return false;

	/*
B
Bruce Momjian 已提交
2260 2261 2262
	 * Note we don't test ec_broken; if we did, we'd need a separate code path
	 * to look through ec_sources.	Checking the members anyway is OK as a
	 * possibly-overoptimistic heuristic.
2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273
	 */

	/* If rel already includes all members of eclass, no point in searching */
	if (bms_is_subset(eclass->ec_relids, rel->relids))
		return false;

	/* To join, we need a member not in the given rel */
	foreach(lc, eclass->ec_members)
	{
		EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);

2274 2275 2276 2277
		if (cur_em->em_is_child)
			continue;			/* ignore children here */

		if (!bms_overlap(cur_em->em_relids, rel->relids))
2278 2279 2280 2281 2282
			return true;
	}

	return false;
}
2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310


/*
 * is_redundant_derived_clause
 *		Test whether rinfo is derived from same EC as any clause in clauselist;
 *		if so, it can be presumed to represent a condition that's redundant
 *		with that member of the list.
 */
bool
is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist)
{
	EquivalenceClass *parent_ec = rinfo->parent_ec;
	ListCell   *lc;

	/* Fail if it's not a potentially-redundant clause from some EC */
	if (parent_ec == NULL)
		return false;

	foreach(lc, clauselist)
	{
		RestrictInfo *otherrinfo = (RestrictInfo *) lfirst(lc);

		if (otherrinfo->parent_ec == parent_ec)
			return true;
	}

	return false;
}