joinpath.c 26.6 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * joinpath.c
4
 *	  Routines to find all possible paths for processing a set of joins
5
 *
P
 
PostgreSQL Daemon 已提交
6
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
B
Add:  
Bruce Momjian 已提交
7
 * Portions Copyright (c) 1994, Regents of the University of California
8 9 10
 *
 *
 * IDENTIFICATION
11
 *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.97 2005/10/25 20:30:30 tgl Exp $
12 13 14
 *
 *-------------------------------------------------------------------------
 */
15 16
#include "postgres.h"

17 18
#include <math.h>

19
#include "optimizer/clauses.h"
B
Bruce Momjian 已提交
20
#include "optimizer/cost.h"
B
Bruce Momjian 已提交
21 22
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
23
#include "parser/parsetree.h"
24
#include "utils/lsyscache.h"
25

26

27
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
B
Bruce Momjian 已提交
28 29 30
					 RelOptInfo *outerrel, RelOptInfo *innerrel,
					 List *restrictlist, List *mergeclause_list,
					 JoinType jointype);
31
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
B
Bruce Momjian 已提交
32 33 34
					 RelOptInfo *outerrel, RelOptInfo *innerrel,
					 List *restrictlist, List *mergeclause_list,
					 JoinType jointype);
35
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
B
Bruce Momjian 已提交
36 37
					 RelOptInfo *outerrel, RelOptInfo *innerrel,
					 List *restrictlist, JoinType jointype);
38
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
B
Bruce Momjian 已提交
39 40 41 42
						 RelOptInfo *outerrel,
						 RelOptInfo *innerrel,
						 List *restrictlist,
						 JoinType jointype);
43

44 45

/*
46 47 48 49 50 51
 * add_paths_to_joinrel
 *	  Given a join relation and two component rels from which it can be made,
 *	  consider all possible paths that use the two component rels as outer
 *	  and inner rel respectively.  Add these paths to the join rel's pathlist
 *	  if they survive comparison with other paths (and remove any existing
 *	  paths that are dominated by these paths).
52
 *
53 54
 * Modifies the pathlist field of the joinrel node to contain the best
 * paths found so far.
55 56
 */
void
57
add_paths_to_joinrel(PlannerInfo *root,
58 59 60
					 RelOptInfo *joinrel,
					 RelOptInfo *outerrel,
					 RelOptInfo *innerrel,
61
					 JoinType jointype,
62
					 List *restrictlist)
63
{
64
	List	   *mergeclause_list = NIL;
65

66
	/*
67
	 * Find potential mergejoin clauses.  We can skip this if we are not
B
Bruce Momjian 已提交
68 69 70
	 * interested in doing a mergejoin.  However, mergejoin is currently our
	 * only way of implementing full outer joins, so override mergejoin
	 * disable if it's a full join.
71
	 */
72
	if (enable_mergejoin || jointype == JOIN_FULL)
73 74 75
		mergeclause_list = select_mergejoin_clauses(joinrel,
													outerrel,
													innerrel,
76 77
													restrictlist,
													jointype);
78

79
	/*
80 81
	 * 1. Consider mergejoin paths where both relations must be explicitly
	 * sorted.
82
	 */
83
	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
84
						 restrictlist, mergeclause_list, jointype);
85

86
	/*
87 88 89
	 * 2. Consider paths where the outer relation need not be explicitly
	 * sorted. This includes both nestloops and mergejoins where the outer
	 * path is already ordered.
90
	 */
91
	match_unsorted_outer(root, joinrel, outerrel, innerrel,
92
						 restrictlist, mergeclause_list, jointype);
93

94
#ifdef NOT_USED
95

96
	/*
97
	 * 3. Consider paths where the inner relation need not be explicitly
B
Bruce Momjian 已提交
98 99
	 * sorted.	This includes mergejoins only (nestloops were already built in
	 * match_unsorted_outer).
100
	 *
101
	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
B
Bruce Momjian 已提交
102 103 104 105
	 * significant difference between the inner and outer side of a mergejoin,
	 * so match_unsorted_inner creates no paths that aren't equivalent to
	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
	 * invoked with the two rels given in the other order.
106
	 */
107
	match_unsorted_inner(root, joinrel, outerrel, innerrel,
108
						 restrictlist, mergeclause_list, jointype);
109
#endif
110

111
	/*
B
Bruce Momjian 已提交
112 113
	 * 4. Consider paths where both outer and inner relations must be hashed
	 * before being joined.
114 115
	 */
	if (enable_hashjoin)
116
		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
117
							 restrictlist, jointype);
118 119
}

120
/*
121
 * sort_inner_and_outer
122
 *	  Create mergejoin join paths by explicitly sorting both the outer and
123 124
 *	  inner join relations on each available merge ordering.
 *
125 126 127
 * 'joinrel' is the join relation
 * 'outerrel' is the outer join relation
 * 'innerrel' is the inner join relation
128 129
 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 *		clauses that apply to this join
130
 * 'mergeclause_list' is a list of RestrictInfo nodes for available
131
 *		mergejoin clauses in this join
132
 * 'jointype' is the type of join to do
133
 */
134
static void
135
sort_inner_and_outer(PlannerInfo *root,
136
					 RelOptInfo *joinrel,
137 138
					 RelOptInfo *outerrel,
					 RelOptInfo *innerrel,
139
					 List *restrictlist,
140 141
					 List *mergeclause_list,
					 JoinType jointype)
142
{
143
	bool		useallclauses;
144 145
	Path	   *outer_path;
	Path	   *inner_path;
146
	List	   *all_pathkeys;
147
	ListCell   *l;
148

149 150 151 152 153 154 155 156
	/*
	 * If we are doing a right or full join, we must use *all* the
	 * mergeclauses as join clauses, else we will not have a valid plan.
	 */
	switch (jointype)
	{
		case JOIN_INNER:
		case JOIN_LEFT:
157 158 159
		case JOIN_IN:
		case JOIN_UNIQUE_OUTER:
		case JOIN_UNIQUE_INNER:
160 161 162 163 164 165 166
			useallclauses = false;
			break;
		case JOIN_RIGHT:
		case JOIN_FULL:
			useallclauses = true;
			break;
		default:
167
			elog(ERROR, "unrecognized join type: %d",
168
				 (int) jointype);
B
Bruce Momjian 已提交
169
			useallclauses = false;		/* keep compiler quiet */
170 171 172
			break;
	}

173 174 175
	/*
	 * We only consider the cheapest-total-cost input paths, since we are
	 * assuming here that a sort is required.  We will consider
B
Bruce Momjian 已提交
176 177
	 * cheapest-startup-cost input paths later, and only if they don't need a
	 * sort.
178
	 *
B
Bruce Momjian 已提交
179 180
	 * If unique-ification is requested, do it and then handle as a plain inner
	 * join.
181 182 183 184 185 186 187 188 189 190 191 192 193 194
	 */
	outer_path = outerrel->cheapest_total_path;
	inner_path = innerrel->cheapest_total_path;
	if (jointype == JOIN_UNIQUE_OUTER)
	{
		outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
		jointype = JOIN_INNER;
	}
	else if (jointype == JOIN_UNIQUE_INNER)
	{
		inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
		jointype = JOIN_INNER;
	}

195
	/*
B
Bruce Momjian 已提交
196 197 198 199 200
	 * Each possible ordering of the available mergejoin clauses will generate
	 * a differently-sorted result path at essentially the same cost.  We have
	 * no basis for choosing one over another at this level of joining, but
	 * some sort orders may be more useful than others for higher-level
	 * mergejoins, so it's worth considering multiple orderings.
201
	 *
202 203
	 * Actually, it's not quite true that every mergeclause ordering will
	 * generate a different path order, because some of the clauses may be
B
Bruce Momjian 已提交
204 205 206
	 * redundant.  Therefore, what we do is convert the mergeclause list to a
	 * list of canonical pathkeys, and then consider different orderings of
	 * the pathkeys.
207
	 *
B
Bruce Momjian 已提交
208 209
	 * Generating a path for *every* permutation of the pathkeys doesn't seem
	 * like a winning strategy; the cost in planning time is too high. For
B
Bruce Momjian 已提交
210 211 212 213 214 215 216 217 218
	 * now, we generate one path for each pathkey, listing that pathkey first
	 * and the rest in random order.  This should allow at least a one-clause
	 * mergejoin without re-sorting against any other possible mergejoin
	 * partner path.  But if we've not guessed the right ordering of secondary
	 * keys, we may end up evaluating clauses as qpquals when they could have
	 * been done as mergeclauses. We need to figure out a better way.  (Two
	 * possible approaches: look at all the relevant index relations to
	 * suggest plausible sort orders, or make just one output path and somehow
	 * mark it as having a sort-order that can be rearranged freely.)
219
	 */
220 221 222 223
	all_pathkeys = make_pathkeys_for_mergeclauses(root,
												  mergeclause_list,
												  outerrel);

224
	foreach(l, all_pathkeys)
225
	{
226
		List	   *front_pathkey = (List *) lfirst(l);
227 228
		List	   *cur_pathkeys;
		List	   *cur_mergeclauses;
229 230 231
		List	   *outerkeys;
		List	   *innerkeys;
		List	   *merge_pathkeys;
232

233
		/* Make a pathkey list with this guy first. */
234
		if (l != list_head(all_pathkeys))
235
			cur_pathkeys = lcons(front_pathkey,
236 237
								 list_delete_ptr(list_copy(all_pathkeys),
												 front_pathkey));
238
		else
B
Bruce Momjian 已提交
239
			cur_pathkeys = all_pathkeys;		/* no work at first one... */
240 241 242

		/*
		 * Select mergeclause(s) that match this sort ordering.  If we had
B
Bruce Momjian 已提交
243 244
		 * redundant merge clauses then we will get a subset of the original
		 * clause list.  There had better be some match, however...
245 246 247
		 */
		cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
														  cur_pathkeys,
B
Bruce Momjian 已提交
248
														  mergeclause_list);
249
		Assert(cur_mergeclauses != NIL);
250

251 252
		/* Forget it if can't use all the clauses in right/full join */
		if (useallclauses &&
B
Bruce Momjian 已提交
253
			list_length(cur_mergeclauses) != list_length(mergeclause_list))
254 255
			continue;

256 257
		/*
		 * Build sort pathkeys for both sides.
258
		 *
259
		 * Note: it's possible that the cheapest paths will already be sorted
B
Bruce Momjian 已提交
260 261
		 * properly.  create_mergejoin_path will detect that case and suppress
		 * an explicit sort step, so we needn't do so here.
262
		 */
263
		outerkeys = make_pathkeys_for_mergeclauses(root,
264
												   cur_mergeclauses,
265
												   outerrel);
266
		innerkeys = make_pathkeys_for_mergeclauses(root,
267
												   cur_mergeclauses,
268
												   innerrel);
269
		/* Build pathkeys representing output sort order. */
270 271
		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
											 outerkeys);
272

273
		/*
274
		 * And now we can make the path.
275 276
		 */
		add_path(joinrel, (Path *)
277 278
				 create_mergejoin_path(root,
									   joinrel,
279
									   jointype,
280 281
									   outer_path,
									   inner_path,
282 283
									   restrictlist,
									   merge_pathkeys,
284
									   cur_mergeclauses,
285 286
									   outerkeys,
									   innerkeys));
287
	}
288 289
}

290
/*
291
 * match_unsorted_outer
292 293
 *	  Creates possible join paths for processing a single join relation
 *	  'joinrel' by employing either iterative substitution or
294 295
 *	  mergejoining on each of its possible outer paths (considering
 *	  only outer paths that are already ordered well enough for merging).
296
 *
297
 * We always generate a nestloop path for each available outer path.
298 299
 * In fact we may generate as many as four: one on the cheapest-total-cost
 * inner path, one on the same with materialization, one on the
B
Bruce Momjian 已提交
300
 * cheapest-startup-cost inner path (if different),
301
 * and one on the best inner-indexscan path (if any).
302
 *
303
 * We also consider mergejoins if mergejoin clauses are available.	We have
304 305 306 307 308 309 310 311
 * two ways to generate the inner path for a mergejoin: sort the cheapest
 * inner path, or use an inner path that is already suitably ordered for the
 * merge.  If we have several mergeclauses, it could be that there is no inner
 * path (or only a very expensive one) for the full list of mergeclauses, but
 * better paths exist if we truncate the mergeclause list (thereby discarding
 * some sort key requirements).  So, we consider truncations of the
 * mergeclause list as well as the full list.  (Ideally we'd consider all
 * subsets of the mergeclause list, but that seems way too expensive.)
312
 *
313 314 315
 * 'joinrel' is the join relation
 * 'outerrel' is the outer join relation
 * 'innerrel' is the inner join relation
316 317
 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 *		clauses that apply to this join
318
 * 'mergeclause_list' is a list of RestrictInfo nodes for available
319
 *		mergejoin clauses in this join
320
 * 'jointype' is the type of join to do
321
 */
322
static void
323
match_unsorted_outer(PlannerInfo *root,
324
					 RelOptInfo *joinrel,
325 326
					 RelOptInfo *outerrel,
					 RelOptInfo *innerrel,
327
					 List *restrictlist,
328 329
					 List *mergeclause_list,
					 JoinType jointype)
330
{
331
	JoinType	save_jointype = jointype;
332
	bool		nestjoinOK;
333
	bool		useallclauses;
334 335
	Path	   *inner_cheapest_startup = innerrel->cheapest_startup_path;
	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
336 337
	Path	   *matpath = NULL;
	Path	   *bestinnerjoin = NULL;
338
	ListCell   *l;
339

340
	/*
341
	 * Nestloop only supports inner, left, and IN joins.  Also, if we are
B
Bruce Momjian 已提交
342 343 344 345
	 * doing a right or full join, we must use *all* the mergeclauses as join
	 * clauses, else we will not have a valid plan.  (Although these two flags
	 * are currently inverses, keep them separate for clarity and possible
	 * future changes.)
346 347 348 349 350
	 */
	switch (jointype)
	{
		case JOIN_INNER:
		case JOIN_LEFT:
351 352 353
		case JOIN_IN:
		case JOIN_UNIQUE_OUTER:
		case JOIN_UNIQUE_INNER:
354
			nestjoinOK = true;
355
			useallclauses = false;
356
			break;
357 358
		case JOIN_RIGHT:
		case JOIN_FULL:
359
			nestjoinOK = false;
360 361 362
			useallclauses = true;
			break;
		default:
363
			elog(ERROR, "unrecognized join type: %d",
364
				 (int) jointype);
365
			nestjoinOK = false; /* keep compiler quiet */
366
			useallclauses = false;
367 368 369
			break;
	}

370
	/*
B
Bruce Momjian 已提交
371 372
	 * If we need to unique-ify the inner path, we will consider only the
	 * cheapest inner.
373 374 375 376 377 378 379 380 381
	 */
	if (jointype == JOIN_UNIQUE_INNER)
	{
		inner_cheapest_total = (Path *)
			create_unique_path(root, innerrel, inner_cheapest_total);
		inner_cheapest_startup = inner_cheapest_total;
		jointype = JOIN_INNER;
	}
	else if (nestjoinOK)
382 383
	{
		/*
B
Bruce Momjian 已提交
384 385 386
		 * If the cheapest inner path is a join or seqscan, we should consider
		 * materializing it.  (This is a heuristic: we could consider it
		 * always, but for inner indexscans it's probably a waste of time.)
387
		 */
388
		if (!(IsA(inner_cheapest_total, IndexPath) ||
389
			  IsA(inner_cheapest_total, BitmapHeapPath) ||
390
			  IsA(inner_cheapest_total, TidPath)))
391
			matpath = (Path *)
392
				create_material_path(innerrel, inner_cheapest_total);
393 394

		/*
B
Bruce Momjian 已提交
395 396
		 * Get the best innerjoin indexpath (if any) for this outer rel. It's
		 * the same for all outer paths.
397 398 399 400
		 */
		bestinnerjoin = best_inner_indexscan(root, innerrel,
											 outerrel->relids, jointype);
	}
401

402
	foreach(l, outerrel->pathlist)
403
	{
404
		Path	   *outerpath = (Path *) lfirst(l);
405
		List	   *merge_pathkeys;
406
		List	   *mergeclauses;
407
		List	   *innersortkeys;
408 409 410
		List	   *trialsortkeys;
		Path	   *cheapest_startup_inner;
		Path	   *cheapest_total_inner;
411 412
		int			num_sortkeys;
		int			sortkeycnt;
413

414
		/*
B
Bruce Momjian 已提交
415 416
		 * If we need to unique-ify the outer path, it's pointless to consider
		 * any but the cheapest outer.
417 418 419 420 421 422 423 424 425
		 */
		if (save_jointype == JOIN_UNIQUE_OUTER)
		{
			if (outerpath != outerrel->cheapest_total_path)
				continue;
			outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
			jointype = JOIN_INNER;
		}

426
		/*
B
Bruce Momjian 已提交
427 428 429
		 * The result will have this sort order (even if it is implemented as
		 * a nestloop, and even if some of the mergeclauses are implemented by
		 * qpquals rather than as true mergeclauses):
430
		 */
431
		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
432
											 outerpath->pathkeys);
433

434 435 436
		if (nestjoinOK)
		{
			/*
B
Bruce Momjian 已提交
437
			 * Always consider a nestloop join with this outer and
438 439 440
			 * cheapest-total-cost inner.  When appropriate, also consider
			 * using the materialized form of the cheapest inner, the
			 * cheapest-startup-cost inner path, and the best innerjoin
441
			 * indexpath.
442
			 */
443
			add_path(joinrel, (Path *)
444 445
					 create_nestloop_path(root,
										  joinrel,
446
										  jointype,
447
										  outerpath,
448
										  inner_cheapest_total,
449 450
										  restrictlist,
										  merge_pathkeys));
451 452 453 454 455 456 457 458 459
			if (matpath != NULL)
				add_path(joinrel, (Path *)
						 create_nestloop_path(root,
											  joinrel,
											  jointype,
											  outerpath,
											  matpath,
											  restrictlist,
											  merge_pathkeys));
460
			if (inner_cheapest_startup != inner_cheapest_total)
461
				add_path(joinrel, (Path *)
462 463
						 create_nestloop_path(root,
											  joinrel,
464 465
											  jointype,
											  outerpath,
466
											  inner_cheapest_startup,
467 468 469 470
											  restrictlist,
											  merge_pathkeys));
			if (bestinnerjoin != NULL)
				add_path(joinrel, (Path *)
471 472
						 create_nestloop_path(root,
											  joinrel,
473 474 475 476 477 478
											  jointype,
											  outerpath,
											  bestinnerjoin,
											  restrictlist,
											  merge_pathkeys));
		}
479

480 481 482 483
		/* Can't do anything else if outer path needs to be unique'd */
		if (save_jointype == JOIN_UNIQUE_OUTER)
			continue;

484
		/* Look for useful mergeclauses (if any) */
485 486
		mergeclauses = find_mergeclauses_for_pathkeys(root,
													  outerpath->pathkeys,
487
													  mergeclause_list);
488

489 490 491
		/*
		 * Done with this outer path if no chance for a mergejoin.
		 *
B
Bruce Momjian 已提交
492 493
		 * Special corner case: for "x FULL JOIN y ON true", there will be no
		 * join clauses at all.  Ordinarily we'd generate a clauseless
494 495 496 497
		 * nestloop path, but since mergejoin is our only join type that
		 * supports FULL JOIN, it's necessary to generate a clauseless
		 * mergejoin path instead.
		 */
498
		if (mergeclauses == NIL)
499
		{
500
			if (jointype == JOIN_FULL)
B
Bruce Momjian 已提交
501
				 /* okay to try for mergejoin */ ;
502 503 504
			else
				continue;
		}
505
		if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
506
			continue;
507 508

		/* Compute the required ordering of the inner path */
509 510
		innersortkeys = make_pathkeys_for_mergeclauses(root,
													   mergeclauses,
511
													   innerrel);
512

513
		/*
B
Bruce Momjian 已提交
514 515 516
		 * Generate a mergejoin on the basis of sorting the cheapest inner.
		 * Since a sort will be needed, only cheapest total cost matters.
		 * (But create_mergejoin_path will do the right thing if
517
		 * inner_cheapest_total is already correctly sorted.)
518
		 */
519
		add_path(joinrel, (Path *)
520 521
				 create_mergejoin_path(root,
									   joinrel,
522
									   jointype,
523
									   outerpath,
524
									   inner_cheapest_total,
525 526
									   restrictlist,
									   merge_pathkeys,
527
									   mergeclauses,
528 529
									   NIL,
									   innersortkeys));
530

531 532 533 534
		/* Can't do anything else if inner path needs to be unique'd */
		if (save_jointype == JOIN_UNIQUE_INNER)
			continue;

535
		/*
B
Bruce Momjian 已提交
536 537 538 539
		 * Look for presorted inner paths that satisfy the innersortkey list
		 * --- or any truncation thereof, if we are allowed to build a
		 * mergejoin using a subset of the merge clauses.  Here, we consider
		 * both cheap startup cost and cheap total cost.  Ignore
540
		 * inner_cheapest_total, since we already made a path with it.
541
		 */
542
		num_sortkeys = list_length(innersortkeys);
543
		if (num_sortkeys > 1 && !useallclauses)
544
			trialsortkeys = list_copy(innersortkeys);	/* need modifiable copy */
545
		else
B
Bruce Momjian 已提交
546
			trialsortkeys = innersortkeys;		/* won't really truncate */
547 548
		cheapest_startup_inner = NULL;
		cheapest_total_inner = NULL;
549

550
		for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
551 552
		{
			Path	   *innerpath;
553
			List	   *newclauses = NIL;
554

555
			/*
556
			 * Look for an inner path ordered well enough for the first
B
Bruce Momjian 已提交
557 558
			 * 'sortkeycnt' innersortkeys.	NB: trialsortkeys list is modified
			 * destructively, which is why we made a copy...
559
			 */
560
			trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
561 562 563 564
			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
													   trialsortkeys,
													   TOTAL_COST);
			if (innerpath != NULL &&
565
				innerpath != inner_cheapest_total &&
566 567 568
				(cheapest_total_inner == NULL ||
				 compare_path_costs(innerpath, cheapest_total_inner,
									TOTAL_COST) < 0))
569
			{
570
				/* Found a cheap (or even-cheaper) sorted path */
571 572 573 574 575 576 577 578 579
				/* Select the right mergeclauses, if we didn't already */
				if (sortkeycnt < num_sortkeys)
				{
					newclauses =
						find_mergeclauses_for_pathkeys(root,
													   trialsortkeys,
													   mergeclauses);
					Assert(newclauses != NIL);
				}
580 581
				else
					newclauses = mergeclauses;
582
				add_path(joinrel, (Path *)
583 584
						 create_mergejoin_path(root,
											   joinrel,
585
											   jointype,
586 587 588 589 590 591 592 593 594 595 596 597 598 599
											   outerpath,
											   innerpath,
											   restrictlist,
											   merge_pathkeys,
											   newclauses,
											   NIL,
											   NIL));
				cheapest_total_inner = innerpath;
			}
			/* Same on the basis of cheapest startup cost ... */
			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
													   trialsortkeys,
													   STARTUP_COST);
			if (innerpath != NULL &&
600
				innerpath != inner_cheapest_total &&
601 602 603 604 605 606
				(cheapest_startup_inner == NULL ||
				 compare_path_costs(innerpath, cheapest_startup_inner,
									STARTUP_COST) < 0))
			{
				/* Found a cheap (or even-cheaper) sorted path */
				if (innerpath != cheapest_total_inner)
607
				{
608
					/*
B
Bruce Momjian 已提交
609 610
					 * Avoid rebuilding clause list if we already made one;
					 * saves memory in big join trees...
611 612 613
					 */
					if (newclauses == NIL)
					{
614 615 616 617
						if (sortkeycnt < num_sortkeys)
						{
							newclauses =
								find_mergeclauses_for_pathkeys(root,
B
Bruce Momjian 已提交
618 619
															   trialsortkeys,
															   mergeclauses);
620 621
							Assert(newclauses != NIL);
						}
622 623 624
						else
							newclauses = mergeclauses;
					}
625
					add_path(joinrel, (Path *)
626 627
							 create_mergejoin_path(root,
												   joinrel,
628
												   jointype,
629 630 631 632 633 634 635
												   outerpath,
												   innerpath,
												   restrictlist,
												   merge_pathkeys,
												   newclauses,
												   NIL,
												   NIL));
636
				}
637
				cheapest_startup_inner = innerpath;
638
			}
639

640 641 642 643 644
			/*
			 * Don't consider truncated sortkeys if we need all clauses.
			 */
			if (useallclauses)
				break;
645 646
		}
	}
647 648
}

649
/*
650
 * hash_inner_and_outer
651
 *	  Create hashjoin join paths by explicitly hashing both the outer and
652
 *	  inner keys of each available hash clause.
653
 *
654 655 656
 * 'joinrel' is the join relation
 * 'outerrel' is the outer join relation
 * 'innerrel' is the inner join relation
657 658
 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 *		clauses that apply to this join
659
 * 'jointype' is the type of join to do
660
 */
661
static void
662
hash_inner_and_outer(PlannerInfo *root,
663
					 RelOptInfo *joinrel,
664
					 RelOptInfo *outerrel,
665
					 RelOptInfo *innerrel,
666 667
					 List *restrictlist,
					 JoinType jointype)
668
{
669
	bool		isouterjoin;
670
	List	   *hashclauses;
671
	ListCell   *l;
672

673
	/*
674
	 * Hashjoin only supports inner, left, and IN joins.
675 676 677 678
	 */
	switch (jointype)
	{
		case JOIN_INNER:
679 680 681
		case JOIN_IN:
		case JOIN_UNIQUE_OUTER:
		case JOIN_UNIQUE_INNER:
682 683 684 685 686 687 688 689 690
			isouterjoin = false;
			break;
		case JOIN_LEFT:
			isouterjoin = true;
			break;
		default:
			return;
	}

691
	/*
692 693 694
	 * We need to build only one hashpath for any given pair of outer and
	 * inner relations; all of the hashable clauses will be used as keys.
	 *
B
Bruce Momjian 已提交
695 696
	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
	 * usable with this pair of sub-relations.
697
	 */
698
	hashclauses = NIL;
699
	foreach(l, restrictlist)
700
	{
701
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
702

703
		if (!restrictinfo->can_join ||
704
			restrictinfo->hashjoinoperator == InvalidOid)
705 706
			continue;			/* not hashjoinable */

707
		/*
708
		 * If processing an outer join, only use its own join clauses for
709 710
		 * hashing.  For inner joins we need not be so picky.
		 */
711
		if (isouterjoin && restrictinfo->is_pushed_down)
712 713
			continue;

714
		/*
715
		 * Check if clause is usable with these input rels.
716
		 */
717 718
		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
719 720 721
		{
			/* righthand side is inner */
		}
722
		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
B
Bruce Momjian 已提交
723
				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
724 725 726
		{
			/* lefthand side is inner */
		}
727 728
		else
			continue;			/* no good for these input relations */
B
Bruce Momjian 已提交
729

730 731
		hashclauses = lappend(hashclauses, restrictinfo);
	}
732

733 734 735
	/* If we found any usable hashclauses, make a path */
	if (hashclauses)
	{
736
		/*
B
Bruce Momjian 已提交
737 738 739
		 * We consider both the cheapest-total-cost and cheapest-startup-cost
		 * outer paths.  There's no need to consider any but the
		 * cheapest-total-cost inner path, however.
740
		 */
B
Bruce Momjian 已提交
741 742 743
		Path	   *cheapest_startup_outer = outerrel->cheapest_startup_path;
		Path	   *cheapest_total_outer = outerrel->cheapest_total_path;
		Path	   *cheapest_total_inner = innerrel->cheapest_total_path;
744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759

		/* Unique-ify if need be */
		if (jointype == JOIN_UNIQUE_OUTER)
		{
			cheapest_total_outer = (Path *)
				create_unique_path(root, outerrel, cheapest_total_outer);
			cheapest_startup_outer = cheapest_total_outer;
			jointype = JOIN_INNER;
		}
		else if (jointype == JOIN_UNIQUE_INNER)
		{
			cheapest_total_inner = (Path *)
				create_unique_path(root, innerrel, cheapest_total_inner);
			jointype = JOIN_INNER;
		}

760
		add_path(joinrel, (Path *)
761 762
				 create_hashjoin_path(root,
									  joinrel,
763
									  jointype,
764 765
									  cheapest_total_outer,
									  cheapest_total_inner,
766
									  restrictlist,
767
									  hashclauses));
768
		if (cheapest_startup_outer != cheapest_total_outer)
769
			add_path(joinrel, (Path *)
770 771
					 create_hashjoin_path(root,
										  joinrel,
772
										  jointype,
773 774
										  cheapest_startup_outer,
										  cheapest_total_inner,
775
										  restrictlist,
776
										  hashclauses));
777
	}
778
}
779

780 781 782 783 784
/*
 * select_mergejoin_clauses
 *	  Select mergejoin clauses that are usable for a particular join.
 *	  Returns a list of RestrictInfo nodes for those clauses.
 *
785 786 787
 * We examine each restrictinfo clause known for the join to see
 * if it is mergejoinable and involves vars from the two sub-relations
 * currently of interest.
788 789
 */
static List *
790 791 792
select_mergejoin_clauses(RelOptInfo *joinrel,
						 RelOptInfo *outerrel,
						 RelOptInfo *innerrel,
793 794
						 List *restrictlist,
						 JoinType jointype)
795 796
{
	List	   *result_list = NIL;
797
	bool		isouterjoin = IS_OUTER_JOIN(jointype);
798
	bool		have_nonmergeable_joinclause = false;
799
	ListCell   *l;
800

801
	foreach(l, restrictlist)
802
	{
803
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
804

805
		/*
B
Bruce Momjian 已提交
806
		 * If processing an outer join, only use its own join clauses in the
807 808 809
		 * merge.  For inner joins we can use pushed-down clauses too.
		 * (Note: we don't set have_nonmergeable_joinclause here because
		 * pushed-down clauses will become otherquals not joinquals.)
810
		 */
811 812
		if (isouterjoin && restrictinfo->is_pushed_down)
			continue;
813

814
		if (!restrictinfo->can_join ||
815
			restrictinfo->mergejoinoperator == InvalidOid)
816 817
		{
			have_nonmergeable_joinclause = true;
818
			continue;			/* not mergejoinable */
819
		}
820

821
		/*
822
		 * Check if clause is usable with these input rels.  All the vars
B
Bruce Momjian 已提交
823 824
		 * needed on each side of the clause must be available from one or the
		 * other of the input rels.
825
		 */
826 827
		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
828 829 830
		{
			/* righthand side is inner */
		}
831
		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
B
Bruce Momjian 已提交
832
				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
833 834 835 836
		{
			/* lefthand side is inner */
		}
		else
837 838
		{
			have_nonmergeable_joinclause = true;
839
			continue;			/* no good for these input relations */
840
		}
841

842
		result_list = lcons(restrictinfo, result_list);
843 844
	}

845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
	/*
	 * If it is a right/full join then *all* the explicit join clauses must be
	 * mergejoinable, else the executor will fail. If we are asked for a right
	 * join then just return NIL to indicate no mergejoin is possible (we can
	 * handle it as a left join instead). If we are asked for a full join then
	 * emit an error, because there is no fallback.
	 */
	if (have_nonmergeable_joinclause)
	{
		switch (jointype)
		{
			case JOIN_RIGHT:
				return NIL;		/* not mergejoinable */
			case JOIN_FULL:
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
				break;
			default:
				/* otherwise, it's OK to have nonmergeable join quals */
				break;
		}
	}

869 870
	return result_list;
}