pathkeys.c 12.8 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
3
 * joinutils.c
4
 *	  Utilities for matching and building join and path keys
5 6 7 8 9
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
B
Bruce Momjian 已提交
10
 *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.6 1999/02/21 01:55:02 momjian Exp $
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "nodes/pg_list.h"
#include "nodes/relation.h"
#include "nodes/plannodes.h"

#include "optimizer/internal.h"
#include "optimizer/paths.h"
#include "optimizer/var.h"
#include "optimizer/keys.h"
#include "optimizer/tlist.h"
#include "optimizer/joininfo.h"
#include "optimizer/ordering.h"

28
static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
B
Bruce Momjian 已提交
29
						int outer_or_inner);
30 31
static List *new_join_pathkey(List *subkeys, List *considered_subkeys,
					 	List *join_rel_tlist, List *joinclauses);
32
static List *new_matching_subkeys(Var *subkey, List *considered_subkeys,
33
					 	List *join_rel_tlist, List *joinclauses);
34

B
Bruce Momjian 已提交
35 36 37 38

/*
 *	Explanation of Path.pathkeys
 *
B
Bruce Momjian 已提交
39
 *	Path.pathkeys is a List of List of Var nodes that represent the sort
B
Bruce Momjian 已提交
40 41 42 43 44 45 46 47 48 49
 *	order of the result generated by the Path.
 *
 *	In single/base relation RelOptInfo's, the Path's represent various ways
 *	of generate the relation.  Sequential scan Paths have a NIL pathkeys.
 *	Index scans have Path.pathkeys that represent the chosen index.  A
 *	single-key index pathkeys would be { {tab1_indexkey1} }.  The pathkeys
 *	entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
 *
 *	Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
 *	only performed with equajoins("=").  Because of this, the multi-relation
B
Bruce Momjian 已提交
50 51
 *	path actually has more than one primary Var key.  For example, a
 *	mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
B
Bruce Momjian 已提交
52 53 54
 *	{ {tab1.col1, tab2.col1} }.  This allows future joins to use either Var
 *	as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
 *	They are equal, so they are both primary sort keys.  This is why pathkeys
B
Bruce Momjian 已提交
55 56 57 58 59 60 61 62 63 64 65 66 67
 *	is a List of Lists.
 *
 *	For multi-key sorts, if the outer is sorted by a multi-key index, the
 *	multi-key index remains after the join.  If the inner has a multi-key
 *	sort, only the primary key of the inner is added to the result.
 *	Mergejoins only join on the primary key.  Currently, non-primary keys
 *	in the pathkeys List are of limited value.
 *
 *	HashJoin has similar functionality.  NestJoin does not perform sorting,
 *	and allows non-equajoins, so it does not allow useful pathkeys.
 *
 *	-- bjm
 *	
B
Bruce Momjian 已提交
68 69
 */
 
70
/****************************************************************************
71
 *		KEY COMPARISONS
72 73
 ****************************************************************************/

74
/*
B
Bruce Momjian 已提交
75
 * order_joinkeys_by_pathkeys
76 77
 *	  Attempts to match the keys of a path against the keys of join clauses.
 *	  This is done by looking for a matching join key in 'joinkeys' for
78
 *	  every path key in the list 'path.keys'. If there is a matching join key
79 80 81 82
 *	  (not necessarily unique) for every path key, then the list of
 *	  corresponding join keys and join clauses are returned in the order in
 *	  which the keys matched the path keys.
 *
83
 * 'pathkeys' is a list of path keys:
84
 *		( ( (var) (var) ... ) ( (var) ... ) )
85
 * 'joinkeys' is a list of join keys:
86
 *		( (outer inner) (outer inner) ... )
87
 * 'joinclauses' is a list of clauses corresponding to the join keys in
88
 *		'joinkeys'
B
Bruce Momjian 已提交
89
 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
90 91
 *		in 'joinkeys'
 *
92 93
 * Returns the join keys and corresponding join clauses in a list if all
 * of the path keys were matched:
94
 *		(
B
Bruce Momjian 已提交
95
 *		 ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
96 97
 *		 ( clause0 ... clauseN )
 *		)
98
 * and nil otherwise.
99
 *
100
 * Returns a list of matched join keys and a list of matched join clauses
B
Bruce Momjian 已提交
101
 * in pointers if valid order can be found.
102
 */
B
Bruce Momjian 已提交
103
bool
B
Bruce Momjian 已提交
104
order_joinkeys_by_pathkeys(List *pathkeys,
B
Bruce Momjian 已提交
105 106 107
							List *joinkeys,
							List *joinclauses,
							int outer_or_inner,
B
Bruce Momjian 已提交
108
							List **matchedJoinKeysPtr,
B
Bruce Momjian 已提交
109
							List **matchedJoinClausesPtr)
110
{
111 112 113 114 115
	List	   *matched_joinkeys = NIL;
	List	   *matched_joinclauses = NIL;
	List	   *pathkey = NIL;
	List	   *i = NIL;
	int			matched_joinkey_index = -1;
B
Bruce Momjian 已提交
116
	int			matched_keys = 0;
B
Bruce Momjian 已提交
117 118 119 120
	/*
	 *	Reorder the joinkeys by picking out one that matches each pathkey,
	 *	and create a new joinkey/joinclause list in pathkey order
	 */
121 122 123
	foreach(i, pathkeys)
	{
		pathkey = lfirst(i);
B
Bruce Momjian 已提交
124 125
		matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
													   outer_or_inner);
126 127 128

		if (matched_joinkey_index != -1)
		{
B
Bruce Momjian 已提交
129 130 131 132 133 134 135 136 137 138 139 140 141
			matched_keys++;
			if (matchedJoinKeysPtr)
			{
				JoinKey	   *joinkey = nth(matched_joinkey_index, joinkeys);
				matched_joinkeys = lappend(matched_joinkeys, joinkey);
			}
			
			if (matchedJoinClausesPtr && joinclauses)
			{
				Expr	   *joinclause = nth(matched_joinkey_index,
											 joinclauses);
				matched_joinclauses = lappend(matched_joinclauses, joinclause);
			}
142 143
		}
		else
B
Bruce Momjian 已提交
144 145 146 147 148 149 150 151
			/*	A pathkey could not be matched. */
			break;
	}

	/*
	 *	Did we fail to match all the joinkeys?
	 *	Extra pathkeys are no problem.
	 */
B
Bruce Momjian 已提交
152
	if (matched_keys != length(joinkeys))
B
Bruce Momjian 已提交
153
	{
B
Bruce Momjian 已提交
154 155 156 157 158
			if (matchedJoinKeysPtr)
				*matchedJoinKeysPtr = NIL;
			if (matchedJoinClausesPtr)
				*matchedJoinClausesPtr = NIL;
			return false;
159 160
	}

B
Bruce Momjian 已提交
161 162 163 164 165
	if (matchedJoinKeysPtr)
		*matchedJoinKeysPtr = matched_joinkeys;
	if (matchedJoinClausesPtr)
		*matchedJoinClausesPtr = matched_joinclauses;
	return true;
166 167
}

B
Bruce Momjian 已提交
168

169
/*
170
 * match_pathkey_joinkeys
171 172
 *	  Returns the 0-based index into 'joinkeys' of the first joinkey whose
 *	  outer or inner subkey matches any subkey of 'pathkey'.
B
Bruce Momjian 已提交
173 174
 *
 *	All these keys are equivalent, so any of them can match.  See above.
175 176
 */
static int
177 178
match_pathkey_joinkeys(List *pathkey,
					   List *joinkeys,
B
Bruce Momjian 已提交
179
					   int outer_or_inner)
180
{
181 182
	Var		   *path_subkey;
	int			pos;
B
Bruce Momjian 已提交
183
	List	   *i, *x;
184
	JoinKey    *jk;
185 186 187 188 189 190 191 192

	foreach(i, pathkey)
	{
		path_subkey = (Var *) lfirst(i);
		pos = 0;
		foreach(x, joinkeys)
		{
			jk = (JoinKey *) lfirst(x);
B
Bruce Momjian 已提交
193
			if (var_equal(path_subkey, extract_join_key(jk, outer_or_inner)))
194
				return pos;
195 196
			pos++;
		}
197
	}
198
	return -1;					/* no index found	*/
199 200
}

B
Bruce Momjian 已提交
201

202
/*
B
Bruce Momjian 已提交
203
 * get_cheapest_path_for_joinkeys
204 205 206 207 208 209 210 211 212 213
 *	  Attempts to find a path in 'paths' whose keys match a set of join
 *	  keys 'joinkeys'.	To match,
 *	  1. the path node ordering must equal 'ordering'.
 *	  2. each subkey of a given path must match(i.e., be(var_equal) to) the
 *		 appropriate subkey of the corresponding join key in 'joinkeys',
 *		 i.e., the Nth path key must match its subkeys against the subkey of
 *		 the Nth join key in 'joinkeys'.
 *
 * 'joinkeys' is the list of key pairs to which the path keys must be
 *		matched
214
 * 'ordering' is the ordering of the(outer) path to which 'joinkeys'
215
 *		must correspond
216
 * 'paths' is a list of(inner) paths which are to be matched against
217
 *		each join key in 'joinkeys'
B
Bruce Momjian 已提交
218
 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
219 220
 *		in 'joinkeys'
 *
B
Bruce Momjian 已提交
221
 *	Find the cheapest path that matches the join keys
222
 */
223
Path *
B
Bruce Momjian 已提交
224 225 226 227
get_cheapest_path_for_joinkeys(List *joinkeys,
								 PathOrder *ordering,
								 List *paths,
								 int outer_or_inner)
228
{
229 230
	Path	   *matched_path = NULL;
	List	   *i = NIL;
231 232 233

	foreach(i, paths)
	{
234
		Path	   *path = (Path *) lfirst(i);
B
Bruce Momjian 已提交
235
		int			better_sort, better_key;
B
Bruce Momjian 已提交
236
		
B
Bruce Momjian 已提交
237 238
		if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
									   outer_or_inner, NULL, NULL) &&
B
Bruce Momjian 已提交
239 240
			pathorder_match(ordering, path->pathorder, &better_sort) &&
			better_sort == 0)
241 242 243 244 245 246 247
		{
			if (matched_path)
				if (path->path_cost < matched_path->path_cost)
					matched_path = path;
			else
				matched_path = path;
		}
248
	}
249
	return matched_path;
250 251 252
}


253
/*
254
 * extract_path_keys
255 256 257 258
 *	  Builds a subkey list for a path by pulling one of the subkeys from
 *	  a list of join keys 'joinkeys' and then finding the var node in the
 *	  target list 'tlist' that corresponds to that subkey.
 *
259 260
 * 'joinkeys' is a list of join key pairs
 * 'tlist' is a relation target list
B
Bruce Momjian 已提交
261
 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
B
Bruce Momjian 已提交
262
 *	in 'joinkeys'
263
 *
264
 * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
265
 * It is a list of lists because of multi-key indexes.
266
 */
267
List *
268 269
extract_path_keys(List *joinkeys,
				  List *tlist,
B
Bruce Momjian 已提交
270
				  int outer_or_inner)
271
{
272 273
	List	   *pathkeys = NIL;
	List	   *jk;
274 275 276

	foreach(jk, joinkeys)
	{
277 278 279 280
		JoinKey    *jkey = (JoinKey *) lfirst(jk);
		Var		   *var,
				   *key;
		List	   *p;
281

282 283 284
		/*
		 * find the right Var in the target list for this key
		 */
B
Bruce Momjian 已提交
285
		var = (Var *) extract_join_key(jkey, outer_or_inner);
B
rename  
Bruce Momjian 已提交
286
		key = (Var *) matching_tlist_var(var, tlist);
287 288

		/*
289
		 * Include it in the pathkeys list if we haven't already done so
290 291 292
		 */
		foreach(p, pathkeys)
		{
293
			Var		   *pkey = lfirst((List *) lfirst(p));		/* XXX fix me */
294 295 296 297 298 299 300

			if (key == pkey)
				break;
		}
		if (p != NIL)
			continue;			/* key already in pathkeys */

301
		pathkeys = lappend(pathkeys, lcons(key, NIL));
302
	}
303
	return pathkeys;
304 305 306 307
}


/****************************************************************************
308
 *		NEW PATHKEY FORMATION
309 310
 ****************************************************************************/

311
/*
312
 * new_join_pathkeys
313 314 315 316 317 318 319 320
 *	  Find the path keys for a join relation by finding all vars in the list
 *	  of join clauses 'joinclauses' such that:
 *		(1) the var corresponding to the outer join relation is a
 *			key on the outer path
 *		(2) the var appears in the target list of the join relation
 *	  In other words, add to each outer path key the inner path keys that
 *	  are required for qualification.
 *
321 322
 * 'outer_pathkeys' is the list of the outer path's path keys
 * 'join_rel_tlist' is the target list of the join relation
323
 * 'joinclauses' is the list of restricting join clauses
324 325 326
 *
 * Returns the list of new path keys.
 *
327
 */
328
List *
329 330 331
new_join_pathkeys(List *outer_pathkeys,
				  List *join_rel_tlist,
				  List *joinclauses)
332
{
333 334 335 336
	List	   *outer_pathkey = NIL;
	List	   *t_list = NIL;
	List	   *x;
	List	   *i = NIL;
337 338 339 340

	foreach(i, outer_pathkeys)
	{
		outer_pathkey = lfirst(i);
341
		x = new_join_pathkey(outer_pathkey, NIL, join_rel_tlist, joinclauses);
342 343
		if (x != NIL)
			t_list = lappend(t_list, x);
344
	}
345
	return t_list;
346 347
}

348
/*
349
 * new_join_pathkey
350 351 352 353 354
 *	  Finds new vars that become subkeys due to qualification clauses that
 *	  contain any previously considered subkeys.  These new subkeys plus the
 *	  subkeys from 'subkeys' form a new pathkey for the join relation.
 *
 *	  Note that each returned subkey is the var node found in
355
 *	  'join_rel_tlist' rather than the joinclause var node.
356
 *
357
 * 'subkeys' is a list of subkeys for which matching subkeys are to be
358
 *		found
359
 * 'considered_subkeys' is the current list of all subkeys corresponding
360 361
 *		to a given pathkey
 *
362
 * Returns a new pathkey(list of subkeys).
363
 *
364
 */
365
static List *
366 367 368 369
new_join_pathkey(List *subkeys,
				 List *considered_subkeys,
				 List *join_rel_tlist,
				 List *joinclauses)
370
{
371 372 373 374 375 376
	List	   *t_list = NIL;
	Var		   *subkey;
	List	   *i = NIL;
	List	   *matched_subkeys = NIL;
	Expr	   *tlist_key = (Expr *) NULL;
	List	   *newly_considered_subkeys = NIL;
377 378 379 380 381 382

	foreach(i, subkeys)
	{
		subkey = (Var *) lfirst(i);
		if (subkey == NULL)
			break;				/* XXX something is wrong */
383
		matched_subkeys = new_matching_subkeys(subkey, considered_subkeys,
384
								 join_rel_tlist, joinclauses);
B
rename  
Bruce Momjian 已提交
385
		tlist_key = matching_tlist_var(subkey, join_rel_tlist);
386 387 388 389 390
		newly_considered_subkeys = NIL;

		if (tlist_key)
		{
			if (!member(tlist_key, matched_subkeys))
B
Bruce Momjian 已提交
391
				newly_considered_subkeys = lcons(tlist_key, matched_subkeys);
392 393 394 395
		}
		else
			newly_considered_subkeys = matched_subkeys;

396
		considered_subkeys =  append(considered_subkeys, newly_considered_subkeys);
397 398 399

		t_list = nconc(t_list, newly_considered_subkeys);
	}
400
	return t_list;
401 402
}

403
/*
404
 * new_matching_subkeys
405
 *	  Returns a list of new subkeys:
406
 *	  (1) which are not listed in 'considered_subkeys'
407 408
 *	  (2) for which the "other" variable in some clause in 'joinclauses' is
 *		  'subkey'
409
 *	  (3) which are mentioned in 'join_rel_tlist'
410 411
 *
 *	  Note that each returned subkey is the var node found in
412
 *	  'join_rel_tlist' rather than the joinclause var node.
413
 *
414
 * 'subkey' is the var node for which we are trying to find matching
415 416
 *		clauses
 *
417 418 419
 * Returns a list of new subkeys.
 *
 */
420
static List *
421 422 423 424
new_matching_subkeys(Var *subkey,
					 List *considered_subkeys,
					 List *join_rel_tlist,
					 List *joinclauses)
425
{
426
	List	   *t_list = NIL;
427 428 429
	Expr	   *joinclause;
	List	   *i;
	Expr	   *tlist_other_var;
430 431 432 433

	foreach(i, joinclauses)
	{
		joinclause = lfirst(i);
B
rename  
Bruce Momjian 已提交
434
		tlist_other_var = matching_tlist_var(
435 436
									other_join_clause_var(subkey, joinclause),
									join_rel_tlist);
437 438 439 440 441 442 443 444 445 446 447 448 449 450

		if (tlist_other_var &&
			!(member(tlist_other_var, considered_subkeys)))
		{

			/* XXX was "push" function	*/
			considered_subkeys = lappend(considered_subkeys,
										 tlist_other_var);

			/*
			 * considered_subkeys = nreverse(considered_subkeys); XXX -- I
			 * am not sure of this.
			 */

451
			t_list = lappend(t_list, tlist_other_var);
452 453
		}
	}
454
	return t_list;
455
}