/*-------------------------------------------------------------------------
 *
 * plancat.c
 *	   routines for accessing the system catalogs
 *
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/util/plancat.c
 *
 *-------------------------------------------------------------------------
 */
16
#include "postgres.h"
17

18 19
#include <math.h>

B
Bruce Momjian 已提交
20 21
#include "access/genam.h"
#include "access/heapam.h"
22
#include "access/htup_details.h"
23
#include "access/nbtree.h"
24
#include "access/sysattr.h"
25
#include "access/transam.h"
26
#include "access/xlog.h"
27
#include "catalog/catalog.h"
28
#include "catalog/heap.h"
29
#include "foreign/fdwapi.h"
30
#include "miscadmin.h"
31
#include "nodes/makefuncs.h"
32
#include "optimizer/clauses.h"
33
#include "optimizer/cost.h"
34
#include "optimizer/plancat.h"
35
#include "optimizer/predtest.h"
36
#include "optimizer/prep.h"
37
#include "parser/parse_relation.h"
38
#include "parser/parsetree.h"
39
#include "rewrite/rewriteManip.h"
40
#include "storage/bufmgr.h"
41
#include "utils/lsyscache.h"
42
#include "utils/rel.h"
43
#include "utils/snapmgr.h"
44 45


46
/* GUC parameter */
47
int			constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
48

49 50 51
/* Hook for plugins to get control in get_relation_info() */
get_relation_info_hook_type get_relation_info_hook = NULL;

52

53
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
54 55
static List *get_relation_constraints(PlannerInfo *root,
						 Oid relationObjectId, RelOptInfo *rel,
56
						 bool include_notnull);
57 58
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
				  Relation heapRelation);
59 60


61
/*
62
 * get_relation_info -
63
 *	  Retrieves catalog information for a given relation.
64 65 66 67
 *
 * Given the Oid of the relation, return the following info into fields
 * of the RelOptInfo struct:
 *
68 69
 *	min_attr	lowest valid AttrNumber
 *	max_attr	highest valid AttrNumber
70
 *	indexlist	list of IndexOptInfos for relation's indexes
71
 *	fdwroutine	if it's a foreign table, the FDW function pointers
72 73
 *	pages		number of pages
 *	tuples		number of tuples
74 75 76 77
 *
 * Also, initialize the attr_needed[] and attr_widths[] arrays.  In most
 * cases these are left as zeroes, but sometimes we need to compute attr
 * widths here, and we may as well cache the results for costsize.c.
78 79 80 81 82
 *
 * If inhparent is true, all we need to do is set up the attr arrays:
 * the RelOptInfo actually represents the appendrel formed by an inheritance
 * tree, and so the parent rel's physical size and index information isn't
 * important for it.
83 84
 */
void
85 86
get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
				  RelOptInfo *rel)
87
{
88
	Index		varno = rel->relid;
89
	Relation	relation;
90 91
	bool		hasindex;
	List	   *indexinfos = NIL;
92

93
	/*
B
Bruce Momjian 已提交
94 95 96
	 * We need not lock the relation since it was already locked, either by
	 * the rewriter or when expand_inherited_rtentry() added it to the query's
	 * rangetable.
97
	 */
98
	relation = heap_open(relationObjectId, NoLock);
99

100 101 102 103 104 105
	/* Temporary and unlogged relations are inaccessible during recovery. */
	if (!RelationNeedsWAL(relation) && RecoveryInProgress())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary or unlogged relations during recovery")));

106 107
	rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
	rel->max_attr = RelationGetNumberOfAttributes(relation);
108
	rel->reltablespace = RelationGetForm(relation)->reltablespace;
109

110 111 112 113 114 115 116
	Assert(rel->max_attr >= rel->min_attr);
	rel->attr_needed = (Relids *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
	rel->attr_widths = (int32 *)
		palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));

	/*
117
	 * Estimate relation size --- unless it's an inheritance parent, in which
B
Bruce Momjian 已提交
118 119
	 * case the size will be computed later in set_append_rel_pathlist, and we
	 * must leave it zero for now to avoid bollixing the total_table_pages
120
	 * calculation.
121
	 */
122 123
	if (!inhparent)
		estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
124
						  &rel->pages, &rel->tuples, &rel->allvisfrac);
125

126
	/*
B
Bruce Momjian 已提交
127
	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
128
	 * Don't bother with indexes for an inheritance parent, either.
129
	 */
130
	if (inhparent ||
131
		(IgnoreSystemIndexes && IsSystemRelation(relation)))
132 133 134
		hasindex = false;
	else
		hasindex = relation->rd_rel->relhasindex;
135

136
	if (hasindex)
137
	{
138 139
		List	   *indexoidlist;
		ListCell   *l;
140
		LOCKMODE	lmode;
141

142
		indexoidlist = RelationGetIndexList(relation);
143

144 145 146 147 148 149 150 151 152 153 154 155 156
		/*
		 * For each index, we get the same type of lock that the executor will
		 * need, and do not release it.  This saves a couple of trips to the
		 * shared lock manager while not creating any real loss of
		 * concurrency, because no schema changes could be happening on the
		 * index while we hold lock on the parent rel, and neither lock type
		 * blocks any other kind of index operation.
		 */
		if (rel->relid == root->parse->resultRelation)
			lmode = RowExclusiveLock;
		else
			lmode = AccessShareLock;

157
		foreach(l, indexoidlist)
158
		{
159
			Oid			indexoid = lfirst_oid(l);
160 161 162
			Relation	indexRelation;
			Form_pg_index index;
			IndexOptInfo *info;
163
			int			ncolumns;
164 165
			int			i;

166 167 168
			/*
			 * Extract info from the relation descriptor for the index.
			 */
169
			indexRelation = index_open(indexoid, lmode);
170
			index = indexRelation->rd_index;
171

172 173
			/*
			 * Ignore invalid indexes, since they can't safely be used for
B
Bruce Momjian 已提交
174 175
			 * queries.  Note that this is OK because the data structure we
			 * are constructing is only used by the planner --- the executor
176 177
			 * still needs to insert into "invalid" indexes, if they're marked
			 * IndexIsReady.
178
			 */
179
			if (!IndexIsValid(index))
180 181 182 183 184
			{
				index_close(indexRelation, NoLock);
				continue;
			}

185
			/*
B
Bruce Momjian 已提交
186 187 188
			 * If the index is valid, but cannot yet be used, ignore it; but
			 * mark the plan we are generating as transient. See
			 * src/backend/access/heap/README.HOT for discussion.
189 190 191 192 193 194 195 196 197 198
			 */
			if (index->indcheckxmin &&
				!TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data),
									   TransactionXmin))
			{
				root->glob->transientPlan = true;
				index_close(indexRelation, NoLock);
				continue;
			}

199 200 201
			info = makeNode(IndexOptInfo);

			info->indexoid = index->indexrelid;
202 203
			info->reltablespace =
				RelationGetForm(indexRelation)->reltablespace;
204
			info->rel = rel;
205 206
			info->ncolumns = ncolumns = index->indnatts;
			info->indexkeys = (int *) palloc(sizeof(int) * ncolumns);
207
			info->indexcollations = (Oid *) palloc(sizeof(Oid) * ncolumns);
208 209
			info->opfamily = (Oid *) palloc(sizeof(Oid) * ncolumns);
			info->opcintype = (Oid *) palloc(sizeof(Oid) * ncolumns);
210
			info->canreturn = (bool *) palloc(sizeof(bool) * ncolumns);
211

212
			for (i = 0; i < ncolumns; i++)
213
			{
214
				info->indexkeys[i] = index->indkey.values[i];
P
Peter Eisentraut 已提交
215
				info->indexcollations[i] = indexRelation->rd_indcollation[i];
216 217
				info->opfamily[i] = indexRelation->rd_opfamily[i];
				info->opcintype[i] = indexRelation->rd_opcintype[i];
218
				info->canreturn[i] = index_can_return(indexRelation, i + 1);
219 220 221
			}

			info->relam = indexRelation->rd_rel->relam;
222
			info->amcostestimate = indexRelation->rd_am->amcostestimate;
223
			info->amcanorderbyop = indexRelation->rd_am->amcanorderbyop;
224
			info->amoptionalkey = indexRelation->rd_am->amoptionalkey;
225
			info->amsearcharray = indexRelation->rd_am->amsearcharray;
226
			info->amsearchnulls = indexRelation->rd_am->amsearchnulls;
227 228
			info->amhasgettuple = OidIsValid(indexRelation->rd_am->amgettuple);
			info->amhasgetbitmap = OidIsValid(indexRelation->rd_am->amgetbitmap);
229 230

			/*
231
			 * Fetch the ordering information for the index, if any.
232
			 */
233
			if (info->relam == BTREE_AM_OID)
234
			{
235 236 237 238 239 240 241 242 243
				/*
				 * If it's a btree index, we can use its opfamily OIDs
				 * directly as the sort ordering opfamily OIDs.
				 */
				Assert(indexRelation->rd_am->amcanorder);

				info->sortopfamily = info->opfamily;
				info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns);
				info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns);
244

245
				for (i = 0; i < ncolumns; i++)
246
				{
B
Bruce Momjian 已提交
247
					int16		opt = indexRelation->rd_indoption[i];
248

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
				}
			}
			else if (indexRelation->rd_am->amcanorder)
			{
				/*
				 * Otherwise, identify the corresponding btree opfamilies by
				 * trying to map this index's "<" operators into btree.  Since
				 * "<" uniquely defines the behavior of a sort order, this is
				 * a sufficient test.
				 *
				 * XXX This method is rather slow and also requires the
				 * undesirable assumption that the other index AM numbers its
				 * strategies the same as btree.  It'd be better to have a way
				 * to explicitly declare the corresponding btree opfamily for
				 * each opfamily of the other index type.  But given the lack
				 * of current or foreseeable amcanorder index types, it's not
				 * worth expending more effort on now.
				 */
				info->sortopfamily = (Oid *) palloc(sizeof(Oid) * ncolumns);
				info->reverse_sort = (bool *) palloc(sizeof(bool) * ncolumns);
				info->nulls_first = (bool *) palloc(sizeof(bool) * ncolumns);

				for (i = 0; i < ncolumns; i++)
				{
					int16		opt = indexRelation->rd_indoption[i];
					Oid			ltopr;
					Oid			btopfamily;
					Oid			btopcintype;
					int16		btstrategy;
B
Bruce Momjian 已提交
280

281 282 283 284 285 286 287 288 289 290 291 292 293 294
					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;

					ltopr = get_opfamily_member(info->opfamily[i],
												info->opcintype[i],
												info->opcintype[i],
												BTLessStrategyNumber);
					if (OidIsValid(ltopr) &&
						get_ordering_op_properties(ltopr,
												   &btopfamily,
												   &btopcintype,
												   &btstrategy) &&
						btopcintype == info->opcintype[i] &&
						btstrategy == BTLessStrategyNumber)
295
					{
296 297
						/* Successful mapping */
						info->sortopfamily[i] = btopfamily;
298
					}
299
					else
300
					{
301 302 303 304 305
						/* Fail ... quietly treat index as unordered */
						info->sortopfamily = NULL;
						info->reverse_sort = NULL;
						info->nulls_first = NULL;
						break;
306
					}
307 308
				}
			}
309 310 311 312 313 314
			else
			{
				info->sortopfamily = NULL;
				info->reverse_sort = NULL;
				info->nulls_first = NULL;
			}
315

316 317 318
			/*
			 * Fetch the index expressions and predicate, if any.  We must
			 * modify the copies we obtain from the relcache to have the
B
Bruce Momjian 已提交
319 320
			 * correct varno for the parent relation, so that they match up
			 * correctly against qual clauses.
321 322 323 324 325 326 327
			 */
			info->indexprs = RelationGetIndexExpressions(indexRelation);
			info->indpred = RelationGetIndexPredicate(indexRelation);
			if (info->indexprs && varno != 1)
				ChangeVarNodes((Node *) info->indexprs, 1, varno, 0);
			if (info->indpred && varno != 1)
				ChangeVarNodes((Node *) info->indpred, 1, varno, 0);
328 329 330 331

			/* Build targetlist using the completed indexprs data */
			info->indextlist = build_index_tlist(root, info, relation);

B
Bruce Momjian 已提交
332
			info->predOK = false;		/* set later in indxpath.c */
333
			info->unique = index->indisunique;
334
			info->immediate = index->indimmediate;
335
			info->hypothetical = false;
336

337
			/*
B
Bruce Momjian 已提交
338 339 340 341 342
			 * Estimate the index size.  If it's not a partial index, we lock
			 * the number-of-tuples estimate to equal the parent table; if it
			 * is partial then we have to use the same methods as we would for
			 * a table, except we can be sure that the index is not larger
			 * than the table.
343 344 345 346 347 348 349 350
			 */
			if (info->indpred == NIL)
			{
				info->pages = RelationGetNumberOfBlocks(indexRelation);
				info->tuples = rel->tuples;
			}
			else
			{
351
				double		allvisfrac; /* dummy */
352

353
				estimate_rel_size(indexRelation, NULL,
354
								  &info->pages, &info->tuples, &allvisfrac);
355 356 357 358
				if (info->tuples > rel->tuples)
					info->tuples = rel->tuples;
			}

359 360 361 362 363 364 365 366 367 368 369
			if (info->relam == BTREE_AM_OID)
			{
				/* For btrees, get tree height while we have the index open */
				info->tree_height = _bt_getrootheight(indexRelation);
			}
			else
			{
				/* For other index types, just set it to "unknown" for now */
				info->tree_height = -1;
			}

370
			index_close(indexRelation, NoLock);
371 372 373 374

			indexinfos = lcons(info, indexinfos);
		}

375
		list_free(indexoidlist);
376
	}
377

378 379
	rel->indexlist = indexinfos;

380 381
	/* Grab the fdwroutine info using the relcache, while we have it */
	if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
382 383
	{
		rel->fdw_handler = GetFdwHandlerByRelId(RelationGetRelid(relation));
384
		rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
385
	}
386
	else
387 388
	{
		rel->fdw_handler = InvalidOid;
389
		rel->fdwroutine = NULL;
390
	}
391
	heap_close(relation, NoLock);
392 393 394 395 396 397 398 399

	/*
	 * Allow a plugin to editorialize on the info we obtained from the
	 * catalogs.  Actions might include altering the assumed relation size,
	 * removing an index, or adding a hypothetical index to the indexlist.
	 */
	if (get_relation_info_hook)
		(*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
400 401
}

402 403 404
/*
 * estimate_rel_size - estimate # pages and # tuples in a table or index
 *
405 406 407
 * We also estimate the fraction of the pages that are marked all-visible in
 * the visibility map, for use in estimation of index-only scans.
 *
408
 * If attr_widths isn't NULL, it points to the zero-index entry of the
409
 * relation's attr_widths[] cache; we fill this in if we have need to compute
410 411
 * the attribute widths for estimation purposes.
 */
412
void
413
estimate_rel_size(Relation rel, int32 *attr_widths,
414
				  BlockNumber *pages, double *tuples, double *allvisfrac)
415
{
B
Bruce Momjian 已提交
416 417
	BlockNumber curpages;
	BlockNumber relpages;
418
	double		reltuples;
419
	BlockNumber relallvisible;
420 421 422 423 424 425
	double		density;

	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_INDEX:
426
		case RELKIND_MATVIEW:
427 428
		case RELKIND_TOASTVALUE:
			/* it has storage, ok to call the smgr */
429 430 431 432
			curpages = RelationGetNumberOfBlocks(rel);

			/*
			 * HACK: if the relation has never yet been vacuumed, use a
433 434 435
			 * minimum size estimate of 10 pages.  The idea here is to avoid
			 * assuming a newly-created table is really small, even if it
			 * currently is, because that may not be true once some data gets
B
Bruce Momjian 已提交
436
			 * loaded into it.  Once a vacuum or analyze cycle has been done
437 438 439 440
			 * on it, it's more reasonable to believe the size is somewhat
			 * stable.
			 *
			 * (Note that this is only an issue if the plan gets cached and
B
Bruce Momjian 已提交
441
			 * used again after the table has been filled.  What we're trying
442 443 444 445 446 447
			 * to avoid is using a nestloop-type plan on a table that has
			 * grown substantially since the plan was made.  Normally,
			 * autovacuum/autoanalyze will occur once enough inserts have
			 * happened and cause cached-plan invalidation; but that doesn't
			 * happen instantaneously, and it won't happen at all for cases
			 * such as temporary tables.)
448
			 *
449
			 * We approximate "never vacuumed" by "has relpages = 0", which
B
Bruce Momjian 已提交
450
			 * means this will also fire on genuinely empty relations.  Not
451 452 453
			 * great, but fortunately that's a seldom-seen case in the real
			 * world, and it shouldn't degrade the quality of the plan too
			 * much anyway to err in this direction.
454 455 456 457 458 459
			 *
			 * There are two exceptions wherein we don't apply this heuristic.
			 * One is if the table has inheritance children.  Totally empty
			 * parent tables are quite common, so we should be willing to
			 * believe that they are empty.  Also, we don't apply the 10-page
			 * minimum to indexes.
460
			 */
461 462 463 464
			if (curpages < 10 &&
				rel->rd_rel->relpages == 0 &&
				!rel->rd_rel->relhassubclass &&
				rel->rd_rel->relkind != RELKIND_INDEX)
465 466 467 468
				curpages = 10;

			/* report estimated # pages */
			*pages = curpages;
469 470 471 472
			/* quick exit if rel is clearly empty */
			if (curpages == 0)
			{
				*tuples = 0;
473
				*allvisfrac = 0;
474 475 476 477 478
				break;
			}
			/* coerce values in pg_class to more desirable types */
			relpages = (BlockNumber) rel->rd_rel->relpages;
			reltuples = (double) rel->rd_rel->reltuples;
479
			relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
B
Bruce Momjian 已提交
480

481
			/*
482 483 484 485
			 * If it's an index, discount the metapage while estimating the
			 * number of tuples.  This is a kluge because it assumes more than
			 * it ought to about index structure.  Currently it's OK for
			 * btree, hash, and GIN indexes but suspect for GiST indexes.
486 487 488 489 490 491 492
			 */
			if (rel->rd_rel->relkind == RELKIND_INDEX &&
				relpages > 0)
			{
				curpages--;
				relpages--;
			}
493

494 495 496 497 498 499 500 501 502
			/* estimate number of tuples from previous tuple density */
			if (relpages > 0)
				density = reltuples / (double) relpages;
			else
			{
				/*
				 * When we have no data because the relation was truncated,
				 * estimate tuple width from attribute datatypes.  We assume
				 * here that the pages are completely full, which is OK for
B
Bruce Momjian 已提交
503 504
				 * tables (since they've presumably not been VACUUMed yet) but
				 * is probably an overestimate for indexes.  Fortunately
505 506
				 * get_relation_info() can clamp the overestimate to the
				 * parent table's size.
507 508
				 *
				 * Note: this code intentionally disregards alignment
B
Bruce Momjian 已提交
509 510 511 512
				 * considerations, because (a) that would be gilding the lily
				 * considering how crude the estimate is, and (b) it creates
				 * platform dependencies in the default plans which are kind
				 * of a headache for regression testing.
513
				 */
514
				int32		tuple_width;
515

516
				tuple_width = get_rel_data_width(rel, attr_widths);
517
				tuple_width += MAXALIGN(SizeofHeapTupleHeader);
518
				tuple_width += sizeof(ItemIdData);
519
				/* note: integer division is intentional here */
520
				density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
521 522
			}
			*tuples = rint(density * (double) curpages);
523 524 525 526 527 528 529 530 531 532 533 534 535

			/*
			 * We use relallvisible as-is, rather than scaling it up like we
			 * do for the pages and tuples counts, on the theory that any
			 * pages added since the last VACUUM are most likely not marked
			 * all-visible.  But costsize.c wants it converted to a fraction.
			 */
			if (relallvisible == 0 || curpages <= 0)
				*allvisfrac = 0;
			else if ((double) relallvisible >= curpages)
				*allvisfrac = 1;
			else
				*allvisfrac = (double) relallvisible / curpages;
536 537 538 539 540
			break;
		case RELKIND_SEQUENCE:
			/* Sequences always have a known size */
			*pages = 1;
			*tuples = 1;
541
			*allvisfrac = 0;
542
			break;
543 544 545 546
		case RELKIND_FOREIGN_TABLE:
			/* Just use whatever's in pg_class */
			*pages = rel->rd_rel->relpages;
			*tuples = rel->rd_rel->reltuples;
547
			*allvisfrac = 0;
548
			break;
549 550 551 552
		default:
			/* else it has no disk storage; probably shouldn't get here? */
			*pages = 0;
			*tuples = 0;
553
			*allvisfrac = 0;
554 555 556 557
			break;
	}
}

558

559 560 561 562
/*
 * get_rel_data_width
 *
 * Estimate the average width of (the data part of) the relation's tuples,
 * as the sum of per-column width estimates.
 *
 * attr_widths, when not NULL, points at the zero-index entry of the
 * relation's attr_widths[] cache: positive entries found there are reused,
 * and widths computed here are stored back into it.
 *
 * Dropped columns are currently ignored.  Ideally they would be counted,
 * but we have no way to get information about them; and since they might
 * be mostly NULLs, treating them as zero-width is not necessarily wrong
 * anyway.
 */
static int32
get_rel_data_width(Relation rel, int32 *attr_widths)
{
	int32		total = 0;
	int			attno;

	for (attno = 1; attno <= RelationGetNumberOfAttributes(rel); attno++)
	{
		Form_pg_attribute attr = rel->rd_att->attrs[attno - 1];
		int32		width;

		if (attr->attisdropped)
			continue;

		if (attr_widths != NULL && attr_widths[attno] > 0)
		{
			/* a cached width is available; reuse it */
			width = attr_widths[attno];
		}
		else
		{
			/* This should match set_rel_width() in costsize.c */
			width = get_attavgwidth(RelationGetRelid(rel), attno);
			if (width <= 0)
			{
				width = get_typavgwidth(attr->atttypid, attr->atttypmod);
				Assert(width > 0);
			}

			/* remember the result when the caller gave us a cache */
			if (attr_widths != NULL)
				attr_widths[attno] = width;
		}

		total += width;
	}

	return total;
}

/*
 * get_relation_data_width
 *
611 612
 * External API for get_rel_data_width: same behavior except we have to
 * open the relcache entry.
613 614
 */
int32
615
get_relation_data_width(Oid relid, int32 *attr_widths)
616 617 618 619 620 621 622
{
	int32		result;
	Relation	relation;

	/* As above, assume relation is already locked */
	relation = heap_open(relid, NoLock);

623
	result = get_rel_data_width(relation, attr_widths);
624 625 626 627 628 629 630

	heap_close(relation, NoLock);

	return result;
}


631 632 633
/*
 * get_relation_constraints
 *
634
 * Retrieve the validated CHECK constraint expressions of the given relation.
635 636 637 638 639 640
 *
 * Returns a List (possibly empty) of constraint expressions.  Each one
 * has been canonicalized, and its Vars are changed to have the varno
 * indicated by rel->relid.  This allows the expressions to be easily
 * compared to expressions taken from WHERE.
 *
641 642 643
 * If include_notnull is true, "col IS NOT NULL" expressions are generated
 * and added to the result for each column that's marked attnotnull.
 *
644 645 646 647
 * Note: at present this is invoked at most once per relation per planner
 * run, and in many cases it won't be invoked at all, so there seems no
 * point in caching the data in RelOptInfo.
 */
648
static List *
649 650
get_relation_constraints(PlannerInfo *root,
						 Oid relationObjectId, RelOptInfo *rel,
651
						 bool include_notnull)
652 653 654 655 656 657 658 659 660 661 662 663 664 665
{
	List	   *result = NIL;
	Index		varno = rel->relid;
	Relation	relation;
	TupleConstr *constr;

	/*
	 * We assume the relation has already been safely locked.
	 */
	relation = heap_open(relationObjectId, NoLock);

	constr = relation->rd_att->constr;
	if (constr != NULL)
	{
B
Bruce Momjian 已提交
666 667
		int			num_check = constr->num_check;
		int			i;
668 669 670

		for (i = 0; i < num_check; i++)
		{
B
Bruce Momjian 已提交
671
			Node	   *cexpr;
672

673 674 675 676 677 678 679
			/*
			 * If this constraint hasn't been fully validated yet, we must
			 * ignore it here.
			 */
			if (!constr->check[i].ccvalid)
				continue;

680 681 682 683 684 685 686 687 688 689 690 691
			cexpr = stringToNode(constr->check[i].ccbin);

			/*
			 * Run each expression through const-simplification and
			 * canonicalization.  This is not just an optimization, but is
			 * necessary, because we will be comparing it to
			 * similarly-processed qual clauses, and may fail to detect valid
			 * matches without this.  This must match the processing done to
			 * qual clauses in preprocess_expression()!  (We can skip the
			 * stuff involving subqueries, however, since we don't allow any
			 * in check constraints.)
			 */
692
			cexpr = eval_const_expressions(root, cexpr);
693 694 695 696 697 698 699 700

			cexpr = (Node *) canonicalize_qual((Expr *) cexpr);

			/* Fix Vars to have the desired varno */
			if (varno != 1)
				ChangeVarNodes(cexpr, 1, varno, 0);

			/*
B
Bruce Momjian 已提交
701 702
			 * Finally, convert to implicit-AND format (that is, a List) and
			 * append the resulting item(s) to our output list.
703 704 705 706
			 */
			result = list_concat(result,
								 make_ands_implicit((Expr *) cexpr));
		}
707 708 709 710

		/* Add NOT NULL constraints in expression form, if requested */
		if (include_notnull && constr->has_not_null)
		{
711
			int			natts = relation->rd_att->natts;
712 713 714 715 716 717 718

			for (i = 1; i <= natts; i++)
			{
				Form_pg_attribute att = relation->rd_att->attrs[i - 1];

				if (att->attnotnull && !att->attisdropped)
				{
719
					NullTest   *ntest = makeNode(NullTest);
720 721 722 723 724

					ntest->arg = (Expr *) makeVar(varno,
												  i,
												  att->atttypid,
												  att->atttypmod,
P
Peter Eisentraut 已提交
725
												  att->attcollation,
726 727
												  0);
					ntest->nulltesttype = IS_NOT_NULL;
728
					ntest->argisrow = type_is_rowtype(att->atttypid);
729
					ntest->location = -1;
730 731 732 733
					result = lappend(result, ntest);
				}
			}
		}
734 735 736 737 738 739 740 741
	}

	heap_close(relation, NoLock);

	return result;
}


742 743 744
/*
 * relation_excluded_by_constraints
 *
745 746
 * Detect whether the relation need not be scanned because it has either
 * self-inconsistent restrictions, or restrictions inconsistent with the
747
 * relation's validated CHECK constraints.
748
 *
749 750 751
 * Note: this examines only rel->relid, rel->reloptkind, and
 * rel->baserestrictinfo; therefore it can be called before filling in
 * other fields of the RelOptInfo.
752 753
 */
bool
754 755
relation_excluded_by_constraints(PlannerInfo *root,
								 RelOptInfo *rel, RangeTblEntry *rte)
756
{
757
	List	   *safe_restrictions;
758
	List	   *constraint_pred;
759 760
	List	   *safe_constraints;
	ListCell   *lc;
761

762 763 764
	/* Skip the test if constraint exclusion is disabled for the rel */
	if (constraint_exclusion == CONSTRAINT_EXCLUSION_OFF ||
		(constraint_exclusion == CONSTRAINT_EXCLUSION_PARTITION &&
765 766 767 768
		 !(rel->reloptkind == RELOPT_OTHER_MEMBER_REL ||
		   (root->hasInheritedTarget &&
			rel->reloptkind == RELOPT_BASEREL &&
			rel->relid == root->parse->resultRelation))))
769 770
		return false;

771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
	/*
	 * Check for self-contradictory restriction clauses.  We dare not make
	 * deductions with non-immutable functions, but any immutable clauses that
	 * are self-contradictory allow us to conclude the scan is unnecessary.
	 *
	 * Note: strip off RestrictInfo because predicate_refuted_by() isn't
	 * expecting to see any in its predicate argument.
	 */
	safe_restrictions = NIL;
	foreach(lc, rel->baserestrictinfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);

		if (!contain_mutable_functions((Node *) rinfo->clause))
			safe_restrictions = lappend(safe_restrictions, rinfo->clause);
	}

	if (predicate_refuted_by(safe_restrictions, safe_restrictions))
		return true;

791 792 793 794
	/* Only plain relations have constraints */
	if (rte->rtekind != RTE_RELATION || rte->inh)
		return false;

795
	/*
B
Bruce Momjian 已提交
796
	 * OK to fetch the constraint expressions.  Include "col IS NOT NULL"
797 798
	 * expressions for attnotnull columns, in case we can refute those.
	 */
799
	constraint_pred = get_relation_constraints(root, rte->relid, rel, true);
800 801 802 803

	/*
	 * We do not currently enforce that CHECK constraints contain only
	 * immutable functions, so it's necessary to check here. We daren't draw
B
Bruce Momjian 已提交
804 805 806
	 * conclusions from plan-time evaluation of non-immutable functions. Since
	 * they're ANDed, we can just ignore any mutable constraints in the list,
	 * and reason about the rest.
807
	 */
808 809 810
	safe_constraints = NIL;
	foreach(lc, constraint_pred)
	{
B
Bruce Momjian 已提交
811
		Node	   *pred = (Node *) lfirst(lc);
812 813 814 815

		if (!contain_mutable_functions(pred))
			safe_constraints = lappend(safe_constraints, pred);
	}
816 817 818 819 820

	/*
	 * The constraints are effectively ANDed together, so we can just try to
	 * refute the entire collection at once.  This may allow us to make proofs
	 * that would fail if we took them individually.
821
	 *
B
Bruce Momjian 已提交
822 823 824
	 * Note: we use rel->baserestrictinfo, not safe_restrictions as might seem
	 * an obvious optimization.  Some of the clauses might be OR clauses that
	 * have volatile and nonvolatile subclauses, and it's OK to make
825
	 * deductions with the nonvolatile parts.
826
	 */
827
	if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo))
828 829 830 831 832 833
		return true;

	return false;
}


834 835 836 837 838 839 840 841 842 843
/*
 * build_physical_tlist
 *
 * Build a targetlist consisting of exactly the relation's user attributes,
 * in order.  The executor can special-case such tlists to avoid a projection
 * step at runtime, so we use such tlists preferentially for scan nodes.
 *
 * Exception: if there are any dropped columns, we punt and return NIL.
 * Ideally we would like to handle the dropped-column case too.  However this
 * creates problems for ExecTypeFromTL, which may be asked to build a tupdesc
B
Bruce Momjian 已提交
844
 * for a tlist that includes vars of no-longer-existent types.  In theory we
845 846 847 848
 * could dig out the required info from the pg_attribute entries of the
 * relation, but that data is not readily available to ExecTypeFromTL.
 * For now, we don't apply the physical-tlist optimization when there are
 * dropped cols.
849
 *
850
 * We also support building a "physical" tlist for subqueries, functions,
851 852
 * values lists, and CTEs, since the same optimization can occur in
 * SubqueryScan, FunctionScan, ValuesScan, CteScan, and WorkTableScan nodes.
853 854
 */
List *
855
build_physical_tlist(PlannerInfo *root, RelOptInfo *rel)
856
{
857
	List	   *tlist = NIL;
858
	Index		varno = rel->relid;
859
	RangeTblEntry *rte = planner_rt_fetch(varno, root);
860
	Relation	relation;
861 862 863
	Query	   *subquery;
	Var		   *var;
	ListCell   *l;
864 865
	int			attrno,
				numattrs;
866
	List	   *colvars;
867

868 869 870
	switch (rte->rtekind)
	{
		case RTE_RELATION:
871 872
			/* Assume we already have adequate lock */
			relation = heap_open(rte->relid, NoLock);
873

874 875 876 877
			numattrs = RelationGetNumberOfAttributes(relation);
			for (attrno = 1; attrno <= numattrs; attrno++)
			{
				Form_pg_attribute att_tup = relation->rd_att->attrs[attrno - 1];
878

879 880 881 882 883 884
				if (att_tup->attisdropped)
				{
					/* found a dropped col, so punt */
					tlist = NIL;
					break;
				}
885

886 887 888 889
				var = makeVar(varno,
							  attrno,
							  att_tup->atttypid,
							  att_tup->atttypmod,
P
Peter Eisentraut 已提交
890
							  att_tup->attcollation,
891 892 893 894 895 896 897 898
							  0);

				tlist = lappend(tlist,
								makeTargetEntry((Expr *) var,
												attrno,
												NULL,
												false));
			}
899

900
			heap_close(relation, NoLock);
901 902
			break;

903 904 905 906 907 908
		case RTE_SUBQUERY:
			subquery = rte->subquery;
			foreach(l, subquery->targetList)
			{
				TargetEntry *tle = (TargetEntry *) lfirst(l);

909 910 911 912
				/*
				 * A resjunk column of the subquery can be reflected as
				 * resjunk in the physical tlist; we need not punt.
				 */
913
				var = makeVarFromTargetEntry(varno, tle);
914 915 916 917 918 919 920 921

				tlist = lappend(tlist,
								makeTargetEntry((Expr *) var,
												tle->resno,
												NULL,
												tle->resjunk));
			}
			break;
922

923
		case RTE_FUNCTION:
924 925 926
		case RTE_VALUES:
		case RTE_CTE:
			/* Not all of these can have dropped cols, but share code anyway */
927
			expandRTE(rte, varno, 0, -1, true /* include dropped */ ,
928 929 930 931
					  NULL, &colvars);
			foreach(l, colvars)
			{
				var = (Var *) lfirst(l);
B
Bruce Momjian 已提交
932

933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
				/*
				 * A non-Var in expandRTE's output means a dropped column;
				 * must punt.
				 */
				if (!IsA(var, Var))
				{
					tlist = NIL;
					break;
				}

				tlist = lappend(tlist,
								makeTargetEntry((Expr *) var,
												var->varattno,
												NULL,
												false));
			}
			break;

951 952 953 954 955 956
		default:
			/* caller error */
			elog(ERROR, "unsupported RTE kind %d in build_physical_tlist",
				 (int) rte->rtekind);
			break;
	}
957

958
	return tlist;
959 960
}

961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992
/*
 * build_index_tlist
 *
 * Construct a targetlist describing the columns of the given index.  A
 * column is represented either by a Var for the corresponding
 * base-relation column or, for an expression column, by the expression
 * over base-relation Vars taken from index->indexprs.
 *
 * Indexes never contain dropped columns, so unlike build_physical_tlist
 * there is no failure case here.  A mismatch between the number of
 * expression columns and the length of index->indexprs is reported with
 * elog(ERROR).
 */
static List *
build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
				  Relation heapRelation)
{
	Index		varno = index->rel->relid;
	ListCell   *next_expr = list_head(index->indexprs);
	List	   *result = NIL;
	int			colno;

	for (colno = 0; colno < index->ncolumns; colno++)
	{
		int			keyattno = index->indexkeys[colno];
		Expr	   *colexpr;

		if (keyattno == 0)
		{
			/* expression column: consume the next index expression */
			if (next_expr == NULL)
				elog(ERROR, "wrong number of index expressions");
			colexpr = (Expr *) lfirst(next_expr);
			next_expr = lnext(next_expr);
		}
		else
		{
			/* simple column: negative numbers denote system attributes */
			Form_pg_attribute attr;

			if (keyattno < 0)
				attr = SystemAttributeDefinition(keyattno,
										   heapRelation->rd_rel->relhasoids);
			else
				attr = heapRelation->rd_att->attrs[keyattno - 1];

			colexpr = (Expr *) makeVar(varno,
									   keyattno,
									   attr->atttypid,
									   attr->atttypmod,
									   attr->attcollation,
									   0);
		}

		result = lappend(result,
						 makeTargetEntry(colexpr,
										 colno + 1,
										 NULL,
										 false));
	}

	/* every index expression must have been used up */
	if (next_expr != NULL)
		elog(ERROR, "wrong number of index expressions");

	return result;
}

1025
/*
1026
 * restriction_selectivity
1027
 *
1028
 * Returns the selectivity of a specified restriction operator clause.
1029 1030 1031
 * This code executes registered procedures stored in the
 * operator relation, by calling the function manager.
 *
1032
 * See clause_selectivity() for the meaning of the additional parameters.
1033
 */
1034
Selectivity
1035
restriction_selectivity(PlannerInfo *root,
1036
						Oid operatorid,
1037
						List *args,
1038
						Oid inputcollid,
1039
						int varRelid)
1040
{
1041
	RegProcedure oprrest = get_oprrest(operatorid);
1042 1043
	float8		result;

1044
	/*
1045 1046
	 * if the oprrest procedure is missing for whatever reason, use a
	 * selectivity of 0.5
1047 1048 1049 1050
	 */
	if (!oprrest)
		return (Selectivity) 0.5;

1051 1052 1053 1054 1055 1056
	result = DatumGetFloat8(OidFunctionCall4Coll(oprrest,
												 inputcollid,
												 PointerGetDatum(root),
												 ObjectIdGetDatum(operatorid),
												 PointerGetDatum(args),
												 Int32GetDatum(varRelid)));
1057 1058

	if (result < 0.0 || result > 1.0)
1059
		elog(ERROR, "invalid restriction selectivity: %f", result);
1060 1061

	return (Selectivity) result;
1062 1063 1064
}

/*
1065
 * join_selectivity
1066
 *
1067 1068 1069
 * Returns the selectivity of a specified join operator clause.
 * This code executes registered procedures stored in the
 * operator relation, by calling the function manager.
1070
 */
1071
Selectivity
1072
join_selectivity(PlannerInfo *root,
1073
				 Oid operatorid,
1074
				 List *args,
1075
				 Oid inputcollid,
1076 1077
				 JoinType jointype,
				 SpecialJoinInfo *sjinfo)
1078
{
1079
	RegProcedure oprjoin = get_oprjoin(operatorid);
1080 1081
	float8		result;

1082
	/*
1083 1084
	 * if the oprjoin procedure is missing for whatever reason, use a
	 * selectivity of 0.5
1085 1086 1087 1088
	 */
	if (!oprjoin)
		return (Selectivity) 0.5;

1089 1090 1091 1092 1093 1094 1095
	result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin,
												 inputcollid,
												 PointerGetDatum(root),
												 ObjectIdGetDatum(operatorid),
												 PointerGetDatum(args),
												 Int16GetDatum(jointype),
												 PointerGetDatum(sjinfo)));
1096 1097

	if (result < 0.0 || result > 1.0)
1098
		elog(ERROR, "invalid join selectivity: %f", result);
1099 1100

	return (Selectivity) result;
1101 1102
}

1103 1104 1105 1106 1107 1108
/*
 * has_unique_index
 *
 * Report whether the relation has a unique index on exactly the given
 * attribute, which lets the caller conclude that all (non-null) values
 * of that attribute are distinct.
 *
 * The index's indimmediate property is not examined here, so uniqueness
 * may transiently fail to hold within a transaction.  That is acceptable
 * for statistical estimation, but do not rely on this function for
 * correctness proofs.
 */
bool
has_unique_index(RelOptInfo *rel, AttrNumber attno)
{
	ListCell   *lc;

	foreach(lc, rel->indexlist)
	{
		IndexOptInfo *idx = (IndexOptInfo *) lfirst(lc);

		/* non-unique indexes prove nothing */
		if (!idx->unique)
			continue;

		/*
		 * A multicolumn unique index does not show that the single attribute
		 * is unique by itself.
		 */
		if (idx->ncolumns != 1)
			continue;

		/*
		 * The sole key must be the requested attribute; expressional indexes
		 * are of no interest here.
		 */
		if (idx->indexkeys[0] != attno)
			continue;

		/*
		 * A partial index only counts when it is marked predOK, meaning its
		 * predicate is known to be satisfied by the query.
		 */
		if (idx->indpred != NIL && !idx->predOK)
			continue;

		return true;
	}

	return false;
}